diff --git a/data/api/scikit-learn_v0.24.2_api.json b/data/api/scikit-learn_v0.24.2_api.json
new file mode 100644
index 000000000..714b43dda
--- /dev/null
+++ b/data/api/scikit-learn_v0.24.2_api.json
@@ -0,0 +1,252312 @@
+{
+  "schemaVersion": 1,
+  "distribution": "",
+  "package": "scikit-learn",
+  "version": "0.24.2",
+  "modules": [
+    {
+      "id": "scikit-learn/sklearn",
+      "name": "sklearn",
+      "imports": [
+        {
+          "module": "sys",
+          "alias": null
+        },
+        {
+          "module": "logging",
+          "alias": null
+        },
+        {
+          "module": "os",
+          "alias": null
+        },
+        {
+          "module": "random",
+          "alias": null
+        }
+      ],
+      "from_imports": [
+        {
+          "module": "sklearn._config",
+          "declaration": "get_config",
+          "alias": null
+        },
+        {
+          "module": "sklearn._config",
+          "declaration": "set_config",
+          "alias": null
+        },
+        {
+          "module": "sklearn._config",
+          "declaration": "config_context",
+          "alias": null
+        },
+        {
+          "module": "sklearn",
+          "declaration": "_distributor_init",
+          "alias": null
+        },
+        {
+          "module": "sklearn",
+          "declaration": "__check_build",
+          "alias": null
+        },
+        {
+          "module": "sklearn.base",
+          "declaration": "clone",
+          "alias": null
+        },
+        {
+          "module": "sklearn.utils._show_versions",
+          "declaration": "show_versions",
+          "alias": null
+        }
+      ],
+      "classes": [],
+      "functions": ["scikit-learn/sklearn/setup_module"]
+    },
+    {
+      "id": "scikit-learn/sklearn.__check_build",
+      "name": "sklearn.__check_build",
+      "imports": [
+        {
+          "module": "os",
+          "alias": null
+        }
+      ],
+      "from_imports": [
+        {
+          "module": "sklearn.__check_build._check_build",
+          "declaration": "check_build",
+          "alias": null
+        }
+      ],
+      "classes": [],
+      "functions": ["scikit-learn/sklearn.__check_build/raise_build_error"]
+    },
+    {
+      "id": "scikit-learn/sklearn.__check_build.setup",
+      "name": "sklearn.__check_build.setup",
+      "imports": [
+        {
+          "module": "numpy",
+          "alias": null
+        }
+      ],
+      "from_imports": [
+        {
+          "module": "numpy.distutils.core",
+          "declaration": "setup",
+          "alias": null
+        }
+      ],
+      "classes": [],
+      "functions": ["scikit-learn/sklearn.__check_build.setup/configuration"]
+    },
+    {
+      "id": "scikit-learn/sklearn._build_utils",
+      "name": "sklearn._build_utils",
+      "imports": [
+        {
+          "module": "os",
+          "alias": null
+        },
+        {
+          "module": "sklearn",
+          "alias": null
+        },
+        {
+          "module": "contextlib",
+          "alias": null
+        }
+      ],
+      "from_imports": [
+        {
+          "module": "distutils.version",
+          "declaration": "LooseVersion",
+          "alias": null
+        },
+        {
+          "module": "sklearn._build_utils.pre_build_helpers",
+          "declaration": "basic_check_build",
+          "alias": null
+        },
+        {
+          "module": "sklearn._build_utils.openmp_helpers",
+          "declaration": "check_openmp_support",
+          "alias": null
+        },
+        {
+          "module": "sklearn._min_dependencies",
+          "declaration": "CYTHON_MIN_VERSION",
+          "alias": null
+        }
+      ],
+      "classes": [],
+      "functions": [
+        "scikit-learn/sklearn._build_utils/_check_cython_version",
+        "scikit-learn/sklearn._build_utils/cythonize_extensions",
+        "scikit-learn/sklearn._build_utils/gen_from_templates"
+      ]
+    },
+    {
+      "id": "scikit-learn/sklearn._build_utils.openmp_helpers",
+      "name": "sklearn._build_utils.openmp_helpers",
+      "imports": [
+        {
+          "module": "os",
+          "alias": null
+        },
+        {
+          "module": "sys",
+          "alias": null
+        },
+        {
+          "module": "textwrap",
+          "alias": null
+        },
+        {
+          "module": "warnings",
+          "alias": null
+        },
+        {
+          "module": "subprocess",
+          "alias": null
+        }
+      ],
+      "from_imports": [
+        {
+          "module": "distutils.errors",
+          "declaration": "CompileError",
+          "alias": null
+        },
+        {
+          "module": "distutils.errors",
+          "declaration": "LinkError",
+          "alias": null
+        },
+        {
+          "module":
"sklearn._build_utils.pre_build_helpers", + "declaration": "compile_test_program", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn._build_utils.openmp_helpers/get_openmp_flag", + "scikit-learn/sklearn._build_utils.openmp_helpers/check_openmp_support" + ] + }, + { + "id": "scikit-learn/sklearn._build_utils.pre_build_helpers", + "name": "sklearn._build_utils.pre_build_helpers", + "imports": [ + { + "module": "os", + "alias": null + }, + { + "module": "sys", + "alias": null + }, + { + "module": "glob", + "alias": null + }, + { + "module": "tempfile", + "alias": null + }, + { + "module": "textwrap", + "alias": null + }, + { + "module": "setuptools", + "alias": null + }, + { + "module": "subprocess", + "alias": null + } + ], + "from_imports": [ + { + "module": "distutils.dist", + "declaration": "Distribution", + "alias": null + }, + { + "module": "distutils.sysconfig", + "declaration": "customize_compiler", + "alias": null + }, + { + "module": "numpy.distutils.ccompiler", + "declaration": "new_compiler", + "alias": null + }, + { + "module": "numpy.distutils.command.config_compiler", + "declaration": "config_cc", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn._build_utils.pre_build_helpers/_get_compiler", + "scikit-learn/sklearn._build_utils.pre_build_helpers/compile_test_program", + "scikit-learn/sklearn._build_utils.pre_build_helpers/basic_check_build" + ] + }, + { + "id": "scikit-learn/sklearn._config", + "name": "sklearn._config", + "imports": [ + { + "module": "os", + "alias": null + } + ], + "from_imports": [ + { + "module": "contextlib", + "declaration": "contextmanager", + "alias": "contextmanager" + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn._config/get_config", + "scikit-learn/sklearn._config/set_config", + "scikit-learn/sklearn._config/config_context" + ] + }, + { + "id": "scikit-learn/sklearn._distributor_init", + "name": "sklearn._distributor_init", + "imports": [], + "from_imports": [], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn._loss", + "name": "sklearn._loss", + "imports": [], + "from_imports": [], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution", + "name": "sklearn._loss.glm_distribution", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "collections", + "declaration": "namedtuple", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "xlogy", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel", + "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution", + "scikit-learn/sklearn._loss.glm_distribution/NormalDistribution", + "scikit-learn/sklearn._loss.glm_distribution/PoissonDistribution", + "scikit-learn/sklearn._loss.glm_distribution/GammaDistribution", + "scikit-learn/sklearn._loss.glm_distribution/InverseGaussianDistribution" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn._min_dependencies", + "name": "sklearn._min_dependencies", + "imports": [ + { + "module": "platform", + "alias": null + }, + { + "module": "argparse", + "alias": null + } + ], + "from_imports": [], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.base", + "name": 
"sklearn.base", + "imports": [ + { + "module": "copy", + "alias": null + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "platform", + "alias": null + }, + { + "module": "inspect", + "alias": null + }, + { + "module": "re", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "collections", + "declaration": "defaultdict", + "alias": null + }, + { + "module": "sklearn", + "declaration": "__version__", + "alias": null + }, + { + "module": "sklearn._config", + "declaration": "get_config", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_IS_32BIT", + "alias": null + }, + { + "module": "sklearn.utils._tags", + "declaration": "_DEFAULT_TAGS", + "alias": null + }, + { + "module": "sklearn.utils._tags", + "declaration": "_safe_tags", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_X_y", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils._estimator_html_repr", + "declaration": "estimator_html_repr", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.base/BaseEstimator", + "scikit-learn/sklearn.base/ClassifierMixin", + "scikit-learn/sklearn.base/RegressorMixin", + "scikit-learn/sklearn.base/ClusterMixin", + "scikit-learn/sklearn.base/BiclusterMixin", + "scikit-learn/sklearn.base/TransformerMixin", + "scikit-learn/sklearn.base/DensityMixin", + "scikit-learn/sklearn.base/OutlierMixin", + "scikit-learn/sklearn.base/MetaEstimatorMixin", + "scikit-learn/sklearn.base/MultiOutputMixin", + "scikit-learn/sklearn.base/_UnstableArchMixin" + ], + "functions": [ + "scikit-learn/sklearn.base/clone", + "scikit-learn/sklearn.base/_pprint", + "scikit-learn/sklearn.base/is_classifier", + "scikit-learn/sklearn.base/is_regressor", + "scikit-learn/sklearn.base/is_outlier_detector", + "scikit-learn/sklearn.base/_is_pairwise" + ] + }, + { + "id": "scikit-learn/sklearn.calibration", + "name": "sklearn.calibration", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "inspect", + "declaration": "signature", + "alias": null + }, + { + "module": "contextlib", + "declaration": "suppress", + "alias": null + }, + { + "module": "functools", + "declaration": "partial", + "alias": null + }, + { + "module": "math", + "declaration": "log", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "expit", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "xlogy", + "alias": null + }, + { + "module": "scipy.optimize", + "declaration": "fmin_bfgs", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MetaEstimatorMixin", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "label_binarize", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelEncoder", + "alias": 
null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "indexable", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.pipeline", + "declaration": "Pipeline", + "alias": null + }, + { + "module": "sklearn.isotonic", + "declaration": "IsotonicRegression", + "alias": null + }, + { + "module": "sklearn.svm", + "declaration": "LinearSVC", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "check_cv", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "cross_val_predict", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.calibration/CalibratedClassifierCV", + "scikit-learn/sklearn.calibration/_CalibratedClassifier", + "scikit-learn/sklearn.calibration/_SigmoidCalibration" + ], + "functions": [ + "scikit-learn/sklearn.calibration/_fit_classifier_calibrator_pair", + "scikit-learn/sklearn.calibration/_get_prediction_method", + "scikit-learn/sklearn.calibration/_compute_predictions", + "scikit-learn/sklearn.calibration/_fit_calibrator", + "scikit-learn/sklearn.calibration/_sigmoid_calibration", + "scikit-learn/sklearn.calibration/calibration_curve" + ] + }, + { + "id": "scikit-learn/sklearn.cluster", + "name": "sklearn.cluster", + "imports": [], + "from_imports": [ + { + "module": "sklearn.cluster._spectral", + "declaration": "spectral_clustering", + "alias": null + }, + { + "module": "sklearn.cluster._spectral", + "declaration": "SpectralClustering", + "alias": null + }, + { + "module": "sklearn.cluster._mean_shift", + "declaration": "mean_shift", + "alias": null + }, + { + "module": "sklearn.cluster._mean_shift", + "declaration": "MeanShift", + "alias": null + }, + { + "module": "sklearn.cluster._mean_shift", + "declaration": "estimate_bandwidth", + "alias": null + }, + { + "module": "sklearn.cluster._mean_shift", + "declaration": "get_bin_seeds", + "alias": null + }, + { + "module": "sklearn.cluster._affinity_propagation", + "declaration": "affinity_propagation", + "alias": null + }, + { + "module": "sklearn.cluster._affinity_propagation", + "declaration": "AffinityPropagation", + "alias": null + }, + { + "module": "sklearn.cluster._agglomerative", + "declaration": "ward_tree", + "alias": null + }, + { + "module": "sklearn.cluster._agglomerative", + "declaration": "AgglomerativeClustering", + "alias": null + }, + { + "module": "sklearn.cluster._agglomerative", + "declaration": "linkage_tree", + "alias": null + }, + { + "module": "sklearn.cluster._agglomerative", + "declaration": "FeatureAgglomeration", + "alias": null + }, + { + "module": "sklearn.cluster._kmeans", + "declaration": "k_means", + "alias": null + }, + { + "module": 
"sklearn.cluster._kmeans", + "declaration": "KMeans", + "alias": null + }, + { + "module": "sklearn.cluster._kmeans", + "declaration": "MiniBatchKMeans", + "alias": null + }, + { + "module": "sklearn.cluster._kmeans", + "declaration": "kmeans_plusplus", + "alias": null + }, + { + "module": "sklearn.cluster._dbscan", + "declaration": "dbscan", + "alias": null + }, + { + "module": "sklearn.cluster._dbscan", + "declaration": "DBSCAN", + "alias": null + }, + { + "module": "sklearn.cluster._optics", + "declaration": "OPTICS", + "alias": null + }, + { + "module": "sklearn.cluster._optics", + "declaration": "cluster_optics_dbscan", + "alias": null + }, + { + "module": "sklearn.cluster._optics", + "declaration": "compute_optics_graph", + "alias": null + }, + { + "module": "sklearn.cluster._optics", + "declaration": "cluster_optics_xi", + "alias": null + }, + { + "module": "sklearn.cluster._bicluster", + "declaration": "SpectralBiclustering", + "alias": null + }, + { + "module": "sklearn.cluster._bicluster", + "declaration": "SpectralCoclustering", + "alias": null + }, + { + "module": "sklearn.cluster._birch", + "declaration": "Birch", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation", + "name": "sklearn.cluster._affinity_propagation", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClusterMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "as_float_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "euclidean_distances", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "pairwise_distances_argmin", + "alias": null + }, + { + "module": "sklearn._config", + "declaration": "config_context", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation"], + "functions": [ + "scikit-learn/sklearn.cluster._affinity_propagation/_equal_similarities_and_preferences", + "scikit-learn/sklearn.cluster._affinity_propagation/affinity_propagation" + ] + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative", + "name": "sklearn.cluster._agglomerative", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "heapq", + "declaration": "heapify", + "alias": null + }, + { + "module": "heapq", + "declaration": "heappop", + "alias": null + }, + { + "module": "heapq", + "declaration": "heappush", + "alias": null + }, + { + "module": "heapq", + "declaration": "heappushpop", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "scipy.sparse.csgraph", + "declaration": "connected_components", + "alias": null + }, + { + "module": "sklearn.base", + 
"declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClusterMixin", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "paired_distances", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_distances", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "DistanceMetric", + "alias": null + }, + { + "module": "sklearn.neighbors._dist_metrics", + "declaration": "METRIC_MAPPING", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils._fast_dict", + "declaration": "IntFloatDict", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "_astype_copy_false", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_memory", + "alias": null + }, + { + "module": "sklearn.cluster", + "declaration": "_hierarchical_fast", + "alias": "_hierarchical" + }, + { + "module": "sklearn.cluster._feature_agglomeration", + "declaration": "AgglomerationTransform", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering", + "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration" + ], + "functions": [ + "scikit-learn/sklearn.cluster._agglomerative/_fix_connectivity", + "scikit-learn/sklearn.cluster._agglomerative/_single_linkage_tree", + "scikit-learn/sklearn.cluster._agglomerative/ward_tree", + "scikit-learn/sklearn.cluster._agglomerative/linkage_tree", + "scikit-learn/sklearn.cluster._agglomerative/_complete_linkage", + "scikit-learn/sklearn.cluster._agglomerative/_average_linkage", + "scikit-learn/sklearn.cluster._agglomerative/_single_linkage", + "scikit-learn/sklearn.cluster._agglomerative/_hc_cut" + ] + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster", + "name": "sklearn.cluster._bicluster", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "norm", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "dia_matrix", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "scipy.sparse.linalg", + "declaration": "eigsh", + "alias": null + }, + { + "module": "scipy.sparse.linalg", + "declaration": "svds", + "alias": null + }, + { + "module": "sklearn.cluster", + "declaration": "KMeans", + "alias": null + }, + { + "module": "sklearn.cluster", + "declaration": "MiniBatchKMeans", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BiclusterMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "make_nonnegative", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "randomized_svd", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.validation", + 
"declaration": "assert_all_finite", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.cluster._bicluster/BaseSpectral", + "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering", + "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering" + ], + "functions": [ + "scikit-learn/sklearn.cluster._bicluster/_scale_normalize", + "scikit-learn/sklearn.cluster._bicluster/_bistochastic_normalize", + "scikit-learn/sklearn.cluster._bicluster/_log_normalize" + ] + }, + { + "id": "scikit-learn/sklearn.cluster._birch", + "name": "sklearn.cluster._birch", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "math", + "declaration": "sqrt", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "pairwise_distances_argmin", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "euclidean_distances", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClusterMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "row_norms", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.cluster", + "declaration": "AgglomerativeClustering", + "alias": null + }, + { + "module": "sklearn._config", + "declaration": "config_context", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.cluster._birch/_CFNode", + "scikit-learn/sklearn.cluster._birch/_CFSubcluster", + "scikit-learn/sklearn.cluster._birch/Birch" + ], + "functions": [ + "scikit-learn/sklearn.cluster._birch/_iterate_sparse_X", + "scikit-learn/sklearn.cluster._birch/_split_node" + ] + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan", + "name": "sklearn.cluster._dbscan", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClusterMixin", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "NearestNeighbors", + "alias": null + }, + { + "module": "sklearn.cluster._dbscan_inner", + "declaration": "dbscan_inner", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.cluster._dbscan/DBSCAN"], + "functions": ["scikit-learn/sklearn.cluster._dbscan/dbscan"] + }, + { + "id": "scikit-learn/sklearn.cluster._feature_agglomeration", + "name": "sklearn.cluster._feature_agglomeration", + "imports": [ + { + "module": 
"numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.cluster._feature_agglomeration/AgglomerationTransform"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans", + "name": "sklearn.cluster._kmeans", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "threadpoolctl", + "declaration": "threadpool_limits", + "alias": null + }, + { + "module": "threadpoolctl", + "declaration": "threadpool_info", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClusterMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "euclidean_distances", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "row_norms", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "stable_cumsum", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs_fast", + "declaration": "assign_rows_csr", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "mean_variance_axis", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "gen_batches", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils._openmp_helpers", + "declaration": "_openmp_effective_n_threads", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.cluster._k_means_fast", + "declaration": "CHUNK_SIZE", + "alias": null + }, + { + "module": "sklearn.cluster._k_means_fast", + "declaration": "_inertia_dense", + "alias": null + }, + { + "module": "sklearn.cluster._k_means_fast", + "declaration": "_inertia_sparse", + "alias": null + }, + { + "module": "sklearn.cluster._k_means_fast", + "declaration": "_mini_batch_update_csr", + "alias": null + }, + { + "module": "sklearn.cluster._k_means_lloyd", + "declaration": "lloyd_iter_chunked_dense", + "alias": null + }, + { + "module": "sklearn.cluster._k_means_lloyd", + "declaration": "lloyd_iter_chunked_sparse", + "alias": null + }, + { + "module": "sklearn.cluster._k_means_elkan", + "declaration": "init_bounds_dense", + "alias": null + }, + { + "module": "sklearn.cluster._k_means_elkan", + "declaration": "init_bounds_sparse", + "alias": null + }, + { + "module": "sklearn.cluster._k_means_elkan", + "declaration": "elkan_iter_chunked_dense", + "alias": null + }, + { 
+ "module": "sklearn.cluster._k_means_elkan", + "declaration": "elkan_iter_chunked_sparse", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.cluster._kmeans/KMeans", + "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans" + ], + "functions": [ + "scikit-learn/sklearn.cluster._kmeans/_kmeans_plusplus", + "scikit-learn/sklearn.cluster._kmeans/_tolerance", + "scikit-learn/sklearn.cluster._kmeans/k_means", + "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_elkan", + "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_lloyd", + "scikit-learn/sklearn.cluster._kmeans/_labels_inertia", + "scikit-learn/sklearn.cluster._kmeans/_mini_batch_step", + "scikit-learn/sklearn.cluster._kmeans/_mini_batch_convergence", + "scikit-learn/sklearn.cluster._kmeans/kmeans_plusplus" + ] + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift", + "name": "sklearn.cluster._mean_shift", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "collections", + "declaration": "defaultdict", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "gen_batches", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClusterMixin", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "NearestNeighbors", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_distances_argmin", + "alias": null + }, + { + "module": "sklearn._config", + "declaration": "config_context", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.cluster._mean_shift/MeanShift"], + "functions": [ + "scikit-learn/sklearn.cluster._mean_shift/estimate_bandwidth", + "scikit-learn/sklearn.cluster._mean_shift/_mean_shift_single_seed", + "scikit-learn/sklearn.cluster._mean_shift/mean_shift", + "scikit-learn/sklearn.cluster._mean_shift/get_bin_seeds" + ] + }, + { + "id": "scikit-learn/sklearn.cluster._optics", + "name": "sklearn.cluster._optics", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.utils", + "declaration": "gen_batches", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "get_chunk_n_rows", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "NearestNeighbors", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClusterMixin", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "pairwise_distances", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.cluster._optics/OPTICS"], + "functions": [ + 
"scikit-learn/sklearn.cluster._optics/_validate_size", + "scikit-learn/sklearn.cluster._optics/_compute_core_distances_", + "scikit-learn/sklearn.cluster._optics/compute_optics_graph", + "scikit-learn/sklearn.cluster._optics/_set_reach_dist", + "scikit-learn/sklearn.cluster._optics/cluster_optics_dbscan", + "scikit-learn/sklearn.cluster._optics/cluster_optics_xi", + "scikit-learn/sklearn.cluster._optics/_extend_region", + "scikit-learn/sklearn.cluster._optics/_update_filter_sdas", + "scikit-learn/sklearn.cluster._optics/_correct_predecessor", + "scikit-learn/sklearn.cluster._optics/_xi_cluster", + "scikit-learn/sklearn.cluster._optics/_extract_xi_labels" + ] + }, + { + "id": "scikit-learn/sklearn.cluster._spectral", + "name": "sklearn.cluster._spectral", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClusterMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "as_float_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_kernels", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "kneighbors_graph", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "NearestNeighbors", + "alias": null + }, + { + "module": "sklearn.manifold", + "declaration": "spectral_embedding", + "alias": null + }, + { + "module": "sklearn.cluster._kmeans", + "declaration": "k_means", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.cluster._spectral/SpectralClustering"], + "functions": [ + "scikit-learn/sklearn.cluster._spectral/discretize", + "scikit-learn/sklearn.cluster._spectral/spectral_clustering" + ] + }, + { + "id": "scikit-learn/sklearn.cluster.setup", + "name": "sklearn.cluster.setup", + "imports": [ + { + "module": "os", + "alias": null + }, + { + "module": "numpy", + "alias": null + } + ], + "from_imports": [ + { + "module": "numpy.distutils.core", + "declaration": "setup", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.cluster.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.compose", + "name": "sklearn.compose", + "imports": [], + "from_imports": [ + { + "module": "sklearn.compose._column_transformer", + "declaration": "ColumnTransformer", + "alias": null + }, + { + "module": "sklearn.compose._column_transformer", + "declaration": "make_column_transformer", + "alias": null + }, + { + "module": "sklearn.compose._column_transformer", + "declaration": "make_column_selector", + "alias": null + }, + { + "module": "sklearn.compose._target", + "declaration": "TransformedTargetRegressor", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer", + "name": "sklearn.compose._column_transformer", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "itertools", + "declaration": "chain", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "joblib", + 
"declaration": "Parallel", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils._estimator_html_repr", + "declaration": "_VisualBlock", + "alias": null + }, + { + "module": "sklearn.pipeline", + "declaration": "_fit_transform_one", + "alias": null + }, + { + "module": "sklearn.pipeline", + "declaration": "_transform_one", + "alias": null + }, + { + "module": "sklearn.pipeline", + "declaration": "_name_estimators", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "FunctionTransformer", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_safe_indexing", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_get_column_indices", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_determine_key_type", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "_BaseComposition", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer", + "scikit-learn/sklearn.compose._column_transformer/make_column_selector" + ], + "functions": [ + "scikit-learn/sklearn.compose._column_transformer/_check_X", + "scikit-learn/sklearn.compose._column_transformer/_is_empty_column_selection", + "scikit-learn/sklearn.compose._column_transformer/_get_transformer_list", + "scikit-learn/sklearn.compose._column_transformer/make_column_transformer" + ] + }, + { + "id": "scikit-learn/sklearn.compose._target", + "name": "sklearn.compose._target", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_safe_indexing", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "FunctionTransformer", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.compose._target/TransformedTargetRegressor"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.conftest", + "name": "sklearn.conftest", + "imports": [ + { + "module": "os", + "alias": null + }, + { + "module": "pytest", + "alias": null + } + ], + "from_imports": [ + { + "module": "os", + "declaration": "environ", + "alias": null + }, + { + "module": "functools", + "declaration": 
"wraps", + "alias": null + }, + { + "module": "threadpoolctl", + "declaration": "threadpool_limits", + "alias": null + }, + { + "module": "sklearn.utils._openmp_helpers", + "declaration": "_openmp_effective_n_threads", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "fetch_20newsgroups", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "fetch_20newsgroups_vectorized", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "fetch_california_housing", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "fetch_covtype", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "fetch_kddcup99", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "fetch_olivetti_faces", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "fetch_rcv1", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.conftest/_fetch_fixture", + "scikit-learn/sklearn.conftest/pytest_collection_modifyitems", + "scikit-learn/sklearn.conftest/pyplot", + "scikit-learn/sklearn.conftest/pytest_runtest_setup" + ] + }, + { + "id": "scikit-learn/sklearn.covariance", + "name": "sklearn.covariance", + "imports": [], + "from_imports": [ + { + "module": "sklearn.covariance._empirical_covariance", + "declaration": "empirical_covariance", + "alias": null + }, + { + "module": "sklearn.covariance._empirical_covariance", + "declaration": "EmpiricalCovariance", + "alias": null + }, + { + "module": "sklearn.covariance._empirical_covariance", + "declaration": "log_likelihood", + "alias": null + }, + { + "module": "sklearn.covariance._shrunk_covariance", + "declaration": "shrunk_covariance", + "alias": null + }, + { + "module": "sklearn.covariance._shrunk_covariance", + "declaration": "ShrunkCovariance", + "alias": null + }, + { + "module": "sklearn.covariance._shrunk_covariance", + "declaration": "ledoit_wolf", + "alias": null + }, + { + "module": "sklearn.covariance._shrunk_covariance", + "declaration": "ledoit_wolf_shrinkage", + "alias": null + }, + { + "module": "sklearn.covariance._shrunk_covariance", + "declaration": "LedoitWolf", + "alias": null + }, + { + "module": "sklearn.covariance._shrunk_covariance", + "declaration": "oas", + "alias": null + }, + { + "module": "sklearn.covariance._shrunk_covariance", + "declaration": "OAS", + "alias": null + }, + { + "module": "sklearn.covariance._robust_covariance", + "declaration": "fast_mcd", + "alias": null + }, + { + "module": "sklearn.covariance._robust_covariance", + "declaration": "MinCovDet", + "alias": null + }, + { + "module": "sklearn.covariance._graph_lasso", + "declaration": "graphical_lasso", + "alias": null + }, + { + "module": "sklearn.covariance._graph_lasso", + "declaration": "GraphicalLasso", + "alias": null + }, + { + "module": "sklearn.covariance._graph_lasso", + "declaration": "GraphicalLassoCV", + "alias": null + }, + { + "module": "sklearn.covariance._elliptic_envelope", + "declaration": "EllipticEnvelope", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope", + "name": "sklearn.covariance._elliptic_envelope", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.covariance", + "declaration": "MinCovDet", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": 
"sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "accuracy_score", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "OutlierMixin", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance", + "name": "sklearn.covariance._empirical_covariance", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "fast_logdet", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_distances", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance"], + "functions": [ + "scikit-learn/sklearn.covariance._empirical_covariance/log_likelihood", + "scikit-learn/sklearn.covariance._empirical_covariance/empirical_covariance" + ] + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso", + "name": "sklearn.covariance._graph_lasso", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "operator", + "alias": null + }, + { + "module": "sys", + "alias": null + }, + { + "module": "time", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "collections.abc", + "declaration": "Sequence", + "alias": null + }, + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "sklearn.covariance", + "declaration": "empirical_covariance", + "alias": null + }, + { + "module": "sklearn.covariance", + "declaration": "EmpiricalCovariance", + "alias": null + }, + { + "module": "sklearn.covariance", + "declaration": "log_likelihood", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.linear_model", + "declaration": "_cd_fast", + "alias": "cd_fast" + }, + { + "module": "sklearn.linear_model", + "declaration": "lars_path_gram", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "check_cv", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "cross_val_score", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso", + "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV" + ], + "functions": [ + 
"scikit-learn/sklearn.covariance._graph_lasso/_objective", + "scikit-learn/sklearn.covariance._graph_lasso/_dual_gap", + "scikit-learn/sklearn.covariance._graph_lasso/alpha_max", + "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso", + "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso_path" + ] + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance", + "name": "sklearn.covariance._robust_covariance", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "scipy.stats", + "declaration": "chi2", + "alias": null + }, + { + "module": "sklearn.covariance", + "declaration": "empirical_covariance", + "alias": null + }, + { + "module": "sklearn.covariance", + "declaration": "EmpiricalCovariance", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "fast_logdet", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.covariance._robust_covariance/MinCovDet"], + "functions": [ + "scikit-learn/sklearn.covariance._robust_covariance/c_step", + "scikit-learn/sklearn.covariance._robust_covariance/_c_step", + "scikit-learn/sklearn.covariance._robust_covariance/select_candidates", + "scikit-learn/sklearn.covariance._robust_covariance/fast_mcd" + ] + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance", + "name": "sklearn.covariance._shrunk_covariance", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.covariance", + "declaration": "empirical_covariance", + "alias": null + }, + { + "module": "sklearn.covariance", + "declaration": "EmpiricalCovariance", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.covariance._shrunk_covariance/ShrunkCovariance", + "scikit-learn/sklearn.covariance._shrunk_covariance/LedoitWolf", + "scikit-learn/sklearn.covariance._shrunk_covariance/OAS" + ], + "functions": [ + "scikit-learn/sklearn.covariance._shrunk_covariance/shrunk_covariance", + "scikit-learn/sklearn.covariance._shrunk_covariance/ledoit_wolf_shrinkage", + "scikit-learn/sklearn.covariance._shrunk_covariance/ledoit_wolf", + "scikit-learn/sklearn.covariance._shrunk_covariance/oas" + ] + }, + { + "id": "scikit-learn/sklearn.cross_decomposition", + "name": "sklearn.cross_decomposition", + "imports": [], + "from_imports": [ + { + "module": "sklearn.cross_decomposition._pls", + "declaration": "PLSCanonical", + "alias": null + }, + { + "module": "sklearn.cross_decomposition._pls", + "declaration": "PLSRegression", + "alias": null + }, + { + "module": "sklearn.cross_decomposition._pls", + "declaration": "PLSSVD", + "alias": null + }, + { + "module": "sklearn.cross_decomposition._pls", + "declaration": "CCA", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": 
"scikit-learn/sklearn.cross_decomposition._pls", + "name": "sklearn.cross_decomposition._pls", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "pinv2", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "svd", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "svd_flip", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "FLOAT_DTYPES", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.cross_decomposition._pls/_PLS", + "scikit-learn/sklearn.cross_decomposition._pls/PLSRegression", + "scikit-learn/sklearn.cross_decomposition._pls/PLSCanonical", + "scikit-learn/sklearn.cross_decomposition._pls/CCA", + "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD" + ], + "functions": [ + "scikit-learn/sklearn.cross_decomposition._pls/_pinv2_old", + "scikit-learn/sklearn.cross_decomposition._pls/_get_first_singular_vectors_power_method", + "scikit-learn/sklearn.cross_decomposition._pls/_get_first_singular_vectors_svd", + "scikit-learn/sklearn.cross_decomposition._pls/_center_scale_xy", + "scikit-learn/sklearn.cross_decomposition._pls/_svd_flip_1d" + ] + }, + { + "id": "scikit-learn/sklearn.datasets", + "name": "sklearn.datasets", + "imports": [], + "from_imports": [ + { + "module": "sklearn.datasets._base", + "declaration": "load_breast_cancer", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "load_boston", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "load_diabetes", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "load_digits", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "load_files", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "load_iris", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "load_linnerud", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "load_sample_images", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "load_sample_image", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "load_wine", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "get_data_home", + "alias": 
null + }, + { + "module": "sklearn.datasets._base", + "declaration": "clear_data_home", + "alias": null + }, + { + "module": "sklearn.datasets._covtype", + "declaration": "fetch_covtype", + "alias": null + }, + { + "module": "sklearn.datasets._kddcup99", + "declaration": "fetch_kddcup99", + "alias": null + }, + { + "module": "sklearn.datasets._lfw", + "declaration": "fetch_lfw_pairs", + "alias": null + }, + { + "module": "sklearn.datasets._lfw", + "declaration": "fetch_lfw_people", + "alias": null + }, + { + "module": "sklearn.datasets._twenty_newsgroups", + "declaration": "fetch_20newsgroups", + "alias": null + }, + { + "module": "sklearn.datasets._twenty_newsgroups", + "declaration": "fetch_20newsgroups_vectorized", + "alias": null + }, + { + "module": "sklearn.datasets._openml", + "declaration": "fetch_openml", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_classification", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_multilabel_classification", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_hastie_10_2", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_regression", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_blobs", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_moons", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_circles", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_friedman1", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_friedman2", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_friedman3", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_low_rank_matrix", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_sparse_coded_signal", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_sparse_uncorrelated", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_spd_matrix", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_swiss_roll", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_s_curve", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_sparse_spd_matrix", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_gaussian_quantiles", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_biclusters", + "alias": null + }, + { + "module": "sklearn.datasets._samples_generator", + "declaration": "make_checkerboard", + "alias": null + }, + { + "module": "sklearn.datasets._svmlight_format_io", + "declaration": "load_svmlight_file", + "alias": null + }, + { + "module": "sklearn.datasets._svmlight_format_io", + "declaration": "load_svmlight_files", + "alias": null + }, + { + "module": "sklearn.datasets._svmlight_format_io", + "declaration": "dump_svmlight_file", + "alias": null + }, + { + "module": "sklearn.datasets._olivetti_faces", + "declaration": 
"fetch_olivetti_faces", + "alias": null + }, + { + "module": "sklearn.datasets._species_distributions", + "declaration": "fetch_species_distributions", + "alias": null + }, + { + "module": "sklearn.datasets._california_housing", + "declaration": "fetch_california_housing", + "alias": null + }, + { + "module": "sklearn.datasets._rcv1", + "declaration": "fetch_rcv1", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.datasets._base", + "name": "sklearn.datasets._base", + "imports": [ + { + "module": "csv", + "alias": null + }, + { + "module": "hashlib", + "alias": null + }, + { + "module": "os", + "alias": null + }, + { + "module": "shutil", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "collections", + "declaration": "namedtuple", + "alias": null + }, + { + "module": "os", + "declaration": "environ", + "alias": null + }, + { + "module": "os", + "declaration": "listdir", + "alias": null + }, + { + "module": "os", + "declaration": "makedirs", + "alias": null + }, + { + "module": "os.path", + "declaration": "dirname", + "alias": null + }, + { + "module": "os.path", + "declaration": "expanduser", + "alias": null + }, + { + "module": "os.path", + "declaration": "isdir", + "alias": null + }, + { + "module": "os.path", + "declaration": "join", + "alias": null + }, + { + "module": "os.path", + "declaration": "splitext", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_pandas_support", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "urllib.request", + "declaration": "urlretrieve", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.datasets._base/get_data_home", + "scikit-learn/sklearn.datasets._base/clear_data_home", + "scikit-learn/sklearn.datasets._base/_convert_data_dataframe", + "scikit-learn/sklearn.datasets._base/load_files", + "scikit-learn/sklearn.datasets._base/load_data", + "scikit-learn/sklearn.datasets._base/load_wine", + "scikit-learn/sklearn.datasets._base/load_iris", + "scikit-learn/sklearn.datasets._base/load_breast_cancer", + "scikit-learn/sklearn.datasets._base/load_digits", + "scikit-learn/sklearn.datasets._base/load_diabetes", + "scikit-learn/sklearn.datasets._base/load_linnerud", + "scikit-learn/sklearn.datasets._base/load_boston", + "scikit-learn/sklearn.datasets._base/load_sample_images", + "scikit-learn/sklearn.datasets._base/load_sample_image", + "scikit-learn/sklearn.datasets._base/_pkl_filepath", + "scikit-learn/sklearn.datasets._base/_sha256", + "scikit-learn/sklearn.datasets._base/_fetch_remote" + ] + }, + { + "id": "scikit-learn/sklearn.datasets._california_housing", + "name": "sklearn.datasets._california_housing", + "imports": [ + { + "module": "tarfile", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "logging", + "alias": null + }, + { + "module": "joblib", + "alias": null + } + ], + "from_imports": [ + { + "module": "os.path", + "declaration": "dirname", + "alias": null + }, + { + "module": "os.path", + "declaration": "exists", + "alias": null + }, + { + "module": "os.path", + "declaration": "join", + "alias": null + }, + { + "module": "os", + "declaration": "makedirs", + "alias": null + }, + { 
+ "module": "os", + "declaration": "remove", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "get_data_home", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_convert_data_dataframe", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_fetch_remote", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_pkl_filepath", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "RemoteFileMetadata", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.datasets._california_housing/fetch_california_housing"] + }, + { + "id": "scikit-learn/sklearn.datasets._covtype", + "name": "sklearn.datasets._covtype", + "imports": [ + { + "module": "logging", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "joblib", + "alias": null + } + ], + "from_imports": [ + { + "module": "gzip", + "declaration": "GzipFile", + "alias": null + }, + { + "module": "os.path", + "declaration": "dirname", + "alias": null + }, + { + "module": "os.path", + "declaration": "exists", + "alias": null + }, + { + "module": "os.path", + "declaration": "join", + "alias": null + }, + { + "module": "os", + "declaration": "remove", + "alias": null + }, + { + "module": "os", + "declaration": "makedirs", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "get_data_home", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_convert_data_dataframe", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_fetch_remote", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "RemoteFileMetadata", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_pkl_filepath", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.datasets._covtype/fetch_covtype"] + }, + { + "id": "scikit-learn/sklearn.datasets._kddcup99", + "name": "sklearn.datasets._kddcup99", + "imports": [ + { + "module": "errno", + "alias": null + }, + { + "module": "logging", + "alias": null + }, + { + "module": "os", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "joblib", + "alias": null + } + ], + "from_imports": [ + { + "module": "gzip", + "declaration": "GzipFile", + "alias": null + }, + { + "module": "os.path", + "declaration": "dirname", + "alias": null + }, + { + "module": "os.path", + "declaration": "exists", + "alias": null + }, + { + "module": "os.path", + "declaration": "join", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_fetch_remote", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_convert_data_dataframe", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "get_data_home", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "RemoteFileMetadata", + "alias": null + }, + { + "module": "sklearn.utils", 
+ "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "shuffle", + "alias": "shuffle_method" + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.datasets._kddcup99/fetch_kddcup99", + "scikit-learn/sklearn.datasets._kddcup99/_fetch_brute_kddcup99", + "scikit-learn/sklearn.datasets._kddcup99/_mkdirp" + ] + }, + { + "id": "scikit-learn/sklearn.datasets._lfw", + "name": "sklearn.datasets._lfw", + "imports": [ + { + "module": "logging", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "joblib", + "alias": null + } + ], + "from_imports": [ + { + "module": "os", + "declaration": "listdir", + "alias": null + }, + { + "module": "os", + "declaration": "makedirs", + "alias": null + }, + { + "module": "os", + "declaration": "remove", + "alias": null + }, + { + "module": "os.path", + "declaration": "dirname", + "alias": null + }, + { + "module": "os.path", + "declaration": "join", + "alias": null + }, + { + "module": "os.path", + "declaration": "exists", + "alias": null + }, + { + "module": "os.path", + "declaration": "isdir", + "alias": null + }, + { + "module": "joblib", + "declaration": "Memory", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "get_data_home", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_fetch_remote", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "RemoteFileMetadata", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "parse_version", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.datasets._lfw/_check_fetch_lfw", + "scikit-learn/sklearn.datasets._lfw/_load_imgs", + "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_people", + "scikit-learn/sklearn.datasets._lfw/fetch_lfw_people", + "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_pairs", + "scikit-learn/sklearn.datasets._lfw/fetch_lfw_pairs" + ] + }, + { + "id": "scikit-learn/sklearn.datasets._olivetti_faces", + "name": "sklearn.datasets._olivetti_faces", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "joblib", + "alias": null + } + ], + "from_imports": [ + { + "module": "os.path", + "declaration": "dirname", + "alias": null + }, + { + "module": "os.path", + "declaration": "exists", + "alias": null + }, + { + "module": "os.path", + "declaration": "join", + "alias": null + }, + { + "module": "os", + "declaration": "makedirs", + "alias": null + }, + { + "module": "os", + "declaration": "remove", + "alias": null + }, + { + "module": "scipy.io.matlab", + "declaration": "loadmat", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "get_data_home", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_fetch_remote", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "RemoteFileMetadata", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_pkl_filepath", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, 
+ { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.datasets._olivetti_faces/fetch_olivetti_faces"] + }, + { + "id": "scikit-learn/sklearn.datasets._openml", + "name": "sklearn.datasets._openml", + "imports": [ + { + "module": "gzip", + "alias": null + }, + { + "module": "json", + "alias": null + }, + { + "module": "os", + "alias": null + }, + { + "module": "shutil", + "alias": null + }, + { + "module": "hashlib", + "alias": null + }, + { + "module": "itertools", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": null + } + ], + "from_imports": [ + { + "module": "os.path", + "declaration": "join", + "alias": null + }, + { + "module": "warnings", + "declaration": "warn", + "alias": null + }, + { + "module": "contextlib", + "declaration": "closing", + "alias": null + }, + { + "module": "functools", + "declaration": "wraps", + "alias": null + }, + { + "module": "typing", + "declaration": "Callable", + "alias": null + }, + { + "module": "typing", + "declaration": "Optional", + "alias": null + }, + { + "module": "typing", + "declaration": "Dict", + "alias": null + }, + { + "module": "typing", + "declaration": "Tuple", + "alias": null + }, + { + "module": "typing", + "declaration": "List", + "alias": null + }, + { + "module": "typing", + "declaration": "Any", + "alias": null + }, + { + "module": "typing", + "declaration": "Union", + "alias": null + }, + { + "module": "collections.abc", + "declaration": "Generator", + "alias": null + }, + { + "module": "collections", + "declaration": "OrderedDict", + "alias": null + }, + { + "module": "functools", + "declaration": "partial", + "alias": null + }, + { + "module": "urllib.request", + "declaration": "urlopen", + "alias": null + }, + { + "module": "urllib.request", + "declaration": "Request", + "alias": null + }, + { + "module": "sklearn.externals", + "declaration": "_arff", + "alias": null + }, + { + "module": "sklearn.externals._arff", + "declaration": "ArffSparseDataType", + "alias": null + }, + { + "module": "sklearn.externals._arff", + "declaration": "ArffContainerType", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "get_data_home", + "alias": null + }, + { + "module": "urllib.error", + "declaration": "HTTPError", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "get_chunk_n_rows", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_chunk_generator", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_pandas_support", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.datasets._openml/OpenMLError"], + "functions": [ + "scikit-learn/sklearn.datasets._openml/_get_local_path", + "scikit-learn/sklearn.datasets._openml/_retry_with_clean_cache", + "scikit-learn/sklearn.datasets._openml/_open_openml_url", + "scikit-learn/sklearn.datasets._openml/_get_json_content_from_openml_api", + "scikit-learn/sklearn.datasets._openml/_split_sparse_columns", + "scikit-learn/sklearn.datasets._openml/_sparse_data_to_array", + "scikit-learn/sklearn.datasets._openml/_convert_arff_data", + 
"scikit-learn/sklearn.datasets._openml/_feature_to_dtype", + "scikit-learn/sklearn.datasets._openml/_convert_arff_data_dataframe", + "scikit-learn/sklearn.datasets._openml/_get_data_info_by_name", + "scikit-learn/sklearn.datasets._openml/_get_data_description_by_id", + "scikit-learn/sklearn.datasets._openml/_get_data_features", + "scikit-learn/sklearn.datasets._openml/_get_data_qualities", + "scikit-learn/sklearn.datasets._openml/_get_num_samples", + "scikit-learn/sklearn.datasets._openml/_load_arff_response", + "scikit-learn/sklearn.datasets._openml/_download_data_to_bunch", + "scikit-learn/sklearn.datasets._openml/_verify_target_data_type", + "scikit-learn/sklearn.datasets._openml/_valid_data_column_names", + "scikit-learn/sklearn.datasets._openml/fetch_openml" + ] + }, + { + "id": "scikit-learn/sklearn.datasets._rcv1", + "name": "sklearn.datasets._rcv1", + "imports": [ + { + "module": "logging", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + }, + { + "module": "joblib", + "alias": null + } + ], + "from_imports": [ + { + "module": "os", + "declaration": "remove", + "alias": null + }, + { + "module": "os", + "declaration": "makedirs", + "alias": null + }, + { + "module": "os.path", + "declaration": "dirname", + "alias": null + }, + { + "module": "os.path", + "declaration": "exists", + "alias": null + }, + { + "module": "os.path", + "declaration": "join", + "alias": null + }, + { + "module": "gzip", + "declaration": "GzipFile", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "get_data_home", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_pkl_filepath", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_fetch_remote", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "RemoteFileMetadata", + "alias": null + }, + { + "module": "sklearn.datasets._svmlight_format_io", + "declaration": "load_svmlight_files", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "shuffle", + "alias": "shuffle_" + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.datasets._rcv1/fetch_rcv1", + "scikit-learn/sklearn.datasets._rcv1/_inverse_permutation", + "scikit-learn/sklearn.datasets._rcv1/_find_permutation" + ] + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator", + "name": "sklearn.datasets._samples_generator", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "array", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "collections.abc", + "declaration": "Iterable", + "alias": null + }, + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "MultiLabelBinarizer", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "shuffle", + "alias": "util_shuffle" + }, + { + "module": "sklearn.utils.random", + "declaration": "sample_without_replacement", + "alias": null + }, + { + "module": 
"sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.datasets._samples_generator/_generate_hypercube", + "scikit-learn/sklearn.datasets._samples_generator/make_classification", + "scikit-learn/sklearn.datasets._samples_generator/make_multilabel_classification", + "scikit-learn/sklearn.datasets._samples_generator/make_hastie_10_2", + "scikit-learn/sklearn.datasets._samples_generator/make_regression", + "scikit-learn/sklearn.datasets._samples_generator/make_circles", + "scikit-learn/sklearn.datasets._samples_generator/make_moons", + "scikit-learn/sklearn.datasets._samples_generator/make_blobs", + "scikit-learn/sklearn.datasets._samples_generator/make_friedman1", + "scikit-learn/sklearn.datasets._samples_generator/make_friedman2", + "scikit-learn/sklearn.datasets._samples_generator/make_friedman3", + "scikit-learn/sklearn.datasets._samples_generator/make_low_rank_matrix", + "scikit-learn/sklearn.datasets._samples_generator/make_sparse_coded_signal", + "scikit-learn/sklearn.datasets._samples_generator/make_sparse_uncorrelated", + "scikit-learn/sklearn.datasets._samples_generator/make_spd_matrix", + "scikit-learn/sklearn.datasets._samples_generator/make_sparse_spd_matrix", + "scikit-learn/sklearn.datasets._samples_generator/make_swiss_roll", + "scikit-learn/sklearn.datasets._samples_generator/make_s_curve", + "scikit-learn/sklearn.datasets._samples_generator/make_gaussian_quantiles", + "scikit-learn/sklearn.datasets._samples_generator/_shuffle", + "scikit-learn/sklearn.datasets._samples_generator/make_biclusters", + "scikit-learn/sklearn.datasets._samples_generator/make_checkerboard" + ] + }, + { + "id": "scikit-learn/sklearn.datasets._species_distributions", + "name": "sklearn.datasets._species_distributions", + "imports": [ + { + "module": "logging", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "joblib", + "alias": null + } + ], + "from_imports": [ + { + "module": "io", + "declaration": "BytesIO", + "alias": null + }, + { + "module": "os", + "declaration": "makedirs", + "alias": null + }, + { + "module": "os", + "declaration": "remove", + "alias": null + }, + { + "module": "os.path", + "declaration": "exists", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "get_data_home", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_fetch_remote", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "RemoteFileMetadata", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_pkl_filepath", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.datasets._species_distributions/_load_coverage", + "scikit-learn/sklearn.datasets._species_distributions/_load_csv", + "scikit-learn/sklearn.datasets._species_distributions/construct_grids", + "scikit-learn/sklearn.datasets._species_distributions/fetch_species_distributions" + ] + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io", + "name": "sklearn.datasets._svmlight_format_io", + "imports": [ + { + "module": "io", + "alias": null + }, + { + "module": "os.path", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } 
+ ], + "from_imports": [ + { + "module": "sklearn.datasets._svmlight_format_fast", + "declaration": "_load_svmlight_file", + "alias": null + }, + { + "module": "contextlib", + "declaration": "closing", + "alias": null + }, + { + "module": "sklearn", + "declaration": "__version__", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "IS_PYPY", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.datasets._svmlight_format_io/_load_svmlight_file", + "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_file", + "scikit-learn/sklearn.datasets._svmlight_format_io/_gen_open", + "scikit-learn/sklearn.datasets._svmlight_format_io/_open_and_load", + "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_files", + "scikit-learn/sklearn.datasets._svmlight_format_io/_dump_svmlight", + "scikit-learn/sklearn.datasets._svmlight_format_io/dump_svmlight_file" + ] + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups", + "name": "sklearn.datasets._twenty_newsgroups", + "imports": [ + { + "module": "os", + "alias": null + }, + { + "module": "logging", + "alias": null + }, + { + "module": "tarfile", + "alias": null + }, + { + "module": "pickle", + "alias": null + }, + { + "module": "shutil", + "alias": null + }, + { + "module": "re", + "alias": null + }, + { + "module": "codecs", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + }, + { + "module": "joblib", + "alias": null + } + ], + "from_imports": [ + { + "module": "os.path", + "declaration": "dirname", + "alias": null + }, + { + "module": "os.path", + "declaration": "join", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "get_data_home", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "load_files", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_convert_data_dataframe", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_pkl_filepath", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "_fetch_remote", + "alias": null + }, + { + "module": "sklearn.datasets._base", + "declaration": "RemoteFileMetadata", + "alias": null + }, + { + "module": "sklearn.feature_extraction.text", + "declaration": "CountVectorizer", + "alias": null + }, + { + "module": "sklearn", + "declaration": "preprocessing", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.datasets._twenty_newsgroups/_download_20newsgroups", + "scikit-learn/sklearn.datasets._twenty_newsgroups/strip_newsgroup_header", + "scikit-learn/sklearn.datasets._twenty_newsgroups/strip_newsgroup_quoting", + "scikit-learn/sklearn.datasets._twenty_newsgroups/strip_newsgroup_footer", + "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups", + "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups_vectorized" + ] + }, + { + "id": "scikit-learn/sklearn.datasets.setup", + "name": 
"sklearn.datasets.setup", + "imports": [ + { + "module": "numpy", + "alias": null + }, + { + "module": "os", + "alias": null + }, + { + "module": "platform", + "alias": null + } + ], + "from_imports": [ + { + "module": "numpy.distutils.core", + "declaration": "setup", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.datasets.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.decomposition", + "name": "sklearn.decomposition", + "imports": [], + "from_imports": [ + { + "module": "sklearn.decomposition._nmf", + "declaration": "NMF", + "alias": null + }, + { + "module": "sklearn.decomposition._nmf", + "declaration": "non_negative_factorization", + "alias": null + }, + { + "module": "sklearn.decomposition._pca", + "declaration": "PCA", + "alias": null + }, + { + "module": "sklearn.decomposition._incremental_pca", + "declaration": "IncrementalPCA", + "alias": null + }, + { + "module": "sklearn.decomposition._kernel_pca", + "declaration": "KernelPCA", + "alias": null + }, + { + "module": "sklearn.decomposition._sparse_pca", + "declaration": "SparsePCA", + "alias": null + }, + { + "module": "sklearn.decomposition._sparse_pca", + "declaration": "MiniBatchSparsePCA", + "alias": null + }, + { + "module": "sklearn.decomposition._truncated_svd", + "declaration": "TruncatedSVD", + "alias": null + }, + { + "module": "sklearn.decomposition._fastica", + "declaration": "FastICA", + "alias": null + }, + { + "module": "sklearn.decomposition._fastica", + "declaration": "fastica", + "alias": null + }, + { + "module": "sklearn.decomposition._dict_learning", + "declaration": "dict_learning", + "alias": null + }, + { + "module": "sklearn.decomposition._dict_learning", + "declaration": "dict_learning_online", + "alias": null + }, + { + "module": "sklearn.decomposition._dict_learning", + "declaration": "sparse_encode", + "alias": null + }, + { + "module": "sklearn.decomposition._dict_learning", + "declaration": "DictionaryLearning", + "alias": null + }, + { + "module": "sklearn.decomposition._dict_learning", + "declaration": "MiniBatchDictionaryLearning", + "alias": null + }, + { + "module": "sklearn.decomposition._dict_learning", + "declaration": "SparseCoder", + "alias": null + }, + { + "module": "sklearn.decomposition._factor_analysis", + "declaration": "FactorAnalysis", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "randomized_svd", + "alias": null + }, + { + "module": "sklearn.decomposition._lda", + "declaration": "LatentDirichletAllocation", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.decomposition._base", + "name": "sklearn.decomposition._base", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.decomposition._base/_BasePCA"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning", + "name": "sklearn.decomposition._dict_learning", + "imports": [ + { + "module": "time", + "alias": null + 
}, + { + "module": "sys", + "alias": null + }, + { + "module": "itertools", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "math", + "declaration": "ceil", + "alias": null + }, + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "joblib", + "declaration": "effective_n_jobs", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "gen_even_slices", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "gen_batches", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "randomized_svd", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "row_norms", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.linear_model", + "declaration": "Lasso", + "alias": null + }, + { + "module": "sklearn.linear_model", + "declaration": "orthogonal_mp_gram", + "alias": null + }, + { + "module": "sklearn.linear_model", + "declaration": "LassoLars", + "alias": null + }, + { + "module": "sklearn.linear_model", + "declaration": "Lars", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding", + "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder", + "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning", + "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning" + ], + "functions": [ + "scikit-learn/sklearn.decomposition._dict_learning/_check_positive_coding", + "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode", + "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode", + "scikit-learn/sklearn.decomposition._dict_learning/_update_dict", + "scikit-learn/sklearn.decomposition._dict_learning/dict_learning", + "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online" + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis", + "name": "sklearn.decomposition._factor_analysis", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "math", + "declaration": "sqrt", + "alias": null + }, + { + "module": "math", + "declaration": "log", + "alias": null + }, + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "fast_logdet", + "alias": null + }, + { + 
"module": "sklearn.utils.extmath", + "declaration": "randomized_svd", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "squared_norm", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis"], + "functions": ["scikit-learn/sklearn.decomposition._factor_analysis/_ortho_rotation"] + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica", + "name": "sklearn.decomposition._fastica", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "as_float_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "FLOAT_DTYPES", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.decomposition._fastica/FastICA"], + "functions": [ + "scikit-learn/sklearn.decomposition._fastica/_gs_decorrelation", + "scikit-learn/sklearn.decomposition._fastica/_sym_decorrelation", + "scikit-learn/sklearn.decomposition._fastica/_ica_def", + "scikit-learn/sklearn.decomposition._fastica/_ica_par", + "scikit-learn/sklearn.decomposition._fastica/_logcosh", + "scikit-learn/sklearn.decomposition._fastica/_exp", + "scikit-learn/sklearn.decomposition._fastica/_cube", + "scikit-learn/sklearn.decomposition._fastica/fastica" + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca", + "name": "sklearn.decomposition._incremental_pca", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "sklearn.decomposition._base", + "declaration": "_BasePCA", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "gen_batches", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "svd_flip", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "_incremental_mean_and_var", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca", + "name": "sklearn.decomposition._kernel_pca", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + 
"from_imports": [ + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "scipy.sparse.linalg", + "declaration": "eigsh", + "alias": null + }, + { + "module": "sklearn.utils._arpack", + "declaration": "_init_arpack_v0", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "svd_flip", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_psd_eigenvalues", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "KernelCenterer", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_kernels", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.decomposition._lda", + "name": "sklearn.decomposition._lda", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "scipy.special", + "declaration": "gammaln", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "logsumexp", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "joblib", + "declaration": "effective_n_jobs", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "gen_batches", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "gen_even_slices", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_non_negative", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.decomposition._online_lda_fast", + "declaration": "mean_change", + "alias": null + }, + { + "module": "sklearn.decomposition._online_lda_fast", + "declaration": "_dirichlet_expectation_1d", + "alias": null + }, + { + "module": "sklearn.decomposition._online_lda_fast", + "declaration": "_dirichlet_expectation_2d", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation"], + "functions": ["scikit-learn/sklearn.decomposition._lda/_update_doc_distribution"] + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf", + "name": "sklearn.decomposition._nmf", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": 
"sp" + }, + { + "module": "time", + "alias": null + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "math", + "declaration": "sqrt", + "alias": null + }, + { + "module": "sklearn.decomposition._cdnmf_fast", + "declaration": "_update_cdnmf_fast", + "alias": null + }, + { + "module": "sklearn._config", + "declaration": "config_context", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "randomized_svd", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "squared_norm", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_non_negative", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.decomposition._nmf/NMF"], + "functions": [ + "scikit-learn/sklearn.decomposition._nmf/norm", + "scikit-learn/sklearn.decomposition._nmf/trace_dot", + "scikit-learn/sklearn.decomposition._nmf/_check_init", + "scikit-learn/sklearn.decomposition._nmf/_beta_divergence", + "scikit-learn/sklearn.decomposition._nmf/_special_sparse_dot", + "scikit-learn/sklearn.decomposition._nmf/_compute_regularization", + "scikit-learn/sklearn.decomposition._nmf/_check_string_param", + "scikit-learn/sklearn.decomposition._nmf/_beta_loss_to_float", + "scikit-learn/sklearn.decomposition._nmf/_initialize_nmf", + "scikit-learn/sklearn.decomposition._nmf/_update_coordinate_descent", + "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent", + "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_w", + "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_h", + "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update", + "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization" + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._pca", + "name": "sklearn.decomposition._pca", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "math", + "declaration": "log", + "alias": null + }, + { + "module": "math", + "declaration": "sqrt", + "alias": null + }, + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "gammaln", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "scipy.sparse.linalg", + "declaration": "svds", + "alias": null + }, + { + "module": "sklearn.decomposition._base", + "declaration": "_BasePCA", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils._arpack", + "declaration": "_init_arpack_v0", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + 
"declaration": "fast_logdet", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "randomized_svd", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "svd_flip", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "stable_cumsum", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.decomposition._pca/PCA"], + "functions": [ + "scikit-learn/sklearn.decomposition._pca/_assess_dimension", + "scikit-learn/sklearn.decomposition._pca/_infer_dimension" + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca", + "name": "sklearn.decomposition._sparse_pca", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.linear_model", + "declaration": "ridge_regression", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.decomposition._dict_learning", + "declaration": "dict_learning", + "alias": null + }, + { + "module": "sklearn.decomposition._dict_learning", + "declaration": "dict_learning_online", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA", + "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd", + "name": "sklearn.decomposition._truncated_svd", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "scipy.sparse.linalg", + "declaration": "svds", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils._arpack", + "declaration": "_init_arpack_v0", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "randomized_svd", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "svd_flip", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "mean_variance_axis", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.decomposition.setup", + "name": "sklearn.decomposition.setup", + 
"imports": [ + { + "module": "os", + "alias": null + }, + { + "module": "numpy", + "alias": null + } + ], + "from_imports": [ + { + "module": "numpy.distutils.misc_util", + "declaration": "Configuration", + "alias": null + }, + { + "module": "numpy.distutils.core", + "declaration": "setup", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.decomposition.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis", + "name": "sklearn.discriminant_analysis", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "expit", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearClassifierMixin", + "alias": null + }, + { + "module": "sklearn.covariance", + "declaration": "ledoit_wolf", + "alias": null + }, + { + "module": "sklearn.covariance", + "declaration": "empirical_covariance", + "alias": null + }, + { + "module": "sklearn.covariance", + "declaration": "shrunk_covariance", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "unique_labels", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "softmax", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "StandardScaler", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis", + "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis" + ], + "functions": [ + "scikit-learn/sklearn.discriminant_analysis/_cov", + "scikit-learn/sklearn.discriminant_analysis/_class_means", + "scikit-learn/sklearn.discriminant_analysis/_class_cov" + ] + }, + { + "id": "scikit-learn/sklearn.dummy", + "name": "sklearn.dummy", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_consistent_length", + "alias": null + }, + { + 
"module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.random", + "declaration": "_random_choice_csc", + "alias": null + }, + { + "module": "sklearn.utils.stats", + "declaration": "_weighted_percentile", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "class_distribution", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.dummy/DummyClassifier", "scikit-learn/sklearn.dummy/DummyRegressor"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.ensemble", + "name": "sklearn.ensemble", + "imports": [ + { + "module": "typing", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn.ensemble._base", + "declaration": "BaseEnsemble", + "alias": null + }, + { + "module": "sklearn.ensemble._forest", + "declaration": "RandomForestClassifier", + "alias": null + }, + { + "module": "sklearn.ensemble._forest", + "declaration": "RandomForestRegressor", + "alias": null + }, + { + "module": "sklearn.ensemble._forest", + "declaration": "RandomTreesEmbedding", + "alias": null + }, + { + "module": "sklearn.ensemble._forest", + "declaration": "ExtraTreesClassifier", + "alias": null + }, + { + "module": "sklearn.ensemble._forest", + "declaration": "ExtraTreesRegressor", + "alias": null + }, + { + "module": "sklearn.ensemble._bagging", + "declaration": "BaggingClassifier", + "alias": null + }, + { + "module": "sklearn.ensemble._bagging", + "declaration": "BaggingRegressor", + "alias": null + }, + { + "module": "sklearn.ensemble._iforest", + "declaration": "IsolationForest", + "alias": null + }, + { + "module": "sklearn.ensemble._weight_boosting", + "declaration": "AdaBoostClassifier", + "alias": null + }, + { + "module": "sklearn.ensemble._weight_boosting", + "declaration": "AdaBoostRegressor", + "alias": null + }, + { + "module": "sklearn.ensemble._gb", + "declaration": "GradientBoostingClassifier", + "alias": null + }, + { + "module": "sklearn.ensemble._gb", + "declaration": "GradientBoostingRegressor", + "alias": null + }, + { + "module": "sklearn.ensemble._voting", + "declaration": "VotingClassifier", + "alias": null + }, + { + "module": "sklearn.ensemble._voting", + "declaration": "VotingRegressor", + "alias": null + }, + { + "module": "sklearn.ensemble._stacking", + "declaration": "StackingClassifier", + "alias": null + }, + { + "module": "sklearn.ensemble._stacking", + "declaration": "StackingRegressor", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting", + "declaration": "HistGradientBoostingRegressor", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting", + "declaration": "HistGradientBoostingClassifier", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging", + "name": "sklearn.ensemble._bagging", + "imports": [ + { + "module": "itertools", + "alias": null + }, + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "warnings", + "declaration": "warn", + "alias": null + }, + { + 
"module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "sklearn.ensemble._base", + "declaration": "BaseEnsemble", + "alias": null + }, + { + "module": "sklearn.ensemble._base", + "declaration": "_partition_estimators", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "r2_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "accuracy_score", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "DecisionTreeClassifier", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "DecisionTreeRegressor", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "indices_to_mask", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "if_delegate_has_method", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.random", + "declaration": "sample_without_replacement", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "has_fit_parameter", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.ensemble._bagging/BaseBagging", + "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier", + "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor" + ], + "functions": [ + "scikit-learn/sklearn.ensemble._bagging/_generate_indices", + "scikit-learn/sklearn.ensemble._bagging/_generate_bagging_indices", + "scikit-learn/sklearn.ensemble._bagging/_parallel_build_estimators", + "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_proba", + "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_log_proba", + "scikit-learn/sklearn.ensemble._bagging/_parallel_decision_function", + "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_regression" + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._base", + "name": "sklearn.ensemble._base", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "typing", + "declaration": "List", + "alias": null + }, + { + "module": "joblib", + "declaration": "effective_n_jobs", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_regressor", + "alias": null + }, + { + "module": "sklearn.base", + 
"declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MetaEstimatorMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_print_elapsed_time", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "_BaseComposition", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.ensemble._base/BaseEnsemble", + "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble" + ], + "functions": [ + "scikit-learn/sklearn.ensemble._base/_fit_single_estimator", + "scikit-learn/sklearn.ensemble._base/_set_random_states", + "scikit-learn/sklearn.ensemble._base/_partition_estimators" + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._forest", + "name": "sklearn.ensemble._forest", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "threading", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "warnings", + "declaration": "catch_warnings", + "alias": null + }, + { + "module": "warnings", + "declaration": "simplefilter", + "alias": null + }, + { + "module": "warnings", + "declaration": "warn", + "alias": null + }, + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "hstack", + "alias": "sparse_hstack" + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "r2_score", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "OneHotEncoder", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "DecisionTreeClassifier", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "DecisionTreeRegressor", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "ExtraTreeClassifier", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "ExtraTreeRegressor", + "alias": null + }, + { + "module": "sklearn.tree._tree", + "declaration": "DTYPE", + "alias": null + }, + { + "module": "sklearn.tree._tree", + "declaration": "DOUBLE", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "compute_sample_weight", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "DataConversionWarning", + "alias": null + }, + { + "module": "sklearn.ensemble._base", + "declaration": "BaseEnsemble", + "alias": null + }, + { + "module": "sklearn.ensemble._base", + "declaration": "_partition_estimators", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "_joblib_parallel_args", + "alias": null + }, + { 
+ "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.ensemble._forest/BaseForest", + "scikit-learn/sklearn.ensemble._forest/ForestClassifier", + "scikit-learn/sklearn.ensemble._forest/ForestRegressor", + "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier", + "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor", + "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier", + "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor", + "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding" + ], + "functions": [ + "scikit-learn/sklearn.ensemble._forest/_get_n_samples_bootstrap", + "scikit-learn/sklearn.ensemble._forest/_generate_sample_indices", + "scikit-learn/sklearn.ensemble._forest/_generate_unsampled_indices", + "scikit-learn/sklearn.ensemble._forest/_parallel_build_trees", + "scikit-learn/sklearn.ensemble._forest/_accumulate_prediction" + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb", + "name": "sklearn.ensemble._gb", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "sklearn.ensemble._base", + "declaration": "BaseEnsemble", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.ensemble._gradient_boosting", + "declaration": "predict_stages", + "alias": null + }, + { + "module": "sklearn.ensemble._gradient_boosting", + "declaration": "predict_stage", + "alias": null + }, + { + "module": "sklearn.ensemble._gradient_boosting", + "declaration": "_random_sample_mask", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "csc_matrix", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "csr_matrix", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "time", + "declaration": "time", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "train_test_split", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "DecisionTreeRegressor", + "alias": null + }, + { + "module": "sklearn.tree._tree", + "declaration": "DTYPE", + "alias": null + }, + { + "module": "sklearn.tree._tree", + "declaration": "DOUBLE", + "alias": null + }, + { + "module": "sklearn.ensemble", + "declaration": "_gb_losses", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + 
"module": "sklearn.utils", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.ensemble._gb/VerboseReporter", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses", + "name": "sklearn.ensemble._gb_losses", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "expit", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "logsumexp", + "alias": null + }, + { + "module": "sklearn.tree._tree", + "declaration": "TREE_LEAF", + "alias": null + }, + { + "module": "sklearn.utils.stats", + "declaration": "_weighted_percentile", + "alias": null + }, + { + "module": "sklearn.dummy", + "declaration": "DummyClassifier", + "alias": null + }, + { + "module": "sklearn.dummy", + "declaration": "DummyRegressor", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.ensemble._gb_losses/LossFunction", + "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction", + "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError", + "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError", + "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction", + "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction", + "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction", + "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance", + "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance", + "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting", + "name": "sklearn.ensemble._hist_gradient_boosting", + "imports": [], + "from_imports": [], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning", + "name": "sklearn.ensemble._hist_gradient_boosting.binning", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._binning", + "declaration": "_map_to_bins", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": 
"X_DTYPE", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": "X_BINNED_DTYPE", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": "ALMOST_INF", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": "X_BITSET_INNER_DTYPE", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._bitset", + "declaration": "set_bitset_memoryview", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper"], + "functions": ["scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_find_binning_thresholds"] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting", + "name": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABC", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "functools", + "declaration": "partial", + "alias": null + }, + { + "module": "timeit", + "declaration": "default_timer", + "alias": "time" + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "resample", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "check_scoring", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "train_test_split", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelEncoder", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._gradient_boosting", + "declaration": "_update_raw_predictions", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": "Y_DTYPE", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": "X_DTYPE", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": "X_BINNED_DTYPE", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.binning", + "declaration": "_BinMapper", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.grower", + "declaration": "TreeGrower", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.loss", + "declaration": 
"_LOSSES", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.loss", + "declaration": "BaseLoss", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower", + "name": "sklearn.ensemble._hist_gradient_boosting.grower", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "numbers", + "alias": null + } + ], + "from_imports": [ + { + "module": "heapq", + "declaration": "heappush", + "alias": null + }, + { + "module": "heapq", + "declaration": "heappop", + "alias": null + }, + { + "module": "timeit", + "declaration": "default_timer", + "alias": "time" + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.splitting", + "declaration": "Splitter", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.histogram", + "declaration": "HistogramBuilder", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.predictor", + "declaration": "TreePredictor", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.utils", + "declaration": "sum_parallel", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": "PREDICTOR_RECORD_DTYPE", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": "X_BITSET_INNER_DTYPE", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": "Y_DTYPE", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": "MonotonicConstraint", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._bitset", + "declaration": "set_raw_bitset_from_binned_bitset", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower" + ], + "functions": ["scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/_fill_predictor_arrays"] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss", + "name": "sklearn.ensemble._hist_gradient_boosting.loss", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABC", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "expit", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "logsumexp", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "xlogy", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": "Y_DTYPE", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": "G_H_DTYPE", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._loss", + "declaration": "_update_gradients_least_squares", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._loss", + "declaration": "_update_gradients_hessians_least_squares", + "alias": 
null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._loss", + "declaration": "_update_gradients_least_absolute_deviation", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._loss", + "declaration": "_update_gradients_hessians_least_absolute_deviation", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._loss", + "declaration": "_update_gradients_hessians_binary_crossentropy", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._loss", + "declaration": "_update_gradients_hessians_categorical_crossentropy", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._loss", + "declaration": "_update_gradients_hessians_poisson", + "alias": null + }, + { + "module": "sklearn.utils.stats", + "declaration": "_weighted_percentile", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor", + "name": "sklearn.ensemble._hist_gradient_boosting.predictor", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.ensemble._hist_gradient_boosting.common", + "declaration": "Y_DTYPE", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._predictor", + "declaration": "_predict_from_raw_data", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._predictor", + "declaration": "_predict_from_binned_data", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting._predictor", + "declaration": "_compute_partial_dependence", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest", + "name": "sklearn.ensemble._iforest", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "warnings", + "declaration": "warn", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "ExtraTreeRegressor", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "gen_batches", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "get_chunk_n_rows", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "_joblib_parallel_args", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": 
"sklearn.base", + "declaration": "OutlierMixin", + "alias": null + }, + { + "module": "sklearn.ensemble._bagging", + "declaration": "BaseBagging", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.ensemble._iforest/IsolationForest"], + "functions": ["scikit-learn/sklearn.ensemble._iforest/_average_path_length"] + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking", + "name": "sklearn.ensemble._stacking", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sparse" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "copy", + "declaration": "deepcopy", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_regressor", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + }, + { + "module": "sklearn.utils._estimator_html_repr", + "declaration": "_VisualBlock", + "alias": null + }, + { + "module": "sklearn.ensemble._base", + "declaration": "_fit_single_estimator", + "alias": null + }, + { + "module": "sklearn.ensemble._base", + "declaration": "_BaseHeterogeneousEnsemble", + "alias": null + }, + { + "module": "sklearn.linear_model", + "declaration": "LogisticRegression", + "alias": null + }, + { + "module": "sklearn.linear_model", + "declaration": "RidgeCV", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "cross_val_predict", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "check_cv", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelEncoder", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "if_delegate_has_method", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.ensemble._stacking/_BaseStacking", + "scikit-learn/sklearn.ensemble._stacking/StackingClassifier", + "scikit-learn/sklearn.ensemble._stacking/StackingRegressor" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._voting", + "name": "sklearn.ensemble._voting", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { 
+ "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.ensemble._base", + "declaration": "_fit_single_estimator", + "alias": null + }, + { + "module": "sklearn.ensemble._base", + "declaration": "_BaseHeterogeneousEnsemble", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelEncoder", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + }, + { + "module": "sklearn.utils._estimator_html_repr", + "declaration": "_VisualBlock", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.ensemble._voting/_BaseVoting", + "scikit-learn/sklearn.ensemble._voting/VotingClassifier", + "scikit-learn/sklearn.ensemble._voting/VotingRegressor" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting", + "name": "sklearn.ensemble._weight_boosting", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "xlogy", + "alias": null + }, + { + "module": "sklearn.ensemble._base", + "declaration": "BaseEnsemble", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_regressor", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "DecisionTreeClassifier", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "DecisionTreeRegressor", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_safe_indexing", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "softmax", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "stable_cumsum", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "accuracy_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "r2_score", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + 
"alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "has_fit_parameter", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor" + ], + "functions": ["scikit-learn/sklearn.ensemble._weight_boosting/_samme_proba"] + }, + { + "id": "scikit-learn/sklearn.ensemble.setup", + "name": "sklearn.ensemble.setup", + "imports": [ + { + "module": "numpy", + "alias": null + } + ], + "from_imports": [ + { + "module": "numpy.distutils.misc_util", + "declaration": "Configuration", + "alias": null + }, + { + "module": "numpy.distutils.core", + "declaration": "setup", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.ensemble.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.exceptions", + "name": "sklearn.exceptions", + "imports": [], + "from_imports": [ + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.exceptions/NotFittedError", + "scikit-learn/sklearn.exceptions/ChangedBehaviorWarning", + "scikit-learn/sklearn.exceptions/ConvergenceWarning", + "scikit-learn/sklearn.exceptions/DataConversionWarning", + "scikit-learn/sklearn.exceptions/DataDimensionalityWarning", + "scikit-learn/sklearn.exceptions/EfficiencyWarning", + "scikit-learn/sklearn.exceptions/FitFailedWarning", + "scikit-learn/sklearn.exceptions/NonBLASDotWarning", + "scikit-learn/sklearn.exceptions/SkipTestWarning", + "scikit-learn/sklearn.exceptions/UndefinedMetricWarning", + "scikit-learn/sklearn.exceptions/PositiveSpectrumWarning" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.experimental", + "name": "sklearn.experimental", + "imports": [], + "from_imports": [], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.experimental.enable_halving_search_cv", + "name": "sklearn.experimental.enable_halving_search_cv", + "imports": [], + "from_imports": [ + { + "module": "sklearn.model_selection._search_successive_halving", + "declaration": "HalvingRandomSearchCV", + "alias": null + }, + { + "module": "sklearn.model_selection._search_successive_halving", + "declaration": "HalvingGridSearchCV", + "alias": null + }, + { + "module": "sklearn", + "declaration": "model_selection", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.experimental.enable_hist_gradient_boosting", + "name": "sklearn.experimental.enable_hist_gradient_boosting", + "imports": [], + "from_imports": [ + { + "module": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting", + "declaration": "HistGradientBoostingClassifier", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting", + "declaration": "HistGradientBoostingRegressor", + "alias": null + }, + { + "module": "sklearn", + "declaration": "ensemble", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.experimental.enable_iterative_imputer", + "name": "sklearn.experimental.enable_iterative_imputer", + "imports": [], + "from_imports": [ + { + "module": "sklearn.impute._iterative", + "declaration": 
"IterativeImputer", + "alias": null + }, + { + "module": "sklearn", + "declaration": "impute", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.externals", + "name": "sklearn.externals", + "imports": [], + "from_imports": [], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.externals._arff", + "name": "sklearn.externals._arff", + "imports": [ + { + "module": "re", + "alias": null + }, + { + "module": "sys", + "alias": null + }, + { + "module": "csv", + "alias": null + }, + { + "module": "typing", + "alias": null + } + ], + "from_imports": [ + { + "module": "typing", + "declaration": "Optional", + "alias": null + }, + { + "module": "typing", + "declaration": "List", + "alias": null + }, + { + "module": "typing", + "declaration": "Dict", + "alias": null + }, + { + "module": "typing", + "declaration": "Any", + "alias": null + }, + { + "module": "typing", + "declaration": "Iterator", + "alias": null + }, + { + "module": "typing", + "declaration": "Union", + "alias": null + }, + { + "module": "typing", + "declaration": "Tuple", + "alias": null + }, + { + "module": "typing_extensions", + "declaration": "TypedDict", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.externals._arff/ArffContainerType", + "scikit-learn/sklearn.externals._arff/ArffException", + "scikit-learn/sklearn.externals._arff/BadRelationFormat", + "scikit-learn/sklearn.externals._arff/BadAttributeFormat", + "scikit-learn/sklearn.externals._arff/BadDataFormat", + "scikit-learn/sklearn.externals._arff/BadAttributeType", + "scikit-learn/sklearn.externals._arff/BadAttributeName", + "scikit-learn/sklearn.externals._arff/BadNominalValue", + "scikit-learn/sklearn.externals._arff/BadNominalFormatting", + "scikit-learn/sklearn.externals._arff/BadNumericalValue", + "scikit-learn/sklearn.externals._arff/BadStringValue", + "scikit-learn/sklearn.externals._arff/BadLayout", + "scikit-learn/sklearn.externals._arff/BadObject", + "scikit-learn/sklearn.externals._arff/EncodedNominalConversor", + "scikit-learn/sklearn.externals._arff/NominalConversor", + "scikit-learn/sklearn.externals._arff/DenseGeneratorData", + "scikit-learn/sklearn.externals._arff/_DataListMixin", + "scikit-learn/sklearn.externals._arff/Data", + "scikit-learn/sklearn.externals._arff/COOData", + "scikit-learn/sklearn.externals._arff/LODGeneratorData", + "scikit-learn/sklearn.externals._arff/LODData", + "scikit-learn/sklearn.externals._arff/ArffDecoder", + "scikit-learn/sklearn.externals._arff/ArffEncoder" + ], + "functions": [ + "scikit-learn/sklearn.externals._arff/_build_re_values", + "scikit-learn/sklearn.externals._arff/_escape_sub_callback", + "scikit-learn/sklearn.externals._arff/_unquote", + "scikit-learn/sklearn.externals._arff/_parse_values", + "scikit-learn/sklearn.externals._arff/_unescape_sub_callback", + "scikit-learn/sklearn.externals._arff/encode_string", + "scikit-learn/sklearn.externals._arff/_get_data_object_for_decoding", + "scikit-learn/sklearn.externals._arff/_get_data_object_for_encoding", + "scikit-learn/sklearn.externals._arff/load", + "scikit-learn/sklearn.externals._arff/loads", + "scikit-learn/sklearn.externals._arff/dump", + "scikit-learn/sklearn.externals._arff/dumps" + ] + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg", + "name": "sklearn.externals._lobpcg", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "__future__", + "declaration": "division", + "alias": null + }, + { + "module": "__future__", + 
"declaration": "print_function", + "alias": null + }, + { + "module": "__future__", + "declaration": "absolute_import", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "inv", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "eigh", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "cho_factor", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "cho_solve", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "cholesky", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "orth", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "LinAlgError", + "alias": null + }, + { + "module": "scipy.sparse.linalg", + "declaration": "aslinearoperator", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.externals._lobpcg/bmat", + "scikit-learn/sklearn.externals._lobpcg/_save", + "scikit-learn/sklearn.externals._lobpcg/_report_nonhermitian", + "scikit-learn/sklearn.externals._lobpcg/_as2d", + "scikit-learn/sklearn.externals._lobpcg/_makeOperator", + "scikit-learn/sklearn.externals._lobpcg/_applyConstraints", + "scikit-learn/sklearn.externals._lobpcg/_b_orthonormalize", + "scikit-learn/sklearn.externals._lobpcg/_get_indx", + "scikit-learn/sklearn.externals._lobpcg/lobpcg" + ] + }, + { + "id": "scikit-learn/sklearn.externals._pep562", + "name": "sklearn.externals._pep562", + "imports": [ + { + "module": "sys", + "alias": null + } + ], + "from_imports": [ + { + "module": "__future__", + "declaration": "unicode_literals", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.externals._pep562/Pep562"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.externals._pilutil", + "name": "sklearn.externals._pilutil", + "imports": [ + { + "module": "numpy", + "alias": null + } + ], + "from_imports": [ + { + "module": "PIL", + "declaration": "Image", + "alias": null + }, + { + "module": "__future__", + "declaration": "division", + "alias": null + }, + { + "module": "__future__", + "declaration": "print_function", + "alias": null + }, + { + "module": "__future__", + "declaration": "absolute_import", + "alias": null + }, + { + "module": "numpy", + "declaration": "amin", + "alias": null + }, + { + "module": "numpy", + "declaration": "amax", + "alias": null + }, + { + "module": "numpy", + "declaration": "ravel", + "alias": null + }, + { + "module": "numpy", + "declaration": "asarray", + "alias": null + }, + { + "module": "numpy", + "declaration": "arange", + "alias": null + }, + { + "module": "numpy", + "declaration": "ones", + "alias": null + }, + { + "module": "numpy", + "declaration": "newaxis", + "alias": null + }, + { + "module": "numpy", + "declaration": "transpose", + "alias": null + }, + { + "module": "numpy", + "declaration": "iscomplexobj", + "alias": null + }, + { + "module": "numpy", + "declaration": "uint8", + "alias": null + }, + { + "module": "numpy", + "declaration": "issubdtype", + "alias": null + }, + { + "module": "numpy", + "declaration": "array", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.externals._pilutil/bytescale", + "scikit-learn/sklearn.externals._pilutil/imread", + "scikit-learn/sklearn.externals._pilutil/imsave", + "scikit-learn/sklearn.externals._pilutil/fromimage", + "scikit-learn/sklearn.externals._pilutil/toimage", + "scikit-learn/sklearn.externals._pilutil/imresize" + ] + }, + { + "id": "scikit-learn/sklearn.externals.conftest", + "name": "sklearn.externals.conftest", 
+ "imports": [], + "from_imports": [], + "classes": [], + "functions": ["scikit-learn/sklearn.externals.conftest/pytest_ignore_collect"] + }, + { + "id": "scikit-learn/sklearn.feature_extraction", + "name": "sklearn.feature_extraction", + "imports": [], + "from_imports": [ + { + "module": "sklearn.feature_extraction._dict_vectorizer", + "declaration": "DictVectorizer", + "alias": null + }, + { + "module": "sklearn.feature_extraction._hash", + "declaration": "FeatureHasher", + "alias": null + }, + { + "module": "sklearn.feature_extraction.image", + "declaration": "img_to_graph", + "alias": null + }, + { + "module": "sklearn.feature_extraction.image", + "declaration": "grid_to_graph", + "alias": null + }, + { + "module": "sklearn.feature_extraction", + "declaration": "text", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer", + "name": "sklearn.feature_extraction._dict_vectorizer", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "array", + "declaration": "array", + "alias": null + }, + { + "module": "collections.abc", + "declaration": "Mapping", + "alias": null + }, + { + "module": "collections.abc", + "declaration": "Iterable", + "alias": null + }, + { + "module": "operator", + "declaration": "itemgetter", + "alias": null + }, + { + "module": "numbers", + "declaration": "Number", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "tosequence", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer"], + "functions": ["scikit-learn/sklearn.feature_extraction._dict_vectorizer/_tosequence"] + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash", + "name": "sklearn.feature_extraction._hash", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "sklearn.feature_extraction._hashing_fast", + "declaration": "transform", + "alias": "_hashing_transform" + }, + { + "module": "sklearn.utils", + "declaration": "IS_PYPY", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.feature_extraction._hash/FeatureHasher"], + "functions": [ + "scikit-learn/sklearn.feature_extraction._hash/_hashing_transform", + "scikit-learn/sklearn.feature_extraction._hash/_iteritems" + ] + }, + { + "id": "scikit-learn/sklearn.feature_extraction._stop_words", + "name": "sklearn.feature_extraction._stop_words", + "imports": [], + "from_imports": [], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image", + "name": "sklearn.feature_extraction.image", + "imports": [ + { + "module": "numbers", + "alias": null + 
}, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "itertools", + "declaration": "product", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "numpy.lib.stride_tricks", + "declaration": "as_strided", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.feature_extraction.image/PatchExtractor"], + "functions": [ + "scikit-learn/sklearn.feature_extraction.image/_make_edges_3d", + "scikit-learn/sklearn.feature_extraction.image/_compute_gradient_3d", + "scikit-learn/sklearn.feature_extraction.image/_mask_edges_weights", + "scikit-learn/sklearn.feature_extraction.image/_to_graph", + "scikit-learn/sklearn.feature_extraction.image/img_to_graph", + "scikit-learn/sklearn.feature_extraction.image/grid_to_graph", + "scikit-learn/sklearn.feature_extraction.image/_compute_n_patches", + "scikit-learn/sklearn.feature_extraction.image/_extract_patches", + "scikit-learn/sklearn.feature_extraction.image/extract_patches_2d", + "scikit-learn/sklearn.feature_extraction.image/reconstruct_from_patches_2d" + ] + }, + { + "id": "scikit-learn/sklearn.feature_extraction.setup", + "name": "sklearn.feature_extraction.setup", + "imports": [ + { + "module": "os", + "alias": null + }, + { + "module": "platform", + "alias": null + } + ], + "from_imports": [], + "classes": [], + "functions": ["scikit-learn/sklearn.feature_extraction.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text", + "name": "sklearn.feature_extraction.text", + "imports": [ + { + "module": "array", + "alias": null + }, + { + "module": "numbers", + "alias": null + }, + { + "module": "re", + "alias": null + }, + { + "module": "unicodedata", + "alias": null + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "collections", + "declaration": "defaultdict", + "alias": null + }, + { + "module": "collections.abc", + "declaration": "Mapping", + "alias": null + }, + { + "module": "functools", + "declaration": "partial", + "alias": null + }, + { + "module": "operator", + "declaration": "itemgetter", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "normalize", + "alias": null + }, + { + "module": "sklearn.feature_extraction._hash", + "declaration": "FeatureHasher", + "alias": null + }, + { + "module": "sklearn.feature_extraction._stop_words", + "declaration": "ENGLISH_STOP_WORDS", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "FLOAT_DTYPES", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_IS_32BIT", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + 
"declaration": "_astype_copy_false", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin", + "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer", + "scikit-learn/sklearn.feature_extraction.text/CountVectorizer", + "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer" + ], + "functions": [ + "scikit-learn/sklearn.feature_extraction.text/_preprocess", + "scikit-learn/sklearn.feature_extraction.text/_analyze", + "scikit-learn/sklearn.feature_extraction.text/strip_accents_unicode", + "scikit-learn/sklearn.feature_extraction.text/strip_accents_ascii", + "scikit-learn/sklearn.feature_extraction.text/strip_tags", + "scikit-learn/sklearn.feature_extraction.text/_check_stop_list", + "scikit-learn/sklearn.feature_extraction.text/_document_frequency", + "scikit-learn/sklearn.feature_extraction.text/_make_int_array" + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection", + "name": "sklearn.feature_selection", + "imports": [], + "from_imports": [ + { + "module": "sklearn.feature_selection._univariate_selection", + "declaration": "chi2", + "alias": null + }, + { + "module": "sklearn.feature_selection._univariate_selection", + "declaration": "f_classif", + "alias": null + }, + { + "module": "sklearn.feature_selection._univariate_selection", + "declaration": "f_oneway", + "alias": null + }, + { + "module": "sklearn.feature_selection._univariate_selection", + "declaration": "f_regression", + "alias": null + }, + { + "module": "sklearn.feature_selection._univariate_selection", + "declaration": "SelectPercentile", + "alias": null + }, + { + "module": "sklearn.feature_selection._univariate_selection", + "declaration": "SelectKBest", + "alias": null + }, + { + "module": "sklearn.feature_selection._univariate_selection", + "declaration": "SelectFpr", + "alias": null + }, + { + "module": "sklearn.feature_selection._univariate_selection", + "declaration": "SelectFdr", + "alias": null + }, + { + "module": "sklearn.feature_selection._univariate_selection", + "declaration": "SelectFwe", + "alias": null + }, + { + "module": "sklearn.feature_selection._univariate_selection", + "declaration": "GenericUnivariateSelect", + "alias": null + }, + { + "module": "sklearn.feature_selection._variance_threshold", + "declaration": "VarianceThreshold", + "alias": null + }, + { + "module": "sklearn.feature_selection._rfe", + "declaration": "RFE", + "alias": null + }, + { + "module": "sklearn.feature_selection._rfe", + "declaration": "RFECV", + "alias": null + }, + { + "module": "sklearn.feature_selection._from_model", + "declaration": "SelectFromModel", + "alias": null + }, + { + "module": "sklearn.feature_selection._sequential", + "declaration": "SequentialFeatureSelector", + "alias": null + }, + { + "module": "sklearn.feature_selection._mutual_info", + "declaration": "mutual_info_regression", + "alias": null + }, + { + "module": "sklearn.feature_selection._mutual_info", + "declaration": "mutual_info_classif", + "alias": null + }, + { + "module": "sklearn.feature_selection._base", + "declaration": "SelectorMixin", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.feature_selection._base", + "name": "sklearn.feature_selection._base", + 
"imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "warnings", + "declaration": "warn", + "alias": null + }, + { + "module": "operator", + "declaration": "attrgetter", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "csc_matrix", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "safe_mask", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "safe_sqr", + "alias": null + }, + { + "module": "sklearn.utils._tags", + "declaration": "_safe_tags", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.feature_selection._base/SelectorMixin"], + "functions": ["scikit-learn/sklearn.feature_selection._base/_get_feature_importances"] + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model", + "name": "sklearn.feature_selection._from_model", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "numbers", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn.feature_selection._base", + "declaration": "SelectorMixin", + "alias": null + }, + { + "module": "sklearn.feature_selection._base", + "declaration": "_get_feature_importances", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MetaEstimatorMixin", + "alias": null + }, + { + "module": "sklearn.utils._tags", + "declaration": "_safe_tags", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "if_delegate_has_method", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.feature_selection._from_model/SelectFromModel"], + "functions": ["scikit-learn/sklearn.feature_selection._from_model/_calculate_threshold"] + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info", + "name": "sklearn.feature_selection._mutual_info", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "digamma", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "mutual_info_score", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "NearestNeighbors", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "KDTree", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "scale", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "_astype_copy_false", + "alias": null + }, + { + "module": 
"sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_X_y", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi_cc", + "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi_cd", + "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi", + "scikit-learn/sklearn.feature_selection._mutual_info/_iterate_columns", + "scikit-learn/sklearn.feature_selection._mutual_info/_estimate_mi", + "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_regression", + "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_classif" + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe", + "name": "sklearn.feature_selection._rfe", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "numbers", + "alias": null + } + ], + "from_imports": [ + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "joblib", + "declaration": "effective_n_jobs", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "if_delegate_has_method", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "_safe_split", + "alias": null + }, + { + "module": "sklearn.utils._tags", + "declaration": "_safe_tags", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MetaEstimatorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "check_cv", + "alias": null + }, + { + "module": "sklearn.model_selection._validation", + "declaration": "_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "check_scoring", + "alias": null + }, + { + "module": "sklearn.feature_selection._base", + "declaration": "SelectorMixin", + "alias": null + }, + { + "module": "sklearn.feature_selection._base", + "declaration": "_get_feature_importances", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.feature_selection._rfe/RFE", + "scikit-learn/sklearn.feature_selection._rfe/RFECV" + ], + "functions": ["scikit-learn/sklearn.feature_selection._rfe/_rfe_single_fit"] + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential", + "name": "sklearn.feature_selection._sequential", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.feature_selection._base", + "declaration": "SelectorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MetaEstimatorMixin", + 
"alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.utils._tags", + "declaration": "_safe_tags", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "cross_val_score", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection", + "name": "sklearn.feature_selection._univariate_selection", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "special", + "alias": null + }, + { + "module": "scipy", + "declaration": "stats", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelBinarizer", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "as_float_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_X_y", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "safe_sqr", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "safe_mask", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "row_norms", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.feature_selection._base", + "declaration": "SelectorMixin", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter", + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile", + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest", + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFpr", + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFdr", + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFwe", + "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect" + ], + "functions": [ + "scikit-learn/sklearn.feature_selection._univariate_selection/_clean_nans", + "scikit-learn/sklearn.feature_selection._univariate_selection/f_oneway", + "scikit-learn/sklearn.feature_selection._univariate_selection/f_classif", + "scikit-learn/sklearn.feature_selection._univariate_selection/_chisquare", + "scikit-learn/sklearn.feature_selection._univariate_selection/chi2", + "scikit-learn/sklearn.feature_selection._univariate_selection/f_regression" + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection._variance_threshold", + "name": "sklearn.feature_selection._variance_threshold", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.feature_selection._base", + 
"declaration": "SelectorMixin", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "mean_variance_axis", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "min_max_axis", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process", + "name": "sklearn.gaussian_process", + "imports": [], + "from_imports": [ + { + "module": "sklearn.gaussian_process._gpr", + "declaration": "GaussianProcessRegressor", + "alias": null + }, + { + "module": "sklearn.gaussian_process._gpc", + "declaration": "GaussianProcessClassifier", + "alias": null + }, + { + "module": "sklearn.gaussian_process", + "declaration": "kernels", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc", + "name": "sklearn.gaussian_process._gpc", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.optimize", + "alias": null + } + ], + "from_imports": [ + { + "module": "operator", + "declaration": "itemgetter", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "cholesky", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "cho_solve", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "solve", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "erf", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "expit", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.gaussian_process.kernels", + "declaration": "RBF", + "alias": null + }, + { + "module": "sklearn.gaussian_process.kernels", + "declaration": "CompoundKernel", + "alias": null + }, + { + "module": "sklearn.gaussian_process.kernels", + "declaration": "ConstantKernel", + "alias": "C" + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.optimize", + "declaration": "_check_optimize_result", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelEncoder", + "alias": null + }, + { + "module": "sklearn.multiclass", + "declaration": "OneVsRestClassifier", + "alias": null + }, + { + "module": "sklearn.multiclass", + "declaration": "OneVsOneClassifier", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace", + "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr", + "name": "sklearn.gaussian_process._gpr", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.optimize", + "alias": null + } + ], + 
"from_imports": [ + { + "module": "operator", + "declaration": "itemgetter", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "cholesky", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "cho_solve", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.gaussian_process.kernels", + "declaration": "RBF", + "alias": null + }, + { + "module": "sklearn.gaussian_process.kernels", + "declaration": "ConstantKernel", + "alias": "C" + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "_handle_zeros_in_scale", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.optimize", + "declaration": "_check_optimize_result", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels", + "name": "sklearn.gaussian_process.kernels", + "imports": [ + { + "module": "math", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "collections", + "declaration": "namedtuple", + "alias": null + }, + { + "module": "inspect", + "declaration": "signature", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "kv", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "gamma", + "alias": null + }, + { + "module": "scipy.spatial.distance", + "declaration": "pdist", + "alias": null + }, + { + "module": "scipy.spatial.distance", + "declaration": "cdist", + "alias": null + }, + { + "module": "scipy.spatial.distance", + "declaration": "squareform", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_kernels", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel", + "scikit-learn/sklearn.gaussian_process.kernels/NormalizedKernelMixin", + "scikit-learn/sklearn.gaussian_process.kernels/StationaryKernelMixin", + "scikit-learn/sklearn.gaussian_process.kernels/GenericKernelMixin", + "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel", + "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator", + "scikit-learn/sklearn.gaussian_process.kernels/Sum", + "scikit-learn/sklearn.gaussian_process.kernels/Product", + "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation", + 
"scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel", + "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel", + "scikit-learn/sklearn.gaussian_process.kernels/RBF", + "scikit-learn/sklearn.gaussian_process.kernels/Matern", + "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic", + "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared", + "scikit-learn/sklearn.gaussian_process.kernels/DotProduct", + "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel" + ], + "functions": [ + "scikit-learn/sklearn.gaussian_process.kernels/_check_length_scale", + "scikit-learn/sklearn.gaussian_process.kernels/_approx_fprime" + ] + }, + { + "id": "scikit-learn/sklearn.impute", + "name": "sklearn.impute", + "imports": [ + { + "module": "typing", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn.impute._base", + "declaration": "MissingIndicator", + "alias": null + }, + { + "module": "sklearn.impute._base", + "declaration": "SimpleImputer", + "alias": null + }, + { + "module": "sklearn.impute._knn", + "declaration": "KNNImputer", + "alias": null + }, + { + "module": "sklearn.impute._iterative", + "declaration": "IterativeImputer", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.impute._base", + "name": "sklearn.impute._base", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "numpy.ma", + "alias": "ma" + } + ], + "from_imports": [ + { + "module": "collections", + "declaration": "Counter", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": "sp" + }, + { + "module": "scipy", + "declaration": "stats", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "_get_median", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "FLOAT_DTYPES", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils._mask", + "declaration": "_get_mask", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "is_scalar_nan", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.impute._base/_BaseImputer", + "scikit-learn/sklearn.impute._base/SimpleImputer", + "scikit-learn/sklearn.impute._base/MissingIndicator" + ], + "functions": [ + "scikit-learn/sklearn.impute._base/_check_inputs_dtype", + "scikit-learn/sklearn.impute._base/_most_frequent" + ] + }, + { + "id": "scikit-learn/sklearn.impute._iterative", + "name": "sklearn.impute._iterative", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "time", + "declaration": "time", + "alias": null + }, + { + "module": "collections", + "declaration": "namedtuple", + "alias": null + }, + { + "module": "scipy", + "declaration": "stats", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.preprocessing", + 
"declaration": "normalize", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_safe_indexing", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "is_scalar_nan", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "FLOAT_DTYPES", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils._mask", + "declaration": "_get_mask", + "alias": null + }, + { + "module": "sklearn.impute._base", + "declaration": "_BaseImputer", + "alias": null + }, + { + "module": "sklearn.impute._base", + "declaration": "SimpleImputer", + "alias": null + }, + { + "module": "sklearn.impute._base", + "declaration": "_check_inputs_dtype", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.impute._iterative/IterativeImputer"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.impute._knn", + "name": "sklearn.impute._knn", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.impute._base", + "declaration": "_BaseImputer", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "FLOAT_DTYPES", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "pairwise_distances_chunked", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "_NAN_METRICS", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "_get_weights", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "_check_weights", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "is_scalar_nan", + "alias": null + }, + { + "module": "sklearn.utils._mask", + "declaration": "_get_mask", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.impute._knn/KNNImputer"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.inspection", + "name": "sklearn.inspection", + "imports": [], + "from_imports": [ + { + "module": "sklearn.inspection._permutation_importance", + "declaration": "permutation_importance", + "alias": null + }, + { + "module": "sklearn.inspection._partial_dependence", + "declaration": "partial_dependence", + "alias": null + }, + { + "module": "sklearn.inspection._plot.partial_dependence", + "declaration": "plot_partial_dependence", + "alias": null + }, + { + "module": "sklearn.inspection._plot.partial_dependence", + "declaration": "PartialDependenceDisplay", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence", + "name": "sklearn.inspection._partial_dependence", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "collections.abc", + "declaration": "Iterable", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "scipy.stats.mstats", + "declaration": "mquantiles", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + 
"alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_regressor", + "alias": null + }, + { + "module": "sklearn.pipeline", + "declaration": "Pipeline", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "cartesian", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_matplotlib_support", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_safe_indexing", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_determine_key_type", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_get_column_indices", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "DecisionTreeRegressor", + "alias": null + }, + { + "module": "sklearn.ensemble", + "declaration": "RandomForestRegressor", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + }, + { + "module": "sklearn.ensemble._gb", + "declaration": "BaseGradientBoosting", + "alias": null + }, + { + "module": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting", + "declaration": "BaseHistGradientBoosting", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.inspection._partial_dependence/_grid_from_X", + "scikit-learn/sklearn.inspection._partial_dependence/_partial_dependence_recursion", + "scikit-learn/sklearn.inspection._partial_dependence/_partial_dependence_brute", + "scikit-learn/sklearn.inspection._partial_dependence/partial_dependence" + ] + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance", + "name": "sklearn.inspection._permutation_importance", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "check_scoring", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.inspection._permutation_importance/_weights_scorer", + "scikit-learn/sklearn.inspection._permutation_importance/_calculate_permutation_scores", + "scikit-learn/sklearn.inspection._permutation_importance/permutation_importance" + ] + }, + { + "id": "scikit-learn/sklearn.inspection._plot", + "name": "sklearn.inspection._plot", + "imports": [], + "from_imports": [], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence", + "name": "sklearn.inspection._plot.partial_dependence", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "itertools", + "declaration": 
"chain", + "alias": null + }, + { + "module": "math", + "declaration": "ceil", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "scipy.stats.mstats", + "declaration": "mquantiles", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "sklearn.inspection", + "declaration": "partial_dependence", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_regressor", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_matplotlib_support", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_safe_indexing", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay"], + "functions": ["scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence"] + }, + { + "id": "scikit-learn/sklearn.inspection.setup", + "name": "sklearn.inspection.setup", + "imports": [], + "from_imports": [ + { + "module": "numpy.distutils.misc_util", + "declaration": "Configuration", + "alias": null + }, + { + "module": "numpy.distutils.core", + "declaration": "setup", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.inspection.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.isotonic", + "name": "sklearn.isotonic", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "math", + "alias": null + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "interpolate", + "alias": null + }, + { + "module": "scipy.stats", + "declaration": "spearmanr", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn._isotonic", + "declaration": "_inplace_contiguous_isotonic_regression", + "alias": null + }, + { + "module": "sklearn._isotonic", + "declaration": "_make_unique", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.isotonic/IsotonicRegression"], + "functions": [ + "scikit-learn/sklearn.isotonic/check_increasing", + "scikit-learn/sklearn.isotonic/isotonic_regression" + ] + }, + { + "id": "scikit-learn/sklearn.kernel_approximation", + "name": "sklearn.kernel_approximation", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "scipy.linalg", + "declaration": "svd", + 
"alias": null + }, + { + "module": "scipy.fft", + "declaration": "fft", + "alias": null + }, + { + "module": "scipy.fft", + "declaration": "ifft", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "as_float_array", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_kernels", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "KERNEL_PARAMS", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_non_negative", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch", + "scikit-learn/sklearn.kernel_approximation/RBFSampler", + "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler", + "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler", + "scikit-learn/sklearn.kernel_approximation/Nystroem" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.kernel_ridge", + "name": "sklearn.kernel_ridge", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_kernels", + "alias": null + }, + { + "module": "sklearn.linear_model._ridge", + "declaration": "_solve_cholesky_kernel", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.kernel_ridge/KernelRidge"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.linear_model", + "name": "sklearn.linear_model", + "imports": [], + "from_imports": [ + { + "module": "sklearn.linear_model._base", + "declaration": "LinearRegression", + "alias": null + }, + { + "module": "sklearn.linear_model._bayes", + "declaration": "BayesianRidge", + "alias": null + }, + { + "module": "sklearn.linear_model._bayes", + "declaration": "ARDRegression", + "alias": null + }, + { + "module": "sklearn.linear_model._least_angle", + "declaration": "Lars", + "alias": null + }, + { + "module": "sklearn.linear_model._least_angle", + "declaration": "LassoLars", + "alias": null + }, + { + "module": "sklearn.linear_model._least_angle", + "declaration": "lars_path", + "alias": null + }, + { + "module": "sklearn.linear_model._least_angle", + "declaration": "lars_path_gram", + "alias": null + }, + { + "module": "sklearn.linear_model._least_angle", 
+ "declaration": "LarsCV", + "alias": null + }, + { + "module": "sklearn.linear_model._least_angle", + "declaration": "LassoLarsCV", + "alias": null + }, + { + "module": "sklearn.linear_model._least_angle", + "declaration": "LassoLarsIC", + "alias": null + }, + { + "module": "sklearn.linear_model._coordinate_descent", + "declaration": "Lasso", + "alias": null + }, + { + "module": "sklearn.linear_model._coordinate_descent", + "declaration": "ElasticNet", + "alias": null + }, + { + "module": "sklearn.linear_model._coordinate_descent", + "declaration": "LassoCV", + "alias": null + }, + { + "module": "sklearn.linear_model._coordinate_descent", + "declaration": "ElasticNetCV", + "alias": null + }, + { + "module": "sklearn.linear_model._coordinate_descent", + "declaration": "lasso_path", + "alias": null + }, + { + "module": "sklearn.linear_model._coordinate_descent", + "declaration": "enet_path", + "alias": null + }, + { + "module": "sklearn.linear_model._coordinate_descent", + "declaration": "MultiTaskLasso", + "alias": null + }, + { + "module": "sklearn.linear_model._coordinate_descent", + "declaration": "MultiTaskElasticNet", + "alias": null + }, + { + "module": "sklearn.linear_model._coordinate_descent", + "declaration": "MultiTaskElasticNetCV", + "alias": null + }, + { + "module": "sklearn.linear_model._coordinate_descent", + "declaration": "MultiTaskLassoCV", + "alias": null + }, + { + "module": "sklearn.linear_model._glm", + "declaration": "PoissonRegressor", + "alias": null + }, + { + "module": "sklearn.linear_model._glm", + "declaration": "GammaRegressor", + "alias": null + }, + { + "module": "sklearn.linear_model._glm", + "declaration": "TweedieRegressor", + "alias": null + }, + { + "module": "sklearn.linear_model._huber", + "declaration": "HuberRegressor", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "Hinge", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "Log", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "ModifiedHuber", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "SquaredLoss", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "Huber", + "alias": null + }, + { + "module": "sklearn.linear_model._stochastic_gradient", + "declaration": "SGDClassifier", + "alias": null + }, + { + "module": "sklearn.linear_model._stochastic_gradient", + "declaration": "SGDRegressor", + "alias": null + }, + { + "module": "sklearn.linear_model._ridge", + "declaration": "Ridge", + "alias": null + }, + { + "module": "sklearn.linear_model._ridge", + "declaration": "RidgeCV", + "alias": null + }, + { + "module": "sklearn.linear_model._ridge", + "declaration": "RidgeClassifier", + "alias": null + }, + { + "module": "sklearn.linear_model._ridge", + "declaration": "RidgeClassifierCV", + "alias": null + }, + { + "module": "sklearn.linear_model._ridge", + "declaration": "ridge_regression", + "alias": null + }, + { + "module": "sklearn.linear_model._logistic", + "declaration": "LogisticRegression", + "alias": null + }, + { + "module": "sklearn.linear_model._logistic", + "declaration": "LogisticRegressionCV", + "alias": null + }, + { + "module": "sklearn.linear_model._omp", + "declaration": "orthogonal_mp", + "alias": null + }, + { + "module": "sklearn.linear_model._omp", + "declaration": "orthogonal_mp_gram", + "alias": null + }, + { + "module": "sklearn.linear_model._omp", + "declaration": 
"OrthogonalMatchingPursuit", + "alias": null + }, + { + "module": "sklearn.linear_model._omp", + "declaration": "OrthogonalMatchingPursuitCV", + "alias": null + }, + { + "module": "sklearn.linear_model._passive_aggressive", + "declaration": "PassiveAggressiveClassifier", + "alias": null + }, + { + "module": "sklearn.linear_model._passive_aggressive", + "declaration": "PassiveAggressiveRegressor", + "alias": null + }, + { + "module": "sklearn.linear_model._perceptron", + "declaration": "Perceptron", + "alias": null + }, + { + "module": "sklearn.linear_model._ransac", + "declaration": "RANSACRegressor", + "alias": null + }, + { + "module": "sklearn.linear_model._theil_sen", + "declaration": "TheilSenRegressor", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._base", + "name": "sklearn.linear_model._base", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "scipy", + "declaration": "optimize", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "expit", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "FLOAT_DTYPES", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "mean_variance_axis", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "inplace_column_scale", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "sparse_lsqr", + "alias": null + }, + { + "module": "sklearn.utils._seq_dataset", + "declaration": "ArrayDataset32", + "alias": null + }, + { + "module": "sklearn.utils._seq_dataset", + "declaration": "CSRDataset32", + "alias": null + }, + { + "module": "sklearn.utils._seq_dataset", + "declaration": "ArrayDataset64", + "alias": null + }, + { + "module": "sklearn.utils._seq_dataset", + "declaration": "CSRDataset64", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": 
"normalize", + "alias": "f_normalize" + } + ], + "classes": [ + "scikit-learn/sklearn.linear_model._base/LinearModel", + "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin", + "scikit-learn/sklearn.linear_model._base/SparseCoefMixin", + "scikit-learn/sklearn.linear_model._base/LinearRegression" + ], + "functions": [ + "scikit-learn/sklearn.linear_model._base/make_dataset", + "scikit-learn/sklearn.linear_model._base/_preprocess_data", + "scikit-learn/sklearn.linear_model._base/_rescale_data", + "scikit-learn/sklearn.linear_model._base/_pre_fit" + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes", + "name": "sklearn.linear_model._bayes", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "math", + "declaration": "log", + "alias": null + }, + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearModel", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "_rescale_data", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "fast_logdet", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "pinvh", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.linear_model._bayes/BayesianRidge", + "scikit-learn/sklearn.linear_model._bayes/ARDRegression" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent", + "name": "sklearn.linear_model._coordinate_descent", + "imports": [ + { + "module": "sys", + "alias": null + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "joblib", + "declaration": "effective_n_jobs", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearModel", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "_pre_fit", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "_preprocess_data", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "check_cv", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "_astype_copy_false", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "_joblib_parallel_args", + "alias": null + }, + { + "module": 
"sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.linear_model", + "declaration": "_cd_fast", + "alias": "cd_fast" + } + ], + "classes": [ + "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet", + "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso", + "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV", + "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV", + "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV", + "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet", + "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso", + "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV", + "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV" + ], + "functions": [ + "scikit-learn/sklearn.linear_model._coordinate_descent/_set_order", + "scikit-learn/sklearn.linear_model._coordinate_descent/_alpha_grid", + "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path", + "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path", + "scikit-learn/sklearn.linear_model._coordinate_descent/_path_residuals" + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._glm", + "name": "sklearn.linear_model._glm", + "imports": [], + "from_imports": [ + { + "module": "sklearn.linear_model._glm.glm", + "declaration": "GeneralizedLinearRegressor", + "alias": null + }, + { + "module": "sklearn.linear_model._glm.glm", + "declaration": "PoissonRegressor", + "alias": null + }, + { + "module": "sklearn.linear_model._glm.glm", + "declaration": "GammaRegressor", + "alias": null + }, + { + "module": "sklearn.linear_model._glm.glm", + "declaration": "TweedieRegressor", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm", + "name": "sklearn.linear_model._glm.glm", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.optimize", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_X_y", + "alias": null + }, + { + "module": "sklearn.utils.optimize", + "declaration": "_check_optimize_result", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn._loss.glm_distribution", + "declaration": "ExponentialDispersionModel", + "alias": null + }, + { + "module": "sklearn._loss.glm_distribution", + "declaration": "TweedieDistribution", + "alias": null + }, + { + "module": "sklearn._loss.glm_distribution", + "declaration": "EDM_DISTRIBUTIONS", + "alias": null + }, + { + "module": 
"sklearn.linear_model._glm.link", + "declaration": "BaseLink", + "alias": null + }, + { + "module": "sklearn.linear_model._glm.link", + "declaration": "IdentityLink", + "alias": null + }, + { + "module": "sklearn.linear_model._glm.link", + "declaration": "LogLink", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor", + "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor", + "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor", + "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor" + ], + "functions": [ + "scikit-learn/sklearn.linear_model._glm.glm/_safe_lin_pred", + "scikit-learn/sklearn.linear_model._glm.glm/_y_pred_deviance_derivative" + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link", + "name": "sklearn.linear_model._glm.link", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "expit", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "logit", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.linear_model._glm.link/BaseLink", + "scikit-learn/sklearn.linear_model._glm.link/IdentityLink", + "scikit-learn/sklearn.linear_model._glm.link/LogLink", + "scikit-learn/sklearn.linear_model._glm.link/LogitLink" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._huber", + "name": "sklearn.linear_model._huber", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "optimize", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearModel", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "axis0_safe_slice", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.optimize", + "declaration": "_check_optimize_result", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.linear_model._huber/HuberRegressor"], + "functions": ["scikit-learn/sklearn.linear_model._huber/_huber_loss_and_gradient"] + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle", + "name": "sklearn.linear_model._least_angle", + "imports": [ + { + "module": "sys", + "alias": null + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "math", + "declaration": "log", + "alias": null + }, + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "scipy", + "declaration": "interpolate", + "alias": null + }, + { + "module": "scipy.linalg.lapack", + "declaration": "get_lapack_funcs", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearModel", + "alias": null + }, + { + "module": "sklearn.base", + 
"declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "arrayfuncs", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "as_float_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "check_cv", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.linear_model._least_angle/Lars", + "scikit-learn/sklearn.linear_model._least_angle/LassoLars", + "scikit-learn/sklearn.linear_model._least_angle/LarsCV", + "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV", + "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC" + ], + "functions": [ + "scikit-learn/sklearn.linear_model._least_angle/lars_path", + "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram", + "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver", + "scikit-learn/sklearn.linear_model._least_angle/_check_copy_and_writeable", + "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues" + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic", + "name": "sklearn.linear_model._logistic", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "optimize", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "expit", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "logsumexp", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "joblib", + "declaration": "effective_n_jobs", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearClassifierMixin", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "SparseCoefMixin", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.linear_model._sag", + "declaration": "sag_solver", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelEncoder", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelBinarizer", + "alias": null + }, + { + "module": "sklearn.svm._base", + "declaration": "_fit_liblinear", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "compute_class_weight", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "log_logistic", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": 
"sklearn.utils.extmath", + "declaration": "softmax", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "squared_norm", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "row_norms", + "alias": null + }, + { + "module": "sklearn.utils.optimize", + "declaration": "_newton_cg", + "alias": null + }, + { + "module": "sklearn.utils.optimize", + "declaration": "_check_optimize_result", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "_joblib_parallel_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "check_cv", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "get_scorer", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.linear_model._logistic/LogisticRegression", + "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV" + ], + "functions": [ + "scikit-learn/sklearn.linear_model._logistic/_intercept_dot", + "scikit-learn/sklearn.linear_model._logistic/_logistic_loss_and_grad", + "scikit-learn/sklearn.linear_model._logistic/_logistic_loss", + "scikit-learn/sklearn.linear_model._logistic/_logistic_grad_hess", + "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss", + "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss_grad", + "scikit-learn/sklearn.linear_model._logistic/_multinomial_grad_hess", + "scikit-learn/sklearn.linear_model._logistic/_check_solver", + "scikit-learn/sklearn.linear_model._logistic/_check_multi_class", + "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path", + "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path" + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._omp", + "name": "sklearn.linear_model._omp", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "math", + "declaration": "sqrt", + "alias": null + }, + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "scipy.linalg.lapack", + "declaration": "get_lapack_funcs", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearModel", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "_pre_fit", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "as_float_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.model_selection", + 
"declaration": "check_cv", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit", + "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV" + ], + "functions": [ + "scikit-learn/sklearn.linear_model._omp/_cholesky_omp", + "scikit-learn/sklearn.linear_model._omp/_gram_omp", + "scikit-learn/sklearn.linear_model._omp/orthogonal_mp", + "scikit-learn/sklearn.linear_model._omp/orthogonal_mp_gram", + "scikit-learn/sklearn.linear_model._omp/_omp_path_residues" + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive", + "name": "sklearn.linear_model._passive_aggressive", + "imports": [], + "from_imports": [ + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.linear_model._stochastic_gradient", + "declaration": "BaseSGDClassifier", + "alias": null + }, + { + "module": "sklearn.linear_model._stochastic_gradient", + "declaration": "BaseSGDRegressor", + "alias": null + }, + { + "module": "sklearn.linear_model._stochastic_gradient", + "declaration": "DEFAULT_EPSILON", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier", + "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron", + "name": "sklearn.linear_model._perceptron", + "imports": [], + "from_imports": [ + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.linear_model._stochastic_gradient", + "declaration": "BaseSGDClassifier", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.linear_model._perceptron/Perceptron"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac", + "name": "sklearn.linear_model._ransac", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MetaEstimatorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils.random", + "declaration": "sample_without_replacement", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearRegression", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "has_fit_parameter", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.linear_model._ransac/RANSACRegressor"], + "functions": 
["scikit-learn/sklearn.linear_model._ransac/_dynamic_max_trials"] + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge", + "name": "sklearn.linear_model._ridge", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "linalg", + "alias": "sp_linalg" + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearClassifierMixin", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearModel", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "_rescale_data", + "alias": null + }, + { + "module": "sklearn.linear_model._sag", + "declaration": "sag_solver", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "row_norms", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "compute_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelBinarizer", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "GridSearchCV", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "check_scoring", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "mean_variance_axis", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.linear_model._ridge/_BaseRidge", + "scikit-learn/sklearn.linear_model._ridge/Ridge", + "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier", + "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp", + "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp", + "scikit-learn/sklearn.linear_model._ridge/_IdentityRegressor", + "scikit-learn/sklearn.linear_model._ridge/_IdentityClassifier", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV", + "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV", + "scikit-learn/sklearn.linear_model._ridge/RidgeCV", + "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV" + ], + "functions": [ + "scikit-learn/sklearn.linear_model._ridge/_solve_sparse_cg", + "scikit-learn/sklearn.linear_model._ridge/_solve_lsqr", + "scikit-learn/sklearn.linear_model._ridge/_solve_cholesky", + 
"scikit-learn/sklearn.linear_model._ridge/_solve_cholesky_kernel", + "scikit-learn/sklearn.linear_model._ridge/_solve_svd", + "scikit-learn/sklearn.linear_model._ridge/_get_valid_accept_sparse", + "scikit-learn/sklearn.linear_model._ridge/ridge_regression", + "scikit-learn/sklearn.linear_model._ridge/_ridge_regression", + "scikit-learn/sklearn.linear_model._ridge/_check_gcv_mode", + "scikit-learn/sklearn.linear_model._ridge/_find_smallest_angle" + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._sag", + "name": "sklearn.linear_model._sag", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.linear_model._base", + "declaration": "make_dataset", + "alias": null + }, + { + "module": "sklearn.linear_model._sag_fast", + "declaration": "sag32", + "alias": null + }, + { + "module": "sklearn.linear_model._sag_fast", + "declaration": "sag64", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "row_norms", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.linear_model._sag/get_auto_step_size", + "scikit-learn/sklearn.linear_model._sag/sag_solver" + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient", + "name": "sklearn.linear_model._stochastic_gradient", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearClassifierMixin", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "SparseCoefMixin", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "make_dataset", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_X_y", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "_check_partial_fit_first_call", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + 
"alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "StratifiedShuffleSplit", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "ShuffleSplit", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "_plain_sgd", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "compute_class_weight", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "Hinge", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "SquaredHinge", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "Log", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "ModifiedHuber", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "SquaredLoss", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "Huber", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "EpsilonInsensitive", + "alias": null + }, + { + "module": "sklearn.linear_model._sgd_fast", + "declaration": "SquaredEpsilonInsensitive", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "_joblib_parallel_args", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "deprecated", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier", + "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor", + "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor" + ], + "functions": [ + "scikit-learn/sklearn.linear_model._stochastic_gradient/_prepare_fit_binary", + "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary" + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen", + "name": "sklearn.linear_model._theil_sen", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "itertools", + "declaration": "combinations", + "alias": null + }, + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "binom", + "alias": null + }, + { + "module": "scipy.linalg.lapack", + "declaration": "get_lapack_funcs", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "joblib", + "declaration": "effective_n_jobs", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearModel", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": 
"ConvergenceWarning", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor"], + "functions": [ + "scikit-learn/sklearn.linear_model._theil_sen/_modified_weiszfeld_step", + "scikit-learn/sklearn.linear_model._theil_sen/_spatial_median", + "scikit-learn/sklearn.linear_model._theil_sen/_breakdown_point", + "scikit-learn/sklearn.linear_model._theil_sen/_lstsq" + ] + }, + { + "id": "scikit-learn/sklearn.linear_model.setup", + "name": "sklearn.linear_model.setup", + "imports": [ + { + "module": "os", + "alias": null + }, + { + "module": "numpy", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn._build_utils", + "declaration": "gen_from_templates", + "alias": null + }, + { + "module": "numpy.distutils.core", + "declaration": "setup", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.linear_model.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.manifold", + "name": "sklearn.manifold", + "imports": [], + "from_imports": [ + { + "module": "sklearn.manifold._locally_linear", + "declaration": "locally_linear_embedding", + "alias": null + }, + { + "module": "sklearn.manifold._locally_linear", + "declaration": "LocallyLinearEmbedding", + "alias": null + }, + { + "module": "sklearn.manifold._isomap", + "declaration": "Isomap", + "alias": null + }, + { + "module": "sklearn.manifold._mds", + "declaration": "MDS", + "alias": null + }, + { + "module": "sklearn.manifold._mds", + "declaration": "smacof", + "alias": null + }, + { + "module": "sklearn.manifold._spectral_embedding", + "declaration": "SpectralEmbedding", + "alias": null + }, + { + "module": "sklearn.manifold._spectral_embedding", + "declaration": "spectral_embedding", + "alias": null + }, + { + "module": "sklearn.manifold._t_sne", + "declaration": "TSNE", + "alias": null + }, + { + "module": "sklearn.manifold._t_sne", + "declaration": "trustworthiness", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.manifold._isomap", + "name": "sklearn.manifold._isomap", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "NearestNeighbors", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "kneighbors_graph", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.graph", + "declaration": "graph_shortest_path", + "alias": null + }, + { + "module": "sklearn.decomposition", + "declaration": "KernelPCA", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "KernelCenterer", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.manifold._isomap/Isomap"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear", + "name": "sklearn.manifold._locally_linear", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy.linalg", + "declaration": "eigh", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "svd", + "alias": null + }, + { + "module": "scipy.linalg", + "declaration": "qr", + "alias": 
null + }, + { + "module": "scipy.linalg", + "declaration": "solve", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "eye", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "csr_matrix", + "alias": null + }, + { + "module": "scipy.sparse.linalg", + "declaration": "eigsh", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "_UnstableArchMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils._arpack", + "declaration": "_init_arpack_v0", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "stable_cumsum", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "FLOAT_DTYPES", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "NearestNeighbors", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding"], + "functions": [ + "scikit-learn/sklearn.manifold._locally_linear/barycenter_weights", + "scikit-learn/sklearn.manifold._locally_linear/barycenter_kneighbors_graph", + "scikit-learn/sklearn.manifold._locally_linear/null_space", + "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding" + ] + }, + { + "id": "scikit-learn/sklearn.manifold._mds", + "name": "sklearn.manifold._mds", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "joblib", + "declaration": "effective_n_jobs", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "euclidean_distances", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_symmetric", + "alias": null + }, + { + "module": "sklearn.isotonic", + "declaration": "IsotonicRegression", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.manifold._mds/MDS"], + "functions": [ + "scikit-learn/sklearn.manifold._mds/_smacof_single", + "scikit-learn/sklearn.manifold._mds/smacof" + ] + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding", + "name": "sklearn.manifold._spectral_embedding", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "scipy.linalg", + 
"declaration": "eigh", + "alias": null + }, + { + "module": "scipy.sparse.linalg", + "declaration": "eigsh", + "alias": null + }, + { + "module": "scipy.sparse.csgraph", + "declaration": "connected_components", + "alias": null + }, + { + "module": "scipy.sparse.csgraph", + "declaration": "laplacian", + "alias": "csgraph_laplacian" + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_symmetric", + "alias": null + }, + { + "module": "sklearn.utils._arpack", + "declaration": "_init_arpack_v0", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "_deterministic_vector_sign_flip", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "lobpcg", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "rbf_kernel", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "kneighbors_graph", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "NearestNeighbors", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding"], + "functions": [ + "scikit-learn/sklearn.manifold._spectral_embedding/_graph_connected_component", + "scikit-learn/sklearn.manifold._spectral_embedding/_graph_is_connected", + "scikit-learn/sklearn.manifold._spectral_embedding/_set_diag", + "scikit-learn/sklearn.manifold._spectral_embedding/spectral_embedding" + ] + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne", + "name": "sklearn.manifold._t_sne", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "time", + "declaration": "time", + "alias": null + }, + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "scipy.spatial.distance", + "declaration": "pdist", + "alias": null + }, + { + "module": "scipy.spatial.distance", + "declaration": "squareform", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "csr_matrix", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "NearestNeighbors", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils._openmp_helpers", + "declaration": "_openmp_effective_n_threads", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_non_negative", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.decomposition", + "declaration": "PCA", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_distances", + "alias": null + }, + { + "module": "sklearn.manifold", + "declaration": "_utils", + "alias": null + }, + { + "module": "sklearn.manifold", + "declaration": 
"_barnes_hut_tsne", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.manifold._t_sne/TSNE"], + "functions": [ + "scikit-learn/sklearn.manifold._t_sne/_joint_probabilities", + "scikit-learn/sklearn.manifold._t_sne/_joint_probabilities_nn", + "scikit-learn/sklearn.manifold._t_sne/_kl_divergence", + "scikit-learn/sklearn.manifold._t_sne/_kl_divergence_bh", + "scikit-learn/sklearn.manifold._t_sne/_gradient_descent", + "scikit-learn/sklearn.manifold._t_sne/trustworthiness" + ] + }, + { + "id": "scikit-learn/sklearn.manifold.setup", + "name": "sklearn.manifold.setup", + "imports": [ + { + "module": "os", + "alias": null + }, + { + "module": "numpy", + "alias": null + } + ], + "from_imports": [ + { + "module": "numpy.distutils.core", + "declaration": "setup", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.manifold.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.metrics", + "name": "sklearn.metrics", + "imports": [], + "from_imports": [ + { + "module": "sklearn.metrics._ranking", + "declaration": "auc", + "alias": null + }, + { + "module": "sklearn.metrics._ranking", + "declaration": "average_precision_score", + "alias": null + }, + { + "module": "sklearn.metrics._ranking", + "declaration": "coverage_error", + "alias": null + }, + { + "module": "sklearn.metrics._ranking", + "declaration": "det_curve", + "alias": null + }, + { + "module": "sklearn.metrics._ranking", + "declaration": "dcg_score", + "alias": null + }, + { + "module": "sklearn.metrics._ranking", + "declaration": "label_ranking_average_precision_score", + "alias": null + }, + { + "module": "sklearn.metrics._ranking", + "declaration": "label_ranking_loss", + "alias": null + }, + { + "module": "sklearn.metrics._ranking", + "declaration": "ndcg_score", + "alias": null + }, + { + "module": "sklearn.metrics._ranking", + "declaration": "precision_recall_curve", + "alias": null + }, + { + "module": "sklearn.metrics._ranking", + "declaration": "roc_auc_score", + "alias": null + }, + { + "module": "sklearn.metrics._ranking", + "declaration": "roc_curve", + "alias": null + }, + { + "module": "sklearn.metrics._ranking", + "declaration": "top_k_accuracy_score", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "accuracy_score", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "balanced_accuracy_score", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "classification_report", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "cohen_kappa_score", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "confusion_matrix", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "f1_score", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "fbeta_score", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "hamming_loss", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "hinge_loss", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "jaccard_score", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "log_loss", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "matthews_corrcoef", + "alias": null + }, + { + "module": 
"sklearn.metrics._classification", + "declaration": "precision_recall_fscore_support", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "precision_score", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "recall_score", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "zero_one_loss", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "brier_score_loss", + "alias": null + }, + { + "module": "sklearn.metrics._classification", + "declaration": "multilabel_confusion_matrix", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "cluster", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "adjusted_mutual_info_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "adjusted_rand_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "rand_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "pair_confusion_matrix", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "completeness_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "consensus_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "homogeneity_completeness_v_measure", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "homogeneity_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "mutual_info_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "normalized_mutual_info_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "fowlkes_mallows_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "silhouette_samples", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "silhouette_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "calinski_harabasz_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "v_measure_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "davies_bouldin_score", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "euclidean_distances", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "nan_euclidean_distances", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_distances", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_distances_argmin", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_distances_argmin_min", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_kernels", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_distances_chunked", + "alias": null + }, + { + "module": "sklearn.metrics._regression", + "declaration": "explained_variance_score", + "alias": null + }, + { + "module": "sklearn.metrics._regression", + "declaration": "max_error", + "alias": null + }, + { + "module": "sklearn.metrics._regression", + "declaration": "mean_absolute_error", + "alias": null + }, + { + "module": "sklearn.metrics._regression", + 
"declaration": "mean_squared_error", + "alias": null + }, + { + "module": "sklearn.metrics._regression", + "declaration": "mean_squared_log_error", + "alias": null + }, + { + "module": "sklearn.metrics._regression", + "declaration": "median_absolute_error", + "alias": null + }, + { + "module": "sklearn.metrics._regression", + "declaration": "mean_absolute_percentage_error", + "alias": null + }, + { + "module": "sklearn.metrics._regression", + "declaration": "r2_score", + "alias": null + }, + { + "module": "sklearn.metrics._regression", + "declaration": "mean_tweedie_deviance", + "alias": null + }, + { + "module": "sklearn.metrics._regression", + "declaration": "mean_poisson_deviance", + "alias": null + }, + { + "module": "sklearn.metrics._regression", + "declaration": "mean_gamma_deviance", + "alias": null + }, + { + "module": "sklearn.metrics._scorer", + "declaration": "check_scoring", + "alias": null + }, + { + "module": "sklearn.metrics._scorer", + "declaration": "make_scorer", + "alias": null + }, + { + "module": "sklearn.metrics._scorer", + "declaration": "SCORERS", + "alias": null + }, + { + "module": "sklearn.metrics._scorer", + "declaration": "get_scorer", + "alias": null + }, + { + "module": "sklearn.metrics._plot.det_curve", + "declaration": "plot_det_curve", + "alias": null + }, + { + "module": "sklearn.metrics._plot.det_curve", + "declaration": "DetCurveDisplay", + "alias": null + }, + { + "module": "sklearn.metrics._plot.roc_curve", + "declaration": "plot_roc_curve", + "alias": null + }, + { + "module": "sklearn.metrics._plot.roc_curve", + "declaration": "RocCurveDisplay", + "alias": null + }, + { + "module": "sklearn.metrics._plot.precision_recall_curve", + "declaration": "plot_precision_recall_curve", + "alias": null + }, + { + "module": "sklearn.metrics._plot.precision_recall_curve", + "declaration": "PrecisionRecallDisplay", + "alias": null + }, + { + "module": "sklearn.metrics._plot.confusion_matrix", + "declaration": "plot_confusion_matrix", + "alias": null + }, + { + "module": "sklearn.metrics._plot.confusion_matrix", + "declaration": "ConfusionMatrixDisplay", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.metrics._base", + "name": "sklearn.metrics._base", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "itertools", + "declaration": "combinations", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "type_of_target", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.metrics._base/_average_binary_score", + "scikit-learn/sklearn.metrics._base/_average_multiclass_ovo_score", + "scikit-learn/sklearn.metrics._base/_check_pos_label_consistency" + ] + }, + { + "id": "scikit-learn/sklearn.metrics._classification", + "name": "sklearn.metrics._classification", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy.sparse", + "declaration": "coo_matrix", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "csr_matrix", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelBinarizer", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelEncoder", + "alias": 
null + }, + { + "module": "sklearn.utils", + "declaration": "assert_all_finite", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "unique_labels", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "type_of_target", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "count_nonzero", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "UndefinedMetricWarning", + "alias": null + }, + { + "module": "sklearn.metrics._base", + "declaration": "_check_pos_label_consistency", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.metrics._classification/_check_zero_division", + "scikit-learn/sklearn.metrics._classification/_check_targets", + "scikit-learn/sklearn.metrics._classification/_weighted_sum", + "scikit-learn/sklearn.metrics._classification/accuracy_score", + "scikit-learn/sklearn.metrics._classification/confusion_matrix", + "scikit-learn/sklearn.metrics._classification/multilabel_confusion_matrix", + "scikit-learn/sklearn.metrics._classification/cohen_kappa_score", + "scikit-learn/sklearn.metrics._classification/jaccard_score", + "scikit-learn/sklearn.metrics._classification/matthews_corrcoef", + "scikit-learn/sklearn.metrics._classification/zero_one_loss", + "scikit-learn/sklearn.metrics._classification/f1_score", + "scikit-learn/sklearn.metrics._classification/fbeta_score", + "scikit-learn/sklearn.metrics._classification/_prf_divide", + "scikit-learn/sklearn.metrics._classification/_warn_prf", + "scikit-learn/sklearn.metrics._classification/_check_set_wise_labels", + "scikit-learn/sklearn.metrics._classification/precision_recall_fscore_support", + "scikit-learn/sklearn.metrics._classification/precision_score", + "scikit-learn/sklearn.metrics._classification/recall_score", + "scikit-learn/sklearn.metrics._classification/balanced_accuracy_score", + "scikit-learn/sklearn.metrics._classification/classification_report", + "scikit-learn/sklearn.metrics._classification/hamming_loss", + "scikit-learn/sklearn.metrics._classification/log_loss", + "scikit-learn/sklearn.metrics._classification/hinge_loss", + "scikit-learn/sklearn.metrics._classification/brier_score_loss" + ] + }, + { + "id": "scikit-learn/sklearn.metrics._plot", + "name": "sklearn.metrics._plot", + "imports": [], + "from_imports": [], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.metrics._plot.base", + "name": "sklearn.metrics._plot.base", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.metrics._plot.base/_check_classifier_response_method", + "scikit-learn/sklearn.metrics._plot.base/_get_response" + ] + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix", + "name": "sklearn.metrics._plot.confusion_matrix", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": 
[ + { + "module": "itertools", + "declaration": "product", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "confusion_matrix", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_matplotlib_support", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "unique_labels", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay"], + "functions": ["scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix"] + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve", + "name": "sklearn.metrics._plot.det_curve", + "imports": [ + { + "module": "scipy", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "sklearn.metrics._plot.base", + "declaration": "_get_response", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "det_curve", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_matplotlib_support", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay"], + "functions": ["scikit-learn/sklearn.metrics._plot.det_curve/plot_det_curve"] + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve", + "name": "sklearn.metrics._plot.precision_recall_curve", + "imports": [], + "from_imports": [ + { + "module": "sklearn.metrics._plot.base", + "declaration": "_get_response", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "average_precision_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "precision_recall_curve", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_matplotlib_support", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay"], + "functions": ["scikit-learn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve"] + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve", + "name": "sklearn.metrics._plot.roc_curve", + "imports": [], + "from_imports": [ + { + "module": "sklearn.metrics._plot.base", + "declaration": "_get_response", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "auc", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "roc_curve", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_matplotlib_support", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay"], + "functions": ["scikit-learn/sklearn.metrics._plot.roc_curve/plot_roc_curve"] + }, + { + "id": "scikit-learn/sklearn.metrics._ranking", + "name": "sklearn.metrics._ranking", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "functools", + "declaration": "partial", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "csr_matrix", + "alias": null + }, + { + "module": "scipy.stats", + "declaration": "rankdata", + "alias": null + }, 
+ { + "module": "sklearn.utils", + "declaration": "assert_all_finite", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "type_of_target", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "stable_cumsum", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "count_nonzero", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "UndefinedMetricWarning", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "label_binarize", + "alias": null + }, + { + "module": "sklearn.utils._encode", + "declaration": "_encode", + "alias": null + }, + { + "module": "sklearn.utils._encode", + "declaration": "_unique", + "alias": null + }, + { + "module": "sklearn.metrics._base", + "declaration": "_average_binary_score", + "alias": null + }, + { + "module": "sklearn.metrics._base", + "declaration": "_average_multiclass_ovo_score", + "alias": null + }, + { + "module": "sklearn.metrics._base", + "declaration": "_check_pos_label_consistency", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.metrics._ranking/auc", + "scikit-learn/sklearn.metrics._ranking/average_precision_score", + "scikit-learn/sklearn.metrics._ranking/det_curve", + "scikit-learn/sklearn.metrics._ranking/_binary_roc_auc_score", + "scikit-learn/sklearn.metrics._ranking/roc_auc_score", + "scikit-learn/sklearn.metrics._ranking/_multiclass_roc_auc_score", + "scikit-learn/sklearn.metrics._ranking/_binary_clf_curve", + "scikit-learn/sklearn.metrics._ranking/precision_recall_curve", + "scikit-learn/sklearn.metrics._ranking/roc_curve", + "scikit-learn/sklearn.metrics._ranking/label_ranking_average_precision_score", + "scikit-learn/sklearn.metrics._ranking/coverage_error", + "scikit-learn/sklearn.metrics._ranking/label_ranking_loss", + "scikit-learn/sklearn.metrics._ranking/_dcg_sample_scores", + "scikit-learn/sklearn.metrics._ranking/_tie_averaged_dcg", + "scikit-learn/sklearn.metrics._ranking/_check_dcg_target_type", + "scikit-learn/sklearn.metrics._ranking/dcg_score", + "scikit-learn/sklearn.metrics._ranking/_ndcg_sample_scores", + "scikit-learn/sklearn.metrics._ranking/ndcg_score", + "scikit-learn/sklearn.metrics._ranking/top_k_accuracy_score" + ] + }, + { + "id": "scikit-learn/sklearn.metrics._regression", + "name": "sklearn.metrics._regression", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn._loss.glm_distribution", + "declaration": "TweedieDistribution", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + 
"module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.stats", + "declaration": "_weighted_percentile", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "UndefinedMetricWarning", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.metrics._regression/_check_reg_targets", + "scikit-learn/sklearn.metrics._regression/mean_absolute_error", + "scikit-learn/sklearn.metrics._regression/mean_absolute_percentage_error", + "scikit-learn/sklearn.metrics._regression/mean_squared_error", + "scikit-learn/sklearn.metrics._regression/mean_squared_log_error", + "scikit-learn/sklearn.metrics._regression/median_absolute_error", + "scikit-learn/sklearn.metrics._regression/explained_variance_score", + "scikit-learn/sklearn.metrics._regression/r2_score", + "scikit-learn/sklearn.metrics._regression/max_error", + "scikit-learn/sklearn.metrics._regression/mean_tweedie_deviance", + "scikit-learn/sklearn.metrics._regression/mean_poisson_deviance", + "scikit-learn/sklearn.metrics._regression/mean_gamma_deviance" + ] + }, + { + "id": "scikit-learn/sklearn.metrics._scorer", + "name": "sklearn.metrics._scorer", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "collections.abc", + "declaration": "Iterable", + "alias": null + }, + { + "module": "functools", + "declaration": "partial", + "alias": null + }, + { + "module": "collections", + "declaration": "Counter", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "r2_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "median_absolute_error", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "max_error", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "mean_absolute_error", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "mean_squared_error", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "mean_squared_log_error", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "mean_poisson_deviance", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "mean_gamma_deviance", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "accuracy_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "top_k_accuracy_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "f1_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "roc_auc_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "average_precision_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "precision_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "recall_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "log_loss", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "balanced_accuracy_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "explained_variance_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "brier_score_loss", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "jaccard_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "mean_absolute_percentage_error", + "alias": null + }, + { + "module": 
"sklearn.metrics.cluster", + "declaration": "adjusted_rand_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "rand_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "homogeneity_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "completeness_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "v_measure_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "mutual_info_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "adjusted_mutual_info_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "normalized_mutual_info_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster", + "declaration": "fowlkes_mallows_score", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "type_of_target", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_regressor", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer", + "scikit-learn/sklearn.metrics._scorer/_BaseScorer", + "scikit-learn/sklearn.metrics._scorer/_PredictScorer", + "scikit-learn/sklearn.metrics._scorer/_ProbaScorer", + "scikit-learn/sklearn.metrics._scorer/_ThresholdScorer" + ], + "functions": [ + "scikit-learn/sklearn.metrics._scorer/_cached_call", + "scikit-learn/sklearn.metrics._scorer/get_scorer", + "scikit-learn/sklearn.metrics._scorer/_passthrough_scorer", + "scikit-learn/sklearn.metrics._scorer/check_scoring", + "scikit-learn/sklearn.metrics._scorer/_check_multimetric_scoring", + "scikit-learn/sklearn.metrics._scorer/make_scorer" + ] + }, + { + "id": "scikit-learn/sklearn.metrics.cluster", + "name": "sklearn.metrics.cluster", + "imports": [], + "from_imports": [ + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "adjusted_mutual_info_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "normalized_mutual_info_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "adjusted_rand_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "rand_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "completeness_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "contingency_matrix", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "pair_confusion_matrix", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "expected_mutual_information", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "homogeneity_completeness_v_measure", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "homogeneity_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "mutual_info_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "v_measure_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "fowlkes_mallows_score", + "alias": null 
+ }, + { + "module": "sklearn.metrics.cluster._supervised", + "declaration": "entropy", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._unsupervised", + "declaration": "silhouette_samples", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._unsupervised", + "declaration": "silhouette_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._unsupervised", + "declaration": "calinski_harabasz_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._unsupervised", + "declaration": "davies_bouldin_score", + "alias": null + }, + { + "module": "sklearn.metrics.cluster._bicluster", + "declaration": "consensus_score", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster", + "name": "sklearn.metrics.cluster._bicluster", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy.optimize", + "declaration": "linear_sum_assignment", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.metrics.cluster._bicluster/_check_rows_and_columns", + "scikit-learn/sklearn.metrics.cluster._bicluster/_jaccard", + "scikit-learn/sklearn.metrics.cluster._bicluster/_pairwise_similarity", + "scikit-learn/sklearn.metrics.cluster._bicluster/consensus_score" + ] + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised", + "name": "sklearn.metrics.cluster._supervised", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "math", + "declaration": "log", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": "sp" + }, + { + "module": "sklearn.metrics.cluster._expected_mutual_info_fast", + "declaration": "expected_mutual_information", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "_astype_copy_false", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "type_of_target", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_consistent_length", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.metrics.cluster._supervised/check_clusterings", + "scikit-learn/sklearn.metrics.cluster._supervised/_generalized_average", + "scikit-learn/sklearn.metrics.cluster._supervised/contingency_matrix", + "scikit-learn/sklearn.metrics.cluster._supervised/pair_confusion_matrix", + "scikit-learn/sklearn.metrics.cluster._supervised/rand_score", + "scikit-learn/sklearn.metrics.cluster._supervised/adjusted_rand_score", + "scikit-learn/sklearn.metrics.cluster._supervised/homogeneity_completeness_v_measure", + "scikit-learn/sklearn.metrics.cluster._supervised/homogeneity_score", + "scikit-learn/sklearn.metrics.cluster._supervised/completeness_score", + "scikit-learn/sklearn.metrics.cluster._supervised/v_measure_score", + 
"scikit-learn/sklearn.metrics.cluster._supervised/mutual_info_score", + "scikit-learn/sklearn.metrics.cluster._supervised/adjusted_mutual_info_score", + "scikit-learn/sklearn.metrics.cluster._supervised/normalized_mutual_info_score", + "scikit-learn/sklearn.metrics.cluster._supervised/fowlkes_mallows_score", + "scikit-learn/sklearn.metrics.cluster._supervised/entropy" + ] + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised", + "name": "sklearn.metrics.cluster._unsupervised", + "imports": [ + { + "module": "functools", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_X_y", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_safe_indexing", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_distances_chunked", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_distances", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelEncoder", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.metrics.cluster._unsupervised/check_number_of_labels", + "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_score", + "scikit-learn/sklearn.metrics.cluster._unsupervised/_silhouette_reduce", + "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_samples", + "scikit-learn/sklearn.metrics.cluster._unsupervised/calinski_harabasz_score", + "scikit-learn/sklearn.metrics.cluster._unsupervised/davies_bouldin_score" + ] + }, + { + "id": "scikit-learn/sklearn.metrics.cluster.setup", + "name": "sklearn.metrics.cluster.setup", + "imports": [ + { + "module": "os", + "alias": null + }, + { + "module": "numpy", + "alias": null + } + ], + "from_imports": [ + { + "module": "numpy.distutils.misc_util", + "declaration": "Configuration", + "alias": null + }, + { + "module": "numpy.distutils.core", + "declaration": "setup", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.metrics.cluster.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise", + "name": "sklearn.metrics.pairwise", + "imports": [ + { + "module": "itertools", + "alias": null + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "functools", + "declaration": "partial", + "alias": null + }, + { + "module": "scipy.spatial", + "declaration": "distance", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "csr_matrix", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "joblib", + "declaration": "effective_n_jobs", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_non_negative", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "gen_even_slices", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "gen_batches", + 
"alias": null + }, + { + "module": "sklearn.utils", + "declaration": "get_chunk_n_rows", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "is_scalar_nan", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "row_norms", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "normalize", + "alias": null + }, + { + "module": "sklearn.utils._mask", + "declaration": "_get_mask", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "sp_version", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "parse_version", + "alias": null + }, + { + "module": "sklearn.metrics._pairwise_fast", + "declaration": "_chi2_kernel_fast", + "alias": null + }, + { + "module": "sklearn.metrics._pairwise_fast", + "declaration": "_sparse_manhattan", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "DataConversionWarning", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.metrics.pairwise/_return_float_dtype", + "scikit-learn/sklearn.metrics.pairwise/check_pairwise_arrays", + "scikit-learn/sklearn.metrics.pairwise/check_paired_arrays", + "scikit-learn/sklearn.metrics.pairwise/euclidean_distances", + "scikit-learn/sklearn.metrics.pairwise/nan_euclidean_distances", + "scikit-learn/sklearn.metrics.pairwise/_euclidean_distances_upcast", + "scikit-learn/sklearn.metrics.pairwise/_argmin_min_reduce", + "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin_min", + "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin", + "scikit-learn/sklearn.metrics.pairwise/haversine_distances", + "scikit-learn/sklearn.metrics.pairwise/manhattan_distances", + "scikit-learn/sklearn.metrics.pairwise/cosine_distances", + "scikit-learn/sklearn.metrics.pairwise/paired_euclidean_distances", + "scikit-learn/sklearn.metrics.pairwise/paired_manhattan_distances", + "scikit-learn/sklearn.metrics.pairwise/paired_cosine_distances", + "scikit-learn/sklearn.metrics.pairwise/paired_distances", + "scikit-learn/sklearn.metrics.pairwise/linear_kernel", + "scikit-learn/sklearn.metrics.pairwise/polynomial_kernel", + "scikit-learn/sklearn.metrics.pairwise/sigmoid_kernel", + "scikit-learn/sklearn.metrics.pairwise/rbf_kernel", + "scikit-learn/sklearn.metrics.pairwise/laplacian_kernel", + "scikit-learn/sklearn.metrics.pairwise/cosine_similarity", + "scikit-learn/sklearn.metrics.pairwise/additive_chi2_kernel", + "scikit-learn/sklearn.metrics.pairwise/chi2_kernel", + "scikit-learn/sklearn.metrics.pairwise/distance_metrics", + "scikit-learn/sklearn.metrics.pairwise/_dist_wrapper", + "scikit-learn/sklearn.metrics.pairwise/_parallel_pairwise", + "scikit-learn/sklearn.metrics.pairwise/_pairwise_callable", + "scikit-learn/sklearn.metrics.pairwise/_check_chunk_size", + "scikit-learn/sklearn.metrics.pairwise/_precompute_metric_params", + "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_chunked", + "scikit-learn/sklearn.metrics.pairwise/pairwise_distances", + "scikit-learn/sklearn.metrics.pairwise/kernel_metrics", + "scikit-learn/sklearn.metrics.pairwise/pairwise_kernels" + ] + }, + { + "id": "scikit-learn/sklearn.metrics.setup", + "name": "sklearn.metrics.setup", + 
"imports": [ + { + "module": "os", + "alias": null + } + ], + "from_imports": [ + { + "module": "numpy.distutils.misc_util", + "declaration": "Configuration", + "alias": null + }, + { + "module": "numpy.distutils.core", + "declaration": "setup", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.metrics.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.mixture", + "name": "sklearn.mixture", + "imports": [], + "from_imports": [ + { + "module": "sklearn.mixture._gaussian_mixture", + "declaration": "GaussianMixture", + "alias": null + }, + { + "module": "sklearn.mixture._bayesian_mixture", + "declaration": "BayesianGaussianMixture", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.mixture._base", + "name": "sklearn.mixture._base", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "time", + "declaration": "time", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "logsumexp", + "alias": null + }, + { + "module": "sklearn", + "declaration": "cluster", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "DensityMixin", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.mixture._base/BaseMixture"], + "functions": [ + "scikit-learn/sklearn.mixture._base/_check_shape", + "scikit-learn/sklearn.mixture._base/_check_X" + ] + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture", + "name": "sklearn.mixture._bayesian_mixture", + "imports": [ + { + "module": "math", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy.special", + "declaration": "betaln", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "digamma", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "gammaln", + "alias": null + }, + { + "module": "sklearn.mixture._base", + "declaration": "BaseMixture", + "alias": null + }, + { + "module": "sklearn.mixture._base", + "declaration": "_check_shape", + "alias": null + }, + { + "module": "sklearn.mixture._gaussian_mixture", + "declaration": "_check_precision_matrix", + "alias": null + }, + { + "module": "sklearn.mixture._gaussian_mixture", + "declaration": "_check_precision_positivity", + "alias": null + }, + { + "module": "sklearn.mixture._gaussian_mixture", + "declaration": "_compute_log_det_cholesky", + "alias": null + }, + { + "module": "sklearn.mixture._gaussian_mixture", + "declaration": "_compute_precision_cholesky", + "alias": null + }, + { + "module": "sklearn.mixture._gaussian_mixture", + "declaration": "_estimate_gaussian_parameters", + "alias": null + }, + { + "module": "sklearn.mixture._gaussian_mixture", + "declaration": "_estimate_log_gaussian_prob", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": 
"check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture"], + "functions": [ + "scikit-learn/sklearn.mixture._bayesian_mixture/_log_dirichlet_norm", + "scikit-learn/sklearn.mixture._bayesian_mixture/_log_wishart_norm" + ] + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture", + "name": "sklearn.mixture._gaussian_mixture", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "sklearn.mixture._base", + "declaration": "BaseMixture", + "alias": null + }, + { + "module": "sklearn.mixture._base", + "declaration": "_check_shape", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "row_norms", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture"], + "functions": [ + "scikit-learn/sklearn.mixture._gaussian_mixture/_check_weights", + "scikit-learn/sklearn.mixture._gaussian_mixture/_check_means", + "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precision_positivity", + "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precision_matrix", + "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precisions_full", + "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precisions", + "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_full", + "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_tied", + "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_diag", + "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_spherical", + "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_parameters", + "scikit-learn/sklearn.mixture._gaussian_mixture/_compute_precision_cholesky", + "scikit-learn/sklearn.mixture._gaussian_mixture/_compute_log_det_cholesky", + "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_log_gaussian_prob" + ] + }, + { + "id": "scikit-learn/sklearn.model_selection", + "name": "sklearn.model_selection", + "imports": [ + { + "module": "typing", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn.model_selection._split", + "declaration": "BaseCrossValidator", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "KFold", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "GroupKFold", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "StratifiedKFold", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "TimeSeriesSplit", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "LeaveOneGroupOut", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "LeaveOneOut", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "LeavePGroupsOut", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "LeavePOut", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + 
"declaration": "RepeatedKFold", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "RepeatedStratifiedKFold", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "ShuffleSplit", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "GroupShuffleSplit", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "StratifiedShuffleSplit", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "PredefinedSplit", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "train_test_split", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "check_cv", + "alias": null + }, + { + "module": "sklearn.model_selection._validation", + "declaration": "cross_val_score", + "alias": null + }, + { + "module": "sklearn.model_selection._validation", + "declaration": "cross_val_predict", + "alias": null + }, + { + "module": "sklearn.model_selection._validation", + "declaration": "cross_validate", + "alias": null + }, + { + "module": "sklearn.model_selection._validation", + "declaration": "learning_curve", + "alias": null + }, + { + "module": "sklearn.model_selection._validation", + "declaration": "permutation_test_score", + "alias": null + }, + { + "module": "sklearn.model_selection._validation", + "declaration": "validation_curve", + "alias": null + }, + { + "module": "sklearn.model_selection._search", + "declaration": "GridSearchCV", + "alias": null + }, + { + "module": "sklearn.model_selection._search", + "declaration": "RandomizedSearchCV", + "alias": null + }, + { + "module": "sklearn.model_selection._search", + "declaration": "ParameterGrid", + "alias": null + }, + { + "module": "sklearn.model_selection._search", + "declaration": "ParameterSampler", + "alias": null + }, + { + "module": "sklearn.model_selection._search", + "declaration": "fit_grid_point", + "alias": null + }, + { + "module": "sklearn.model_selection._search_successive_halving", + "declaration": "HalvingGridSearchCV", + "alias": null + }, + { + "module": "sklearn.model_selection._search_successive_halving", + "declaration": "HalvingRandomSearchCV", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._search", + "name": "sklearn.model_selection._search", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "operator", + "alias": null + }, + { + "module": "time", + "alias": null + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "collections", + "declaration": "defaultdict", + "alias": null + }, + { + "module": "collections.abc", + "declaration": "Mapping", + "alias": null + }, + { + "module": "collections.abc", + "declaration": "Sequence", + "alias": null + }, + { + "module": "collections.abc", + "declaration": "Iterable", + "alias": null + }, + { + "module": "functools", + "declaration": "partial", + "alias": null + }, + { + "module": "functools", + "declaration": "reduce", + "alias": null + }, + { + "module": "itertools", + "declaration": "product", + "alias": null + }, + { + "module": "numpy.ma", + "declaration": "MaskedArray", + "alias": null + }, + { + "module": "scipy.stats", + 
"declaration": "rankdata", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MetaEstimatorMixin", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "check_cv", + "alias": null + }, + { + "module": "sklearn.model_selection._validation", + "declaration": "_fit_and_score", + "alias": null + }, + { + "module": "sklearn.model_selection._validation", + "declaration": "_aggregate_score_dicts", + "alias": null + }, + { + "module": "sklearn.model_selection._validation", + "declaration": "_insert_error_scores", + "alias": null + }, + { + "module": "sklearn.model_selection._validation", + "declaration": "_normalize_score_results", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.random", + "declaration": "sample_without_replacement", + "alias": null + }, + { + "module": "sklearn.utils._tags", + "declaration": "_safe_tags", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "indexable", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_fit_params", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "if_delegate_has_method", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.metrics._scorer", + "declaration": "_check_multimetric_scoring", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "check_scoring", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "deprecated", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.model_selection._search/ParameterGrid", + "scikit-learn/sklearn.model_selection._search/ParameterSampler", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV", + "scikit-learn/sklearn.model_selection._search/GridSearchCV", + "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV" + ], + "functions": [ + "scikit-learn/sklearn.model_selection._search/fit_grid_point", + "scikit-learn/sklearn.model_selection._search/_check_param_grid" + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving", + "name": "sklearn.model_selection._search_successive_halving", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "math", + "declaration": "ceil", + "alias": null + }, + { + "module": "math", + "declaration": "floor", + "alias": null + }, + { + "module": "math", + "declaration": "log", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "numbers", + "declaration": "Integral", + "alias": null + }, + { + "module": "sklearn.model_selection._search", + "declaration": "_check_param_grid", + "alias": null + }, + { + "module": 
"sklearn.model_selection._search", + "declaration": "BaseSearchCV", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "ParameterGrid", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "ParameterSampler", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "check_cv", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "_yields_constant_splits", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "resample", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter", + "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving", + "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV", + "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV" + ], + "functions": [ + "scikit-learn/sklearn.model_selection._search_successive_halving/_refit_callable", + "scikit-learn/sklearn.model_selection._search_successive_halving/_top_k" + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._split", + "name": "sklearn.model_selection._split", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "collections.abc", + "declaration": "Iterable", + "alias": null + }, + { + "module": "itertools", + "declaration": "chain", + "alias": null + }, + { + "module": "itertools", + "declaration": "combinations", + "alias": null + }, + { + "module": "math", + "declaration": "ceil", + "alias": null + }, + { + "module": "math", + "declaration": "floor", + "alias": null + }, + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "inspect", + "declaration": "signature", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "comb", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "indexable", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_safe_indexing", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_approximate_mode", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "type_of_target", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "_pprint", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.model_selection._split/BaseCrossValidator", + "scikit-learn/sklearn.model_selection._split/LeaveOneOut", + "scikit-learn/sklearn.model_selection._split/LeavePOut", + "scikit-learn/sklearn.model_selection._split/_BaseKFold", + 
"scikit-learn/sklearn.model_selection._split/KFold", + "scikit-learn/sklearn.model_selection._split/GroupKFold", + "scikit-learn/sklearn.model_selection._split/StratifiedKFold", + "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit", + "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut", + "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut", + "scikit-learn/sklearn.model_selection._split/_RepeatedSplits", + "scikit-learn/sklearn.model_selection._split/RepeatedKFold", + "scikit-learn/sklearn.model_selection._split/RepeatedStratifiedKFold", + "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit", + "scikit-learn/sklearn.model_selection._split/ShuffleSplit", + "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit", + "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit", + "scikit-learn/sklearn.model_selection._split/PredefinedSplit", + "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper" + ], + "functions": [ + "scikit-learn/sklearn.model_selection._split/_validate_shuffle_split", + "scikit-learn/sklearn.model_selection._split/check_cv", + "scikit-learn/sklearn.model_selection._split/train_test_split", + "scikit-learn/sklearn.model_selection._split/_build_repr", + "scikit-learn/sklearn.model_selection._split/_yields_constant_splits" + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._validation", + "name": "sklearn.model_selection._validation", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numbers", + "alias": null + }, + { + "module": "time", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "traceback", + "declaration": "format_exc", + "alias": null + }, + { + "module": "contextlib", + "declaration": "suppress", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "joblib", + "declaration": "logger", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "indexable", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_safe_indexing", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_fit_params", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "_safe_split", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "check_scoring", + "alias": null + }, + { + "module": "sklearn.metrics._scorer", + "declaration": "_check_multimetric_scoring", + "alias": null + }, + { + "module": "sklearn.metrics._scorer", + "declaration": "_MultimetricScorer", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "FitFailedWarning", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + }, + { + "module": "sklearn.model_selection._split", + "declaration": "check_cv", + 
"alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelEncoder", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.model_selection._validation/cross_validate", + "scikit-learn/sklearn.model_selection._validation/_insert_error_scores", + "scikit-learn/sklearn.model_selection._validation/_normalize_score_results", + "scikit-learn/sklearn.model_selection._validation/cross_val_score", + "scikit-learn/sklearn.model_selection._validation/_fit_and_score", + "scikit-learn/sklearn.model_selection._validation/_score", + "scikit-learn/sklearn.model_selection._validation/cross_val_predict", + "scikit-learn/sklearn.model_selection._validation/_fit_and_predict", + "scikit-learn/sklearn.model_selection._validation/_enforce_prediction_order", + "scikit-learn/sklearn.model_selection._validation/_check_is_permutation", + "scikit-learn/sklearn.model_selection._validation/permutation_test_score", + "scikit-learn/sklearn.model_selection._validation/_permutation_test_score", + "scikit-learn/sklearn.model_selection._validation/_shuffle", + "scikit-learn/sklearn.model_selection._validation/learning_curve", + "scikit-learn/sklearn.model_selection._validation/_translate_train_sizes", + "scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator", + "scikit-learn/sklearn.model_selection._validation/validation_curve", + "scikit-learn/sklearn.model_selection._validation/_aggregate_score_dicts" + ] + }, + { + "id": "scikit-learn/sklearn.multiclass", + "name": "sklearn.multiclass", + "imports": [ + { + "module": "array", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "scipy.sparse", + "alias": "sp" + }, + { + "module": "itertools", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MetaEstimatorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_regressor", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "_is_pairwise", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelBinarizer", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "euclidean_distances", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.utils._tags", + "declaration": "_safe_tags", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_X_y", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": 
"sklearn.utils.multiclass", + "declaration": "_check_partial_fit_first_call", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "_ovr_decision_function", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "_safe_split", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "if_delegate_has_method", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.multiclass/_ConstantPredictor", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier", + "scikit-learn/sklearn.multiclass/OneVsOneClassifier", + "scikit-learn/sklearn.multiclass/OutputCodeClassifier" + ], + "functions": [ + "scikit-learn/sklearn.multiclass/_fit_binary", + "scikit-learn/sklearn.multiclass/_partial_fit_binary", + "scikit-learn/sklearn.multiclass/_predict_binary", + "scikit-learn/sklearn.multiclass/_check_estimator", + "scikit-learn/sklearn.multiclass/_fit_ovo_binary", + "scikit-learn/sklearn.multiclass/_partial_fit_ovo_binary" + ] + }, + { + "id": "scikit-learn/sklearn.multioutput", + "name": "sklearn.multioutput", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MetaEstimatorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "cross_val_predict", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_X_y", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "if_delegate_has_method", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "has_fit_parameter", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_fit_params", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.multioutput/_MultiOutputEstimator", + 
"scikit-learn/sklearn.multioutput/MultiOutputRegressor", + "scikit-learn/sklearn.multioutput/MultiOutputClassifier", + "scikit-learn/sklearn.multioutput/_BaseChain", + "scikit-learn/sklearn.multioutput/ClassifierChain", + "scikit-learn/sklearn.multioutput/RegressorChain" + ], + "functions": [ + "scikit-learn/sklearn.multioutput/_fit_estimator", + "scikit-learn/sklearn.multioutput/_partial_fit_estimator" + ] + }, + { + "id": "scikit-learn/sklearn.naive_bayes", + "name": "sklearn.naive_bayes", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "logsumexp", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "binarize", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelBinarizer", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "label_binarize", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_X_y", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "_check_partial_fit_first_call", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_non_negative", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.naive_bayes/_BaseNB", + "scikit-learn/sklearn.naive_bayes/GaussianNB", + "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB", + "scikit-learn/sklearn.naive_bayes/MultinomialNB", + "scikit-learn/sklearn.naive_bayes/ComplementNB", + "scikit-learn/sklearn.naive_bayes/BernoulliNB", + "scikit-learn/sklearn.naive_bayes/CategoricalNB" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.neighbors", + "name": "sklearn.neighbors", + "imports": [], + "from_imports": [ + { + "module": "sklearn.neighbors._ball_tree", + "declaration": "BallTree", + "alias": null + }, + { + "module": "sklearn.neighbors._kd_tree", + "declaration": "KDTree", + "alias": null + }, + { + "module": "sklearn.neighbors._dist_metrics", + "declaration": "DistanceMetric", + "alias": null + }, + { + "module": "sklearn.neighbors._graph", + "declaration": "kneighbors_graph", + "alias": null + }, + { + "module": "sklearn.neighbors._graph", + "declaration": "radius_neighbors_graph", + "alias": null + }, + { + "module": "sklearn.neighbors._graph", + "declaration": "KNeighborsTransformer", + "alias": null + }, + { + "module": "sklearn.neighbors._graph", + "declaration": "RadiusNeighborsTransformer", + "alias": null + 
}, + { + "module": "sklearn.neighbors._unsupervised", + "declaration": "NearestNeighbors", + "alias": null + }, + { + "module": "sklearn.neighbors._classification", + "declaration": "KNeighborsClassifier", + "alias": null + }, + { + "module": "sklearn.neighbors._classification", + "declaration": "RadiusNeighborsClassifier", + "alias": null + }, + { + "module": "sklearn.neighbors._regression", + "declaration": "KNeighborsRegressor", + "alias": null + }, + { + "module": "sklearn.neighbors._regression", + "declaration": "RadiusNeighborsRegressor", + "alias": null + }, + { + "module": "sklearn.neighbors._nearest_centroid", + "declaration": "NearestCentroid", + "alias": null + }, + { + "module": "sklearn.neighbors._kde", + "declaration": "KernelDensity", + "alias": null + }, + { + "module": "sklearn.neighbors._lof", + "declaration": "LocalOutlierFactor", + "alias": null + }, + { + "module": "sklearn.neighbors._nca", + "declaration": "NeighborhoodComponentsAnalysis", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "VALID_METRICS", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "VALID_METRICS_SPARSE", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.neighbors._base", + "name": "sklearn.neighbors._base", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "joblib", + "alias": null + } + ], + "from_imports": [ + { + "module": "functools", + "declaration": "partial", + "alias": null + }, + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "csr_matrix", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "joblib", + "declaration": "effective_n_jobs", + "alias": null + }, + { + "module": "sklearn.neighbors._ball_tree", + "declaration": "BallTree", + "alias": null + }, + { + "module": "sklearn.neighbors._kd_tree", + "declaration": "KDTree", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "pairwise_distances_chunked", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "PAIRWISE_DISTANCE_FUNCTIONS", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "gen_even_slices", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_to_object_array", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_non_negative", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null 
+ }, + { + "module": "sklearn.utils.fixes", + "declaration": "parse_version", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "DataConversionWarning", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "EfficiencyWarning", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.neighbors._base/NeighborsBase", + "scikit-learn/sklearn.neighbors._base/KNeighborsMixin", + "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin" + ], + "functions": [ + "scikit-learn/sklearn.neighbors._base/_check_weights", + "scikit-learn/sklearn.neighbors._base/_get_weights", + "scikit-learn/sklearn.neighbors._base/_is_sorted_by_data", + "scikit-learn/sklearn.neighbors._base/_check_precomputed", + "scikit-learn/sklearn.neighbors._base/_kneighbors_from_graph", + "scikit-learn/sklearn.neighbors._base/_radius_neighbors_from_graph", + "scikit-learn/sklearn.neighbors._base/_tree_query_parallel_helper", + "scikit-learn/sklearn.neighbors._base/_tree_query_radius_parallel_helper" + ] + }, + { + "id": "scikit-learn/sklearn.neighbors._classification", + "name": "sklearn.neighbors._classification", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "stats", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "weighted_mode", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_is_arraylike", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "_check_weights", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "_get_weights", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "NeighborsBase", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "KNeighborsMixin", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "RadiusNeighborsMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier", + "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.neighbors._graph", + "name": "sklearn.neighbors._graph", + "imports": [], + "from_imports": [ + { + "module": "sklearn.neighbors._base", + "declaration": "KNeighborsMixin", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "RadiusNeighborsMixin", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "NeighborsBase", + "alias": null + }, + { + "module": "sklearn.neighbors._unsupervised", + "declaration": "NearestNeighbors", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [ + 
"scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer", + "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer" + ], + "functions": [ + "scikit-learn/sklearn.neighbors._graph/_check_params", + "scikit-learn/sklearn.neighbors._graph/_query_include_self", + "scikit-learn/sklearn.neighbors._graph/kneighbors_graph", + "scikit-learn/sklearn.neighbors._graph/radius_neighbors_graph" + ] + }, + { + "id": "scikit-learn/sklearn.neighbors._kde", + "name": "sklearn.neighbors._kde", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy.special", + "declaration": "gammainc", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "row_norms", + "alias": null + }, + { + "module": "sklearn.neighbors._ball_tree", + "declaration": "BallTree", + "alias": null + }, + { + "module": "sklearn.neighbors._ball_tree", + "declaration": "DTYPE", + "alias": null + }, + { + "module": "sklearn.neighbors._kd_tree", + "declaration": "KDTree", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.neighbors._kde/KernelDensity"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.neighbors._lof", + "name": "sklearn.neighbors._lof", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn.neighbors._base", + "declaration": "NeighborsBase", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "KNeighborsMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "OutlierMixin", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.neighbors._nca", + "name": "sklearn.neighbors._nca", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "sys", + "alias": null + }, + { + "module": "time", + "alias": null + }, + { + "module": "numbers", + "alias": null + } + ], + "from_imports": [ + { + "module": "__future__", + "declaration": "print_function", + "alias": null + }, + { + "module": "warnings", + "declaration": "warn", + "alias": null + }, + { + "module": "scipy.optimize", + "declaration": "minimize", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "softmax", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "pairwise_distances", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + 
"module": "sklearn.preprocessing", + "declaration": "LabelEncoder", + "alias": null + }, + { + "module": "sklearn.decomposition", + "declaration": "PCA", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.random", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_scalar", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.neighbors._nearest_centroid", + "name": "sklearn.neighbors._nearest_centroid", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "sparse", + "alias": "sp" + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_distances", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelEncoder", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "csc_median_axis_0", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.neighbors._regression", + "name": "sklearn.neighbors._regression", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.neighbors._base", + "declaration": "_get_weights", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "_check_weights", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "NeighborsBase", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "KNeighborsMixin", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "RadiusNeighborsMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor", + 
"scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised", + "name": "sklearn.neighbors._unsupervised", + "imports": [], + "from_imports": [ + { + "module": "sklearn.neighbors._base", + "declaration": "NeighborsBase", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "KNeighborsMixin", + "alias": null + }, + { + "module": "sklearn.neighbors._base", + "declaration": "RadiusNeighborsMixin", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.neighbors.setup", + "name": "sklearn.neighbors.setup", + "imports": [ + { + "module": "os", + "alias": null + } + ], + "from_imports": [], + "classes": [], + "functions": ["scikit-learn/sklearn.neighbors.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.neural_network", + "name": "sklearn.neural_network", + "imports": [], + "from_imports": [ + { + "module": "sklearn.neural_network._rbm", + "declaration": "BernoulliRBM", + "alias": null + }, + { + "module": "sklearn.neural_network._multilayer_perceptron", + "declaration": "MLPClassifier", + "alias": null + }, + { + "module": "sklearn.neural_network._multilayer_perceptron", + "declaration": "MLPRegressor", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.neural_network._base", + "name": "sklearn.neural_network._base", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy.special", + "declaration": "expit", + "alias": "logistic_sigmoid" + }, + { + "module": "scipy.special", + "declaration": "xlogy", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.neural_network._base/inplace_identity", + "scikit-learn/sklearn.neural_network._base/inplace_logistic", + "scikit-learn/sklearn.neural_network._base/inplace_tanh", + "scikit-learn/sklearn.neural_network._base/inplace_relu", + "scikit-learn/sklearn.neural_network._base/inplace_softmax", + "scikit-learn/sklearn.neural_network._base/inplace_identity_derivative", + "scikit-learn/sklearn.neural_network._base/inplace_logistic_derivative", + "scikit-learn/sklearn.neural_network._base/inplace_tanh_derivative", + "scikit-learn/sklearn.neural_network._base/inplace_relu_derivative", + "scikit-learn/sklearn.neural_network._base/squared_loss", + "scikit-learn/sklearn.neural_network._base/log_loss", + "scikit-learn/sklearn.neural_network._base/binary_log_loss" + ] + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron", + "name": "sklearn.neural_network._multilayer_perceptron", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "scipy.optimize", + "alias": null + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + 
"module": "sklearn.neural_network._base", + "declaration": "ACTIVATIONS", + "alias": null + }, + { + "module": "sklearn.neural_network._base", + "declaration": "DERIVATIVES", + "alias": null + }, + { + "module": "sklearn.neural_network._base", + "declaration": "LOSS_FUNCTIONS", + "alias": null + }, + { + "module": "sklearn.neural_network._stochastic_optimizers", + "declaration": "SGDOptimizer", + "alias": null + }, + { + "module": "sklearn.neural_network._stochastic_optimizers", + "declaration": "AdamOptimizer", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "train_test_split", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelBinarizer", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "gen_batches", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "shuffle", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_safe_indexing", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "_check_partial_fit_first_call", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "unique_labels", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "type_of_target", + "alias": null + }, + { + "module": "sklearn.utils.optimize", + "declaration": "_check_optimize_result", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor" + ], + "functions": ["scikit-learn/sklearn.neural_network._multilayer_perceptron/_pack"] + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm", + "name": "sklearn.neural_network._rbm", + "imports": [ + { + "module": "time", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "scipy.special", + "declaration": "expit", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "gen_even_slices", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "log_logistic", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + 
"alias": null + } + ], + "classes": ["scikit-learn/sklearn.neural_network._rbm/BernoulliRBM"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers", + "name": "sklearn.neural_network._stochastic_optimizers", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [], + "classes": [ + "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer", + "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer", + "scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.pipeline", + "name": "sklearn.pipeline", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "collections", + "declaration": "defaultdict", + "alias": null + }, + { + "module": "itertools", + "declaration": "islice", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils._estimator_html_repr", + "declaration": "_VisualBlock", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "if_delegate_has_method", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_print_elapsed_time", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.utils._tags", + "declaration": "_safe_tags", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_memory", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "delayed", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "_BaseComposition", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.pipeline/Pipeline", "scikit-learn/sklearn.pipeline/FeatureUnion"], + "functions": [ + "scikit-learn/sklearn.pipeline/_name_estimators", + "scikit-learn/sklearn.pipeline/make_pipeline", + "scikit-learn/sklearn.pipeline/_transform_one", + "scikit-learn/sklearn.pipeline/_fit_transform_one", + "scikit-learn/sklearn.pipeline/_fit_one", + "scikit-learn/sklearn.pipeline/make_union" + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing", + "name": "sklearn.preprocessing", + "imports": [], + "from_imports": [ + { + "module": "sklearn.preprocessing._function_transformer", + "declaration": "FunctionTransformer", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "Binarizer", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "KernelCenterer", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "MinMaxScaler", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "MaxAbsScaler", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "Normalizer", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "RobustScaler", + "alias": null + }, + { + "module": 
"sklearn.preprocessing._data", + "declaration": "StandardScaler", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "QuantileTransformer", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "add_dummy_feature", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "binarize", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "normalize", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "scale", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "robust_scale", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "maxabs_scale", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "minmax_scale", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "quantile_transform", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "power_transform", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "PowerTransformer", + "alias": null + }, + { + "module": "sklearn.preprocessing._data", + "declaration": "PolynomialFeatures", + "alias": null + }, + { + "module": "sklearn.preprocessing._encoders", + "declaration": "OneHotEncoder", + "alias": null + }, + { + "module": "sklearn.preprocessing._encoders", + "declaration": "OrdinalEncoder", + "alias": null + }, + { + "module": "sklearn.preprocessing._label", + "declaration": "label_binarize", + "alias": null + }, + { + "module": "sklearn.preprocessing._label", + "declaration": "LabelBinarizer", + "alias": null + }, + { + "module": "sklearn.preprocessing._label", + "declaration": "LabelEncoder", + "alias": null + }, + { + "module": "sklearn.preprocessing._label", + "declaration": "MultiLabelBinarizer", + "alias": null + }, + { + "module": "sklearn.preprocessing._discretization", + "declaration": "KBinsDiscretizer", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.preprocessing._data", + "name": "sklearn.preprocessing._data", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "itertools", + "declaration": "chain", + "alias": null + }, + { + "module": "itertools", + "declaration": "combinations", + "alias": null + }, + { + "module": "itertools", + "declaration": "combinations_with_replacement", + "alias": "combinations_w_r" + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "scipy", + "declaration": "stats", + "alias": null + }, + { + "module": "scipy", + "declaration": "optimize", + "alias": null + }, + { + "module": "scipy.special", + "declaration": "boxcox", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "row_norms", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "_incremental_mean_and_var", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": 
"_incremental_weighted_mean_and_var", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs_fast", + "declaration": "inplace_csr_row_normalize_l1", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs_fast", + "declaration": "inplace_csr_row_normalize_l2", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "inplace_column_scale", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "mean_variance_axis", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "incr_mean_variance_axis", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "min_max_axis", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "FLOAT_DTYPES", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.preprocessing._csr_polynomial_expansion", + "declaration": "_csr_polynomial_expansion", + "alias": null + }, + { + "module": "sklearn.preprocessing._encoders", + "declaration": "OneHotEncoder", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.preprocessing._data/MinMaxScaler", + "scikit-learn/sklearn.preprocessing._data/StandardScaler", + "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler", + "scikit-learn/sklearn.preprocessing._data/RobustScaler", + "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures", + "scikit-learn/sklearn.preprocessing._data/Normalizer", + "scikit-learn/sklearn.preprocessing._data/Binarizer", + "scikit-learn/sklearn.preprocessing._data/KernelCenterer", + "scikit-learn/sklearn.preprocessing._data/QuantileTransformer", + "scikit-learn/sklearn.preprocessing._data/PowerTransformer" + ], + "functions": [ + "scikit-learn/sklearn.preprocessing._data/_handle_zeros_in_scale", + "scikit-learn/sklearn.preprocessing._data/scale", + "scikit-learn/sklearn.preprocessing._data/minmax_scale", + "scikit-learn/sklearn.preprocessing._data/maxabs_scale", + "scikit-learn/sklearn.preprocessing._data/robust_scale", + "scikit-learn/sklearn.preprocessing._data/normalize", + "scikit-learn/sklearn.preprocessing._data/binarize", + "scikit-learn/sklearn.preprocessing._data/add_dummy_feature", + "scikit-learn/sklearn.preprocessing._data/quantile_transform", + "scikit-learn/sklearn.preprocessing._data/power_transform" + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization", + "name": "sklearn.preprocessing._discretization", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn.preprocessing", + "declaration": "OneHotEncoder", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + 
"declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer"], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders", + "name": "sklearn.preprocessing._encoders", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "numbers", + "alias": null + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "is_scalar_nan", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils._encode", + "declaration": "_encode", + "alias": null + }, + { + "module": "sklearn.utils._encode", + "declaration": "_check_unknown", + "alias": null + }, + { + "module": "sklearn.utils._encode", + "declaration": "_unique", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder", + "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder", + "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer", + "name": "sklearn.preprocessing._function_transformer", + "imports": [ + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_allclose_dense_sparse", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer"], + "functions": ["scikit-learn/sklearn.preprocessing._function_transformer/_identity"] + }, + { + "id": "scikit-learn/sklearn.preprocessing._label", + "name": "sklearn.preprocessing._label", + "imports": [ + { + "module": "itertools", + "alias": null + }, + { + "module": "array", + "alias": null + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "collections", + "declaration": "defaultdict", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs", + "declaration": "min_max_axis", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.utils.validation", + 
"declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "unique_labels", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "type_of_target", + "alias": null + }, + { + "module": "sklearn.utils._encode", + "declaration": "_encode", + "alias": null + }, + { + "module": "sklearn.utils._encode", + "declaration": "_unique", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.preprocessing._label/LabelEncoder", + "scikit-learn/sklearn.preprocessing._label/LabelBinarizer", + "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer" + ], + "functions": [ + "scikit-learn/sklearn.preprocessing._label/label_binarize", + "scikit-learn/sklearn.preprocessing._label/_inverse_binarize_multiclass", + "scikit-learn/sklearn.preprocessing._label/_inverse_binarize_thresholding" + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing.setup", + "name": "sklearn.preprocessing.setup", + "imports": [ + { + "module": "os", + "alias": null + } + ], + "from_imports": [], + "classes": [], + "functions": ["scikit-learn/sklearn.preprocessing.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.random_projection", + "name": "sklearn.random_projection", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "TransformerMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.random", + "declaration": "sample_without_replacement", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "DataDimensionalityWarning", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.random_projection/BaseRandomProjection", + "scikit-learn/sklearn.random_projection/GaussianRandomProjection", + "scikit-learn/sklearn.random_projection/SparseRandomProjection" + ], + "functions": [ + "scikit-learn/sklearn.random_projection/johnson_lindenstrauss_min_dim", + "scikit-learn/sklearn.random_projection/_check_density", + "scikit-learn/sklearn.random_projection/_check_input_size", + "scikit-learn/sklearn.random_projection/_gaussian_random_matrix", + "scikit-learn/sklearn.random_projection/_sparse_random_matrix" + ] + }, + { + "id": "scikit-learn/sklearn.semi_supervised", + "name": "sklearn.semi_supervised", + "imports": [], + "from_imports": [ + { + "module": "sklearn.semi_supervised._label_propagation", + "declaration": "LabelPropagation", + "alias": null + }, + { + "module": "sklearn.semi_supervised._label_propagation", + "declaration": "LabelSpreading", + "alias": null + }, + { + "module": "sklearn.semi_supervised._self_training", + "declaration": "SelfTrainingClassifier", + "alias": 
null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation", + "name": "sklearn.semi_supervised._label_propagation", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "csgraph", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "rbf_kernel", + "alias": null + }, + { + "module": "sklearn.neighbors", + "declaration": "NearestNeighbors", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation", + "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation", + "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training", + "name": "sklearn.semi_supervised._self_training", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "MetaEstimatorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.metaestimators", + "declaration": "if_delegate_has_method", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "safe_mask", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier"], + "functions": ["scikit-learn/sklearn.semi_supervised._self_training/_validate_estimator"] + }, + { + "id": "scikit-learn/sklearn.setup", + "name": "sklearn.setup", + "imports": [ + { + "module": "sys", + "alias": null + }, + { + "module": "os", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn._build_utils", + "declaration": "cythonize_extensions", + "alias": null + }, + { + "module": "numpy.distutils.core", + "declaration": "setup", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.svm", + "name": "sklearn.svm", + "imports": [], + "from_imports": [ + { + "module": "sklearn.svm._classes", + "declaration": "SVC", + "alias": null + }, + { + 
"module": "sklearn.svm._classes", + "declaration": "NuSVC", + "alias": null + }, + { + "module": "sklearn.svm._classes", + "declaration": "SVR", + "alias": null + }, + { + "module": "sklearn.svm._classes", + "declaration": "NuSVR", + "alias": null + }, + { + "module": "sklearn.svm._classes", + "declaration": "OneClassSVM", + "alias": null + }, + { + "module": "sklearn.svm._classes", + "declaration": "LinearSVC", + "alias": null + }, + { + "module": "sklearn.svm._classes", + "declaration": "LinearSVR", + "alias": null + }, + { + "module": "sklearn.svm._bounds", + "declaration": "l1_min_c", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.svm._base", + "name": "sklearn.svm._base", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "sklearn.svm", + "declaration": "_libsvm", + "alias": "libsvm" + }, + { + "module": "sklearn.svm", + "declaration": "_liblinear", + "alias": "liblinear" + }, + { + "module": "sklearn.svm", + "declaration": "_libsvm_sparse", + "alias": "libsvm_sparse" + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "LabelEncoder", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "_ovr_decision_function", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "compute_class_weight", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_large_sparse", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.svm._base/BaseLibSVM", "scikit-learn/sklearn.svm._base/BaseSVC"], + "functions": [ + "scikit-learn/sklearn.svm._base/_one_vs_one_coef", + "scikit-learn/sklearn.svm._base/_get_liblinear_solver_type", + "scikit-learn/sklearn.svm._base/_fit_liblinear" + ] + }, + { + "id": "scikit-learn/sklearn.svm._bounds", + "name": "sklearn.svm._bounds", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + 
"from_imports": [ + { + "module": "sklearn.preprocessing", + "declaration": "LabelBinarizer", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.extmath", + "declaration": "safe_sparse_dot", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.svm._bounds/l1_min_c"] + }, + { + "id": "scikit-learn/sklearn.svm._classes", + "name": "sklearn.svm._classes", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.svm._base", + "declaration": "_fit_liblinear", + "alias": null + }, + { + "module": "sklearn.svm._base", + "declaration": "BaseSVC", + "alias": null + }, + { + "module": "sklearn.svm._base", + "declaration": "BaseLibSVM", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "OutlierMixin", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearClassifierMixin", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "SparseCoefMixin", + "alias": null + }, + { + "module": "sklearn.linear_model._base", + "declaration": "LinearModel", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.svm._classes/LinearSVC", + "scikit-learn/sklearn.svm._classes/LinearSVR", + "scikit-learn/sklearn.svm._classes/SVC", + "scikit-learn/sklearn.svm._classes/NuSVC", + "scikit-learn/sklearn.svm._classes/SVR", + "scikit-learn/sklearn.svm._classes/NuSVR", + "scikit-learn/sklearn.svm._classes/OneClassSVM" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.svm.setup", + "name": "sklearn.svm.setup", + "imports": [ + { + "module": "os", + "alias": null + }, + { + "module": "numpy", + "alias": null + } + ], + "from_imports": [ + { + "module": "os.path", + "declaration": "join", + "alias": null + }, + { + "module": "numpy.distutils.core", + "declaration": "setup", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.svm.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.tree", + "name": "sklearn.tree", + "imports": [], + "from_imports": [ + { + "module": "sklearn.tree._classes", + "declaration": "BaseDecisionTree", + "alias": null + }, + { + "module": "sklearn.tree._classes", + "declaration": "DecisionTreeClassifier", + "alias": null + }, + { + "module": "sklearn.tree._classes", + "declaration": "DecisionTreeRegressor", + "alias": null + }, + { + "module": "sklearn.tree._classes", + "declaration": "ExtraTreeClassifier", + "alias": null + }, + { + "module": "sklearn.tree._classes", + "declaration": "ExtraTreeRegressor", + "alias": null + }, + { + "module": "sklearn.tree._export", + "declaration": 
"export_graphviz", + "alias": null + }, + { + "module": "sklearn.tree._export", + "declaration": "plot_tree", + "alias": null + }, + { + "module": "sklearn.tree._export", + "declaration": "export_text", + "alias": null + } + ], + "classes": [], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.tree._classes", + "name": "sklearn.tree._classes", + "imports": [ + { + "module": "numbers", + "alias": null + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "copy", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "math", + "declaration": "ceil", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "MultiOutputMixin", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "Bunch", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "compute_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.tree._criterion", + "declaration": "Criterion", + "alias": null + }, + { + "module": "sklearn.tree._splitter", + "declaration": "Splitter", + "alias": null + }, + { + "module": "sklearn.tree._tree", + "declaration": "DepthFirstTreeBuilder", + "alias": null + }, + { + "module": "sklearn.tree._tree", + "declaration": "BestFirstTreeBuilder", + "alias": null + }, + { + "module": "sklearn.tree._tree", + "declaration": "Tree", + "alias": null + }, + { + "module": "sklearn.tree._tree", + "declaration": "_build_pruned_tree_ccp", + "alias": null + }, + { + "module": "sklearn.tree._tree", + "declaration": "ccp_pruning_path", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "_tree", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "_splitter", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "_criterion", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.tree._classes/BaseDecisionTree", + "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier", + "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor", + "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier", + "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.tree._export", + "name": "sklearn.tree._export", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null 
+ } + ], + "from_imports": [ + { + "module": "io", + "declaration": "StringIO", + "alias": null + }, + { + "module": "numbers", + "declaration": "Integral", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "_criterion", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "_tree", + "alias": null + }, + { + "module": "sklearn.tree._reingold_tilford", + "declaration": "buchheim", + "alias": null + }, + { + "module": "sklearn.tree._reingold_tilford", + "declaration": "Tree", + "alias": null + }, + { + "module": "sklearn.tree", + "declaration": "DecisionTreeClassifier", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.tree._export/Sentinel", + "scikit-learn/sklearn.tree._export/_BaseTreeExporter", + "scikit-learn/sklearn.tree._export/_DOTTreeExporter", + "scikit-learn/sklearn.tree._export/_MPLTreeExporter" + ], + "functions": [ + "scikit-learn/sklearn.tree._export/_color_brew", + "scikit-learn/sklearn.tree._export/plot_tree", + "scikit-learn/sklearn.tree._export/export_graphviz", + "scikit-learn/sklearn.tree._export/_compute_depth", + "scikit-learn/sklearn.tree._export/export_text" + ] + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford", + "name": "sklearn.tree._reingold_tilford", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [], + "classes": [ + "scikit-learn/sklearn.tree._reingold_tilford/DrawTree", + "scikit-learn/sklearn.tree._reingold_tilford/Tree" + ], + "functions": [ + "scikit-learn/sklearn.tree._reingold_tilford/buchheim", + "scikit-learn/sklearn.tree._reingold_tilford/third_walk", + "scikit-learn/sklearn.tree._reingold_tilford/first_walk", + "scikit-learn/sklearn.tree._reingold_tilford/apportion", + "scikit-learn/sklearn.tree._reingold_tilford/move_subtree", + "scikit-learn/sklearn.tree._reingold_tilford/execute_shifts", + "scikit-learn/sklearn.tree._reingold_tilford/ancestor", + "scikit-learn/sklearn.tree._reingold_tilford/second_walk" + ] + }, + { + "id": "scikit-learn/sklearn.tree.setup", + "name": "sklearn.tree.setup", + "imports": [ + { + "module": "os", + "alias": null + }, + { + "module": "numpy", + "alias": null + } + ], + "from_imports": [ + { + "module": "numpy.distutils.misc_util", + "declaration": "Configuration", + "alias": null + }, + { + "module": "numpy.distutils.core", + "declaration": "setup", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.tree.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.utils", + "name": "sklearn.utils", + "imports": [ + { + "module": "pkgutil", + "alias": null + }, + { + "module": "inspect", + "alias": null + }, + { + "module": "numbers", + "alias": null + }, + { + "module": "platform", + "alias": null + }, + { + "module": "struct", + "alias": null + }, + { + "module": "timeit", + "alias": null + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "importlib", + "declaration": "import_module", + "alias": null + }, + { + "module": "operator", + "declaration": "itemgetter", + "alias": null + }, + { + "module": "collections.abc", + "declaration": "Sequence", + "alias": null + }, + { + "module": "contextlib", + 
"declaration": "contextmanager", + "alias": null + }, + { + "module": "itertools", + "declaration": "compress", + "alias": null + }, + { + "module": "itertools", + "declaration": "islice", + "alias": null + }, + { + "module": "pathlib", + "declaration": "Path", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "sklearn.utils.murmurhash", + "declaration": "murmurhash3_32", + "alias": null + }, + { + "module": "sklearn.utils.class_weight", + "declaration": "compute_class_weight", + "alias": null + }, + { + "module": "sklearn.utils.class_weight", + "declaration": "compute_sample_weight", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_joblib", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "DataConversionWarning", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "np_version", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "parse_version", + "alias": null + }, + { + "module": "sklearn.utils._estimator_html_repr", + "declaration": "estimator_html_repr", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "as_float_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "assert_all_finite", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "column_or_1d", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_consistent_length", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_X_y", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "indexable", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_symmetric", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_scalar", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn", + "declaration": "get_config", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.utils/Bunch"], + "functions": [ + "scikit-learn/sklearn.utils/safe_mask", + "scikit-learn/sklearn.utils/axis0_safe_slice", + "scikit-learn/sklearn.utils/_array_indexing", + "scikit-learn/sklearn.utils/_pandas_indexing", + "scikit-learn/sklearn.utils/_list_indexing", + "scikit-learn/sklearn.utils/_determine_key_type", + "scikit-learn/sklearn.utils/_safe_indexing", + "scikit-learn/sklearn.utils/_get_column_indices", + "scikit-learn/sklearn.utils/resample", + "scikit-learn/sklearn.utils/shuffle", + "scikit-learn/sklearn.utils/safe_sqr", + "scikit-learn/sklearn.utils/_chunk_generator", + "scikit-learn/sklearn.utils/gen_batches", + "scikit-learn/sklearn.utils/gen_even_slices", + "scikit-learn/sklearn.utils/tosequence", + "scikit-learn/sklearn.utils/_to_object_array", + "scikit-learn/sklearn.utils/indices_to_mask", + "scikit-learn/sklearn.utils/_message_with_time", + "scikit-learn/sklearn.utils/_print_elapsed_time", + "scikit-learn/sklearn.utils/get_chunk_n_rows", + "scikit-learn/sklearn.utils/is_scalar_nan", + 
"scikit-learn/sklearn.utils/_approximate_mode", + "scikit-learn/sklearn.utils/check_matplotlib_support", + "scikit-learn/sklearn.utils/check_pandas_support", + "scikit-learn/sklearn.utils/all_estimators" + ] + }, + { + "id": "scikit-learn/sklearn.utils._arpack", + "name": "sklearn.utils._arpack", + "imports": [], + "from_imports": [ + { + "module": "sklearn.utils.validation", + "declaration": "check_random_state", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.utils._arpack/_init_arpack_v0"] + }, + { + "id": "scikit-learn/sklearn.utils._encode", + "name": "sklearn.utils._encode", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "typing", + "declaration": "NamedTuple", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "is_scalar_nan", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.utils._encode/MissingValues", + "scikit-learn/sklearn.utils._encode/_nandict" + ], + "functions": [ + "scikit-learn/sklearn.utils._encode/_unique", + "scikit-learn/sklearn.utils._encode/_extract_missing", + "scikit-learn/sklearn.utils._encode/_map_to_integer", + "scikit-learn/sklearn.utils._encode/_unique_python", + "scikit-learn/sklearn.utils._encode/_encode", + "scikit-learn/sklearn.utils._encode/_check_unknown" + ] + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr", + "name": "sklearn.utils._estimator_html_repr", + "imports": [ + { + "module": "uuid", + "alias": null + }, + { + "module": "html", + "alias": null + } + ], + "from_imports": [ + { + "module": "contextlib", + "declaration": "closing", + "alias": null + }, + { + "module": "contextlib", + "declaration": "suppress", + "alias": null + }, + { + "module": "io", + "declaration": "StringIO", + "alias": null + }, + { + "module": "string", + "declaration": "Template", + "alias": null + }, + { + "module": "sklearn", + "declaration": "config_context", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.utils._estimator_html_repr/_VisualBlock"], + "functions": [ + "scikit-learn/sklearn.utils._estimator_html_repr/_write_label_html", + "scikit-learn/sklearn.utils._estimator_html_repr/_get_visual_block", + "scikit-learn/sklearn.utils._estimator_html_repr/_write_estimator_html", + "scikit-learn/sklearn.utils._estimator_html_repr/estimator_html_repr" + ] + }, + { + "id": "scikit-learn/sklearn.utils._joblib", + "name": "sklearn.utils._joblib", + "imports": [ + { + "module": "warnings", + "alias": "_warnings" + }, + { + "module": "joblib", + "alias": null + } + ], + "from_imports": [ + { + "module": "joblib", + "declaration": "logger", + "alias": null + }, + { + "module": "joblib", + "declaration": "dump", + "alias": null + }, + { + "module": "joblib", + "declaration": "load", + "alias": null + }, + { + "module": "joblib", + "declaration": "__version__", + "alias": null + }, + { + "module": "joblib", + "declaration": "effective_n_jobs", + "alias": null + }, + { + "module": "joblib", + "declaration": "hash", + "alias": null + }, + { + "module": "joblib", + "declaration": "cpu_count", + "alias": null + }, + { + "module": "joblib", + "declaration": "Parallel", + "alias": null + }, + { + "module": "joblib", + "declaration": "Memory", + "alias": null + }, + { + "module": "joblib", + "declaration": "delayed", + "alias": null + }, + { + "module": "joblib", + "declaration": "parallel_backend", + "alias": null + }, + { + "module": "joblib", + "declaration": "register_parallel_backend", + "alias": null + } + ], + "classes": [], + 
"functions": [] + }, + { + "id": "scikit-learn/sklearn.utils._mask", + "name": "sklearn.utils._mask", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "sparse", + "alias": "sp" + }, + { + "module": "sklearn.utils", + "declaration": "is_scalar_nan", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "_object_dtype_isnan", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.utils._mask/_get_dense_mask", + "scikit-learn/sklearn.utils._mask/_get_mask" + ] + }, + { + "id": "scikit-learn/sklearn.utils._mocking", + "name": "sklearn.utils._mocking", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClassifierMixin", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.utils._mocking/ArraySlicingWrapper", + "scikit-learn/sklearn.utils._mocking/MockDataFrame", + "scikit-learn/sklearn.utils._mocking/CheckingClassifier", + "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper" + ], + "functions": [] + }, + { + "id": "scikit-learn/sklearn.utils._pprint", + "name": "sklearn.utils._pprint", + "imports": [ + { + "module": "inspect", + "alias": null + }, + { + "module": "pprint", + "alias": null + } + ], + "from_imports": [ + { + "module": "collections", + "declaration": "OrderedDict", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn._config", + "declaration": "get_config", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "is_scalar_nan", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.utils._pprint/KeyValTuple", + "scikit-learn/sklearn.utils._pprint/KeyValTupleParam", + "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter" + ], + "functions": [ + "scikit-learn/sklearn.utils._pprint/_changed_params", + "scikit-learn/sklearn.utils._pprint/_safe_repr" + ] + }, + { + "id": "scikit-learn/sklearn.utils._show_versions", + "name": "sklearn.utils._show_versions", + "imports": [ + { + "module": "platform", + "alias": null + }, + { + "module": "sys", + "alias": null + }, + { + "module": "importlib", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn.utils._openmp_helpers", + "declaration": "_openmp_parallelism_enabled", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.utils._show_versions/_get_sys_info", + "scikit-learn/sklearn.utils._show_versions/_get_deps_info", + "scikit-learn/sklearn.utils._show_versions/show_versions" + ] + }, + { + "id": "scikit-learn/sklearn.utils._tags", + "name": "sklearn.utils._tags", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [], + "classes": [], + "functions": ["scikit-learn/sklearn.utils._tags/_safe_tags"] + }, + { + "id": "scikit-learn/sklearn.utils._testing", + "name": "sklearn.utils._testing", + "imports": [ + { + "module": "os", + "alias": null + }, + { + "module": "os.path", + "alias": "op" + }, + { + "module": "inspect", + "alias": null + }, + { + 
"module": "warnings", + "alias": null + }, + { + "module": "sys", + "alias": null + }, + { + "module": "functools", + "alias": null + }, + { + "module": "tempfile", + "alias": null + }, + { + "module": "re", + "alias": null + }, + { + "module": "contextlib", + "alias": null + }, + { + "module": "scipy", + "alias": "sp" + }, + { + "module": "shutil", + "alias": null + }, + { + "module": "atexit", + "alias": null + }, + { + "module": "unittest", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "joblib", + "alias": null + }, + { + "module": "sklearn", + "alias": null + }, + { + "module": "pytest", + "alias": null + } + ], + "from_imports": [ + { + "module": "subprocess", + "declaration": "check_output", + "alias": null + }, + { + "module": "subprocess", + "declaration": "STDOUT", + "alias": null + }, + { + "module": "subprocess", + "declaration": "CalledProcessError", + "alias": null + }, + { + "module": "subprocess", + "declaration": "TimeoutExpired", + "alias": null + }, + { + "module": "collections.abc", + "declaration": "Iterable", + "alias": null + }, + { + "module": "functools", + "declaration": "wraps", + "alias": null + }, + { + "module": "inspect", + "declaration": "signature", + "alias": null + }, + { + "module": "unittest", + "declaration": "TestCase", + "alias": null + }, + { + "module": "numpy.testing", + "declaration": "assert_allclose", + "alias": null + }, + { + "module": "numpy.testing", + "declaration": "assert_almost_equal", + "alias": null + }, + { + "module": "numpy.testing", + "declaration": "assert_approx_equal", + "alias": null + }, + { + "module": "numpy.testing", + "declaration": "assert_array_equal", + "alias": null + }, + { + "module": "numpy.testing", + "declaration": "assert_array_almost_equal", + "alias": null + }, + { + "module": "numpy.testing", + "declaration": "assert_array_less", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "IS_PYPY", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_IS_32BIT", + "alias": null + }, + { + "module": "sklearn.utils.multiclass", + "declaration": "check_classification_targets", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_is_fitted", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_X_y", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.utils._testing/_IgnoreWarnings", + "scikit-learn/sklearn.utils._testing/TempMemmap", + "scikit-learn/sklearn.utils._testing/_Raises", + "scikit-learn/sklearn.utils._testing/MinimalClassifier", + "scikit-learn/sklearn.utils._testing/MinimalRegressor", + "scikit-learn/sklearn.utils._testing/MinimalTransformer" + ], + "functions": [ + "scikit-learn/sklearn.utils._testing/assert_warns", + "scikit-learn/sklearn.utils._testing/assert_warns_message", + "scikit-learn/sklearn.utils._testing/assert_warns_div0", + "scikit-learn/sklearn.utils._testing/assert_no_warnings", + "scikit-learn/sklearn.utils._testing/ignore_warnings", + "scikit-learn/sklearn.utils._testing/assert_raise_message", + "scikit-learn/sklearn.utils._testing/assert_allclose_dense_sparse", + "scikit-learn/sklearn.utils._testing/set_random_state", + "scikit-learn/sklearn.utils._testing/check_skip_network", + "scikit-learn/sklearn.utils._testing/_delete_folder", + "scikit-learn/sklearn.utils._testing/create_memmap_backed_data", + 
"scikit-learn/sklearn.utils._testing/_get_args", + "scikit-learn/sklearn.utils._testing/_get_func_name", + "scikit-learn/sklearn.utils._testing/check_docstring_parameters", + "scikit-learn/sklearn.utils._testing/assert_run_python_script", + "scikit-learn/sklearn.utils._testing/_convert_container", + "scikit-learn/sklearn.utils._testing/raises" + ] + }, + { + "id": "scikit-learn/sklearn.utils.class_weight", + "name": "sklearn.utils.class_weight", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.utils.class_weight/compute_class_weight", + "scikit-learn/sklearn.utils.class_weight/compute_sample_weight" + ] + }, + { + "id": "scikit-learn/sklearn.utils.deprecation", + "name": "sklearn.utils.deprecation", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "functools", + "alias": null + } + ], + "from_imports": [], + "classes": ["scikit-learn/sklearn.utils.deprecation/deprecated"], + "functions": ["scikit-learn/sklearn.utils.deprecation/_is_deprecated"] + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks", + "name": "sklearn.utils.estimator_checks", + "imports": [ + { + "module": "types", + "alias": null + }, + { + "module": "warnings", + "alias": null + }, + { + "module": "pickle", + "alias": null + }, + { + "module": "re", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "joblib", + "alias": null + } + ], + "from_imports": [ + { + "module": "copy", + "declaration": "deepcopy", + "alias": null + }, + { + "module": "functools", + "declaration": "partial", + "alias": null + }, + { + "module": "functools", + "declaration": "wraps", + "alias": null + }, + { + "module": "inspect", + "declaration": "signature", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "scipy.stats", + "declaration": "rankdata", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "IS_PYPY", + "alias": null + }, + { + "module": "sklearn", + "declaration": "config_context", + "alias": null + }, + { + "module": "sklearn.utils._testing", + "declaration": "_get_args", + "alias": null + }, + { + "module": "sklearn.utils._testing", + "declaration": "assert_raise_message", + "alias": null + }, + { + "module": "sklearn.utils._testing", + "declaration": "assert_array_equal", + "alias": null + }, + { + "module": "sklearn.utils._testing", + "declaration": "assert_array_almost_equal", + "alias": null + }, + { + "module": "sklearn.utils._testing", + "declaration": "assert_allclose", + "alias": null + }, + { + "module": "sklearn.utils._testing", + "declaration": "assert_allclose_dense_sparse", + "alias": null + }, + { + "module": "sklearn.utils._testing", + "declaration": "set_random_state", + "alias": null + }, + { + "module": "sklearn.utils._testing", + "declaration": "SkipTest", + "alias": null + }, + { + "module": "sklearn.utils._testing", + "declaration": "ignore_warnings", + "alias": null + }, + { + "module": "sklearn.utils._testing", + "declaration": "create_memmap_backed_data", + "alias": null + }, + { + "module": "sklearn.utils._testing", + "declaration": "raises", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "is_scalar_nan", + "alias": null + }, + { + "module": "sklearn.linear_model", + "declaration": "LogisticRegression", + "alias": null + }, + { + 
"module": "sklearn.linear_model", + "declaration": "Ridge", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "clone", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "ClusterMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_classifier", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_regressor", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "is_outlier_detector", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "RegressorMixin", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "_is_pairwise", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "accuracy_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "adjusted_rand_score", + "alias": null + }, + { + "module": "sklearn.metrics", + "declaration": "f1_score", + "alias": null + }, + { + "module": "sklearn.random_projection", + "declaration": "BaseRandomProjection", + "alias": null + }, + { + "module": "sklearn.feature_selection", + "declaration": "SelectKBest", + "alias": null + }, + { + "module": "sklearn.pipeline", + "declaration": "make_pipeline", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "DataConversionWarning", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "SkipTestWarning", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "train_test_split", + "alias": null + }, + { + "module": "sklearn.model_selection", + "declaration": "ShuffleSplit", + "alias": null + }, + { + "module": "sklearn.model_selection._validation", + "declaration": "_safe_split", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "rbf_kernel", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "linear_kernel", + "alias": null + }, + { + "module": "sklearn.metrics.pairwise", + "declaration": "pairwise_distances", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "shuffle", + "alias": null + }, + { + "module": "sklearn.utils._tags", + "declaration": "_DEFAULT_TAGS", + "alias": null + }, + { + "module": "sklearn.utils._tags", + "declaration": "_safe_tags", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "has_fit_parameter", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_num_samples", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "StandardScaler", + "alias": null + }, + { + "module": "sklearn.preprocessing", + "declaration": "scale", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "load_iris", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "make_blobs", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "make_multilabel_classification", + "alias": null + }, + { + "module": "sklearn.datasets", + "declaration": "make_regression", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.utils.estimator_checks/_NotAnArray"], + "functions": [ + "scikit-learn/sklearn.utils.estimator_checks/_yield_checks", + "scikit-learn/sklearn.utils.estimator_checks/_yield_classifier_checks", + "scikit-learn/sklearn.utils.estimator_checks/check_supervised_y_no_nan", + 
"scikit-learn/sklearn.utils.estimator_checks/_yield_regressor_checks", + "scikit-learn/sklearn.utils.estimator_checks/_yield_transformer_checks", + "scikit-learn/sklearn.utils.estimator_checks/_yield_clustering_checks", + "scikit-learn/sklearn.utils.estimator_checks/_yield_outliers_checks", + "scikit-learn/sklearn.utils.estimator_checks/_yield_all_checks", + "scikit-learn/sklearn.utils.estimator_checks/_get_check_estimator_ids", + "scikit-learn/sklearn.utils.estimator_checks/_construct_instance", + "scikit-learn/sklearn.utils.estimator_checks/_maybe_mark_xfail", + "scikit-learn/sklearn.utils.estimator_checks/_maybe_skip", + "scikit-learn/sklearn.utils.estimator_checks/_should_be_skipped_or_marked", + "scikit-learn/sklearn.utils.estimator_checks/parametrize_with_checks", + "scikit-learn/sklearn.utils.estimator_checks/check_estimator", + "scikit-learn/sklearn.utils.estimator_checks/_regression_dataset", + "scikit-learn/sklearn.utils.estimator_checks/_set_checking_parameters", + "scikit-learn/sklearn.utils.estimator_checks/_is_pairwise_metric", + "scikit-learn/sklearn.utils.estimator_checks/_pairwise_estimator_convert_X", + "scikit-learn/sklearn.utils.estimator_checks/_generate_sparse_matrix", + "scikit-learn/sklearn.utils.estimator_checks/check_estimator_sparse_data", + "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_pandas_series", + "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_not_an_array", + "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_list", + "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_shape", + "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_invariance", + "scikit-learn/sklearn.utils.estimator_checks/check_dtype_object", + "scikit-learn/sklearn.utils.estimator_checks/check_complex_data", + "scikit-learn/sklearn.utils.estimator_checks/check_dict_unchanged", + "scikit-learn/sklearn.utils.estimator_checks/_is_public_parameter", + "scikit-learn/sklearn.utils.estimator_checks/check_dont_overwrite_parameters", + "scikit-learn/sklearn.utils.estimator_checks/check_fit2d_predict1d", + "scikit-learn/sklearn.utils.estimator_checks/_apply_on_subsets", + "scikit-learn/sklearn.utils.estimator_checks/check_methods_subset_invariance", + "scikit-learn/sklearn.utils.estimator_checks/check_methods_sample_order_invariance", + "scikit-learn/sklearn.utils.estimator_checks/check_fit2d_1sample", + "scikit-learn/sklearn.utils.estimator_checks/check_fit2d_1feature", + "scikit-learn/sklearn.utils.estimator_checks/check_fit1d", + "scikit-learn/sklearn.utils.estimator_checks/check_transformer_general", + "scikit-learn/sklearn.utils.estimator_checks/check_transformer_data_not_an_array", + "scikit-learn/sklearn.utils.estimator_checks/check_transformers_unfitted", + "scikit-learn/sklearn.utils.estimator_checks/_check_transformer", + "scikit-learn/sklearn.utils.estimator_checks/check_pipeline_consistency", + "scikit-learn/sklearn.utils.estimator_checks/check_fit_score_takes_y", + "scikit-learn/sklearn.utils.estimator_checks/check_estimators_dtypes", + "scikit-learn/sklearn.utils.estimator_checks/check_transformer_preserve_dtypes", + "scikit-learn/sklearn.utils.estimator_checks/check_estimators_empty_data_messages", + "scikit-learn/sklearn.utils.estimator_checks/check_estimators_nan_inf", + "scikit-learn/sklearn.utils.estimator_checks/check_nonsquare_error", + "scikit-learn/sklearn.utils.estimator_checks/check_estimators_pickle", + "scikit-learn/sklearn.utils.estimator_checks/check_estimators_partial_fit_n_features", + 
"scikit-learn/sklearn.utils.estimator_checks/check_classifier_multioutput", + "scikit-learn/sklearn.utils.estimator_checks/check_regressor_multioutput", + "scikit-learn/sklearn.utils.estimator_checks/check_clustering", + "scikit-learn/sklearn.utils.estimator_checks/check_clusterer_compute_labels_predict", + "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_one_label", + "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_train", + "scikit-learn/sklearn.utils.estimator_checks/check_outlier_corruption", + "scikit-learn/sklearn.utils.estimator_checks/check_outliers_train", + "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_multilabel_representation_invariance", + "scikit-learn/sklearn.utils.estimator_checks/check_estimators_fit_returns_self", + "scikit-learn/sklearn.utils.estimator_checks/check_estimators_unfitted", + "scikit-learn/sklearn.utils.estimator_checks/check_supervised_y_2d", + "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_predictions", + "scikit-learn/sklearn.utils.estimator_checks/_choose_check_classifiers_labels", + "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_classes", + "scikit-learn/sklearn.utils.estimator_checks/check_regressors_int", + "scikit-learn/sklearn.utils.estimator_checks/check_regressors_train", + "scikit-learn/sklearn.utils.estimator_checks/check_regressors_no_decision_function", + "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_classifiers", + "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_balanced_classifiers", + "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_balanced_linear_classifier", + "scikit-learn/sklearn.utils.estimator_checks/check_estimators_overwrite_params", + "scikit-learn/sklearn.utils.estimator_checks/check_no_attributes_set_in_init", + "scikit-learn/sklearn.utils.estimator_checks/check_sparsify_coefficients", + "scikit-learn/sklearn.utils.estimator_checks/check_classifier_data_not_an_array", + "scikit-learn/sklearn.utils.estimator_checks/check_regressor_data_not_an_array", + "scikit-learn/sklearn.utils.estimator_checks/check_estimators_data_not_an_array", + "scikit-learn/sklearn.utils.estimator_checks/check_parameters_default_constructible", + "scikit-learn/sklearn.utils.estimator_checks/_enforce_estimator_tags_y", + "scikit-learn/sklearn.utils.estimator_checks/_enforce_estimator_tags_x", + "scikit-learn/sklearn.utils.estimator_checks/check_non_transformer_estimators_n_iter", + "scikit-learn/sklearn.utils.estimator_checks/check_transformer_n_iter", + "scikit-learn/sklearn.utils.estimator_checks/check_get_params_invariance", + "scikit-learn/sklearn.utils.estimator_checks/check_set_params", + "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_regression_target", + "scikit-learn/sklearn.utils.estimator_checks/check_decision_proba_consistency", + "scikit-learn/sklearn.utils.estimator_checks/check_outliers_fit_predict", + "scikit-learn/sklearn.utils.estimator_checks/check_fit_non_negative", + "scikit-learn/sklearn.utils.estimator_checks/check_fit_idempotent", + "scikit-learn/sklearn.utils.estimator_checks/check_n_features_in", + "scikit-learn/sklearn.utils.estimator_checks/check_requires_y_none", + "scikit-learn/sklearn.utils.estimator_checks/check_n_features_in_after_fitting", + "scikit-learn/sklearn.utils.estimator_checks/check_estimator_get_tags_default_keys" + ] + }, + { + "id": "scikit-learn/sklearn.utils.extmath", + "name": "sklearn.utils.extmath", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { 
+ "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "scipy", + "declaration": "linalg", + "alias": null + }, + { + "module": "scipy", + "declaration": "sparse", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils._logistic_sigmoid", + "declaration": "_log_logistic_sigmoid", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs_fast", + "declaration": "csr_row_norms", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.utils.extmath/squared_norm", + "scikit-learn/sklearn.utils.extmath/row_norms", + "scikit-learn/sklearn.utils.extmath/fast_logdet", + "scikit-learn/sklearn.utils.extmath/density", + "scikit-learn/sklearn.utils.extmath/safe_sparse_dot", + "scikit-learn/sklearn.utils.extmath/randomized_range_finder", + "scikit-learn/sklearn.utils.extmath/randomized_svd", + "scikit-learn/sklearn.utils.extmath/weighted_mode", + "scikit-learn/sklearn.utils.extmath/cartesian", + "scikit-learn/sklearn.utils.extmath/svd_flip", + "scikit-learn/sklearn.utils.extmath/log_logistic", + "scikit-learn/sklearn.utils.extmath/softmax", + "scikit-learn/sklearn.utils.extmath/make_nonnegative", + "scikit-learn/sklearn.utils.extmath/_safe_accumulator_op", + "scikit-learn/sklearn.utils.extmath/_incremental_weighted_mean_and_var", + "scikit-learn/sklearn.utils.extmath/_incremental_mean_and_var", + "scikit-learn/sklearn.utils.extmath/_deterministic_vector_sign_flip", + "scikit-learn/sklearn.utils.extmath/stable_cumsum" + ] + }, + { + "id": "scikit-learn/sklearn.utils.fixes", + "name": "sklearn.utils.fixes", + "imports": [ + { + "module": "functools", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + }, + { + "module": "scipy", + "alias": null + } + ], + "from_imports": [ + { + "module": "pkg_resources", + "declaration": "parse_version", + "alias": null + }, + { + "module": "functools", + "declaration": "update_wrapper", + "alias": null + }, + { + "module": "distutils.version", + "declaration": "LooseVersion", + "alias": null + }, + { + "module": "scipy.sparse.linalg", + "declaration": "lsqr", + "alias": "sparse_lsqr" + }, + { + "module": "numpy.ma", + "declaration": "MaskedArray", + "alias": "_MaskedArray" + }, + { + "module": "sklearn._config", + "declaration": "config_context", + "alias": null + }, + { + "module": "sklearn._config", + "declaration": "get_config", + "alias": null + }, + { + "module": "sklearn.utils.deprecation", + "declaration": "deprecated", + "alias": null + }, + { + "module": "scipy.sparse.linalg", + "declaration": "lobpcg", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.utils.fixes/loguniform", + "scikit-learn/sklearn.utils.fixes/MaskedArray", + "scikit-learn/sklearn.utils.fixes/_FuncWrapper" + ], + "functions": [ + "scikit-learn/sklearn.utils.fixes/_object_dtype_isnan", + "scikit-learn/sklearn.utils.fixes/_astype_copy_false", + "scikit-learn/sklearn.utils.fixes/_joblib_parallel_args", + "scikit-learn/sklearn.utils.fixes/_take_along_axis", + "scikit-learn/sklearn.utils.fixes/delayed" + ] + }, + { + "id": "scikit-learn/sklearn.utils.graph", + "name": "sklearn.utils.graph", + "imports": [], + "from_imports": [ + { + "module": "scipy", + 
"declaration": "sparse", + "alias": null + }, + { + "module": "sklearn.utils.graph_shortest_path", + "declaration": "graph_shortest_path", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.utils.graph/single_source_shortest_path_length"] + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators", + "name": "sklearn.utils.metaestimators", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "typing", + "declaration": "List", + "alias": null + }, + { + "module": "typing", + "declaration": "Any", + "alias": null + }, + { + "module": "abc", + "declaration": "ABCMeta", + "alias": null + }, + { + "module": "abc", + "declaration": "abstractmethod", + "alias": null + }, + { + "module": "operator", + "declaration": "attrgetter", + "alias": null + }, + { + "module": "functools", + "declaration": "update_wrapper", + "alias": null + }, + { + "module": "sklearn.utils", + "declaration": "_safe_indexing", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "BaseEstimator", + "alias": null + }, + { + "module": "sklearn.base", + "declaration": "_is_pairwise", + "alias": null + } + ], + "classes": [ + "scikit-learn/sklearn.utils.metaestimators/_BaseComposition", + "scikit-learn/sklearn.utils.metaestimators/_IffHasAttrDescriptor" + ], + "functions": [ + "scikit-learn/sklearn.utils.metaestimators/if_delegate_has_method", + "scikit-learn/sklearn.utils.metaestimators/_safe_split" + ] + }, + { + "id": "scikit-learn/sklearn.utils.multiclass", + "name": "sklearn.utils.multiclass", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "collections.abc", + "declaration": "Sequence", + "alias": null + }, + { + "module": "itertools", + "declaration": "chain", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "issparse", + "alias": null + }, + { + "module": "scipy.sparse.base", + "declaration": "spmatrix", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "dok_matrix", + "alias": null + }, + { + "module": "scipy.sparse", + "declaration": "lil_matrix", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "check_array", + "alias": null + }, + { + "module": "sklearn.utils.validation", + "declaration": "_assert_all_finite", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.utils.multiclass/_unique_multiclass", + "scikit-learn/sklearn.utils.multiclass/_unique_indicator", + "scikit-learn/sklearn.utils.multiclass/unique_labels", + "scikit-learn/sklearn.utils.multiclass/_is_integral_float", + "scikit-learn/sklearn.utils.multiclass/is_multilabel", + "scikit-learn/sklearn.utils.multiclass/check_classification_targets", + "scikit-learn/sklearn.utils.multiclass/type_of_target", + "scikit-learn/sklearn.utils.multiclass/_check_partial_fit_first_call", + "scikit-learn/sklearn.utils.multiclass/class_distribution", + "scikit-learn/sklearn.utils.multiclass/_ovr_decision_function" + ] + }, + { + "id": "scikit-learn/sklearn.utils.optimize", + "name": "sklearn.utils.optimize", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "warnings", + "alias": null + } + ], + "from_imports": [ + { + "module": "scipy.optimize.linesearch", + "declaration": "line_search_wolfe2", + "alias": null + }, + { + "module": 
"scipy.optimize.linesearch", + "declaration": "line_search_wolfe1", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "ConvergenceWarning", + "alias": null + } + ], + "classes": ["scikit-learn/sklearn.utils.optimize/_LineSearchError"], + "functions": [ + "scikit-learn/sklearn.utils.optimize/_line_search_wolfe12", + "scikit-learn/sklearn.utils.optimize/_cg", + "scikit-learn/sklearn.utils.optimize/_newton_cg", + "scikit-learn/sklearn.utils.optimize/_check_optimize_result" + ] + }, + { + "id": "scikit-learn/sklearn.utils.random", + "name": "sklearn.utils.random", + "imports": [ + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + }, + { + "module": "array", + "alias": null + } + ], + "from_imports": [ + { + "module": "sklearn.utils", + "declaration": "check_random_state", + "alias": null + }, + { + "module": "sklearn.utils._random", + "declaration": "sample_without_replacement", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.utils.random/_random_choice_csc"] + }, + { + "id": "scikit-learn/sklearn.utils.setup", + "name": "sklearn.utils.setup", + "imports": [ + { + "module": "os", + "alias": null + } + ], + "from_imports": [ + { + "module": "os.path", + "declaration": "join", + "alias": null + }, + { + "module": "sklearn._build_utils", + "declaration": "gen_from_templates", + "alias": null + }, + { + "module": "numpy.distutils.core", + "declaration": "setup", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.utils.setup/configuration"] + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs", + "name": "sklearn.utils.sparsefuncs", + "imports": [ + { + "module": "scipy.sparse", + "alias": "sp" + }, + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.utils.validation", + "declaration": "_deprecate_positional_args", + "alias": null + }, + { + "module": "sklearn.utils.sparsefuncs_fast", + "declaration": "csr_mean_variance_axis0", + "alias": "_csr_mean_var_axis0" + }, + { + "module": "sklearn.utils.sparsefuncs_fast", + "declaration": "csc_mean_variance_axis0", + "alias": "_csc_mean_var_axis0" + }, + { + "module": "sklearn.utils.sparsefuncs_fast", + "declaration": "incr_mean_variance_axis0", + "alias": "_incr_mean_var_axis0" + }, + { + "module": "sklearn.utils.validation", + "declaration": "_check_sample_weight", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.utils.sparsefuncs/_raise_typeerror", + "scikit-learn/sklearn.utils.sparsefuncs/_raise_error_wrong_axis", + "scikit-learn/sklearn.utils.sparsefuncs/inplace_csr_column_scale", + "scikit-learn/sklearn.utils.sparsefuncs/inplace_csr_row_scale", + "scikit-learn/sklearn.utils.sparsefuncs/mean_variance_axis", + "scikit-learn/sklearn.utils.sparsefuncs/incr_mean_variance_axis", + "scikit-learn/sklearn.utils.sparsefuncs/inplace_column_scale", + "scikit-learn/sklearn.utils.sparsefuncs/inplace_row_scale", + "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row_csc", + "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row_csr", + "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row", + "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_column", + "scikit-learn/sklearn.utils.sparsefuncs/_minor_reduce", + "scikit-learn/sklearn.utils.sparsefuncs/_min_or_max_axis", + "scikit-learn/sklearn.utils.sparsefuncs/_sparse_min_or_max", + "scikit-learn/sklearn.utils.sparsefuncs/_sparse_min_max", + 
"scikit-learn/sklearn.utils.sparsefuncs/_sparse_nan_min_max", + "scikit-learn/sklearn.utils.sparsefuncs/min_max_axis", + "scikit-learn/sklearn.utils.sparsefuncs/count_nonzero", + "scikit-learn/sklearn.utils.sparsefuncs/_get_median", + "scikit-learn/sklearn.utils.sparsefuncs/_get_elem_at_rank", + "scikit-learn/sklearn.utils.sparsefuncs/csc_median_axis_0" + ] + }, + { + "id": "scikit-learn/sklearn.utils.stats", + "name": "sklearn.utils.stats", + "imports": [ + { + "module": "numpy", + "alias": "np" + } + ], + "from_imports": [ + { + "module": "sklearn.utils.extmath", + "declaration": "stable_cumsum", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "_take_along_axis", + "alias": null + } + ], + "classes": [], + "functions": ["scikit-learn/sklearn.utils.stats/_weighted_percentile"] + }, + { + "id": "scikit-learn/sklearn.utils.validation", + "name": "sklearn.utils.validation", + "imports": [ + { + "module": "warnings", + "alias": null + }, + { + "module": "numbers", + "alias": null + }, + { + "module": "numpy", + "alias": "np" + }, + { + "module": "scipy.sparse", + "alias": "sp" + }, + { + "module": "joblib", + "alias": null + } + ], + "from_imports": [ + { + "module": "functools", + "declaration": "wraps", + "alias": null + }, + { + "module": "inspect", + "declaration": "signature", + "alias": null + }, + { + "module": "inspect", + "declaration": "isclass", + "alias": null + }, + { + "module": "inspect", + "declaration": "Parameter", + "alias": null + }, + { + "module": "numpy.core.numeric", + "declaration": "ComplexWarning", + "alias": null + }, + { + "module": "contextlib", + "declaration": "suppress", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "_object_dtype_isnan", + "alias": null + }, + { + "module": "sklearn.utils.fixes", + "declaration": "parse_version", + "alias": null + }, + { + "module": "sklearn", + "declaration": "get_config", + "alias": "_get_config" + }, + { + "module": "sklearn.exceptions", + "declaration": "PositiveSpectrumWarning", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "NotFittedError", + "alias": null + }, + { + "module": "sklearn.exceptions", + "declaration": "DataConversionWarning", + "alias": null + } + ], + "classes": [], + "functions": [ + "scikit-learn/sklearn.utils.validation/_deprecate_positional_args", + "scikit-learn/sklearn.utils.validation/_assert_all_finite", + "scikit-learn/sklearn.utils.validation/assert_all_finite", + "scikit-learn/sklearn.utils.validation/as_float_array", + "scikit-learn/sklearn.utils.validation/_is_arraylike", + "scikit-learn/sklearn.utils.validation/_num_features", + "scikit-learn/sklearn.utils.validation/_num_samples", + "scikit-learn/sklearn.utils.validation/check_memory", + "scikit-learn/sklearn.utils.validation/check_consistent_length", + "scikit-learn/sklearn.utils.validation/_make_indexable", + "scikit-learn/sklearn.utils.validation/indexable", + "scikit-learn/sklearn.utils.validation/_ensure_sparse_format", + "scikit-learn/sklearn.utils.validation/_ensure_no_complex_data", + "scikit-learn/sklearn.utils.validation/check_array", + "scikit-learn/sklearn.utils.validation/_check_large_sparse", + "scikit-learn/sklearn.utils.validation/check_X_y", + "scikit-learn/sklearn.utils.validation/column_or_1d", + "scikit-learn/sklearn.utils.validation/check_random_state", + "scikit-learn/sklearn.utils.validation/has_fit_parameter", + "scikit-learn/sklearn.utils.validation/check_symmetric", + "scikit-learn/sklearn.utils.validation/check_is_fitted", + 
"scikit-learn/sklearn.utils.validation/check_non_negative", + "scikit-learn/sklearn.utils.validation/check_scalar", + "scikit-learn/sklearn.utils.validation/_check_psd_eigenvalues", + "scikit-learn/sklearn.utils.validation/_check_sample_weight", + "scikit-learn/sklearn.utils.validation/_allclose_dense_sparse", + "scikit-learn/sklearn.utils.validation/_check_fit_params" + ] + } + ], + "classes": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel", + "name": "ExponentialDispersionModel", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/in_y_range", + "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_variance", + "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_deviance", + "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_deviance_derivative", + "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/deviance", + "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/deviance_derivative" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for reproductive Exponential Dispersion Models (EDM).\n\nThe pdf of :math:`Y\\sim \\mathrm{EDM}(y_\\textrm{pred}, \\phi)` is given by\n\n.. math:: p(y| \\theta, \\phi) = c(y, \\phi)\n \\exp\\left(\\frac{\\theta y-A(\\theta)}{\\phi}\\right)\n = \\tilde{c}(y, \\phi)\n \\exp\\left(-\\frac{d(y, y_\\textrm{pred})}{2\\phi}\\right)\n\nwith mean :math:`\\mathrm{E}[Y] = A'(\\theta) = y_\\textrm{pred}`,\nvariance :math:`\\mathrm{Var}[Y] = \\phi \\cdot v(y_\\textrm{pred})`,\nunit variance :math:`v(y_\\textrm{pred})` and\nunit deviance :math:`d(y,y_\\textrm{pred})`.", + "docstring": "Base class for reproductive Exponential Dispersion Models (EDM).\n\nThe pdf of :math:`Y\\sim \\mathrm{EDM}(y_\\textrm{pred}, \\phi)` is given by\n\n.. math:: p(y| \\theta, \\phi) = c(y, \\phi)\n \\exp\\left(\\frac{\\theta y-A(\\theta)}{\\phi}\\right)\n = \\tilde{c}(y, \\phi)\n \\exp\\left(-\\frac{d(y, y_\\textrm{pred})}{2\\phi}\\right)\n\nwith mean :math:`\\mathrm{E}[Y] = A'(\\theta) = y_\\textrm{pred}`,\nvariance :math:`\\mathrm{Var}[Y] = \\phi \\cdot v(y_\\textrm{pred})`,\nunit variance :math:`v(y_\\textrm{pred})` and\nunit deviance :math:`d(y,y_\\textrm{pred})`.\n\nMethods\n-------\ndeviance\ndeviance_derivative\nin_y_range\nunit_deviance\nunit_deviance_derivative\nunit_variance\n\nReferences\n----------\nhttps://en.wikipedia.org/wiki/Exponential_dispersion_model.", + "code": "class ExponentialDispersionModel(metaclass=ABCMeta):\n r\"\"\"Base class for reproductive Exponential Dispersion Models (EDM).\n\n The pdf of :math:`Y\\sim \\mathrm{EDM}(y_\\textrm{pred}, \\phi)` is given by\n\n .. 
math:: p(y| \\theta, \\phi) = c(y, \\phi)\n \\exp\\left(\\frac{\\theta y-A(\\theta)}{\\phi}\\right)\n = \\tilde{c}(y, \\phi)\n \\exp\\left(-\\frac{d(y, y_\\textrm{pred})}{2\\phi}\\right)\n\n with mean :math:`\\mathrm{E}[Y] = A'(\\theta) = y_\\textrm{pred}`,\n variance :math:`\\mathrm{Var}[Y] = \\phi \\cdot v(y_\\textrm{pred})`,\n unit variance :math:`v(y_\\textrm{pred})` and\n unit deviance :math:`d(y,y_\\textrm{pred})`.\n\n Methods\n -------\n deviance\n deviance_derivative\n in_y_range\n unit_deviance\n unit_deviance_derivative\n unit_variance\n\n References\n ----------\n https://en.wikipedia.org/wiki/Exponential_dispersion_model.\n \"\"\"\n\n def in_y_range(self, y):\n \"\"\"Returns ``True`` if y is in the valid range of Y~EDM.\n\n Parameters\n ----------\n y : array of shape (n_samples,)\n Target values.\n \"\"\"\n # Note that currently supported distributions have +inf upper bound\n\n if not isinstance(self._lower_bound, DistributionBoundary):\n raise TypeError('_lower_bound attribute must be of type '\n 'DistributionBoundary')\n\n if self._lower_bound.inclusive:\n return np.greater_equal(y, self._lower_bound.value)\n else:\n return np.greater(y, self._lower_bound.value)\n\n @abstractmethod\n def unit_variance(self, y_pred):\n r\"\"\"Compute the unit variance function.\n\n The unit variance :math:`v(y_\\textrm{pred})` determines the variance as\n a function of the mean :math:`y_\\textrm{pred}` by\n :math:`\\mathrm{Var}[Y_i] = \\phi/s_i*v(y_\\textrm{pred}_i)`.\n It can also be derived from the unit deviance\n :math:`d(y,y_\\textrm{pred})` as\n\n .. math:: v(y_\\textrm{pred}) = \\frac{2}{\n \\frac{\\partial^2 d(y,y_\\textrm{pred})}{\n \\partialy_\\textrm{pred}^2}}\\big|_{y=y_\\textrm{pred}}\n\n See also :func:`variance`.\n\n Parameters\n ----------\n y_pred : array of shape (n_samples,)\n Predicted mean.\n \"\"\"\n\n @abstractmethod\n def unit_deviance(self, y, y_pred, check_input=False):\n r\"\"\"Compute the unit deviance.\n\n The unit_deviance :math:`d(y,y_\\textrm{pred})` can be defined by the\n log-likelihood as\n :math:`d(y,y_\\textrm{pred}) = -2\\phi\\cdot\n \\left(loglike(y,y_\\textrm{pred},\\phi) - loglike(y,y,\\phi)\\right).`\n\n Parameters\n ----------\n y : array of shape (n_samples,)\n Target values.\n\n y_pred : array of shape (n_samples,)\n Predicted mean.\n\n check_input : bool, default=False\n If True raise an exception on invalid y or y_pred values, otherwise\n they will be propagated as NaN.\n Returns\n -------\n deviance: array of shape (n_samples,)\n Computed deviance\n \"\"\"\n\n def unit_deviance_derivative(self, y, y_pred):\n r\"\"\"Compute the derivative of the unit deviance w.r.t. 
y_pred.\n\n The derivative of the unit deviance is given by\n :math:`\\frac{\\partial}{\\partialy_\\textrm{pred}}d(y,y_\\textrm{pred})\n = -2\\frac{y-y_\\textrm{pred}}{v(y_\\textrm{pred})}`\n with unit variance :math:`v(y_\\textrm{pred})`.\n\n Parameters\n ----------\n y : array of shape (n_samples,)\n Target values.\n\n y_pred : array of shape (n_samples,)\n Predicted mean.\n \"\"\"\n return -2 * (y - y_pred) / self.unit_variance(y_pred)\n\n def deviance(self, y, y_pred, weights=1):\n r\"\"\"Compute the deviance.\n\n The deviance is a weighted sum of the per sample unit deviances,\n :math:`D = \\sum_i s_i \\cdot d(y_i, y_\\textrm{pred}_i)`\n with weights :math:`s_i` and unit deviance\n :math:`d(y,y_\\textrm{pred})`.\n In terms of the log-likelihood it is :math:`D = -2\\phi\\cdot\n \\left(loglike(y,y_\\textrm{pred},\\frac{phi}{s})\n - loglike(y,y,\\frac{phi}{s})\\right)`.\n\n Parameters\n ----------\n y : array of shape (n_samples,)\n Target values.\n\n y_pred : array of shape (n_samples,)\n Predicted mean.\n\n weights : {int, array of shape (n_samples,)}, default=1\n Weights or exposure to which variance is inverse proportional.\n \"\"\"\n return np.sum(weights * self.unit_deviance(y, y_pred))\n\n def deviance_derivative(self, y, y_pred, weights=1):\n r\"\"\"Compute the derivative of the deviance w.r.t. y_pred.\n\n It gives :math:`\\frac{\\partial}{\\partial y_\\textrm{pred}}\n D(y, \\y_\\textrm{pred}; weights)`.\n\n Parameters\n ----------\n y : array, shape (n_samples,)\n Target values.\n\n y_pred : array, shape (n_samples,)\n Predicted mean.\n\n weights : {int, array of shape (n_samples,)}, default=1\n Weights or exposure to which variance is inverse proportional.\n \"\"\"\n return weights * self.unit_deviance_derivative(y, y_pred)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/GammaDistribution", + "name": "GammaDistribution", + "qname": "sklearn._loss.glm_distribution.GammaDistribution", + "decorators": [], + "superclasses": ["TweedieDistribution"], + "methods": ["scikit-learn/sklearn._loss.glm_distribution/GammaDistribution/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Class for the Gamma distribution.", + "docstring": "Class for the Gamma distribution.", + "code": "class GammaDistribution(TweedieDistribution):\n \"\"\"Class for the Gamma distribution.\"\"\"\n def __init__(self):\n super().__init__(power=2)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/InverseGaussianDistribution", + "name": "InverseGaussianDistribution", + "qname": "sklearn._loss.glm_distribution.InverseGaussianDistribution", + "decorators": [], + "superclasses": ["TweedieDistribution"], + "methods": ["scikit-learn/sklearn._loss.glm_distribution/InverseGaussianDistribution/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Class for the scaled InverseGaussianDistribution distribution.", + "docstring": "Class for the scaled InverseGaussianDistribution distribution.", + "code": "class InverseGaussianDistribution(TweedieDistribution):\n \"\"\"Class for the scaled InverseGaussianDistribution distribution.\"\"\"\n def __init__(self):\n super().__init__(power=3)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/NormalDistribution", + "name": "NormalDistribution", + "qname": "sklearn._loss.glm_distribution.NormalDistribution", + "decorators": [], + "superclasses": ["TweedieDistribution"], + "methods": 
["scikit-learn/sklearn._loss.glm_distribution/NormalDistribution/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Class for the Normal (aka Gaussian) distribution.", + "docstring": "Class for the Normal (aka Gaussian) distribution.", + "code": "class NormalDistribution(TweedieDistribution):\n \"\"\"Class for the Normal (aka Gaussian) distribution.\"\"\"\n def __init__(self):\n super().__init__(power=0)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/PoissonDistribution", + "name": "PoissonDistribution", + "qname": "sklearn._loss.glm_distribution.PoissonDistribution", + "decorators": [], + "superclasses": ["TweedieDistribution"], + "methods": ["scikit-learn/sklearn._loss.glm_distribution/PoissonDistribution/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Class for the scaled Poisson distribution.", + "docstring": "Class for the scaled Poisson distribution.", + "code": "class PoissonDistribution(TweedieDistribution):\n \"\"\"Class for the scaled Poisson distribution.\"\"\"\n def __init__(self):\n super().__init__(power=1)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution", + "name": "TweedieDistribution", + "qname": "sklearn._loss.glm_distribution.TweedieDistribution", + "decorators": [], + "superclasses": ["ExponentialDispersionModel"], + "methods": [ + "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/__init__", + "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/power@getter", + "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/power@setter", + "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/unit_variance", + "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/unit_deviance" + ], + "is_public": false, + "reexported_by": [], + "description": "A class for the Tweedie distribution.\n\nA Tweedie distribution with mean :math:`y_\\textrm{pred}=\\mathrm{E}[Y]`\nis uniquely defined by it's mean-variance relationship\n:math:`\\mathrm{Var}[Y] \\propto y_\\textrm{pred}^power`.\n\nSpecial cases are:\n\n===== ================\nPower Distribution\n===== ================\n0 Normal\n1 Poisson\n(1,2) Compound Poisson\n2 Gamma\n3 Inverse Gaussian", + "docstring": "A class for the Tweedie distribution.\n\nA Tweedie distribution with mean :math:`y_\\textrm{pred}=\\mathrm{E}[Y]`\nis uniquely defined by it's mean-variance relationship\n:math:`\\mathrm{Var}[Y] \\propto y_\\textrm{pred}^power`.\n\nSpecial cases are:\n\n===== ================\nPower Distribution\n===== ================\n0 Normal\n1 Poisson\n(1,2) Compound Poisson\n2 Gamma\n3 Inverse Gaussian\n\nParameters\n----------\npower : float, default=0\n The variance power of the `unit_variance`\n :math:`v(y_\\textrm{pred}) = y_\\textrm{pred}^{power}`.\n For ``0=1.')\n elif 1 <= power < 2:\n # Poisson or Compound Poisson distribution\n self._lower_bound = DistributionBoundary(0, inclusive=True)\n elif power >= 2:\n # Gamma, Positive Stable, Inverse Gaussian distributions\n self._lower_bound = DistributionBoundary(0, inclusive=False)\n else: # pragma: no cover\n # this branch should be unreachable.\n raise ValueError\n\n self._power = power\n\n def unit_variance(self, y_pred):\n \"\"\"Compute the unit variance of a Tweedie distribution\n v(y_\\textrm{pred})=y_\\textrm{pred}**power.\n\n Parameters\n ----------\n y_pred : array of shape (n_samples,)\n Predicted mean.\n \"\"\"\n return np.power(y_pred, self.power)\n\n def 
unit_deviance(self, y, y_pred, check_input=False):\n r\"\"\"Compute the unit deviance.\n\n The unit_deviance :math:`d(y,y_\\textrm{pred})` can be defined by the\n log-likelihood as\n :math:`d(y,y_\\textrm{pred}) = -2\\phi\\cdot\n \\left(loglike(y,y_\\textrm{pred},\\phi) - loglike(y,y,\\phi)\\right).`\n\n Parameters\n ----------\n y : array of shape (n_samples,)\n Target values.\n\n y_pred : array of shape (n_samples,)\n Predicted mean.\n\n check_input : bool, default=False\n If True raise an exception on invalid y or y_pred values, otherwise\n they will be propagated as NaN.\n Returns\n -------\n deviance: array of shape (n_samples,)\n Computed deviance\n \"\"\"\n p = self.power\n\n if check_input:\n message = (\"Mean Tweedie deviance error with power={} can only be \"\n \"used on \".format(p))\n if p < 0:\n # 'Extreme stable', y any realy number, y_pred > 0\n if (y_pred <= 0).any():\n raise ValueError(message + \"strictly positive y_pred.\")\n elif p == 0:\n # Normal, y and y_pred can be any real number\n pass\n elif 0 < p < 1:\n raise ValueError(\"Tweedie deviance is only defined for \"\n \"power<=0 and power>=1.\")\n elif 1 <= p < 2:\n # Poisson and Compount poisson distribution, y >= 0, y_pred > 0\n if (y < 0).any() or (y_pred <= 0).any():\n raise ValueError(message + \"non-negative y and strictly \"\n \"positive y_pred.\")\n elif p >= 2:\n # Gamma and Extreme stable distribution, y and y_pred > 0\n if (y <= 0).any() or (y_pred <= 0).any():\n raise ValueError(message\n + \"strictly positive y and y_pred.\")\n else: # pragma: nocover\n # Unreachable statement\n raise ValueError\n\n if p < 0:\n # 'Extreme stable', y any realy number, y_pred > 0\n dev = 2 * (np.power(np.maximum(y, 0), 2-p) / ((1-p) * (2-p))\n - y * np.power(y_pred, 1-p) / (1-p)\n + np.power(y_pred, 2-p) / (2-p))\n\n elif p == 0:\n # Normal distribution, y and y_pred any real number\n dev = (y - y_pred)**2\n elif p < 1:\n raise ValueError(\"Tweedie deviance is only defined for power<=0 \"\n \"and power>=1.\")\n elif p == 1:\n # Poisson distribution\n dev = 2 * (xlogy(y, y/y_pred) - y + y_pred)\n elif p == 2:\n # Gamma distribution\n dev = 2 * (np.log(y_pred/y) + y/y_pred - 1)\n else:\n dev = 2 * (np.power(y, 2-p) / ((1-p) * (2-p))\n - y * np.power(y_pred, 1-p) / (1-p)\n + np.power(y_pred, 2-p) / (2-p))\n return dev", + "instance_attributes": [ + { + "name": "power", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "_lower_bound", + "types": { + "kind": "NamedType", + "name": "DistributionBoundary" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator", + "name": "BaseEstimator", + "qname": "sklearn.base.BaseEstimator", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.base/BaseEstimator/_get_param_names", + "scikit-learn/sklearn.base/BaseEstimator/get_params", + "scikit-learn/sklearn.base/BaseEstimator/set_params", + "scikit-learn/sklearn.base/BaseEstimator/__repr__", + "scikit-learn/sklearn.base/BaseEstimator/__getstate__", + "scikit-learn/sklearn.base/BaseEstimator/__setstate__", + "scikit-learn/sklearn.base/BaseEstimator/_more_tags", + "scikit-learn/sklearn.base/BaseEstimator/_get_tags", + "scikit-learn/sklearn.base/BaseEstimator/_check_n_features", + "scikit-learn/sklearn.base/BaseEstimator/_validate_data", + "scikit-learn/sklearn.base/BaseEstimator/_repr_html_@getter", + "scikit-learn/sklearn.base/BaseEstimator/_repr_html_inner", + "scikit-learn/sklearn.base/BaseEstimator/_repr_mimebundle_" + ], + "is_public": true, + "reexported_by": [], + 
"description": "Base class for all estimators in scikit-learn.", + "docstring": "Base class for all estimators in scikit-learn.\n\nNotes\n-----\nAll estimators should specify all the parameters that can be set\nat the class level in their ``__init__`` as explicit keyword\narguments (no ``*args`` or ``**kwargs``).", + "code": "class BaseEstimator:\n \"\"\"Base class for all estimators in scikit-learn.\n\n Notes\n -----\n All estimators should specify all the parameters that can be set\n at the class level in their ``__init__`` as explicit keyword\n arguments (no ``*args`` or ``**kwargs``).\n \"\"\"\n\n @classmethod\n def _get_param_names(cls):\n \"\"\"Get parameter names for the estimator\"\"\"\n # fetch the constructor or the original constructor before\n # deprecation wrapping if any\n init = getattr(cls.__init__, 'deprecated_original', cls.__init__)\n if init is object.__init__:\n # No explicit constructor to introspect\n return []\n\n # introspect the constructor arguments to find the model parameters\n # to represent\n init_signature = inspect.signature(init)\n # Consider the constructor parameters excluding 'self'\n parameters = [p for p in init_signature.parameters.values()\n if p.name != 'self' and p.kind != p.VAR_KEYWORD]\n for p in parameters:\n if p.kind == p.VAR_POSITIONAL:\n raise RuntimeError(\"scikit-learn estimators should always \"\n \"specify their parameters in the signature\"\n \" of their __init__ (no varargs).\"\n \" %s with constructor %s doesn't \"\n \" follow this convention.\"\n % (cls, init_signature))\n # Extract and sort argument names excluding 'self'\n return sorted([p.name for p in parameters])\n\n def get_params(self, deep=True):\n \"\"\"\n Get parameters for this estimator.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : dict\n Parameter names mapped to their values.\n \"\"\"\n out = dict()\n for key in self._get_param_names():\n value = getattr(self, key)\n if deep and hasattr(value, 'get_params'):\n deep_items = value.get_params().items()\n out.update((key + '__' + k, val) for k, val in deep_items)\n out[key] = value\n return out\n\n def set_params(self, **params):\n \"\"\"\n Set the parameters of this estimator.\n\n The method works on simple estimators as well as on nested objects\n (such as :class:`~sklearn.pipeline.Pipeline`). The latter have\n parameters of the form ``__`` so that it's\n possible to update each component of a nested object.\n\n Parameters\n ----------\n **params : dict\n Estimator parameters.\n\n Returns\n -------\n self : estimator instance\n Estimator instance.\n \"\"\"\n if not params:\n # Simple optimization to gain speed (inspect is slow)\n return self\n valid_params = self.get_params(deep=True)\n\n nested_params = defaultdict(dict) # grouped by prefix\n for key, value in params.items():\n key, delim, sub_key = key.partition('__')\n if key not in valid_params:\n raise ValueError('Invalid parameter %s for estimator %s. '\n 'Check the list of available parameters '\n 'with `estimator.get_params().keys()`.' %\n (key, self))\n\n if delim:\n nested_params[key][sub_key] = value\n else:\n setattr(self, key, value)\n valid_params[key] = value\n\n for key, sub_params in nested_params.items():\n valid_params[key].set_params(**sub_params)\n\n return self\n\n def __repr__(self, N_CHAR_MAX=700):\n # N_CHAR_MAX is the (approximate) maximum number of non-blank\n # characters to render. 
We pass it as an optional parameter to ease\n # the tests.\n\n from .utils._pprint import _EstimatorPrettyPrinter\n\n N_MAX_ELEMENTS_TO_SHOW = 30 # number of elements to show in sequences\n\n # use ellipsis for sequences with a lot of elements\n pp = _EstimatorPrettyPrinter(\n compact=True, indent=1, indent_at_name=True,\n n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)\n\n repr_ = pp.pformat(self)\n\n # Use bruteforce ellipsis when there are a lot of non-blank characters\n n_nonblank = len(''.join(repr_.split()))\n if n_nonblank > N_CHAR_MAX:\n lim = N_CHAR_MAX // 2 # apprx number of chars to keep on both ends\n regex = r'^(\\s*\\S){%d}' % lim\n # The regex '^(\\s*\\S){%d}' % n\n # matches from the start of the string until the nth non-blank\n # character:\n # - ^ matches the start of string\n # - (pattern){n} matches n repetitions of pattern\n # - \\s*\\S matches a non-blank char following zero or more blanks\n left_lim = re.match(regex, repr_).end()\n right_lim = re.match(regex, repr_[::-1]).end()\n\n if '\\n' in repr_[left_lim:-right_lim]:\n # The left side and right side aren't on the same line.\n # To avoid weird cuts, e.g.:\n # categoric...ore',\n # we need to start the right side with an appropriate newline\n # character so that it renders properly as:\n # categoric...\n # handle_unknown='ignore',\n # so we add [^\\n]*\\n which matches until the next \\n\n regex += r'[^\\n]*\\n'\n right_lim = re.match(regex, repr_[::-1]).end()\n\n ellipsis = '...'\n if left_lim + len(ellipsis) < len(repr_) - right_lim:\n # Only add ellipsis if it results in a shorter repr\n repr_ = repr_[:left_lim] + '...' + repr_[-right_lim:]\n\n return repr_\n\n def __getstate__(self):\n try:\n state = super().__getstate__()\n except AttributeError:\n state = self.__dict__.copy()\n\n if type(self).__module__.startswith('sklearn.'):\n return dict(state.items(), _sklearn_version=__version__)\n else:\n return state\n\n def __setstate__(self, state):\n if type(self).__module__.startswith('sklearn.'):\n pickle_version = state.pop(\"_sklearn_version\", \"pre-0.18\")\n if pickle_version != __version__:\n warnings.warn(\n \"Trying to unpickle estimator {0} from version {1} when \"\n \"using version {2}. This might lead to breaking code or \"\n \"invalid results. Use at your own risk.\".format(\n self.__class__.__name__, pickle_version, __version__),\n UserWarning)\n try:\n super().__setstate__(state)\n except AttributeError:\n self.__dict__.update(state)\n\n def _more_tags(self):\n return _DEFAULT_TAGS\n\n def _get_tags(self):\n collected_tags = {}\n for base_class in reversed(inspect.getmro(self.__class__)):\n if hasattr(base_class, '_more_tags'):\n # need the if because mixins might not have _more_tags\n # but might do redundant work in estimators\n # (i.e. calling more tags on BaseEstimator multiple times)\n more_tags = base_class._more_tags(self)\n collected_tags.update(more_tags)\n return collected_tags\n\n def _check_n_features(self, X, reset):\n \"\"\"Set the `n_features_in_` attribute, or check against it.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n reset : bool\n If True, the `n_features_in_` attribute is set to `X.shape[1]`.\n If False and the attribute exists, then check that it is equal to\n `X.shape[1]`. If False and the attribute does *not* exist, then\n the check is skipped.\n .. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. 
All other methods that validate `X`\n should set `reset=False`.\n \"\"\"\n n_features = X.shape[1]\n\n if reset:\n self.n_features_in_ = n_features\n return\n\n if not hasattr(self, \"n_features_in_\"):\n # Skip this check if the expected number of expected input features\n # was not recorded by calling fit first. This is typically the case\n # for stateless transformers.\n return\n\n if n_features != self.n_features_in_:\n raise ValueError(\n f\"X has {n_features} features, but {self.__class__.__name__} \"\n f\"is expecting {self.n_features_in_} features as input.\")\n\n def _validate_data(self, X, y='no_validation', reset=True,\n validate_separately=False, **check_params):\n \"\"\"Validate input data and set or check the `n_features_in_` attribute.\n\n Parameters\n ----------\n X : {array-like, sparse matrix, dataframe} of shape \\\n (n_samples, n_features)\n The input samples.\n y : array-like of shape (n_samples,), default='no_validation'\n The targets.\n\n - If `None`, `check_array` is called on `X`. If the estimator's\n requires_y tag is True, then an error will be raised.\n - If `'no_validation'`, `check_array` is called on `X` and the\n estimator's requires_y tag is ignored. This is a default\n placeholder and is never meant to be explicitly set.\n - Otherwise, both `X` and `y` are checked with either `check_array`\n or `check_X_y` depending on `validate_separately`.\n\n reset : bool, default=True\n Whether to reset the `n_features_in_` attribute.\n If False, the input will be checked for consistency with data\n provided when reset was last True.\n .. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`.\n validate_separately : False or tuple of dicts, default=False\n Only used if y is not None.\n If False, call validate_X_y(). Else, it must be a tuple of kwargs\n to be used for calling check_array() on X and y respectively.\n **check_params : kwargs\n Parameters passed to :func:`sklearn.utils.check_array` or\n :func:`sklearn.utils.check_X_y`. Ignored if validate_separately\n is not False.\n\n Returns\n -------\n out : {ndarray, sparse matrix} or tuple of these\n The validated input. A tuple is returned if `y` is not None.\n \"\"\"\n\n if y is None:\n if self._get_tags()['requires_y']:\n raise ValueError(\n f\"This {self.__class__.__name__} estimator \"\n f\"requires y to be passed, but the target y is None.\"\n )\n X = check_array(X, **check_params)\n out = X\n elif isinstance(y, str) and y == 'no_validation':\n X = check_array(X, **check_params)\n out = X\n else:\n if validate_separately:\n # We need this because some estimators validate X and y\n # separately, and in general, separately calling check_array()\n # on X and y isn't equivalent to just calling check_X_y()\n # :(\n check_X_params, check_y_params = validate_separately\n X = check_array(X, **check_X_params)\n y = check_array(y, **check_y_params)\n else:\n X, y = check_X_y(X, y, **check_params)\n out = X, y\n\n if check_params.get('ensure_2d', True):\n self._check_n_features(X, reset=reset)\n\n return out\n\n @property\n def _repr_html_(self):\n \"\"\"HTML representation of estimator.\n\n This is redundant with the logic of `_repr_mimebundle_`. 
The latter\n should be favorted in the long term, `_repr_html_` is only\n implemented for consumers who do not interpret `_repr_mimbundle_`.\n \"\"\"\n if get_config()[\"display\"] != 'diagram':\n raise AttributeError(\"_repr_html_ is only defined when the \"\n \"'display' configuration option is set to \"\n \"'diagram'\")\n return self._repr_html_inner\n\n def _repr_html_inner(self):\n \"\"\"This function is returned by the @property `_repr_html_` to make\n `hasattr(estimator, \"_repr_html_\") return `True` or `False` depending\n on `get_config()[\"display\"]`.\n \"\"\"\n return estimator_html_repr(self)\n\n def _repr_mimebundle_(self, **kwargs):\n \"\"\"Mime bundle used by jupyter kernels to display estimator\"\"\"\n output = {\"text/plain\": repr(self)}\n if get_config()[\"display\"] == 'diagram':\n output[\"text/html\"] = estimator_html_repr(self)\n return output", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.base/BiclusterMixin", + "name": "BiclusterMixin", + "qname": "sklearn.base.BiclusterMixin", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.base/BiclusterMixin/biclusters_@getter", + "scikit-learn/sklearn.base/BiclusterMixin/get_indices", + "scikit-learn/sklearn.base/BiclusterMixin/get_shape", + "scikit-learn/sklearn.base/BiclusterMixin/get_submatrix" + ], + "is_public": true, + "reexported_by": [], + "description": "Mixin class for all bicluster estimators in scikit-learn.", + "docstring": "Mixin class for all bicluster estimators in scikit-learn.", + "code": "class BiclusterMixin:\n \"\"\"Mixin class for all bicluster estimators in scikit-learn.\"\"\"\n\n @property\n def biclusters_(self):\n \"\"\"Convenient way to get row and column indicators together.\n\n Returns the ``rows_`` and ``columns_`` members.\n \"\"\"\n return self.rows_, self.columns_\n\n def get_indices(self, i):\n \"\"\"Row and column indices of the `i`'th bicluster.\n\n Only works if ``rows_`` and ``columns_`` attributes exist.\n\n Parameters\n ----------\n i : int\n The index of the cluster.\n\n Returns\n -------\n row_ind : ndarray, dtype=np.intp\n Indices of rows in the dataset that belong to the bicluster.\n col_ind : ndarray, dtype=np.intp\n Indices of columns in the dataset that belong to the bicluster.\n\n \"\"\"\n rows = self.rows_[i]\n columns = self.columns_[i]\n return np.nonzero(rows)[0], np.nonzero(columns)[0]\n\n def get_shape(self, i):\n \"\"\"Shape of the `i`'th bicluster.\n\n Parameters\n ----------\n i : int\n The index of the cluster.\n\n Returns\n -------\n n_rows : int\n Number of rows in the bicluster.\n\n n_cols : int\n Number of columns in the bicluster.\n \"\"\"\n indices = self.get_indices(i)\n return tuple(len(i) for i in indices)\n\n def get_submatrix(self, i, data):\n \"\"\"Return the submatrix corresponding to bicluster `i`.\n\n Parameters\n ----------\n i : int\n The index of the cluster.\n data : array-like of shape (n_samples, n_features)\n The data.\n\n Returns\n -------\n submatrix : ndarray of shape (n_rows, n_cols)\n The submatrix corresponding to bicluster `i`.\n\n Notes\n -----\n Works with sparse matrices. 
Only works if ``rows_`` and\n ``columns_`` attributes exist.\n \"\"\"\n from .utils.validation import check_array\n data = check_array(data, accept_sparse='csr')\n row_ind, col_ind = self.get_indices(i)\n return data[row_ind[:, np.newaxis], col_ind]", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.base/ClassifierMixin", + "name": "ClassifierMixin", + "qname": "sklearn.base.ClassifierMixin", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.base/ClassifierMixin/score", + "scikit-learn/sklearn.base/ClassifierMixin/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "Mixin class for all classifiers in scikit-learn.", + "docstring": "Mixin class for all classifiers in scikit-learn.", + "code": "class ClassifierMixin:\n \"\"\"Mixin class for all classifiers in scikit-learn.\"\"\"\n\n _estimator_type = \"classifier\"\n\n def score(self, X, y, sample_weight=None):\n \"\"\"\n Return the mean accuracy on the given test data and labels.\n\n In multi-label classification, this is the subset accuracy\n which is a harsh metric since you require for each sample that\n each label set be correctly predicted.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test samples.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for `X`.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Mean accuracy of ``self.predict(X)`` wrt. `y`.\n \"\"\"\n from .metrics import accuracy_score\n return accuracy_score(y, self.predict(X), sample_weight=sample_weight)\n\n def _more_tags(self):\n return {'requires_y': True}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.base/ClusterMixin", + "name": "ClusterMixin", + "qname": "sklearn.base.ClusterMixin", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.base/ClusterMixin/fit_predict", + "scikit-learn/sklearn.base/ClusterMixin/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "Mixin class for all cluster estimators in scikit-learn.", + "docstring": "Mixin class for all cluster estimators in scikit-learn.", + "code": "class ClusterMixin:\n \"\"\"Mixin class for all cluster estimators in scikit-learn.\"\"\"\n _estimator_type = \"clusterer\"\n\n def fit_predict(self, X, y=None):\n \"\"\"\n Perform clustering on `X` and returns cluster labels.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,), dtype=np.int64\n Cluster labels.\n \"\"\"\n # non-optimized default implementation; override when a better\n # method is possible for a given clustering algorithm\n self.fit(X)\n return self.labels_\n\n def _more_tags(self):\n return {\"preserves_dtype\": []}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.base/DensityMixin", + "name": "DensityMixin", + "qname": "sklearn.base.DensityMixin", + "decorators": [], + "superclasses": [], + "methods": ["scikit-learn/sklearn.base/DensityMixin/score"], + "is_public": true, + "reexported_by": [], + "description": "Mixin class for all density estimators in scikit-learn.", + "docstring": "Mixin class for all density estimators in scikit-learn.", + "code": "class DensityMixin:\n \"\"\"Mixin class for all density estimators in scikit-learn.\"\"\"\n _estimator_type = 
\"DensityEstimator\"\n\n def score(self, X, y=None):\n \"\"\"Return the score of the model on the data `X`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test samples.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n score : float\n \"\"\"\n pass", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.base/MetaEstimatorMixin", + "name": "MetaEstimatorMixin", + "qname": "sklearn.base.MetaEstimatorMixin", + "decorators": [], + "superclasses": [], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class MetaEstimatorMixin:\n _required_parameters = [\"estimator\"]\n \"\"\"Mixin class for all meta estimators in scikit-learn.\"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.base/MultiOutputMixin", + "name": "MultiOutputMixin", + "qname": "sklearn.base.MultiOutputMixin", + "decorators": [], + "superclasses": [], + "methods": ["scikit-learn/sklearn.base/MultiOutputMixin/_more_tags"], + "is_public": true, + "reexported_by": [], + "description": "Mixin to mark estimators that support multioutput.", + "docstring": "Mixin to mark estimators that support multioutput.", + "code": "class MultiOutputMixin:\n \"\"\"Mixin to mark estimators that support multioutput.\"\"\"\n def _more_tags(self):\n return {'multioutput': True}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.base/OutlierMixin", + "name": "OutlierMixin", + "qname": "sklearn.base.OutlierMixin", + "decorators": [], + "superclasses": [], + "methods": ["scikit-learn/sklearn.base/OutlierMixin/fit_predict"], + "is_public": true, + "reexported_by": [], + "description": "Mixin class for all outlier detection estimators in scikit-learn.", + "docstring": "Mixin class for all outlier detection estimators in scikit-learn.", + "code": "class OutlierMixin:\n \"\"\"Mixin class for all outlier detection estimators in scikit-learn.\"\"\"\n _estimator_type = \"outlier_detector\"\n\n def fit_predict(self, X, y=None):\n \"\"\"Perform fit on X and returns labels for X.\n\n Returns -1 for outliers and 1 for inliers.\n\n Parameters\n ----------\n X : {array-like, sparse matrix, dataframe} of shape \\\n (n_samples, n_features)\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n 1 for inliers, -1 for outliers.\n \"\"\"\n # override for transductive outlier detectors like LocalOulierFactor\n return self.fit(X).predict(X)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.base/RegressorMixin", + "name": "RegressorMixin", + "qname": "sklearn.base.RegressorMixin", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.base/RegressorMixin/score", + "scikit-learn/sklearn.base/RegressorMixin/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "Mixin class for all regression estimators in scikit-learn.", + "docstring": "Mixin class for all regression estimators in scikit-learn.", + "code": "class RegressorMixin:\n \"\"\"Mixin class for all regression estimators in scikit-learn.\"\"\"\n _estimator_type = \"regressor\"\n\n def score(self, X, y, sample_weight=None):\n \"\"\"Return the coefficient of determination :math:`R^2` of the\n prediction.\n\n The coefficient :math:`R^2` is defined as :math:`(1 - \\\\frac{u}{v})`,\n where :math:`u` is the residual sum of squares ``((y_true - y_pred)\n ** 2).sum()`` and :math:`v` is the total sum 
of squares ``((y_true -\n y_true.mean()) ** 2).sum()``. The best possible score is 1.0 and it\n can be negative (because the model can be arbitrarily worse). A\n constant model that always predicts the expected value of `y`,\n disregarding the input features, would get a :math:`R^2` score of\n 0.0.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test samples. For some estimators this may be a precomputed\n kernel matrix or a list of generic objects instead with shape\n ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``\n is the number of samples used in the fitting for the estimator.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True values for `X`.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n :math:`R^2` of ``self.predict(X)`` wrt. `y`.\n\n Notes\n -----\n The :math:`R^2` score used when calling ``score`` on a regressor uses\n ``multioutput='uniform_average'`` from version 0.23 to keep consistent\n with default value of :func:`~sklearn.metrics.r2_score`.\n This influences the ``score`` method of all the multioutput\n regressors (except for\n :class:`~sklearn.multioutput.MultiOutputRegressor`).\n \"\"\"\n\n from .metrics import r2_score\n y_pred = self.predict(X)\n return r2_score(y, y_pred, sample_weight=sample_weight)\n\n def _more_tags(self):\n return {'requires_y': True}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.base/TransformerMixin", + "name": "TransformerMixin", + "qname": "sklearn.base.TransformerMixin", + "decorators": [], + "superclasses": [], + "methods": ["scikit-learn/sklearn.base/TransformerMixin/fit_transform"], + "is_public": true, + "reexported_by": [], + "description": "Mixin class for all transformers in scikit-learn.", + "docstring": "Mixin class for all transformers in scikit-learn.", + "code": "class TransformerMixin:\n \"\"\"Mixin class for all transformers in scikit-learn.\"\"\"\n\n def fit_transform(self, X, y=None, **fit_params):\n \"\"\"\n Fit to data, then transform it.\n\n Fits transformer to `X` and `y` with optional parameters `fit_params`\n and returns a transformed version of `X`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input samples.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs), \\\n default=None\n Target values (None for unsupervised transformations).\n\n **fit_params : dict\n Additional fit parameters.\n\n Returns\n -------\n X_new : ndarray array of shape (n_samples, n_features_new)\n Transformed array.\n \"\"\"\n # non-optimized default implementation; override when a better\n # method is possible for a given clustering algorithm\n if y is None:\n # fit method of arity 1 (unsupervised transformation)\n return self.fit(X, **fit_params).transform(X)\n else:\n # fit method of arity 2 (supervised transformation)\n return self.fit(X, y, **fit_params).transform(X)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.base/_UnstableArchMixin", + "name": "_UnstableArchMixin", + "qname": "sklearn.base._UnstableArchMixin", + "decorators": [], + "superclasses": [], + "methods": ["scikit-learn/sklearn.base/_UnstableArchMixin/_more_tags"], + "is_public": false, + "reexported_by": [], + "description": "Mark estimators that are non-determinstic on 32bit or PowerPC", + "docstring": "Mark estimators that are non-determinstic on 32bit or PowerPC", + "code": "class _UnstableArchMixin:\n \"\"\"Mark estimators that are 
non-determinstic on 32bit or PowerPC\"\"\"\n def _more_tags(self):\n return {'non_deterministic': (\n _IS_32BIT or platform.machine().startswith(('ppc', 'powerpc')))}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV", + "name": "CalibratedClassifierCV", + "qname": "sklearn.calibration.CalibratedClassifierCV", + "decorators": [], + "superclasses": ["ClassifierMixin", "MetaEstimatorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.calibration/CalibratedClassifierCV/__init__", + "scikit-learn/sklearn.calibration/CalibratedClassifierCV/fit", + "scikit-learn/sklearn.calibration/CalibratedClassifierCV/predict_proba", + "scikit-learn/sklearn.calibration/CalibratedClassifierCV/predict", + "scikit-learn/sklearn.calibration/CalibratedClassifierCV/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "Probability calibration with isotonic regression or logistic regression.\n\nThis class uses cross-validation to both estimate the parameters of a\nclassifier and subsequently calibrate a classifier. With default\n`ensemble=True`, for each cv split it\nfits a copy of the base estimator to the training subset, and calibrates it\nusing the testing subset. For prediction, predicted probabilities are\naveraged across these individual calibrated classifiers. When\n`ensemble=False`, cross-validation is used to obtain unbiased predictions,\nvia :func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. For prediction, the base estimator, trained using all\nthe data, is used. This is the method implemented when `probabilities=True`\nfor :mod:`sklearn.svm` estimators.\n\nAlready fitted classifiers can be calibrated via the parameter\n`cv=\"prefit\"`. In this case, no cross-validation is used and all provided\ndata is used for calibration. The user has to take care manually that data\nfor model fitting and calibration are disjoint.\n\nThe calibration is based on the :term:`decision_function` method of the\n`base_estimator` if it exists, else on :term:`predict_proba`.\n\nRead more in the :ref:`User Guide <calibration>`.", + "docstring": "Probability calibration with isotonic regression or logistic regression.\n\nThis class uses cross-validation to both estimate the parameters of a\nclassifier and subsequently calibrate a classifier. With default\n`ensemble=True`, for each cv split it\nfits a copy of the base estimator to the training subset, and calibrates it\nusing the testing subset. For prediction, predicted probabilities are\naveraged across these individual calibrated classifiers. When\n`ensemble=False`, cross-validation is used to obtain unbiased predictions,\nvia :func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. For prediction, the base estimator, trained using all\nthe data, is used. This is the method implemented when `probabilities=True`\nfor :mod:`sklearn.svm` estimators.\n\nAlready fitted classifiers can be calibrated via the parameter\n`cv=\"prefit\"`. In this case, no cross-validation is used and all provided\ndata is used for calibration. 
The user has to take care manually that data\nfor model fitting and calibration are disjoint.\n\nThe calibration is based on the :term:`decision_function` method of the\n`base_estimator` if it exists, else on :term:`predict_proba`.\n\nRead more in the :ref:`User Guide <calibration>`.\n\nParameters\n----------\nbase_estimator : estimator instance, default=None\n The classifier whose output need to be calibrated to provide more\n accurate `predict_proba` outputs. The default classifier is\n a :class:`~sklearn.svm.LinearSVC`.\n\nmethod : {'sigmoid', 'isotonic'}, default='sigmoid'\n The method to use for calibration. Can be 'sigmoid' which\n corresponds to Platt's method (i.e. a logistic regression model) or\n 'isotonic' which is a non-parametric approach. It is not advised to\n use isotonic calibration with too few calibration samples\n ``(<<1000)`` since it tends to overfit.\n\ncv : int, cross-validation generator, iterable or \"prefit\", default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if ``y`` is binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is\n neither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\n is used.\n\n Refer to the :ref:`User Guide <cross_validation>` for the various\n cross-validation strategies that can be used here.\n\n If \"prefit\" is passed, it is assumed that `base_estimator` has been\n fitted already and all data is used for calibration.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors.\n\n Base estimator clones are fitted in parallel across cross-validation\n iterations. Therefore parallelism happens only when `cv != \"prefit\"`.\n\n See :term:`Glossary <n_jobs>` for more details.\n\n .. versionadded:: 0.24\n\nensemble : bool, default=True\n Determines how the calibrator is fitted when `cv` is not `'prefit'`.\n Ignored if `cv='prefit'`.\n\n If `True`, the `base_estimator` is fitted using training data and\n calibrated using testing data, for each `cv` fold. The final estimator\n is an ensemble of `n_cv` fitted classifer and calibrator pairs, where\n `n_cv` is the number of cross-validation folds. The output is the\n average predicted probabilities of all pairs.\n\n If `False`, `cv` is used to compute unbiased predictions, via\n :func:`~sklearn.model_selection.cross_val_predict`, which are then\n used for calibration. At prediction time, the classifier used is the\n `base_estimator` trained on all the data.\n Note that this method is also internally implemented in\n :mod:`sklearn.svm` estimators with the `probabilities=True` parameter.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n The class labels.\n\ncalibrated_classifiers_ : list (len() equal to cv or 1 if `cv=\"prefit\"` or `ensemble=False`)\n The list of classifier and calibrator pairs.\n\n - When `cv=\"prefit\"`, the fitted `base_estimator` and fitted\n calibrator.\n - When `cv` is not \"prefit\" and `ensemble=True`, `n_cv` fitted\n `base_estimator` and calibrator pairs. 
`n_cv` is the number of\n cross-validation folds.\n - When `cv` is not \"prefit\" and `ensemble=False`, the `base_estimator`,\n fitted on all the data, and fitted calibrator.\n\n .. versionchanged:: 0.24\n Single calibrated classifier case when `ensemble=False`.\n\nExamples\n--------\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.calibration import CalibratedClassifierCV\n>>> X, y = make_classification(n_samples=100, n_features=2,\n... n_redundant=0, random_state=42)\n>>> base_clf = GaussianNB()\n>>> calibrated_clf = CalibratedClassifierCV(base_estimator=base_clf, cv=3)\n>>> calibrated_clf.fit(X, y)\nCalibratedClassifierCV(base_estimator=GaussianNB(), cv=3)\n>>> len(calibrated_clf.calibrated_classifiers_)\n3\n>>> calibrated_clf.predict_proba(X)[:5, :]\narray([[0.110..., 0.889...],\n [0.072..., 0.927...],\n [0.928..., 0.071...],\n [0.928..., 0.071...],\n [0.071..., 0.928...]])\n\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_classification(n_samples=100, n_features=2,\n... n_redundant=0, random_state=42)\n>>> X_train, X_calib, y_train, y_calib = train_test_split(\n... X, y, random_state=42\n... )\n>>> base_clf = GaussianNB()\n>>> base_clf.fit(X_train, y_train)\nGaussianNB()\n>>> calibrated_clf = CalibratedClassifierCV(\n... base_estimator=base_clf,\n... cv=\"prefit\"\n... )\n>>> calibrated_clf.fit(X_calib, y_calib)\nCalibratedClassifierCV(base_estimator=GaussianNB(), cv='prefit')\n>>> len(calibrated_clf.calibrated_classifiers_)\n1\n>>> calibrated_clf.predict_proba([[-0.5, 0.5]])\narray([[0.936..., 0.063...]])\n\nReferences\n----------\n.. [1] Obtaining calibrated probability estimates from decision trees\n and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001\n\n.. [2] Transforming Classifier Scores into Accurate Multiclass\n Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002)\n\n.. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to\n Regularized Likelihood Methods, J. Platt, (1999)\n\n.. [4] Predicting Good Probabilities with Supervised Learning,\n A. Niculescu-Mizil & R. Caruana, ICML 2005", + "code": "class CalibratedClassifierCV(ClassifierMixin,\n MetaEstimatorMixin,\n BaseEstimator):\n \"\"\"Probability calibration with isotonic regression or logistic regression.\n\n This class uses cross-validation to both estimate the parameters of a\n classifier and subsequently calibrate a classifier. With default\n `ensemble=True`, for each cv split it\n fits a copy of the base estimator to the training subset, and calibrates it\n using the testing subset. For prediction, predicted probabilities are\n averaged across these individual calibrated classifiers. When\n `ensemble=False`, cross-validation is used to obtain unbiased predictions,\n via :func:`~sklearn.model_selection.cross_val_predict`, which are then\n used for calibration. For prediction, the base estimator, trained using all\n the data, is used. This is the method implemented when `probabilities=True`\n for :mod:`sklearn.svm` estimators.\n\n Already fitted classifiers can be calibrated via the parameter\n `cv=\"prefit\"`. In this case, no cross-validation is used and all provided\n data is used for calibration. 
The user has to take care manually that data\n for model fitting and calibration are disjoint.\n\n The calibration is based on the :term:`decision_function` method of the\n `base_estimator` if it exists, else on :term:`predict_proba`.\n\n Read more in the :ref:`User Guide <calibration>`.\n\n Parameters\n ----------\n base_estimator : estimator instance, default=None\n The classifier whose output need to be calibrated to provide more\n accurate `predict_proba` outputs. The default classifier is\n a :class:`~sklearn.svm.LinearSVC`.\n\n method : {'sigmoid', 'isotonic'}, default='sigmoid'\n The method to use for calibration. Can be 'sigmoid' which\n corresponds to Platt's method (i.e. a logistic regression model) or\n 'isotonic' which is a non-parametric approach. It is not advised to\n use isotonic calibration with too few calibration samples\n ``(<<1000)`` since it tends to overfit.\n\n cv : int, cross-validation generator, iterable or \"prefit\", \\\n default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if ``y`` is binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is\n neither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\n is used.\n\n Refer to the :ref:`User Guide <cross_validation>` for the various\n cross-validation strategies that can be used here.\n\n If \"prefit\" is passed, it is assumed that `base_estimator` has been\n fitted already and all data is used for calibration.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n n_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors.\n\n Base estimator clones are fitted in parallel across cross-validation\n iterations. Therefore parallelism happens only when `cv != \"prefit\"`.\n\n See :term:`Glossary <n_jobs>` for more details.\n\n .. versionadded:: 0.24\n\n ensemble : bool, default=True\n Determines how the calibrator is fitted when `cv` is not `'prefit'`.\n Ignored if `cv='prefit'`.\n\n If `True`, the `base_estimator` is fitted using training data and\n calibrated using testing data, for each `cv` fold. The final estimator\n is an ensemble of `n_cv` fitted classifer and calibrator pairs, where\n `n_cv` is the number of cross-validation folds. The output is the\n average predicted probabilities of all pairs.\n\n If `False`, `cv` is used to compute unbiased predictions, via\n :func:`~sklearn.model_selection.cross_val_predict`, which are then\n used for calibration. At prediction time, the classifier used is the\n `base_estimator` trained on all the data.\n Note that this method is also internally implemented in\n :mod:`sklearn.svm` estimators with the `probabilities=True` parameter.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n classes_ : ndarray of shape (n_classes,)\n The class labels.\n\n calibrated_classifiers_ : list (len() equal to cv or 1 if `cv=\"prefit\"` \\\n or `ensemble=False`)\n The list of classifier and calibrator pairs.\n\n - When `cv=\"prefit\"`, the fitted `base_estimator` and fitted\n calibrator.\n - When `cv` is not \"prefit\" and `ensemble=True`, `n_cv` fitted\n `base_estimator` and calibrator pairs. 
`n_cv` is the number of\n cross-validation folds.\n - When `cv` is not \"prefit\" and `ensemble=False`, the `base_estimator`,\n fitted on all the data, and fitted calibrator.\n\n .. versionchanged:: 0.24\n Single calibrated classifier case when `ensemble=False`.\n\n Examples\n --------\n >>> from sklearn.datasets import make_classification\n >>> from sklearn.naive_bayes import GaussianNB\n >>> from sklearn.calibration import CalibratedClassifierCV\n >>> X, y = make_classification(n_samples=100, n_features=2,\n ... n_redundant=0, random_state=42)\n >>> base_clf = GaussianNB()\n >>> calibrated_clf = CalibratedClassifierCV(base_estimator=base_clf, cv=3)\n >>> calibrated_clf.fit(X, y)\n CalibratedClassifierCV(base_estimator=GaussianNB(), cv=3)\n >>> len(calibrated_clf.calibrated_classifiers_)\n 3\n >>> calibrated_clf.predict_proba(X)[:5, :]\n array([[0.110..., 0.889...],\n [0.072..., 0.927...],\n [0.928..., 0.071...],\n [0.928..., 0.071...],\n [0.071..., 0.928...]])\n\n >>> from sklearn.model_selection import train_test_split\n >>> X, y = make_classification(n_samples=100, n_features=2,\n ... n_redundant=0, random_state=42)\n >>> X_train, X_calib, y_train, y_calib = train_test_split(\n ... X, y, random_state=42\n ... )\n >>> base_clf = GaussianNB()\n >>> base_clf.fit(X_train, y_train)\n GaussianNB()\n >>> calibrated_clf = CalibratedClassifierCV(\n ... base_estimator=base_clf,\n ... cv=\"prefit\"\n ... )\n >>> calibrated_clf.fit(X_calib, y_calib)\n CalibratedClassifierCV(base_estimator=GaussianNB(), cv='prefit')\n >>> len(calibrated_clf.calibrated_classifiers_)\n 1\n >>> calibrated_clf.predict_proba([[-0.5, 0.5]])\n array([[0.936..., 0.063...]])\n\n References\n ----------\n .. [1] Obtaining calibrated probability estimates from decision trees\n and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001\n\n .. [2] Transforming Classifier Scores into Accurate Multiclass\n Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002)\n\n .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to\n Regularized Likelihood Methods, J. Platt, (1999)\n\n .. [4] Predicting Good Probabilities with Supervised Learning,\n A. Niculescu-Mizil & R. Caruana, ICML 2005\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, base_estimator=None, *, method='sigmoid',\n cv=None, n_jobs=None, ensemble=True):\n self.base_estimator = base_estimator\n self.method = method\n self.cv = cv\n self.n_jobs = n_jobs\n self.ensemble = ensemble\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the calibrated model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n\n Returns\n -------\n self : object\n Returns an instance of self.\n \"\"\"\n check_classification_targets(y)\n X, y = indexable(X, y)\n\n if self.base_estimator is None:\n # we want all classifiers that don't expose a random_state\n # to be deterministic (and we don't want to expose this one).\n base_estimator = LinearSVC(random_state=0)\n else:\n base_estimator = self.base_estimator\n\n self.calibrated_classifiers_ = []\n if self.cv == \"prefit\":\n # `classes_` and `n_features_in_` should be consistent with that\n # of base_estimator\n if isinstance(self.base_estimator, Pipeline):\n check_is_fitted(self.base_estimator[-1])\n else:\n check_is_fitted(self.base_estimator)\n with suppress(AttributeError):\n self.n_features_in_ = base_estimator.n_features_in_\n self.classes_ = self.base_estimator.classes_\n\n pred_method = _get_prediction_method(base_estimator)\n n_classes = len(self.classes_)\n predictions = _compute_predictions(pred_method, X, n_classes)\n\n calibrated_classifier = _fit_calibrator(\n base_estimator, predictions, y, self.classes_, self.method,\n sample_weight\n )\n self.calibrated_classifiers_.append(calibrated_classifier)\n else:\n X, y = self._validate_data(\n X, y, accept_sparse=['csc', 'csr', 'coo'],\n force_all_finite=False, allow_nd=True\n )\n # Set `classes_` using all `y`\n label_encoder_ = LabelEncoder().fit(y)\n self.classes_ = label_encoder_.classes_\n n_classes = len(self.classes_)\n\n # sample_weight checks\n fit_parameters = signature(base_estimator.fit).parameters\n supports_sw = \"sample_weight\" in fit_parameters\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n if not supports_sw:\n estimator_name = type(base_estimator).__name__\n warnings.warn(f\"Since {estimator_name} does not support \"\n \"sample_weights, sample weights will only be\"\n \" used for the calibration itself.\")\n\n # Check that each cross-validation fold can have at least one\n # example per class\n if isinstance(self.cv, int):\n n_folds = self.cv\n elif hasattr(self.cv, \"n_splits\"):\n n_folds = self.cv.n_splits\n else:\n n_folds = None\n if n_folds and np.any([np.sum(y == class_) < n_folds\n for class_ in self.classes_]):\n raise ValueError(f\"Requesting {n_folds}-fold \"\n \"cross-validation but provided less than \"\n f\"{n_folds} examples for at least one class.\")\n cv = check_cv(self.cv, y, classifier=True)\n\n if self.ensemble:\n parallel = Parallel(n_jobs=self.n_jobs)\n\n self.calibrated_classifiers_ = parallel(\n delayed(_fit_classifier_calibrator_pair)(\n clone(base_estimator), X, y, train=train, test=test,\n method=self.method, classes=self.classes_,\n supports_sw=supports_sw, sample_weight=sample_weight)\n for train, test in cv.split(X, y)\n )\n else:\n this_estimator = clone(base_estimator)\n method_name = _get_prediction_method(this_estimator).__name__\n pred_method = partial(\n cross_val_predict, estimator=this_estimator, X=X, y=y,\n cv=cv, method=method_name, n_jobs=self.n_jobs\n )\n predictions = _compute_predictions(pred_method, X, n_classes)\n\n if sample_weight is not None and supports_sw:\n this_estimator.fit(X, y, sample_weight)\n else:\n this_estimator.fit(X, y)\n calibrated_classifier = _fit_calibrator(\n this_estimator, predictions, y, self.classes_, self.method,\n sample_weight\n )\n self.calibrated_classifiers_.append(calibrated_classifier)\n\n return self\n\n def predict_proba(self, X):\n \"\"\"Calibrated probabilities of classification.\n\n This function returns calibrated 
probabilities of classification\n according to each class on an array of test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The samples.\n\n Returns\n -------\n C : ndarray of shape (n_samples, n_classes)\n The predicted probas.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse=['csc', 'csr', 'coo'],\n force_all_finite=False)\n # Compute the arithmetic mean of the predictions of the calibrated\n # classifiers\n mean_proba = np.zeros((X.shape[0], len(self.classes_)))\n for calibrated_classifier in self.calibrated_classifiers_:\n proba = calibrated_classifier.predict_proba(X)\n mean_proba += proba\n\n mean_proba /= len(self.calibrated_classifiers_)\n\n return mean_proba\n\n def predict(self, X):\n \"\"\"Predict the target of new samples. The predicted class is the\n class that has the highest probability, and can thus be different\n from the prediction of the uncalibrated classifier.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The samples.\n\n Returns\n -------\n C : ndarray of shape (n_samples,)\n The predicted class.\n \"\"\"\n check_is_fitted(self)\n return self.classes_[np.argmax(self.predict_proba(X), axis=1)]\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [ + { + "name": "method", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "ensemble", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "calibrated_classifiers_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.calibration/_CalibratedClassifier", + "name": "_CalibratedClassifier", + "qname": "sklearn.calibration._CalibratedClassifier", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.calibration/_CalibratedClassifier/__init__", + "scikit-learn/sklearn.calibration/_CalibratedClassifier/calibrators_@getter", + "scikit-learn/sklearn.calibration/_CalibratedClassifier/predict_proba" + ], + "is_public": false, + "reexported_by": [], + "description": "Pipeline-like chaining a fitted classifier and its fitted calibrators.", + "docstring": "Pipeline-like chaining a fitted classifier and its fitted calibrators.\n\nParameters\n----------\nbase_estimator : estimator instance\n Fitted classifier.\n\ncalibrators : list of fitted estimator instances\n List of fitted calibrators (either 'IsotonicRegression' or\n '_SigmoidCalibration'). The number of calibrators equals the number of\n classes. However, if there are 2 classes, the list contains only one\n fitted calibrator.\n\nclasses : array-like of shape (n_classes,)\n All the prediction classes.\n\nmethod : {'sigmoid', 'isotonic'}, default='sigmoid'\n The method to use for calibration. Can be 'sigmoid' which\n corresponds to Platt's method or 'isotonic' which is a\n non-parametric approach based on isotonic regression.\n\nAttributes\n----------\ncalibrators_ : list of fitted estimator instances\n Same as `calibrators`. Exposed for backward-compatibility. Use\n `calibrators` instead.\n\n .. deprecated:: 0.24\n `calibrators_` is deprecated from 0.24 and will be removed in\n 1.1 (renaming of 0.26). 
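
The `fit`/`predict_proba`/`predict` bodies recorded above belong to sklearn's public `CalibratedClassifierCV` (the calibration entry this section documents). A minimal usage sketch of the `cv="prefit"` branch, assuming the 0.24 API where the parameter is still named `base_estimator`; the data and split are illustrative only:

```python
# Sketch: calibrating a prefit LinearSVC with CalibratedClassifierCV (sklearn 0.24).
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=500, random_state=0)
X_train, X_cal, y_train, y_cal = train_test_split(X, y, random_state=0)

# cv="prefit" takes the branch above that reuses base_estimator.classes_
# and fits a single calibrator on the held-out calibration data.
base = LinearSVC(random_state=0).fit(X_train, y_train)
calibrated = CalibratedClassifierCV(base_estimator=base, cv="prefit",
                                    method="sigmoid").fit(X_cal, y_cal)

proba = calibrated.predict_proba(X_cal)  # arithmetic mean over calibrated_classifiers_
labels = calibrated.predict(X_cal)       # argmax of the averaged probabilities
```
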
Use `calibrators` instead.", + "code": "class _CalibratedClassifier:\n \"\"\"Pipeline-like chaining a fitted classifier and its fitted calibrators.\n\n Parameters\n ----------\n base_estimator : estimator instance\n Fitted classifier.\n\n calibrators : list of fitted estimator instances\n List of fitted calibrators (either 'IsotonicRegression' or\n '_SigmoidCalibration'). The number of calibrators equals the number of\n classes. However, if there are 2 classes, the list contains only one\n fitted calibrator.\n\n classes : array-like of shape (n_classes,)\n All the prediction classes.\n\n method : {'sigmoid', 'isotonic'}, default='sigmoid'\n The method to use for calibration. Can be 'sigmoid' which\n corresponds to Platt's method or 'isotonic' which is a\n non-parametric approach based on isotonic regression.\n\n Attributes\n ----------\n calibrators_ : list of fitted estimator instances\n Same as `calibrators`. Exposed for backward-compatibility. Use\n `calibrators` instead.\n\n .. deprecated:: 0.24\n `calibrators_` is deprecated from 0.24 and will be removed in\n 1.1 (renaming of 0.26). Use `calibrators` instead.\n \"\"\"\n def __init__(self, base_estimator, calibrators, *, classes,\n method='sigmoid'):\n self.base_estimator = base_estimator\n self.calibrators = calibrators\n self.classes = classes\n self.method = method\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated( # type: ignore\n \"calibrators_ is deprecated in 0.24 and will be removed in 1.1\"\n \"(renaming of 0.26). Use calibrators instead.\"\n )\n @property\n def calibrators_(self):\n return self.calibrators\n\n def predict_proba(self, X):\n \"\"\"Calculate calibrated probabilities.\n\n Calculates classification calibrated probabilities\n for each class, in a one-vs-all manner, for `X`.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The sample data.\n\n Returns\n -------\n proba : array, shape (n_samples, n_classes)\n The predicted probabilities. Can be exact zeros.\n \"\"\"\n n_classes = len(self.classes)\n pred_method = _get_prediction_method(self.base_estimator)\n predictions = _compute_predictions(pred_method, X, n_classes)\n\n label_encoder = LabelEncoder().fit(self.classes)\n pos_class_indices = label_encoder.transform(\n self.base_estimator.classes_\n )\n\n proba = np.zeros((X.shape[0], n_classes))\n for class_idx, this_pred, calibrator in \\\n zip(pos_class_indices, predictions.T, self.calibrators):\n if n_classes == 2:\n # When binary, `predictions` consists only of predictions for\n # clf.classes_[1] but `pos_class_indices` = 0\n class_idx += 1\n proba[:, class_idx] = calibrator.predict(this_pred)\n\n # Normalize the probabilities\n if n_classes == 2:\n proba[:, 0] = 1. - proba[:, 1]\n else:\n proba /= np.sum(proba, axis=1)[:, np.newaxis]\n\n # XXX : for some reason all probas can be 0\n proba[np.isnan(proba)] = 1. 
/ n_classes\n\n # Deal with cases where the predicted probability minimally exceeds 1.0\n proba[(1.0 < proba) & (proba <= 1.0 + 1e-5)] = 1.0\n\n return proba", + "instance_attributes": [ + { + "name": "method", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.calibration/_SigmoidCalibration", + "name": "_SigmoidCalibration", + "qname": "sklearn.calibration._SigmoidCalibration", + "decorators": [], + "superclasses": ["RegressorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.calibration/_SigmoidCalibration/fit", + "scikit-learn/sklearn.calibration/_SigmoidCalibration/predict" + ], + "is_public": false, + "reexported_by": [], + "description": "Sigmoid regression model.", + "docstring": "Sigmoid regression model.\n\nAttributes\n----------\na_ : float\n The slope.\n\nb_ : float\n The intercept.", + "code": "class _SigmoidCalibration(RegressorMixin, BaseEstimator):\n \"\"\"Sigmoid regression model.\n\n Attributes\n ----------\n a_ : float\n The slope.\n\n b_ : float\n The intercept.\n \"\"\"\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples,)\n Training data.\n\n y : array-like of shape (n_samples,)\n Training target.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\n Returns\n -------\n self : object\n Returns an instance of self.\n \"\"\"\n X = column_or_1d(X)\n y = column_or_1d(y)\n X, y = indexable(X, y)\n\n self.a_, self.b_ = _sigmoid_calibration(X, y, sample_weight)\n return self\n\n def predict(self, T):\n \"\"\"Predict new data by linear interpolation.\n\n Parameters\n ----------\n T : array-like of shape (n_samples,)\n Data to predict from.\n\n Returns\n -------\n T_ : ndarray of shape (n_samples,)\n The predicted data.\n \"\"\"\n T = column_or_1d(T)\n return expit(-(self.a_ * T + self.b_))", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation", + "name": "AffinityPropagation", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation", + "decorators": [], + "superclasses": ["ClusterMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/__init__", + "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/_pairwise@getter", + "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/_more_tags", + "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/fit", + "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/predict", + "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/fit_predict" + ], + "is_public": false, + "reexported_by": [], + "description": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndamping : float, default=0.5\n Damping factor (between 0.5 and 1) is the extent to\n which the current value is maintained relative to\n incoming values (weighted 1 - damping). 
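
The `_SigmoidCalibration` entry above records Platt scaling: `predict` maps raw scores through `expit(-(a_ * T + b_))`. A minimal sketch of that transform; the slope and intercept are made-up illustration values, not fitted ones:

```python
# Sketch of the transform recorded in _SigmoidCalibration.predict.
# a_ and b_ are hypothetical values (in practice a_ is usually fitted
# negative, so larger raw scores map to larger calibrated probabilities).
import numpy as np
from scipy.special import expit  # logistic sigmoid

a_, b_ = -1.5, 0.2
T = np.array([-2.0, 0.0, 2.0])   # raw decision_function scores
proba = expit(-(a_ * T + b_))    # monotone map of scores into (0, 1)
```
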
This in order\n to avoid numerical oscillations when updating these\n values (messages).\n\nmax_iter : int, default=200\n Maximum number of iterations.\n\nconvergence_iter : int, default=15\n Number of iterations with no change in the number\n of estimated clusters that stops the convergence.\n\ncopy : bool, default=True\n Make a copy of input data.\n\npreference : array-like of shape (n_samples,) or float, default=None\n Preferences for each point - points with larger values of\n preferences are more likely to be chosen as exemplars. The number\n of exemplars, ie of clusters, is influenced by the input\n preferences value. If the preferences are not passed as arguments,\n they will be set to the median of the input similarities.\n\naffinity : {'euclidean', 'precomputed'}, default='euclidean'\n Which affinity to use. At the moment 'precomputed' and\n ``euclidean`` are supported. 'euclidean' uses the\n negative squared euclidean distance between points.\n\nverbose : bool, default=False\n Whether to be verbose.\n\nrandom_state : int, RandomState instance or None, default=0\n Pseudo-random number generator to control the starting state.\n Use an int for reproducible results across function calls.\n See the :term:`Glossary `.\n\n .. versionadded:: 0.23\n this parameter was previously hardcoded as 0.\n\nAttributes\n----------\ncluster_centers_indices_ : ndarray of shape (n_clusters,)\n Indices of cluster centers.\n\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Cluster centers (if affinity != ``precomputed``).\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point.\n\naffinity_matrix_ : ndarray of shape (n_samples, n_samples)\n Stores the affinity matrix used in ``fit``.\n\nn_iter_ : int\n Number of iterations taken to converge.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n`.\n\nThe algorithmic complexity of affinity propagation is quadratic\nin the number of points.\n\nWhen ``fit`` does not converge, ``cluster_centers_`` becomes an empty\narray and all training samples will be labelled as ``-1``. In addition,\n``predict`` will then label every sample as ``-1``.\n\nWhen all training samples have equal similarities and equal preferences,\nthe assignment of cluster centers and labels depends on the preference.\nIf the preference is smaller than the similarities, ``fit`` will result in\na single cluster center and label ``0`` for every sample. Otherwise, every\ntraining sample becomes its own cluster center and is assigned a unique\nlabel.\n\nReferences\n----------\n\nBrendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\nBetween Data Points\", Science Feb. 2007\n\nExamples\n--------\n>>> from sklearn.cluster import AffinityPropagation\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... [4, 2], [4, 4], [4, 0]])\n>>> clustering = AffinityPropagation(random_state=5).fit(X)\n>>> clustering\nAffinityPropagation(random_state=5)\n>>> clustering.labels_\narray([0, 0, 0, 1, 1, 1])\n>>> clustering.predict([[0, 0], [4, 4]])\narray([0, 1])\n>>> clustering.cluster_centers_\narray([[1, 2],\n [4, 2]])", + "code": "class AffinityPropagation(ClusterMixin, BaseEstimator):\n \"\"\"Perform Affinity Propagation Clustering of data.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n damping : float, default=0.5\n Damping factor (between 0.5 and 1) is the extent to\n which the current value is maintained relative to\n incoming values (weighted 1 - damping). 
This in order\n to avoid numerical oscillations when updating these\n values (messages).\n\n max_iter : int, default=200\n Maximum number of iterations.\n\n convergence_iter : int, default=15\n Number of iterations with no change in the number\n of estimated clusters that stops the convergence.\n\n copy : bool, default=True\n Make a copy of input data.\n\n preference : array-like of shape (n_samples,) or float, default=None\n Preferences for each point - points with larger values of\n preferences are more likely to be chosen as exemplars. The number\n of exemplars, ie of clusters, is influenced by the input\n preferences value. If the preferences are not passed as arguments,\n they will be set to the median of the input similarities.\n\n affinity : {'euclidean', 'precomputed'}, default='euclidean'\n Which affinity to use. At the moment 'precomputed' and\n ``euclidean`` are supported. 'euclidean' uses the\n negative squared euclidean distance between points.\n\n verbose : bool, default=False\n Whether to be verbose.\n\n random_state : int, RandomState instance or None, default=0\n Pseudo-random number generator to control the starting state.\n Use an int for reproducible results across function calls.\n See the :term:`Glossary `.\n\n .. versionadded:: 0.23\n this parameter was previously hardcoded as 0.\n\n Attributes\n ----------\n cluster_centers_indices_ : ndarray of shape (n_clusters,)\n Indices of cluster centers.\n\n cluster_centers_ : ndarray of shape (n_clusters, n_features)\n Cluster centers (if affinity != ``precomputed``).\n\n labels_ : ndarray of shape (n_samples,)\n Labels of each point.\n\n affinity_matrix_ : ndarray of shape (n_samples, n_samples)\n Stores the affinity matrix used in ``fit``.\n\n n_iter_ : int\n Number of iterations taken to converge.\n\n Notes\n -----\n For an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n `.\n\n The algorithmic complexity of affinity propagation is quadratic\n in the number of points.\n\n When ``fit`` does not converge, ``cluster_centers_`` becomes an empty\n array and all training samples will be labelled as ``-1``. In addition,\n ``predict`` will then label every sample as ``-1``.\n\n When all training samples have equal similarities and equal preferences,\n the assignment of cluster centers and labels depends on the preference.\n If the preference is smaller than the similarities, ``fit`` will result in\n a single cluster center and label ``0`` for every sample. Otherwise, every\n training sample becomes its own cluster center and is assigned a unique\n label.\n\n References\n ----------\n\n Brendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\n Between Data Points\", Science Feb. 2007\n\n Examples\n --------\n >>> from sklearn.cluster import AffinityPropagation\n >>> import numpy as np\n >>> X = np.array([[1, 2], [1, 4], [1, 0],\n ... 
[4, 2], [4, 4], [4, 0]])\n >>> clustering = AffinityPropagation(random_state=5).fit(X)\n >>> clustering\n AffinityPropagation(random_state=5)\n >>> clustering.labels_\n array([0, 0, 0, 1, 1, 1])\n >>> clustering.predict([[0, 0], [4, 4]])\n array([0, 1])\n >>> clustering.cluster_centers_\n array([[1, 2],\n [4, 2]])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, damping=.5, max_iter=200, convergence_iter=15,\n copy=True, preference=None, affinity='euclidean',\n verbose=False, random_state='warn'):\n\n self.damping = damping\n self.max_iter = max_iter\n self.convergence_iter = convergence_iter\n self.copy = copy\n self.verbose = verbose\n self.preference = preference\n self.affinity = affinity\n self.random_state = random_state\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n return self.affinity == \"precomputed\"\n\n def _more_tags(self):\n return {'pairwise': self.affinity == 'precomputed'}\n\n def fit(self, X, y=None):\n \"\"\"Fit the clustering from features, or affinity matrix.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse feature matrix\n is provided, it will be converted into a sparse ``csr_matrix``.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n self\n\n \"\"\"\n if self.affinity == \"precomputed\":\n accept_sparse = False\n else:\n accept_sparse = 'csr'\n X = self._validate_data(X, accept_sparse=accept_sparse)\n if self.affinity == \"precomputed\":\n self.affinity_matrix_ = X\n elif self.affinity == \"euclidean\":\n self.affinity_matrix_ = -euclidean_distances(X, squared=True)\n else:\n raise ValueError(\"Affinity must be 'precomputed' or \"\n \"'euclidean'. Got %s instead\"\n % str(self.affinity))\n\n self.cluster_centers_indices_, self.labels_, self.n_iter_ = \\\n affinity_propagation(\n self.affinity_matrix_, preference=self.preference,\n max_iter=self.max_iter,\n convergence_iter=self.convergence_iter, damping=self.damping,\n copy=self.copy, verbose=self.verbose, return_n_iter=True,\n random_state=self.random_state)\n\n if self.affinity != \"precomputed\":\n self.cluster_centers_ = X[self.cluster_centers_indices_].copy()\n\n return self\n\n def predict(self, X):\n \"\"\"Predict the closest cluster each sample in X belongs to.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Cluster labels.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, reset=False)\n if not hasattr(self, \"cluster_centers_\"):\n raise ValueError(\"Predict method is not supported when \"\n \"affinity='precomputed'.\")\n\n if self.cluster_centers_.shape[0] > 0:\n with config_context(assume_finite=True):\n return pairwise_distances_argmin(X, self.cluster_centers_)\n else:\n warnings.warn(\"This model does not have any cluster centers \"\n \"because affinity propagation did not converge. 
\"\n \"Labeling every sample as '-1'.\", ConvergenceWarning)\n return np.array([-1] * X.shape[0])\n\n def fit_predict(self, X, y=None):\n \"\"\"Fit the clustering from features or affinity matrix, and return\n cluster labels.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse feature matrix\n is provided, it will be converted into a sparse ``csr_matrix``.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Cluster labels.\n \"\"\"\n return super().fit_predict(X, y)", + "instance_attributes": [ + { + "name": "damping", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "convergence_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "affinity", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "random_state", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering", + "name": "AgglomerativeClustering", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering", + "decorators": [], + "superclasses": ["ClusterMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__", + "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/fit", + "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/fit_predict" + ], + "is_public": false, + "reexported_by": [], + "description": "Agglomerative Clustering\n\nRecursively merges the pair of clusters that minimally increases\na given linkage distance.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Agglomerative Clustering\n\nRecursively merges the pair of clusters that minimally increases\na given linkage distance.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int or None, default=2\n The number of clusters to find. It must be ``None`` if\n ``distance_threshold`` is not ``None``.\n\naffinity : str or callable, default='euclidean'\n Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n \"manhattan\", \"cosine\", or \"precomputed\".\n If linkage is \"ward\", only \"euclidean\" is accepted.\n If \"precomputed\", a distance matrix (instead of a similarity matrix)\n is needed as input for the fit method.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the output of the computation of the tree.\n By default, no caching is done. If a string is given, it is the\n path to the caching directory.\n\nconnectivity : array-like or callable, default=None\n Connectivity matrix. Defines for each sample the neighboring\n samples following a given structure of the data.\n This can be a connectivity matrix itself or a callable that transforms\n the data into a connectivity matrix, such as derived from\n kneighbors_graph. 
Default is ``None``, i.e, the\n hierarchical clustering algorithm is unstructured.\n\ncompute_full_tree : 'auto' or bool, default='auto'\n Stop early the construction of the tree at ``n_clusters``. This is\n useful to decrease computation time if the number of clusters is not\n small compared to the number of samples. This option is useful only\n when specifying a connectivity matrix. Note also that when varying the\n number of clusters and using caching, it may be advantageous to compute\n the full tree. It must be ``True`` if ``distance_threshold`` is not\n ``None``. By default `compute_full_tree` is \"auto\", which is equivalent\n to `True` when `distance_threshold` is not `None` or that `n_clusters`\n is inferior to the maximum between 100 or `0.02 * n_samples`.\n Otherwise, \"auto\" is equivalent to `False`.\n\nlinkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n Which linkage criterion to use. The linkage criterion determines which\n distance to use between sets of observation. The algorithm will merge\n the pairs of cluster that minimize this criterion.\n\n - 'ward' minimizes the variance of the clusters being merged.\n - 'average' uses the average of the distances of each observation of\n the two sets.\n - 'complete' or 'maximum' linkage uses the maximum distances between\n all observations of the two sets.\n - 'single' uses the minimum of the distances between all observations\n of the two sets.\n\n .. versionadded:: 0.20\n Added the 'single' option\n\ndistance_threshold : float, default=None\n The linkage distance threshold above which, clusters will not be\n merged. If not ``None``, ``n_clusters`` must be ``None`` and\n ``compute_full_tree`` must be ``True``.\n\n .. versionadded:: 0.21\n\ncompute_distances : bool, default=False\n Computes distances between clusters even if `distance_threshold` is not\n used. This can be used to make dendrogram visualization, but introduces\n a computational and memory overhead.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nn_clusters_ : int\n The number of clusters found by the algorithm. If\n ``distance_threshold=None``, it will be equal to the given\n ``n_clusters``.\n\nlabels_ : ndarray of shape (n_samples)\n cluster labels for each point\n\nn_leaves_ : int\n Number of leaves in the hierarchical tree.\n\nn_connected_components_ : int\n The estimated number of connected components in the graph.\n\n .. versionadded:: 0.21\n ``n_connected_components_`` was added to replace ``n_components_``.\n\nchildren_ : array-like of shape (n_samples-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\ndistances_ : array-like of shape (n_nodes-1,)\n Distances between nodes in the corresponding place in `children_`.\n Only computed if `distance_threshold` is used or `compute_distances`\n is set to `True`.\n\nExamples\n--------\n>>> from sklearn.cluster import AgglomerativeClustering\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... 
[4, 2], [4, 4], [4, 0]])\n>>> clustering = AgglomerativeClustering().fit(X)\n>>> clustering\nAgglomerativeClustering()\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])", + "code": "class AgglomerativeClustering(ClusterMixin, BaseEstimator):\n \"\"\"\n Agglomerative Clustering\n\n Recursively merges the pair of clusters that minimally increases\n a given linkage distance.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_clusters : int or None, default=2\n The number of clusters to find. It must be ``None`` if\n ``distance_threshold`` is not ``None``.\n\n affinity : str or callable, default='euclidean'\n Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n \"manhattan\", \"cosine\", or \"precomputed\".\n If linkage is \"ward\", only \"euclidean\" is accepted.\n If \"precomputed\", a distance matrix (instead of a similarity matrix)\n is needed as input for the fit method.\n\n memory : str or object with the joblib.Memory interface, default=None\n Used to cache the output of the computation of the tree.\n By default, no caching is done. If a string is given, it is the\n path to the caching directory.\n\n connectivity : array-like or callable, default=None\n Connectivity matrix. Defines for each sample the neighboring\n samples following a given structure of the data.\n This can be a connectivity matrix itself or a callable that transforms\n the data into a connectivity matrix, such as derived from\n kneighbors_graph. Default is ``None``, i.e, the\n hierarchical clustering algorithm is unstructured.\n\n compute_full_tree : 'auto' or bool, default='auto'\n Stop early the construction of the tree at ``n_clusters``. This is\n useful to decrease computation time if the number of clusters is not\n small compared to the number of samples. This option is useful only\n when specifying a connectivity matrix. Note also that when varying the\n number of clusters and using caching, it may be advantageous to compute\n the full tree. It must be ``True`` if ``distance_threshold`` is not\n ``None``. By default `compute_full_tree` is \"auto\", which is equivalent\n to `True` when `distance_threshold` is not `None` or that `n_clusters`\n is inferior to the maximum between 100 or `0.02 * n_samples`.\n Otherwise, \"auto\" is equivalent to `False`.\n\n linkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n Which linkage criterion to use. The linkage criterion determines which\n distance to use between sets of observation. The algorithm will merge\n the pairs of cluster that minimize this criterion.\n\n - 'ward' minimizes the variance of the clusters being merged.\n - 'average' uses the average of the distances of each observation of\n the two sets.\n - 'complete' or 'maximum' linkage uses the maximum distances between\n all observations of the two sets.\n - 'single' uses the minimum of the distances between all observations\n of the two sets.\n\n .. versionadded:: 0.20\n Added the 'single' option\n\n distance_threshold : float, default=None\n The linkage distance threshold above which, clusters will not be\n merged. If not ``None``, ``n_clusters`` must be ``None`` and\n ``compute_full_tree`` must be ``True``.\n\n .. versionadded:: 0.21\n\n compute_distances : bool, default=False\n Computes distances between clusters even if `distance_threshold` is not\n used. This can be used to make dendrogram visualization, but introduces\n a computational and memory overhead.\n\n .. 
versionadded:: 0.24\n\n Attributes\n ----------\n n_clusters_ : int\n The number of clusters found by the algorithm. If\n ``distance_threshold=None``, it will be equal to the given\n ``n_clusters``.\n\n labels_ : ndarray of shape (n_samples)\n cluster labels for each point\n\n n_leaves_ : int\n Number of leaves in the hierarchical tree.\n\n n_connected_components_ : int\n The estimated number of connected components in the graph.\n\n .. versionadded:: 0.21\n ``n_connected_components_`` was added to replace ``n_components_``.\n\n children_ : array-like of shape (n_samples-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\n distances_ : array-like of shape (n_nodes-1,)\n Distances between nodes in the corresponding place in `children_`.\n Only computed if `distance_threshold` is used or `compute_distances`\n is set to `True`.\n\n Examples\n --------\n >>> from sklearn.cluster import AgglomerativeClustering\n >>> import numpy as np\n >>> X = np.array([[1, 2], [1, 4], [1, 0],\n ... [4, 2], [4, 4], [4, 0]])\n >>> clustering = AgglomerativeClustering().fit(X)\n >>> clustering\n AgglomerativeClustering()\n >>> clustering.labels_\n array([1, 1, 1, 0, 0, 0])\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_clusters=2, *, affinity=\"euclidean\",\n memory=None,\n connectivity=None, compute_full_tree='auto',\n linkage='ward', distance_threshold=None,\n compute_distances=False):\n self.n_clusters = n_clusters\n self.distance_threshold = distance_threshold\n self.memory = memory\n self.connectivity = connectivity\n self.compute_full_tree = compute_full_tree\n self.linkage = linkage\n self.affinity = affinity\n self.compute_distances = compute_distances\n\n def fit(self, X, y=None):\n \"\"\"Fit the hierarchical clustering from features, or distance matrix.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features) or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``affinity='precomputed'``.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n self\n \"\"\"\n X = self._validate_data(X, ensure_min_samples=2, estimator=self)\n memory = check_memory(self.memory)\n\n if self.n_clusters is not None and self.n_clusters <= 0:\n raise ValueError(\"n_clusters should be an integer greater than 0.\"\n \" %s was provided.\" % str(self.n_clusters))\n\n if not ((self.n_clusters is None) ^ (self.distance_threshold is None)):\n raise ValueError(\"Exactly one of n_clusters and \"\n \"distance_threshold has to be set, and the other \"\n \"needs to be None.\")\n\n if (self.distance_threshold is not None\n and not self.compute_full_tree):\n raise ValueError(\"compute_full_tree must be True if \"\n \"distance_threshold is set.\")\n\n if self.linkage == \"ward\" and self.affinity != \"euclidean\":\n raise ValueError(\"%s was provided as affinity. Ward can only \"\n \"work with euclidean distances.\" %\n (self.affinity, ))\n\n if self.linkage not in _TREE_BUILDERS:\n raise ValueError(\"Unknown linkage type %s. 
\"\n \"Valid options are %s\" % (self.linkage,\n _TREE_BUILDERS.keys()))\n tree_builder = _TREE_BUILDERS[self.linkage]\n\n connectivity = self.connectivity\n if self.connectivity is not None:\n if callable(self.connectivity):\n connectivity = self.connectivity(X)\n connectivity = check_array(\n connectivity, accept_sparse=['csr', 'coo', 'lil'])\n\n n_samples = len(X)\n compute_full_tree = self.compute_full_tree\n if self.connectivity is None:\n compute_full_tree = True\n if compute_full_tree == 'auto':\n if self.distance_threshold is not None:\n compute_full_tree = True\n else:\n # Early stopping is likely to give a speed up only for\n # a large number of clusters. The actual threshold\n # implemented here is heuristic\n compute_full_tree = self.n_clusters < max(100, .02 * n_samples)\n n_clusters = self.n_clusters\n if compute_full_tree:\n n_clusters = None\n\n # Construct the tree\n kwargs = {}\n if self.linkage != 'ward':\n kwargs['linkage'] = self.linkage\n kwargs['affinity'] = self.affinity\n\n distance_threshold = self.distance_threshold\n\n return_distance = (\n (distance_threshold is not None) or self.compute_distances\n )\n\n out = memory.cache(tree_builder)(X, connectivity=connectivity,\n n_clusters=n_clusters,\n return_distance=return_distance,\n **kwargs)\n (self.children_,\n self.n_connected_components_,\n self.n_leaves_,\n parents) = out[:4]\n\n if return_distance:\n self.distances_ = out[-1]\n\n if self.distance_threshold is not None: # distance_threshold is used\n self.n_clusters_ = np.count_nonzero(\n self.distances_ >= distance_threshold) + 1\n else: # n_clusters is used\n self.n_clusters_ = self.n_clusters\n\n # Cut the tree\n if compute_full_tree:\n self.labels_ = _hc_cut(self.n_clusters_, self.children_,\n self.n_leaves_)\n else:\n labels = _hierarchical.hc_get_heads(parents, copy=False)\n # copy to avoid holding a reference on the original array\n labels = np.copy(labels[:n_samples])\n # Reassign cluster numbers\n self.labels_ = np.searchsorted(np.unique(labels), labels)\n return self\n\n def fit_predict(self, X, y=None):\n \"\"\"Fit the hierarchical clustering from features or distance matrix,\n and return cluster labels.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or \\\n (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``affinity='precomputed'``.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Cluster labels.\n \"\"\"\n return super().fit_predict(X, y)", + "instance_attributes": [ + { + "name": "n_clusters", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "compute_full_tree", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "linkage", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "affinity", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "compute_distances", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "labels_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration", + "name": "FeatureAgglomeration", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration", + "decorators": [], + "superclasses": ["AgglomerativeClustering", "AgglomerationTransform"], + "methods": [ + "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__", + 
"scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit", + "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit_predict@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Agglomerate features.\n\nSimilar to AgglomerativeClustering, but recursively merges features\ninstead of samples.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Agglomerate features.\n\nSimilar to AgglomerativeClustering, but recursively merges features\ninstead of samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int, default=2\n The number of clusters to find. It must be ``None`` if\n ``distance_threshold`` is not ``None``.\n\naffinity : str or callable, default='euclidean'\n Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n \"manhattan\", \"cosine\", or 'precomputed'.\n If linkage is \"ward\", only \"euclidean\" is accepted.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the output of the computation of the tree.\n By default, no caching is done. If a string is given, it is the\n path to the caching directory.\n\nconnectivity : array-like or callable, default=None\n Connectivity matrix. Defines for each feature the neighboring\n features following a given structure of the data.\n This can be a connectivity matrix itself or a callable that transforms\n the data into a connectivity matrix, such as derived from\n kneighbors_graph. Default is None, i.e, the\n hierarchical clustering algorithm is unstructured.\n\ncompute_full_tree : 'auto' or bool, default='auto'\n Stop early the construction of the tree at n_clusters. This is useful\n to decrease computation time if the number of clusters is not small\n compared to the number of features. This option is useful only when\n specifying a connectivity matrix. Note also that when varying the\n number of clusters and using caching, it may be advantageous to compute\n the full tree. It must be ``True`` if ``distance_threshold`` is not\n ``None``. By default `compute_full_tree` is \"auto\", which is equivalent\n to `True` when `distance_threshold` is not `None` or that `n_clusters`\n is inferior to the maximum between 100 or `0.02 * n_samples`.\n Otherwise, \"auto\" is equivalent to `False`.\n\nlinkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n Which linkage criterion to use. The linkage criterion determines which\n distance to use between sets of features. The algorithm will merge\n the pairs of cluster that minimize this criterion.\n\n - ward minimizes the variance of the clusters being merged.\n - average uses the average of the distances of each feature of\n the two sets.\n - complete or maximum linkage uses the maximum distances between\n all features of the two sets.\n - single uses the minimum of the distances between all features\n of the two sets.\n\npooling_func : callable, default=np.mean\n This combines the values of agglomerated features into a single\n value, and should accept an array of shape [M, N] and the keyword\n argument `axis=1`, and reduce it to an array of size [M].\n\ndistance_threshold : float, default=None\n The linkage distance threshold above which, clusters will not be\n merged. If not ``None``, ``n_clusters`` must be ``None`` and\n ``compute_full_tree`` must be ``True``.\n\n .. versionadded:: 0.21\n\ncompute_distances : bool, default=False\n Computes distances between clusters even if `distance_threshold` is not\n used. 
This can be used to make dendrogram visualization, but introduces\n a computational and memory overhead.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nn_clusters_ : int\n The number of clusters found by the algorithm. If\n ``distance_threshold=None``, it will be equal to the given\n ``n_clusters``.\n\nlabels_ : array-like of (n_features,)\n cluster labels for each feature.\n\nn_leaves_ : int\n Number of leaves in the hierarchical tree.\n\nn_connected_components_ : int\n The estimated number of connected components in the graph.\n\n .. versionadded:: 0.21\n ``n_connected_components_`` was added to replace ``n_components_``.\n\nchildren_ : array-like of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_features`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_features` is a non-leaf\n node and has children `children_[i - n_features]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_features + i`\n\ndistances_ : array-like of shape (n_nodes-1,)\n Distances between nodes in the corresponding place in `children_`.\n Only computed if `distance_threshold` is used or `compute_distances`\n is set to `True`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets, cluster\n>>> digits = datasets.load_digits()\n>>> images = digits.images\n>>> X = np.reshape(images, (len(images), -1))\n>>> agglo = cluster.FeatureAgglomeration(n_clusters=32)\n>>> agglo.fit(X)\nFeatureAgglomeration(n_clusters=32)\n>>> X_reduced = agglo.transform(X)\n>>> X_reduced.shape\n(1797, 32)", + "code": "class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):\n \"\"\"Agglomerate features.\n\n Similar to AgglomerativeClustering, but recursively merges features\n instead of samples.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_clusters : int, default=2\n The number of clusters to find. It must be ``None`` if\n ``distance_threshold`` is not ``None``.\n\n affinity : str or callable, default='euclidean'\n Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n \"manhattan\", \"cosine\", or 'precomputed'.\n If linkage is \"ward\", only \"euclidean\" is accepted.\n\n memory : str or object with the joblib.Memory interface, default=None\n Used to cache the output of the computation of the tree.\n By default, no caching is done. If a string is given, it is the\n path to the caching directory.\n\n connectivity : array-like or callable, default=None\n Connectivity matrix. Defines for each feature the neighboring\n features following a given structure of the data.\n This can be a connectivity matrix itself or a callable that transforms\n the data into a connectivity matrix, such as derived from\n kneighbors_graph. Default is None, i.e, the\n hierarchical clustering algorithm is unstructured.\n\n compute_full_tree : 'auto' or bool, default='auto'\n Stop early the construction of the tree at n_clusters. This is useful\n to decrease computation time if the number of clusters is not small\n compared to the number of features. This option is useful only when\n specifying a connectivity matrix. Note also that when varying the\n number of clusters and using caching, it may be advantageous to compute\n the full tree. It must be ``True`` if ``distance_threshold`` is not\n ``None``. 
By default `compute_full_tree` is \"auto\", which is equivalent\n to `True` when `distance_threshold` is not `None` or that `n_clusters`\n is inferior to the maximum between 100 or `0.02 * n_samples`.\n Otherwise, \"auto\" is equivalent to `False`.\n\n linkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n Which linkage criterion to use. The linkage criterion determines which\n distance to use between sets of features. The algorithm will merge\n the pairs of cluster that minimize this criterion.\n\n - ward minimizes the variance of the clusters being merged.\n - average uses the average of the distances of each feature of\n the two sets.\n - complete or maximum linkage uses the maximum distances between\n all features of the two sets.\n - single uses the minimum of the distances between all features\n of the two sets.\n\n pooling_func : callable, default=np.mean\n This combines the values of agglomerated features into a single\n value, and should accept an array of shape [M, N] and the keyword\n argument `axis=1`, and reduce it to an array of size [M].\n\n distance_threshold : float, default=None\n The linkage distance threshold above which, clusters will not be\n merged. If not ``None``, ``n_clusters`` must be ``None`` and\n ``compute_full_tree`` must be ``True``.\n\n .. versionadded:: 0.21\n\n compute_distances : bool, default=False\n Computes distances between clusters even if `distance_threshold` is not\n used. This can be used to make dendrogram visualization, but introduces\n a computational and memory overhead.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n n_clusters_ : int\n The number of clusters found by the algorithm. If\n ``distance_threshold=None``, it will be equal to the given\n ``n_clusters``.\n\n labels_ : array-like of (n_features,)\n cluster labels for each feature.\n\n n_leaves_ : int\n Number of leaves in the hierarchical tree.\n\n n_connected_components_ : int\n The estimated number of connected components in the graph.\n\n .. versionadded:: 0.21\n ``n_connected_components_`` was added to replace ``n_components_``.\n\n children_ : array-like of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_features`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_features` is a non-leaf\n node and has children `children_[i - n_features]`. 
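
A minimal sketch of the `FeatureAgglomeration` round trip recorded in this entry, assuming the `transform`/`inverse_transform` pair inherited from `AgglomerationTransform` and the default `pooling_func=np.mean`; the array shapes are illustrative:

```python
# Sketch: reduce features by agglomeration, then map back to the original space.
import numpy as np
from sklearn.cluster import FeatureAgglomeration

X = np.random.RandomState(0).rand(100, 64)
agglo = FeatureAgglomeration(n_clusters=8, pooling_func=np.mean).fit(X)
X_reduced = agglo.transform(X)                   # (100, 8): one pooled column per feature cluster
X_restored = agglo.inverse_transform(X_reduced)  # (100, 64): cluster value broadcast back to features
```
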
Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_features + i`\n\n distances_ : array-like of shape (n_nodes-1,)\n Distances between nodes in the corresponding place in `children_`.\n Only computed if `distance_threshold` is used or `compute_distances`\n is set to `True`.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn import datasets, cluster\n >>> digits = datasets.load_digits()\n >>> images = digits.images\n >>> X = np.reshape(images, (len(images), -1))\n >>> agglo = cluster.FeatureAgglomeration(n_clusters=32)\n >>> agglo.fit(X)\n FeatureAgglomeration(n_clusters=32)\n >>> X_reduced = agglo.transform(X)\n >>> X_reduced.shape\n (1797, 32)\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_clusters=2, *, affinity=\"euclidean\",\n memory=None,\n connectivity=None, compute_full_tree='auto',\n linkage='ward', pooling_func=np.mean,\n distance_threshold=None, compute_distances=False):\n super().__init__(\n n_clusters=n_clusters, memory=memory, connectivity=connectivity,\n compute_full_tree=compute_full_tree, linkage=linkage,\n affinity=affinity, distance_threshold=distance_threshold,\n compute_distances=compute_distances)\n self.pooling_func = pooling_func\n\n def fit(self, X, y=None, **params):\n \"\"\"Fit the hierarchical clustering on the data\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data\n\n y : Ignored\n\n Returns\n -------\n self\n \"\"\"\n X = self._validate_data(X, accept_sparse=['csr', 'csc', 'coo'],\n ensure_min_features=2, estimator=self)\n # save n_features_in_ attribute here to reset it after, because it will\n # be overridden in AgglomerativeClustering since we passed it X.T.\n n_features_in_ = self.n_features_in_\n AgglomerativeClustering.fit(self, X.T, **params)\n self.n_features_in_ = n_features_in_\n return self\n\n @property\n def fit_predict(self):\n raise AttributeError", + "instance_attributes": [ + { + "name": "pooling_func", + "types": { + "kind": "NamedType", + "name": "Callable" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral", + "name": "BaseSpectral", + "qname": "sklearn.cluster._bicluster.BaseSpectral", + "decorators": [], + "superclasses": ["BiclusterMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/__init__", + "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_check_parameters", + "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/fit", + "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_svd", + "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_k_means" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for spectral biclustering.", + "docstring": "Base class for spectral biclustering.", + "code": "class BaseSpectral(BiclusterMixin, BaseEstimator, metaclass=ABCMeta):\n \"\"\"Base class for spectral biclustering.\"\"\"\n\n @abstractmethod\n def __init__(self, n_clusters=3, svd_method=\"randomized\",\n n_svd_vecs=None, mini_batch=False, init=\"k-means++\",\n n_init=10, n_jobs='deprecated', random_state=None):\n self.n_clusters = n_clusters\n self.svd_method = svd_method\n self.n_svd_vecs = n_svd_vecs\n self.mini_batch = mini_batch\n self.init = init\n self.n_init = n_init\n self.n_jobs = n_jobs\n self.random_state = random_state\n\n def _check_parameters(self):\n legal_svd_methods = ('randomized', 'arpack')\n if self.svd_method not in legal_svd_methods:\n raise ValueError(\"Unknown SVD method: '{0}'. 
svd_method must be\"\n \" one of {1}.\".format(self.svd_method,\n legal_svd_methods))\n\n def fit(self, X, y=None):\n \"\"\"Creates a biclustering for X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n y : Ignored\n\n \"\"\"\n if self.n_jobs != 'deprecated':\n warnings.warn(\"'n_jobs' was deprecated in version 0.23 and will be\"\n \" removed in 1.0 (renaming of 0.25).\", FutureWarning)\n\n X = self._validate_data(X, accept_sparse='csr', dtype=np.float64)\n self._check_parameters()\n self._fit(X)\n return self\n\n def _svd(self, array, n_components, n_discard):\n \"\"\"Returns first `n_components` left and right singular\n vectors u and v, discarding the first `n_discard`.\n\n \"\"\"\n if self.svd_method == 'randomized':\n kwargs = {}\n if self.n_svd_vecs is not None:\n kwargs['n_oversamples'] = self.n_svd_vecs\n u, _, vt = randomized_svd(array, n_components,\n random_state=self.random_state,\n **kwargs)\n\n elif self.svd_method == 'arpack':\n u, _, vt = svds(array, k=n_components, ncv=self.n_svd_vecs)\n if np.any(np.isnan(vt)):\n # some eigenvalues of A * A.T are negative, causing\n # sqrt() to be np.nan. This causes some vectors in vt\n # to be np.nan.\n A = safe_sparse_dot(array.T, array)\n random_state = check_random_state(self.random_state)\n # initialize with [-1,1] as in ARPACK\n v0 = random_state.uniform(-1, 1, A.shape[0])\n _, v = eigsh(A, ncv=self.n_svd_vecs, v0=v0)\n vt = v.T\n if np.any(np.isnan(u)):\n A = safe_sparse_dot(array, array.T)\n random_state = check_random_state(self.random_state)\n # initialize with [-1,1] as in ARPACK\n v0 = random_state.uniform(-1, 1, A.shape[0])\n _, u = eigsh(A, ncv=self.n_svd_vecs, v0=v0)\n\n assert_all_finite(u)\n assert_all_finite(vt)\n u = u[:, n_discard:]\n vt = vt[n_discard:]\n return u, vt.T\n\n def _k_means(self, data, n_clusters):\n if self.mini_batch:\n model = MiniBatchKMeans(n_clusters,\n init=self.init,\n n_init=self.n_init,\n random_state=self.random_state)\n else:\n model = KMeans(n_clusters, init=self.init,\n n_init=self.n_init, n_jobs=self.n_jobs,\n random_state=self.random_state)\n model.fit(data)\n centroid = model.cluster_centers_\n labels = model.labels_\n return centroid, labels", + "instance_attributes": [ + { + "name": "n_clusters", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "svd_method", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "mini_batch", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "init", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_init", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_jobs", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering", + "name": "SpectralBiclustering", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering", + "decorators": [], + "superclasses": ["BaseSpectral"], + "methods": [ + "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__", + "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_check_parameters", + "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_fit", + "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_fit_best_piecewise", + "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_project_and_cluster" + ], + "is_public": false, + "reexported_by": [], + "description": "Spectral biclustering (Kluger, 2003).\n\nPartitions rows and 
columns under the assumption that the data has\nan underlying checkerboard structure. For instance, if there are\ntwo row partitions and three column partitions, each row will\nbelong to three biclusters, and each column will belong to two\nbiclusters. The outer product of the corresponding row and column\nlabel vectors gives this checkerboard structure.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Spectral biclustering (Kluger, 2003).\n\nPartitions rows and columns under the assumption that the data has\nan underlying checkerboard structure. For instance, if there are\ntwo row partitions and three column partitions, each row will\nbelong to three biclusters, and each column will belong to two\nbiclusters. The outer product of the corresponding row and column\nlabel vectors gives this checkerboard structure.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int or tuple (n_row_clusters, n_column_clusters), default=3\n The number of row and column clusters in the checkerboard\n structure.\n\nmethod : {'bistochastic', 'scale', 'log'}, default='bistochastic'\n Method of normalizing and converting singular vectors into\n biclusters. May be one of 'scale', 'bistochastic', or 'log'.\n The authors recommend using 'log'. If the data is sparse,\n however, log normalization will not work, which is why the\n default is 'bistochastic'.\n\n .. warning::\n if `method='log'`, the data must not be sparse.\n\nn_components : int, default=6\n Number of singular vectors to check.\n\nn_best : int, default=3\n Number of best singular vectors to which to project the data\n for clustering.\n\nsvd_method : {'randomized', 'arpack'}, default='randomized'\n Selects the algorithm for finding singular vectors. May be\n 'randomized' or 'arpack'. If 'randomized', uses\n :func:`~sklearn.utils.extmath.randomized_svd`, which may be faster\n for large matrices. If 'arpack', uses\n `scipy.sparse.linalg.svds`, which is more accurate, but\n possibly slower in some cases.\n\nn_svd_vecs : int, default=None\n Number of vectors to use in calculating the SVD. Corresponds\n to `ncv` when `svd_method=arpack` and `n_oversamples` when\n `svd_method` is 'randomized`.\n\nmini_batch : bool, default=False\n Whether to use mini-batch k-means, which is faster but may get\n different results.\n\ninit : {'k-means++', 'random'} or ndarray of (n_clusters, n_features), default='k-means++'\n Method for initialization of k-means algorithm; defaults to\n 'k-means++'.\n\nn_init : int, default=10\n Number of random initializations that are tried with the\n k-means algorithm.\n\n If mini-batch k-means is used, the best initialization is\n chosen and the algorithm runs once. Otherwise, the algorithm\n is run for each initialization and the best solution chosen.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nrandom_state : int, RandomState instance, default=None\n Used for randomizing the singular value decomposition and the k-means\n initialization. 
Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nAttributes\n----------\nrows_ : array-like of shape (n_row_clusters, n_rows)\n Results of the clustering. `rows[i, r]` is True if\n cluster `i` contains row `r`. Available only after calling ``fit``.\n\ncolumns_ : array-like of shape (n_column_clusters, n_columns)\n Results of the clustering, like `rows`.\n\nrow_labels_ : array-like of shape (n_rows,)\n Row partition labels.\n\ncolumn_labels_ : array-like of shape (n_cols,)\n Column partition labels.\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralBiclustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralBiclustering(n_clusters=2, random_state=0).fit(X)\n>>> clustering.row_labels_\narray([1, 1, 1, 0, 0, 0], dtype=int32)\n>>> clustering.column_labels_\narray([0, 1], dtype=int32)\n>>> clustering\nSpectralBiclustering(n_clusters=2, random_state=0)\n\nReferences\n----------\n\n* Kluger, Yuval, et. al., 2003. `Spectral biclustering of microarray\n data: coclustering genes and conditions\n `__.", + "code": "class SpectralBiclustering(BaseSpectral):\n \"\"\"Spectral biclustering (Kluger, 2003).\n\n Partitions rows and columns under the assumption that the data has\n an underlying checkerboard structure. For instance, if there are\n two row partitions and three column partitions, each row will\n belong to three biclusters, and each column will belong to two\n biclusters. The outer product of the corresponding row and column\n label vectors gives this checkerboard structure.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_clusters : int or tuple (n_row_clusters, n_column_clusters), default=3\n The number of row and column clusters in the checkerboard\n structure.\n\n method : {'bistochastic', 'scale', 'log'}, default='bistochastic'\n Method of normalizing and converting singular vectors into\n biclusters. May be one of 'scale', 'bistochastic', or 'log'.\n The authors recommend using 'log'. If the data is sparse,\n however, log normalization will not work, which is why the\n default is 'bistochastic'.\n\n .. warning::\n if `method='log'`, the data must not be sparse.\n\n n_components : int, default=6\n Number of singular vectors to check.\n\n n_best : int, default=3\n Number of best singular vectors to which to project the data\n for clustering.\n\n svd_method : {'randomized', 'arpack'}, default='randomized'\n Selects the algorithm for finding singular vectors. May be\n 'randomized' or 'arpack'. If 'randomized', uses\n :func:`~sklearn.utils.extmath.randomized_svd`, which may be faster\n for large matrices. If 'arpack', uses\n `scipy.sparse.linalg.svds`, which is more accurate, but\n possibly slower in some cases.\n\n n_svd_vecs : int, default=None\n Number of vectors to use in calculating the SVD. Corresponds\n to `ncv` when `svd_method=arpack` and `n_oversamples` when\n `svd_method` is 'randomized`.\n\n mini_batch : bool, default=False\n Whether to use mini-batch k-means, which is faster but may get\n different results.\n\n init : {'k-means++', 'random'} or ndarray of (n_clusters, n_features), \\\n default='k-means++'\n Method for initialization of k-means algorithm; defaults to\n 'k-means++'.\n\n n_init : int, default=10\n Number of random initializations that are tried with the\n k-means algorithm.\n\n If mini-batch k-means is used, the best initialization is\n chosen and the algorithm runs once. 
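
The checkerboard assumption described in this entry can be exercised on synthetic data; a sketch assuming `make_checkerboard` and `consensus_score`, which ship with sklearn 0.24 but are not part of this entry:

```python
# Sketch: recover planted checkerboard biclusters and score the recovery.
from sklearn.cluster import SpectralBiclustering
from sklearn.datasets import make_checkerboard
from sklearn.metrics import consensus_score

data, rows, cols = make_checkerboard(shape=(300, 300), n_clusters=(4, 3),
                                     noise=10, random_state=0)
model = SpectralBiclustering(n_clusters=(4, 3), method="log",
                             random_state=0).fit(data)
# consensus_score == 1.0 means the recovered biclusters match the planted ones.
score = consensus_score(model.biclusters_, (rows, cols))
```
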
Otherwise, the algorithm\n is run for each initialization and the best solution chosen.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\n random_state : int, RandomState instance, default=None\n Used for randomizing the singular value decomposition and the k-means\n initialization. Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\n Attributes\n ----------\n rows_ : array-like of shape (n_row_clusters, n_rows)\n Results of the clustering. `rows[i, r]` is True if\n cluster `i` contains row `r`. Available only after calling ``fit``.\n\n columns_ : array-like of shape (n_column_clusters, n_columns)\n Results of the clustering, like `rows`.\n\n row_labels_ : array-like of shape (n_rows,)\n Row partition labels.\n\n column_labels_ : array-like of shape (n_cols,)\n Column partition labels.\n\n Examples\n --------\n >>> from sklearn.cluster import SpectralBiclustering\n >>> import numpy as np\n >>> X = np.array([[1, 1], [2, 1], [1, 0],\n ... [4, 7], [3, 5], [3, 6]])\n >>> clustering = SpectralBiclustering(n_clusters=2, random_state=0).fit(X)\n >>> clustering.row_labels_\n array([1, 1, 1, 0, 0, 0], dtype=int32)\n >>> clustering.column_labels_\n array([0, 1], dtype=int32)\n >>> clustering\n SpectralBiclustering(n_clusters=2, random_state=0)\n\n References\n ----------\n\n * Kluger, Yuval, et. al., 2003. `Spectral biclustering of microarray\n data: coclustering genes and conditions\n `__.\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_clusters=3, *, method='bistochastic',\n n_components=6, n_best=3, svd_method='randomized',\n n_svd_vecs=None, mini_batch=False, init='k-means++',\n n_init=10, n_jobs='deprecated', random_state=None):\n super().__init__(n_clusters,\n svd_method,\n n_svd_vecs,\n mini_batch,\n init,\n n_init,\n n_jobs,\n random_state)\n self.method = method\n self.n_components = n_components\n self.n_best = n_best\n\n def _check_parameters(self):\n super()._check_parameters()\n legal_methods = ('bistochastic', 'scale', 'log')\n if self.method not in legal_methods:\n raise ValueError(\"Unknown method: '{0}'. method must be\"\n \" one of {1}.\".format(self.method, legal_methods))\n try:\n int(self.n_clusters)\n except TypeError:\n try:\n r, c = self.n_clusters\n int(r)\n int(c)\n except (ValueError, TypeError) as e:\n raise ValueError(\"Incorrect parameter n_clusters has value:\"\n \" {}. 
It should either be a single integer\"\n \" or an iterable with two integers:\"\n \" (n_row_clusters, n_column_clusters)\") from e\n if self.n_components < 1:\n raise ValueError(\"Parameter n_components must be greater than 0,\"\n \" but its value is {}\".format(self.n_components))\n if self.n_best < 1:\n raise ValueError(\"Parameter n_best must be greater than 0,\"\n \" but its value is {}\".format(self.n_best))\n if self.n_best > self.n_components:\n raise ValueError(\"n_best cannot be larger than\"\n \" n_components, but {} > {}\"\n \"\".format(self.n_best, self.n_components))\n\n def _fit(self, X):\n n_sv = self.n_components\n if self.method == 'bistochastic':\n normalized_data = _bistochastic_normalize(X)\n n_sv += 1\n elif self.method == 'scale':\n normalized_data, _, _ = _scale_normalize(X)\n n_sv += 1\n elif self.method == 'log':\n normalized_data = _log_normalize(X)\n n_discard = 0 if self.method == 'log' else 1\n u, v = self._svd(normalized_data, n_sv, n_discard)\n ut = u.T\n vt = v.T\n\n try:\n n_row_clusters, n_col_clusters = self.n_clusters\n except TypeError:\n n_row_clusters = n_col_clusters = self.n_clusters\n\n best_ut = self._fit_best_piecewise(ut, self.n_best,\n n_row_clusters)\n\n best_vt = self._fit_best_piecewise(vt, self.n_best,\n n_col_clusters)\n\n self.row_labels_ = self._project_and_cluster(X, best_vt.T,\n n_row_clusters)\n\n self.column_labels_ = self._project_and_cluster(X.T, best_ut.T,\n n_col_clusters)\n\n self.rows_ = np.vstack([self.row_labels_ == label\n for label in range(n_row_clusters)\n for _ in range(n_col_clusters)])\n self.columns_ = np.vstack([self.column_labels_ == label\n for _ in range(n_row_clusters)\n for label in range(n_col_clusters)])\n\n def _fit_best_piecewise(self, vectors, n_best, n_clusters):\n \"\"\"Find the ``n_best`` vectors that are best approximated by piecewise\n constant vectors.\n\n The piecewise vectors are found by k-means; the best is chosen\n according to Euclidean distance.\n\n \"\"\"\n def make_piecewise(v):\n centroid, labels = self._k_means(v.reshape(-1, 1), n_clusters)\n return centroid[labels].ravel()\n piecewise_vectors = np.apply_along_axis(make_piecewise,\n axis=1, arr=vectors)\n dists = np.apply_along_axis(norm, axis=1,\n arr=(vectors - piecewise_vectors))\n result = vectors[np.argsort(dists)[:n_best]]\n return result\n\n def _project_and_cluster(self, data, vectors, n_clusters):\n \"\"\"Project ``data`` to ``vectors`` and cluster the result.\"\"\"\n projected = safe_sparse_dot(data, vectors)\n _, labels = self._k_means(projected, n_clusters)\n return labels", + "instance_attributes": [ + { + "name": "method", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_best", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering", + "name": "SpectralCoclustering", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering", + "decorators": [], + "superclasses": ["BaseSpectral"], + "methods": [ + "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/__init__", + "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/_fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Spectral Co-Clustering algorithm (Dhillon, 2001).\n\nClusters rows and columns of an array `X` to solve the relaxed\nnormalized cut of the bipartite graph created from `X` as follows:\nthe edge between row vertex `i` and 
column vertex `j` has weight\n`X[i, j]`.\n\nThe resulting bicluster structure is block-diagonal, since each\nrow and each column belongs to exactly one bicluster.\n\nSupports sparse matrices, as long as they are nonnegative.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Spectral Co-Clustering algorithm (Dhillon, 2001).\n\nClusters rows and columns of an array `X` to solve the relaxed\nnormalized cut of the bipartite graph created from `X` as follows:\nthe edge between row vertex `i` and column vertex `j` has weight\n`X[i, j]`.\n\nThe resulting bicluster structure is block-diagonal, since each\nrow and each column belongs to exactly one bicluster.\n\nSupports sparse matrices, as long as they are nonnegative.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int, default=3\n The number of biclusters to find.\n\nsvd_method : {'randomized', 'arpack'}, default='randomized'\n Selects the algorithm for finding singular vectors. May be\n 'randomized' or 'arpack'. If 'randomized', use\n :func:`sklearn.utils.extmath.randomized_svd`, which may be faster\n for large matrices. If 'arpack', use\n :func:`scipy.sparse.linalg.svds`, which is more accurate, but\n possibly slower in some cases.\n\nn_svd_vecs : int, default=None\n Number of vectors to use in calculating the SVD. Corresponds\n to `ncv` when `svd_method=arpack` and `n_oversamples` when\n `svd_method` is 'randomized`.\n\nmini_batch : bool, default=False\n Whether to use mini-batch k-means, which is faster but may get\n different results.\n\ninit : {'k-means++', 'random', or ndarray of shape (n_clusters, n_features), default='k-means++'\n Method for initialization of k-means algorithm; defaults to\n 'k-means++'.\n\nn_init : int, default=10\n Number of random initializations that are tried with the\n k-means algorithm.\n\n If mini-batch k-means is used, the best initialization is\n chosen and the algorithm runs once. Otherwise, the algorithm\n is run for each initialization and the best solution chosen.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nrandom_state : int, RandomState instance, default=None\n Used for randomizing the singular value decomposition and the k-means\n initialization. Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nAttributes\n----------\nrows_ : array-like of shape (n_row_clusters, n_rows)\n Results of the clustering. `rows[i, r]` is True if\n cluster `i` contains row `r`. Available only after calling ``fit``.\n\ncolumns_ : array-like of shape (n_column_clusters, n_columns)\n Results of the clustering, like `rows`.\n\nrow_labels_ : array-like of shape (n_rows,)\n The bicluster label of each row.\n\ncolumn_labels_ : array-like of shape (n_cols,)\n The bicluster label of each column.\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralCoclustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... 
[4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralCoclustering(n_clusters=2, random_state=0).fit(X)\n>>> clustering.row_labels_ #doctest: +SKIP\narray([0, 1, 1, 0, 0, 0], dtype=int32)\n>>> clustering.column_labels_ #doctest: +SKIP\narray([0, 0], dtype=int32)\n>>> clustering\nSpectralCoclustering(n_clusters=2, random_state=0)\n\nReferences\n----------\n\n* Dhillon, Inderjit S, 2001. `Co-clustering documents and words using\n bipartite spectral graph partitioning\n `__.", + "code": "class SpectralCoclustering(BaseSpectral):\n \"\"\"Spectral Co-Clustering algorithm (Dhillon, 2001).\n\n Clusters rows and columns of an array `X` to solve the relaxed\n normalized cut of the bipartite graph created from `X` as follows:\n the edge between row vertex `i` and column vertex `j` has weight\n `X[i, j]`.\n\n The resulting bicluster structure is block-diagonal, since each\n row and each column belongs to exactly one bicluster.\n\n Supports sparse matrices, as long as they are nonnegative.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_clusters : int, default=3\n The number of biclusters to find.\n\n svd_method : {'randomized', 'arpack'}, default='randomized'\n Selects the algorithm for finding singular vectors. May be\n 'randomized' or 'arpack'. If 'randomized', use\n :func:`sklearn.utils.extmath.randomized_svd`, which may be faster\n for large matrices. If 'arpack', use\n :func:`scipy.sparse.linalg.svds`, which is more accurate, but\n possibly slower in some cases.\n\n n_svd_vecs : int, default=None\n Number of vectors to use in calculating the SVD. Corresponds\n to `ncv` when `svd_method=arpack` and `n_oversamples` when\n `svd_method` is 'randomized`.\n\n mini_batch : bool, default=False\n Whether to use mini-batch k-means, which is faster but may get\n different results.\n\n init : {'k-means++', 'random', or ndarray of shape \\\n (n_clusters, n_features), default='k-means++'\n Method for initialization of k-means algorithm; defaults to\n 'k-means++'.\n\n n_init : int, default=10\n Number of random initializations that are tried with the\n k-means algorithm.\n\n If mini-batch k-means is used, the best initialization is\n chosen and the algorithm runs once. Otherwise, the algorithm\n is run for each initialization and the best solution chosen.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\n random_state : int, RandomState instance, default=None\n Used for randomizing the singular value decomposition and the k-means\n initialization. Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\n Attributes\n ----------\n rows_ : array-like of shape (n_row_clusters, n_rows)\n Results of the clustering. `rows[i, r]` is True if\n cluster `i` contains row `r`. 
Available only after calling ``fit``.\n\n columns_ : array-like of shape (n_column_clusters, n_columns)\n Results of the clustering, like `rows`.\n\n row_labels_ : array-like of shape (n_rows,)\n The bicluster label of each row.\n\n column_labels_ : array-like of shape (n_cols,)\n The bicluster label of each column.\n\n Examples\n --------\n >>> from sklearn.cluster import SpectralCoclustering\n >>> import numpy as np\n >>> X = np.array([[1, 1], [2, 1], [1, 0],\n ... [4, 7], [3, 5], [3, 6]])\n >>> clustering = SpectralCoclustering(n_clusters=2, random_state=0).fit(X)\n >>> clustering.row_labels_ #doctest: +SKIP\n array([0, 1, 1, 0, 0, 0], dtype=int32)\n >>> clustering.column_labels_ #doctest: +SKIP\n array([0, 0], dtype=int32)\n >>> clustering\n SpectralCoclustering(n_clusters=2, random_state=0)\n\n References\n ----------\n\n * Dhillon, Inderjit S, 2001. `Co-clustering documents and words using\n bipartite spectral graph partitioning\n `__.\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_clusters=3, *, svd_method='randomized',\n n_svd_vecs=None, mini_batch=False, init='k-means++',\n n_init=10, n_jobs='deprecated', random_state=None):\n super().__init__(n_clusters,\n svd_method,\n n_svd_vecs,\n mini_batch,\n init,\n n_init,\n n_jobs,\n random_state)\n\n def _fit(self, X):\n normalized_data, row_diag, col_diag = _scale_normalize(X)\n n_sv = 1 + int(np.ceil(np.log2(self.n_clusters)))\n u, v = self._svd(normalized_data, n_sv, n_discard=1)\n z = np.vstack((row_diag[:, np.newaxis] * u,\n col_diag[:, np.newaxis] * v))\n\n _, labels = self._k_means(z, self.n_clusters)\n\n n_rows = X.shape[0]\n self.row_labels_ = labels[:n_rows]\n self.column_labels_ = labels[n_rows:]\n\n self.rows_ = np.vstack([self.row_labels_ == c\n for c in range(self.n_clusters)])\n self.columns_ = np.vstack([self.column_labels_ == c\n for c in range(self.n_clusters)])", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch", + "name": "Birch", + "qname": "sklearn.cluster._birch.Birch", + "decorators": [], + "superclasses": ["ClusterMixin", "TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.cluster._birch/Birch/__init__", + "scikit-learn/sklearn.cluster._birch/Birch/fit", + "scikit-learn/sklearn.cluster._birch/Birch/_fit", + "scikit-learn/sklearn.cluster._birch/Birch/_get_leaves", + "scikit-learn/sklearn.cluster._birch/Birch/partial_fit", + "scikit-learn/sklearn.cluster._birch/Birch/_check_fit", + "scikit-learn/sklearn.cluster._birch/Birch/predict", + "scikit-learn/sklearn.cluster._birch/Birch/transform", + "scikit-learn/sklearn.cluster._birch/Birch/_global_clustering" + ], + "is_public": false, + "reexported_by": [], + "description": "Implements the BIRCH clustering algorithm.\n\nIt is a memory-efficient, online-learning algorithm provided as an\nalternative to :class:`MiniBatchKMeans`. It constructs a tree\ndata structure with the cluster centroids being read off the leaf.\nThese can be either the final cluster centroids or can be provided as input\nto another clustering algorithm such as :class:`AgglomerativeClustering`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16", + "docstring": "Implements the BIRCH clustering algorithm.\n\nIt is a memory-efficient, online-learning algorithm provided as an\nalternative to :class:`MiniBatchKMeans`. 
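Referring back to the SpectralCoclustering entry just closed above, a hedged usage sketch (synthetic data and all parameter values are illustrative): `make_biclusters` produces a block-diagonal matrix, which is shuffled and then reordered by the recovered labels to expose the blocks again.

import numpy as np
from sklearn.cluster import SpectralCoclustering
from sklearn.datasets import make_biclusters
from sklearn.metrics import consensus_score

data, rows, cols = make_biclusters(shape=(300, 300), n_clusters=5,
                                   noise=5, shuffle=False, random_state=0)
rng = np.random.RandomState(0)
row_idx, col_idx = rng.permutation(300), rng.permutation(300)
shuffled = data[row_idx][:, col_idx]

model = SpectralCoclustering(n_clusters=5, random_state=0).fit(shuffled)
score = consensus_score(model.biclusters_, (rows[:, row_idx], cols[:, col_idx]))

# Reorder rows/columns by bicluster label to reveal the block-diagonal form.
fit_data = shuffled[np.argsort(model.row_labels_)]
fit_data = fit_data[:, np.argsort(model.column_labels_)]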
It constructs a tree\ndata structure with the cluster centroids being read off the leaf.\nThese can be either the final cluster centroids or can be provided as input\nto another clustering algorithm such as :class:`AgglomerativeClustering`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nthreshold : float, default=0.5\n The radius of the subcluster obtained by merging a new sample and the\n closest subcluster should be lesser than the threshold. Otherwise a new\n subcluster is started. Setting this value to be very low promotes\n splitting and vice-versa.\n\nbranching_factor : int, default=50\n Maximum number of CF subclusters in each node. If a new samples enters\n such that the number of subclusters exceed the branching_factor then\n that node is split into two nodes with the subclusters redistributed\n in each. The parent subcluster of that node is removed and two new\n subclusters are added as parents of the 2 split nodes.\n\nn_clusters : int, instance of sklearn.cluster model, default=3\n Number of clusters after the final clustering step, which treats the\n subclusters from the leaves as new samples.\n\n - `None` : the final clustering step is not performed and the\n subclusters are returned as they are.\n\n - :mod:`sklearn.cluster` Estimator : If a model is provided, the model\n is fit treating the subclusters as new samples and the initial data\n is mapped to the label of the closest subcluster.\n\n - `int` : the model fit is :class:`AgglomerativeClustering` with\n `n_clusters` set to be equal to the int.\n\ncompute_labels : bool, default=True\n Whether or not to compute labels for each fit.\n\ncopy : bool, default=True\n Whether or not to make a copy of the given data. If set to False,\n the initial data will be overwritten.\n\nAttributes\n----------\nroot_ : _CFNode\n Root of the CFTree.\n\ndummy_leaf_ : _CFNode\n Start pointer to all the leaves.\n\nsubcluster_centers_ : ndarray\n Centroids of all subclusters read directly from the leaves.\n\nsubcluster_labels_ : ndarray\n Labels assigned to the centroids of the subclusters after\n they are clustered globally.\n\nlabels_ : ndarray of shape (n_samples,)\n Array of labels assigned to the input data.\n if partial_fit is used instead of fit, they are assigned to the\n last batch of data.\n\nSee Also\n--------\nMiniBatchKMeans : Alternative implementation that does incremental updates\n of the centers' positions using mini-batches.\n\nNotes\n-----\nThe tree data structure consists of nodes with each node consisting of\na number of subclusters. The maximum number of subclusters in a node\nis determined by the branching factor. Each subcluster maintains a\nlinear sum, squared sum and the number of samples in that subcluster.\nIn addition, each subcluster can also have a node as its child, if the\nsubcluster is not a member of a leaf node.\n\nFor a new point entering the root, it is merged with the subcluster closest\nto it and the linear sum, squared sum and the number of samples of that\nsubcluster are updated. 
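The `n_clusters` parameter documented above also accepts a clustering estimator. A hedged sketch of that form (blob data and `n_clusters=4` are illustrative assumptions):

from sklearn.cluster import AgglomerativeClustering, Birch
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=1000, centers=4, random_state=0)

# When n_clusters is an estimator, the CF-tree leaves' subcluster centroids
# are fit as samples, and each input point takes the label of its nearest
# subcluster, as described in the docstring above.
brc = Birch(threshold=0.5, n_clusters=AgglomerativeClustering(n_clusters=4))
labels = brc.fit_predict(X)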
This is done recursively till the properties of\nthe leaf node are updated.\n\nReferences\n----------\n* Tian Zhang, Raghu Ramakrishnan, Maron Livny\n BIRCH: An efficient data clustering method for large databases.\n https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf\n\n* Roberto Perdisci\n JBirch - Java implementation of BIRCH clustering algorithm\n https://code.google.com/archive/p/jbirch\n\nExamples\n--------\n>>> from sklearn.cluster import Birch\n>>> X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]\n>>> brc = Birch(n_clusters=None)\n>>> brc.fit(X)\nBirch(n_clusters=None)\n>>> brc.predict(X)\narray([0, 0, 0, 1, 1, 1])", + "code": "class Birch(ClusterMixin, TransformerMixin, BaseEstimator):\n \"\"\"Implements the BIRCH clustering algorithm.\n\n It is a memory-efficient, online-learning algorithm provided as an\n alternative to :class:`MiniBatchKMeans`. It constructs a tree\n data structure with the cluster centroids being read off the leaf.\n These can be either the final cluster centroids or can be provided as input\n to another clustering algorithm such as :class:`AgglomerativeClustering`.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.16\n\n Parameters\n ----------\n threshold : float, default=0.5\n The radius of the subcluster obtained by merging a new sample and the\n closest subcluster should be lesser than the threshold. Otherwise a new\n subcluster is started. Setting this value to be very low promotes\n splitting and vice-versa.\n\n branching_factor : int, default=50\n Maximum number of CF subclusters in each node. If a new samples enters\n such that the number of subclusters exceed the branching_factor then\n that node is split into two nodes with the subclusters redistributed\n in each. The parent subcluster of that node is removed and two new\n subclusters are added as parents of the 2 split nodes.\n\n n_clusters : int, instance of sklearn.cluster model, default=3\n Number of clusters after the final clustering step, which treats the\n subclusters from the leaves as new samples.\n\n - `None` : the final clustering step is not performed and the\n subclusters are returned as they are.\n\n - :mod:`sklearn.cluster` Estimator : If a model is provided, the model\n is fit treating the subclusters as new samples and the initial data\n is mapped to the label of the closest subcluster.\n\n - `int` : the model fit is :class:`AgglomerativeClustering` with\n `n_clusters` set to be equal to the int.\n\n compute_labels : bool, default=True\n Whether or not to compute labels for each fit.\n\n copy : bool, default=True\n Whether or not to make a copy of the given data. If set to False,\n the initial data will be overwritten.\n\n Attributes\n ----------\n root_ : _CFNode\n Root of the CFTree.\n\n dummy_leaf_ : _CFNode\n Start pointer to all the leaves.\n\n subcluster_centers_ : ndarray\n Centroids of all subclusters read directly from the leaves.\n\n subcluster_labels_ : ndarray\n Labels assigned to the centroids of the subclusters after\n they are clustered globally.\n\n labels_ : ndarray of shape (n_samples,)\n Array of labels assigned to the input data.\n if partial_fit is used instead of fit, they are assigned to the\n last batch of data.\n\n See Also\n --------\n MiniBatchKMeans : Alternative implementation that does incremental updates\n of the centers' positions using mini-batches.\n\n Notes\n -----\n The tree data structure consists of nodes with each node consisting of\n a number of subclusters. 
The maximum number of subclusters in a node\n is determined by the branching factor. Each subcluster maintains a\n linear sum, squared sum and the number of samples in that subcluster.\n In addition, each subcluster can also have a node as its child, if the\n subcluster is not a member of a leaf node.\n\n For a new point entering the root, it is merged with the subcluster closest\n to it and the linear sum, squared sum and the number of samples of that\n subcluster are updated. This is done recursively till the properties of\n the leaf node are updated.\n\n References\n ----------\n * Tian Zhang, Raghu Ramakrishnan, Maron Livny\n BIRCH: An efficient data clustering method for large databases.\n https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf\n\n * Roberto Perdisci\n JBirch - Java implementation of BIRCH clustering algorithm\n https://code.google.com/archive/p/jbirch\n\n Examples\n --------\n >>> from sklearn.cluster import Birch\n >>> X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]\n >>> brc = Birch(n_clusters=None)\n >>> brc.fit(X)\n Birch(n_clusters=None)\n >>> brc.predict(X)\n array([0, 0, 0, 1, 1, 1])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, threshold=0.5, branching_factor=50, n_clusters=3,\n compute_labels=True, copy=True):\n self.threshold = threshold\n self.branching_factor = branching_factor\n self.n_clusters = n_clusters\n self.compute_labels = compute_labels\n self.copy = copy\n\n def fit(self, X, y=None):\n \"\"\"\n Build a CF Tree for the input data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n self\n Fitted estimator.\n \"\"\"\n self.fit_, self.partial_fit_ = True, False\n return self._fit(X)\n\n def _fit(self, X):\n has_root = getattr(self, 'root_', None)\n first_call = self.fit_ or (self.partial_fit_ and not has_root)\n\n X = self._validate_data(X, accept_sparse='csr', copy=self.copy,\n reset=first_call)\n threshold = self.threshold\n branching_factor = self.branching_factor\n\n if branching_factor <= 1:\n raise ValueError(\"Branching_factor should be greater than one.\")\n n_samples, n_features = X.shape\n\n # If partial_fit is called for the first time or fit is called, we\n # start a new tree.\n if first_call:\n # The first root is the leaf. Manipulate this object throughout.\n self.root_ = _CFNode(threshold=threshold,\n branching_factor=branching_factor,\n is_leaf=True,\n n_features=n_features)\n\n # To enable getting back subclusters.\n self.dummy_leaf_ = _CFNode(threshold=threshold,\n branching_factor=branching_factor,\n is_leaf=True, n_features=n_features)\n self.dummy_leaf_.next_leaf_ = self.root_\n self.root_.prev_leaf_ = self.dummy_leaf_\n\n # Cannot vectorize. 
Enough to convince to use cython.\n if not sparse.issparse(X):\n iter_func = iter\n else:\n iter_func = _iterate_sparse_X\n\n for sample in iter_func(X):\n subcluster = _CFSubcluster(linear_sum=sample)\n split = self.root_.insert_cf_subcluster(subcluster)\n\n if split:\n new_subcluster1, new_subcluster2 = _split_node(\n self.root_, threshold, branching_factor)\n del self.root_\n self.root_ = _CFNode(threshold=threshold,\n branching_factor=branching_factor,\n is_leaf=False,\n n_features=n_features)\n self.root_.append_subcluster(new_subcluster1)\n self.root_.append_subcluster(new_subcluster2)\n\n centroids = np.concatenate([\n leaf.centroids_ for leaf in self._get_leaves()])\n self.subcluster_centers_ = centroids\n\n self._global_clustering(X)\n return self\n\n def _get_leaves(self):\n \"\"\"\n Retrieve the leaves of the CF Node.\n\n Returns\n -------\n leaves : list of shape (n_leaves,)\n List of the leaf nodes.\n \"\"\"\n leaf_ptr = self.dummy_leaf_.next_leaf_\n leaves = []\n while leaf_ptr is not None:\n leaves.append(leaf_ptr)\n leaf_ptr = leaf_ptr.next_leaf_\n return leaves\n\n def partial_fit(self, X=None, y=None):\n \"\"\"\n Online learning. Prevents rebuilding of CFTree from scratch.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features), \\\n default=None\n Input data. If X is not provided, only the global clustering\n step is done.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n self\n Fitted estimator.\n \"\"\"\n self.partial_fit_, self.fit_ = True, False\n if X is None:\n # Perform just the final global clustering step.\n self._global_clustering()\n return self\n else:\n return self._fit(X)\n\n def _check_fit(self, X):\n check_is_fitted(self)\n\n if (hasattr(self, 'subcluster_centers_') and\n X.shape[1] != self.subcluster_centers_.shape[1]):\n raise ValueError(\n \"Training data and predicted data do \"\n \"not have same number of features.\")\n\n def predict(self, X):\n \"\"\"\n Predict data using the ``centroids_`` of subclusters.\n\n Avoid computation of the row norms of X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\n Returns\n -------\n labels : ndarray of shape(n_samples,)\n Labelled data.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse='csr', reset=False)\n kwargs = {'Y_norm_squared': self._subcluster_norms}\n\n with config_context(assume_finite=True):\n argmin = pairwise_distances_argmin(X, self.subcluster_centers_,\n metric_kwargs=kwargs)\n return self.subcluster_labels_[argmin]\n\n def transform(self, X):\n \"\"\"\n Transform X into subcluster centroids dimension.\n\n Each dimension represents the distance from the sample point to each\n cluster centroid.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\n Returns\n -------\n X_trans : {array-like, sparse matrix} of shape (n_samples, n_clusters)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n self._validate_data(X, accept_sparse='csr', reset=False)\n with config_context(assume_finite=True):\n return euclidean_distances(X, self.subcluster_centers_)\n\n def _global_clustering(self, X=None):\n \"\"\"\n Global clustering for the subclusters obtained after fitting\n \"\"\"\n clusterer = self.n_clusters\n centroids = self.subcluster_centers_\n compute_labels = (X is not None) and self.compute_labels\n\n # Preprocessing for the global clustering.\n not_enough_centroids = 
False\n if isinstance(clusterer, numbers.Integral):\n clusterer = AgglomerativeClustering(\n n_clusters=self.n_clusters)\n # There is no need to perform the global clustering step.\n if len(centroids) < self.n_clusters:\n not_enough_centroids = True\n elif (clusterer is not None and not\n hasattr(clusterer, 'fit_predict')):\n raise ValueError(\"n_clusters should be an instance of \"\n \"ClusterMixin or an int\")\n\n # To use in predict to avoid recalculation.\n self._subcluster_norms = row_norms(\n self.subcluster_centers_, squared=True)\n\n if clusterer is None or not_enough_centroids:\n self.subcluster_labels_ = np.arange(len(centroids))\n if not_enough_centroids:\n warnings.warn(\n \"Number of subclusters found (%d) by BIRCH is less \"\n \"than (%d). Decrease the threshold.\"\n % (len(centroids), self.n_clusters), ConvergenceWarning)\n else:\n # The global clustering step that clusters the subclusters of\n # the leaves. It assumes the centroids of the subclusters as\n # samples and finds the final centroids.\n self.subcluster_labels_ = clusterer.fit_predict(\n self.subcluster_centers_)\n\n if compute_labels:\n self.labels_ = self.predict(X)", + "instance_attributes": [ + { + "name": "threshold", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "branching_factor", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_clusters", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "compute_labels", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "root_", + "types": { + "kind": "NamedType", + "name": "_CFNode" + } + }, + { + "name": "dummy_leaf_", + "types": { + "kind": "NamedType", + "name": "_CFNode" + } + }, + { + "name": "subcluster_centers_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode", + "name": "_CFNode", + "qname": "sklearn.cluster._birch._CFNode", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.cluster._birch/_CFNode/__init__", + "scikit-learn/sklearn.cluster._birch/_CFNode/append_subcluster", + "scikit-learn/sklearn.cluster._birch/_CFNode/update_split_subclusters", + "scikit-learn/sklearn.cluster._birch/_CFNode/insert_cf_subcluster" + ], + "is_public": false, + "reexported_by": [], + "description": "Each node in a CFTree is called a CFNode.\n\nThe CFNode can have a maximum of branching_factor\nnumber of CFSubclusters.", + "docstring": "Each node in a CFTree is called a CFNode.\n\nThe CFNode can have a maximum of branching_factor\nnumber of CFSubclusters.\n\nParameters\n----------\nthreshold : float\n Threshold needed for a new subcluster to enter a CFSubcluster.\n\nbranching_factor : int\n Maximum number of CF subclusters in each node.\n\nis_leaf : bool\n We need to know if the CFNode is a leaf or not, in order to\n retrieve the final subclusters.\n\nn_features : int\n The number of features.\n\nAttributes\n----------\nsubclusters_ : list\n List of subclusters for a particular CFNode.\n\nprev_leaf_ : _CFNode\n Useful only if is_leaf is True.\n\nnext_leaf_ : _CFNode\n next_leaf. 
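A hedged sketch of the online path through the Birch code above: `partial_fit` grows the CF tree batch by batch, and a final `partial_fit()` with no data reruns only `_global_clustering`. Batch count and data are illustrative.

import numpy as np
from sklearn.cluster import Birch
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=10000, centers=5, random_state=0)

brc = Birch(n_clusters=5)
for chunk in np.array_split(X, 10):  # stream the data in ten batches
    brc.partial_fit(chunk)

# With X=None, only the final global clustering step is (re)run.
brc.partial_fit()
labels = brc.predict(X)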
Useful only if is_leaf is True.\n the final subclusters.\n\ninit_centroids_ : ndarray of shape (branching_factor + 1, n_features)\n Manipulate ``init_centroids_`` throughout rather than centroids_ since\n the centroids are just a view of the ``init_centroids_`` .\n\ninit_sq_norm_ : ndarray of shape (branching_factor + 1,)\n manipulate init_sq_norm_ throughout. similar to ``init_centroids_``.\n\ncentroids_ : ndarray of shape (branching_factor + 1, n_features)\n View of ``init_centroids_``.\n\nsquared_norm_ : ndarray of shape (branching_factor + 1,)\n View of ``init_sq_norm_``.", + "code": "class _CFNode:\n \"\"\"Each node in a CFTree is called a CFNode.\n\n The CFNode can have a maximum of branching_factor\n number of CFSubclusters.\n\n Parameters\n ----------\n threshold : float\n Threshold needed for a new subcluster to enter a CFSubcluster.\n\n branching_factor : int\n Maximum number of CF subclusters in each node.\n\n is_leaf : bool\n We need to know if the CFNode is a leaf or not, in order to\n retrieve the final subclusters.\n\n n_features : int\n The number of features.\n\n Attributes\n ----------\n subclusters_ : list\n List of subclusters for a particular CFNode.\n\n prev_leaf_ : _CFNode\n Useful only if is_leaf is True.\n\n next_leaf_ : _CFNode\n next_leaf. Useful only if is_leaf is True.\n the final subclusters.\n\n init_centroids_ : ndarray of shape (branching_factor + 1, n_features)\n Manipulate ``init_centroids_`` throughout rather than centroids_ since\n the centroids are just a view of the ``init_centroids_`` .\n\n init_sq_norm_ : ndarray of shape (branching_factor + 1,)\n manipulate init_sq_norm_ throughout. similar to ``init_centroids_``.\n\n centroids_ : ndarray of shape (branching_factor + 1, n_features)\n View of ``init_centroids_``.\n\n squared_norm_ : ndarray of shape (branching_factor + 1,)\n View of ``init_sq_norm_``.\n\n \"\"\"\n def __init__(self, *, threshold, branching_factor, is_leaf, n_features):\n self.threshold = threshold\n self.branching_factor = branching_factor\n self.is_leaf = is_leaf\n self.n_features = n_features\n\n # The list of subclusters, centroids and squared norms\n # to manipulate throughout.\n self.subclusters_ = []\n self.init_centroids_ = np.zeros((branching_factor + 1, n_features))\n self.init_sq_norm_ = np.zeros((branching_factor + 1))\n self.squared_norm_ = []\n self.prev_leaf_ = None\n self.next_leaf_ = None\n\n def append_subcluster(self, subcluster):\n n_samples = len(self.subclusters_)\n self.subclusters_.append(subcluster)\n self.init_centroids_[n_samples] = subcluster.centroid_\n self.init_sq_norm_[n_samples] = subcluster.sq_norm_\n\n # Keep centroids and squared norm as views. 
In this way\n # if we change init_centroids and init_sq_norm_, it is\n # sufficient,\n self.centroids_ = self.init_centroids_[:n_samples + 1, :]\n self.squared_norm_ = self.init_sq_norm_[:n_samples + 1]\n\n def update_split_subclusters(self, subcluster,\n new_subcluster1, new_subcluster2):\n \"\"\"Remove a subcluster from a node and update it with the\n split subclusters.\n \"\"\"\n ind = self.subclusters_.index(subcluster)\n self.subclusters_[ind] = new_subcluster1\n self.init_centroids_[ind] = new_subcluster1.centroid_\n self.init_sq_norm_[ind] = new_subcluster1.sq_norm_\n self.append_subcluster(new_subcluster2)\n\n def insert_cf_subcluster(self, subcluster):\n \"\"\"Insert a new subcluster into the node.\"\"\"\n if not self.subclusters_:\n self.append_subcluster(subcluster)\n return False\n\n threshold = self.threshold\n branching_factor = self.branching_factor\n # We need to find the closest subcluster among all the\n # subclusters so that we can insert our new subcluster.\n dist_matrix = np.dot(self.centroids_, subcluster.centroid_)\n dist_matrix *= -2.\n dist_matrix += self.squared_norm_\n closest_index = np.argmin(dist_matrix)\n closest_subcluster = self.subclusters_[closest_index]\n\n # If the subcluster has a child, we need a recursive strategy.\n if closest_subcluster.child_ is not None:\n split_child = closest_subcluster.child_.insert_cf_subcluster(\n subcluster)\n\n if not split_child:\n # If it is determined that the child need not be split, we\n # can just update the closest_subcluster\n closest_subcluster.update(subcluster)\n self.init_centroids_[closest_index] = \\\n self.subclusters_[closest_index].centroid_\n self.init_sq_norm_[closest_index] = \\\n self.subclusters_[closest_index].sq_norm_\n return False\n\n # things not too good. we need to redistribute the subclusters in\n # our child node, and add a new subcluster in the parent\n # subcluster to accommodate the new child.\n else:\n new_subcluster1, new_subcluster2 = _split_node(\n closest_subcluster.child_, threshold, branching_factor)\n self.update_split_subclusters(\n closest_subcluster, new_subcluster1, new_subcluster2)\n\n if len(self.subclusters_) > self.branching_factor:\n return True\n return False\n\n # good to go!\n else:\n merged = closest_subcluster.merge_subcluster(\n subcluster, self.threshold)\n if merged:\n self.init_centroids_[closest_index] = \\\n closest_subcluster.centroid_\n self.init_sq_norm_[closest_index] = \\\n closest_subcluster.sq_norm_\n return False\n\n # not close to any other subclusters, and we still\n # have space, so add.\n elif len(self.subclusters_) < self.branching_factor:\n self.append_subcluster(subcluster)\n return False\n\n # We do not have enough space nor is it closer to an\n # other subcluster. 
We need to split.\n else:\n self.append_subcluster(subcluster)\n return True", + "instance_attributes": [ + { + "name": "subclusters_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "init_centroids_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "init_sq_norm_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "squared_norm_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFSubcluster", + "name": "_CFSubcluster", + "qname": "sklearn.cluster._birch._CFSubcluster", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.cluster._birch/_CFSubcluster/__init__", + "scikit-learn/sklearn.cluster._birch/_CFSubcluster/update", + "scikit-learn/sklearn.cluster._birch/_CFSubcluster/merge_subcluster", + "scikit-learn/sklearn.cluster._birch/_CFSubcluster/radius@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Each subcluster in a CFNode is called a CFSubcluster.\n\nA CFSubcluster can have a CFNode has its child.", + "docstring": "Each subcluster in a CFNode is called a CFSubcluster.\n\nA CFSubcluster can have a CFNode has its child.\n\nParameters\n----------\nlinear_sum : ndarray of shape (n_features,), default=None\n Sample. This is kept optional to allow initialization of empty\n subclusters.\n\nAttributes\n----------\nn_samples_ : int\n Number of samples that belong to each subcluster.\n\nlinear_sum_ : ndarray\n Linear sum of all the samples in a subcluster. Prevents holding\n all sample data in memory.\n\nsquared_sum_ : float\n Sum of the squared l2 norms of all samples belonging to a subcluster.\n\ncentroid_ : ndarray of shape (branching_factor + 1, n_features)\n Centroid of the subcluster. Prevent recomputing of centroids when\n ``CFNode.centroids_`` is called.\n\nchild_ : _CFNode\n Child Node of the subcluster. Once a given _CFNode is set as the child\n of the _CFNode, it is set to ``self.child_``.\n\nsq_norm_ : ndarray of shape (branching_factor + 1,)\n Squared norm of the subcluster. Used to prevent recomputing when\n pairwise minimum distances are computed.", + "code": "class _CFSubcluster:\n \"\"\"Each subcluster in a CFNode is called a CFSubcluster.\n\n A CFSubcluster can have a CFNode has its child.\n\n Parameters\n ----------\n linear_sum : ndarray of shape (n_features,), default=None\n Sample. This is kept optional to allow initialization of empty\n subclusters.\n\n Attributes\n ----------\n n_samples_ : int\n Number of samples that belong to each subcluster.\n\n linear_sum_ : ndarray\n Linear sum of all the samples in a subcluster. Prevents holding\n all sample data in memory.\n\n squared_sum_ : float\n Sum of the squared l2 norms of all samples belonging to a subcluster.\n\n centroid_ : ndarray of shape (branching_factor + 1, n_features)\n Centroid of the subcluster. Prevent recomputing of centroids when\n ``CFNode.centroids_`` is called.\n\n child_ : _CFNode\n Child Node of the subcluster. Once a given _CFNode is set as the child\n of the _CFNode, it is set to ``self.child_``.\n\n sq_norm_ : ndarray of shape (branching_factor + 1,)\n Squared norm of the subcluster. 
Used to prevent recomputing when\n pairwise minimum distances are computed.\n \"\"\"\n def __init__(self, *, linear_sum=None):\n if linear_sum is None:\n self.n_samples_ = 0\n self.squared_sum_ = 0.0\n self.centroid_ = self.linear_sum_ = 0\n else:\n self.n_samples_ = 1\n self.centroid_ = self.linear_sum_ = linear_sum\n self.squared_sum_ = self.sq_norm_ = np.dot(\n self.linear_sum_, self.linear_sum_)\n self.child_ = None\n\n def update(self, subcluster):\n self.n_samples_ += subcluster.n_samples_\n self.linear_sum_ += subcluster.linear_sum_\n self.squared_sum_ += subcluster.squared_sum_\n self.centroid_ = self.linear_sum_ / self.n_samples_\n self.sq_norm_ = np.dot(self.centroid_, self.centroid_)\n\n def merge_subcluster(self, nominee_cluster, threshold):\n \"\"\"Check if a cluster is worthy enough to be merged. If\n yes then merge.\n \"\"\"\n new_ss = self.squared_sum_ + nominee_cluster.squared_sum_\n new_ls = self.linear_sum_ + nominee_cluster.linear_sum_\n new_n = self.n_samples_ + nominee_cluster.n_samples_\n new_centroid = (1 / new_n) * new_ls\n new_norm = np.dot(new_centroid, new_centroid)\n dot_product = (-2 * new_n) * new_norm\n sq_radius = (new_ss + dot_product) / new_n + new_norm\n if sq_radius <= threshold ** 2:\n (self.n_samples_, self.linear_sum_, self.squared_sum_,\n self.centroid_, self.sq_norm_) = \\\n new_n, new_ls, new_ss, new_centroid, new_norm\n return True\n return False\n\n @property\n def radius(self):\n \"\"\"Return radius of the subcluster\"\"\"\n dot_product = -2 * np.dot(self.linear_sum_, self.centroid_)\n return sqrt(\n ((self.squared_sum_ + dot_product) / self.n_samples_) +\n self.sq_norm_)", + "instance_attributes": [ + { + "name": "n_samples_", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "squared_sum_", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "centroid_", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "linear_sum_", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "sq_norm_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN", + "name": "DBSCAN", + "qname": "sklearn.cluster._dbscan.DBSCAN", + "decorators": [], + "superclasses": ["ClusterMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.cluster._dbscan/DBSCAN/__init__", + "scikit-learn/sklearn.cluster._dbscan/DBSCAN/fit", + "scikit-learn/sklearn.cluster._dbscan/DBSCAN/fit_predict" + ], + "is_public": false, + "reexported_by": [], + "description": "Perform DBSCAN clustering from vector array or distance matrix.\n\nDBSCAN - Density-Based Spatial Clustering of Applications with Noise.\nFinds core samples of high density and expands clusters from them.\nGood for data which contains clusters of similar density.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Perform DBSCAN clustering from vector array or distance matrix.\n\nDBSCAN - Density-Based Spatial Clustering of Applications with Noise.\nFinds core samples of high density and expands clusters from them.\nGood for data which contains clusters of similar density.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\neps : float, default=0.5\n The maximum distance between two samples for one to be considered\n as in the neighborhood of the other. This is not a maximum bound\n on the distances of points within a cluster. 
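A NumPy-only sketch (not library code) checking the CF statistics that `_CFSubcluster` maintains above: from the triple (sample count, linear sum, squared sum), the centroid and the `radius` property reduce to the root-mean-square distance of the samples to their centroid.

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(20, 3)

# The CF triple maintained per subcluster: (n, linear sum, squared sum).
n, ls, ss = len(X), X.sum(axis=0), (X ** 2).sum()
centroid = ls / n

# radius**2 = SS/n - ||centroid||**2, which is algebraically what the
# `radius` property computes via its -2 * dot(linear_sum, centroid) term.
radius = np.sqrt(ss / n - centroid @ centroid)
assert np.isclose(radius, np.sqrt(((X - centroid) ** 2).sum(axis=1).mean()))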
This is the most\n important DBSCAN parameter to choose appropriately for your data set\n and distance function.\n\nmin_samples : int, default=5\n The number of samples (or total weight) in a neighborhood for a point\n to be considered as a core point. This includes the point itself.\n\nmetric : string, or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square. X may be a :term:`Glossary `, in which\n case only \"nonzero\" elements may be considered neighbors for DBSCAN.\n\n .. versionadded:: 0.17\n metric *precomputed* to accept precomputed sparse matrix.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. versionadded:: 0.19\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n The algorithm to be used by the NearestNeighbors module\n to compute pointwise distances and find nearest neighbors.\n See NearestNeighbors module documentation for details.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or cKDTree. This can affect the speed\n of the construction and query, as well as the memory required\n to store the tree. The optimal value depends\n on the nature of the problem.\n\np : float, default=None\n The power of the Minkowski metric to be used to calculate distance\n between points. If None, then ``p=2`` (equivalent to the Euclidean\n distance).\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\ncore_sample_indices_ : ndarray of shape (n_core_samples,)\n Indices of core samples.\n\ncomponents_ : ndarray of shape (n_core_samples, n_features)\n Copy of each core sample found by training.\n\nlabels_ : ndarray of shape (n_samples)\n Cluster labels for each point in the dataset given to fit().\n Noisy samples are given the label -1.\n\nExamples\n--------\n>>> from sklearn.cluster import DBSCAN\n>>> import numpy as np\n>>> X = np.array([[1, 2], [2, 2], [2, 3],\n... [8, 7], [8, 8], [25, 80]])\n>>> clustering = DBSCAN(eps=3, min_samples=2).fit(X)\n>>> clustering.labels_\narray([ 0, 0, 0, 1, 1, -1])\n>>> clustering\nDBSCAN(eps=3, min_samples=2)\n\nSee Also\n--------\nOPTICS : A similar clustering at multiple values of eps. Our implementation\n is optimized for memory usage.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_dbscan.py\n`.\n\nThis implementation bulk-computes all neighborhood queries, which increases\nthe memory complexity to O(n.d) where d is the average number of neighbors,\nwhile original DBSCAN had memory complexity O(n). 
It may attract a higher\nmemory complexity when querying these nearest neighborhoods, depending\non the ``algorithm``.\n\nOne way to avoid the query complexity is to pre-compute sparse\nneighborhoods in chunks using\n:func:`NearestNeighbors.radius_neighbors_graph\n` with\n``mode='distance'``, then using ``metric='precomputed'`` here.\n\nAnother way to reduce memory and computation time is to remove\n(near-)duplicate points and use ``sample_weight`` instead.\n\n:class:`cluster.OPTICS` provides a similar clustering with lower memory\nusage.\n\nReferences\n----------\nEster, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\nAlgorithm for Discovering Clusters in Large Spatial Databases with Noise\".\nIn: Proceedings of the 2nd International Conference on Knowledge Discovery\nand Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\nSchubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\nDBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\nACM Transactions on Database Systems (TODS), 42(3), 19.", + "code": "class DBSCAN(ClusterMixin, BaseEstimator):\n \"\"\"Perform DBSCAN clustering from vector array or distance matrix.\n\n DBSCAN - Density-Based Spatial Clustering of Applications with Noise.\n Finds core samples of high density and expands clusters from them.\n Good for data which contains clusters of similar density.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n eps : float, default=0.5\n The maximum distance between two samples for one to be considered\n as in the neighborhood of the other. This is not a maximum bound\n on the distances of points within a cluster. This is the most\n important DBSCAN parameter to choose appropriately for your data set\n and distance function.\n\n min_samples : int, default=5\n The number of samples (or total weight) in a neighborhood for a point\n to be considered as a core point. This includes the point itself.\n\n metric : string, or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square. X may be a :term:`Glossary `, in which\n case only \"nonzero\" elements may be considered neighbors for DBSCAN.\n\n .. versionadded:: 0.17\n metric *precomputed* to accept precomputed sparse matrix.\n\n metric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. versionadded:: 0.19\n\n algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n The algorithm to be used by the NearestNeighbors module\n to compute pointwise distances and find nearest neighbors.\n See NearestNeighbors module documentation for details.\n\n leaf_size : int, default=30\n Leaf size passed to BallTree or cKDTree. This can affect the speed\n of the construction and query, as well as the memory required\n to store the tree. The optimal value depends\n on the nature of the problem.\n\n p : float, default=None\n The power of the Minkowski metric to be used to calculate distance\n between points. If None, then ``p=2`` (equivalent to the Euclidean\n distance).\n\n n_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
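A hedged sketch of the memory-saving route named in the Notes above: precompute a sparse radius-neighborhood graph in advance and pass it to DBSCAN with `metric='precomputed'`. The toy data reuses the docstring example; `eps=3` and `min_samples=2` follow it.

import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.neighbors import NearestNeighbors

X = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]], dtype=float)

# Sparse graph of pairwise distances within eps; absent entries are treated
# as "not neighbors", keeping memory proportional to the neighborhood sizes.
nn = NearestNeighbors(radius=3).fit(X)
D = nn.radius_neighbors_graph(X, mode='distance')

labels = DBSCAN(eps=3, min_samples=2, metric='precomputed').fit_predict(D)
# Should match the dense-input example above: array([ 0, 0, 0, 1, 1, -1])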
See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n core_sample_indices_ : ndarray of shape (n_core_samples,)\n Indices of core samples.\n\n components_ : ndarray of shape (n_core_samples, n_features)\n Copy of each core sample found by training.\n\n labels_ : ndarray of shape (n_samples)\n Cluster labels for each point in the dataset given to fit().\n Noisy samples are given the label -1.\n\n Examples\n --------\n >>> from sklearn.cluster import DBSCAN\n >>> import numpy as np\n >>> X = np.array([[1, 2], [2, 2], [2, 3],\n ... [8, 7], [8, 8], [25, 80]])\n >>> clustering = DBSCAN(eps=3, min_samples=2).fit(X)\n >>> clustering.labels_\n array([ 0, 0, 0, 1, 1, -1])\n >>> clustering\n DBSCAN(eps=3, min_samples=2)\n\n See Also\n --------\n OPTICS : A similar clustering at multiple values of eps. Our implementation\n is optimized for memory usage.\n\n Notes\n -----\n For an example, see :ref:`examples/cluster/plot_dbscan.py\n `.\n\n This implementation bulk-computes all neighborhood queries, which increases\n the memory complexity to O(n.d) where d is the average number of neighbors,\n while original DBSCAN had memory complexity O(n). It may attract a higher\n memory complexity when querying these nearest neighborhoods, depending\n on the ``algorithm``.\n\n One way to avoid the query complexity is to pre-compute sparse\n neighborhoods in chunks using\n :func:`NearestNeighbors.radius_neighbors_graph\n ` with\n ``mode='distance'``, then using ``metric='precomputed'`` here.\n\n Another way to reduce memory and computation time is to remove\n (near-)duplicate points and use ``sample_weight`` instead.\n\n :class:`cluster.OPTICS` provides a similar clustering with lower memory\n usage.\n\n References\n ----------\n Ester, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\n Algorithm for Discovering Clusters in Large Spatial Databases with Noise\".\n In: Proceedings of the 2nd International Conference on Knowledge Discovery\n and Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\n Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\n DBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\n ACM Transactions on Database Systems (TODS), 42(3), 19.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, eps=0.5, *, min_samples=5, metric='euclidean',\n metric_params=None, algorithm='auto', leaf_size=30, p=None,\n n_jobs=None):\n self.eps = eps\n self.min_samples = min_samples\n self.metric = metric\n self.metric_params = metric_params\n self.algorithm = algorithm\n self.leaf_size = leaf_size\n self.p = p\n self.n_jobs = n_jobs\n\n def fit(self, X, y=None, sample_weight=None):\n \"\"\"Perform DBSCAN clustering from features, or distance matrix.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``metric='precomputed'``. 
If a sparse matrix is provided, it will\n be converted into a sparse ``csr_matrix``.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with a\n negative weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n self\n\n \"\"\"\n X = self._validate_data(X, accept_sparse='csr')\n\n if not self.eps > 0.0:\n raise ValueError(\"eps must be positive.\")\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n\n # Calculate neighborhood for all samples. This leaves the original\n # point in, which needs to be considered later (i.e. point i is in the\n # neighborhood of point i. While True, its useless information)\n if self.metric == 'precomputed' and sparse.issparse(X):\n # set the diagonal to explicit values, as a point is its own\n # neighbor\n with warnings.catch_warnings():\n warnings.simplefilter('ignore', sparse.SparseEfficiencyWarning)\n X.setdiag(X.diagonal()) # XXX: modifies X's internals in-place\n\n neighbors_model = NearestNeighbors(\n radius=self.eps, algorithm=self.algorithm,\n leaf_size=self.leaf_size, metric=self.metric,\n metric_params=self.metric_params, p=self.p, n_jobs=self.n_jobs)\n neighbors_model.fit(X)\n # This has worst case O(n^2) memory complexity\n neighborhoods = neighbors_model.radius_neighbors(X,\n return_distance=False)\n\n if sample_weight is None:\n n_neighbors = np.array([len(neighbors)\n for neighbors in neighborhoods])\n else:\n n_neighbors = np.array([np.sum(sample_weight[neighbors])\n for neighbors in neighborhoods])\n\n # Initially, all samples are noise.\n labels = np.full(X.shape[0], -1, dtype=np.intp)\n\n # A list of all core samples found.\n core_samples = np.asarray(n_neighbors >= self.min_samples,\n dtype=np.uint8)\n dbscan_inner(core_samples, neighborhoods, labels)\n\n self.core_sample_indices_ = np.where(core_samples)[0]\n self.labels_ = labels\n\n if len(self.core_sample_indices_):\n # fix for scipy sparse indexing issue\n self.components_ = X[self.core_sample_indices_].copy()\n else:\n # no core samples\n self.components_ = np.empty((0, X.shape[1]))\n return self\n\n def fit_predict(self, X, y=None, sample_weight=None):\n \"\"\"Perform DBSCAN clustering from features or distance matrix,\n and return cluster labels.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``metric='precomputed'``. If a sparse matrix is provided, it will\n be converted into a sparse ``csr_matrix``.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with a\n negative weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Cluster labels. 
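The Notes above also suggest collapsing (near-)duplicate points into `sample_weight`. A hedged sketch under that reading (duplicated toy points and `min_samples=3` are illustrative):

import numpy as np
from sklearn.cluster import DBSCAN

X = np.array([[1, 2], [1, 2], [1, 2], [8, 7], [8, 7], [25, 80]], dtype=float)

# Collapse exact duplicates and carry their multiplicity as a weight.
X_unique, counts = np.unique(X, axis=0, return_counts=True)
db = DBSCAN(eps=3, min_samples=3).fit(X_unique, sample_weight=counts)
# db.labels_ labels the unique points; the weight of 3 makes [1, 2] a core
# sample even though it appears only once in X_unique.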
Noisy samples are given the label -1.\n \"\"\"\n self.fit(X, sample_weight=sample_weight)\n return self.labels_", + "instance_attributes": [ + { + "name": "eps", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "min_samples", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "metric", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "leaf_size", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cluster._feature_agglomeration/AgglomerationTransform", + "name": "AgglomerationTransform", + "qname": "sklearn.cluster._feature_agglomeration.AgglomerationTransform", + "decorators": [], + "superclasses": ["TransformerMixin"], + "methods": [ + "scikit-learn/sklearn.cluster._feature_agglomeration/AgglomerationTransform/transform", + "scikit-learn/sklearn.cluster._feature_agglomeration/AgglomerationTransform/inverse_transform" + ], + "is_public": false, + "reexported_by": [], + "description": "A class for feature agglomeration via the transform interface", + "docstring": "A class for feature agglomeration via the transform interface", + "code": "class AgglomerationTransform(TransformerMixin):\n \"\"\"\n A class for feature agglomeration via the transform interface\n \"\"\"\n\n def transform(self, X):\n \"\"\"\n Transform a new matrix using the built clustering\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or (n_samples,)\n A M by N array of M observations in N dimensions or a length\n M array of M one-dimensional observations.\n\n Returns\n -------\n Y : ndarray of shape (n_samples, n_clusters) or (n_clusters,)\n The pooled values for each feature cluster.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, reset=False)\n if self.pooling_func == np.mean and not issparse(X):\n size = np.bincount(self.labels_)\n n_samples = X.shape[0]\n # a fast way to compute the mean of grouped features\n nX = np.array([np.bincount(self.labels_, X[i, :]) / size\n for i in range(n_samples)])\n else:\n nX = [self.pooling_func(X[:, self.labels_ == l], axis=1)\n for l in np.unique(self.labels_)]\n nX = np.array(nX).T\n return nX\n\n def inverse_transform(self, Xred):\n \"\"\"\n Inverse the transformation.\n Return a vector of size nb_features with the values of Xred assigned\n to each group of features\n\n Parameters\n ----------\n Xred : array-like of shape (n_samples, n_clusters) or (n_clusters,)\n The values to be assigned to each cluster of samples\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features) or (n_features,)\n A vector of size n_samples with the values of Xred assigned to\n each of the cluster of samples.\n \"\"\"\n check_is_fitted(self)\n\n unil, inverse = np.unique(self.labels_, return_inverse=True)\n return Xred[..., inverse]", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans", + "name": "KMeans", + "qname": "sklearn.cluster._kmeans.KMeans", + "decorators": [], + "superclasses": ["TransformerMixin", "ClusterMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__", + "scikit-learn/sklearn.cluster._kmeans/KMeans/_check_params", + "scikit-learn/sklearn.cluster._kmeans/KMeans/_validate_center_shape", + "scikit-learn/sklearn.cluster._kmeans/KMeans/_check_test_data", + "scikit-learn/sklearn.cluster._kmeans/KMeans/_check_mkl_vcomp", + 
"scikit-learn/sklearn.cluster._kmeans/KMeans/_init_centroids", + "scikit-learn/sklearn.cluster._kmeans/KMeans/fit", + "scikit-learn/sklearn.cluster._kmeans/KMeans/fit_predict", + "scikit-learn/sklearn.cluster._kmeans/KMeans/fit_transform", + "scikit-learn/sklearn.cluster._kmeans/KMeans/transform", + "scikit-learn/sklearn.cluster._kmeans/KMeans/_transform", + "scikit-learn/sklearn.cluster._kmeans/KMeans/predict", + "scikit-learn/sklearn.cluster._kmeans/KMeans/score", + "scikit-learn/sklearn.cluster._kmeans/KMeans/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "K-Means clustering.\n\nRead more in the :ref:`User Guide `.", + "docstring": "K-Means clustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nn_clusters : int, default=8\n The number of clusters to form as well as the number of\n centroids to generate.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. See section\n Notes in k_init for more details.\n\n 'random': choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n\n If an array is passed, it should be of shape (n_clusters, n_features)\n and gives the initial centers.\n\n If a callable is passed, it should take arguments X, n_clusters and a\n random state and return an initialization.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm for a\n single run.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n\nprecompute_distances : {'auto', True, False}, default='auto'\n Precompute distances (faster but takes more memory).\n\n 'auto' : do not precompute distances if n_samples * n_clusters > 12\n million. This corresponds to about 100MB overhead per job using\n double precision.\n\n True : always precompute distances.\n\n False : never precompute distances.\n\n .. deprecated:: 0.23\n 'precompute_distances' was deprecated in version 0.22 and will be\n removed in 1.0 (renaming of 0.25). It has no effect.\n\nverbose : int, default=0\n Verbosity mode.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\ncopy_x : bool, default=True\n When pre-computing distances it is more numerically accurate to center\n the data first. If copy_x is True (default), then the original data is\n not modified. If False, the original data is modified, and put back\n before the function returns, but small numerical differences may be\n introduced by subtracting and then adding the data mean. Note that if\n the original data is not C-contiguous, a copy will be made even if\n copy_x is False. If the original data is sparse, but not in CSR format,\n a copy will be made even if copy_x is False.\n\nn_jobs : int, default=None\n The number of OpenMP threads to use for the computation. 
Parallelism is\n    sample-wise on the main cython loop which assigns each sample to its\n    closest center.\n\n    ``None`` or ``-1`` means using all processors.\n\n    .. deprecated:: 0.23\n        ``n_jobs`` was deprecated in version 0.23 and will be removed in\n        1.0 (renaming of 0.25).\n\nalgorithm : {\"auto\", \"full\", \"elkan\"}, default=\"auto\"\n    K-means algorithm to use. The classical EM-style algorithm is \"full\".\n    The \"elkan\" variation is more efficient on data with well-defined\n    clusters, by using the triangle inequality. However it's more memory\n    intensive due to the allocation of an extra array of shape\n    (n_samples, n_clusters).\n\n    For now \"auto\" (kept for backward compatibility) chooses \"elkan\" but it\n    might change in the future for a better heuristic.\n\n    .. versionchanged:: 0.18\n        Added Elkan algorithm\n\nAttributes\n----------\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n    Coordinates of cluster centers. If the algorithm stops before fully\n    converging (see ``tol`` and ``max_iter``), these will not be\n    consistent with ``labels_``.\n\nlabels_ : ndarray of shape (n_samples,)\n    Labels of each point\n\ninertia_ : float\n    Sum of squared distances of samples to their closest cluster center.\n\nn_iter_ : int\n    Number of iterations run.\n\nSee Also\n--------\nMiniBatchKMeans : Alternative online implementation that does incremental\n    updates of the centers positions using mini-batches.\n    For large scale learning (say n_samples > 10k) MiniBatchKMeans is\n    probably much faster than the default batch implementation.\n\nNotes\n-----\nThe k-means problem is solved using either Lloyd's or Elkan's algorithm.\n\nThe average complexity is given by O(k n T), where n is the number of\nsamples and T is the number of iteration.\n\nThe worst case complexity is given by O(n^(k+2/p)) with\nn = n_samples, p = n_features. (D. Arthur and S. Vassilvitskii,\n'How slow is the k-means method?' SoCG2006)\n\nIn practice, the k-means algorithm is very fast (one of the fastest\nclustering algorithms available), but it falls in local minima. That's why\nit can be useful to restart it several times.\n\nIf the algorithm stops before fully converging (because of ``tol`` or\n``max_iter``), ``labels_`` and ``cluster_centers_`` will not be consistent,\ni.e. the ``cluster_centers_`` will not be the means of the points in each\ncluster. Also, the estimator will reassign ``labels_`` after the last\niteration to make ``labels_`` consistent with ``predict`` on the training\nset.\n\nExamples\n--------\n\n>>> from sklearn.cluster import KMeans\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n...               [10, 2], [10, 4], [10, 0]])\n>>> kmeans = KMeans(n_clusters=2, random_state=0).fit(X)\n>>> kmeans.labels_\narray([1, 1, 1, 0, 0, 0], dtype=int32)\n>>> kmeans.predict([[0, 0], [12, 3]])\narray([1, 0], dtype=int32)\n>>> kmeans.cluster_centers_\narray([[10.,  2.],\n       [ 1.,  2.]])", + "code": "class KMeans(TransformerMixin, ClusterMixin, BaseEstimator):\n    \"\"\"K-Means clustering.\n\n    Read more in the :ref:`User Guide `.\n\n    Parameters\n    ----------\n\n    n_clusters : int, default=8\n        The number of clusters to form as well as the number of\n        centroids to generate.\n\n    init : {'k-means++', 'random'}, callable or array-like of shape \\\n            (n_clusters, n_features), default='k-means++'\n        Method for initialization:\n\n        'k-means++' : selects initial cluster centers for k-mean\n        clustering in a smart way to speed up convergence. 
See section\n        Notes in k_init for more details.\n\n        'random': choose `n_clusters` observations (rows) at random from data\n        for the initial centroids.\n\n        If an array is passed, it should be of shape (n_clusters, n_features)\n        and gives the initial centers.\n\n        If a callable is passed, it should take arguments X, n_clusters and a\n        random state and return an initialization.\n\n    n_init : int, default=10\n        Number of time the k-means algorithm will be run with different\n        centroid seeds. The final results will be the best output of\n        n_init consecutive runs in terms of inertia.\n\n    max_iter : int, default=300\n        Maximum number of iterations of the k-means algorithm for a\n        single run.\n\n    tol : float, default=1e-4\n        Relative tolerance with regards to Frobenius norm of the difference\n        in the cluster centers of two consecutive iterations to declare\n        convergence.\n\n    precompute_distances : {'auto', True, False}, default='auto'\n        Precompute distances (faster but takes more memory).\n\n        'auto' : do not precompute distances if n_samples * n_clusters > 12\n        million. This corresponds to about 100MB overhead per job using\n        double precision.\n\n        True : always precompute distances.\n\n        False : never precompute distances.\n\n        .. deprecated:: 0.23\n            'precompute_distances' was deprecated in version 0.22 and will be\n            removed in 1.0 (renaming of 0.25). It has no effect.\n\n    verbose : int, default=0\n        Verbosity mode.\n\n    random_state : int, RandomState instance or None, default=None\n        Determines random number generation for centroid initialization. Use\n        an int to make the randomness deterministic.\n        See :term:`Glossary `.\n\n    copy_x : bool, default=True\n        When pre-computing distances it is more numerically accurate to center\n        the data first. If copy_x is True (default), then the original data is\n        not modified. If False, the original data is modified, and put back\n        before the function returns, but small numerical differences may be\n        introduced by subtracting and then adding the data mean. Note that if\n        the original data is not C-contiguous, a copy will be made even if\n        copy_x is False. If the original data is sparse, but not in CSR format,\n        a copy will be made even if copy_x is False.\n\n    n_jobs : int, default=None\n        The number of OpenMP threads to use for the computation. Parallelism is\n        sample-wise on the main cython loop which assigns each sample to its\n        closest center.\n\n        ``None`` or ``-1`` means using all processors.\n\n        .. deprecated:: 0.23\n            ``n_jobs`` was deprecated in version 0.23 and will be removed in\n            1.0 (renaming of 0.25).\n\n    algorithm : {\"auto\", \"full\", \"elkan\"}, default=\"auto\"\n        K-means algorithm to use. The classical EM-style algorithm is \"full\".\n        The \"elkan\" variation is more efficient on data with well-defined\n        clusters, by using the triangle inequality. However it's more memory\n        intensive due to the allocation of an extra array of shape\n        (n_samples, n_clusters).\n\n        For now \"auto\" (kept for backward compatibility) chooses \"elkan\" but it\n        might change in the future for a better heuristic.\n\n        .. versionchanged:: 0.18\n            Added Elkan algorithm\n\n    Attributes\n    ----------\n    cluster_centers_ : ndarray of shape (n_clusters, n_features)\n        Coordinates of cluster centers. 
If the algorithm stops before fully\n converging (see ``tol`` and ``max_iter``), these will not be\n consistent with ``labels_``.\n\n labels_ : ndarray of shape (n_samples,)\n Labels of each point\n\n inertia_ : float\n Sum of squared distances of samples to their closest cluster center.\n\n n_iter_ : int\n Number of iterations run.\n\n See Also\n --------\n MiniBatchKMeans : Alternative online implementation that does incremental\n updates of the centers positions using mini-batches.\n For large scale learning (say n_samples > 10k) MiniBatchKMeans is\n probably much faster than the default batch implementation.\n\n Notes\n -----\n The k-means problem is solved using either Lloyd's or Elkan's algorithm.\n\n The average complexity is given by O(k n T), where n is the number of\n samples and T is the number of iteration.\n\n The worst case complexity is given by O(n^(k+2/p)) with\n n = n_samples, p = n_features. (D. Arthur and S. Vassilvitskii,\n 'How slow is the k-means method?' SoCG2006)\n\n In practice, the k-means algorithm is very fast (one of the fastest\n clustering algorithms available), but it falls in local minima. That's why\n it can be useful to restart it several times.\n\n If the algorithm stops before fully converging (because of ``tol`` or\n ``max_iter``), ``labels_`` and ``cluster_centers_`` will not be consistent,\n i.e. the ``cluster_centers_`` will not be the means of the points in each\n cluster. Also, the estimator will reassign ``labels_`` after the last\n iteration to make ``labels_`` consistent with ``predict`` on the training\n set.\n\n Examples\n --------\n\n >>> from sklearn.cluster import KMeans\n >>> import numpy as np\n >>> X = np.array([[1, 2], [1, 4], [1, 0],\n ... [10, 2], [10, 4], [10, 0]])\n >>> kmeans = KMeans(n_clusters=2, random_state=0).fit(X)\n >>> kmeans.labels_\n array([1, 1, 1, 0, 0, 0], dtype=int32)\n >>> kmeans.predict([[0, 0], [12, 3]])\n array([1, 0], dtype=int32)\n >>> kmeans.cluster_centers_\n array([[10., 2.],\n [ 1., 2.]])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_clusters=8, *, init='k-means++', n_init=10,\n max_iter=300, tol=1e-4, precompute_distances='deprecated',\n verbose=0, random_state=None, copy_x=True,\n n_jobs='deprecated', algorithm='auto'):\n\n self.n_clusters = n_clusters\n self.init = init\n self.max_iter = max_iter\n self.tol = tol\n self.precompute_distances = precompute_distances\n self.n_init = n_init\n self.verbose = verbose\n self.random_state = random_state\n self.copy_x = copy_x\n self.n_jobs = n_jobs\n self.algorithm = algorithm\n\n def _check_params(self, X):\n # precompute_distances\n if self.precompute_distances != 'deprecated':\n warnings.warn(\"'precompute_distances' was deprecated in version \"\n \"0.23 and will be removed in 1.0 (renaming of 0.25)\"\n \". 
It has no effect\", FutureWarning)\n\n # n_jobs\n if self.n_jobs != 'deprecated':\n warnings.warn(\"'n_jobs' was deprecated in version 0.23 and will be\"\n \" removed in 1.0 (renaming of 0.25).\", FutureWarning)\n self._n_threads = self.n_jobs\n else:\n self._n_threads = None\n self._n_threads = _openmp_effective_n_threads(self._n_threads)\n\n # n_init\n if self.n_init <= 0:\n raise ValueError(\n f\"n_init should be > 0, got {self.n_init} instead.\")\n self._n_init = self.n_init\n\n # max_iter\n if self.max_iter <= 0:\n raise ValueError(\n f\"max_iter should be > 0, got {self.max_iter} instead.\")\n\n # n_clusters\n if X.shape[0] < self.n_clusters:\n raise ValueError(f\"n_samples={X.shape[0]} should be >= \"\n f\"n_clusters={self.n_clusters}.\")\n\n # tol\n self._tol = _tolerance(X, self.tol)\n\n # algorithm\n if self.algorithm not in (\"auto\", \"full\", \"elkan\"):\n raise ValueError(f\"Algorithm must be 'auto', 'full' or 'elkan', \"\n f\"got {self.algorithm} instead.\")\n\n self._algorithm = self.algorithm\n if self._algorithm == \"auto\":\n self._algorithm = \"full\" if self.n_clusters == 1 else \"elkan\"\n if self._algorithm == \"elkan\" and self.n_clusters == 1:\n warnings.warn(\"algorithm='elkan' doesn't make sense for a single \"\n \"cluster. Using 'full' instead.\", RuntimeWarning)\n self._algorithm = \"full\"\n\n # init\n if not (hasattr(self.init, '__array__') or callable(self.init)\n or (isinstance(self.init, str)\n and self.init in [\"k-means++\", \"random\"])):\n raise ValueError(\n f\"init should be either 'k-means++', 'random', a ndarray or a \"\n f\"callable, got '{self.init}' instead.\")\n\n if hasattr(self.init, '__array__') and self._n_init != 1:\n warnings.warn(\n f\"Explicit initial center position passed: performing only\"\n f\" one init in {self.__class__.__name__} instead of \"\n f\"n_init={self._n_init}.\", RuntimeWarning, stacklevel=2)\n self._n_init = 1\n\n def _validate_center_shape(self, X, centers):\n \"\"\"Check if centers is compatible with X and n_clusters.\"\"\"\n if centers.shape[0] != self.n_clusters:\n raise ValueError(\n f\"The shape of the initial centers {centers.shape} does not \"\n f\"match the number of clusters {self.n_clusters}.\")\n if centers.shape[1] != X.shape[1]:\n raise ValueError(\n f\"The shape of the initial centers {centers.shape} does not \"\n f\"match the number of features of the data {X.shape[1]}.\")\n\n def _check_test_data(self, X):\n X = self._validate_data(X, accept_sparse='csr', reset=False,\n dtype=[np.float64, np.float32],\n order='C', accept_large_sparse=False)\n return X\n\n def _check_mkl_vcomp(self, X, n_samples):\n \"\"\"Warns when vcomp and mkl are both present\"\"\"\n # The BLAS call inside a prange in lloyd_iter_chunked_dense is known to\n # cause a small memory leak when there are less chunks than the number\n # of available threads. It only happens when the OpenMP library is\n # vcomp (microsoft OpenMP) and the BLAS library is MKL. see #18653\n if sp.issparse(X):\n return\n\n active_threads = int(np.ceil(n_samples / CHUNK_SIZE))\n if active_threads < self._n_threads:\n modules = threadpool_info()\n has_vcomp = \"vcomp\" in [module[\"prefix\"] for module in modules]\n has_mkl = (\"mkl\", \"intel\") in [\n (module[\"internal_api\"], module.get(\"threading_layer\", None))\n for module in modules]\n if has_vcomp and has_mkl:\n if not hasattr(self, \"batch_size\"): # KMeans\n warnings.warn(\n f\"KMeans is known to have a memory leak on Windows \"\n f\"with MKL, when there are less chunks than available \"\n f\"threads. 
You can avoid it by setting the environment\"\n f\" variable OMP_NUM_THREADS={active_threads}.\")\n else: # MiniBatchKMeans\n warnings.warn(\n f\"MiniBatchKMeans is known to have a memory leak on \"\n f\"Windows with MKL, when there are less chunks than \"\n f\"available threads. You can prevent it by setting \"\n f\"batch_size >= {self._n_threads * CHUNK_SIZE} or by \"\n f\"setting the environment variable \"\n f\"OMP_NUM_THREADS={active_threads}\")\n\n def _init_centroids(self, X, x_squared_norms, init, random_state,\n init_size=None):\n \"\"\"Compute the initial centroids.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\n x_squared_norms : ndarray of shape (n_samples,)\n Squared euclidean norm of each data point. Pass it if you have it\n at hands already to avoid it being recomputed here.\n\n init : {'k-means++', 'random'}, callable or ndarray of shape \\\n (n_clusters, n_features)\n Method for initialization.\n\n random_state : RandomState instance\n Determines random number generation for centroid initialization.\n See :term:`Glossary `.\n\n init_size : int, default=None\n Number of samples to randomly sample for speeding up the\n initialization (sometimes at the expense of accuracy).\n\n Returns\n -------\n centers : ndarray of shape (n_clusters, n_features)\n \"\"\"\n n_samples = X.shape[0]\n n_clusters = self.n_clusters\n\n if init_size is not None and init_size < n_samples:\n init_indices = random_state.randint(0, n_samples, init_size)\n X = X[init_indices]\n x_squared_norms = x_squared_norms[init_indices]\n n_samples = X.shape[0]\n\n if isinstance(init, str) and init == 'k-means++':\n centers, _ = _kmeans_plusplus(X, n_clusters,\n random_state=random_state,\n x_squared_norms=x_squared_norms)\n elif isinstance(init, str) and init == 'random':\n seeds = random_state.permutation(n_samples)[:n_clusters]\n centers = X[seeds]\n elif hasattr(init, '__array__'):\n centers = init\n elif callable(init):\n centers = init(X, n_clusters, random_state=random_state)\n centers = check_array(\n centers, dtype=X.dtype, copy=False, order='C')\n self._validate_center_shape(X, centers)\n\n if sp.issparse(centers):\n centers = centers.toarray()\n\n return centers\n\n def fit(self, X, y=None, sample_weight=None):\n \"\"\"Compute k-means clustering.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training instances to cluster. It must be noted that the data\n will be converted to C ordering, which will cause a memory\n copy if the given data is not C-contiguous.\n If a sparse matrix is passed, a copy will be made if it's not in\n CSR format.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\n .. 
versionadded:: 0.20\n\n Returns\n -------\n self\n Fitted estimator.\n \"\"\"\n X = self._validate_data(X, accept_sparse='csr',\n dtype=[np.float64, np.float32],\n order='C', copy=self.copy_x,\n accept_large_sparse=False)\n\n self._check_params(X)\n random_state = check_random_state(self.random_state)\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n # Validate init array\n init = self.init\n if hasattr(init, '__array__'):\n init = check_array(init, dtype=X.dtype, copy=True, order='C')\n self._validate_center_shape(X, init)\n\n # subtract of mean of x for more accurate distance computations\n if not sp.issparse(X):\n X_mean = X.mean(axis=0)\n # The copy was already done above\n X -= X_mean\n\n if hasattr(init, '__array__'):\n init -= X_mean\n\n # precompute squared norms of data points\n x_squared_norms = row_norms(X, squared=True)\n\n if self._algorithm == \"full\":\n kmeans_single = _kmeans_single_lloyd\n self._check_mkl_vcomp(X, X.shape[0])\n else:\n kmeans_single = _kmeans_single_elkan\n\n best_inertia = None\n\n for i in range(self._n_init):\n # Initialize centers\n centers_init = self._init_centroids(\n X, x_squared_norms=x_squared_norms, init=init,\n random_state=random_state)\n if self.verbose:\n print(\"Initialization complete\")\n\n # run a k-means once\n labels, inertia, centers, n_iter_ = kmeans_single(\n X, sample_weight, centers_init, max_iter=self.max_iter,\n verbose=self.verbose, tol=self._tol,\n x_squared_norms=x_squared_norms, n_threads=self._n_threads)\n\n # determine if these results are the best so far\n if best_inertia is None or inertia < best_inertia:\n best_labels = labels\n best_centers = centers\n best_inertia = inertia\n best_n_iter = n_iter_\n\n if not sp.issparse(X):\n if not self.copy_x:\n X += X_mean\n best_centers += X_mean\n\n distinct_clusters = len(set(best_labels))\n if distinct_clusters < self.n_clusters:\n warnings.warn(\n \"Number of distinct clusters ({}) found smaller than \"\n \"n_clusters ({}). Possibly due to duplicate points \"\n \"in X.\".format(distinct_clusters, self.n_clusters),\n ConvergenceWarning, stacklevel=2)\n\n self.cluster_centers_ = best_centers\n self.labels_ = best_labels\n self.inertia_ = best_inertia\n self.n_iter_ = best_n_iter\n return self\n\n def fit_predict(self, X, y=None, sample_weight=None):\n \"\"\"Compute cluster centers and predict cluster index for each sample.\n\n Convenience method; equivalent to calling fit(X) followed by\n predict(X).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to.\n \"\"\"\n return self.fit(X, sample_weight=sample_weight).labels_\n\n def fit_transform(self, X, y=None, sample_weight=None):\n \"\"\"Compute clustering and transform X to cluster-distance space.\n\n Equivalent to fit(X).transform(X), but more efficiently implemented.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. 
If None, all observations\n are assigned equal weight.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_clusters)\n X transformed in the new space.\n \"\"\"\n # Currently, this just skips a copy of the data if it is not in\n # np.array or CSR format already.\n # XXX This skips _check_test_data, which may change the dtype;\n # we should refactor the input validation.\n return self.fit(X, sample_weight=sample_weight)._transform(X)\n\n def transform(self, X):\n \"\"\"Transform X to a cluster-distance space.\n\n In the new space, each dimension is the distance to the cluster\n centers. Note that even if X is sparse, the array returned by\n `transform` will typically be dense.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_clusters)\n X transformed in the new space.\n \"\"\"\n check_is_fitted(self)\n\n X = self._check_test_data(X)\n return self._transform(X)\n\n def _transform(self, X):\n \"\"\"Guts of transform method; no input validation.\"\"\"\n return euclidean_distances(X, self.cluster_centers_)\n\n def predict(self, X, sample_weight=None):\n \"\"\"Predict the closest cluster each sample in X belongs to.\n\n In the vector quantization literature, `cluster_centers_` is called\n the code book and each value returned by `predict` is the index of\n the closest code in the code book.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to.\n \"\"\"\n check_is_fitted(self)\n\n X = self._check_test_data(X)\n x_squared_norms = row_norms(X, squared=True)\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n return _labels_inertia(X, sample_weight, x_squared_norms,\n self.cluster_centers_, self._n_threads)[0]\n\n def score(self, X, y=None, sample_weight=None):\n \"\"\"Opposite of the value of X on the K-means objective.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. 
If None, all observations\n are assigned equal weight.\n\n Returns\n -------\n score : float\n Opposite of the value of X on the K-means objective.\n \"\"\"\n check_is_fitted(self)\n\n X = self._check_test_data(X)\n x_squared_norms = row_norms(X, squared=True)\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n return -_labels_inertia(X, sample_weight, x_squared_norms,\n self.cluster_centers_)[1]\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n },\n }", + "instance_attributes": [ + { + "name": "n_clusters", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "init", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "precompute_distances", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_init", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "copy_x", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "n_jobs", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "_n_threads", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "_n_init", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "_tol", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "_algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans", + "name": "MiniBatchKMeans", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans", + "decorators": [], + "superclasses": ["KMeans"], + "methods": [ + "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__", + "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/counts_@getter", + "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/init_size_@getter", + "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/random_state_@getter", + "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/_check_params", + "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/fit", + "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/_labels_inertia_minibatch", + "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/partial_fit", + "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/predict", + "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Mini-Batch K-Means clustering.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Mini-Batch K-Means clustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nn_clusters : int, default=8\n The number of clusters to form as well as the number of\n centroids to generate.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. 
See section\n    Notes in k_init for more details.\n\n    'random': choose `n_clusters` observations (rows) at random from data\n    for the initial centroids.\n\n    If an array is passed, it should be of shape (n_clusters, n_features)\n    and gives the initial centers.\n\n    If a callable is passed, it should take arguments X, n_clusters and a\n    random state and return an initialization.\n\nmax_iter : int, default=100\n    Maximum number of iterations over the complete dataset before\n    stopping independently of any early stopping criterion heuristics.\n\nbatch_size : int, default=100\n    Size of the mini batches.\n\nverbose : int, default=0\n    Verbosity mode.\n\ncompute_labels : bool, default=True\n    Compute label assignment and inertia for the complete dataset\n    once the minibatch optimization has converged in fit.\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines random number generation for centroid initialization and\n    random reassignment. Use an int to make the randomness deterministic.\n    See :term:`Glossary `.\n\ntol : float, default=0.0\n    Control early stopping based on the relative center changes as\n    measured by a smoothed, variance-normalized of the mean center\n    squared position changes. This early stopping heuristic is\n    closer to the one used for the batch variant of the algorithms\n    but induces a slight computational and memory overhead over the\n    inertia heuristic.\n\n    To disable convergence detection based on normalized center\n    change, set tol to 0.0 (default).\n\nmax_no_improvement : int, default=10\n    Control early stopping based on the consecutive number of mini\n    batches that does not yield an improvement on the smoothed inertia.\n\n    To disable convergence detection based on inertia, set\n    max_no_improvement to None.\n\ninit_size : int, default=None\n    Number of samples to randomly sample for speeding up the\n    initialization (sometimes at the expense of accuracy): the\n    only algorithm is initialized by running a batch KMeans on a\n    random subset of the data. This needs to be larger than n_clusters.\n\n    If `None`, `init_size= 3 * batch_size`.\n\nn_init : int, default=3\n    Number of random initializations that are tried.\n    In contrast to KMeans, the algorithm is only run once, using the\n    best of the ``n_init`` initializations as measured by inertia.\n\nreassignment_ratio : float, default=0.01\n    Control the fraction of the maximum number of counts for a\n    center to be reassigned. A higher value means that low count\n    centers are more easily reassigned, which means that the\n    model will take longer to converge, but should converge in a\n    better clustering.\n\nAttributes\n----------\n\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n    Coordinates of cluster centers.\n\nlabels_ : int\n    Labels of each point (if compute_labels is set to True).\n\ninertia_ : float\n    The value of the inertia criterion associated with the chosen\n    partition (if compute_labels is set to True). The inertia is\n    defined as the sum of square distances of samples to their nearest\n    neighbor.\n\nn_iter_ : int\n    Number of batches processed.\n\ncounts_ : ndarray of shape (n_clusters,)\n    Weight sum of each cluster.\n\n    .. deprecated:: 0.24\n       This attribute is deprecated in 0.24 and will be removed in\n       1.1 (renaming of 0.26).\n\ninit_size_ : int\n    The effective number of samples used for the initialization.\n\n    .. 
deprecated:: 0.24\n This attribute is deprecated in 0.24 and will be removed in\n 1.1 (renaming of 0.26).\n\nSee Also\n--------\nKMeans : The classic implementation of the clustering method based on the\n Lloyd's algorithm. It consumes the whole set of input data at each\n iteration.\n\nNotes\n-----\nSee https://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf\n\nExamples\n--------\n>>> from sklearn.cluster import MiniBatchKMeans\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... [4, 2], [4, 0], [4, 4],\n... [4, 5], [0, 1], [2, 2],\n... [3, 2], [5, 5], [1, -1]])\n>>> # manually fit on batches\n>>> kmeans = MiniBatchKMeans(n_clusters=2,\n... random_state=0,\n... batch_size=6)\n>>> kmeans = kmeans.partial_fit(X[0:6,:])\n>>> kmeans = kmeans.partial_fit(X[6:12,:])\n>>> kmeans.cluster_centers_\narray([[2. , 1. ],\n [3.5, 4.5]])\n>>> kmeans.predict([[0, 0], [4, 4]])\narray([0, 1], dtype=int32)\n>>> # fit on the whole data\n>>> kmeans = MiniBatchKMeans(n_clusters=2,\n... random_state=0,\n... batch_size=6,\n... max_iter=10).fit(X)\n>>> kmeans.cluster_centers_\narray([[3.95918367, 2.40816327],\n [1.12195122, 1.3902439 ]])\n>>> kmeans.predict([[0, 0], [4, 4]])\narray([1, 0], dtype=int32)", + "code": "class MiniBatchKMeans(KMeans):\n \"\"\"\n Mini-Batch K-Means clustering.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n\n n_clusters : int, default=8\n The number of clusters to form as well as the number of\n centroids to generate.\n\n init : {'k-means++', 'random'}, callable or array-like of shape \\\n (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. See section\n Notes in k_init for more details.\n\n 'random': choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n\n If an array is passed, it should be of shape (n_clusters, n_features)\n and gives the initial centers.\n\n If a callable is passed, it should take arguments X, n_clusters and a\n random state and return an initialization.\n\n max_iter : int, default=100\n Maximum number of iterations over the complete dataset before\n stopping independently of any early stopping criterion heuristics.\n\n batch_size : int, default=100\n Size of the mini batches.\n\n verbose : int, default=0\n Verbosity mode.\n\n compute_labels : bool, default=True\n Compute label assignment and inertia for the complete dataset\n once the minibatch optimization has converged in fit.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization and\n random reassignment. Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\n tol : float, default=0.0\n Control early stopping based on the relative center changes as\n measured by a smoothed, variance-normalized of the mean center\n squared position changes. 
This early stopping heuristic is\n        closer to the one used for the batch variant of the algorithms\n        but induces a slight computational and memory overhead over the\n        inertia heuristic.\n\n        To disable convergence detection based on normalized center\n        change, set tol to 0.0 (default).\n\n    max_no_improvement : int, default=10\n        Control early stopping based on the consecutive number of mini\n        batches that does not yield an improvement on the smoothed inertia.\n\n        To disable convergence detection based on inertia, set\n        max_no_improvement to None.\n\n    init_size : int, default=None\n        Number of samples to randomly sample for speeding up the\n        initialization (sometimes at the expense of accuracy): the\n        only algorithm is initialized by running a batch KMeans on a\n        random subset of the data. This needs to be larger than n_clusters.\n\n        If `None`, `init_size= 3 * batch_size`.\n\n    n_init : int, default=3\n        Number of random initializations that are tried.\n        In contrast to KMeans, the algorithm is only run once, using the\n        best of the ``n_init`` initializations as measured by inertia.\n\n    reassignment_ratio : float, default=0.01\n        Control the fraction of the maximum number of counts for a\n        center to be reassigned. A higher value means that low count\n        centers are more easily reassigned, which means that the\n        model will take longer to converge, but should converge in a\n        better clustering.\n\n    Attributes\n    ----------\n\n    cluster_centers_ : ndarray of shape (n_clusters, n_features)\n        Coordinates of cluster centers.\n\n    labels_ : int\n        Labels of each point (if compute_labels is set to True).\n\n    inertia_ : float\n        The value of the inertia criterion associated with the chosen\n        partition (if compute_labels is set to True). The inertia is\n        defined as the sum of square distances of samples to their nearest\n        neighbor.\n\n    n_iter_ : int\n        Number of batches processed.\n\n    counts_ : ndarray of shape (n_clusters,)\n        Weight sum of each cluster.\n\n        .. deprecated:: 0.24\n           This attribute is deprecated in 0.24 and will be removed in\n           1.1 (renaming of 0.26).\n\n    init_size_ : int\n        The effective number of samples used for the initialization.\n\n        .. deprecated:: 0.24\n           This attribute is deprecated in 0.24 and will be removed in\n           1.1 (renaming of 0.26).\n\n    See Also\n    --------\n    KMeans : The classic implementation of the clustering method based on the\n        Lloyd's algorithm. It consumes the whole set of input data at each\n        iteration.\n\n    Notes\n    -----\n    See https://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf\n\n    Examples\n    --------\n    >>> from sklearn.cluster import MiniBatchKMeans\n    >>> import numpy as np\n    >>> X = np.array([[1, 2], [1, 4], [1, 0],\n    ...               [4, 2], [4, 0], [4, 4],\n    ...               [4, 5], [0, 1], [2, 2],\n    ...               [3, 2], [5, 5], [1, -1]])\n    >>> # manually fit on batches\n    >>> kmeans = MiniBatchKMeans(n_clusters=2,\n    ...                          random_state=0,\n    ...                          batch_size=6)\n    >>> kmeans = kmeans.partial_fit(X[0:6,:])\n    >>> kmeans = kmeans.partial_fit(X[6:12,:])\n    >>> kmeans.cluster_centers_\n    array([[2. , 1. ],\n           [3.5, 4.5]])\n    >>> kmeans.predict([[0, 0], [4, 4]])\n    array([0, 1], dtype=int32)\n    >>> # fit on the whole data\n    >>> kmeans = MiniBatchKMeans(n_clusters=2,\n    ...                          random_state=0,\n    ...                          batch_size=6,\n    ... 
max_iter=10).fit(X)\n >>> kmeans.cluster_centers_\n array([[3.95918367, 2.40816327],\n [1.12195122, 1.3902439 ]])\n >>> kmeans.predict([[0, 0], [4, 4]])\n array([1, 0], dtype=int32)\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_clusters=8, *, init='k-means++', max_iter=100,\n batch_size=100, verbose=0, compute_labels=True,\n random_state=None, tol=0.0, max_no_improvement=10,\n init_size=None, n_init=3, reassignment_ratio=0.01):\n\n super().__init__(\n n_clusters=n_clusters, init=init, max_iter=max_iter,\n verbose=verbose, random_state=random_state, tol=tol, n_init=n_init)\n\n self.max_no_improvement = max_no_improvement\n self.batch_size = batch_size\n self.compute_labels = compute_labels\n self.init_size = init_size\n self.reassignment_ratio = reassignment_ratio\n\n @deprecated(\"The attribute 'counts_' is deprecated in 0.24\" # type: ignore\n \" and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def counts_(self):\n return self._counts\n\n @deprecated(\"The attribute 'init_size_' is deprecated in \" # type: ignore\n \"0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def init_size_(self):\n return self._init_size\n\n @deprecated(\"The attribute 'random_state_' is deprecated \" # type: ignore\n \"in 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def random_state_(self):\n return getattr(self, \"_random_state\", None)\n\n def _check_params(self, X):\n super()._check_params(X)\n\n # max_no_improvement\n if self.max_no_improvement is not None and self.max_no_improvement < 0:\n raise ValueError(\n f\"max_no_improvement should be >= 0, got \"\n f\"{self.max_no_improvement} instead.\")\n\n # batch_size\n if self.batch_size <= 0:\n raise ValueError(\n f\"batch_size should be > 0, got {self.batch_size} instead.\")\n\n # init_size\n if self.init_size is not None and self.init_size <= 0:\n raise ValueError(\n f\"init_size should be > 0, got {self.init_size} instead.\")\n self._init_size = self.init_size\n if self._init_size is None:\n self._init_size = 3 * self.batch_size\n if self._init_size < self.n_clusters:\n self._init_size = 3 * self.n_clusters\n elif self._init_size < self.n_clusters:\n warnings.warn(\n f\"init_size={self._init_size} should be larger than \"\n f\"n_clusters={self.n_clusters}. Setting it to \"\n f\"min(3*n_clusters, n_samples)\",\n RuntimeWarning, stacklevel=2)\n self._init_size = 3 * self.n_clusters\n self._init_size = min(self._init_size, X.shape[0])\n\n # reassignment_ratio\n if self.reassignment_ratio < 0:\n raise ValueError(\n f\"reassignment_ratio should be >= 0, got \"\n f\"{self.reassignment_ratio} instead.\")\n\n def fit(self, X, y=None, sample_weight=None):\n \"\"\"Compute the centroids on X by chunking it into mini-batches.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training instances to cluster. It must be noted that the data\n will be converted to C ordering, which will cause a memory copy\n if the given data is not C-contiguous.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight (default: None).\n\n .. 
versionadded:: 0.20\n\n Returns\n -------\n self\n \"\"\"\n X = self._validate_data(X, accept_sparse='csr',\n dtype=[np.float64, np.float32],\n order='C', accept_large_sparse=False)\n\n self._check_params(X)\n random_state = check_random_state(self.random_state)\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n # Validate init array\n init = self.init\n if hasattr(init, '__array__'):\n init = check_array(init, dtype=X.dtype, copy=True, order='C')\n self._validate_center_shape(X, init)\n\n n_samples, n_features = X.shape\n x_squared_norms = row_norms(X, squared=True)\n\n if self.tol > 0.0:\n tol = _tolerance(X, self.tol)\n\n # using tol-based early stopping needs the allocation of a\n # dedicated before which can be expensive for high dim data:\n # hence we allocate it outside of the main loop\n old_center_buffer = np.zeros(n_features, dtype=X.dtype)\n else:\n tol = 0.0\n # no need for the center buffer if tol-based early stopping is\n # disabled\n old_center_buffer = np.zeros(0, dtype=X.dtype)\n\n distances = np.zeros(self.batch_size, dtype=X.dtype)\n n_batches = int(np.ceil(float(n_samples) / self.batch_size))\n n_iter = int(self.max_iter * n_batches)\n\n self._check_mkl_vcomp(X, self.batch_size)\n\n validation_indices = random_state.randint(0, n_samples,\n self._init_size)\n X_valid = X[validation_indices]\n sample_weight_valid = sample_weight[validation_indices]\n x_squared_norms_valid = x_squared_norms[validation_indices]\n\n # perform several inits with random sub-sets\n best_inertia = None\n for init_idx in range(self._n_init):\n if self.verbose:\n print(\"Init %d/%d with method: %s\"\n % (init_idx + 1, self._n_init, init))\n weight_sums = np.zeros(self.n_clusters, dtype=sample_weight.dtype)\n\n # TODO: once the `k_means` function works with sparse input we\n # should refactor the following init to use it instead.\n\n # Initialize the centers using only a fraction of the data as we\n # expect n_samples to be very large when using MiniBatchKMeans\n cluster_centers = self._init_centroids(\n X, x_squared_norms=x_squared_norms,\n init=init,\n random_state=random_state,\n init_size=self._init_size)\n\n # Compute the label assignment on the init dataset\n _mini_batch_step(\n X_valid, sample_weight_valid,\n x_squared_norms[validation_indices], cluster_centers,\n weight_sums, old_center_buffer, False, distances=None,\n verbose=self.verbose)\n\n # Keep only the best cluster centers across independent inits on\n # the common validation set\n _, inertia = _labels_inertia(X_valid, sample_weight_valid,\n x_squared_norms_valid,\n cluster_centers)\n if self.verbose:\n print(\"Inertia for init %d/%d: %f\"\n % (init_idx + 1, self._n_init, inertia))\n if best_inertia is None or inertia < best_inertia:\n self.cluster_centers_ = cluster_centers\n self._counts = weight_sums\n best_inertia = inertia\n\n # Empty context to be used inplace by the convergence check routine\n convergence_context = {}\n\n # Perform the iterative optimization until the final convergence\n # criterion\n for iteration_idx in range(n_iter):\n # Sample a minibatch from the full dataset\n minibatch_indices = random_state.randint(\n 0, n_samples, self.batch_size)\n\n # Perform the actual update step on the minibatch data\n batch_inertia, centers_squared_diff = _mini_batch_step(\n X[minibatch_indices], sample_weight[minibatch_indices],\n x_squared_norms[minibatch_indices],\n self.cluster_centers_, self._counts,\n old_center_buffer, tol > 0.0, distances=distances,\n # Here we randomly choose whether to 
perform\n # random reassignment: the choice is done as a function\n # of the iteration index, and the minimum number of\n # counts, in order to force this reassignment to happen\n # every once in a while\n random_reassign=((iteration_idx + 1)\n % (10 + int(self._counts.min())) == 0),\n random_state=random_state,\n reassignment_ratio=self.reassignment_ratio,\n verbose=self.verbose)\n\n # Monitor convergence and do early stopping if necessary\n if _mini_batch_convergence(\n self, iteration_idx, n_iter, tol, n_samples,\n centers_squared_diff, batch_inertia, convergence_context,\n verbose=self.verbose):\n break\n\n self.n_iter_ = iteration_idx + 1\n\n if self.compute_labels:\n self.labels_, self.inertia_ = \\\n self._labels_inertia_minibatch(X, sample_weight)\n\n return self\n\n def _labels_inertia_minibatch(self, X, sample_weight):\n \"\"\"Compute labels and inertia using mini batches.\n\n This is slightly slower than doing everything at once but prevents\n memory errors / segfaults.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n sample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\n inertia : float\n Sum of squared distances of points to nearest cluster.\n \"\"\"\n if self.verbose:\n print('Computing label assignment and total inertia')\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n x_squared_norms = row_norms(X, squared=True)\n slices = gen_batches(X.shape[0], self.batch_size)\n results = [_labels_inertia(X[s], sample_weight[s], x_squared_norms[s],\n self.cluster_centers_) for s in slices]\n labels, inertia = zip(*results)\n return np.hstack(labels), np.sum(inertia)\n\n def partial_fit(self, X, y=None, sample_weight=None):\n \"\"\"Update k means estimate on a single mini-batch X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Coordinates of the data points to cluster. It must be noted that\n X will be copied if it is not C-contiguous.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. 
If None, all observations\n are assigned equal weight (default: None).\n\n Returns\n -------\n self\n \"\"\"\n is_first_call_to_partial_fit = not hasattr(self, 'cluster_centers_')\n\n X = self._validate_data(X, accept_sparse='csr',\n dtype=[np.float64, np.float32],\n order='C', accept_large_sparse=False,\n reset=is_first_call_to_partial_fit)\n\n self._random_state = getattr(self, \"_random_state\",\n check_random_state(self.random_state))\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n x_squared_norms = row_norms(X, squared=True)\n\n if is_first_call_to_partial_fit:\n # this is the first call to partial_fit on this object\n self._check_params(X)\n\n # Validate init array\n init = self.init\n if hasattr(init, '__array__'):\n init = check_array(init, dtype=X.dtype, copy=True, order='C')\n self._validate_center_shape(X, init)\n\n self._check_mkl_vcomp(X, X.shape[0])\n\n # initialize the cluster centers\n self.cluster_centers_ = self._init_centroids(\n X, x_squared_norms=x_squared_norms,\n init=init,\n random_state=self._random_state,\n init_size=self._init_size)\n\n self._counts = np.zeros(self.n_clusters,\n dtype=sample_weight.dtype)\n random_reassign = False\n distances = None\n else:\n # The lower the minimum count is, the more we do random\n # reassignment, however, we don't want to do random\n # reassignment too often, to allow for building up counts\n random_reassign = self._random_state.randint(\n 10 * (1 + self._counts.min())) == 0\n distances = np.zeros(X.shape[0], dtype=X.dtype)\n\n _mini_batch_step(X, sample_weight, x_squared_norms,\n self.cluster_centers_, self._counts,\n np.zeros(0, dtype=X.dtype), 0,\n random_reassign=random_reassign, distances=distances,\n random_state=self._random_state,\n reassignment_ratio=self.reassignment_ratio,\n verbose=self.verbose)\n\n if self.compute_labels:\n self.labels_, self.inertia_ = _labels_inertia(\n X, sample_weight, x_squared_norms, self.cluster_centers_)\n\n return self\n\n def predict(self, X, sample_weight=None):\n \"\"\"Predict the closest cluster each sample in X belongs to.\n\n In the vector quantization literature, `cluster_centers_` is called\n the code book and each value returned by `predict` is the index of\n the closest code in the code book.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. 
If None, all observations\n are assigned equal weight (default: None).\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to.\n \"\"\"\n check_is_fitted(self)\n\n X = self._check_test_data(X)\n return self._labels_inertia_minibatch(X, sample_weight)[0]\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [ + { + "name": "max_no_improvement", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "batch_size", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "compute_labels", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "reassignment_ratio", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "cluster_centers_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "_counts", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift", + "name": "MeanShift", + "qname": "sklearn.cluster._mean_shift.MeanShift", + "decorators": [], + "superclasses": ["ClusterMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.cluster._mean_shift/MeanShift/__init__", + "scikit-learn/sklearn.cluster._mean_shift/MeanShift/fit", + "scikit-learn/sklearn.cluster._mean_shift/MeanShift/predict" + ], + "is_public": false, + "reexported_by": [], + "description": "Mean shift clustering using a flat kernel.\n\nMean shift clustering aims to discover \"blobs\" in a smooth density of\nsamples. It is a centroid-based algorithm, which works by updating\ncandidates for centroids to be the mean of the points within a given\nregion. These candidates are then filtered in a post-processing stage to\neliminate near-duplicates to form the final set of centroids.\n\nSeeding is performed using a binning technique for scalability.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Mean shift clustering using a flat kernel.\n\nMean shift clustering aims to discover \"blobs\" in a smooth density of\nsamples. It is a centroid-based algorithm, which works by updating\ncandidates for centroids to be the mean of the points within a given\nregion. These candidates are then filtered in a post-processing stage to\neliminate near-duplicates to form the final set of centroids.\n\nSeeding is performed using a binning technique for scalability.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbandwidth : float, default=None\n Bandwidth used in the RBF kernel.\n\n If not given, the bandwidth is estimated using\n sklearn.cluster.estimate_bandwidth; see the documentation for that\n function for hints on scalability (see also the Notes, below).\n\nseeds : array-like of shape (n_samples, n_features), default=None\n Seeds used to initialize kernels. If not set,\n the seeds are calculated by clustering.get_bin_seeds\n with bandwidth as the grid size and default values for\n other parameters.\n\nbin_seeding : bool, default=False\n If true, initial kernel locations are not locations of all\n points, but rather the location of the discretized version of\n points, where points are binned onto a grid whose coarseness\n corresponds to the bandwidth. 
Setting this option to True will speed\n up the algorithm because fewer seeds will be initialized.\n The default value is False.\n Ignored if seeds argument is not None.\n\nmin_bin_freq : int, default=1\n To speed up the algorithm, accept only those bins with at least\n min_bin_freq points as seeds.\n\ncluster_all : bool, default=True\n If true, then all points are clustered, even those orphans that are\n not within any kernel. Orphans are assigned to the nearest kernel.\n If false, then orphans are given cluster label -1.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by computing\n the mean shift updates for each seed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nmax_iter : int, default=300\n Maximum number of iterations, per seed point before the clustering\n operation terminates (for that seed point), if it has not converged yet.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers.\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point.\n\nn_iter_ : int\n Maximum number of iterations performed on each seed.\n\n .. versionadded:: 0.22\n\nExamples\n--------\n>>> from sklearn.cluster import MeanShift\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = MeanShift(bandwidth=2).fit(X)\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])\n>>> clustering.predict([[0, 0], [5, 5]])\narray([1, 0])\n>>> clustering\nMeanShift(bandwidth=2)\n\nNotes\n-----\n\nScalability:\n\nBecause this implementation uses a flat kernel and\na Ball Tree to look up members of each kernel, the complexity will tend\ntowards O(T*n*log(n)) in lower dimensions, with n the number of samples\nand T the number of points. In higher dimensions the complexity will\ntend towards O(T*n^2).\n\nScalability can be boosted by using fewer seeds, for example by using\na higher value of min_bin_freq in the get_bin_seeds function.\n\nNote that the estimate_bandwidth function is much less scalable than the\nmean shift algorithm and will be the bottleneck if it is used.\n\nReferences\n----------\n\nDorin Comaniciu and Peter Meer, \"Mean Shift: A robust approach toward\nfeature space analysis\". IEEE Transactions on Pattern Analysis and\nMachine Intelligence. 2002. pp. 603-619.", + "code": "class MeanShift(ClusterMixin, BaseEstimator):\n \"\"\"Mean shift clustering using a flat kernel.\n\n Mean shift clustering aims to discover \"blobs\" in a smooth density of\n samples. It is a centroid-based algorithm, which works by updating\n candidates for centroids to be the mean of the points within a given\n region. These candidates are then filtered in a post-processing stage to\n eliminate near-duplicates to form the final set of centroids.\n\n Seeding is performed using a binning technique for scalability.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n bandwidth : float, default=None\n Bandwidth used in the RBF kernel.\n\n If not given, the bandwidth is estimated using\n sklearn.cluster.estimate_bandwidth; see the documentation for that\n function for hints on scalability (see also the Notes, below).\n\n seeds : array-like of shape (n_samples, n_features), default=None\n Seeds used to initialize kernels. 
If not set,\n the seeds are calculated by clustering.get_bin_seeds\n with bandwidth as the grid size and default values for\n other parameters.\n\n bin_seeding : bool, default=False\n If true, initial kernel locations are not locations of all\n points, but rather the location of the discretized version of\n points, where points are binned onto a grid whose coarseness\n corresponds to the bandwidth. Setting this option to True will speed\n up the algorithm because fewer seeds will be initialized.\n The default value is False.\n Ignored if seeds argument is not None.\n\n min_bin_freq : int, default=1\n To speed up the algorithm, accept only those bins with at least\n min_bin_freq points as seeds.\n\n cluster_all : bool, default=True\n If true, then all points are clustered, even those orphans that are\n not within any kernel. Orphans are assigned to the nearest kernel.\n If false, then orphans are given cluster label -1.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation. This works by computing\n the mean shift updates for each seed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n max_iter : int, default=300\n Maximum number of iterations, per seed point before the clustering\n operation terminates (for that seed point), if it has not converged yet.\n\n .. versionadded:: 0.22\n\n Attributes\n ----------\n cluster_centers_ : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers.\n\n labels_ : ndarray of shape (n_samples,)\n Labels of each point.\n\n n_iter_ : int\n Maximum number of iterations performed on each seed.\n\n .. versionadded:: 0.22\n\n Examples\n --------\n >>> from sklearn.cluster import MeanShift\n >>> import numpy as np\n >>> X = np.array([[1, 1], [2, 1], [1, 0],\n ... [4, 7], [3, 5], [3, 6]])\n >>> clustering = MeanShift(bandwidth=2).fit(X)\n >>> clustering.labels_\n array([1, 1, 1, 0, 0, 0])\n >>> clustering.predict([[0, 0], [5, 5]])\n array([1, 0])\n >>> clustering\n MeanShift(bandwidth=2)\n\n Notes\n -----\n\n Scalability:\n\n Because this implementation uses a flat kernel and\n a Ball Tree to look up members of each kernel, the complexity will tend\n towards O(T*n*log(n)) in lower dimensions, with n the number of samples\n and T the number of points. In higher dimensions the complexity will\n tend towards O(T*n^2).\n\n Scalability can be boosted by using fewer seeds, for example by using\n a higher value of min_bin_freq in the get_bin_seeds function.\n\n Note that the estimate_bandwidth function is much less scalable than the\n mean shift algorithm and will be the bottleneck if it is used.\n\n References\n ----------\n\n Dorin Comaniciu and Peter Meer, \"Mean Shift: A robust approach toward\n feature space analysis\". IEEE Transactions on Pattern Analysis and\n Machine Intelligence. 2002. pp. 
603-619.\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, bandwidth=None, seeds=None, bin_seeding=False,\n min_bin_freq=1, cluster_all=True, n_jobs=None, max_iter=300):\n self.bandwidth = bandwidth\n self.seeds = seeds\n self.bin_seeding = bin_seeding\n self.cluster_all = cluster_all\n self.min_bin_freq = min_bin_freq\n self.n_jobs = n_jobs\n self.max_iter = max_iter\n\n def fit(self, X, y=None):\n \"\"\"Perform clustering.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Samples to cluster.\n\n y : Ignored\n\n \"\"\"\n X = self._validate_data(X)\n bandwidth = self.bandwidth\n if bandwidth is None:\n bandwidth = estimate_bandwidth(X, n_jobs=self.n_jobs)\n elif bandwidth <= 0:\n raise ValueError(\"bandwidth needs to be greater than zero or None,\"\n \" got %f\" % bandwidth)\n\n seeds = self.seeds\n if seeds is None:\n if self.bin_seeding:\n seeds = get_bin_seeds(X, bandwidth, self.min_bin_freq)\n else:\n seeds = X\n n_samples, n_features = X.shape\n center_intensity_dict = {}\n\n # We use n_jobs=1 because this will be used in nested calls under\n # parallel calls to _mean_shift_single_seed so there is no need\n # for further parallelism.\n nbrs = NearestNeighbors(radius=bandwidth, n_jobs=1).fit(X)\n\n # execute iterations on all seeds in parallel\n all_res = Parallel(n_jobs=self.n_jobs)(\n delayed(_mean_shift_single_seed)\n (seed, X, nbrs, self.max_iter) for seed in seeds)\n # copy results into a dictionary\n for i in range(len(seeds)):\n if all_res[i][1]: # i.e. len(points_within) > 0\n center_intensity_dict[all_res[i][0]] = all_res[i][1]\n\n self.n_iter_ = max([x[2] for x in all_res])\n\n if not center_intensity_dict:\n # nothing near seeds\n raise ValueError(\"No point was within bandwidth=%f of any seed.\"\n \" Try a different seeding strategy \\\n or increase the bandwidth.\"\n % bandwidth)\n\n # POST PROCESSING: remove near duplicate points\n # If the distance between two kernels is less than the bandwidth,\n # then we have to remove one because it is a duplicate. 
Remove the\n # one with fewer points.\n\n sorted_by_intensity = sorted(center_intensity_dict.items(),\n key=lambda tup: (tup[1], tup[0]),\n reverse=True)\n sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])\n unique = np.ones(len(sorted_centers), dtype=bool)\n nbrs = NearestNeighbors(radius=bandwidth,\n n_jobs=self.n_jobs).fit(sorted_centers)\n for i, center in enumerate(sorted_centers):\n if unique[i]:\n neighbor_idxs = nbrs.radius_neighbors([center],\n return_distance=False)[0]\n unique[neighbor_idxs] = 0\n unique[i] = 1 # leave the current point as unique\n cluster_centers = sorted_centers[unique]\n\n # ASSIGN LABELS: a point belongs to the cluster that it is closest to\n nbrs = NearestNeighbors(n_neighbors=1,\n n_jobs=self.n_jobs).fit(cluster_centers)\n labels = np.zeros(n_samples, dtype=int)\n distances, idxs = nbrs.kneighbors(X)\n if self.cluster_all:\n labels = idxs.flatten()\n else:\n labels.fill(-1)\n bool_selector = distances.flatten() <= bandwidth\n labels[bool_selector] = idxs.flatten()[bool_selector]\n\n self.cluster_centers_, self.labels_ = cluster_centers, labels\n return self\n\n def predict(self, X):\n \"\"\"Predict the closest cluster each sample in X belongs to.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, reset=False)\n with config_context(assume_finite=True):\n return pairwise_distances_argmin(X, self.cluster_centers_)", + "instance_attributes": [ + { + "name": "bin_seeding", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "cluster_all", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "min_bin_freq", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS", + "name": "OPTICS", + "qname": "sklearn.cluster._optics.OPTICS", + "decorators": [], + "superclasses": ["ClusterMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.cluster._optics/OPTICS/__init__", + "scikit-learn/sklearn.cluster._optics/OPTICS/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Estimate clustering structure from vector array.\n\nOPTICS (Ordering Points To Identify the Clustering Structure), closely\nrelated to DBSCAN, finds core sample of high density and expands clusters\nfrom them [1]_. Unlike DBSCAN, keeps cluster hierarchy for a variable\nneighborhood radius. Better suited for usage on large datasets than the\ncurrent sklearn implementation of DBSCAN.\n\nClusters are then extracted using a DBSCAN-like method\n(cluster_method = 'dbscan') or an automatic\ntechnique proposed in [1]_ (cluster_method = 'xi').\n\nThis implementation deviates from the original OPTICS by first performing\nk-nearest-neighborhood searches on all points to identify core sizes, then\ncomputing only the distances to unprocessed points when constructing the\ncluster order. 
Note that we do not employ a heap to manage the expansion\ncandidates, so the time complexity will be O(n^2).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Estimate clustering structure from vector array.\n\nOPTICS (Ordering Points To Identify the Clustering Structure), closely\nrelated to DBSCAN, finds core sample of high density and expands clusters\nfrom them [1]_. Unlike DBSCAN, keeps cluster hierarchy for a variable\nneighborhood radius. Better suited for usage on large datasets than the\ncurrent sklearn implementation of DBSCAN.\n\nClusters are then extracted using a DBSCAN-like method\n(cluster_method = 'dbscan') or an automatic\ntechnique proposed in [1]_ (cluster_method = 'xi').\n\nThis implementation deviates from the original OPTICS by first performing\nk-nearest-neighborhood searches on all points to identify core sizes, then\ncomputing only the distances to unprocessed points when constructing the\ncluster order. Note that we do not employ a heap to manage the expansion\ncandidates, so the time complexity will be O(n^2).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nmin_samples : int > 1 or float between 0 and 1, default=5\n The number of samples in a neighborhood for a point to be considered as\n a core point. Also, up and down steep regions can't have more than\n ``min_samples`` consecutive non-steep points. Expressed as an absolute\n number or a fraction of the number of samples (rounded to be at least\n 2).\n\nmax_eps : float, default=np.inf\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. Default value of ``np.inf`` will\n identify clusters across all scales; reducing ``max_eps`` will result\n in shorter run times.\n\nmetric : str or callable, default='minkowski'\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string. If metric is\n \"precomputed\", X is assumed to be a distance matrix and must be square.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\ncluster_method : str, default='xi'\n The extraction method used to extract clusters using the calculated\n reachability and ordering. Possible values are \"xi\" and \"dbscan\".\n\neps : float, default=None\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. 
By default it assumes the same value\n as ``max_eps``.\n Used only when ``cluster_method='dbscan'``.\n\nxi : float between 0 and 1, default=0.05\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n Used only when ``cluster_method='xi'``.\n\npredecessor_correction : bool, default=True\n Correct clusters according to the predecessors calculated by OPTICS\n [2]_. This parameter has minimal effect on most datasets.\n Used only when ``cluster_method='xi'``.\n\nmin_cluster_size : int > 1 or float between 0 and 1, default=None\n Minimum number of samples in an OPTICS cluster, expressed as an\n absolute number or a fraction of the number of samples (rounded to be\n at least 2). If ``None``, the value of ``min_samples`` is used instead.\n Used only when ``cluster_method='xi'``.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method. (default)\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nlabels_ : ndarray of shape (n_samples,)\n Cluster labels for each point in the dataset given to fit().\n Noisy samples and points which are not included in a leaf cluster\n of ``cluster_hierarchy_`` are labeled as -1.\n\nreachability_ : ndarray of shape (n_samples,)\n Reachability distances per sample, indexed by object order. Use\n ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\nordering_ : ndarray of shape (n_samples,)\n The cluster ordered list of sample indices.\n\ncore_distances_ : ndarray of shape (n_samples,)\n Distance at which each sample becomes a core point, indexed by object\n order. Points which will never be core have a distance of inf. Use\n ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\npredecessor_ : ndarray of shape (n_samples,)\n Point that a sample was reached from, indexed by object order.\n Seed points have a predecessor of -1.\n\ncluster_hierarchy_ : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of ``[start, end]`` in each row, with\n all indices inclusive. The clusters are ordered according to\n ``(end, -start)`` (ascending) so that larger clusters encompassing\n smaller clusters come after those smaller ones. Since ``labels_`` does\n not reflect the hierarchy, usually\n ``len(cluster_hierarchy_) > np.unique(optics.labels_)``. 
Please also\n note that these indices are of the ``ordering_``, i.e.\n ``X[ordering_][start:end + 1]`` form a cluster.\n Only available when ``cluster_method='xi'``.\n\nSee Also\n--------\nDBSCAN : A similar clustering for a specified neighborhood radius (eps).\n Our implementation is optimized for runtime.\n\nReferences\n----------\n.. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,\n and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n structure.\" ACM SIGMOD Record 28, no. 2 (1999): 49-60.\n\n.. [2] Schubert, Erich, Michael Gertz.\n \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.\n\nExamples\n--------\n>>> from sklearn.cluster import OPTICS\n>>> import numpy as np\n>>> X = np.array([[1, 2], [2, 5], [3, 6],\n... [8, 7], [8, 8], [7, 3]])\n>>> clustering = OPTICS(min_samples=2).fit(X)\n>>> clustering.labels_\narray([0, 0, 0, 1, 1, 1])", + "code": "class OPTICS(ClusterMixin, BaseEstimator):\n \"\"\"Estimate clustering structure from vector array.\n\n OPTICS (Ordering Points To Identify the Clustering Structure), closely\n related to DBSCAN, finds core sample of high density and expands clusters\n from them [1]_. Unlike DBSCAN, keeps cluster hierarchy for a variable\n neighborhood radius. Better suited for usage on large datasets than the\n current sklearn implementation of DBSCAN.\n\n Clusters are then extracted using a DBSCAN-like method\n (cluster_method = 'dbscan') or an automatic\n technique proposed in [1]_ (cluster_method = 'xi').\n\n This implementation deviates from the original OPTICS by first performing\n k-nearest-neighborhood searches on all points to identify core sizes, then\n computing only the distances to unprocessed points when constructing the\n cluster order. Note that we do not employ a heap to manage the expansion\n candidates, so the time complexity will be O(n^2).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n min_samples : int > 1 or float between 0 and 1, default=5\n The number of samples in a neighborhood for a point to be considered as\n a core point. Also, up and down steep regions can't have more than\n ``min_samples`` consecutive non-steep points. Expressed as an absolute\n number or a fraction of the number of samples (rounded to be at least\n 2).\n\n max_eps : float, default=np.inf\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. Default value of ``np.inf`` will\n identify clusters across all scales; reducing ``max_eps`` will result\n in shorter run times.\n\n metric : str or callable, default='minkowski'\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string. 
If metric is\n \"precomputed\", X is assumed to be a distance matrix and must be square.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\n p : int, default=2\n Parameter for the Minkowski metric from\n :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n metric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n cluster_method : str, default='xi'\n The extraction method used to extract clusters using the calculated\n reachability and ordering. Possible values are \"xi\" and \"dbscan\".\n\n eps : float, default=None\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. By default it assumes the same value\n as ``max_eps``.\n Used only when ``cluster_method='dbscan'``.\n\n xi : float between 0 and 1, default=0.05\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n Used only when ``cluster_method='xi'``.\n\n predecessor_correction : bool, default=True\n Correct clusters according to the predecessors calculated by OPTICS\n [2]_. This parameter has minimal effect on most datasets.\n Used only when ``cluster_method='xi'``.\n\n min_cluster_size : int > 1 or float between 0 and 1, default=None\n Minimum number of samples in an OPTICS cluster, expressed as an\n absolute number or a fraction of the number of samples (rounded to be\n at least 2). If ``None``, the value of ``min_samples`` is used instead.\n Used only when ``cluster_method='xi'``.\n\n algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method. (default)\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\n leaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\n n_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n labels_ : ndarray of shape (n_samples,)\n Cluster labels for each point in the dataset given to fit().\n Noisy samples and points which are not included in a leaf cluster\n of ``cluster_hierarchy_`` are labeled as -1.\n\n reachability_ : ndarray of shape (n_samples,)\n Reachability distances per sample, indexed by object order. Use\n ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\n ordering_ : ndarray of shape (n_samples,)\n The cluster ordered list of sample indices.\n\n core_distances_ : ndarray of shape (n_samples,)\n Distance at which each sample becomes a core point, indexed by object\n order. Points which will never be core have a distance of inf. Use\n ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\n predecessor_ : ndarray of shape (n_samples,)\n Point that a sample was reached from, indexed by object order.\n Seed points have a predecessor of -1.\n\n cluster_hierarchy_ : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of ``[start, end]`` in each row, with\n all indices inclusive. The clusters are ordered according to\n ``(end, -start)`` (ascending) so that larger clusters encompassing\n smaller clusters come after those smaller ones. Since ``labels_`` does\n not reflect the hierarchy, usually\n ``len(cluster_hierarchy_) > np.unique(optics.labels_)``. Please also\n note that these indices are of the ``ordering_``, i.e.\n ``X[ordering_][start:end + 1]`` form a cluster.\n Only available when ``cluster_method='xi'``.\n\n See Also\n --------\n DBSCAN : A similar clustering for a specified neighborhood radius (eps).\n Our implementation is optimized for runtime.\n\n References\n ----------\n .. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,\n and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n structure.\" ACM SIGMOD Record 28, no. 2 (1999): 49-60.\n\n .. [2] Schubert, Erich, Michael Gertz.\n \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.\n\n Examples\n --------\n >>> from sklearn.cluster import OPTICS\n >>> import numpy as np\n >>> X = np.array([[1, 2], [2, 5], [3, 6],\n ... 
[8, 7], [8, 8], [7, 3]])\n >>> clustering = OPTICS(min_samples=2).fit(X)\n >>> clustering.labels_\n array([0, 0, 0, 1, 1, 1])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, min_samples=5, max_eps=np.inf, metric='minkowski',\n p=2, metric_params=None, cluster_method='xi', eps=None,\n xi=0.05, predecessor_correction=True, min_cluster_size=None,\n algorithm='auto', leaf_size=30, n_jobs=None):\n self.max_eps = max_eps\n self.min_samples = min_samples\n self.min_cluster_size = min_cluster_size\n self.algorithm = algorithm\n self.metric = metric\n self.metric_params = metric_params\n self.p = p\n self.leaf_size = leaf_size\n self.cluster_method = cluster_method\n self.eps = eps\n self.xi = xi\n self.predecessor_correction = predecessor_correction\n self.n_jobs = n_jobs\n\n def fit(self, X, y=None):\n \"\"\"Perform OPTICS clustering.\n\n Extracts an ordered list of points and reachability distances, and\n performs initial clustering using ``max_eps`` distance specified at\n OPTICS object instantiation.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features), or \\\n (n_samples, n_samples) if metric='precomputed'\n A feature array, or array of distances between samples if\n metric='precomputed'.\n\n y : ignored\n Ignored.\n\n Returns\n -------\n self : instance of OPTICS\n The instance.\n \"\"\"\n X = self._validate_data(X, dtype=float)\n\n if self.cluster_method not in ['dbscan', 'xi']:\n raise ValueError(\"cluster_method should be one of\"\n \" 'dbscan' or 'xi' but is %s\" %\n self.cluster_method)\n\n (self.ordering_, self.core_distances_, self.reachability_,\n self.predecessor_) = compute_optics_graph(\n X=X, min_samples=self.min_samples, algorithm=self.algorithm,\n leaf_size=self.leaf_size, metric=self.metric,\n metric_params=self.metric_params, p=self.p, n_jobs=self.n_jobs,\n max_eps=self.max_eps)\n\n # Extract clusters from the calculated orders and reachability\n if self.cluster_method == 'xi':\n labels_, clusters_ = cluster_optics_xi(\n reachability=self.reachability_,\n predecessor=self.predecessor_,\n ordering=self.ordering_,\n min_samples=self.min_samples,\n min_cluster_size=self.min_cluster_size,\n xi=self.xi,\n predecessor_correction=self.predecessor_correction)\n self.cluster_hierarchy_ = clusters_\n elif self.cluster_method == 'dbscan':\n if self.eps is None:\n eps = self.max_eps\n else:\n eps = self.eps\n\n if eps > self.max_eps:\n raise ValueError('Specify an epsilon smaller than %s. 
Got %s.'\n % (self.max_eps, eps))\n\n labels_ = cluster_optics_dbscan(\n reachability=self.reachability_,\n core_distances=self.core_distances_,\n ordering=self.ordering_, eps=eps)\n\n self.labels_ = labels_\n return self", + "instance_attributes": [ + { + "name": "min_samples", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "metric", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "p", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "leaf_size", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "cluster_method", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "xi", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "predecessor_correction", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "cluster_hierarchy_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering", + "name": "SpectralClustering", + "qname": "sklearn.cluster._spectral.SpectralClustering", + "decorators": [], + "superclasses": ["ClusterMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__", + "scikit-learn/sklearn.cluster._spectral/SpectralClustering/fit", + "scikit-learn/sklearn.cluster._spectral/SpectralClustering/fit_predict", + "scikit-learn/sklearn.cluster._spectral/SpectralClustering/_more_tags", + "scikit-learn/sklearn.cluster._spectral/SpectralClustering/_pairwise@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex, or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster, such as when clusters are\nnested circles on the 2D plane.\n\nIf the affinity matrix is the adjacency matrix of a graph, this method\ncan be used to find normalized graph cuts.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\na kernel function such the Gaussian (aka RBF) kernel with Euclidean\ndistance ``d(X, X)``::\n\n np.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, a user-provided affinity matrix can be specified by\nsetting ``affinity='precomputed'``.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex, or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster, such as when clusters are\nnested circles on the 2D plane.\n\nIf the affinity matrix is the adjacency matrix of a graph, this method\ncan be used to find normalized graph cuts.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\na kernel function such the Gaussian (aka RBF) kernel with Euclidean\ndistance ``d(X, X)``::\n\n np.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, a user-provided affinity matrix can be specified by\nsetting ``affinity='precomputed'``.\n\nRead more in the :ref:`User Guide 
`.\n\nParameters\n----------\nn_clusters : int, default=8\n The dimension of the projection subspace.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. If None, then ``'arpack'`` is\n used.\n\nn_components : int, default=n_clusters\n Number of eigenvectors to use for the spectral embedding\n\nrandom_state : int, RandomState instance, default=None\n A pseudo random number generator used for the initialization of the\n lobpcg eigenvectors decomposition when ``eigen_solver='amg'`` and by\n the K-Means initialization. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nn_init : int, default=10\n Number of times the k-means algorithm will be run with different\n centroid seeds. The final results will be the best output of n_init\n consecutive runs in terms of inertia. Only used if\n ``assign_labels='kmeans'``.\n\ngamma : float, default=1.0\n Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.\n Ignored for ``affinity='nearest_neighbors'``.\n\naffinity : str or callable, default='rbf'\n How to construct the affinity matrix.\n - 'nearest_neighbors': construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf': construct the affinity matrix using a radial basis function\n (RBF) kernel.\n - 'precomputed': interpret ``X`` as a precomputed affinity matrix,\n where larger values indicate greater similarity between instances.\n - 'precomputed_nearest_neighbors': interpret ``X`` as a sparse graph\n of precomputed distances, and construct a binary affinity matrix\n from the ``n_neighbors`` nearest neighbors of each instance.\n - one of the kernels supported by\n :func:`~sklearn.metrics.pairwise_kernels`.\n\n Only kernels that produce similarity scores (non-negative values that\n increase with similarity) should be used. This property is not checked\n by the clustering algorithm.\n\nn_neighbors : int, default=10\n Number of neighbors to use when constructing the affinity matrix using\n the nearest neighbors method. Ignored for ``affinity='rbf'``.\n\neigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when ``eigen_solver='arpack'``.\n\nassign_labels : {'kmeans', 'discretize'}, default='kmeans'\n The strategy for assigning labels in the embedding space. There are two\n ways to assign labels after the Laplacian embedding. k-means is a\n popular choice, but it can be sensitive to initialization.\n Discretization is another approach which is less sensitive to random\n initialization.\n\ndegree : float, default=3\n Degree of the polynomial kernel. Ignored by other kernels.\n\ncoef0 : float, default=1\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\nkernel_params : dict of str to any, default=None\n Parameters (keyword arguments) and values for kernel passed as\n callable object. Ignored by other kernels.\n\nn_jobs : int, default=None\n The number of parallel jobs to run when `affinity='nearest_neighbors'`\n or `affinity='precomputed_nearest_neighbors'`. The neighbors search\n will be done in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n Verbosity mode.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\naffinity_matrix_ : array-like of shape (n_samples, n_samples)\n Affinity matrix used for clustering. Available only after calling\n ``fit``.\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralClustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralClustering(n_clusters=2,\n... assign_labels='discretize',\n... random_state=0).fit(X)\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])\n>>> clustering\nSpectralClustering(assign_labels='discretize', n_clusters=2,\n random_state=0)\n\nNotes\n-----\nA distance matrix for which 0 indicates identical elements and high values\nindicate very dissimilar elements can be transformed into an affinity /\nsimilarity matrix that is well-suited for the algorithm by\napplying the Gaussian (aka RBF, heat) kernel::\n\n np.exp(- dist_matrix ** 2 / (2. * delta ** 2))\n\nwhere ``delta`` is a free parameter representing the width of the Gaussian\nkernel.\n\nAn alternative is to take a symmetric version of the k-nearest neighbors\nconnectivity matrix of the points.\n\nIf the pyamg package is installed, it is used: this greatly\nspeeds up computation.\n\nReferences\n----------\n\n- Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324\n\n- A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n- Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf", + "code": "class SpectralClustering(ClusterMixin, BaseEstimator):\n \"\"\"Apply clustering to a projection of the normalized Laplacian.\n\n In practice Spectral Clustering is very useful when the structure of\n the individual clusters is highly non-convex, or more generally when\n a measure of the center and spread of the cluster is not a suitable\n description of the complete cluster, such as when clusters are\n nested circles on the 2D plane.\n\n If the affinity matrix is the adjacency matrix of a graph, this method\n can be used to find normalized graph cuts.\n\n When calling ``fit``, an affinity matrix is constructed using either\n a kernel function such the Gaussian (aka RBF) kernel with Euclidean\n distance ``d(X, X)``::\n\n np.exp(-gamma * d(X,X) ** 2)\n\n or a k-nearest neighbors connectivity matrix.\n\n Alternatively, a user-provided affinity matrix can be specified by\n setting ``affinity='precomputed'``.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_clusters : int, default=8\n The dimension of the projection subspace.\n\n eigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. If None, then ``'arpack'`` is\n used.\n\n n_components : int, default=n_clusters\n Number of eigenvectors to use for the spectral embedding\n\n random_state : int, RandomState instance, default=None\n A pseudo random number generator used for the initialization of the\n lobpcg eigenvectors decomposition when ``eigen_solver='amg'`` and by\n the K-Means initialization. 
Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\n n_init : int, default=10\n Number of times the k-means algorithm will be run with different\n centroid seeds. The final results will be the best output of n_init\n consecutive runs in terms of inertia. Only used if\n ``assign_labels='kmeans'``.\n\n gamma : float, default=1.0\n Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.\n Ignored for ``affinity='nearest_neighbors'``.\n\n affinity : str or callable, default='rbf'\n How to construct the affinity matrix.\n - 'nearest_neighbors': construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf': construct the affinity matrix using a radial basis function\n (RBF) kernel.\n - 'precomputed': interpret ``X`` as a precomputed affinity matrix,\n where larger values indicate greater similarity between instances.\n - 'precomputed_nearest_neighbors': interpret ``X`` as a sparse graph\n of precomputed distances, and construct a binary affinity matrix\n from the ``n_neighbors`` nearest neighbors of each instance.\n - one of the kernels supported by\n :func:`~sklearn.metrics.pairwise_kernels`.\n\n Only kernels that produce similarity scores (non-negative values that\n increase with similarity) should be used. This property is not checked\n by the clustering algorithm.\n\n n_neighbors : int, default=10\n Number of neighbors to use when constructing the affinity matrix using\n the nearest neighbors method. Ignored for ``affinity='rbf'``.\n\n eigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when ``eigen_solver='arpack'``.\n\n assign_labels : {'kmeans', 'discretize'}, default='kmeans'\n The strategy for assigning labels in the embedding space. There are two\n ways to assign labels after the Laplacian embedding. k-means is a\n popular choice, but it can be sensitive to initialization.\n Discretization is another approach which is less sensitive to random\n initialization.\n\n degree : float, default=3\n Degree of the polynomial kernel. Ignored by other kernels.\n\n coef0 : float, default=1\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\n kernel_params : dict of str to any, default=None\n Parameters (keyword arguments) and values for kernel passed as\n callable object. Ignored by other kernels.\n\n n_jobs : int, default=None\n The number of parallel jobs to run when `affinity='nearest_neighbors'`\n or `affinity='precomputed_nearest_neighbors'`. The neighbors search\n will be done in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n verbose : bool, default=False\n Verbosity mode.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n affinity_matrix_ : array-like of shape (n_samples, n_samples)\n Affinity matrix used for clustering. Available only after calling\n ``fit``.\n\n labels_ : ndarray of shape (n_samples,)\n Labels of each point\n\n Examples\n --------\n >>> from sklearn.cluster import SpectralClustering\n >>> import numpy as np\n >>> X = np.array([[1, 1], [2, 1], [1, 0],\n ... [4, 7], [3, 5], [3, 6]])\n >>> clustering = SpectralClustering(n_clusters=2,\n ... assign_labels='discretize',\n ... 
random_state=0).fit(X)\n >>> clustering.labels_\n array([1, 1, 1, 0, 0, 0])\n >>> clustering\n SpectralClustering(assign_labels='discretize', n_clusters=2,\n random_state=0)\n\n Notes\n -----\n A distance matrix for which 0 indicates identical elements and high values\n indicate very dissimilar elements can be transformed into an affinity /\n similarity matrix that is well-suited for the algorithm by\n applying the Gaussian (aka RBF, heat) kernel::\n\n np.exp(- dist_matrix ** 2 / (2. * delta ** 2))\n\n where ``delta`` is a free parameter representing the width of the Gaussian\n kernel.\n\n An alternative is to take a symmetric version of the k-nearest neighbors\n connectivity matrix of the points.\n\n If the pyamg package is installed, it is used: this greatly\n speeds up computation.\n\n References\n ----------\n\n - Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324\n\n - A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n - Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_clusters=8, *, eigen_solver=None, n_components=None,\n random_state=None, n_init=10, gamma=1., affinity='rbf',\n n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans',\n degree=3, coef0=1, kernel_params=None, n_jobs=None,\n verbose=False):\n self.n_clusters = n_clusters\n self.eigen_solver = eigen_solver\n self.n_components = n_components\n self.random_state = random_state\n self.n_init = n_init\n self.gamma = gamma\n self.affinity = affinity\n self.n_neighbors = n_neighbors\n self.eigen_tol = eigen_tol\n self.assign_labels = assign_labels\n self.degree = degree\n self.coef0 = coef0\n self.kernel_params = kernel_params\n self.n_jobs = n_jobs\n self.verbose = verbose\n\n def fit(self, X, y=None):\n \"\"\"Perform spectral clustering from features, or affinity matrix.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples)\n Training instances to cluster, similarities / affinities between\n instances if ``affinity='precomputed'``, or distances between\n instances if ``affinity='precomputed_nearest_neighbors'``. If a\n sparse matrix is provided in a format other than ``csr_matrix``,\n ``csc_matrix``, or ``coo_matrix``, it will be converted into a\n sparse ``csr_matrix``.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n self\n\n \"\"\"\n X = self._validate_data(X, accept_sparse=['csr', 'csc', 'coo'],\n dtype=np.float64, ensure_min_samples=2)\n allow_squared = self.affinity in [\"precomputed\",\n \"precomputed_nearest_neighbors\"]\n if X.shape[0] == X.shape[1] and not allow_squared:\n warnings.warn(\"The spectral clustering API has changed. ``fit``\"\n \" now constructs an affinity matrix from data. 
To use\"\n \" a custom affinity matrix, \"\n \"set ``affinity='precomputed'``.\")\n\n if self.affinity == 'nearest_neighbors':\n connectivity = kneighbors_graph(X, n_neighbors=self.n_neighbors,\n include_self=True,\n n_jobs=self.n_jobs)\n self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n elif self.affinity == 'precomputed_nearest_neighbors':\n estimator = NearestNeighbors(n_neighbors=self.n_neighbors,\n n_jobs=self.n_jobs,\n metric=\"precomputed\").fit(X)\n connectivity = estimator.kneighbors_graph(X=X, mode='connectivity')\n self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n elif self.affinity == 'precomputed':\n self.affinity_matrix_ = X\n else:\n params = self.kernel_params\n if params is None:\n params = {}\n if not callable(self.affinity):\n params['gamma'] = self.gamma\n params['degree'] = self.degree\n params['coef0'] = self.coef0\n self.affinity_matrix_ = pairwise_kernels(X, metric=self.affinity,\n filter_params=True,\n **params)\n\n random_state = check_random_state(self.random_state)\n self.labels_ = spectral_clustering(self.affinity_matrix_,\n n_clusters=self.n_clusters,\n n_components=self.n_components,\n eigen_solver=self.eigen_solver,\n random_state=random_state,\n n_init=self.n_init,\n eigen_tol=self.eigen_tol,\n assign_labels=self.assign_labels,\n verbose=self.verbose)\n return self\n\n def fit_predict(self, X, y=None):\n \"\"\"Perform spectral clustering from features, or affinity matrix,\n and return cluster labels.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples)\n Training instances to cluster, similarities / affinities between\n instances if ``affinity='precomputed'``, or distances between\n instances if ``affinity='precomputed_nearest_neighbors'``. 
If a\n sparse matrix is provided in a format other than ``csr_matrix``,\n ``csc_matrix``, or ``coo_matrix``, it will be converted into a\n sparse ``csr_matrix``.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Cluster labels.\n \"\"\"\n return super().fit_predict(X, y)\n\n def _more_tags(self):\n return {'pairwise': self.affinity in [\"precomputed\",\n \"precomputed_nearest_neighbors\"]}\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n return self.affinity in [\"precomputed\",\n \"precomputed_nearest_neighbors\"]", + "instance_attributes": [ + { + "name": "n_clusters", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_init", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "gamma", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "affinity", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_neighbors", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "eigen_tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "assign_labels", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "degree", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "coef0", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer", + "name": "ColumnTransformer", + "qname": "sklearn.compose._column_transformer.ColumnTransformer", + "decorators": [], + "superclasses": ["TransformerMixin", "_BaseComposition"], + "methods": [ + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/__init__", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_transformers@getter", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_transformers@setter", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/get_params", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/set_params", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_iter", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_transformers", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_column_callables", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_remainder", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/named_transformers_@getter", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/get_feature_names", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_update_fitted_transformers", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_output", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_log_message", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_fit_transform", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/fit", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/fit_transform", + 
"scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/transform", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_hstack", + "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_sk_visual_block_" + ], + "is_public": false, + "reexported_by": [], + "description": "Applies transformers to columns of an array or pandas DataFrame.\n\nThis estimator allows different columns or column subsets of the input\nto be transformed separately and the features generated by each transformer\nwill be concatenated to form a single feature space.\nThis is useful for heterogeneous or columnar data, to combine several\nfeature extraction mechanisms or transformations into a single transformer.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "docstring": "Applies transformers to columns of an array or pandas DataFrame.\n\nThis estimator allows different columns or column subsets of the input\nto be transformed separately and the features generated by each transformer\nwill be concatenated to form a single feature space.\nThis is useful for heterogeneous or columnar data, to combine several\nfeature extraction mechanisms or transformations into a single transformer.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\ntransformers : list of tuples\n List of (name, transformer, columns) tuples specifying the\n transformer objects to be applied to subsets of the data.\n\n name : str\n Like in Pipeline and FeatureUnion, this allows the transformer and\n its parameters to be set using ``set_params`` and searched in grid\n search.\n transformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\n columns : str, array-like of str, int, array-like of int, array-like of bool, slice or callable\n Indexes the data on its second axis. Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`.\n\nremainder : {'drop', 'passthrough'} or estimator, default='drop'\n By default, only the specified columns in `transformers` are\n transformed and combined in the output, and the non-specified\n columns are dropped. (default of ``'drop'``).\n By specifying ``remainder='passthrough'``, all remaining columns that\n were not specified in `transformers` will be automatically passed\n through. This subset of columns is concatenated with the output of\n the transformers.\n By setting ``remainder`` to be an estimator, the remaining\n non-specified columns will use the ``remainder`` estimator. The\n estimator must support :term:`fit` and :term:`transform`.\n Note that using this feature requires that the DataFrame columns\n input at :term:`fit` and :term:`transform` have identical order.\n\nsparse_threshold : float, default=0.3\n If the output of the different transformers contains sparse matrices,\n these will be stacked as a sparse matrix if the overall density is\n lower than this value. Use ``sparse_threshold=0`` to always return\n dense. 
When the transformed output consists of all dense data, the\n stacked result will be dense, and this keyword will be ignored.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ntransformer_weights : dict, default=None\n Multiplicative weights for features per transformer. The output of the\n transformer is multiplied by these weights. Keys are transformer names,\n values the weights.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nAttributes\n----------\ntransformers_ : list\n The collection of fitted transformers as tuples of\n (name, fitted_transformer, column). `fitted_transformer` can be an\n estimator, 'drop', or 'passthrough'. In case there were no columns\n selected, this will be the unfitted transformer.\n If there are remaining columns, the final element is a tuple of the\n form:\n ('remainder', transformer, remaining_columns) corresponding to the\n ``remainder`` parameter. If there are remaining columns, then\n ``len(transformers_)==len(transformers)+1``, otherwise\n ``len(transformers_)==len(transformers)``.\n\nnamed_transformers_ : :class:`~sklearn.utils.Bunch`\n Read-only attribute to access any transformer by given name.\n Keys are transformer names and values are the fitted transformer\n objects.\n\nsparse_output_ : bool\n Boolean flag indicating whether the output of ``transform`` is a\n sparse matrix or a dense numpy array, which depends on the output\n of the individual transformers and the `sparse_threshold` keyword.\n\nNotes\n-----\nThe order of the columns in the transformed feature matrix follows the\norder of how the columns are specified in the `transformers` list.\nColumns of the original feature matrix that are not specified are\ndropped from the resulting transformed feature matrix, unless specified\nin the `passthrough` keyword. Those columns specified with `passthrough`\nare added at the right to the output of the transformers.\n\nSee Also\n--------\nmake_column_transformer : Convenience function for\n combining the outputs of multiple transformer objects applied to\n column subsets of the original feature space.\nmake_column_selector : Convenience function for selecting\n columns based on datatype or the columns name with a regex pattern.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.compose import ColumnTransformer\n>>> from sklearn.preprocessing import Normalizer\n>>> ct = ColumnTransformer(\n... [(\"norm1\", Normalizer(norm='l1'), [0, 1]),\n... (\"norm2\", Normalizer(norm='l1'), slice(2, 4))])\n>>> X = np.array([[0., 1., 2., 2.],\n... [1., 1., 0., 1.]])\n>>> # Normalizer scales each row of X to unit norm. A separate scaling\n>>> # is applied for the two first and two last elements of each\n>>> # row independently.\n>>> ct.fit_transform(X)\narray([[0. , 1. , 0.5, 0.5],\n [0.5, 0.5, 0. , 1. 
]])", + "code": "class ColumnTransformer(TransformerMixin, _BaseComposition):\n \"\"\"Applies transformers to columns of an array or pandas DataFrame.\n\n This estimator allows different columns or column subsets of the input\n to be transformed separately and the features generated by each transformer\n will be concatenated to form a single feature space.\n This is useful for heterogeneous or columnar data, to combine several\n feature extraction mechanisms or transformations into a single transformer.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.20\n\n Parameters\n ----------\n transformers : list of tuples\n List of (name, transformer, columns) tuples specifying the\n transformer objects to be applied to subsets of the data.\n\n name : str\n Like in Pipeline and FeatureUnion, this allows the transformer and\n its parameters to be set using ``set_params`` and searched in grid\n search.\n transformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\n columns : str, array-like of str, int, array-like of int, \\\n array-like of bool, slice or callable\n Indexes the data on its second axis. Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`.\n\n remainder : {'drop', 'passthrough'} or estimator, default='drop'\n By default, only the specified columns in `transformers` are\n transformed and combined in the output, and the non-specified\n columns are dropped. (default of ``'drop'``).\n By specifying ``remainder='passthrough'``, all remaining columns that\n were not specified in `transformers` will be automatically passed\n through. This subset of columns is concatenated with the output of\n the transformers.\n By setting ``remainder`` to be an estimator, the remaining\n non-specified columns will use the ``remainder`` estimator. The\n estimator must support :term:`fit` and :term:`transform`.\n Note that using this feature requires that the DataFrame columns\n input at :term:`fit` and :term:`transform` have identical order.\n\n sparse_threshold : float, default=0.3\n If the output of the different transformers contains sparse matrices,\n these will be stacked as a sparse matrix if the overall density is\n lower than this value. Use ``sparse_threshold=0`` to always return\n dense. When the transformed output consists of all dense data, the\n stacked result will be dense, and this keyword will be ignored.\n\n n_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n transformer_weights : dict, default=None\n Multiplicative weights for features per transformer. The output of the\n transformer is multiplied by these weights. 
Keys are transformer names,\n values the weights.\n\n verbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\n Attributes\n ----------\n transformers_ : list\n The collection of fitted transformers as tuples of\n (name, fitted_transformer, column). `fitted_transformer` can be an\n estimator, 'drop', or 'passthrough'. In case there were no columns\n selected, this will be the unfitted transformer.\n If there are remaining columns, the final element is a tuple of the\n form:\n ('remainder', transformer, remaining_columns) corresponding to the\n ``remainder`` parameter. If there are remaining columns, then\n ``len(transformers_)==len(transformers)+1``, otherwise\n ``len(transformers_)==len(transformers)``.\n\n named_transformers_ : :class:`~sklearn.utils.Bunch`\n Read-only attribute to access any transformer by given name.\n Keys are transformer names and values are the fitted transformer\n objects.\n\n sparse_output_ : bool\n Boolean flag indicating whether the output of ``transform`` is a\n sparse matrix or a dense numpy array, which depends on the output\n of the individual transformers and the `sparse_threshold` keyword.\n\n Notes\n -----\n The order of the columns in the transformed feature matrix follows the\n order of how the columns are specified in the `transformers` list.\n Columns of the original feature matrix that are not specified are\n dropped from the resulting transformed feature matrix, unless specified\n in the `passthrough` keyword. Those columns specified with `passthrough`\n are added at the right to the output of the transformers.\n\n See Also\n --------\n make_column_transformer : Convenience function for\n combining the outputs of multiple transformer objects applied to\n column subsets of the original feature space.\n make_column_selector : Convenience function for selecting\n columns based on datatype or the columns name with a regex pattern.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.compose import ColumnTransformer\n >>> from sklearn.preprocessing import Normalizer\n >>> ct = ColumnTransformer(\n ... [(\"norm1\", Normalizer(norm='l1'), [0, 1]),\n ... (\"norm2\", Normalizer(norm='l1'), slice(2, 4))])\n >>> X = np.array([[0., 1., 2., 2.],\n ... [1., 1., 0., 1.]])\n >>> # Normalizer scales each row of X to unit norm. A separate scaling\n >>> # is applied for the two first and two last elements of each\n >>> # row independently.\n >>> ct.fit_transform(X)\n array([[0. , 1. , 0.5, 0.5],\n [0.5, 0.5, 0. , 1. ]])\n\n \"\"\"\n _required_parameters = ['transformers']\n\n @_deprecate_positional_args\n def __init__(self,\n transformers, *,\n remainder='drop',\n sparse_threshold=0.3,\n n_jobs=None,\n transformer_weights=None,\n verbose=False):\n self.transformers = transformers\n self.remainder = remainder\n self.sparse_threshold = sparse_threshold\n self.n_jobs = n_jobs\n self.transformer_weights = transformer_weights\n self.verbose = verbose\n\n @property\n def _transformers(self):\n \"\"\"\n Internal list of transformer only containing the name and\n transformers, dropping the columns. 
This is for the implementation\n of get_params via BaseComposition._get_params which expects lists\n of tuples of len 2.\n \"\"\"\n return [(name, trans) for name, trans, _ in self.transformers]\n\n @_transformers.setter\n def _transformers(self, value):\n self.transformers = [\n (name, trans, col) for ((name, trans), (_, _, col))\n in zip(value, self.transformers)]\n\n def get_params(self, deep=True):\n \"\"\"Get parameters for this estimator.\n\n Returns the parameters given in the constructor as well as the\n estimators contained within the `transformers` of the\n `ColumnTransformer`.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : dict\n Parameter names mapped to their values.\n \"\"\"\n return self._get_params('_transformers', deep=deep)\n\n def set_params(self, **kwargs):\n \"\"\"Set the parameters of this estimator.\n\n Valid parameter keys can be listed with ``get_params()``. Note that you\n can directly set the parameters of the estimators contained in\n `transformers` of `ColumnTransformer`.\n\n Returns\n -------\n self\n \"\"\"\n self._set_params('_transformers', **kwargs)\n return self\n\n def _iter(self, fitted=False, replace_strings=False):\n \"\"\"\n Generate (name, trans, column, weight) tuples.\n\n If fitted=True, use the fitted transformers, else use the\n user specified transformers updated with converted column names\n and potentially appended with transformer for remainder.\n\n \"\"\"\n if fitted:\n transformers = self.transformers_\n else:\n # interleave the validated column specifiers\n transformers = [\n (name, trans, column) for (name, trans, _), column\n in zip(self.transformers, self._columns)\n ]\n # add transformer tuple for remainder\n if self._remainder[2] is not None:\n transformers = chain(transformers, [self._remainder])\n get_weight = (self.transformer_weights or {}).get\n\n for name, trans, column in transformers:\n if replace_strings:\n # replace 'passthrough' with identity transformer and\n # skip in case of 'drop'\n if trans == 'passthrough':\n trans = FunctionTransformer(\n accept_sparse=True, check_inverse=False\n )\n elif trans == 'drop':\n continue\n elif _is_empty_column_selection(column):\n continue\n\n yield (name, trans, column, get_weight(name))\n\n def _validate_transformers(self):\n if not self.transformers:\n return\n\n names, transformers, _ = zip(*self.transformers)\n\n # validate names\n self._validate_names(names)\n\n # validate estimators\n for t in transformers:\n if t in ('drop', 'passthrough'):\n continue\n if (not (hasattr(t, \"fit\") or hasattr(t, \"fit_transform\")) or not\n hasattr(t, \"transform\")):\n raise TypeError(\"All estimators should implement fit and \"\n \"transform, or can be 'drop' or 'passthrough' \"\n \"specifiers. 
'%s' (type %s) doesn't.\" %\n (t, type(t)))\n\n def _validate_column_callables(self, X):\n \"\"\"\n Converts callable column specifications.\n \"\"\"\n columns = []\n for _, _, column in self.transformers:\n if callable(column):\n column = column(X)\n columns.append(column)\n self._columns = columns\n\n def _validate_remainder(self, X):\n \"\"\"\n Validates ``remainder`` and defines ``_remainder`` targeting\n the remaining columns.\n \"\"\"\n is_transformer = ((hasattr(self.remainder, \"fit\")\n or hasattr(self.remainder, \"fit_transform\"))\n and hasattr(self.remainder, \"transform\"))\n if (self.remainder not in ('drop', 'passthrough')\n and not is_transformer):\n raise ValueError(\n \"The remainder keyword needs to be one of 'drop', \"\n \"'passthrough', or estimator. '%s' was passed instead\" %\n self.remainder)\n\n # Make it possible to check for reordered named columns on transform\n self._has_str_cols = any(_determine_key_type(cols) == 'str'\n for cols in self._columns)\n if hasattr(X, 'columns'):\n self._df_columns = X.columns\n\n self._n_features = X.shape[1]\n cols = []\n for columns in self._columns:\n cols.extend(_get_column_indices(X, columns))\n\n remaining_idx = sorted(set(range(self._n_features)) - set(cols))\n self._remainder = ('remainder', self.remainder, remaining_idx or None)\n\n @property\n def named_transformers_(self):\n \"\"\"Access the fitted transformer by name.\n\n Read-only attribute to access any transformer by given name.\n Keys are transformer names and values are the fitted transformer\n objects.\n\n \"\"\"\n # Use Bunch object to improve autocomplete\n return Bunch(**{name: trans for name, trans, _\n in self.transformers_})\n\n def get_feature_names(self):\n \"\"\"Get feature names from all transformers.\n\n Returns\n -------\n feature_names : list of strings\n Names of the features produced by transform.\n \"\"\"\n check_is_fitted(self)\n feature_names = []\n for name, trans, column, _ in self._iter(fitted=True):\n if trans == 'drop' or _is_empty_column_selection(column):\n continue\n if trans == 'passthrough':\n if hasattr(self, '_df_columns'):\n if ((not isinstance(column, slice))\n and all(isinstance(col, str) for col in column)):\n feature_names.extend(column)\n else:\n feature_names.extend(self._df_columns[column])\n else:\n indices = np.arange(self._n_features)\n feature_names.extend(['x%d' % i for i in indices[column]])\n continue\n if not hasattr(trans, 'get_feature_names'):\n raise AttributeError(\"Transformer %s (type %s) does not \"\n \"provide get_feature_names.\"\n % (str(name), type(trans).__name__))\n feature_names.extend([name + \"__\" + f for f in\n trans.get_feature_names()])\n return feature_names\n\n def _update_fitted_transformers(self, transformers):\n # transformers are fitted; excludes 'drop' cases\n fitted_transformers = iter(transformers)\n transformers_ = []\n\n for name, old, column, _ in self._iter():\n if old == 'drop':\n trans = 'drop'\n elif old == 'passthrough':\n # FunctionTransformer is present in list of transformers,\n # so get next transformer, but save original string\n next(fitted_transformers)\n trans = 'passthrough'\n elif _is_empty_column_selection(column):\n trans = old\n else:\n trans = next(fitted_transformers)\n transformers_.append((name, trans, column))\n\n # sanity check that transformers is exhausted\n assert not list(fitted_transformers)\n self.transformers_ = transformers_\n\n def _validate_output(self, result):\n \"\"\"\n Ensure that the output of each transformer is 2D. 
Otherwise\n hstack can raise an error or produce incorrect results.\n \"\"\"\n names = [name for name, _, _, _ in self._iter(fitted=True,\n replace_strings=True)]\n for Xs, name in zip(result, names):\n if not getattr(Xs, 'ndim', 0) == 2:\n raise ValueError(\n \"The output of the '{0}' transformer should be 2D (scipy \"\n \"matrix, array, or pandas DataFrame).\".format(name))\n\n def _log_message(self, name, idx, total):\n if not self.verbose:\n return None\n return '(%d of %d) Processing %s' % (idx, total, name)\n\n def _fit_transform(self, X, y, func, fitted=False):\n \"\"\"\n Private function to fit and/or transform on demand.\n\n Return value (transformers and/or transformed X data) depends\n on the passed function.\n ``fitted=True`` ensures the fitted transformers are used.\n \"\"\"\n transformers = list(\n self._iter(fitted=fitted, replace_strings=True))\n try:\n return Parallel(n_jobs=self.n_jobs)(\n delayed(func)(\n transformer=clone(trans) if not fitted else trans,\n X=_safe_indexing(X, column, axis=1),\n y=y,\n weight=weight,\n message_clsname='ColumnTransformer',\n message=self._log_message(name, idx, len(transformers)))\n for idx, (name, trans, column, weight) in enumerate(\n self._iter(fitted=fitted, replace_strings=True), 1))\n except ValueError as e:\n if \"Expected 2D array, got 1D array instead\" in str(e):\n raise ValueError(_ERR_MSG_1DCOLUMN) from e\n else:\n raise\n\n def fit(self, X, y=None):\n \"\"\"Fit all transformers using X.\n\n Parameters\n ----------\n X : {array-like, dataframe} of shape (n_samples, n_features)\n Input data, of which specified subsets are used to fit the\n transformers.\n\n y : array-like of shape (n_samples,...), default=None\n Targets for supervised learning.\n\n Returns\n -------\n self : ColumnTransformer\n This estimator\n\n \"\"\"\n # we use fit_transform to make sure to set sparse_output_ (for which we\n # need the transformed data) to have consistent output type in predict\n self.fit_transform(X, y=y)\n return self\n\n def fit_transform(self, X, y=None):\n \"\"\"Fit all transformers, transform the data and concatenate results.\n\n Parameters\n ----------\n X : {array-like, dataframe} of shape (n_samples, n_features)\n Input data, of which specified subsets are used to fit the\n transformers.\n\n y : array-like of shape (n_samples,), default=None\n Targets for supervised learning.\n\n Returns\n -------\n X_t : {array-like, sparse matrix} of \\\n shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers. 
If\n any result is a sparse matrix, everything will be converted to\n sparse matrices.\n\n \"\"\"\n # TODO: this should be `feature_names_in_` when we start having it\n if hasattr(X, \"columns\"):\n self._feature_names_in = np.asarray(X.columns)\n else:\n self._feature_names_in = None\n X = _check_X(X)\n # set n_features_in_ attribute\n self._check_n_features(X, reset=True)\n self._validate_transformers()\n self._validate_column_callables(X)\n self._validate_remainder(X)\n\n result = self._fit_transform(X, y, _fit_transform_one)\n\n if not result:\n self._update_fitted_transformers([])\n # All transformers are None\n return np.zeros((X.shape[0], 0))\n\n Xs, transformers = zip(*result)\n\n # determine if concatenated output will be sparse or not\n if any(sparse.issparse(X) for X in Xs):\n nnz = sum(X.nnz if sparse.issparse(X) else X.size for X in Xs)\n total = sum(X.shape[0] * X.shape[1] if sparse.issparse(X)\n else X.size for X in Xs)\n density = nnz / total\n self.sparse_output_ = density < self.sparse_threshold\n else:\n self.sparse_output_ = False\n\n self._update_fitted_transformers(transformers)\n self._validate_output(Xs)\n\n return self._hstack(list(Xs))\n\n def transform(self, X):\n \"\"\"Transform X separately by each transformer, concatenate results.\n\n Parameters\n ----------\n X : {array-like, dataframe} of shape (n_samples, n_features)\n The data to be transformed by subset.\n\n Returns\n -------\n X_t : {array-like, sparse matrix} of \\\n shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers. If\n any result is a sparse matrix, everything will be converted to\n sparse matrices.\n\n \"\"\"\n check_is_fitted(self)\n X = _check_X(X)\n if hasattr(X, \"columns\"):\n X_feature_names = np.asarray(X.columns)\n else:\n X_feature_names = None\n\n self._check_n_features(X, reset=False)\n if (self._feature_names_in is not None and\n X_feature_names is not None and\n np.any(self._feature_names_in != X_feature_names)):\n raise RuntimeError(\n \"Given feature/column names do not match the ones for the \"\n \"data given during fit.\"\n )\n Xs = self._fit_transform(X, None, _transform_one, fitted=True)\n self._validate_output(Xs)\n\n if not Xs:\n # All transformers are None\n return np.zeros((X.shape[0], 0))\n\n return self._hstack(list(Xs))\n\n def _hstack(self, Xs):\n \"\"\"Stacks Xs horizontally.\n\n This allows subclasses to control the stacking behavior, while reusing\n everything else from ColumnTransformer.\n\n Parameters\n ----------\n Xs : list of {array-like, sparse matrix, dataframe}\n \"\"\"\n if self.sparse_output_:\n try:\n # since all columns should be numeric before stacking them\n # in a sparse matrix, `check_array` is used for the\n # dtype conversion if necessary.\n converted_Xs = [check_array(X,\n accept_sparse=True,\n force_all_finite=False)\n for X in Xs]\n except ValueError as e:\n raise ValueError(\n \"For a sparse output, all columns should \"\n \"be a numeric or convertible to a numeric.\"\n ) from e\n\n return sparse.hstack(converted_Xs).tocsr()\n else:\n Xs = [f.toarray() if sparse.issparse(f) else f for f in Xs]\n return np.hstack(Xs)\n\n def _sk_visual_block_(self):\n if isinstance(self.remainder, str) and self.remainder == 'drop':\n transformers = self.transformers\n elif hasattr(self, \"_remainder\"):\n remainder_columns = self._remainder[2]\n if hasattr(self, '_df_columns'):\n remainder_columns = (\n self._df_columns[remainder_columns].tolist()\n )\n 
transformers = chain(self.transformers,\n [('remainder', self.remainder,\n remainder_columns)])\n else:\n transformers = chain(self.transformers,\n [('remainder', self.remainder, '')])\n\n names, transformers, name_details = zip(*transformers)\n return _VisualBlock('parallel', transformers,\n names=names, name_details=name_details)", + "instance_attributes": [ + { + "name": "remainder", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "sparse_threshold", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "_columns", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "_remainder", + "types": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "name": "transformers_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_selector", + "name": "make_column_selector", + "qname": "sklearn.compose._column_transformer.make_column_selector", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.compose._column_transformer/make_column_selector/__init__", + "scikit-learn/sklearn.compose._column_transformer/make_column_selector/__call__" + ], + "is_public": false, + "reexported_by": [], + "description": "Create a callable to select columns to be used with\n:class:`ColumnTransformer`.\n\n:func:`make_column_selector` can select columns based on datatype or the\ncolumns name with a regex. When using multiple selection criteria, **all**\ncriteria must match for a column to be selected.", + "docstring": "Create a callable to select columns to be used with\n:class:`ColumnTransformer`.\n\n:func:`make_column_selector` can select columns based on datatype or the\ncolumns name with a regex. When using multiple selection criteria, **all**\ncriteria must match for a column to be selected.\n\nParameters\n----------\npattern : str, default=None\n Name of columns containing this regex pattern will be included. If\n None, column selection will not be selected based on pattern.\n\ndtype_include : column dtype or list of column dtypes, default=None\n A selection of dtypes to include. For more details, see\n :meth:`pandas.DataFrame.select_dtypes`.\n\ndtype_exclude : column dtype or list of column dtypes, default=None\n A selection of dtypes to exclude. For more details, see\n :meth:`pandas.DataFrame.select_dtypes`.\n\nReturns\n-------\nselector : callable\n Callable for column selection to be used by a\n :class:`ColumnTransformer`.\n\nSee Also\n--------\nColumnTransformer : Class that allows combining the\n outputs of multiple transformer objects used on column subsets\n of the data into a single feature space.\n\nExamples\n--------\n>>> from sklearn.preprocessing import StandardScaler, OneHotEncoder\n>>> from sklearn.compose import make_column_transformer\n>>> from sklearn.compose import make_column_selector\n>>> import pandas as pd # doctest: +SKIP\n>>> X = pd.DataFrame({'city': ['London', 'London', 'Paris', 'Sallisaw'],\n... 'rating': [5, 3, 4, 5]}) # doctest: +SKIP\n>>> ct = make_column_transformer(\n... (StandardScaler(),\n... make_column_selector(dtype_include=np.number)), # rating\n... (OneHotEncoder(),\n... make_column_selector(dtype_include=object))) # city\n>>> ct.fit_transform(X) # doctest: +SKIP\narray([[ 0.90453403, 1. , 0. , 0. ],\n [-1.50755672, 1. , 0. , 0. ],\n [-0.30151134, 0. , 1. , 0. ],\n [ 0.90453403, 0. , 0. , 1. 
]])", + "code": "class make_column_selector:\n \"\"\"Create a callable to select columns to be used with\n :class:`ColumnTransformer`.\n\n :func:`make_column_selector` can select columns based on datatype or the\n columns name with a regex. When using multiple selection criteria, **all**\n criteria must match for a column to be selected.\n\n Parameters\n ----------\n pattern : str, default=None\n Name of columns containing this regex pattern will be included. If\n None, column selection will not be selected based on pattern.\n\n dtype_include : column dtype or list of column dtypes, default=None\n A selection of dtypes to include. For more details, see\n :meth:`pandas.DataFrame.select_dtypes`.\n\n dtype_exclude : column dtype or list of column dtypes, default=None\n A selection of dtypes to exclude. For more details, see\n :meth:`pandas.DataFrame.select_dtypes`.\n\n Returns\n -------\n selector : callable\n Callable for column selection to be used by a\n :class:`ColumnTransformer`.\n\n See Also\n --------\n ColumnTransformer : Class that allows combining the\n outputs of multiple transformer objects used on column subsets\n of the data into a single feature space.\n\n Examples\n --------\n >>> from sklearn.preprocessing import StandardScaler, OneHotEncoder\n >>> from sklearn.compose import make_column_transformer\n >>> from sklearn.compose import make_column_selector\n >>> import pandas as pd # doctest: +SKIP\n >>> X = pd.DataFrame({'city': ['London', 'London', 'Paris', 'Sallisaw'],\n ... 'rating': [5, 3, 4, 5]}) # doctest: +SKIP\n >>> ct = make_column_transformer(\n ... (StandardScaler(),\n ... make_column_selector(dtype_include=np.number)), # rating\n ... (OneHotEncoder(),\n ... make_column_selector(dtype_include=object))) # city\n >>> ct.fit_transform(X) # doctest: +SKIP\n array([[ 0.90453403, 1. , 0. , 0. ],\n [-1.50755672, 1. , 0. , 0. ],\n [-0.30151134, 0. , 1. , 0. ],\n [ 0.90453403, 0. , 0. , 1. 
]])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, pattern=None, *, dtype_include=None,\n dtype_exclude=None):\n self.pattern = pattern\n self.dtype_include = dtype_include\n self.dtype_exclude = dtype_exclude\n\n def __call__(self, df):\n \"\"\"Callable for column selection to be used by a\n :class:`ColumnTransformer`.\n\n Parameters\n ----------\n df : dataframe of shape (n_features, n_samples)\n DataFrame to select columns from.\n \"\"\"\n if not hasattr(df, 'iloc'):\n raise ValueError(\"make_column_selector can only be applied to \"\n \"pandas dataframes\")\n df_row = df.iloc[:1]\n if self.dtype_include is not None or self.dtype_exclude is not None:\n df_row = df_row.select_dtypes(include=self.dtype_include,\n exclude=self.dtype_exclude)\n cols = df_row.columns\n if self.pattern is not None:\n cols = cols[cols.str.contains(self.pattern, regex=True)]\n return cols.tolist()", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor", + "name": "TransformedTargetRegressor", + "qname": "sklearn.compose._target.TransformedTargetRegressor", + "decorators": [], + "superclasses": ["RegressorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/__init__", + "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/_fit_transformer", + "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/fit", + "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/predict", + "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/_more_tags", + "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/n_features_in_@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Meta-estimator to regress on a transformed target.\n\nUseful for applying a non-linear transformation to the target ``y`` in\nregression problems. This transformation can be given as a Transformer\nsuch as the QuantileTransformer or as a function and its inverse such as\n``log`` and ``exp``.\n\nThe computation during ``fit`` is::\n\n regressor.fit(X, func(y))\n\nor::\n\n regressor.fit(X, transformer.transform(y))\n\nThe computation during ``predict`` is::\n\n inverse_func(regressor.predict(X))\n\nor::\n\n transformer.inverse_transform(regressor.predict(X))\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "docstring": "Meta-estimator to regress on a transformed target.\n\nUseful for applying a non-linear transformation to the target ``y`` in\nregression problems. This transformation can be given as a Transformer\nsuch as the QuantileTransformer or as a function and its inverse such as\n``log`` and ``exp``.\n\nThe computation during ``fit`` is::\n\n regressor.fit(X, func(y))\n\nor::\n\n regressor.fit(X, transformer.transform(y))\n\nThe computation during ``predict`` is::\n\n inverse_func(regressor.predict(X))\n\nor::\n\n transformer.inverse_transform(regressor.predict(X))\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nregressor : object, default=None\n Regressor object such as derived from ``RegressorMixin``. This\n regressor will automatically be cloned each time prior to fitting.\n If regressor is ``None``, ``LinearRegression()`` is created and used.\n\ntransformer : object, default=None\n Estimator object such as derived from ``TransformerMixin``. Cannot be\n set at the same time as ``func`` and ``inverse_func``. 
If\n ``transformer`` is ``None`` as well as ``func`` and ``inverse_func``,\n the transformer will be an identity transformer. Note that the\n transformer will be cloned during fitting. Also, the transformer is\n restricting ``y`` to be a numpy array.\n\nfunc : function, default=None\n Function to apply to ``y`` before passing to ``fit``. Cannot be set at\n the same time as ``transformer``. The function needs to return a\n 2-dimensional array. If ``func`` is ``None``, the function used will be\n the identity function.\n\ninverse_func : function, default=None\n Function to apply to the prediction of the regressor. Cannot be set at\n the same time as ``transformer``. The function needs to return\n a 2-dimensional array. The inverse function is used to return\n predictions to the same space of the original training labels.\n\ncheck_inverse : bool, default=True\n Whether to check that ``transform`` followed by ``inverse_transform``\n or ``func`` followed by ``inverse_func`` leads to the original targets.\n\nAttributes\n----------\nregressor_ : object\n Fitted regressor.\n\ntransformer_ : object\n Transformer used in ``fit`` and ``predict``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> from sklearn.compose import TransformedTargetRegressor\n>>> tt = TransformedTargetRegressor(regressor=LinearRegression(),\n... func=np.log, inverse_func=np.exp)\n>>> X = np.arange(4).reshape(-1, 1)\n>>> y = np.exp(2 * X).ravel()\n>>> tt.fit(X, y)\nTransformedTargetRegressor(...)\n>>> tt.score(X, y)\n1.0\n>>> tt.regressor_.coef_\narray([2.])\n\nNotes\n-----\nInternally, the target ``y`` is always converted into a 2-dimensional array\nto be used by scikit-learn transformers. At the time of prediction, the\noutput will be reshaped to have the same number of dimensions as ``y``.\n\nSee :ref:`examples/compose/plot_transformed_target.py\n`.", + "code": "class TransformedTargetRegressor(RegressorMixin, BaseEstimator):\n \"\"\"Meta-estimator to regress on a transformed target.\n\n Useful for applying a non-linear transformation to the target ``y`` in\n regression problems. This transformation can be given as a Transformer\n such as the QuantileTransformer or as a function and its inverse such as\n ``log`` and ``exp``.\n\n The computation during ``fit`` is::\n\n regressor.fit(X, func(y))\n\n or::\n\n regressor.fit(X, transformer.transform(y))\n\n The computation during ``predict`` is::\n\n inverse_func(regressor.predict(X))\n\n or::\n\n transformer.inverse_transform(regressor.predict(X))\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.20\n\n Parameters\n ----------\n regressor : object, default=None\n Regressor object such as derived from ``RegressorMixin``. This\n regressor will automatically be cloned each time prior to fitting.\n If regressor is ``None``, ``LinearRegression()`` is created and used.\n\n transformer : object, default=None\n Estimator object such as derived from ``TransformerMixin``. Cannot be\n set at the same time as ``func`` and ``inverse_func``. If\n ``transformer`` is ``None`` as well as ``func`` and ``inverse_func``,\n the transformer will be an identity transformer. Note that the\n transformer will be cloned during fitting. Also, the transformer is\n restricting ``y`` to be a numpy array.\n\n func : function, default=None\n Function to apply to ``y`` before passing to ``fit``. Cannot be set at\n the same time as ``transformer``. The function needs to return a\n 2-dimensional array. 
If ``func`` is ``None``, the function used will be\n the identity function.\n\n inverse_func : function, default=None\n Function to apply to the prediction of the regressor. Cannot be set at\n the same time as ``transformer``. The function needs to return\n a 2-dimensional array. The inverse function is used to return\n predictions to the same space of the original training labels.\n\n check_inverse : bool, default=True\n Whether to check that ``transform`` followed by ``inverse_transform``\n or ``func`` followed by ``inverse_func`` leads to the original targets.\n\n Attributes\n ----------\n regressor_ : object\n Fitted regressor.\n\n transformer_ : object\n Transformer used in ``fit`` and ``predict``.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.linear_model import LinearRegression\n >>> from sklearn.compose import TransformedTargetRegressor\n >>> tt = TransformedTargetRegressor(regressor=LinearRegression(),\n ... func=np.log, inverse_func=np.exp)\n >>> X = np.arange(4).reshape(-1, 1)\n >>> y = np.exp(2 * X).ravel()\n >>> tt.fit(X, y)\n TransformedTargetRegressor(...)\n >>> tt.score(X, y)\n 1.0\n >>> tt.regressor_.coef_\n array([2.])\n\n Notes\n -----\n Internally, the target ``y`` is always converted into a 2-dimensional array\n to be used by scikit-learn transformers. At the time of prediction, the\n output will be reshaped to have the same number of dimensions as ``y``.\n\n See :ref:`examples/compose/plot_transformed_target.py\n `.\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, regressor=None, *, transformer=None,\n func=None, inverse_func=None, check_inverse=True):\n self.regressor = regressor\n self.transformer = transformer\n self.func = func\n self.inverse_func = inverse_func\n self.check_inverse = check_inverse\n\n def _fit_transformer(self, y):\n \"\"\"Check transformer and fit transformer.\n\n Create the default transformer, fit it and make additional inverse\n check on a subset (optional).\n\n \"\"\"\n if (self.transformer is not None and\n (self.func is not None or self.inverse_func is not None)):\n raise ValueError(\"'transformer' and functions 'func'/\"\n \"'inverse_func' cannot both be set.\")\n elif self.transformer is not None:\n self.transformer_ = clone(self.transformer)\n else:\n if self.func is not None and self.inverse_func is None:\n raise ValueError(\"When 'func' is provided, 'inverse_func' must\"\n \" also be provided\")\n self.transformer_ = FunctionTransformer(\n func=self.func, inverse_func=self.inverse_func, validate=True,\n check_inverse=self.check_inverse)\n # XXX: sample_weight is not currently passed to the\n # transformer. However, if transformer starts using sample_weight, the\n # code should be modified accordingly. At the time to consider the\n # sample_prop feature, it is also a good use case to be considered.\n self.transformer_.fit(y)\n if self.check_inverse:\n idx_selected = slice(None, None, max(1, y.shape[0] // 10))\n y_sel = _safe_indexing(y, idx_selected)\n y_sel_t = self.transformer_.transform(y_sel)\n if not np.allclose(y_sel,\n self.transformer_.inverse_transform(y_sel_t)):\n warnings.warn(\"The provided functions or transformer are\"\n \" not strictly inverse of each other. 
If\"\n \" you are sure you want to proceed regardless\"\n \", set 'check_inverse=False'\", UserWarning)\n\n def fit(self, X, y, **fit_params):\n \"\"\"Fit the model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n **fit_params : dict\n Parameters passed to the ``fit`` method of the underlying\n regressor.\n\n\n Returns\n -------\n self : object\n \"\"\"\n y = check_array(y, accept_sparse=False, force_all_finite=True,\n ensure_2d=False, dtype='numeric')\n\n # store the number of dimension of the target to predict an array of\n # similar shape at predict\n self._training_dim = y.ndim\n\n # transformers are designed to modify X which is 2d dimensional, we\n # need to modify y accordingly.\n if y.ndim == 1:\n y_2d = y.reshape(-1, 1)\n else:\n y_2d = y\n self._fit_transformer(y_2d)\n\n # transform y and convert back to 1d array if needed\n y_trans = self.transformer_.transform(y_2d)\n # FIXME: a FunctionTransformer can return a 1D array even when validate\n # is set to True. Therefore, we need to check the number of dimension\n # first.\n if y_trans.ndim == 2 and y_trans.shape[1] == 1:\n y_trans = y_trans.squeeze(axis=1)\n\n if self.regressor is None:\n from ..linear_model import LinearRegression\n self.regressor_ = LinearRegression()\n else:\n self.regressor_ = clone(self.regressor)\n\n self.regressor_.fit(X, y_trans, **fit_params)\n\n return self\n\n def predict(self, X):\n \"\"\"Predict using the base regressor, applying inverse.\n\n The regressor is used to predict and the ``inverse_func`` or\n ``inverse_transform`` is applied before returning the prediction.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\n Returns\n -------\n y_hat : ndarray of shape (n_samples,)\n Predicted values.\n\n \"\"\"\n check_is_fitted(self)\n pred = self.regressor_.predict(X)\n if pred.ndim == 1:\n pred_trans = self.transformer_.inverse_transform(\n pred.reshape(-1, 1))\n else:\n pred_trans = self.transformer_.inverse_transform(pred)\n if (self._training_dim == 1 and\n pred_trans.ndim == 2 and pred_trans.shape[1] == 1):\n pred_trans = pred_trans.squeeze(axis=1)\n\n return pred_trans\n\n def _more_tags(self):\n return {'poor_score': True, 'no_validation': True}\n\n @property\n def n_features_in_(self):\n # For consistency with other estimators we raise a AttributeError so\n # that hasattr() returns False the estimator isn't fitted.\n try:\n check_is_fitted(self)\n except NotFittedError as nfe:\n raise AttributeError(\n \"{} object has no n_features_in_ attribute.\"\n .format(self.__class__.__name__)\n ) from nfe\n\n return self.regressor_.n_features_in_", + "instance_attributes": [ + { + "name": "check_inverse", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope", + "name": "EllipticEnvelope", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope", + "decorators": [], + "superclasses": ["OutlierMixin", "MinCovDet"], + "methods": [ + "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/__init__", + "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/fit", + "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/decision_function", + 
"scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/score_samples", + "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/predict", + "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/score" + ], + "is_public": false, + "reexported_by": [], + "description": "An object for detecting outliers in a Gaussian distributed dataset.\n\nRead more in the :ref:`User Guide `.", + "docstring": "An object for detecting outliers in a Gaussian distributed dataset.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, the support of robust location and covariance estimates\n is computed, and a covariance estimate is recomputed from it,\n without centering the data.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, the robust location and covariance are directly computed\n with the FastMCD algorithm without additional treatment.\n\nsupport_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. If None, the minimum value of support_fraction will\n be used within the algorithm: `[n_sample + n_features + 1] / 2`.\n Range is (0, 1).\n\ncontamination : float, default=0.1\n The amount of contamination of the data set, i.e. the proportion\n of outliers in the data set. Range is (0, 0.5).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling\n the data. Pass an int for reproducible results across multiple function\n calls. See :term: `Glossary `.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated robust location.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated robust covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nsupport_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute the\n robust estimates of location and shape.\n\noffset_ : float\n Offset used to define the decision function from the raw scores.\n We have the relation: ``decision_function = score_samples - offset_``.\n The offset depends on the contamination parameter and is defined in\n such a way we obtain the expected number of outliers (samples with\n decision function < 0) in training.\n\n .. versionadded:: 0.20\n\nraw_location_ : ndarray of shape (n_features,)\n The raw robust estimated location before correction and re-weighting.\n\nraw_covariance_ : ndarray of shape (n_features, n_features)\n The raw robust estimated covariance before correction and re-weighting.\n\nraw_support_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the raw robust estimates of location and shape, before correction\n and re-weighting.\n\ndist_ : ndarray of shape (n_samples,)\n Mahalanobis distances of the training set (on which :meth:`fit` is\n called) observations.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import EllipticEnvelope\n>>> true_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> X = np.random.RandomState(0).multivariate_normal(mean=[0, 0],\n... cov=true_cov,\n... 
size=500)\n>>> cov = EllipticEnvelope(random_state=0).fit(X)\n>>> # predict returns 1 for an inlier and -1 for an outlier\n>>> cov.predict([[0, 0],\n... [3, 3]])\narray([ 1, -1])\n>>> cov.covariance_\narray([[0.7411..., 0.2535...],\n [0.2535..., 0.3053...]])\n>>> cov.location_\narray([0.0813... , 0.0427...])\n\nSee Also\n--------\nEmpiricalCovariance, MinCovDet\n\nNotes\n-----\nOutlier detection from covariance estimation may break or not\nperform well in high-dimensional settings. In particular, one will\nalways take care to work with ``n_samples > n_features ** 2``.\n\nReferences\n----------\n.. [1] Rousseeuw, P.J., Van Driessen, K. \"A fast algorithm for the\n minimum covariance determinant estimator\" Technometrics 41(3), 212\n (1999)", + "code": "class EllipticEnvelope(OutlierMixin, MinCovDet):\n \"\"\"An object for detecting outliers in a Gaussian distributed dataset.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n store_precision : bool, default=True\n Specify if the estimated precision is stored.\n\n assume_centered : bool, default=False\n If True, the support of robust location and covariance estimates\n is computed, and a covariance estimate is recomputed from it,\n without centering the data.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, the robust location and covariance are directly computed\n with the FastMCD algorithm without additional treatment.\n\n support_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. If None, the minimum value of support_fraction will\n be used within the algorithm: `[n_sample + n_features + 1] / 2`.\n Range is (0, 1).\n\n contamination : float, default=0.1\n The amount of contamination of the data set, i.e. the proportion\n of outliers in the data set. Range is (0, 0.5).\n\n random_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling\n the data. Pass an int for reproducible results across multiple function\n calls. See :term: `Glossary `.\n\n Attributes\n ----------\n location_ : ndarray of shape (n_features,)\n Estimated robust location.\n\n covariance_ : ndarray of shape (n_features, n_features)\n Estimated robust covariance matrix.\n\n precision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\n support_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute the\n robust estimates of location and shape.\n\n offset_ : float\n Offset used to define the decision function from the raw scores.\n We have the relation: ``decision_function = score_samples - offset_``.\n The offset depends on the contamination parameter and is defined in\n such a way we obtain the expected number of outliers (samples with\n decision function < 0) in training.\n\n .. 
versionadded:: 0.20\n\n raw_location_ : ndarray of shape (n_features,)\n The raw robust estimated location before correction and re-weighting.\n\n raw_covariance_ : ndarray of shape (n_features, n_features)\n The raw robust estimated covariance before correction and re-weighting.\n\n raw_support_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the raw robust estimates of location and shape, before correction\n and re-weighting.\n\n dist_ : ndarray of shape (n_samples,)\n Mahalanobis distances of the training set (on which :meth:`fit` is\n called) observations.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.covariance import EllipticEnvelope\n >>> true_cov = np.array([[.8, .3],\n ... [.3, .4]])\n >>> X = np.random.RandomState(0).multivariate_normal(mean=[0, 0],\n ... cov=true_cov,\n ... size=500)\n >>> cov = EllipticEnvelope(random_state=0).fit(X)\n >>> # predict returns 1 for an inlier and -1 for an outlier\n >>> cov.predict([[0, 0],\n ... [3, 3]])\n array([ 1, -1])\n >>> cov.covariance_\n array([[0.7411..., 0.2535...],\n [0.2535..., 0.3053...]])\n >>> cov.location_\n array([0.0813... , 0.0427...])\n\n See Also\n --------\n EmpiricalCovariance, MinCovDet\n\n Notes\n -----\n Outlier detection from covariance estimation may break or not\n perform well in high-dimensional settings. In particular, one will\n always take care to work with ``n_samples > n_features ** 2``.\n\n References\n ----------\n .. [1] Rousseeuw, P.J., Van Driessen, K. \"A fast algorithm for the\n minimum covariance determinant estimator\" Technometrics 41(3), 212\n (1999)\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, store_precision=True, assume_centered=False,\n support_fraction=None, contamination=0.1,\n random_state=None):\n super().__init__(\n store_precision=store_precision,\n assume_centered=assume_centered,\n support_fraction=support_fraction,\n random_state=random_state)\n self.contamination = contamination\n\n def fit(self, X, y=None):\n \"\"\"Fit the EllipticEnvelope model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : Ignored\n Not used, present for API consistency by convention.\n \"\"\"\n super().fit(X)\n self.offset_ = np.percentile(-self.dist_, 100. 
* self.contamination)\n return self\n\n def decision_function(self, X):\n \"\"\"Compute the decision function of the given observations.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n decision : ndarray of shape (n_samples,)\n Decision function of the samples.\n It is equal to the shifted Mahalanobis distances.\n The threshold for being an outlier is 0, which ensures a\n compatibility with other outlier detection algorithms.\n \"\"\"\n check_is_fitted(self)\n negative_mahal_dist = self.score_samples(X)\n return negative_mahal_dist - self.offset_\n\n def score_samples(self, X):\n \"\"\"Compute the negative Mahalanobis distances.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n negative_mahal_distances : array-like of shape (n_samples,)\n Opposite of the Mahalanobis distances.\n \"\"\"\n check_is_fitted(self)\n return -self.mahalanobis(X)\n\n def predict(self, X):\n \"\"\"\n Predict the labels (1 inlier, -1 outlier) of X according to the\n fitted model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n is_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers.\n \"\"\"\n X = check_array(X)\n is_inlier = np.full(X.shape[0], -1, dtype=int)\n values = self.decision_function(X)\n is_inlier[values >= 0] = 1\n\n return is_inlier\n\n def score(self, X, y, sample_weight=None):\n \"\"\"Returns the mean accuracy on the given test data and labels.\n\n In multi-label classification, this is the subset accuracy\n which is a harsh metric since you require for each sample that\n each label set be correctly predicted.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test samples.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Mean accuracy of self.predict(X) w.r.t. 
y.\n \"\"\"\n return accuracy_score(y, self.predict(X), sample_weight=sample_weight)", + "instance_attributes": [ + { + "name": "contamination", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance", + "name": "EmpiricalCovariance", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance", + "decorators": [], + "superclasses": ["BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/__init__", + "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/_set_covariance", + "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/get_precision", + "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/fit", + "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/score", + "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/error_norm", + "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/mahalanobis" + ], + "is_public": false, + "reexported_by": [], + "description": "Maximum likelihood covariance estimator\n\nRead more in the :ref:`User Guide `.", + "docstring": "Maximum likelihood covariance estimator\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specifies if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo-inverse matrix.\n (stored only if store_precision is True)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import EmpiricalCovariance\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=500)\n>>> cov = EmpiricalCovariance().fit(X)\n>>> cov.covariance_\narray([[0.7569..., 0.2818...],\n [0.2818..., 0.3928...]])\n>>> cov.location_\narray([0.0622..., 0.0193...])", + "code": "class EmpiricalCovariance(BaseEstimator):\n \"\"\"Maximum likelihood covariance estimator\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n store_precision : bool, default=True\n Specifies if the estimated precision is stored.\n\n assume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data are centered before computation.\n\n Attributes\n ----------\n location_ : ndarray of shape (n_features,)\n Estimated location, i.e. 
the estimated mean.\n\n covariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\n precision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo-inverse matrix.\n (stored only if store_precision is True)\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.covariance import EmpiricalCovariance\n >>> from sklearn.datasets import make_gaussian_quantiles\n >>> real_cov = np.array([[.8, .3],\n ... [.3, .4]])\n >>> rng = np.random.RandomState(0)\n >>> X = rng.multivariate_normal(mean=[0, 0],\n ... cov=real_cov,\n ... size=500)\n >>> cov = EmpiricalCovariance().fit(X)\n >>> cov.covariance_\n array([[0.7569..., 0.2818...],\n [0.2818..., 0.3928...]])\n >>> cov.location_\n array([0.0622..., 0.0193...])\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, store_precision=True, assume_centered=False):\n self.store_precision = store_precision\n self.assume_centered = assume_centered\n\n def _set_covariance(self, covariance):\n \"\"\"Saves the covariance and precision estimates\n\n Storage is done according to `self.store_precision`.\n Precision stored only if invertible.\n\n Parameters\n ----------\n covariance : array-like of shape (n_features, n_features)\n Estimated covariance matrix to be stored, and from which precision\n is computed.\n \"\"\"\n covariance = check_array(covariance)\n # set covariance\n self.covariance_ = covariance\n # set precision\n if self.store_precision:\n self.precision_ = linalg.pinvh(covariance, check_finite=False)\n else:\n self.precision_ = None\n\n def get_precision(self):\n \"\"\"Getter for the precision matrix.\n\n Returns\n -------\n precision_ : array-like of shape (n_features, n_features)\n The precision matrix associated to the current covariance object.\n \"\"\"\n if self.store_precision:\n precision = self.precision_\n else:\n precision = linalg.pinvh(self.covariance_, check_finite=False)\n return precision\n\n def fit(self, X, y=None):\n \"\"\"Fits the Maximum Likelihood Estimator covariance model\n according to the given training data and parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n X = self._validate_data(X)\n if self.assume_centered:\n self.location_ = np.zeros(X.shape[1])\n else:\n self.location_ = X.mean(0)\n covariance = empirical_covariance(\n X, assume_centered=self.assume_centered)\n self._set_covariance(covariance)\n\n return self\n\n def score(self, X_test, y=None):\n \"\"\"Computes the log-likelihood of a Gaussian data set with\n `self.covariance_` as an estimator of its covariance matrix.\n\n Parameters\n ----------\n X_test : array-like of shape (n_samples, n_features)\n Test data of which we compute the likelihood, where n_samples is\n the number of samples and n_features is the number of features.\n X_test is assumed to be drawn from the same distribution as\n the data used in fit (including centering).\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n res : float\n The likelihood of the data set with `self.covariance_` as an\n estimator of its covariance matrix.\n \"\"\"\n # compute empirical covariance of the test set\n test_cov = empirical_covariance(\n X_test - self.location_, assume_centered=True)\n # compute log likelihood\n res = log_likelihood(test_cov, 
self.get_precision())\n\n return res\n\n def error_norm(self, comp_cov, norm='frobenius', scaling=True,\n squared=True):\n \"\"\"Computes the Mean Squared Error between two covariance estimators.\n (In the sense of the Frobenius norm).\n\n Parameters\n ----------\n comp_cov : array-like of shape (n_features, n_features)\n The covariance to compare with.\n\n norm : {\"frobenius\", \"spectral\"}, default=\"frobenius\"\n The type of norm used to compute the error. Available error types:\n - 'frobenius' (default): sqrt(tr(A^t.A))\n - 'spectral': sqrt(max(eigenvalues(A^t.A)))\n where A is the error ``(comp_cov - self.covariance_)``.\n\n scaling : bool, default=True\n If True (default), the squared error norm is divided by n_features.\n If False, the squared error norm is not rescaled.\n\n squared : bool, default=True\n Whether to compute the squared error norm or the error norm.\n If True (default), the squared error norm is returned.\n If False, the error norm is returned.\n\n Returns\n -------\n result : float\n The Mean Squared Error (in the sense of the Frobenius norm) between\n `self` and `comp_cov` covariance estimators.\n \"\"\"\n # compute the error\n error = comp_cov - self.covariance_\n # compute the error norm\n if norm == \"frobenius\":\n squared_norm = np.sum(error ** 2)\n elif norm == \"spectral\":\n squared_norm = np.amax(linalg.svdvals(np.dot(error.T, error)))\n else:\n raise NotImplementedError(\n \"Only spectral and frobenius norms are implemented\")\n # optionally scale the error norm\n if scaling:\n squared_norm = squared_norm / error.shape[0]\n # finally get either the squared norm or the norm\n if squared:\n result = squared_norm\n else:\n result = np.sqrt(squared_norm)\n\n return result\n\n def mahalanobis(self, X):\n \"\"\"Computes the squared Mahalanobis distances of given observations.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The observations, the Mahalanobis distances of which we\n compute. Observations are assumed to be drawn from the same\n distribution as the data used in fit.\n\n Returns\n -------\n dist : ndarray of shape (n_samples,)\n Squared Mahalanobis distances of the observations.\n \"\"\"\n precision = self.get_precision()\n # compute mahalanobis distances\n dist = pairwise_distances(X, self.location_[np.newaxis, :],\n metric='mahalanobis', VI=precision)\n\n return np.reshape(dist, (len(X),)) ** 2", + "instance_attributes": [ + { + "name": "store_precision", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "assume_centered", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "precision_", + "types": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "name": "location_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso", + "name": "GraphicalLasso", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso", + "decorators": [], + "superclasses": ["EmpiricalCovariance"], + "methods": [ + "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/__init__", + "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionchanged:: v0.20\n GraphLasso has been renamed to GraphicalLasso", + "docstring": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLasso has been renamed to GraphicalLasso\n\nParameters\n----------\nalpha : float, default=0.01\n The regularization parameter: the higher alpha, the more\n regularization, the sparser the inverse covariance.\n Range is (0, inf].\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n The maximum number of iterations.\n\nverbose : bool, default=False\n If verbose is True, the objective function and dual gap are\n plotted at each iteration.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLasso\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n... [0.0, 0.4, 0.0, 0.0],\n... [0.2, 0.0, 0.3, 0.1],\n... [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n... cov=true_cov,\n... size=200)\n>>> cov = GraphicalLasso().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.049, 0.218, 0.019],\n [0.049, 0.364, 0.017, 0.034],\n [0.218, 0.017, 0.322, 0.093],\n [0.019, 0.034, 0.093, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])\n\nSee Also\n--------\ngraphical_lasso, GraphicalLassoCV", + "code": "class GraphicalLasso(EmpiricalCovariance):\n \"\"\"Sparse inverse covariance estimation with an l1-penalized estimator.\n\n Read more in the :ref:`User Guide `.\n\n .. versionchanged:: v0.20\n GraphLasso has been renamed to GraphicalLasso\n\n Parameters\n ----------\n alpha : float, default=0.01\n The regularization parameter: the higher alpha, the more\n regularization, the sparser the inverse covariance.\n Range is (0, inf].\n\n mode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\n tol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\n enet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. 
This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\n max_iter : int, default=100\n The maximum number of iterations.\n\n verbose : bool, default=False\n If verbose is True, the objective function and dual gap are\n plotted at each iteration.\n\n assume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data are centered before computation.\n\n Attributes\n ----------\n location_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\n covariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\n precision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n\n n_iter_ : int\n Number of iterations run.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.covariance import GraphicalLasso\n >>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n ... [0.0, 0.4, 0.0, 0.0],\n ... [0.2, 0.0, 0.3, 0.1],\n ... [0.0, 0.0, 0.1, 0.7]])\n >>> np.random.seed(0)\n >>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n ... cov=true_cov,\n ... size=200)\n >>> cov = GraphicalLasso().fit(X)\n >>> np.around(cov.covariance_, decimals=3)\n array([[0.816, 0.049, 0.218, 0.019],\n [0.049, 0.364, 0.017, 0.034],\n [0.218, 0.017, 0.322, 0.093],\n [0.019, 0.034, 0.093, 0.69 ]])\n >>> np.around(cov.location_, decimals=3)\n array([0.073, 0.04 , 0.038, 0.143])\n\n See Also\n --------\n graphical_lasso, GraphicalLassoCV\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, alpha=.01, *, mode='cd', tol=1e-4, enet_tol=1e-4,\n max_iter=100, verbose=False, assume_centered=False):\n super().__init__(assume_centered=assume_centered)\n self.alpha = alpha\n self.mode = mode\n self.tol = tol\n self.enet_tol = enet_tol\n self.max_iter = max_iter\n self.verbose = verbose\n\n def fit(self, X, y=None):\n \"\"\"Fits the GraphicalLasso model to X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n # Covariance does not make sense for a single feature\n X = self._validate_data(X, ensure_min_features=2, ensure_min_samples=2,\n estimator=self)\n\n if self.assume_centered:\n self.location_ = np.zeros(X.shape[1])\n else:\n self.location_ = X.mean(0)\n emp_cov = empirical_covariance(\n X, assume_centered=self.assume_centered)\n self.covariance_, self.precision_, self.n_iter_ = graphical_lasso(\n emp_cov, alpha=self.alpha, mode=self.mode, tol=self.tol,\n enet_tol=self.enet_tol, max_iter=self.max_iter,\n verbose=self.verbose, return_n_iter=True)\n return self", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "mode", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "enet_tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "location_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": 
"scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV", + "name": "GraphicalLassoCV", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV", + "decorators": [], + "superclasses": ["GraphicalLasso"], + "methods": [ + "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__", + "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/fit", + "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/grid_scores_@getter", + "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/cv_alphas_@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLassoCV has been renamed to GraphicalLassoCV", + "docstring": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLassoCV has been renamed to GraphicalLassoCV\n\nParameters\n----------\nalphas : int or array-like of shape (n_alphas,), dtype=float, default=4\n If an integer is given, it fixes the number of points on the\n grids of alpha to be used. If a list is given, it gives the\n grid to be used. See the notes in the class docstring for\n more details. Range is (0, inf] when floats given.\n\nn_refinements : int, default=4\n The number of times the grid is refined. Not used if explicit\n values of alphas are passed. Range is [1, inf).\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.20\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n Maximum number of iterations.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where number of features is greater\n than number of samples. Elsewhere prefer cd which is more numerically\n stable.\n\nn_jobs : int, default=None\n number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. 
versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nverbose : bool, default=False\n If verbose is True, the objective function and duality gap are\n printed at each iteration.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated precision matrix (inverse covariance).\n\nalpha_ : float\n Penalization parameter selected.\n\ncv_alphas_ : list of shape (n_alphas,), dtype=float\n All penalization parameters explored.\n\n .. deprecated:: 0.24\n The `cv_alphas_` attribute is deprecated in version 0.24 in favor\n of `cv_results_['alphas']` and will be removed in version\n 1.1 (renaming of 0.26).\n\ngrid_scores_ : ndarray of shape (n_alphas, n_folds)\n Log-likelihood score on left-out data across folds.\n\n .. deprecated:: 0.24\n The `grid_scores_` attribute is deprecated in version 0.24 in favor\n of `cv_results_` and will be removed in version\n 1.1 (renaming of 0.26).\n\ncv_results_ : dict of ndarrays\n A dict with keys:\n\n alphas : ndarray of shape (n_alphas,)\n All penalization parameters explored.\n\n split(k)_score : ndarray of shape (n_alphas,)\n Log-likelihood score on left-out data across (k)th fold.\n\n mean_score : ndarray of shape (n_alphas,)\n Mean of scores over the folds.\n\n std_score : ndarray of shape (n_alphas,)\n Standard deviation of scores over the folds.\n\n .. versionadded:: 0.24\n\nn_iter_ : int\n Number of iterations run for the optimal alpha.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLassoCV\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n... [0.0, 0.4, 0.0, 0.0],\n... [0.2, 0.0, 0.3, 0.1],\n... [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n... cov=true_cov,\n... size=200)\n>>> cov = GraphicalLassoCV().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.051, 0.22 , 0.017],\n [0.051, 0.364, 0.018, 0.036],\n [0.22 , 0.018, 0.322, 0.094],\n [0.017, 0.036, 0.094, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])\n\nSee Also\n--------\ngraphical_lasso, GraphicalLasso\n\nNotes\n-----\nThe search for the optimal penalization parameter (alpha) is done on an\niteratively refined grid: first the cross-validated scores on a grid are\ncomputed, then a new refined grid is centered around the maximum, and so\non.\n\nOne of the challenges which is faced here is that the solvers can\nfail to converge to a well-conditioned estimate. The corresponding\nvalues of alpha then come out as missing values, but the optimum may\nbe close to these missing values.", + "code": "class GraphicalLassoCV(GraphicalLasso):\n \"\"\"Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\n See glossary entry for :term:`cross-validation estimator`.\n\n Read more in the :ref:`User Guide `.\n\n .. versionchanged:: v0.20\n GraphLassoCV has been renamed to GraphicalLassoCV\n\n Parameters\n ----------\n alphas : int or array-like of shape (n_alphas,), dtype=float, default=4\n If an integer is given, it fixes the number of points on the\n grids of alpha to be used. 
If a list is given, it gives the\n grid to be used. See the notes in the class docstring for\n more details. Range is (0, inf] when floats given.\n\n n_refinements : int, default=4\n The number of times the grid is refined. Not used if explicit\n values of alphas are passed. Range is [1, inf).\n\n cv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.20\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n tol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\n enet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\n max_iter : int, default=100\n Maximum number of iterations.\n\n mode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where number of features is greater\n than number of samples. Elsewhere prefer cd which is more numerically\n stable.\n\n n_jobs : int, default=None\n number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\n verbose : bool, default=False\n If verbose is True, the objective function and duality gap are\n printed at each iteration.\n\n assume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data are centered before computation.\n\n Attributes\n ----------\n location_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\n covariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\n precision_ : ndarray of shape (n_features, n_features)\n Estimated precision matrix (inverse covariance).\n\n alpha_ : float\n Penalization parameter selected.\n\n cv_alphas_ : list of shape (n_alphas,), dtype=float\n All penalization parameters explored.\n\n .. deprecated:: 0.24\n The `cv_alphas_` attribute is deprecated in version 0.24 in favor\n of `cv_results_['alphas']` and will be removed in version\n 1.1 (renaming of 0.26).\n\n grid_scores_ : ndarray of shape (n_alphas, n_folds)\n Log-likelihood score on left-out data across folds.\n\n .. 
deprecated:: 0.24\n The `grid_scores_` attribute is deprecated in version 0.24 in favor\n of `cv_results_` and will be removed in version\n 1.1 (renaming of 0.26).\n\n cv_results_ : dict of ndarrays\n A dict with keys:\n\n alphas : ndarray of shape (n_alphas,)\n All penalization parameters explored.\n\n split(k)_score : ndarray of shape (n_alphas,)\n Log-likelihood score on left-out data across (k)th fold.\n\n mean_score : ndarray of shape (n_alphas,)\n Mean of scores over the folds.\n\n std_score : ndarray of shape (n_alphas,)\n Standard deviation of scores over the folds.\n\n .. versionadded:: 0.24\n\n n_iter_ : int\n Number of iterations run for the optimal alpha.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.covariance import GraphicalLassoCV\n >>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n ... [0.0, 0.4, 0.0, 0.0],\n ... [0.2, 0.0, 0.3, 0.1],\n ... [0.0, 0.0, 0.1, 0.7]])\n >>> np.random.seed(0)\n >>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n ... cov=true_cov,\n ... size=200)\n >>> cov = GraphicalLassoCV().fit(X)\n >>> np.around(cov.covariance_, decimals=3)\n array([[0.816, 0.051, 0.22 , 0.017],\n [0.051, 0.364, 0.018, 0.036],\n [0.22 , 0.018, 0.322, 0.094],\n [0.017, 0.036, 0.094, 0.69 ]])\n >>> np.around(cov.location_, decimals=3)\n array([0.073, 0.04 , 0.038, 0.143])\n\n See Also\n --------\n graphical_lasso, GraphicalLasso\n\n Notes\n -----\n The search for the optimal penalization parameter (alpha) is done on an\n iteratively refined grid: first the cross-validated scores on a grid are\n computed, then a new refined grid is centered around the maximum, and so\n on.\n\n One of the challenges which is faced here is that the solvers can\n fail to converge to a well-conditioned estimate. The corresponding\n values of alpha then come out as missing values, but the optimum may\n be close to these missing values.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, alphas=4, n_refinements=4, cv=None, tol=1e-4,\n enet_tol=1e-4, max_iter=100, mode='cd', n_jobs=None,\n verbose=False, assume_centered=False):\n super().__init__(\n mode=mode, tol=tol, verbose=verbose, enet_tol=enet_tol,\n max_iter=max_iter, assume_centered=assume_centered)\n self.alphas = alphas\n self.n_refinements = n_refinements\n self.cv = cv\n self.n_jobs = n_jobs\n\n def fit(self, X, y=None):\n \"\"\"Fits the GraphicalLasso covariance model to X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n # Covariance does not make sense for a single feature\n X = self._validate_data(X, ensure_min_features=2, estimator=self)\n if self.assume_centered:\n self.location_ = np.zeros(X.shape[1])\n else:\n self.location_ = X.mean(0)\n emp_cov = empirical_covariance(\n X, assume_centered=self.assume_centered)\n\n cv = check_cv(self.cv, y, classifier=False)\n\n # List of (alpha, scores, covs)\n path = list()\n n_alphas = self.alphas\n inner_verbose = max(0, self.verbose - 1)\n\n if isinstance(n_alphas, Sequence):\n alphas = self.alphas\n n_refinements = 1\n else:\n n_refinements = self.n_refinements\n alpha_1 = alpha_max(emp_cov)\n alpha_0 = 1e-2 * alpha_1\n alphas = np.logspace(np.log10(alpha_0), np.log10(alpha_1),\n n_alphas)[::-1]\n\n t0 = time.time()\n for i in range(n_refinements):\n with warnings.catch_warnings():\n # No need to see the convergence warnings on this grid:\n # they will 
always be points that will not converge\n # during the cross-validation\n warnings.simplefilter('ignore', ConvergenceWarning)\n # Compute the cross-validated loss on the current grid\n\n # NOTE: Warm-restarting graphical_lasso_path has been tried,\n # and this did not yield any gain\n # (same execution time with or without).\n this_path = Parallel(\n n_jobs=self.n_jobs,\n verbose=self.verbose\n )(delayed(graphical_lasso_path)(X[train], alphas=alphas,\n X_test=X[test], mode=self.mode,\n tol=self.tol,\n enet_tol=self.enet_tol,\n max_iter=int(.1 *\n self.max_iter),\n verbose=inner_verbose)\n for train, test in cv.split(X, y))\n\n # Little dance to transform the list into what we need\n covs, _, scores = zip(*this_path)\n covs = zip(*covs)\n scores = zip(*scores)\n path.extend(zip(alphas, scores, covs))\n path = sorted(path, key=operator.itemgetter(0), reverse=True)\n\n # Find the maximum (avoid using the built-in 'max' function to\n # have a fully-reproducible selection of the smallest alpha\n # in case of equality)\n best_score = -np.inf\n last_finite_idx = 0\n for index, (alpha, scores, _) in enumerate(path):\n this_score = np.mean(scores)\n if this_score >= .1 / np.finfo(np.float64).eps:\n this_score = np.nan\n if np.isfinite(this_score):\n last_finite_idx = index\n if this_score >= best_score:\n best_score = this_score\n best_index = index\n\n # Refine the grid\n if best_index == 0:\n # We do not need to go back: we have chosen\n # the highest value of alpha for which there are\n # non-zero coefficients\n alpha_1 = path[0][0]\n alpha_0 = path[1][0]\n elif (best_index == last_finite_idx\n and not best_index == len(path) - 1):\n # We have non-converged models on the upper bound of the\n # grid; we need to refine the grid there\n alpha_1 = path[best_index][0]\n alpha_0 = path[best_index + 1][0]\n elif best_index == len(path) - 1:\n alpha_1 = path[best_index][0]\n alpha_0 = 0.01 * path[best_index][0]\n else:\n alpha_1 = path[best_index - 1][0]\n alpha_0 = path[best_index + 1][0]\n\n if not isinstance(n_alphas, Sequence):\n alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0),\n n_alphas + 2)\n alphas = alphas[1:-1]\n\n if self.verbose and n_refinements > 1:\n print('[GraphicalLassoCV] Done refinement % 2i out of'\n ' %i: % 3is' % (i + 1, n_refinements, time.time() - t0))\n\n path = list(zip(*path))\n grid_scores = list(path[1])\n alphas = list(path[0])\n # Finally, compute the score with alpha = 0\n alphas.append(0)\n grid_scores.append(cross_val_score(EmpiricalCovariance(), X,\n cv=cv, n_jobs=self.n_jobs,\n verbose=inner_verbose))\n grid_scores = np.array(grid_scores)\n self.cv_results_ = {'alphas': np.array(alphas)}\n for i in range(grid_scores.shape[1]):\n key = \"split{}_score\".format(i)\n self.cv_results_[key] = grid_scores[:, i]\n\n self.cv_results_[\"mean_score\"] = np.mean(grid_scores, axis=1)\n self.cv_results_[\"std_score\"] = np.std(grid_scores, axis=1)\n\n best_alpha = alphas[best_index]\n self.alpha_ = best_alpha\n\n # Finally fit the model with the selected alpha\n self.covariance_, self.precision_, self.n_iter_ = graphical_lasso(\n emp_cov, alpha=best_alpha, mode=self.mode, tol=self.tol,\n enet_tol=self.enet_tol, max_iter=self.max_iter,\n verbose=inner_verbose, return_n_iter=True)\n return self\n\n # TODO: Remove in 1.1 when grid_scores_ is deprecated\n # mypy error: Decorated property not supported\n @deprecated( # type: ignore\n \"The grid_scores_ attribute is deprecated in version 0.24 in favor \"\n \"of cv_results_ and will be removed in version 1.1 (renaming of 
0.26).\"\n )\n @property\n def grid_scores_(self):\n # remove 3 for mean_score, std_score, and alphas\n n_alphas = len(self.cv_results_) - 3\n return np.asarray(\n [self.cv_results_[\"split{}_score\".format(i)]\n for i in range(n_alphas)]).T\n\n # TODO: Remove in 1.1 when cv_alphas_ is deprecated\n # mypy error: Decorated property not supported\n @deprecated( # type: ignore\n \"The cv_alphas_ attribute is deprecated in version 0.24 in favor \"\n \"of cv_results_['alpha'] and will be removed in version 1.1 \"\n \"(renaming of 0.26).\"\n )\n @property\n def cv_alphas_(self):\n return self.cv_results_['alphas'].tolist()", + "instance_attributes": [ + { + "name": "alphas", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_refinements", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "location_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "cv_results_", + "types": { + "kind": "NamedType", + "name": "dict" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet", + "name": "MinCovDet", + "qname": "sklearn.covariance._robust_covariance.MinCovDet", + "decorators": [], + "superclasses": ["EmpiricalCovariance"], + "methods": [ + "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/__init__", + "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/fit", + "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/correct_covariance", + "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/reweight_covariance" + ], + "is_public": false, + "reexported_by": [], + "description": "Minimum Covariance Determinant (MCD): robust estimator of covariance.\n\nThe Minimum Covariance Determinant covariance estimator is to be applied\non Gaussian-distributed data, but could still be relevant on data\ndrawn from a unimodal, symmetric distribution. It is not meant to be used\nwith multi-modal data (the algorithm used to fit a MinCovDet object is\nlikely to fail in such a case).\nOne should consider projection pursuit methods to deal with multi-modal\ndatasets.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Minimum Covariance Determinant (MCD): robust estimator of covariance.\n\nThe Minimum Covariance Determinant covariance estimator is to be applied\non Gaussian-distributed data, but could still be relevant on data\ndrawn from a unimodal, symmetric distribution. It is not meant to be used\nwith multi-modal data (the algorithm used to fit a MinCovDet object is\nlikely to fail in such a case).\nOne should consider projection pursuit methods to deal with multi-modal\ndatasets.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, the support of the robust location and the covariance\n estimates is computed, and a covariance estimate is recomputed from\n it, without centering the data.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, the robust location and covariance are directly computed\n with the FastMCD algorithm without additional treatment.\n\nsupport_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. Default is None, which implies that the minimum\n value of support_fraction will be used within the algorithm:\n `(n_sample + n_features + 1) / 2`. 
The parameter must be in the range\n (0, 1).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\nraw_location_ : ndarray of shape (n_features,)\n The raw robust estimated location before correction and re-weighting.\n\nraw_covariance_ : ndarray of shape (n_features, n_features)\n The raw robust estimated covariance before correction and re-weighting.\n\nraw_support_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the raw robust estimates of location and shape, before correction\n and re-weighting.\n\nlocation_ : ndarray of shape (n_features,)\n Estimated robust location.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated robust covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nsupport_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the robust estimates of location and shape.\n\ndist_ : ndarray of shape (n_samples,)\n Mahalanobis distances of the training set (on which :meth:`fit` is\n called) observations.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import MinCovDet\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=500)\n>>> cov = MinCovDet(random_state=0).fit(X)\n>>> cov.covariance_\narray([[0.7411..., 0.2535...],\n [0.2535..., 0.3053...]])\n>>> cov.location_\narray([0.0813... , 0.0427...])\n\nReferences\n----------\n\n.. [Rouseeuw1984] P. J. Rousseeuw. Least median of squares regression.\n J. Am Stat Ass, 79:871, 1984.\n.. [Rousseeuw] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS\n.. [ButlerDavies] R. W. Butler, P. L. Davies and M. Jhun,\n Asymptotics For The Minimum Covariance Determinant Estimator,\n The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400", + "code": "class MinCovDet(EmpiricalCovariance):\n \"\"\"Minimum Covariance Determinant (MCD): robust estimator of covariance.\n\n The Minimum Covariance Determinant covariance estimator is to be applied\n on Gaussian-distributed data, but could still be relevant on data\n drawn from a unimodal, symmetric distribution. 
It is not meant to be used\n with multi-modal data (the algorithm used to fit a MinCovDet object is\n likely to fail in such a case).\n One should consider projection pursuit methods to deal with multi-modal\n datasets.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n store_precision : bool, default=True\n Specify if the estimated precision is stored.\n\n assume_centered : bool, default=False\n If True, the support of the robust location and the covariance\n estimates is computed, and a covariance estimate is recomputed from\n it, without centering the data.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, the robust location and covariance are directly computed\n with the FastMCD algorithm without additional treatment.\n\n support_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. Default is None, which implies that the minimum\n value of support_fraction will be used within the algorithm:\n `(n_sample + n_features + 1) / 2`. The parameter must be in the range\n (0, 1).\n\n random_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\n Attributes\n ----------\n raw_location_ : ndarray of shape (n_features,)\n The raw robust estimated location before correction and re-weighting.\n\n raw_covariance_ : ndarray of shape (n_features, n_features)\n The raw robust estimated covariance before correction and re-weighting.\n\n raw_support_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the raw robust estimates of location and shape, before correction\n and re-weighting.\n\n location_ : ndarray of shape (n_features,)\n Estimated robust location.\n\n covariance_ : ndarray of shape (n_features, n_features)\n Estimated robust covariance matrix.\n\n precision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\n support_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the robust estimates of location and shape.\n\n dist_ : ndarray of shape (n_samples,)\n Mahalanobis distances of the training set (on which :meth:`fit` is\n called) observations.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.covariance import MinCovDet\n >>> from sklearn.datasets import make_gaussian_quantiles\n >>> real_cov = np.array([[.8, .3],\n ... [.3, .4]])\n >>> rng = np.random.RandomState(0)\n >>> X = rng.multivariate_normal(mean=[0, 0],\n ... cov=real_cov,\n ... size=500)\n >>> cov = MinCovDet(random_state=0).fit(X)\n >>> cov.covariance_\n array([[0.7411..., 0.2535...],\n [0.2535..., 0.3053...]])\n >>> cov.location_\n array([0.0813... , 0.0427...])\n\n References\n ----------\n\n .. [Rouseeuw1984] P. J. Rousseeuw. Least median of squares regression.\n J. Am Stat Ass, 79:871, 1984.\n .. [Rousseeuw] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS\n .. [ButlerDavies] R. W. Butler, P. L. Davies and M. Jhun,\n Asymptotics For The Minimum Covariance Determinant Estimator,\n The Annals of Statistics, 1993, Vol. 21, No. 
3, 1385-1400\n \"\"\"\n _nonrobust_covariance = staticmethod(empirical_covariance)\n\n @_deprecate_positional_args\n def __init__(self, *, store_precision=True, assume_centered=False,\n support_fraction=None, random_state=None):\n self.store_precision = store_precision\n self.assume_centered = assume_centered\n self.support_fraction = support_fraction\n self.random_state = random_state\n\n def fit(self, X, y=None):\n \"\"\"Fits a Minimum Covariance Determinant with the FastMCD algorithm.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n X = self._validate_data(X, ensure_min_samples=2, estimator='MinCovDet')\n random_state = check_random_state(self.random_state)\n n_samples, n_features = X.shape\n # check that the empirical covariance is full rank\n if (linalg.svdvals(np.dot(X.T, X)) > 1e-8).sum() != n_features:\n warnings.warn(\"The covariance matrix associated to your dataset \"\n \"is not full rank\")\n # compute and store raw estimates\n raw_location, raw_covariance, raw_support, raw_dist = fast_mcd(\n X, support_fraction=self.support_fraction,\n cov_computation_method=self._nonrobust_covariance,\n random_state=random_state)\n if self.assume_centered:\n raw_location = np.zeros(n_features)\n raw_covariance = self._nonrobust_covariance(X[raw_support],\n assume_centered=True)\n # get precision matrix in an optimized way\n precision = linalg.pinvh(raw_covariance)\n raw_dist = np.sum(np.dot(X, precision) * X, 1)\n self.raw_location_ = raw_location\n self.raw_covariance_ = raw_covariance\n self.raw_support_ = raw_support\n self.location_ = raw_location\n self.support_ = raw_support\n self.dist_ = raw_dist\n # obtain consistency at normal models\n self.correct_covariance(X)\n # re-weight estimator\n self.reweight_covariance(X)\n\n return self\n\n def correct_covariance(self, data):\n \"\"\"Apply a correction to raw Minimum Covariance Determinant estimates.\n\n Correction using the empirical correction factor suggested\n by Rousseeuw and Van Driessen in [RVD]_.\n\n Parameters\n ----------\n data : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n The data set must be the one which was used to compute\n the raw estimates.\n\n Returns\n -------\n covariance_corrected : ndarray of shape (n_features, n_features)\n Corrected robust covariance estimate.\n\n References\n ----------\n\n .. 
[RVD] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS\n \"\"\"\n\n # Check that the covariance of the support data is not equal to 0.\n # Otherwise self.dist_ = 0 and thus correction = 0.\n n_samples = len(self.dist_)\n n_support = np.sum(self.support_)\n if n_support < n_samples and np.allclose(self.raw_covariance_, 0):\n raise ValueError('The covariance matrix of the support data '\n 'is equal to 0, try to increase support_fraction')\n correction = np.median(self.dist_) / chi2(data.shape[1]).isf(0.5)\n covariance_corrected = self.raw_covariance_ * correction\n self.dist_ /= correction\n return covariance_corrected\n\n def reweight_covariance(self, data):\n \"\"\"Re-weight raw Minimum Covariance Determinant estimates.\n\n Re-weight observations using Rousseeuw's method (equivalent to\n deleting outlying observations from the data set before\n computing location and covariance estimates) described\n in [RVDriessen]_.\n\n Parameters\n ----------\n data : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n The data set must be the one which was used to compute\n the raw estimates.\n\n Returns\n -------\n location_reweighted : ndarray of shape (n_features,)\n Re-weighted robust location estimate.\n\n covariance_reweighted : ndarray of shape (n_features, n_features)\n Re-weighted robust covariance estimate.\n\n support_reweighted : ndarray of shape (n_samples,), dtype=bool\n A mask of the observations that have been used to compute\n the re-weighted robust location and covariance estimates.\n\n References\n ----------\n\n .. [RVDriessen] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS\n \"\"\"\n n_samples, n_features = data.shape\n mask = self.dist_ < chi2(n_features).isf(0.025)\n if self.assume_centered:\n location_reweighted = np.zeros(n_features)\n else:\n location_reweighted = data[mask].mean(0)\n covariance_reweighted = self._nonrobust_covariance(\n data[mask], assume_centered=self.assume_centered)\n support_reweighted = np.zeros(n_samples, dtype=bool)\n support_reweighted[mask] = True\n self._set_covariance(covariance_reweighted)\n self.location_ = location_reweighted\n self.support_ = support_reweighted\n X_centered = data - self.location_\n self.dist_ = np.sum(\n np.dot(X_centered, self.get_precision()) * X_centered, 1)\n return location_reweighted, covariance_reweighted, support_reweighted", + "instance_attributes": [ + { + "name": "store_precision", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "assume_centered", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "raw_location_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "raw_support_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "location_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "support_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/LedoitWolf", + "name": "LedoitWolf", + "qname": "sklearn.covariance._shrunk_covariance.LedoitWolf", + "decorators": [], + "superclasses": ["EmpiricalCovariance"], + "methods": [ + "scikit-learn/sklearn.covariance._shrunk_covariance/LedoitWolf/__init__", + 
"scikit-learn/sklearn.covariance._shrunk_covariance/LedoitWolf/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "LedoitWolf Estimator\n\nLedoit-Wolf is a particular form of shrinkage, where the shrinkage\ncoefficient is computed using O. Ledoit and M. Wolf's formula as\ndescribed in \"A Well-Conditioned Estimator for Large-Dimensional\nCovariance Matrices\", Ledoit and Wolf, Journal of Multivariate\nAnalysis, Volume 88, Issue 2, February 2004, pages 365-411.\n\nRead more in the :ref:`User Guide `.", + "docstring": "LedoitWolf Estimator\n\nLedoit-Wolf is a particular form of shrinkage, where the shrinkage\ncoefficient is computed using O. Ledoit and M. Wolf's formula as\ndescribed in \"A Well-Conditioned Estimator for Large-Dimensional\nCovariance Matrices\", Ledoit and Wolf, Journal of Multivariate\nAnalysis, Volume 88, Issue 2, February 2004, pages 365-411.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data will be centered before computation.\n\nblock_size : int, default=1000\n Size of blocks into which the covariance matrix will be split\n during its Ledoit-Wolf estimation. This is purely a memory\n optimization and does not affect results.\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nshrinkage_ : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import LedoitWolf\n>>> real_cov = np.array([[.4, .2],\n... [.2, .8]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=50)\n>>> cov = LedoitWolf().fit(X)\n>>> cov.covariance_\narray([[0.4406..., 0.1616...],\n [0.1616..., 0.8022...]])\n>>> cov.location_\narray([ 0.0595... , -0.0075...])\n\nNotes\n-----\nThe regularised covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\nand shrinkage is given by the Ledoit and Wolf formula (see References)\n\nReferences\n----------\n\"A Well-Conditioned Estimator for Large-Dimensional Covariance Matrices\",\nLedoit and Wolf, Journal of Multivariate Analysis, Volume 88, Issue 2,\nFebruary 2004, pages 365-411.", + "code": "class LedoitWolf(EmpiricalCovariance):\n \"\"\"LedoitWolf Estimator\n\n Ledoit-Wolf is a particular form of shrinkage, where the shrinkage\n coefficient is computed using O. Ledoit and M. 
Wolf's formula as\n described in \"A Well-Conditioned Estimator for Large-Dimensional\n Covariance Matrices\", Ledoit and Wolf, Journal of Multivariate\n Analysis, Volume 88, Issue 2, February 2004, pages 365-411.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n store_precision : bool, default=True\n Specify if the estimated precision is stored.\n\n assume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data will be centered before computation.\n\n block_size : int, default=1000\n Size of blocks into which the covariance matrix will be split\n during its Ledoit-Wolf estimation. This is purely a memory\n optimization and does not affect results.\n\n Attributes\n ----------\n covariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\n location_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\n precision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\n shrinkage_ : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.covariance import LedoitWolf\n >>> real_cov = np.array([[.4, .2],\n ... [.2, .8]])\n >>> np.random.seed(0)\n >>> X = np.random.multivariate_normal(mean=[0, 0],\n ... cov=real_cov,\n ... size=50)\n >>> cov = LedoitWolf().fit(X)\n >>> cov.covariance_\n array([[0.4406..., 0.1616...],\n [0.1616..., 0.8022...]])\n >>> cov.location_\n array([ 0.0595... , -0.0075...])\n\n Notes\n -----\n The regularised covariance is:\n\n (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\n where mu = trace(cov) / n_features\n and shrinkage is given by the Ledoit and Wolf formula (see References)\n\n References\n ----------\n \"A Well-Conditioned Estimator for Large-Dimensional Covariance Matrices\",\n Ledoit and Wolf, Journal of Multivariate Analysis, Volume 88, Issue 2,\n February 2004, pages 365-411.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, store_precision=True, assume_centered=False,\n block_size=1000):\n super().__init__(store_precision=store_precision,\n assume_centered=assume_centered)\n self.block_size = block_size\n\n def fit(self, X, y=None):\n \"\"\"Fit the Ledoit-Wolf shrunk covariance model according to the given\n training data and parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n # Not calling the parent object to fit, to avoid computing the\n # covariance matrix (and potentially the precision)\n X = self._validate_data(X)\n if self.assume_centered:\n self.location_ = np.zeros(X.shape[1])\n else:\n self.location_ = X.mean(0)\n covariance, shrinkage = ledoit_wolf(X - self.location_,\n assume_centered=True,\n block_size=self.block_size)\n self.shrinkage_ = shrinkage\n self._set_covariance(covariance)\n\n return self", + "instance_attributes": [ + { + "name": "block_size", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "location_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "shrinkage_", + "types": { + 
"kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/OAS", + "name": "OAS", + "qname": "sklearn.covariance._shrunk_covariance.OAS", + "decorators": [], + "superclasses": ["EmpiricalCovariance"], + "methods": ["scikit-learn/sklearn.covariance._shrunk_covariance/OAS/fit"], + "is_public": false, + "reexported_by": [], + "description": "Oracle Approximating Shrinkage Estimator\n\nRead more in the :ref:`User Guide `.\n\nOAS is a particular form of shrinkage described in\n\"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\nThe formula used here does not correspond to the one given in the\narticle. In the original article, formula (23) states that 2/p is\nmultiplied by Trace(cov*cov) in both the numerator and denominator, but\nthis operation is omitted because for a large p, the value of 2/p is\nso small that it doesn't affect the value of the estimator.", + "docstring": "Oracle Approximating Shrinkage Estimator\n\nRead more in the :ref:`User Guide `.\n\nOAS is a particular form of shrinkage described in\n\"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\nThe formula used here does not correspond to the one given in the\narticle. In the original article, formula (23) states that 2/p is\nmultiplied by Trace(cov*cov) in both the numerator and denominator, but\nthis operation is omitted because for a large p, the value of 2/p is\nso small that it doesn't affect the value of the estimator.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data will be centered before computation.\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nshrinkage_ : float\n coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import OAS\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=500)\n>>> oas = OAS().fit(X)\n>>> oas.covariance_\narray([[0.7533..., 0.2763...],\n [0.2763..., 0.3964...]])\n>>> oas.precision_\narray([[ 1.7833..., -1.2431... ],\n [-1.2431..., 3.3889...]])\n>>> oas.shrinkage_\n0.0195...\n\nNotes\n-----\nThe regularised covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\nand shrinkage is given by the OAS formula (see References)\n\nReferences\n----------\n\"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. 
Proc., Volume 58, Issue 10, October 2010.", + "code": "class OAS(EmpiricalCovariance):\n \"\"\"Oracle Approximating Shrinkage Estimator\n\n Read more in the :ref:`User Guide `.\n\n OAS is a particular form of shrinkage described in\n \"Shrinkage Algorithms for MMSE Covariance Estimation\"\n Chen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\n The formula used here does not correspond to the one given in the\n article. In the original article, formula (23) states that 2/p is\n multiplied by Trace(cov*cov) in both the numerator and denominator, but\n this operation is omitted because for a large p, the value of 2/p is\n so small that it doesn't affect the value of the estimator.\n\n Parameters\n ----------\n store_precision : bool, default=True\n Specify if the estimated precision is stored.\n\n assume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data will be centered before computation.\n\n Attributes\n ----------\n covariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\n location_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\n precision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\n shrinkage_ : float\n coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.covariance import OAS\n >>> from sklearn.datasets import make_gaussian_quantiles\n >>> real_cov = np.array([[.8, .3],\n ... [.3, .4]])\n >>> rng = np.random.RandomState(0)\n >>> X = rng.multivariate_normal(mean=[0, 0],\n ... cov=real_cov,\n ... size=500)\n >>> oas = OAS().fit(X)\n >>> oas.covariance_\n array([[0.7533..., 0.2763...],\n [0.2763..., 0.3964...]])\n >>> oas.precision_\n array([[ 1.7833..., -1.2431... ],\n [-1.2431..., 3.3889...]])\n >>> oas.shrinkage_\n 0.0195...\n\n Notes\n -----\n The regularised covariance is:\n\n (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\n where mu = trace(cov) / n_features\n and shrinkage is given by the OAS formula (see References)\n\n References\n ----------\n \"Shrinkage Algorithms for MMSE Covariance Estimation\"\n Chen et al., IEEE Trans. on Sign. 
Proc., Volume 58, Issue 10, October 2010.\n \"\"\"\n\n def fit(self, X, y=None):\n \"\"\"Fit the Oracle Approximating Shrinkage covariance model\n according to the given training data and parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n X = self._validate_data(X)\n # Not calling the parent object to fit, to avoid computing the\n # covariance matrix (and potentially the precision)\n if self.assume_centered:\n self.location_ = np.zeros(X.shape[1])\n else:\n self.location_ = X.mean(0)\n\n covariance, shrinkage = oas(X - self.location_, assume_centered=True)\n self.shrinkage_ = shrinkage\n self._set_covariance(covariance)\n\n return self", + "instance_attributes": [ + { + "name": "location_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "shrinkage_", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ShrunkCovariance", + "name": "ShrunkCovariance", + "qname": "sklearn.covariance._shrunk_covariance.ShrunkCovariance", + "decorators": [], + "superclasses": ["EmpiricalCovariance"], + "methods": [ + "scikit-learn/sklearn.covariance._shrunk_covariance/ShrunkCovariance/__init__", + "scikit-learn/sklearn.covariance._shrunk_covariance/ShrunkCovariance/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Covariance estimator with shrinkage\n\nRead more in the :ref:`User Guide `.", + "docstring": "Covariance estimator with shrinkage\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data will be centered before computation.\n\nshrinkage : float, default=0.1\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import ShrunkCovariance\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... 
size=500)\n>>> cov = ShrunkCovariance().fit(X)\n>>> cov.covariance_\narray([[0.7387..., 0.2536...],\n [0.2536..., 0.4110...]])\n>>> cov.location_\narray([0.0622..., 0.0193...])\n\nNotes\n-----\nThe regularized covariance is given by:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features", + "code": "class ShrunkCovariance(EmpiricalCovariance):\n \"\"\"Covariance estimator with shrinkage\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n store_precision : bool, default=True\n Specify if the estimated precision is stored\n\n assume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data will be centered before computation.\n\n shrinkage : float, default=0.1\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\n Attributes\n ----------\n covariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\n location_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\n precision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.covariance import ShrunkCovariance\n >>> from sklearn.datasets import make_gaussian_quantiles\n >>> real_cov = np.array([[.8, .3],\n ... [.3, .4]])\n >>> rng = np.random.RandomState(0)\n >>> X = rng.multivariate_normal(mean=[0, 0],\n ... cov=real_cov,\n ... size=500)\n >>> cov = ShrunkCovariance().fit(X)\n >>> cov.covariance_\n array([[0.7387..., 0.2536...],\n [0.2536..., 0.4110...]])\n >>> cov.location_\n array([0.0622..., 0.0193...])\n\n Notes\n -----\n The regularized covariance is given by:\n\n (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\n where mu = trace(cov) / n_features\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, store_precision=True, assume_centered=False,\n shrinkage=0.1):\n super().__init__(store_precision=store_precision,\n assume_centered=assume_centered)\n self.shrinkage = shrinkage\n\n def fit(self, X, y=None):\n \"\"\"Fit the shrunk covariance model according to the given training data\n and parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n X = self._validate_data(X)\n # Not calling the parent object to fit, to avoid a potential\n # matrix inversion when setting the precision\n if self.assume_centered:\n self.location_ = np.zeros(X.shape[1])\n else:\n self.location_ = X.mean(0)\n covariance = empirical_covariance(\n X, assume_centered=self.assume_centered)\n covariance = shrunk_covariance(covariance, self.shrinkage)\n self._set_covariance(covariance)\n\n return self", + "instance_attributes": [ + { + "name": "shrinkage", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "location_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/CCA", + "name": "CCA", + "qname": "sklearn.cross_decomposition._pls.CCA", + "decorators": [], + "superclasses": ["_PLS"], + "methods": 
["scikit-learn/sklearn.cross_decomposition._pls/CCA/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Canonical Correlation Analysis, also known as \"Mode B\" PLS.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Canonical Correlation Analysis, also known as \"Mode B\" PLS.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\nmax_iter : int, default=500\n the maximum number of iterations of the power method.\n\ntol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\nn_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import CCA\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> cca = CCA(n_components=1)\n>>> cca.fit(X, Y)\nCCA(n_components=1)\n>>> X_c, Y_c = cca.transform(X, Y)\n\nSee Also\n--------\nPLSCanonical\nPLSSVD", + "code": "class CCA(_PLS):\n \"\"\"Canonical Correlation Analysis, also known as \"Mode B\" PLS.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=2\n Number of components to keep. 
Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\n scale : bool, default=True\n Whether to scale `X` and `Y`.\n\n max_iter : int, default=500\n the maximum number of iterations of the power method.\n\n tol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\n copy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\n Attributes\n ----------\n x_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\n y_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\n x_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\n y_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\n x_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\n y_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\n x_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\n y_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\n coef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\n n_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component.\n\n n_features_in_ : int\n Number of features seen during :term:`fit`.\n\n Examples\n --------\n >>> from sklearn.cross_decomposition import CCA\n >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]\n >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n >>> cca = CCA(n_components=1)\n >>> cca.fit(X, Y)\n CCA(n_components=1)\n >>> X_c, Y_c = cca.transform(X, Y)\n\n See Also\n --------\n PLSCanonical\n PLSSVD\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, n_components=2, *, scale=True,\n max_iter=500, tol=1e-06, copy=True):\n super().__init__(n_components=n_components, scale=scale,\n deflation_mode=\"canonical\", mode=\"B\",\n algorithm=\"nipals\", max_iter=max_iter, tol=tol,\n copy=copy)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSCanonical", + "name": "PLSCanonical", + "qname": "sklearn.cross_decomposition._pls.PLSCanonical", + "decorators": [], + "superclasses": ["_PLS"], + "methods": ["scikit-learn/sklearn.cross_decomposition._pls/PLSCanonical/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Partial Least Squares transformer and regressor.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8", + "docstring": "Partial Least Squares transformer and regressor.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\nalgorithm : {'nipals', 'svd'}, default='nipals'\n The algorithm used to estimate the first singular vectors of the\n cross-covariance matrix. 'nipals' uses the power method while 'svd'\n will compute the whole SVD.\n\nmax_iter : int, default=500\n the maximum number of iterations of the power method when\n `algorithm='nipals'`. Ignored otherwise.\n\ntol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\nn_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component. Empty if `algorithm='svd'`.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import PLSCanonical\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> plsca = PLSCanonical(n_components=2)\n>>> plsca.fit(X, Y)\nPLSCanonical()\n>>> X_c, Y_c = plsca.transform(X, Y)\n\nSee Also\n--------\nCCA\nPLSSVD", + "code": "class PLSCanonical(_PLS):\n \"\"\"Partial Least Squares transformer and regressor.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.8\n\n Parameters\n ----------\n n_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\n scale : bool, default=True\n Whether to scale `X` and `Y`.\n\n algorithm : {'nipals', 'svd'}, default='nipals'\n The algorithm used to estimate the first singular vectors of the\n cross-covariance matrix. 
'nipals' uses the power method while 'svd'\n will compute the whole SVD.\n\n max_iter : int, default=500\n the maximum number of iterations of the power method when\n `algorithm='nipals'`. Ignored otherwise.\n\n tol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\n copy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\n Attributes\n ----------\n x_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\n y_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\n x_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\n y_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\n x_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\n y_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\n x_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\n y_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\n coef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\n n_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component. Empty if `algorithm='svd'`.\n\n n_features_in_ : int\n Number of features seen during :term:`fit`.\n\n Examples\n --------\n >>> from sklearn.cross_decomposition import PLSCanonical\n >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n >>> plsca = PLSCanonical(n_components=2)\n >>> plsca.fit(X, Y)\n PLSCanonical()\n >>> X_c, Y_c = plsca.transform(X, Y)\n\n See Also\n --------\n CCA\n PLSSVD\n \"\"\"\n # This implementation provides the same results that the \"plspm\" package\n # provided in the R language (R-project), using the function plsca(X, Y).\n # Results are equal or collinear with the function\n # ``pls(..., mode = \"canonical\")`` of the \"mixOmics\" package. 
The\n # difference lies in the fact that the mixOmics implementation does not\n # exactly implement the Wold algorithm since it does not normalize\n # y_weights to one.\n\n @_deprecate_positional_args\n def __init__(self, n_components=2, *, scale=True, algorithm=\"nipals\",\n max_iter=500, tol=1e-06, copy=True):\n super().__init__(\n n_components=n_components, scale=scale,\n deflation_mode=\"canonical\", mode=\"A\",\n algorithm=algorithm,\n max_iter=max_iter, tol=tol, copy=copy)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSRegression", + "name": "PLSRegression", + "qname": "sklearn.cross_decomposition._pls.PLSRegression", + "decorators": [], + "superclasses": ["_PLS"], + "methods": ["scikit-learn/sklearn.cross_decomposition._pls/PLSRegression/__init__"], + "is_public": false, + "reexported_by": [], + "description": "PLS regression\n\nPLSRegression is also known as PLS2 or PLS1, depending on the number of\ntargets.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8", + "docstring": "PLS regression\n\nPLSRegression is also known as PLS2 or PLS1, depending on the number of\ntargets.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\nmax_iter : int, default=500\n The maximum number of iterations of the power method when\n `algorithm='nipals'`. Ignored otherwise.\n\ntol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. 
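The `tol` documentation above describes the stopping rule of the power method used when `algorithm='nipals'`. A rough, self-contained sketch of that idea as plain power iteration on the cross-covariance matrix `X.T @ Y` (the helper name is hypothetical and this simplifies the actual NIPALS inner loop):

    import numpy as np

    def nipals_first_vectors(X, Y, max_iter=500, tol=1e-06):
        # Alternate left/right updates on C = X.T @ Y and stop once the
        # squared change in the left vector u falls below tol, as described
        # for `tol` in the docstrings above.
        u = np.ones(X.shape[1]) / np.sqrt(X.shape[1])
        v = None
        for _ in range(max_iter):
            v = Y.T @ (X @ u)
            v /= np.linalg.norm(v)
            u_new = X.T @ (Y @ v)
            u_new /= np.linalg.norm(u_new)
            converged = np.sum((u_new - u) ** 2) < tol
            u = u_new
            if converged:
                break
        return u, v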
If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\nn_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import PLSRegression\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> pls2 = PLSRegression(n_components=2)\n>>> pls2.fit(X, Y)\nPLSRegression()\n>>> Y_pred = pls2.predict(X)", + "code": "class PLSRegression(_PLS):\n \"\"\"PLS regression\n\n PLSRegression is also known as PLS2 or PLS1, depending on the number of\n targets.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.8\n\n Parameters\n ----------\n n_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\n scale : bool, default=True\n Whether to scale `X` and `Y`.\n\n max_iter : int, default=500\n The maximum number of iterations of the power method when\n `algorithm='nipals'`. Ignored otherwise.\n\n tol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\n copy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. 
If False, these operations will be done inplace,\n modifying both arrays.\n\n Attributes\n ----------\n x_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\n y_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\n x_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\n y_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\n x_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n y_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n x_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\n y_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\n coef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\n n_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component.\n\n n_features_in_ : int\n Number of features seen during :term:`fit`.\n\n Examples\n --------\n >>> from sklearn.cross_decomposition import PLSRegression\n >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n >>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n >>> pls2 = PLSRegression(n_components=2)\n >>> pls2.fit(X, Y)\n PLSRegression()\n >>> Y_pred = pls2.predict(X)\n \"\"\"\n\n # This implementation provides the same results that 3 PLS packages\n # provided in the R language (R-project):\n # - \"mixOmics\" with function pls(X, Y, mode = \"regression\")\n # - \"plspm \" with function plsreg2(X, Y)\n # - \"pls\" with function oscorespls.fit(X, Y)\n\n @_deprecate_positional_args\n def __init__(self, n_components=2, *, scale=True,\n max_iter=500, tol=1e-06, copy=True):\n super().__init__(\n n_components=n_components, scale=scale,\n deflation_mode=\"regression\", mode=\"A\",\n algorithm='nipals', max_iter=max_iter,\n tol=tol, copy=copy)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD", + "name": "PLSSVD", + "qname": "sklearn.cross_decomposition._pls.PLSSVD", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/__init__", + "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/fit", + "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/x_scores_@getter", + "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/y_scores_@getter", + "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/x_mean_@getter", + "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/y_mean_@getter", + "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/x_std_@getter", + "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/y_std_@getter", + "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/transform", + "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/fit_transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Partial Least Square SVD.\n\nThis transformer simply performs a SVD on the crosscovariance matrix X'Y.\nIt is able to project both the training data `X` and the targets `Y`. 
The\ntraining data X is projected on the left singular vectors, while the\ntargets are projected on the right singular vectors.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8", + "docstring": "Partial Least Square SVD.\n\nThis transformer simply performs a SVD on the crosscovariance matrix X'Y.\nIt is able to project both the training data `X` and the targets `Y`. The\ntraining data X is projected on the left singular vectors, while the\ntargets are projected on the right singular vectors.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n The number of components to keep. Should be in `[1,\n min(n_samples, n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the SVD of the cross-covariance matrix.\n Used to project `X` in `transform`.\n\ny_weights_ : ndarray of (n_targets, n_components)\n The right singular vectors of the SVD of the cross-covariance matrix.\n Used to project `X` in `transform`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.cross_decomposition import PLSSVD\n>>> X = np.array([[0., 0., 1.],\n... [1., 0., 0.],\n... [2., 2., 2.],\n... [2., 5., 4.]])\n>>> Y = np.array([[0.1, -0.2],\n... [0.9, 1.1],\n... [6.2, 5.9],\n... [11.9, 12.3]])\n>>> pls = PLSSVD(n_components=2).fit(X, Y)\n>>> X_c, Y_c = pls.transform(X, Y)\n>>> X_c.shape, Y_c.shape\n((4, 2), (4, 2))\n\nSee Also\n--------\nPLSCanonical\nCCA", + "code": "class PLSSVD(TransformerMixin, BaseEstimator):\n \"\"\"Partial Least Square SVD.\n\n This transformer simply performs a SVD on the crosscovariance matrix X'Y.\n It is able to project both the training data `X` and the targets `Y`. The\n training data X is projected on the left singular vectors, while the\n targets are projected on the right singular vectors.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.8\n\n Parameters\n ----------\n n_components : int, default=2\n The number of components to keep. Should be in `[1,\n min(n_samples, n_features, n_targets)]`.\n\n scale : bool, default=True\n Whether to scale `X` and `Y`.\n\n copy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. 
If False, these operations will be done inplace,\n modifying both arrays.\n\n Attributes\n ----------\n x_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the SVD of the cross-covariance matrix.\n Used to project `X` in `transform`.\n\n y_weights_ : ndarray of (n_targets, n_components)\n The right singular vectors of the SVD of the cross-covariance matrix.\n Used to project `X` in `transform`.\n\n x_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\n y_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\n n_features_in_ : int\n Number of features seen during :term:`fit`.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.cross_decomposition import PLSSVD\n >>> X = np.array([[0., 0., 1.],\n ... [1., 0., 0.],\n ... [2., 2., 2.],\n ... [2., 5., 4.]])\n >>> Y = np.array([[0.1, -0.2],\n ... [0.9, 1.1],\n ... [6.2, 5.9],\n ... [11.9, 12.3]])\n >>> pls = PLSSVD(n_components=2).fit(X, Y)\n >>> X_c, Y_c = pls.transform(X, Y)\n >>> X_c.shape, Y_c.shape\n ((4, 2), (4, 2))\n\n See Also\n --------\n PLSCanonical\n CCA\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=2, *, scale=True, copy=True):\n self.n_components = n_components\n self.scale = scale\n self.copy = copy\n\n def fit(self, X, Y):\n \"\"\"Fit model to data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training samples.\n\n Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Targets.\n \"\"\"\n check_consistent_length(X, Y)\n X = self._validate_data(X, dtype=np.float64, copy=self.copy,\n ensure_min_samples=2)\n Y = check_array(Y, dtype=np.float64, copy=self.copy, ensure_2d=False)\n if Y.ndim == 1:\n Y = Y.reshape(-1, 1)\n\n # we'll compute the SVD of the cross-covariance matrix = X.T.dot(Y)\n # This matrix rank is at most min(n_samples, n_features, n_targets) so\n # n_components cannot be bigger than that.\n n_components = self.n_components\n rank_upper_bound = min(X.shape[0], X.shape[1], Y.shape[1])\n if not 1 <= n_components <= rank_upper_bound:\n # TODO: raise an error in 1.1\n warnings.warn(\n f\"As of version 0.24, n_components({n_components}) should be \"\n f\"in [1, min(n_features, n_samples, n_targets)] = \"\n f\"[1, {rank_upper_bound}]. \"\n f\"n_components={rank_upper_bound} will be used instead. \"\n f\"In version 1.1 (renaming of 0.26), an error will be raised.\",\n FutureWarning\n )\n n_components = rank_upper_bound\n\n X, Y, self._x_mean, self._y_mean, self._x_std, self._y_std = (\n _center_scale_xy(X, Y, self.scale))\n\n # Compute SVD of cross-covariance matrix\n C = np.dot(X.T, Y)\n U, s, Vt = svd(C, full_matrices=False)\n U = U[:, :n_components]\n Vt = Vt[:n_components]\n U, Vt = svd_flip(U, Vt)\n V = Vt.T\n\n self._x_scores = np.dot(X, U) # TODO: remove in 1.1\n self._y_scores = np.dot(Y, V) # TODO: remove in 1.1\n self.x_weights_ = U\n self.y_weights_ = V\n return self\n\n # mypy error: Decorated property not supported\n @deprecated( # type: ignore\n \"Attribute x_scores_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26). 
Use est.transform(X) on \"\n \"the training data instead.\"\n )\n @property\n def x_scores_(self):\n return self._x_scores\n\n # mypy error: Decorated property not supported\n @deprecated( # type: ignore\n \"Attribute y_scores_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26). Use est.transform(X, Y) \"\n \"on the training data instead.\"\n )\n @property\n def y_scores_(self):\n return self._y_scores\n\n @deprecated( # type: ignore\n \"Attribute x_mean_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def x_mean_(self):\n return self._x_mean\n\n @deprecated( # type: ignore\n \"Attribute y_mean_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def y_mean_(self):\n return self._y_mean\n\n @deprecated( # type: ignore\n \"Attribute x_std_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def x_std_(self):\n return self._x_std\n\n @deprecated( # type: ignore\n \"Attribute y_std_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def y_std_(self):\n return self._y_std\n\n def transform(self, X, Y=None):\n \"\"\"\n Apply the dimensionality reduction.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Samples to be transformed.\n\n Y : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n default=None\n Targets.\n\n Returns\n -------\n out : array-like or tuple of array-like\n The transformed data `X_transformed` if `Y` is None,\n `(X_transformed, Y_transformed)` otherwise.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, dtype=np.float64)\n Xr = (X - self._x_mean) / self._x_std\n x_scores = np.dot(Xr, self.x_weights_)\n if Y is not None:\n Y = check_array(Y, ensure_2d=False, dtype=np.float64)\n if Y.ndim == 1:\n Y = Y.reshape(-1, 1)\n Yr = (Y - self._y_mean) / self._y_std\n y_scores = np.dot(Yr, self.y_weights_)\n return x_scores, y_scores\n return x_scores\n\n def fit_transform(self, X, y=None):\n \"\"\"Learn and apply the dimensionality reduction.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training samples.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n default=None\n Targets.\n\n Returns\n -------\n out : array-like or tuple of array-like\n The transformed data `X_transformed` if `Y` is None,\n `(X_transformed, Y_transformed)` otherwise.\n \"\"\"\n return self.fit(X, y).transform(X, y)", + "instance_attributes": [ + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "scale", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "_x_scores", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "_y_scores", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS", + "name": "_PLS", + "qname": "sklearn.cross_decomposition._pls._PLS", + "decorators": [], + "superclasses": ["TransformerMixin", "RegressorMixin", "MultiOutputMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.cross_decomposition._pls/_PLS/__init__", + "scikit-learn/sklearn.cross_decomposition._pls/_PLS/fit", + "scikit-learn/sklearn.cross_decomposition._pls/_PLS/transform", + 
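The `PLSSVD.fit` code a little above boils down to a single SVD of the centered cross-covariance matrix. A condensed sketch of the same computation (scaling and the n_components check omitted; `pls_svd_scores` is a hypothetical helper, not scikit-learn API):

    import numpy as np
    from scipy.linalg import svd

    def pls_svd_scores(X, Y, n_components=2):
        # Center both blocks, SVD the cross-covariance X'Y, then project:
        # X onto the left singular vectors, Y onto the right ones.
        Xc = X - X.mean(axis=0)
        Yc = Y - Y.mean(axis=0)
        U, s, Vt = svd(Xc.T @ Yc, full_matrices=False)
        x_scores = Xc @ U[:, :n_components]
        y_scores = Yc @ Vt[:n_components].T
        return x_scores, y_scores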
"scikit-learn/sklearn.cross_decomposition._pls/_PLS/inverse_transform", + "scikit-learn/sklearn.cross_decomposition._pls/_PLS/predict", + "scikit-learn/sklearn.cross_decomposition._pls/_PLS/fit_transform", + "scikit-learn/sklearn.cross_decomposition._pls/_PLS/norm_y_weights@getter", + "scikit-learn/sklearn.cross_decomposition._pls/_PLS/x_mean_@getter", + "scikit-learn/sklearn.cross_decomposition._pls/_PLS/y_mean_@getter", + "scikit-learn/sklearn.cross_decomposition._pls/_PLS/x_std_@getter", + "scikit-learn/sklearn.cross_decomposition._pls/_PLS/y_std_@getter", + "scikit-learn/sklearn.cross_decomposition._pls/_PLS/x_scores_@getter", + "scikit-learn/sklearn.cross_decomposition._pls/_PLS/y_scores_@getter", + "scikit-learn/sklearn.cross_decomposition._pls/_PLS/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Partial Least Squares (PLS)\n\nThis class implements the generic PLS algorithm.\n\nMain ref: Wegelin, a survey of Partial Least Squares (PLS) methods,\nwith emphasis on the two-block case\nhttps://www.stat.washington.edu/research/reports/2000/tr371.pdf", + "docstring": "Partial Least Squares (PLS)\n\nThis class implements the generic PLS algorithm.\n\nMain ref: Wegelin, a survey of Partial Least Squares (PLS) methods,\nwith emphasis on the two-block case\nhttps://www.stat.washington.edu/research/reports/2000/tr371.pdf", + "code": "class _PLS(TransformerMixin, RegressorMixin, MultiOutputMixin, BaseEstimator,\n metaclass=ABCMeta):\n \"\"\"Partial Least Squares (PLS)\n\n This class implements the generic PLS algorithm.\n\n Main ref: Wegelin, a survey of Partial Least Squares (PLS) methods,\n with emphasis on the two-block case\n https://www.stat.washington.edu/research/reports/2000/tr371.pdf\n \"\"\"\n\n @abstractmethod\n def __init__(self, n_components=2, *, scale=True,\n deflation_mode=\"regression\",\n mode=\"A\", algorithm=\"nipals\", max_iter=500, tol=1e-06,\n copy=True):\n self.n_components = n_components\n self.deflation_mode = deflation_mode\n self.mode = mode\n self.scale = scale\n self.algorithm = algorithm\n self.max_iter = max_iter\n self.tol = tol\n self.copy = copy\n\n def fit(self, X, Y):\n \"\"\"Fit model to data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of predictors.\n\n Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target vectors, where `n_samples` is the number of samples and\n `n_targets` is the number of response variables.\n \"\"\"\n\n check_consistent_length(X, Y)\n X = self._validate_data(X, dtype=np.float64, copy=self.copy,\n ensure_min_samples=2)\n Y = check_array(Y, dtype=np.float64, copy=self.copy, ensure_2d=False)\n if Y.ndim == 1:\n Y = Y.reshape(-1, 1)\n\n n = X.shape[0]\n p = X.shape[1]\n q = Y.shape[1]\n\n n_components = self.n_components\n if self.deflation_mode == 'regression':\n # With PLSRegression n_components is bounded by the rank of (X.T X)\n # see Wegelin page 25\n rank_upper_bound = p\n if not 1 <= n_components <= rank_upper_bound:\n # TODO: raise an error in 1.1\n warnings.warn(\n f\"As of version 0.24, n_components({n_components}) should \"\n f\"be in [1, n_features].\"\n f\"n_components={rank_upper_bound} will be used instead. 
\"\n f\"In version 1.1 (renaming of 0.26), an error will be \"\n f\"raised.\",\n FutureWarning\n )\n n_components = rank_upper_bound\n else:\n # With CCA and PLSCanonical, n_components is bounded by the rank of\n # X and the rank of Y: see Wegelin page 12\n rank_upper_bound = min(n, p, q)\n if not 1 <= self.n_components <= rank_upper_bound:\n # TODO: raise an error in 1.1\n warnings.warn(\n f\"As of version 0.24, n_components({n_components}) should \"\n f\"be in [1, min(n_features, n_samples, n_targets)] = \"\n f\"[1, {rank_upper_bound}]. \"\n f\"n_components={rank_upper_bound} will be used instead. \"\n f\"In version 1.1 (renaming of 0.26), an error will be \"\n f\"raised.\",\n FutureWarning\n )\n n_components = rank_upper_bound\n\n if self.algorithm not in (\"svd\", \"nipals\"):\n raise ValueError(\"algorithm should be 'svd' or 'nipals', got \"\n f\"{self.algorithm}.\")\n\n self._norm_y_weights = (self.deflation_mode == 'canonical') # 1.1\n norm_y_weights = self._norm_y_weights\n\n # Scale (in place)\n Xk, Yk, self._x_mean, self._y_mean, self._x_std, self._y_std = (\n _center_scale_xy(X, Y, self.scale))\n\n self.x_weights_ = np.zeros((p, n_components)) # U\n self.y_weights_ = np.zeros((q, n_components)) # V\n self._x_scores = np.zeros((n, n_components)) # Xi\n self._y_scores = np.zeros((n, n_components)) # Omega\n self.x_loadings_ = np.zeros((p, n_components)) # Gamma\n self.y_loadings_ = np.zeros((q, n_components)) # Delta\n self.n_iter_ = []\n\n # This whole thing corresponds to the algorithm in section 4.1 of the\n # review from Wegelin. See above for a notation mapping from code to\n # paper.\n Y_eps = np.finfo(Yk.dtype).eps\n for k in range(n_components):\n # Find first left and right singular vectors of the X.T.dot(Y)\n # cross-covariance matrix.\n if self.algorithm == \"nipals\":\n # Replace columns that are all close to zero with zeros\n Yk_mask = np.all(np.abs(Yk) < 10 * Y_eps, axis=0)\n Yk[:, Yk_mask] = 0.0\n\n try:\n x_weights, y_weights, n_iter_ = \\\n _get_first_singular_vectors_power_method(\n Xk, Yk, mode=self.mode, max_iter=self.max_iter,\n tol=self.tol, norm_y_weights=norm_y_weights)\n except StopIteration as e:\n if str(e) != \"Y residual is constant\":\n raise\n warnings.warn(f\"Y residual is constant at iteration {k}\")\n break\n\n self.n_iter_.append(n_iter_)\n\n elif self.algorithm == \"svd\":\n x_weights, y_weights = _get_first_singular_vectors_svd(Xk, Yk)\n\n # inplace sign flip for consistency across solvers and archs\n _svd_flip_1d(x_weights, y_weights)\n\n # compute scores, i.e. the projections of X and Y\n x_scores = np.dot(Xk, x_weights)\n if norm_y_weights:\n y_ss = 1\n else:\n y_ss = np.dot(y_weights, y_weights)\n y_scores = np.dot(Yk, y_weights) / y_ss\n\n # Deflation: subtract rank-one approx to obtain Xk+1 and Yk+1\n x_loadings = np.dot(x_scores, Xk) / np.dot(x_scores, x_scores)\n Xk -= np.outer(x_scores, x_loadings)\n\n if self.deflation_mode == \"canonical\":\n # regress Yk on y_score\n y_loadings = np.dot(y_scores, Yk) / np.dot(y_scores, y_scores)\n Yk -= np.outer(y_scores, y_loadings)\n if self.deflation_mode == \"regression\":\n # regress Yk on x_score\n y_loadings = np.dot(x_scores, Yk) / np.dot(x_scores, x_scores)\n Yk -= np.outer(x_scores, y_loadings)\n\n self.x_weights_[:, k] = x_weights\n self.y_weights_[:, k] = y_weights\n self._x_scores[:, k] = x_scores\n self._y_scores[:, k] = y_scores\n self.x_loadings_[:, k] = x_loadings\n self.y_loadings_[:, k] = y_loadings\n\n # X was approximated as Xi . Gamma.T + X_(R+1)\n # Xi . 
Gamma.T is a sum of n_components rank-1 matrices. X_(R+1) is\n # whatever is left to fully reconstruct X, and can be 0 if X is of rank\n # n_components.\n # Similiarly, Y was approximated as Omega . Delta.T + Y_(R+1)\n\n # Compute transformation matrices (rotations_). See User Guide.\n self.x_rotations_ = np.dot(\n self.x_weights_,\n pinv2(np.dot(self.x_loadings_.T, self.x_weights_),\n check_finite=False))\n self.y_rotations_ = np.dot(\n self.y_weights_, pinv2(np.dot(self.y_loadings_.T, self.y_weights_),\n check_finite=False))\n\n self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)\n self.coef_ = self.coef_ * self._y_std\n return self\n\n def transform(self, X, Y=None, copy=True):\n \"\"\"Apply the dimension reduction.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Samples to transform.\n\n Y : array-like of shape (n_samples, n_targets), default=None\n Target vectors.\n\n copy : bool, default=True\n Whether to copy `X` and `Y`, or perform in-place normalization.\n\n Returns\n -------\n `x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)\n # Normalize\n X -= self._x_mean\n X /= self._x_std\n # Apply rotation\n x_scores = np.dot(X, self.x_rotations_)\n if Y is not None:\n Y = check_array(Y, ensure_2d=False, copy=copy, dtype=FLOAT_DTYPES)\n if Y.ndim == 1:\n Y = Y.reshape(-1, 1)\n Y -= self._y_mean\n Y /= self._y_std\n y_scores = np.dot(Y, self.y_rotations_)\n return x_scores, y_scores\n\n return x_scores\n\n def inverse_transform(self, X):\n \"\"\"Transform data back to its original space.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_components)\n New data, where `n_samples` is the number of samples\n and `n_components` is the number of pls components.\n\n Returns\n -------\n x_reconstructed : array-like of shape (n_samples, n_features)\n\n Notes\n -----\n This transformation will only be exact if `n_components=n_features`.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, dtype=FLOAT_DTYPES)\n # From pls space to original space\n X_reconstructed = np.matmul(X, self.x_loadings_.T)\n\n # Denormalize\n X_reconstructed *= self._x_std\n X_reconstructed += self._x_mean\n return X_reconstructed\n\n def predict(self, X, copy=True):\n \"\"\"Predict targets of given samples.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Samples.\n\n copy : bool, default=True\n Whether to copy `X` and `Y`, or perform in-place normalization.\n\n Notes\n -----\n This call requires the estimation of a matrix of shape\n `(n_features, n_targets)`, which may be an issue in high dimensional\n space.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)\n # Normalize\n X -= self._x_mean\n X /= self._x_std\n Ypred = np.dot(X, self.coef_)\n return Ypred + self._y_mean\n\n def fit_transform(self, X, y=None):\n \"\"\"Learn and apply the dimension reduction on the train data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of predictors.\n\n y : array-like of shape (n_samples, n_targets), default=None\n Target vectors, where n_samples is the number of samples and\n n_targets is the number of response variables.\n\n Returns\n -------\n x_scores if Y is not given, (x_scores, y_scores) otherwise.\n \"\"\"\n return self.fit(X, y).transform(X, y)\n\n # mypy error: Decorated property not 
supported\n @deprecated( # type: ignore\n \"Attribute norm_y_weights was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def norm_y_weights(self):\n return self._norm_y_weights\n\n @deprecated( # type: ignore\n \"Attribute x_mean_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def x_mean_(self):\n return self._x_mean\n\n @deprecated( # type: ignore\n \"Attribute y_mean_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def y_mean_(self):\n return self._y_mean\n\n @deprecated( # type: ignore\n \"Attribute x_std_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def x_std_(self):\n return self._x_std\n\n @deprecated( # type: ignore\n \"Attribute y_std_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def y_std_(self):\n return self._y_std\n\n @property\n def x_scores_(self):\n # TODO: raise error in 1.1 instead\n if not isinstance(self, PLSRegression):\n warnings.warn(\n \"Attribute x_scores_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26). Use \"\n \"est.transform(X) on the training data instead.\",\n FutureWarning\n )\n return self._x_scores\n\n @property\n def y_scores_(self):\n # TODO: raise error in 1.1 instead\n if not isinstance(self, PLSRegression):\n warnings.warn(\n \"Attribute y_scores_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26). Use \"\n \"est.transform(X) on the training data instead.\",\n FutureWarning\n )\n return self._y_scores\n\n def _more_tags(self):\n return {'poor_score': True,\n 'requires_y': False}", + "instance_attributes": [ + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "deflation_mode", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "mode", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "scale", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "x_weights_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "y_weights_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "_x_scores", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "_y_scores", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "x_loadings_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "y_loadings_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "n_iter_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "x_rotations_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "y_rotations_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "coef_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.datasets._openml/OpenMLError", + "name": "OpenMLError", + "qname": 
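The loop in `_PLS.fit` above deflates each data block after every extracted component. One deflation step, isolated as a sketch (the helper name is ours; it mirrors the `Xk -= np.outer(...)` lines in the code):

    import numpy as np

    def deflate(Xk, x_scores):
        # Regress the current residual block on the scores, then subtract the
        # rank-one approximation so the next component works on what is left.
        x_loadings = (x_scores @ Xk) / (x_scores @ x_scores)
        return Xk - np.outer(x_scores, x_loadings), x_loadings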
"sklearn.datasets._openml.OpenMLError", + "decorators": [], + "superclasses": ["ValueError"], + "methods": [], + "is_public": false, + "reexported_by": [], + "description": "HTTP 412 is a specific OpenML error code, indicating a generic error", + "docstring": "HTTP 412 is a specific OpenML error code, indicating a generic error", + "code": "class OpenMLError(ValueError):\n \"\"\"HTTP 412 is a specific OpenML error code, indicating a generic error\"\"\"\n pass", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA", + "name": "_BasePCA", + "qname": "sklearn.decomposition._base._BasePCA", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.decomposition._base/_BasePCA/get_covariance", + "scikit-learn/sklearn.decomposition._base/_BasePCA/get_precision", + "scikit-learn/sklearn.decomposition._base/_BasePCA/fit", + "scikit-learn/sklearn.decomposition._base/_BasePCA/transform", + "scikit-learn/sklearn.decomposition._base/_BasePCA/inverse_transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for PCA methods.\n\nWarning: This class should not be used directly.\nUse derived classes instead.", + "docstring": "Base class for PCA methods.\n\nWarning: This class should not be used directly.\nUse derived classes instead.", + "code": "class _BasePCA(TransformerMixin, BaseEstimator, metaclass=ABCMeta):\n \"\"\"Base class for PCA methods.\n\n Warning: This class should not be used directly.\n Use derived classes instead.\n \"\"\"\n def get_covariance(self):\n \"\"\"Compute data covariance with the generative model.\n\n ``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``\n where S**2 contains the explained variances, and sigma2 contains the\n noise variances.\n\n Returns\n -------\n cov : array, shape=(n_features, n_features)\n Estimated covariance of data.\n \"\"\"\n components_ = self.components_\n exp_var = self.explained_variance_\n if self.whiten:\n components_ = components_ * np.sqrt(exp_var[:, np.newaxis])\n exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)\n cov = np.dot(components_.T * exp_var_diff, components_)\n cov.flat[::len(cov) + 1] += self.noise_variance_ # modify diag inplace\n return cov\n\n def get_precision(self):\n \"\"\"Compute data precision matrix with the generative model.\n\n Equals the inverse of the covariance but computed with\n the matrix inversion lemma for efficiency.\n\n Returns\n -------\n precision : array, shape=(n_features, n_features)\n Estimated precision of data.\n \"\"\"\n n_features = self.components_.shape[1]\n\n # handle corner cases first\n if self.n_components_ == 0:\n return np.eye(n_features) / self.noise_variance_\n if self.n_components_ == n_features:\n return linalg.inv(self.get_covariance())\n\n # Get precision using matrix inversion lemma\n components_ = self.components_\n exp_var = self.explained_variance_\n if self.whiten:\n components_ = components_ * np.sqrt(exp_var[:, np.newaxis])\n exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)\n precision = np.dot(components_, components_.T) / self.noise_variance_\n precision.flat[::len(precision) + 1] += 1. / exp_var_diff\n precision = np.dot(components_.T,\n np.dot(linalg.inv(precision), components_))\n precision /= -(self.noise_variance_ ** 2)\n precision.flat[::len(precision) + 1] += 1. / self.noise_variance_\n return precision\n\n @abstractmethod\n def fit(self, X, y=None):\n \"\"\"Placeholder for fit. 
Subclasses should implement this method!\n\n Fit the model with X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n\n def transform(self, X):\n \"\"\"Apply dimensionality reduction to X.\n\n X is projected on the first principal components previously extracted\n from a training set.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n New data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n X_new : array-like, shape (n_samples, n_components)\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False)\n if self.mean_ is not None:\n X = X - self.mean_\n X_transformed = np.dot(X, self.components_.T)\n if self.whiten:\n X_transformed /= np.sqrt(self.explained_variance_)\n return X_transformed\n\n def inverse_transform(self, X):\n \"\"\"Transform data back to its original space.\n\n In other words, return an input X_original whose transform would be X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_components)\n New data, where n_samples is the number of samples\n and n_components is the number of components.\n\n Returns\n -------\n X_original array-like, shape (n_samples, n_features)\n\n Notes\n -----\n If whitening is enabled, inverse_transform will compute the\n exact inverse operation, which includes reversing whitening.\n \"\"\"\n if self.whiten:\n return np.dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) *\n self.components_) + self.mean_\n else:\n return np.dot(X, self.components_) + self.mean_", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning", + "name": "DictionaryLearning", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning", + "decorators": [], + "superclasses": ["_BaseSparseCoding", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__", + "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Dictionary learning\n\nFinds a dictionary (a set of atoms) that can best be used to represent data\nusing a sparse code.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nRead more in the :ref:`User Guide `.", + "docstring": "Dictionary learning\n\nFinds a dictionary (a set of atoms) that can best be used to represent data\nusing a sparse code.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=n_features\n Number of dictionary elements to extract.\n\nalpha : float, default=1.0\n Sparsity controlling parameter.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n Tolerance for numerical error.\n\nfit_algorithm : {'lars', 'cd'}, default='lars'\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (:func:`~sklearn.linear_model.lars_path`);\n * `'cd'`: uses the coordinate descent method to compute the\n 
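`_BasePCA.get_covariance` above rebuilds the data covariance from the fitted factors. A compact standalone sketch of the same computation, assuming the unwhitened case (`pca_covariance` is a hypothetical helper):

    import numpy as np

    def pca_covariance(components, explained_variance, noise_variance):
        # cov = components.T * diag(var - sigma2) * components + sigma2 * I,
        # matching the generative-model formula in the docstring; rows of
        # `components` are the principal axes.
        exp_var_diff = np.maximum(explained_variance - noise_variance, 0.0)
        cov = (components.T * exp_var_diff) @ components
        cov[np.diag_indices_from(cov)] += noise_variance
        return cov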
Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be\n faster if the estimated components are sparse.\n\n .. versionadded:: 0.17\n *cd* coordinate descent method to improve speed.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (:func:`~sklearn.linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'`\n will be faster if the estimated components are sparse.\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\n .. versionadded:: 0.17\n *lasso_cd* coordinate descent method to improve speed.\n\ntransform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.0\n\nn_jobs : int or None, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ncode_init : ndarray of shape (n_samples, n_components), default=None\n Initial value for the code, for warm restart. Only used if `code_init`\n and `dict_init` are not None.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n Initial values for the dictionary, for warm restart. Only used if\n `code_init` and `dict_init` are not None.\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nsplit_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. This can improve the\n performance of downstream classifiers.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary\n\n .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n dictionary atoms extracted from the data\n\nerror_ : array\n vector of errors at each iteration\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_sparse_coded_signal\n>>> from sklearn.decomposition import DictionaryLearning\n>>> X, dictionary, code = make_sparse_coded_signal(\n... n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n... random_state=42,\n... )\n>>> dict_learner = DictionaryLearning(\n... n_components=15, transform_algorithm='lasso_lars', random_state=42,\n... )\n>>> X_transformed = dict_learner.fit_transform(X)\n\nWe can check the level of sparsity of `X_transformed`:\n\n>>> np.mean(X_transformed == 0)\n0.88...\n\nWe can compare the average squared euclidean norm of the reconstruction\nerror of the sparse coded signal relative to the squared euclidean norm of\nthe original signal:\n\n>>> X_hat = X_transformed @ dict_learner.components_\n>>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n0.07...\n\nNotes\n-----\n**References:**\n\nJ. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning\nfor sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\nSee Also\n--------\nSparseCoder\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA", + "code": "class DictionaryLearning(_BaseSparseCoding, BaseEstimator):\n \"\"\"Dictionary learning\n\n Finds a dictionary (a set of atoms) that can best be used to represent data\n using a sparse code.\n\n Solves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=n_features\n Number of dictionary elements to extract.\n\n alpha : float, default=1.0\n Sparsity controlling parameter.\n\n max_iter : int, default=1000\n Maximum number of iterations to perform.\n\n tol : float, default=1e-8\n Tolerance for numerical error.\n\n fit_algorithm : {'lars', 'cd'}, default='lars'\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (:func:`~sklearn.linear_model.lars_path`);\n * `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be\n faster if the estimated components are sparse.\n\n .. versionadded:: 0.17\n *cd* coordinate descent method to improve speed.\n\n transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \\\n 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (:func:`~sklearn.linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'`\n will be faster if the estimated components are sparse.\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\n .. versionadded:: 0.17\n *lasso_cd* coordinate descent method to improve speed.\n\n transform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. 
This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\n transform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, defaults to 1.0.\n\n n_jobs : int or None, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n code_init : ndarray of shape (n_samples, n_components), default=None\n Initial value for the code, for warm restart. Only used if `code_init`\n and `dict_init` are not None.\n\n dict_init : ndarray of shape (n_components, n_features), default=None\n Initial values for the dictionary, for warm restart. Only used if\n `code_init` and `dict_init` are not None.\n\n verbose : bool, default=False\n To control the verbosity of the procedure.\n\n split_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. This can improve the\n performance of downstream classifiers.\n\n random_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\n positive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\n positive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\n transform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\n .. versionadded:: 0.22\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n dictionary atoms extracted from the data\n\n error_ : array\n vector of errors at each iteration\n\n n_iter_ : int\n Number of iterations run.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.datasets import make_sparse_coded_signal\n >>> from sklearn.decomposition import DictionaryLearning\n >>> X, dictionary, code = make_sparse_coded_signal(\n ... n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n ... random_state=42,\n ... )\n >>> dict_learner = DictionaryLearning(\n ... n_components=15, transform_algorithm='lasso_lars', random_state=42,\n ... )\n >>> X_transformed = dict_learner.fit_transform(X)\n\n We can check the level of sparsity of `X_transformed`:\n\n >>> np.mean(X_transformed == 0)\n 0.88...\n\n We can compare the average squared euclidean norm of the reconstruction\n error of the sparse coded signal relative to the squared euclidean norm of\n the original signal:\n\n >>> X_hat = X_transformed @ dict_learner.components_\n >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n 0.07...\n\n Notes\n -----\n **References:**\n\n J. Mairal, F. Bach, J. Ponce, G. 
Sapiro, 2009: Online dictionary learning\n for sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\n See Also\n --------\n SparseCoder\n MiniBatchDictionaryLearning\n SparsePCA\n MiniBatchSparsePCA\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=None, *, alpha=1, max_iter=1000, tol=1e-8,\n fit_algorithm='lars', transform_algorithm='omp',\n transform_n_nonzero_coefs=None, transform_alpha=None,\n n_jobs=None, code_init=None, dict_init=None, verbose=False,\n split_sign=False, random_state=None, positive_code=False,\n positive_dict=False, transform_max_iter=1000):\n\n super().__init__(\n transform_algorithm, transform_n_nonzero_coefs,\n transform_alpha, split_sign, n_jobs, positive_code,\n transform_max_iter\n )\n self.n_components = n_components\n self.alpha = alpha\n self.max_iter = max_iter\n self.tol = tol\n self.fit_algorithm = fit_algorithm\n self.code_init = code_init\n self.dict_init = dict_init\n self.verbose = verbose\n self.random_state = random_state\n self.positive_dict = positive_dict\n\n def fit(self, X, y=None):\n \"\"\"Fit the model from data in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the object itself.\n \"\"\"\n random_state = check_random_state(self.random_state)\n X = self._validate_data(X)\n if self.n_components is None:\n n_components = X.shape[1]\n else:\n n_components = self.n_components\n\n V, U, E, self.n_iter_ = dict_learning(\n X, n_components, alpha=self.alpha,\n tol=self.tol, max_iter=self.max_iter,\n method=self.fit_algorithm,\n method_max_iter=self.transform_max_iter,\n n_jobs=self.n_jobs,\n code_init=self.code_init,\n dict_init=self.dict_init,\n verbose=self.verbose,\n random_state=random_state,\n return_n_iter=True,\n positive_dict=self.positive_dict,\n positive_code=self.positive_code)\n self.components_ = U\n self.error_ = E\n return self", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "positive_dict", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "error_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning", + "name": "MiniBatchDictionaryLearning", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning", + "decorators": [], + "superclasses": ["_BaseSparseCoding", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__", + "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/fit", + "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/partial_fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Mini-batch dictionary learning\n\nFinds a dictionary (a set of atoms) that can best be used to represent data\nusing a sparse code.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V 
||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nRead more in the :ref:`User Guide `.", + "docstring": "Mini-batch dictionary learning\n\nFinds a dictionary (a set of atoms) that can best be used to represent data\nusing a sparse code.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of dictionary elements to extract.\n\nalpha : float, default=1\n Sparsity controlling parameter.\n\nn_iter : int, default=1000\n Total number of iterations to perform.\n\nfit_algorithm : {'lars', 'cd'}, default='lars'\n The algorithm used:\n\n - `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`)\n - `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nbatch_size : int, default=3\n Number of samples in each mini-batch.\n\nshuffle : bool, default=True\n Whether to shuffle the samples before forming batches.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n initial value of the dictionary for warm restart scenarios\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster\n if the estimated components are sparse.\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\ntransform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nsplit_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. This can improve the\n performance of downstream classifiers.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. 
Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Components extracted from the data.\n\ninner_stats_ : tuple of (A, B) ndarrays\n Internal sufficient statistics that are kept by the algorithm.\n Keeping them is useful in online settings, to avoid losing the\n history of the evolution, but they shouldn't have any use for the\n end user.\n `A` `(n_components, n_components)` is the dictionary covariance matrix.\n `B` `(n_features, n_components)` is the data approximation matrix.\n\nn_iter_ : int\n Number of iterations run.\n\niter_offset_ : int\n The number of iterations on data batches that have been\n performed before.\n\nrandom_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generator or by `np.random`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_sparse_coded_signal\n>>> from sklearn.decomposition import MiniBatchDictionaryLearning\n>>> X, dictionary, code = make_sparse_coded_signal(\n... n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n... random_state=42)\n>>> dict_learner = MiniBatchDictionaryLearning(\n... n_components=15, transform_algorithm='lasso_lars', random_state=42,\n... )\n>>> X_transformed = dict_learner.fit_transform(X)\n\nWe can check the level of sparsity of `X_transformed`:\n\n>>> np.mean(X_transformed == 0)\n0.87...\n\nWe can compare the average squared euclidean norm of the reconstruction\nerror of the sparse coded signal relative to the squared euclidean norm of\nthe original signal:\n\n>>> X_hat = X_transformed @ dict_learner.components_\n>>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n0.10...\n\nNotes\n-----\n**References:**\n\nJ. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning\nfor sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\nSee Also\n--------\nSparseCoder\nDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA", + "code": "class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator):\n \"\"\"Mini-batch dictionary learning\n\n Finds a dictionary (a set of atoms) that can best be used to represent data\n using a sparse code.\n\n Solves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=None\n Number of dictionary elements to extract.\n\n alpha : float, default=1\n Sparsity controlling parameter.\n\n n_iter : int, default=1000\n Total number of iterations to perform.\n\n fit_algorithm : {'lars', 'cd'}, default='lars'\n The algorithm used:\n\n - `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`)\n - `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). 
Lars will be faster if\n the estimated components are sparse.\n\n n_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n batch_size : int, default=3\n Number of samples in each mini-batch.\n\n shuffle : bool, default=True\n Whether to shuffle the samples before forming batches.\n\n dict_init : ndarray of shape (n_components, n_features), default=None\n initial value of the dictionary for warm restart scenarios\n\n transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \\\n 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster\n if the estimated components are sparse.\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\n transform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\n transform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\n verbose : bool, default=False\n To control the verbosity of the procedure.\n\n split_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. This can improve the\n performance of downstream classifiers.\n\n random_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\n positive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\n positive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\n transform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\n .. 
versionadded:: 0.22\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n Components extracted from the data.\n\n inner_stats_ : tuple of (A, B) ndarrays\n Internal sufficient statistics that are kept by the algorithm.\n Keeping them is useful in online settings, to avoid losing the\n history of the evolution, but they shouldn't have any use for the\n end user.\n `A` `(n_components, n_components)` is the dictionary covariance matrix.\n `B` `(n_features, n_components)` is the data approximation matrix.\n\n n_iter_ : int\n Number of iterations run.\n\n iter_offset_ : int\n The number of iterations on data batches that have been\n performed before.\n\n random_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generator or by `np.random`.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.datasets import make_sparse_coded_signal\n >>> from sklearn.decomposition import MiniBatchDictionaryLearning\n >>> X, dictionary, code = make_sparse_coded_signal(\n ... n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n ... random_state=42)\n >>> dict_learner = MiniBatchDictionaryLearning(\n ... n_components=15, transform_algorithm='lasso_lars', random_state=42,\n ... )\n >>> X_transformed = dict_learner.fit_transform(X)\n\n We can check the level of sparsity of `X_transformed`:\n\n >>> np.mean(X_transformed == 0)\n 0.87...\n\n We can compare the average squared euclidean norm of the reconstruction\n error of the sparse coded signal relative to the squared euclidean norm of\n the original signal:\n\n >>> X_hat = X_transformed @ dict_learner.components_\n >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n 0.10...\n\n Notes\n -----\n **References:**\n\n J. Mairal, F. Bach, J. Ponce, G. 
Sapiro, 2009: Online dictionary learning\n for sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\n See Also\n --------\n SparseCoder\n DictionaryLearning\n SparsePCA\n MiniBatchSparsePCA\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=None, *, alpha=1, n_iter=1000,\n fit_algorithm='lars', n_jobs=None, batch_size=3, shuffle=True,\n dict_init=None, transform_algorithm='omp',\n transform_n_nonzero_coefs=None, transform_alpha=None,\n verbose=False, split_sign=False, random_state=None,\n positive_code=False, positive_dict=False,\n transform_max_iter=1000):\n\n super().__init__(\n transform_algorithm, transform_n_nonzero_coefs, transform_alpha,\n split_sign, n_jobs, positive_code, transform_max_iter\n )\n self.n_components = n_components\n self.alpha = alpha\n self.n_iter = n_iter\n self.fit_algorithm = fit_algorithm\n self.dict_init = dict_init\n self.verbose = verbose\n self.shuffle = shuffle\n self.batch_size = batch_size\n self.split_sign = split_sign\n self.random_state = random_state\n self.positive_dict = positive_dict\n\n def fit(self, X, y=None):\n \"\"\"Fit the model from data in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n random_state = check_random_state(self.random_state)\n X = self._validate_data(X)\n\n U, (A, B), self.n_iter_ = dict_learning_online(\n X, self.n_components, alpha=self.alpha,\n n_iter=self.n_iter, return_code=False,\n method=self.fit_algorithm,\n method_max_iter=self.transform_max_iter,\n n_jobs=self.n_jobs, dict_init=self.dict_init,\n batch_size=self.batch_size, shuffle=self.shuffle,\n verbose=self.verbose, random_state=random_state,\n return_inner_stats=True,\n return_n_iter=True,\n positive_dict=self.positive_dict,\n positive_code=self.positive_code)\n self.components_ = U\n # Keep track of the state of the algorithm to be able to do\n # some online fitting (partial_fit)\n self.inner_stats_ = (A, B)\n self.iter_offset_ = self.n_iter\n self.random_state_ = random_state\n return self\n\n def partial_fit(self, X, y=None, iter_offset=None):\n \"\"\"Updates the model using the data in X as a mini-batch.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n iter_offset : int, default=None\n The number of iterations on data batches that have been\n performed before this call to partial_fit. 
This is optional:\n if no number is passed, the memory of the object is\n used.\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n if not hasattr(self, 'random_state_'):\n self.random_state_ = check_random_state(self.random_state)\n if hasattr(self, 'components_'):\n dict_init = self.components_\n else:\n dict_init = self.dict_init\n inner_stats = getattr(self, 'inner_stats_', None)\n if iter_offset is None:\n iter_offset = getattr(self, 'iter_offset_', 0)\n X = self._validate_data(X, reset=(iter_offset == 0))\n U, (A, B) = dict_learning_online(\n X, self.n_components, alpha=self.alpha,\n n_iter=1, method=self.fit_algorithm,\n method_max_iter=self.transform_max_iter,\n n_jobs=self.n_jobs, dict_init=dict_init,\n batch_size=len(X), shuffle=False,\n verbose=self.verbose, return_code=False,\n iter_offset=iter_offset, random_state=self.random_state_,\n return_inner_stats=True, inner_stats=inner_stats,\n positive_dict=self.positive_dict,\n positive_code=self.positive_code)\n self.components_ = U\n\n # Keep track of the state of the algorithm to be able to do\n # some online fitting (partial_fit)\n self.inner_stats_ = (A, B)\n self.iter_offset_ = iter_offset + 1\n return self", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "fit_algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "shuffle", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "batch_size", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "split_sign", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "positive_dict", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "inner_stats_", + "types": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "name": "iter_offset_", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder", + "name": "SparseCoder", + "qname": "sklearn.decomposition._dict_learning.SparseCoder", + "decorators": [], + "superclasses": ["_BaseSparseCoding", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/__init__", + "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/fit", + "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/components_@getter", + "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/transform", + "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/_more_tags", + "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/n_components_@getter", + "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/n_features_in_@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Sparse coding\n\nFinds a sparse representation of data against a fixed, precomputed\ndictionary.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.", + "docstring": "Sparse coding\n\nFinds a sparse representation of data against a fixed, precomputed\ndictionary.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` 
such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndictionary : ndarray of shape (n_components, n_features)\n The dictionary atoms used for sparse coding. Lines are assumed to be\n normalized to unit norm.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution;\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if\n the estimated components are sparse;\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\ntransform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\nsplit_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. This can improve the\n performance of downstream classifiers.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `lasso_lars`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n The unchanged dictionary atoms.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will be removed in\n 1.1 (renaming of 0.26). Use `dictionary` instead.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.decomposition import SparseCoder\n>>> X = np.array([[-1, -1, -1], [0, 0, 3]])\n>>> dictionary = np.array(\n... [[0, 1, 0],\n... [-1, -1, 2],\n... [1, 1, 1],\n... [0, 1, 1],\n... [0, 2, 1]],\n... dtype=np.float64\n... )\n>>> coder = SparseCoder(\n... dictionary=dictionary, transform_algorithm='lasso_lars',\n... transform_alpha=1e-10,\n... 
)\n>>> coder.transform(X)\narray([[ 0., 0., -1., 0., 0.],\n [ 0., 1., 1., 0., 0.]])\n\nSee Also\n--------\nDictionaryLearning\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA\nsparse_encode", + "code": "class SparseCoder(_BaseSparseCoding, BaseEstimator):\n \"\"\"Sparse coding\n\n Finds a sparse representation of data against a fixed, precomputed\n dictionary.\n\n Each row of the result is the solution to a sparse coding problem.\n The goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n dictionary : ndarray of shape (n_components, n_features)\n The dictionary atoms used for sparse coding. Lines are assumed to be\n normalized to unit norm.\n\n transform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', \\\n 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution;\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if\n the estimated components are sparse;\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\n transform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\n transform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\n split_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. This can improve the\n performance of downstream classifiers.\n\n n_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n positive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\n transform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `lasso_lars`.\n\n .. versionadded:: 0.22\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n The unchanged dictionary atoms.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will be removed in\n 1.1 (renaming of 0.26). Use `dictionary` instead.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.decomposition import SparseCoder\n >>> X = np.array([[-1, -1, -1], [0, 0, 3]])\n >>> dictionary = np.array(\n ... [[0, 1, 0],\n ... [-1, -1, 2],\n ... [1, 1, 1],\n ... [0, 1, 1],\n ... [0, 2, 1]],\n ... dtype=np.float64\n ... )\n >>> coder = SparseCoder(\n ... dictionary=dictionary, transform_algorithm='lasso_lars',\n ... 
transform_alpha=1e-10,\n ... )\n >>> coder.transform(X)\n array([[ 0., 0., -1., 0., 0.],\n [ 0., 1., 1., 0., 0.]])\n\n See Also\n --------\n DictionaryLearning\n MiniBatchDictionaryLearning\n SparsePCA\n MiniBatchSparsePCA\n sparse_encode\n \"\"\"\n _required_parameters = [\"dictionary\"]\n\n @_deprecate_positional_args\n def __init__(self, dictionary, *, transform_algorithm='omp',\n transform_n_nonzero_coefs=None, transform_alpha=None,\n split_sign=False, n_jobs=None, positive_code=False,\n transform_max_iter=1000):\n super().__init__(\n transform_algorithm, transform_n_nonzero_coefs,\n transform_alpha, split_sign, n_jobs, positive_code,\n transform_max_iter\n )\n self.dictionary = dictionary\n\n def fit(self, X, y=None):\n \"\"\"Do nothing and return the estimator unchanged.\n\n This method is just there to implement the usual API and hence\n work in pipelines.\n\n Parameters\n ----------\n X : Ignored\n\n y : Ignored\n\n Returns\n -------\n self : object\n \"\"\"\n return self\n\n @deprecated(\"The attribute 'components_' is deprecated \" # type: ignore\n \"in 0.24 and will be removed in 1.1 (renaming of 0.26). Use \"\n \"the 'dictionary' instead.\")\n @property\n def components_(self):\n return self.dictionary\n\n def transform(self, X, y=None):\n \"\"\"Encode the data as a sparse combination of the dictionary atoms.\n\n Coding method is determined by the object parameter\n `transform_algorithm`.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\n y : Ignored\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n return super()._transform(X, self.dictionary)\n\n def _more_tags(self):\n return {\"requires_fit\": False}\n\n @property\n def n_components_(self):\n return self.dictionary.shape[0]\n\n @property\n def n_features_in_(self):\n return self.dictionary.shape[1]", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding", + "name": "_BaseSparseCoding", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding", + "decorators": [], + "superclasses": ["TransformerMixin"], + "methods": [ + "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/__init__", + "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/_transform", + "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for SparseCoder and DictionaryLearning algorithms.", + "docstring": "Base class for SparseCoder and DictionaryLearning algorithms.", + "code": "class _BaseSparseCoding(TransformerMixin):\n \"\"\"Base class for SparseCoder and DictionaryLearning algorithms.\"\"\"\n def __init__(self, transform_algorithm, transform_n_nonzero_coefs,\n transform_alpha, split_sign, n_jobs, positive_code,\n transform_max_iter):\n self.transform_algorithm = transform_algorithm\n self.transform_n_nonzero_coefs = transform_n_nonzero_coefs\n self.transform_alpha = transform_alpha\n self.transform_max_iter = transform_max_iter\n self.split_sign = split_sign\n self.n_jobs = n_jobs\n self.positive_code = positive_code\n\n def _transform(self, X, dictionary):\n \"\"\"Private method allowing to accommodate both DictionaryLearning and\n SparseCoder.\"\"\"\n X = self._validate_data(X, reset=False)\n\n code = sparse_encode(\n X, dictionary, 
algorithm=self.transform_algorithm,\n n_nonzero_coefs=self.transform_n_nonzero_coefs,\n alpha=self.transform_alpha, max_iter=self.transform_max_iter,\n n_jobs=self.n_jobs, positive=self.positive_code)\n\n if self.split_sign:\n # feature vector is split into a positive and negative side\n n_samples, n_features = code.shape\n split_code = np.empty((n_samples, 2 * n_features))\n split_code[:, :n_features] = np.maximum(code, 0)\n split_code[:, n_features:] = -np.minimum(code, 0)\n code = split_code\n\n return code\n\n def transform(self, X):\n \"\"\"Encode the data as a sparse combination of the dictionary atoms.\n\n Coding method is determined by the object parameter\n `transform_algorithm`.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n return self._transform(X, self.components_)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis", + "name": "FactorAnalysis", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/__init__", + "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/fit", + "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/transform", + "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/get_covariance", + "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/get_precision", + "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/score_samples", + "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/score", + "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/_rotate" + ], + "is_public": false, + "reexported_by": [], + "description": "Factor Analysis (FA).\n\nA simple linear generative model with Gaussian latent variables.\n\nThe observations are assumed to be caused by a linear transformation of\nlower dimensional latent factors and added Gaussian noise.\nWithout loss of generality the factors are distributed according to a\nGaussian with zero mean and unit covariance. The noise is also zero mean\nand has an arbitrary diagonal covariance matrix.\n\nIf we would restrict the model further, by assuming that the Gaussian\nnoise is even isotropic (all diagonal entries are the same) we would obtain\n:class:`PPCA`.\n\nFactorAnalysis performs a maximum likelihood estimate of the so-called\n`loading` matrix, the transformation of the latent variables to the\nobserved ones, using SVD based approach.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "docstring": "Factor Analysis (FA).\n\nA simple linear generative model with Gaussian latent variables.\n\nThe observations are assumed to be caused by a linear transformation of\nlower dimensional latent factors and added Gaussian noise.\nWithout loss of generality the factors are distributed according to a\nGaussian with zero mean and unit covariance. 
The noise is also zero mean\nand has an arbitrary diagonal covariance matrix.\n\nIf we would restrict the model further, by assuming that the Gaussian\nnoise is even isotropic (all diagonal entries are the same) we would obtain\n:class:`PPCA`.\n\nFactorAnalysis performs a maximum likelihood estimate of the so-called\n`loading` matrix, the transformation of the latent variables to the\nobserved ones, using SVD based approach.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int, default=None\n Dimensionality of latent space, the number of components\n of ``X`` that are obtained after ``transform``.\n If None, n_components is set to the number of features.\n\ntol : float, default=1e-2\n Stopping tolerance for log-likelihood increase.\n\ncopy : bool, default=True\n Whether to make a copy of X. If ``False``, the input X gets overwritten\n during fitting.\n\nmax_iter : int, default=1000\n Maximum number of iterations.\n\nnoise_variance_init : ndarray of shape (n_features,), default=None\n The initial guess of the noise variance for each feature.\n If None, it defaults to np.ones(n_features).\n\nsvd_method : {'lapack', 'randomized'}, default='randomized'\n Which SVD method to use. If 'lapack' use standard SVD from\n scipy.linalg, if 'randomized' use fast ``randomized_svd`` function.\n Defaults to 'randomized'. For most applications 'randomized' will\n be sufficiently precise while providing significant speed gains.\n Accuracy can also be improved by setting higher values for\n `iterated_power`. If this is not sufficient, for maximum precision\n you should choose 'lapack'.\n\niterated_power : int, default=3\n Number of iterations for the power method. 3 by default. Only used\n if ``svd_method`` equals 'randomized'.\n\nrotation : {'varimax', 'quartimax'}, default=None\n If not None, apply the indicated rotation. Currently, varimax and\n quartimax are implemented. See\n `\"The varimax criterion for analytic rotation in factor analysis\"\n `_\n H. F. Kaiser, 1958.\n\n .. versionadded:: 0.24\n\nrandom_state : int or RandomState instance, default=0\n Only used when ``svd_method`` equals 'randomized'. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Components with maximum variance.\n\nloglike_ : list of shape (n_iterations,)\n The log likelihood at each iteration.\n\nnoise_variance_ : ndarray of shape (n_features,)\n The estimated noise variance for each feature.\n\nn_iter_ : int\n Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import FactorAnalysis\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = FactorAnalysis(n_components=7, random_state=0)\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)\n\nReferences\n----------\n- David Barber, Bayesian Reasoning and Machine Learning,\n Algorithm 21.1.\n\n- Christopher M. 
Bishop: Pattern Recognition and Machine Learning,\n Chapter 12.2.4.\n\nSee Also\n--------\nPCA: Principal component analysis is also a latent linear variable model\n which however assumes equal noise variance for each feature.\n This extra assumption makes probabilistic PCA faster as it can be\n computed in closed form.\nFastICA: Independent component analysis, a latent variable model with\n non-Gaussian latent variables.", + "code": "class FactorAnalysis(TransformerMixin, BaseEstimator):\n \"\"\"Factor Analysis (FA).\n\n A simple linear generative model with Gaussian latent variables.\n\n The observations are assumed to be caused by a linear transformation of\n lower dimensional latent factors and added Gaussian noise.\n Without loss of generality the factors are distributed according to a\n Gaussian with zero mean and unit covariance. The noise is also zero mean\n and has an arbitrary diagonal covariance matrix.\n\n If we would restrict the model further, by assuming that the Gaussian\n noise is even isotropic (all diagonal entries are the same) we would obtain\n :class:`PPCA`.\n\n FactorAnalysis performs a maximum likelihood estimate of the so-called\n `loading` matrix, the transformation of the latent variables to the\n observed ones, using SVD based approach.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.13\n\n Parameters\n ----------\n n_components : int, default=None\n Dimensionality of latent space, the number of components\n of ``X`` that are obtained after ``transform``.\n If None, n_components is set to the number of features.\n\n tol : float, default=1e-2\n Stopping tolerance for log-likelihood increase.\n\n copy : bool, default=True\n Whether to make a copy of X. If ``False``, the input X gets overwritten\n during fitting.\n\n max_iter : int, default=1000\n Maximum number of iterations.\n\n noise_variance_init : ndarray of shape (n_features,), default=None\n The initial guess of the noise variance for each feature.\n If None, it defaults to np.ones(n_features).\n\n svd_method : {'lapack', 'randomized'}, default='randomized'\n Which SVD method to use. If 'lapack' use standard SVD from\n scipy.linalg, if 'randomized' use fast ``randomized_svd`` function.\n Defaults to 'randomized'. For most applications 'randomized' will\n be sufficiently precise while providing significant speed gains.\n Accuracy can also be improved by setting higher values for\n `iterated_power`. If this is not sufficient, for maximum precision\n you should choose 'lapack'.\n\n iterated_power : int, default=3\n Number of iterations for the power method. 3 by default. Only used\n if ``svd_method`` equals 'randomized'.\n\n rotation : {'varimax', 'quartimax'}, default=None\n If not None, apply the indicated rotation. Currently, varimax and\n quartimax are implemented. See\n `\"The varimax criterion for analytic rotation in factor analysis\"\n `_\n H. F. Kaiser, 1958.\n\n .. versionadded:: 0.24\n\n random_state : int or RandomState instance, default=0\n Only used when ``svd_method`` equals 'randomized'. 
Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n Components with maximum variance.\n\n loglike_ : list of shape (n_iterations,)\n The log likelihood at each iteration.\n\n noise_variance_ : ndarray of shape (n_features,)\n The estimated noise variance for each feature.\n\n n_iter_ : int\n Number of iterations run.\n\n mean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n\n Examples\n --------\n >>> from sklearn.datasets import load_digits\n >>> from sklearn.decomposition import FactorAnalysis\n >>> X, _ = load_digits(return_X_y=True)\n >>> transformer = FactorAnalysis(n_components=7, random_state=0)\n >>> X_transformed = transformer.fit_transform(X)\n >>> X_transformed.shape\n (1797, 7)\n\n References\n ----------\n - David Barber, Bayesian Reasoning and Machine Learning,\n Algorithm 21.1.\n\n - Christopher M. Bishop: Pattern Recognition and Machine Learning,\n Chapter 12.2.4.\n\n See Also\n --------\n PCA: Principal component analysis is also a latent linear variable model\n which however assumes equal noise variance for each feature.\n This extra assumption makes probabilistic PCA faster as it can be\n computed in closed form.\n FastICA: Independent component analysis, a latent variable model with\n non-Gaussian latent variables.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=None, *, tol=1e-2, copy=True,\n max_iter=1000,\n noise_variance_init=None, svd_method='randomized',\n iterated_power=3, rotation=None, random_state=0):\n self.n_components = n_components\n self.copy = copy\n self.tol = tol\n self.max_iter = max_iter\n if svd_method not in ['lapack', 'randomized']:\n raise ValueError('SVD method %s is not supported. Please consider'\n ' the documentation' % svd_method)\n self.svd_method = svd_method\n\n self.noise_variance_init = noise_variance_init\n self.iterated_power = iterated_power\n self.random_state = random_state\n self.rotation = rotation\n\n def fit(self, X, y=None):\n \"\"\"Fit the FactorAnalysis model to X using SVD based approach\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : Ignored\n\n Returns\n -------\n self\n \"\"\"\n X = self._validate_data(X, copy=self.copy, dtype=np.float64)\n\n n_samples, n_features = X.shape\n n_components = self.n_components\n if n_components is None:\n n_components = n_features\n\n self.mean_ = np.mean(X, axis=0)\n X -= self.mean_\n\n # some constant terms\n nsqrt = sqrt(n_samples)\n llconst = n_features * log(2. 
* np.pi) + n_components\n var = np.var(X, axis=0)\n\n if self.noise_variance_init is None:\n psi = np.ones(n_features, dtype=X.dtype)\n else:\n if len(self.noise_variance_init) != n_features:\n raise ValueError(\"noise_variance_init dimension does not match \"\n \"the number of features : %d != %d\" %\n (len(self.noise_variance_init), n_features))\n psi = np.array(self.noise_variance_init)\n\n loglike = []\n old_ll = -np.inf\n SMALL = 1e-12\n\n # we'll modify svd outputs to return unexplained variance\n # to allow for unified computation of loglikelihood\n if self.svd_method == 'lapack':\n def my_svd(X):\n _, s, Vt = linalg.svd(X,\n full_matrices=False,\n check_finite=False)\n return (s[:n_components], Vt[:n_components],\n squared_norm(s[n_components:]))\n elif self.svd_method == 'randomized':\n random_state = check_random_state(self.random_state)\n\n def my_svd(X):\n _, s, Vt = randomized_svd(X, n_components,\n random_state=random_state,\n n_iter=self.iterated_power)\n return s, Vt, squared_norm(X) - squared_norm(s)\n else:\n raise ValueError('SVD method %s is not supported. Please consider'\n ' the documentation' % self.svd_method)\n\n for i in range(self.max_iter):\n # SMALL helps numerics\n sqrt_psi = np.sqrt(psi) + SMALL\n s, Vt, unexp_var = my_svd(X / (sqrt_psi * nsqrt))\n s **= 2\n # Use 'maximum' here to avoid sqrt problems.\n W = np.sqrt(np.maximum(s - 1., 0.))[:, np.newaxis] * Vt\n del Vt\n W *= sqrt_psi\n\n # loglikelihood\n ll = llconst + np.sum(np.log(s))\n ll += unexp_var + np.sum(np.log(psi))\n ll *= -n_samples / 2.\n loglike.append(ll)\n if (ll - old_ll) < self.tol:\n break\n old_ll = ll\n\n psi = np.maximum(var - np.sum(W ** 2, axis=0), SMALL)\n else:\n warnings.warn('FactorAnalysis did not converge.' +\n ' You might want' +\n ' to increase the number of iterations.',\n ConvergenceWarning)\n\n self.components_ = W\n if self.rotation is not None:\n self.components_ = self._rotate(W)\n self.noise_variance_ = psi\n self.loglike_ = loglike\n self.n_iter_ = i + 1\n return self\n\n def transform(self, X):\n \"\"\"Apply dimensionality reduction to X using the model.\n\n Compute the expected mean of the latent variables.\n See Barber, 21.2.33 (or Bishop, 12.66).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n The latent variables of X.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, reset=False)\n Ih = np.eye(len(self.components_))\n\n X_transformed = X - self.mean_\n\n Wpsi = self.components_ / self.noise_variance_\n cov_z = linalg.inv(Ih + np.dot(Wpsi, self.components_.T))\n tmp = np.dot(X_transformed, Wpsi.T)\n X_transformed = np.dot(tmp, cov_z)\n\n return X_transformed\n\n def get_covariance(self):\n \"\"\"Compute data covariance with the FactorAnalysis model.\n\n ``cov = components_.T * components_ + diag(noise_variance)``\n\n Returns\n -------\n cov : ndarray of shape (n_features, n_features)\n Estimated covariance of data.\n \"\"\"\n check_is_fitted(self)\n\n cov = np.dot(self.components_.T, self.components_)\n cov.flat[::len(cov) + 1] += self.noise_variance_ # modify diag inplace\n return cov\n\n def get_precision(self):\n \"\"\"Compute data precision matrix with the FactorAnalysis model.\n\n Returns\n -------\n precision : ndarray of shape (n_features, n_features)\n Estimated precision of data.\n \"\"\"\n check_is_fitted(self)\n\n n_features = self.components_.shape[1]\n\n # handle corner cases first\n if self.n_components == 0:\n return 
np.diag(1. / self.noise_variance_)\n if self.n_components == n_features:\n return linalg.inv(self.get_covariance())\n\n # Get precision using matrix inversion lemma\n components_ = self.components_\n precision = np.dot(components_ / self.noise_variance_, components_.T)\n precision.flat[::len(precision) + 1] += 1.\n precision = np.dot(components_.T,\n np.dot(linalg.inv(precision), components_))\n precision /= self.noise_variance_[:, np.newaxis]\n precision /= -self.noise_variance_[np.newaxis, :]\n precision.flat[::len(precision) + 1] += 1. / self.noise_variance_\n return precision\n\n def score_samples(self, X):\n \"\"\"Compute the log-likelihood of each sample\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The data\n\n Returns\n -------\n ll : ndarray of shape (n_samples,)\n Log-likelihood of each sample under the current model\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, reset=False)\n Xr = X - self.mean_\n precision = self.get_precision()\n n_features = X.shape[1]\n log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)\n log_like -= .5 * (n_features * log(2. * np.pi)\n - fast_logdet(precision))\n return log_like\n\n def score(self, X, y=None):\n \"\"\"Compute the average log-likelihood of the samples\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The data\n\n y : Ignored\n\n Returns\n -------\n ll : float\n Average log-likelihood of the samples under the current model\n \"\"\"\n return np.mean(self.score_samples(X))\n\n def _rotate(self, components, n_components=None, tol=1e-6):\n \"Rotate the factor analysis solution.\"\n # note that tol is not exposed\n implemented = (\"varimax\", \"quartimax\")\n method = self.rotation\n if method in implemented:\n return _ortho_rotation(components.T, method=method,\n tol=tol)[:self.n_components]\n else:\n raise ValueError(\"'method' must be in %s, not %s\"\n % (implemented, method))", + "instance_attributes": [ + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "svd_method", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "iterated_power", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "random_state", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "noise_variance_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "loglike_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA", + "name": "FastICA", + "qname": "sklearn.decomposition._fastica.FastICA", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.decomposition._fastica/FastICA/__init__", + "scikit-learn/sklearn.decomposition._fastica/FastICA/_fit", + "scikit-learn/sklearn.decomposition._fastica/FastICA/fit_transform", + "scikit-learn/sklearn.decomposition._fastica/FastICA/fit", + "scikit-learn/sklearn.decomposition._fastica/FastICA/transform", + "scikit-learn/sklearn.decomposition._fastica/FastICA/inverse_transform" + ], + "is_public": false, + "reexported_by": [], + "description": "FastICA: a fast algorithm for Independent Component Analysis.\n\nRead more in the :ref:`User Guide `.", + "docstring": "FastICA: a fast algorithm for Independent 
Component Analysis.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of components to use. If None is passed, all are used.\n\nalgorithm : {'parallel', 'deflation'}, default='parallel'\n Apply parallel or deflational algorithm for FastICA.\n\nwhiten : bool, default=True\n If whiten is false, the data is already considered to be\n whitened, and no whitening is performed.\n\nfun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n The functional form of the G function used in the\n approximation to neg-entropy. Could be either 'logcosh', 'exp',\n or 'cube'.\n You can also provide your own function. It should return a tuple\n containing the value of the function, and of its derivative, in the\n point. Example::\n\n def my_g(x):\n return x ** 3, (3 * x ** 2).mean(axis=-1)\n\nfun_args : dict, default=None\n Arguments to send to the functional form.\n If empty and if fun='logcosh', fun_args will take value\n {'alpha' : 1.0}.\n\nmax_iter : int, default=200\n Maximum number of iterations during fit.\n\ntol : float, default=1e-4\n Tolerance on update at each iteration.\n\nw_init : ndarray of shape (n_components, n_components), default=None\n The mixing matrix to be used to initialize the algorithm.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to initialize ``w_init`` when not specified, with a\n normal distribution. Pass an int, for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n The linear operator to apply to the data to get the independent\n sources. This is equal to the unmixing matrix when ``whiten`` is\n False, and equal to ``np.dot(unmixing_matrix, self.whitening_)`` when\n ``whiten`` is True.\n\nmixing_ : ndarray of shape (n_features, n_components)\n The pseudo-inverse of ``components_``. It is the linear operator\n that maps independent sources to the data.\n\nmean_ : ndarray of shape(n_features,)\n The mean over features. Only set if `self.whiten` is True.\n\nn_iter_ : int\n If the algorithm is \"deflation\", n_iter is the\n maximum number of iterations run across all components. Else\n they are just the number of iterations taken to converge.\n\nwhitening_ : ndarray of shape (n_components, n_features)\n Only set if whiten is 'True'. This is the pre-whitening matrix\n that projects data onto the first `n_components` principal components.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import FastICA\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = FastICA(n_components=7,\n... random_state=0)\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)\n\nNotes\n-----\nImplementation based on\n*A. Hyvarinen and E. Oja, Independent Component Analysis:\nAlgorithms and Applications, Neural Networks, 13(4-5), 2000,\npp. 411-430*", + "code": "class FastICA(TransformerMixin, BaseEstimator):\n \"\"\"FastICA: a fast algorithm for Independent Component Analysis.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=None\n Number of components to use. 
If None is passed, all are used.\n\n algorithm : {'parallel', 'deflation'}, default='parallel'\n Apply parallel or deflational algorithm for FastICA.\n\n whiten : bool, default=True\n If whiten is false, the data is already considered to be\n whitened, and no whitening is performed.\n\n fun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n The functional form of the G function used in the\n approximation to neg-entropy. Could be either 'logcosh', 'exp',\n or 'cube'.\n You can also provide your own function. It should return a tuple\n containing the value of the function, and of its derivative, in the\n point. Example::\n\n def my_g(x):\n return x ** 3, (3 * x ** 2).mean(axis=-1)\n\n fun_args : dict, default=None\n Arguments to send to the functional form.\n If empty and if fun='logcosh', fun_args will take value\n {'alpha' : 1.0}.\n\n max_iter : int, default=200\n Maximum number of iterations during fit.\n\n tol : float, default=1e-4\n Tolerance on update at each iteration.\n\n w_init : ndarray of shape (n_components, n_components), default=None\n The mixing matrix to be used to initialize the algorithm.\n\n random_state : int, RandomState instance or None, default=None\n Used to initialize ``w_init`` when not specified, with a\n normal distribution. Pass an int, for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n The linear operator to apply to the data to get the independent\n sources. This is equal to the unmixing matrix when ``whiten`` is\n False, and equal to ``np.dot(unmixing_matrix, self.whitening_)`` when\n ``whiten`` is True.\n\n mixing_ : ndarray of shape (n_features, n_components)\n The pseudo-inverse of ``components_``. It is the linear operator\n that maps independent sources to the data.\n\n mean_ : ndarray of shape(n_features,)\n The mean over features. Only set if `self.whiten` is True.\n\n n_iter_ : int\n If the algorithm is \"deflation\", n_iter is the\n maximum number of iterations run across all components. Else\n they are just the number of iterations taken to converge.\n\n whitening_ : ndarray of shape (n_components, n_features)\n Only set if whiten is 'True'. This is the pre-whitening matrix\n that projects data onto the first `n_components` principal components.\n\n Examples\n --------\n >>> from sklearn.datasets import load_digits\n >>> from sklearn.decomposition import FastICA\n >>> X, _ = load_digits(return_X_y=True)\n >>> transformer = FastICA(n_components=7,\n ... random_state=0)\n >>> X_transformed = transformer.fit_transform(X)\n >>> X_transformed.shape\n (1797, 7)\n\n Notes\n -----\n Implementation based on\n *A. Hyvarinen and E. Oja, Independent Component Analysis:\n Algorithms and Applications, Neural Networks, 13(4-5), 2000,\n pp. 
411-430*\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=None, *, algorithm='parallel', whiten=True,\n fun='logcosh', fun_args=None, max_iter=200, tol=1e-4,\n w_init=None, random_state=None):\n super().__init__()\n if max_iter < 1:\n raise ValueError(\"max_iter should be greater than 1, got \"\n \"(max_iter={})\".format(max_iter))\n self.n_components = n_components\n self.algorithm = algorithm\n self.whiten = whiten\n self.fun = fun\n self.fun_args = fun_args\n self.max_iter = max_iter\n self.tol = tol\n self.w_init = w_init\n self.random_state = random_state\n\n def _fit(self, X, compute_sources=False):\n \"\"\"Fit the model\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n compute_sources : bool, default=False\n If False, sources are not computes but only the rotation matrix.\n This can save memory when working with big data. Defaults to False.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n \"\"\"\n\n X = self._validate_data(X, copy=self.whiten, dtype=FLOAT_DTYPES,\n ensure_min_samples=2).T\n fun_args = {} if self.fun_args is None else self.fun_args\n random_state = check_random_state(self.random_state)\n\n alpha = fun_args.get('alpha', 1.0)\n if not 1 <= alpha <= 2:\n raise ValueError('alpha must be in [1,2]')\n\n if self.fun == 'logcosh':\n g = _logcosh\n elif self.fun == 'exp':\n g = _exp\n elif self.fun == 'cube':\n g = _cube\n elif callable(self.fun):\n def g(x, fun_args):\n return self.fun(x, **fun_args)\n else:\n exc = ValueError if isinstance(self.fun, str) else TypeError\n raise exc(\n \"Unknown function %r;\"\n \" should be one of 'logcosh', 'exp', 'cube' or callable\"\n % self.fun\n )\n\n n_samples, n_features = X.shape\n\n n_components = self.n_components\n if not self.whiten and n_components is not None:\n n_components = None\n warnings.warn('Ignoring n_components with whiten=False.')\n\n if n_components is None:\n n_components = min(n_samples, n_features)\n if (n_components > min(n_samples, n_features)):\n n_components = min(n_samples, n_features)\n warnings.warn(\n 'n_components is too large: it will be set to %s'\n % n_components\n )\n\n if self.whiten:\n # Centering the columns (ie the variables)\n X_mean = X.mean(axis=-1)\n X -= X_mean[:, np.newaxis]\n\n # Whitening and preprocessing by PCA\n u, d, _ = linalg.svd(X, full_matrices=False, check_finite=False)\n\n del _\n K = (u / d).T[:n_components] # see (6.33) p.140\n del u, d\n X1 = np.dot(K, X)\n # see (13.6) p.267 Here X1 is white and data\n # in X has been projected onto a subspace by PCA\n X1 *= np.sqrt(n_features)\n else:\n # X must be casted to floats to avoid typing issues with numpy\n # 2.0 and the line below\n X1 = as_float_array(X, copy=False) # copy has been taken care of\n\n w_init = self.w_init\n if w_init is None:\n w_init = np.asarray(random_state.normal(\n size=(n_components, n_components)), dtype=X1.dtype)\n\n else:\n w_init = np.asarray(w_init)\n if w_init.shape != (n_components, n_components):\n raise ValueError(\n 'w_init has invalid shape -- should be %(shape)s'\n % {'shape': (n_components, n_components)})\n\n kwargs = {'tol': self.tol,\n 'g': g,\n 'fun_args': fun_args,\n 'max_iter': self.max_iter,\n 'w_init': w_init}\n\n if self.algorithm == 'parallel':\n W, n_iter = _ica_par(X1, **kwargs)\n elif self.algorithm == 'deflation':\n W, n_iter = _ica_def(X1, **kwargs)\n else:\n raise ValueError('Invalid algorithm: 
must be either `parallel` or'\n ' `deflation`.')\n del X1\n\n if compute_sources:\n if self.whiten:\n S = np.linalg.multi_dot([W, K, X]).T\n else:\n S = np.dot(W, X).T\n else:\n S = None\n\n self.n_iter_ = n_iter\n\n if self.whiten:\n self.components_ = np.dot(W, K)\n self.mean_ = X_mean\n self.whitening_ = K\n else:\n self.components_ = W\n\n self.mixing_ = linalg.pinv(self.components_, check_finite=False)\n self._unmixing = W\n\n return S\n\n def fit_transform(self, X, y=None):\n \"\"\"Fit the model and recover the sources from X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n \"\"\"\n return self._fit(X, compute_sources=True)\n\n def fit(self, X, y=None):\n \"\"\"Fit the model to X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self\n \"\"\"\n self._fit(X, compute_sources=False)\n return self\n\n def transform(self, X, copy=True):\n \"\"\"Recover the sources from X (apply the unmixing matrix).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data to transform, where n_samples is the number of samples\n and n_features is the number of features.\n\n copy : bool, default=True\n If False, data passed to fit can be overwritten. Defaults to True.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, copy=(copy and self.whiten),\n dtype=FLOAT_DTYPES, reset=False)\n if self.whiten:\n X -= self.mean_\n\n return np.dot(X, self.components_.T)\n\n def inverse_transform(self, X, copy=True):\n \"\"\"Transform the sources back to the mixed data (apply mixing matrix).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_components)\n Sources, where n_samples is the number of samples\n and n_components is the number of components.\n copy : bool, default=True\n If False, data passed to fit are overwritten. 
Defaults to True.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_features)\n \"\"\"\n check_is_fitted(self)\n\n X = check_array(X, copy=(copy and self.whiten), dtype=FLOAT_DTYPES)\n X = np.dot(X, self.mixing_.T)\n if self.whiten:\n X += self.mean_\n\n return X", + "instance_attributes": [ + { + "name": "algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "whiten", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "fun", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "components_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "mixing_", + "types": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "name": "_unmixing", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA", + "name": "IncrementalPCA", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA", + "decorators": [], + "superclasses": ["_BasePCA"], + "methods": [ + "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/__init__", + "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/fit", + "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/partial_fit", + "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Incremental principal components analysis (IPCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of\nthe data, keeping only the most significant singular vectors to\nproject the data to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nDepending on the size of the input data, this algorithm can be much more\nmemory efficient than a PCA, and allows sparse input.\n\nThis algorithm has constant memory complexity, on the order\nof ``batch_size * n_features``, enabling use of np.memmap files without\nloading the entire file into memory. For sparse matrices, the input\nis converted to dense in batches (in order to be able to subtract the\nmean) which avoids storing the entire dense matrix at any one time.\n\nThe computational overhead of each SVD is\n``O(batch_size * n_features ** 2)``, but only 2 * batch_size samples\nremain in memory at a time. There will be ``n_samples / batch_size`` SVD\ncomputations to get the principal components, versus 1 large SVD of\ncomplexity ``O(n_samples * n_features ** 2)`` for PCA.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16", + "docstring": "Incremental principal components analysis (IPCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of\nthe data, keeping only the most significant singular vectors to\nproject the data to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nDepending on the size of the input data, this algorithm can be much more\nmemory efficient than a PCA, and allows sparse input.\n\nThis algorithm has constant memory complexity, on the order\nof ``batch_size * n_features``, enabling use of np.memmap files without\nloading the entire file into memory. 
For sparse matrices, the input\nis converted to dense in batches (in order to be able to subtract the\nmean) which avoids storing the entire dense matrix at any one time.\n\nThe computational overhead of each SVD is\n``O(batch_size * n_features ** 2)``, but only 2 * batch_size samples\nremain in memory at a time. There will be ``n_samples / batch_size`` SVD\ncomputations to get the principal components, versus 1 large SVD of\ncomplexity ``O(n_samples * n_features ** 2)`` for PCA.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nn_components : int, default=None\n Number of components to keep. If ``n_components`` is ``None``,\n then ``n_components`` is set to ``min(n_samples, n_features)``.\n\nwhiten : bool, default=False\n When True (False by default) the ``components_`` vectors are divided\n by ``n_samples`` times ``components_`` to ensure uncorrelated outputs\n with unit component-wise variances.\n\n Whitening will remove some information from the transformed signal\n (the relative variance scales of the components) but can sometimes\n improve the predictive accuracy of the downstream estimators by\n making data respect some hard-wired assumptions.\n\ncopy : bool, default=True\n If False, X will be overwritten. ``copy=False`` can be used to\n save memory but is unsafe for general use.\n\nbatch_size : int, default=None\n The number of samples to use for each batch. Only used when calling\n ``fit``. If ``batch_size`` is ``None``, then ``batch_size``\n is inferred from the data and set to ``5 * n_features``, to provide a\n balance between approximation accuracy and memory consumption.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Components with maximum variance.\n\nexplained_variance_ : ndarray of shape (n_components,)\n Variance explained by each of the selected components.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n If all components are stored, the sum of explained variances is equal\n to 1.0.\n\nsingular_values_ : ndarray of shape (n_components,)\n The singular values corresponding to each of the selected components.\n The singular values are equal to the 2-norms of the ``n_components``\n variables in the lower-dimensional space.\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, aggregate over calls to ``partial_fit``.\n\nvar_ : ndarray of shape (n_features,)\n Per-feature empirical variance, aggregate over calls to\n ``partial_fit``.\n\nnoise_variance_ : float\n The estimated noise covariance following the Probabilistic PCA model\n from Tipping and Bishop 1999. See \"Pattern Recognition and\n Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n http://www.miketipping.com/papers/met-mppca.pdf.\n\nn_components_ : int\n The estimated number of components. Relevant when\n ``n_components=None``.\n\nn_samples_seen_ : int\n The number of samples processed by the estimator. 
Will be reset on\n new calls to fit, but increments across ``partial_fit`` calls.\n\nbatch_size_ : int\n Inferred batch size from ``batch_size``.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import IncrementalPCA\n>>> from scipy import sparse\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = IncrementalPCA(n_components=7, batch_size=200)\n>>> # either partially fit on smaller batches of data\n>>> transformer.partial_fit(X[:100, :])\nIncrementalPCA(batch_size=200, n_components=7)\n>>> # or let the fit function itself divide the data into batches\n>>> X_sparse = sparse.csr_matrix(X)\n>>> X_transformed = transformer.fit_transform(X_sparse)\n>>> X_transformed.shape\n(1797, 7)\n\nNotes\n-----\nImplements the incremental PCA model from:\n*D. Ross, J. Lim, R. Lin, M. Yang, Incremental Learning for Robust Visual\nTracking, International Journal of Computer Vision, Volume 77, Issue 1-3,\npp. 125-141, May 2008.*\nSee https://www.cs.toronto.edu/~dross/ivt/RossLimLinYang_ijcv.pdf\n\nThis model is an extension of the Sequential Karhunen-Loeve Transform from:\n*A. Levy and M. Lindenbaum, Sequential Karhunen-Loeve Basis Extraction and\nits Application to Images, IEEE Transactions on Image Processing, Volume 9,\nNumber 8, pp. 1371-1374, August 2000.*\nSee https://www.cs.technion.ac.il/~mic/doc/skl-ip.pdf\n\nWe have specifically abstained from an optimization used by authors of both\npapers, a QR decomposition used in specific situations to reduce the\nalgorithmic complexity of the SVD. The source for this technique is\n*Matrix Computations, Third Edition, G. Holub and C. Van Loan, Chapter 5,\nsection 5.4.4, pp 252-253.*. This technique has been omitted because it is\nadvantageous only when decomposing a matrix with ``n_samples`` (rows)\n>= 5/3 * ``n_features`` (columns), and hurts the readability of the\nimplemented algorithm. This would be a good opportunity for future\noptimization, if it is deemed necessary.\n\nReferences\n----------\nD. Ross, J. Lim, R. Lin, M. Yang. Incremental Learning for Robust Visual\nTracking, International Journal of Computer Vision, Volume 77,\nIssue 1-3, pp. 125-141, May 2008.\n\nG. Golub and C. Van Loan. Matrix Computations, Third Edition, Chapter 5,\nSection 5.4.4, pp. 252-253.\n\nSee Also\n--------\nPCA\nKernelPCA\nSparsePCA\nTruncatedSVD", + "code": "class IncrementalPCA(_BasePCA):\n \"\"\"Incremental principal components analysis (IPCA).\n\n Linear dimensionality reduction using Singular Value Decomposition of\n the data, keeping only the most significant singular vectors to\n project the data to a lower dimensional space. The input data is centered\n but not scaled for each feature before applying the SVD.\n\n Depending on the size of the input data, this algorithm can be much more\n memory efficient than a PCA, and allows sparse input.\n\n This algorithm has constant memory complexity, on the order\n of ``batch_size * n_features``, enabling use of np.memmap files without\n loading the entire file into memory. For sparse matrices, the input\n is converted to dense in batches (in order to be able to subtract the\n mean) which avoids storing the entire dense matrix at any one time.\n\n The computational overhead of each SVD is\n ``O(batch_size * n_features ** 2)``, but only 2 * batch_size samples\n remain in memory at a time. 
There will be ``n_samples / batch_size`` SVD\n computations to get the principal components, versus 1 large SVD of\n complexity ``O(n_samples * n_features ** 2)`` for PCA.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.16\n\n Parameters\n ----------\n n_components : int, default=None\n Number of components to keep. If ``n_components`` is ``None``,\n then ``n_components`` is set to ``min(n_samples, n_features)``.\n\n whiten : bool, default=False\n When True (False by default) the ``components_`` vectors are divided\n by ``n_samples`` times ``components_`` to ensure uncorrelated outputs\n with unit component-wise variances.\n\n Whitening will remove some information from the transformed signal\n (the relative variance scales of the components) but can sometimes\n improve the predictive accuracy of the downstream estimators by\n making data respect some hard-wired assumptions.\n\n copy : bool, default=True\n If False, X will be overwritten. ``copy=False`` can be used to\n save memory but is unsafe for general use.\n\n batch_size : int, default=None\n The number of samples to use for each batch. Only used when calling\n ``fit``. If ``batch_size`` is ``None``, then ``batch_size``\n is inferred from the data and set to ``5 * n_features``, to provide a\n balance between approximation accuracy and memory consumption.\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n Components with maximum variance.\n\n explained_variance_ : ndarray of shape (n_components,)\n Variance explained by each of the selected components.\n\n explained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n If all components are stored, the sum of explained variances is equal\n to 1.0.\n\n singular_values_ : ndarray of shape (n_components,)\n The singular values corresponding to each of the selected components.\n The singular values are equal to the 2-norms of the ``n_components``\n variables in the lower-dimensional space.\n\n mean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, aggregate over calls to ``partial_fit``.\n\n var_ : ndarray of shape (n_features,)\n Per-feature empirical variance, aggregate over calls to\n ``partial_fit``.\n\n noise_variance_ : float\n The estimated noise covariance following the Probabilistic PCA model\n from Tipping and Bishop 1999. See \"Pattern Recognition and\n Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n http://www.miketipping.com/papers/met-mppca.pdf.\n\n n_components_ : int\n The estimated number of components. Relevant when\n ``n_components=None``.\n\n n_samples_seen_ : int\n The number of samples processed by the estimator. 
Will be reset on\n new calls to fit, but increments across ``partial_fit`` calls.\n\n batch_size_ : int\n Inferred batch size from ``batch_size``.\n\n Examples\n --------\n >>> from sklearn.datasets import load_digits\n >>> from sklearn.decomposition import IncrementalPCA\n >>> from scipy import sparse\n >>> X, _ = load_digits(return_X_y=True)\n >>> transformer = IncrementalPCA(n_components=7, batch_size=200)\n >>> # either partially fit on smaller batches of data\n >>> transformer.partial_fit(X[:100, :])\n IncrementalPCA(batch_size=200, n_components=7)\n >>> # or let the fit function itself divide the data into batches\n >>> X_sparse = sparse.csr_matrix(X)\n >>> X_transformed = transformer.fit_transform(X_sparse)\n >>> X_transformed.shape\n (1797, 7)\n\n Notes\n -----\n Implements the incremental PCA model from:\n *D. Ross, J. Lim, R. Lin, M. Yang, Incremental Learning for Robust Visual\n Tracking, International Journal of Computer Vision, Volume 77, Issue 1-3,\n pp. 125-141, May 2008.*\n See https://www.cs.toronto.edu/~dross/ivt/RossLimLinYang_ijcv.pdf\n\n This model is an extension of the Sequential Karhunen-Loeve Transform from:\n *A. Levy and M. Lindenbaum, Sequential Karhunen-Loeve Basis Extraction and\n its Application to Images, IEEE Transactions on Image Processing, Volume 9,\n Number 8, pp. 1371-1374, August 2000.*\n See https://www.cs.technion.ac.il/~mic/doc/skl-ip.pdf\n\n We have specifically abstained from an optimization used by authors of both\n papers, a QR decomposition used in specific situations to reduce the\n algorithmic complexity of the SVD. The source for this technique is\n *Matrix Computations, Third Edition, G. Holub and C. Van Loan, Chapter 5,\n section 5.4.4, pp 252-253.*. This technique has been omitted because it is\n advantageous only when decomposing a matrix with ``n_samples`` (rows)\n >= 5/3 * ``n_features`` (columns), and hurts the readability of the\n implemented algorithm. This would be a good opportunity for future\n optimization, if it is deemed necessary.\n\n References\n ----------\n D. Ross, J. Lim, R. Lin, M. Yang. Incremental Learning for Robust Visual\n Tracking, International Journal of Computer Vision, Volume 77,\n Issue 1-3, pp. 125-141, May 2008.\n\n G. Golub and C. Van Loan. Matrix Computations, Third Edition, Chapter 5,\n Section 5.4.4, pp. 
252-253.\n\n See Also\n --------\n PCA\n KernelPCA\n SparsePCA\n TruncatedSVD\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=None, *, whiten=False, copy=True,\n batch_size=None):\n self.n_components = n_components\n self.whiten = whiten\n self.copy = copy\n self.batch_size = batch_size\n\n def fit(self, X, y=None):\n \"\"\"Fit the model with X, using minibatches of size batch_size.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n self.components_ = None\n self.n_samples_seen_ = 0\n self.mean_ = .0\n self.var_ = .0\n self.singular_values_ = None\n self.explained_variance_ = None\n self.explained_variance_ratio_ = None\n self.noise_variance_ = None\n\n X = self._validate_data(X, accept_sparse=['csr', 'csc', 'lil'],\n copy=self.copy, dtype=[np.float64, np.float32])\n n_samples, n_features = X.shape\n\n if self.batch_size is None:\n self.batch_size_ = 5 * n_features\n else:\n self.batch_size_ = self.batch_size\n\n for batch in gen_batches(n_samples, self.batch_size_,\n min_batch_size=self.n_components or 0):\n X_batch = X[batch]\n if sparse.issparse(X_batch):\n X_batch = X_batch.toarray()\n self.partial_fit(X_batch, check_input=False)\n\n return self\n\n def partial_fit(self, X, y=None, check_input=True):\n \"\"\"Incremental fit with X. All of X is processed as a single batch.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\n check_input : bool, default=True\n Run check_array on X.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n first_pass = not hasattr(self, \"components_\")\n if check_input:\n if sparse.issparse(X):\n raise TypeError(\n \"IncrementalPCA.partial_fit does not support \"\n \"sparse input. Either convert data to dense \"\n \"or use IncrementalPCA.fit to do so in batches.\")\n X = self._validate_data(\n X, copy=self.copy, dtype=[np.float64, np.float32],\n reset=first_pass)\n n_samples, n_features = X.shape\n if first_pass:\n self.components_ = None\n\n if self.n_components is None:\n if self.components_ is None:\n self.n_components_ = min(n_samples, n_features)\n else:\n self.n_components_ = self.components_.shape[0]\n elif not 1 <= self.n_components <= n_features:\n raise ValueError(\"n_components=%r invalid for n_features=%d, need \"\n \"more rows than columns for IncrementalPCA \"\n \"processing\" % (self.n_components, n_features))\n elif not self.n_components <= n_samples:\n raise ValueError(\"n_components=%r must be less or equal to \"\n \"the batch number of samples \"\n \"%d.\" % (self.n_components, n_samples))\n else:\n self.n_components_ = self.n_components\n\n if (self.components_ is not None) and (self.components_.shape[0] !=\n self.n_components_):\n raise ValueError(\"Number of input features has changed from %i \"\n \"to %i between calls to partial_fit! 
Try \"\n \"setting n_components to a fixed value.\" %\n (self.components_.shape[0], self.n_components_))\n\n # This is the first partial_fit\n if not hasattr(self, 'n_samples_seen_'):\n self.n_samples_seen_ = 0\n self.mean_ = .0\n self.var_ = .0\n\n # Update stats - they are 0 if this is the first step\n col_mean, col_var, n_total_samples = \\\n _incremental_mean_and_var(\n X, last_mean=self.mean_, last_variance=self.var_,\n last_sample_count=np.repeat(self.n_samples_seen_, X.shape[1]))\n n_total_samples = n_total_samples[0]\n\n # Whitening\n if self.n_samples_seen_ == 0:\n # If it is the first step, simply whiten X\n X -= col_mean\n else:\n col_batch_mean = np.mean(X, axis=0)\n X -= col_batch_mean\n # Build matrix of combined previous basis and new data\n mean_correction = \\\n np.sqrt((self.n_samples_seen_ / n_total_samples) *\n n_samples) * (self.mean_ - col_batch_mean)\n X = np.vstack((self.singular_values_.reshape((-1, 1)) *\n self.components_, X, mean_correction))\n\n U, S, Vt = linalg.svd(X, full_matrices=False, check_finite=False)\n U, Vt = svd_flip(U, Vt, u_based_decision=False)\n explained_variance = S ** 2 / (n_total_samples - 1)\n explained_variance_ratio = S ** 2 / np.sum(col_var * n_total_samples)\n\n self.n_samples_seen_ = n_total_samples\n self.components_ = Vt[:self.n_components_]\n self.singular_values_ = S[:self.n_components_]\n self.mean_ = col_mean\n self.var_ = col_var\n self.explained_variance_ = explained_variance[:self.n_components_]\n self.explained_variance_ratio_ = \\\n explained_variance_ratio[:self.n_components_]\n if self.n_components_ < n_features:\n self.noise_variance_ = \\\n explained_variance[self.n_components_:].mean()\n else:\n self.noise_variance_ = 0.\n return self\n\n def transform(self, X):\n \"\"\"Apply dimensionality reduction to X.\n\n X is projected on the first principal components previously extracted\n from a training set, using minibatches of size batch_size if X is\n sparse.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n\n Examples\n --------\n\n >>> import numpy as np\n >>> from sklearn.decomposition import IncrementalPCA\n >>> X = np.array([[-1, -1], [-2, -1], [-3, -2],\n ... 
[1, 1], [2, 1], [3, 2]])\n >>> ipca = IncrementalPCA(n_components=2, batch_size=3)\n >>> ipca.fit(X)\n IncrementalPCA(batch_size=3, n_components=2)\n >>> ipca.transform(X) # doctest: +SKIP\n \"\"\"\n if sparse.issparse(X):\n n_samples = X.shape[0]\n output = []\n for batch in gen_batches(n_samples, self.batch_size_,\n min_batch_size=self.n_components or 0):\n output.append(super().transform(X[batch].toarray()))\n return np.vstack(output)\n else:\n return super().transform(X)", + "instance_attributes": [ + { + "name": "whiten", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "n_samples_seen_", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "mean_", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "var_", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA", + "name": "KernelPCA", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__", + "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_pairwise@getter", + "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_get_kernel", + "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_fit_transform", + "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_fit_inverse_transform", + "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/fit", + "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/fit_transform", + "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/transform", + "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/inverse_transform", + "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Kernel Principal component analysis (KPCA).\n\nNon-linear dimensionality reduction through the use of kernels (see\n:ref:`metrics`).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Kernel Principal component analysis (KPCA).\n\nNon-linear dimensionality reduction through the use of kernels (see\n:ref:`metrics`).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of components. If None, all non-zero components are kept.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'}, default='linear'\n Kernel used for PCA.\n\ngamma : float, default=None\n Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other\n kernels. If ``gamma`` is ``None``, then it is set to ``1/n_features``.\n\ndegree : int, default=3\n Degree for poly kernels. Ignored by other kernels.\n\ncoef0 : float, default=1\n Independent term in poly and sigmoid kernels.\n Ignored by other kernels.\n\nkernel_params : dict, default=None\n Parameters (keyword arguments) and\n values for kernel passed as callable object.\n Ignored by other kernels.\n\nalpha : float, default=1.0\n Hyperparameter of the ridge regression that learns the\n inverse transform (when fit_inverse_transform=True).\n\nfit_inverse_transform : bool, default=False\n Learn the inverse transform for non-precomputed kernels.\n (i.e. learn to find the pre-image of a point)\n\neigen_solver : {'auto', 'dense', 'arpack'}, default='auto'\n Select eigensolver to use. 
If n_components is much less than\n the number of training samples, arpack may be more efficient\n than the dense eigensolver.\n\ntol : float, default=0\n Convergence tolerance for arpack.\n If 0, optimal value will be chosen by arpack.\n\nmax_iter : int, default=None\n Maximum number of iterations for arpack.\n If None, optimal value will be chosen by arpack.\n\nremove_zero_eig : bool, default=False\n If True, then all components with zero eigenvalues are removed, so\n that the number of components in the output may be < n_components\n (and sometimes even zero due to numerical instability).\n When n_components is None, this parameter is ignored and components\n with zero eigenvalues are removed regardless.\n\nrandom_state : int, RandomState instance or None, default=None\n Used when ``eigen_solver`` == 'arpack'. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\n .. versionadded:: 0.18\n\ncopy_X : bool, default=True\n If True, input X is copied and stored by the model in the `X_fit_`\n attribute. If no further changes will be done to X, setting\n `copy_X=False` saves memory by storing a reference.\n\n .. versionadded:: 0.18\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.18\n\nAttributes\n----------\nlambdas_ : ndarray of shape (n_components,)\n Eigenvalues of the centered kernel matrix in decreasing order.\n If `n_components` and `remove_zero_eig` are not set,\n then all values are stored.\n\nalphas_ : ndarray of shape (n_samples, n_components)\n Eigenvectors of the centered kernel matrix. If `n_components` and\n `remove_zero_eig` are not set, then all components are stored.\n\ndual_coef_ : ndarray of shape (n_samples, n_features)\n Inverse transform matrix. Only available when\n ``fit_inverse_transform`` is True.\n\nX_transformed_fit_ : ndarray of shape (n_samples, n_components)\n Projection of the fitted data on the kernel principal components.\n Only available when ``fit_inverse_transform`` is True.\n\nX_fit_ : ndarray of shape (n_samples, n_features)\n The data used to fit the model. If `copy_X=False`, then `X_fit_` is\n a reference. This attribute is used for the calls to transform.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.decomposition import KernelPCA\n>>> X, _ = load_digits(return_X_y=True)\n>>> transformer = KernelPCA(n_components=7, kernel='linear')\n>>> X_transformed = transformer.fit_transform(X)\n>>> X_transformed.shape\n(1797, 7)\n\nReferences\n----------\nKernel PCA was introduced in:\n Bernhard Schoelkopf, Alexander J. Smola,\n and Klaus-Robert Mueller. 1999. Kernel principal\n component analysis. In Advances in kernel methods,\n MIT Press, Cambridge, MA, USA 327-352.", + "code": "class KernelPCA(TransformerMixin, BaseEstimator):\n \"\"\"Kernel Principal component analysis (KPCA).\n\n Non-linear dimensionality reduction through the use of kernels (see\n :ref:`metrics`).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=None\n Number of components. If None, all non-zero components are kept.\n\n kernel : {'linear', 'poly', \\\n 'rbf', 'sigmoid', 'cosine', 'precomputed'}, default='linear'\n Kernel used for PCA.\n\n gamma : float, default=None\n Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other\n kernels. 
If ``gamma`` is ``None``, then it is set to ``1/n_features``.\n\n degree : int, default=3\n Degree for poly kernels. Ignored by other kernels.\n\n coef0 : float, default=1\n Independent term in poly and sigmoid kernels.\n Ignored by other kernels.\n\n kernel_params : dict, default=None\n Parameters (keyword arguments) and\n values for kernel passed as callable object.\n Ignored by other kernels.\n\n alpha : float, default=1.0\n Hyperparameter of the ridge regression that learns the\n inverse transform (when fit_inverse_transform=True).\n\n fit_inverse_transform : bool, default=False\n Learn the inverse transform for non-precomputed kernels.\n (i.e. learn to find the pre-image of a point)\n\n eigen_solver : {'auto', 'dense', 'arpack'}, default='auto'\n Select eigensolver to use. If n_components is much less than\n the number of training samples, arpack may be more efficient\n than the dense eigensolver.\n\n tol : float, default=0\n Convergence tolerance for arpack.\n If 0, optimal value will be chosen by arpack.\n\n max_iter : int, default=None\n Maximum number of iterations for arpack.\n If None, optimal value will be chosen by arpack.\n\n remove_zero_eig : bool, default=False\n If True, then all components with zero eigenvalues are removed, so\n that the number of components in the output may be < n_components\n (and sometimes even zero due to numerical instability).\n When n_components is None, this parameter is ignored and components\n with zero eigenvalues are removed regardless.\n\n random_state : int, RandomState instance or None, default=None\n Used when ``eigen_solver`` == 'arpack'. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\n .. versionadded:: 0.18\n\n copy_X : bool, default=True\n If True, input X is copied and stored by the model in the `X_fit_`\n attribute. If no further changes will be done to X, setting\n `copy_X=False` saves memory by storing a reference.\n\n .. versionadded:: 0.18\n\n n_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.18\n\n Attributes\n ----------\n lambdas_ : ndarray of shape (n_components,)\n Eigenvalues of the centered kernel matrix in decreasing order.\n If `n_components` and `remove_zero_eig` are not set,\n then all values are stored.\n\n alphas_ : ndarray of shape (n_samples, n_components)\n Eigenvectors of the centered kernel matrix. If `n_components` and\n `remove_zero_eig` are not set, then all components are stored.\n\n dual_coef_ : ndarray of shape (n_samples, n_features)\n Inverse transform matrix. Only available when\n ``fit_inverse_transform`` is True.\n\n X_transformed_fit_ : ndarray of shape (n_samples, n_components)\n Projection of the fitted data on the kernel principal components.\n Only available when ``fit_inverse_transform`` is True.\n\n X_fit_ : ndarray of shape (n_samples, n_features)\n The data used to fit the model. If `copy_X=False`, then `X_fit_` is\n a reference. 
This attribute is used for the calls to transform.\n\n Examples\n --------\n >>> from sklearn.datasets import load_digits\n >>> from sklearn.decomposition import KernelPCA\n >>> X, _ = load_digits(return_X_y=True)\n >>> transformer = KernelPCA(n_components=7, kernel='linear')\n >>> X_transformed = transformer.fit_transform(X)\n >>> X_transformed.shape\n (1797, 7)\n\n References\n ----------\n Kernel PCA was introduced in:\n Bernhard Schoelkopf, Alexander J. Smola,\n and Klaus-Robert Mueller. 1999. Kernel principal\n component analysis. In Advances in kernel methods,\n MIT Press, Cambridge, MA, USA 327-352.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=None, *, kernel=\"linear\",\n gamma=None, degree=3, coef0=1, kernel_params=None,\n alpha=1.0, fit_inverse_transform=False, eigen_solver='auto',\n tol=0, max_iter=None, remove_zero_eig=False,\n random_state=None, copy_X=True, n_jobs=None):\n if fit_inverse_transform and kernel == 'precomputed':\n raise ValueError(\n \"Cannot fit_inverse_transform with a precomputed kernel.\")\n self.n_components = n_components\n self.kernel = kernel\n self.kernel_params = kernel_params\n self.gamma = gamma\n self.degree = degree\n self.coef0 = coef0\n self.alpha = alpha\n self.fit_inverse_transform = fit_inverse_transform\n self.eigen_solver = eigen_solver\n self.remove_zero_eig = remove_zero_eig\n self.tol = tol\n self.max_iter = max_iter\n self.random_state = random_state\n self.n_jobs = n_jobs\n self.copy_X = copy_X\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n return self.kernel == \"precomputed\"\n\n def _get_kernel(self, X, Y=None):\n if callable(self.kernel):\n params = self.kernel_params or {}\n else:\n params = {\"gamma\": self.gamma,\n \"degree\": self.degree,\n \"coef0\": self.coef0}\n return pairwise_kernels(X, Y, metric=self.kernel,\n filter_params=True, n_jobs=self.n_jobs,\n **params)\n\n def _fit_transform(self, K):\n \"\"\" Fit's using kernel K\"\"\"\n # center kernel\n K = self._centerer.fit_transform(K)\n\n if self.n_components is None:\n n_components = K.shape[0]\n else:\n n_components = min(K.shape[0], self.n_components)\n\n # compute eigenvectors\n if self.eigen_solver == 'auto':\n if K.shape[0] > 200 and n_components < 10:\n eigen_solver = 'arpack'\n else:\n eigen_solver = 'dense'\n else:\n eigen_solver = self.eigen_solver\n\n if eigen_solver == 'dense':\n self.lambdas_, self.alphas_ = linalg.eigh(\n K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))\n elif eigen_solver == 'arpack':\n v0 = _init_arpack_v0(K.shape[0], self.random_state)\n self.lambdas_, self.alphas_ = eigsh(K, n_components,\n which=\"LA\",\n tol=self.tol,\n maxiter=self.max_iter,\n v0=v0)\n\n # make sure that the eigenvalues are ok and fix numerical issues\n self.lambdas_ = _check_psd_eigenvalues(self.lambdas_,\n enable_warnings=False)\n\n # flip eigenvectors' sign to enforce deterministic output\n self.alphas_, _ = svd_flip(self.alphas_,\n np.zeros_like(self.alphas_).T)\n\n # sort eigenvectors in descending order\n indices = self.lambdas_.argsort()[::-1]\n self.lambdas_ = self.lambdas_[indices]\n self.alphas_ = self.alphas_[:, indices]\n\n # remove eigenvectors with a zero eigenvalue (null space) if required\n if self.remove_zero_eig or self.n_components is None:\n self.alphas_ = self.alphas_[:, self.lambdas_ > 0]\n self.lambdas_ = 
self.lambdas_[self.lambdas_ > 0]\n\n # Maintenance note on Eigenvectors normalization\n # ----------------------------------------------\n # there is a link between\n # the eigenvectors of K=Phi(X)'Phi(X) and the ones of Phi(X)Phi(X)'\n # if v is an eigenvector of K\n # then Phi(X)v is an eigenvector of Phi(X)Phi(X)'\n # if u is an eigenvector of Phi(X)Phi(X)'\n # then Phi(X)'u is an eigenvector of Phi(X)'Phi(X)\n #\n # At this stage our self.alphas_ (the v) have norm 1, we need to scale\n # them so that eigenvectors in kernel feature space (the u) have norm=1\n # instead\n #\n # We COULD scale them here:\n # self.alphas_ = self.alphas_ / np.sqrt(self.lambdas_)\n #\n # But choose to perform that LATER when needed, in `fit()` and in\n # `transform()`.\n\n return K\n\n def _fit_inverse_transform(self, X_transformed, X):\n if hasattr(X, \"tocsr\"):\n raise NotImplementedError(\"Inverse transform not implemented for \"\n \"sparse matrices!\")\n\n n_samples = X_transformed.shape[0]\n K = self._get_kernel(X_transformed)\n K.flat[::n_samples + 1] += self.alpha\n self.dual_coef_ = linalg.solve(K, X, sym_pos=True, overwrite_a=True)\n self.X_transformed_fit_ = X_transformed\n\n def fit(self, X, y=None):\n \"\"\"Fit the model from data in X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n X = self._validate_data(X, accept_sparse='csr', copy=self.copy_X)\n self._centerer = KernelCenterer()\n K = self._get_kernel(X)\n self._fit_transform(K)\n\n if self.fit_inverse_transform:\n # no need to use the kernel to transform X, use shortcut expression\n X_transformed = self.alphas_ * np.sqrt(self.lambdas_)\n\n self._fit_inverse_transform(X_transformed, X)\n\n self.X_fit_ = X\n return self\n\n def fit_transform(self, X, y=None, **params):\n \"\"\"Fit the model from data in X and transform X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n \"\"\"\n self.fit(X, **params)\n\n # no need to use the kernel to transform X, use shortcut expression\n X_transformed = self.alphas_ * np.sqrt(self.lambdas_)\n\n if self.fit_inverse_transform:\n self._fit_inverse_transform(X_transformed, X)\n\n return X_transformed\n\n def transform(self, X):\n \"\"\"Transform X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse='csr', reset=False)\n\n # Compute centered gram matrix between X and training data X_fit_\n K = self._centerer.transform(self._get_kernel(X, self.X_fit_))\n\n # scale eigenvectors (properly account for null-space for dot product)\n non_zeros = np.flatnonzero(self.lambdas_)\n scaled_alphas = np.zeros_like(self.alphas_)\n scaled_alphas[:, non_zeros] = (self.alphas_[:, non_zeros]\n / np.sqrt(self.lambdas_[non_zeros]))\n\n # Project with a scalar product between K and the scaled eigenvectors\n return np.dot(K, scaled_alphas)\n\n def inverse_transform(self, X):\n \"\"\"Transform X back to original space.\n\n ``inverse_transform`` approximates the inverse transformation using\n a 
learned pre-image. The pre-image is learned by kernel ridge\n regression of the original data on their low-dimensional representation\n vectors.\n\n .. note:\n :meth:`~sklearn.decomposition.fit` internally uses a centered\n kernel. As the centered kernel no longer contains the information\n of the mean of kernel features, such information is not taken into\n account in reconstruction.\n\n .. note::\n When users want to compute inverse transformation for 'linear'\n kernel, it is recommended that they use\n :class:`~sklearn.decomposition.PCA` instead. Unlike\n :class:`~sklearn.decomposition.PCA`,\n :class:`~sklearn.decomposition.KernelPCA`'s ``inverse_transform``\n does not reconstruct the mean of data when 'linear' kernel is used\n due to the use of centered kernel.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_components)\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_features)\n\n References\n ----------\n \"Learning to Find Pre-Images\", G BakIr et al, 2004.\n \"\"\"\n if not self.fit_inverse_transform:\n raise NotFittedError(\"The fit_inverse_transform parameter was not\"\n \" set to True when instantiating and hence \"\n \"the inverse transform is not available.\")\n\n K = self._get_kernel(X, self.X_transformed_fit_)\n return np.dot(K, self.dual_coef_)\n\n def _more_tags(self):\n return {'preserves_dtype': [np.float64, np.float32],\n 'pairwise': self.kernel == 'precomputed'}", + "instance_attributes": [ + { + "name": "kernel", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "degree", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "coef0", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_inverse_transform", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "eigen_solver", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "remove_zero_eig", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation", + "name": "LatentDirichletAllocation", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__", + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_check_params", + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_init_latent_vars", + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_e_step", + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_em_step", + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_more_tags", + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_check_non_neg_array", + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/partial_fit", + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/fit", + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_unnormalized_transform", + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/transform", + 
"scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_approx_bound", + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/score", + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_perplexity_precomp_distr", + "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/perplexity" + ], + "is_public": false, + "reexported_by": [], + "description": "Latent Dirichlet Allocation with online variational Bayes algorithm\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.", + "docstring": "Latent Dirichlet Allocation with online variational Bayes algorithm\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=10\n Number of topics.\n\n .. versionchanged:: 0.19\n ``n_topics`` was renamed to ``n_components``\n\ndoc_topic_prior : float, default=None\n Prior of document topic distribution `theta`. If the value is None,\n defaults to `1 / n_components`.\n In [1]_, this is called `alpha`.\n\ntopic_word_prior : float, default=None\n Prior of topic word distribution `beta`. If the value is None, defaults\n to `1 / n_components`.\n In [1]_, this is called `eta`.\n\nlearning_method : {'batch', 'online'}, default='batch'\n Method used to update `_component`. Only used in :meth:`fit` method.\n In general, if the data size is large, the online update will be much\n faster than the batch update.\n\n Valid options::\n\n 'batch': Batch variational Bayes method. Use all training data in\n each EM update.\n Old `components_` will be overwritten in each iteration.\n 'online': Online variational Bayes method. In each EM update, use\n mini-batch of training data to update the ``components_``\n variable incrementally. The learning rate is controlled by the\n ``learning_decay`` and the ``learning_offset`` parameters.\n\n .. versionchanged:: 0.20\n The default learning method is now ``\"batch\"``.\n\nlearning_decay : float, default=0.7\n It is a parameter that control learning rate in the online learning\n method. The value should be set between (0.5, 1.0] to guarantee\n asymptotic convergence. When the value is 0.0 and batch_size is\n ``n_samples``, the update method is same as batch learning. In the\n literature, this is called kappa.\n\nlearning_offset : float, default=10.\n A (positive) parameter that downweights early iterations in online\n learning. It should be greater than 1.0. In the literature, this is\n called tau_0.\n\nmax_iter : int, default=10\n The maximum number of iterations.\n\nbatch_size : int, default=128\n Number of documents to use in each EM iteration. Only used in online\n learning.\n\nevaluate_every : int, default=-1\n How often to evaluate perplexity. Only used in `fit` method.\n set it to 0 or negative number to not evaluate perplexity in\n training at all. Evaluating perplexity can help you check convergence\n in training process, but it will also increase total training time.\n Evaluating perplexity in every iteration might increase training time\n up to two-fold.\n\ntotal_samples : int, default=1e6\n Total number of documents. Only used in the :meth:`partial_fit` method.\n\nperp_tol : float, default=1e-1\n Perplexity tolerance in batch learning. 
Only used when\n ``evaluate_every`` is greater than 0.\n\nmean_change_tol : float, default=1e-3\n Stopping tolerance for updating document topic distribution in E-step.\n\nmax_doc_update_iter : int, default=100\n Max number of iterations for updating document topic distribution in\n the E-step.\n\nn_jobs : int, default=None\n The number of jobs to use in the E-step.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n Verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Variational parameters for topic word distribution. Since the complete\n conditional for topic word distribution is a Dirichlet,\n ``components_[i, j]`` can be viewed as pseudocount that represents the\n number of times word `j` was assigned to topic `i`.\n It can also be viewed as distribution over the words for each topic\n after normalization:\n ``model.components_ / model.components_.sum(axis=1)[:, np.newaxis]``.\n\nexp_dirichlet_component_ : ndarray of shape (n_components, n_features)\n Exponential value of expectation of log topic word distribution.\n In the literature, this is `exp(E[log(beta)])`.\n\nn_batch_iter_ : int\n Number of iterations of the EM step.\n\nn_iter_ : int\n Number of passes over the dataset.\n\nbound_ : float\n Final perplexity score on training set.\n\ndoc_topic_prior_ : float\n Prior of document topic distribution `theta`. If the value is None,\n it is `1 / n_components`.\n\nrandom_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generator or by `np.random`.\n\ntopic_word_prior_ : float\n Prior of topic word distribution `beta`. If the value is None, it is\n `1 / n_components`.\n\nExamples\n--------\n>>> from sklearn.decomposition import LatentDirichletAllocation\n>>> from sklearn.datasets import make_multilabel_classification\n>>> # This produces a feature matrix of token counts, similar to what\n>>> # CountVectorizer would produce on text.\n>>> X, _ = make_multilabel_classification(random_state=0)\n>>> lda = LatentDirichletAllocation(n_components=5,\n... random_state=0)\n>>> lda.fit(X)\nLatentDirichletAllocation(...)\n>>> # get topics for some given samples:\n>>> lda.transform(X[-2:])\narray([[0.00360392, 0.25499205, 0.0036211 , 0.64236448, 0.09541846],\n [0.15297572, 0.00362644, 0.44412786, 0.39568399, 0.003586 ]])\n\nReferences\n----------\n.. [1] \"Online Learning for Latent Dirichlet Allocation\", Matthew D.\n Hoffman, David M. Blei, Francis Bach, 2010\n\n[2] \"Stochastic Variational Inference\", Matthew D. Hoffman, David M. Blei,\n Chong Wang, John Paisley, 2013\n\n[3] Matthew D. Hoffman's onlineldavb code. Link:\n https://github.com/blei-lab/onlineldavb", + "code": "class LatentDirichletAllocation(TransformerMixin, BaseEstimator):\n \"\"\"Latent Dirichlet Allocation with online variational Bayes algorithm\n\n .. versionadded:: 0.17\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=10\n Number of topics.\n\n .. versionchanged:: 0.19\n ``n_topics`` was renamed to ``n_components``\n\n doc_topic_prior : float, default=None\n Prior of document topic distribution `theta`. 
If the value is None,\n defaults to `1 / n_components`.\n In [1]_, this is called `alpha`.\n\n topic_word_prior : float, default=None\n Prior of topic word distribution `beta`. If the value is None, defaults\n to `1 / n_components`.\n In [1]_, this is called `eta`.\n\n learning_method : {'batch', 'online'}, default='batch'\n Method used to update `_component`. Only used in :meth:`fit` method.\n In general, if the data size is large, the online update will be much\n faster than the batch update.\n\n Valid options::\n\n 'batch': Batch variational Bayes method. Use all training data in\n each EM update.\n Old `components_` will be overwritten in each iteration.\n 'online': Online variational Bayes method. In each EM update, use\n mini-batch of training data to update the ``components_``\n variable incrementally. The learning rate is controlled by the\n ``learning_decay`` and the ``learning_offset`` parameters.\n\n .. versionchanged:: 0.20\n The default learning method is now ``\"batch\"``.\n\n learning_decay : float, default=0.7\n It is a parameter that control learning rate in the online learning\n method. The value should be set between (0.5, 1.0] to guarantee\n asymptotic convergence. When the value is 0.0 and batch_size is\n ``n_samples``, the update method is same as batch learning. In the\n literature, this is called kappa.\n\n learning_offset : float, default=10.\n A (positive) parameter that downweights early iterations in online\n learning. It should be greater than 1.0. In the literature, this is\n called tau_0.\n\n max_iter : int, default=10\n The maximum number of iterations.\n\n batch_size : int, default=128\n Number of documents to use in each EM iteration. Only used in online\n learning.\n\n evaluate_every : int, default=-1\n How often to evaluate perplexity. Only used in `fit` method.\n set it to 0 or negative number to not evaluate perplexity in\n training at all. Evaluating perplexity can help you check convergence\n in training process, but it will also increase total training time.\n Evaluating perplexity in every iteration might increase training time\n up to two-fold.\n\n total_samples : int, default=1e6\n Total number of documents. Only used in the :meth:`partial_fit` method.\n\n perp_tol : float, default=1e-1\n Perplexity tolerance in batch learning. Only used when\n ``evaluate_every`` is greater than 0.\n\n mean_change_tol : float, default=1e-3\n Stopping tolerance for updating document topic distribution in E-step.\n\n max_doc_update_iter : int, default=100\n Max number of iterations for updating document topic distribution in\n the E-step.\n\n n_jobs : int, default=None\n The number of jobs to use in the E-step.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n verbose : int, default=0\n Verbosity level.\n\n random_state : int, RandomState instance or None, default=None\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n Variational parameters for topic word distribution. 
Since the complete\n conditional for topic word distribution is a Dirichlet,\n ``components_[i, j]`` can be viewed as pseudocount that represents the\n number of times word `j` was assigned to topic `i`.\n It can also be viewed as distribution over the words for each topic\n after normalization:\n ``model.components_ / model.components_.sum(axis=1)[:, np.newaxis]``.\n\n exp_dirichlet_component_ : ndarray of shape (n_components, n_features)\n Exponential value of expectation of log topic word distribution.\n In the literature, this is `exp(E[log(beta)])`.\n\n n_batch_iter_ : int\n Number of iterations of the EM step.\n\n n_iter_ : int\n Number of passes over the dataset.\n\n bound_ : float\n Final perplexity score on training set.\n\n doc_topic_prior_ : float\n Prior of document topic distribution `theta`. If the value is None,\n it is `1 / n_components`.\n\n random_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generator or by `np.random`.\n\n topic_word_prior_ : float\n Prior of topic word distribution `beta`. If the value is None, it is\n `1 / n_components`.\n\n Examples\n --------\n >>> from sklearn.decomposition import LatentDirichletAllocation\n >>> from sklearn.datasets import make_multilabel_classification\n >>> # This produces a feature matrix of token counts, similar to what\n >>> # CountVectorizer would produce on text.\n >>> X, _ = make_multilabel_classification(random_state=0)\n >>> lda = LatentDirichletAllocation(n_components=5,\n ... random_state=0)\n >>> lda.fit(X)\n LatentDirichletAllocation(...)\n >>> # get topics for some given samples:\n >>> lda.transform(X[-2:])\n array([[0.00360392, 0.25499205, 0.0036211 , 0.64236448, 0.09541846],\n [0.15297572, 0.00362644, 0.44412786, 0.39568399, 0.003586 ]])\n\n References\n ----------\n .. [1] \"Online Learning for Latent Dirichlet Allocation\", Matthew D.\n Hoffman, David M. Blei, Francis Bach, 2010\n\n [2] \"Stochastic Variational Inference\", Matthew D. Hoffman, David M. Blei,\n Chong Wang, John Paisley, 2013\n\n [3] Matthew D. Hoffman's onlineldavb code. 
Link:\n https://github.com/blei-lab/onlineldavb\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=10, *, doc_topic_prior=None,\n topic_word_prior=None, learning_method='batch',\n learning_decay=.7, learning_offset=10., max_iter=10,\n batch_size=128, evaluate_every=-1, total_samples=1e6,\n perp_tol=1e-1, mean_change_tol=1e-3, max_doc_update_iter=100,\n n_jobs=None, verbose=0, random_state=None):\n self.n_components = n_components\n self.doc_topic_prior = doc_topic_prior\n self.topic_word_prior = topic_word_prior\n self.learning_method = learning_method\n self.learning_decay = learning_decay\n self.learning_offset = learning_offset\n self.max_iter = max_iter\n self.batch_size = batch_size\n self.evaluate_every = evaluate_every\n self.total_samples = total_samples\n self.perp_tol = perp_tol\n self.mean_change_tol = mean_change_tol\n self.max_doc_update_iter = max_doc_update_iter\n self.n_jobs = n_jobs\n self.verbose = verbose\n self.random_state = random_state\n\n def _check_params(self):\n \"\"\"Check model parameters.\"\"\"\n if self.n_components <= 0:\n raise ValueError(\"Invalid 'n_components' parameter: %r\"\n % self.n_components)\n\n if self.total_samples <= 0:\n raise ValueError(\"Invalid 'total_samples' parameter: %r\"\n % self.total_samples)\n\n if self.learning_offset < 0:\n raise ValueError(\"Invalid 'learning_offset' parameter: %r\"\n % self.learning_offset)\n\n if self.learning_method not in (\"batch\", \"online\"):\n raise ValueError(\"Invalid 'learning_method' parameter: %r\"\n % self.learning_method)\n\n def _init_latent_vars(self, n_features):\n \"\"\"Initialize latent variables.\"\"\"\n\n self.random_state_ = check_random_state(self.random_state)\n self.n_batch_iter_ = 1\n self.n_iter_ = 0\n\n if self.doc_topic_prior is None:\n self.doc_topic_prior_ = 1. / self.n_components\n else:\n self.doc_topic_prior_ = self.doc_topic_prior\n\n if self.topic_word_prior is None:\n self.topic_word_prior_ = 1. / self.n_components\n else:\n self.topic_word_prior_ = self.topic_word_prior\n\n init_gamma = 100.\n init_var = 1. / init_gamma\n # In the literature, this is called `lambda`\n self.components_ = self.random_state_.gamma(\n init_gamma, init_var, (self.n_components, n_features))\n\n # In the literature, this is `exp(E[log(beta)])`\n self.exp_dirichlet_component_ = np.exp(\n _dirichlet_expectation_2d(self.components_))\n\n def _e_step(self, X, cal_sstats, random_init, parallel=None):\n \"\"\"E-step in EM update.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n cal_sstats : bool\n Parameter that indicate whether to calculate sufficient statistics\n or not. Set ``cal_sstats`` to True when we need to run M-step.\n\n random_init : bool\n Parameter that indicate whether to initialize document topic\n distribution randomly in the E-step. Set it to True in training\n steps.\n\n parallel : joblib.Parallel, default=None\n Pre-initialized instance of joblib.Parallel.\n\n Returns\n -------\n (doc_topic_distr, suff_stats) :\n `doc_topic_distr` is unnormalized topic distribution for each\n document. 
In the literature, this is called `gamma`.\n `suff_stats` is expected sufficient statistics for the M-step.\n When `cal_sstats == False`, it will be None.\n\n \"\"\"\n\n # Run e-step in parallel\n random_state = self.random_state_ if random_init else None\n\n # TODO: make Parallel._effective_n_jobs public instead?\n n_jobs = effective_n_jobs(self.n_jobs)\n if parallel is None:\n parallel = Parallel(n_jobs=n_jobs, verbose=max(0,\n self.verbose - 1))\n results = parallel(\n delayed(_update_doc_distribution)(X[idx_slice, :],\n self.exp_dirichlet_component_,\n self.doc_topic_prior_,\n self.max_doc_update_iter,\n self.mean_change_tol, cal_sstats,\n random_state)\n for idx_slice in gen_even_slices(X.shape[0], n_jobs))\n\n # merge result\n doc_topics, sstats_list = zip(*results)\n doc_topic_distr = np.vstack(doc_topics)\n\n if cal_sstats:\n # This step finishes computing the sufficient statistics for the\n # M-step.\n suff_stats = np.zeros(self.components_.shape)\n for sstats in sstats_list:\n suff_stats += sstats\n suff_stats *= self.exp_dirichlet_component_\n else:\n suff_stats = None\n\n return (doc_topic_distr, suff_stats)\n\n def _em_step(self, X, total_samples, batch_update, parallel=None):\n \"\"\"EM update for 1 iteration.\n\n update `_component` by batch VB or online VB.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n total_samples : int\n Total number of documents. It is only used when\n batch_update is `False`.\n\n batch_update : bool\n Parameter that controls updating method.\n `True` for batch learning, `False` for online learning.\n\n parallel : joblib.Parallel, default=None\n Pre-initialized instance of joblib.Parallel\n\n Returns\n -------\n doc_topic_distr : ndarray of shape (n_samples, n_components)\n Unnormalized document topic distribution.\n \"\"\"\n\n # E-step\n _, suff_stats = self._e_step(X, cal_sstats=True, random_init=True,\n parallel=parallel)\n\n # M-step\n if batch_update:\n self.components_ = self.topic_word_prior_ + suff_stats\n else:\n # online update\n # In the literature, the weight is `rho`\n weight = np.power(self.learning_offset + self.n_batch_iter_,\n -self.learning_decay)\n doc_ratio = float(total_samples) / X.shape[0]\n self.components_ *= (1 - weight)\n self.components_ += (weight * (self.topic_word_prior_\n + doc_ratio * suff_stats))\n\n # update `component_` related variables\n self.exp_dirichlet_component_ = np.exp(\n _dirichlet_expectation_2d(self.components_))\n self.n_batch_iter_ += 1\n return\n\n def _more_tags(self):\n return {'requires_positive_X': True}\n\n def _check_non_neg_array(self, X, reset_n_features, whom):\n \"\"\"check X format\n\n check X format and make sure no negative value in X.\n\n Parameters\n ----------\n X : array-like or sparse matrix\n\n \"\"\"\n X = self._validate_data(X, reset=reset_n_features,\n accept_sparse='csr')\n check_non_negative(X, whom)\n return X\n\n def partial_fit(self, X, y=None):\n \"\"\"Online VB with Mini-Batch update.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n y : Ignored\n\n Returns\n -------\n self\n \"\"\"\n self._check_params()\n first_time = not hasattr(self, 'components_')\n X = self._check_non_neg_array(\n X, reset_n_features=first_time,\n whom=\"LatentDirichletAllocation.partial_fit\")\n n_samples, n_features = X.shape\n batch_size = self.batch_size\n\n # initialize parameters or check\n if first_time:\n self._init_latent_vars(n_features)\n\n if 
n_features != self.components_.shape[1]:\n raise ValueError(\n \"The provided data has %d dimensions while \"\n \"the model was trained with feature size %d.\" %\n (n_features, self.components_.shape[1]))\n\n n_jobs = effective_n_jobs(self.n_jobs)\n with Parallel(n_jobs=n_jobs,\n verbose=max(0, self.verbose - 1)) as parallel:\n for idx_slice in gen_batches(n_samples, batch_size):\n self._em_step(X[idx_slice, :],\n total_samples=self.total_samples,\n batch_update=False,\n parallel=parallel)\n\n return self\n\n def fit(self, X, y=None):\n \"\"\"Learn model for the data X with variational Bayes method.\n\n When `learning_method` is 'online', use mini-batch update.\n Otherwise, use batch update.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n y : Ignored\n\n Returns\n -------\n self\n \"\"\"\n self._check_params()\n X = self._check_non_neg_array(X, reset_n_features=True,\n whom=\"LatentDirichletAllocation.fit\")\n n_samples, n_features = X.shape\n max_iter = self.max_iter\n evaluate_every = self.evaluate_every\n learning_method = self.learning_method\n\n batch_size = self.batch_size\n\n # initialize parameters\n self._init_latent_vars(n_features)\n # change to perplexity later\n last_bound = None\n n_jobs = effective_n_jobs(self.n_jobs)\n with Parallel(n_jobs=n_jobs,\n verbose=max(0, self.verbose - 1)) as parallel:\n for i in range(max_iter):\n if learning_method == 'online':\n for idx_slice in gen_batches(n_samples, batch_size):\n self._em_step(X[idx_slice, :], total_samples=n_samples,\n batch_update=False, parallel=parallel)\n else:\n # batch update\n self._em_step(X, total_samples=n_samples,\n batch_update=True, parallel=parallel)\n\n # check perplexity\n if evaluate_every > 0 and (i + 1) % evaluate_every == 0:\n doc_topics_distr, _ = self._e_step(X, cal_sstats=False,\n random_init=False,\n parallel=parallel)\n bound = self._perplexity_precomp_distr(X, doc_topics_distr,\n sub_sampling=False)\n if self.verbose:\n print('iteration: %d of max_iter: %d, perplexity: %.4f'\n % (i + 1, max_iter, bound))\n\n if last_bound and abs(last_bound - bound) < self.perp_tol:\n break\n last_bound = bound\n\n elif self.verbose:\n print('iteration: %d of max_iter: %d' % (i + 1, max_iter))\n self.n_iter_ += 1\n\n # calculate final perplexity value on train set\n doc_topics_distr, _ = self._e_step(X, cal_sstats=False,\n random_init=False,\n parallel=parallel)\n self.bound_ = self._perplexity_precomp_distr(X, doc_topics_distr,\n sub_sampling=False)\n\n return self\n\n def _unnormalized_transform(self, X):\n \"\"\"Transform data X according to fitted model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n Returns\n -------\n doc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution for X.\n \"\"\"\n check_is_fitted(self)\n\n # make sure feature size is the same in fitted model and in X\n X = self._check_non_neg_array(\n X, reset_n_features=True,\n whom=\"LatentDirichletAllocation.transform\")\n n_samples, n_features = X.shape\n if n_features != self.components_.shape[1]:\n raise ValueError(\n \"The provided data has %d dimensions while \"\n \"the model was trained with feature size %d.\" %\n (n_features, self.components_.shape[1]))\n\n doc_topic_distr, _ = self._e_step(X, cal_sstats=False,\n random_init=False)\n\n return doc_topic_distr\n\n def transform(self, X):\n \"\"\"Transform data X according to the fitted model.\n\n .. 
versionchanged:: 0.18\n *doc_topic_distr* is now normalized\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n Returns\n -------\n doc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution for X.\n \"\"\"\n check_is_fitted(self)\n X = self._check_non_neg_array(\n X, reset_n_features=False,\n whom=\"LatentDirichletAllocation.transform\")\n doc_topic_distr = self._unnormalized_transform(X)\n doc_topic_distr /= doc_topic_distr.sum(axis=1)[:, np.newaxis]\n return doc_topic_distr\n\n def _approx_bound(self, X, doc_topic_distr, sub_sampling):\n \"\"\"Estimate the variational bound.\n\n Estimate the variational bound over \"all documents\" using only the\n documents passed in as X. Since log-likelihood of each word cannot\n be computed directly, we use this bound to estimate it.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n doc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution. In the literature, this is called\n gamma.\n\n sub_sampling : bool, default=False\n Compensate for subsampling of documents.\n It is used in calculate bound in online learning.\n\n Returns\n -------\n score : float\n\n \"\"\"\n\n def _loglikelihood(prior, distr, dirichlet_distr, size):\n # calculate log-likelihood\n score = np.sum((prior - distr) * dirichlet_distr)\n score += np.sum(gammaln(distr) - gammaln(prior))\n score += np.sum(gammaln(prior * size) - gammaln(np.sum(distr, 1)))\n return score\n\n is_sparse_x = sp.issparse(X)\n n_samples, n_components = doc_topic_distr.shape\n n_features = self.components_.shape[1]\n score = 0\n\n dirichlet_doc_topic = _dirichlet_expectation_2d(doc_topic_distr)\n dirichlet_component_ = _dirichlet_expectation_2d(self.components_)\n doc_topic_prior = self.doc_topic_prior_\n topic_word_prior = self.topic_word_prior_\n\n if is_sparse_x:\n X_data = X.data\n X_indices = X.indices\n X_indptr = X.indptr\n\n # E[log p(docs | theta, beta)]\n for idx_d in range(0, n_samples):\n if is_sparse_x:\n ids = X_indices[X_indptr[idx_d]:X_indptr[idx_d + 1]]\n cnts = X_data[X_indptr[idx_d]:X_indptr[idx_d + 1]]\n else:\n ids = np.nonzero(X[idx_d, :])[0]\n cnts = X[idx_d, ids]\n temp = (dirichlet_doc_topic[idx_d, :, np.newaxis]\n + dirichlet_component_[:, ids])\n norm_phi = logsumexp(temp, axis=0)\n score += np.dot(cnts, norm_phi)\n\n # compute E[log p(theta | alpha) - log q(theta | gamma)]\n score += _loglikelihood(doc_topic_prior, doc_topic_distr,\n dirichlet_doc_topic, self.n_components)\n\n # Compensate for the subsampling of the population of documents\n if sub_sampling:\n doc_ratio = float(self.total_samples) / n_samples\n score *= doc_ratio\n\n # E[log p(beta | eta) - log q (beta | lambda)]\n score += _loglikelihood(topic_word_prior, self.components_,\n dirichlet_component_, n_features)\n\n return score\n\n def score(self, X, y=None):\n \"\"\"Calculate approximate log-likelihood as score.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n y : Ignored\n\n Returns\n -------\n score : float\n Use approximate bound as score.\n \"\"\"\n check_is_fitted(self)\n X = self._check_non_neg_array(X, reset_n_features=False,\n whom=\"LatentDirichletAllocation.score\")\n\n doc_topic_distr = self._unnormalized_transform(X)\n score = self._approx_bound(X, doc_topic_distr, sub_sampling=False)\n return score\n\n def 
_perplexity_precomp_distr(self, X, doc_topic_distr=None,\n sub_sampling=False):\n \"\"\"Calculate approximate perplexity for data X with ability to accept\n precomputed doc_topic_distr\n\n Perplexity is defined as exp(-1. * log-likelihood per word)\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n doc_topic_distr : ndarray of shape (n_samples, n_components), \\\n default=None\n Document topic distribution.\n If it is None, it will be generated by applying transform on X.\n\n Returns\n -------\n score : float\n Perplexity score.\n \"\"\"\n check_is_fitted(self)\n\n X = self._check_non_neg_array(\n X, reset_n_features=True,\n whom=\"LatentDirichletAllocation.perplexity\")\n\n if doc_topic_distr is None:\n doc_topic_distr = self._unnormalized_transform(X)\n else:\n n_samples, n_components = doc_topic_distr.shape\n if n_samples != X.shape[0]:\n raise ValueError(\"Number of samples in X and doc_topic_distr\"\n \" do not match.\")\n\n if n_components != self.n_components:\n raise ValueError(\"Number of topics does not match.\")\n\n current_samples = X.shape[0]\n bound = self._approx_bound(X, doc_topic_distr, sub_sampling)\n\n if sub_sampling:\n word_cnt = X.sum() * (float(self.total_samples) / current_samples)\n else:\n word_cnt = X.sum()\n perword_bound = bound / word_cnt\n\n return np.exp(-1.0 * perword_bound)\n\n def perplexity(self, X, sub_sampling=False):\n \"\"\"Calculate approximate perplexity for data X.\n\n Perplexity is defined as exp(-1. * log-likelihood per word)\n\n .. versionchanged:: 0.19\n *doc_topic_distr* argument has been deprecated and is ignored\n because user no longer has access to unnormalized distribution\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n sub_sampling : bool\n Do sub-sampling or not.\n\n Returns\n -------\n score : float\n Perplexity score.\n \"\"\"\n return self._perplexity_precomp_distr(X, sub_sampling=sub_sampling)", + "instance_attributes": [ + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "learning_method", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "learning_decay", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "learning_offset", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "batch_size", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "evaluate_every", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "total_samples", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "perp_tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "mean_change_tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_doc_update_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_batch_iter_", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_iter_", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "doc_topic_prior_", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "topic_word_prior_", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + 
"id": "scikit-learn/sklearn.decomposition._nmf/NMF", + "name": "NMF", + "qname": "sklearn.decomposition._nmf.NMF", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.decomposition._nmf/NMF/__init__", + "scikit-learn/sklearn.decomposition._nmf/NMF/_more_tags", + "scikit-learn/sklearn.decomposition._nmf/NMF/fit_transform", + "scikit-learn/sklearn.decomposition._nmf/NMF/fit", + "scikit-learn/sklearn.decomposition._nmf/NMF/transform", + "scikit-learn/sklearn.decomposition._nmf/NMF/inverse_transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. math::\n\n 0.5 * ||X - WH||_{loss}^2 + alpha * l1_{ratio} * ||vec(W)||_1\n\n + alpha * l1_{ratio} * ||vec(H)||_1\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||W||_{Fro}^2\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. 
math::\n\n 0.5 * ||X - WH||_{loss}^2 + alpha * l1_{ratio} * ||vec(W)||_1\n\n + alpha * l1_{ratio} * ||vec(H)||_1\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||W||_{Fro}^2\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of components, if n_components is not set all features\n are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n Method used to initialize the procedure.\n Default: None.\n Valid options:\n\n - `None`: 'nndsvd' if n_components <= min(n_samples, n_features),\n otherwise random.\n\n - `'random'`: non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - `'nndsvda'`: NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - `'nndsvdar'` NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - `'custom'`: use custom matrices W and H\n\nsolver : {'cd', 'mu'}, default='cd'\n Numerical solver to use:\n 'cd' is a Coordinate Descent solver.\n 'mu' is a Multiplicative Update solver.\n\n .. versionadded:: 0.17\n Coordinate Descent solver.\n\n .. versionadded:: 0.19\n Multiplicative Update solver.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros. Used only in 'mu' solver.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n Maximum number of iterations before timing out.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initialisation (when ``init`` == 'nndsvdar' or\n 'random'), and in Coordinate Descent. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nalpha : float, default=0.\n Constant that multiplies the regularization terms. Set it to zero to\n have no regularization.\n\n .. versionadded:: 0.17\n *alpha* used in the Coordinate Descent solver.\n\nl1_ratio : float, default=0.\n The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n For l1_ratio = 0 the penalty is an elementwise L2 penalty\n (aka Frobenius Norm).\n For l1_ratio = 1 it is an elementwise L1 penalty.\n For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n .. versionadded:: 0.17\n Regularization parameter *l1_ratio* used in the Coordinate Descent\n solver.\n\nverbose : int, default=0\n Whether to be verbose.\n\nshuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\n .. 
versionadded:: 0.17\n *shuffle* parameter used in the Coordinate Descent solver.\n\nregularization : {'both', 'components', 'transformation', None}, default='both'\n Select whether the regularization affects the components (H), the\n transformation (W), both or none of them.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Factorization matrix, sometimes called 'dictionary'.\n\nn_components_ : int\n The number of components. It is same as the `n_components` parameter\n if it was given. Otherwise, it will be same as the number of\n features.\n\nreconstruction_err_ : float\n Frobenius norm of the matrix difference, or beta-divergence, between\n the training data ``X`` and the reconstructed data ``WH`` from\n the fitted model.\n\nn_iter_ : int\n Actual number of iterations.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import NMF\n>>> model = NMF(n_components=2, init='random', random_state=0)\n>>> W = model.fit_transform(X)\n>>> H = model.components_\n\nReferences\n----------\nCichocki, Andrzej, and P. H. A. N. Anh-Huy. \"Fast local algorithms for\nlarge scale nonnegative matrix and tensor factorizations.\"\nIEICE transactions on fundamentals of electronics, communications and\ncomputer sciences 92.3: 708-721, 2009.\n\nFevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix\nfactorization with the beta-divergence. Neural Computation, 23(9).", + "code": "class NMF(TransformerMixin, BaseEstimator):\n \"\"\"Non-Negative Matrix Factorization (NMF).\n\n Find two non-negative matrices (W, H) whose product approximates the non-\n negative matrix X. This factorization can be used for example for\n dimensionality reduction, source separation or topic extraction.\n\n The objective function is:\n\n .. 
math::\n\n 0.5 * ||X - WH||_{loss}^2 + alpha * l1_{ratio} * ||vec(W)||_1\n\n + alpha * l1_{ratio} * ||vec(H)||_1\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||W||_{Fro}^2\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||H||_{Fro}^2\n\n Where:\n\n :math:`||A||_{Fro}^2 = \\\\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n :math:`||vec(A)||_1 = \\\\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\n The generic norm :math:`||X - WH||_{loss}` may represent\n the Frobenius norm or another supported beta-divergence loss.\n The choice between options is controlled by the `beta_loss` parameter.\n\n The objective function is minimized with an alternating minimization of W\n and H.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=None\n Number of components, if n_components is not set all features\n are kept.\n\n init : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n Method used to initialize the procedure.\n Default: None.\n Valid options:\n\n - `None`: 'nndsvd' if n_components <= min(n_samples, n_features),\n otherwise random.\n\n - `'random'`: non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - `'nndsvda'`: NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - `'nndsvdar'` NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - `'custom'`: use custom matrices W and H\n\n solver : {'cd', 'mu'}, default='cd'\n Numerical solver to use:\n 'cd' is a Coordinate Descent solver.\n 'mu' is a Multiplicative Update solver.\n\n .. versionadded:: 0.17\n Coordinate Descent solver.\n\n .. versionadded:: 0.19\n Multiplicative Update solver.\n\n beta_loss : float or {'frobenius', 'kullback-leibler', \\\n 'itakura-saito'}, default='frobenius'\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros. Used only in 'mu' solver.\n\n .. versionadded:: 0.19\n\n tol : float, default=1e-4\n Tolerance of the stopping condition.\n\n max_iter : int, default=200\n Maximum number of iterations before timing out.\n\n random_state : int, RandomState instance or None, default=None\n Used for initialisation (when ``init`` == 'nndsvdar' or\n 'random'), and in Coordinate Descent. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\n alpha : float, default=0.\n Constant that multiplies the regularization terms. Set it to zero to\n have no regularization.\n\n .. versionadded:: 0.17\n *alpha* used in the Coordinate Descent solver.\n\n l1_ratio : float, default=0.\n The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n For l1_ratio = 0 the penalty is an elementwise L2 penalty\n (aka Frobenius Norm).\n For l1_ratio = 1 it is an elementwise L1 penalty.\n For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n .. versionadded:: 0.17\n Regularization parameter *l1_ratio* used in the Coordinate Descent\n solver.\n\n verbose : int, default=0\n Whether to be verbose.\n\n shuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\n .. 
versionadded:: 0.17\n *shuffle* parameter used in the Coordinate Descent solver.\n\n regularization : {'both', 'components', 'transformation', None}, \\\n default='both'\n Select whether the regularization affects the components (H), the\n transformation (W), both or none of them.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n Factorization matrix, sometimes called 'dictionary'.\n\n n_components_ : int\n The number of components. It is same as the `n_components` parameter\n if it was given. Otherwise, it will be same as the number of\n features.\n\n reconstruction_err_ : float\n Frobenius norm of the matrix difference, or beta-divergence, between\n the training data ``X`` and the reconstructed data ``WH`` from\n the fitted model.\n\n n_iter_ : int\n Actual number of iterations.\n\n Examples\n --------\n >>> import numpy as np\n >>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n >>> from sklearn.decomposition import NMF\n >>> model = NMF(n_components=2, init='random', random_state=0)\n >>> W = model.fit_transform(X)\n >>> H = model.components_\n\n References\n ----------\n Cichocki, Andrzej, and P. H. A. N. Anh-Huy. \"Fast local algorithms for\n large scale nonnegative matrix and tensor factorizations.\"\n IEICE transactions on fundamentals of electronics, communications and\n computer sciences 92.3: 708-721, 2009.\n\n Fevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix\n factorization with the beta-divergence. Neural Computation, 23(9).\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=None, *, init='warn', solver='cd',\n beta_loss='frobenius', tol=1e-4, max_iter=200,\n random_state=None, alpha=0., l1_ratio=0., verbose=0,\n shuffle=False, regularization='both'):\n self.n_components = n_components\n self.init = init\n self.solver = solver\n self.beta_loss = beta_loss\n self.tol = tol\n self.max_iter = max_iter\n self.random_state = random_state\n self.alpha = alpha\n self.l1_ratio = l1_ratio\n self.verbose = verbose\n self.shuffle = shuffle\n self.regularization = regularization\n\n def _more_tags(self):\n return {'requires_positive_X': True}\n\n def fit_transform(self, X, y=None, W=None, H=None):\n \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n This is more efficient than calling fit followed by transform.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be decomposed\n\n y : Ignored\n\n W : array-like of shape (n_samples, n_components)\n If init='custom', it is used as initial guess for the solution.\n\n H : array-like of shape (n_components, n_features)\n If init='custom', it is used as initial guess for the solution.\n\n Returns\n -------\n W : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n X = self._validate_data(X, accept_sparse=('csr', 'csc'),\n dtype=[np.float64, np.float32])\n\n with config_context(assume_finite=True):\n W, H, n_iter_ = non_negative_factorization(\n X=X, W=W, H=H, n_components=self.n_components, init=self.init,\n update_H=True, solver=self.solver, beta_loss=self.beta_loss,\n tol=self.tol, max_iter=self.max_iter, alpha=self.alpha,\n l1_ratio=self.l1_ratio, regularization=self.regularization,\n random_state=self.random_state, verbose=self.verbose,\n shuffle=self.shuffle)\n\n self.reconstruction_err_ = _beta_divergence(X, W, H, self.beta_loss,\n square_root=True)\n\n self.n_components_ = H.shape[0]\n self.components_ = H\n 
self.n_iter_ = n_iter_\n\n return W\n\n def fit(self, X, y=None, **params):\n \"\"\"Learn a NMF model for the data X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be decomposed\n\n y : Ignored\n\n Returns\n -------\n self\n \"\"\"\n self.fit_transform(X, **params)\n return self\n\n def transform(self, X):\n \"\"\"Transform the data X according to the fitted NMF model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be transformed by the model.\n\n Returns\n -------\n W : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse=('csr', 'csc'),\n dtype=[np.float64, np.float32],\n reset=False)\n\n with config_context(assume_finite=True):\n W, _, n_iter_ = non_negative_factorization(\n X=X, W=None, H=self.components_,\n n_components=self.n_components_,\n init=self.init, update_H=False, solver=self.solver,\n beta_loss=self.beta_loss, tol=self.tol, max_iter=self.max_iter,\n alpha=self.alpha, l1_ratio=self.l1_ratio,\n regularization=self.regularization,\n random_state=self.random_state,\n verbose=self.verbose, shuffle=self.shuffle)\n\n return W\n\n def inverse_transform(self, W):\n \"\"\"Transform data back to its original space.\n\n Parameters\n ----------\n W : {ndarray, sparse matrix} of shape (n_samples, n_components)\n Transformed data matrix.\n\n Returns\n -------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Data matrix of original shape.\n\n .. versionadded:: 0.18\n \"\"\"\n check_is_fitted(self)\n return np.dot(W, self.components_)", + "instance_attributes": [ + { + "name": "init", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "solver", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "beta_loss", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "l1_ratio", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "shuffle", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "regularization", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA", + "name": "PCA", + "qname": "sklearn.decomposition._pca.PCA", + "decorators": [], + "superclasses": ["_BasePCA"], + "methods": [ + "scikit-learn/sklearn.decomposition._pca/PCA/__init__", + "scikit-learn/sklearn.decomposition._pca/PCA/fit", + "scikit-learn/sklearn.decomposition._pca/PCA/fit_transform", + "scikit-learn/sklearn.decomposition._pca/PCA/_fit", + "scikit-learn/sklearn.decomposition._pca/PCA/_fit_full", + "scikit-learn/sklearn.decomposition._pca/PCA/_fit_truncated", + "scikit-learn/sklearn.decomposition._pca/PCA/score_samples", + "scikit-learn/sklearn.decomposition._pca/PCA/score", + "scikit-learn/sklearn.decomposition._pca/PCA/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Principal component analysis (PCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of the\ndata to project it to a lower dimensional space. 
The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nIt uses the LAPACK implementation of the full SVD or a randomized truncated\nSVD by the method of Halko et al. 2009, depending on the shape of the input\ndata and the number of components to extract.\n\nIt can also use the scipy.sparse.linalg ARPACK implementation of the\ntruncated SVD.\n\nNotice that this class does not support sparse input. See\n:class:`TruncatedSVD` for an alternative with sparse data.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Principal component analysis (PCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of the\ndata to project it to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nIt uses the LAPACK implementation of the full SVD or a randomized truncated\nSVD by the method of Halko et al. 2009, depending on the shape of the input\ndata and the number of components to extract.\n\nIt can also use the scipy.sparse.linalg ARPACK implementation of the\ntruncated SVD.\n\nNotice that this class does not support sparse input. See\n:class:`TruncatedSVD` for an alternative with sparse data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, float or 'mle', default=None\n Number of components to keep.\n if n_components is not set all components are kept::\n\n n_components == min(n_samples, n_features)\n\n If ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's\n MLE is used to guess the dimension. Use of ``n_components == 'mle'``\n will interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.\n\n If ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the\n number of components such that the amount of variance that needs to be\n explained is greater than the percentage specified by n_components.\n\n If ``svd_solver == 'arpack'``, the number of components must be\n strictly less than the minimum of n_features and n_samples.\n\n Hence, the None case results in::\n\n n_components == min(n_samples, n_features) - 1\n\ncopy : bool, default=True\n If False, data passed to fit are overwritten and running\n fit(X).transform(X) will not yield the expected results,\n use fit_transform(X) instead.\n\nwhiten : bool, default=False\n When True (False by default) the `components_` vectors are multiplied\n by the square root of n_samples and then divided by the singular values\n to ensure uncorrelated outputs with unit component-wise variances.\n\n Whitening will remove some information from the transformed signal\n (the relative variance scales of the components) but can sometime\n improve the predictive accuracy of the downstream estimators by\n making their data respect some hard-wired assumptions.\n\nsvd_solver : {'auto', 'full', 'arpack', 'randomized'}, default='auto'\n If auto :\n The solver is selected by a default policy based on `X.shape` and\n `n_components`: if the input data is larger than 500x500 and the\n number of components to extract is lower than 80% of the smallest\n dimension of the data, then the more efficient 'randomized'\n method is enabled. Otherwise the exact full SVD is computed and\n optionally truncated afterwards.\n If full :\n run exact full SVD calling the standard LAPACK solver via\n `scipy.linalg.svd` and select the components by postprocessing\n If arpack :\n run SVD truncated to n_components calling ARPACK solver via\n `scipy.sparse.linalg.svds`. 
It requires strictly\n 0 < n_components < min(X.shape)\n If randomized :\n run randomized SVD by the method of Halko et al.\n\n .. versionadded:: 0.18.0\n\ntol : float, default=0.0\n Tolerance for singular values computed by svd_solver == 'arpack'.\n Must be of range [0.0, infinity).\n\n .. versionadded:: 0.18.0\n\niterated_power : int or 'auto', default='auto'\n Number of iterations for the power method computed by\n svd_solver == 'randomized'.\n Must be of range [0, infinity).\n\n .. versionadded:: 0.18.0\n\nrandom_state : int, RandomState instance or None, default=None\n Used when the 'arpack' or 'randomized' solvers are used. Pass an int\n for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n .. versionadded:: 0.18.0\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Principal axes in feature space, representing the directions of\n maximum variance in the data. The components are sorted by\n ``explained_variance_``.\n\nexplained_variance_ : ndarray of shape (n_components,)\n The amount of variance explained by each of the selected components.\n The variance estimation uses `n_samples - 1` degrees of freedom.\n\n Equal to n_components largest eigenvalues\n of the covariance matrix of X.\n\n .. versionadded:: 0.18\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n\n If ``n_components`` is not set then all components are stored and the\n sum of the ratios is equal to 1.0.\n\nsingular_values_ : ndarray of shape (n_components,)\n The singular values corresponding to each of the selected components.\n The singular values are equal to the 2-norms of the ``n_components``\n variables in the lower-dimensional space.\n\n .. versionadded:: 0.19\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n\n Equal to `X.mean(axis=0)`.\n\nn_components_ : int\n The estimated number of components. When n_components is set\n to 'mle' or a number between 0 and 1 (with svd_solver == 'full') this\n number is estimated from input data. Otherwise it equals the parameter\n n_components, or the lesser value of n_features and n_samples\n if n_components is None.\n\nn_features_ : int\n Number of features in the training data.\n\nn_samples_ : int\n Number of samples in the training data.\n\nnoise_variance_ : float\n The estimated noise covariance following the Probabilistic PCA model\n from Tipping and Bishop 1999. See \"Pattern Recognition and\n Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n http://www.miketipping.com/papers/met-mppca.pdf. It is required to\n compute the estimated data covariance and score samples.\n\n Equal to the average of (min(n_features, n_samples) - n_components)\n smallest eigenvalues of the covariance matrix of X.\n\nSee Also\n--------\nKernelPCA : Kernel Principal Component Analysis.\nSparsePCA : Sparse Principal Component Analysis.\nTruncatedSVD : Dimensionality reduction using truncated SVD.\nIncrementalPCA : Incremental Principal Component Analysis.\n\nReferences\n----------\nFor n_components == 'mle', this class uses the method from:\n`Minka, T. P.. \"Automatic choice of dimensionality for PCA\".\nIn NIPS, pp. 598-604 `_\n\nImplements the probabilistic PCA model from:\n`Tipping, M. E., and Bishop, C. M. (1999). \"Probabilistic principal\ncomponent analysis\". 
Journal of the Royal Statistical Society:\nSeries B (Statistical Methodology), 61(3), 611-622.\n`_\nvia the score and score_samples methods.\n\nFor svd_solver == 'arpack', refer to `scipy.sparse.linalg.svds`.\n\nFor svd_solver == 'randomized', see:\n`Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).\n\"Finding structure with randomness: Probabilistic algorithms for\nconstructing approximate matrix decompositions\".\nSIAM review, 53(2), 217-288.\n`_\nand also\n`Martinsson, P. G., Rokhlin, V., and Tygert, M. (2011).\n\"A randomized algorithm for the decomposition of matrices\".\nApplied and Computational Harmonic Analysis, 30(1), 47-68\n`_.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.decomposition import PCA\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> pca = PCA(n_components=2)\n>>> pca.fit(X)\nPCA(n_components=2)\n>>> print(pca.explained_variance_ratio_)\n[0.9924... 0.0075...]\n>>> print(pca.singular_values_)\n[6.30061... 0.54980...]\n\n>>> pca = PCA(n_components=2, svd_solver='full')\n>>> pca.fit(X)\nPCA(n_components=2, svd_solver='full')\n>>> print(pca.explained_variance_ratio_)\n[0.9924... 0.00755...]\n>>> print(pca.singular_values_)\n[6.30061... 0.54980...]\n\n>>> pca = PCA(n_components=1, svd_solver='arpack')\n>>> pca.fit(X)\nPCA(n_components=1, svd_solver='arpack')\n>>> print(pca.explained_variance_ratio_)\n[0.99244...]\n>>> print(pca.singular_values_)\n[6.30061...]", + "code": "class PCA(_BasePCA):\n \"\"\"Principal component analysis (PCA).\n\n Linear dimensionality reduction using Singular Value Decomposition of the\n data to project it to a lower dimensional space. The input data is centered\n but not scaled for each feature before applying the SVD.\n\n It uses the LAPACK implementation of the full SVD or a randomized truncated\n SVD by the method of Halko et al. 2009, depending on the shape of the input\n data and the number of components to extract.\n\n It can also use the scipy.sparse.linalg ARPACK implementation of the\n truncated SVD.\n\n Notice that this class does not support sparse input. See\n :class:`TruncatedSVD` for an alternative with sparse data.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, float or 'mle', default=None\n Number of components to keep.\n if n_components is not set all components are kept::\n\n n_components == min(n_samples, n_features)\n\n If ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's\n MLE is used to guess the dimension. 
Use of ``n_components == 'mle'``\n will interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.\n\n If ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the\n number of components such that the amount of variance that needs to be\n explained is greater than the percentage specified by n_components.\n\n If ``svd_solver == 'arpack'``, the number of components must be\n strictly less than the minimum of n_features and n_samples.\n\n Hence, the None case results in::\n\n n_components == min(n_samples, n_features) - 1\n\n copy : bool, default=True\n If False, data passed to fit are overwritten and running\n fit(X).transform(X) will not yield the expected results,\n use fit_transform(X) instead.\n\n whiten : bool, default=False\n When True (False by default) the `components_` vectors are multiplied\n by the square root of n_samples and then divided by the singular values\n to ensure uncorrelated outputs with unit component-wise variances.\n\n Whitening will remove some information from the transformed signal\n (the relative variance scales of the components) but can sometime\n improve the predictive accuracy of the downstream estimators by\n making their data respect some hard-wired assumptions.\n\n svd_solver : {'auto', 'full', 'arpack', 'randomized'}, default='auto'\n If auto :\n The solver is selected by a default policy based on `X.shape` and\n `n_components`: if the input data is larger than 500x500 and the\n number of components to extract is lower than 80% of the smallest\n dimension of the data, then the more efficient 'randomized'\n method is enabled. Otherwise the exact full SVD is computed and\n optionally truncated afterwards.\n If full :\n run exact full SVD calling the standard LAPACK solver via\n `scipy.linalg.svd` and select the components by postprocessing\n If arpack :\n run SVD truncated to n_components calling ARPACK solver via\n `scipy.sparse.linalg.svds`. It requires strictly\n 0 < n_components < min(X.shape)\n If randomized :\n run randomized SVD by the method of Halko et al.\n\n .. versionadded:: 0.18.0\n\n tol : float, default=0.0\n Tolerance for singular values computed by svd_solver == 'arpack'.\n Must be of range [0.0, infinity).\n\n .. versionadded:: 0.18.0\n\n iterated_power : int or 'auto', default='auto'\n Number of iterations for the power method computed by\n svd_solver == 'randomized'.\n Must be of range [0, infinity).\n\n .. versionadded:: 0.18.0\n\n random_state : int, RandomState instance or None, default=None\n Used when the 'arpack' or 'randomized' solvers are used. Pass an int\n for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n .. versionadded:: 0.18.0\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n Principal axes in feature space, representing the directions of\n maximum variance in the data. The components are sorted by\n ``explained_variance_``.\n\n explained_variance_ : ndarray of shape (n_components,)\n The amount of variance explained by each of the selected components.\n The variance estimation uses `n_samples - 1` degrees of freedom.\n\n Equal to n_components largest eigenvalues\n of the covariance matrix of X.\n\n .. 
versionadded:: 0.18\n\n explained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n\n If ``n_components`` is not set then all components are stored and the\n sum of the ratios is equal to 1.0.\n\n singular_values_ : ndarray of shape (n_components,)\n The singular values corresponding to each of the selected components.\n The singular values are equal to the 2-norms of the ``n_components``\n variables in the lower-dimensional space.\n\n .. versionadded:: 0.19\n\n mean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n\n Equal to `X.mean(axis=0)`.\n\n n_components_ : int\n The estimated number of components. When n_components is set\n to 'mle' or a number between 0 and 1 (with svd_solver == 'full') this\n number is estimated from input data. Otherwise it equals the parameter\n n_components, or the lesser value of n_features and n_samples\n if n_components is None.\n\n n_features_ : int\n Number of features in the training data.\n\n n_samples_ : int\n Number of samples in the training data.\n\n noise_variance_ : float\n The estimated noise covariance following the Probabilistic PCA model\n from Tipping and Bishop 1999. See \"Pattern Recognition and\n Machine Learning\" by C. Bishop, 12.2.1 p. 574 or\n http://www.miketipping.com/papers/met-mppca.pdf. It is required to\n compute the estimated data covariance and score samples.\n\n Equal to the average of (min(n_features, n_samples) - n_components)\n smallest eigenvalues of the covariance matrix of X.\n\n See Also\n --------\n KernelPCA : Kernel Principal Component Analysis.\n SparsePCA : Sparse Principal Component Analysis.\n TruncatedSVD : Dimensionality reduction using truncated SVD.\n IncrementalPCA : Incremental Principal Component Analysis.\n\n References\n ----------\n For n_components == 'mle', this class uses the method from:\n `Minka, T. P.. \"Automatic choice of dimensionality for PCA\".\n In NIPS, pp. 598-604 `_\n\n Implements the probabilistic PCA model from:\n `Tipping, M. E., and Bishop, C. M. (1999). \"Probabilistic principal\n component analysis\". Journal of the Royal Statistical Society:\n Series B (Statistical Methodology), 61(3), 611-622.\n `_\n via the score and score_samples methods.\n\n For svd_solver == 'arpack', refer to `scipy.sparse.linalg.svds`.\n\n For svd_solver == 'randomized', see:\n `Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).\n \"Finding structure with randomness: Probabilistic algorithms for\n constructing approximate matrix decompositions\".\n SIAM review, 53(2), 217-288.\n `_\n and also\n `Martinsson, P. G., Rokhlin, V., and Tygert, M. (2011).\n \"A randomized algorithm for the decomposition of matrices\".\n Applied and Computational Harmonic Analysis, 30(1), 47-68\n `_.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.decomposition import PCA\n >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n >>> pca = PCA(n_components=2)\n >>> pca.fit(X)\n PCA(n_components=2)\n >>> print(pca.explained_variance_ratio_)\n [0.9924... 0.0075...]\n >>> print(pca.singular_values_)\n [6.30061... 0.54980...]\n\n >>> pca = PCA(n_components=2, svd_solver='full')\n >>> pca.fit(X)\n PCA(n_components=2, svd_solver='full')\n >>> print(pca.explained_variance_ratio_)\n [0.9924... 0.00755...]\n >>> print(pca.singular_values_)\n [6.30061... 
0.54980...]\n\n >>> pca = PCA(n_components=1, svd_solver='arpack')\n >>> pca.fit(X)\n PCA(n_components=1, svd_solver='arpack')\n >>> print(pca.explained_variance_ratio_)\n [0.99244...]\n >>> print(pca.singular_values_)\n [6.30061...]\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=None, *, copy=True, whiten=False,\n svd_solver='auto', tol=0.0, iterated_power='auto',\n random_state=None):\n self.n_components = n_components\n self.copy = copy\n self.whiten = whiten\n self.svd_solver = svd_solver\n self.tol = tol\n self.iterated_power = iterated_power\n self.random_state = random_state\n\n def fit(self, X, y=None):\n \"\"\"Fit the model with X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n self._fit(X)\n return self\n\n def fit_transform(self, X, y=None):\n \"\"\"Fit the model with X and apply the dimensionality reduction on X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Transformed values.\n\n Notes\n -----\n This method returns a Fortran-ordered array. To convert it to a\n C-ordered array, use 'np.ascontiguousarray'.\n \"\"\"\n U, S, Vt = self._fit(X)\n U = U[:, :self.n_components_]\n\n if self.whiten:\n # X_new = X * V / S * sqrt(n_samples) = U * sqrt(n_samples)\n U *= sqrt(X.shape[0] - 1)\n else:\n # X_new = X * V = U * S * Vt * V = U * S\n U *= S[:self.n_components_]\n\n return U\n\n def _fit(self, X):\n \"\"\"Dispatch to the right submethod depending on the chosen solver.\"\"\"\n\n # Raise an error for sparse input.\n # This is more informative than the generic one raised by check_array.\n if issparse(X):\n raise TypeError('PCA does not support sparse input. 
See '\n 'TruncatedSVD for a possible alternative.')\n\n X = self._validate_data(X, dtype=[np.float64, np.float32],\n ensure_2d=True, copy=self.copy)\n\n # Handle n_components==None\n if self.n_components is None:\n if self.svd_solver != 'arpack':\n n_components = min(X.shape)\n else:\n n_components = min(X.shape) - 1\n else:\n n_components = self.n_components\n\n # Handle svd_solver\n self._fit_svd_solver = self.svd_solver\n if self._fit_svd_solver == 'auto':\n # Small problem or n_components == 'mle', just call full PCA\n if max(X.shape) <= 500 or n_components == 'mle':\n self._fit_svd_solver = 'full'\n elif n_components >= 1 and n_components < .8 * min(X.shape):\n self._fit_svd_solver = 'randomized'\n # This is also the case of n_components in (0,1)\n else:\n self._fit_svd_solver = 'full'\n\n # Call different fits for either full or truncated SVD\n if self._fit_svd_solver == 'full':\n return self._fit_full(X, n_components)\n elif self._fit_svd_solver in ['arpack', 'randomized']:\n return self._fit_truncated(X, n_components, self._fit_svd_solver)\n else:\n raise ValueError(\"Unrecognized svd_solver='{0}'\"\n \"\".format(self._fit_svd_solver))\n\n def _fit_full(self, X, n_components):\n \"\"\"Fit the model by computing full SVD on X.\"\"\"\n n_samples, n_features = X.shape\n\n if n_components == 'mle':\n if n_samples < n_features:\n raise ValueError(\"n_components='mle' is only supported \"\n \"if n_samples >= n_features\")\n elif not 0 <= n_components <= min(n_samples, n_features):\n raise ValueError(\"n_components=%r must be between 0 and \"\n \"min(n_samples, n_features)=%r with \"\n \"svd_solver='full'\"\n % (n_components, min(n_samples, n_features)))\n elif n_components >= 1:\n if not isinstance(n_components, numbers.Integral):\n raise ValueError(\"n_components=%r must be of type int \"\n \"when greater than or equal to 1, \"\n \"was of type=%r\"\n % (n_components, type(n_components)))\n\n # Center data\n self.mean_ = np.mean(X, axis=0)\n X -= self.mean_\n\n U, S, Vt = linalg.svd(X, full_matrices=False)\n # flip eigenvectors' sign to enforce deterministic output\n U, Vt = svd_flip(U, Vt)\n\n components_ = Vt\n\n # Get variance explained by singular values\n explained_variance_ = (S ** 2) / (n_samples - 1)\n total_var = explained_variance_.sum()\n explained_variance_ratio_ = explained_variance_ / total_var\n singular_values_ = S.copy() # Store the singular values.\n\n # Postprocess the number of components required\n if n_components == 'mle':\n n_components = \\\n _infer_dimension(explained_variance_, n_samples)\n elif 0 < n_components < 1.0:\n # number of components for which the cumulated explained\n # variance percentage is superior to the desired threshold\n # side='right' ensures that number of features selected\n # their variance is always greater than n_components float\n # passed. More discussion in issue: #15669\n ratio_cumsum = stable_cumsum(explained_variance_ratio_)\n n_components = np.searchsorted(ratio_cumsum, n_components,\n side='right') + 1\n # Compute noise covariance using Probabilistic PCA model\n # The sigma2 maximum likelihood (cf. eq. 
12.46)\n if n_components < min(n_features, n_samples):\n self.noise_variance_ = explained_variance_[n_components:].mean()\n else:\n self.noise_variance_ = 0.\n\n self.n_samples_, self.n_features_ = n_samples, n_features\n self.components_ = components_[:n_components]\n self.n_components_ = n_components\n self.explained_variance_ = explained_variance_[:n_components]\n self.explained_variance_ratio_ = \\\n explained_variance_ratio_[:n_components]\n self.singular_values_ = singular_values_[:n_components]\n\n return U, S, Vt\n\n def _fit_truncated(self, X, n_components, svd_solver):\n \"\"\"Fit the model by computing truncated SVD (by ARPACK or randomized)\n on X.\n \"\"\"\n n_samples, n_features = X.shape\n\n if isinstance(n_components, str):\n raise ValueError(\"n_components=%r cannot be a string \"\n \"with svd_solver='%s'\"\n % (n_components, svd_solver))\n elif not 1 <= n_components <= min(n_samples, n_features):\n raise ValueError(\"n_components=%r must be between 1 and \"\n \"min(n_samples, n_features)=%r with \"\n \"svd_solver='%s'\"\n % (n_components, min(n_samples, n_features),\n svd_solver))\n elif not isinstance(n_components, numbers.Integral):\n raise ValueError(\"n_components=%r must be of type int \"\n \"when greater than or equal to 1, was of type=%r\"\n % (n_components, type(n_components)))\n elif svd_solver == 'arpack' and n_components == min(n_samples,\n n_features):\n raise ValueError(\"n_components=%r must be strictly less than \"\n \"min(n_samples, n_features)=%r with \"\n \"svd_solver='%s'\"\n % (n_components, min(n_samples, n_features),\n svd_solver))\n\n random_state = check_random_state(self.random_state)\n\n # Center data\n self.mean_ = np.mean(X, axis=0)\n X -= self.mean_\n\n if svd_solver == 'arpack':\n v0 = _init_arpack_v0(min(X.shape), random_state)\n U, S, Vt = svds(X, k=n_components, tol=self.tol, v0=v0)\n # svds doesn't abide by scipy.linalg.svd/randomized_svd\n # conventions, so reverse its outputs.\n S = S[::-1]\n # flip eigenvectors' sign to enforce deterministic output\n U, Vt = svd_flip(U[:, ::-1], Vt[::-1])\n\n elif svd_solver == 'randomized':\n # sign flipping is done inside\n U, S, Vt = randomized_svd(X, n_components=n_components,\n n_iter=self.iterated_power,\n flip_sign=True,\n random_state=random_state)\n\n self.n_samples_, self.n_features_ = n_samples, n_features\n self.components_ = Vt\n self.n_components_ = n_components\n\n # Get variance explained by singular values\n self.explained_variance_ = (S ** 2) / (n_samples - 1)\n total_var = np.var(X, ddof=1, axis=0)\n self.explained_variance_ratio_ = \\\n self.explained_variance_ / total_var.sum()\n self.singular_values_ = S.copy() # Store the singular values.\n\n if self.n_components_ < min(n_features, n_samples):\n self.noise_variance_ = (total_var.sum() -\n self.explained_variance_.sum())\n self.noise_variance_ /= min(n_features, n_samples) - n_components\n else:\n self.noise_variance_ = 0.\n\n return U, S, Vt\n\n def score_samples(self, X):\n \"\"\"Return the log-likelihood of each sample.\n\n See. \"Pattern Recognition and Machine Learning\"\n by C. Bishop, 12.2.1 p. 
574\n or http://www.miketipping.com/papers/met-mppca.pdf\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data.\n\n Returns\n -------\n ll : ndarray of shape (n_samples,)\n Log-likelihood of each sample under the current model.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False)\n Xr = X - self.mean_\n n_features = X.shape[1]\n precision = self.get_precision()\n log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)\n log_like -= .5 * (n_features * log(2. * np.pi) -\n fast_logdet(precision))\n return log_like\n\n def score(self, X, y=None):\n \"\"\"Return the average log-likelihood of all samples.\n\n See. \"Pattern Recognition and Machine Learning\"\n by C. Bishop, 12.2.1 p. 574\n or http://www.miketipping.com/papers/met-mppca.pdf\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data.\n\n y : Ignored\n\n Returns\n -------\n ll : float\n Average log-likelihood of the samples under the current model.\n \"\"\"\n return np.mean(self.score_samples(X))\n\n def _more_tags(self):\n return {'preserves_dtype': [np.float64, np.float32]}", + "instance_attributes": [ + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "whiten", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "svd_solver", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "iterated_power", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "_fit_svd_solver", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA", + "name": "MiniBatchSparsePCA", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA", + "decorators": [], + "superclasses": ["SparsePCA"], + "methods": [ + "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__", + "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Mini-batch Sparse Principal Components Analysis\n\nFinds the set of sparse components that can optimally reconstruct\nthe data. The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Mini-batch Sparse Principal Components Analysis\n\nFinds the set of sparse components that can optimally reconstruct\nthe data. The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n number of sparse atoms to extract\n\nalpha : int, default=1\n Sparsity controlling parameter. Higher values lead to sparser\n components.\n\nridge_alpha : float, default=0.01\n Amount of ridge shrinkage to apply in order to improve\n conditioning when calling the transform method.\n\nn_iter : int, default=100\n number of iterations to perform for each mini batch\n\ncallback : callable, default=None\n callable that gets invoked every five iterations\n\nbatch_size : int, default=3\n the number of features to take in each mini batch\n\nverbose : int or bool, default=False\n Controls the verbosity; the higher, the more messages. 
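An illustrative paraphrase of the 'auto' solver policy implemented in PCA._fit earlier in this entry (a reader-facing sketch, not part of the recorded API; the thresholds are copied from the code shown above):

def choose_svd_solver(n_samples, n_features, n_components):
    # Small problems and n_components='mle' use LAPACK's exact full SVD.
    if max(n_samples, n_features) <= 500 or n_components == 'mle':
        return 'full'
    # A truncated request well below min(n_samples, n_features) is cheaper
    # with randomized SVD.
    if 1 <= n_components < .8 * min(n_samples, n_features):
        return 'randomized'
    # Everything else, including a float n_components in (0, 1) used as an
    # explained-variance threshold, falls back to the full solver.
    return 'full'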
Defaults to 0.\n\nshuffle : bool, default=True\n whether to shuffle the data before splitting it in batches\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nmethod : {'lars', 'cd'}, default='lars'\n lars: uses the least angle regression method to solve the lasso problem\n (linear_model.lars_path)\n cd: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). Lars will be faster if\n the estimated components are sparse.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for random shuffling when ``shuffle`` is set to ``True``,\n during online dictionary learning. Pass an int for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Sparse components extracted from the data.\n\nn_components_ : int\n Estimated number of components.\n\n .. versionadded:: 0.23\n\nn_iter_ : int\n Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n Equal to ``X.mean(axis=0)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.decomposition import MiniBatchSparsePCA\n>>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n>>> transformer = MiniBatchSparsePCA(n_components=5, batch_size=50,\n... random_state=0)\n>>> transformer.fit(X)\nMiniBatchSparsePCA(...)\n>>> X_transformed = transformer.transform(X)\n>>> X_transformed.shape\n(200, 5)\n>>> # most values in the components_ are zero (sparsity)\n>>> np.mean(transformer.components_ == 0)\n0.94\n\nSee Also\n--------\nPCA\nSparsePCA\nDictionaryLearning", + "code": "class MiniBatchSparsePCA(SparsePCA):\n \"\"\"Mini-batch Sparse Principal Components Analysis\n\n Finds the set of sparse components that can optimally reconstruct\n the data. The amount of sparseness is controllable by the coefficient\n of the L1 penalty, given by the parameter alpha.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=None\n number of sparse atoms to extract\n\n alpha : int, default=1\n Sparsity controlling parameter. Higher values lead to sparser\n components.\n\n ridge_alpha : float, default=0.01\n Amount of ridge shrinkage to apply in order to improve\n conditioning when calling the transform method.\n\n n_iter : int, default=100\n number of iterations to perform for each mini batch\n\n callback : callable, default=None\n callable that gets invoked every five iterations\n\n batch_size : int, default=3\n the number of features to take in each mini batch\n\n verbose : int or bool, default=False\n Controls the verbosity; the higher, the more messages. Defaults to 0.\n\n shuffle : bool, default=True\n whether to shuffle the data before splitting it in batches\n\n n_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n method : {'lars', 'cd'}, default='lars'\n lars: uses the least angle regression method to solve the lasso problem\n (linear_model.lars_path)\n cd: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). 
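For what the alpha parameter documented above does in practice, here is a hedged sketch (the printed sparsity fractions are data-dependent and deliberately not asserted):

>>> import numpy as np
>>> from sklearn.datasets import make_friedman1
>>> from sklearn.decomposition import MiniBatchSparsePCA
>>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
>>> for alpha in (0.1, 1, 10):
...     est = MiniBatchSparsePCA(n_components=5, alpha=alpha,
...                              batch_size=50, random_state=0).fit(X)
...     # higher alpha -> larger fraction of exactly-zero loadings
...     print(alpha, np.mean(est.components_ == 0))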
Lars will be faster if\n the estimated components are sparse.\n\n random_state : int, RandomState instance or None, default=None\n Used for random shuffling when ``shuffle`` is set to ``True``,\n during online dictionary learning. Pass an int for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n Sparse components extracted from the data.\n\n n_components_ : int\n Estimated number of components.\n\n .. versionadded:: 0.23\n\n n_iter_ : int\n Number of iterations run.\n\n mean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n Equal to ``X.mean(axis=0)``.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.decomposition import MiniBatchSparsePCA\n >>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n >>> transformer = MiniBatchSparsePCA(n_components=5, batch_size=50,\n ... random_state=0)\n >>> transformer.fit(X)\n MiniBatchSparsePCA(...)\n >>> X_transformed = transformer.transform(X)\n >>> X_transformed.shape\n (200, 5)\n >>> # most values in the components_ are zero (sparsity)\n >>> np.mean(transformer.components_ == 0)\n 0.94\n\n See Also\n --------\n PCA\n SparsePCA\n DictionaryLearning\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=None, *, alpha=1, ridge_alpha=0.01,\n n_iter=100, callback=None, batch_size=3, verbose=False,\n shuffle=True, n_jobs=None, method='lars', random_state=None):\n super().__init__(\n n_components=n_components, alpha=alpha, verbose=verbose,\n ridge_alpha=ridge_alpha, n_jobs=n_jobs, method=method,\n random_state=random_state)\n self.n_iter = n_iter\n self.callback = callback\n self.batch_size = batch_size\n self.shuffle = shuffle\n\n def fit(self, X, y=None):\n \"\"\"Fit the model from data in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n random_state = check_random_state(self.random_state)\n X = self._validate_data(X)\n\n self.mean_ = X.mean(axis=0)\n X = X - self.mean_\n\n if self.n_components is None:\n n_components = X.shape[1]\n else:\n n_components = self.n_components\n Vt, _, self.n_iter_ = dict_learning_online(\n X.T, n_components, alpha=self.alpha,\n n_iter=self.n_iter, return_code=True,\n dict_init=None, verbose=self.verbose,\n callback=self.callback,\n batch_size=self.batch_size,\n shuffle=self.shuffle,\n n_jobs=self.n_jobs, method=self.method,\n random_state=random_state,\n return_n_iter=True)\n self.components_ = Vt.T\n\n components_norm = np.linalg.norm(\n self.components_, axis=1)[:, np.newaxis]\n components_norm[components_norm == 0] = 1\n self.components_ /= components_norm\n self.n_components_ = len(self.components_)\n\n return self", + "instance_attributes": [ + { + "name": "n_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "batch_size", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "shuffle", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA", + "name": "SparsePCA", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA", + "decorators": [], + "superclasses": ["TransformerMixin", 
"BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__", + "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/fit", + "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Sparse Principal Components Analysis (SparsePCA).\n\nFinds the set of sparse components that can optimally reconstruct\nthe data. The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Sparse Principal Components Analysis (SparsePCA).\n\nFinds the set of sparse components that can optimally reconstruct\nthe data. The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of sparse atoms to extract.\n\nalpha : float, default=1\n Sparsity controlling parameter. Higher values lead to sparser\n components.\n\nridge_alpha : float, default=0.01\n Amount of ridge shrinkage to apply in order to improve\n conditioning when calling the transform method.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n Tolerance for the stopping condition.\n\nmethod : {'lars', 'cd'}, default='lars'\n lars: uses the least angle regression method to solve the lasso problem\n (linear_model.lars_path)\n cd: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). Lars will be faster if\n the estimated components are sparse.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nU_init : ndarray of shape (n_samples, n_components), default=None\n Initial values for the loadings for warm restart scenarios. Only used\n if `U_init` and `V_init` are not None.\n\nV_init : ndarray of shape (n_components, n_features), default=None\n Initial values for the components for warm restart scenarios. Only used\n if `U_init` and `V_init` are not None.\n\nverbose : int or bool, default=False\n Controls the verbosity; the higher, the more messages. Defaults to 0.\n\nrandom_state : int, RandomState instance or None, default=None\n Used during dictionary learning. Pass an int for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Sparse components extracted from the data.\n\nerror_ : ndarray\n Vector of errors at each iteration.\n\nn_components_ : int\n Estimated number of components.\n\n .. 
versionadded:: 0.23\n\nn_iter_ : int\n Number of iterations run.\n\nmean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n Equal to ``X.mean(axis=0)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.decomposition import SparsePCA\n>>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n>>> transformer = SparsePCA(n_components=5, random_state=0)\n>>> transformer.fit(X)\nSparsePCA(...)\n>>> X_transformed = transformer.transform(X)\n>>> X_transformed.shape\n(200, 5)\n>>> # most values in the components_ are zero (sparsity)\n>>> np.mean(transformer.components_ == 0)\n0.9666...\n\nSee Also\n--------\nPCA\nMiniBatchSparsePCA\nDictionaryLearning", + "code": "class SparsePCA(TransformerMixin, BaseEstimator):\n \"\"\"Sparse Principal Components Analysis (SparsePCA).\n\n Finds the set of sparse components that can optimally reconstruct\n the data. The amount of sparseness is controllable by the coefficient\n of the L1 penalty, given by the parameter alpha.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=None\n Number of sparse atoms to extract.\n\n alpha : float, default=1\n Sparsity controlling parameter. Higher values lead to sparser\n components.\n\n ridge_alpha : float, default=0.01\n Amount of ridge shrinkage to apply in order to improve\n conditioning when calling the transform method.\n\n max_iter : int, default=1000\n Maximum number of iterations to perform.\n\n tol : float, default=1e-8\n Tolerance for the stopping condition.\n\n method : {'lars', 'cd'}, default='lars'\n lars: uses the least angle regression method to solve the lasso problem\n (linear_model.lars_path)\n cd: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). Lars will be faster if\n the estimated components are sparse.\n\n n_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n U_init : ndarray of shape (n_samples, n_components), default=None\n Initial values for the loadings for warm restart scenarios. Only used\n if `U_init` and `V_init` are not None.\n\n V_init : ndarray of shape (n_components, n_features), default=None\n Initial values for the components for warm restart scenarios. Only used\n if `U_init` and `V_init` are not None.\n\n verbose : int or bool, default=False\n Controls the verbosity; the higher, the more messages. Defaults to 0.\n\n random_state : int, RandomState instance or None, default=None\n Used during dictionary learning. Pass an int for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n Sparse components extracted from the data.\n\n error_ : ndarray\n Vector of errors at each iteration.\n\n n_components_ : int\n Estimated number of components.\n\n .. 
versionadded:: 0.23\n\n n_iter_ : int\n Number of iterations run.\n\n mean_ : ndarray of shape (n_features,)\n Per-feature empirical mean, estimated from the training set.\n Equal to ``X.mean(axis=0)``.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.decomposition import SparsePCA\n >>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)\n >>> transformer = SparsePCA(n_components=5, random_state=0)\n >>> transformer.fit(X)\n SparsePCA(...)\n >>> X_transformed = transformer.transform(X)\n >>> X_transformed.shape\n (200, 5)\n >>> # most values in the components_ are zero (sparsity)\n >>> np.mean(transformer.components_ == 0)\n 0.9666...\n\n See Also\n --------\n PCA\n MiniBatchSparsePCA\n DictionaryLearning\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=None, *, alpha=1, ridge_alpha=0.01,\n max_iter=1000, tol=1e-8, method='lars', n_jobs=None,\n U_init=None, V_init=None, verbose=False, random_state=None):\n self.n_components = n_components\n self.alpha = alpha\n self.ridge_alpha = ridge_alpha\n self.max_iter = max_iter\n self.tol = tol\n self.method = method\n self.n_jobs = n_jobs\n self.U_init = U_init\n self.V_init = V_init\n self.verbose = verbose\n self.random_state = random_state\n\n def fit(self, X, y=None):\n \"\"\"Fit the model from data in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n random_state = check_random_state(self.random_state)\n X = self._validate_data(X)\n\n self.mean_ = X.mean(axis=0)\n X = X - self.mean_\n\n if self.n_components is None:\n n_components = X.shape[1]\n else:\n n_components = self.n_components\n code_init = self.V_init.T if self.V_init is not None else None\n dict_init = self.U_init.T if self.U_init is not None else None\n Vt, _, E, self.n_iter_ = dict_learning(X.T, n_components,\n alpha=self.alpha,\n tol=self.tol,\n max_iter=self.max_iter,\n method=self.method,\n n_jobs=self.n_jobs,\n verbose=self.verbose,\n random_state=random_state,\n code_init=code_init,\n dict_init=dict_init,\n return_n_iter=True)\n self.components_ = Vt.T\n components_norm = np.linalg.norm(\n self.components_, axis=1)[:, np.newaxis]\n components_norm[components_norm == 0] = 1\n self.components_ /= components_norm\n self.n_components_ = len(self.components_)\n\n self.error_ = E\n return self\n\n def transform(self, X):\n \"\"\"Least Squares projection of the data onto the sparse components.\n\n To avoid instability issues in case the system is under-determined,\n regularization can be applied (Ridge regression) via the\n `ridge_alpha` parameter.\n\n Note that Sparse PCA components orthogonality is not enforced as in PCA\n hence one cannot use a simple linear projection.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, reset=False)\n X = X - self.mean_\n\n U = ridge_regression(self.components_.T, X.T, self.ridge_alpha,\n solver='cholesky')\n\n return U", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "int" + } 
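The transform method recorded above projects with a ridge solve rather than a plain matrix product, because sparse components are not orthogonal. A sketch reproducing it from the documented attributes (illustrative only; it repeats the exact call made inside transform, so the check should hold):

>>> import numpy as np
>>> from sklearn.datasets import make_friedman1
>>> from sklearn.decomposition import SparsePCA
>>> from sklearn.linear_model import ridge_regression
>>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
>>> est = SparsePCA(n_components=5, random_state=0).fit(X)
>>> # the same ridge problem that transform() solves internally
>>> manual = ridge_regression(est.components_.T, (X - est.mean_).T,
...                           est.ridge_alpha, solver='cholesky')
>>> np.allclose(manual, est.transform(X))
True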
+ }, + { + "name": "ridge_alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "method", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "components_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "error_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD", + "name": "TruncatedSVD", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/__init__", + "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/fit", + "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/fit_transform", + "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/transform", + "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/inverse_transform", + "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Dimensionality reduction using truncated SVD (aka LSA).\n\nThis transformer performs linear dimensionality reduction by means of\ntruncated singular value decomposition (SVD). Contrary to PCA, this\nestimator does not center the data before computing the singular value\ndecomposition. This means it can work with sparse matrices\nefficiently.\n\nIn particular, truncated SVD works on term count/tf-idf matrices as\nreturned by the vectorizers in :mod:`sklearn.feature_extraction.text`. In\nthat context, it is known as latent semantic analysis (LSA).\n\nThis estimator supports two algorithms: a fast randomized SVD solver, and\na \"naive\" algorithm that uses ARPACK as an eigensolver on `X * X.T` or\n`X.T * X`, whichever is more efficient.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Dimensionality reduction using truncated SVD (aka LSA).\n\nThis transformer performs linear dimensionality reduction by means of\ntruncated singular value decomposition (SVD). Contrary to PCA, this\nestimator does not center the data before computing the singular value\ndecomposition. This means it can work with sparse matrices\nefficiently.\n\nIn particular, truncated SVD works on term count/tf-idf matrices as\nreturned by the vectorizers in :mod:`sklearn.feature_extraction.text`. In\nthat context, it is known as latent semantic analysis (LSA).\n\nThis estimator supports two algorithms: a fast randomized SVD solver, and\na \"naive\" algorithm that uses ARPACK as an eigensolver on `X * X.T` or\n`X.T * X`, whichever is more efficient.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n Desired dimensionality of output data.\n Must be strictly less than the number of features.\n The default value is useful for visualisation. For LSA, a value of\n 100 is recommended.\n\nalgorithm : {'arpack', 'randomized'}, default='randomized'\n SVD solver to use. 
Either \"arpack\" for the ARPACK wrapper in SciPy\n (scipy.sparse.linalg.svds), or \"randomized\" for the randomized\n algorithm due to Halko (2009).\n\nn_iter : int, default=5\n Number of iterations for randomized SVD solver. Not used by ARPACK. The\n default is larger than the default in\n :func:`~sklearn.utils.extmath.randomized_svd` to handle sparse\n matrices that may have large slowly decaying spectrum.\n\nrandom_state : int, RandomState instance or None, default=None\n Used during randomized svd. Pass an int for reproducible results across\n multiple function calls.\n See :term:`Glossary `.\n\ntol : float, default=0.\n Tolerance for ARPACK. 0 means machine precision. Ignored by randomized\n SVD solver.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n\nexplained_variance_ : ndarray of shape (n_components,)\n The variance of the training samples transformed by a projection to\n each component.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n\nsingular_values_ : ndarray od shape (n_components,)\n The singular values corresponding to each of the selected components.\n The singular values are equal to the 2-norms of the ``n_components``\n variables in the lower-dimensional space.\n\nExamples\n--------\n>>> from sklearn.decomposition import TruncatedSVD\n>>> from scipy.sparse import random as sparse_random\n>>> X = sparse_random(100, 100, density=0.01, format='csr',\n... random_state=42)\n>>> svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)\n>>> svd.fit(X)\nTruncatedSVD(n_components=5, n_iter=7, random_state=42)\n>>> print(svd.explained_variance_ratio_)\n[0.0646... 0.0633... 0.0639... 0.0535... 0.0406...]\n>>> print(svd.explained_variance_ratio_.sum())\n0.286...\n>>> print(svd.singular_values_)\n[1.553... 1.512... 1.510... 1.370... 1.199...]\n\nSee Also\n--------\nPCA\n\nReferences\n----------\nFinding structure with randomness: Stochastic algorithms for constructing\napproximate matrix decompositions\nHalko, et al., 2009 (arXiv:909) https://arxiv.org/pdf/0909.4061.pdf\n\nNotes\n-----\nSVD suffers from a problem called \"sign indeterminacy\", which means the\nsign of the ``components_`` and the output from transform depend on the\nalgorithm and random state. To work around this, fit instances of this\nclass to data once, then keep the instance around to do transformations.", + "code": "class TruncatedSVD(TransformerMixin, BaseEstimator):\n \"\"\"Dimensionality reduction using truncated SVD (aka LSA).\n\n This transformer performs linear dimensionality reduction by means of\n truncated singular value decomposition (SVD). Contrary to PCA, this\n estimator does not center the data before computing the singular value\n decomposition. This means it can work with sparse matrices\n efficiently.\n\n In particular, truncated SVD works on term count/tf-idf matrices as\n returned by the vectorizers in :mod:`sklearn.feature_extraction.text`. In\n that context, it is known as latent semantic analysis (LSA).\n\n This estimator supports two algorithms: a fast randomized SVD solver, and\n a \"naive\" algorithm that uses ARPACK as an eigensolver on `X * X.T` or\n `X.T * X`, whichever is more efficient.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=2\n Desired dimensionality of output data.\n Must be strictly less than the number of features.\n The default value is useful for visualisation. 
For LSA, a value of\n 100 is recommended.\n\n algorithm : {'arpack', 'randomized'}, default='randomized'\n SVD solver to use. Either \"arpack\" for the ARPACK wrapper in SciPy\n (scipy.sparse.linalg.svds), or \"randomized\" for the randomized\n algorithm due to Halko (2009).\n\n n_iter : int, default=5\n Number of iterations for randomized SVD solver. Not used by ARPACK. The\n default is larger than the default in\n :func:`~sklearn.utils.extmath.randomized_svd` to handle sparse\n matrices that may have large slowly decaying spectrum.\n\n random_state : int, RandomState instance or None, default=None\n Used during randomized svd. Pass an int for reproducible results across\n multiple function calls.\n See :term:`Glossary `.\n\n tol : float, default=0.\n Tolerance for ARPACK. 0 means machine precision. Ignored by randomized\n SVD solver.\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n\n explained_variance_ : ndarray of shape (n_components,)\n The variance of the training samples transformed by a projection to\n each component.\n\n explained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n\n singular_values_ : ndarray od shape (n_components,)\n The singular values corresponding to each of the selected components.\n The singular values are equal to the 2-norms of the ``n_components``\n variables in the lower-dimensional space.\n\n Examples\n --------\n >>> from sklearn.decomposition import TruncatedSVD\n >>> from scipy.sparse import random as sparse_random\n >>> X = sparse_random(100, 100, density=0.01, format='csr',\n ... random_state=42)\n >>> svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)\n >>> svd.fit(X)\n TruncatedSVD(n_components=5, n_iter=7, random_state=42)\n >>> print(svd.explained_variance_ratio_)\n [0.0646... 0.0633... 0.0639... 0.0535... 0.0406...]\n >>> print(svd.explained_variance_ratio_.sum())\n 0.286...\n >>> print(svd.singular_values_)\n [1.553... 1.512... 1.510... 1.370... 1.199...]\n\n See Also\n --------\n PCA\n\n References\n ----------\n Finding structure with randomness: Stochastic algorithms for constructing\n approximate matrix decompositions\n Halko, et al., 2009 (arXiv:909) https://arxiv.org/pdf/0909.4061.pdf\n\n Notes\n -----\n SVD suffers from a problem called \"sign indeterminacy\", which means the\n sign of the ``components_`` and the output from transform depend on the\n algorithm and random state. To work around this, fit instances of this\n class to data once, then keep the instance around to do transformations.\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=2, *, algorithm=\"randomized\", n_iter=5,\n random_state=None, tol=0.):\n self.algorithm = algorithm\n self.n_components = n_components\n self.n_iter = n_iter\n self.random_state = random_state\n self.tol = tol\n\n def fit(self, X, y=None):\n \"\"\"Fit model on training data X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the transformer object.\n \"\"\"\n self.fit_transform(X)\n return self\n\n def fit_transform(self, X, y=None):\n \"\"\"Fit model to X and perform dimensionality reduction on X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : Ignored\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Reduced version of X. 
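The class docstring above singles out tf-idf matrices; a minimal end-to-end LSA sketch (the corpus is invented here purely for illustration):

>>> from sklearn.feature_extraction.text import TfidfVectorizer
>>> from sklearn.decomposition import TruncatedSVD
>>> docs = ["the cat sat on the mat",
...         "the dog sat on the log",
...         "cats and dogs are pets"]
>>> tfidf = TfidfVectorizer().fit_transform(docs)  # sparse CSR; no centering needed
>>> lsa = TruncatedSVD(n_components=2, random_state=42)
>>> lsa.fit_transform(tfidf).shape
(3, 2)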
This will always be a dense array.\n \"\"\"\n X = self._validate_data(X, accept_sparse=['csr', 'csc'],\n ensure_min_features=2)\n random_state = check_random_state(self.random_state)\n\n if self.algorithm == \"arpack\":\n v0 = _init_arpack_v0(min(X.shape), random_state)\n U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol, v0=v0)\n # svds doesn't abide by scipy.linalg.svd/randomized_svd\n # conventions, so reverse its outputs.\n Sigma = Sigma[::-1]\n U, VT = svd_flip(U[:, ::-1], VT[::-1])\n\n elif self.algorithm == \"randomized\":\n k = self.n_components\n n_features = X.shape[1]\n if k >= n_features:\n raise ValueError(\"n_components must be < n_features;\"\n \" got %d >= %d\" % (k, n_features))\n U, Sigma, VT = randomized_svd(X, self.n_components,\n n_iter=self.n_iter,\n random_state=random_state)\n else:\n raise ValueError(\"unknown algorithm %r\" % self.algorithm)\n\n self.components_ = VT\n\n # As a result of the SVD approximation error on X ~ U @ Sigma @ V.T,\n # X @ V is not the same as U @ Sigma\n if self.algorithm == \"randomized\" or \\\n (self.algorithm == \"arpack\" and self.tol > 0):\n X_transformed = safe_sparse_dot(X, self.components_.T)\n else:\n X_transformed = U * Sigma\n\n # Calculate explained variance & explained variance ratio\n self.explained_variance_ = exp_var = np.var(X_transformed, axis=0)\n if sp.issparse(X):\n _, full_var = mean_variance_axis(X, axis=0)\n full_var = full_var.sum()\n else:\n full_var = np.var(X, axis=0).sum()\n self.explained_variance_ratio_ = exp_var / full_var\n self.singular_values_ = Sigma # Store the singular values.\n\n return X_transformed\n\n def transform(self, X):\n \"\"\"Perform dimensionality reduction on X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Reduced version of X. 
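The branch in fit_transform above returns `U * Sigma` only when the SVD is exact; a quick numerical check of the equivalence it relies on (illustrative, reusing the docstring's toy matrix; with an exact ARPACK solve the two routes should agree to numerical precision):

>>> import numpy as np
>>> from scipy.sparse import random as sparse_random
>>> from sklearn.decomposition import TruncatedSVD
>>> X = sparse_random(100, 100, density=0.01, format='csr', random_state=42)
>>> svd = TruncatedSVD(n_components=5, algorithm='arpack', random_state=42)
>>> Z = svd.fit_transform(X)  # exact solver: returns U * Sigma
>>> # transform() always computes X @ components_.T instead
>>> float(np.abs(Z - svd.transform(X)).max()) < 1e-8
True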
This will always be a dense array.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse=['csr', 'csc'], reset=False)\n return safe_sparse_dot(X, self.components_.T)\n\n def inverse_transform(self, X):\n \"\"\"Transform X back to its original space.\n\n Returns an array X_original whose transform would be X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_components)\n New data.\n\n Returns\n -------\n X_original : ndarray of shape (n_samples, n_features)\n Note that this is always a dense array.\n \"\"\"\n X = check_array(X)\n return np.dot(X, self.components_)\n\n def _more_tags(self):\n return {'preserves_dtype': [np.float64, np.float32]}", + "instance_attributes": [ + { + "name": "algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis", + "name": "LinearDiscriminantAnalysis", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis", + "decorators": [], + "superclasses": ["LinearClassifierMixin", "TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/__init__", + "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr", + "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_eigen", + "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_svd", + "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/fit", + "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/transform", + "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/predict_proba", + "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/predict_log_proba", + "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/decision_function" + ], + "is_public": true, + "reexported_by": [], + "description": "Linear Discriminant Analysis\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. versionadded:: 0.17\n *LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Linear Discriminant Analysis\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. 
versionadded:: 0.17\n *LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsolver : {'svd', 'lsqr', 'eigen'}, default='svd'\n Solver to use, possible values:\n - 'svd': Singular value decomposition (default).\n Does not compute the covariance matrix, therefore this solver is\n recommended for data with a large number of features.\n - 'lsqr': Least squares solution.\n Can be combined with shrinkage or custom covariance estimator.\n - 'eigen': Eigenvalue decomposition.\n Can be combined with shrinkage or custom covariance estimator.\n\nshrinkage : 'auto' or float, default=None\n Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n This should be left to None if `covariance_estimator` is used.\n Note that shrinkage works only with 'lsqr' and 'eigen' solvers.\n\npriors : array-like of shape (n_classes,), default=None\n The class prior probabilities. By default, the class proportions are\n inferred from the training data.\n\nn_components : int, default=None\n Number of components (<= min(n_classes - 1, n_features)) for\n dimensionality reduction. If None, will be set to\n min(n_classes - 1, n_features). This parameter only affects the\n `transform` method.\n\nstore_covariance : bool, default=False\n If True, explicitely compute the weighted within-class covariance\n matrix when solver is 'svd'. The matrix is always computed\n and stored for the other solvers.\n\n .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n Absolute threshold for a singular value of X to be considered\n significant, used to estimate the rank of X. Dimensions whose\n singular values are non-significant are discarded. Only used if\n solver is 'svd'.\n\n .. versionadded:: 0.17\n\ncovariance_estimator : covariance estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance`.\n if None the shrinkage parameter drives the estimate.\n\n This should be left to None if `shrinkage` is used.\n Note that `covariance_estimator` works only with 'lsqr' and 'eigen'\n solvers.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_classes, n_features)\n Weight vector(s).\n\nintercept_ : ndarray of shape (n_classes,)\n Intercept term.\n\ncovariance_ : array-like of shape (n_features, n_features)\n Weighted within-class covariance matrix. It corresponds to\n `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the\n samples in class `k`. The `C_k` are estimated using the (potentially\n shrunk) biased estimator of covariance. If solver is 'svd', only\n exists when `store_covariance` is True.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n If ``n_components`` is not set then all components are stored and the\n sum of explained variances is equal to 1.0. 
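The shrinkage/solver interaction documented above is easy to get wrong; a small sketch (data reused from the docstring example; the prediction comment is an expectation, not a recorded output):

>>> import numpy as np
>>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
>>> y = np.array([1, 1, 1, 2, 2, 2])
>>> # Ledoit-Wolf shrinkage needs a covariance-based solver; the default
>>> # solver='svd' never forms the covariance matrix and rejects shrinkage
>>> clf = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto').fit(X, y)
>>> print(clf.predict([[-0.8, -1]]))  # expected [1], as in the docstring example
[1]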
Only available when eigen\n or svd solver is used.\n\nmeans_ : array-like of shape (n_classes, n_features)\n Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n Class priors (sum to 1).\n\nscalings_ : array-like of shape (rank, n_classes - 1)\n Scaling of the features in the space spanned by the class centroids.\n Only available for 'svd' and 'eigen' solvers.\n\nxbar_ : array-like of shape (n_features,)\n Overall mean. Only present if solver is 'svd'.\n\nclasses_ : array-like of shape (n_classes,)\n Unique class labels.\n\nSee Also\n--------\nQuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = LinearDiscriminantAnalysis()\n>>> clf.fit(X, y)\nLinearDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]", + "code": "class LinearDiscriminantAnalysis(LinearClassifierMixin,\n TransformerMixin,\n BaseEstimator):\n \"\"\"Linear Discriminant Analysis\n\n A classifier with a linear decision boundary, generated by fitting class\n conditional densities to the data and using Bayes' rule.\n\n The model fits a Gaussian density to each class, assuming that all classes\n share the same covariance matrix.\n\n The fitted model can also be used to reduce the dimensionality of the input\n by projecting it to the most discriminative directions, using the\n `transform` method.\n\n .. versionadded:: 0.17\n *LinearDiscriminantAnalysis*.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n solver : {'svd', 'lsqr', 'eigen'}, default='svd'\n Solver to use, possible values:\n - 'svd': Singular value decomposition (default).\n Does not compute the covariance matrix, therefore this solver is\n recommended for data with a large number of features.\n - 'lsqr': Least squares solution.\n Can be combined with shrinkage or custom covariance estimator.\n - 'eigen': Eigenvalue decomposition.\n Can be combined with shrinkage or custom covariance estimator.\n\n shrinkage : 'auto' or float, default=None\n Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n This should be left to None if `covariance_estimator` is used.\n Note that shrinkage works only with 'lsqr' and 'eigen' solvers.\n\n priors : array-like of shape (n_classes,), default=None\n The class prior probabilities. By default, the class proportions are\n inferred from the training data.\n\n n_components : int, default=None\n Number of components (<= min(n_classes - 1, n_features)) for\n dimensionality reduction. If None, will be set to\n min(n_classes - 1, n_features). This parameter only affects the\n `transform` method.\n\n store_covariance : bool, default=False\n If True, explicitely compute the weighted within-class covariance\n matrix when solver is 'svd'. The matrix is always computed\n and stored for the other solvers.\n\n .. versionadded:: 0.17\n\n tol : float, default=1.0e-4\n Absolute threshold for a singular value of X to be considered\n significant, used to estimate the rank of X. Dimensions whose\n singular values are non-significant are discarded. Only used if\n solver is 'svd'.\n\n .. 
versionadded:: 0.17\n\n covariance_estimator : covariance estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance`.\n if None the shrinkage parameter drives the estimate.\n\n This should be left to None if `shrinkage` is used.\n Note that `covariance_estimator` works only with 'lsqr' and 'eigen'\n solvers.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n coef_ : ndarray of shape (n_features,) or (n_classes, n_features)\n Weight vector(s).\n\n intercept_ : ndarray of shape (n_classes,)\n Intercept term.\n\n covariance_ : array-like of shape (n_features, n_features)\n Weighted within-class covariance matrix. It corresponds to\n `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the\n samples in class `k`. The `C_k` are estimated using the (potentially\n shrunk) biased estimator of covariance. If solver is 'svd', only\n exists when `store_covariance` is True.\n\n explained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n If ``n_components`` is not set then all components are stored and the\n sum of explained variances is equal to 1.0. Only available when eigen\n or svd solver is used.\n\n means_ : array-like of shape (n_classes, n_features)\n Class-wise means.\n\n priors_ : array-like of shape (n_classes,)\n Class priors (sum to 1).\n\n scalings_ : array-like of shape (rank, n_classes - 1)\n Scaling of the features in the space spanned by the class centroids.\n Only available for 'svd' and 'eigen' solvers.\n\n xbar_ : array-like of shape (n_features,)\n Overall mean. Only present if solver is 'svd'.\n\n classes_ : array-like of shape (n_classes,)\n Unique class labels.\n\n See Also\n --------\n QuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n >>> y = np.array([1, 1, 1, 2, 2, 2])\n >>> clf = LinearDiscriminantAnalysis()\n >>> clf.fit(X, y)\n LinearDiscriminantAnalysis()\n >>> print(clf.predict([[-0.8, -1]]))\n [1]\n \"\"\"\n\n def __init__(self, solver='svd', shrinkage=None, priors=None,\n n_components=None, store_covariance=False, tol=1e-4,\n covariance_estimator=None):\n self.solver = solver\n self.shrinkage = shrinkage\n self.priors = priors\n self.n_components = n_components\n self.store_covariance = store_covariance # used only in svd solver\n self.tol = tol # used only in svd solver\n self.covariance_estimator = covariance_estimator\n\n def _solve_lsqr(self, X, y, shrinkage, covariance_estimator):\n \"\"\"Least squares solver.\n\n The least squares solver computes a straightforward solution of the\n optimal decision rule based directly on the discriminant functions. It\n can only be used for classification (with any covariance estimator),\n because\n estimation of eigenvectors is not performed. 
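The covariance_estimator hook described above accepts any object with a fit method and a ``covariance_`` attribute; a hedged sketch using one of the shrunk estimators from sklearn.covariance that the docstring points to (OAS is one such estimator):

>>> import numpy as np
>>> from sklearn.covariance import OAS
>>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
>>> y = np.array([1, 1, 1, 2, 2, 2])
>>> # only the 'lsqr' and 'eigen' solvers accept a plug-in covariance estimator
>>> clf = LinearDiscriminantAnalysis(solver='lsqr',
...                                  covariance_estimator=OAS()).fit(X, y)
>>> print(clf.predict([[-0.8, -1]]))  # expected [1], as in the docstring example
[1]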
Therefore, dimensionality\n reduction with the transform is not supported.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target values.\n\n shrinkage : 'auto', float or None\n Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator` is\n not None.\n\n covariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n If None, the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\n Notes\n -----\n This solver is based on [1]_, section 2.6.2, pp. 39-41.\n\n References\n ----------\n .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n 0-471-05669-3.\n \"\"\"\n self.means_ = _class_means(X, y)\n self.covariance_ = _class_cov(X, y, self.priors_, shrinkage,\n covariance_estimator)\n self.coef_ = linalg.lstsq(self.covariance_, self.means_.T)[0].T\n self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T)) +\n np.log(self.priors_))\n\n def _solve_eigen(self, X, y, shrinkage,\n covariance_estimator):\n \"\"\"Eigenvalue solver.\n\n The eigenvalue solver computes the optimal solution of the Rayleigh\n coefficient (basically the ratio of between-class scatter to within-class\n scatter). This solver supports both classification and\n dimensionality reduction (with any covariance estimator).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\n shrinkage : 'auto', float or None\n Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage constant.\n\n Shrinkage parameter is ignored if `covariance_estimator` is\n not None.\n\n covariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n If None, the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\n Notes\n -----\n This solver is based on [1]_, section 3.8.3, pp. 121-124.\n\n References\n ----------\n .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n (Second Edition). John Wiley & Sons, Inc., New York, 2001. 
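The eigenvalue solver described above reduces LDA to the generalized symmetric eigenproblem `Sb v = lambda Sw v`, solved just below via `linalg.eigh(Sb, Sw)`. A tiny self-contained check of the Rayleigh-quotient property it exploits (random SPD matrices stand in for the scatter estimates):

>>> import numpy as np
>>> from scipy import linalg
>>> rng = np.random.RandomState(0)
>>> A = rng.randn(4, 4)
>>> Sw = A @ A.T + 4 * np.eye(4)   # within-class scatter (SPD)
>>> B = rng.randn(4, 2)
>>> Sb = B @ B.T                   # between-class scatter (PSD, low rank)
>>> evals, evecs = linalg.eigh(Sb, Sw)
>>> v = evecs[:, -1]               # eigenvector of the largest eigenvalue
>>> # its Rayleigh quotient v.T Sb v / v.T Sw v equals that eigenvalue
>>> bool(np.isclose((v @ Sb @ v) / (v @ Sw @ v), evals[-1]))
True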
ISBN\n 0-471-05669-3.\n \"\"\"\n self.means_ = _class_means(X, y)\n self.covariance_ = _class_cov(X, y, self.priors_, shrinkage,\n covariance_estimator)\n\n Sw = self.covariance_ # within scatter\n St = _cov(X, shrinkage, covariance_estimator) # total scatter\n Sb = St - Sw # between scatter\n\n evals, evecs = linalg.eigh(Sb, Sw)\n self.explained_variance_ratio_ = np.sort(evals / np.sum(evals)\n )[::-1][:self._max_components]\n evecs = evecs[:, np.argsort(evals)[::-1]] # sort eigenvectors\n\n self.scalings_ = evecs\n self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)\n self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T)) +\n np.log(self.priors_))\n\n def _solve_svd(self, X, y):\n \"\"\"SVD solver.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n \"\"\"\n n_samples, n_features = X.shape\n n_classes = len(self.classes_)\n\n self.means_ = _class_means(X, y)\n if self.store_covariance:\n self.covariance_ = _class_cov(X, y, self.priors_)\n\n Xc = []\n for idx, group in enumerate(self.classes_):\n Xg = X[y == group, :]\n Xc.append(Xg - self.means_[idx])\n\n self.xbar_ = np.dot(self.priors_, self.means_)\n\n Xc = np.concatenate(Xc, axis=0)\n\n # 1) within (univariate) scaling by with classes std-dev\n std = Xc.std(axis=0)\n # avoid division by zero in normalization\n std[std == 0] = 1.\n fac = 1. / (n_samples - n_classes)\n\n # 2) Within variance scaling\n X = np.sqrt(fac) * (Xc / std)\n # SVD of centered (within)scaled data\n U, S, Vt = linalg.svd(X, full_matrices=False)\n\n rank = np.sum(S > self.tol)\n # Scaling of within covariance is: V' 1/S\n scalings = (Vt[:rank] / std).T / S[:rank]\n\n # 3) Between variance scaling\n # Scale weighted centers\n X = np.dot(((np.sqrt((n_samples * self.priors_) * fac)) *\n (self.means_ - self.xbar_).T).T, scalings)\n # Centers are living in a space with n_classes-1 dim (maximum)\n # Use SVD to find projection in the space spanned by the\n # (n_classes) centers\n _, S, Vt = linalg.svd(X, full_matrices=0)\n\n self.explained_variance_ratio_ = (S**2 / np.sum(\n S**2))[:self._max_components]\n rank = np.sum(S > self.tol * S[0])\n self.scalings_ = np.dot(scalings, Vt.T[:, :rank])\n coef = np.dot(self.means_ - self.xbar_, self.scalings_)\n self.intercept_ = (-0.5 * np.sum(coef ** 2, axis=1) +\n np.log(self.priors_))\n self.coef_ = np.dot(coef, self.scalings_.T)\n self.intercept_ -= np.dot(self.xbar_, self.coef_.T)\n\n def fit(self, X, y):\n \"\"\"Fit LinearDiscriminantAnalysis model according to the given\n training data and parameters.\n\n .. versionchanged:: 0.19\n *store_covariance* has been moved to main constructor.\n\n .. 
versionchanged:: 0.19\n *tol* has been moved to main constructor.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,)\n Target values.\n \"\"\"\n X, y = self._validate_data(X, y, ensure_min_samples=2, estimator=self,\n dtype=[np.float64, np.float32])\n self.classes_ = unique_labels(y)\n n_samples, _ = X.shape\n n_classes = len(self.classes_)\n\n if n_samples == n_classes:\n raise ValueError(\"The number of samples must be more \"\n \"than the number of classes.\")\n\n if self.priors is None: # estimate priors from sample\n _, y_t = np.unique(y, return_inverse=True) # non-negative ints\n self.priors_ = np.bincount(y_t) / float(len(y))\n else:\n self.priors_ = np.asarray(self.priors)\n\n if (self.priors_ < 0).any():\n raise ValueError(\"priors must be non-negative\")\n if not np.isclose(self.priors_.sum(), 1.0):\n warnings.warn(\"The priors do not sum to 1. Renormalizing\",\n UserWarning)\n self.priors_ = self.priors_ / self.priors_.sum()\n\n # Maximum number of components no matter what n_components is\n # specified:\n max_components = min(len(self.classes_) - 1, X.shape[1])\n\n if self.n_components is None:\n self._max_components = max_components\n else:\n if self.n_components > max_components:\n raise ValueError(\n \"n_components cannot be larger than min(n_features, \"\n \"n_classes - 1).\"\n )\n self._max_components = self.n_components\n\n if self.solver == 'svd':\n if self.shrinkage is not None:\n raise NotImplementedError('shrinkage not supported')\n if self.covariance_estimator is not None:\n raise ValueError(\n 'covariance estimator '\n 'is not supported '\n 'with svd solver. Try another solver')\n self._solve_svd(X, y)\n elif self.solver == 'lsqr':\n self._solve_lsqr(X, y, shrinkage=self.shrinkage,\n covariance_estimator=self.covariance_estimator)\n elif self.solver == 'eigen':\n self._solve_eigen(X, y,\n shrinkage=self.shrinkage,\n covariance_estimator=self.covariance_estimator)\n else:\n raise ValueError(\"unknown solver {} (valid solvers are 'svd', \"\n \"'lsqr', and 'eigen').\".format(self.solver))\n if self.classes_.size == 2: # treat binary case as a special case\n self.coef_ = np.array(self.coef_[1, :] - self.coef_[0, :], ndmin=2,\n dtype=X.dtype)\n self.intercept_ = np.array(self.intercept_[1] - self.intercept_[0],\n ndmin=1, dtype=X.dtype)\n return self\n\n def transform(self, X):\n \"\"\"Project data to maximize class separation.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n if self.solver == 'lsqr':\n raise NotImplementedError(\"transform not implemented for 'lsqr' \"\n \"solver (use 'svd' or 'eigen').\")\n check_is_fitted(self)\n\n X = check_array(X)\n if self.solver == 'svd':\n X_new = np.dot(X - self.xbar_, self.scalings_)\n elif self.solver == 'eigen':\n X_new = np.dot(X, self.scalings_)\n\n return X_new[:, :self._max_components]\n\n def predict_proba(self, X):\n \"\"\"Estimate probability.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n Returns\n -------\n C : ndarray of shape (n_samples, n_classes)\n Estimated probabilities.\n \"\"\"\n check_is_fitted(self)\n\n decision = self.decision_function(X)\n if self.classes_.size == 2:\n proba = expit(decision)\n return np.vstack([1-proba, proba]).T\n else:\n return softmax(decision)\n\n def predict_log_proba(self, X):\n \"\"\"Estimate log 
probability.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n Returns\n -------\n C : ndarray of shape (n_samples, n_classes)\n Estimated log probabilities.\n \"\"\"\n prediction = self.predict_proba(X)\n prediction[prediction == 0.0] += np.finfo(prediction.dtype).tiny\n return np.log(prediction)\n\n def decision_function(self, X):\n \"\"\"Apply decision function to an array of samples.\n\n The decision function is equal (up to a constant factor) to the\n log-posterior of the model, i.e. `log p(y = k | x)`. In a binary\n classification setting this instead corresponds to the difference\n `log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Array of samples (test vectors).\n\n Returns\n -------\n C : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Decision function values related to each class, per sample.\n In the two-class case, the shape is (n_samples,), giving the\n log likelihood ratio of the positive class.\n \"\"\"\n # Only override for the doc\n return super().decision_function(X)", + "instance_attributes": [ + { + "name": "solver", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "store_covariance", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "covariance_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "coef_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "xbar_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "priors_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis", + "name": "QuadraticDiscriminantAnalysis", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis", + "decorators": [], + "superclasses": ["ClassifierMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/__init__", + "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/fit", + "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/_decision_function", + "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/decision_function", + "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/predict", + "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/predict_proba", + "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/predict_log_proba" + ], + "is_public": true, + "reexported_by": [], + "description": "Quadratic Discriminant Analysis\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n *QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide `.", + "docstring": "Quadratic Discriminant Analysis\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. 
versionadded:: 0.17\n *QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npriors : ndarray of shape (n_classes,), default=None\n Class priors. By default, the class proportions are inferred from the\n training data.\n\nreg_param : float, default=0.0\n Regularizes the per-class covariance estimates by transforming S2 as\n ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,\n where S2 corresponds to the `scaling_` attribute of a given class.\n\nstore_covariance : bool, default=False\n If True, the class covariance matrices are explicitly computed and\n stored in the `self.covariance_` attribute.\n\n .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n Absolute threshold for a singular value to be considered significant,\n used to estimate the rank of `Xk` where `Xk` is the centered matrix\n of samples in class k. This parameter does not affect the\n predictions. It only controls a warning that is raised when features\n are considered to be collinear.\n\n .. versionadded:: 0.17\n\nAttributes\n----------\ncovariance_ : list of len n_classes of ndarray of shape (n_features, n_features)\n For each class, gives the covariance matrix estimated using the\n samples of that class. The estimations are unbiased. Only present if\n `store_covariance` is True.\n\nmeans_ : array-like of shape (n_classes, n_features)\n Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n Class priors (sum to 1).\n\nrotations_ : list of len n_classes of ndarray of shape (n_features, n_k)\n For each class k an array of shape (n_features, n_k), where\n ``n_k = min(n_features, number of elements in class k)``\n It is the rotation of the Gaussian distribution, i.e. its\n principal axis. It corresponds to `V`, the matrix of eigenvectors\n coming from the SVD of `Xk = U S Vt` where `Xk` is the centered\n matrix of samples from class k.\n\nscalings_ : list of len n_classes of ndarray of shape (n_k,)\n For each class, contains the scaling of\n the Gaussian distributions along its principal axes, i.e. the\n variance in the rotated coordinate system. It corresponds to `S^2 /\n (n_samples - 1)`, where `S` is the diagonal matrix of singular values\n from the SVD of `Xk`, where `Xk` is the centered matrix of samples\n from class k.\n\nclasses_ : ndarray of shape (n_classes,)\n Unique class labels.\n\nExamples\n--------\n>>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = QuadraticDiscriminantAnalysis()\n>>> clf.fit(X, y)\nQuadraticDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n\nSee Also\n--------\nLinearDiscriminantAnalysis : Linear Discriminant Analysis.", + "code": "class QuadraticDiscriminantAnalysis(ClassifierMixin, BaseEstimator):\n \"\"\"Quadratic Discriminant Analysis\n\n A classifier with a quadratic decision boundary, generated\n by fitting class conditional densities to the data\n and using Bayes' rule.\n\n The model fits a Gaussian density to each class.\n\n .. versionadded:: 0.17\n *QuadraticDiscriminantAnalysis*\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n priors : ndarray of shape (n_classes,), default=None\n Class priors. 
By default, the class proportions are inferred from the\n training data.\n\n reg_param : float, default=0.0\n Regularizes the per-class covariance estimates by transforming S2 as\n ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,\n where S2 corresponds to the `scaling_` attribute of a given class.\n\n store_covariance : bool, default=False\n If True, the class covariance matrices are explicitly computed and\n stored in the `self.covariance_` attribute.\n\n .. versionadded:: 0.17\n\n tol : float, default=1.0e-4\n Absolute threshold for a singular value to be considered significant,\n used to estimate the rank of `Xk` where `Xk` is the centered matrix\n of samples in class k. This parameter does not affect the\n predictions. It only controls a warning that is raised when features\n are considered to be collinear.\n\n .. versionadded:: 0.17\n\n Attributes\n ----------\n covariance_ : list of len n_classes of ndarray \\\n of shape (n_features, n_features)\n For each class, gives the covariance matrix estimated using the\n samples of that class. The estimations are unbiased. Only present if\n `store_covariance` is True.\n\n means_ : array-like of shape (n_classes, n_features)\n Class-wise means.\n\n priors_ : array-like of shape (n_classes,)\n Class priors (sum to 1).\n\n rotations_ : list of len n_classes of ndarray of shape (n_features, n_k)\n For each class k an array of shape (n_features, n_k), where\n ``n_k = min(n_features, number of elements in class k)``\n It is the rotation of the Gaussian distribution, i.e. its\n principal axis. It corresponds to `V`, the matrix of eigenvectors\n coming from the SVD of `Xk = U S Vt` where `Xk` is the centered\n matrix of samples from class k.\n\n scalings_ : list of len n_classes of ndarray of shape (n_k,)\n For each class, contains the scaling of\n the Gaussian distributions along its principal axes, i.e. the\n variance in the rotated coordinate system. It corresponds to `S^2 /\n (n_samples - 1)`, where `S` is the diagonal matrix of singular values\n from the SVD of `Xk`, where `Xk` is the centered matrix of samples\n from class k.\n\n classes_ : ndarray of shape (n_classes,)\n Unique class labels.\n\n Examples\n --------\n >>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n >>> import numpy as np\n >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n >>> y = np.array([1, 1, 1, 2, 2, 2])\n >>> clf = QuadraticDiscriminantAnalysis()\n >>> clf.fit(X, y)\n QuadraticDiscriminantAnalysis()\n >>> print(clf.predict([[-0.8, -1]]))\n [1]\n\n See Also\n --------\n LinearDiscriminantAnalysis : Linear Discriminant Analysis.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, priors=None, reg_param=0., store_covariance=False,\n tol=1.0e-4):\n self.priors = np.asarray(priors) if priors is not None else None\n self.reg_param = reg_param\n self.store_covariance = store_covariance\n self.tol = tol\n\n def fit(self, X, y):\n \"\"\"Fit the model according to the given training data and parameters.\n\n .. versionchanged:: 0.19\n ``store_covariances`` has been moved to main constructor as\n ``store_covariance``\n\n .. 
versionchanged:: 0.19\n ``tol`` has been moved to main constructor.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values (integers)\n \"\"\"\n X, y = self._validate_data(X, y)\n check_classification_targets(y)\n self.classes_, y = np.unique(y, return_inverse=True)\n n_samples, n_features = X.shape\n n_classes = len(self.classes_)\n if n_classes < 2:\n raise ValueError('The number of classes has to be greater than'\n ' one; got %d class' % (n_classes))\n if self.priors is None:\n self.priors_ = np.bincount(y) / float(n_samples)\n else:\n self.priors_ = self.priors\n\n cov = None\n store_covariance = self.store_covariance\n if store_covariance:\n cov = []\n means = []\n scalings = []\n rotations = []\n for ind in range(n_classes):\n Xg = X[y == ind, :]\n meang = Xg.mean(0)\n means.append(meang)\n if len(Xg) == 1:\n raise ValueError('y has only 1 sample in class %s, covariance '\n 'is ill defined.' % str(self.classes_[ind]))\n Xgc = Xg - meang\n # Xgc = U * S * V.T\n _, S, Vt = np.linalg.svd(Xgc, full_matrices=False)\n rank = np.sum(S > self.tol)\n if rank < n_features:\n warnings.warn(\"Variables are collinear\")\n S2 = (S ** 2) / (len(Xg) - 1)\n S2 = ((1 - self.reg_param) * S2) + self.reg_param\n if self.store_covariance or store_covariance:\n # cov = V * (S^2 / (n-1)) * V.T\n cov.append(np.dot(S2 * Vt.T, Vt))\n scalings.append(S2)\n rotations.append(Vt.T)\n if self.store_covariance or store_covariance:\n self.covariance_ = cov\n self.means_ = np.asarray(means)\n self.scalings_ = scalings\n self.rotations_ = rotations\n return self\n\n def _decision_function(self, X):\n # return log posterior, see eq (4.12) p. 110 of the ESL.\n check_is_fitted(self)\n\n X = check_array(X)\n norm2 = []\n for i in range(len(self.classes_)):\n R = self.rotations_[i]\n S = self.scalings_[i]\n Xm = X - self.means_[i]\n X2 = np.dot(Xm, R * (S ** (-0.5)))\n norm2.append(np.sum(X2 ** 2, axis=1))\n norm2 = np.array(norm2).T # shape = [len(X), n_classes]\n u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])\n return (-0.5 * (norm2 + u) + np.log(self.priors_))\n\n def decision_function(self, X):\n \"\"\"Apply decision function to an array of samples.\n\n The decision function is equal (up to a constant factor) to the\n log-posterior of the model, i.e. `log p(y = k | x)`. In a binary\n classification setting this instead corresponds to the difference\n `log p(y = 1 | x) - log p(y = 0 | x)`. 
See :ref:`lda_qda_math`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Array of samples (test vectors).\n\n Returns\n -------\n C : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Decision function values related to each class, per sample.\n In the two-class case, the shape is (n_samples,), giving the\n log likelihood ratio of the positive class.\n \"\"\"\n dec_func = self._decision_function(X)\n # handle special case of two classes\n if len(self.classes_) == 2:\n return dec_func[:, 1] - dec_func[:, 0]\n return dec_func\n\n def predict(self, X):\n \"\"\"Perform classification on an array of test vectors X.\n\n The predicted class C for each sample in X is returned.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n C : ndarray of shape (n_samples,)\n \"\"\"\n d = self._decision_function(X)\n y_pred = self.classes_.take(d.argmax(1))\n return y_pred\n\n def predict_proba(self, X):\n \"\"\"Return posterior probabilities of classification.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Array of samples/test vectors.\n\n Returns\n -------\n C : ndarray of shape (n_samples, n_classes)\n Posterior probabilities of classification per class.\n \"\"\"\n values = self._decision_function(X)\n # compute the likelihood of the underlying gaussian models\n # up to a multiplicative constant.\n likelihood = np.exp(values - values.max(axis=1)[:, np.newaxis])\n # compute posterior probabilities\n return likelihood / likelihood.sum(axis=1)[:, np.newaxis]\n\n def predict_log_proba(self, X):\n \"\"\"Return log of posterior probabilities of classification.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Array of samples/test vectors.\n\n Returns\n -------\n C : ndarray of shape (n_samples, n_classes)\n Posterior log-probabilities of classification per class.\n \"\"\"\n # XXX : can do better to avoid precision overflows\n probas_ = self.predict_proba(X)\n return np.log(probas_)", + "instance_attributes": [ + { + "name": "reg_param", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "store_covariance", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "priors_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "scalings_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "rotations_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier", + "name": "DummyClassifier", + "qname": "sklearn.dummy.DummyClassifier", + "decorators": [], + "superclasses": ["MultiOutputMixin", "ClassifierMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.dummy/DummyClassifier/__init__", + "scikit-learn/sklearn.dummy/DummyClassifier/fit", + "scikit-learn/sklearn.dummy/DummyClassifier/predict", + "scikit-learn/sklearn.dummy/DummyClassifier/predict_proba", + "scikit-learn/sklearn.dummy/DummyClassifier/predict_log_proba", + "scikit-learn/sklearn.dummy/DummyClassifier/_more_tags", + "scikit-learn/sklearn.dummy/DummyClassifier/score" + ], + "is_public": true, + "reexported_by": [], + "description": "DummyClassifier is a classifier that makes predictions using simple rules.\n\nThis classifier is useful as a simple baseline to compare with other\n(real) classifiers. 
Do not use it for real problems.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "docstring": "DummyClassifier is a classifier that makes predictions using simple rules.\n\nThis classifier is useful as a simple baseline to compare with other\n(real) classifiers. Do not use it for real problems.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nstrategy : {\"stratified\", \"most_frequent\", \"prior\", \"uniform\", \"constant\"}, default=\"prior\"\n Strategy to use to generate predictions.\n\n * \"stratified\": generates predictions by respecting the training\n set's class distribution.\n * \"most_frequent\": always predicts the most frequent label in the\n training set.\n * \"prior\": always predicts the class that maximizes the class prior\n (like \"most_frequent\") and ``predict_proba`` returns the class prior.\n * \"uniform\": generates predictions uniformly at random.\n * \"constant\": always predicts a constant label that is provided by\n the user. This is useful for metrics that evaluate a non-majority\n class\n\n .. versionchanged:: 0.24\n The default value of `strategy` has changed to \"prior\" in version\n 0.24.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness to generate the predictions when\n ``strategy='stratified'`` or ``strategy='uniform'``.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nconstant : int or str or array-like of shape (n_outputs,)\n The explicit constant as predicted by the \"constant\" strategy. This\n parameter is useful only for the \"constant\" strategy.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of such arrays\n Class labels for each output.\n\nn_classes_ : int or list of int\n Number of label for each output.\n\nclass_prior_ : ndarray of shape (n_classes,) or list of such arrays\n Probability of each class for each output.\n\nn_outputs_ : int\n Number of outputs.\n\nsparse_output_ : bool\n True if the array returned from predict is to be in sparse CSC format.\n Is automatically set to True if the input y is passed in sparse format.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.dummy import DummyClassifier\n>>> X = np.array([-1, 1, 1, 1])\n>>> y = np.array([0, 1, 1, 1])\n>>> dummy_clf = DummyClassifier(strategy=\"most_frequent\")\n>>> dummy_clf.fit(X, y)\nDummyClassifier(strategy='most_frequent')\n>>> dummy_clf.predict(X)\narray([1, 1, 1, 1])\n>>> dummy_clf.score(X, y)\n0.75", + "code": "class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator):\n \"\"\"\n DummyClassifier is a classifier that makes predictions using simple rules.\n\n This classifier is useful as a simple baseline to compare with other\n (real) classifiers. Do not use it for real problems.\n\n Read more in the :ref:`User Guide `.\n\n .. 
versionadded:: 0.13\n\n Parameters\n ----------\n strategy : {\"stratified\", \"most_frequent\", \"prior\", \"uniform\", \\\n \"constant\"}, default=\"prior\"\n Strategy to use to generate predictions.\n\n * \"stratified\": generates predictions by respecting the training\n set's class distribution.\n * \"most_frequent\": always predicts the most frequent label in the\n training set.\n * \"prior\": always predicts the class that maximizes the class prior\n (like \"most_frequent\") and ``predict_proba`` returns the class prior.\n * \"uniform\": generates predictions uniformly at random.\n * \"constant\": always predicts a constant label that is provided by\n the user. This is useful for metrics that evaluate a non-majority\n class\n\n .. versionchanged:: 0.24\n The default value of `strategy` has changed to \"prior\" in version\n 0.24.\n\n random_state : int, RandomState instance or None, default=None\n Controls the randomness to generate the predictions when\n ``strategy='stratified'`` or ``strategy='uniform'``.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n constant : int or str or array-like of shape (n_outputs,)\n The explicit constant as predicted by the \"constant\" strategy. This\n parameter is useful only for the \"constant\" strategy.\n\n Attributes\n ----------\n classes_ : ndarray of shape (n_classes,) or list of such arrays\n Class labels for each output.\n\n n_classes_ : int or list of int\n Number of label for each output.\n\n class_prior_ : ndarray of shape (n_classes,) or list of such arrays\n Probability of each class for each output.\n\n n_outputs_ : int\n Number of outputs.\n\n sparse_output_ : bool\n True if the array returned from predict is to be in sparse CSC format.\n Is automatically set to True if the input y is passed in sparse format.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.dummy import DummyClassifier\n >>> X = np.array([-1, 1, 1, 1])\n >>> y = np.array([0, 1, 1, 1])\n >>> dummy_clf = DummyClassifier(strategy=\"most_frequent\")\n >>> dummy_clf.fit(X, y)\n DummyClassifier(strategy='most_frequent')\n >>> dummy_clf.predict(X)\n array([1, 1, 1, 1])\n >>> dummy_clf.score(X, y)\n 0.75\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, strategy=\"prior\", random_state=None,\n constant=None):\n self.strategy = strategy\n self.random_state = random_state\n self.constant = constant\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the random classifier.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n self : object\n \"\"\"\n allowed_strategies = (\"most_frequent\", \"stratified\", \"uniform\",\n \"constant\", \"prior\")\n\n if self.strategy not in allowed_strategies:\n raise ValueError(\"Unknown strategy type: %s, expected one of %s.\"\n % (self.strategy, allowed_strategies))\n\n self._strategy = self.strategy\n\n if self._strategy == \"uniform\" and sp.issparse(y):\n y = y.toarray()\n warnings.warn('A local copy of the target data has been converted '\n 'to a numpy array. 
Predicting on sparse target data '\n 'with the uniform strategy would not save memory '\n 'and would be slower.',\n UserWarning)\n\n self.sparse_output_ = sp.issparse(y)\n\n if not self.sparse_output_:\n y = np.asarray(y)\n y = np.atleast_1d(y)\n\n if y.ndim == 1:\n y = np.reshape(y, (-1, 1))\n\n self.n_outputs_ = y.shape[1]\n\n self.n_features_in_ = None # No input validation is done for X\n\n check_consistent_length(X, y)\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n\n if self._strategy == \"constant\":\n if self.constant is None:\n raise ValueError(\"Constant target value has to be specified \"\n \"when the constant strategy is used.\")\n else:\n constant = np.reshape(np.atleast_1d(self.constant), (-1, 1))\n if constant.shape[0] != self.n_outputs_:\n raise ValueError(\"Constant target value should have \"\n \"shape (%d, 1).\" % self.n_outputs_)\n\n (self.classes_,\n self.n_classes_,\n self.class_prior_) = class_distribution(y, sample_weight)\n\n if self._strategy == \"constant\":\n for k in range(self.n_outputs_):\n if not any(constant[k][0] == c for c in self.classes_[k]):\n # Checking in case of constant strategy if the constant\n # provided by the user is in y.\n err_msg = (\"The constant target value must be present in \"\n \"the training data. You provided constant={}. \"\n \"Possible values are: {}.\"\n .format(self.constant, list(self.classes_[k])))\n raise ValueError(err_msg)\n\n if self.n_outputs_ == 1:\n self.n_classes_ = self.n_classes_[0]\n self.classes_ = self.classes_[0]\n self.class_prior_ = self.class_prior_[0]\n\n return self\n\n def predict(self, X):\n \"\"\"Perform classification on test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test data.\n\n Returns\n -------\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Predicted target values for X.\n \"\"\"\n check_is_fitted(self)\n\n # numpy random_state expects Python int and not long as size argument\n # under Windows\n n_samples = _num_samples(X)\n rs = check_random_state(self.random_state)\n\n n_classes_ = self.n_classes_\n classes_ = self.classes_\n class_prior_ = self.class_prior_\n constant = self.constant\n if self.n_outputs_ == 1:\n # Get same type even for self.n_outputs_ == 1\n n_classes_ = [n_classes_]\n classes_ = [classes_]\n class_prior_ = [class_prior_]\n constant = [constant]\n # Compute probability only once\n if self._strategy == \"stratified\":\n proba = self.predict_proba(X)\n if self.n_outputs_ == 1:\n proba = [proba]\n\n if self.sparse_output_:\n class_prob = None\n if self._strategy in (\"most_frequent\", \"prior\"):\n classes_ = [np.array([cp.argmax()]) for cp in class_prior_]\n\n elif self._strategy == \"stratified\":\n class_prob = class_prior_\n\n elif self._strategy == \"uniform\":\n raise ValueError(\"Sparse target prediction is not \"\n \"supported with the uniform strategy\")\n\n elif self._strategy == \"constant\":\n classes_ = [np.array([c]) for c in constant]\n\n y = _random_choice_csc(n_samples, classes_, class_prob,\n self.random_state)\n else:\n if self._strategy in (\"most_frequent\", \"prior\"):\n y = np.tile([classes_[k][class_prior_[k].argmax()] for\n k in range(self.n_outputs_)], [n_samples, 1])\n\n elif self._strategy == \"stratified\":\n y = np.vstack([classes_[k][proba[k].argmax(axis=1)] for\n k in range(self.n_outputs_)]).T\n\n elif self._strategy == \"uniform\":\n ret = [classes_[k][rs.randint(n_classes_[k], size=n_samples)]\n for k in range(self.n_outputs_)]\n y = 
np.vstack(ret).T\n\n elif self._strategy == \"constant\":\n y = np.tile(self.constant, (n_samples, 1))\n\n if self.n_outputs_ == 1:\n y = np.ravel(y)\n\n return y\n\n def predict_proba(self, X):\n \"\"\"\n Return probability estimates for the test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test data.\n\n Returns\n -------\n P : ndarray of shape (n_samples, n_classes) or list of such arrays\n Returns the probability of the sample for each class in\n the model, where classes are ordered arithmetically, for each\n output.\n \"\"\"\n check_is_fitted(self)\n\n # numpy random_state expects Python int and not long as size argument\n # under Windows\n n_samples = _num_samples(X)\n rs = check_random_state(self.random_state)\n\n n_classes_ = self.n_classes_\n classes_ = self.classes_\n class_prior_ = self.class_prior_\n constant = self.constant\n if self.n_outputs_ == 1:\n # Get same type even for self.n_outputs_ == 1\n n_classes_ = [n_classes_]\n classes_ = [classes_]\n class_prior_ = [class_prior_]\n constant = [constant]\n\n P = []\n for k in range(self.n_outputs_):\n if self._strategy == \"most_frequent\":\n ind = class_prior_[k].argmax()\n out = np.zeros((n_samples, n_classes_[k]), dtype=np.float64)\n out[:, ind] = 1.0\n elif self._strategy == \"prior\":\n out = np.ones((n_samples, 1)) * class_prior_[k]\n\n elif self._strategy == \"stratified\":\n out = rs.multinomial(1, class_prior_[k], size=n_samples)\n out = out.astype(np.float64)\n\n elif self._strategy == \"uniform\":\n out = np.ones((n_samples, n_classes_[k]), dtype=np.float64)\n out /= n_classes_[k]\n\n elif self._strategy == \"constant\":\n ind = np.where(classes_[k] == constant[k])\n out = np.zeros((n_samples, n_classes_[k]), dtype=np.float64)\n out[:, ind] = 1.0\n\n P.append(out)\n\n if self.n_outputs_ == 1:\n P = P[0]\n\n return P\n\n def predict_log_proba(self, X):\n \"\"\"\n Return log probability estimates for the test vectors X.\n\n Parameters\n ----------\n X : {array-like, object with finite length or shape}\n Training data, requires length = n_samples\n\n Returns\n -------\n P : ndarray of shape (n_samples, n_classes) or list of such arrays\n Returns the log probability of the sample for each class in\n the model, where classes are ordered arithmetically for each\n output.\n \"\"\"\n proba = self.predict_proba(X)\n if self.n_outputs_ == 1:\n return np.log(proba)\n else:\n return [np.log(p) for p in proba]\n\n def _more_tags(self):\n return {\n 'poor_score': True, 'no_validation': True,\n '_xfail_checks': {\n 'check_methods_subset_invariance':\n 'fails for the predict method',\n 'check_methods_sample_order_invariance':\n 'fails for the predict method'\n }\n }\n\n def score(self, X, y, sample_weight=None):\n \"\"\"Returns the mean accuracy on the given test data and labels.\n\n In multi-label classification, this is the subset accuracy\n which is a harsh metric since you require for each sample that\n each label set be correctly predicted.\n\n Parameters\n ----------\n X : None or array-like of shape (n_samples, n_features)\n Test samples. Passing None as test samples gives the same result\n as passing real test samples, since DummyClassifier\n operates independently of the sampled observations.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Mean accuracy of self.predict(X) wrt. 
y.\n\n \"\"\"\n if X is None:\n X = np.zeros(shape=(len(y), 1))\n return super().score(X, y, sample_weight)", + "instance_attributes": [ + { + "name": "strategy", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "_strategy", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor", + "name": "DummyRegressor", + "qname": "sklearn.dummy.DummyRegressor", + "decorators": [], + "superclasses": ["MultiOutputMixin", "RegressorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.dummy/DummyRegressor/__init__", + "scikit-learn/sklearn.dummy/DummyRegressor/fit", + "scikit-learn/sklearn.dummy/DummyRegressor/predict", + "scikit-learn/sklearn.dummy/DummyRegressor/_more_tags", + "scikit-learn/sklearn.dummy/DummyRegressor/score" + ], + "is_public": true, + "reexported_by": [], + "description": "DummyRegressor is a regressor that makes predictions using\nsimple rules.\n\nThis regressor is useful as a simple baseline to compare with other\n(real) regressors. Do not use it for real problems.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "docstring": "DummyRegressor is a regressor that makes predictions using\nsimple rules.\n\nThis regressor is useful as a simple baseline to compare with other\n(real) regressors. Do not use it for real problems.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nstrategy : {\"mean\", \"median\", \"quantile\", \"constant\"}, default=\"mean\"\n Strategy to use to generate predictions.\n\n * \"mean\": always predicts the mean of the training set\n * \"median\": always predicts the median of the training set\n * \"quantile\": always predicts a specified quantile of the training set,\n provided with the quantile parameter.\n * \"constant\": always predicts a constant value that is provided by\n the user.\n\nconstant : int or float or array-like of shape (n_outputs,), default=None\n The explicit constant as predicted by the \"constant\" strategy. This\n parameter is useful only for the \"constant\" strategy.\n\nquantile : float in [0.0, 1.0], default=None\n The quantile to predict using the \"quantile\" strategy. A quantile of\n 0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the\n maximum.\n\nAttributes\n----------\nconstant_ : ndarray of shape (1, n_outputs)\n Mean or median or quantile of the training targets or constant value\n given by the user.\n\nn_outputs_ : int\n Number of outputs.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.dummy import DummyRegressor\n>>> X = np.array([1.0, 2.0, 3.0, 4.0])\n>>> y = np.array([2.0, 3.0, 5.0, 10.0])\n>>> dummy_regr = DummyRegressor(strategy=\"mean\")\n>>> dummy_regr.fit(X, y)\nDummyRegressor()\n>>> dummy_regr.predict(X)\narray([5., 5., 5., 5.])\n>>> dummy_regr.score(X, y)\n0.0", + "code": "class DummyRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):\n \"\"\"\n DummyRegressor is a regressor that makes predictions using\n simple rules.\n\n This regressor is useful as a simple baseline to compare with other\n (real) regressors. Do not use it for real problems.\n\n Read more in the :ref:`User Guide `.\n\n .. 
versionadded:: 0.13\n\n Parameters\n ----------\n strategy : {\"mean\", \"median\", \"quantile\", \"constant\"}, default=\"mean\"\n Strategy to use to generate predictions.\n\n * \"mean\": always predicts the mean of the training set\n * \"median\": always predicts the median of the training set\n * \"quantile\": always predicts a specified quantile of the training set,\n provided with the quantile parameter.\n * \"constant\": always predicts a constant value that is provided by\n the user.\n\n constant : int or float or array-like of shape (n_outputs,), default=None\n The explicit constant as predicted by the \"constant\" strategy. This\n parameter is useful only for the \"constant\" strategy.\n\n quantile : float in [0.0, 1.0], default=None\n The quantile to predict using the \"quantile\" strategy. A quantile of\n 0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the\n maximum.\n\n Attributes\n ----------\n constant_ : ndarray of shape (1, n_outputs)\n Mean or median or quantile of the training targets or constant value\n given by the user.\n\n n_outputs_ : int\n Number of outputs.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.dummy import DummyRegressor\n >>> X = np.array([1.0, 2.0, 3.0, 4.0])\n >>> y = np.array([2.0, 3.0, 5.0, 10.0])\n >>> dummy_regr = DummyRegressor(strategy=\"mean\")\n >>> dummy_regr.fit(X, y)\n DummyRegressor()\n >>> dummy_regr.predict(X)\n array([5., 5., 5., 5.])\n >>> dummy_regr.score(X, y)\n 0.0\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, strategy=\"mean\", constant=None, quantile=None):\n self.strategy = strategy\n self.constant = constant\n self.quantile = quantile\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the random regressor.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n self : object\n \"\"\"\n allowed_strategies = (\"mean\", \"median\", \"quantile\", \"constant\")\n if self.strategy not in allowed_strategies:\n raise ValueError(\"Unknown strategy type: %s, expected one of %s.\"\n % (self.strategy, allowed_strategies))\n\n y = check_array(y, ensure_2d=False)\n self.n_features_in_ = None # No input validation is done for X\n if len(y) == 0:\n raise ValueError(\"y must not be empty.\")\n\n if y.ndim == 1:\n y = np.reshape(y, (-1, 1))\n self.n_outputs_ = y.shape[1]\n\n check_consistent_length(X, y, sample_weight)\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n\n if self.strategy == \"mean\":\n self.constant_ = np.average(y, axis=0, weights=sample_weight)\n\n elif self.strategy == \"median\":\n if sample_weight is None:\n self.constant_ = np.median(y, axis=0)\n else:\n self.constant_ = [_weighted_percentile(y[:, k], sample_weight,\n percentile=50.)\n for k in range(self.n_outputs_)]\n\n elif self.strategy == \"quantile\":\n if self.quantile is None or not np.isscalar(self.quantile):\n raise ValueError(\"Quantile must be a scalar in the range \"\n \"[0.0, 1.0], but got %s.\" % self.quantile)\n\n percentile = self.quantile * 100.0\n if sample_weight is None:\n self.constant_ = np.percentile(y, axis=0, q=percentile)\n else:\n self.constant_ = [_weighted_percentile(y[:, k], sample_weight,\n percentile=percentile)\n for k in range(self.n_outputs_)]\n\n elif self.strategy == \"constant\":\n if self.constant is 
None:\n raise TypeError(\"Constant target value has to be specified \"\n \"when the constant strategy is used.\")\n\n self.constant = check_array(self.constant,\n accept_sparse=['csr', 'csc', 'coo'],\n ensure_2d=False, ensure_min_samples=0)\n\n if self.n_outputs_ != 1 and self.constant.shape[0] != y.shape[1]:\n raise ValueError(\n \"Constant target value should have \"\n \"shape (%d, 1).\" % y.shape[1])\n\n self.constant_ = self.constant\n\n self.constant_ = np.reshape(self.constant_, (1, -1))\n return self\n\n def predict(self, X, return_std=False):\n \"\"\"\n Perform classification on test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test data.\n\n return_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n All zeros in this case.\n\n .. versionadded:: 0.20\n\n Returns\n -------\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Predicted target values for X.\n\n y_std : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Standard deviation of predictive distribution of query points.\n \"\"\"\n check_is_fitted(self)\n n_samples = _num_samples(X)\n\n y = np.full((n_samples, self.n_outputs_), self.constant_,\n dtype=np.array(self.constant_).dtype)\n y_std = np.zeros((n_samples, self.n_outputs_))\n\n if self.n_outputs_ == 1:\n y = np.ravel(y)\n y_std = np.ravel(y_std)\n\n return (y, y_std) if return_std else y\n\n def _more_tags(self):\n return {'poor_score': True, 'no_validation': True}\n\n def score(self, X, y, sample_weight=None):\n \"\"\"Returns the coefficient of determination R^2 of the prediction.\n\n The coefficient R^2 is defined as (1 - u/v), where u is the residual\n sum of squares ((y_true - y_pred) ** 2).sum() and v is the total\n sum of squares ((y_true - y_true.mean()) ** 2).sum().\n The best possible score is 1.0 and it can be negative (because the\n model can be arbitrarily worse). A constant model that always\n predicts the expected value of y, disregarding the input features,\n would get a R^2 score of 0.0.\n\n Parameters\n ----------\n X : None or array-like of shape (n_samples, n_features)\n Test samples. Passing None as test samples gives the same result\n as passing real test samples, since DummyRegressor\n operates independently of the sampled observations.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True values for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n R^2 of self.predict(X) wrt. 
y.\n \"\"\"\n if X is None:\n X = np.zeros(shape=(len(y), 1))\n return super().score(X, y, sample_weight)", + "instance_attributes": [ + { + "name": "strategy", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "constant_", + "types": { + "kind": "NamedType", + "name": "tuple" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier", + "name": "BaggingClassifier", + "qname": "sklearn.ensemble._bagging.BaggingClassifier", + "decorators": [], + "superclasses": ["ClassifierMixin", "BaseBagging"], + "methods": [ + "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__", + "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/_validate_estimator", + "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/_set_oob_score", + "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/_validate_y", + "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/predict", + "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/predict_proba", + "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/predict_log_proba", + "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/decision_function" + ], + "is_public": false, + "reexported_by": [], + "description": "A Bagging classifier.\n\nA Bagging classifier is an ensemble meta-estimator that fits base\nclassifiers each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15", + "docstring": "A Bagging classifier.\n\nA Bagging classifier is an ensemble meta-estimator that fits base\nclassifiers each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.15\n\nParameters\n----------\nbase_estimator : object, default=None\n The base estimator to fit on random subsets of the dataset.\n If None, then the base estimator is a\n :class:`~sklearn.tree.DecisionTreeClassifier`.\n\nn_estimators : int, default=10\n The number of base estimators in the ensemble.\n\nmax_samples : int or float, default=1.0\n The number of samples to draw from X to train each base estimator (with\n replacement by default, see `bootstrap` for more details).\n\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n\nmax_features : int or float, default=1.0\n The number of features to draw from X to train each base estimator (\n without replacement by default, see `bootstrap_features` for more\n details).\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features.\n\nbootstrap : bool, default=True\n Whether samples are drawn with replacement. If False, sampling\n without replacement is performed.\n\nbootstrap_features : bool, default=False\n Whether features are drawn with replacement.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate\n the generalization error. Only available if bootstrap=True.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit\n a whole new ensemble. See :term:`the Glossary `.\n\n .. versionadded:: 0.17\n *warm_start* constructor parameter.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for both :meth:`fit` and\n :meth:`predict`. ``None`` means 1 unless in a\n :obj:`joblib.parallel_backend` context. ``-1`` means using all\n processors. See :term:`Glossary ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random resampling of the original dataset\n (sample wise and feature wise).\n If the base estimator accepts a `random_state` attribute, a different\n seed is generated for each instance in the ensemble.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nn_features_ : int\n The number of features when :meth:`fit` is performed.\n\nestimators_ : list of estimators\n The collection of fitted base estimators.\n\nestimators_samples_ : list of arrays\n The subset of drawn samples (i.e., the in-bag samples) for each base\n estimator. Each subset is defined by an array of the indices selected.\n\nestimators_features_ : list of arrays\n The subset of drawn features for each base estimator.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nn_classes_ : int or list\n The number of classes.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes)\n Decision function computed with out-of-bag estimate on the training\n set. If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_decision_function_` might contain NaN. 
This attribute exists\n only when ``oob_score`` is True.\n\nExamples\n--------\n>>> from sklearn.svm import SVC\n>>> from sklearn.ensemble import BaggingClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=100, n_features=4,\n... n_informative=2, n_redundant=0,\n... random_state=0, shuffle=False)\n>>> clf = BaggingClassifier(base_estimator=SVC(),\n... n_estimators=10, random_state=0).fit(X, y)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])\n\nReferences\n----------\n\n.. [1] L. Breiman, \"Pasting small votes for classification in large\n databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n 1996.\n\n.. [3] T. Ho, \"The random subspace method for constructing decision\n forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n 1998.\n\n.. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n Learning and Knowledge Discovery in Databases, 346-361, 2012.", + "code": "class BaggingClassifier(ClassifierMixin, BaseBagging):\n \"\"\"A Bagging classifier.\n\n A Bagging classifier is an ensemble meta-estimator that fits base\n classifiers each on random subsets of the original dataset and then\n aggregate their individual predictions (either by voting or by averaging)\n to form a final prediction. Such a meta-estimator can typically be used as\n a way to reduce the variance of a black-box estimator (e.g., a decision\n tree), by introducing randomization into its construction procedure and\n then making an ensemble out of it.\n\n This algorithm encompasses several works from the literature. When random\n subsets of the dataset are drawn as random subsets of the samples, then\n this algorithm is known as Pasting [1]_. If samples are drawn with\n replacement, then the method is known as Bagging [2]_. When random subsets\n of the dataset are drawn as random subsets of the features, then the method\n is known as Random Subspaces [3]_. Finally, when base estimators are built\n on subsets of both samples and features, then the method is known as\n Random Patches [4]_.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.15\n\n Parameters\n ----------\n base_estimator : object, default=None\n The base estimator to fit on random subsets of the dataset.\n If None, then the base estimator is a\n :class:`~sklearn.tree.DecisionTreeClassifier`.\n\n n_estimators : int, default=10\n The number of base estimators in the ensemble.\n\n max_samples : int or float, default=1.0\n The number of samples to draw from X to train each base estimator (with\n replacement by default, see `bootstrap` for more details).\n\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n\n max_features : int or float, default=1.0\n The number of features to draw from X to train each base estimator (\n without replacement by default, see `bootstrap_features` for more\n details).\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features.\n\n bootstrap : bool, default=True\n Whether samples are drawn with replacement. If False, sampling\n without replacement is performed.\n\n bootstrap_features : bool, default=False\n Whether features are drawn with replacement.\n\n oob_score : bool, default=False\n Whether to use out-of-bag samples to estimate\n the generalization error. 
Only available if bootstrap=True.\n\n warm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit\n a whole new ensemble. See :term:`the Glossary `.\n\n .. versionadded:: 0.17\n *warm_start* constructor parameter.\n\n n_jobs : int, default=None\n The number of jobs to run in parallel for both :meth:`fit` and\n :meth:`predict`. ``None`` means 1 unless in a\n :obj:`joblib.parallel_backend` context. ``-1`` means using all\n processors. See :term:`Glossary ` for more details.\n\n random_state : int, RandomState instance or None, default=None\n Controls the random resampling of the original dataset\n (sample wise and feature wise).\n If the base estimator accepts a `random_state` attribute, a different\n seed is generated for each instance in the ensemble.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n verbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\n Attributes\n ----------\n base_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\n n_features_ : int\n The number of features when :meth:`fit` is performed.\n\n estimators_ : list of estimators\n The collection of fitted base estimators.\n\n estimators_samples_ : list of arrays\n The subset of drawn samples (i.e., the in-bag samples) for each base\n estimator. Each subset is defined by an array of the indices selected.\n\n estimators_features_ : list of arrays\n The subset of drawn features for each base estimator.\n\n classes_ : ndarray of shape (n_classes,)\n The classes labels.\n\n n_classes_ : int or list\n The number of classes.\n\n oob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\n oob_decision_function_ : ndarray of shape (n_samples, n_classes)\n Decision function computed with out-of-bag estimate on the training\n set. If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_decision_function_` might contain NaN. This attribute exists\n only when ``oob_score`` is True.\n\n Examples\n --------\n >>> from sklearn.svm import SVC\n >>> from sklearn.ensemble import BaggingClassifier\n >>> from sklearn.datasets import make_classification\n >>> X, y = make_classification(n_samples=100, n_features=4,\n ... n_informative=2, n_redundant=0,\n ... random_state=0, shuffle=False)\n >>> clf = BaggingClassifier(base_estimator=SVC(),\n ... n_estimators=10, random_state=0).fit(X, y)\n >>> clf.predict([[0, 0, 0, 0]])\n array([1])\n\n References\n ----------\n\n .. [1] L. Breiman, \"Pasting small votes for classification in large\n databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n .. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n 1996.\n\n .. [3] T. Ho, \"The random subspace method for constructing decision\n forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n 1998.\n\n .. [4] G. Louppe and P. 
Geurts, \"Ensembles on Random Patches\", Machine\n Learning and Knowledge Discovery in Databases, 346-361, 2012.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self,\n base_estimator=None,\n n_estimators=10, *,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n warm_start=False,\n n_jobs=None,\n random_state=None,\n verbose=0):\n\n super().__init__(\n base_estimator,\n n_estimators=n_estimators,\n max_samples=max_samples,\n max_features=max_features,\n bootstrap=bootstrap,\n bootstrap_features=bootstrap_features,\n oob_score=oob_score,\n warm_start=warm_start,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose)\n\n def _validate_estimator(self):\n \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n super()._validate_estimator(\n default=DecisionTreeClassifier())\n\n def _set_oob_score(self, X, y):\n n_samples = y.shape[0]\n n_classes_ = self.n_classes_\n\n predictions = np.zeros((n_samples, n_classes_))\n\n for estimator, samples, features in zip(self.estimators_,\n self.estimators_samples_,\n self.estimators_features_):\n # Create mask for OOB samples\n mask = ~indices_to_mask(samples, n_samples)\n\n if hasattr(estimator, \"predict_proba\"):\n predictions[mask, :] += estimator.predict_proba(\n (X[mask, :])[:, features])\n\n else:\n p = estimator.predict((X[mask, :])[:, features])\n j = 0\n\n for i in range(n_samples):\n if mask[i]:\n predictions[i, p[j]] += 1\n j += 1\n\n if (predictions.sum(axis=1) == 0).any():\n warn(\"Some inputs do not have OOB scores. \"\n \"This probably means too few estimators were used \"\n \"to compute any reliable oob estimates.\")\n\n oob_decision_function = (predictions /\n predictions.sum(axis=1)[:, np.newaxis])\n oob_score = accuracy_score(y, np.argmax(predictions, axis=1))\n\n self.oob_decision_function_ = oob_decision_function\n self.oob_score_ = oob_score\n\n def _validate_y(self, y):\n y = column_or_1d(y, warn=True)\n check_classification_targets(y)\n self.classes_, y = np.unique(y, return_inverse=True)\n self.n_classes_ = len(self.classes_)\n\n return y\n\n def predict(self, X):\n \"\"\"Predict class for X.\n\n The predicted class of an input sample is computed as the class with\n the highest mean predicted probability. If base estimators do not\n implement a ``predict_proba`` method, then it resorts to voting.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n The predicted classes.\n \"\"\"\n predicted_probability = self.predict_proba(X)\n return self.classes_.take((np.argmax(predicted_probability, axis=1)),\n axis=0)\n\n def predict_proba(self, X):\n \"\"\"Predict class probabilities for X.\n\n The predicted class probabilities of an input sample are computed as\n the mean predicted class probabilities of the base estimators in the\n ensemble. If base estimators do not implement a ``predict_proba``\n method, then it resorts to voting and the predicted class probabilities\n of an input sample represent the proportion of estimators predicting\n each class.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. 
Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n # Check data\n X = check_array(\n X, accept_sparse=['csr', 'csc'], dtype=None,\n force_all_finite=False\n )\n\n if self.n_features_ != X.shape[1]:\n raise ValueError(\"Number of features of the model must \"\n \"match the input. Model n_features is {0} and \"\n \"input n_features is {1}.\"\n \"\".format(self.n_features_, X.shape[1]))\n\n # Parallel loop\n n_jobs, n_estimators, starts = _partition_estimators(self.n_estimators,\n self.n_jobs)\n\n all_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose,\n **self._parallel_args())(\n delayed(_parallel_predict_proba)(\n self.estimators_[starts[i]:starts[i + 1]],\n self.estimators_features_[starts[i]:starts[i + 1]],\n X,\n self.n_classes_)\n for i in range(n_jobs))\n\n # Reduce\n proba = sum(all_proba) / self.n_estimators\n\n return proba\n\n def predict_log_proba(self, X):\n \"\"\"Predict class log-probabilities for X.\n\n The predicted class log-probabilities of an input sample is computed as\n the log of the mean predicted class probabilities of the base\n estimators in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n if hasattr(self.base_estimator_, \"predict_log_proba\"):\n # Check data\n X = check_array(\n X, accept_sparse=['csr', 'csc'], dtype=None,\n force_all_finite=False\n )\n\n if self.n_features_ != X.shape[1]:\n raise ValueError(\"Number of features of the model must \"\n \"match the input. Model n_features is {0} \"\n \"and input n_features is {1} \"\n \"\".format(self.n_features_, X.shape[1]))\n\n # Parallel loop\n n_jobs, n_estimators, starts = _partition_estimators(\n self.n_estimators, self.n_jobs)\n\n all_log_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n delayed(_parallel_predict_log_proba)(\n self.estimators_[starts[i]:starts[i + 1]],\n self.estimators_features_[starts[i]:starts[i + 1]],\n X,\n self.n_classes_)\n for i in range(n_jobs))\n\n # Reduce\n log_proba = all_log_proba[0]\n\n for j in range(1, len(all_log_proba)):\n log_proba = np.logaddexp(log_proba, all_log_proba[j])\n\n log_proba -= np.log(self.n_estimators)\n\n return log_proba\n\n else:\n return np.log(self.predict_proba(X))\n\n @if_delegate_has_method(delegate='base_estimator')\n def decision_function(self, X):\n \"\"\"Average of the decision functions of the base classifiers.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n Returns\n -------\n score : ndarray of shape (n_samples, k)\n The decision function of the input samples. The columns correspond\n to the classes in sorted order, as they appear in the attribute\n ``classes_``. 
Regression and binary classification are special\n cases with ``k == 1``, otherwise ``k==n_classes``.\n\n \"\"\"\n check_is_fitted(self)\n\n # Check data\n X = check_array(\n X, accept_sparse=['csr', 'csc'], dtype=None,\n force_all_finite=False\n )\n\n if self.n_features_ != X.shape[1]:\n raise ValueError(\"Number of features of the model must \"\n \"match the input. Model n_features is {0} and \"\n \"input n_features is {1} \"\n \"\".format(self.n_features_, X.shape[1]))\n\n # Parallel loop\n n_jobs, n_estimators, starts = _partition_estimators(self.n_estimators,\n self.n_jobs)\n\n all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n delayed(_parallel_decision_function)(\n self.estimators_[starts[i]:starts[i + 1]],\n self.estimators_features_[starts[i]:starts[i + 1]],\n X)\n for i in range(n_jobs))\n\n # Reduce\n decisions = sum(all_decisions) / self.n_estimators\n\n return decisions", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor", + "name": "BaggingRegressor", + "qname": "sklearn.ensemble._bagging.BaggingRegressor", + "decorators": [], + "superclasses": ["RegressorMixin", "BaseBagging"], + "methods": [ + "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__", + "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/predict", + "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/_validate_estimator", + "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/_set_oob_score" + ], + "is_public": false, + "reexported_by": [], + "description": "A Bagging regressor.\n\nA Bagging regressor is an ensemble meta-estimator that fits base\nregressors each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15", + "docstring": "A Bagging regressor.\n\nA Bagging regressor is an ensemble meta-estimator that fits base\nregressors each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. 
When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15\n\nParameters\n----------\nbase_estimator : object, default=None\n The base estimator to fit on random subsets of the dataset.\n If None, then the base estimator is a\n :class:`~sklearn.tree.DecisionTreeRegressor`.\n\nn_estimators : int, default=10\n The number of base estimators in the ensemble.\n\nmax_samples : int or float, default=1.0\n The number of samples to draw from X to train each base estimator (with\n replacement by default, see `bootstrap` for more details).\n\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n\nmax_features : int or float, default=1.0\n The number of features to draw from X to train each base estimator (\n without replacement by default, see `bootstrap_features` for more\n details).\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features.\n\nbootstrap : bool, default=True\n Whether samples are drawn with replacement. If False, sampling\n without replacement is performed.\n\nbootstrap_features : bool, default=False\n Whether features are drawn with replacement.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate\n the generalization error. Only available if bootstrap=True.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit\n a whole new ensemble. See :term:`the Glossary `.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for both :meth:`fit` and\n :meth:`predict`. ``None`` means 1 unless in a\n :obj:`joblib.parallel_backend` context. ``-1`` means using all\n processors. See :term:`Glossary ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random resampling of the original dataset\n (sample wise and feature wise).\n If the base estimator accepts a `random_state` attribute, a different\n seed is generated for each instance in the ensemble.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nn_features_ : int\n The number of features when :meth:`fit` is performed.\n\nestimators_ : list of estimators\n The collection of fitted sub-estimators.\n\nestimators_samples_ : list of arrays\n The subset of drawn samples (i.e., the in-bag samples) for each base\n estimator. Each subset is defined by an array of the indices selected.\n\nestimators_features_ : list of arrays\n The subset of drawn features for each base estimator.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,)\n Prediction computed with out-of-bag estimate on the training\n set. If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_prediction_` might contain NaN. 
This attribute exists only\n when ``oob_score`` is True.\n\nExamples\n--------\n>>> from sklearn.svm import SVR\n>>> from sklearn.ensemble import BaggingRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_samples=100, n_features=4,\n... n_informative=2, n_targets=1,\n... random_state=0, shuffle=False)\n>>> regr = BaggingRegressor(base_estimator=SVR(),\n... n_estimators=10, random_state=0).fit(X, y)\n>>> regr.predict([[0, 0, 0, 0]])\narray([-2.8720...])\n\nReferences\n----------\n\n.. [1] L. Breiman, \"Pasting small votes for classification in large\n databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n 1996.\n\n.. [3] T. Ho, \"The random subspace method for constructing decision\n forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n 1998.\n\n.. [4] G. Louppe and P. Geurts, \"Ensembles on Random Patches\", Machine\n Learning and Knowledge Discovery in Databases, 346-361, 2012.", + "code": "class BaggingRegressor(RegressorMixin, BaseBagging):\n \"\"\"A Bagging regressor.\n\n A Bagging regressor is an ensemble meta-estimator that fits base\n regressors each on random subsets of the original dataset and then\n aggregate their individual predictions (either by voting or by averaging)\n to form a final prediction. Such a meta-estimator can typically be used as\n a way to reduce the variance of a black-box estimator (e.g., a decision\n tree), by introducing randomization into its construction procedure and\n then making an ensemble out of it.\n\n This algorithm encompasses several works from the literature. When random\n subsets of the dataset are drawn as random subsets of the samples, then\n this algorithm is known as Pasting [1]_. If samples are drawn with\n replacement, then the method is known as Bagging [2]_. When random subsets\n of the dataset are drawn as random subsets of the features, then the method\n is known as Random Subspaces [3]_. Finally, when base estimators are built\n on subsets of both samples and features, then the method is known as\n Random Patches [4]_.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.15\n\n Parameters\n ----------\n base_estimator : object, default=None\n The base estimator to fit on random subsets of the dataset.\n If None, then the base estimator is a\n :class:`~sklearn.tree.DecisionTreeRegressor`.\n\n n_estimators : int, default=10\n The number of base estimators in the ensemble.\n\n max_samples : int or float, default=1.0\n The number of samples to draw from X to train each base estimator (with\n replacement by default, see `bootstrap` for more details).\n\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n\n max_features : int or float, default=1.0\n The number of features to draw from X to train each base estimator (\n without replacement by default, see `bootstrap_features` for more\n details).\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features.\n\n bootstrap : bool, default=True\n Whether samples are drawn with replacement. If False, sampling\n without replacement is performed.\n\n bootstrap_features : bool, default=False\n Whether features are drawn with replacement.\n\n oob_score : bool, default=False\n Whether to use out-of-bag samples to estimate\n the generalization error. 
Only available if bootstrap=True.\n\n warm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit\n a whole new ensemble. See :term:`the Glossary `.\n\n n_jobs : int, default=None\n The number of jobs to run in parallel for both :meth:`fit` and\n :meth:`predict`. ``None`` means 1 unless in a\n :obj:`joblib.parallel_backend` context. ``-1`` means using all\n processors. See :term:`Glossary ` for more details.\n\n random_state : int, RandomState instance or None, default=None\n Controls the random resampling of the original dataset\n (sample wise and feature wise).\n If the base estimator accepts a `random_state` attribute, a different\n seed is generated for each instance in the ensemble.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n verbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\n Attributes\n ----------\n base_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\n n_features_ : int\n The number of features when :meth:`fit` is performed.\n\n estimators_ : list of estimators\n The collection of fitted sub-estimators.\n\n estimators_samples_ : list of arrays\n The subset of drawn samples (i.e., the in-bag samples) for each base\n estimator. Each subset is defined by an array of the indices selected.\n\n estimators_features_ : list of arrays\n The subset of drawn features for each base estimator.\n\n oob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\n oob_prediction_ : ndarray of shape (n_samples,)\n Prediction computed with out-of-bag estimate on the training\n set. If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_prediction_` might contain NaN. This attribute exists only\n when ``oob_score`` is True.\n\n Examples\n --------\n >>> from sklearn.svm import SVR\n >>> from sklearn.ensemble import BaggingRegressor\n >>> from sklearn.datasets import make_regression\n >>> X, y = make_regression(n_samples=100, n_features=4,\n ... n_informative=2, n_targets=1,\n ... random_state=0, shuffle=False)\n >>> regr = BaggingRegressor(base_estimator=SVR(),\n ... n_estimators=10, random_state=0).fit(X, y)\n >>> regr.predict([[0, 0, 0, 0]])\n array([-2.8720...])\n\n References\n ----------\n\n .. [1] L. Breiman, \"Pasting small votes for classification in large\n databases and on-line\", Machine Learning, 36(1), 85-103, 1999.\n\n .. [2] L. Breiman, \"Bagging predictors\", Machine Learning, 24(2), 123-140,\n 1996.\n\n .. [3] T. Ho, \"The random subspace method for constructing decision\n forests\", Pattern Analysis and Machine Intelligence, 20(8), 832-844,\n 1998.\n\n .. [4] G. Louppe and P. 
Geurts, \"Ensembles on Random Patches\", Machine\n Learning and Knowledge Discovery in Databases, 346-361, 2012.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self,\n base_estimator=None,\n n_estimators=10, *,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n warm_start=False,\n n_jobs=None,\n random_state=None,\n verbose=0):\n super().__init__(\n base_estimator,\n n_estimators=n_estimators,\n max_samples=max_samples,\n max_features=max_features,\n bootstrap=bootstrap,\n bootstrap_features=bootstrap_features,\n oob_score=oob_score,\n warm_start=warm_start,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose)\n\n def predict(self, X):\n \"\"\"Predict regression target for X.\n\n The predicted regression target of an input sample is computed as the\n mean predicted regression targets of the estimators in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n The predicted values.\n \"\"\"\n check_is_fitted(self)\n # Check data\n X = check_array(\n X, accept_sparse=['csr', 'csc'], dtype=None,\n force_all_finite=False\n )\n\n # Parallel loop\n n_jobs, n_estimators, starts = _partition_estimators(self.n_estimators,\n self.n_jobs)\n\n all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n delayed(_parallel_predict_regression)(\n self.estimators_[starts[i]:starts[i + 1]],\n self.estimators_features_[starts[i]:starts[i + 1]],\n X)\n for i in range(n_jobs))\n\n # Reduce\n y_hat = sum(all_y_hat) / self.n_estimators\n\n return y_hat\n\n def _validate_estimator(self):\n \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n super()._validate_estimator(\n default=DecisionTreeRegressor())\n\n def _set_oob_score(self, X, y):\n n_samples = y.shape[0]\n\n predictions = np.zeros((n_samples,))\n n_predictions = np.zeros((n_samples,))\n\n for estimator, samples, features in zip(self.estimators_,\n self.estimators_samples_,\n self.estimators_features_):\n # Create mask for OOB samples\n mask = ~indices_to_mask(samples, n_samples)\n\n predictions[mask] += estimator.predict((X[mask, :])[:, features])\n n_predictions[mask] += 1\n\n if (n_predictions == 0).any():\n warn(\"Some inputs do not have OOB scores. 
\"\n \"This probably means too few estimators were used \"\n \"to compute any reliable oob estimates.\")\n n_predictions[n_predictions == 0] = 1\n\n predictions /= n_predictions\n\n self.oob_prediction_ = predictions\n self.oob_score_ = r2_score(y, predictions)", + "instance_attributes": [ + { + "name": "oob_prediction_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging", + "name": "BaseBagging", + "qname": "sklearn.ensemble._bagging.BaseBagging", + "decorators": [], + "superclasses": ["BaseEnsemble"], + "methods": [ + "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__", + "scikit-learn/sklearn.ensemble._bagging/BaseBagging/fit", + "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_parallel_args", + "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_fit", + "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_set_oob_score", + "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_validate_y", + "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_get_estimators_indices", + "scikit-learn/sklearn.ensemble._bagging/BaseBagging/estimators_samples_@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for Bagging meta-estimator.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "docstring": "Base class for Bagging meta-estimator.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "code": "class BaseBagging(BaseEnsemble, metaclass=ABCMeta):\n \"\"\"Base class for Bagging meta-estimator.\n\n Warning: This class should not be used directly. Use derived classes\n instead.\n \"\"\"\n\n @abstractmethod\n def __init__(self,\n base_estimator=None,\n n_estimators=10, *,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n warm_start=False,\n n_jobs=None,\n random_state=None,\n verbose=0):\n super().__init__(\n base_estimator=base_estimator,\n n_estimators=n_estimators)\n\n self.max_samples = max_samples\n self.max_features = max_features\n self.bootstrap = bootstrap\n self.bootstrap_features = bootstrap_features\n self.oob_score = oob_score\n self.warm_start = warm_start\n self.n_jobs = n_jobs\n self.random_state = random_state\n self.verbose = verbose\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Build a Bagging ensemble of estimators from the training\n set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n y : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if the base estimator supports\n sample weighting.\n\n Returns\n -------\n self : object\n \"\"\"\n return self._fit(X, y, self.max_samples, sample_weight=sample_weight)\n\n def _parallel_args(self):\n return {}\n\n def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):\n \"\"\"Build a Bagging ensemble of estimators from the training\n set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. 
Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n y : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\n max_samples : int or float, default=None\n Argument to use instead of self.max_samples.\n\n max_depth : int, default=None\n Override value used when constructing base estimator. Only\n supported if the base estimator has a max_depth parameter.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if the base estimator supports\n sample weighting.\n\n Returns\n -------\n self : object\n \"\"\"\n random_state = check_random_state(self.random_state)\n\n # Convert data (X is required to be 2d and indexable)\n X, y = self._validate_data(\n X, y, accept_sparse=['csr', 'csc'], dtype=None,\n force_all_finite=False, multi_output=True\n )\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X, dtype=None)\n\n # Remap output\n n_samples, self.n_features_ = X.shape\n self._n_samples = n_samples\n y = self._validate_y(y)\n\n # Check parameters\n self._validate_estimator()\n\n if max_depth is not None:\n self.base_estimator_.max_depth = max_depth\n\n # Validate max_samples\n if max_samples is None:\n max_samples = self.max_samples\n elif not isinstance(max_samples, numbers.Integral):\n max_samples = int(max_samples * X.shape[0])\n\n if not (0 < max_samples <= X.shape[0]):\n raise ValueError(\"max_samples must be in (0, n_samples]\")\n\n # Store validated integer row sampling value\n self._max_samples = max_samples\n\n # Validate max_features\n if isinstance(self.max_features, numbers.Integral):\n max_features = self.max_features\n elif isinstance(self.max_features, float):\n max_features = self.max_features * self.n_features_\n else:\n raise ValueError(\"max_features must be int or float\")\n\n if not (0 < max_features <= self.n_features_):\n raise ValueError(\"max_features must be in (0, n_features]\")\n\n max_features = max(1, int(max_features))\n\n # Store validated integer feature sampling value\n self._max_features = max_features\n\n # Other checks\n if not self.bootstrap and self.oob_score:\n raise ValueError(\"Out of bag estimation only available\"\n \" if bootstrap=True\")\n\n if self.warm_start and self.oob_score:\n raise ValueError(\"Out of bag estimate only available\"\n \" if warm_start=False\")\n\n if hasattr(self, \"oob_score_\") and self.warm_start:\n del self.oob_score_\n\n if not self.warm_start or not hasattr(self, 'estimators_'):\n # Free allocated memory, if any\n self.estimators_ = []\n self.estimators_features_ = []\n\n n_more_estimators = self.n_estimators - len(self.estimators_)\n\n if n_more_estimators < 0:\n raise ValueError('n_estimators=%d must be larger or equal to '\n 'len(estimators_)=%d when warm_start==True'\n % (self.n_estimators, len(self.estimators_)))\n\n elif n_more_estimators == 0:\n warn(\"Warm-start fitting without increasing n_estimators does not \"\n \"fit new trees.\")\n return self\n\n # Parallel loop\n n_jobs, n_estimators, starts = _partition_estimators(n_more_estimators,\n self.n_jobs)\n total_n_estimators = sum(n_estimators)\n\n # Advance random state to state after training\n # the first n_estimators\n if self.warm_start and len(self.estimators_) > 0:\n random_state.randint(MAX_INT, size=len(self.estimators_))\n\n seeds = random_state.randint(MAX_INT, size=n_more_estimators)\n self._seeds = seeds\n\n 
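# Build the newly requested estimators in parallel: each of the n_jobs\n # workers fits a contiguous chunk of estimators using its slice of the\n # per-estimator seeds, and returns the fitted estimators together with\n # the feature indices each one was trained on.\n 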
all_results = Parallel(n_jobs=n_jobs, verbose=self.verbose,\n **self._parallel_args())(\n delayed(_parallel_build_estimators)(\n n_estimators[i],\n self,\n X,\n y,\n sample_weight,\n seeds[starts[i]:starts[i + 1]],\n total_n_estimators,\n verbose=self.verbose)\n for i in range(n_jobs))\n\n # Reduce\n self.estimators_ += list(itertools.chain.from_iterable(\n t[0] for t in all_results))\n self.estimators_features_ += list(itertools.chain.from_iterable(\n t[1] for t in all_results))\n\n if self.oob_score:\n self._set_oob_score(X, y)\n\n return self\n\n @abstractmethod\n def _set_oob_score(self, X, y):\n \"\"\"Calculate out of bag predictions and score.\"\"\"\n\n def _validate_y(self, y):\n if len(y.shape) == 1 or y.shape[1] == 1:\n return column_or_1d(y, warn=True)\n else:\n return y\n\n def _get_estimators_indices(self):\n # Get drawn indices along both sample and feature axes\n for seed in self._seeds:\n # Operations accessing random_state must be performed identically\n # to those in `_parallel_build_estimators()`\n feature_indices, sample_indices = _generate_bagging_indices(\n seed, self.bootstrap_features, self.bootstrap,\n self.n_features_, self._n_samples, self._max_features,\n self._max_samples)\n\n yield feature_indices, sample_indices\n\n @property\n def estimators_samples_(self):\n \"\"\"\n The subset of drawn samples for each base estimator.\n\n Returns a dynamically generated list of indices identifying\n the samples used for fitting each member of the ensemble, i.e.,\n the in-bag samples.\n\n Note: the list is re-created at each call to the property in order\n to reduce the object memory footprint by not storing the sampling\n data. Thus fetching the property may be slower than expected.\n \"\"\"\n return [sample_indices\n for _, sample_indices in self._get_estimators_indices()]", + "instance_attributes": [ + { + "name": "max_samples", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_features", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "bootstrap", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "bootstrap_features", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "oob_score", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "warm_start", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "estimators_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "estimators_features_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble", + "name": "BaseEnsemble", + "qname": "sklearn.ensemble._base.BaseEnsemble", + "decorators": [], + "superclasses": ["MetaEstimatorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__init__", + "scikit-learn/sklearn.ensemble._base/BaseEnsemble/_validate_estimator", + "scikit-learn/sklearn.ensemble._base/BaseEnsemble/_make_estimator", + "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__len__", + "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__getitem__", + "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__iter__" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for all ensemble classes.\n\nWarning: This class should not be used directly. 
Use derived classes\ninstead.", + "docstring": "Base class for all ensemble classes.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.\n\nParameters\n----------\nbase_estimator : object\n The base estimator from which the ensemble is built.\n\nn_estimators : int, default=10\n The number of estimators in the ensemble.\n\nestimator_params : list of str, default=tuple()\n The list of attributes to use as parameters when instantiating a\n new base estimator. If none are given, default parameters are used.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nestimators_ : list of estimators\n The collection of fitted base estimators.", + "code": "class BaseEnsemble(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta):\n \"\"\"Base class for all ensemble classes.\n\n Warning: This class should not be used directly. Use derived classes\n instead.\n\n Parameters\n ----------\n base_estimator : object\n The base estimator from which the ensemble is built.\n\n n_estimators : int, default=10\n The number of estimators in the ensemble.\n\n estimator_params : list of str, default=tuple()\n The list of attributes to use as parameters when instantiating a\n new base estimator. If none are given, default parameters are used.\n\n Attributes\n ----------\n base_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\n estimators_ : list of estimators\n The collection of fitted base estimators.\n \"\"\"\n\n # overwrite _required_parameters from MetaEstimatorMixin\n _required_parameters: List[str] = []\n\n @abstractmethod\n def __init__(self, base_estimator, *, n_estimators=10,\n estimator_params=tuple()):\n # Set parameters\n self.base_estimator = base_estimator\n self.n_estimators = n_estimators\n self.estimator_params = estimator_params\n\n # Don't instantiate estimators now! Parameters of base_estimator might\n # still change. 
E.g., when grid-searching with the nested object syntax.\n # self.estimators_ needs to be filled by the derived classes in fit.\n\n def _validate_estimator(self, default=None):\n \"\"\"Check the estimator and the n_estimators attribute.\n\n Sets the `base_estimator_` attribute.\n \"\"\"\n if not isinstance(self.n_estimators, numbers.Integral):\n raise ValueError(\"n_estimators must be an integer, \"\n \"got {0}.\".format(type(self.n_estimators)))\n\n if self.n_estimators <= 0:\n raise ValueError(\"n_estimators must be greater than zero, \"\n \"got {0}.\".format(self.n_estimators))\n\n if self.base_estimator is not None:\n self.base_estimator_ = self.base_estimator\n else:\n self.base_estimator_ = default\n\n if self.base_estimator_ is None:\n raise ValueError(\"base_estimator cannot be None\")\n\n def _make_estimator(self, append=True, random_state=None):\n \"\"\"Make and configure a copy of the `base_estimator_` attribute.\n\n Warning: This method should be used to properly instantiate new\n sub-estimators.\n \"\"\"\n estimator = clone(self.base_estimator_)\n estimator.set_params(**{p: getattr(self, p)\n for p in self.estimator_params})\n\n if random_state is not None:\n _set_random_states(estimator, random_state)\n\n if append:\n self.estimators_.append(estimator)\n\n return estimator\n\n def __len__(self):\n \"\"\"Return the number of estimators in the ensemble.\"\"\"\n return len(self.estimators_)\n\n def __getitem__(self, index):\n \"\"\"Return the index'th estimator in the ensemble.\"\"\"\n return self.estimators_[index]\n\n def __iter__(self):\n \"\"\"Return iterator over estimators in the ensemble.\"\"\"\n return iter(self.estimators_)", + "instance_attributes": [ + { + "name": "n_estimators", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "estimator_params", + "types": { + "kind": "NamedType", + "name": "tuple" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble", + "name": "_BaseHeterogeneousEnsemble", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble", + "decorators": [], + "superclasses": ["MetaEstimatorMixin", "_BaseComposition"], + "methods": [ + "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/named_estimators@getter", + "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/__init__", + "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/_validate_estimators", + "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/set_params", + "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/get_params" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for heterogeneous ensemble of learners.", + "docstring": "Base class for heterogeneous ensemble of learners.\n\nParameters\n----------\nestimators : list of (str, estimator) tuples\n The ensemble of estimators to use in the ensemble. Each element of the\n list is defined as a tuple of string (i.e. name of the estimator) and\n an estimator instance. An estimator can be set to `'drop'` using\n `set_params`.\n\nAttributes\n----------\nestimators_ : list of estimators\n The elements of the estimators parameter, having been fitted on the\n training data. 
If an estimator has been set to `'drop'`, it will not\n appear in `estimators_`.", + "code": "class _BaseHeterogeneousEnsemble(MetaEstimatorMixin, _BaseComposition,\n metaclass=ABCMeta):\n \"\"\"Base class for heterogeneous ensemble of learners.\n\n Parameters\n ----------\n estimators : list of (str, estimator) tuples\n The ensemble of estimators to use in the ensemble. Each element of the\n list is defined as a tuple of string (i.e. name of the estimator) and\n an estimator instance. An estimator can be set to `'drop'` using\n `set_params`.\n\n Attributes\n ----------\n estimators_ : list of estimators\n The elements of the estimators parameter, having been fitted on the\n training data. If an estimator has been set to `'drop'`, it will not\n appear in `estimators_`.\n \"\"\"\n\n _required_parameters = ['estimators']\n\n @property\n def named_estimators(self):\n return Bunch(**dict(self.estimators))\n\n @abstractmethod\n def __init__(self, estimators):\n self.estimators = estimators\n\n def _validate_estimators(self):\n if self.estimators is None or len(self.estimators) == 0:\n raise ValueError(\n \"Invalid 'estimators' attribute, 'estimators' should be a list\"\n \" of (string, estimator) tuples.\"\n )\n names, estimators = zip(*self.estimators)\n # defined by MetaEstimatorMixin\n self._validate_names(names)\n\n has_estimator = any(est != 'drop' for est in estimators)\n if not has_estimator:\n raise ValueError(\n \"All estimators are dropped. At least one is required \"\n \"to be an estimator.\"\n )\n\n is_estimator_type = (is_classifier if is_classifier(self)\n else is_regressor)\n\n for est in estimators:\n if est != 'drop' and not is_estimator_type(est):\n raise ValueError(\n \"The estimator {} should be a {}.\".format(\n est.__class__.__name__, is_estimator_type.__name__[3:]\n )\n )\n\n return names, estimators\n\n def set_params(self, **params):\n \"\"\"\n Set the parameters of an estimator from the ensemble.\n\n Valid parameter keys can be listed with `get_params()`. Note that you\n can directly set the parameters of the estimators contained in\n `estimators`.\n\n Parameters\n ----------\n **params : keyword arguments\n Specific parameters using e.g.\n `set_params(parameter_name=new_value)`. 
In addition, to setting the\n parameters of the estimator, the individual estimator of the\n estimators can also be set, or can be removed by setting them to\n 'drop'.\n \"\"\"\n super()._set_params('estimators', **params)\n return self\n\n def get_params(self, deep=True):\n \"\"\"\n Get the parameters of an estimator from the ensemble.\n\n Returns the parameters given in the constructor as well as the\n estimators contained within the `estimators` parameter.\n\n Parameters\n ----------\n deep : bool, default=True\n Setting it to True gets the various estimators and the parameters\n of the estimators as well.\n \"\"\"\n return super()._get_params('estimators', deep=deep)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest", + "name": "BaseForest", + "qname": "sklearn.ensemble._forest.BaseForest", + "decorators": [], + "superclasses": ["MultiOutputMixin", "BaseEnsemble"], + "methods": [ + "scikit-learn/sklearn.ensemble._forest/BaseForest/__init__", + "scikit-learn/sklearn.ensemble._forest/BaseForest/apply", + "scikit-learn/sklearn.ensemble._forest/BaseForest/decision_path", + "scikit-learn/sklearn.ensemble._forest/BaseForest/fit", + "scikit-learn/sklearn.ensemble._forest/BaseForest/_set_oob_score", + "scikit-learn/sklearn.ensemble._forest/BaseForest/_validate_y_class_weight", + "scikit-learn/sklearn.ensemble._forest/BaseForest/_validate_X_predict", + "scikit-learn/sklearn.ensemble._forest/BaseForest/feature_importances_@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for forests of trees.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "docstring": "Base class for forests of trees.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "code": "class BaseForest(MultiOutputMixin, BaseEnsemble, metaclass=ABCMeta):\n \"\"\"\n Base class for forests of trees.\n\n Warning: This class should not be used directly. Use derived classes\n instead.\n \"\"\"\n\n @abstractmethod\n def __init__(self,\n base_estimator,\n n_estimators=100, *,\n estimator_params=tuple(),\n bootstrap=False,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n class_weight=None,\n max_samples=None):\n super().__init__(\n base_estimator=base_estimator,\n n_estimators=n_estimators,\n estimator_params=estimator_params)\n\n self.bootstrap = bootstrap\n self.oob_score = oob_score\n self.n_jobs = n_jobs\n self.random_state = random_state\n self.verbose = verbose\n self.warm_start = warm_start\n self.class_weight = class_weight\n self.max_samples = max_samples\n\n def apply(self, X):\n \"\"\"\n Apply trees in the forest to X, return leaf indices.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n X_leaves : ndarray of shape (n_samples, n_estimators)\n For each datapoint x in X and for each tree in the forest,\n return the index of the leaf x ends up in.\n \"\"\"\n X = self._validate_X_predict(X)\n results = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(prefer=\"threads\"))(\n delayed(tree.apply)(X, check_input=False)\n for tree in self.estimators_)\n\n return np.array(results).T\n\n def decision_path(self, X):\n \"\"\"\n Return the decision path in the forest.\n\n .. 
versionadded:: 0.18\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n indicator : sparse matrix of shape (n_samples, n_nodes)\n Return a node indicator matrix where non-zero elements indicate\n that the samples go through the nodes. The matrix is of CSR\n format.\n\n n_nodes_ptr : ndarray of shape (n_estimators + 1,)\n The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]\n give the indicator value for the i-th estimator.\n\n \"\"\"\n X = self._validate_X_predict(X)\n indicators = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(prefer='threads'))(\n delayed(tree.decision_path)(X, check_input=False)\n for tree in self.estimators_)\n\n n_nodes = [0]\n n_nodes.extend([i.shape[1] for i in indicators])\n n_nodes_ptr = np.array(n_nodes).cumsum()\n\n return sparse_hstack(indicators).tocsr(), n_nodes_ptr\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"\n Build a forest of trees from the training set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, its dtype will be converted\n to ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csc_matrix``.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels in classification, real numbers in\n regression).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\n Returns\n -------\n self : object\n \"\"\"\n # Validate or convert input data\n if issparse(y):\n raise ValueError(\n \"sparse multilabel-indicator for y is not supported.\"\n )\n X, y = self._validate_data(X, y, multi_output=True,\n accept_sparse=\"csc\", dtype=DTYPE)\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n\n if issparse(X):\n # Pre-sort indices to avoid that each individual tree of the\n # ensemble sorts the indices.\n X.sort_indices()\n\n # Remap output\n self.n_features_ = X.shape[1]\n\n y = np.atleast_1d(y)\n if y.ndim == 2 and y.shape[1] == 1:\n warn(\"A column-vector y was passed when a 1d array was\"\n \" expected. 
Please change the shape of y to \"\n \"(n_samples,), for example using ravel().\",\n DataConversionWarning, stacklevel=2)\n\n if y.ndim == 1:\n # reshape is necessary to preserve the data contiguity against vs\n # [:, np.newaxis] that does not.\n y = np.reshape(y, (-1, 1))\n\n self.n_outputs_ = y.shape[1]\n\n y, expanded_class_weight = self._validate_y_class_weight(y)\n\n if getattr(y, \"dtype\", None) != DOUBLE or not y.flags.contiguous:\n y = np.ascontiguousarray(y, dtype=DOUBLE)\n\n if expanded_class_weight is not None:\n if sample_weight is not None:\n sample_weight = sample_weight * expanded_class_weight\n else:\n sample_weight = expanded_class_weight\n\n # Get bootstrap sample size\n n_samples_bootstrap = _get_n_samples_bootstrap(\n n_samples=X.shape[0],\n max_samples=self.max_samples\n )\n\n # Check parameters\n self._validate_estimator()\n\n if not self.bootstrap and self.oob_score:\n raise ValueError(\"Out of bag estimation only available\"\n \" if bootstrap=True\")\n\n random_state = check_random_state(self.random_state)\n\n if not self.warm_start or not hasattr(self, \"estimators_\"):\n # Free allocated memory, if any\n self.estimators_ = []\n\n n_more_estimators = self.n_estimators - len(self.estimators_)\n\n if n_more_estimators < 0:\n raise ValueError('n_estimators=%d must be larger or equal to '\n 'len(estimators_)=%d when warm_start==True'\n % (self.n_estimators, len(self.estimators_)))\n\n elif n_more_estimators == 0:\n warn(\"Warm-start fitting without increasing n_estimators does not \"\n \"fit new trees.\")\n else:\n if self.warm_start and len(self.estimators_) > 0:\n # We draw from the random state to get the random state we\n # would have got if we hadn't used a warm_start.\n random_state.randint(MAX_INT, size=len(self.estimators_))\n\n trees = [self._make_estimator(append=False,\n random_state=random_state)\n for i in range(n_more_estimators)]\n\n # Parallel loop: we prefer the threading backend as the Cython code\n # for fitting the trees is internally releasing the Python GIL\n # making threading more efficient than multiprocessing in\n # that case. 
However, for joblib 0.12+ we respect any\n # parallel_backend contexts set at a higher level,\n # since correctness does not rely on using threads.\n trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(prefer='threads'))(\n delayed(_parallel_build_trees)(\n t, self, X, y, sample_weight, i, len(trees),\n verbose=self.verbose, class_weight=self.class_weight,\n n_samples_bootstrap=n_samples_bootstrap)\n for i, t in enumerate(trees))\n\n # Collect newly grown trees\n self.estimators_.extend(trees)\n\n if self.oob_score:\n self._set_oob_score(X, y)\n\n # Decapsulate classes_ attributes\n if hasattr(self, \"classes_\") and self.n_outputs_ == 1:\n self.n_classes_ = self.n_classes_[0]\n self.classes_ = self.classes_[0]\n\n return self\n\n @abstractmethod\n def _set_oob_score(self, X, y):\n \"\"\"\n Calculate out of bag predictions and score.\"\"\"\n\n def _validate_y_class_weight(self, y):\n # Default implementation\n return y, None\n\n def _validate_X_predict(self, X):\n \"\"\"\n Validate X whenever one tries to predict, apply, predict_proba.\"\"\"\n check_is_fitted(self)\n\n return self.estimators_[0]._validate_X_predict(X, check_input=True)\n\n @property\n def feature_importances_(self):\n \"\"\"\n The impurity-based feature importances.\n\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n Returns\n -------\n feature_importances_ : ndarray of shape (n_features,)\n The values of this array sum to 1, unless all trees are single node\n trees consisting of only the root node, in which case it will be an\n array of zeros.\n \"\"\"\n check_is_fitted(self)\n\n all_importances = Parallel(n_jobs=self.n_jobs,\n **_joblib_parallel_args(prefer='threads'))(\n delayed(getattr)(tree, 'feature_importances_')\n for tree in self.estimators_ if tree.tree_.node_count > 1)\n\n if not all_importances:\n return np.zeros(self.n_features_, dtype=np.float64)\n\n all_importances = np.mean(all_importances,\n axis=0, dtype=np.float64)\n return all_importances / np.sum(all_importances)", + "instance_attributes": [ + { + "name": "bootstrap", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "oob_score", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "warm_start", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "estimators_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier", + "name": "ExtraTreesClassifier", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier", + "decorators": [], + "superclasses": ["ForestClassifier"], + "methods": ["scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__"], + "is_public": false, + "reexported_by": [], + "description": "An extra-trees classifier.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. 
extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide `.", + "docstring": "An extra-trees classifier.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\ncriterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. 
Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nbootstrap : bool, default=False\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate the generalization score.\n Only available if bootstrap=True.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls 3 sources of randomness:\n\n - the bootstrapping of the samples used when building trees\n (if ``bootstrap=True``)\n - the sampling of the features to consider when looking for the best\n split at each node (if ``max_features < n_features``)\n - the draw of the splits for each of the `max_features`\n\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. 
See :term:`the Glossary `.\n\nclass_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts, default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n The \"balanced_subsample\" mode is the same as \"balanced\" except that\n weights are computed based on the bootstrap sample for every tree\n grown.\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : ExtraTreesClassifier\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeClassifier\n The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,) or a list of such arrays\n The classes labels (single output problem), or a list of arrays of\n class labels (multi-output problem).\n\nn_classes_ : int or list\n The number of classes (single output problem), or a list containing the\n number of classes for each output (multi-output problem).\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes)\n Decision function computed with out-of-bag estimate on the training\n set. 
If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_decision_function_` might contain NaN. This attribute exists\n only when ``oob_score`` is True.\n\nSee Also\n--------\nsklearn.tree.ExtraTreeClassifier : Base classifier for this ensemble.\nRandomForestClassifier : Ensemble Classifier based on trees with optimal\n splits.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized\n trees\", Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.ensemble import ExtraTreesClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = ExtraTreesClassifier(n_estimators=100, random_state=0)\n>>> clf.fit(X, y)\nExtraTreesClassifier(random_state=0)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])", + "code": "class ExtraTreesClassifier(ForestClassifier):\n \"\"\"\n An extra-trees classifier.\n\n This class implements a meta estimator that fits a number of\n randomized decision trees (a.k.a. extra-trees) on various sub-samples\n of the dataset and uses averaging to improve the predictive accuracy\n and control over-fitting.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\n criterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n\n max_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\n min_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. 
Samples have\n equal weight when sample_weight is not provided.\n\n max_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\n max_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\n min_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\n min_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\n bootstrap : bool, default=False\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\n oob_score : bool, default=False\n Whether to use out-of-bag samples to estimate the generalization score.\n Only available if bootstrap=True.\n\n n_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\n random_state : int, RandomState instance or None, default=None\n Controls 3 sources of randomness:\n\n - the bootstrapping of the samples used when building trees\n (if ``bootstrap=True``)\n - the sampling of the features to consider when looking for the best\n split at each node (if ``max_features < n_features``)\n - the draw of the splits for each of the `max_features`\n\n See :term:`Glossary ` for details.\n\n verbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\n warm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. 
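To make the weighted impurity decrease formula above concrete, take an invented split: a node holding 50 of 100 equally weighted samples with impurity 0.5 sends 30 samples to a left child of impurity 0.3 and 20 to a right child of impurity 0.2. The decrease is 0.5 * (0.5 - 0.4 * 0.2 - 0.6 * 0.3) = 0.12, so the split survives `min_impurity_decrease=0.1` but not `0.15`:

>>> N, N_t, N_t_L, N_t_R = 100, 50, 30, 20          # invented sample counts
>>> impurity, left_imp, right_imp = 0.5, 0.3, 0.2   # invented impurities
>>> round(N_t / N * (impurity - N_t_R / N_t * right_imp
...                  - N_t_L / N_t * left_imp), 4)
0.12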
See :term:`the Glossary `.\n\n class_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts, \\\n default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n The \"balanced_subsample\" mode is the same as \"balanced\" except that\n weights are computed based on the bootstrap sample for every tree\n grown.\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n ccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\n max_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. versionadded:: 0.22\n\n Attributes\n ----------\n base_estimator_ : ExtraTreesClassifier\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\n estimators_ : list of DecisionTreeClassifier\n The collection of fitted sub-estimators.\n\n classes_ : ndarray of shape (n_classes,) or a list of such arrays\n The classes labels (single output problem), or a list of arrays of\n class labels (multi-output problem).\n\n n_classes_ : int or list\n The number of classes (single output problem), or a list containing the\n number of classes for each output (multi-output problem).\n\n feature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n n_features_ : int\n The number of features when ``fit`` is performed.\n\n n_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\n oob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\n oob_decision_function_ : ndarray of shape (n_samples, n_classes)\n Decision function computed with out-of-bag estimate on the training\n set. 
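A hedged sketch of the list-of-dicts form of `class_weight` described above, on made-up two-output data; each dict weights the classes of one column of `Y`:

>>> import numpy as np
>>> from sklearn.ensemble import ExtraTreesClassifier
>>> rng = np.random.RandomState(0)
>>> X = rng.rand(100, 4)
>>> Y = rng.randint(0, 2, size=(100, 2))            # two binary output columns
>>> clf = ExtraTreesClassifier(
...     n_estimators=10,
...     class_weight=[{0: 1, 1: 2}, {0: 1, 1: 5}],  # one dict per column of Y
...     random_state=0).fit(X, Y)
>>> clf.n_outputs_
2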
If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_decision_function_` might contain NaN. This attribute exists\n only when ``oob_score`` is True.\n\n See Also\n --------\n sklearn.tree.ExtraTreeClassifier : Base classifier for this ensemble.\n RandomForestClassifier : Ensemble Classifier based on trees with optimal\n splits.\n\n Notes\n -----\n The default values for the parameters controlling the size of the trees\n (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n unpruned trees which can potentially be very large on some data sets. To\n reduce memory consumption, the complexity and size of the trees should be\n controlled by setting those parameter values.\n\n References\n ----------\n .. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized\n trees\", Machine Learning, 63(1), 3-42, 2006.\n\n Examples\n --------\n >>> from sklearn.ensemble import ExtraTreesClassifier\n >>> from sklearn.datasets import make_classification\n >>> X, y = make_classification(n_features=4, random_state=0)\n >>> clf = ExtraTreesClassifier(n_estimators=100, random_state=0)\n >>> clf.fit(X, y)\n ExtraTreesClassifier(random_state=0)\n >>> clf.predict([[0, 0, 0, 0]])\n array([1])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self,\n n_estimators=100, *,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n bootstrap=False,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None):\n super().__init__(\n base_estimator=ExtraTreeClassifier(),\n n_estimators=n_estimators,\n estimator_params=(\"criterion\", \"max_depth\", \"min_samples_split\",\n \"min_samples_leaf\", \"min_weight_fraction_leaf\",\n \"max_features\", \"max_leaf_nodes\",\n \"min_impurity_decrease\", \"min_impurity_split\",\n \"random_state\", \"ccp_alpha\"),\n bootstrap=bootstrap,\n oob_score=oob_score,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n class_weight=class_weight,\n max_samples=max_samples)\n\n self.criterion = criterion\n self.max_depth = max_depth\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.max_features = max_features\n self.max_leaf_nodes = max_leaf_nodes\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.ccp_alpha = ccp_alpha", + "instance_attributes": [ + { + "name": "criterion", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "min_samples_split", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_samples_leaf", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_weight_fraction_leaf", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_features", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "min_impurity_decrease", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "ccp_alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor", + "name": "ExtraTreesRegressor", + "qname": 
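`warm_start=True`, described in the parameters above, keeps the already-fitted trees and only grows the difference when `n_estimators` is raised between calls to `fit`. A minimal sketch:

>>> from sklearn.ensemble import ExtraTreesClassifier
>>> from sklearn.datasets import make_classification
>>> X, y = make_classification(n_features=4, random_state=0)
>>> clf = ExtraTreesClassifier(n_estimators=50, warm_start=True,
...                            random_state=0).fit(X, y)
>>> clf.n_estimators = 100      # the next fit adds 50 trees instead of refitting
>>> len(clf.fit(X, y).estimators_)
100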
"sklearn.ensemble._forest.ExtraTreesRegressor", + "decorators": [], + "superclasses": ["ForestRegressor"], + "methods": ["scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__"], + "is_public": false, + "reexported_by": [], + "description": "An extra-trees regressor.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide `.", + "docstring": "An extra-trees regressor.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\ncriterion : {\"mse\", \"mae\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion, and \"mae\" for the mean\n absolute error.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. 
Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nbootstrap : bool, default=False\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate the generalization score.\n Only available if bootstrap=True.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls 3 sources of randomness:\n\n - the bootstrapping of the samples used when building trees\n (if ``bootstrap=True``)\n - the sampling of the features to consider when looking for the best\n split at each node (if ``max_features < n_features``)\n - the draw of the splits for each of the `max_features`\n\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. 
See :term:`the Glossary `.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : ExtraTreeRegressor\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeRegressor\n The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n The number of features.\n\nn_outputs_ : int\n The number of outputs.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,)\n Prediction computed with out-of-bag estimate on the training set.\n This attribute exists only when ``oob_score`` is True.\n\nSee Also\n--------\nsklearn.tree.ExtraTreeRegressor : Base estimator for this ensemble.\nRandomForestRegressor : Ensemble regressor using trees with optimal splits.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import ExtraTreesRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=0)\n>>> reg = ExtraTreesRegressor(n_estimators=100, random_state=0).fit(\n... X_train, y_train)\n>>> reg.score(X_test, y_test)\n0.2708...", + "code": "class ExtraTreesRegressor(ForestRegressor):\n \"\"\"\n An extra-trees regressor.\n\n This class implements a meta estimator that fits a number of\n randomized decision trees (a.k.a. extra-trees) on various sub-samples\n of the dataset and uses averaging to improve the predictive accuracy\n and control over-fitting.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_estimators : int, default=100\n The number of trees in the forest.\n\n .. 
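Since `max_samples` only takes effect when `bootstrap=True`, and extra-trees default to `bootstrap=False`, the two must be set together. A sketch on the diabetes data from the example above; with its 442 rows, `max_samples=0.5` draws roughly half of them per tree, and `oob_prediction_` covers the full training set:

>>> from sklearn.datasets import load_diabetes
>>> from sklearn.ensemble import ExtraTreesRegressor
>>> X, y = load_diabetes(return_X_y=True)
>>> reg = ExtraTreesRegressor(n_estimators=100, bootstrap=True,
...                           max_samples=0.5, oob_score=True,
...                           random_state=0).fit(X, y)
>>> reg.oob_prediction_.shape
(442,)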
versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\n criterion : {\"mse\", \"mae\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion, and \"mae\" for the mean\n absolute error.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n max_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\n min_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\n max_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\n max_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\n min_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. 
versionadded:: 0.19\n\n min_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\n bootstrap : bool, default=False\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\n oob_score : bool, default=False\n Whether to use out-of-bag samples to estimate the generalization score.\n Only available if bootstrap=True.\n\n n_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\n random_state : int, RandomState instance or None, default=None\n Controls 3 sources of randomness:\n\n - the bootstrapping of the samples used when building trees\n (if ``bootstrap=True``)\n - the sampling of the features to consider when looking for the best\n split at each node (if ``max_features < n_features``)\n - the draw of the splits for each of the `max_features`\n\n See :term:`Glossary ` for details.\n\n verbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\n warm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. See :term:`the Glossary `.\n\n ccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\n max_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. versionadded:: 0.22\n\n Attributes\n ----------\n base_estimator_ : ExtraTreeRegressor\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\n estimators_ : list of DecisionTreeRegressor\n The collection of fitted sub-estimators.\n\n feature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). 
See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n n_features_ : int\n The number of features.\n\n n_outputs_ : int\n The number of outputs.\n\n oob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\n oob_prediction_ : ndarray of shape (n_samples,)\n Prediction computed with out-of-bag estimate on the training set.\n This attribute exists only when ``oob_score`` is True.\n\n See Also\n --------\n sklearn.tree.ExtraTreeRegressor : Base estimator for this ensemble.\n RandomForestRegressor : Ensemble regressor using trees with optimal splits.\n\n Notes\n -----\n The default values for the parameters controlling the size of the trees\n (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n unpruned trees which can potentially be very large on some data sets. To\n reduce memory consumption, the complexity and size of the trees should be\n controlled by setting those parameter values.\n\n References\n ----------\n .. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n\n Examples\n --------\n >>> from sklearn.datasets import load_diabetes\n >>> from sklearn.model_selection import train_test_split\n >>> from sklearn.ensemble import ExtraTreesRegressor\n >>> X, y = load_diabetes(return_X_y=True)\n >>> X_train, X_test, y_train, y_test = train_test_split(\n ... X, y, random_state=0)\n >>> reg = ExtraTreesRegressor(n_estimators=100, random_state=0).fit(\n ... X_train, y_train)\n >>> reg.score(X_test, y_test)\n 0.2708...\n \"\"\"\n @_deprecate_positional_args\n def __init__(self,\n n_estimators=100, *,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n bootstrap=False,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n ccp_alpha=0.0,\n max_samples=None):\n super().__init__(\n base_estimator=ExtraTreeRegressor(),\n n_estimators=n_estimators,\n estimator_params=(\"criterion\", \"max_depth\", \"min_samples_split\",\n \"min_samples_leaf\", \"min_weight_fraction_leaf\",\n \"max_features\", \"max_leaf_nodes\",\n \"min_impurity_decrease\", \"min_impurity_split\",\n \"random_state\", \"ccp_alpha\"),\n bootstrap=bootstrap,\n oob_score=oob_score,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n max_samples=max_samples)\n\n self.criterion = criterion\n self.max_depth = max_depth\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.max_features = max_features\n self.max_leaf_nodes = max_leaf_nodes\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.ccp_alpha = ccp_alpha", + "instance_attributes": [ + { + "name": "criterion", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "min_samples_split", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_samples_leaf", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_weight_fraction_leaf", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_features", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "min_impurity_decrease", + 
"types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "ccp_alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier", + "name": "ForestClassifier", + "qname": "sklearn.ensemble._forest.ForestClassifier", + "decorators": [], + "superclasses": ["ClassifierMixin", "BaseForest"], + "methods": [ + "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__", + "scikit-learn/sklearn.ensemble._forest/ForestClassifier/_set_oob_score", + "scikit-learn/sklearn.ensemble._forest/ForestClassifier/_validate_y_class_weight", + "scikit-learn/sklearn.ensemble._forest/ForestClassifier/predict", + "scikit-learn/sklearn.ensemble._forest/ForestClassifier/predict_proba", + "scikit-learn/sklearn.ensemble._forest/ForestClassifier/predict_log_proba" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for forest of trees-based classifiers.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "docstring": "Base class for forest of trees-based classifiers.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "code": "class ForestClassifier(ClassifierMixin, BaseForest, metaclass=ABCMeta):\n \"\"\"\n Base class for forest of trees-based classifiers.\n\n Warning: This class should not be used directly. Use derived classes\n instead.\n \"\"\"\n\n @abstractmethod\n def __init__(self,\n base_estimator,\n n_estimators=100, *,\n estimator_params=tuple(),\n bootstrap=False,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n class_weight=None,\n max_samples=None):\n super().__init__(\n base_estimator,\n n_estimators=n_estimators,\n estimator_params=estimator_params,\n bootstrap=bootstrap,\n oob_score=oob_score,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n class_weight=class_weight,\n max_samples=max_samples)\n\n def _set_oob_score(self, X, y):\n \"\"\"\n Compute out-of-bag score.\"\"\"\n X = check_array(X, dtype=DTYPE, accept_sparse='csr')\n\n n_classes_ = self.n_classes_\n n_samples = y.shape[0]\n\n oob_decision_function = []\n oob_score = 0.0\n predictions = [np.zeros((n_samples, n_classes_[k]))\n for k in range(self.n_outputs_)]\n\n n_samples_bootstrap = _get_n_samples_bootstrap(\n n_samples, self.max_samples\n )\n\n for estimator in self.estimators_:\n unsampled_indices = _generate_unsampled_indices(\n estimator.random_state, n_samples, n_samples_bootstrap)\n p_estimator = estimator.predict_proba(X[unsampled_indices, :],\n check_input=False)\n\n if self.n_outputs_ == 1:\n p_estimator = [p_estimator]\n\n for k in range(self.n_outputs_):\n predictions[k][unsampled_indices, :] += p_estimator[k]\n\n for k in range(self.n_outputs_):\n if (predictions[k].sum(axis=1) == 0).any():\n warn(\"Some inputs do not have OOB scores. 
\"\n \"This probably means too few trees were used \"\n \"to compute any reliable oob estimates.\")\n\n decision = (predictions[k] /\n predictions[k].sum(axis=1)[:, np.newaxis])\n oob_decision_function.append(decision)\n oob_score += np.mean(y[:, k] ==\n np.argmax(predictions[k], axis=1), axis=0)\n\n if self.n_outputs_ == 1:\n self.oob_decision_function_ = oob_decision_function[0]\n else:\n self.oob_decision_function_ = oob_decision_function\n\n self.oob_score_ = oob_score / self.n_outputs_\n\n def _validate_y_class_weight(self, y):\n check_classification_targets(y)\n\n y = np.copy(y)\n expanded_class_weight = None\n\n if self.class_weight is not None:\n y_original = np.copy(y)\n\n self.classes_ = []\n self.n_classes_ = []\n\n y_store_unique_indices = np.zeros(y.shape, dtype=int)\n for k in range(self.n_outputs_):\n classes_k, y_store_unique_indices[:, k] = \\\n np.unique(y[:, k], return_inverse=True)\n self.classes_.append(classes_k)\n self.n_classes_.append(classes_k.shape[0])\n y = y_store_unique_indices\n\n if self.class_weight is not None:\n valid_presets = ('balanced', 'balanced_subsample')\n if isinstance(self.class_weight, str):\n if self.class_weight not in valid_presets:\n raise ValueError('Valid presets for class_weight include '\n '\"balanced\" and \"balanced_subsample\".'\n 'Given \"%s\".'\n % self.class_weight)\n if self.warm_start:\n warn('class_weight presets \"balanced\" or '\n '\"balanced_subsample\" are '\n 'not recommended for warm_start if the fitted data '\n 'differs from the full dataset. In order to use '\n '\"balanced\" weights, use compute_class_weight '\n '(\"balanced\", classes, y). In place of y you can use '\n 'a large enough sample of the full training set '\n 'target to properly estimate the class frequency '\n 'distributions. Pass the resulting weights as the '\n 'class_weight parameter.')\n\n if (self.class_weight != 'balanced_subsample' or\n not self.bootstrap):\n if self.class_weight == \"balanced_subsample\":\n class_weight = \"balanced\"\n else:\n class_weight = self.class_weight\n expanded_class_weight = compute_sample_weight(class_weight,\n y_original)\n\n return y, expanded_class_weight\n\n def predict(self, X):\n \"\"\"\n Predict class for X.\n\n The predicted class of an input sample is a vote by the trees in\n the forest, weighted by their probability estimates. That is,\n the predicted class is the one with highest mean probability\n estimate across the trees.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. 
If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The predicted classes.\n \"\"\"\n proba = self.predict_proba(X)\n\n if self.n_outputs_ == 1:\n return self.classes_.take(np.argmax(proba, axis=1), axis=0)\n\n else:\n n_samples = proba[0].shape[0]\n # all dtypes should be the same, so just take the first\n class_type = self.classes_[0].dtype\n predictions = np.empty((n_samples, self.n_outputs_),\n dtype=class_type)\n\n for k in range(self.n_outputs_):\n predictions[:, k] = self.classes_[k].take(np.argmax(proba[k],\n axis=1),\n axis=0)\n\n return predictions\n\n def predict_proba(self, X):\n \"\"\"\n Predict class probabilities for X.\n\n The predicted class probabilities of an input sample are computed as\n the mean predicted class probabilities of the trees in the forest.\n The class probability of a single tree is the fraction of samples of\n the same class in a leaf.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes), or a list of n_outputs\n such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n # Check data\n X = self._validate_X_predict(X)\n\n # Assign chunk of trees to jobs\n n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)\n\n # avoid storing the output of every estimator by summing them here\n all_proba = [np.zeros((X.shape[0], j), dtype=np.float64)\n for j in np.atleast_1d(self.n_classes_)]\n lock = threading.Lock()\n Parallel(n_jobs=n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(require=\"sharedmem\"))(\n delayed(_accumulate_prediction)(e.predict_proba, X, all_proba,\n lock)\n for e in self.estimators_)\n\n for proba in all_proba:\n proba /= len(self.estimators_)\n\n if len(all_proba) == 1:\n return all_proba[0]\n else:\n return all_proba\n\n def predict_log_proba(self, X):\n \"\"\"\n Predict class log-probabilities for X.\n\n The predicted class log-probabilities of an input sample is computed as\n the log of the mean predicted class probabilities of the trees in the\n forest.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes), or a list of n_outputs\n such arrays if n_outputs > 1.\n The class probabilities of the input samples. 
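As the `predict_proba` body above shows, the forest probability is the plain mean of the per-tree probabilities (summed under a lock, then divided by the number of trees). A quick sketch of that equivalence on toy data:

>>> import numpy as np
>>> from sklearn.ensemble import RandomForestClassifier
>>> from sklearn.datasets import make_classification
>>> X, y = make_classification(random_state=0)
>>> clf = RandomForestClassifier(n_estimators=10, random_state=0).fit(X, y)
>>> per_tree = [tree.predict_proba(X[:5]) for tree in clf.estimators_]
>>> np.allclose(clf.predict_proba(X[:5]), np.mean(per_tree, axis=0))
True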
The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n proba = self.predict_proba(X)\n\n if self.n_outputs_ == 1:\n return np.log(proba)\n\n else:\n for k in range(self.n_outputs_):\n proba[k] = np.log(proba[k])\n\n return proba", + "instance_attributes": [ + { + "name": "classes_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "n_classes_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor", + "name": "ForestRegressor", + "qname": "sklearn.ensemble._forest.ForestRegressor", + "decorators": [], + "superclasses": ["RegressorMixin", "BaseForest"], + "methods": [ + "scikit-learn/sklearn.ensemble._forest/ForestRegressor/__init__", + "scikit-learn/sklearn.ensemble._forest/ForestRegressor/predict", + "scikit-learn/sklearn.ensemble._forest/ForestRegressor/_set_oob_score", + "scikit-learn/sklearn.ensemble._forest/ForestRegressor/_compute_partial_dependence_recursion" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for forest of trees-based regressors.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "docstring": "Base class for forest of trees-based regressors.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "code": "class ForestRegressor(RegressorMixin, BaseForest, metaclass=ABCMeta):\n \"\"\"\n Base class for forest of trees-based regressors.\n\n Warning: This class should not be used directly. Use derived classes\n instead.\n \"\"\"\n\n @abstractmethod\n def __init__(self,\n base_estimator,\n n_estimators=100, *,\n estimator_params=tuple(),\n bootstrap=False,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n max_samples=None):\n super().__init__(\n base_estimator,\n n_estimators=n_estimators,\n estimator_params=estimator_params,\n bootstrap=bootstrap,\n oob_score=oob_score,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n max_samples=max_samples)\n\n def predict(self, X):\n \"\"\"\n Predict regression target for X.\n\n The predicted regression target of an input sample is computed as the\n mean predicted regression targets of the trees in the forest.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. 
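`predict_log_proba` above is literally ``np.log`` applied to `predict_proba`, so a class that never appears in a sample's leaves comes out as ``-inf``; a rough, hedged check, guarding the expected divide-by-zero warning:

>>> import numpy as np
>>> from sklearn.ensemble import RandomForestClassifier
>>> from sklearn.datasets import make_classification
>>> X, y = make_classification(random_state=0)
>>> clf = RandomForestClassifier(n_estimators=10, random_state=0).fit(X, y)
>>> with np.errstate(divide='ignore'):      # log(0) -> -inf
...     same = np.allclose(clf.predict_log_proba(X[:5]),
...                        np.log(clf.predict_proba(X[:5])))
>>> same
True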
If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The predicted values.\n \"\"\"\n check_is_fitted(self)\n # Check data\n X = self._validate_X_predict(X)\n\n # Assign chunk of trees to jobs\n n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)\n\n # avoid storing the output of every estimator by summing them here\n if self.n_outputs_ > 1:\n y_hat = np.zeros((X.shape[0], self.n_outputs_), dtype=np.float64)\n else:\n y_hat = np.zeros((X.shape[0]), dtype=np.float64)\n\n # Parallel loop\n lock = threading.Lock()\n Parallel(n_jobs=n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(require=\"sharedmem\"))(\n delayed(_accumulate_prediction)(e.predict, X, [y_hat], lock)\n for e in self.estimators_)\n\n y_hat /= len(self.estimators_)\n\n return y_hat\n\n def _set_oob_score(self, X, y):\n \"\"\"\n Compute out-of-bag scores.\"\"\"\n X = check_array(X, dtype=DTYPE, accept_sparse='csr')\n\n n_samples = y.shape[0]\n\n predictions = np.zeros((n_samples, self.n_outputs_))\n n_predictions = np.zeros((n_samples, self.n_outputs_))\n\n n_samples_bootstrap = _get_n_samples_bootstrap(\n n_samples, self.max_samples\n )\n\n for estimator in self.estimators_:\n unsampled_indices = _generate_unsampled_indices(\n estimator.random_state, n_samples, n_samples_bootstrap)\n p_estimator = estimator.predict(\n X[unsampled_indices, :], check_input=False)\n\n if self.n_outputs_ == 1:\n p_estimator = p_estimator[:, np.newaxis]\n\n predictions[unsampled_indices, :] += p_estimator\n n_predictions[unsampled_indices, :] += 1\n\n if (n_predictions == 0).any():\n warn(\"Some inputs do not have OOB scores. \"\n \"This probably means too few trees were used \"\n \"to compute any reliable oob estimates.\")\n n_predictions[n_predictions == 0] = 1\n\n predictions /= n_predictions\n self.oob_prediction_ = predictions\n\n if self.n_outputs_ == 1:\n self.oob_prediction_ = \\\n self.oob_prediction_.reshape((n_samples, ))\n\n self.oob_score_ = 0.0\n\n for k in range(self.n_outputs_):\n self.oob_score_ += r2_score(y[:, k],\n predictions[:, k])\n\n self.oob_score_ /= self.n_outputs_\n\n def _compute_partial_dependence_recursion(self, grid, target_features):\n \"\"\"Fast partial dependence computation.\n\n Parameters\n ----------\n grid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\n target_features : ndarray of shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\n Returns\n -------\n averaged_predictions : ndarray of shape (n_samples,)\n The value of the partial dependence function on each grid point.\n \"\"\"\n grid = np.asarray(grid, dtype=DTYPE, order='C')\n averaged_predictions = np.zeros(shape=grid.shape[0],\n dtype=np.float64, order='C')\n\n for tree in self.estimators_:\n # Note: we don't sum in parallel because the GIL isn't released in\n # the fast method.\n tree.tree_.compute_partial_dependence(\n grid, target_features, averaged_predictions)\n # Average over the forest\n averaged_predictions /= len(self.estimators_)\n\n return averaged_predictions", + "instance_attributes": [ + { + "name": "oob_prediction_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "oob_score_", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier", + "name": 
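`ForestRegressor.predict` above follows the same accumulate-and-divide pattern on the regression side, i.e. an unweighted mean of the per-tree predictions. A sketch on invented `make_regression` data:

>>> import numpy as np
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.datasets import make_regression
>>> X, y = make_regression(random_state=0)
>>> reg = RandomForestRegressor(n_estimators=10, random_state=0).fit(X, y)
>>> per_tree = [tree.predict(X[:5]) for tree in reg.estimators_]
>>> np.allclose(reg.predict(X[:5]), np.mean(per_tree, axis=0))
True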
"RandomForestClassifier", + "qname": "sklearn.ensemble._forest.RandomForestClassifier", + "decorators": [], + "superclasses": ["ForestClassifier"], + "methods": ["scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__"], + "is_public": false, + "reexported_by": [], + "description": "A random forest classifier.\n\nA random forest is a meta estimator that fits a number of decision tree\nclassifiers on various sub-samples of the dataset and uses averaging to\nimprove the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide `.", + "docstring": "A random forest classifier.\n\nA random forest is a meta estimator that fits a number of decision tree\nclassifiers on various sub-samples of the dataset and uses averaging to\nimprove the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\ncriterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n Note: this parameter is tree-specific.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. 
Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)` (same as \"auto\").\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nbootstrap : bool, default=True\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate the generalization score.\n Only available if bootstrap=True.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls both the randomness of the bootstrapping of the samples used\n when building trees (if ``bootstrap=True``) and the sampling of the\n features to consider when looking for the best split at each node\n (if ``max_features < n_features``).\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. 
See :term:`the Glossary `.\n\nclass_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts, default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n The \"balanced_subsample\" mode is the same as \"balanced\" except that\n weights are computed based on the bootstrap sample for every tree\n grown.\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : DecisionTreeClassifier\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeClassifier\n The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,) or a list of such arrays\n The classes labels (single output problem), or a list of arrays of\n class labels (multi-output problem).\n\nn_classes_ : int or list\n The number of classes (single output problem), or a list containing the\n number of classes for each output (multi-output problem).\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_decision_function_ : ndarray of shape (n_samples, n_classes)\n Decision function computed with out-of-bag estimate on the training\n set. 
If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_decision_function_` might contain NaN. This attribute exists\n only when ``oob_score`` is True.\n\nSee Also\n--------\nDecisionTreeClassifier, ExtraTreesClassifier\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data,\n``max_features=n_features`` and ``bootstrap=False``, if the improvement\nof the criterion is identical for several splits enumerated during the\nsearch of the best split. To obtain a deterministic behaviour during\nfitting, ``random_state`` has to be fixed.\n\nReferences\n----------\n.. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=1000, n_features=4,\n... n_informative=2, n_redundant=0,\n... random_state=0, shuffle=False)\n>>> clf = RandomForestClassifier(max_depth=2, random_state=0)\n>>> clf.fit(X, y)\nRandomForestClassifier(...)\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]", + "code": "class RandomForestClassifier(ForestClassifier):\n \"\"\"\n A random forest classifier.\n\n A random forest is a meta estimator that fits a number of decision tree\n classifiers on various sub-samples of the dataset and uses averaging to\n improve the predictive accuracy and control over-fitting.\n The sub-sample size is controlled with the `max_samples` parameter if\n `bootstrap=True` (default), otherwise the whole dataset is used to build\n each tree.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\n criterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n Note: this parameter is tree-specific.\n\n max_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\n min_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. 
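The Notes above stress that because features are re-permuted at every split, only a pinned `random_state` makes fits reproducible run to run. A sketch reusing the docstring's own example data:

>>> from sklearn.ensemble import RandomForestClassifier
>>> from sklearn.datasets import make_classification
>>> X, y = make_classification(n_samples=1000, n_features=4,
...                            n_informative=2, n_redundant=0,
...                            random_state=0, shuffle=False)
>>> a = RandomForestClassifier(max_depth=2, random_state=0).fit(X, y)
>>> b = RandomForestClassifier(max_depth=2, random_state=0).fit(X, y)
>>> bool((a.predict(X) == b.predict(X)).all())
True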
This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\n max_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)` (same as \"auto\").\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\n max_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\n min_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\n min_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\n bootstrap : bool, default=True\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\n oob_score : bool, default=False\n Whether to use out-of-bag samples to estimate the generalization score.\n Only available if bootstrap=True.\n\n n_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. 
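The weighted impurity decrease equation shown above can be evaluated by hand. A small sketch with made-up node statistics (the helper name `impurity_decrease` is ours, not the library's):

```python
def impurity_decrease(N, N_t, N_t_L, N_t_R,
                      impurity, left_impurity, right_impurity):
    """Weighted impurity decrease, as given in the docstring above."""
    return (N_t / N) * (impurity
                        - (N_t_R / N_t) * right_impurity
                        - (N_t_L / N_t) * left_impurity)

# Hypothetical split: 100 samples total, 40 reach this node, split 25/15.
print(impurity_decrease(N=100, N_t=40, N_t_L=25, N_t_R=15,
                        impurity=0.5, left_impurity=0.3, right_impurity=0.2))
# 0.4 * (0.5 - 0.375*0.2 - 0.625*0.3) = 0.095
```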
See :term:`Glossary\n <n_jobs>` for more details.\n\n random_state : int, RandomState instance or None, default=None\n Controls both the randomness of the bootstrapping of the samples used\n when building trees (if ``bootstrap=True``) and the sampling of the\n features to consider when looking for the best split at each node\n (if ``max_features < n_features``).\n See :term:`Glossary <random_state>` for details.\n\n verbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\n warm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. See :term:`the Glossary <warm_start>`.\n\n class_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts, \\\n default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n The \"balanced_subsample\" mode is the same as \"balanced\" except that\n weights are computed based on the bootstrap sample for every tree\n grown.\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n ccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\n max_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. 
versionadded:: 0.22\n\n Attributes\n ----------\n base_estimator_ : DecisionTreeClassifier\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\n estimators_ : list of DecisionTreeClassifier\n The collection of fitted sub-estimators.\n\n classes_ : ndarray of shape (n_classes,) or a list of such arrays\n The classes labels (single output problem), or a list of arrays of\n class labels (multi-output problem).\n\n n_classes_ : int or list\n The number of classes (single output problem), or a list containing the\n number of classes for each output (multi-output problem).\n\n n_features_ : int\n The number of features when ``fit`` is performed.\n\n n_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\n feature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n oob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\n oob_decision_function_ : ndarray of shape (n_samples, n_classes)\n Decision function computed with out-of-bag estimate on the training\n set. If n_estimators is small it might be possible that a data point\n was never left out during the bootstrap. In this case,\n `oob_decision_function_` might contain NaN. This attribute exists\n only when ``oob_score`` is True.\n\n See Also\n --------\n DecisionTreeClassifier, ExtraTreesClassifier\n\n Notes\n -----\n The default values for the parameters controlling the size of the trees\n (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n unpruned trees which can potentially be very large on some data sets. To\n reduce memory consumption, the complexity and size of the trees should be\n controlled by setting those parameter values.\n\n The features are always randomly permuted at each split. Therefore,\n the best found split may vary, even with the same training data,\n ``max_features=n_features`` and ``bootstrap=False``, if the improvement\n of the criterion is identical for several splits enumerated during the\n search of the best split. To obtain a deterministic behaviour during\n fitting, ``random_state`` has to be fixed.\n\n References\n ----------\n .. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\n Examples\n --------\n >>> from sklearn.ensemble import RandomForestClassifier\n >>> from sklearn.datasets import make_classification\n >>> X, y = make_classification(n_samples=1000, n_features=4,\n ... n_informative=2, n_redundant=0,\n ... 
random_state=0, shuffle=False)\n >>> clf = RandomForestClassifier(max_depth=2, random_state=0)\n >>> clf.fit(X, y)\n RandomForestClassifier(...)\n >>> print(clf.predict([[0, 0, 0, 0]]))\n [1]\n \"\"\"\n @_deprecate_positional_args\n def __init__(self,\n n_estimators=100, *,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n bootstrap=True,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None):\n super().__init__(\n base_estimator=DecisionTreeClassifier(),\n n_estimators=n_estimators,\n estimator_params=(\"criterion\", \"max_depth\", \"min_samples_split\",\n \"min_samples_leaf\", \"min_weight_fraction_leaf\",\n \"max_features\", \"max_leaf_nodes\",\n \"min_impurity_decrease\", \"min_impurity_split\",\n \"random_state\", \"ccp_alpha\"),\n bootstrap=bootstrap,\n oob_score=oob_score,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n class_weight=class_weight,\n max_samples=max_samples)\n\n self.criterion = criterion\n self.max_depth = max_depth\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.max_features = max_features\n self.max_leaf_nodes = max_leaf_nodes\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.ccp_alpha = ccp_alpha", + "instance_attributes": [ + { + "name": "criterion", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "min_samples_split", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_samples_leaf", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_weight_fraction_leaf", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_features", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "min_impurity_decrease", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "ccp_alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor", + "name": "RandomForestRegressor", + "qname": "sklearn.ensemble._forest.RandomForestRegressor", + "decorators": [], + "superclasses": ["ForestRegressor"], + "methods": ["scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__"], + "is_public": false, + "reexported_by": [], + "description": "A random forest regressor.\n\nA random forest is a meta estimator that fits a number of classifying\ndecision trees on various sub-samples of the dataset and uses averaging\nto improve the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide `.", + "docstring": "A random forest regressor.\n\nA random forest is a meta estimator that fits a number of classifying\ndecision trees on various sub-samples of the dataset and uses averaging\nto improve the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the 
:ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\ncriterion : {\"mse\", \"mae\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion, and \"mae\" for the mean\n absolute error.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. 
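The float form of `min_samples_split` (and `min_samples_leaf`) resolves to a sample count via `ceil`, as documented above. A sketch of that arithmetic with made-up numbers:

```python
from math import ceil

n_samples = 1000
min_samples_split = 0.01   # fraction form
# ceil(min_samples_split * n_samples) samples are required to split a node.
print(ceil(min_samples_split * n_samples))  # 10
```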
Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nbootstrap : bool, default=True\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\noob_score : bool, default=False\n Whether to use out-of-bag samples to estimate the generalization score.\n Only available if bootstrap=True.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n <n_jobs>` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls both the randomness of the bootstrapping of the samples used\n when building trees (if ``bootstrap=True``) and the sampling of the\n features to consider when looking for the best split at each node\n (if ``max_features < n_features``).\n See :term:`Glossary <random_state>` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. See :term:`the Glossary <warm_start>`.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. 
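The `warm_start` behaviour documented above lets trees be added incrementally instead of refitting from scratch. A minimal sketch; the dataset and sizes are arbitrary:

```python
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

X, y = make_regression(n_samples=200, n_features=4, random_state=0)
regr = RandomForestRegressor(n_estimators=50, warm_start=True, random_state=0)
regr.fit(X, y)                 # fits 50 trees
regr.n_estimators = 100
regr.fit(X, y)                 # reuses the 50 existing trees and adds 50 more
print(len(regr.estimators_))   # 100
```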
The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nmax_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nbase_estimator_ : DecisionTreeRegressor\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeRegressor\n The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\noob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\noob_prediction_ : ndarray of shape (n_samples,)\n Prediction computed with out-of-bag estimate on the training set.\n This attribute exists only when ``oob_score`` is True.\n\nSee Also\n--------\nDecisionTreeRegressor, ExtraTreesRegressor\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data,\n``max_features=n_features`` and ``bootstrap=False``, if the improvement\nof the criterion is identical for several splits enumerated during the\nsearch of the best split. To obtain a deterministic behaviour during\nfitting, ``random_state`` has to be fixed.\n\nThe default value ``max_features=\"auto\"`` uses ``n_features``\nrather than ``n_features / 3``. The latter was originally suggested in\n[1], whereas the former was more recently justified empirically in [2].\n\nReferences\n----------\n.. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\n.. [2] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized\n trees\", Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, n_informative=2,\n... 
random_state=0, shuffle=False)\n>>> regr = RandomForestRegressor(max_depth=2, random_state=0)\n>>> regr.fit(X, y)\nRandomForestRegressor(...)\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-8.32987858]", + "code": "class RandomForestRegressor(ForestRegressor):\n \"\"\"\n A random forest regressor.\n\n A random forest is a meta estimator that fits a number of classifying\n decision trees on various sub-samples of the dataset and uses averaging\n to improve the predictive accuracy and control over-fitting.\n The sub-sample size is controlled with the `max_samples` parameter if\n `bootstrap=True` (default), otherwise the whole dataset is used to build\n each tree.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_estimators : int, default=100\n The number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\n criterion : {\"mse\", \"mae\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion, and \"mae\" for the mean\n absolute error.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n max_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\n min_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. 
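The `max_samples` rules listed in the docstring translate into a bootstrap size roughly as follows. An illustrative helper under our own name, not the library's internal implementation:

```python
def bootstrap_size(max_samples, n_samples):
    # Mirrors the documented rules for ``max_samples`` (illustrative only).
    if max_samples is None:
        return n_samples                   # draw X.shape[0] samples
    if isinstance(max_samples, int):
        return max_samples                 # draw exactly this many samples
    return round(max_samples * n_samples)  # float in (0, 1): a fraction of X

print(bootstrap_size(None, 1000))   # 1000
print(bootstrap_size(256, 1000))    # 256
print(bootstrap_size(0.5, 1000))    # 500
```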
Samples have\n equal weight when sample_weight is not provided.\n\n max_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\n max_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\n min_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\n min_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\n bootstrap : bool, default=True\n Whether bootstrap samples are used when building trees. If False, the\n whole dataset is used to build each tree.\n\n oob_score : bool, default=False\n Whether to use out-of-bag samples to estimate the generalization score.\n Only available if bootstrap=True.\n\n n_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\n random_state : int, RandomState instance or None, default=None\n Controls both the randomness of the bootstrapping of the samples used\n when building trees (if ``bootstrap=True``) and the sampling of the\n features to consider when looking for the best split at each node\n (if ``max_features < n_features``).\n See :term:`Glossary ` for details.\n\n verbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\n warm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. See :term:`the Glossary `.\n\n ccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. 
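Candidate `ccp_alpha` values need not be guessed: a single tree exposes its cost-complexity pruning path. A sketch using `cost_complexity_pruning_path` on arbitrary toy data:

```python
from sklearn.datasets import make_regression
from sklearn.tree import DecisionTreeRegressor

X, y = make_regression(n_samples=100, n_features=4, random_state=0)
path = DecisionTreeRegressor(random_state=0).cost_complexity_pruning_path(X, y)
# Candidate alphas for the forest's ccp_alpha; larger values prune harder.
print(path.ccp_alphas[:5])
```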
The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\n max_samples : int or float, default=None\n If bootstrap is True, the number of samples to draw from X\n to train each base estimator.\n\n - If None (default), then draw `X.shape[0]` samples.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n .. versionadded:: 0.22\n\n Attributes\n ----------\n base_estimator_ : DecisionTreeRegressor\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\n estimators_ : list of DecisionTreeRegressor\n The collection of fitted sub-estimators.\n\n feature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n n_features_ : int\n The number of features when ``fit`` is performed.\n\n n_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\n oob_score_ : float\n Score of the training dataset obtained using an out-of-bag estimate.\n This attribute exists only when ``oob_score`` is True.\n\n oob_prediction_ : ndarray of shape (n_samples,)\n Prediction computed with out-of-bag estimate on the training set.\n This attribute exists only when ``oob_score`` is True.\n\n See Also\n --------\n DecisionTreeRegressor, ExtraTreesRegressor\n\n Notes\n -----\n The default values for the parameters controlling the size of the trees\n (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n unpruned trees which can potentially be very large on some data sets. To\n reduce memory consumption, the complexity and size of the trees should be\n controlled by setting those parameter values.\n\n The features are always randomly permuted at each split. Therefore,\n the best found split may vary, even with the same training data,\n ``max_features=n_features`` and ``bootstrap=False``, if the improvement\n of the criterion is identical for several splits enumerated during the\n search of the best split. To obtain a deterministic behaviour during\n fitting, ``random_state`` has to be fixed.\n\n The default value ``max_features=\"auto\"`` uses ``n_features``\n rather than ``n_features / 3``. The latter was originally suggested in\n [1], whereas the former was more recently justified empirically in [2].\n\n References\n ----------\n .. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n\n .. [2] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized\n trees\", Machine Learning, 63(1), 3-42, 2006.\n\n Examples\n --------\n >>> from sklearn.ensemble import RandomForestRegressor\n >>> from sklearn.datasets import make_regression\n >>> X, y = make_regression(n_features=4, n_informative=2,\n ... 
random_state=0, shuffle=False)\n >>> regr = RandomForestRegressor(max_depth=2, random_state=0)\n >>> regr.fit(X, y)\n RandomForestRegressor(...)\n >>> print(regr.predict([[0, 0, 0, 0]]))\n [-8.32987858]\n \"\"\"\n @_deprecate_positional_args\n def __init__(self,\n n_estimators=100, *,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n bootstrap=True,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n ccp_alpha=0.0,\n max_samples=None):\n super().__init__(\n base_estimator=DecisionTreeRegressor(),\n n_estimators=n_estimators,\n estimator_params=(\"criterion\", \"max_depth\", \"min_samples_split\",\n \"min_samples_leaf\", \"min_weight_fraction_leaf\",\n \"max_features\", \"max_leaf_nodes\",\n \"min_impurity_decrease\", \"min_impurity_split\",\n \"random_state\", \"ccp_alpha\"),\n bootstrap=bootstrap,\n oob_score=oob_score,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n max_samples=max_samples)\n\n self.criterion = criterion\n self.max_depth = max_depth\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.max_features = max_features\n self.max_leaf_nodes = max_leaf_nodes\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.ccp_alpha = ccp_alpha", + "instance_attributes": [ + { + "name": "criterion", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "min_samples_split", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_samples_leaf", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_weight_fraction_leaf", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_features", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "min_impurity_decrease", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "ccp_alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding", + "name": "RandomTreesEmbedding", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding", + "decorators": [], + "superclasses": ["BaseForest"], + "methods": [ + "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__", + "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/_set_oob_score", + "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/fit", + "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/fit_transform", + "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/transform" + ], + "is_public": false, + "reexported_by": [], + "description": "An ensemble of totally random trees.\n\nAn unsupervised transformation of a dataset to a high-dimensional\nsparse representation. A datapoint is coded according to which leaf of\neach tree it is sorted into. Using a one-hot encoding of the leaves,\nthis leads to a binary coding with as many ones as there are trees in\nthe forest.\n\nThe dimensionality of the resulting representation is\n``n_out <= n_estimators * max_leaf_nodes``. 
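The dimensionality bound stated above (and, as the next sentence notes, the `n_estimators * 2 ** max_depth` bound when `max_leaf_nodes` is None) can be checked empirically. A sketch on toy data:

```python
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomTreesEmbedding

X, _ = make_blobs(n_samples=50, random_state=0)
emb = RandomTreesEmbedding(n_estimators=5, max_depth=2, random_state=0).fit(X)
n_out = emb.transform(X).shape[1]
# With max_leaf_nodes=None, n_out is at most n_estimators * 2 ** max_depth.
print(n_out, 5 * 2 ** 2)   # n_out <= 20
```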
If ``max_leaf_nodes == None``,\nthe number of leaf nodes is at most ``n_estimators * 2 ** max_depth``.\n\nRead more in the :ref:`User Guide `.", + "docstring": "An ensemble of totally random trees.\n\nAn unsupervised transformation of a dataset to a high-dimensional\nsparse representation. A datapoint is coded according to which leaf of\neach tree it is sorted into. Using a one-hot encoding of the leaves,\nthis leads to a binary coding with as many ones as there are trees in\nthe forest.\n\nThe dimensionality of the resulting representation is\n``n_out <= n_estimators * max_leaf_nodes``. If ``max_leaf_nodes == None``,\nthe number of leaf nodes is at most ``n_estimators * 2 ** max_depth``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_estimators : int, default=100\n Number of trees in the forest.\n\n .. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\nmax_depth : int, default=5\n The maximum depth of each tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` is the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` is the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. 
The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nsparse_output : bool, default=True\n Whether or not to return a sparse CSR matrix, as default behavior,\n or to return a dense array compatible with dense pipeline operators.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`transform`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. See :term:`Glossary\n ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the generation of the random `y` used to fit the trees\n and the draw of the splits for each feature at the trees' nodes.\n See :term:`Glossary ` for details.\n\nverbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. See :term:`the Glossary `.\n\nAttributes\n----------\nbase_estimator_ : DecisionTreeClassifier instance\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\nestimators_ : list of DecisionTreeClassifier instances\n The collection of fitted sub-estimators.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The feature importances (the higher, the more important the feature).\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\none_hot_encoder_ : OneHotEncoder instance\n One-hot encoder used to create the sparse embedding.\n\nReferences\n----------\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n.. [2] Moosmann, F. and Triggs, B. and Jurie, F. \"Fast discriminative\n visual codebooks using randomized clustering forests\"\n NIPS 2007\n\nExamples\n--------\n>>> from sklearn.ensemble import RandomTreesEmbedding\n>>> X = [[0,0], [1,0], [0,1], [-1,0], [0,-1]]\n>>> random_trees = RandomTreesEmbedding(\n... n_estimators=5, random_state=0, max_depth=1).fit(X)\n>>> X_sparse_embedding = random_trees.transform(X)\n>>> X_sparse_embedding.toarray()\narray([[0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n [0., 1., 0., 1., 0., 1., 0., 1., 0., 1.],\n [1., 0., 1., 0., 1., 0., 1., 0., 1., 0.],\n [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.]])", + "code": "class RandomTreesEmbedding(BaseForest):\n \"\"\"\n An ensemble of totally random trees.\n\n An unsupervised transformation of a dataset to a high-dimensional\n sparse representation. A datapoint is coded according to which leaf of\n each tree it is sorted into. Using a one-hot encoding of the leaves,\n this leads to a binary coding with as many ones as there are trees in\n the forest.\n\n The dimensionality of the resulting representation is\n ``n_out <= n_estimators * max_leaf_nodes``. If ``max_leaf_nodes == None``,\n the number of leaf nodes is at most ``n_estimators * 2 ** max_depth``.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_estimators : int, default=100\n Number of trees in the forest.\n\n .. 
versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22.\n\n max_depth : int, default=5\n The maximum depth of each tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\n min_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` is the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` is the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\n max_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\n min_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\n min_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\n sparse_output : bool, default=True\n Whether or not to return a sparse CSR matrix, as default behavior,\n or to return a dense array compatible with dense pipeline operators.\n\n n_jobs : int, default=None\n The number of jobs to run in parallel. :meth:`fit`, :meth:`transform`,\n :meth:`decision_path` and :meth:`apply` are all parallelized over the\n trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors. 
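As the `n_jobs` description says, leaving it as ``None`` defers to any active `joblib.parallel_backend` context. A minimal sketch of that pattern (backend and worker count are arbitrary choices):

```python
from joblib import parallel_backend
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomTreesEmbedding

X, _ = make_blobs(n_samples=100, random_state=0)
emb = RandomTreesEmbedding(n_estimators=20)   # n_jobs left as None
with parallel_backend("threading", n_jobs=2):
    emb.fit(X)                                # trees are fitted with 2 threads
```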
See :term:`Glossary\n ` for more details.\n\n random_state : int, RandomState instance or None, default=None\n Controls the generation of the random `y` used to fit the trees\n and the draw of the splits for each feature at the trees' nodes.\n See :term:`Glossary ` for details.\n\n verbose : int, default=0\n Controls the verbosity when fitting and predicting.\n\n warm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. See :term:`the Glossary `.\n\n Attributes\n ----------\n base_estimator_ : DecisionTreeClassifier instance\n The child estimator template used to create the collection of fitted\n sub-estimators.\n\n estimators_ : list of DecisionTreeClassifier instances\n The collection of fitted sub-estimators.\n\n feature_importances_ : ndarray of shape (n_features,)\n The feature importances (the higher, the more important the feature).\n\n n_features_ : int\n The number of features when ``fit`` is performed.\n\n n_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\n one_hot_encoder_ : OneHotEncoder instance\n One-hot encoder used to create the sparse embedding.\n\n References\n ----------\n .. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n .. [2] Moosmann, F. and Triggs, B. and Jurie, F. \"Fast discriminative\n visual codebooks using randomized clustering forests\"\n NIPS 2007\n\n Examples\n --------\n >>> from sklearn.ensemble import RandomTreesEmbedding\n >>> X = [[0,0], [1,0], [0,1], [-1,0], [0,-1]]\n >>> random_trees = RandomTreesEmbedding(\n ... n_estimators=5, random_state=0, max_depth=1).fit(X)\n >>> X_sparse_embedding = random_trees.transform(X)\n >>> X_sparse_embedding.toarray()\n array([[0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.],\n [0., 1., 0., 1., 0., 1., 0., 1., 0., 1.],\n [1., 0., 1., 0., 1., 0., 1., 0., 1., 0.],\n [0., 1., 1., 0., 1., 0., 0., 1., 1., 0.]])\n \"\"\"\n\n criterion = 'mse'\n max_features = 1\n\n @_deprecate_positional_args\n def __init__(self,\n n_estimators=100, *,\n max_depth=5,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n sparse_output=True,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False):\n super().__init__(\n base_estimator=ExtraTreeRegressor(),\n n_estimators=n_estimators,\n estimator_params=(\"criterion\", \"max_depth\", \"min_samples_split\",\n \"min_samples_leaf\", \"min_weight_fraction_leaf\",\n \"max_features\", \"max_leaf_nodes\",\n \"min_impurity_decrease\", \"min_impurity_split\",\n \"random_state\"),\n bootstrap=False,\n oob_score=False,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n max_samples=None)\n\n self.max_depth = max_depth\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.max_leaf_nodes = max_leaf_nodes\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.sparse_output = sparse_output\n\n def _set_oob_score(self, X, y):\n raise NotImplementedError(\"OOB score not supported by tree embedding\")\n\n def fit(self, X, y=None, sample_weight=None):\n \"\"\"\n Fit estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, 
n_features)\n The input samples. Use ``dtype=np.float32`` for maximum\n efficiency. Sparse matrices are also supported, use sparse\n ``csc_matrix`` for maximum efficiency.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\n Returns\n -------\n self : object\n\n \"\"\"\n self.fit_transform(X, y, sample_weight=sample_weight)\n return self\n\n def fit_transform(self, X, y=None, sample_weight=None):\n \"\"\"\n Fit estimator and transform dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data used to build forests. Use ``dtype=np.float32`` for\n maximum efficiency.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\n Returns\n -------\n X_transformed : sparse matrix of shape (n_samples, n_out)\n Transformed dataset.\n \"\"\"\n X = check_array(X, accept_sparse=['csc'])\n if issparse(X):\n # Pre-sort indices to avoid that each individual tree of the\n # ensemble sorts the indices.\n X.sort_indices()\n\n rnd = check_random_state(self.random_state)\n y = rnd.uniform(size=X.shape[0])\n super().fit(X, y, sample_weight=sample_weight)\n\n self.one_hot_encoder_ = OneHotEncoder(sparse=self.sparse_output)\n return self.one_hot_encoder_.fit_transform(self.apply(X))\n\n def transform(self, X):\n \"\"\"\n Transform dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data to be transformed. Use ``dtype=np.float32`` for maximum\n efficiency. 
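The `fit_transform` implementation above boils down to `apply` followed by one-hot encoding of the leaf indices. The same embedding can be reproduced by hand; a sketch (note ``OneHotEncoder(sparse=...)`` matches the 0.24-era API used in the code above):

```python
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomTreesEmbedding
from sklearn.preprocessing import OneHotEncoder

X, _ = make_blobs(n_samples=20, random_state=0)
emb = RandomTreesEmbedding(n_estimators=3, max_depth=2, random_state=0).fit(X)

leaves = emb.apply(X)                     # (n_samples, n_estimators) leaf indices
manual = OneHotEncoder(sparse=True).fit_transform(leaves)
print((manual != emb.transform(X)).nnz)   # 0: the sparse embeddings are identical
```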
Sparse matrices are also supported, use sparse\n ``csr_matrix`` for maximum efficiency.\n\n Returns\n -------\n X_transformed : sparse matrix of shape (n_samples, n_out)\n Transformed dataset.\n \"\"\"\n check_is_fitted(self)\n return self.one_hot_encoder_.transform(self.apply(X))", + "instance_attributes": [ + { + "name": "max_depth", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_samples_split", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_samples_leaf", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_weight_fraction_leaf", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "min_impurity_decrease", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "sparse_output", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting", + "name": "BaseGradientBoosting", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting", + "decorators": [], + "superclasses": ["BaseEnsemble"], + "methods": [ + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_validate_y", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stage", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_check_params", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_init_state", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_clear_state", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_resize_state", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_is_initialized", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_check_initialized", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_warn_mae_for_criterion", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/fit", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stages", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_make_estimator", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_raw_predict_init", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_raw_predict", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_staged_raw_predict", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/feature_importances_@getter", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_compute_partial_dependence_recursion", + "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/apply" + ], + "is_public": false, + "reexported_by": [], + "description": "Abstract base class for Gradient Boosting.", + "docstring": "Abstract base class for Gradient Boosting.", + "code": "class BaseGradientBoosting(BaseEnsemble, metaclass=ABCMeta):\n \"\"\"Abstract base class for Gradient Boosting.\"\"\"\n\n @abstractmethod\n def __init__(self, *, loss, learning_rate, n_estimators, criterion,\n min_samples_split, min_samples_leaf, min_weight_fraction_leaf,\n max_depth, min_impurity_decrease, min_impurity_split,\n init, subsample, max_features, ccp_alpha,\n random_state, alpha=0.9, verbose=0, max_leaf_nodes=None,\n warm_start=False, validation_fraction=0.1,\n n_iter_no_change=None, tol=1e-4):\n\n self.n_estimators = n_estimators\n self.learning_rate = learning_rate\n self.loss = loss\n self.criterion = criterion\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.subsample = 
subsample\n self.max_features = max_features\n self.max_depth = max_depth\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.ccp_alpha = ccp_alpha\n self.init = init\n self.random_state = random_state\n self.alpha = alpha\n self.verbose = verbose\n self.max_leaf_nodes = max_leaf_nodes\n self.warm_start = warm_start\n self.validation_fraction = validation_fraction\n self.n_iter_no_change = n_iter_no_change\n self.tol = tol\n\n @abstractmethod\n def _validate_y(self, y, sample_weight=None):\n \"\"\"Called by fit to validate y.\"\"\"\n\n def _fit_stage(self, i, X, y, raw_predictions, sample_weight, sample_mask,\n random_state, X_csc=None, X_csr=None):\n \"\"\"Fit another stage of ``_n_classes`` trees to the boosting model.\"\"\"\n\n assert sample_mask.dtype == bool\n loss = self.loss_\n original_y = y\n\n # Need to pass a copy of raw_predictions to negative_gradient()\n # because raw_predictions is partially updated at the end of the loop\n # in update_terminal_regions(), and gradients need to be evaluated at\n # iteration i - 1.\n raw_predictions_copy = raw_predictions.copy()\n\n for k in range(loss.K):\n if loss.is_multi_class:\n y = np.array(original_y == k, dtype=np.float64)\n\n residual = loss.negative_gradient(y, raw_predictions_copy, k=k,\n sample_weight=sample_weight)\n\n # induce regression tree on residuals\n tree = DecisionTreeRegressor(\n criterion=self.criterion,\n splitter='best',\n max_depth=self.max_depth,\n min_samples_split=self.min_samples_split,\n min_samples_leaf=self.min_samples_leaf,\n min_weight_fraction_leaf=self.min_weight_fraction_leaf,\n min_impurity_decrease=self.min_impurity_decrease,\n min_impurity_split=self.min_impurity_split,\n max_features=self.max_features,\n max_leaf_nodes=self.max_leaf_nodes,\n random_state=random_state,\n ccp_alpha=self.ccp_alpha)\n\n if self.subsample < 1.0:\n # no inplace multiplication!\n sample_weight = sample_weight * sample_mask.astype(np.float64)\n\n X = X_csr if X_csr is not None else X\n tree.fit(X, residual, sample_weight=sample_weight,\n check_input=False)\n\n # update tree leaves\n loss.update_terminal_regions(\n tree.tree_, X, y, residual, raw_predictions, sample_weight,\n sample_mask, learning_rate=self.learning_rate, k=k)\n\n # add tree to ensemble\n self.estimators_[i, k] = tree\n\n return raw_predictions\n\n def _check_params(self):\n \"\"\"Check validity of parameters and raise ValueError if not valid.\"\"\"\n if self.n_estimators <= 0:\n raise ValueError(\"n_estimators must be greater than 0 but \"\n \"was %r\" % self.n_estimators)\n\n if self.learning_rate <= 0.0:\n raise ValueError(\"learning_rate must be greater than 0 but \"\n \"was %r\" % self.learning_rate)\n\n if (self.loss not in self._SUPPORTED_LOSS\n or self.loss not in _gb_losses.LOSS_FUNCTIONS):\n raise ValueError(\"Loss '{0:s}' not supported. 
\".format(self.loss))\n\n if self.loss == 'deviance':\n loss_class = (_gb_losses.MultinomialDeviance\n if len(self.classes_) > 2\n else _gb_losses.BinomialDeviance)\n else:\n loss_class = _gb_losses.LOSS_FUNCTIONS[self.loss]\n\n if is_classifier(self):\n self.loss_ = loss_class(self.n_classes_)\n elif self.loss in (\"huber\", \"quantile\"):\n self.loss_ = loss_class(self.alpha)\n else:\n self.loss_ = loss_class()\n\n if not (0.0 < self.subsample <= 1.0):\n raise ValueError(\"subsample must be in (0,1] but \"\n \"was %r\" % self.subsample)\n\n if self.init is not None:\n # init must be an estimator or 'zero'\n if isinstance(self.init, BaseEstimator):\n self.loss_.check_init_estimator(self.init)\n elif not (isinstance(self.init, str) and self.init == 'zero'):\n raise ValueError(\n \"The init parameter must be an estimator or 'zero'. \"\n \"Got init={}\".format(self.init)\n )\n\n if not (0.0 < self.alpha < 1.0):\n raise ValueError(\"alpha must be in (0.0, 1.0) but \"\n \"was %r\" % self.alpha)\n\n if isinstance(self.max_features, str):\n if self.max_features == \"auto\":\n if is_classifier(self):\n max_features = max(1, int(np.sqrt(self.n_features_)))\n else:\n max_features = self.n_features_\n elif self.max_features == \"sqrt\":\n max_features = max(1, int(np.sqrt(self.n_features_)))\n elif self.max_features == \"log2\":\n max_features = max(1, int(np.log2(self.n_features_)))\n else:\n raise ValueError(\"Invalid value for max_features: %r. \"\n \"Allowed string values are 'auto', 'sqrt' \"\n \"or 'log2'.\" % self.max_features)\n elif self.max_features is None:\n max_features = self.n_features_\n elif isinstance(self.max_features, numbers.Integral):\n max_features = self.max_features\n else: # float\n if 0. < self.max_features <= 1.:\n max_features = max(int(self.max_features *\n self.n_features_), 1)\n else:\n raise ValueError(\"max_features must be in (0, n_features]\")\n\n self.max_features_ = max_features\n\n if not isinstance(self.n_iter_no_change,\n (numbers.Integral, type(None))):\n raise ValueError(\"n_iter_no_change should either be None or an \"\n \"integer. %r was passed\"\n % self.n_iter_no_change)\n\n def _init_state(self):\n \"\"\"Initialize model state and allocate model state data structures. \"\"\"\n\n self.init_ = self.init\n if self.init_ is None:\n self.init_ = self.loss_.init_estimator()\n\n self.estimators_ = np.empty((self.n_estimators, self.loss_.K),\n dtype=object)\n self.train_score_ = np.zeros((self.n_estimators,), dtype=np.float64)\n # do oob?\n if self.subsample < 1.0:\n self.oob_improvement_ = np.zeros((self.n_estimators),\n dtype=np.float64)\n\n def _clear_state(self):\n \"\"\"Clear the state of the gradient boosting model. 
\"\"\"\n if hasattr(self, 'estimators_'):\n self.estimators_ = np.empty((0, 0), dtype=object)\n if hasattr(self, 'train_score_'):\n del self.train_score_\n if hasattr(self, 'oob_improvement_'):\n del self.oob_improvement_\n if hasattr(self, 'init_'):\n del self.init_\n if hasattr(self, '_rng'):\n del self._rng\n\n def _resize_state(self):\n \"\"\"Add additional ``n_estimators`` entries to all attributes.\"\"\"\n # self.n_estimators is the number of additional est to fit\n total_n_estimators = self.n_estimators\n if total_n_estimators < self.estimators_.shape[0]:\n raise ValueError('resize with smaller n_estimators %d < %d' %\n (total_n_estimators, self.estimators_.shape[0]))\n\n self.estimators_ = np.resize(self.estimators_,\n (total_n_estimators, self.loss_.K))\n self.train_score_ = np.resize(self.train_score_, total_n_estimators)\n if (self.subsample < 1 or hasattr(self, 'oob_improvement_')):\n # if do oob resize arrays or create new if not available\n if hasattr(self, 'oob_improvement_'):\n self.oob_improvement_ = np.resize(self.oob_improvement_,\n total_n_estimators)\n else:\n self.oob_improvement_ = np.zeros((total_n_estimators,),\n dtype=np.float64)\n\n def _is_initialized(self):\n return len(getattr(self, 'estimators_', [])) > 0\n\n def _check_initialized(self):\n \"\"\"Check that the estimator is initialized, raising an error if not.\"\"\"\n check_is_fitted(self)\n\n @abstractmethod\n def _warn_mae_for_criterion(self):\n pass\n\n def fit(self, X, y, sample_weight=None, monitor=None):\n \"\"\"Fit the gradient boosting model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n y : array-like of shape (n_samples,)\n Target values (strings or integers in classification, real numbers\n in regression)\n For classification, labels must correspond to classes.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\n monitor : callable, default=None\n The monitor is called after each iteration with the current\n iteration, a reference to the estimator and the local variables of\n ``_fit_stages`` as keyword arguments ``callable(i, self,\n locals())``. If the callable returns ``True`` the fitting procedure\n is stopped. 
The monitor can be used for various things such as\n computing held-out estimates, early stopping, model introspection, and\n snapshotting.\n\n Returns\n -------\n self : object\n \"\"\"\n if self.criterion == 'mae':\n # TODO: This should raise an error from 1.1\n self._warn_mae_for_criterion()\n\n # if not warm_start - clear the estimator state\n if not self.warm_start:\n self._clear_state()\n\n # Check input\n # Since check_array converts both X and y to the same dtype, but the\n # trees use different types for X and y, checking them separately.\n\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'],\n dtype=DTYPE, multi_output=True)\n n_samples, self.n_features_ = X.shape\n\n sample_weight_is_none = sample_weight is None\n\n sample_weight = _check_sample_weight(sample_weight, X)\n\n y = column_or_1d(y, warn=True)\n\n if is_classifier(self):\n y = self._validate_y(y, sample_weight)\n else:\n y = self._validate_y(y)\n\n if self.n_iter_no_change is not None:\n stratify = y if is_classifier(self) else None\n X, X_val, y, y_val, sample_weight, sample_weight_val = (\n train_test_split(X, y, sample_weight,\n random_state=self.random_state,\n test_size=self.validation_fraction,\n stratify=stratify))\n if is_classifier(self):\n if self._n_classes != np.unique(y).shape[0]:\n # We choose to error here. The problem is that the init\n # estimator would be trained on y, which has some missing\n # classes now, so its predictions would not have the\n # correct shape.\n raise ValueError(\n 'The training data after the early stopping split '\n 'is missing some classes. Try using another random '\n 'seed.'\n )\n else:\n X_val = y_val = sample_weight_val = None\n\n self._check_params()\n\n if not self._is_initialized():\n # init state\n self._init_state()\n\n # fit initial model and initialize raw predictions\n if self.init_ == 'zero':\n raw_predictions = np.zeros(shape=(X.shape[0], self.loss_.K),\n dtype=np.float64)\n else:\n # XXX clean this once we have a support_sample_weight tag\n if sample_weight_is_none:\n self.init_.fit(X, y)\n else:\n msg = (\"The initial estimator {} does not support sample \"\n \"weights.\".format(self.init_.__class__.__name__))\n try:\n self.init_.fit(X, y, sample_weight=sample_weight)\n except TypeError as e:\n # regular estimator without SW support\n raise ValueError(msg) from e\n except ValueError as e:\n if \"pass parameters to specific steps of \"\\\n \"your pipeline using the \"\\\n \"stepname__parameter\" in str(e): # pipeline\n raise ValueError(msg) from e\n else: # regular estimator whose input checking failed\n raise\n\n raw_predictions = \\\n self.loss_.get_init_raw_predictions(X, self.init_)\n\n begin_at_stage = 0\n\n # The rng state must be preserved if warm_start is True\n self._rng = check_random_state(self.random_state)\n\n else:\n # add more estimators to fitted model\n # invariant: warm_start = True\n if self.n_estimators < self.estimators_.shape[0]:\n raise ValueError('n_estimators=%d must be larger or equal to '\n 'estimators_.shape[0]=%d when '\n 'warm_start==True'\n % (self.n_estimators,\n self.estimators_.shape[0]))\n begin_at_stage = self.estimators_.shape[0]\n # The requirements of _decision_function (called in two lines\n # below) are more constrained than fit. 
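The `monitor` callback documented above receives `(i, self, locals())` after each stage and stops fitting when it returns `True`. A small usage sketch with a deliberately trivial stopping rule (the rule itself is illustrative):

    from sklearn.datasets import make_regression
    from sklearn.ensemble import GradientBoostingRegressor

    X, y = make_regression(random_state=0)

    def ten_stage_monitor(i, est, locals_):
        # Returning True stops fitting; here, halt after ten stages just
        # to show the mechanism.
        return i >= 9

    est = GradientBoostingRegressor(n_estimators=500, random_state=0)
    est.fit(X, y, monitor=ten_stage_monitor)
    print(est.n_estimators_)  # 10: arrays are trimmed to the stages fit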
It accepts only CSR\n # matrices.\n X = check_array(X, dtype=DTYPE, order=\"C\", accept_sparse='csr')\n raw_predictions = self._raw_predict(X)\n self._resize_state()\n\n # fit the boosting stages\n n_stages = self._fit_stages(\n X, y, raw_predictions, sample_weight, self._rng, X_val, y_val,\n sample_weight_val, begin_at_stage, monitor)\n\n # change shape of arrays after fit (early-stopping or additional ests)\n if n_stages != self.estimators_.shape[0]:\n self.estimators_ = self.estimators_[:n_stages]\n self.train_score_ = self.train_score_[:n_stages]\n if hasattr(self, 'oob_improvement_'):\n self.oob_improvement_ = self.oob_improvement_[:n_stages]\n\n self.n_estimators_ = n_stages\n return self\n\n def _fit_stages(self, X, y, raw_predictions, sample_weight, random_state,\n X_val, y_val, sample_weight_val,\n begin_at_stage=0, monitor=None):\n \"\"\"Iteratively fits the stages.\n\n For each stage it computes the progress (OOB, train score)\n and delegates to ``_fit_stage``.\n Returns the number of stages fit; might differ from ``n_estimators``\n due to early stopping.\n \"\"\"\n n_samples = X.shape[0]\n do_oob = self.subsample < 1.0\n sample_mask = np.ones((n_samples, ), dtype=bool)\n n_inbag = max(1, int(self.subsample * n_samples))\n loss_ = self.loss_\n\n if self.verbose:\n verbose_reporter = VerboseReporter(verbose=self.verbose)\n verbose_reporter.init(self, begin_at_stage)\n\n X_csc = csc_matrix(X) if issparse(X) else None\n X_csr = csr_matrix(X) if issparse(X) else None\n\n if self.n_iter_no_change is not None:\n loss_history = np.full(self.n_iter_no_change, np.inf)\n # We create a generator to get the predictions for X_val after\n # the addition of each successive stage\n y_val_pred_iter = self._staged_raw_predict(X_val)\n\n # perform boosting iterations\n i = begin_at_stage\n for i in range(begin_at_stage, self.n_estimators):\n\n # subsampling\n if do_oob:\n sample_mask = _random_sample_mask(n_samples, n_inbag,\n random_state)\n # OOB score before adding this stage\n old_oob_score = loss_(y[~sample_mask],\n raw_predictions[~sample_mask],\n sample_weight[~sample_mask])\n\n # fit next stage of trees\n raw_predictions = self._fit_stage(\n i, X, y, raw_predictions, sample_weight, sample_mask,\n random_state, X_csc, X_csr)\n\n # track deviance (= loss)\n if do_oob:\n self.train_score_[i] = loss_(y[sample_mask],\n raw_predictions[sample_mask],\n sample_weight[sample_mask])\n self.oob_improvement_[i] = (\n old_oob_score - loss_(y[~sample_mask],\n raw_predictions[~sample_mask],\n sample_weight[~sample_mask]))\n else:\n # no need to fancy index w/ no subsampling\n self.train_score_[i] = loss_(y, raw_predictions, sample_weight)\n\n if self.verbose > 0:\n verbose_reporter.update(i, self)\n\n if monitor is not None:\n early_stopping = monitor(i, self, locals())\n if early_stopping:\n break\n\n # We also provide an early stopping based on the score from\n # validation set (X_val, y_val), if n_iter_no_change is set\n if self.n_iter_no_change is not None:\n # By calling next(y_val_pred_iter), we get the predictions\n # for X_val after the addition of the current stage\n validation_loss = loss_(y_val, next(y_val_pred_iter),\n sample_weight_val)\n\n # Require validation_score to be better (less) than at least\n # one of the last n_iter_no_change evaluations\n if np.any(validation_loss + self.tol < loss_history):\n loss_history[i % len(loss_history)] = validation_loss\n else:\n break\n\n return i + 1\n\n def _make_estimator(self, append=True):\n # we don't need _make_estimator\n raise 
NotImplementedError()\n\n def _raw_predict_init(self, X):\n \"\"\"Check input and compute raw predictions of the init estimator.\"\"\"\n self._check_initialized()\n X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)\n if X.shape[1] != self.n_features_:\n raise ValueError(\"X.shape[1] should be {0:d}, not {1:d}.\".format(\n self.n_features_, X.shape[1]))\n if self.init_ == 'zero':\n raw_predictions = np.zeros(shape=(X.shape[0], self.loss_.K),\n dtype=np.float64)\n else:\n raw_predictions = self.loss_.get_init_raw_predictions(\n X, self.init_).astype(np.float64)\n return raw_predictions\n\n def _raw_predict(self, X):\n \"\"\"Return the sum of the trees raw predictions (+ init estimator).\"\"\"\n raw_predictions = self._raw_predict_init(X)\n predict_stages(self.estimators_, X, self.learning_rate,\n raw_predictions)\n return raw_predictions\n\n def _staged_raw_predict(self, X):\n \"\"\"Compute raw predictions of ``X`` for each iteration.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n raw_predictions : generator of ndarray of shape (n_samples, k)\n The raw predictions of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification are special cases with\n ``k == 1``, otherwise ``k==n_classes``.\n \"\"\"\n X = check_array(X, dtype=DTYPE, order=\"C\", accept_sparse='csr')\n raw_predictions = self._raw_predict_init(X)\n for i in range(self.estimators_.shape[0]):\n predict_stage(self.estimators_, i, X, self.learning_rate,\n raw_predictions)\n yield raw_predictions.copy()\n\n @property\n def feature_importances_(self):\n \"\"\"The impurity-based feature importances.\n\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). 
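`_staged_raw_predict` is private, but the public per-stage generators built on it (`staged_predict`, `staged_decision_function`) enable exactly the monitoring the docstring mentions. A sketch that picks the ensemble size minimizing test error, without refitting for each candidate size (dataset and split are illustrative):

    import numpy as np
    from sklearn.datasets import make_hastie_10_2
    from sklearn.ensemble import GradientBoostingClassifier

    X, y = make_hastie_10_2(random_state=0)
    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    clf = GradientBoostingClassifier(n_estimators=200, max_depth=1,
                                     random_state=0).fit(X_train, y_train)

    # One prediction array per boosting stage.
    test_errors = [np.mean(y_pred != y_test)
                   for y_pred in clf.staged_predict(X_test)]
    best_n = int(np.argmin(test_errors)) + 1
    print(best_n, test_errors[best_n - 1])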
See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n Returns\n -------\n feature_importances_ : ndarray of shape (n_features,)\n The values of this array sum to 1, unless all trees are single node\n trees consisting of only the root node, in which case it will be an\n array of zeros.\n \"\"\"\n self._check_initialized()\n\n relevant_trees = [tree\n for stage in self.estimators_ for tree in stage\n if tree.tree_.node_count > 1]\n if not relevant_trees:\n # degenerate case where all trees have only one node\n return np.zeros(shape=self.n_features_, dtype=np.float64)\n\n relevant_feature_importances = [\n tree.tree_.compute_feature_importances(normalize=False)\n for tree in relevant_trees\n ]\n avg_feature_importances = np.mean(relevant_feature_importances,\n axis=0, dtype=np.float64)\n return avg_feature_importances / np.sum(avg_feature_importances)\n\n def _compute_partial_dependence_recursion(self, grid, target_features):\n \"\"\"Fast partial dependence computation.\n\n Parameters\n ----------\n grid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\n target_features : ndarray of shape (n_target_features,)\n The set of target features for which the partial dependence\n should be evaluated.\n\n Returns\n -------\n averaged_predictions : ndarray of shape \\\n (n_trees_per_iteration, n_samples)\n The value of the partial dependence function on each grid point.\n \"\"\"\n if self.init is not None:\n warnings.warn(\n 'Using recursion method with a non-constant init predictor '\n 'will lead to incorrect partial dependence values. '\n 'Got init=%s.' % self.init,\n UserWarning\n )\n grid = np.asarray(grid, dtype=DTYPE, order='C')\n n_estimators, n_trees_per_stage = self.estimators_.shape\n averaged_predictions = np.zeros((n_trees_per_stage, grid.shape[0]),\n dtype=np.float64, order='C')\n for stage in range(n_estimators):\n for k in range(n_trees_per_stage):\n tree = self.estimators_[stage, k].tree_\n tree.compute_partial_dependence(grid, target_features,\n averaged_predictions[k])\n averaged_predictions *= self.learning_rate\n\n return averaged_predictions\n\n def apply(self, X):\n \"\"\"Apply trees in the ensemble to X, return leaf indices.\n\n .. versionadded:: 0.17\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. 
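The warning above points to `sklearn.inspection.permutation_importance` as a cross-check on the impurity-based importances; a brief sketch of the comparison (synthetic data, illustrative only):

    from sklearn.datasets import make_classification
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.inspection import permutation_importance

    X, y = make_classification(n_informative=3, random_state=0)
    clf = GradientBoostingClassifier(random_state=0).fit(X, y)

    # Impurity-based importances: averaged over non-trivial trees and
    # normalized to sum to 1, per the property above.
    print(clf.feature_importances_.sum())  # ~1.0

    # The permutation-based alternative suggested by the warning.
    result = permutation_importance(clf, X, y, n_repeats=5, random_state=0)
    print(result.importances_mean.argsort()[::-1][:3])  # top three features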
If a sparse matrix is provided, it will\n be converted to a sparse ``csr_matrix``.\n\n Returns\n -------\n X_leaves : array-like of shape (n_samples, n_estimators, n_classes)\n For each datapoint x in X and for each tree in the ensemble,\n return the index of the leaf x ends up in each estimator.\n In the case of binary classification n_classes is 1.\n \"\"\"\n\n self._check_initialized()\n X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)\n\n # n_classes will be equal to 1 in the binary classification or the\n # regression case.\n n_estimators, n_classes = self.estimators_.shape\n leaves = np.zeros((X.shape[0], n_estimators, n_classes))\n\n for i in range(n_estimators):\n for j in range(n_classes):\n estimator = self.estimators_[i, j]\n leaves[:, i, j] = estimator.apply(X, check_input=False)\n\n return leaves", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "warm_start", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "validation_fraction", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "estimators_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "train_score_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "oob_improvement_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "n_estimators_", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier", + "name": "GradientBoostingClassifier", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier", + "decorators": [], + "superclasses": ["ClassifierMixin", "BaseGradientBoosting"], + "methods": [ + "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/_validate_y", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/_warn_mae_for_criterion", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/decision_function", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/staged_decision_function", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/predict", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/staged_predict", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/predict_proba", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/predict_log_proba", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/staged_predict_proba" + ], + "is_public": false, + "reexported_by": [], + "description": "Gradient Boosting for classification.\n\nGB builds an additive model in a\nforward stage-wise fashion; it allows for the optimization of\narbitrary differentiable loss functions. In each stage ``n_classes_``\nregression trees are fit on the negative gradient of the\nbinomial or multinomial deviance loss function. Binary classification\nis a special case where only a single regression tree is induced.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Gradient Boosting for classification.\n\nGB builds an additive model in a\nforward stage-wise fashion; it allows for the optimization of\narbitrary differentiable loss functions. 
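A short sketch of `apply` on a binary problem, where the last axis of the returned array is 1 (the printed shape assumes `make_hastie_10_2`'s default 12000 samples):

    from sklearn.datasets import make_hastie_10_2
    from sklearn.ensemble import GradientBoostingClassifier

    X, y = make_hastie_10_2(random_state=0)
    clf = GradientBoostingClassifier(n_estimators=10, max_depth=1,
                                     random_state=0).fit(X, y)

    leaves = clf.apply(X)
    # Binary classification: one tree per stage, so n_classes is 1 here.
    print(leaves.shape)  # (12000, 10, 1)

The leaf indices are commonly one-hot encoded to use the ensemble as a feature transformer, though that pipeline is outside this module.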
In each stage ``n_classes_``\nregression trees are fit on the negative gradient of the\nbinomial or multinomial deviance loss function. Binary classification\nis a special case where only a single regression tree is induced.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nloss : {'deviance', 'exponential'}, default='deviance'\n The loss function to be optimized. 'deviance' refers to\n deviance (= logistic regression) for classification\n with probabilistic outputs. For loss 'exponential' gradient\n boosting recovers the AdaBoost algorithm.\n\nlearning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by `learning_rate`.\n There is a trade-off between learning_rate and n_estimators.\n\nn_estimators : int, default=100\n The number of boosting stages to perform. Gradient boosting\n is fairly robust to over-fitting so a large number usually\n results in better performance.\n\nsubsample : float, default=1.0\n The fraction of samples to be used for fitting the individual base\n learners. If smaller than 1.0 this results in Stochastic Gradient\n Boosting. `subsample` interacts with the parameter `n_estimators`.\n Choosing `subsample < 1.0` leads to a reduction of variance\n and an increase in bias.\n\ncriterion : {'friedman_mse', 'mse', 'mae'}, default='friedman_mse'\n The function to measure the quality of a split. Supported criteria\n are 'friedman_mse' for the mean squared error with improvement\n score by Friedman, 'mse' for mean squared error, and 'mae' for\n the mean absolute error. The default value of 'friedman_mse' is\n generally the best as it can provide a better approximation in\n some cases.\n\n .. versionadded:: 0.18\n .. deprecated:: 0.24\n `criterion='mae'` is deprecated and will be removed in version\n 1.1 (renaming of 0.26). Use `criterion='friedman_mse'` or `'mse'`\n instead, as trees should use a least-square criterion in\n Gradient Boosting.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_depth : int, default=3\n The maximum depth of the individual regression estimators. The maximum\n depth limits the number of nodes in the tree. 
Tune this parameter\n for best performance; the best value depends on the interaction\n of the input variables.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\ninit : estimator or 'zero', default=None\n An estimator object that is used to compute the initial predictions.\n ``init`` has to provide :meth:`fit` and :meth:`predict_proba`. If\n 'zero', the initial raw predictions are set to zero. By default, a\n ``DummyEstimator`` predicting the classes priors is used.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given to each Tree estimator at each\n boosting iteration.\n In addition, it controls the random permutation of the features at\n each split (see Notes for more details).\n It also controls the random spliting of the training data to obtain a\n validation set if `n_iter_no_change` is not None.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmax_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If 'auto', then `max_features=sqrt(n_features)`.\n - If 'sqrt', then `max_features=sqrt(n_features)`.\n - If 'log2', then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Choosing `max_features < n_features` leads to a reduction of variance\n and an increase in bias.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nverbose : int, default=0\n Enable verbose output. If 1 then it prints progress and performance\n once in a while (the more trees the lower the frequency). If greater\n than 1 then it prints progress and performance for every tree.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just erase the\n previous solution. 
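For concreteness, how the `max_features` strings resolve for a classifier, mirroring the `_check_params` arithmetic recorded earlier (64 features is an arbitrary example):

    import numpy as np

    # 'auto' and 'sqrt' take the square root for classifiers; 'log2' the
    # base-2 log; None keeps all features.
    n_features = 64
    print(max(1, int(np.sqrt(n_features))))  # 'auto' / 'sqrt' -> 8
    print(max(1, int(np.log2(n_features))))  # 'log2' -> 6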
See :term:`the Glossary `.\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if ``n_iter_no_change`` is set to an integer.\n\n .. versionadded:: 0.20\n\nn_iter_no_change : int, default=None\n ``n_iter_no_change`` is used to decide if early stopping will be used\n to terminate training when validation score is not improving. By\n default it is set to None to disable early stopping. If set to a\n number, it will set aside ``validation_fraction`` size of the training\n data as validation and terminate training when validation score is not\n improving in all of the previous ``n_iter_no_change`` numbers of\n iterations. The split is stratified.\n\n .. versionadded:: 0.20\n\ntol : float, default=1e-4\n Tolerance for the early stopping. When the loss is not improving\n by at least tol for ``n_iter_no_change`` iterations (if set to a\n number), the training stops.\n\n .. versionadded:: 0.20\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nn_estimators_ : int\n The number of estimators as selected by early stopping (if\n ``n_iter_no_change`` is specified). Otherwise it is set to\n ``n_estimators``.\n\n .. versionadded:: 0.20\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_improvement_ : ndarray of shape (n_estimators,)\n The improvement in loss (= deviance) on the out-of-bag samples\n relative to the previous iteration.\n ``oob_improvement_[0]`` is the improvement in\n loss of the first stage over the ``init`` estimator.\n Only available if ``subsample < 1.0``\n\ntrain_score_ : ndarray of shape (n_estimators,)\n The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n model at iteration ``i`` on the in-bag sample.\n If ``subsample == 1`` this is the deviance on the training data.\n\nloss_ : LossFunction\n The concrete ``LossFunction`` object.\n\ninit_ : estimator\n The estimator that provides the initial predictions.\n Set via the ``init`` argument or ``loss.init_estimator``.\n\nestimators_ : ndarray of DecisionTreeRegressor of shape (n_estimators, ``loss_.K``)\n The collection of fitted sub-estimators. 
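`subsample < 1.0` is what makes `oob_improvement_` available; its cumulative sum gives a rough estimate of a good ensemble size. A sketch (sizes are illustrative):

    import numpy as np
    from sklearn.datasets import make_hastie_10_2
    from sklearn.ensemble import GradientBoostingClassifier

    X, y = make_hastie_10_2(random_state=0)
    clf = GradientBoostingClassifier(n_estimators=100, subsample=0.5,
                                     random_state=0).fit(X, y)

    # oob_improvement_[i] is the OOB loss drop contributed by stage i;
    # its cumulative sum flattens out once extra stages stop helping.
    cumulative = np.cumsum(clf.oob_improvement_)
    print(int(np.argmax(cumulative)) + 1)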
``loss_.K`` is 1 for binary\n classification, otherwise n_classes.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nn_features_ : int\n The number of data features.\n\nn_classes_ : int\n The number of classes.\n\nmax_features_ : int\n The inferred value of max_features.\n\nSee Also\n--------\nHistGradientBoostingClassifier : Histogram-based Gradient Boosting\n Classification Tree.\nsklearn.tree.DecisionTreeClassifier : A decision tree classifier.\nRandomForestClassifier : A meta-estimator that fits a number of decision\n tree classifiers on various sub-samples of the dataset and uses\n averaging to improve the predictive accuracy and control over-fitting.\nAdaBoostClassifier : A meta-estimator that begins by fitting a classifier\n on the original dataset and then fits additional copies of the\n classifier on the same dataset where the weights of incorrectly\n classified instances are adjusted such that subsequent classifiers\n focus more on difficult cases.\n\nNotes\n-----\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data and\n``max_features=n_features``, if the improvement of the criterion is\nidentical for several splits enumerated during the search of the best\nsplit. To obtain a deterministic behaviour during fitting,\n``random_state`` has to be fixed.\n\nReferences\n----------\nJ. Friedman, Greedy Function Approximation: A Gradient Boosting\nMachine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\nJ. Friedman, Stochastic Gradient Boosting, 1999\n\nT. Hastie, R. Tibshirani and J. Friedman.\nElements of Statistical Learning Ed. 2, Springer, 2009.\n\nExamples\n--------\nThe following example shows how to fit a gradient boosting classifier with\n100 decision stumps as weak learners.\n\n>>> from sklearn.datasets import make_hastie_10_2\n>>> from sklearn.ensemble import GradientBoostingClassifier\n\n>>> X, y = make_hastie_10_2(random_state=0)\n>>> X_train, X_test = X[:2000], X[2000:]\n>>> y_train, y_test = y[:2000], y[2000:]\n\n>>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,\n... max_depth=1, random_state=0).fit(X_train, y_train)\n>>> clf.score(X_test, y_test)\n0.913...", + "code": "class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):\n \"\"\"Gradient Boosting for classification.\n\n GB builds an additive model in a\n forward stage-wise fashion; it allows for the optimization of\n arbitrary differentiable loss functions. In each stage ``n_classes_``\n regression trees are fit on the negative gradient of the\n binomial or multinomial deviance loss function. Binary classification\n is a special case where only a single regression tree is induced.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n loss : {'deviance', 'exponential'}, default='deviance'\n The loss function to be optimized. 'deviance' refers to\n deviance (= logistic regression) for classification\n with probabilistic outputs. For loss 'exponential' gradient\n boosting recovers the AdaBoost algorithm.\n\n learning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by `learning_rate`.\n There is a trade-off between learning_rate and n_estimators.\n\n n_estimators : int, default=100\n The number of boosting stages to perform. 
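The `validation_fraction` / `n_iter_no_change` / `tol` trio documented above enables built-in early stopping; a usage sketch:

    from sklearn.datasets import make_hastie_10_2
    from sklearn.ensemble import GradientBoostingClassifier

    X, y = make_hastie_10_2(random_state=0)
    # Holds out 10% of the training data and stops once the validation
    # loss has not improved by more than tol for 5 consecutive stages.
    clf = GradientBoostingClassifier(n_estimators=1000, n_iter_no_change=5,
                                     validation_fraction=0.1, tol=1e-4,
                                     random_state=0).fit(X, y)
    print(clf.n_estimators_)  # typically far fewer than 1000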
Gradient boosting\n is fairly robust to over-fitting so a large number usually\n results in better performance.\n\n subsample : float, default=1.0\n The fraction of samples to be used for fitting the individual base\n learners. If smaller than 1.0 this results in Stochastic Gradient\n Boosting. `subsample` interacts with the parameter `n_estimators`.\n Choosing `subsample < 1.0` leads to a reduction of variance\n and an increase in bias.\n\n criterion : {'friedman_mse', 'mse', 'mae'}, default='friedman_mse'\n The function to measure the quality of a split. Supported criteria\n are 'friedman_mse' for the mean squared error with improvement\n score by Friedman, 'mse' for mean squared error, and 'mae' for\n the mean absolute error. The default value of 'friedman_mse' is\n generally the best as it can provide a better approximation in\n some cases.\n\n .. versionadded:: 0.18\n .. deprecated:: 0.24\n `criterion='mae'` is deprecated and will be removed in version\n 1.1 (renaming of 0.26). Use `criterion='friedman_mse'` or `'mse'`\n instead, as trees should use a least-square criterion in\n Gradient Boosting.\n\n min_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\n max_depth : int, default=3\n The maximum depth of the individual regression estimators. The maximum\n depth limits the number of nodes in the tree. Tune this parameter\n for best performance; the best value depends on the interaction\n of the input variables.\n\n min_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\n min_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. 
deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\n init : estimator or 'zero', default=None\n An estimator object that is used to compute the initial predictions.\n ``init`` has to provide :meth:`fit` and :meth:`predict_proba`. If\n 'zero', the initial raw predictions are set to zero. By default, a\n ``DummyEstimator`` predicting the classes priors is used.\n\n random_state : int, RandomState instance or None, default=None\n Controls the random seed given to each Tree estimator at each\n boosting iteration.\n In addition, it controls the random permutation of the features at\n each split (see Notes for more details).\n It also controls the random spliting of the training data to obtain a\n validation set if `n_iter_no_change` is not None.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If 'auto', then `max_features=sqrt(n_features)`.\n - If 'sqrt', then `max_features=sqrt(n_features)`.\n - If 'log2', then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Choosing `max_features < n_features` leads to a reduction of variance\n and an increase in bias.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\n verbose : int, default=0\n Enable verbose output. If 1 then it prints progress and performance\n once in a while (the more trees the lower the frequency). If greater\n than 1 then it prints progress and performance for every tree.\n\n max_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\n warm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just erase the\n previous solution. See :term:`the Glossary `.\n\n validation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if ``n_iter_no_change`` is set to an integer.\n\n .. versionadded:: 0.20\n\n n_iter_no_change : int, default=None\n ``n_iter_no_change`` is used to decide if early stopping will be used\n to terminate training when validation score is not improving. By\n default it is set to None to disable early stopping. If set to a\n number, it will set aside ``validation_fraction`` size of the training\n data as validation and terminate training when validation score is not\n improving in all of the previous ``n_iter_no_change`` numbers of\n iterations. The split is stratified.\n\n .. versionadded:: 0.20\n\n tol : float, default=1e-4\n Tolerance for the early stopping. 
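Per the `init` documentation above, any estimator exposing `fit` and `predict_proba` can seed a classification ensemble (or the string 'zero'); for example, hypothetically, a logistic regression whose predictions the boosting stages then correct:

    from sklearn.datasets import make_hastie_10_2
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.linear_model import LogisticRegression

    X, y = make_hastie_10_2(random_state=0)
    # init may be 'zero' or any estimator with fit and predict_proba.
    clf = GradientBoostingClassifier(init=LogisticRegression(),
                                     n_estimators=10,
                                     random_state=0).fit(X, y)
    print(clf.init_)  # the fitted LogisticRegression seed model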
When the loss is not improving\n by at least tol for ``n_iter_no_change`` iterations (if set to a\n number), the training stops.\n\n .. versionadded:: 0.20\n\n ccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\n Attributes\n ----------\n n_estimators_ : int\n The number of estimators as selected by early stopping (if\n ``n_iter_no_change`` is specified). Otherwise it is set to\n ``n_estimators``.\n\n .. versionadded:: 0.20\n\n feature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n oob_improvement_ : ndarray of shape (n_estimators,)\n The improvement in loss (= deviance) on the out-of-bag samples\n relative to the previous iteration.\n ``oob_improvement_[0]`` is the improvement in\n loss of the first stage over the ``init`` estimator.\n Only available if ``subsample < 1.0``\n\n train_score_ : ndarray of shape (n_estimators,)\n The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n model at iteration ``i`` on the in-bag sample.\n If ``subsample == 1`` this is the deviance on the training data.\n\n loss_ : LossFunction\n The concrete ``LossFunction`` object.\n\n init_ : estimator\n The estimator that provides the initial predictions.\n Set via the ``init`` argument or ``loss.init_estimator``.\n\n estimators_ : ndarray of DecisionTreeRegressor of \\\nshape (n_estimators, ``loss_.K``)\n The collection of fitted sub-estimators. ``loss_.K`` is 1 for binary\n classification, otherwise n_classes.\n\n classes_ : ndarray of shape (n_classes,)\n The classes labels.\n\n n_features_ : int\n The number of data features.\n\n n_classes_ : int\n The number of classes.\n\n max_features_ : int\n The inferred value of max_features.\n\n See Also\n --------\n HistGradientBoostingClassifier : Histogram-based Gradient Boosting\n Classification Tree.\n sklearn.tree.DecisionTreeClassifier : A decision tree classifier.\n RandomForestClassifier : A meta-estimator that fits a number of decision\n tree classifiers on various sub-samples of the dataset and uses\n averaging to improve the predictive accuracy and control over-fitting.\n AdaBoostClassifier : A meta-estimator that begins by fitting a classifier\n on the original dataset and then fits additional copies of the\n classifier on the same dataset where the weights of incorrectly\n classified instances are adjusted such that subsequent classifiers\n focus more on difficult cases.\n\n Notes\n -----\n The features are always randomly permuted at each split. Therefore,\n the best found split may vary, even with the same training data and\n ``max_features=n_features``, if the improvement of the criterion is\n identical for several splits enumerated during the search of the best\n split. To obtain a deterministic behaviour during fitting,\n ``random_state`` has to be fixed.\n\n References\n ----------\n J. 
Friedman, Greedy Function Approximation: A Gradient Boosting\n Machine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\n J. Friedman, Stochastic Gradient Boosting, 1999\n\n T. Hastie, R. Tibshirani and J. Friedman.\n Elements of Statistical Learning Ed. 2, Springer, 2009.\n\n Examples\n --------\n The following example shows how to fit a gradient boosting classifier with\n 100 decision stumps as weak learners.\n\n >>> from sklearn.datasets import make_hastie_10_2\n >>> from sklearn.ensemble import GradientBoostingClassifier\n\n >>> X, y = make_hastie_10_2(random_state=0)\n >>> X_train, X_test = X[:2000], X[2000:]\n >>> y_train, y_test = y[:2000], y[2000:]\n\n >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,\n ... max_depth=1, random_state=0).fit(X_train, y_train)\n >>> clf.score(X_test, y_test)\n 0.913...\n \"\"\"\n\n _SUPPORTED_LOSS = ('deviance', 'exponential')\n\n @_deprecate_positional_args\n def __init__(self, *, loss='deviance', learning_rate=0.1, n_estimators=100,\n subsample=1.0, criterion='friedman_mse', min_samples_split=2,\n min_samples_leaf=1, min_weight_fraction_leaf=0.,\n max_depth=3, min_impurity_decrease=0.,\n min_impurity_split=None, init=None,\n random_state=None, max_features=None, verbose=0,\n max_leaf_nodes=None, warm_start=False,\n validation_fraction=0.1, n_iter_no_change=None, tol=1e-4,\n ccp_alpha=0.0):\n\n super().__init__(\n loss=loss, learning_rate=learning_rate, n_estimators=n_estimators,\n criterion=criterion, min_samples_split=min_samples_split,\n min_samples_leaf=min_samples_leaf,\n min_weight_fraction_leaf=min_weight_fraction_leaf,\n max_depth=max_depth, init=init, subsample=subsample,\n max_features=max_features,\n random_state=random_state, verbose=verbose,\n max_leaf_nodes=max_leaf_nodes,\n min_impurity_decrease=min_impurity_decrease,\n min_impurity_split=min_impurity_split,\n warm_start=warm_start, validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, tol=tol, ccp_alpha=ccp_alpha)\n\n def _validate_y(self, y, sample_weight):\n check_classification_targets(y)\n self.classes_, y = np.unique(y, return_inverse=True)\n n_trim_classes = np.count_nonzero(np.bincount(y, sample_weight))\n if n_trim_classes < 2:\n raise ValueError(\"y contains %d class after sample_weight \"\n \"trimmed classes with zero weights, while a \"\n \"minimum of 2 classes are required.\"\n % n_trim_classes)\n self._n_classes = len(self.classes_)\n # expose n_classes_ attribute\n self.n_classes_ = self._n_classes\n return y\n\n def _warn_mae_for_criterion(self):\n # TODO: This should raise an error from 1.1\n warnings.warn(\"criterion='mae' was deprecated in version 0.24 and \"\n \"will be removed in version 1.1 (renaming of 0.26). Use \"\n \"criterion='friedman_mse' or 'mse' instead, as trees \"\n \"should use a least-square criterion in Gradient \"\n \"Boosting.\", FutureWarning)\n\n def decision_function(self, X):\n \"\"\"Compute the decision function of ``X``.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n score : ndarray of shape (n_samples, n_classes) or (n_samples,)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n order of the classes corresponds to that in the attribute\n :term:`classes_`. 
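For the default binomial deviance loss on a binary problem, `decision_function` returns log-odds of the positive class, so `expit` should reproduce the positive-class column of `predict_proba`. A quick consistency sketch (assuming that loss-to-probability mapping):

    import numpy as np
    from scipy.special import expit
    from sklearn.datasets import make_hastie_10_2
    from sklearn.ensemble import GradientBoostingClassifier

    X, y = make_hastie_10_2(random_state=0)
    clf = GradientBoostingClassifier(n_estimators=20,
                                     random_state=0).fit(X, y)

    # Binary deviance: raw scores are log-odds of the positive class.
    scores = clf.decision_function(X[:5])
    print(np.allclose(expit(scores), clf.predict_proba(X[:5])[:, 1]))  # True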
Regression and binary classification produce an\n array of shape (n_samples,).\n \"\"\"\n X = check_array(X, dtype=DTYPE, order=\"C\", accept_sparse='csr')\n raw_predictions = self._raw_predict(X)\n if raw_predictions.shape[1] == 1:\n return raw_predictions.ravel()\n return raw_predictions\n\n def staged_decision_function(self, X):\n \"\"\"Compute decision function of ``X`` for each iteration.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n score : generator of ndarray of shape (n_samples, k)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification are special cases with\n ``k == 1``, otherwise ``k==n_classes``.\n \"\"\"\n yield from self._staged_raw_predict(X)\n\n def predict(self, X):\n \"\"\"Predict class for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n The predicted values.\n \"\"\"\n raw_predictions = self.decision_function(X)\n encoded_labels = \\\n self.loss_._raw_prediction_to_decision(raw_predictions)\n return self.classes_.take(encoded_labels, axis=0)\n\n def staged_predict(self, X):\n \"\"\"Predict class at each stage for X.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n y : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples.\n \"\"\"\n for raw_predictions in self._staged_raw_predict(X):\n encoded_labels = \\\n self.loss_._raw_prediction_to_decision(raw_predictions)\n yield self.classes_.take(encoded_labels, axis=0)\n\n def predict_proba(self, X):\n \"\"\"Predict class probabilities for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Raises\n ------\n AttributeError\n If the ``loss`` does not support probabilities.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n raw_predictions = self.decision_function(X)\n try:\n return self.loss_._raw_prediction_to_proba(raw_predictions)\n except NotFittedError:\n raise\n except AttributeError as e:\n raise AttributeError('loss=%r does not support predict_proba' %\n self.loss) from e\n\n def predict_log_proba(self, X):\n \"\"\"Predict class log-probabilities for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. 
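`staged_predict_proba` supports the same per-stage monitoring in probability space, e.g. tracking validation log-loss (metric and split choices are illustrative):

    import numpy as np
    from sklearn.datasets import make_hastie_10_2
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.metrics import log_loss

    X, y = make_hastie_10_2(random_state=0)
    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    clf = GradientBoostingClassifier(n_estimators=50,
                                     random_state=0).fit(X_train, y_train)
    # Validation log-loss after each boosting stage.
    losses = [log_loss(y_test, proba)
              for proba in clf.staged_predict_proba(X_test)]
    print(int(np.argmin(losses)) + 1)  # stage count with the best log-loss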
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Raises\n ------\n AttributeError\n If the ``loss`` does not support probabilities.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n proba = self.predict_proba(X)\n return np.log(proba)\n\n def staged_predict_proba(self, X):\n \"\"\"Predict class probabilities at each stage for X.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n y : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples.\n \"\"\"\n try:\n for raw_predictions in self._staged_raw_predict(X):\n yield self.loss_._raw_prediction_to_proba(raw_predictions)\n except NotFittedError:\n raise\n except AttributeError as e:\n raise AttributeError('loss=%r does not support predict_proba' %\n self.loss) from e", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor", + "name": "GradientBoostingRegressor", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor", + "decorators": [], + "superclasses": ["RegressorMixin", "BaseGradientBoosting"], + "methods": [ + "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/_validate_y", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/_warn_mae_for_criterion", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/predict", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/staged_predict", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/apply", + "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/n_classes_@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Gradient Boosting for regression.\n\nGB builds an additive model in a forward stage-wise fashion;\nit allows for the optimization of arbitrary differentiable loss functions.\nIn each stage a regression tree is fit on the negative gradient of the\ngiven loss function.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Gradient Boosting for regression.\n\nGB builds an additive model in a forward stage-wise fashion;\nit allows for the optimization of arbitrary differentiable loss functions.\nIn each stage a regression tree is fit on the negative gradient of the\ngiven loss function.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nloss : {'ls', 'lad', 'huber', 'quantile'}, default='ls'\n Loss function to be optimized. 'ls' refers to least squares\n regression. 'lad' (least absolute deviation) is a highly robust\n loss function solely based on order information of the input\n variables. 'huber' is a combination of the two. 'quantile'\n allows quantile regression (use `alpha` to specify the quantile).\n\nlearning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by `learning_rate`.\n There is a trade-off between learning_rate and n_estimators.\n\nn_estimators : int, default=100\n The number of boosting stages to perform. 
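The regressor's `loss='quantile'` with `alpha` fits a conditional quantile rather than the mean; a sketch of a 90% upper bound (noise level and the sanity check are illustrative):

    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.ensemble import GradientBoostingRegressor

    X, y = make_regression(n_samples=500, noise=20.0, random_state=0)
    upper = GradientBoostingRegressor(loss='quantile', alpha=0.9,
                                      random_state=0).fit(X, y)
    # Roughly 90% of the training targets should fall below the fit.
    print(np.mean(y <= upper.predict(X)))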
Gradient boosting\n is fairly robust to over-fitting so a large number usually\n results in better performance.\n\nsubsample : float, default=1.0\n The fraction of samples to be used for fitting the individual base\n learners. If smaller than 1.0 this results in Stochastic Gradient\n Boosting. `subsample` interacts with the parameter `n_estimators`.\n Choosing `subsample < 1.0` leads to a reduction of variance\n and an increase in bias.\n\ncriterion : {'friedman_mse', 'mse', 'mae'}, default='friedman_mse'\n The function to measure the quality of a split. Supported criteria\n are \"friedman_mse\" for the mean squared error with improvement\n score by Friedman, \"mse\" for mean squared error, and \"mae\" for\n the mean absolute error. The default value of \"friedman_mse\" is\n generally the best as it can provide a better approximation in\n some cases.\n\n .. versionadded:: 0.18\n .. deprecated:: 0.24\n `criterion='mae'` is deprecated and will be removed in version\n 1.1 (renaming of 0.26). The correct way of minimizing the absolute\n error is to use `loss='lad'` instead.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_depth : int, default=3\n Maximum depth of the individual regression estimators. The maximum\n depth limits the number of nodes in the tree. Tune this parameter\n for best performance; the best value depends on the interaction\n of the input variables.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. 
deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\ninit : estimator or 'zero', default=None\n An estimator object that is used to compute the initial predictions.\n ``init`` has to provide :term:`fit` and :term:`predict`. If 'zero', the\n initial raw predictions are set to zero. By default a\n ``DummyEstimator`` is used, predicting either the average target value\n (for loss='ls'), or a quantile for the other losses.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given to each Tree estimator at each\n boosting iteration.\n In addition, it controls the random permutation of the features at\n each split (see Notes for more details).\n It also controls the random spliting of the training data to obtain a\n validation set if `n_iter_no_change` is not None.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmax_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Choosing `max_features < n_features` leads to a reduction of variance\n and an increase in bias.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nalpha : float, default=0.9\n The alpha-quantile of the huber loss function and the quantile\n loss function. Only if ``loss='huber'`` or ``loss='quantile'``.\n\nverbose : int, default=0\n Enable verbose output. If 1 then it prints progress and performance\n once in a while (the more trees the lower the frequency). If greater\n than 1 then it prints progress and performance for every tree.\n\nmax_leaf_nodes : int, default=None\n Grow trees with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just erase the\n previous solution. See :term:`the Glossary `.\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if ``n_iter_no_change`` is set to an integer.\n\n .. versionadded:: 0.20\n\nn_iter_no_change : int, default=None\n ``n_iter_no_change`` is used to decide if early stopping will be used\n to terminate training when validation score is not improving. By\n default it is set to None to disable early stopping. If set to a\n number, it will set aside ``validation_fraction`` size of the training\n data as validation and terminate training when validation score is not\n improving in all of the previous ``n_iter_no_change`` numbers of\n iterations.\n\n .. 
versionadded:: 0.20\n\ntol : float, default=1e-4\n    Tolerance for the early stopping. When the loss is not improving\n    by at least tol for ``n_iter_no_change`` iterations (if set to a\n    number), the training stops.\n\n    .. versionadded:: 0.20\n\nccp_alpha : non-negative float, default=0.0\n    Complexity parameter used for Minimal Cost-Complexity Pruning. The\n    subtree with the largest cost complexity that is smaller than\n    ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n    :ref:`minimal_cost_complexity_pruning` for details.\n\n    .. versionadded:: 0.22\n\nAttributes\n----------\nfeature_importances_ : ndarray of shape (n_features,)\n    The impurity-based feature importances.\n    The higher, the more important the feature.\n    The importance of a feature is computed as the (normalized)\n    total reduction of the criterion brought by that feature. It is also\n    known as the Gini importance.\n\n    Warning: impurity-based feature importances can be misleading for\n    high cardinality features (many unique values). See\n    :func:`sklearn.inspection.permutation_importance` as an alternative.\n\noob_improvement_ : ndarray of shape (n_estimators,)\n    The improvement in loss (= deviance) on the out-of-bag samples\n    relative to the previous iteration.\n    ``oob_improvement_[0]`` is the improvement in\n    loss of the first stage over the ``init`` estimator.\n    Only available if ``subsample < 1.0``.\n\ntrain_score_ : ndarray of shape (n_estimators,)\n    The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n    model at iteration ``i`` on the in-bag sample.\n    If ``subsample == 1`` this is the deviance on the training data.\n\nloss_ : LossFunction\n    The concrete ``LossFunction`` object.\n\ninit_ : estimator\n    The estimator that provides the initial predictions.\n    Set via the ``init`` argument or ``loss.init_estimator``.\n\nestimators_ : ndarray of DecisionTreeRegressor of shape (n_estimators, 1)\n    The collection of fitted sub-estimators.\n\nn_classes_ : int\n    The number of classes, set to 1 for regressors.\n\n    .. deprecated:: 0.24\n        Attribute ``n_classes_`` was deprecated in version 0.24 and\n        will be removed in 1.1 (renaming of 0.26).\n\nn_estimators_ : int\n    The number of estimators as selected by early stopping (if\n    ``n_iter_no_change`` is specified). Otherwise it is set to\n    ``n_estimators``.\n\nn_features_ : int\n    The number of data features.\n\nmax_features_ : int\n    The inferred value of max_features.\n\nSee Also\n--------\nHistGradientBoostingRegressor : Histogram-based Gradient Boosting\n    Regression Tree.\nsklearn.tree.DecisionTreeRegressor : A decision tree regressor.\nsklearn.ensemble.RandomForestRegressor : A random forest regressor.\n\nNotes\n-----\nThe features are always randomly permuted at each split. Therefore,\nthe best found split may vary, even with the same training data and\n``max_features=n_features``, if the improvement of the criterion is\nidentical for several splits enumerated during the search of the best\nsplit. To obtain a deterministic behaviour during fitting,\n``random_state`` has to be fixed.\n\nExamples\n--------\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_regression(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... 
X, y, random_state=0)\n>>> reg = GradientBoostingRegressor(random_state=0)\n>>> reg.fit(X_train, y_train)\nGradientBoostingRegressor(random_state=0)\n>>> reg.predict(X_test[1:2])\narray([-61...])\n>>> reg.score(X_test, y_test)\n0.4...\n\nReferences\n----------\nJ. Friedman, Greedy Function Approximation: A Gradient Boosting\nMachine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\nJ. Friedman, Stochastic Gradient Boosting, 1999\n\nT. Hastie, R. Tibshirani and J. Friedman.\nElements of Statistical Learning Ed. 2, Springer, 2009.", + "code": "class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):\n \"\"\"Gradient Boosting for regression.\n\n GB builds an additive model in a forward stage-wise fashion;\n it allows for the optimization of arbitrary differentiable loss functions.\n In each stage a regression tree is fit on the negative gradient of the\n given loss function.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n loss : {'ls', 'lad', 'huber', 'quantile'}, default='ls'\n Loss function to be optimized. 'ls' refers to least squares\n regression. 'lad' (least absolute deviation) is a highly robust\n loss function solely based on order information of the input\n variables. 'huber' is a combination of the two. 'quantile'\n allows quantile regression (use `alpha` to specify the quantile).\n\n learning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by `learning_rate`.\n There is a trade-off between learning_rate and n_estimators.\n\n n_estimators : int, default=100\n The number of boosting stages to perform. Gradient boosting\n is fairly robust to over-fitting so a large number usually\n results in better performance.\n\n subsample : float, default=1.0\n The fraction of samples to be used for fitting the individual base\n learners. If smaller than 1.0 this results in Stochastic Gradient\n Boosting. `subsample` interacts with the parameter `n_estimators`.\n Choosing `subsample < 1.0` leads to a reduction of variance\n and an increase in bias.\n\n criterion : {'friedman_mse', 'mse', 'mae'}, default='friedman_mse'\n The function to measure the quality of a split. Supported criteria\n are \"friedman_mse\" for the mean squared error with improvement\n score by Friedman, \"mse\" for mean squared error, and \"mae\" for\n the mean absolute error. The default value of \"friedman_mse\" is\n generally the best as it can provide a better approximation in\n some cases.\n\n .. versionadded:: 0.18\n .. deprecated:: 0.24\n `criterion='mae'` is deprecated and will be removed in version\n 1.1 (renaming of 0.26). The correct way of minimizing the absolute\n error is to use `loss='lad'` instead.\n\n min_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. 
This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\n max_depth : int, default=3\n Maximum depth of the individual regression estimators. The maximum\n depth limits the number of nodes in the tree. Tune this parameter\n for best performance; the best value depends on the interaction\n of the input variables.\n\n min_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\n min_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\n init : estimator or 'zero', default=None\n An estimator object that is used to compute the initial predictions.\n ``init`` has to provide :term:`fit` and :term:`predict`. If 'zero', the\n initial raw predictions are set to zero. 
By default a\n        ``DummyEstimator`` is used, predicting either the average target value\n        (for loss='ls'), or a quantile for the other losses.\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given to each Tree estimator at each\n        boosting iteration.\n        In addition, it controls the random permutation of the features at\n        each split (see Notes for more details).\n        It also controls the random splitting of the training data to obtain a\n        validation set if `n_iter_no_change` is not None.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary `.\n\n    max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None\n        The number of features to consider when looking for the best split:\n\n        - If int, then consider `max_features` features at each split.\n        - If float, then `max_features` is a fraction and\n          `int(max_features * n_features)` features are considered at each\n          split.\n        - If \"auto\", then `max_features=n_features`.\n        - If \"sqrt\", then `max_features=sqrt(n_features)`.\n        - If \"log2\", then `max_features=log2(n_features)`.\n        - If None, then `max_features=n_features`.\n\n        Choosing `max_features < n_features` leads to a reduction of variance\n        and an increase in bias.\n\n        Note: the search for a split does not stop until at least one\n        valid partition of the node samples is found, even if it requires\n        effectively inspecting more than ``max_features`` features.\n\n    alpha : float, default=0.9\n        The alpha-quantile of the huber loss function and the quantile\n        loss function. Only if ``loss='huber'`` or ``loss='quantile'``.\n\n    verbose : int, default=0\n        Enable verbose output. If 1 then it prints progress and performance\n        once in a while (the more trees the lower the frequency). If greater\n        than 1 then it prints progress and performance for every tree.\n\n    max_leaf_nodes : int, default=None\n        Grow trees with ``max_leaf_nodes`` in best-first fashion.\n        Best nodes are defined as relative reduction in impurity.\n        If None then unlimited number of leaf nodes.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit\n        and add more estimators to the ensemble, otherwise, just erase the\n        previous solution. See :term:`the Glossary `.\n\n    validation_fraction : float, default=0.1\n        The proportion of training data to set aside as validation set for\n        early stopping. Must be between 0 and 1.\n        Only used if ``n_iter_no_change`` is set to an integer.\n\n        .. versionadded:: 0.20\n\n    n_iter_no_change : int, default=None\n        ``n_iter_no_change`` is used to decide if early stopping will be used\n        to terminate training when validation score is not improving. By\n        default it is set to None to disable early stopping. If set to a\n        number, it will set aside ``validation_fraction`` size of the training\n        data as validation and terminate training when validation score is not\n        improving in all of the previous ``n_iter_no_change``\n        iterations.\n\n        .. versionadded:: 0.20\n\n    tol : float, default=1e-4\n        Tolerance for the early stopping. When the loss is not improving\n        by at least tol for ``n_iter_no_change`` iterations (if set to a\n        number), the training stops.\n\n        .. versionadded:: 0.20\n\n    ccp_alpha : non-negative float, default=0.0\n        Complexity parameter used for Minimal Cost-Complexity Pruning. The\n        subtree with the largest cost complexity that is smaller than\n        ``ccp_alpha`` will be chosen. By default, no pruning is performed. 
See\n        :ref:`minimal_cost_complexity_pruning` for details.\n\n        .. versionadded:: 0.22\n\n    Attributes\n    ----------\n    feature_importances_ : ndarray of shape (n_features,)\n        The impurity-based feature importances.\n        The higher, the more important the feature.\n        The importance of a feature is computed as the (normalized)\n        total reduction of the criterion brought by that feature. It is also\n        known as the Gini importance.\n\n        Warning: impurity-based feature importances can be misleading for\n        high cardinality features (many unique values). See\n        :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n    oob_improvement_ : ndarray of shape (n_estimators,)\n        The improvement in loss (= deviance) on the out-of-bag samples\n        relative to the previous iteration.\n        ``oob_improvement_[0]`` is the improvement in\n        loss of the first stage over the ``init`` estimator.\n        Only available if ``subsample < 1.0``.\n\n    train_score_ : ndarray of shape (n_estimators,)\n        The i-th score ``train_score_[i]`` is the deviance (= loss) of the\n        model at iteration ``i`` on the in-bag sample.\n        If ``subsample == 1`` this is the deviance on the training data.\n\n    loss_ : LossFunction\n        The concrete ``LossFunction`` object.\n\n    init_ : estimator\n        The estimator that provides the initial predictions.\n        Set via the ``init`` argument or ``loss.init_estimator``.\n\n    estimators_ : ndarray of DecisionTreeRegressor of shape (n_estimators, 1)\n        The collection of fitted sub-estimators.\n\n    n_classes_ : int\n        The number of classes, set to 1 for regressors.\n\n        .. deprecated:: 0.24\n            Attribute ``n_classes_`` was deprecated in version 0.24 and\n            will be removed in 1.1 (renaming of 0.26).\n\n    n_estimators_ : int\n        The number of estimators as selected by early stopping (if\n        ``n_iter_no_change`` is specified). Otherwise it is set to\n        ``n_estimators``.\n\n    n_features_ : int\n        The number of data features.\n\n    max_features_ : int\n        The inferred value of max_features.\n\n    See Also\n    --------\n    HistGradientBoostingRegressor : Histogram-based Gradient Boosting\n        Regression Tree.\n    sklearn.tree.DecisionTreeRegressor : A decision tree regressor.\n    sklearn.ensemble.RandomForestRegressor : A random forest regressor.\n\n    Notes\n    -----\n    The features are always randomly permuted at each split. Therefore,\n    the best found split may vary, even with the same training data and\n    ``max_features=n_features``, if the improvement of the criterion is\n    identical for several splits enumerated during the search of the best\n    split. To obtain a deterministic behaviour during fitting,\n    ``random_state`` has to be fixed.\n\n    Examples\n    --------\n    >>> from sklearn.datasets import make_regression\n    >>> from sklearn.ensemble import GradientBoostingRegressor\n    >>> from sklearn.model_selection import train_test_split\n    >>> X, y = make_regression(random_state=0)\n    >>> X_train, X_test, y_train, y_test = train_test_split(\n    ...     X, y, random_state=0)\n    >>> reg = GradientBoostingRegressor(random_state=0)\n    >>> reg.fit(X_train, y_train)\n    GradientBoostingRegressor(random_state=0)\n    >>> reg.predict(X_test[1:2])\n    array([-61...])\n    >>> reg.score(X_test, y_test)\n    0.4...\n\n    References\n    ----------\n    J. Friedman, Greedy Function Approximation: A Gradient Boosting\n    Machine, The Annals of Statistics, Vol. 29, No. 5, 2001.\n\n    J. Friedman, Stochastic Gradient Boosting, 1999\n\n    T. Hastie, R. Tibshirani and J. Friedman.\n    Elements of Statistical Learning Ed. 
2, Springer, 2009.\n    \"\"\"\n\n    _SUPPORTED_LOSS = ('ls', 'lad', 'huber', 'quantile')\n\n    @_deprecate_positional_args\n    def __init__(self, *, loss='ls', learning_rate=0.1, n_estimators=100,\n                 subsample=1.0, criterion='friedman_mse', min_samples_split=2,\n                 min_samples_leaf=1, min_weight_fraction_leaf=0.,\n                 max_depth=3, min_impurity_decrease=0.,\n                 min_impurity_split=None, init=None, random_state=None,\n                 max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None,\n                 warm_start=False, validation_fraction=0.1,\n                 n_iter_no_change=None, tol=1e-4, ccp_alpha=0.0):\n\n        super().__init__(\n            loss=loss, learning_rate=learning_rate, n_estimators=n_estimators,\n            criterion=criterion, min_samples_split=min_samples_split,\n            min_samples_leaf=min_samples_leaf,\n            min_weight_fraction_leaf=min_weight_fraction_leaf,\n            max_depth=max_depth, init=init, subsample=subsample,\n            max_features=max_features,\n            min_impurity_decrease=min_impurity_decrease,\n            min_impurity_split=min_impurity_split,\n            random_state=random_state, alpha=alpha, verbose=verbose,\n            max_leaf_nodes=max_leaf_nodes, warm_start=warm_start,\n            validation_fraction=validation_fraction,\n            n_iter_no_change=n_iter_no_change, tol=tol, ccp_alpha=ccp_alpha)\n\n    def _validate_y(self, y, sample_weight=None):\n        if y.dtype.kind == 'O':\n            y = y.astype(DOUBLE)\n        return y\n\n    def _warn_mae_for_criterion(self):\n        # TODO: This should raise an error from 1.1\n        warnings.warn(\"criterion='mae' was deprecated in version 0.24 and \"\n                      \"will be removed in version 1.1 (renaming of 0.26). The \"\n                      \"correct way of minimizing the absolute error is to use \"\n                      \"loss='lad' instead.\", FutureWarning)\n\n    def predict(self, X):\n        \"\"\"Predict regression target for X.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        y : ndarray of shape (n_samples,)\n            The predicted values.\n        \"\"\"\n        X = check_array(X, dtype=DTYPE, order=\"C\", accept_sparse='csr')\n        # In regression we can directly return the raw value from the trees.\n        return self._raw_predict(X).ravel()\n\n    def staged_predict(self, X):\n        \"\"\"Predict regression target at each stage for X.\n\n        This method allows monitoring (i.e. determining error on the testing set)\n        after each stage.\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, it will be converted to\n            ``dtype=np.float32`` and if a sparse matrix is provided\n            to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        y : generator of ndarray of shape (n_samples,)\n            The predicted value of the input samples.\n        \"\"\"\n        for raw_predictions in self._staged_raw_predict(X):\n            yield raw_predictions.ravel()\n\n    def apply(self, X):\n        \"\"\"Apply trees in the ensemble to X, return leaf indices.\n\n        .. versionadded:: 0.17\n\n        Parameters\n        ----------\n        X : {array-like, sparse matrix} of shape (n_samples, n_features)\n            The input samples. Internally, its dtype will be converted to\n            ``dtype=np.float32``. 
If a sparse matrix is provided, it will\n            be converted to a sparse ``csr_matrix``.\n\n        Returns\n        -------\n        X_leaves : array-like of shape (n_samples, n_estimators)\n            For each datapoint x in X and for each tree in the ensemble,\n            return the index of the leaf x ends up in each estimator.\n        \"\"\"\n\n        leaves = super().apply(X)\n        leaves = leaves.reshape(X.shape[0], self.estimators_.shape[0])\n        return leaves\n\n    # FIXME: to be removed in 1.1\n    # mypy error: Decorated property not supported\n    @deprecated(\"Attribute n_classes_ was deprecated \"  # type: ignore\n                \"in version 0.24 and will be removed in 1.1 \"\n                \"(renaming of 0.26).\")\n    @property\n    def n_classes_(self):\n        try:\n            check_is_fitted(self)\n        except NotFittedError as nfe:\n            raise AttributeError(\n                \"{} object has no n_classes_ attribute.\"\n                .format(self.__class__.__name__)\n            ) from nfe\n        return 1", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/VerboseReporter", + "name": "VerboseReporter", + "qname": "sklearn.ensemble._gb.VerboseReporter", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.ensemble._gb/VerboseReporter/__init__", + "scikit-learn/sklearn.ensemble._gb/VerboseReporter/init", + "scikit-learn/sklearn.ensemble._gb/VerboseReporter/update" + ], + "is_public": false, + "reexported_by": [], + "description": "Reports verbose output to stdout.", + "docstring": "Reports verbose output to stdout.\n\nParameters\n----------\nverbose : int\n    Verbosity level. If ``verbose==1`` output is printed once in a while\n    (when iteration mod verbose_mod is zero); if larger than 1 then output\n    is printed for each update.", + "code": "class VerboseReporter:\n    \"\"\"Reports verbose output to stdout.\n\n    Parameters\n    ----------\n    verbose : int\n        Verbosity level. 
If ``verbose==1`` output is printed once in a while\n        (when iteration mod verbose_mod is zero); if larger than 1 then output\n        is printed for each update.\n    \"\"\"\n    def __init__(self, verbose):\n        self.verbose = verbose\n\n    def init(self, est, begin_at_stage=0):\n        \"\"\"Initialize reporter\n\n        Parameters\n        ----------\n        est : Estimator\n            The estimator\n\n        begin_at_stage : int, default=0\n            stage at which to begin reporting\n        \"\"\"\n        # header fields and line format str\n        header_fields = ['Iter', 'Train Loss']\n        verbose_fmt = ['{iter:>10d}', '{train_score:>16.4f}']\n        # do oob?\n        if est.subsample < 1:\n            header_fields.append('OOB Improve')\n            verbose_fmt.append('{oob_impr:>16.4f}')\n        header_fields.append('Remaining Time')\n        verbose_fmt.append('{remaining_time:>16s}')\n\n        # print the header line\n        print(('%10s ' + '%16s ' *\n               (len(header_fields) - 1)) % tuple(header_fields))\n\n        self.verbose_fmt = ' '.join(verbose_fmt)\n        # plot verbose info each time i % verbose_mod == 0\n        self.verbose_mod = 1\n        self.start_time = time()\n        self.begin_at_stage = begin_at_stage\n\n    def update(self, j, est):\n        \"\"\"Update reporter with new iteration.\n\n        Parameters\n        ----------\n        j : int\n            The new iteration.\n        est : Estimator\n            The estimator.\n        \"\"\"\n        do_oob = est.subsample < 1\n        # we need to take into account if we fit additional estimators.\n        i = j - self.begin_at_stage  # iteration relative to the start iter\n        if (i + 1) % self.verbose_mod == 0:\n            oob_impr = est.oob_improvement_[j] if do_oob else 0\n            remaining_time = ((est.n_estimators - (j + 1)) *\n                              (time() - self.start_time) / float(i + 1))\n            if remaining_time > 60:\n                remaining_time = '{0:.2f}m'.format(remaining_time / 60.0)\n            else:\n                remaining_time = '{0:.2f}s'.format(remaining_time)\n            print(self.verbose_fmt.format(iter=j + 1,\n                                          train_score=est.train_score_[j],\n                                          oob_impr=oob_impr,\n                                          remaining_time=remaining_time))\n            if self.verbose == 1 and ((i + 1) // (self.verbose_mod * 10) > 0):\n                # adjust verbose frequency (powers of 10)\n                self.verbose_mod *= 10", + "instance_attributes": [ + { + "name": "verbose_fmt", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "verbose_mod", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "begin_at_stage", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance", + "name": "BinomialDeviance", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance", + "decorators": [], + "superclasses": ["ClassificationLossFunction"], + "methods": [ + "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/__init__", + "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/init_estimator", + "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/__call__", + "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/negative_gradient", + "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_update_terminal_region", + "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_raw_prediction_to_proba", + "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_raw_prediction_to_decision", + "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/get_init_raw_predictions" + ], + "is_public": false, + "reexported_by": [], + "description": "Binomial deviance loss function for binary classification.\n\nBinary classification is a special case; here, we only need to\nfit one tree instead of ``n_classes`` trees.", + "docstring": "Binomial deviance loss function for binary 
classification.\n\nBinary classification is a special case; here, we only need to\nfit one tree instead of ``n_classes`` trees.\n\nParameters\n----------\nn_classes : int\n Number of classes.", + "code": "class BinomialDeviance(ClassificationLossFunction):\n \"\"\"Binomial deviance loss function for binary classification.\n\n Binary classification is a special case; here, we only need to\n fit one tree instead of ``n_classes`` trees.\n\n Parameters\n ----------\n n_classes : int\n Number of classes.\n \"\"\"\n def __init__(self, n_classes):\n if n_classes != 2:\n raise ValueError(\"{0:s} requires 2 classes; got {1:d} class(es)\"\n .format(self.__class__.__name__, n_classes))\n # we only need to fit one tree for binary clf.\n super().__init__(n_classes=1)\n\n def init_estimator(self):\n # return the most common class, taking into account the samples\n # weights\n return DummyClassifier(strategy='prior')\n\n def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the deviance (= 2 * negative log-likelihood).\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n # logaddexp(0, v) == log(1.0 + exp(v))\n raw_predictions = raw_predictions.ravel()\n if sample_weight is None:\n return -2 * np.mean((y * raw_predictions) -\n np.logaddexp(0, raw_predictions))\n else:\n return (-2 / sample_weight.sum() * np.sum(\n sample_weight * ((y * raw_predictions) -\n np.logaddexp(0, raw_predictions))))\n\n def negative_gradient(self, y, raw_predictions, **kargs):\n \"\"\"Compute half of the negative gradient.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n \"\"\"\n return y - expit(raw_predictions.ravel())\n\n def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n \"\"\"Make a single Newton-Raphson step.\n\n our node estimate is given by:\n\n sum(w * (y - prob)) / sum(w * prob * (1 - prob))\n\n we take advantage that: y - prob = residual\n \"\"\"\n terminal_region = np.where(terminal_regions == leaf)[0]\n residual = residual.take(terminal_region, axis=0)\n y = y.take(terminal_region, axis=0)\n sample_weight = sample_weight.take(terminal_region, axis=0)\n\n numerator = np.sum(sample_weight * residual)\n denominator = np.sum(sample_weight *\n (y - residual) * (1 - y + residual))\n\n # prevents overflow and division by zero\n if abs(denominator) < 1e-150:\n tree.value[leaf, 0, 0] = 0.0\n else:\n tree.value[leaf, 0, 0] = numerator / denominator\n\n def _raw_prediction_to_proba(self, raw_predictions):\n proba = np.ones((raw_predictions.shape[0], 2), dtype=np.float64)\n proba[:, 1] = expit(raw_predictions.ravel())\n proba[:, 0] -= proba[:, 1]\n return proba\n\n def _raw_prediction_to_decision(self, raw_predictions):\n proba = self._raw_prediction_to_proba(raw_predictions)\n return np.argmax(proba, axis=1)\n\n def get_init_raw_predictions(self, X, estimator):\n probas = estimator.predict_proba(X)\n proba_pos_class = probas[:, 1]\n eps = np.finfo(np.float32).eps\n proba_pos_class = np.clip(proba_pos_class, eps, 1 - eps)\n # log(x / (1 - x)) is the inverse of the sigmoid (expit) function\n raw_predictions = np.log(proba_pos_class / (1 - proba_pos_class))\n return raw_predictions.reshape(-1, 1).astype(np.float64)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction", + "name": "ClassificationLossFunction", + "qname": "sklearn.ensemble._gb_losses.ClassificationLossFunction", + "decorators": [], + "superclasses": ["LossFunction"], + "methods": [ + "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction/_raw_prediction_to_proba", + "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction/_raw_prediction_to_decision", + "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction/check_init_estimator" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for classification loss functions.", + "docstring": "Base class for classification loss functions. ", + "code": "class ClassificationLossFunction(LossFunction, metaclass=ABCMeta):\n \"\"\"Base class for classification loss functions. \"\"\"\n\n def _raw_prediction_to_proba(self, raw_predictions):\n \"\"\"Template method to convert raw predictions into probabilities.\n\n Parameters\n ----------\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\n Returns\n -------\n probas : ndarray of shape (n_samples, K)\n The predicted probabilities.\n \"\"\"\n\n @abstractmethod\n def _raw_prediction_to_decision(self, raw_predictions):\n \"\"\"Template method to convert raw predictions to decisions.\n\n Parameters\n ----------\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble.\n\n Returns\n -------\n encoded_predictions : ndarray of shape (n_samples, K)\n The predicted encoded labels.\n \"\"\"\n\n def check_init_estimator(self, estimator):\n \"\"\"Make sure estimator has fit and predict_proba methods.\n\n Parameters\n ----------\n estimator : object\n The init estimator to check.\n \"\"\"\n if not (hasattr(estimator, 'fit') and\n hasattr(estimator, 'predict_proba')):\n raise ValueError(\n \"The init parameter must be a valid estimator \"\n \"and support both fit and predict_proba.\"\n )", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss", + "name": "ExponentialLoss", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss", + "decorators": [], + "superclasses": ["ClassificationLossFunction"], + "methods": [ + "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/__init__", + "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/init_estimator", + "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/__call__", + "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/negative_gradient", + "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_update_terminal_region", + "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_raw_prediction_to_proba", + "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_raw_prediction_to_decision", + "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/get_init_raw_predictions" + ], + "is_public": false, + "reexported_by": [], + "description": "Exponential loss function for binary classification.\n\nSame loss as AdaBoost.", + "docstring": "Exponential loss function for binary classification.\n\nSame loss as AdaBoost.\n\nParameters\n----------\nn_classes : int\n Number of classes.\n\nReferences\n----------\nGreg Ridgeway, Generalized Boosted Models: A guide to the gbm package, 2007", + "code": "class ExponentialLoss(ClassificationLossFunction):\n \"\"\"Exponential loss function for binary classification.\n\n Same loss as AdaBoost.\n\n Parameters\n ----------\n n_classes : int\n Number of classes.\n\n References\n ----------\n Greg Ridgeway, Generalized Boosted Models: A guide to the gbm package, 2007\n \"\"\"\n def __init__(self, n_classes):\n if n_classes != 2:\n raise ValueError(\"{0:s} requires 2 classes; got {1:d} class(es)\"\n .format(self.__class__.__name__, n_classes))\n # we only need to fit one tree for binary clf.\n super().__init__(n_classes=1)\n\n def init_estimator(self):\n return DummyClassifier(strategy='prior')\n\n def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the exponential loss\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n raw_predictions = raw_predictions.ravel()\n if sample_weight is None:\n return np.mean(np.exp(-(2. * y - 1.) * raw_predictions))\n else:\n return (1.0 / sample_weight.sum() * np.sum(\n sample_weight * np.exp(-(2 * y - 1) * raw_predictions)))\n\n def negative_gradient(self, y, raw_predictions, **kargs):\n \"\"\"Compute the residual (= negative gradient).\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n \"\"\"\n y_ = -(2. * y - 1.)\n return y_ * np.exp(y_ * raw_predictions.ravel())\n\n def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n terminal_region = np.where(terminal_regions == leaf)[0]\n raw_predictions = raw_predictions.take(terminal_region, axis=0)\n y = y.take(terminal_region, axis=0)\n sample_weight = sample_weight.take(terminal_region, axis=0)\n\n y_ = 2. * y - 1.\n\n numerator = np.sum(y_ * sample_weight * np.exp(-y_ * raw_predictions))\n denominator = np.sum(sample_weight * np.exp(-y_ * raw_predictions))\n\n # prevents overflow and division by zero\n if abs(denominator) < 1e-150:\n tree.value[leaf, 0, 0] = 0.0\n else:\n tree.value[leaf, 0, 0] = numerator / denominator\n\n def _raw_prediction_to_proba(self, raw_predictions):\n proba = np.ones((raw_predictions.shape[0], 2), dtype=np.float64)\n proba[:, 1] = expit(2.0 * raw_predictions.ravel())\n proba[:, 0] -= proba[:, 1]\n return proba\n\n def _raw_prediction_to_decision(self, raw_predictions):\n return (raw_predictions.ravel() >= 0).astype(int)\n\n def get_init_raw_predictions(self, X, estimator):\n probas = estimator.predict_proba(X)\n proba_pos_class = probas[:, 1]\n eps = np.finfo(np.float32).eps\n proba_pos_class = np.clip(proba_pos_class, eps, 1 - eps)\n # according to The Elements of Statistical Learning sec. 10.5, the\n # minimizer of the exponential loss is .5 * log odds ratio. So this is\n # the equivalent to .5 * binomial_deviance.get_init_raw_predictions()\n raw_predictions = .5 * np.log(proba_pos_class / (1 - proba_pos_class))\n return raw_predictions.reshape(-1, 1).astype(np.float64)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction", + "name": "HuberLossFunction", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction", + "decorators": [], + "superclasses": ["RegressionLossFunction"], + "methods": [ + "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/__init__", + "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/init_estimator", + "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/__call__", + "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/negative_gradient", + "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/_update_terminal_region" + ], + "is_public": false, + "reexported_by": [], + "description": "Huber loss function for robust regression.\n\nM-Regression proposed in Friedman 2001.", + "docstring": "Huber loss function for robust regression.\n\nM-Regression proposed in Friedman 2001.\n\nParameters\n----------\nalpha : float, default=0.9\n Percentile at which to extract score.\n\nReferences\n----------\nJ. Friedman, Greedy Function Approximation: A Gradient Boosting\nMachine, The Annals of Statistics, Vol. 29, No. 5, 2001.", + "code": "class HuberLossFunction(RegressionLossFunction):\n \"\"\"Huber loss function for robust regression.\n\n M-Regression proposed in Friedman 2001.\n\n Parameters\n ----------\n alpha : float, default=0.9\n Percentile at which to extract score.\n\n References\n ----------\n J. Friedman, Greedy Function Approximation: A Gradient Boosting\n Machine, The Annals of Statistics, Vol. 29, No. 
5, 2001.\n \"\"\"\n\n def __init__(self, alpha=0.9):\n super().__init__()\n self.alpha = alpha\n self.gamma = None\n\n def init_estimator(self):\n return DummyRegressor(strategy='quantile', quantile=.5)\n\n def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the Huber loss.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n raw_predictions = raw_predictions.ravel()\n diff = y - raw_predictions\n gamma = self.gamma\n if gamma is None:\n if sample_weight is None:\n gamma = np.percentile(np.abs(diff), self.alpha * 100)\n else:\n gamma = _weighted_percentile(np.abs(diff), sample_weight,\n self.alpha * 100)\n\n gamma_mask = np.abs(diff) <= gamma\n if sample_weight is None:\n sq_loss = np.sum(0.5 * diff[gamma_mask] ** 2)\n lin_loss = np.sum(gamma * (np.abs(diff[~gamma_mask]) -\n gamma / 2))\n loss = (sq_loss + lin_loss) / y.shape[0]\n else:\n sq_loss = np.sum(0.5 * sample_weight[gamma_mask] *\n diff[gamma_mask] ** 2)\n lin_loss = np.sum(gamma * sample_weight[~gamma_mask] *\n (np.abs(diff[~gamma_mask]) - gamma / 2))\n loss = (sq_loss + lin_loss) / sample_weight.sum()\n return loss\n\n def negative_gradient(self, y, raw_predictions, sample_weight=None,\n **kargs):\n \"\"\"Compute the negative gradient.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n The target labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n raw_predictions = raw_predictions.ravel()\n diff = y - raw_predictions\n if sample_weight is None:\n gamma = np.percentile(np.abs(diff), self.alpha * 100)\n else:\n gamma = _weighted_percentile(np.abs(diff), sample_weight,\n self.alpha * 100)\n gamma_mask = np.abs(diff) <= gamma\n residual = np.zeros((y.shape[0],), dtype=np.float64)\n residual[gamma_mask] = diff[gamma_mask]\n residual[~gamma_mask] = gamma * np.sign(diff[~gamma_mask])\n self.gamma = gamma\n return residual\n\n def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n terminal_region = np.where(terminal_regions == leaf)[0]\n sample_weight = sample_weight.take(terminal_region, axis=0)\n gamma = self.gamma\n diff = (y.take(terminal_region, axis=0)\n - raw_predictions.take(terminal_region, axis=0))\n median = _weighted_percentile(diff, sample_weight, percentile=50)\n diff_minus_median = diff - median\n tree.value[leaf, 0] = median + np.mean(\n np.sign(diff_minus_median) *\n np.minimum(np.abs(diff_minus_median), gamma))", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError", + "name": "LeastAbsoluteError", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError", + "decorators": [], + "superclasses": ["RegressionLossFunction"], + "methods": [ + "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/init_estimator", + "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/__call__", + "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/negative_gradient", + 
"scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/_update_terminal_region" + ], + "is_public": false, + "reexported_by": [], + "description": "Loss function for least absolute deviation (LAD) regression.", + "docstring": "Loss function for least absolute deviation (LAD) regression.\n\nParameters\n----------\nn_classes : int\n Number of classes", + "code": "class LeastAbsoluteError(RegressionLossFunction):\n \"\"\"Loss function for least absolute deviation (LAD) regression.\n\n Parameters\n ----------\n n_classes : int\n Number of classes\n \"\"\"\n def init_estimator(self):\n return DummyRegressor(strategy='quantile', quantile=.5)\n\n def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the least absolute error.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves).\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n if sample_weight is None:\n return np.abs(y - raw_predictions.ravel()).mean()\n else:\n return (1 / sample_weight.sum() * np.sum(\n sample_weight * np.abs(y - raw_predictions.ravel())))\n\n def negative_gradient(self, y, raw_predictions, **kargs):\n \"\"\"Compute the negative gradient.\n\n 1.0 if y - raw_predictions > 0.0 else -1.0\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n The target labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n \"\"\"\n raw_predictions = raw_predictions.ravel()\n return 2 * (y - raw_predictions > 0) - 1\n\n def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n \"\"\"LAD updates terminal regions to median estimates.\"\"\"\n terminal_region = np.where(terminal_regions == leaf)[0]\n sample_weight = sample_weight.take(terminal_region, axis=0)\n diff = (y.take(terminal_region, axis=0) -\n raw_predictions.take(terminal_region, axis=0))\n tree.value[leaf, 0, 0] = _weighted_percentile(diff, sample_weight,\n percentile=50)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError", + "name": "LeastSquaresError", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError", + "decorators": [], + "superclasses": ["RegressionLossFunction"], + "methods": [ + "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/init_estimator", + "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/__call__", + "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/negative_gradient", + "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/update_terminal_regions", + "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/_update_terminal_region" + ], + "is_public": false, + "reexported_by": [], + "description": "Loss function for least squares (LS) estimation.\nTerminal regions do not need to be updated for least squares.", + "docstring": "Loss function for least squares (LS) estimation.\nTerminal regions do not need to be updated for least squares.\n\nParameters\n----------\nn_classes : int\n Number of classes.", + "code": "class LeastSquaresError(RegressionLossFunction):\n \"\"\"Loss function for least squares (LS) estimation.\n Terminal regions do not need to be updated for least squares.\n\n Parameters\n ----------\n n_classes : int\n Number of classes.\n \"\"\"\n\n def init_estimator(self):\n 
return DummyRegressor(strategy='mean')\n\n def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the least squares loss.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves).\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n if sample_weight is None:\n return np.mean((y - raw_predictions.ravel()) ** 2)\n else:\n return (1 / sample_weight.sum() * np.sum(\n sample_weight * ((y - raw_predictions.ravel()) ** 2)))\n\n def negative_gradient(self, y, raw_predictions, **kargs):\n \"\"\"Compute half of the negative gradient.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n The target labels.\n\n raw_predictions : ndarray of shape (n_samples,)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n \"\"\"\n return y - raw_predictions.ravel()\n\n def update_terminal_regions(self, tree, X, y, residual, raw_predictions,\n sample_weight, sample_mask,\n learning_rate=0.1, k=0):\n \"\"\"Least squares does not need to update terminal regions.\n\n But it has to update the predictions.\n\n Parameters\n ----------\n tree : tree.Tree\n The tree object.\n X : ndarray of shape (n_samples, n_features)\n The data array.\n y : ndarray of shape (n_samples,)\n The target labels.\n residual : ndarray of shape (n_samples,)\n The residuals (usually the negative gradient).\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n sample_weight : ndarray of shape (n,)\n The weight of each sample.\n sample_mask : ndarray of shape (n,)\n The sample mask to be used.\n learning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by\n ``learning_rate``.\n k : int, default=0\n The index of the estimator being updated.\n \"\"\"\n # update predictions\n raw_predictions[:, k] += learning_rate * tree.predict(X).ravel()\n\n def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n pass", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction", + "name": "LossFunction", + "qname": "sklearn.ensemble._gb_losses.LossFunction", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/__init__", + "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/init_estimator", + "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/__call__", + "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/negative_gradient", + "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/update_terminal_regions", + "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/_update_terminal_region", + "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/get_init_raw_predictions" + ], + "is_public": false, + "reexported_by": [], + "description": "Abstract base class for various loss functions.", + "docstring": "Abstract base class for various loss functions.\n\nParameters\n----------\nn_classes : int\n Number of classes.\n\nAttributes\n----------\nK : int\n The number of regression trees to be induced;\n 1 for regression and binary classification;\n ``n_classes`` for multi-class classification.", + "code": "class LossFunction(metaclass=ABCMeta):\n \"\"\"Abstract base class for various loss functions.\n\n Parameters\n 
----------\n n_classes : int\n Number of classes.\n\n Attributes\n ----------\n K : int\n The number of regression trees to be induced;\n 1 for regression and binary classification;\n ``n_classes`` for multi-class classification.\n \"\"\"\n\n is_multi_class = False\n\n def __init__(self, n_classes):\n self.K = n_classes\n\n def init_estimator(self):\n \"\"\"Default ``init`` estimator for loss function. \"\"\"\n raise NotImplementedError()\n\n @abstractmethod\n def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the loss.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves).\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n\n @abstractmethod\n def negative_gradient(self, y, raw_predictions, **kargs):\n \"\"\"Compute the negative gradient.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n The target labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n \"\"\"\n\n def update_terminal_regions(self, tree, X, y, residual, raw_predictions,\n sample_weight, sample_mask,\n learning_rate=0.1, k=0):\n \"\"\"Update the terminal regions (=leaves) of the given tree and\n updates the current predictions of the model. Traverses tree\n and invokes template method `_update_terminal_region`.\n\n Parameters\n ----------\n tree : tree.Tree\n The tree object.\n X : ndarray of shape (n_samples, n_features)\n The data array.\n y : ndarray of shape (n_samples,)\n The target labels.\n residual : ndarray of shape (n_samples,)\n The residuals (usually the negative gradient).\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n sample_weight : ndarray of shape (n_samples,)\n The weight of each sample.\n sample_mask : ndarray of shape (n_samples,)\n The sample mask to be used.\n learning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by\n ``learning_rate``.\n k : int, default=0\n The index of the estimator being updated.\n\n \"\"\"\n # compute leaf for each sample in ``X``.\n terminal_regions = tree.apply(X)\n\n # mask all which are not in sample mask.\n masked_terminal_regions = terminal_regions.copy()\n masked_terminal_regions[~sample_mask] = -1\n\n # update each leaf (= perform line search)\n for leaf in np.where(tree.children_left == TREE_LEAF)[0]:\n self._update_terminal_region(tree, masked_terminal_regions,\n leaf, X, y, residual,\n raw_predictions[:, k], sample_weight)\n\n # update predictions (both in-bag and out-of-bag)\n raw_predictions[:, k] += \\\n learning_rate * tree.value[:, 0, 0].take(terminal_regions, axis=0)\n\n @abstractmethod\n def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n \"\"\"Template method for updating terminal regions (i.e., leaves).\"\"\"\n\n @abstractmethod\n def get_init_raw_predictions(self, X, estimator):\n \"\"\"Return the initial raw predictions.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The data array.\n estimator : object\n The estimator to use to compute the predictions.\n\n Returns\n -------\n raw_predictions : ndarray of shape (n_samples, K)\n The initial raw predictions. 
K is equal to 1 for binary\n            classification and regression, and equal to the number of classes\n            for multiclass classification. ``raw_predictions`` is cast\n            into float64.\n        \"\"\"\n        pass", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance", + "name": "MultinomialDeviance", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance", + "decorators": [], + "superclasses": ["ClassificationLossFunction"], + "methods": [ + "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/__init__", + "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/init_estimator", + "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/__call__", + "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/negative_gradient", + "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_update_terminal_region", + "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_raw_prediction_to_proba", + "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_raw_prediction_to_decision", + "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/get_init_raw_predictions" + ], + "is_public": false, + "reexported_by": [], + "description": "Multinomial deviance loss function for multi-class classification.\n\nFor multi-class classification we need to fit ``n_classes`` trees at\neach stage.", + "docstring": "Multinomial deviance loss function for multi-class classification.\n\nFor multi-class classification we need to fit ``n_classes`` trees at\neach stage.\n\nParameters\n----------\nn_classes : int\n    Number of classes.", + "code": "class MultinomialDeviance(ClassificationLossFunction):\n    \"\"\"Multinomial deviance loss function for multi-class classification.\n\n    For multi-class classification we need to fit ``n_classes`` trees at\n    each stage.\n\n    Parameters\n    ----------\n    n_classes : int\n        Number of classes.\n    \"\"\"\n\n    is_multi_class = True\n\n    def __init__(self, n_classes):\n        if n_classes < 3:\n            raise ValueError(\"{0:s} requires more than 2 classes.\".format(\n                self.__class__.__name__))\n        super().__init__(n_classes)\n\n    def init_estimator(self):\n        return DummyClassifier(strategy='prior')\n\n    def __call__(self, y, raw_predictions, sample_weight=None):\n        \"\"\"Compute the Multinomial deviance.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples,)\n            True labels.\n\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. values from the tree leaves) of the\n            tree ensemble.\n\n        sample_weight : ndarray of shape (n_samples,), default=None\n            Sample weights.\n        \"\"\"\n        # create one-hot label encoding\n        Y = np.zeros((y.shape[0], self.K), dtype=np.float64)\n        for k in range(self.K):\n            Y[:, k] = y == k\n\n        return np.average(\n            -1 * (Y * raw_predictions).sum(axis=1) +\n            logsumexp(raw_predictions, axis=1),\n            weights=sample_weight\n        )\n\n    def negative_gradient(self, y, raw_predictions, k=0, **kwargs):\n        \"\"\"Compute negative gradient for the ``k``-th class.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples,)\n            The target labels.\n\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. 
values from the tree leaves) of the\n            tree ensemble at iteration ``i - 1``.\n\n        k : int, default=0\n            The index of the class.\n        \"\"\"\n        return y - np.nan_to_num(np.exp(raw_predictions[:, k] -\n                                        logsumexp(raw_predictions, axis=1)))\n\n    def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n                                residual, raw_predictions, sample_weight):\n        \"\"\"Make a single Newton-Raphson step. \"\"\"\n        terminal_region = np.where(terminal_regions == leaf)[0]\n        residual = residual.take(terminal_region, axis=0)\n        y = y.take(terminal_region, axis=0)\n        sample_weight = sample_weight.take(terminal_region, axis=0)\n\n        numerator = np.sum(sample_weight * residual)\n        numerator *= (self.K - 1) / self.K\n\n        denominator = np.sum(sample_weight * (y - residual) *\n                             (1 - y + residual))\n\n        # prevents overflow and division by zero\n        if abs(denominator) < 1e-150:\n            tree.value[leaf, 0, 0] = 0.0\n        else:\n            tree.value[leaf, 0, 0] = numerator / denominator\n\n    def _raw_prediction_to_proba(self, raw_predictions):\n        return np.nan_to_num(\n            np.exp(raw_predictions -\n                   (logsumexp(raw_predictions, axis=1)[:, np.newaxis])))\n\n    def _raw_prediction_to_decision(self, raw_predictions):\n        proba = self._raw_prediction_to_proba(raw_predictions)\n        return np.argmax(proba, axis=1)\n\n    def get_init_raw_predictions(self, X, estimator):\n        probas = estimator.predict_proba(X)\n        eps = np.finfo(np.float32).eps\n        probas = np.clip(probas, eps, 1 - eps)\n        raw_predictions = np.log(probas).astype(np.float64)\n        return raw_predictions", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction", + "name": "QuantileLossFunction", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction", + "decorators": [], + "superclasses": ["RegressionLossFunction"], + "methods": [ + "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/__init__", + "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/init_estimator", + "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/__call__", + "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/negative_gradient", + "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/_update_terminal_region" + ], + "is_public": false, + "reexported_by": [], + "description": "Loss function for quantile regression.\n\nQuantile regression allows estimating the percentiles\nof the conditional distribution of the target.", + "docstring": "Loss function for quantile regression.\n\nQuantile regression allows estimating the percentiles\nof the conditional distribution of the target.\n\nParameters\n----------\nalpha : float, default=0.9\n    The percentile.", + "code": "class QuantileLossFunction(RegressionLossFunction):\n    \"\"\"Loss function for quantile regression.\n\n    Quantile regression allows estimating the percentiles\n    of the conditional distribution of the target.\n\n    Parameters\n    ----------\n    alpha : float, default=0.9\n        The percentile.\n    \"\"\"\n    def __init__(self, alpha=0.9):\n        super().__init__()\n        self.alpha = alpha\n        self.percentile = alpha * 100\n\n    def init_estimator(self):\n        return DummyRegressor(strategy='quantile', quantile=self.alpha)\n\n    def __call__(self, y, raw_predictions, sample_weight=None):\n        \"\"\"Compute the Quantile loss.\n\n        Parameters\n        ----------\n        y : ndarray of shape (n_samples,)\n            True labels.\n\n        raw_predictions : ndarray of shape (n_samples, K)\n            The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble.\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n raw_predictions = raw_predictions.ravel()\n diff = y - raw_predictions\n alpha = self.alpha\n\n mask = y > raw_predictions\n if sample_weight is None:\n loss = (alpha * diff[mask].sum() -\n (1 - alpha) * diff[~mask].sum()) / y.shape[0]\n else:\n loss = ((alpha * np.sum(sample_weight[mask] * diff[mask]) -\n (1 - alpha) * np.sum(sample_weight[~mask] *\n diff[~mask])) / sample_weight.sum())\n return loss\n\n def negative_gradient(self, y, raw_predictions, **kargs):\n \"\"\"Compute the negative gradient.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n The target labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n \"\"\"\n alpha = self.alpha\n raw_predictions = raw_predictions.ravel()\n mask = y > raw_predictions\n return (alpha * mask) - ((1 - alpha) * ~mask)\n\n def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n terminal_region = np.where(terminal_regions == leaf)[0]\n diff = (y.take(terminal_region, axis=0)\n - raw_predictions.take(terminal_region, axis=0))\n sample_weight = sample_weight.take(terminal_region, axis=0)\n\n val = _weighted_percentile(diff, sample_weight, self.percentile)\n tree.value[leaf, 0] = val", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "percentile", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction", + "name": "RegressionLossFunction", + "qname": "sklearn.ensemble._gb_losses.RegressionLossFunction", + "decorators": [], + "superclasses": ["LossFunction"], + "methods": [ + "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction/__init__", + "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction/check_init_estimator", + "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction/get_init_raw_predictions" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for regression loss functions.", + "docstring": "Base class for regression loss functions.", + "code": "class RegressionLossFunction(LossFunction, metaclass=ABCMeta):\n \"\"\"Base class for regression loss functions.\"\"\"\n def __init__(self):\n super().__init__(n_classes=1)\n\n def check_init_estimator(self, estimator):\n \"\"\"Make sure estimator has the required fit and predict methods.\n\n Parameters\n ----------\n estimator : object\n The init estimator to check.\n \"\"\"\n if not (hasattr(estimator, 'fit') and hasattr(estimator, 'predict')):\n raise ValueError(\n \"The init parameter must be a valid estimator and \"\n \"support both fit and predict.\"\n )\n\n def get_init_raw_predictions(self, X, estimator):\n predictions = estimator.predict(X)\n return predictions.reshape(-1, 1).astype(np.float64)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper", + "name": "_BinMapper", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/__init__", + 
"scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/fit", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/transform", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/make_known_categories_bitsets" + ], + "is_public": false, + "reexported_by": [], + "description": "Transformer that maps a dataset into integer-valued bins.\n\nFor continuous features, the bins are created in a feature-wise fashion,\nusing quantiles so that each bins contains approximately the same number\nof samples. For large datasets, quantiles are computed on a subset of the\ndata to speed-up the binning, but the quantiles should remain stable.\n\nFor categorical features, the raw categorical values are expected to be\nin [0, 254] (this is not validated here though) and each category\ncorresponds to a bin. All categorical values must be known at\ninitialization: transform() doesn't know how to bin unknown categorical\nvalues. Note that transform() is only used on non-training data in the\ncase of early stopping.\n\nFeatures with a small number of values may be binned into less than\n``n_bins`` bins. The last bin (at index ``n_bins - 1``) is always reserved\nfor missing values.", + "docstring": "Transformer that maps a dataset into integer-valued bins.\n\nFor continuous features, the bins are created in a feature-wise fashion,\nusing quantiles so that each bins contains approximately the same number\nof samples. For large datasets, quantiles are computed on a subset of the\ndata to speed-up the binning, but the quantiles should remain stable.\n\nFor categorical features, the raw categorical values are expected to be\nin [0, 254] (this is not validated here though) and each category\ncorresponds to a bin. All categorical values must be known at\ninitialization: transform() doesn't know how to bin unknown categorical\nvalues. Note that transform() is only used on non-training data in the\ncase of early stopping.\n\nFeatures with a small number of values may be binned into less than\n``n_bins`` bins. The last bin (at index ``n_bins - 1``) is always reserved\nfor missing values.\n\nParameters\n----------\nn_bins : int, default=256\n The maximum number of bins to use (including the bin for missing\n values). Should be in [3, 256]. Non-missing values are binned on\n ``max_bins = n_bins - 1`` bins. The last bin is always reserved for\n missing values. If for a given feature the number of unique values is\n less than ``max_bins``, then those unique values will be used to\n compute the bin thresholds, instead of the quantiles. For categorical\n features indicated by ``is_categorical``, the docstring for\n ``is_categorical`` details on this procedure.\nsubsample : int or None, default=2e5\n If ``n_samples > subsample``, then ``sub_samples`` samples will be\n randomly chosen to compute the quantiles. If ``None``, the whole data\n is used.\nis_categorical : ndarray of bool of shape (n_features,), default=None\n Indicates categorical features. By default, all features are\n considered continuous.\nknown_categories : list of {ndarray, None} of shape (n_features,), default=none\n For each categorical feature, the array indicates the set of unique\n categorical values. These should be the possible values over all the\n data, not just the training data. 
For continuous features, the\n corresponding entry should be None.\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the random sub-sampling.\n Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nbin_thresholds_ : list of ndarray\n For each feature, each array indicates how to map a feature into a\n binned feature. The semantics and size depend on the nature of the\n feature:\n - for real-valued features, the array corresponds to the real-valued\n bin thresholds (the upper bound of each bin). There are ``max_bins\n - 1`` thresholds, where ``max_bins = n_bins - 1`` is the number of\n bins used for non-missing values.\n - for categorical features, the array is a map from a binned category\n value to the raw category value. The size of the array is equal to\n ``min(max_bins, category_cardinality)`` where we ignore missing\n values in the cardinality.\nn_bins_non_missing_ : ndarray, dtype=np.uint32\n For each feature, gives the number of bins actually used for\n non-missing values. For features with a lot of unique values, this is\n equal to ``n_bins - 1``.\nis_categorical_ : ndarray of shape (n_features,), dtype=np.uint8\n Indicator for categorical features.\nmissing_values_bin_idx_ : np.uint8\n The index of the bin where missing values are mapped. This is a\n constant across all features. This corresponds to the last bin, and\n it is always equal to ``n_bins - 1``. Note that if ``n_bins_non_missing_``\n is less than ``n_bins - 1`` for a given feature, then there are\n empty (and unused) bins.", + "code": "class _BinMapper(TransformerMixin, BaseEstimator):\n \"\"\"Transformer that maps a dataset into integer-valued bins.\n\n For continuous features, the bins are created in a feature-wise fashion,\n using quantiles so that each bin contains approximately the same number\n of samples. For large datasets, quantiles are computed on a subset of the\n data to speed up the binning, but the quantiles should remain stable.\n\n For categorical features, the raw categorical values are expected to be\n in [0, 254] (this is not validated here though) and each category\n corresponds to a bin. All categorical values must be known at\n initialization: transform() doesn't know how to bin unknown categorical\n values. Note that transform() is only used on non-training data in the\n case of early stopping.\n\n Features with a small number of values may be binned into fewer than\n ``n_bins`` bins. The last bin (at index ``n_bins - 1``) is always reserved\n for missing values.\n\n Parameters\n ----------\n n_bins : int, default=256\n The maximum number of bins to use (including the bin for missing\n values). Should be in [3, 256]. Non-missing values are binned on\n ``max_bins = n_bins - 1`` bins. The last bin is always reserved for\n missing values. If for a given feature the number of unique values is\n less than ``max_bins``, then those unique values will be used to\n compute the bin thresholds, instead of the quantiles. For categorical\n features indicated by ``is_categorical``, see the docstring of\n ``is_categorical`` for details on this procedure.\n subsample : int or None, default=2e5\n If ``n_samples > subsample``, then ``subsample`` samples will be\n randomly chosen to compute the quantiles. If ``None``, the whole data\n is used.\n is_categorical : ndarray of bool of shape (n_features,), default=None\n Indicates categorical features. 
By default, all features are\n considered continuous.\n known_categories : list of {ndarray, None} of shape (n_features,), \\\n default=None\n For each categorical feature, the array indicates the set of unique\n categorical values. These should be the possible values over all the\n data, not just the training data. For continuous features, the\n corresponding entry should be None.\n random_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the random sub-sampling.\n Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n bin_thresholds_ : list of ndarray\n For each feature, each array indicates how to map a feature into a\n binned feature. The semantics and size depend on the nature of the\n feature:\n - for real-valued features, the array corresponds to the real-valued\n bin thresholds (the upper bound of each bin). There are ``max_bins\n - 1`` thresholds, where ``max_bins = n_bins - 1`` is the number of\n bins used for non-missing values.\n - for categorical features, the array is a map from a binned category\n value to the raw category value. The size of the array is equal to\n ``min(max_bins, category_cardinality)`` where we ignore missing\n values in the cardinality.\n n_bins_non_missing_ : ndarray, dtype=np.uint32\n For each feature, gives the number of bins actually used for\n non-missing values. For features with a lot of unique values, this is\n equal to ``n_bins - 1``.\n is_categorical_ : ndarray of shape (n_features,), dtype=np.uint8\n Indicator for categorical features.\n missing_values_bin_idx_ : np.uint8\n The index of the bin where missing values are mapped. This is a\n constant across all features. This corresponds to the last bin, and\n it is always equal to ``n_bins - 1``. 
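# --- Editorial aside: hedged toy illustration (not the library's
# --- _map_to_bins) of the convention documented above: non-missing values
# --- occupy bins [0, n_bins - 2] and missing values always land in the
# --- reserved last bin at index n_bins - 1.
import numpy as np

n_bins = 6                                   # toy value; the estimator default is 256
missing_values_bin_idx = n_bins - 1          # == 5 here
thresholds = np.array([1.0, 2.0, 3.0, 4.0])  # max_bins - 1 upper bounds

col = np.array([0.2, 1.5, np.nan, 3.7])
binned = np.full(col.shape, missing_values_bin_idx, dtype=np.uint8)
finite = ~np.isnan(col)
binned[finite] = np.searchsorted(thresholds, col[finite])
# binned == [0, 1, 5, 3]; the NaN row was routed to the missing-value bin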
Note that if ``n_bins_non_missing_``\n is less than ``n_bins - 1`` for a given feature, then there are\n empty (and unused) bins.\n \"\"\"\n def __init__(self, n_bins=256, subsample=int(2e5), is_categorical=None,\n known_categories=None, random_state=None):\n self.n_bins = n_bins\n self.subsample = subsample\n self.is_categorical = is_categorical\n self.known_categories = known_categories\n self.random_state = random_state\n\n def fit(self, X, y=None):\n \"\"\"Fit data X by computing the binning thresholds.\n\n The last bin is reserved for missing values, whether missing values\n are present in the data or not.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to bin.\n y: None\n Ignored.\n\n Returns\n -------\n self : object\n \"\"\"\n if not (3 <= self.n_bins <= 256):\n # min is 3: at least 2 distinct bins and a missing values bin\n raise ValueError('n_bins={} should be no smaller than 3 '\n 'and no larger than 256.'.format(self.n_bins))\n\n X = check_array(X, dtype=[X_DTYPE], force_all_finite=False)\n max_bins = self.n_bins - 1\n\n rng = check_random_state(self.random_state)\n if self.subsample is not None and X.shape[0] > self.subsample:\n subset = rng.choice(X.shape[0], self.subsample, replace=False)\n X = X.take(subset, axis=0)\n\n if self.is_categorical is None:\n self.is_categorical_ = np.zeros(X.shape[1], dtype=np.uint8)\n else:\n self.is_categorical_ = np.asarray(self.is_categorical,\n dtype=np.uint8)\n\n n_features = X.shape[1]\n known_categories = self.known_categories\n if known_categories is None:\n known_categories = [None] * n_features\n\n # validate is_categorical and known_categories parameters\n for f_idx in range(n_features):\n is_categorical = self.is_categorical_[f_idx]\n known_cats = known_categories[f_idx]\n if is_categorical and known_cats is None:\n raise ValueError(\n f\"Known categories for feature {f_idx} must be provided.\"\n )\n if not is_categorical and known_cats is not None:\n raise ValueError(\n f\"Feature {f_idx} isn't marked as a categorical feature, \"\n f\"but categories were passed.\"\n )\n\n self.missing_values_bin_idx_ = self.n_bins - 1\n\n self.bin_thresholds_ = []\n n_bins_non_missing = []\n\n for f_idx in range(n_features):\n if not self.is_categorical_[f_idx]:\n thresholds = _find_binning_thresholds(X[:, f_idx], max_bins)\n n_bins_non_missing.append(thresholds.shape[0] + 1)\n else:\n # Since categories are assumed to be encoded in\n # [0, n_cats] and since n_cats <= max_bins,\n # the thresholds *are* the unique categorical values. This will\n # lead to the correct mapping in transform()\n thresholds = known_categories[f_idx]\n n_bins_non_missing.append(thresholds.shape[0])\n\n self.bin_thresholds_.append(thresholds)\n\n self.n_bins_non_missing_ = np.array(n_bins_non_missing,\n dtype=np.uint32)\n return self\n\n def transform(self, X):\n \"\"\"Bin data X.\n\n Missing values will be mapped to the last bin.\n\n For categorical features, the mapping will be incorrect for unknown\n categories. Since the BinMapper is given known_categories of the\n entire training data (i.e. 
before the call to train_test_split() in\n case of early-stopping), this never happens.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to bin.\n\n Returns\n -------\n X_binned : array-like of shape (n_samples, n_features)\n The binned data (fortran-aligned).\n \"\"\"\n X = check_array(X, dtype=[X_DTYPE], force_all_finite=False)\n check_is_fitted(self)\n if X.shape[1] != self.n_bins_non_missing_.shape[0]:\n raise ValueError(\n 'This estimator was fitted with {} features but {} got passed '\n 'to transform()'.format(self.n_bins_non_missing_.shape[0],\n X.shape[1])\n )\n binned = np.zeros_like(X, dtype=X_BINNED_DTYPE, order='F')\n _map_to_bins(X, self.bin_thresholds_, self.missing_values_bin_idx_,\n binned)\n return binned\n\n def make_known_categories_bitsets(self):\n \"\"\"Create bitsets of known categories.\n\n Returns\n -------\n - known_cat_bitsets : ndarray of shape (n_categorical_features, 8)\n Array of bitsets of known categories, for each categorical feature.\n - f_idx_map : ndarray of shape (n_features,)\n Map from original feature index to the corresponding index in the\n known_cat_bitsets array.\n \"\"\"\n\n categorical_features_indices = np.flatnonzero(self.is_categorical_)\n\n n_features = self.is_categorical_.size\n n_categorical_features = categorical_features_indices.size\n\n f_idx_map = np.zeros(n_features, dtype=np.uint32)\n f_idx_map[categorical_features_indices] = np.arange(\n n_categorical_features, dtype=np.uint32)\n\n known_categories = self.bin_thresholds_\n\n known_cat_bitsets = np.zeros((n_categorical_features, 8),\n dtype=X_BITSET_INNER_DTYPE)\n\n # TODO: complexity is O(n_categorical_features * 255). Maybe this is\n # worth cythonizing\n for mapped_f_idx, f_idx in enumerate(categorical_features_indices):\n for raw_cat_val in known_categories[f_idx]:\n set_bitset_memoryview(known_cat_bitsets[mapped_f_idx],\n raw_cat_val)\n\n return known_cat_bitsets, f_idx_map", + "instance_attributes": [ + { + "name": "n_bins", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "subsample", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "is_categorical_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "missing_values_bin_idx_", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "bin_thresholds_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "n_bins_non_missing_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting", + "name": "BaseHistGradientBoosting", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting", + "decorators": [], + "superclasses": ["BaseEstimator", "ABC"], + "methods": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_validate_parameters", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_categories", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/fit", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_is_fitted", + 
"scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_clear_state", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_get_small_trainset", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_scorer", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_loss", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_should_stop", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_bin_data", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_print_iteration_stats", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_raw_predict", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_predict_iterations", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_staged_raw_predict", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_compute_partial_dependence_recursion", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_more_tags", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_get_loss", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_encode_y", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/n_iter_@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for histogram-based gradient boosting estimators.", + "docstring": "Base class for histogram-based gradient boosting estimators.", + "code": "class BaseHistGradientBoosting(BaseEstimator, ABC):\n \"\"\"Base class for histogram-based gradient boosting estimators.\"\"\"\n\n @abstractmethod\n def __init__(self, loss, *, learning_rate, max_iter, max_leaf_nodes,\n max_depth, min_samples_leaf, l2_regularization, max_bins,\n categorical_features, monotonic_cst,\n warm_start, early_stopping, scoring,\n validation_fraction, n_iter_no_change, tol, verbose,\n random_state):\n self.loss = loss\n self.learning_rate = learning_rate\n self.max_iter = max_iter\n self.max_leaf_nodes = max_leaf_nodes\n self.max_depth = max_depth\n self.min_samples_leaf = min_samples_leaf\n self.l2_regularization = l2_regularization\n self.max_bins = max_bins\n self.monotonic_cst = monotonic_cst\n self.categorical_features = categorical_features\n self.warm_start = warm_start\n self.early_stopping = early_stopping\n self.scoring = scoring\n self.validation_fraction = validation_fraction\n self.n_iter_no_change = n_iter_no_change\n self.tol = tol\n self.verbose = verbose\n self.random_state = random_state\n\n def _validate_parameters(self):\n \"\"\"Validate parameters passed to __init__.\n\n The parameters that are directly passed to the grower are checked in\n TreeGrower.\"\"\"\n\n if (self.loss not in self._VALID_LOSSES and\n not isinstance(self.loss, BaseLoss)):\n raise ValueError(\n \"Loss {} is not supported for {}. 
Accepted losses: \"\n \"{}.\".format(self.loss, self.__class__.__name__,\n ', '.join(self._VALID_LOSSES)))\n\n if self.learning_rate <= 0:\n raise ValueError('learning_rate={} must '\n 'be strictly positive'.format(self.learning_rate))\n if self.max_iter < 1:\n raise ValueError('max_iter={} must not be smaller '\n 'than 1.'.format(self.max_iter))\n if self.n_iter_no_change < 0:\n raise ValueError('n_iter_no_change={} must be '\n 'positive.'.format(self.n_iter_no_change))\n if (self.validation_fraction is not None and\n self.validation_fraction <= 0):\n raise ValueError(\n 'validation_fraction={} must be strictly '\n 'positive, or None.'.format(self.validation_fraction))\n if self.tol is not None and self.tol < 0:\n raise ValueError('tol={} '\n 'must not be smaller than 0.'.format(self.tol))\n\n if not (2 <= self.max_bins <= 255):\n raise ValueError('max_bins={} should be no smaller than 2 '\n 'and no larger than 255.'.format(self.max_bins))\n\n if self.monotonic_cst is not None and self.n_trees_per_iteration_ != 1:\n raise ValueError(\n 'monotonic constraints are not supported for '\n 'multiclass classification.'\n )\n\n def _check_categories(self, X):\n \"\"\"Check and validate categorical features in X\n\n Return\n ------\n is_categorical : ndarray of shape (n_features,) or None, dtype=bool\n Indicates whether a feature is categorical. If no feature is\n categorical, this is None.\n known_categories : list of size n_features or None\n The list contains, for each feature:\n - an array of shape (n_categories,) with the unique cat values\n - None if the feature is not categorical\n None if no feature is categorical.\n \"\"\"\n if self.categorical_features is None:\n return None, None\n\n categorical_features = np.asarray(self.categorical_features)\n\n if categorical_features.size == 0:\n return None, None\n\n if categorical_features.dtype.kind not in ('i', 'b'):\n raise ValueError(\"categorical_features must be an array-like of \"\n \"bools or array-like of ints.\")\n\n n_features = X.shape[1]\n\n # check for categorical features as indices\n if categorical_features.dtype.kind == 'i':\n if (np.max(categorical_features) >= n_features\n or np.min(categorical_features) < 0):\n raise ValueError(\"categorical_features set as integer \"\n \"indices must be in [0, n_features - 1]\")\n is_categorical = np.zeros(n_features, dtype=bool)\n is_categorical[categorical_features] = True\n else:\n if categorical_features.shape[0] != n_features:\n raise ValueError(\"categorical_features set as a boolean mask \"\n \"must have shape (n_features,), got: \"\n f\"{categorical_features.shape}\")\n is_categorical = categorical_features\n\n if not np.any(is_categorical):\n return None, None\n\n # compute the known categories in the training data. 
We need to do\n # that here instead of in the BinMapper because in case of early\n # stopping, the mapper only gets a fraction of the training data.\n known_categories = []\n\n for f_idx in range(n_features):\n if is_categorical[f_idx]:\n categories = np.unique(X[:, f_idx])\n missing = np.isnan(categories)\n if missing.any():\n categories = categories[~missing]\n\n if categories.size > self.max_bins:\n raise ValueError(\n f\"Categorical feature at index {f_idx} is \"\n f\"expected to have a \"\n f\"cardinality <= {self.max_bins}\"\n )\n\n if (categories >= self.max_bins).any():\n raise ValueError(\n f\"Categorical feature at index {f_idx} is \"\n f\"expected to be encoded with \"\n f\"values < {self.max_bins}\"\n )\n else:\n categories = None\n known_categories.append(categories)\n\n return is_categorical, known_categories\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the gradient boosting model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,) default=None\n Weights of training data.\n\n .. versionadded:: 0.23\n\n Returns\n -------\n self : object\n \"\"\"\n fit_start_time = time()\n acc_find_split_time = 0. # time spent finding the best splits\n acc_apply_split_time = 0. # time spent splitting nodes\n acc_compute_hist_time = 0. # time spent computing histograms\n # time spent predicting X for gradient and hessians update\n acc_prediction_time = 0.\n X, y = self._validate_data(X, y, dtype=[X_DTYPE],\n force_all_finite=False)\n y = self._encode_y(y)\n check_consistent_length(X, y)\n # Do not create unit sample weights by default to later skip some\n # computation\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=np.float64)\n # TODO: remove when PDP suports sample weights\n self._fitted_with_sw = True\n\n rng = check_random_state(self.random_state)\n\n # When warm starting, we want to re-use the same seed that was used\n # the first time fit was called (e.g. for subsampling or for the\n # train/val split).\n if not (self.warm_start and self._is_fitted()):\n self._random_seed = rng.randint(np.iinfo(np.uint32).max,\n dtype='u8')\n\n self._validate_parameters()\n\n # used for validation in predict\n n_samples, self._n_features = X.shape\n\n self.is_categorical_, known_categories = self._check_categories(X)\n\n # we need this stateful variable to tell raw_predict() that it was\n # called from fit() (this current method), and that the data it has\n # received is pre-binned.\n # predicting is faster on pre-binned data, so we want early stopping\n # predictions to be made on pre-binned data. 
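# --- Editorial aside: the fit() logic that follows resolves the
# --- `early_stopping='auto'` setting; a hedged standalone restatement of
# --- that rule (the helper name is invented, not library API):
def resolve_early_stopping(early_stopping, n_samples):
    # 'auto' enables early stopping only when the training set is large.
    if early_stopping == 'auto':
        return n_samples > 10000
    return bool(early_stopping)

assert resolve_early_stopping('auto', 500) is False
assert resolve_early_stopping('auto', 20000) is True
assert resolve_early_stopping(False, 20000) is False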
Unfortunately the _scorer\n # can only call predict() or predict_proba(), not raw_predict(), and\n # there's no way to tell the scorer that it needs to predict binned\n # data.\n self._in_fit = True\n\n if isinstance(self.loss, str):\n self._loss = self._get_loss(sample_weight=sample_weight)\n elif isinstance(self.loss, BaseLoss):\n self._loss = self.loss\n\n if self.early_stopping == 'auto':\n self.do_early_stopping_ = n_samples > 10000\n else:\n self.do_early_stopping_ = self.early_stopping\n\n # create validation data if needed\n self._use_validation_data = self.validation_fraction is not None\n if self.do_early_stopping_ and self._use_validation_data:\n # stratify for classification\n stratify = y if hasattr(self._loss, 'predict_proba') else None\n\n # Save the state of the RNG for the training and validation split.\n # This is needed in order to have the same split when using\n # warm starting.\n\n if sample_weight is None:\n X_train, X_val, y_train, y_val = train_test_split(\n X, y, test_size=self.validation_fraction,\n stratify=stratify,\n random_state=self._random_seed)\n sample_weight_train = sample_weight_val = None\n else:\n # TODO: incorporate sample_weight in sampling here, as well as\n # stratify\n (X_train, X_val, y_train, y_val, sample_weight_train,\n sample_weight_val) = train_test_split(\n X, y, sample_weight, test_size=self.validation_fraction,\n stratify=stratify,\n random_state=self._random_seed)\n else:\n X_train, y_train, sample_weight_train = X, y, sample_weight\n X_val = y_val = sample_weight_val = None\n\n # Bin the data\n # For ease of use of the API, the user-facing GBDT classes accept the\n # parameter max_bins, which doesn't take into account the bin for\n # missing values (which is always allocated). However, since max_bins\n # isn't the true maximal number of bins, all other private classes\n # (binmapper, histbuilder...) accept n_bins instead, which is the\n # actual total number of bins. Everywhere in the code, the\n # convention is that n_bins == max_bins + 1\n n_bins = self.max_bins + 1 # + 1 for missing values\n self._bin_mapper = _BinMapper(\n n_bins=n_bins,\n is_categorical=self.is_categorical_,\n known_categories=known_categories,\n random_state=self._random_seed)\n X_binned_train = self._bin_data(X_train, is_training_data=True)\n if X_val is not None:\n X_binned_val = self._bin_data(X_val, is_training_data=False)\n else:\n X_binned_val = None\n\n # Uses binned data to check for missing values\n has_missing_values = (\n X_binned_train == self._bin_mapper.missing_values_bin_idx_).any(\n axis=0).astype(np.uint8)\n\n if self.verbose:\n print(\"Fitting gradient boosted rounds:\")\n\n n_samples = X_binned_train.shape[0]\n\n # First time calling fit, or no warm start\n if not (self._is_fitted() and self.warm_start):\n # Clear random state and score attributes\n self._clear_state()\n\n # initialize raw_predictions: those are the accumulated values\n # predicted by the trees for the training data. 
raw_predictions has\n # shape (n_trees_per_iteration, n_samples) where\n # n_trees_per_iterations is n_classes in multiclass classification,\n # else 1.\n self._baseline_prediction = self._loss.get_baseline_prediction(\n y_train, sample_weight_train, self.n_trees_per_iteration_\n )\n raw_predictions = np.zeros(\n shape=(self.n_trees_per_iteration_, n_samples),\n dtype=self._baseline_prediction.dtype\n )\n raw_predictions += self._baseline_prediction\n\n # predictors is a matrix (list of lists) of TreePredictor objects\n # with shape (n_iter_, n_trees_per_iteration)\n self._predictors = predictors = []\n\n # Initialize structures and attributes related to early stopping\n self._scorer = None # set if scoring != loss\n raw_predictions_val = None # set if scoring == loss and use val\n self.train_score_ = []\n self.validation_score_ = []\n\n if self.do_early_stopping_:\n # populate train_score and validation_score with the\n # predictions of the initial model (before the first tree)\n\n if self.scoring == 'loss':\n # we're going to compute scoring w.r.t the loss. As losses\n # take raw predictions as input (unlike the scorers), we\n # can optimize a bit and avoid repeating computing the\n # predictions of the previous trees. We'll re-use\n # raw_predictions (as it's needed for training anyway) for\n # evaluating the training loss, and create\n # raw_predictions_val for storing the raw predictions of\n # the validation data.\n\n if self._use_validation_data:\n raw_predictions_val = np.zeros(\n shape=(self.n_trees_per_iteration_,\n X_binned_val.shape[0]),\n dtype=self._baseline_prediction.dtype\n )\n\n raw_predictions_val += self._baseline_prediction\n\n self._check_early_stopping_loss(raw_predictions, y_train,\n sample_weight_train,\n raw_predictions_val, y_val,\n sample_weight_val)\n else:\n self._scorer = check_scoring(self, self.scoring)\n # _scorer is a callable with signature (est, X, y) and\n # calls est.predict() or est.predict_proba() depending on\n # its nature.\n # Unfortunately, each call to _scorer() will compute\n # the predictions of all the trees. 
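# --- Editorial aside: hedged sketch of the "score on a small training
# --- subset" idea described in the surrounding comments; the real code uses
# --- sklearn.utils.resample with optional stratification, while this
# --- invented helper only shows the 10 000-row size cap.
import numpy as np

def small_trainset_indices(n_rows, subsample_size=10000, seed=0):
    if n_rows <= subsample_size:
        return np.arange(n_rows)            # small sets are scored in full
    rng = np.random.RandomState(seed)
    return rng.choice(n_rows, size=subsample_size, replace=False)

assert small_trainset_indices(3000).shape == (3000,)
assert small_trainset_indices(50000).shape == (10000,)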
So we use a subset of\n # the training set to compute train scores.\n\n # Compute the subsample set\n (X_binned_small_train,\n y_small_train,\n sample_weight_small_train) = self._get_small_trainset(\n X_binned_train, y_train, sample_weight_train,\n self._random_seed)\n\n self._check_early_stopping_scorer(\n X_binned_small_train, y_small_train,\n sample_weight_small_train,\n X_binned_val, y_val, sample_weight_val,\n )\n begin_at_stage = 0\n\n # warm start: this is not the first time fit was called\n else:\n # Check that the maximum number of iterations is not smaller\n # than the number of iterations from the previous fit\n if self.max_iter < self.n_iter_:\n raise ValueError(\n 'max_iter=%d must be larger than or equal to '\n 'n_iter_=%d when warm_start==True'\n % (self.max_iter, self.n_iter_)\n )\n\n # Convert array attributes to lists\n self.train_score_ = self.train_score_.tolist()\n self.validation_score_ = self.validation_score_.tolist()\n\n # Compute raw predictions\n raw_predictions = self._raw_predict(X_binned_train)\n if self.do_early_stopping_ and self._use_validation_data:\n raw_predictions_val = self._raw_predict(X_binned_val)\n else:\n raw_predictions_val = None\n\n if self.do_early_stopping_ and self.scoring != 'loss':\n # Compute the subsample set\n (X_binned_small_train,\n y_small_train,\n sample_weight_small_train) = self._get_small_trainset(\n X_binned_train, y_train, sample_weight_train,\n self._random_seed)\n\n # Get the predictors from the previous fit\n predictors = self._predictors\n\n begin_at_stage = self.n_iter_\n\n # initialize gradients and hessians (empty arrays).\n # shape = (n_trees_per_iteration, n_samples).\n gradients, hessians = self._loss.init_gradients_and_hessians(\n n_samples=n_samples,\n prediction_dim=self.n_trees_per_iteration_,\n sample_weight=sample_weight_train\n )\n\n for iteration in range(begin_at_stage, self.max_iter):\n\n if self.verbose:\n iteration_start_time = time()\n print(\"[{}/{}] \".format(iteration + 1, self.max_iter),\n end='', flush=True)\n\n # Update gradients and hessians, inplace\n self._loss.update_gradients_and_hessians(gradients, hessians,\n y_train, raw_predictions,\n sample_weight_train)\n\n # Append a list since there may be more than 1 predictor per iter\n predictors.append([])\n\n # Build `n_trees_per_iteration` trees.\n for k in range(self.n_trees_per_iteration_):\n grower = TreeGrower(\n X_binned_train, gradients[k, :], hessians[k, :],\n n_bins=n_bins,\n n_bins_non_missing=self._bin_mapper.n_bins_non_missing_,\n has_missing_values=has_missing_values,\n is_categorical=self.is_categorical_,\n monotonic_cst=self.monotonic_cst,\n max_leaf_nodes=self.max_leaf_nodes,\n max_depth=self.max_depth,\n min_samples_leaf=self.min_samples_leaf,\n l2_regularization=self.l2_regularization,\n shrinkage=self.learning_rate)\n grower.grow()\n\n acc_apply_split_time += grower.total_apply_split_time\n acc_find_split_time += grower.total_find_split_time\n acc_compute_hist_time += grower.total_compute_hist_time\n\n if self._loss.need_update_leaves_values:\n self._loss.update_leaves_values(grower, y_train,\n raw_predictions[k, :],\n sample_weight_train)\n\n predictor = grower.make_predictor(\n binning_thresholds=self._bin_mapper.bin_thresholds_\n )\n predictors[-1].append(predictor)\n\n # Update raw_predictions with the predictions of the newly\n # created tree.\n tic_pred = time()\n _update_raw_predictions(raw_predictions[k, :], grower)\n toc_pred = time()\n acc_prediction_time += toc_pred - tic_pred\n\n should_early_stop = False\n if 
self.do_early_stopping_:\n if self.scoring == 'loss':\n # Update raw_predictions_val with the newest tree(s)\n if self._use_validation_data:\n for k, pred in enumerate(self._predictors[-1]):\n raw_predictions_val[k, :] += (\n pred.predict_binned(\n X_binned_val,\n self._bin_mapper.missing_values_bin_idx_\n )\n )\n\n should_early_stop = self._check_early_stopping_loss(\n raw_predictions, y_train, sample_weight_train,\n raw_predictions_val, y_val, sample_weight_val\n )\n\n else:\n should_early_stop = self._check_early_stopping_scorer(\n X_binned_small_train, y_small_train,\n sample_weight_small_train,\n X_binned_val, y_val, sample_weight_val\n )\n\n if self.verbose:\n self._print_iteration_stats(iteration_start_time)\n\n # maybe we could also early stop if all the trees are stumps?\n if should_early_stop:\n break\n\n if self.verbose:\n duration = time() - fit_start_time\n n_total_leaves = sum(\n predictor.get_n_leaf_nodes()\n for predictors_at_ith_iteration in self._predictors\n for predictor in predictors_at_ith_iteration\n )\n n_predictors = sum(\n len(predictors_at_ith_iteration)\n for predictors_at_ith_iteration in self._predictors)\n print(\"Fit {} trees in {:.3f} s, ({} total leaves)\".format(\n n_predictors, duration, n_total_leaves))\n print(\"{:<32} {:.3f}s\".format('Time spent computing histograms:',\n acc_compute_hist_time))\n print(\"{:<32} {:.3f}s\".format('Time spent finding best splits:',\n acc_find_split_time))\n print(\"{:<32} {:.3f}s\".format('Time spent applying splits:',\n acc_apply_split_time))\n print(\"{:<32} {:.3f}s\".format('Time spent predicting:',\n acc_prediction_time))\n\n self.train_score_ = np.asarray(self.train_score_)\n self.validation_score_ = np.asarray(self.validation_score_)\n del self._in_fit # hard delete so we're sure it can't be used anymore\n return self\n\n def _is_fitted(self):\n return len(getattr(self, '_predictors', [])) > 0\n\n def _clear_state(self):\n \"\"\"Clear the state of the gradient boosting model.\"\"\"\n for var in ('train_score_', 'validation_score_'):\n if hasattr(self, var):\n delattr(self, var)\n\n def _get_small_trainset(self, X_binned_train, y_train, sample_weight_train,\n seed):\n \"\"\"Compute the indices of the subsample set and return this set.\n\n For efficiency, we need to subsample the training set to compute scores\n with scorers.\n \"\"\"\n # TODO: incorporate sample_weights here in `resample`\n subsample_size = 10000\n if X_binned_train.shape[0] > subsample_size:\n indices = np.arange(X_binned_train.shape[0])\n stratify = y_train if is_classifier(self) else None\n indices = resample(indices, n_samples=subsample_size,\n replace=False, random_state=seed,\n stratify=stratify)\n X_binned_small_train = X_binned_train[indices]\n y_small_train = y_train[indices]\n if sample_weight_train is not None:\n sample_weight_small_train = sample_weight_train[indices]\n else:\n sample_weight_small_train = None\n X_binned_small_train = np.ascontiguousarray(X_binned_small_train)\n return (X_binned_small_train, y_small_train,\n sample_weight_small_train)\n else:\n return X_binned_train, y_train, sample_weight_train\n\n def _check_early_stopping_scorer(self, X_binned_small_train, y_small_train,\n sample_weight_small_train,\n X_binned_val, y_val, sample_weight_val):\n \"\"\"Check if fitting should be early-stopped based on scorer.\n\n Scores are computed on validation data or on training data.\n \"\"\"\n if is_classifier(self):\n y_small_train = self.classes_[y_small_train.astype(int)]\n\n if sample_weight_small_train is None:\n 
self.train_score_.append(\n self._scorer(self, X_binned_small_train, y_small_train)\n )\n else:\n self.train_score_.append(\n self._scorer(self, X_binned_small_train, y_small_train,\n sample_weight=sample_weight_small_train)\n )\n\n if self._use_validation_data:\n if is_classifier(self):\n y_val = self.classes_[y_val.astype(int)]\n if sample_weight_val is None:\n self.validation_score_.append(\n self._scorer(self, X_binned_val, y_val)\n )\n else:\n self.validation_score_.append(\n self._scorer(self, X_binned_val, y_val,\n sample_weight=sample_weight_val)\n )\n return self._should_stop(self.validation_score_)\n else:\n return self._should_stop(self.train_score_)\n\n def _check_early_stopping_loss(self,\n raw_predictions,\n y_train,\n sample_weight_train,\n raw_predictions_val,\n y_val,\n sample_weight_val):\n \"\"\"Check if fitting should be early-stopped based on loss.\n\n Scores are computed on validation data or on training data.\n \"\"\"\n\n self.train_score_.append(\n -self._loss(y_train, raw_predictions, sample_weight_train)\n )\n\n if self._use_validation_data:\n self.validation_score_.append(\n -self._loss(y_val, raw_predictions_val, sample_weight_val)\n )\n return self._should_stop(self.validation_score_)\n else:\n return self._should_stop(self.train_score_)\n\n def _should_stop(self, scores):\n \"\"\"\n Return True (do early stopping) if the last n scores aren't better\n than the (n-1)th-to-last score, up to some tolerance.\n \"\"\"\n reference_position = self.n_iter_no_change + 1\n if len(scores) < reference_position:\n return False\n\n # A higher score is always better. Higher tol means that it will be\n # harder for subsequent iteration to be considered an improvement upon\n # the reference score, and therefore it is more likely to early stop\n # because of the lack of significant improvement.\n tol = 0 if self.tol is None else self.tol\n reference_score = scores[-reference_position] + tol\n recent_scores = scores[-reference_position + 1:]\n recent_improvements = [score > reference_score\n for score in recent_scores]\n return not any(recent_improvements)\n\n def _bin_data(self, X, is_training_data):\n \"\"\"Bin data X.\n\n If is_training_data, then fit the _bin_mapper attribute.\n Else, the binned data is converted to a C-contiguous array.\n \"\"\"\n\n description = 'training' if is_training_data else 'validation'\n if self.verbose:\n print(\"Binning {:.3f} GB of {} data: \".format(\n X.nbytes / 1e9, description), end=\"\", flush=True)\n tic = time()\n if is_training_data:\n X_binned = self._bin_mapper.fit_transform(X) # F-aligned array\n else:\n X_binned = self._bin_mapper.transform(X) # F-aligned array\n # We convert the array to C-contiguous since predicting is faster\n # with this layout (training is faster on F-arrays though)\n X_binned = np.ascontiguousarray(X_binned)\n toc = time()\n if self.verbose:\n duration = toc - tic\n print(\"{:.3f} s\".format(duration))\n\n return X_binned\n\n def _print_iteration_stats(self, iteration_start_time):\n \"\"\"Print info about the current fitting iteration.\"\"\"\n log_msg = ''\n\n predictors_of_ith_iteration = [\n predictors_list for predictors_list in self._predictors[-1]\n if predictors_list\n ]\n n_trees = len(predictors_of_ith_iteration)\n max_depth = max(predictor.get_max_depth()\n for predictor in predictors_of_ith_iteration)\n n_leaves = sum(predictor.get_n_leaf_nodes()\n for predictor in predictors_of_ith_iteration)\n\n if n_trees == 1:\n log_msg += (\"{} tree, {} leaves, \".format(n_trees, n_leaves))\n else:\n log_msg += 
(\"{} trees, {} leaves \".format(n_trees, n_leaves))\n log_msg += (\"({} on avg), \".format(int(n_leaves / n_trees)))\n\n log_msg += \"max depth = {}, \".format(max_depth)\n\n if self.do_early_stopping_:\n if self.scoring == 'loss':\n factor = -1 # score_ arrays contain the negative loss\n name = 'loss'\n else:\n factor = 1\n name = 'score'\n log_msg += \"train {}: {:.5f}, \".format(name, factor *\n self.train_score_[-1])\n if self._use_validation_data:\n log_msg += \"val {}: {:.5f}, \".format(\n name, factor * self.validation_score_[-1])\n\n iteration_time = time() - iteration_start_time\n log_msg += \"in {:0.3f}s\".format(iteration_time)\n\n print(log_msg)\n\n def _raw_predict(self, X):\n \"\"\"Return the sum of the leaves values over all predictors.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n raw_predictions : array, shape (n_trees_per_iteration, n_samples)\n The raw predicted values.\n \"\"\"\n is_binned = getattr(self, '_in_fit', False)\n dtype = X_BINNED_DTYPE if is_binned else X_DTYPE\n X = check_array(X, dtype=dtype, force_all_finite=False)\n check_is_fitted(self)\n if X.shape[1] != self._n_features:\n raise ValueError(\n 'X has {} features but this estimator was trained with '\n '{} features.'.format(X.shape[1], self._n_features)\n )\n n_samples = X.shape[0]\n raw_predictions = np.zeros(\n shape=(self.n_trees_per_iteration_, n_samples),\n dtype=self._baseline_prediction.dtype\n )\n raw_predictions += self._baseline_prediction\n self._predict_iterations(\n X, self._predictors, raw_predictions, is_binned\n )\n return raw_predictions\n\n def _predict_iterations(self, X, predictors, raw_predictions, is_binned):\n \"\"\"Add the predictions of the predictors to raw_predictions.\"\"\"\n if not is_binned:\n known_cat_bitsets, f_idx_map = (\n self._bin_mapper.make_known_categories_bitsets())\n\n for predictors_of_ith_iteration in predictors:\n for k, predictor in enumerate(predictors_of_ith_iteration):\n if is_binned:\n predict = partial(\n predictor.predict_binned,\n missing_values_bin_idx=self._bin_mapper.missing_values_bin_idx_ # noqa\n )\n else:\n predict = partial(\n predictor.predict,\n known_cat_bitsets=known_cat_bitsets,\n f_idx_map=f_idx_map)\n raw_predictions[k, :] += predict(X)\n\n def _staged_raw_predict(self, X):\n \"\"\"Compute raw predictions of ``X`` for each iteration.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n Yields\n -------\n raw_predictions : generator of ndarray of shape \\\n (n_trees_per_iteration, n_samples)\n The raw predictions of the input samples. 
The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n X = check_array(X, dtype=X_DTYPE, force_all_finite=False)\n check_is_fitted(self)\n if X.shape[1] != self._n_features:\n raise ValueError(\n 'X has {} features but this estimator was trained with '\n '{} features.'.format(X.shape[1], self._n_features)\n )\n n_samples = X.shape[0]\n raw_predictions = np.zeros(\n shape=(self.n_trees_per_iteration_, n_samples),\n dtype=self._baseline_prediction.dtype\n )\n raw_predictions += self._baseline_prediction\n for iteration in range(len(self._predictors)):\n self._predict_iterations(\n X,\n self._predictors[iteration:iteration + 1],\n raw_predictions,\n is_binned=False\n )\n yield raw_predictions.copy()\n\n def _compute_partial_dependence_recursion(self, grid, target_features):\n \"\"\"Fast partial dependence computation.\n\n Parameters\n ----------\n grid : ndarray, shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\n target_features : ndarray, shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\n Returns\n -------\n averaged_predictions : ndarray, shape \\\n (n_trees_per_iteration, n_samples)\n The value of the partial dependence function on each grid point.\n \"\"\"\n\n if getattr(self, '_fitted_with_sw', False):\n raise NotImplementedError(\"{} does not support partial dependence \"\n \"plots with the 'recursion' method when \"\n \"sample weights were given during fit \"\n \"time.\".format(self.__class__.__name__))\n\n grid = np.asarray(grid, dtype=X_DTYPE, order='C')\n averaged_predictions = np.zeros(\n (self.n_trees_per_iteration_, grid.shape[0]), dtype=Y_DTYPE)\n\n for predictors_of_ith_iteration in self._predictors:\n for k, predictor in enumerate(predictors_of_ith_iteration):\n predictor.compute_partial_dependence(grid, target_features,\n averaged_predictions[k])\n # Note that the learning rate is already accounted for in the leaves\n # values.\n\n return averaged_predictions\n\n def _more_tags(self):\n return {'allow_nan': True}\n\n @abstractmethod\n def _get_loss(self, sample_weight):\n pass\n\n @abstractmethod\n def _encode_y(self, y=None):\n pass\n\n @property\n def n_iter_(self):\n check_is_fitted(self)\n return len(self._predictors)", + "instance_attributes": [ + { + "name": "_fitted_with_sw", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "_in_fit", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "_bin_mapper", + "types": { + "kind": "NamedType", + "name": "_BinMapper" + } + }, + { + "name": "_predictors", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "train_score_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "validation_score_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier", + "name": "HistGradientBoostingClassifier", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier", + "decorators": [], + "superclasses": ["ClassifierMixin", "BaseHistGradientBoosting"], + "methods": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/predict", + 
"scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/staged_predict", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/predict_proba", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/staged_predict_proba", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/decision_function", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/staged_decision_function", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_encode_y", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_get_loss" + ], + "is_public": false, + "reexported_by": [], + "description": "Histogram-based Gradient Boosting Classification Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingClassifier`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM `_.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_hist_gradient_boosting``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> # now you can import normally from ensemble\n >>> from sklearn.ensemble import HistGradientBoostingClassifier\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21", + "docstring": "Histogram-based Gradient Boosting Classification Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingClassifier`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM `_.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_hist_gradient_boosting``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> # now you can import normally from ensemble\n >>> from sklearn.ensemble import HistGradientBoostingClassifier\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.21\n\nParameters\n----------\nloss : {'auto', 'binary_crossentropy', 'categorical_crossentropy'}, default='auto'\n The loss function to use in the boosting process. 'binary_crossentropy'\n (also known as logistic loss) is used for binary classification and\n generalizes to 'categorical_crossentropy' for multiclass\n classification. 'auto' will automatically choose either loss depending\n on the nature of the problem.\nlearning_rate : float, default=0.1\n The learning rate, also known as *shrinkage*. This is used as a\n multiplicative factor for the leaves values. Use ``1`` for no\n shrinkage.\nmax_iter : int, default=100\n The maximum number of iterations of the boosting process, i.e. the\n maximum number of trees for binary classification. For multiclass\n classification, `n_classes` trees per iteration are built.\nmax_leaf_nodes : int or None, default=31\n The maximum number of leaves for each tree. Must be strictly greater\n than 1. If None, there is no maximum limit.\nmax_depth : int or None, default=None\n The maximum depth of each tree. The depth of a tree is the number of\n edges to go from the root to the deepest leaf.\n Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n The minimum number of samples per leaf. For small datasets with less\n than a few hundred samples, it is recommended to lower this value\n since only very shallow trees would be built.\nl2_regularization : float, default=0\n The L2 regularization parameter. Use 0 for no regularization.\nmax_bins : int, default=255\n The maximum number of bins to use for non-missing values. Before\n training, each feature of the input array `X` is binned into\n integer-valued bins, which allows for a much faster training stage.\n Features with a small number of unique values may use less than\n ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n is always reserved for missing values. Must be no larger than 255.\ncategorical_features : array-like of {bool, int} of shape (n_features) or shape (n_categorical_features,), default=None.\n Indicates the categorical features.\n\n - None : no feature will be considered categorical.\n - boolean array-like : boolean mask indicating categorical features.\n - integer array-like : integer indices indicating categorical\n features.\n\n For each categorical feature, there must be at most `max_bins` unique\n categories, and each categorical value must be in [0, max_bins -1].\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.24\n\nmonotonic_cst : array-like of int of shape (n_features), default=None\n Indicates the monotonic constraint to enforce on each feature. -1, 1\n and 0 respectively correspond to a negative constraint, positive\n constraint and no constraint. Read more in the :ref:`User Guide\n `.\n\n .. versionadded:: 0.23\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble. For results to be valid, the\n estimator should be re-trained on the same data only.\n See :term:`the Glossary `.\nearly_stopping : 'auto' or bool, default='auto'\n If 'auto', early stopping is enabled if the sample size is larger than\n 10000. If True, early stopping is enabled, otherwise early stopping is\n disabled.\n\n .. versionadded:: 0.23\n\nscoring : str or callable or None, default='loss'\n Scoring parameter to use for early stopping. It can be a single\n string (see :ref:`scoring_parameter`) or a callable (see\n :ref:`scoring`). 
If None, the estimator's default scorer\n is used. If ``scoring='loss'``, early stopping is checked\n w.r.t the loss value. Only used if early stopping is performed.\nvalidation_fraction : int or float or None, default=0.1\n Proportion (or absolute size) of training data to set aside as\n validation data for early stopping. If None, early stopping is done on\n the training data. Only used if early stopping is performed.\nn_iter_no_change : int, default=10\n Used to determine when to \"early stop\". The fitting process is\n stopped when none of the last ``n_iter_no_change`` scores are better\n than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n tolerance. Only used if early stopping is performed.\ntol : float or None, default=1e-7\n The absolute tolerance to use when comparing scores. The higher the\n tolerance, the more likely we are to early stop: higher tolerance\n means that it will be harder for subsequent iterations to be\n considered an improvement upon the reference score.\nverbose : int, default=0\n The verbosity level. If not zero, print some information about the\n fitting process.\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the subsampling in the\n binning process, and the train/validation data split if early stopping\n is enabled.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nclasses_ : array, shape = (n_classes,)\n Class labels.\ndo_early_stopping_ : bool\n Indicates whether early stopping is used during training.\nn_iter_ : int\n The number of iterations as selected by early stopping, depending on\n the `early_stopping` parameter. Otherwise it corresponds to max_iter.\nn_trees_per_iteration_ : int\n The number of tree that are built at each iteration. This is equal to 1\n for binary classification, and to ``n_classes`` for multiclass\n classification.\ntrain_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the training data. The first entry\n is the score of the ensemble before the first iteration. Scores are\n computed according to the ``scoring`` parameter. If ``scoring`` is\n not 'loss', scores are computed on a subset of at most 10 000\n samples. Empty if no early stopping.\nvalidation_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the held-out validation data. The\n first entry is the score of the ensemble before the first iteration.\n Scores are computed according to the ``scoring`` parameter. Empty if\n no early stopping or if ``validation_fraction`` is None.\nis_categorical_ : ndarray, shape (n_features, ) or None\n Boolean mask for the categorical features. ``None`` if there are no\n categorical features.\n\nExamples\n--------\n>>> # To use this experimental feature, we need to explicitly ask for it:\n>>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n>>> from sklearn.ensemble import HistGradientBoostingClassifier\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = HistGradientBoostingClassifier().fit(X, y)\n>>> clf.score(X, y)\n1.0", + "code": "class HistGradientBoostingClassifier(ClassifierMixin,\n BaseHistGradientBoosting):\n \"\"\"Histogram-based Gradient Boosting Classification Tree.\n\n This estimator is much faster than\n :class:`GradientBoostingClassifier`\n for big datasets (n_samples >= 10 000).\n\n This estimator has native support for missing values (NaNs). 
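# --- Editorial aside: a small usage sketch of the NaN support described
# --- above, written against the 0.24 experimental import path; the toy
# --- data is invented. Not part of the captured API dump.
import numpy as np
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingClassifier

X = np.array([[1.0], [2.0], [np.nan], [3.0], [np.nan], [4.0]])
y = np.array([0, 0, 0, 1, 1, 1])

clf = HistGradientBoostingClassifier(max_iter=10, min_samples_leaf=1).fit(X, y)
# Rows with NaN are routed to a child chosen during training, so this works:
print(clf.predict(np.array([[np.nan], [1.5]])))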
During\n training, the tree grower learns at each split point whether samples\n with missing values should go to the left or right child, based on the\n potential gain. When predicting, samples with missing values are\n assigned to the left or right child consequently. If no missing values\n were encountered for a given feature during training, then samples with\n missing values are mapped to whichever child has the most samples.\n\n This implementation is inspired by\n `LightGBM `_.\n\n .. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_hist_gradient_boosting``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> # now you can import normally from ensemble\n >>> from sklearn.ensemble import HistGradientBoostingClassifier\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.21\n\n Parameters\n ----------\n loss : {'auto', 'binary_crossentropy', 'categorical_crossentropy'}, \\\n default='auto'\n The loss function to use in the boosting process. 'binary_crossentropy'\n (also known as logistic loss) is used for binary classification and\n generalizes to 'categorical_crossentropy' for multiclass\n classification. 'auto' will automatically choose either loss depending\n on the nature of the problem.\n learning_rate : float, default=0.1\n The learning rate, also known as *shrinkage*. This is used as a\n multiplicative factor for the leaves values. Use ``1`` for no\n shrinkage.\n max_iter : int, default=100\n The maximum number of iterations of the boosting process, i.e. the\n maximum number of trees for binary classification. For multiclass\n classification, `n_classes` trees per iteration are built.\n max_leaf_nodes : int or None, default=31\n The maximum number of leaves for each tree. Must be strictly greater\n than 1. If None, there is no maximum limit.\n max_depth : int or None, default=None\n The maximum depth of each tree. The depth of a tree is the number of\n edges to go from the root to the deepest leaf.\n Depth isn't constrained by default.\n min_samples_leaf : int, default=20\n The minimum number of samples per leaf. For small datasets with less\n than a few hundred samples, it is recommended to lower this value\n since only very shallow trees would be built.\n l2_regularization : float, default=0\n The L2 regularization parameter. Use 0 for no regularization.\n max_bins : int, default=255\n The maximum number of bins to use for non-missing values. Before\n training, each feature of the input array `X` is binned into\n integer-valued bins, which allows for a much faster training stage.\n Features with a small number of unique values may use less than\n ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n is always reserved for missing values. Must be no larger than 255.\n categorical_features : array-like of {bool, int} of shape (n_features) \\\n or shape (n_categorical_features,), default=None.\n Indicates the categorical features.\n\n - None : no feature will be considered categorical.\n - boolean array-like : boolean mask indicating categorical features.\n - integer array-like : integer indices indicating categorical\n features.\n\n For each categorical feature, there must be at most `max_bins` unique\n categories, and each categorical value must be in [0, max_bins -1].\n\n Read more in the :ref:`User Guide `.\n\n .. 
versionadded:: 0.24\n\n monotonic_cst : array-like of int of shape (n_features), default=None\n Indicates the monotonic constraint to enforce on each feature. -1, 1\n and 0 respectively correspond to a negative constraint, positive\n constraint and no constraint. Read more in the :ref:`User Guide\n `.\n\n .. versionadded:: 0.23\n\n warm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble. For results to be valid, the\n estimator should be re-trained on the same data only.\n See :term:`the Glossary `.\n early_stopping : 'auto' or bool, default='auto'\n If 'auto', early stopping is enabled if the sample size is larger than\n 10000. If True, early stopping is enabled, otherwise early stopping is\n disabled.\n\n .. versionadded:: 0.23\n\n scoring : str or callable or None, default='loss'\n Scoring parameter to use for early stopping. It can be a single\n string (see :ref:`scoring_parameter`) or a callable (see\n :ref:`scoring`). If None, the estimator's default scorer\n is used. If ``scoring='loss'``, early stopping is checked\n w.r.t the loss value. Only used if early stopping is performed.\n validation_fraction : int or float or None, default=0.1\n Proportion (or absolute size) of training data to set aside as\n validation data for early stopping. If None, early stopping is done on\n the training data. Only used if early stopping is performed.\n n_iter_no_change : int, default=10\n Used to determine when to \"early stop\". The fitting process is\n stopped when none of the last ``n_iter_no_change`` scores are better\n than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n tolerance. Only used if early stopping is performed.\n tol : float or None, default=1e-7\n The absolute tolerance to use when comparing scores. The higher the\n tolerance, the more likely we are to early stop: higher tolerance\n means that it will be harder for subsequent iterations to be\n considered an improvement upon the reference score.\n verbose : int, default=0\n The verbosity level. If not zero, print some information about the\n fitting process.\n random_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the subsampling in the\n binning process, and the train/validation data split if early stopping\n is enabled.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n classes_ : array, shape = (n_classes,)\n Class labels.\n do_early_stopping_ : bool\n Indicates whether early stopping is used during training.\n n_iter_ : int\n The number of iterations as selected by early stopping, depending on\n the `early_stopping` parameter. Otherwise it corresponds to max_iter.\n n_trees_per_iteration_ : int\n The number of tree that are built at each iteration. This is equal to 1\n for binary classification, and to ``n_classes`` for multiclass\n classification.\n train_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the training data. The first entry\n is the score of the ensemble before the first iteration. Scores are\n computed according to the ``scoring`` parameter. If ``scoring`` is\n not 'loss', scores are computed on a subset of at most 10 000\n samples. Empty if no early stopping.\n validation_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the held-out validation data. 
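# ---------------------------------------------------------------------------
# Editor's illustrative sketch (arbitrary toy setup): inspecting the
# per-iteration train_score_ / validation_score_ arrays documented above.
# Both hold n_iter_ + 1 entries because the first entry is the score of the
# ensemble before any tree was added.
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=5000, random_state=0)
clf = HistGradientBoostingClassifier(
    early_stopping=True, validation_fraction=0.2, random_state=0).fit(X, y)
print(len(clf.train_score_), len(clf.validation_score_))  # both n_iter_ + 1
# ---------------------------------------------------------------------------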
The\n first entry is the score of the ensemble before the first iteration.\n Scores are computed according to the ``scoring`` parameter. Empty if\n no early stopping or if ``validation_fraction`` is None.\n is_categorical_ : ndarray, shape (n_features, ) or None\n Boolean mask for the categorical features. ``None`` if there are no\n categorical features.\n\n Examples\n --------\n >>> # To use this experimental feature, we need to explicitly ask for it:\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> from sklearn.ensemble import HistGradientBoostingClassifier\n >>> from sklearn.datasets import load_iris\n >>> X, y = load_iris(return_X_y=True)\n >>> clf = HistGradientBoostingClassifier().fit(X, y)\n >>> clf.score(X, y)\n 1.0\n \"\"\"\n\n _VALID_LOSSES = ('binary_crossentropy', 'categorical_crossentropy',\n 'auto')\n\n @_deprecate_positional_args\n def __init__(self, loss='auto', *, learning_rate=0.1, max_iter=100,\n max_leaf_nodes=31, max_depth=None, min_samples_leaf=20,\n l2_regularization=0., max_bins=255,\n categorical_features=None, monotonic_cst=None,\n warm_start=False, early_stopping='auto', scoring='loss',\n validation_fraction=0.1, n_iter_no_change=10, tol=1e-7,\n verbose=0, random_state=None):\n super(HistGradientBoostingClassifier, self).__init__(\n loss=loss, learning_rate=learning_rate, max_iter=max_iter,\n max_leaf_nodes=max_leaf_nodes, max_depth=max_depth,\n min_samples_leaf=min_samples_leaf,\n l2_regularization=l2_regularization, max_bins=max_bins,\n categorical_features=categorical_features,\n monotonic_cst=monotonic_cst,\n warm_start=warm_start,\n early_stopping=early_stopping, scoring=scoring,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, tol=tol, verbose=verbose,\n random_state=random_state)\n\n def predict(self, X):\n \"\"\"Predict classes for X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n y : ndarray, shape (n_samples,)\n The predicted classes.\n \"\"\"\n # TODO: This could be done in parallel\n encoded_classes = np.argmax(self.predict_proba(X), axis=1)\n return self.classes_[encoded_classes]\n\n def staged_predict(self, X):\n \"\"\"Predict classes at each iteration.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n Yields\n -------\n y : generator of ndarray of shape (n_samples,)\n The predicted classes of the input samples, for each iteration.\n \"\"\"\n for proba in self.staged_predict_proba(X):\n encoded_classes = np.argmax(proba, axis=1)\n yield self.classes_.take(encoded_classes, axis=0)\n\n def predict_proba(self, X):\n \"\"\"Predict class probabilities for X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n p : ndarray, shape (n_samples, n_classes)\n The class probabilities of the input samples.\n \"\"\"\n raw_predictions = self._raw_predict(X)\n return self._loss.predict_proba(raw_predictions)\n\n def staged_predict_proba(self, X):\n \"\"\"Predict class probabilities at each iteration.\n\n This method allows monitoring (i.e. 
determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n Yields\n -------\n y : generator of ndarray of shape (n_samples,)\n The predicted class probabilities of the input samples,\n for each iteration.\n \"\"\"\n for raw_predictions in self._staged_raw_predict(X):\n yield self._loss.predict_proba(raw_predictions)\n\n def decision_function(self, X):\n \"\"\"Compute the decision function of ``X``.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n decision : ndarray, shape (n_samples,) or \\\n (n_samples, n_trees_per_iteration)\n The raw predicted values (i.e. the sum of the trees leaves) for\n each sample. n_trees_per_iteration is equal to the number of\n classes in multiclass classification.\n \"\"\"\n decision = self._raw_predict(X)\n if decision.shape[0] == 1:\n decision = decision.ravel()\n return decision.T\n\n def staged_decision_function(self, X):\n \"\"\"Compute decision function of ``X`` for each iteration.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n Yields\n -------\n decision : generator of ndarray of shape (n_samples,) or \\\n (n_samples, n_trees_per_iteration)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n for staged_decision in self._staged_raw_predict(X):\n if staged_decision.shape[0] == 1:\n staged_decision = staged_decision.ravel()\n yield staged_decision.T\n\n def _encode_y(self, y):\n # encode classes into 0 ... n_classes - 1 and sets attributes classes_\n # and n_trees_per_iteration_\n check_classification_targets(y)\n\n label_encoder = LabelEncoder()\n encoded_y = label_encoder.fit_transform(y)\n self.classes_ = label_encoder.classes_\n n_classes = self.classes_.shape[0]\n # only 1 tree for binary classification. For multiclass classification,\n # we build 1 tree per class.\n self.n_trees_per_iteration_ = 1 if n_classes <= 2 else n_classes\n encoded_y = encoded_y.astype(Y_DTYPE, copy=False)\n return encoded_y\n\n def _get_loss(self, sample_weight):\n if (self.loss == 'categorical_crossentropy' and\n self.n_trees_per_iteration_ == 1):\n raise ValueError(\"'categorical_crossentropy' is not suitable for \"\n \"a binary classification problem. 
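# ---------------------------------------------------------------------------
# Editor's illustrative sketch (arbitrary data and split): using the staged_*
# methods shown above to monitor held-out accuracy after each boosting
# iteration, without refitting the model.
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=2000, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
clf = HistGradientBoostingClassifier(max_iter=20).fit(X_tr, y_tr)
for i, y_pred in enumerate(clf.staged_predict(X_te), start=1):
    print(i, (y_pred == y_te).mean())  # test accuracy after i iterations
# ---------------------------------------------------------------------------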
Please use \"\n \"'auto' or 'binary_crossentropy' instead.\")\n\n if self.loss == 'auto':\n if self.n_trees_per_iteration_ == 1:\n return _LOSSES['binary_crossentropy'](\n sample_weight=sample_weight)\n else:\n return _LOSSES['categorical_crossentropy'](\n sample_weight=sample_weight)\n\n return _LOSSES[self.loss](sample_weight=sample_weight)", + "instance_attributes": [ + { + "name": "n_trees_per_iteration_", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor", + "name": "HistGradientBoostingRegressor", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor", + "decorators": [], + "superclasses": ["RegressorMixin", "BaseHistGradientBoosting"], + "methods": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/predict", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/staged_predict", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/_encode_y", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/_get_loss" + ], + "is_public": false, + "reexported_by": [], + "description": "Histogram-based Gradient Boosting Regression Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingRegressor`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM `_.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_hist_gradient_boosting``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> # now you can import normally from ensemble\n >>> from sklearn.ensemble import HistGradientBoostingRegressor\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21", + "docstring": "Histogram-based Gradient Boosting Regression Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingRegressor`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM `_.\n\n.. 
note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_hist_gradient_boosting``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> # now you can import normally from ensemble\n >>> from sklearn.ensemble import HistGradientBoostingRegressor\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\nParameters\n----------\nloss : {'least_squares', 'least_absolute_deviation', 'poisson'}, default='least_squares'\n The loss function to use in the boosting process. Note that the\n \"least squares\" and \"poisson\" losses actually implement\n \"half least squares loss\" and \"half poisson deviance\" to simplify the\n computation of the gradient. Furthermore, \"poisson\" loss internally\n uses a log-link and requires ``y >= 0``\n\n .. versionchanged:: 0.23\n Added option 'poisson'.\n\nlearning_rate : float, default=0.1\n The learning rate, also known as *shrinkage*. This is used as a\n multiplicative factor for the leaves values. Use ``1`` for no\n shrinkage.\nmax_iter : int, default=100\n The maximum number of iterations of the boosting process, i.e. the\n maximum number of trees.\nmax_leaf_nodes : int or None, default=31\n The maximum number of leaves for each tree. Must be strictly greater\n than 1. If None, there is no maximum limit.\nmax_depth : int or None, default=None\n The maximum depth of each tree. The depth of a tree is the number of\n edges to go from the root to the deepest leaf.\n Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n The minimum number of samples per leaf. For small datasets with less\n than a few hundred samples, it is recommended to lower this value\n since only very shallow trees would be built.\nl2_regularization : float, default=0\n The L2 regularization parameter. Use ``0`` for no regularization\n (default).\nmax_bins : int, default=255\n The maximum number of bins to use for non-missing values. Before\n training, each feature of the input array `X` is binned into\n integer-valued bins, which allows for a much faster training stage.\n Features with a small number of unique values may use less than\n ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n is always reserved for missing values. Must be no larger than 255.\ncategorical_features : array-like of {bool, int} of shape (n_features) or shape (n_categorical_features,), default=None.\n Indicates the categorical features.\n\n - None : no feature will be considered categorical.\n - boolean array-like : boolean mask indicating categorical features.\n - integer array-like : integer indices indicating categorical\n features.\n\n For each categorical feature, there must be at most `max_bins` unique\n categories, and each categorical value must be in [0, max_bins -1].\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.24\n\nmonotonic_cst : array-like of int of shape (n_features), default=None\n Indicates the monotonic constraint to enforce on each feature. -1, 1\n and 0 respectively correspond to a negative constraint, positive\n constraint and no constraint. Read more in the :ref:`User Guide\n `.\n\n .. versionadded:: 0.23\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble. 
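# ---------------------------------------------------------------------------
# Editor's illustrative sketch (toy count-valued target, an assumption): the
# 'poisson' loss described above uses a log-link internally and requires a
# non-negative target with sum(y) > 0.
import numpy as np
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingRegressor

rs = np.random.RandomState(0)
X = rs.normal(size=(500, 3))
y = rs.poisson(lam=np.exp(X[:, 0]))         # non-negative integer counts
est = HistGradientBoostingRegressor(loss='poisson').fit(X, y)
print(est.predict(X[:3]))                   # predictions are non-negative
# ---------------------------------------------------------------------------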
For results to be valid, the\n estimator should be re-trained on the same data only.\n See :term:`the Glossary `.\nearly_stopping : 'auto' or bool, default='auto'\n If 'auto', early stopping is enabled if the sample size is larger than\n 10000. If True, early stopping is enabled, otherwise early stopping is\n disabled.\n\n .. versionadded:: 0.23\n\nscoring : str or callable or None, default='loss'\n Scoring parameter to use for early stopping. It can be a single\n string (see :ref:`scoring_parameter`) or a callable (see\n :ref:`scoring`). If None, the estimator's default scorer is used. If\n ``scoring='loss'``, early stopping is checked w.r.t the loss value.\n Only used if early stopping is performed.\nvalidation_fraction : int or float or None, default=0.1\n Proportion (or absolute size) of training data to set aside as\n validation data for early stopping. If None, early stopping is done on\n the training data. Only used if early stopping is performed.\nn_iter_no_change : int, default=10\n Used to determine when to \"early stop\". The fitting process is\n stopped when none of the last ``n_iter_no_change`` scores are better\n than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n tolerance. Only used if early stopping is performed.\ntol : float or None, default=1e-7\n The absolute tolerance to use when comparing scores during early\n stopping. The higher the tolerance, the more likely we are to early\n stop: higher tolerance means that it will be harder for subsequent\n iterations to be considered an improvement upon the reference score.\nverbose : int, default=0\n The verbosity level. If not zero, print some information about the\n fitting process.\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the subsampling in the\n binning process, and the train/validation data split if early stopping\n is enabled.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\ndo_early_stopping_ : bool\n Indicates whether early stopping is used during training.\nn_iter_ : int\n The number of iterations as selected by early stopping, depending on\n the `early_stopping` parameter. Otherwise it corresponds to max_iter.\nn_trees_per_iteration_ : int\n The number of tree that are built at each iteration. For regressors,\n this is always 1.\ntrain_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the training data. The first entry\n is the score of the ensemble before the first iteration. Scores are\n computed according to the ``scoring`` parameter. If ``scoring`` is\n not 'loss', scores are computed on a subset of at most 10 000\n samples. Empty if no early stopping.\nvalidation_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the held-out validation data. The\n first entry is the score of the ensemble before the first iteration.\n Scores are computed according to the ``scoring`` parameter. Empty if\n no early stopping or if ``validation_fraction`` is None.\nis_categorical_ : ndarray, shape (n_features, ) or None\n Boolean mask for the categorical features. 
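# ---------------------------------------------------------------------------
# Editor's illustrative sketch (parameter values are assumptions): the
# warm_start behaviour described above. Refitting with a larger max_iter on
# the same data keeps the trees already grown and only adds the missing ones.
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
est = HistGradientBoostingRegressor(max_iter=50, warm_start=True).fit(X, y)
est.set_params(max_iter=100).fit(X, y)  # grows 50 additional trees
print(est.n_iter_)                      # 100
# ---------------------------------------------------------------------------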
``None`` if there are no\n categorical features.\n\nExamples\n--------\n>>> # To use this experimental feature, we need to explicitly ask for it:\n>>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n>>> from sklearn.ensemble import HistGradientBoostingRegressor\n>>> from sklearn.datasets import load_diabetes\n>>> X, y = load_diabetes(return_X_y=True)\n>>> est = HistGradientBoostingRegressor().fit(X, y)\n>>> est.score(X, y)\n0.92...", + "code": "class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting):\n \"\"\"Histogram-based Gradient Boosting Regression Tree.\n\n This estimator is much faster than\n :class:`GradientBoostingRegressor`\n for big datasets (n_samples >= 10 000).\n\n This estimator has native support for missing values (NaNs). During\n training, the tree grower learns at each split point whether samples\n with missing values should go to the left or right child, based on the\n potential gain. When predicting, samples with missing values are\n assigned to the left or right child consequently. If no missing values\n were encountered for a given feature during training, then samples with\n missing values are mapped to whichever child has the most samples.\n\n This implementation is inspired by\n `LightGBM `_.\n\n .. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_hist_gradient_boosting``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> # now you can import normally from ensemble\n >>> from sklearn.ensemble import HistGradientBoostingRegressor\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.21\n\n Parameters\n ----------\n loss : {'least_squares', 'least_absolute_deviation', 'poisson'}, \\\n default='least_squares'\n The loss function to use in the boosting process. Note that the\n \"least squares\" and \"poisson\" losses actually implement\n \"half least squares loss\" and \"half poisson deviance\" to simplify the\n computation of the gradient. Furthermore, \"poisson\" loss internally\n uses a log-link and requires ``y >= 0``\n\n .. versionchanged:: 0.23\n Added option 'poisson'.\n\n learning_rate : float, default=0.1\n The learning rate, also known as *shrinkage*. This is used as a\n multiplicative factor for the leaves values. Use ``1`` for no\n shrinkage.\n max_iter : int, default=100\n The maximum number of iterations of the boosting process, i.e. the\n maximum number of trees.\n max_leaf_nodes : int or None, default=31\n The maximum number of leaves for each tree. Must be strictly greater\n than 1. If None, there is no maximum limit.\n max_depth : int or None, default=None\n The maximum depth of each tree. The depth of a tree is the number of\n edges to go from the root to the deepest leaf.\n Depth isn't constrained by default.\n min_samples_leaf : int, default=20\n The minimum number of samples per leaf. For small datasets with less\n than a few hundred samples, it is recommended to lower this value\n since only very shallow trees would be built.\n l2_regularization : float, default=0\n The L2 regularization parameter. Use ``0`` for no regularization\n (default).\n max_bins : int, default=255\n The maximum number of bins to use for non-missing values. 
Before\n training, each feature of the input array `X` is binned into\n integer-valued bins, which allows for a much faster training stage.\n Features with a small number of unique values may use less than\n ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\n is always reserved for missing values. Must be no larger than 255.\n categorical_features : array-like of {bool, int} of shape (n_features) \\\n or shape (n_categorical_features,), default=None.\n Indicates the categorical features.\n\n - None : no feature will be considered categorical.\n - boolean array-like : boolean mask indicating categorical features.\n - integer array-like : integer indices indicating categorical\n features.\n\n For each categorical feature, there must be at most `max_bins` unique\n categories, and each categorical value must be in [0, max_bins -1].\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.24\n\n monotonic_cst : array-like of int of shape (n_features), default=None\n Indicates the monotonic constraint to enforce on each feature. -1, 1\n and 0 respectively correspond to a negative constraint, positive\n constraint and no constraint. Read more in the :ref:`User Guide\n `.\n\n .. versionadded:: 0.23\n\n warm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble. For results to be valid, the\n estimator should be re-trained on the same data only.\n See :term:`the Glossary `.\n early_stopping : 'auto' or bool, default='auto'\n If 'auto', early stopping is enabled if the sample size is larger than\n 10000. If True, early stopping is enabled, otherwise early stopping is\n disabled.\n\n .. versionadded:: 0.23\n\n scoring : str or callable or None, default='loss'\n Scoring parameter to use for early stopping. It can be a single\n string (see :ref:`scoring_parameter`) or a callable (see\n :ref:`scoring`). If None, the estimator's default scorer is used. If\n ``scoring='loss'``, early stopping is checked w.r.t the loss value.\n Only used if early stopping is performed.\n validation_fraction : int or float or None, default=0.1\n Proportion (or absolute size) of training data to set aside as\n validation data for early stopping. If None, early stopping is done on\n the training data. Only used if early stopping is performed.\n n_iter_no_change : int, default=10\n Used to determine when to \"early stop\". The fitting process is\n stopped when none of the last ``n_iter_no_change`` scores are better\n than the ``n_iter_no_change - 1`` -th-to-last one, up to some\n tolerance. Only used if early stopping is performed.\n tol : float or None, default=1e-7\n The absolute tolerance to use when comparing scores during early\n stopping. The higher the tolerance, the more likely we are to early\n stop: higher tolerance means that it will be harder for subsequent\n iterations to be considered an improvement upon the reference score.\n verbose : int, default=0\n The verbosity level. 
If not zero, print some information about the\n fitting process.\n random_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the subsampling in the\n binning process, and the train/validation data split if early stopping\n is enabled.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n do_early_stopping_ : bool\n Indicates whether early stopping is used during training.\n n_iter_ : int\n The number of iterations as selected by early stopping, depending on\n the `early_stopping` parameter. Otherwise it corresponds to max_iter.\n n_trees_per_iteration_ : int\n The number of tree that are built at each iteration. For regressors,\n this is always 1.\n train_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the training data. The first entry\n is the score of the ensemble before the first iteration. Scores are\n computed according to the ``scoring`` parameter. If ``scoring`` is\n not 'loss', scores are computed on a subset of at most 10 000\n samples. Empty if no early stopping.\n validation_score_ : ndarray, shape (n_iter_+1,)\n The scores at each iteration on the held-out validation data. The\n first entry is the score of the ensemble before the first iteration.\n Scores are computed according to the ``scoring`` parameter. Empty if\n no early stopping or if ``validation_fraction`` is None.\n is_categorical_ : ndarray, shape (n_features, ) or None\n Boolean mask for the categorical features. ``None`` if there are no\n categorical features.\n\n Examples\n --------\n >>> # To use this experimental feature, we need to explicitly ask for it:\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> from sklearn.ensemble import HistGradientBoostingRegressor\n >>> from sklearn.datasets import load_diabetes\n >>> X, y = load_diabetes(return_X_y=True)\n >>> est = HistGradientBoostingRegressor().fit(X, y)\n >>> est.score(X, y)\n 0.92...\n \"\"\"\n\n _VALID_LOSSES = ('least_squares', 'least_absolute_deviation',\n 'poisson')\n\n @_deprecate_positional_args\n def __init__(self, loss='least_squares', *, learning_rate=0.1,\n max_iter=100, max_leaf_nodes=31, max_depth=None,\n min_samples_leaf=20, l2_regularization=0., max_bins=255,\n categorical_features=None, monotonic_cst=None,\n warm_start=False, early_stopping='auto',\n scoring='loss', validation_fraction=0.1,\n n_iter_no_change=10, tol=1e-7,\n verbose=0, random_state=None):\n super(HistGradientBoostingRegressor, self).__init__(\n loss=loss, learning_rate=learning_rate, max_iter=max_iter,\n max_leaf_nodes=max_leaf_nodes, max_depth=max_depth,\n min_samples_leaf=min_samples_leaf,\n l2_regularization=l2_regularization, max_bins=max_bins,\n monotonic_cst=monotonic_cst,\n categorical_features=categorical_features,\n early_stopping=early_stopping,\n warm_start=warm_start, scoring=scoring,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, tol=tol, verbose=verbose,\n random_state=random_state)\n\n def predict(self, X):\n \"\"\"Predict values for X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n y : ndarray, shape (n_samples,)\n The predicted values.\n \"\"\"\n check_is_fitted(self)\n # Return inverse link of raw predictions after converting\n # shape (n_samples, 1) to (n_samples,)\n return self._loss.inverse_link_function(self._raw_predict(X).ravel())\n\n def staged_predict(self, X):\n 
\"\"\"Predict regression target for each iteration\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n Yields\n -------\n y : generator of ndarray of shape (n_samples,)\n The predicted values of the input samples, for each iteration.\n \"\"\"\n for raw_predictions in self._staged_raw_predict(X):\n yield self._loss.inverse_link_function(raw_predictions.ravel())\n\n def _encode_y(self, y):\n # Just convert y to the expected dtype\n self.n_trees_per_iteration_ = 1\n y = y.astype(Y_DTYPE, copy=False)\n if self.loss == 'poisson':\n # Ensure y >= 0 and sum(y) > 0\n if not (np.all(y >= 0) and np.sum(y) > 0):\n raise ValueError(\"loss='poisson' requires non-negative y and \"\n \"sum(y) > 0.\")\n return y\n\n def _get_loss(self, sample_weight):\n return _LOSSES[self.loss](sample_weight=sample_weight)", + "instance_attributes": [ + { + "name": "n_trees_per_iteration_", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower", + "name": "TreeGrower", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/grow", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_apply_shrinkage", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_intilialize_root", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_compute_best_split_and_push", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/split_next", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_finalize_leaf", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_finalize_splittable_nodes", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/make_predictor" + ], + "is_public": false, + "reexported_by": [], + "description": "Tree grower class used to build a tree.\n\nThe tree is fitted to predict the values of a Newton-Raphson step. The\nsplits are considered in a best-first fashion, and the quality of a\nsplit is defined in splitting._split_gain.", + "docstring": "Tree grower class used to build a tree.\n\nThe tree is fitted to predict the values of a Newton-Raphson step. The\nsplits are considered in a best-first fashion, and the quality of a\nsplit is defined in splitting._split_gain.\n\nParameters\n----------\nX_binned : ndarray of shape (n_samples, n_features), dtype=np.uint8\n The binned input samples. Must be Fortran-aligned.\ngradients : ndarray of shape (n_samples,)\n The gradients of each training sample. Those are the gradients of the\n loss w.r.t the predictions, evaluated at iteration ``i - 1``.\nhessians : ndarray of shape (n_samples,)\n The hessians of each training sample. Those are the hessians of the\n loss w.r.t the predictions, evaluated at iteration ``i - 1``.\nmax_leaf_nodes : int, default=None\n The maximum number of leaves for each tree. If None, there is no\n maximum limit.\nmax_depth : int, default=None\n The maximum depth of each tree. 
The depth of a tree is the number of\n edges to go from the root to the deepest leaf.\n Depth isn't constrained by default.\nmin_samples_leaf : int, default=20\n The minimum number of samples per leaf.\nmin_gain_to_split : float, default=0.\n The minimum gain needed to split a node. Splits with lower gain will\n be ignored.\nn_bins : int, default=256\n The total number of bins, including the bin for missing values. Used\n to define the shape of the histograms.\nn_bins_non_missing : ndarray, dtype=np.uint32, default=None\n For each feature, gives the number of bins actually used for\n non-missing values. For features with a lot of unique values, this\n is equal to ``n_bins - 1``. If it's an int, all features are\n considered to have the same number of bins. If None, all features\n are considered to have ``n_bins - 1`` bins.\nhas_missing_values : bool or ndarray, dtype=bool, default=False\n Whether each feature contains missing values (in the training data).\n If it's a bool, the same value is used for all features.\nis_categorical : ndarray of bool of shape (n_features,), default=None\n Indicates categorical features.\nmonotonic_cst : array-like of shape (n_features,), dtype=int, default=None\n Indicates the monotonic constraint to enforce on each feature. -1, 1\n and 0 respectively correspond to a negative constraint, positive\n constraint and no constraint. Read more in the :ref:`User Guide\n `.\nl2_regularization : float, default=0.\n The L2 regularization parameter.\nmin_hessian_to_split : float, default=1e-3\n The minimum sum of hessians needed in each node. Splits that result in\n at least one child having a sum of hessians less than\n ``min_hessian_to_split`` are discarded.\nshrinkage : float, default=1.\n The shrinkage parameter to apply to the leaves values, also known as\n learning rate.", + "code": "class TreeGrower:\n \"\"\"Tree grower class used to build a tree.\n\n The tree is fitted to predict the values of a Newton-Raphson step. The\n splits are considered in a best-first fashion, and the quality of a\n split is defined in splitting._split_gain.\n\n Parameters\n ----------\n X_binned : ndarray of shape (n_samples, n_features), dtype=np.uint8\n The binned input samples. Must be Fortran-aligned.\n gradients : ndarray of shape (n_samples,)\n The gradients of each training sample. Those are the gradients of the\n loss w.r.t the predictions, evaluated at iteration ``i - 1``.\n hessians : ndarray of shape (n_samples,)\n The hessians of each training sample. Those are the hessians of the\n loss w.r.t the predictions, evaluated at iteration ``i - 1``.\n max_leaf_nodes : int, default=None\n The maximum number of leaves for each tree. If None, there is no\n maximum limit.\n max_depth : int, default=None\n The maximum depth of each tree. The depth of a tree is the number of\n edges to go from the root to the deepest leaf.\n Depth isn't constrained by default.\n min_samples_leaf : int, default=20\n The minimum number of samples per leaf.\n min_gain_to_split : float, default=0.\n The minimum gain needed to split a node. Splits with lower gain will\n be ignored.\n n_bins : int, default=256\n The total number of bins, including the bin for missing values. Used\n to define the shape of the histograms.\n n_bins_non_missing : ndarray, dtype=np.uint32, default=None\n For each feature, gives the number of bins actually used for\n non-missing values. For features with a lot of unique values, this\n is equal to ``n_bins - 1``. 
If it's an int, all features are\n considered to have the same number of bins. If None, all features\n are considered to have ``n_bins - 1`` bins.\n has_missing_values : bool or ndarray, dtype=bool, default=False\n Whether each feature contains missing values (in the training data).\n If it's a bool, the same value is used for all features.\n is_categorical : ndarray of bool of shape (n_features,), default=None\n Indicates categorical features.\n monotonic_cst : array-like of shape (n_features,), dtype=int, default=None\n Indicates the monotonic constraint to enforce on each feature. -1, 1\n and 0 respectively correspond to a negative constraint, positive\n constraint and no constraint. Read more in the :ref:`User Guide\n `.\n l2_regularization : float, default=0.\n The L2 regularization parameter.\n min_hessian_to_split : float, default=1e-3\n The minimum sum of hessians needed in each node. Splits that result in\n at least one child having a sum of hessians less than\n ``min_hessian_to_split`` are discarded.\n shrinkage : float, default=1.\n The shrinkage parameter to apply to the leaves values, also known as\n learning rate.\n \"\"\"\n\n def __init__(self, X_binned, gradients, hessians, max_leaf_nodes=None,\n max_depth=None, min_samples_leaf=20, min_gain_to_split=0.,\n n_bins=256, n_bins_non_missing=None, has_missing_values=False,\n is_categorical=None, monotonic_cst=None,\n l2_regularization=0., min_hessian_to_split=1e-3,\n shrinkage=1.):\n\n self._validate_parameters(X_binned, max_leaf_nodes, max_depth,\n min_samples_leaf, min_gain_to_split,\n l2_regularization, min_hessian_to_split)\n\n if n_bins_non_missing is None:\n n_bins_non_missing = n_bins - 1\n\n if isinstance(n_bins_non_missing, numbers.Integral):\n n_bins_non_missing = np.array(\n [n_bins_non_missing] * X_binned.shape[1],\n dtype=np.uint32)\n else:\n n_bins_non_missing = np.asarray(n_bins_non_missing,\n dtype=np.uint32)\n\n if isinstance(has_missing_values, bool):\n has_missing_values = [has_missing_values] * X_binned.shape[1]\n has_missing_values = np.asarray(has_missing_values, dtype=np.uint8)\n\n if monotonic_cst is None:\n self.with_monotonic_cst = False\n monotonic_cst = np.full(shape=X_binned.shape[1],\n fill_value=MonotonicConstraint.NO_CST,\n dtype=np.int8)\n else:\n self.with_monotonic_cst = True\n monotonic_cst = np.asarray(monotonic_cst, dtype=np.int8)\n\n if monotonic_cst.shape[0] != X_binned.shape[1]:\n raise ValueError(\n \"monotonic_cst has shape {} but the input data \"\n \"X has {} features.\".format(\n monotonic_cst.shape[0], X_binned.shape[1]\n )\n )\n if np.any(monotonic_cst < -1) or np.any(monotonic_cst > 1):\n raise ValueError(\n \"monotonic_cst must be None or an array-like of \"\n \"-1, 0 or 1.\"\n )\n\n if is_categorical is None:\n is_categorical = np.zeros(shape=X_binned.shape[1], dtype=np.uint8)\n else:\n is_categorical = np.asarray(is_categorical, dtype=np.uint8)\n\n if np.any(np.logical_and(is_categorical == 1,\n monotonic_cst != MonotonicConstraint.NO_CST)):\n raise ValueError(\"Categorical features cannot have monotonic \"\n \"constraints.\")\n\n hessians_are_constant = hessians.shape[0] == 1\n self.histogram_builder = HistogramBuilder(\n X_binned, n_bins, gradients, hessians, hessians_are_constant)\n missing_values_bin_idx = n_bins - 1\n self.splitter = Splitter(\n X_binned, n_bins_non_missing, missing_values_bin_idx,\n has_missing_values, is_categorical, monotonic_cst,\n l2_regularization, min_hessian_to_split,\n min_samples_leaf, min_gain_to_split, hessians_are_constant)\n 
self.n_bins_non_missing = n_bins_non_missing\n self.missing_values_bin_idx = missing_values_bin_idx\n self.max_leaf_nodes = max_leaf_nodes\n self.has_missing_values = has_missing_values\n self.monotonic_cst = monotonic_cst\n self.is_categorical = is_categorical\n self.l2_regularization = l2_regularization\n self.n_features = X_binned.shape[1]\n self.max_depth = max_depth\n self.min_samples_leaf = min_samples_leaf\n self.X_binned = X_binned\n self.min_gain_to_split = min_gain_to_split\n self.shrinkage = shrinkage\n self.splittable_nodes = []\n self.finalized_leaves = []\n self.total_find_split_time = 0. # time spent finding the best splits\n self.total_compute_hist_time = 0. # time spent computing histograms\n self.total_apply_split_time = 0. # time spent splitting nodes\n self.n_categorical_splits = 0\n self._intilialize_root(gradients, hessians, hessians_are_constant)\n self.n_nodes = 1\n\n def _validate_parameters(self, X_binned, max_leaf_nodes, max_depth,\n min_samples_leaf, min_gain_to_split,\n l2_regularization, min_hessian_to_split):\n \"\"\"Validate parameters passed to __init__.\n\n Also validate parameters passed to splitter.\n \"\"\"\n if X_binned.dtype != np.uint8:\n raise NotImplementedError(\n \"X_binned must be of type uint8.\")\n if not X_binned.flags.f_contiguous:\n raise ValueError(\n \"X_binned should be passed as Fortran contiguous \"\n \"array for maximum efficiency.\")\n if max_leaf_nodes is not None and max_leaf_nodes <= 1:\n raise ValueError('max_leaf_nodes={} should not be'\n ' smaller than 2'.format(max_leaf_nodes))\n if max_depth is not None and max_depth < 1:\n raise ValueError('max_depth={} should not be'\n ' smaller than 1'.format(max_depth))\n if min_samples_leaf < 1:\n raise ValueError('min_samples_leaf={} should '\n 'not be smaller than 1'.format(min_samples_leaf))\n if min_gain_to_split < 0:\n raise ValueError('min_gain_to_split={} '\n 'must be positive.'.format(min_gain_to_split))\n if l2_regularization < 0:\n raise ValueError('l2_regularization={} must be '\n 'positive.'.format(l2_regularization))\n if min_hessian_to_split < 0:\n raise ValueError('min_hessian_to_split={} '\n 'must be positive.'.format(min_hessian_to_split))\n\n def grow(self):\n \"\"\"Grow the tree, from root to leaves.\"\"\"\n while self.splittable_nodes:\n self.split_next()\n\n self._apply_shrinkage()\n\n def _apply_shrinkage(self):\n \"\"\"Multiply leaves values by shrinkage parameter.\n\n This must be done at the very end of the growing process. If this were\n done during the growing process e.g. 
in finalize_leaf(), then a leaf\n would be shrunk but its sibling would potentially not be (if it's a\n non-leaf), which would lead to a wrong computation of the 'middle'\n value needed to enforce the monotonic constraints.\n \"\"\"\n for leaf in self.finalized_leaves:\n leaf.value *= self.shrinkage\n\n def _intilialize_root(self, gradients, hessians, hessians_are_constant):\n \"\"\"Initialize root node and finalize it if needed.\"\"\"\n n_samples = self.X_binned.shape[0]\n depth = 0\n sum_gradients = sum_parallel(gradients)\n if self.histogram_builder.hessians_are_constant:\n sum_hessians = hessians[0] * n_samples\n else:\n sum_hessians = sum_parallel(hessians)\n self.root = TreeNode(\n depth=depth,\n sample_indices=self.splitter.partition,\n sum_gradients=sum_gradients,\n sum_hessians=sum_hessians,\n value=0\n )\n\n self.root.partition_start = 0\n self.root.partition_stop = n_samples\n\n if self.root.n_samples < 2 * self.min_samples_leaf:\n # Do not even bother computing any splitting statistics.\n self._finalize_leaf(self.root)\n return\n if sum_hessians < self.splitter.min_hessian_to_split:\n self._finalize_leaf(self.root)\n return\n\n self.root.histograms = self.histogram_builder.compute_histograms_brute(\n self.root.sample_indices)\n self._compute_best_split_and_push(self.root)\n\n def _compute_best_split_and_push(self, node):\n \"\"\"Compute the best possible split (SplitInfo) of a given node.\n\n Also push it in the heap of splittable nodes if gain isn't zero.\n The gain of a node is 0 if either all the leaves are pure\n (best gain = 0), or if no split would satisfy the constraints,\n (min_hessians_to_split, min_gain_to_split, min_samples_leaf)\n \"\"\"\n\n node.split_info = self.splitter.find_node_split(\n node.n_samples, node.histograms, node.sum_gradients,\n node.sum_hessians, node.value, node.children_lower_bound,\n node.children_upper_bound)\n\n if node.split_info.gain <= 0: # no valid split\n self._finalize_leaf(node)\n else:\n heappush(self.splittable_nodes, node)\n\n def split_next(self):\n \"\"\"Split the node with highest potential gain.\n\n Returns\n -------\n left : TreeNode\n The resulting left child.\n right : TreeNode\n The resulting right child.\n \"\"\"\n # Consider the node with the highest loss reduction (a.k.a. 
gain)\n node = heappop(self.splittable_nodes)\n\n tic = time()\n (sample_indices_left,\n sample_indices_right,\n right_child_pos) = self.splitter.split_indices(node.split_info,\n node.sample_indices)\n self.total_apply_split_time += time() - tic\n\n depth = node.depth + 1\n n_leaf_nodes = len(self.finalized_leaves) + len(self.splittable_nodes)\n n_leaf_nodes += 2\n\n left_child_node = TreeNode(depth,\n sample_indices_left,\n node.split_info.sum_gradient_left,\n node.split_info.sum_hessian_left,\n value=node.split_info.value_left,\n )\n right_child_node = TreeNode(depth,\n sample_indices_right,\n node.split_info.sum_gradient_right,\n node.split_info.sum_hessian_right,\n value=node.split_info.value_right,\n )\n\n node.right_child = right_child_node\n node.left_child = left_child_node\n\n # set start and stop indices\n left_child_node.partition_start = node.partition_start\n left_child_node.partition_stop = node.partition_start + right_child_pos\n right_child_node.partition_start = left_child_node.partition_stop\n right_child_node.partition_stop = node.partition_stop\n\n if not self.has_missing_values[node.split_info.feature_idx]:\n # If no missing values are encountered at fit time, then samples\n # with missing values during predict() will go to whichever child\n # has the most samples.\n node.split_info.missing_go_to_left = (\n left_child_node.n_samples > right_child_node.n_samples)\n\n self.n_nodes += 2\n self.n_categorical_splits += node.split_info.is_categorical\n\n if (self.max_leaf_nodes is not None\n and n_leaf_nodes == self.max_leaf_nodes):\n self._finalize_leaf(left_child_node)\n self._finalize_leaf(right_child_node)\n self._finalize_splittable_nodes()\n return left_child_node, right_child_node\n\n if self.max_depth is not None and depth == self.max_depth:\n self._finalize_leaf(left_child_node)\n self._finalize_leaf(right_child_node)\n return left_child_node, right_child_node\n\n if left_child_node.n_samples < self.min_samples_leaf * 2:\n self._finalize_leaf(left_child_node)\n if right_child_node.n_samples < self.min_samples_leaf * 2:\n self._finalize_leaf(right_child_node)\n\n if self.with_monotonic_cst:\n # Set value bounds for respecting monotonic constraints\n # See test_nodes_values() for details\n if (self.monotonic_cst[node.split_info.feature_idx] ==\n MonotonicConstraint.NO_CST):\n lower_left = lower_right = node.children_lower_bound\n upper_left = upper_right = node.children_upper_bound\n else:\n mid = (left_child_node.value + right_child_node.value) / 2\n if (self.monotonic_cst[node.split_info.feature_idx] ==\n MonotonicConstraint.POS):\n lower_left, upper_left = node.children_lower_bound, mid\n lower_right, upper_right = mid, node.children_upper_bound\n else: # NEG\n lower_left, upper_left = mid, node.children_upper_bound\n lower_right, upper_right = node.children_lower_bound, mid\n left_child_node.set_children_bounds(lower_left, upper_left)\n right_child_node.set_children_bounds(lower_right, upper_right)\n\n # Compute histograms of children, and compute their best possible split\n # (if needed)\n should_split_left = not left_child_node.is_leaf\n should_split_right = not right_child_node.is_leaf\n if should_split_left or should_split_right:\n\n # We will compute the histograms of both nodes even if one of them\n # is a leaf, since computing the second histogram is very cheap\n # (using histogram subtraction).\n n_samples_left = left_child_node.sample_indices.shape[0]\n n_samples_right = right_child_node.sample_indices.shape[0]\n if n_samples_left < n_samples_right:\n 
smallest_child = left_child_node\n largest_child = right_child_node\n else:\n smallest_child = right_child_node\n largest_child = left_child_node\n\n # We use the brute O(n_samples) method on the child that has the\n # smallest number of samples, and the subtraction trick O(n_bins)\n # on the other one.\n tic = time()\n smallest_child.histograms = \\\n self.histogram_builder.compute_histograms_brute(\n smallest_child.sample_indices)\n largest_child.histograms = \\\n self.histogram_builder.compute_histograms_subtraction(\n node.histograms, smallest_child.histograms)\n self.total_compute_hist_time += time() - tic\n\n tic = time()\n if should_split_left:\n self._compute_best_split_and_push(left_child_node)\n if should_split_right:\n self._compute_best_split_and_push(right_child_node)\n self.total_find_split_time += time() - tic\n\n # Release memory used by histograms as they are no longer needed\n # for leaf nodes since they won't be split.\n for child in (left_child_node, right_child_node):\n if child.is_leaf:\n del child.histograms\n\n # Release memory used by histograms as they are no longer needed for\n # internal nodes once children histograms have been computed.\n del node.histograms\n\n return left_child_node, right_child_node\n\n def _finalize_leaf(self, node):\n \"\"\"Make node a leaf of the tree being grown.\"\"\"\n\n node.is_leaf = True\n self.finalized_leaves.append(node)\n\n def _finalize_splittable_nodes(self):\n \"\"\"Transform all splittable nodes into leaves.\n\n Used when some constraint is met e.g. maximum number of leaves or\n maximum depth.\"\"\"\n while len(self.splittable_nodes) > 0:\n node = self.splittable_nodes.pop()\n self._finalize_leaf(node)\n\n def make_predictor(self, binning_thresholds):\n \"\"\"Make a TreePredictor object out of the current tree.\n\n Parameters\n ----------\n binning_thresholds : array-like of floats\n Corresponds to the bin_thresholds_ attribute of the BinMapper.\n For each feature, this stores:\n\n - the bin frontiers for continuous features\n - the unique raw category values for categorical features\n\n Returns\n -------\n A TreePredictor object.\n \"\"\"\n predictor_nodes = np.zeros(self.n_nodes, dtype=PREDICTOR_RECORD_DTYPE)\n binned_left_cat_bitsets = np.zeros((self.n_categorical_splits, 8),\n dtype=X_BITSET_INNER_DTYPE)\n raw_left_cat_bitsets = np.zeros((self.n_categorical_splits, 8),\n dtype=X_BITSET_INNER_DTYPE)\n _fill_predictor_arrays(predictor_nodes, binned_left_cat_bitsets,\n raw_left_cat_bitsets,\n self.root, binning_thresholds,\n self.n_bins_non_missing)\n return TreePredictor(predictor_nodes, binned_left_cat_bitsets,\n raw_left_cat_bitsets)", + "instance_attributes": [ + { + "name": "with_monotonic_cst", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "missing_values_bin_idx", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "l2_regularization", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "min_samples_leaf", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_gain_to_split", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "shrinkage", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "splittable_nodes", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "finalized_leaves", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "total_find_split_time", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + 
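# ---------------------------------------------------------------------------
# Editor's illustrative sketch (standalone toy code, not sklearn's actual
# implementation): the histogram subtraction trick used in split_next above.
# The histogram of the larger child is obtained in O(n_bins) by subtracting
# the brute-force histogram of the smaller child from the parent's.
import numpy as np

def build_histogram(binned_feature, gradients, n_bins=256):
    # Brute-force O(n_samples): accumulate gradient sums per bin.
    hist = np.zeros(n_bins)
    np.add.at(hist, binned_feature, gradients)
    return hist

rs = np.random.RandomState(0)
binned = rs.randint(0, 256, size=1000)
grads = rs.normal(size=1000)
left = rs.rand(1000) < 0.3                    # smaller child membership mask
parent = build_histogram(binned, grads)
left_hist = build_histogram(binned[left], grads[left])
right_hist = parent - left_hist               # O(n_bins) subtraction trick
assert np.allclose(right_hist, build_histogram(binned[~left], grads[~left]))
# ---------------------------------------------------------------------------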
{ + "name": "total_compute_hist_time", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "total_apply_split_time", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "n_categorical_splits", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_nodes", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "root", + "types": { + "kind": "NamedType", + "name": "TreeNode" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode", + "name": "TreeNode", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__init__", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/set_children_bounds", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__lt__" + ], + "is_public": false, + "reexported_by": [], + "description": "Tree Node class used in TreeGrower.\n\nThis isn't used for prediction purposes, only for training (see\nTreePredictor).", + "docstring": "Tree Node class used in TreeGrower.\n\nThis isn't used for prediction purposes, only for training (see\nTreePredictor).\n\nParameters\n----------\ndepth : int\n The depth of the node, i.e. its distance from the root.\nsample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n The indices of the samples at the node.\nsum_gradients : float\n The sum of the gradients of the samples at the node.\nsum_hessians : float\n The sum of the hessians of the samples at the node.\n\nAttributes\n----------\ndepth : int\n The depth of the node, i.e. its distance from the root.\nsample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n The indices of the samples at the node.\nsum_gradients : float\n The sum of the gradients of the samples at the node.\nsum_hessians : float\n The sum of the hessians of the samples at the node.\nsplit_info : SplitInfo or None\n The result of the split evaluation.\nleft_child : TreeNode or None\n The left child of the node. None for leaves.\nright_child : TreeNode or None\n The right child of the node. None for leaves.\nvalue : float or None\n The value of the leaf, as computed in finalize_leaf(). None for\n non-leaf nodes.\npartition_start : int\n start position of the node's sample_indices in splitter.partition.\npartition_stop : int\n stop position of the node's sample_indices in splitter.partition.", + "code": "class TreeNode:\n \"\"\"Tree Node class used in TreeGrower.\n\n This isn't used for prediction purposes, only for training (see\n TreePredictor).\n\n Parameters\n ----------\n depth : int\n The depth of the node, i.e. its distance from the root.\n sample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n The indices of the samples at the node.\n sum_gradients : float\n The sum of the gradients of the samples at the node.\n sum_hessians : float\n The sum of the hessians of the samples at the node.\n\n Attributes\n ----------\n depth : int\n The depth of the node, i.e. 
its distance from the root.\n sample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint\n The indices of the samples at the node.\n sum_gradients : float\n The sum of the gradients of the samples at the node.\n sum_hessians : float\n The sum of the hessians of the samples at the node.\n split_info : SplitInfo or None\n The result of the split evaluation.\n left_child : TreeNode or None\n The left child of the node. None for leaves.\n right_child : TreeNode or None\n The right child of the node. None for leaves.\n value : float or None\n The value of the leaf, as computed in finalize_leaf(). None for\n non-leaf nodes.\n partition_start : int\n start position of the node's sample_indices in splitter.partition.\n partition_stop : int\n stop position of the node's sample_indices in splitter.partition.\n \"\"\"\n\n split_info = None\n left_child = None\n right_child = None\n histograms = None\n\n # start and stop indices of the node in the splitter.partition\n # array. Concretely,\n # self.sample_indices = view(self.splitter.partition[start:stop])\n # Please see the comments about splitter.partition and\n # splitter.split_indices for more info about this design.\n # These 2 attributes are only used in _update_raw_prediction, because we\n # need to iterate over the leaves and I don't know how to efficiently\n # store the sample_indices views because they're all of different sizes.\n partition_start = 0\n partition_stop = 0\n\n def __init__(self, depth, sample_indices, sum_gradients,\n sum_hessians, value=None):\n self.depth = depth\n self.sample_indices = sample_indices\n self.n_samples = sample_indices.shape[0]\n self.sum_gradients = sum_gradients\n self.sum_hessians = sum_hessians\n self.value = value\n self.is_leaf = False\n self.set_children_bounds(float('-inf'), float('+inf'))\n\n def set_children_bounds(self, lower, upper):\n \"\"\"Set children values bounds to respect monotonic constraints.\"\"\"\n\n # These are bounds for the node's *children* values, not the node's\n # value. 
The bounds are used in the splitter when considering potential\n # left and right child.\n self.children_lower_bound = lower\n self.children_upper_bound = upper\n\n def __lt__(self, other_node):\n \"\"\"Comparison for priority queue.\n\n Nodes with high gain are higher priority than nodes with low gain.\n\n heapq.heappush only needs the '<' operator.\n heapq.heappop takes the smallest item first (smaller is higher\n priority).\n\n Parameters\n ----------\n other_node : TreeNode\n The node to compare with.\n \"\"\"\n return self.split_info.gain > other_node.split_info.gain", + "instance_attributes": [ + { + "name": "is_leaf", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "partition_start", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss", + "name": "BaseLoss", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss", + "decorators": [], + "superclasses": ["ABC"], + "methods": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/__init__", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/__call__", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/pointwise_loss", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/init_gradients_and_hessians", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/get_baseline_prediction", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/update_gradients_and_hessians" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for a loss.", + "docstring": "Base class for a loss.", + "code": "class BaseLoss(ABC):\n \"\"\"Base class for a loss.\"\"\"\n\n def __init__(self, hessians_are_constant):\n self.hessians_are_constant = hessians_are_constant\n\n def __call__(self, y_true, raw_predictions, sample_weight):\n \"\"\"Return the weighted average loss\"\"\"\n return np.average(self.pointwise_loss(y_true, raw_predictions),\n weights=sample_weight)\n\n @abstractmethod\n def pointwise_loss(self, y_true, raw_predictions):\n \"\"\"Return loss value for each input\"\"\"\n\n # This variable indicates whether the loss requires the leaves values to\n # be updated once the tree has been trained. The trees are trained to\n # predict a Newton-Raphson step (see grower._finalize_leaf()). But for\n # some losses (e.g. least absolute deviation) we need to adjust the tree\n # values to account for the \"line search\" of the gradient descent\n # procedure. See the original paper Greedy Function Approximation: A\n # Gradient Boosting Machine by Friedman\n # (https://statweb.stanford.edu/~jhf/ftp/trebst.pdf) for the theory.\n need_update_leaves_values = False\n\n def init_gradients_and_hessians(self, n_samples, prediction_dim,\n sample_weight):\n \"\"\"Return initial gradients and hessians.\n\n Unless hessians are constant, arrays are initialized with undefined\n values.\n\n Parameters\n ----------\n n_samples : int\n The number of samples passed to `fit()`.\n\n prediction_dim : int\n The dimension of a raw prediction, i.e. the number of trees\n built at each iteration. Equals 1 for regression and binary\n classification, or K where K is the number of classes for\n multiclass classification.\n\n sample_weight : array-like of shape(n_samples,) default=None\n Weights of training data.\n\n Returns\n -------\n gradients : ndarray, shape (prediction_dim, n_samples)\n The initial gradients. 
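Because `TreeNode.__lt__` inverts the comparison on `split_info.gain`, pushing nodes onto a standard `heapq` (a min-heap) pops the highest-gain node first. A small self-contained check of that inversion, with a stand-in object in place of the real SplitInfo:

import heapq
from types import SimpleNamespace

class Node:
    def __init__(self, gain):
        self.split_info = SimpleNamespace(gain=gain)

    def __lt__(self, other):
        # Same inversion as TreeNode.__lt__: '<' means 'larger gain',
        # so heapq's min-heap pops the best split first.
        return self.split_info.gain > other.split_info.gain

heap = []
for gain in (0.1, 2.5, 1.0):
    heapq.heappush(heap, Node(gain))
assert heapq.heappop(heap).split_info.gain == 2.5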
The array is not initialized.\n hessians : ndarray, shape (prediction_dim, n_samples)\n If hessians are constant (e.g. for `LeastSquares` loss), the\n array is initialized to ``1``. Otherwise, the array is allocated\n without being initialized.\n \"\"\"\n shape = (prediction_dim, n_samples)\n gradients = np.empty(shape=shape, dtype=G_H_DTYPE)\n\n if self.hessians_are_constant:\n # If the hessians are constant, we consider they are equal to 1.\n # - This is correct for the half LS loss\n # - For LAD loss, hessians are actually 0, but they are always\n # ignored anyway.\n hessians = np.ones(shape=(1, 1), dtype=G_H_DTYPE)\n else:\n hessians = np.empty(shape=shape, dtype=G_H_DTYPE)\n\n return gradients, hessians\n\n @abstractmethod\n def get_baseline_prediction(self, y_train, sample_weight, prediction_dim):\n \"\"\"Return initial predictions (before the first iteration).\n\n Parameters\n ----------\n y_train : ndarray, shape (n_samples,)\n The target training values.\n\n sample_weight : array-like of shape(n_samples,) default=None\n Weights of training data.\n\n prediction_dim : int\n The dimension of one prediction: 1 for binary classification and\n regression, n_classes for multiclass classification.\n\n Returns\n -------\n baseline_prediction : float or ndarray, shape (1, prediction_dim)\n The baseline prediction.\n \"\"\"\n\n @abstractmethod\n def update_gradients_and_hessians(self, gradients, hessians, y_true,\n raw_predictions, sample_weight):\n \"\"\"Update gradients and hessians arrays, inplace.\n\n The gradients (resp. hessians) are the first (resp. second) order\n derivatives of the loss for each sample with respect to the\n predictions of the model, evaluated at iteration ``i - 1``.\n\n Parameters\n ----------\n gradients : ndarray, shape (prediction_dim, n_samples)\n The gradients (treated as OUT array).\n\n hessians : ndarray, shape (prediction_dim, n_samples) or \\\n (1,)\n The hessians (treated as OUT array).\n\n y_true : ndarray, shape (n_samples,)\n The true target values of each training sample.\n\n raw_predictions : ndarray, shape (prediction_dim, n_samples)\n The raw_predictions (i.e. 
values from the trees) of the tree\n ensemble at iteration ``i - 1``.\n\n sample_weight : array-like of shape(n_samples,) default=None\n Weights of training data.\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy", + "name": "BinaryCrossEntropy", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy", + "decorators": [], + "superclasses": ["BaseLoss"], + "methods": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/__init__", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/pointwise_loss", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/get_baseline_prediction", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/update_gradients_and_hessians", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/predict_proba" + ], + "is_public": false, + "reexported_by": [], + "description": "Binary cross-entropy loss, for binary classification.\n\nFor a given sample x_i, the binary cross-entropy loss is defined as the\nnegative log-likelihood of the model which can be expressed as::\n\n loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i\n\nSee The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,\nsection 4.4.1 (about logistic regression).", + "docstring": "Binary cross-entropy loss, for binary classification.\n\nFor a given sample x_i, the binary cross-entropy loss is defined as the\nnegative log-likelihood of the model which can be expressed as::\n\n loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i\n\nSee The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,\nsection 4.4.1 (about logistic regression).", + "code": "class BinaryCrossEntropy(BaseLoss):\n \"\"\"Binary cross-entropy loss, for binary classification.\n\n For a given sample x_i, the binary cross-entropy loss is defined as the\n negative log-likelihood of the model which can be expressed as::\n\n loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i\n\n See The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,\n section 4.4.1 (about logistic regression).\n \"\"\"\n\n def __init__(self, sample_weight):\n super().__init__(hessians_are_constant=False)\n\n inverse_link_function = staticmethod(expit)\n\n def pointwise_loss(self, y_true, raw_predictions):\n # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n # logaddexp(0, x) = log(1 + exp(x))\n loss = np.logaddexp(0, raw_predictions) - y_true * raw_predictions\n return loss\n\n def get_baseline_prediction(self, y_train, sample_weight, prediction_dim):\n if prediction_dim > 2:\n raise ValueError(\n \"loss='binary_crossentropy' is not defined for multiclass\"\n \" classification with n_classes=%d, use\"\n \" loss='categorical_crossentropy' instead\" % prediction_dim)\n proba_positive_class = np.average(y_train, weights=sample_weight)\n eps = np.finfo(y_train.dtype).eps\n proba_positive_class = np.clip(proba_positive_class, eps, 1 - eps)\n # log(x / (1 - x)) is the inverse of the sigmoid, i.e. the link\n # function of the Binomial model.\n return np.log(proba_positive_class / (1 - proba_positive_class))\n\n def update_gradients_and_hessians(self, gradients, hessians, y_true,\n raw_predictions, sample_weight):\n # shape (1, n_samples) --> (n_samples,). 
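The `(prediction_dim, n_samples)` layout and the constant-hessian shortcut of `BaseLoss.init_gradients_and_hessians` in a nutshell (a sketch; the real arrays use the module's G_H_DTYPE):

import numpy as np

n_samples, prediction_dim = 5, 3
shape = (prediction_dim, n_samples)
hessians_are_constant = True                # e.g. unweighted least squares

gradients = np.empty(shape, dtype=np.float32)   # filled at each iteration
if hessians_are_constant:
    # A single broadcastable 1 stands in for the whole hessian array.
    hessians = np.ones((1, 1), dtype=np.float32)
else:
    hessians = np.empty(shape, dtype=np.float32)

assert gradients.shape == (3, 5) and hessians.shape == (1, 1)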
reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n gradients = gradients.reshape(-1)\n hessians = hessians.reshape(-1)\n _update_gradients_hessians_binary_crossentropy(\n gradients, hessians, y_true, raw_predictions, sample_weight)\n\n def predict_proba(self, raw_predictions):\n # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n proba = np.empty((raw_predictions.shape[0], 2), dtype=Y_DTYPE)\n proba[:, 1] = expit(raw_predictions)\n proba[:, 0] = 1 - proba[:, 1]\n return proba", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy", + "name": "CategoricalCrossEntropy", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy", + "decorators": [], + "superclasses": ["BaseLoss"], + "methods": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/__init__", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/pointwise_loss", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/get_baseline_prediction", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/update_gradients_and_hessians", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/predict_proba" + ], + "is_public": false, + "reexported_by": [], + "description": "Categorical cross-entropy loss, for multiclass classification.\n\nFor a given sample x_i, the categorical cross-entropy loss is defined as\nthe negative log-likelihood of the model and generalizes the binary\ncross-entropy to more than 2 classes.", + "docstring": "Categorical cross-entropy loss, for multiclass classification.\n\nFor a given sample x_i, the categorical cross-entropy loss is defined as\nthe negative log-likelihood of the model and generalizes the binary\ncross-entropy to more than 2 classes.", + "code": "class CategoricalCrossEntropy(BaseLoss):\n \"\"\"Categorical cross-entropy loss, for multiclass classification.\n\n For a given sample x_i, the categorical cross-entropy loss is defined as\n the negative log-likelihood of the model and generalizes the binary\n cross-entropy to more than 2 classes.\n \"\"\"\n\n def __init__(self, sample_weight):\n super().__init__(hessians_are_constant=False)\n\n def pointwise_loss(self, y_true, raw_predictions):\n one_hot_true = np.zeros_like(raw_predictions)\n prediction_dim = raw_predictions.shape[0]\n for k in range(prediction_dim):\n one_hot_true[k, :] = (y_true == k)\n\n loss = (logsumexp(raw_predictions, axis=0) -\n (one_hot_true * raw_predictions).sum(axis=0))\n return loss\n\n def get_baseline_prediction(self, y_train, sample_weight, prediction_dim):\n init_value = np.zeros(shape=(prediction_dim, 1), dtype=Y_DTYPE)\n eps = np.finfo(y_train.dtype).eps\n for k in range(prediction_dim):\n proba_kth_class = np.average(y_train == k,\n weights=sample_weight)\n proba_kth_class = np.clip(proba_kth_class, eps, 1 - eps)\n init_value[k, :] += np.log(proba_kth_class)\n\n return init_value\n\n def update_gradients_and_hessians(self, gradients, hessians, y_true,\n raw_predictions, sample_weight):\n _update_gradients_hessians_categorical_crossentropy(\n gradients, hessians, y_true, raw_predictions, sample_weight)\n\n def predict_proba(self, raw_predictions):\n # TODO: This could be done in parallel\n # compute softmax (using 
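The `logaddexp` form of `BinaryCrossEntropy.pointwise_loss` is a numerically stable rewrite of the usual negative log-likelihood, and the baseline prediction is the log-odds of the positive class. A quick numerical check of both identities:

import numpy as np
from scipy.special import expit

raw = np.array([-3.0, 0.0, 2.5])
y = np.array([0.0, 1.0, 1.0])

# pointwise_loss: log(1 + exp(raw)) - y * raw, via the stable logaddexp
loss = np.logaddexp(0, raw) - y * raw
nll = -(y * np.log(expit(raw)) + (1 - y) * np.log(1 - expit(raw)))
assert np.allclose(loss, nll)

# get_baseline_prediction: the log-odds (logit) of the mean target
p = y.mean()                                # 2/3, so log-odds is log(2)
assert np.isclose(np.log(p / (1 - p)), np.log(2.0))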
exp(log(softmax)))\n proba = np.exp(raw_predictions -\n logsumexp(raw_predictions, axis=0)[np.newaxis, :])\n return proba.T", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation", + "name": "LeastAbsoluteDeviation", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation", + "decorators": [], + "superclasses": ["BaseLoss"], + "methods": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/__init__", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/pointwise_loss", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/get_baseline_prediction", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/inverse_link_function", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_gradients_and_hessians", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_leaves_values" + ], + "is_public": false, + "reexported_by": [], + "description": "Least absolute deviation, for regression.\n\nFor a given sample x_i, the loss is defined as::\n\n loss(x_i) = |y_true_i - raw_pred_i|", + "docstring": "Least absolute deviation, for regression.\n\nFor a given sample x_i, the loss is defined as::\n\n loss(x_i) = |y_true_i - raw_pred_i|", + "code": "class LeastAbsoluteDeviation(BaseLoss):\n \"\"\"Least absolute deviation, for regression.\n\n For a given sample x_i, the loss is defined as::\n\n loss(x_i) = |y_true_i - raw_pred_i|\n \"\"\"\n\n def __init__(self, sample_weight):\n # If sample weights are provided, the hessians and gradients\n # are multiplied by sample_weight, which means the hessians are\n # equal to sample weights.\n super().__init__(hessians_are_constant=sample_weight is None)\n\n # This variable indicates whether the loss requires the leaves values to\n # be updated once the tree has been trained. The trees are trained to\n # predict a Newton-Raphson step (see grower._finalize_leaf()). But for\n # some losses (e.g. least absolute deviation) we need to adjust the tree\n # values to account for the \"line search\" of the gradient descent\n # procedure. See the original paper Greedy Function Approximation: A\n # Gradient Boosting Machine by Friedman\n # (https://statweb.stanford.edu/~jhf/ftp/trebst.pdf) for the theory.\n need_update_leaves_values = True\n\n def pointwise_loss(self, y_true, raw_predictions):\n # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n loss = np.abs(y_true - raw_predictions)\n return loss\n\n def get_baseline_prediction(self, y_train, sample_weight, prediction_dim):\n if sample_weight is None:\n return np.median(y_train)\n else:\n return _weighted_percentile(y_train, sample_weight, 50)\n\n @staticmethod\n def inverse_link_function(raw_predictions):\n return raw_predictions\n\n def update_gradients_and_hessians(self, gradients, hessians, y_true,\n raw_predictions, sample_weight):\n # shape (1, n_samples) --> (n_samples,). 
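`CategoricalCrossEntropy` pairs a log-sum-exp loss with a softmax `predict_proba`; the pointwise loss is exactly the negative log of the predicted probability of the true class. A compact verification using the same `logsumexp` helper the module imports:

import numpy as np
from scipy.special import logsumexp

raw = np.array([[0.2, 1.0],                 # shape (prediction_dim, n_samples)
                [1.5, -0.5],
                [-1.0, 0.3]])
y_true = np.array([1, 0])

# pointwise_loss: logsumexp over classes minus the true class's raw score
loss = logsumexp(raw, axis=0) - raw[y_true, np.arange(2)]

# predict_proba: softmax computed as exp(log(softmax))
proba = np.exp(raw - logsumexp(raw, axis=0)[np.newaxis, :]).T
assert np.allclose(proba.sum(axis=1), 1.0)
assert np.allclose(loss, -np.log(proba[np.arange(2), y_true]))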
reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n gradients = gradients.reshape(-1)\n if sample_weight is None:\n _update_gradients_least_absolute_deviation(gradients, y_true,\n raw_predictions)\n else:\n hessians = hessians.reshape(-1)\n _update_gradients_hessians_least_absolute_deviation(\n gradients, hessians, y_true, raw_predictions, sample_weight)\n\n def update_leaves_values(self, grower, y_true, raw_predictions,\n sample_weight):\n # Update the values predicted by the tree with\n # median(y_true - raw_predictions).\n # See note about need_update_leaves_values in BaseLoss.\n\n # TODO: ideally this should be computed in parallel over the leaves\n # using something similar to _update_raw_predictions(), but this\n # requires a cython version of median()\n for leaf in grower.finalized_leaves:\n indices = leaf.sample_indices\n if sample_weight is None:\n median_res = np.median(y_true[indices]\n - raw_predictions[indices])\n else:\n median_res = _weighted_percentile(\n y_true[indices] - raw_predictions[indices],\n sample_weight=sample_weight[indices],\n percentile=50\n )\n leaf.value = grower.shrinkage * median_res", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares", + "name": "LeastSquares", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares", + "decorators": [], + "superclasses": ["BaseLoss"], + "methods": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/__init__", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/pointwise_loss", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/get_baseline_prediction", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/inverse_link_function", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/update_gradients_and_hessians" + ], + "is_public": false, + "reexported_by": [], + "description": "Least squares loss, for regression.\n\nFor a given sample x_i, least squares loss is defined as::\n\n loss(x_i) = 0.5 * (y_true_i - raw_pred_i)**2\n\nThis actually computes the half least squares loss to simplify\nthe computation of the gradients and get a unit hessian (and be consistent\nwith what is done in LightGBM).", + "docstring": "Least squares loss, for regression.\n\nFor a given sample x_i, least squares loss is defined as::\n\n loss(x_i) = 0.5 * (y_true_i - raw_pred_i)**2\n\nThis actually computes the half least squares loss to simplify\nthe computation of the gradients and get a unit hessian (and be consistent\nwith what is done in LightGBM).", + "code": "class LeastSquares(BaseLoss):\n \"\"\"Least squares loss, for regression.\n\n For a given sample x_i, least squares loss is defined as::\n\n loss(x_i) = 0.5 * (y_true_i - raw_pred_i)**2\n\n This actually computes the half least squares loss to simplify\n the computation of the gradients and get a unit hessian (and be consistent\n with what is done in LightGBM).\n \"\"\"\n\n def __init__(self, sample_weight):\n # If sample weights are provided, the hessians and gradients\n # are multiplied by sample_weight, which means the hessians are\n # equal to sample weights.\n super().__init__(hessians_are_constant=sample_weight is None)\n\n def pointwise_loss(self, y_true, raw_predictions):\n # shape (1, n_samples) --> (n_samples,). 
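The leaf correction in `update_leaves_values` is a per-leaf line search: the constant that minimizes the absolute loss over a leaf is the median of its residuals, scaled by the shrinkage like any other leaf value. A toy version for a single leaf:

import numpy as np

shrinkage = 0.1
y_true = np.array([3.0, 5.0, 10.0, 4.0])
raw_pred = np.full(4, 4.0)                  # current ensemble output

# The constant c minimizing sum(|y - (raw_pred + c)|) over the leaf is
# the median of the residuals; shrinkage applies as for any leaf value.
leaf_value = shrinkage * np.median(y_true - raw_pred)
assert np.isclose(leaf_value, 0.05)         # residual median is 0.5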
reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n loss = 0.5 * np.power(y_true - raw_predictions, 2)\n return loss\n\n def get_baseline_prediction(self, y_train, sample_weight, prediction_dim):\n return np.average(y_train, weights=sample_weight)\n\n @staticmethod\n def inverse_link_function(raw_predictions):\n return raw_predictions\n\n def update_gradients_and_hessians(self, gradients, hessians, y_true,\n raw_predictions, sample_weight):\n # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n gradients = gradients.reshape(-1)\n if sample_weight is None:\n _update_gradients_least_squares(gradients, y_true, raw_predictions)\n else:\n hessians = hessians.reshape(-1)\n _update_gradients_hessians_least_squares(gradients, hessians,\n y_true, raw_predictions,\n sample_weight)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson", + "name": "Poisson", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson", + "decorators": [], + "superclasses": ["BaseLoss"], + "methods": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/__init__", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/pointwise_loss", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/get_baseline_prediction", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/update_gradients_and_hessians" + ], + "is_public": false, + "reexported_by": [], + "description": "Poisson deviance loss with log-link, for regression.\n\nFor a given sample x_i, Poisson deviance loss is defined as::\n\n loss(x_i) = y_true_i * log(y_true_i/exp(raw_pred_i))\n - y_true_i + exp(raw_pred_i)\n\nThis actually computes half the Poisson deviance to simplify\nthe computation of the gradients.", + "docstring": "Poisson deviance loss with log-link, for regression.\n\nFor a given sample x_i, Poisson deviance loss is defined as::\n\n loss(x_i) = y_true_i * log(y_true_i/exp(raw_pred_i))\n - y_true_i + exp(raw_pred_i)\n\nThis actually computes half the Poisson deviance to simplify\nthe computation of the gradients.", + "code": "class Poisson(BaseLoss):\n \"\"\"Poisson deviance loss with log-link, for regression.\n\n For a given sample x_i, Poisson deviance loss is defined as::\n\n loss(x_i) = y_true_i * log(y_true_i/exp(raw_pred_i))\n - y_true_i + exp(raw_pred_i)\n\n This actually computes half the Poisson deviance to simplify\n the computation of the gradients.\n \"\"\"\n\n def __init__(self, sample_weight):\n super().__init__(hessians_are_constant=False)\n\n inverse_link_function = staticmethod(np.exp)\n\n def pointwise_loss(self, y_true, raw_predictions):\n # shape (1, n_samples) --> (n_samples,). 
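Halving the squared error, as `LeastSquares` does, makes the gradient exactly `raw_predictions - y_true` with a unit hessian, which is why `hessians_are_constant` holds in the unweighted case. A finite-difference sanity check:

import numpy as np

y, raw = 3.0, 1.2

def half_ls(r):
    return 0.5 * (y - r) ** 2

grad = raw - y                              # analytic gradient: -1.8
eps = 1e-6
fd = (half_ls(raw + eps) - half_ls(raw - eps)) / (2 * eps)
assert np.isclose(grad, fd)
# The second derivative is identically 1: a unit (constant) hessian.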
reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n # TODO: For speed, we could remove the constant xlogy(y_true, y_true)\n # Advantage of this form: minimum of zero at raw_predictions = y_true.\n loss = (xlogy(y_true, y_true) - y_true * (raw_predictions + 1)\n + np.exp(raw_predictions))\n return loss\n\n def get_baseline_prediction(self, y_train, sample_weight, prediction_dim):\n y_pred = np.average(y_train, weights=sample_weight)\n eps = np.finfo(y_train.dtype).eps\n y_pred = np.clip(y_pred, eps, None)\n return np.log(y_pred)\n\n def update_gradients_and_hessians(self, gradients, hessians, y_true,\n raw_predictions, sample_weight):\n # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n gradients = gradients.reshape(-1)\n hessians = hessians.reshape(-1)\n _update_gradients_hessians_poisson(gradients, hessians,\n y_true, raw_predictions,\n sample_weight)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor", + "name": "TreePredictor", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/__init__", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/get_n_leaf_nodes", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/get_max_depth", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/predict", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/predict_binned", + "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/compute_partial_dependence" + ], + "is_public": false, + "reexported_by": [], + "description": "Tree class used for predictions.", + "docstring": "Tree class used for predictions.\n\nParameters\n----------\nnodes : ndarray of PREDICTOR_RECORD_DTYPE\n The nodes of the tree.\nbinned_left_cat_bitsets : ndarray of shape (n_categorical_splits, 8), dtype=uint32\n Array of bitsets for binned categories used in predict_binned when a\n split is categorical.\nraw_left_cat_bitsets : ndarray of shape (n_categorical_splits, 8), dtype=uint32\n Array of bitsets for raw categories used in predict when a split is\n categorical.", + "code": "class TreePredictor:\n \"\"\"Tree class used for predictions.\n\n Parameters\n ----------\n nodes : ndarray of PREDICTOR_RECORD_DTYPE\n The nodes of the tree.\n binned_left_cat_bitsets : ndarray of shape (n_categorical_splits, 8), \\\n dtype=uint32\n Array of bitsets for binned categories used in predict_binned when a\n split is categorical.\n raw_left_cat_bitsets : ndarray of shape (n_categorical_splits, 8), \\\n dtype=uint32\n Array of bitsets for raw categories used in predict when a split is\n categorical.\n\n \"\"\"\n def __init__(self, nodes, binned_left_cat_bitsets,\n raw_left_cat_bitsets):\n self.nodes = nodes\n self.binned_left_cat_bitsets = binned_left_cat_bitsets\n self.raw_left_cat_bitsets = raw_left_cat_bitsets\n\n def get_n_leaf_nodes(self):\n \"\"\"Return number of leaves.\"\"\"\n return int(self.nodes['is_leaf'].sum())\n\n def get_max_depth(self):\n \"\"\"Return maximum depth among all leaves.\"\"\"\n return int(self.nodes['depth'].max())\n\n def predict(self, X, known_cat_bitsets, f_idx_map):\n \"\"\"Predict raw values for non-binned 
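Under the log-link, the half Poisson deviance above has gradient `exp(raw_pred) - y_true`, and it vanishes at its minimum `raw_pred = log(y_true)`. A short numerical check of that minimum:

import numpy as np
from scipy.special import xlogy

y = 4.0

def half_poisson_dev(raw):
    # same form as Poisson.pointwise_loss
    return xlogy(y, y) - y * (raw + 1) + np.exp(raw)

grid = np.linspace(0.0, 3.0, 301)
best = grid[np.argmin(half_poisson_dev(grid))]
assert np.isclose(best, np.log(y), atol=0.01)   # optimum at log(y)
assert np.isclose(half_poisson_dev(np.log(y)), 0.0)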
data.\n\n Parameters\n ----------\n X : ndarray, shape (n_samples, n_features)\n The input samples.\n\n known_cat_bitsets : ndarray of shape (n_categorical_features, 8)\n Array of bitsets of known categories, for each categorical feature.\n\n f_idx_map : ndarray of shape (n_features,)\n Map from original feature index to the corresponding index in the\n known_cat_bitsets array.\n\n Returns\n -------\n y : ndarray, shape (n_samples,)\n The raw predicted values.\n \"\"\"\n out = np.empty(X.shape[0], dtype=Y_DTYPE)\n _predict_from_raw_data(self.nodes, X, self.raw_left_cat_bitsets,\n known_cat_bitsets, f_idx_map, out)\n return out\n\n def predict_binned(self, X, missing_values_bin_idx):\n \"\"\"Predict raw values for binned data.\n\n Parameters\n ----------\n X : ndarray, shape (n_samples, n_features)\n The input samples.\n missing_values_bin_idx : uint8\n Index of the bin that is used for missing values. This is the\n index of the last bin and is always equal to max_bins (as passed\n to the GBDT classes), or equivalently to n_bins - 1.\n\n Returns\n -------\n y : ndarray, shape (n_samples,)\n The raw predicted values.\n \"\"\"\n out = np.empty(X.shape[0], dtype=Y_DTYPE)\n _predict_from_binned_data(self.nodes, X,\n self.binned_left_cat_bitsets,\n missing_values_bin_idx, out)\n return out\n\n def compute_partial_dependence(self, grid, target_features, out):\n \"\"\"Fast partial dependence computation.\n\n Parameters\n ----------\n grid : ndarray, shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\n target_features : ndarray, shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n out : ndarray, shape (n_samples)\n The value of the partial dependence function on each grid\n point.\n \"\"\"\n _compute_partial_dependence(self.nodes, grid, target_features, out)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest", + "name": "IsolationForest", + "qname": "sklearn.ensemble._iforest.IsolationForest", + "decorators": [], + "superclasses": ["OutlierMixin", "BaseBagging"], + "methods": [ + "scikit-learn/sklearn.ensemble._iforest/IsolationForest/__init__", + "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_set_oob_score", + "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_parallel_args", + "scikit-learn/sklearn.ensemble._iforest/IsolationForest/fit", + "scikit-learn/sklearn.ensemble._iforest/IsolationForest/predict", + "scikit-learn/sklearn.ensemble._iforest/IsolationForest/decision_function", + "scikit-learn/sklearn.ensemble._iforest/IsolationForest/score_samples", + "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_compute_chunked_score_samples", + "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_compute_score_samples", + "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Isolation Forest Algorithm.\n\nReturn the anomaly score of each sample using the IsolationForest algorithm\n\nThe IsolationForest 'isolates' observations by randomly selecting a feature\nand then randomly selecting a split value between the maximum and minimum\nvalues of the selected feature.\n\nSince recursive partitioning can be represented by a tree structure, the\nnumber of splittings required to isolate a sample is equivalent to the path\nlength from the root node to the terminating node.\n\nThis path length, averaged over a forest of such random trees, 
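The Cython `_predict_from_raw_data` boils down to a simple walk over the flat `nodes` array: follow `left`/`right` indices until `is_leaf`. A hedged pure-Python sketch over a hand-built record array (the field names mirror only a subset of PREDICTOR_RECORD_DTYPE and are illustrative, not the actual dtype):

import numpy as np

node_dtype = np.dtype([('value', np.float64), ('feature_idx', np.uint32),
                       ('num_threshold', np.float64), ('is_leaf', np.uint8),
                       ('left', np.uint32), ('right', np.uint32)])

# A stump: x[0] <= 0.5 goes left (node 1), else right (node 2).
nodes = np.zeros(3, dtype=node_dtype)
nodes[0] = (0.0, 0, 0.5, 0, 1, 2)
nodes[1] = (-1.0, 0, 0.0, 1, 0, 0)
nodes[2] = (+1.0, 0, 0.0, 1, 0, 0)

def predict_one(x):
    i = 0
    while not nodes[i]['is_leaf']:
        go_left = x[nodes[i]['feature_idx']] <= nodes[i]['num_threshold']
        i = nodes[i]['left'] if go_left else nodes[i]['right']
    return nodes[i]['value']

assert predict_one([0.2]) == -1.0 and predict_one([0.9]) == 1.0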
is a\nmeasure of normality and our decision function.\n\nRandom partitioning produces noticeably shorter paths for anomalies.\nHence, when a forest of random trees collectively produce shorter path\nlengths for particular samples, they are highly likely to be anomalies.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "Isolation Forest Algorithm.\n\nReturn the anomaly score of each sample using the IsolationForest algorithm\n\nThe IsolationForest 'isolates' observations by randomly selecting a feature\nand then randomly selecting a split value between the maximum and minimum\nvalues of the selected feature.\n\nSince recursive partitioning can be represented by a tree structure, the\nnumber of splittings required to isolate a sample is equivalent to the path\nlength from the root node to the terminating node.\n\nThis path length, averaged over a forest of such random trees, is a\nmeasure of normality and our decision function.\n\nRandom partitioning produces noticeably shorter paths for anomalies.\nHence, when a forest of random trees collectively produce shorter path\nlengths for particular samples, they are highly likely to be anomalies.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_estimators : int, default=100\n The number of base estimators in the ensemble.\n\nmax_samples : \"auto\", int or float, default=\"auto\"\n The number of samples to draw from X to train each base estimator.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n - If \"auto\", then `max_samples=min(256, n_samples)`.\n\n If max_samples is larger than the number of samples provided,\n all samples will be used for all trees (no sampling).\n\ncontamination : 'auto' or float, default='auto'\n The amount of contamination of the data set, i.e. the proportion\n of outliers in the data set. Used when fitting to define the threshold\n on the scores of the samples.\n\n - If 'auto', the threshold is determined as in the\n original paper.\n - If float, the contamination should be in the range [0, 0.5].\n\n .. versionchanged:: 0.22\n The default value of ``contamination`` changed from 0.1\n to ``'auto'``.\n\nmax_features : int or float, default=1.0\n The number of features to draw from X to train each base estimator.\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features.\n\nbootstrap : bool, default=False\n If True, individual trees are fit on random subsets of the training\n data sampled with replacement. If False, sampling without replacement\n is performed.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for both :meth:`fit` and\n :meth:`predict`. ``None`` means 1 unless in a\n :obj:`joblib.parallel_backend` context. ``-1`` means using all\n processors. See :term:`Glossary ` for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo-randomness of the selection of the feature\n and split values for each branching step and each tree in the forest.\n\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nverbose : int, default=0\n Controls the verbosity of the tree building process.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. See :term:`the Glossary `.\n\n .. 
versionadded:: 0.21\n\nAttributes\n----------\nbase_estimator_ : ExtraTreeRegressor instance\n The child estimator template used to create the collection of\n fitted sub-estimators.\n\nestimators_ : list of ExtraTreeRegressor instances\n The collection of fitted sub-estimators.\n\nestimators_features_ : list of ndarray\n The subset of drawn features for each base estimator.\n\nestimators_samples_ : list of ndarray\n The subset of drawn samples (i.e., the in-bag samples) for each base\n estimator.\n\nmax_samples_ : int\n The actual number of samples.\n\noffset_ : float\n Offset used to define the decision function from the raw scores. We\n have the relation: ``decision_function = score_samples - offset_``.\n ``offset_`` is defined as follows. When the contamination parameter is\n set to \"auto\", the offset is equal to -0.5 as the scores of inliers are\n close to 0 and the scores of outliers are close to -1. When a\n contamination parameter different than \"auto\" is provided, the offset\n is defined in such a way we obtain the expected number of outliers\n (samples with decision function < 0) in training.\n\n .. versionadded:: 0.20\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nNotes\n-----\nThe implementation is based on an ensemble of ExtraTreeRegressor. The\nmaximum depth of each tree is set to ``ceil(log_2(n))`` where\n:math:`n` is the number of samples used to build the tree\n(see (Liu et al., 2008) for more details).\n\nReferences\n----------\n.. [1] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. \"Isolation forest.\"\n Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on.\n.. [2] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. \"Isolation-based\n anomaly detection.\" ACM Transactions on Knowledge Discovery from\n Data (TKDD) 6.1 (2012): 3.\n\nSee Also\n----------\nsklearn.covariance.EllipticEnvelope : An object for detecting outliers in a\n Gaussian distributed dataset.\nsklearn.svm.OneClassSVM : Unsupervised Outlier Detection.\n Estimate the support of a high-dimensional distribution.\n The implementation is based on libsvm.\nsklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection\n using Local Outlier Factor (LOF).\n\nExamples\n--------\n>>> from sklearn.ensemble import IsolationForest\n>>> X = [[-1.1], [0.3], [0.5], [100]]\n>>> clf = IsolationForest(random_state=0).fit(X)\n>>> clf.predict([[0.1], [0], [90]])\narray([ 1, 1, -1])", + "code": "class IsolationForest(OutlierMixin, BaseBagging):\n \"\"\"\n Isolation Forest Algorithm.\n\n Return the anomaly score of each sample using the IsolationForest algorithm\n\n The IsolationForest 'isolates' observations by randomly selecting a feature\n and then randomly selecting a split value between the maximum and minimum\n values of the selected feature.\n\n Since recursive partitioning can be represented by a tree structure, the\n number of splittings required to isolate a sample is equivalent to the path\n length from the root node to the terminating node.\n\n This path length, averaged over a forest of such random trees, is a\n measure of normality and our decision function.\n\n Random partitioning produces noticeably shorter paths for anomalies.\n Hence, when a forest of random trees collectively produce shorter path\n lengths for particular samples, they are highly likely to be anomalies.\n\n Read more in the :ref:`User Guide `.\n\n .. 
versionadded:: 0.18\n\n Parameters\n ----------\n n_estimators : int, default=100\n The number of base estimators in the ensemble.\n\n max_samples : \"auto\", int or float, default=\"auto\"\n The number of samples to draw from X to train each base estimator.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n - If \"auto\", then `max_samples=min(256, n_samples)`.\n\n If max_samples is larger than the number of samples provided,\n all samples will be used for all trees (no sampling).\n\n contamination : 'auto' or float, default='auto'\n The amount of contamination of the data set, i.e. the proportion\n of outliers in the data set. Used when fitting to define the threshold\n on the scores of the samples.\n\n - If 'auto', the threshold is determined as in the\n original paper.\n - If float, the contamination should be in the range [0, 0.5].\n\n .. versionchanged:: 0.22\n The default value of ``contamination`` changed from 0.1\n to ``'auto'``.\n\n max_features : int or float, default=1.0\n The number of features to draw from X to train each base estimator.\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features.\n\n bootstrap : bool, default=False\n If True, individual trees are fit on random subsets of the training\n data sampled with replacement. If False, sampling without replacement\n is performed.\n\n n_jobs : int, default=None\n The number of jobs to run in parallel for both :meth:`fit` and\n :meth:`predict`. ``None`` means 1 unless in a\n :obj:`joblib.parallel_backend` context. ``-1`` means using all\n processors. See :term:`Glossary ` for more details.\n\n random_state : int, RandomState instance or None, default=None\n Controls the pseudo-randomness of the selection of the feature\n and split values for each branching step and each tree in the forest.\n\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n verbose : int, default=0\n Controls the verbosity of the tree building process.\n\n warm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit\n and add more estimators to the ensemble, otherwise, just fit a whole\n new forest. See :term:`the Glossary `.\n\n .. versionadded:: 0.21\n\n Attributes\n ----------\n base_estimator_ : ExtraTreeRegressor instance\n The child estimator template used to create the collection of\n fitted sub-estimators.\n\n estimators_ : list of ExtraTreeRegressor instances\n The collection of fitted sub-estimators.\n\n estimators_features_ : list of ndarray\n The subset of drawn features for each base estimator.\n\n estimators_samples_ : list of ndarray\n The subset of drawn samples (i.e., the in-bag samples) for each base\n estimator.\n\n max_samples_ : int\n The actual number of samples.\n\n offset_ : float\n Offset used to define the decision function from the raw scores. We\n have the relation: ``decision_function = score_samples - offset_``.\n ``offset_`` is defined as follows. When the contamination parameter is\n set to \"auto\", the offset is equal to -0.5 as the scores of inliers are\n close to 0 and the scores of outliers are close to -1. When a\n contamination parameter different than \"auto\" is provided, the offset\n is defined in such a way we obtain the expected number of outliers\n (samples with decision function < 0) in training.\n\n .. 
versionadded:: 0.20\n\n n_features_ : int\n The number of features when ``fit`` is performed.\n\n Notes\n -----\n The implementation is based on an ensemble of ExtraTreeRegressor. The\n maximum depth of each tree is set to ``ceil(log_2(n))`` where\n :math:`n` is the number of samples used to build the tree\n (see (Liu et al., 2008) for more details).\n\n References\n ----------\n .. [1] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. \"Isolation forest.\"\n Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on.\n .. [2] Liu, Fei Tony, Ting, Kai Ming and Zhou, Zhi-Hua. \"Isolation-based\n anomaly detection.\" ACM Transactions on Knowledge Discovery from\n Data (TKDD) 6.1 (2012): 3.\n\n See Also\n ----------\n sklearn.covariance.EllipticEnvelope : An object for detecting outliers in a\n Gaussian distributed dataset.\n sklearn.svm.OneClassSVM : Unsupervised Outlier Detection.\n Estimate the support of a high-dimensional distribution.\n The implementation is based on libsvm.\n sklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection\n using Local Outlier Factor (LOF).\n\n Examples\n --------\n >>> from sklearn.ensemble import IsolationForest\n >>> X = [[-1.1], [0.3], [0.5], [100]]\n >>> clf = IsolationForest(random_state=0).fit(X)\n >>> clf.predict([[0.1], [0], [90]])\n array([ 1, 1, -1])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *,\n n_estimators=100,\n max_samples=\"auto\",\n contamination=\"auto\",\n max_features=1.,\n bootstrap=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False):\n super().__init__(\n base_estimator=ExtraTreeRegressor(\n max_features=1,\n splitter='random',\n random_state=random_state),\n # here above max_features has no links with self.max_features\n bootstrap=bootstrap,\n bootstrap_features=False,\n n_estimators=n_estimators,\n max_samples=max_samples,\n max_features=max_features,\n warm_start=warm_start,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose)\n\n self.contamination = contamination\n\n def _set_oob_score(self, X, y):\n raise NotImplementedError(\"OOB score not supported by iforest\")\n\n def _parallel_args(self):\n # ExtraTreeRegressor releases the GIL, so it's more efficient to use\n # a thread-based backend rather than a process-based backend so as\n # to avoid suffering from communication overhead and extra memory\n # copies.\n return _joblib_parallel_args(prefer='threads')\n\n def fit(self, X, y=None, sample_weight=None):\n \"\"\"\n Fit estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Use ``dtype=np.float32`` for maximum\n efficiency. Sparse matrices are also supported, use sparse\n ``csc_matrix`` for maximum efficiency.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n\n Returns\n -------\n self : object\n Fitted estimator.\n \"\"\"\n X = check_array(X, accept_sparse=['csc'])\n if issparse(X):\n # Pre-sort indices to avoid that each individual tree of the\n # ensemble sorts the indices.\n X.sort_indices()\n\n rnd = check_random_state(self.random_state)\n y = rnd.uniform(size=X.shape[0])\n\n # ensure that max_sample is in [1, n_samples]:\n n_samples = X.shape[0]\n\n if isinstance(self.max_samples, str):\n if self.max_samples == 'auto':\n max_samples = min(256, n_samples)\n else:\n raise ValueError('max_samples (%s) is not supported.'\n 'Valid choices are: \"auto\", int or'\n 'float' % self.max_samples)\n\n elif isinstance(self.max_samples, numbers.Integral):\n if self.max_samples > n_samples:\n warn(\"max_samples (%s) is greater than the \"\n \"total number of samples (%s). max_samples \"\n \"will be set to n_samples for estimation.\"\n % (self.max_samples, n_samples))\n max_samples = n_samples\n else:\n max_samples = self.max_samples\n else: # float\n if not 0. < self.max_samples <= 1.:\n raise ValueError(\"max_samples must be in (0, 1], got %r\"\n % self.max_samples)\n max_samples = int(self.max_samples * X.shape[0])\n\n self.max_samples_ = max_samples\n max_depth = int(np.ceil(np.log2(max(max_samples, 2))))\n super()._fit(X, y, max_samples,\n max_depth=max_depth,\n sample_weight=sample_weight)\n\n if self.contamination == \"auto\":\n # 0.5 plays a special role as described in the original paper.\n # we take the opposite as we consider the opposite of their score.\n self.offset_ = -0.5\n return self\n\n # else, define offset_ wrt contamination parameter\n self.offset_ = np.percentile(self.score_samples(X),\n 100. * self.contamination)\n\n return self\n\n def predict(self, X):\n \"\"\"\n Predict if a particular sample is an outlier or not.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n is_inlier : ndarray of shape (n_samples,)\n For each observation, tells whether or not (+1 or -1) it should\n be considered as an inlier according to the fitted model.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse='csr')\n is_inlier = np.ones(X.shape[0], dtype=int)\n is_inlier[self.decision_function(X) < 0] = -1\n return is_inlier\n\n def decision_function(self, X):\n \"\"\"\n Average anomaly score of X of the base classifiers.\n\n The anomaly score of an input sample is computed as\n the mean anomaly score of the trees in the forest.\n\n The measure of normality of an observation given a tree is the depth\n of the leaf containing this observation, which is equivalent to\n the number of splittings required to isolate this point. In case of\n several observations n_left in the leaf, the average path length of\n a n_left samples isolation tree is added.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n scores : ndarray of shape (n_samples,)\n The anomaly score of the input samples.\n The lower, the more abnormal. 
Negative scores represent outliers,\n positive scores represent inliers.\n \"\"\"\n # We subtract self.offset_ to make 0 be the threshold value for being\n # an outlier:\n\n return self.score_samples(X) - self.offset_\n\n def score_samples(self, X):\n \"\"\"\n Opposite of the anomaly score defined in the original paper.\n\n The anomaly score of an input sample is computed as\n the mean anomaly score of the trees in the forest.\n\n The measure of normality of an observation given a tree is the depth\n of the leaf containing this observation, which is equivalent to\n the number of splittings required to isolate this point. In case of\n several observations n_left in the leaf, the average path length of\n a n_left samples isolation tree is added.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n scores : ndarray of shape (n_samples,)\n The anomaly score of the input samples.\n The lower, the more abnormal.\n \"\"\"\n # code structure from ForestClassifier/predict_proba\n\n check_is_fitted(self)\n\n # Check data\n X = check_array(X, accept_sparse='csr')\n if self.n_features_ != X.shape[1]:\n raise ValueError(\"Number of features of the model must \"\n \"match the input. Model n_features is {0} and \"\n \"input n_features is {1}.\"\n \"\".format(self.n_features_, X.shape[1]))\n\n # Take the opposite of the scores as bigger is better (here less\n # abnormal)\n return -self._compute_chunked_score_samples(X)\n\n def _compute_chunked_score_samples(self, X):\n\n n_samples = _num_samples(X)\n\n if self._max_features == X.shape[1]:\n subsample_features = False\n else:\n subsample_features = True\n\n # We get as many rows as possible within our working_memory budget\n # (defined by sklearn.get_config()['working_memory']) to store\n # self._max_features in each row during computation.\n #\n # Note:\n # - this will get at least 1 row, even if 1 row of score will\n # exceed working_memory.\n # - this does only account for temporary memory usage while loading\n # the data needed to compute the scores -- the returned scores\n # themselves are 1D.\n\n chunk_n_rows = get_chunk_n_rows(row_bytes=16 * self._max_features,\n max_n_rows=n_samples)\n slices = gen_batches(n_samples, chunk_n_rows)\n\n scores = np.zeros(n_samples, order=\"f\")\n\n for sl in slices:\n # compute score on the slices of test samples:\n scores[sl] = self._compute_score_samples(X[sl], subsample_features)\n\n return scores\n\n def _compute_score_samples(self, X, subsample_features):\n \"\"\"\n Compute the score of each samples in X going through the extra trees.\n\n Parameters\n ----------\n X : array-like or sparse matrix\n Data matrix.\n\n subsample_features : bool\n Whether features should be subsampled.\n \"\"\"\n n_samples = X.shape[0]\n\n depths = np.zeros(n_samples, order=\"f\")\n\n for tree, features in zip(self.estimators_, self.estimators_features_):\n X_subset = X[:, features] if subsample_features else X\n\n leaves_index = tree.apply(X_subset)\n node_indicator = tree.decision_path(X_subset)\n n_samples_leaf = tree.tree_.n_node_samples[leaves_index]\n\n depths += (\n np.ravel(node_indicator.sum(axis=1))\n + _average_path_length(n_samples_leaf)\n - 1.0\n )\n\n scores = 2 ** (\n -depths\n / (len(self.estimators_)\n * _average_path_length([self.max_samples_]))\n )\n return scores\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + 
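The documented relation `decision_function = score_samples - offset_`, and the contamination-quantile definition of `offset_`, can be checked directly with the public API:

import numpy as np
from sklearn.ensemble import IsolationForest

X = np.r_[np.random.RandomState(0).randn(100, 2), [[8.0, 8.0]]]

clf = IsolationForest(contamination=0.05, random_state=0).fit(X)
scores = clf.score_samples(X)

# offset_ is the contamination-quantile of the training scores, so that
# about 5% of training samples get a negative decision_function.
assert np.isclose(clf.offset_, np.percentile(scores, 5.0))
assert np.allclose(clf.decision_function(X), scores - clf.offset_)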
"instance_attributes": [ + { + "name": "contamination", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "offset_", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier", + "name": "StackingClassifier", + "qname": "sklearn.ensemble._stacking.StackingClassifier", + "decorators": [], + "superclasses": ["ClassifierMixin", "_BaseStacking"], + "methods": [ + "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/__init__", + "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/_validate_final_estimator", + "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/fit", + "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/predict", + "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/predict_proba", + "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/decision_function", + "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/transform", + "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/_sk_visual_block_" + ], + "is_public": false, + "reexported_by": [], + "description": "Stack of estimators with a final classifier.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a classifier to compute the final prediction. Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22", + "docstring": "Stack of estimators with a final classifier.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a classifier to compute the final prediction. Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nestimators : list of (str, estimator)\n Base estimators which will be stacked together. Each element of the\n list is defined as a tuple of string (i.e. name) and an estimator\n instance. An estimator can be set to 'drop' using `set_params`.\n\nfinal_estimator : estimator, default=None\n A classifier which will be used to combine the base estimators.\n The default classifier is a\n :class:`~sklearn.linear_model.LogisticRegression`.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy used in\n `cross_val_predict` to train `final_estimator`. 
Possible inputs for\n cv are:\n\n * None, to use the default 5-fold cross validation,\n * integer, to specify the number of folds in a (Stratified) KFold,\n * An object to be used as a cross-validation generator,\n * An iterable yielding train, test splits.\n\n For integer/None inputs, if the estimator is a classifier and y is\n either binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used.\n In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n These splitters are instantiated with `shuffle=False` so the splits\n will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n A larger number of split will provide no benefits if the number\n of training samples is large enough. Indeed, the training time\n will increase. ``cv`` is not used for model evaluation but for\n prediction.\n\nstack_method : {'auto', 'predict_proba', 'decision_function', 'predict'}, default='auto'\n Methods called for each base estimator. It can be:\n\n * if 'auto', it will try to invoke, for each estimator,\n `'predict_proba'`, `'decision_function'` or `'predict'` in that\n order.\n * otherwise, one of `'predict_proba'`, `'decision_function'` or\n `'predict'`. If the method is not implemented by the estimator, it\n will raise an error.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel all `estimators` `fit`.\n `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n using all processors. See Glossary for more details.\n\npassthrough : bool, default=False\n When False, only the predictions of estimators will be used as\n training data for `final_estimator`. When True, the\n `final_estimator` is trained on the predictions as well as the\n original training data.\n\nverbose : int, default=0\n Verbosity level.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n Class labels.\n\nestimators_ : list of estimators\n The elements of the estimators parameter, having been fitted on the\n training data. If an estimator has been set to `'drop'`, it\n will not appear in `estimators_`.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n Attribute to access any fitted sub-estimators by name.\n\nfinal_estimator_ : estimator\n The classifier which predicts given the output of `estimators_`.\n\nstack_method_ : list of str\n The method used by each base estimator.\n\nNotes\n-----\nWhen `predict_proba` is used by each estimator (i.e. most of the time for\n`stack_method='auto'` or specifically for `stack_method='predict_proba'`),\nThe first column predicted by each estimator will be dropped in the case\nof a binary classification problem. Indeed, both feature will be perfectly\ncollinear.\n\nReferences\n----------\n.. [1] Wolpert, David H. \"Stacked generalization.\" Neural networks 5.2\n (1992): 241-259.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.ensemble import StackingClassifier\n>>> X, y = load_iris(return_X_y=True)\n>>> estimators = [\n... ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),\n... ('svr', make_pipeline(StandardScaler(),\n... LinearSVC(random_state=42)))\n... ]\n>>> clf = StackingClassifier(\n... 
estimators=estimators, final_estimator=LogisticRegression()\n... )\n>>> from sklearn.model_selection import train_test_split\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, stratify=y, random_state=42\n... )\n>>> clf.fit(X_train, y_train).score(X_test, y_test)\n0.9...", + "code": "class StackingClassifier(ClassifierMixin, _BaseStacking):\n \"\"\"Stack of estimators with a final classifier.\n\n Stacked generalization consists in stacking the output of individual\n estimator and use a classifier to compute the final prediction. Stacking\n allows to use the strength of each individual estimator by using their\n output as input of a final estimator.\n\n Note that `estimators_` are fitted on the full `X` while `final_estimator_`\n is trained using cross-validated predictions of the base estimators using\n `cross_val_predict`.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.22\n\n Parameters\n ----------\n estimators : list of (str, estimator)\n Base estimators which will be stacked together. Each element of the\n list is defined as a tuple of string (i.e. name) and an estimator\n instance. An estimator can be set to 'drop' using `set_params`.\n\n final_estimator : estimator, default=None\n A classifier which will be used to combine the base estimators.\n The default classifier is a\n :class:`~sklearn.linear_model.LogisticRegression`.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy used in\n `cross_val_predict` to train `final_estimator`. Possible inputs for\n cv are:\n\n * None, to use the default 5-fold cross validation,\n * integer, to specify the number of folds in a (Stratified) KFold,\n * An object to be used as a cross-validation generator,\n * An iterable yielding train, test splits.\n\n For integer/None inputs, if the estimator is a classifier and y is\n either binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used.\n In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n These splitters are instantiated with `shuffle=False` so the splits\n will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n A larger number of split will provide no benefits if the number\n of training samples is large enough. Indeed, the training time\n will increase. ``cv`` is not used for model evaluation but for\n prediction.\n\n stack_method : {'auto', 'predict_proba', 'decision_function', 'predict'}, \\\n default='auto'\n Methods called for each base estimator. It can be:\n\n * if 'auto', it will try to invoke, for each estimator,\n `'predict_proba'`, `'decision_function'` or `'predict'` in that\n order.\n * otherwise, one of `'predict_proba'`, `'decision_function'` or\n `'predict'`. If the method is not implemented by the estimator, it\n will raise an error.\n\n n_jobs : int, default=None\n The number of jobs to run in parallel all `estimators` `fit`.\n `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n using all processors. See Glossary for more details.\n\n passthrough : bool, default=False\n When False, only the predictions of estimators will be used as\n training data for `final_estimator`. 
When True, the\n `final_estimator` is trained on the predictions as well as the\n original training data.\n\n verbose : int, default=0\n Verbosity level.\n\n Attributes\n ----------\n classes_ : ndarray of shape (n_classes,)\n Class labels.\n\n estimators_ : list of estimators\n The elements of the estimators parameter, having been fitted on the\n training data. If an estimator has been set to `'drop'`, it\n will not appear in `estimators_`.\n\n named_estimators_ : :class:`~sklearn.utils.Bunch`\n Attribute to access any fitted sub-estimators by name.\n\n final_estimator_ : estimator\n The classifier which predicts given the output of `estimators_`.\n\n stack_method_ : list of str\n The method used by each base estimator.\n\n Notes\n -----\n When `predict_proba` is used by each estimator (i.e. most of the time for\n `stack_method='auto'` or specifically for `stack_method='predict_proba'`),\n The first column predicted by each estimator will be dropped in the case\n of a binary classification problem. Indeed, both feature will be perfectly\n collinear.\n\n References\n ----------\n .. [1] Wolpert, David H. \"Stacked generalization.\" Neural networks 5.2\n (1992): 241-259.\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.ensemble import RandomForestClassifier\n >>> from sklearn.svm import LinearSVC\n >>> from sklearn.linear_model import LogisticRegression\n >>> from sklearn.preprocessing import StandardScaler\n >>> from sklearn.pipeline import make_pipeline\n >>> from sklearn.ensemble import StackingClassifier\n >>> X, y = load_iris(return_X_y=True)\n >>> estimators = [\n ... ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),\n ... ('svr', make_pipeline(StandardScaler(),\n ... LinearSVC(random_state=42)))\n ... ]\n >>> clf = StackingClassifier(\n ... estimators=estimators, final_estimator=LogisticRegression()\n ... )\n >>> from sklearn.model_selection import train_test_split\n >>> X_train, X_test, y_train, y_test = train_test_split(\n ... X, y, stratify=y, random_state=42\n ... )\n >>> clf.fit(X_train, y_train).score(X_test, y_test)\n 0.9...\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, estimators, final_estimator=None, *, cv=None,\n stack_method='auto', n_jobs=None, passthrough=False,\n verbose=0):\n super().__init__(\n estimators=estimators,\n final_estimator=final_estimator,\n cv=cv,\n stack_method=stack_method,\n n_jobs=n_jobs,\n passthrough=passthrough,\n verbose=verbose\n )\n\n def _validate_final_estimator(self):\n self._clone_final_estimator(default=LogisticRegression())\n if not is_classifier(self.final_estimator_):\n raise ValueError(\n \"'final_estimator' parameter should be a classifier. Got {}\"\n .format(self.final_estimator_)\n )\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the estimators.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
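The StackingClassifier record above follows the same shape as every class entry in this dump: an `id`, a `qname`, a list of method ids, `superclasses`, the extracted `docstring` and `code`, and `instance_attributes`. A minimal sketch of how such records could be indexed and queried; the file name and the top-level `"classes"` key are assumptions for illustration, not something shown in the dump itself:

```python
import json

# Minimal sketch: index class records from the API dump by their "qname"
# field and look up the StackingClassifier entry documented above.
# The file name and the top-level "classes" key are assumptions; adjust
# them to wherever the dump actually stores its class records.
with open("scikit-learn_api.json") as fh:
    dump = json.load(fh)

by_qname = {cls["qname"]: cls for cls in dump.get("classes", [])}

entry = by_qname["sklearn.ensemble._stacking.StackingClassifier"]
print(entry["superclasses"])            # ['ClassifierMixin', '_BaseStacking']
for method_id in entry["methods"]:      # ids like ".../StackingClassifier/fit"
    print(method_id.rsplit("/", 1)[-1])
```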
If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n Returns\n -------\n self : object\n \"\"\"\n check_classification_targets(y)\n self._le = LabelEncoder().fit(y)\n self.classes_ = self._le.classes_\n return super().fit(X, self._le.transform(y), sample_weight)\n\n @if_delegate_has_method(delegate='final_estimator_')\n def predict(self, X, **predict_params):\n \"\"\"Predict target for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n **predict_params : dict of str -> obj\n Parameters to the `predict` called by the `final_estimator`. Note\n that this may be used to return uncertainties from some estimators\n with `return_std` or `return_cov`. Be aware that it will only\n accounts for uncertainty in the final estimator.\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n Predicted targets.\n \"\"\"\n y_pred = super().predict(X, **predict_params)\n return self._le.inverse_transform(y_pred)\n\n @if_delegate_has_method(delegate='final_estimator_')\n def predict_proba(self, X):\n \"\"\"Predict class probabilities for X using\n `final_estimator_.predict_proba`.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n Returns\n -------\n probabilities : ndarray of shape (n_samples, n_classes) or \\\n list of ndarray of shape (n_output,)\n The class probabilities of the input samples.\n \"\"\"\n check_is_fitted(self)\n return self.final_estimator_.predict_proba(self.transform(X))\n\n @if_delegate_has_method(delegate='final_estimator_')\n def decision_function(self, X):\n \"\"\"Predict decision function for samples in X using\n `final_estimator_.decision_function`.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n Returns\n -------\n decisions : ndarray of shape (n_samples,), (n_samples, n_classes), \\\n or (n_samples, n_classes * (n_classes-1) / 2)\n The decision function computed the final estimator.\n \"\"\"\n check_is_fitted(self)\n return self.final_estimator_.decision_function(self.transform(X))\n\n def transform(self, X):\n \"\"\"Return class labels or probabilities for X for each estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n Returns\n -------\n y_preds : ndarray of shape (n_samples, n_estimators) or \\\n (n_samples, n_classes * n_estimators)\n Prediction outputs for each estimator.\n \"\"\"\n return self._transform(X)\n\n def _sk_visual_block_(self):\n # If final_estimator's default changes then this should be\n # updated.\n if self.final_estimator is None:\n final_estimator = LogisticRegression()\n else:\n final_estimator = self.final_estimator\n return super()._sk_visual_block_(final_estimator)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor", + "name": "StackingRegressor", + "qname": "sklearn.ensemble._stacking.StackingRegressor", + "decorators": [], + "superclasses": ["RegressorMixin", 
"_BaseStacking"], + "methods": [ + "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/__init__", + "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/_validate_final_estimator", + "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/fit", + "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/transform", + "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/_sk_visual_block_" + ], + "is_public": false, + "reexported_by": [], + "description": "Stack of estimators with a final regressor.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a regressor to compute the final prediction. Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22", + "docstring": "Stack of estimators with a final regressor.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a regressor to compute the final prediction. Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nestimators : list of (str, estimator)\n Base estimators which will be stacked together. Each element of the\n list is defined as a tuple of string (i.e. name) and an estimator\n instance. An estimator can be set to 'drop' using `set_params`.\n\nfinal_estimator : estimator, default=None\n A regressor which will be used to combine the base estimators.\n The default regressor is a :class:`~sklearn.linear_model.RidgeCV`.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy used in\n `cross_val_predict` to train `final_estimator`. Possible inputs for\n cv are:\n\n * None, to use the default 5-fold cross validation,\n * integer, to specify the number of folds in a (Stratified) KFold,\n * An object to be used as a cross-validation generator,\n * An iterable yielding train, test splits.\n\n For integer/None inputs, if the estimator is a classifier and y is\n either binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used.\n In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n These splitters are instantiated with `shuffle=False` so the splits\n will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n A larger number of split will provide no benefits if the number\n of training samples is large enough. Indeed, the training time\n will increase. ``cv`` is not used for model evaluation but for\n prediction.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for `fit` of all `estimators`.\n `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n using all processors. See Glossary for more details.\n\npassthrough : bool, default=False\n When False, only the predictions of estimators will be used as\n training data for `final_estimator`. 
When True, the\n `final_estimator` is trained on the predictions as well as the\n original training data.\n\nverbose : int, default=0\n Verbosity level.\n\nAttributes\n----------\nestimators_ : list of estimator\n The elements of the estimators parameter, having been fitted on the\n training data. If an estimator has been set to `'drop'`, it\n will not appear in `estimators_`.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n Attribute to access any fitted sub-estimators by name.\n\n\nfinal_estimator_ : estimator\n The regressor to stacked the base estimators fitted.\n\nReferences\n----------\n.. [1] Wolpert, David H. \"Stacked generalization.\" Neural networks 5.2\n (1992): 241-259.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.linear_model import RidgeCV\n>>> from sklearn.svm import LinearSVR\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> from sklearn.ensemble import StackingRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> estimators = [\n... ('lr', RidgeCV()),\n... ('svr', LinearSVR(random_state=42))\n... ]\n>>> reg = StackingRegressor(\n... estimators=estimators,\n... final_estimator=RandomForestRegressor(n_estimators=10,\n... random_state=42)\n... )\n>>> from sklearn.model_selection import train_test_split\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=42\n... )\n>>> reg.fit(X_train, y_train).score(X_test, y_test)\n0.3...", + "code": "class StackingRegressor(RegressorMixin, _BaseStacking):\n \"\"\"Stack of estimators with a final regressor.\n\n Stacked generalization consists in stacking the output of individual\n estimator and use a regressor to compute the final prediction. Stacking\n allows to use the strength of each individual estimator by using their\n output as input of a final estimator.\n\n Note that `estimators_` are fitted on the full `X` while `final_estimator_`\n is trained using cross-validated predictions of the base estimators using\n `cross_val_predict`.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.22\n\n Parameters\n ----------\n estimators : list of (str, estimator)\n Base estimators which will be stacked together. Each element of the\n list is defined as a tuple of string (i.e. name) and an estimator\n instance. An estimator can be set to 'drop' using `set_params`.\n\n final_estimator : estimator, default=None\n A regressor which will be used to combine the base estimators.\n The default regressor is a :class:`~sklearn.linear_model.RidgeCV`.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy used in\n `cross_val_predict` to train `final_estimator`. Possible inputs for\n cv are:\n\n * None, to use the default 5-fold cross validation,\n * integer, to specify the number of folds in a (Stratified) KFold,\n * An object to be used as a cross-validation generator,\n * An iterable yielding train, test splits.\n\n For integer/None inputs, if the estimator is a classifier and y is\n either binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used.\n In all other cases, :class:`~sklearn.model_selection.KFold` is used.\n These splitters are instantiated with `shuffle=False` so the splits\n will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n A larger number of split will provide no benefits if the number\n of training samples is large enough. 
Indeed, the training time\n will increase. ``cv`` is not used for model evaluation but for\n prediction.\n\n n_jobs : int, default=None\n The number of jobs to run in parallel for `fit` of all `estimators`.\n `None` means 1 unless in a `joblib.parallel_backend` context. -1 means\n using all processors. See Glossary for more details.\n\n passthrough : bool, default=False\n When False, only the predictions of estimators will be used as\n training data for `final_estimator`. When True, the\n `final_estimator` is trained on the predictions as well as the\n original training data.\n\n verbose : int, default=0\n Verbosity level.\n\n Attributes\n ----------\n estimators_ : list of estimator\n The elements of the estimators parameter, having been fitted on the\n training data. If an estimator has been set to `'drop'`, it\n will not appear in `estimators_`.\n\n named_estimators_ : :class:`~sklearn.utils.Bunch`\n Attribute to access any fitted sub-estimators by name.\n\n\n final_estimator_ : estimator\n The regressor to stacked the base estimators fitted.\n\n References\n ----------\n .. [1] Wolpert, David H. \"Stacked generalization.\" Neural networks 5.2\n (1992): 241-259.\n\n Examples\n --------\n >>> from sklearn.datasets import load_diabetes\n >>> from sklearn.linear_model import RidgeCV\n >>> from sklearn.svm import LinearSVR\n >>> from sklearn.ensemble import RandomForestRegressor\n >>> from sklearn.ensemble import StackingRegressor\n >>> X, y = load_diabetes(return_X_y=True)\n >>> estimators = [\n ... ('lr', RidgeCV()),\n ... ('svr', LinearSVR(random_state=42))\n ... ]\n >>> reg = StackingRegressor(\n ... estimators=estimators,\n ... final_estimator=RandomForestRegressor(n_estimators=10,\n ... random_state=42)\n ... )\n >>> from sklearn.model_selection import train_test_split\n >>> X_train, X_test, y_train, y_test = train_test_split(\n ... X, y, random_state=42\n ... )\n >>> reg.fit(X_train, y_train).score(X_test, y_test)\n 0.3...\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, estimators, final_estimator=None, *, cv=None,\n n_jobs=None, passthrough=False, verbose=0):\n super().__init__(\n estimators=estimators,\n final_estimator=final_estimator,\n cv=cv,\n stack_method=\"predict\",\n n_jobs=n_jobs,\n passthrough=passthrough,\n verbose=verbose\n )\n\n def _validate_final_estimator(self):\n self._clone_final_estimator(default=RidgeCV())\n if not is_regressor(self.final_estimator_):\n raise ValueError(\n \"'final_estimator' parameter should be a regressor. Got {}\"\n .format(self.final_estimator_)\n )\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the estimators.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n Returns\n -------\n self : object\n \"\"\"\n y = column_or_1d(y, warn=True)\n return super().fit(X, y, sample_weight)\n\n def transform(self, X):\n \"\"\"Return the predictions for X for each estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n Returns\n -------\n y_preds : ndarray of shape (n_samples, n_estimators)\n Prediction outputs for each estimator.\n \"\"\"\n return self._transform(X)\n\n def _sk_visual_block_(self):\n # If final_estimator's default changes then this should be\n # updated.\n if self.final_estimator is None:\n final_estimator = RidgeCV()\n else:\n final_estimator = self.final_estimator\n return super()._sk_visual_block_(final_estimator)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking", + "name": "_BaseStacking", + "qname": "sklearn.ensemble._stacking._BaseStacking", + "decorators": [], + "superclasses": ["TransformerMixin", "_BaseHeterogeneousEnsemble"], + "methods": [ + "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/__init__", + "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_clone_final_estimator", + "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_concatenate_predictions", + "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_method_name", + "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/fit", + "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/n_features_in_@getter", + "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_transform", + "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/predict", + "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_sk_visual_block_" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for stacking method.", + "docstring": "Base class for stacking method.", + "code": "class _BaseStacking(TransformerMixin, _BaseHeterogeneousEnsemble,\n metaclass=ABCMeta):\n \"\"\"Base class for stacking method.\"\"\"\n\n @abstractmethod\n def __init__(self, estimators, final_estimator=None, *, cv=None,\n stack_method='auto', n_jobs=None, verbose=0,\n passthrough=False):\n super().__init__(estimators=estimators)\n self.final_estimator = final_estimator\n self.cv = cv\n self.stack_method = stack_method\n self.n_jobs = n_jobs\n self.verbose = verbose\n self.passthrough = passthrough\n\n def _clone_final_estimator(self, default):\n if self.final_estimator is not None:\n self.final_estimator_ = clone(self.final_estimator)\n else:\n self.final_estimator_ = clone(default)\n\n def _concatenate_predictions(self, X, predictions):\n \"\"\"Concatenate the predictions of each first layer learner and\n possibly the input dataset `X`.\n\n If `X` is sparse and `self.passthrough` is False, the output of\n `transform` will be dense (the predictions). If `X` is sparse\n and `self.passthrough` is True, the output of `transform` will\n be sparse.\n\n This helper is in charge of ensuring the predictions are 2D arrays and\n it will drop one of the probability column when using probabilities\n in the binary case. 
Indeed, p(y|c=0) = 1 - p(y|c=1).\n \"\"\"\n X_meta = []\n for est_idx, preds in enumerate(predictions):\n # case where the estimator returned a 1D array\n if preds.ndim == 1:\n X_meta.append(preds.reshape(-1, 1))\n else:\n if (self.stack_method_[est_idx] == 'predict_proba' and\n len(self.classes_) == 2):\n # Remove the first column when using probabilities in\n # binary classification because both features are perfectly\n # collinear.\n X_meta.append(preds[:, 1:])\n else:\n X_meta.append(preds)\n if self.passthrough:\n X_meta.append(X)\n if sparse.issparse(X):\n return sparse.hstack(X_meta, format=X.format)\n\n return np.hstack(X_meta)\n\n @staticmethod\n def _method_name(name, estimator, method):\n if estimator == 'drop':\n return None\n if method == 'auto':\n if getattr(estimator, 'predict_proba', None):\n return 'predict_proba'\n elif getattr(estimator, 'decision_function', None):\n return 'decision_function'\n else:\n return 'predict'\n else:\n if not hasattr(estimator, method):\n raise ValueError('Underlying estimator {} does not implement '\n 'the method {}.'.format(name, method))\n return method\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the estimators.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n .. versionchanged:: 0.23\n when not None, `sample_weight` is passed to all underlying\n estimators\n\n Returns\n -------\n self : object\n \"\"\"\n # all_estimators contains all estimators, both the ones to be fitted\n # and the 'drop' strings.\n names, all_estimators = self._validate_estimators()\n self._validate_final_estimator()\n\n stack_method = [self.stack_method] * len(all_estimators)\n\n # Fit the base estimators on the whole training data. Those\n # base estimators will be used in transform, predict, and\n # predict_proba. 
They are exposed publicly.\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n delayed(_fit_single_estimator)(clone(est), X, y, sample_weight)\n for est in all_estimators if est != 'drop'\n )\n\n self.named_estimators_ = Bunch()\n est_fitted_idx = 0\n for name_est, org_est in zip(names, all_estimators):\n if org_est != 'drop':\n self.named_estimators_[name_est] = self.estimators_[\n est_fitted_idx]\n est_fitted_idx += 1\n else:\n self.named_estimators_[name_est] = 'drop'\n\n # To train the meta-classifier using the most data as possible, we use\n # a cross-validation to obtain the output of the stacked estimators.\n\n # To ensure that the data provided to each estimator are the same, we\n # need to set the random state of the cv if there is one and we need to\n # take a copy.\n cv = check_cv(self.cv, y=y, classifier=is_classifier(self))\n if hasattr(cv, 'random_state') and cv.random_state is None:\n cv.random_state = np.random.RandomState()\n\n self.stack_method_ = [\n self._method_name(name, est, meth)\n for name, est, meth in zip(names, all_estimators, stack_method)\n ]\n fit_params = ({\"sample_weight\": sample_weight}\n if sample_weight is not None\n else None)\n predictions = Parallel(n_jobs=self.n_jobs)(\n delayed(cross_val_predict)(clone(est), X, y, cv=deepcopy(cv),\n method=meth, n_jobs=self.n_jobs,\n fit_params=fit_params,\n verbose=self.verbose)\n for est, meth in zip(all_estimators, self.stack_method_)\n if est != 'drop'\n )\n\n # Only not None or not 'drop' estimators will be used in transform.\n # Remove the None from the method as well.\n self.stack_method_ = [\n meth for (meth, est) in zip(self.stack_method_, all_estimators)\n if est != 'drop'\n ]\n\n X_meta = self._concatenate_predictions(X, predictions)\n _fit_single_estimator(self.final_estimator_, X_meta, y,\n sample_weight=sample_weight)\n\n return self\n\n @property\n def n_features_in_(self):\n \"\"\"Number of features seen during :term:`fit`.\"\"\"\n try:\n check_is_fitted(self)\n except NotFittedError as nfe:\n raise AttributeError(\n f\"{self.__class__.__name__} object has no attribute \"\n f\"n_features_in_\") from nfe\n return self.estimators_[0].n_features_in_\n\n def _transform(self, X):\n \"\"\"Concatenate and return the predictions of the estimators.\"\"\"\n check_is_fitted(self)\n predictions = [\n getattr(est, meth)(X)\n for est, meth in zip(self.estimators_, self.stack_method_)\n if est != 'drop'\n ]\n return self._concatenate_predictions(X, predictions)\n\n @if_delegate_has_method(delegate='final_estimator_')\n def predict(self, X, **predict_params):\n \"\"\"Predict target for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n **predict_params : dict of str -> obj\n Parameters to the `predict` called by the `final_estimator`. Note\n that this may be used to return uncertainties from some estimators\n with `return_std` or `return_cov`. 
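The `fit` logic shown here assembles the meta-estimator's training matrix from out-of-fold predictions: one `cross_val_predict` per base estimator, with the first probability column dropped in the binary case before everything is stacked (see `_concatenate_predictions` above). A condensed, self-contained sketch of that data flow, using stand-in estimators rather than the class itself:

```python
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from sklearn.naive_bayes import GaussianNB

# Sketch of the meta-feature assembly done by _BaseStacking.fit:
# out-of-fold predict_proba per base estimator, first column dropped
# in the binary case (p0 = 1 - p1, so the columns are collinear).
X, y = make_classification(n_samples=200, random_state=0)
base_estimators = [LogisticRegression(), GaussianNB()]

columns = []
for est in base_estimators:
    preds = cross_val_predict(est, X, y, cv=5, method="predict_proba")
    columns.append(preds[:, 1:])      # keep one of the two proba columns
X_meta = np.hstack(columns)           # shape: (n_samples, n_estimators)

final = LogisticRegression().fit(X_meta, y)  # the meta-classifier
```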
Be aware that it will only\n accounts for uncertainty in the final estimator.\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n Predicted targets.\n \"\"\"\n\n check_is_fitted(self)\n return self.final_estimator_.predict(\n self.transform(X), **predict_params\n )\n\n def _sk_visual_block_(self, final_estimator):\n names, estimators = zip(*self.estimators)\n parallel = _VisualBlock('parallel', estimators, names=names,\n dash_wrapped=False)\n serial = _VisualBlock('serial', (parallel, final_estimator),\n dash_wrapped=False)\n return _VisualBlock('serial', [serial])", + "instance_attributes": [ + { + "name": "stack_method", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "passthrough", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "named_estimators_", + "types": { + "kind": "NamedType", + "name": "Bunch" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier", + "name": "VotingClassifier", + "qname": "sklearn.ensemble._voting.VotingClassifier", + "decorators": [], + "superclasses": ["ClassifierMixin", "_BaseVoting"], + "methods": [ + "scikit-learn/sklearn.ensemble._voting/VotingClassifier/__init__", + "scikit-learn/sklearn.ensemble._voting/VotingClassifier/fit", + "scikit-learn/sklearn.ensemble._voting/VotingClassifier/predict", + "scikit-learn/sklearn.ensemble._voting/VotingClassifier/_collect_probas", + "scikit-learn/sklearn.ensemble._voting/VotingClassifier/_predict_proba", + "scikit-learn/sklearn.ensemble._voting/VotingClassifier/predict_proba@getter", + "scikit-learn/sklearn.ensemble._voting/VotingClassifier/transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Soft Voting/Majority Rule classifier for unfitted estimators.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17", + "docstring": "Soft Voting/Majority Rule classifier for unfitted estimators.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nestimators : list of (str, estimator) tuples\n Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones\n of those original estimators that will be stored in the class attribute\n ``self.estimators_``. An estimator can be set to ``'drop'``\n using ``set_params``.\n\n .. versionchanged:: 0.21\n ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n support was removed in 0.24.\n\nvoting : {'hard', 'soft'}, default='hard'\n If 'hard', uses predicted class labels for majority rule voting.\n Else if 'soft', predicts the class label based on the argmax of\n the sums of the predicted probabilities, which is recommended for\n an ensemble of well-calibrated classifiers.\n\nweights : array-like of shape (n_classifiers,), default=None\n Sequence of weights (`float` or `int`) to weight the occurrences of\n predicted class labels (`hard` voting) or class probabilities\n before averaging (`soft` voting). Uses uniform weights if `None`.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for ``fit``.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. 
versionadded:: 0.18\n\nflatten_transform : bool, default=True\n Affects shape of transform output only when voting='soft'\n If voting='soft' and flatten_transform=True, transform method returns\n matrix with shape (n_samples, n_classifiers * n_classes). If\n flatten_transform=False, it returns\n (n_classifiers, n_samples, n_classes).\n\nverbose : bool, default=False\n If True, the time elapsed while fitting will be printed as it\n is completed.\n\n .. versionadded:: 0.23\n\nAttributes\n----------\nestimators_ : list of classifiers\n The collection of fitted sub-estimators as defined in ``estimators``\n that are not 'drop'.\n\nnamed_estimators_ : :class:`~sklearn.utils.Bunch`\n Attribute to access any fitted sub-estimators by name.\n\n .. versionadded:: 0.20\n\nclasses_ : array-like of shape (n_predictions,)\n The classes labels.\n\nSee Also\n--------\nVotingRegressor : Prediction voting regressor.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.ensemble import RandomForestClassifier, VotingClassifier\n>>> clf1 = LogisticRegression(multi_class='multinomial', random_state=1)\n>>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1)\n>>> clf3 = GaussianNB()\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> eclf1 = VotingClassifier(estimators=[\n... ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')\n>>> eclf1 = eclf1.fit(X, y)\n>>> print(eclf1.predict(X))\n[1 1 1 2 2 2]\n>>> np.array_equal(eclf1.named_estimators_.lr.predict(X),\n... eclf1.named_estimators_['lr'].predict(X))\nTrue\n>>> eclf2 = VotingClassifier(estimators=[\n... ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n... voting='soft')\n>>> eclf2 = eclf2.fit(X, y)\n>>> print(eclf2.predict(X))\n[1 1 1 2 2 2]\n>>> eclf3 = VotingClassifier(estimators=[\n... ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n... voting='soft', weights=[2,1,1],\n... flatten_transform=True)\n>>> eclf3 = eclf3.fit(X, y)\n>>> print(eclf3.predict(X))\n[1 1 1 2 2 2]\n>>> print(eclf3.transform(X).shape)\n(6, 6)", + "code": "class VotingClassifier(ClassifierMixin, _BaseVoting):\n \"\"\"Soft Voting/Majority Rule classifier for unfitted estimators.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.17\n\n Parameters\n ----------\n estimators : list of (str, estimator) tuples\n Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones\n of those original estimators that will be stored in the class attribute\n ``self.estimators_``. An estimator can be set to ``'drop'``\n using ``set_params``.\n\n .. versionchanged:: 0.21\n ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n support was removed in 0.24.\n\n voting : {'hard', 'soft'}, default='hard'\n If 'hard', uses predicted class labels for majority rule voting.\n Else if 'soft', predicts the class label based on the argmax of\n the sums of the predicted probabilities, which is recommended for\n an ensemble of well-calibrated classifiers.\n\n weights : array-like of shape (n_classifiers,), default=None\n Sequence of weights (`float` or `int`) to weight the occurrences of\n predicted class labels (`hard` voting) or class probabilities\n before averaging (`soft` voting). 
Uses uniform weights if `None`.\n\n n_jobs : int, default=None\n The number of jobs to run in parallel for ``fit``.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.18\n\n flatten_transform : bool, default=True\n Affects shape of transform output only when voting='soft'\n If voting='soft' and flatten_transform=True, transform method returns\n matrix with shape (n_samples, n_classifiers * n_classes). If\n flatten_transform=False, it returns\n (n_classifiers, n_samples, n_classes).\n\n verbose : bool, default=False\n If True, the time elapsed while fitting will be printed as it\n is completed.\n\n .. versionadded:: 0.23\n\n Attributes\n ----------\n estimators_ : list of classifiers\n The collection of fitted sub-estimators as defined in ``estimators``\n that are not 'drop'.\n\n named_estimators_ : :class:`~sklearn.utils.Bunch`\n Attribute to access any fitted sub-estimators by name.\n\n .. versionadded:: 0.20\n\n classes_ : array-like of shape (n_predictions,)\n The classes labels.\n\n See Also\n --------\n VotingRegressor : Prediction voting regressor.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.linear_model import LogisticRegression\n >>> from sklearn.naive_bayes import GaussianNB\n >>> from sklearn.ensemble import RandomForestClassifier, VotingClassifier\n >>> clf1 = LogisticRegression(multi_class='multinomial', random_state=1)\n >>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1)\n >>> clf3 = GaussianNB()\n >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n >>> y = np.array([1, 1, 1, 2, 2, 2])\n >>> eclf1 = VotingClassifier(estimators=[\n ... ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')\n >>> eclf1 = eclf1.fit(X, y)\n >>> print(eclf1.predict(X))\n [1 1 1 2 2 2]\n >>> np.array_equal(eclf1.named_estimators_.lr.predict(X),\n ... eclf1.named_estimators_['lr'].predict(X))\n True\n >>> eclf2 = VotingClassifier(estimators=[\n ... ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n ... voting='soft')\n >>> eclf2 = eclf2.fit(X, y)\n >>> print(eclf2.predict(X))\n [1 1 1 2 2 2]\n >>> eclf3 = VotingClassifier(estimators=[\n ... ('lr', clf1), ('rf', clf2), ('gnb', clf3)],\n ... voting='soft', weights=[2,1,1],\n ... flatten_transform=True)\n >>> eclf3 = eclf3.fit(X, y)\n >>> print(eclf3.predict(X))\n [1 1 1 2 2 2]\n >>> print(eclf3.transform(X).shape)\n (6, 6)\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, estimators, *, voting='hard', weights=None,\n n_jobs=None, flatten_transform=True, verbose=False):\n super().__init__(estimators=estimators)\n self.voting = voting\n self.weights = weights\n self.n_jobs = n_jobs\n self.flatten_transform = flatten_transform\n self.verbose = verbose\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the estimators.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n .. 
versionadded:: 0.18\n\n Returns\n -------\n self : object\n\n \"\"\"\n check_classification_targets(y)\n if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:\n raise NotImplementedError('Multilabel and multi-output'\n ' classification is not supported.')\n\n if self.voting not in ('soft', 'hard'):\n raise ValueError(\"Voting must be 'soft' or 'hard'; got (voting=%r)\"\n % self.voting)\n\n self.le_ = LabelEncoder().fit(y)\n self.classes_ = self.le_.classes_\n transformed_y = self.le_.transform(y)\n\n return super().fit(X, transformed_y, sample_weight)\n\n def predict(self, X):\n \"\"\"Predict class labels for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n maj : array-like of shape (n_samples,)\n Predicted class labels.\n \"\"\"\n check_is_fitted(self)\n if self.voting == 'soft':\n maj = np.argmax(self.predict_proba(X), axis=1)\n\n else: # 'hard' voting\n predictions = self._predict(X)\n maj = np.apply_along_axis(\n lambda x: np.argmax(\n np.bincount(x, weights=self._weights_not_none)),\n axis=1, arr=predictions)\n\n maj = self.le_.inverse_transform(maj)\n\n return maj\n\n def _collect_probas(self, X):\n \"\"\"Collect results from clf.predict calls.\"\"\"\n return np.asarray([clf.predict_proba(X) for clf in self.estimators_])\n\n def _predict_proba(self, X):\n \"\"\"Predict class probabilities for X in 'soft' voting.\"\"\"\n check_is_fitted(self)\n avg = np.average(self._collect_probas(X), axis=0,\n weights=self._weights_not_none)\n return avg\n\n @property\n def predict_proba(self):\n \"\"\"Compute probabilities of possible outcomes for samples in X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n avg : array-like of shape (n_samples, n_classes)\n Weighted average probability for each class per sample.\n \"\"\"\n if self.voting == 'hard':\n raise AttributeError(\"predict_proba is not available when\"\n \" voting=%r\" % self.voting)\n return self._predict_proba\n\n def transform(self, X):\n \"\"\"Return class labels or probabilities for X for each estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n Returns\n -------\n probabilities_or_labels\n If `voting='soft'` and `flatten_transform=True`:\n returns ndarray of shape (n_classifiers, n_samples *\n n_classes), being class probabilities calculated by each\n classifier.\n If `voting='soft' and `flatten_transform=False`:\n ndarray of shape (n_classifiers, n_samples, n_classes)\n If `voting='hard'`:\n ndarray of shape (n_samples, n_classifiers), being\n class labels predicted by each classifier.\n \"\"\"\n check_is_fitted(self)\n\n if self.voting == 'soft':\n probas = self._collect_probas(X)\n if not self.flatten_transform:\n return probas\n return np.hstack(probas)\n\n else:\n return self._predict(X)", + "instance_attributes": [ + { + "name": "voting", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "flatten_transform", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor", + "name": "VotingRegressor", + "qname": "sklearn.ensemble._voting.VotingRegressor", + "decorators": [], + 
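`VotingClassifier.predict` above reduces to two aggregation rules: a weighted average of `predict_proba` outputs for `voting='soft'`, and a weighted `bincount` majority over encoded labels for `voting='hard'`. A standalone sketch of both rules on made-up probabilities and weights:

```python
import numpy as np

# Sketch of the two aggregation rules in VotingClassifier.predict.
# probas: per-classifier predict_proba outputs; labels: hard votes.
weights = np.array([2.0, 1.0, 1.0])
rng = np.random.RandomState(0)
probas = rng.dirichlet([1, 1], size=(3, 5))   # (n_clf, n_samples, n_classes)
labels = probas.argmax(axis=2)                # (n_clf, n_samples)

# voting='soft': weighted mean of probabilities, then argmax per sample.
soft = np.average(probas, axis=0, weights=weights).argmax(axis=1)

# voting='hard': weighted majority vote via bincount per sample.
hard = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes, weights=weights)),
    axis=0, arr=labels)

print(soft, hard)  # two arrays of 5 predicted class indices each
```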
"superclasses": ["RegressorMixin", "_BaseVoting"], + "methods": [ + "scikit-learn/sklearn.ensemble._voting/VotingRegressor/__init__", + "scikit-learn/sklearn.ensemble._voting/VotingRegressor/fit", + "scikit-learn/sklearn.ensemble._voting/VotingRegressor/predict", + "scikit-learn/sklearn.ensemble._voting/VotingRegressor/transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Prediction voting regressor for unfitted estimators.\n\nA voting regressor is an ensemble meta-estimator that fits several base\nregressors, each on the whole dataset. Then it averages the individual\npredictions to form a final prediction.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21", + "docstring": "Prediction voting regressor for unfitted estimators.\n\nA voting regressor is an ensemble meta-estimator that fits several base\nregressors, each on the whole dataset. Then it averages the individual\npredictions to form a final prediction.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\nParameters\n----------\nestimators : list of (str, estimator) tuples\n Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones\n of those original estimators that will be stored in the class attribute\n ``self.estimators_``. An estimator can be set to ``'drop'`` using\n ``set_params``.\n\n .. versionchanged:: 0.21\n ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n support was removed in 0.24.\n\nweights : array-like of shape (n_regressors,), default=None\n Sequence of weights (`float` or `int`) to weight the occurrences of\n predicted values before averaging. Uses uniform weights if `None`.\n\nn_jobs : int, default=None\n The number of jobs to run in parallel for ``fit``.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting will be printed as it\n is completed.\n\n .. versionadded:: 0.23\n\nAttributes\n----------\nestimators_ : list of regressors\n The collection of fitted sub-estimators as defined in ``estimators``\n that are not 'drop'.\n\nnamed_estimators_ : Bunch\n Attribute to access any fitted sub-estimators by name.\n\n .. versionadded:: 0.20\n\nSee Also\n--------\nVotingClassifier : Soft Voting/Majority Rule classifier.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> from sklearn.ensemble import VotingRegressor\n>>> r1 = LinearRegression()\n>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)\n>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])\n>>> y = np.array([2, 6, 12, 20, 30, 42])\n>>> er = VotingRegressor([('lr', r1), ('rf', r2)])\n>>> print(er.fit(X, y).predict(X))\n[ 3.3 5.7 11.8 19.7 28. 40.3]", + "code": "class VotingRegressor(RegressorMixin, _BaseVoting):\n \"\"\"Prediction voting regressor for unfitted estimators.\n\n A voting regressor is an ensemble meta-estimator that fits several base\n regressors, each on the whole dataset. Then it averages the individual\n predictions to form a final prediction.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.21\n\n Parameters\n ----------\n estimators : list of (str, estimator) tuples\n Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones\n of those original estimators that will be stored in the class attribute\n ``self.estimators_``. 
An estimator can be set to ``'drop'`` using\n ``set_params``.\n\n .. versionchanged:: 0.21\n ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n support was removed in 0.24.\n\n weights : array-like of shape (n_regressors,), default=None\n Sequence of weights (`float` or `int`) to weight the occurrences of\n predicted values before averaging. Uses uniform weights if `None`.\n\n n_jobs : int, default=None\n The number of jobs to run in parallel for ``fit``.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n verbose : bool, default=False\n If True, the time elapsed while fitting will be printed as it\n is completed.\n\n .. versionadded:: 0.23\n\n Attributes\n ----------\n estimators_ : list of regressors\n The collection of fitted sub-estimators as defined in ``estimators``\n that are not 'drop'.\n\n named_estimators_ : Bunch\n Attribute to access any fitted sub-estimators by name.\n\n .. versionadded:: 0.20\n\n See Also\n --------\n VotingClassifier : Soft Voting/Majority Rule classifier.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.linear_model import LinearRegression\n >>> from sklearn.ensemble import RandomForestRegressor\n >>> from sklearn.ensemble import VotingRegressor\n >>> r1 = LinearRegression()\n >>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)\n >>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])\n >>> y = np.array([2, 6, 12, 20, 30, 42])\n >>> er = VotingRegressor([('lr', r1), ('rf', r2)])\n >>> print(er.fit(X, y).predict(X))\n [ 3.3 5.7 11.8 19.7 28. 40.3]\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, estimators, *, weights=None, n_jobs=None,\n verbose=False):\n super().__init__(estimators=estimators)\n self.weights = weights\n self.n_jobs = n_jobs\n self.verbose = verbose\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the estimators.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n Returns\n -------\n self : object\n Fitted estimator.\n \"\"\"\n y = column_or_1d(y, warn=True)\n return super().fit(X, y, sample_weight)\n\n def predict(self, X):\n \"\"\"Predict regression target for X.\n\n The predicted regression target of an input sample is computed as the\n mean predicted regression targets of the estimators in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n The predicted values.\n \"\"\"\n check_is_fitted(self)\n return np.average(self._predict(X), axis=1,\n weights=self._weights_not_none)\n\n def transform(self, X):\n \"\"\"Return predictions for X for each estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n predictions: ndarray of shape (n_samples, n_classifiers)\n Values predicted by each regressor.\n \"\"\"\n check_is_fitted(self)\n return self._predict(X)", + "instance_attributes": [ + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting", + "name": "_BaseVoting", + "qname": "sklearn.ensemble._voting._BaseVoting", + "decorators": [], + "superclasses": ["TransformerMixin", "_BaseHeterogeneousEnsemble"], + "methods": [ + "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_log_message", + "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_weights_not_none@getter", + "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_predict", + "scikit-learn/sklearn.ensemble._voting/_BaseVoting/fit", + "scikit-learn/sklearn.ensemble._voting/_BaseVoting/fit_transform", + "scikit-learn/sklearn.ensemble._voting/_BaseVoting/n_features_in_@getter", + "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_sk_visual_block_", + "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for voting.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "docstring": "Base class for voting.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "code": "class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble):\n \"\"\"Base class for voting.\n\n Warning: This class should not be used directly. 
Use derived classes\n instead.\n \"\"\"\n\n def _log_message(self, name, idx, total):\n if not self.verbose:\n return None\n return '(%d of %d) Processing %s' % (idx, total, name)\n\n @property\n def _weights_not_none(self):\n \"\"\"Get the weights of not `None` estimators.\"\"\"\n if self.weights is None:\n return None\n return [w for est, w in zip(self.estimators, self.weights)\n if est[1] != 'drop']\n\n def _predict(self, X):\n \"\"\"Collect results from clf.predict calls.\"\"\"\n return np.asarray([est.predict(X) for est in self.estimators_]).T\n\n @abstractmethod\n def fit(self, X, y, sample_weight=None):\n \"\"\"Get common fit operations.\"\"\"\n names, clfs = self._validate_estimators()\n\n if (self.weights is not None and\n len(self.weights) != len(self.estimators)):\n raise ValueError('Number of `estimators` and weights must be equal'\n '; got %d weights, %d estimators'\n % (len(self.weights), len(self.estimators)))\n\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n delayed(_fit_single_estimator)(\n clone(clf), X, y,\n sample_weight=sample_weight,\n message_clsname='Voting',\n message=self._log_message(names[idx],\n idx + 1, len(clfs))\n )\n for idx, clf in enumerate(clfs) if clf != 'drop'\n )\n\n self.named_estimators_ = Bunch()\n\n # Uses 'drop' as placeholder for dropped estimators\n est_iter = iter(self.estimators_)\n for name, est in self.estimators:\n current_est = est if est == 'drop' else next(est_iter)\n self.named_estimators_[name] = current_est\n\n return self\n\n def fit_transform(self, X, y=None, **fit_params):\n \"\"\"Return class labels or probabilities for each estimator.\n\n Return predictions for X for each estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix, dataframe} of shape \\\n (n_samples, n_features)\n Input samples\n\n y : ndarray of shape (n_samples,), default=None\n Target values (None for unsupervised transformations).\n\n **fit_params : dict\n Additional fit parameters.\n\n Returns\n -------\n X_new : ndarray array of shape (n_samples, n_features_new)\n Transformed array.\n \"\"\"\n return super().fit_transform(X, y, **fit_params)\n\n @property\n def n_features_in_(self):\n # For consistency with other estimators we raise a AttributeError so\n # that hasattr() fails if the estimator isn't fitted.\n try:\n check_is_fitted(self)\n except NotFittedError as nfe:\n raise AttributeError(\n \"{} object has no n_features_in_ attribute.\"\n .format(self.__class__.__name__)\n ) from nfe\n\n return self.estimators_[0].n_features_in_\n\n def _sk_visual_block_(self):\n names, estimators = zip(*self.estimators)\n return _VisualBlock('parallel', estimators, names=names)\n\n def _more_tags(self):\n return {\"preserves_dtype\": []}", + "instance_attributes": [ + { + "name": "named_estimators_", + "types": { + "kind": "NamedType", + "name": "Bunch" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier", + "name": "AdaBoostClassifier", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier", + "decorators": [], + "superclasses": ["ClassifierMixin", "BaseWeightBoosting"], + "methods": [ + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/__init__", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/fit", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_validate_estimator", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_real", + 
"scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_discrete", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/predict", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/staged_predict", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/decision_function", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/staged_decision_function", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_compute_proba_from_decision", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/predict_proba", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/staged_predict_proba", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/predict_log_proba" + ], + "is_public": false, + "reexported_by": [], + "description": "An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a meta-estimator that begins by fitting a\nclassifier on the original dataset and then fits additional copies of the\nclassifier on the same dataset but where the weights of incorrectly\nclassified instances are adjusted such that subsequent classifiers focus\nmore on difficult cases.\n\nThis class implements the algorithm known as AdaBoost-SAMME [2].\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.14", + "docstring": "An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a meta-estimator that begins by fitting a\nclassifier on the original dataset and then fits additional copies of the\nclassifier on the same dataset but where the weights of incorrectly\nclassified instances are adjusted such that subsequent classifiers focus\nmore on difficult cases.\n\nThis class implements the algorithm known as AdaBoost-SAMME [2].\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nbase_estimator : object, default=None\n The base estimator from which the boosted ensemble is built.\n Support for sample weighting is required, as well as proper\n ``classes_`` and ``n_classes_`` attributes. If ``None``, then\n the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\n initialized with `max_depth=1`.\n\nn_estimators : int, default=50\n The maximum number of estimators at which boosting is terminated.\n In case of perfect fit, the learning procedure is stopped early.\n\nlearning_rate : float, default=1.\n Weight applied to each classifier at each boosting iteration. A higher\n learning rate increases the contribution of each classifier. 
There is\n a trade-off between the `learning_rate` and `n_estimators` parameters.\n\nalgorithm : {'SAMME', 'SAMME.R'}, default='SAMME.R'\n If 'SAMME.R' then use the SAMME.R real boosting algorithm.\n ``base_estimator`` must support calculation of class probabilities.\n If 'SAMME' then use the SAMME discrete boosting algorithm.\n The SAMME.R algorithm typically converges faster than SAMME,\n achieving a lower test error with fewer boosting iterations.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given at each `base_estimator` at each\n boosting iteration.\n Thus, it is only used when `base_estimator` exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nestimators_ : list of classifiers\n The collection of fitted sub-estimators.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nn_classes_ : int\n The number of classes.\n\nestimator_weights_ : ndarray of floats\n Weights for each estimator in the boosted ensemble.\n\nestimator_errors_ : ndarray of floats\n Classification error for each estimator in the boosted\n ensemble.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances if supported by the\n ``base_estimator`` (when based on decision trees).\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nSee Also\n--------\nAdaBoostRegressor : An AdaBoost regressor that begins by fitting a\n regressor on the original dataset and then fits additional copies of\n the regressor on the same dataset but where the weights of instances\n are adjusted according to the error of the current prediction.\n\nGradientBoostingClassifier : GB builds an additive model in a forward\n stage-wise fashion. Regression trees are fit on the negative gradient\n of the binomial or multinomial deviance loss function. Binary\n classification is a special case where only a single regression tree is\n induced.\n\nsklearn.tree.DecisionTreeClassifier : A non-parametric supervised learning\n method used for classification.\n Creates a model that predicts the value of a target variable by\n learning simple decision rules inferred from the data features.\n\nReferences\n----------\n.. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n on-Line Learning and an Application to Boosting\", 1995.\n\n.. [2] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.\n\nExamples\n--------\n>>> from sklearn.ensemble import AdaBoostClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=1000, n_features=4,\n... n_informative=2, n_redundant=0,\n... 
random_state=0, shuffle=False)\n>>> clf = AdaBoostClassifier(n_estimators=100, random_state=0)\n>>> clf.fit(X, y)\nAdaBoostClassifier(n_estimators=100, random_state=0)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])\n>>> clf.score(X, y)\n0.983...", + "code": "class AdaBoostClassifier(ClassifierMixin, BaseWeightBoosting):\n \"\"\"An AdaBoost classifier.\n\n An AdaBoost [1] classifier is a meta-estimator that begins by fitting a\n classifier on the original dataset and then fits additional copies of the\n classifier on the same dataset but where the weights of incorrectly\n classified instances are adjusted such that subsequent classifiers focus\n more on difficult cases.\n\n This class implements the algorithm known as AdaBoost-SAMME [2].\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.14\n\n Parameters\n ----------\n base_estimator : object, default=None\n The base estimator from which the boosted ensemble is built.\n Support for sample weighting is required, as well as proper\n ``classes_`` and ``n_classes_`` attributes. If ``None``, then\n the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\n initialized with `max_depth=1`.\n\n n_estimators : int, default=50\n The maximum number of estimators at which boosting is terminated.\n In case of perfect fit, the learning procedure is stopped early.\n\n learning_rate : float, default=1.\n Weight applied to each classifier at each boosting iteration. A higher\n learning rate increases the contribution of each classifier. There is\n a trade-off between the `learning_rate` and `n_estimators` parameters.\n\n algorithm : {'SAMME', 'SAMME.R'}, default='SAMME.R'\n If 'SAMME.R' then use the SAMME.R real boosting algorithm.\n ``base_estimator`` must support calculation of class probabilities.\n If 'SAMME' then use the SAMME discrete boosting algorithm.\n The SAMME.R algorithm typically converges faster than SAMME,\n achieving a lower test error with fewer boosting iterations.\n\n random_state : int, RandomState instance or None, default=None\n Controls the random seed given at each `base_estimator` at each\n boosting iteration.\n Thus, it is only used when `base_estimator` exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n base_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\n estimators_ : list of classifiers\n The collection of fitted sub-estimators.\n\n classes_ : ndarray of shape (n_classes,)\n The classes labels.\n\n n_classes_ : int\n The number of classes.\n\n estimator_weights_ : ndarray of floats\n Weights for each estimator in the boosted ensemble.\n\n estimator_errors_ : ndarray of floats\n Classification error for each estimator in the boosted\n ensemble.\n\n feature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances if supported by the\n ``base_estimator`` (when based on decision trees).\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). 
See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n See Also\n --------\n AdaBoostRegressor : An AdaBoost regressor that begins by fitting a\n regressor on the original dataset and then fits additional copies of\n the regressor on the same dataset but where the weights of instances\n are adjusted according to the error of the current prediction.\n\n GradientBoostingClassifier : GB builds an additive model in a forward\n stage-wise fashion. Regression trees are fit on the negative gradient\n of the binomial or multinomial deviance loss function. Binary\n classification is a special case where only a single regression tree is\n induced.\n\n sklearn.tree.DecisionTreeClassifier : A non-parametric supervised learning\n method used for classification.\n Creates a model that predicts the value of a target variable by\n learning simple decision rules inferred from the data features.\n\n References\n ----------\n .. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n on-Line Learning and an Application to Boosting\", 1995.\n\n .. [2] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.\n\n Examples\n --------\n >>> from sklearn.ensemble import AdaBoostClassifier\n >>> from sklearn.datasets import make_classification\n >>> X, y = make_classification(n_samples=1000, n_features=4,\n ... n_informative=2, n_redundant=0,\n ... random_state=0, shuffle=False)\n >>> clf = AdaBoostClassifier(n_estimators=100, random_state=0)\n >>> clf.fit(X, y)\n AdaBoostClassifier(n_estimators=100, random_state=0)\n >>> clf.predict([[0, 0, 0, 0]])\n array([1])\n >>> clf.score(X, y)\n 0.983...\n \"\"\"\n @_deprecate_positional_args\n def __init__(self,\n base_estimator=None, *,\n n_estimators=50,\n learning_rate=1.,\n algorithm='SAMME.R',\n random_state=None):\n\n super().__init__(\n base_estimator=base_estimator,\n n_estimators=n_estimators,\n learning_rate=learning_rate,\n random_state=random_state)\n\n self.algorithm = algorithm\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Build a boosted classifier from the training set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n y : array-like of shape (n_samples,)\n The target values (class labels).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, the sample weights are initialized to\n ``1 / n_samples``.\n\n Returns\n -------\n self : object\n Fitted estimator.\n \"\"\"\n # Check that algorithm is supported\n if self.algorithm not in ('SAMME', 'SAMME.R'):\n raise ValueError(\"algorithm %s is not supported\" % self.algorithm)\n\n # Fit\n return super().fit(X, y, sample_weight)\n\n def _validate_estimator(self):\n \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n super()._validate_estimator(\n default=DecisionTreeClassifier(max_depth=1))\n\n # SAMME-R requires predict_proba-enabled base estimators\n if self.algorithm == 'SAMME.R':\n if not hasattr(self.base_estimator_, 'predict_proba'):\n raise TypeError(\n \"AdaBoostClassifier with algorithm='SAMME.R' requires \"\n \"that the weak learner supports the calculation of class \"\n \"probabilities with a predict_proba method.\\n\"\n \"Please change the base estimator or set \"\n \"algorithm='SAMME' instead.\")\n if not has_fit_parameter(self.base_estimator_, \"sample_weight\"):\n raise ValueError(\"%s doesn't support sample_weight.\"\n % self.base_estimator_.__class__.__name__)\n\n def _boost(self, iboost, X, y, sample_weight, random_state):\n \"\"\"Implement a single boost.\n\n Perform a single boost according to the real multi-class SAMME.R\n algorithm or to the discrete SAMME algorithm and return the updated\n sample weights.\n\n Parameters\n ----------\n iboost : int\n The index of the current boost iteration.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\n y : array-like of shape (n_samples,)\n The target values (class labels).\n\n sample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\n random_state : RandomState instance\n The RandomState instance used if the base estimator accepts a\n `random_state` attribute.\n\n Returns\n -------\n sample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\n estimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\n estimator_error : float\n The classification error for the current boost.\n If None then boosting has terminated early.\n \"\"\"\n if self.algorithm == 'SAMME.R':\n return self._boost_real(iboost, X, y, sample_weight, random_state)\n\n else: # elif self.algorithm == \"SAMME\":\n return self._boost_discrete(iboost, X, y, sample_weight,\n random_state)\n\n def _boost_real(self, iboost, X, y, sample_weight, random_state):\n \"\"\"Implement a single boost using the SAMME.R real algorithm.\"\"\"\n estimator = self._make_estimator(random_state=random_state)\n\n estimator.fit(X, y, sample_weight=sample_weight)\n\n y_predict_proba = estimator.predict_proba(X)\n\n if iboost == 0:\n self.classes_ = getattr(estimator, 'classes_', None)\n self.n_classes_ = len(self.classes_)\n\n y_predict = self.classes_.take(np.argmax(y_predict_proba, axis=1),\n axis=0)\n\n # Instances incorrectly classified\n incorrect = y_predict != y\n\n # Error fraction\n estimator_error = np.mean(\n np.average(incorrect, weights=sample_weight, axis=0))\n\n # Stop if classification is perfect\n if estimator_error <= 0:\n return sample_weight, 1., 0.\n\n # Construct y coding as described in Zhu et al [2]:\n #\n # y_k = 1 if c == k else -1 / (K - 1)\n #\n # where K == n_classes_ and c, k in [0, K) are indices along the second\n # axis of the y coding with c being the index corresponding to the true\n # class label.\n n_classes = 
self.n_classes_\n classes = self.classes_\n y_codes = np.array([-1. / (n_classes - 1), 1.])\n y_coding = y_codes.take(classes == y[:, np.newaxis])\n\n # Displace zero probabilities so the log is defined.\n # Also fix negative elements which may occur with\n # negative sample weights.\n proba = y_predict_proba # alias for readability\n np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba)\n\n # Boost weight using multi-class AdaBoost SAMME.R alg\n estimator_weight = (-1. * self.learning_rate\n * ((n_classes - 1.) / n_classes)\n * xlogy(y_coding, y_predict_proba).sum(axis=1))\n\n # Only boost the weights if it will fit again\n if not iboost == self.n_estimators - 1:\n # Only boost positive weights\n sample_weight *= np.exp(estimator_weight *\n ((sample_weight > 0) |\n (estimator_weight < 0)))\n\n return sample_weight, 1., estimator_error\n\n def _boost_discrete(self, iboost, X, y, sample_weight, random_state):\n \"\"\"Implement a single boost using the SAMME discrete algorithm.\"\"\"\n estimator = self._make_estimator(random_state=random_state)\n\n estimator.fit(X, y, sample_weight=sample_weight)\n\n y_predict = estimator.predict(X)\n\n if iboost == 0:\n self.classes_ = getattr(estimator, 'classes_', None)\n self.n_classes_ = len(self.classes_)\n\n # Instances incorrectly classified\n incorrect = y_predict != y\n\n # Error fraction\n estimator_error = np.mean(\n np.average(incorrect, weights=sample_weight, axis=0))\n\n # Stop if classification is perfect\n if estimator_error <= 0:\n return sample_weight, 1., 0.\n\n n_classes = self.n_classes_\n\n # Stop if the error is at least as bad as random guessing\n if estimator_error >= 1. - (1. / n_classes):\n self.estimators_.pop(-1)\n if len(self.estimators_) == 0:\n raise ValueError('BaseClassifier in AdaBoostClassifier '\n 'ensemble is worse than random, ensemble '\n 'can not be fit.')\n return None, None, None\n\n # Boost weight using multi-class AdaBoost SAMME alg\n estimator_weight = self.learning_rate * (\n np.log((1. - estimator_error) / estimator_error) +\n np.log(n_classes - 1.))\n\n # Only boost the weights if it will fit again\n if not iboost == self.n_estimators - 1:\n # Only boost positive weights\n sample_weight *= np.exp(estimator_weight * incorrect *\n (sample_weight > 0))\n\n return sample_weight, estimator_weight, estimator_error\n\n def predict(self, X):\n \"\"\"Predict classes for X.\n\n The predicted class of an input sample is computed as the weighted mean\n prediction of the classifiers in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n The predicted classes.\n \"\"\"\n X = self._check_X(X)\n\n pred = self.decision_function(X)\n\n if self.n_classes_ == 2:\n return self.classes_.take(pred > 0, axis=0)\n\n return self.classes_.take(np.argmax(pred, axis=1), axis=0)\n\n def staged_predict(self, X):\n \"\"\"Return staged predictions for X.\n\n The predicted class of an input sample is computed as the weighted mean\n prediction of the classifiers in the ensemble.\n\n This generator method yields the ensemble prediction after each\n iteration of boosting and therefore allows monitoring, such as to\n determine the prediction on a test set after each boost.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples. 
Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Yields\n ------\n y : generator of ndarray of shape (n_samples,)\n The predicted classes.\n \"\"\"\n X = self._check_X(X)\n\n n_classes = self.n_classes_\n classes = self.classes_\n\n if n_classes == 2:\n for pred in self.staged_decision_function(X):\n yield np.array(classes.take(pred > 0, axis=0))\n\n else:\n for pred in self.staged_decision_function(X):\n yield np.array(classes.take(\n np.argmax(pred, axis=1), axis=0))\n\n def decision_function(self, X):\n \"\"\"Compute the decision function of ``X``.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Returns\n -------\n score : ndarray of shape (n_samples, k)\n The decision function of the input samples. The order of\n outputs is the same as that of the :term:`classes_` attribute.\n Binary classification is a special case with ``k == 1``,\n otherwise ``k==n_classes``. For binary classification,\n values closer to -1 or 1 mean more like the first or second\n class in ``classes_``, respectively.\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n\n n_classes = self.n_classes_\n classes = self.classes_[:, np.newaxis]\n\n if self.algorithm == 'SAMME.R':\n # The weights are all 1. for SAMME.R\n pred = sum(_samme_proba(estimator, n_classes, X)\n for estimator in self.estimators_)\n else: # self.algorithm == \"SAMME\"\n pred = sum((estimator.predict(X) == classes).T * w\n for estimator, w in zip(self.estimators_,\n self.estimator_weights_))\n\n pred /= self.estimator_weights_.sum()\n if n_classes == 2:\n pred[:, 0] *= -1\n return pred.sum(axis=1)\n return pred\n\n def staged_decision_function(self, X):\n \"\"\"Compute decision function of ``X`` for each boosting iteration.\n\n This method allows monitoring (i.e. determining the error on a test\n set) after each boosting iteration.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Yields\n ------\n score : generator of ndarray of shape (n_samples, k)\n The decision function of the input samples. The order of\n outputs is the same as that of the :term:`classes_` attribute.\n Binary classification is a special case with ``k == 1``,\n otherwise ``k==n_classes``. For binary classification,\n values closer to -1 or 1 mean more like the first or second\n class in ``classes_``, respectively.\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n\n n_classes = self.n_classes_\n classes = self.classes_[:, np.newaxis]\n pred = None\n norm = 0.\n\n for weight, estimator in zip(self.estimator_weights_,\n self.estimators_):\n norm += weight\n\n if self.algorithm == 'SAMME.R':\n # The weights are all 1. for SAMME.R\n current_pred = _samme_proba(estimator, n_classes, X)\n else: # elif self.algorithm == \"SAMME\":\n current_pred = estimator.predict(X)\n current_pred = (current_pred == classes).T * weight\n\n if pred is None:\n pred = current_pred\n else:\n pred += current_pred\n\n if n_classes == 2:\n tmp_pred = np.copy(pred)\n tmp_pred[:, 0] *= -1\n yield (tmp_pred / norm).sum(axis=1)\n else:\n yield pred / norm\n\n @staticmethod\n def _compute_proba_from_decision(decision, n_classes):\n \"\"\"Compute probabilities from the decision function.\n\n This is based on eq. 
(4) of [1] where:\n p(y=c|X) = exp((1 / K-1) f_c(X)) / sum_k(exp((1 / K-1) f_k(X)))\n = softmax((1 / K-1) * f(X))\n\n References\n ----------\n .. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\",\n 2009.\n \"\"\"\n if n_classes == 2:\n decision = np.vstack([-decision, decision]).T / 2\n else:\n decision /= (n_classes - 1)\n return softmax(decision, copy=False)\n\n def predict_proba(self, X):\n \"\"\"Predict class probabilities for X.\n\n The predicted class probabilities of an input sample are computed as\n the weighted mean predicted class probabilities of the classifiers\n in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of\n outputs is the same as that of the :term:`classes_` attribute.\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n\n n_classes = self.n_classes_\n\n if n_classes == 1:\n return np.ones((_num_samples(X), 1))\n\n decision = self.decision_function(X)\n return self._compute_proba_from_decision(decision, n_classes)\n\n def staged_predict_proba(self, X):\n \"\"\"Predict class probabilities for X.\n\n The predicted class probabilities of an input sample are computed as\n the weighted mean predicted class probabilities of the classifiers\n in the ensemble.\n\n This generator method yields the ensemble predicted class probabilities\n after each iteration of boosting and therefore allows monitoring, such\n as to determine the predicted class probabilities on a test set after\n each boost.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Yields\n -------\n p : generator of ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of\n outputs is the same as that of the :term:`classes_` attribute.\n \"\"\"\n X = self._check_X(X)\n\n n_classes = self.n_classes_\n\n for decision in self.staged_decision_function(X):\n yield self._compute_proba_from_decision(decision, n_classes)\n\n def predict_log_proba(self, X):\n \"\"\"Predict class log-probabilities for X.\n\n The predicted class log-probabilities of an input sample are computed\n as the weighted mean predicted class log-probabilities of the\n classifiers in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. 
The order of\noutputs is the same as that of the :term:`classes_` attribute.\n \"\"\"\n X = self._check_X(X)\n return np.log(self.predict_proba(X))", + "instance_attributes": [ + { + "name": "algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor", + "name": "AdaBoostRegressor", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor", + "decorators": [], + "superclasses": ["RegressorMixin", "BaseWeightBoosting"], + "methods": [ + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/__init__", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/fit", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_validate_estimator", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_boost", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_get_median_predict", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/predict", + "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/staged_predict" + ], + "is_public": false, + "reexported_by": [], + "description": "An AdaBoost regressor.\n\nAn AdaBoost [1] regressor is a meta-estimator that begins by fitting a\nregressor on the original dataset and then fits additional copies of the\nregressor on the same dataset but where the weights of instances are\nadjusted according to the error of the current prediction. As such,\nsubsequent regressors focus more on difficult cases.\n\nThis class implements the algorithm known as AdaBoost.R2 [2].\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.14", + "docstring": "An AdaBoost regressor.\n\nAn AdaBoost [1] regressor is a meta-estimator that begins by fitting a\nregressor on the original dataset and then fits additional copies of the\nregressor on the same dataset but where the weights of instances are\nadjusted according to the error of the current prediction. As such,\nsubsequent regressors focus more on difficult cases.\n\nThis class implements the algorithm known as AdaBoost.R2 [2].\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nbase_estimator : object, default=None\n The base estimator from which the boosted ensemble is built.\n If ``None``, then the base estimator is\n :class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n `max_depth=3`.\n\nn_estimators : int, default=50\n The maximum number of estimators at which boosting is terminated.\n In case of perfect fit, the learning procedure is stopped early.\n\nlearning_rate : float, default=1.\n Weight applied to each classifier at each boosting iteration. A higher\n learning rate increases the contribution of each classifier. 
There is\n a trade-off between the `learning_rate` and `n_estimators` parameters.\n\nloss : {'linear', 'square', 'exponential'}, default='linear'\n The loss function to use when updating the weights after each\n boosting iteration.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given at each `base_estimator` at each\n boosting iteration.\n Thus, it is only used when `base_estimator` exposes a `random_state`.\n In addition, it controls the bootstrap of the weights used to train the\n `base_estimator` at each boosting iteration.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nbase_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\nestimators_ : list of classifiers\n The collection of fitted sub-estimators.\n\nestimator_weights_ : ndarray of floats\n Weights for each estimator in the boosted ensemble.\n\nestimator_errors_ : ndarray of floats\n Regression error for each estimator in the boosted ensemble.\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances if supported by the\n ``base_estimator`` (when based on decision trees).\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nExamples\n--------\n>>> from sklearn.ensemble import AdaBoostRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, n_informative=2,\n... random_state=0, shuffle=False)\n>>> regr = AdaBoostRegressor(random_state=0, n_estimators=100)\n>>> regr.fit(X, y)\nAdaBoostRegressor(n_estimators=100, random_state=0)\n>>> regr.predict([[0, 0, 0, 0]])\narray([4.7972...])\n>>> regr.score(X, y)\n0.9771...\n\nSee Also\n--------\nAdaBoostClassifier, GradientBoostingRegressor,\nsklearn.tree.DecisionTreeRegressor\n\nReferences\n----------\n.. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n on-Line Learning and an Application to Boosting\", 1995.\n\n.. [2] H. Drucker, \"Improving Regressors using Boosting Techniques\", 1997.", + "code": "class AdaBoostRegressor(RegressorMixin, BaseWeightBoosting):\n \"\"\"An AdaBoost regressor.\n\n An AdaBoost [1] regressor is a meta-estimator that begins by fitting a\n regressor on the original dataset and then fits additional copies of the\n regressor on the same dataset but where the weights of instances are\n adjusted according to the error of the current prediction. As such,\n subsequent regressors focus more on difficult cases.\n\n This class implements the algorithm known as AdaBoost.R2 [2].\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.14\n\n Parameters\n ----------\n base_estimator : object, default=None\n The base estimator from which the boosted ensemble is built.\n If ``None``, then the base estimator is\n :class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n `max_depth=3`.\n\n n_estimators : int, default=50\n The maximum number of estimators at which boosting is terminated.\n In case of perfect fit, the learning procedure is stopped early.\n\n learning_rate : float, default=1.\n Weight applied to each classifier at each boosting iteration. A higher\n learning rate increases the contribution of each classifier. 
There is\n a trade-off between the `learning_rate` and `n_estimators` parameters.\n\n loss : {'linear', 'square', 'exponential'}, default='linear'\n The loss function to use when updating the weights after each\n boosting iteration.\n\n random_state : int, RandomState instance or None, default=None\n Controls the random seed given at each `base_estimator` at each\n boosting iteration.\n Thus, it is only used when `base_estimator` exposes a `random_state`.\n In addition, it controls the bootstrap of the weights used to train the\n `base_estimator` at each boosting iteration.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n base_estimator_ : estimator\n The base estimator from which the ensemble is grown.\n\n estimators_ : list of classifiers\n The collection of fitted sub-estimators.\n\n estimator_weights_ : ndarray of floats\n Weights for each estimator in the boosted ensemble.\n\n estimator_errors_ : ndarray of floats\n Regression error for each estimator in the boosted ensemble.\n\n feature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances if supported by the\n ``base_estimator`` (when based on decision trees).\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n Examples\n --------\n >>> from sklearn.ensemble import AdaBoostRegressor\n >>> from sklearn.datasets import make_regression\n >>> X, y = make_regression(n_features=4, n_informative=2,\n ... random_state=0, shuffle=False)\n >>> regr = AdaBoostRegressor(random_state=0, n_estimators=100)\n >>> regr.fit(X, y)\n AdaBoostRegressor(n_estimators=100, random_state=0)\n >>> regr.predict([[0, 0, 0, 0]])\n array([4.7972...])\n >>> regr.score(X, y)\n 0.9771...\n\n See Also\n --------\n AdaBoostClassifier, GradientBoostingRegressor,\n sklearn.tree.DecisionTreeRegressor\n\n References\n ----------\n .. [1] Y. Freund, R. Schapire, \"A Decision-Theoretic Generalization of\n on-Line Learning and an Application to Boosting\", 1995.\n\n .. [2] H. Drucker, \"Improving Regressors using Boosting Techniques\", 1997.\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self,\n base_estimator=None, *,\n n_estimators=50,\n learning_rate=1.,\n loss='linear',\n random_state=None):\n\n super().__init__(\n base_estimator=base_estimator,\n n_estimators=n_estimators,\n learning_rate=learning_rate,\n random_state=random_state)\n\n self.loss = loss\n self.random_state = random_state\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Build a boosted regressor from the training set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n y : array-like of shape (n_samples,)\n The target values (real numbers).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, the sample weights are initialized to\n 1 / n_samples.\n\n Returns\n -------\n self : object\n \"\"\"\n # Check loss\n if self.loss not in ('linear', 'square', 'exponential'):\n raise ValueError(\n \"loss must be 'linear', 'square', or 'exponential'\")\n\n # Fit\n return super().fit(X, y, sample_weight)\n\n def _validate_estimator(self):\n \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n super()._validate_estimator(\n default=DecisionTreeRegressor(max_depth=3))\n\n def _boost(self, iboost, X, y, sample_weight, random_state):\n \"\"\"Implement a single boost for regression.\n\n Perform a single boost according to the AdaBoost.R2 algorithm and\n return the updated sample weights.\n\n Parameters\n ----------\n iboost : int\n The index of the current boost iteration.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\n y : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\n sample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\n random_state : RandomState\n The RandomState instance used if the base estimator accepts a\n `random_state` attribute.\n Controls also the bootstrap of the weights used to train the weak\n learner.\n\n Returns\n -------\n sample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\n estimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\n estimator_error : float\n The regression error for the current boost.\n If None then boosting has terminated early.\n \"\"\"\n estimator = self._make_estimator(random_state=random_state)\n\n # Weighted sampling of the training set with replacement\n bootstrap_idx = random_state.choice(\n np.arange(_num_samples(X)), size=_num_samples(X), replace=True,\n p=sample_weight\n )\n\n # Fit on the bootstrapped sample and obtain a prediction\n # for all samples in the training set\n X_ = _safe_indexing(X, bootstrap_idx)\n y_ = _safe_indexing(y, bootstrap_idx)\n estimator.fit(X_, y_)\n y_predict = estimator.predict(X)\n\n error_vect = np.abs(y_predict - y)\n sample_mask = sample_weight > 0\n masked_sample_weight = sample_weight[sample_mask]\n masked_error_vector = error_vect[sample_mask]\n\n error_max = masked_error_vector.max()\n if error_max != 0:\n masked_error_vector /= error_max\n\n if self.loss == 'square':\n masked_error_vector **= 2\n elif self.loss == 'exponential':\n masked_error_vector = 1. - np.exp(-masked_error_vector)\n\n # Calculate the average loss\n estimator_error = (masked_sample_weight * masked_error_vector).sum()\n\n if estimator_error <= 0:\n # Stop if fit is perfect\n return sample_weight, 1., 0.\n\n elif estimator_error >= 0.5:\n # Discard current estimator only if it isn't the only one\n if len(self.estimators_) > 1:\n self.estimators_.pop(-1)\n return None, None, None\n\n beta = estimator_error / (1. - estimator_error)\n\n # Boost weight using AdaBoost.R2 alg\n estimator_weight = self.learning_rate * np.log(1. / beta)\n\n if not iboost == self.n_estimators - 1:\n sample_weight[sample_mask] *= np.power(\n beta, (1. 
- masked_error_vector) * self.learning_rate\n )\n\n return sample_weight, estimator_weight, estimator_error\n\n def _get_median_predict(self, X, limit):\n # Evaluate predictions of all estimators\n predictions = np.array([\n est.predict(X) for est in self.estimators_[:limit]]).T\n\n # Sort the predictions\n sorted_idx = np.argsort(predictions, axis=1)\n\n # Find index of median prediction for each sample\n weight_cdf = stable_cumsum(self.estimator_weights_[sorted_idx], axis=1)\n median_or_above = weight_cdf >= 0.5 * weight_cdf[:, -1][:, np.newaxis]\n median_idx = median_or_above.argmax(axis=1)\n\n median_estimators = sorted_idx[np.arange(_num_samples(X)), median_idx]\n\n # Return median predictions\n return predictions[np.arange(_num_samples(X)), median_estimators]\n\n def predict(self, X):\n \"\"\"Predict regression value for X.\n\n The predicted regression value of an input sample is computed\n as the weighted median prediction of the classifiers in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n The predicted regression values.\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n\n return self._get_median_predict(X, len(self.estimators_))\n\n def staged_predict(self, X):\n \"\"\"Return staged predictions for X.\n\n The predicted regression value of an input sample is computed\n as the weighted median prediction of the classifiers in the ensemble.\n\n This generator method yields the ensemble prediction after each\n iteration of boosting and therefore allows monitoring, such as to\n determine the prediction on a test set after each boost.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\n Yields\n -------\n y : generator of ndarray of shape (n_samples,)\n The predicted regression values.\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n\n for i, _ in enumerate(self.estimators_, 1):\n yield self._get_median_predict(X, limit=i)", + "instance_attributes": [ + { + "name": "loss", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting", + "name": "BaseWeightBoosting", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting", + "decorators": [], + "superclasses": ["BaseEnsemble"], + "methods": [ + "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/__init__", + "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/_check_X", + "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/fit", + "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/_boost", + "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/staged_score", + "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/feature_importances_@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for AdaBoost estimators.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "docstring": "Base class for AdaBoost estimators.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "code": "class BaseWeightBoosting(BaseEnsemble, metaclass=ABCMeta):\n \"\"\"Base class for AdaBoost estimators.\n\n Warning: This class should not be used directly. 
Use derived classes\n instead.\n \"\"\"\n\n @abstractmethod\n def __init__(self,\n base_estimator=None, *,\n n_estimators=50,\n estimator_params=tuple(),\n learning_rate=1.,\n random_state=None):\n\n super().__init__(\n base_estimator=base_estimator,\n n_estimators=n_estimators,\n estimator_params=estimator_params)\n\n self.learning_rate = learning_rate\n self.random_state = random_state\n\n def _check_X(self, X):\n return check_array(X, accept_sparse=['csr', 'csc'], ensure_2d=True,\n allow_nd=True, dtype=None)\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Build a boosted classifier/regressor from the training set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n y : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, the sample weights are initialized to\n 1 / n_samples.\n\n Returns\n -------\n self : object\n \"\"\"\n # Check parameters\n if self.learning_rate <= 0:\n raise ValueError(\"learning_rate must be greater than zero\")\n\n X, y = self._validate_data(X, y,\n accept_sparse=['csr', 'csc'],\n ensure_2d=True,\n allow_nd=True,\n dtype=None,\n y_numeric=is_regressor(self))\n\n sample_weight = _check_sample_weight(sample_weight, X, np.float64)\n sample_weight /= sample_weight.sum()\n if np.any(sample_weight < 0):\n raise ValueError(\"sample_weight cannot contain negative weights\")\n\n # Check parameters\n self._validate_estimator()\n\n # Clear any previous fit results\n self.estimators_ = []\n self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64)\n self.estimator_errors_ = np.ones(self.n_estimators, dtype=np.float64)\n\n # Initialization of the random number instance that will be used to\n # generate a seed at each iteration\n random_state = check_random_state(self.random_state)\n\n for iboost in range(self.n_estimators):\n # Boosting step\n sample_weight, estimator_weight, estimator_error = self._boost(\n iboost,\n X, y,\n sample_weight,\n random_state)\n\n # Early termination\n if sample_weight is None:\n break\n\n self.estimator_weights_[iboost] = estimator_weight\n self.estimator_errors_[iboost] = estimator_error\n\n # Stop if error is zero\n if estimator_error == 0:\n break\n\n sample_weight_sum = np.sum(sample_weight)\n\n # Stop if the sum of sample weights has become non-positive\n if sample_weight_sum <= 0:\n break\n\n if iboost < self.n_estimators - 1:\n # Normalize\n sample_weight /= sample_weight_sum\n\n return self\n\n @abstractmethod\n def _boost(self, iboost, X, y, sample_weight, random_state):\n \"\"\"Implement a single boost.\n\n Warning: This method needs to be overridden by subclasses.\n\n Parameters\n ----------\n iboost : int\n The index of the current boost iteration.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n y : array-like of shape (n_samples,)\n The target values (class labels).\n\n sample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\n random_state : RandomState\n The current random number generator\n\n Returns\n -------\n sample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\n estimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\n error : float\n The classification error for the current boost.\n If None then boosting has terminated early.\n \"\"\"\n pass\n\n def staged_score(self, X, y, sample_weight=None):\n \"\"\"Return staged scores for X, y.\n\n This generator method yields the ensemble score after each iteration of\n boosting and therefore allows monitoring, such as to determine the\n score on a test set after each boost.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n y : array-like of shape (n_samples,)\n Labels for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Yields\n ------\n z : float\n \"\"\"\n X = self._check_X(X)\n\n for y_pred in self.staged_predict(X):\n if is_classifier(self):\n yield accuracy_score(y, y_pred, sample_weight=sample_weight)\n else:\n yield r2_score(y, y_pred, sample_weight=sample_weight)\n\n @property\n def feature_importances_(self):\n \"\"\"The impurity-based feature importances.\n\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). 
See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n Returns\n -------\n feature_importances_ : ndarray of shape (n_features,)\n The feature importances.\n \"\"\"\n if self.estimators_ is None or len(self.estimators_) == 0:\n raise ValueError(\"Estimator not fitted, \"\n \"call `fit` before `feature_importances_`.\")\n\n try:\n norm = self.estimator_weights_.sum()\n return (sum(weight * clf.feature_importances_ for weight, clf\n in zip(self.estimator_weights_, self.estimators_))\n / norm)\n\n except AttributeError as e:\n raise AttributeError(\n \"Unable to compute feature importances \"\n \"since base_estimator does not have a \"\n \"feature_importances_ attribute\") from e", + "instance_attributes": [ + { + "name": "learning_rate", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "estimators_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "estimator_weights_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "estimator_errors_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.exceptions/ChangedBehaviorWarning", + "name": "ChangedBehaviorWarning", + "qname": "sklearn.exceptions.ChangedBehaviorWarning", + "decorators": ["deprecated('ChangedBehaviorWarning is deprecated in 0.24 and will be removed in 1.1')"], + "superclasses": ["UserWarning"], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "Warning class used to notify the user of any change in the behavior.\n\n.. versionchanged:: 0.18\n Moved from sklearn.base.", + "docstring": "Warning class used to notify the user of any change in the behavior.\n\n.. versionchanged:: 0.18\n Moved from sklearn.base.", + "code": "class ChangedBehaviorWarning(UserWarning):\n \"\"\"Warning class used to notify the user of any change in the behavior.\n\n .. versionchanged:: 0.18\n Moved from sklearn.base.\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.exceptions/ConvergenceWarning", + "name": "ConvergenceWarning", + "qname": "sklearn.exceptions.ConvergenceWarning", + "decorators": [], + "superclasses": ["UserWarning"], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "Custom warning to capture convergence problems\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.", + "docstring": "Custom warning to capture convergence problems\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.", + "code": "class ConvergenceWarning(UserWarning):\n \"\"\"Custom warning to capture convergence problems\n\n .. 
versionchanged:: 0.18\n Moved from sklearn.utils.\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.exceptions/DataConversionWarning", + "name": "DataConversionWarning", + "qname": "sklearn.exceptions.DataConversionWarning", + "decorators": [], + "superclasses": ["UserWarning"], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "Warning used to notify implicit data conversions happening in the code.\n\nThis warning occurs when some input data needs to be converted or\ninterpreted in a way that may not match the user's expectations.\n\nFor example, this warning may occur when the user\n - passes an integer array to a function which expects float input and\n will convert the input\n - requests a non-copying operation, but a copy is required to meet the\n implementation's data-type expectations;\n - passes an input whose shape can be interpreted ambiguously.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation.", + "docstring": "Warning used to notify implicit data conversions happening in the code.\n\nThis warning occurs when some input data needs to be converted or\ninterpreted in a way that may not match the user's expectations.\n\nFor example, this warning may occur when the user\n - passes an integer array to a function which expects float input and\n will convert the input\n - requests a non-copying operation, but a copy is required to meet the\n implementation's data-type expectations;\n - passes an input whose shape can be interpreted ambiguously.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation.", + "code": "class DataConversionWarning(UserWarning):\n \"\"\"Warning used to notify implicit data conversions happening in the code.\n\n This warning occurs when some input data needs to be converted or\n interpreted in a way that may not match the user's expectations.\n\n For example, this warning may occur when the user\n - passes an integer array to a function which expects float input and\n will convert the input\n - requests a non-copying operation, but a copy is required to meet the\n implementation's data-type expectations;\n - passes an input whose shape can be interpreted ambiguously.\n\n .. versionchanged:: 0.18\n Moved from sklearn.utils.validation.\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.exceptions/DataDimensionalityWarning", + "name": "DataDimensionalityWarning", + "qname": "sklearn.exceptions.DataDimensionalityWarning", + "decorators": [], + "superclasses": ["UserWarning"], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "Custom warning to notify potential issues with data dimensionality.\n\nFor example, in random projection, this warning is raised when the\nnumber of components, which quantifies the dimensionality of the target\nprojection space, is higher than the number of features, which quantifies\nthe dimensionality of the original source space, to imply that the\ndimensionality of the problem will not be reduced.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.", + "docstring": "Custom warning to notify potential issues with data dimensionality.\n\nFor example, in random projection, this warning is raised when the\nnumber of components, which quantifies the dimensionality of the target\nprojection space, is higher than the number of features, which quantifies\nthe dimensionality of the original source space, to imply that the\ndimensionality of the problem will not be reduced.\n\n.. 
versionchanged:: 0.18\n Moved from sklearn.utils.", + "code": "class DataDimensionalityWarning(UserWarning):\n \"\"\"Custom warning to notify potential issues with data dimensionality.\n\n For example, in random projection, this warning is raised when the\n number of components, which quantifies the dimensionality of the target\n projection space, is higher than the number of features, which quantifies\n the dimensionality of the original source space, to imply that the\n dimensionality of the problem will not be reduced.\n\n .. versionchanged:: 0.18\n Moved from sklearn.utils.\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.exceptions/EfficiencyWarning", + "name": "EfficiencyWarning", + "qname": "sklearn.exceptions.EfficiencyWarning", + "decorators": [], + "superclasses": ["UserWarning"], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "Warning used to notify the user of inefficient computation.\n\nThis warning notifies the user that the efficiency may not be optimal due\nto some reason which may be included as a part of the warning message.\nThis may be subclassed into a more specific Warning class.\n\n.. versionadded:: 0.18", + "docstring": "Warning used to notify the user of inefficient computation.\n\nThis warning notifies the user that the efficiency may not be optimal due\nto some reason which may be included as a part of the warning message.\nThis may be subclassed into a more specific Warning class.\n\n.. versionadded:: 0.18", + "code": "class EfficiencyWarning(UserWarning):\n \"\"\"Warning used to notify the user of inefficient computation.\n\n This warning notifies the user that the efficiency may not be optimal due\n to some reason which may be included as a part of the warning message.\n This may be subclassed into a more specific Warning class.\n\n .. versionadded:: 0.18\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.exceptions/FitFailedWarning", + "name": "FitFailedWarning", + "qname": "sklearn.exceptions.FitFailedWarning", + "decorators": [], + "superclasses": ["RuntimeWarning"], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "Warning class used if there is an error while fitting the estimator.\n\nThis Warning is used in meta estimators GridSearchCV and RandomizedSearchCV\nand the cross-validation helper function cross_val_score to warn when there\nis an error while fitting the estimator.\n\n.. versionchanged:: 0.18\n Moved from sklearn.cross_validation.", + "docstring": "Warning class used if there is an error while fitting the estimator.\n\nThis Warning is used in meta estimators GridSearchCV and RandomizedSearchCV\nand the cross-validation helper function cross_val_score to warn when there\nis an error while fitting the estimator.\n\n.. versionchanged:: 0.18\n Moved from sklearn.cross_validation.", + "code": "class FitFailedWarning(RuntimeWarning):\n \"\"\"Warning class used if there is an error while fitting the estimator.\n\n This Warning is used in meta estimators GridSearchCV and RandomizedSearchCV\n and the cross-validation helper function cross_val_score to warn when there\n is an error while fitting the estimator.\n\n .. 
versionchanged:: 0.18\n Moved from sklearn.cross_validation.\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.exceptions/NonBLASDotWarning", + "name": "NonBLASDotWarning", + "qname": "sklearn.exceptions.NonBLASDotWarning", + "decorators": ["deprecated('NonBLASDotWarning is deprecated in 0.24 and will be removed in 1.1')"], + "superclasses": ["EfficiencyWarning"], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "Warning used when the dot operation does not use BLAS.\n\nThis warning is used to notify the user that BLAS was not used for dot\noperation and hence the efficiency may be affected.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation, extends EfficiencyWarning.", + "docstring": "Warning used when the dot operation does not use BLAS.\n\nThis warning is used to notify the user that BLAS was not used for dot\noperation and hence the efficiency may be affected.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation, extends EfficiencyWarning.", + "code": "class NonBLASDotWarning(EfficiencyWarning):\n \"\"\"Warning used when the dot operation does not use BLAS.\n\n This warning is used to notify the user that BLAS was not used for dot\n operation and hence the efficiency may be affected.\n\n .. versionchanged:: 0.18\n Moved from sklearn.utils.validation, extends EfficiencyWarning.\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.exceptions/NotFittedError", + "name": "NotFittedError", + "qname": "sklearn.exceptions.NotFittedError", + "decorators": [], + "superclasses": ["ValueError", "AttributeError"], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "Exception class to raise if estimator is used before fitting.\n\nThis class inherits from both ValueError and AttributeError to help with\nexception handling and backward compatibility.", + "docstring": "Exception class to raise if estimator is used before fitting.\n\nThis class inherits from both ValueError and AttributeError to help with\nexception handling and backward compatibility.\n\nExamples\n--------\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.exceptions import NotFittedError\n>>> try:\n... LinearSVC().predict([[1, 2], [2, 3], [3, 4]])\n... except NotFittedError as e:\n... print(repr(e))\nNotFittedError(\"This LinearSVC instance is not fitted yet. Call 'fit' with\nappropriate arguments before using this estimator.\"...)\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation.", + "code": "class NotFittedError(ValueError, AttributeError):\n \"\"\"Exception class to raise if estimator is used before fitting.\n\n This class inherits from both ValueError and AttributeError to help with\n exception handling and backward compatibility.\n\n Examples\n --------\n >>> from sklearn.svm import LinearSVC\n >>> from sklearn.exceptions import NotFittedError\n >>> try:\n ... LinearSVC().predict([[1, 2], [2, 3], [3, 4]])\n ... except NotFittedError as e:\n ... print(repr(e))\n NotFittedError(\"This LinearSVC instance is not fitted yet. Call 'fit' with\n appropriate arguments before using this estimator.\"...)\n\n .. 
versionchanged:: 0.18\n Moved from sklearn.utils.validation.\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.exceptions/PositiveSpectrumWarning", + "name": "PositiveSpectrumWarning", + "qname": "sklearn.exceptions.PositiveSpectrumWarning", + "decorators": [], + "superclasses": ["UserWarning"], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "Warning raised when the eigenvalues of a PSD matrix have issues\n\nThis warning is typically raised by ``_check_psd_eigenvalues`` when the\neigenvalues of a positive semidefinite (PSD) matrix such as a gram matrix\n(kernel) present significant negative eigenvalues, or bad conditioning i.e.\nvery small non-zero eigenvalues compared to the largest eigenvalue.\n\n.. versionadded:: 0.22", + "docstring": "Warning raised when the eigenvalues of a PSD matrix have issues\n\nThis warning is typically raised by ``_check_psd_eigenvalues`` when the\neigenvalues of a positive semidefinite (PSD) matrix such as a gram matrix\n(kernel) present significant negative eigenvalues, or bad conditioning i.e.\nvery small non-zero eigenvalues compared to the largest eigenvalue.\n\n.. versionadded:: 0.22", + "code": "class PositiveSpectrumWarning(UserWarning):\n \"\"\"Warning raised when the eigenvalues of a PSD matrix have issues\n\n This warning is typically raised by ``_check_psd_eigenvalues`` when the\n eigenvalues of a positive semidefinite (PSD) matrix such as a gram matrix\n (kernel) present significant negative eigenvalues, or bad conditioning i.e.\n very small non-zero eigenvalues compared to the largest eigenvalue.\n\n .. versionadded:: 0.22\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.exceptions/SkipTestWarning", + "name": "SkipTestWarning", + "qname": "sklearn.exceptions.SkipTestWarning", + "decorators": [], + "superclasses": ["UserWarning"], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "Warning class used to notify the user of a test that was skipped.\n\nFor example, one of the estimator checks requires a pandas import.\nIf the pandas package cannot be imported, the test will be skipped rather\nthan register as a failure.", + "docstring": "Warning class used to notify the user of a test that was skipped.\n\nFor example, one of the estimator checks requires a pandas import.\nIf the pandas package cannot be imported, the test will be skipped rather\nthan register as a failure.", + "code": "class SkipTestWarning(UserWarning):\n \"\"\"Warning class used to notify the user of a test that was skipped.\n\n For example, one of the estimator checks requires a pandas import.\n If the pandas package cannot be imported, the test will be skipped rather\n than register as a failure.\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.exceptions/UndefinedMetricWarning", + "name": "UndefinedMetricWarning", + "qname": "sklearn.exceptions.UndefinedMetricWarning", + "decorators": [], + "superclasses": ["UserWarning"], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "Warning used when the metric is invalid\n\n.. versionchanged:: 0.18\n Moved from sklearn.base.", + "docstring": "Warning used when the metric is invalid\n\n.. versionchanged:: 0.18\n Moved from sklearn.base.", + "code": "class UndefinedMetricWarning(UserWarning):\n \"\"\"Warning used when the metric is invalid\n\n .. 
versionchanged:: 0.18\n Moved from sklearn.base.\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffContainerType", + "name": "ArffContainerType", + "qname": "sklearn.externals._arff.ArffContainerType", + "decorators": [], + "superclasses": ["TypedDict"], + "methods": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " class ArffContainerType(TypedDict):\n description: str\n relation: str\n attributes: List\n data: Union[ArffDenseDataType, ArffSparseDataType]", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder", + "name": "ArffDecoder", + "qname": "sklearn.externals._arff.ArffDecoder", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.externals._arff/ArffDecoder/__init__", + "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode_comment", + "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode_relation", + "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode_attribute", + "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode", + "scikit-learn/sklearn.externals._arff/ArffDecoder/decode" + ], + "is_public": false, + "reexported_by": [], + "description": "An ARFF decoder.", + "docstring": "An ARFF decoder.", + "code": "class ArffDecoder:\n '''An ARFF decoder.'''\n\n def __init__(self):\n '''Constructor.'''\n self._conversors = []\n self._current_line = 0\n\n def _decode_comment(self, s):\n '''(INTERNAL) Decodes a comment line.\n\n Comments are single line strings starting, obligatorily, with the ``%``\n character, and can have any symbol, including whitespaces or special\n characters.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\\r\\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded comment.\n '''\n res = re.sub(r'^\\%( )?', '', s)\n return res\n\n def _decode_relation(self, s):\n '''(INTERNAL) Decodes a relation line.\n\n The relation declaration is a line with the format ``@RELATION\n ``, where ``relation-name`` is a string. The string must\n start with alphabetic character and must be quoted if the name includes\n spaces, otherwise this method will raise a `BadRelationFormat` exception.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\\r\\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded relation name.\n '''\n _, v = s.split(' ', 1)\n v = v.strip()\n\n if not _RE_RELATION.match(v):\n raise BadRelationFormat()\n\n res = str(v.strip('\"\\''))\n return res\n\n def _decode_attribute(self, s):\n '''(INTERNAL) Decodes an attribute line.\n\n The attribute is the most complex declaration in an arff file. 
All\n attributes must follow the template::\n\n @attribute \n\n where ``attribute-name`` is a string, quoted if the name contains any\n whitespace, and ``datatype`` can be:\n\n - Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n - Strings as ``STRING``.\n - Dates (NOT IMPLEMENTED).\n - Nominal attributes with format:\n\n {, , , ...}\n\n The nominal names follow the rules for the attribute names, i.e., they\n must be quoted if the name contains whitespaces.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\\r\\n\" characters.\n\n :param s: a normalized string.\n :return: a tuple (ATTRIBUTE_NAME, TYPE_OR_VALUES).\n '''\n _, v = s.split(' ', 1)\n v = v.strip()\n\n # Verify the general structure of declaration\n m = _RE_ATTRIBUTE.match(v)\n if not m:\n raise BadAttributeFormat()\n\n # Extracts the raw name and type\n name, type_ = m.groups()\n\n # Extracts the final name\n name = str(name.strip('\"\\''))\n\n # Extracts the final type\n if type_[:1] == \"{\" and type_[-1:] == \"}\":\n try:\n type_ = _parse_values(type_.strip('{} '))\n except Exception:\n raise BadAttributeType()\n if isinstance(type_, dict):\n raise BadAttributeType()\n\n else:\n # If not nominal, verify the type name\n type_ = str(type_).upper()\n if type_ not in ['NUMERIC', 'REAL', 'INTEGER', 'STRING']:\n raise BadAttributeType()\n\n return (name, type_)\n\n def _decode(self, s, encode_nominal=False, matrix_type=DENSE):\n '''Do the job the ``encode``.'''\n\n # Make sure this method is idempotent\n self._current_line = 0\n\n # If string, convert to a list of lines\n if isinstance(s, str):\n s = s.strip('\\r\\n ').replace('\\r\\n', '\\n').split('\\n')\n\n # Create the return object\n obj: ArffContainerType = {\n 'description': '',\n 'relation': '',\n 'attributes': [],\n 'data': []\n }\n attribute_names = {}\n\n # Create the data helper object\n data = _get_data_object_for_decoding(matrix_type)\n\n # Read all lines\n STATE = _TK_DESCRIPTION\n s = iter(s)\n for row in s:\n self._current_line += 1\n # Ignore empty lines\n row = row.strip(' \\r\\n')\n if not row: continue\n\n u_row = row.upper()\n\n # DESCRIPTION -----------------------------------------------------\n if u_row.startswith(_TK_DESCRIPTION) and STATE == _TK_DESCRIPTION:\n obj['description'] += self._decode_comment(row) + '\\n'\n # -----------------------------------------------------------------\n\n # RELATION --------------------------------------------------------\n elif u_row.startswith(_TK_RELATION):\n if STATE != _TK_DESCRIPTION:\n raise BadLayout()\n\n STATE = _TK_RELATION\n obj['relation'] = self._decode_relation(row)\n # -----------------------------------------------------------------\n\n # ATTRIBUTE -------------------------------------------------------\n elif u_row.startswith(_TK_ATTRIBUTE):\n if STATE != _TK_RELATION and STATE != _TK_ATTRIBUTE:\n raise BadLayout()\n\n STATE = _TK_ATTRIBUTE\n\n attr = self._decode_attribute(row)\n if attr[0] in attribute_names:\n raise BadAttributeName(attr[0], attribute_names[attr[0]])\n else:\n attribute_names[attr[0]] = self._current_line\n obj['attributes'].append(attr)\n\n if isinstance(attr[1], (list, tuple)):\n if encode_nominal:\n conversor = EncodedNominalConversor(attr[1])\n else:\n conversor = NominalConversor(attr[1])\n else:\n CONVERSOR_MAP = {'STRING': str,\n 'INTEGER': lambda x: int(float(x)),\n 'NUMERIC': float,\n 'REAL': float}\n conversor = CONVERSOR_MAP[attr[1]]\n\n self._conversors.append(conversor)\n # 
-----------------------------------------------------------------\n\n # DATA ------------------------------------------------------------\n elif u_row.startswith(_TK_DATA):\n if STATE != _TK_ATTRIBUTE:\n raise BadLayout()\n\n break\n # -----------------------------------------------------------------\n\n # COMMENT ---------------------------------------------------------\n elif u_row.startswith(_TK_COMMENT):\n pass\n # -----------------------------------------------------------------\n else:\n # Never found @DATA\n raise BadLayout()\n\n def stream():\n for row in s:\n self._current_line += 1\n row = row.strip()\n # Ignore empty lines and comment lines.\n if row and not row.startswith(_TK_COMMENT):\n yield row\n\n # Alter the data object\n obj['data'] = data.decode_rows(stream(), self._conversors)\n if obj['description'].endswith('\\n'):\n obj['description'] = obj['description'][:-1]\n\n return obj\n\n def decode(self, s, encode_nominal=False, return_type=DENSE):\n '''Returns the Python representation of a given ARFF file.\n\n When a file object is passed as an argument, this method reads lines\n iteratively, avoiding to load unnecessary information to the memory.\n\n :param s: a string or file object with the ARFF file.\n :param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n :param return_type: determines the data structure used to store the\n dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_.\n '''\n try:\n return self._decode(s, encode_nominal=encode_nominal,\n matrix_type=return_type)\n except ArffException as e:\n e.line = self._current_line\n raise e", + "instance_attributes": [ + { + "name": "_conversors", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "_current_line", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder", + "name": "ArffEncoder", + "qname": "sklearn.externals._arff.ArffEncoder", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.externals._arff/ArffEncoder/_encode_comment", + "scikit-learn/sklearn.externals._arff/ArffEncoder/_encode_relation", + "scikit-learn/sklearn.externals._arff/ArffEncoder/_encode_attribute", + "scikit-learn/sklearn.externals._arff/ArffEncoder/encode", + "scikit-learn/sklearn.externals._arff/ArffEncoder/iter_encode" + ], + "is_public": false, + "reexported_by": [], + "description": "An ARFF encoder.", + "docstring": "An ARFF encoder.", + "code": "class ArffEncoder:\n '''An ARFF encoder.'''\n\n def _encode_comment(self, s=''):\n '''(INTERNAL) Encodes a comment line.\n\n Comments are single line strings starting, obligatorily, with the ``%``\n character, and can have any symbol, including whitespaces or special\n characters.\n\n If ``s`` is None, this method will simply return an empty comment.\n\n :param s: (OPTIONAL) string.\n :return: a string with the encoded comment line.\n '''\n if s:\n return '%s %s'%(_TK_COMMENT, s)\n else:\n return '%s' % _TK_COMMENT\n\n def _encode_relation(self, name):\n '''(INTERNAL) Decodes a relation line.\n\n The relation declaration is a line with the format ``@RELATION\n ``, where ``relation-name`` is a string.\n\n :param name: a string.\n :return: a string with the encoded relation declaration.\n '''\n for char in ' %{},':\n if char in name:\n name = '\"%s\"'%name\n break\n\n return '%s 
%s'%(_TK_RELATION, name)\n\n    def _encode_attribute(self, name, type_):\n        '''(INTERNAL) Encodes an attribute line.\n\n        The attribute follows the template::\n\n             @attribute <attribute-name> <datatype>\n\n        where ``attribute-name`` is a string, and ``datatype`` can be:\n\n        - Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n        - Strings as ``STRING``.\n        - Dates (NOT IMPLEMENTED).\n        - Nominal attributes with format:\n\n            {<nominal-name1>, <nominal-name2>, <nominal-name3>, ...}\n\n        This method must receive the name of the attribute and its type; if\n        the attribute type is nominal, ``type`` must be a list of values.\n\n        :param name: a string.\n        :param type_: a string or a list of strings.\n        :return: a string with the encoded attribute declaration.\n        '''\n        for char in ' %{},':\n            if char in name:\n                name = '\"%s\"'%name\n                break\n\n        if isinstance(type_, (tuple, list)):\n            type_tmp = ['%s' % encode_string(type_k) for type_k in type_]\n            type_ = '{%s}'%(', '.join(type_tmp))\n\n        return '%s %s %s'%(_TK_ATTRIBUTE, name, type_)\n\n    def encode(self, obj):\n        '''Encodes a given object to an ARFF file.\n\n        :param obj: the object containing the ARFF information.\n        :return: the ARFF file as a string.\n        '''\n        data = [row for row in self.iter_encode(obj)]\n\n        return '\\n'.join(data)\n\n    def iter_encode(self, obj):\n        '''The iterative version of `arff.ArffEncoder.encode`.\n\n        This encodes iteratively a given object and returns, one-by-one, the\n        lines of the ARFF file.\n\n        :param obj: the object containing the ARFF information.\n        :return: (yields) the ARFF file as strings.\n        '''\n        # DESCRIPTION\n        if obj.get('description', None):\n            for row in obj['description'].split('\\n'):\n                yield self._encode_comment(row)\n\n        # RELATION\n        if not obj.get('relation'):\n            raise BadObject('Relation name not found or with invalid value.')\n\n        yield self._encode_relation(obj['relation'])\n        yield ''\n\n        # ATTRIBUTES\n        if not obj.get('attributes'):\n            raise BadObject('Attributes not found.')\n\n        attribute_names = set()\n        for attr in obj['attributes']:\n            # Verify for bad object format\n            if not isinstance(attr, (tuple, list)) or \\\n                    len(attr) != 2 or \\\n                    not isinstance(attr[0], str):\n                raise BadObject('Invalid attribute declaration \"%s\"'%str(attr))\n\n            if isinstance(attr[1], str):\n                # Verify for invalid types\n                if attr[1] not in _SIMPLE_TYPES:\n                    raise BadObject('Invalid attribute type \"%s\"'%str(attr))\n\n            # Verify for bad object format\n            elif not isinstance(attr[1], (tuple, list)):\n                raise BadObject('Invalid attribute type \"%s\"'%str(attr))\n\n            # Verify attribute name is not used twice\n            if attr[0] in attribute_names:\n                raise BadObject('Trying to use attribute name \"%s\" for the '\n                                'second time.' 
% str(attr[0]))\n            else:\n                attribute_names.add(attr[0])\n\n            yield self._encode_attribute(attr[0], attr[1])\n        yield ''\n        attributes = obj['attributes']\n\n        # DATA\n        yield _TK_DATA\n        if 'data' in obj:\n            data = _get_data_object_for_encoding(obj.get('data'))\n            yield from data.encode_data(obj.get('data'), attributes)\n\n        yield ''", +    "instance_attributes": [] +   }, +   { +    "id": "scikit-learn/sklearn.externals._arff/ArffException", +    "name": "ArffException", +    "qname": "sklearn.externals._arff.ArffException", +    "decorators": [], +    "superclasses": ["Exception"], +    "methods": [ +     "scikit-learn/sklearn.externals._arff/ArffException/__init__", +     "scikit-learn/sklearn.externals._arff/ArffException/__str__" +    ], +    "is_public": false, +    "reexported_by": [], +    "description": "", +    "docstring": "", +    "code": "class ArffException(Exception):\n    message: Optional[str] = None\n\n    def __init__(self):\n        self.line = -1\n\n    def __str__(self):\n        return self.message%self.line", +    "instance_attributes": [ +     { +      "name": "line", +      "types": { +       "kind": "NamedType", +       "name": "int" +      } +     } +    ] +   }, +   { +    "id": "scikit-learn/sklearn.externals._arff/BadAttributeFormat", +    "name": "BadAttributeFormat", +    "qname": "sklearn.externals._arff.BadAttributeFormat", +    "decorators": [], +    "superclasses": ["ArffException"], +    "methods": [], +    "is_public": false, +    "reexported_by": [], +    "description": "Error raised when some attribute declaration is in an invalid format.", +    "docstring": "Error raised when some attribute declaration is in an invalid format.", +    "code": "class BadAttributeFormat(ArffException):\n    '''Error raised when some attribute declaration is in an invalid format.'''\n    message = 'Bad @ATTRIBUTE format, at line %d.'", +    "instance_attributes": [] +   }, +   { +    "id": "scikit-learn/sklearn.externals._arff/BadAttributeName", +    "name": "BadAttributeName", +    "qname": "sklearn.externals._arff.BadAttributeName", +    "decorators": [], +    "superclasses": ["ArffException"], +    "methods": ["scikit-learn/sklearn.externals._arff/BadAttributeName/__init__"], +    "is_public": false, +    "reexported_by": [], +    "description": "Error raised when an attribute name is provided twice in the attribute\ndeclaration.", +    "docstring": "Error raised when an attribute name is provided twice in the attribute\ndeclaration.", +    "code": "class BadAttributeName(ArffException):\n    '''Error raised when an attribute name is provided twice in the attribute\n    declaration.'''\n\n    def __init__(self, value, value2):\n        super().__init__()\n        self.message = (\n            ('Bad @ATTRIBUTE name %s at line' % value) +\n            ' %d, this name is already in use in line' +\n            (' %d.' 
% value2)\n        )", +    "instance_attributes": [] +   }, +   { +    "id": "scikit-learn/sklearn.externals._arff/BadAttributeType", +    "name": "BadAttributeType", +    "qname": "sklearn.externals._arff.BadAttributeType", +    "decorators": [], +    "superclasses": ["ArffException"], +    "methods": [], +    "is_public": false, +    "reexported_by": [], +    "description": "Error raised when some invalid type is provided in the attribute\ndeclaration.", +    "docstring": "Error raised when some invalid type is provided in the attribute\ndeclaration.", +    "code": "class BadAttributeType(ArffException):\n    '''Error raised when some invalid type is provided in the attribute\n    declaration.'''\n    message = 'Bad @ATTRIBUTE type, at line %d.'", +    "instance_attributes": [] +   }, +   { +    "id": "scikit-learn/sklearn.externals._arff/BadDataFormat", +    "name": "BadDataFormat", +    "qname": "sklearn.externals._arff.BadDataFormat", +    "decorators": [], +    "superclasses": ["ArffException"], +    "methods": ["scikit-learn/sklearn.externals._arff/BadDataFormat/__init__"], +    "is_public": false, +    "reexported_by": [], +    "description": "Error raised when some data instance is in an invalid format.", +    "docstring": "Error raised when some data instance is in an invalid format.", +    "code": "class BadDataFormat(ArffException):\n    '''Error raised when some data instance is in an invalid format.'''\n    def __init__(self, value):\n        super().__init__()\n        self.message = (\n            'Bad @DATA instance format in line %d: ' +\n            ('%s' % value)\n        )", +    "instance_attributes": [] +   }, +   { +    "id": "scikit-learn/sklearn.externals._arff/BadLayout", +    "name": "BadLayout", +    "qname": "sklearn.externals._arff.BadLayout", +    "decorators": [], +    "superclasses": ["ArffException"], +    "methods": ["scikit-learn/sklearn.externals._arff/BadLayout/__init__"], +    "is_public": false, +    "reexported_by": [], +    "description": "Error raised when the layout of the ARFF file has something wrong.", +    "docstring": "Error raised when the layout of the ARFF file has something wrong.", +    "code": "class BadLayout(ArffException):\n    '''Error raised when the layout of the ARFF file has something wrong.'''\n    message = 'Invalid layout of the ARFF file, at line %d.'\n\n    def __init__(self, msg=''):\n        super().__init__()\n        if msg:\n            self.message = BadLayout.message + ' ' + msg.replace('%', '%%')", +    "instance_attributes": [ +     { +      "name": "message", +      "types": { +       "kind": "NamedType", +       "name": "str" +      } +     } +    ] +   }, +   { +    "id": "scikit-learn/sklearn.externals._arff/BadNominalFormatting", +    "name": "BadNominalFormatting", +    "qname": "sklearn.externals._arff.BadNominalFormatting", +    "decorators": [], +    "superclasses": ["ArffException"], +    "methods": ["scikit-learn/sklearn.externals._arff/BadNominalFormatting/__init__"], +    "is_public": false, +    "reexported_by": [], +    "description": "Error raised when a nominal value with space is not properly quoted.", +    "docstring": "Error raised when a nominal value with space is not properly quoted.", +    "code": "class BadNominalFormatting(ArffException):\n    '''Error raised when a nominal value with space is not properly quoted.'''\n    def __init__(self, value):\n        super().__init__()\n        self.message = (\n            ('Nominal data value \"%s\" not properly quoted in line ' % value) +\n            '%d.'\n        )", +    "instance_attributes": [] +   }, +   { +    "id": "scikit-learn/sklearn.externals._arff/BadNominalValue", +    "name": "BadNominalValue", +    "qname": "sklearn.externals._arff.BadNominalValue", +    "decorators": [], +    "superclasses": ["ArffException"], +    "methods": 
["scikit-learn/sklearn.externals._arff/BadNominalValue/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Error raised when a value in used in some data instance but is not\ndeclared into it respective attribute declaration.", + "docstring": "Error raised when a value in used in some data instance but is not\ndeclared into it respective attribute declaration.", + "code": "class BadNominalValue(ArffException):\n '''Error raised when a value in used in some data instance but is not\n declared into it respective attribute declaration.'''\n\n def __init__(self, value):\n super().__init__()\n self.message = (\n ('Data value %s not found in nominal declaration, ' % value)\n + 'at line %d.'\n )", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadNumericalValue", + "name": "BadNumericalValue", + "qname": "sklearn.externals._arff.BadNumericalValue", + "decorators": [], + "superclasses": ["ArffException"], + "methods": [], + "is_public": false, + "reexported_by": [], + "description": "Error raised when and invalid numerical value is used in some data\ninstance.", + "docstring": "Error raised when and invalid numerical value is used in some data\ninstance.", + "code": "class BadNumericalValue(ArffException):\n '''Error raised when and invalid numerical value is used in some data\n instance.'''\n message = 'Invalid numerical value, at line %d.'", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadObject", + "name": "BadObject", + "qname": "sklearn.externals._arff.BadObject", + "decorators": [], + "superclasses": ["ArffException"], + "methods": [ + "scikit-learn/sklearn.externals._arff/BadObject/__init__", + "scikit-learn/sklearn.externals._arff/BadObject/__str__" + ], + "is_public": false, + "reexported_by": [], + "description": "Error raised when the object representing the ARFF file has something\nwrong.", + "docstring": "Error raised when the object representing the ARFF file has something\nwrong.", + "code": "class BadObject(ArffException):\n '''Error raised when the object representing the ARFF file has something\n wrong.'''\n def __init__(self, msg='Invalid object.'):\n self.msg = msg\n\n def __str__(self):\n return '%s' % self.msg", + "instance_attributes": [ + { + "name": "msg", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadRelationFormat", + "name": "BadRelationFormat", + "qname": "sklearn.externals._arff.BadRelationFormat", + "decorators": [], + "superclasses": ["ArffException"], + "methods": [], + "is_public": false, + "reexported_by": [], + "description": "Error raised when the relation declaration is in an invalid format.", + "docstring": "Error raised when the relation declaration is in an invalid format.", + "code": "class BadRelationFormat(ArffException):\n '''Error raised when the relation declaration is in an invalid format.'''\n message = 'Bad @RELATION format, at line %d.'", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadStringValue", + "name": "BadStringValue", + "qname": "sklearn.externals._arff.BadStringValue", + "decorators": [], + "superclasses": ["ArffException"], + "methods": [], + "is_public": false, + "reexported_by": [], + "description": "Error raise when a string contains space but is not quoted.", + "docstring": "Error raise when a string contains space but is not quoted.", + "code": "class BadStringValue(ArffException):\n '''Error raise when a string contains 
space but is not quoted.'''\n message = 'Invalid string value at line %d.'", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.externals._arff/COOData", + "name": "COOData", + "qname": "sklearn.externals._arff.COOData", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.externals._arff/COOData/decode_rows", + "scikit-learn/sklearn.externals._arff/COOData/encode_data" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class COOData:\n def decode_rows(self, stream, conversors):\n data, rows, cols = [], [], []\n for i, row in enumerate(stream):\n values = _parse_values(row)\n if not isinstance(values, dict):\n raise BadLayout()\n if not values:\n continue\n row_cols, values = zip(*sorted(values.items()))\n try:\n values = [value if value is None else conversors[key](value)\n for key, value in zip(row_cols, values)]\n except ValueError as exc:\n if 'float: ' in str(exc):\n raise BadNumericalValue()\n raise\n except IndexError:\n # conversor out of range\n raise BadDataFormat(row)\n\n data.extend(values)\n rows.extend([i] * len(values))\n cols.extend(row_cols)\n\n return data, rows, cols\n\n def encode_data(self, data, attributes):\n num_attributes = len(attributes)\n new_data = []\n current_row = 0\n\n row = data.row\n col = data.col\n data = data.data\n\n # Check if the rows are sorted\n if not all(row[i] <= row[i + 1] for i in range(len(row) - 1)):\n raise ValueError(\"liac-arff can only output COO matrices with \"\n \"sorted rows.\")\n\n for v, col, row in zip(data, col, row):\n if row > current_row:\n # Add empty rows if necessary\n while current_row < row:\n yield \" \".join([\"{\", ','.join(new_data), \"}\"])\n new_data = []\n current_row += 1\n\n if col >= num_attributes:\n raise BadObject(\n 'Instance %d has at least %d attributes, expected %d' %\n (current_row, col + 1, num_attributes)\n )\n\n if v is None or v == '' or v != v:\n s = '?'\n else:\n s = encode_string(str(v))\n new_data.append(\"%d %s\" % (col, s))\n\n yield \" \".join([\"{\", ','.join(new_data), \"}\"])", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.externals._arff/Data", + "name": "Data", + "qname": "sklearn.externals._arff.Data", + "decorators": [], + "superclasses": ["_DataListMixin", "DenseGeneratorData"], + "methods": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class Data(_DataListMixin, DenseGeneratorData):\n pass", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.externals._arff/DenseGeneratorData", + "name": "DenseGeneratorData", + "qname": "sklearn.externals._arff.DenseGeneratorData", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.externals._arff/DenseGeneratorData/decode_rows", + "scikit-learn/sklearn.externals._arff/DenseGeneratorData/_decode_values", + "scikit-learn/sklearn.externals._arff/DenseGeneratorData/encode_data" + ], + "is_public": false, + "reexported_by": [], + "description": "Internal helper class to allow for different matrix types without\nmaking the code a huge collection of if statements.", + "docstring": "Internal helper class to allow for different matrix types without\nmaking the code a huge collection of if statements.", + "code": "class DenseGeneratorData:\n '''Internal helper class to allow for different matrix types without\n making the code a huge collection of if statements.'''\n\n def decode_rows(self, stream, conversors):\n for row in stream:\n values = 
_parse_values(row)\n\n if isinstance(values, dict):\n if values and max(values) >= len(conversors):\n raise BadDataFormat(row)\n # XXX: int 0 is used for implicit values, not '0'\n values = [values[i] if i in values else 0 for i in\n range(len(conversors))]\n else:\n if len(values) != len(conversors):\n raise BadDataFormat(row)\n\n yield self._decode_values(values, conversors)\n\n @staticmethod\n def _decode_values(values, conversors):\n try:\n values = [None if value is None else conversor(value)\n for conversor, value\n in zip(conversors, values)]\n except ValueError as exc:\n if 'float: ' in str(exc):\n raise BadNumericalValue()\n return values\n\n def encode_data(self, data, attributes):\n '''(INTERNAL) Encodes a line of data.\n\n Data instances follow the csv format, i.e, attribute values are\n delimited by commas. After converted from csv.\n\n :param data: a list of values.\n :param attributes: a list of attributes. Used to check if data is valid.\n :return: a string with the encoded data line.\n '''\n current_row = 0\n\n for inst in data:\n if len(inst) != len(attributes):\n raise BadObject(\n 'Instance %d has %d attributes, expected %d' %\n (current_row, len(inst), len(attributes))\n )\n\n new_data = []\n for value in inst:\n if value is None or value == '' or value != value:\n s = '?'\n else:\n s = encode_string(str(value))\n new_data.append(s)\n\n current_row += 1\n yield ','.join(new_data)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.externals._arff/EncodedNominalConversor", + "name": "EncodedNominalConversor", + "qname": "sklearn.externals._arff.EncodedNominalConversor", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.externals._arff/EncodedNominalConversor/__init__", + "scikit-learn/sklearn.externals._arff/EncodedNominalConversor/__call__" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class EncodedNominalConversor:\n def __init__(self, values):\n self.values = {v: i for i, v in enumerate(values)}\n self.values[0] = 0\n\n def __call__(self, value):\n try:\n return self.values[value]\n except KeyError:\n raise BadNominalValue(value)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.externals._arff/LODData", + "name": "LODData", + "qname": "sklearn.externals._arff.LODData", + "decorators": [], + "superclasses": ["_DataListMixin", "LODGeneratorData"], + "methods": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class LODData(_DataListMixin, LODGeneratorData):\n pass", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.externals._arff/LODGeneratorData", + "name": "LODGeneratorData", + "qname": "sklearn.externals._arff.LODGeneratorData", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.externals._arff/LODGeneratorData/decode_rows", + "scikit-learn/sklearn.externals._arff/LODGeneratorData/encode_data" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class LODGeneratorData:\n def decode_rows(self, stream, conversors):\n for row in stream:\n values = _parse_values(row)\n\n if not isinstance(values, dict):\n raise BadLayout()\n try:\n yield {key: None if value is None else conversors[key](value)\n for key, value in values.items()}\n except ValueError as exc:\n if 'float: ' in str(exc):\n raise BadNumericalValue()\n raise\n except IndexError:\n # conversor out of range\n raise BadDataFormat(row)\n\n def 
encode_data(self, data, attributes):\n current_row = 0\n\n num_attributes = len(attributes)\n for row in data:\n new_data = []\n\n if len(row) > 0 and max(row) >= num_attributes:\n raise BadObject(\n 'Instance %d has %d attributes, expected %d' %\n (current_row, max(row) + 1, num_attributes)\n )\n\n for col in sorted(row):\n v = row[col]\n if v is None or v == '' or v != v:\n s = '?'\n else:\n s = encode_string(str(v))\n new_data.append(\"%d %s\" % (col, s))\n\n current_row += 1\n yield \" \".join([\"{\", ','.join(new_data), \"}\"])", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.externals._arff/NominalConversor", + "name": "NominalConversor", + "qname": "sklearn.externals._arff.NominalConversor", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.externals._arff/NominalConversor/__init__", + "scikit-learn/sklearn.externals._arff/NominalConversor/__call__" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class NominalConversor:\n def __init__(self, values):\n self.values = set(values)\n self.zero_value = values[0]\n\n def __call__(self, value):\n if value not in self.values:\n if value == 0:\n # Sparse decode\n # See issue #52: nominals should take their first value when\n # unspecified in a sparse matrix. Naturally, this is consistent\n # with EncodedNominalConversor.\n return self.zero_value\n raise BadNominalValue(value)\n return str(value)", + "instance_attributes": [ + { + "name": "values", + "types": { + "kind": "NamedType", + "name": "set" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.externals._arff/_DataListMixin", + "name": "_DataListMixin", + "qname": "sklearn.externals._arff._DataListMixin", + "decorators": [], + "superclasses": [], + "methods": ["scikit-learn/sklearn.externals._arff/_DataListMixin/decode_rows"], + "is_public": false, + "reexported_by": [], + "description": "Mixin to return a list from decode_rows instead of a generator", + "docstring": "Mixin to return a list from decode_rows instead of a generator", + "code": "class _DataListMixin:\n \"\"\"Mixin to return a list from decode_rows instead of a generator\"\"\"\n def decode_rows(self, stream, conversors):\n return list(super().decode_rows(stream, conversors))", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.externals._pep562/Pep562", + "name": "Pep562", + "qname": "sklearn.externals._pep562.Pep562", + "decorators": [], + "superclasses": ["object"], + "methods": [ + "scikit-learn/sklearn.externals._pep562/Pep562/__init__", + "scikit-learn/sklearn.externals._pep562/Pep562/__dir__", + "scikit-learn/sklearn.externals._pep562/Pep562/__getattr__" + ], + "is_public": false, + "reexported_by": [], + "description": "Backport of PEP 562 .\n\nWraps the module in a class that exposes the mechanics to override `__dir__` and `__getattr__`.\nThe given module will be searched for overrides of `__dir__` and `__getattr__` and use them when needed.", + "docstring": "Backport of PEP 562 .\n\nWraps the module in a class that exposes the mechanics to override `__dir__` and `__getattr__`.\nThe given module will be searched for overrides of `__dir__` and `__getattr__` and use them when needed.", + "code": "class Pep562(object):\n \"\"\"\n Backport of PEP 562 .\n\n Wraps the module in a class that exposes the mechanics to override `__dir__` and `__getattr__`.\n The given module will be searched for overrides of `__dir__` and `__getattr__` and use them when needed.\n \"\"\"\n\n def __init__(self, name):\n 
\"\"\"Acquire `__getattr__` and `__dir__`, but only replace module for versions less than Python 3.7.\"\"\"\n\n self._module = sys.modules[name]\n self._get_attr = getattr(self._module, '__getattr__', None)\n self._get_dir = getattr(self._module, '__dir__', None)\n sys.modules[name] = self\n\n def __dir__(self):\n \"\"\"Return the overridden `dir` if one was provided, else apply `dir` to the module.\"\"\"\n\n return self._get_dir() if self._get_dir else dir(self._module)\n\n def __getattr__(self, name):\n \"\"\"Attempt to retrieve the attribute from the module, and if missing, use the overridden function if present.\"\"\"\n\n try:\n return getattr(self._module, name)\n except AttributeError:\n if self._get_attr:\n return self._get_attr(name)\n raise", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer", + "name": "DictVectorizer", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/__init__", + "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_add_iterable_element", + "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/fit", + "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_transform", + "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/fit_transform", + "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/inverse_transform", + "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/transform", + "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/get_feature_names", + "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/restrict", + "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Transforms lists of feature-value mappings to vectors.\n\nThis transformer turns lists of mappings (dict-like objects) of feature\nnames to feature values into Numpy arrays or scipy.sparse matrices for use\nwith scikit-learn estimators.\n\nWhen feature values are strings, this transformer will do a binary one-hot\n(aka one-of-K) coding: one boolean-valued feature is constructed for each\nof the possible string values that the feature can take on. For instance,\na feature \"f\" that can take on the values \"ham\" and \"spam\" will become two\nfeatures in the output, one signifying \"f=ham\", the other \"f=spam\".\n\nIf a feature value is a sequence or set of strings, this transformer\nwill iterate over the values and will count the occurrences of each string\nvalue.\n\nHowever, note that this transformer will only do a binary one-hot encoding\nwhen feature values are of type string. 
If categorical features are\nrepresented as numeric values such as int or iterables of strings, the\nDictVectorizer can be followed by\n:class:`~sklearn.preprocessing.OneHotEncoder` to complete\nbinary one-hot encoding.\n\nFeatures that do not occur in a sample (mapping) will have a zero value\nin the resulting array/matrix.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Transforms lists of feature-value mappings to vectors.\n\nThis transformer turns lists of mappings (dict-like objects) of feature\nnames to feature values into Numpy arrays or scipy.sparse matrices for use\nwith scikit-learn estimators.\n\nWhen feature values are strings, this transformer will do a binary one-hot\n(aka one-of-K) coding: one boolean-valued feature is constructed for each\nof the possible string values that the feature can take on. For instance,\na feature \"f\" that can take on the values \"ham\" and \"spam\" will become two\nfeatures in the output, one signifying \"f=ham\", the other \"f=spam\".\n\nIf a feature value is a sequence or set of strings, this transformer\nwill iterate over the values and will count the occurrences of each string\nvalue.\n\nHowever, note that this transformer will only do a binary one-hot encoding\nwhen feature values are of type string. If categorical features are\nrepresented as numeric values such as int or iterables of strings, the\nDictVectorizer can be followed by\n:class:`~sklearn.preprocessing.OneHotEncoder` to complete\nbinary one-hot encoding.\n\nFeatures that do not occur in a sample (mapping) will have a zero value\nin the resulting array/matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndtype : dtype, default=np.float64\n The type of feature values. Passed to Numpy array/scipy.sparse matrix\n constructors as the dtype argument.\nseparator : str, default=\"=\"\n Separator string used when constructing new features for one-hot\n coding.\nsparse : bool, default=True\n Whether transform should produce scipy.sparse matrices.\nsort : bool, default=True\n Whether ``feature_names_`` and ``vocabulary_`` should be\n sorted when fitting.\n\nAttributes\n----------\nvocabulary_ : dict\n A dictionary mapping feature names to feature indices.\n\nfeature_names_ : list\n A list of length n_features containing the feature names (e.g., \"f=ham\"\n and \"f=spam\").\n\nExamples\n--------\n>>> from sklearn.feature_extraction import DictVectorizer\n>>> v = DictVectorizer(sparse=False)\n>>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n>>> X = v.fit_transform(D)\n>>> X\narray([[2., 0., 1.],\n [0., 1., 3.]])\n>>> v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0},\n... {'baz': 1.0, 'foo': 3.0}]\nTrue\n>>> v.transform({'foo': 4, 'unseen_feature': 3})\narray([[0., 0., 4.]])\n\nSee Also\n--------\nFeatureHasher : Performs vectorization using only a hash function.\nsklearn.preprocessing.OrdinalEncoder : Handles nominal/categorical\n features encoded as columns of arbitrary data types.", + "code": "class DictVectorizer(TransformerMixin, BaseEstimator):\n \"\"\"Transforms lists of feature-value mappings to vectors.\n\n This transformer turns lists of mappings (dict-like objects) of feature\n names to feature values into Numpy arrays or scipy.sparse matrices for use\n with scikit-learn estimators.\n\n When feature values are strings, this transformer will do a binary one-hot\n (aka one-of-K) coding: one boolean-valued feature is constructed for each\n of the possible string values that the feature can take on. 
For instance,\n a feature \"f\" that can take on the values \"ham\" and \"spam\" will become two\n features in the output, one signifying \"f=ham\", the other \"f=spam\".\n\n If a feature value is a sequence or set of strings, this transformer\n will iterate over the values and will count the occurrences of each string\n value.\n\n However, note that this transformer will only do a binary one-hot encoding\n when feature values are of type string. If categorical features are\n represented as numeric values such as int or iterables of strings, the\n DictVectorizer can be followed by\n :class:`~sklearn.preprocessing.OneHotEncoder` to complete\n binary one-hot encoding.\n\n Features that do not occur in a sample (mapping) will have a zero value\n in the resulting array/matrix.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n dtype : dtype, default=np.float64\n The type of feature values. Passed to Numpy array/scipy.sparse matrix\n constructors as the dtype argument.\n separator : str, default=\"=\"\n Separator string used when constructing new features for one-hot\n coding.\n sparse : bool, default=True\n Whether transform should produce scipy.sparse matrices.\n sort : bool, default=True\n Whether ``feature_names_`` and ``vocabulary_`` should be\n sorted when fitting.\n\n Attributes\n ----------\n vocabulary_ : dict\n A dictionary mapping feature names to feature indices.\n\n feature_names_ : list\n A list of length n_features containing the feature names (e.g., \"f=ham\"\n and \"f=spam\").\n\n Examples\n --------\n >>> from sklearn.feature_extraction import DictVectorizer\n >>> v = DictVectorizer(sparse=False)\n >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n >>> X = v.fit_transform(D)\n >>> X\n array([[2., 0., 1.],\n [0., 1., 3.]])\n >>> v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0},\n ... {'baz': 1.0, 'foo': 3.0}]\n True\n >>> v.transform({'foo': 4, 'unseen_feature': 3})\n array([[0., 0., 4.]])\n\n See Also\n --------\n FeatureHasher : Performs vectorization using only a hash function.\n sklearn.preprocessing.OrdinalEncoder : Handles nominal/categorical\n features encoded as columns of arbitrary data types.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, dtype=np.float64, separator=\"=\", sparse=True,\n sort=True):\n self.dtype = dtype\n self.separator = separator\n self.sparse = sparse\n self.sort = sort\n\n def _add_iterable_element(self, f, v, feature_names, vocab, *,\n fitting=True, transforming=False,\n indices=None, values=None):\n \"\"\"Add feature names for iterable of strings\"\"\"\n for vv in v:\n if isinstance(vv, str):\n feature_name = \"%s%s%s\" % (f, self.separator, vv)\n vv = 1\n else:\n raise TypeError(f'Unsupported type {type(vv)} in iterable '\n 'value. Only iterables of string are '\n 'supported.')\n if fitting and feature_name not in vocab:\n vocab[feature_name] = len(feature_names)\n feature_names.append(feature_name)\n\n if transforming and feature_name in vocab:\n indices.append(vocab[feature_name])\n values.append(self.dtype(vv))\n\n return\n\n def fit(self, X, y=None):\n \"\"\"Learn a list of feature name -> indices mappings.\n\n Parameters\n ----------\n X : Mapping or iterable over Mappings\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\n .. 
versionchanged:: 0.24\n Accepts multiple string values for one categorical feature.\n\n y : (ignored)\n\n Returns\n -------\n self\n \"\"\"\n feature_names = []\n vocab = {}\n\n for x in X:\n for f, v in x.items():\n if isinstance(v, str):\n feature_name = \"%s%s%s\" % (f, self.separator, v)\n v = 1\n elif isinstance(v, Number) or (v is None):\n feature_name = f\n elif isinstance(v, Mapping):\n raise TypeError(f'Unsupported value type {type(v)} '\n f'for {f}: {v}.\\n'\n 'Mapping objects are not supported.')\n elif isinstance(v, Iterable):\n feature_name = None\n self._add_iterable_element(f, v, feature_names, vocab)\n\n if feature_name is not None:\n if feature_name not in vocab:\n vocab[feature_name] = len(feature_names)\n feature_names.append(feature_name)\n\n if self.sort:\n feature_names.sort()\n vocab = {f: i for i, f in enumerate(feature_names)}\n\n self.feature_names_ = feature_names\n self.vocabulary_ = vocab\n\n return self\n\n def _transform(self, X, fitting):\n # Sanity check: Python's array has no way of explicitly requesting the\n # signed 32-bit integers that scipy.sparse needs, so we use the next\n # best thing: typecode \"i\" (int). However, if that gives larger or\n # smaller integers than 32-bit ones, np.frombuffer screws up.\n assert array(\"i\").itemsize == 4, (\n \"sizeof(int) != 4 on your platform; please report this at\"\n \" https://github.com/scikit-learn/scikit-learn/issues and\"\n \" include the output from platform.platform() in your bug report\")\n\n dtype = self.dtype\n if fitting:\n feature_names = []\n vocab = {}\n else:\n feature_names = self.feature_names_\n vocab = self.vocabulary_\n\n transforming = True\n\n # Process everything as sparse regardless of setting\n X = [X] if isinstance(X, Mapping) else X\n\n indices = array(\"i\")\n indptr = [0]\n # XXX we could change values to an array.array as well, but it\n # would require (heuristic) conversion of dtype to typecode...\n values = []\n\n # collect all the possible feature names and build sparse matrix at\n # same time\n for x in X:\n for f, v in x.items():\n if isinstance(v, str):\n feature_name = \"%s%s%s\" % (f, self.separator, v)\n v = 1\n elif isinstance(v, Number) or (v is None):\n feature_name = f\n elif isinstance(v, Mapping):\n raise TypeError(f'Unsupported value Type {type(v)} '\n f'for {f}: {v}.\\n'\n 'Mapping objects are not supported.')\n elif isinstance(v, Iterable):\n feature_name = None\n self._add_iterable_element(f, v, feature_names, vocab,\n fitting=fitting,\n transforming=transforming,\n indices=indices, values=values)\n\n if feature_name is not None:\n if fitting and feature_name not in vocab:\n vocab[feature_name] = len(feature_names)\n feature_names.append(feature_name)\n\n if feature_name in vocab:\n indices.append(vocab[feature_name])\n values.append(self.dtype(v))\n\n indptr.append(len(indices))\n\n if len(indptr) == 1:\n raise ValueError(\"Sample sequence X is empty.\")\n\n indices = np.frombuffer(indices, dtype=np.intc)\n shape = (len(indptr) - 1, len(vocab))\n\n result_matrix = sp.csr_matrix((values, indices, indptr),\n shape=shape, dtype=dtype)\n\n # Sort everything if asked\n if fitting and self.sort:\n feature_names.sort()\n map_index = np.empty(len(feature_names), dtype=np.int32)\n for new_val, f in enumerate(feature_names):\n map_index[new_val] = vocab[f]\n vocab[f] = new_val\n result_matrix = result_matrix[:, map_index]\n\n if self.sparse:\n result_matrix.sort_indices()\n else:\n result_matrix = result_matrix.toarray()\n\n if fitting:\n self.feature_names_ = 
feature_names\n self.vocabulary_ = vocab\n\n return result_matrix\n\n def fit_transform(self, X, y=None):\n \"\"\"Learn a list of feature name -> indices mappings and transform X.\n\n Like fit(X) followed by transform(X), but does not require\n materializing X in memory.\n\n Parameters\n ----------\n X : Mapping or iterable over Mappings\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\n .. versionchanged:: 0.24\n Accepts multiple string values for one categorical feature.\n\n y : (ignored)\n\n Returns\n -------\n Xa : {array, sparse matrix}\n Feature vectors; always 2-d.\n \"\"\"\n return self._transform(X, fitting=True)\n\n def inverse_transform(self, X, dict_type=dict):\n \"\"\"Transform array or sparse matrix X back to feature mappings.\n\n X must have been produced by this DictVectorizer's transform or\n fit_transform method; it may only have passed through transformers\n that preserve the number of features and their order.\n\n In the case of one-hot/one-of-K coding, the constructed feature\n names and values are returned rather than the original ones.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Sample matrix.\n dict_type : type, default=dict\n Constructor for feature mappings. Must conform to the\n collections.Mapping API.\n\n Returns\n -------\n D : list of dict_type objects of shape (n_samples,)\n Feature mappings for the samples in X.\n \"\"\"\n # COO matrix is not subscriptable\n X = check_array(X, accept_sparse=['csr', 'csc'])\n n_samples = X.shape[0]\n\n names = self.feature_names_\n dicts = [dict_type() for _ in range(n_samples)]\n\n if sp.issparse(X):\n for i, j in zip(*X.nonzero()):\n dicts[i][names[j]] = X[i, j]\n else:\n for i, d in enumerate(dicts):\n for j, v in enumerate(X[i, :]):\n if v != 0:\n d[names[j]] = X[i, j]\n\n return dicts\n\n def transform(self, X):\n \"\"\"Transform feature->value dicts to array or sparse matrix.\n\n Named features not encountered during fit or fit_transform will be\n silently ignored.\n\n Parameters\n ----------\n X : Mapping or iterable over Mappings of shape (n_samples,)\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\n Returns\n -------\n Xa : {array, sparse matrix}\n Feature vectors; always 2-d.\n \"\"\"\n if self.sparse:\n return self._transform(X, fitting=False)\n\n else:\n dtype = self.dtype\n vocab = self.vocabulary_\n X = _tosequence(X)\n Xa = np.zeros((len(X), len(vocab)), dtype=dtype)\n\n for i, x in enumerate(X):\n for f, v in x.items():\n if isinstance(v, str):\n f = \"%s%s%s\" % (f, self.separator, v)\n v = 1\n try:\n Xa[i, vocab[f]] = dtype(v)\n except KeyError:\n pass\n\n return Xa\n\n def get_feature_names(self):\n \"\"\"Returns a list of feature names, ordered by their indices.\n\n If one-of-K coding is applied to categorical features, this will\n include the constructed feature names but not the original ones.\n \"\"\"\n return self.feature_names_\n\n def restrict(self, support, indices=False):\n \"\"\"Restrict the features to those in support using feature selection.\n\n This function modifies the estimator in-place.\n\n Parameters\n ----------\n support : array-like\n Boolean mask or list of indices (as returned by the get_support\n member of feature selectors).\n indices : bool, default=False\n Whether support is a list of indices.\n\n Returns\n -------\n self\n\n Examples\n --------\n >>> from 
sklearn.feature_extraction import DictVectorizer\n >>> from sklearn.feature_selection import SelectKBest, chi2\n >>> v = DictVectorizer()\n >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n >>> X = v.fit_transform(D)\n >>> support = SelectKBest(chi2, k=2).fit(X, [0, 1])\n >>> v.get_feature_names()\n ['bar', 'baz', 'foo']\n >>> v.restrict(support.get_support())\n DictVectorizer()\n >>> v.get_feature_names()\n ['bar', 'foo']\n \"\"\"\n if not indices:\n support = np.where(support)[0]\n\n names = self.feature_names_\n new_vocab = {}\n for i in support:\n new_vocab[names[i]] = len(new_vocab)\n\n self.vocabulary_ = new_vocab\n self.feature_names_ = [f for f, i in sorted(new_vocab.items(),\n key=itemgetter(1))]\n\n return self\n\n def _more_tags(self):\n return {'X_types': [\"dict\"]}", + "instance_attributes": [ + { + "name": "dtype", + "types": { + "kind": "NamedType", + "name": "type" + } + }, + { + "name": "separator", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "sparse", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "sort", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "feature_names_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "vocabulary_", + "types": { + "kind": "NamedType", + "name": "dict" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher", + "name": "FeatureHasher", + "qname": "sklearn.feature_extraction._hash.FeatureHasher", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/__init__", + "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/_validate_params", + "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/fit", + "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/transform", + "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Implements feature hashing, aka the hashing trick.\n\nThis class turns sequences of symbolic feature names (strings) into\nscipy.sparse matrices, using a hash function to compute the matrix column\ncorresponding to a name. The hash function employed is the signed 32-bit\nversion of Murmurhash3.\n\nFeature names of type byte string are used as-is. Unicode strings are\nconverted to UTF-8 first, but no Unicode normalization is done.\nFeature values must be (finite) numbers.\n\nThis class is a low-memory alternative to DictVectorizer and\nCountVectorizer, intended for large-scale (online) learning and situations\nwhere memory is tight, e.g. when running prediction code on embedded\ndevices.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "docstring": "Implements feature hashing, aka the hashing trick.\n\nThis class turns sequences of symbolic feature names (strings) into\nscipy.sparse matrices, using a hash function to compute the matrix column\ncorresponding to a name. The hash function employed is the signed 32-bit\nversion of Murmurhash3.\n\nFeature names of type byte string are used as-is. Unicode strings are\nconverted to UTF-8 first, but no Unicode normalization is done.\nFeature values must be (finite) numbers.\n\nThis class is a low-memory alternative to DictVectorizer and\nCountVectorizer, intended for large-scale (online) learning and situations\nwhere memory is tight, e.g. 
when running prediction code on embedded\ndevices.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_features : int, default=2**20\n The number of features (columns) in the output matrices. Small numbers\n of features are likely to cause hash collisions, but large numbers\n will cause larger coefficient dimensions in linear learners.\ninput_type : {\"dict\", \"pair\", \"string\"}, default=\"dict\"\n Either \"dict\" (the default) to accept dictionaries over\n (feature_name, value); \"pair\" to accept pairs of (feature_name, value);\n or \"string\" to accept single strings.\n feature_name should be a string, while value should be a number.\n In the case of \"string\", a value of 1 is implied.\n The feature_name is hashed to find the appropriate column for the\n feature. The value's sign might be flipped in the output (but see\n non_negative, below).\ndtype : numpy dtype, default=np.float64\n The type of feature values. Passed to scipy.sparse matrix constructors\n as the dtype argument. Do not set this to bool, np.boolean or any\n unsigned integer type.\nalternate_sign : bool, default=True\n When True, an alternating sign is added to the features as to\n approximately conserve the inner product in the hashed space even for\n small n_features. This approach is similar to sparse random projection.\n\n .. versionchanged:: 0.19\n ``alternate_sign`` replaces the now deprecated ``non_negative``\n parameter.\n\nExamples\n--------\n>>> from sklearn.feature_extraction import FeatureHasher\n>>> h = FeatureHasher(n_features=10)\n>>> D = [{'dog': 1, 'cat':2, 'elephant':4},{'dog': 2, 'run': 5}]\n>>> f = h.transform(D)\n>>> f.toarray()\narray([[ 0., 0., -4., -1., 0., 0., 0., 0., 0., 2.],\n [ 0., 0., 0., -2., -5., 0., 0., 0., 0., 0.]])\n\nSee Also\n--------\nDictVectorizer : Vectorizes string-valued features using a hash table.\nsklearn.preprocessing.OneHotEncoder : Handles nominal/categorical features.", + "code": "class FeatureHasher(TransformerMixin, BaseEstimator):\n \"\"\"Implements feature hashing, aka the hashing trick.\n\n This class turns sequences of symbolic feature names (strings) into\n scipy.sparse matrices, using a hash function to compute the matrix column\n corresponding to a name. The hash function employed is the signed 32-bit\n version of Murmurhash3.\n\n Feature names of type byte string are used as-is. Unicode strings are\n converted to UTF-8 first, but no Unicode normalization is done.\n Feature values must be (finite) numbers.\n\n This class is a low-memory alternative to DictVectorizer and\n CountVectorizer, intended for large-scale (online) learning and situations\n where memory is tight, e.g. when running prediction code on embedded\n devices.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.13\n\n Parameters\n ----------\n n_features : int, default=2**20\n The number of features (columns) in the output matrices. Small numbers\n of features are likely to cause hash collisions, but large numbers\n will cause larger coefficient dimensions in linear learners.\n input_type : {\"dict\", \"pair\", \"string\"}, default=\"dict\"\n Either \"dict\" (the default) to accept dictionaries over\n (feature_name, value); \"pair\" to accept pairs of (feature_name, value);\n or \"string\" to accept single strings.\n feature_name should be a string, while value should be a number.\n In the case of \"string\", a value of 1 is implied.\n The feature_name is hashed to find the appropriate column for the\n feature. 
The value's sign might be flipped in the output (but see\n non_negative, below).\n dtype : numpy dtype, default=np.float64\n The type of feature values. Passed to scipy.sparse matrix constructors\n as the dtype argument. Do not set this to bool, np.boolean or any\n unsigned integer type.\n alternate_sign : bool, default=True\n When True, an alternating sign is added to the features as to\n approximately conserve the inner product in the hashed space even for\n small n_features. This approach is similar to sparse random projection.\n\n .. versionchanged:: 0.19\n ``alternate_sign`` replaces the now deprecated ``non_negative``\n parameter.\n\n Examples\n --------\n >>> from sklearn.feature_extraction import FeatureHasher\n >>> h = FeatureHasher(n_features=10)\n >>> D = [{'dog': 1, 'cat':2, 'elephant':4},{'dog': 2, 'run': 5}]\n >>> f = h.transform(D)\n >>> f.toarray()\n array([[ 0., 0., -4., -1., 0., 0., 0., 0., 0., 2.],\n [ 0., 0., 0., -2., -5., 0., 0., 0., 0., 0.]])\n\n See Also\n --------\n DictVectorizer : Vectorizes string-valued features using a hash table.\n sklearn.preprocessing.OneHotEncoder : Handles nominal/categorical features.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_features=(2 ** 20), *, input_type=\"dict\",\n dtype=np.float64, alternate_sign=True):\n self._validate_params(n_features, input_type)\n\n self.dtype = dtype\n self.input_type = input_type\n self.n_features = n_features\n self.alternate_sign = alternate_sign\n\n @staticmethod\n def _validate_params(n_features, input_type):\n # strangely, np.int16 instances are not instances of Integral,\n # while np.int64 instances are...\n if not isinstance(n_features, numbers.Integral):\n raise TypeError(\"n_features must be integral, got %r (%s).\"\n % (n_features, type(n_features)))\n elif n_features < 1 or n_features >= np.iinfo(np.int32).max + 1:\n raise ValueError(\"Invalid number of features (%d).\" % n_features)\n\n if input_type not in (\"dict\", \"pair\", \"string\"):\n raise ValueError(\"input_type must be 'dict', 'pair' or 'string',\"\n \" got %r.\" % input_type)\n\n def fit(self, X=None, y=None):\n \"\"\"No-op.\n\n This method doesn't do anything. It exists purely for compatibility\n with the scikit-learn transformer API.\n\n Parameters\n ----------\n X : ndarray\n\n Returns\n -------\n self : FeatureHasher\n\n \"\"\"\n # repeat input validation for grid search (which calls set_params)\n self._validate_params(self.n_features, self.input_type)\n return self\n\n def transform(self, raw_X):\n \"\"\"Transform a sequence of instances to a scipy.sparse matrix.\n\n Parameters\n ----------\n raw_X : iterable over iterable over raw features, length = n_samples\n Samples. 
Each sample must be an iterable (e.g., a list or tuple)\n            containing/generating feature names (and optionally values, see\n            the input_type constructor argument) which will be hashed.\n            raw_X need not support the len function, so it can be the result\n            of a generator; n_samples is determined on the fly.\n\n        Returns\n        -------\n        X : sparse matrix of shape (n_samples, n_features)\n            Feature matrix, for use with estimators or further transformers.\n\n        \"\"\"\n        raw_X = iter(raw_X)\n        if self.input_type == \"dict\":\n            raw_X = (_iteritems(d) for d in raw_X)\n        elif self.input_type == \"string\":\n            raw_X = (((f, 1) for f in x) for x in raw_X)\n        indices, indptr, values = \\\n            _hashing_transform(raw_X, self.n_features, self.dtype,\n                               self.alternate_sign, seed=0)\n        n_samples = indptr.shape[0] - 1\n\n        if n_samples == 0:\n            raise ValueError(\"Cannot vectorize empty sequence.\")\n\n        X = sp.csr_matrix((values, indices, indptr), dtype=self.dtype,\n                          shape=(n_samples, self.n_features))\n        X.sum_duplicates()  # also sorts the indices\n\n        return X\n\n    def _more_tags(self):\n        return {'X_types': [self.input_type]}", +    "instance_attributes": [ +     { +      "name": "dtype", +      "types": { +       "kind": "NamedType", +       "name": "type" +      } +     }, +     { +      "name": "input_type", +      "types": { +       "kind": "NamedType", +       "name": "str" +      } +     }, +     { +      "name": "n_features", +      "types": { +       "kind": "NamedType", +       "name": "int" +      } +     }, +     { +      "name": "alternate_sign", +      "types": { +       "kind": "NamedType", +       "name": "bool" +      } +     } +    ] +   }, +   { +    "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor", +    "name": "PatchExtractor", +    "qname": "sklearn.feature_extraction.image.PatchExtractor", +    "decorators": [], +    "superclasses": ["BaseEstimator"], +    "methods": [ +     "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/__init__", +     "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/fit", +     "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/transform", +     "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/_more_tags" +    ], +    "is_public": true, +    "reexported_by": [], +    "description": "Extracts patches from a collection of images\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9", +    "docstring": "Extracts patches from a collection of images\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9\n\nParameters\n----------\npatch_size : tuple of int (patch_height, patch_width), default=None\n    The dimensions of one patch.\n\nmax_patches : int or float, default=None\n    The maximum number of patches per image to extract. If max_patches is a\n    float in (0, 1), it is taken to mean a proportion of the total number\n    of patches.\n\nrandom_state : int, RandomState instance, default=None\n    Determines the random number generator used for random sampling when\n    `max_patches` is not None. Use an int to make the randomness\n    deterministic.\n    See :term:`Glossary `.\n\nExamples\n--------\n>>> from sklearn.datasets import load_sample_images\n>>> from sklearn.feature_extraction import image\n>>> # Use the array data from the second image in this dataset:\n>>> X = load_sample_images().images[1]\n>>> print('Image shape: {}'.format(X.shape))\nImage shape: (427, 640, 3)\n>>> pe = image.PatchExtractor(patch_size=(2, 2))\n>>> pe_fit = pe.fit(X)\n>>> pe_trans = pe.transform(X)\n>>> print('Patches shape: {}'.format(pe_trans.shape))\nPatches shape: (545706, 2, 2)", +    "code": "class PatchExtractor(BaseEstimator):\n    \"\"\"Extracts patches from a collection of images\n\n    Read more in the :ref:`User Guide `.\n\n    .. 
versionadded:: 0.9\n\n Parameters\n ----------\n patch_size : tuple of int (patch_height, patch_width), default=None\n The dimensions of one patch.\n\n max_patches : int or float, default=None\n The maximum number of patches per image to extract. If max_patches is a\n float in (0, 1), it is taken to mean a proportion of the total number\n of patches.\n\n random_state : int, RandomState instance, default=None\n Determines the random number generator used for random sampling when\n `max_patches` is not None. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\n Examples\n --------\n >>> from sklearn.datasets import load_sample_images\n >>> from sklearn.feature_extraction import image\n >>> # Use the array data from the second image in this dataset:\n >>> X = load_sample_images().images[1]\n >>> print('Image shape: {}'.format(X.shape))\n Image shape: (427, 640, 3)\n >>> pe = image.PatchExtractor(patch_size=(2, 2))\n >>> pe_fit = pe.fit(X)\n >>> pe_trans = pe.transform(X)\n >>> print('Patches shape: {}'.format(pe_trans.shape))\n Patches shape: (545706, 2, 2)\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, patch_size=None, max_patches=None,\n random_state=None):\n self.patch_size = patch_size\n self.max_patches = max_patches\n self.random_state = random_state\n\n def fit(self, X, y=None):\n \"\"\"Do nothing and return the estimator unchanged.\n\n This method is just there to implement the usual API and hence\n work in pipelines.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n \"\"\"\n return self\n\n def transform(self, X):\n \"\"\"Transforms the image samples in X into a matrix of patch data.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, image_height, image_width) or \\\n (n_samples, image_height, image_width, n_channels)\n Array of images from which to extract patches. 
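The patch count in the example above can be checked by hand, assuming max_patches=None so every possible patch is taken. A small sketch (variable names are illustrative): transform reads X.shape[:3] as (n_samples, image_height, image_width), so the single (427, 640, 3) sample image is treated as 427 images of shape (640, 3)::

    n_images, i_h, i_w = 427, 640, 3    # X.shape[:3] for the sample image
    p_h, p_w = 2, 2                     # patch_size from the example
    per_image = (i_h - p_h + 1) * (i_w - p_w + 1)   # 639 * 2 = 1278
    print(n_images * per_image)         # 545706, matching the example output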
For color images,\n the last dimension specifies the channel: a RGB image would have\n `n_channels=3`.\n\n Returns\n -------\n patches : array of shape (n_patches, patch_height, patch_width) or \\\n (n_patches, patch_height, patch_width, n_channels)\n The collection of patches extracted from the images, where\n `n_patches` is either `n_samples * max_patches` or the total\n number of patches that can be extracted.\n \"\"\"\n self.random_state = check_random_state(self.random_state)\n n_images, i_h, i_w = X.shape[:3]\n X = np.reshape(X, (n_images, i_h, i_w, -1))\n n_channels = X.shape[-1]\n if self.patch_size is None:\n patch_size = i_h // 10, i_w // 10\n else:\n patch_size = self.patch_size\n\n # compute the dimensions of the patches array\n p_h, p_w = patch_size\n n_patches = _compute_n_patches(i_h, i_w, p_h, p_w, self.max_patches)\n patches_shape = (n_images * n_patches,) + patch_size\n if n_channels > 1:\n patches_shape += (n_channels,)\n\n # extract the patches\n patches = np.empty(patches_shape)\n for ii, image in enumerate(X):\n patches[ii * n_patches:(ii + 1) * n_patches] = extract_patches_2d(\n image, patch_size, max_patches=self.max_patches,\n random_state=self.random_state)\n return patches\n\n def _more_tags(self):\n return {'X_types': ['3darray']}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer", + "name": "CountVectorizer", + "qname": "sklearn.feature_extraction.text.CountVectorizer", + "decorators": [], + "superclasses": ["_VectorizerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__", + "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_sort_features", + "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_limit_features", + "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_count_vocab", + "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/fit", + "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/fit_transform", + "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/transform", + "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/inverse_transform", + "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/get_feature_names", + "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "Convert a collection of text documents to a matrix of token counts\n\nThis implementation produces a sparse representation of the counts using\nscipy.sparse.csr_matrix.\n\nIf you do not provide an a-priori dictionary and you do not use an analyzer\nthat does some kind of feature selection then the number of features will\nbe equal to the vocabulary size found by analyzing the data.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Convert a collection of text documents to a matrix of token counts\n\nThis implementation produces a sparse representation of the counts using\nscipy.sparse.csr_matrix.\n\nIf you do not provide an a-priori dictionary and you do not use an analyzer\nthat does some kind of feature selection then the number of features will\nbe equal to the vocabulary size found by analyzing the data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ninput : {'filename', 'file', 'content'}, default='content'\n - If `'filename'`, the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n - If 
`'file'`, the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n - If `'content'`, the input is expected to be a sequence of items that\n can be of type string or byte.\n\nencoding : string, default='utf-8'\n If bytes or files are given to analyze, this encoding is used to\n decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. By default, it is\n 'strict', meaning that a UnicodeDecodeError will be raised. Other\n values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'}, default=None\n Remove accents and perform other character normalization\n during the preprocessing step.\n 'ascii' is a fast method that only works on characters that have\n a direct ASCII mapping.\n 'unicode' is a slightly slower method that works on any characters.\n None (default) does nothing.\n\n Both 'ascii' and 'unicode' use NFKD normalization from\n :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n Override the preprocessing (strip_accents and lowercase) stage while\n preserving the tokenizing and n-grams generation steps.\n Only applies if ``analyzer is not callable``.\n\ntokenizer : callable, default=None\n Override the string tokenization step while preserving the\n preprocessing and n-grams generation steps.\n Only applies if ``analyzer == 'word'``.\n\nstop_words : {'english'}, list, default=None\n If 'english', a built-in stop word list for English is used.\n There are several known issues with 'english' and you should\n consider an alternative (see :ref:`stop_words`).\n\n If a list, that list is assumed to contain stop words, all of which\n will be removed from the resulting tokens.\n Only applies if ``analyzer == 'word'``.\n\n If None, no stop words will be used. max_df can be set to a value\n in the range [0.7, 1.0) to automatically detect and filter stop\n words based on intra corpus document frequency of terms.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n Regular expression denoting what constitutes a \"token\", only used\n if ``analyzer == 'word'``. The default regexp selects tokens of 2\n or more alphanumeric characters (punctuation is completely ignored\n and always treated as a token separator).\n\n If there is a capturing group in token_pattern then the\n captured group content, not the entire match, becomes the token.\n At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n The lower and upper boundary of the range of n-values for different\n word n-grams or char n-grams to be extracted. All values of n such\n that min_n <= n <= max_n will be used. For example an\n ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means\n unigrams and bigrams, and ``(2, 2)`` means only bigrams.\n Only applies if ``analyzer is not callable``.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n Whether the feature should be made of word n-grams or character\n n-grams.\n Option 'char_wb' creates character n-grams only from text inside\n word boundaries; n-grams at the edges of words are padded with space.\n\n If a callable is passed it is used to extract the sequence of features\n out of the raw, unprocessed input.\n\n .. 
versionchanged:: 0.21\n\n Since v0.21, if ``input`` is ``filename`` or ``file``, the data is\n first read from the file and then passed to the given callable\n analyzer.\n\nmax_df : float in range [0.0, 1.0] or int, default=1.0\n When building the vocabulary ignore terms that have a document\n frequency strictly higher than the given threshold (corpus-specific\n stop words).\n If float, the parameter represents a proportion of documents, integer\n absolute counts.\n This parameter is ignored if vocabulary is not None.\n\nmin_df : float in range [0.0, 1.0] or int, default=1\n When building the vocabulary ignore terms that have a document\n frequency strictly lower than the given threshold. This value is also\n called cut-off in the literature.\n If float, the parameter represents a proportion of documents, integer\n absolute counts.\n This parameter is ignored if vocabulary is not None.\n\nmax_features : int, default=None\n If not None, build a vocabulary that only considers the top\n max_features ordered by term frequency across the corpus.\n\n This parameter is ignored if vocabulary is not None.\n\nvocabulary : Mapping or iterable, default=None\n Either a Mapping (e.g., a dict) where keys are terms and values are\n indices in the feature matrix, or an iterable over terms. If not\n given, a vocabulary is determined from the input documents. Indices\n in the mapping should not be repeated and should not have any gap\n between 0 and the largest index.\n\nbinary : bool, default=False\n If True, all non zero counts are set to 1. This is useful for discrete\n probabilistic models that model binary events rather than integer\n counts.\n\ndtype : type, default=np.int64\n Type of the matrix returned by fit_transform() or transform().\n\nAttributes\n----------\nvocabulary_ : dict\n A mapping of terms to feature indices.\n\nfixed_vocabulary_: boolean\n True if a fixed vocabulary of term to indices mapping\n is provided by the user\n\nstop_words_ : set\n Terms that were ignored because they either:\n\n - occurred in too many documents (`max_df`)\n - occurred in too few documents (`min_df`)\n - were cut off by feature selection (`max_features`).\n\n This is only available if no vocabulary was given.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import CountVectorizer\n>>> corpus = [\n... 'This is the first document.',\n... 'This document is the second document.',\n... 'And this is the third one.',\n... 'Is this the first document?',\n... ]\n>>> vectorizer = CountVectorizer()\n>>> X = vectorizer.fit_transform(corpus)\n>>> print(vectorizer.get_feature_names())\n['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']\n>>> print(X.toarray())\n[[0 1 1 1 0 0 1 0 1]\n [0 2 0 1 0 1 1 0 1]\n [1 0 0 1 1 0 1 1 1]\n [0 1 1 1 0 0 1 0 1]]\n>>> vectorizer2 = CountVectorizer(analyzer='word', ngram_range=(2, 2))\n>>> X2 = vectorizer2.fit_transform(corpus)\n>>> print(vectorizer2.get_feature_names())\n['and this', 'document is', 'first document', 'is the', 'is this',\n'second document', 'the first', 'the second', 'the third', 'third one',\n'this document', 'this is', 'this the']\n>>> print(X2.toarray())\n[[0 0 1 1 0 0 1 0 0 0 0 1 0]\n [0 1 0 1 0 1 0 1 0 0 1 0 0]\n [1 0 0 1 0 0 0 0 1 1 0 1 0]\n [0 0 1 0 1 0 1 0 0 0 0 0 1]]\n\nSee Also\n--------\nHashingVectorizer, TfidfVectorizer\n\nNotes\n-----\nThe ``stop_words_`` attribute can get large and increase the model size\nwhen pickling. 
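A short sketch, reusing the four-document corpus from the example above, of how min_df and max_df prune the vocabulary (floats are proportions of documents, integers absolute counts) and of what the default token_pattern matches; the parameter choices are illustrative only::

    import re
    from sklearn.feature_extraction.text import CountVectorizer

    corpus = [
        'This is the first document.',
        'This document is the second document.',
        'And this is the third one.',
        'Is this the first document?',
    ]
    # max_df=0.9 drops terms in more than 90% of documents ('is', 'the',
    # 'this' occur in all four); min_df=2 drops terms seen in fewer than two.
    v = CountVectorizer(max_df=0.9, min_df=2).fit(corpus)
    print(sorted(v.vocabulary_))    # ['document', 'first']
    print(sorted(v.stop_words_))    # the seven pruned terms

    # The default token_pattern keeps only tokens of two or more word
    # characters, which is why single letters vanish:
    print(re.findall(r"(?u)\b\w\w+\b", "a bc, def!"))   # ['bc', 'def']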
This attribute is provided only for introspection and can\nbe safely removed using delattr or set to None before pickling.", + "code": "class CountVectorizer(_VectorizerMixin, BaseEstimator):\n r\"\"\"Convert a collection of text documents to a matrix of token counts\n\n This implementation produces a sparse representation of the counts using\n scipy.sparse.csr_matrix.\n\n If you do not provide an a-priori dictionary and you do not use an analyzer\n that does some kind of feature selection then the number of features will\n be equal to the vocabulary size found by analyzing the data.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n input : {'filename', 'file', 'content'}, default='content'\n - If `'filename'`, the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n - If `'file'`, the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n - If `'content'`, the input is expected to be a sequence of items that\n can be of type string or byte.\n\n encoding : string, default='utf-8'\n If bytes or files are given to analyze, this encoding is used to\n decode.\n\n decode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. By default, it is\n 'strict', meaning that a UnicodeDecodeError will be raised. Other\n values are 'ignore' and 'replace'.\n\n strip_accents : {'ascii', 'unicode'}, default=None\n Remove accents and perform other character normalization\n during the preprocessing step.\n 'ascii' is a fast method that only works on characters that have\n a direct ASCII mapping.\n 'unicode' is a slightly slower method that works on any characters.\n None (default) does nothing.\n\n Both 'ascii' and 'unicode' use NFKD normalization from\n :func:`unicodedata.normalize`.\n\n lowercase : bool, default=True\n Convert all characters to lowercase before tokenizing.\n\n preprocessor : callable, default=None\n Override the preprocessing (strip_accents and lowercase) stage while\n preserving the tokenizing and n-grams generation steps.\n Only applies if ``analyzer is not callable``.\n\n tokenizer : callable, default=None\n Override the string tokenization step while preserving the\n preprocessing and n-grams generation steps.\n Only applies if ``analyzer == 'word'``.\n\n stop_words : {'english'}, list, default=None\n If 'english', a built-in stop word list for English is used.\n There are several known issues with 'english' and you should\n consider an alternative (see :ref:`stop_words`).\n\n If a list, that list is assumed to contain stop words, all of which\n will be removed from the resulting tokens.\n Only applies if ``analyzer == 'word'``.\n\n If None, no stop words will be used. max_df can be set to a value\n in the range [0.7, 1.0) to automatically detect and filter stop\n words based on intra corpus document frequency of terms.\n\n token_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n Regular expression denoting what constitutes a \"token\", only used\n if ``analyzer == 'word'``. 
The default regexp selects tokens of 2\n or more alphanumeric characters (punctuation is completely ignored\n and always treated as a token separator).\n\n If there is a capturing group in token_pattern then the\n captured group content, not the entire match, becomes the token.\n At most one capturing group is permitted.\n\n ngram_range : tuple (min_n, max_n), default=(1, 1)\n The lower and upper boundary of the range of n-values for different\n word n-grams or char n-grams to be extracted. All values of n such\n that min_n <= n <= max_n will be used. For example an\n ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means\n unigrams and bigrams, and ``(2, 2)`` means only bigrams.\n Only applies if ``analyzer is not callable``.\n\n analyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n Whether the feature should be made of word n-grams or character\n n-grams.\n Option 'char_wb' creates character n-grams only from text inside\n word boundaries; n-grams at the edges of words are padded with space.\n\n If a callable is passed it is used to extract the sequence of features\n out of the raw, unprocessed input.\n\n .. versionchanged:: 0.21\n\n Since v0.21, if ``input`` is ``filename`` or ``file``, the data is\n first read from the file and then passed to the given callable\n analyzer.\n\n max_df : float in range [0.0, 1.0] or int, default=1.0\n When building the vocabulary ignore terms that have a document\n frequency strictly higher than the given threshold (corpus-specific\n stop words).\n If float, the parameter represents a proportion of documents, integer\n absolute counts.\n This parameter is ignored if vocabulary is not None.\n\n min_df : float in range [0.0, 1.0] or int, default=1\n When building the vocabulary ignore terms that have a document\n frequency strictly lower than the given threshold. This value is also\n called cut-off in the literature.\n If float, the parameter represents a proportion of documents, integer\n absolute counts.\n This parameter is ignored if vocabulary is not None.\n\n max_features : int, default=None\n If not None, build a vocabulary that only considers the top\n max_features ordered by term frequency across the corpus.\n\n This parameter is ignored if vocabulary is not None.\n\n vocabulary : Mapping or iterable, default=None\n Either a Mapping (e.g., a dict) where keys are terms and values are\n indices in the feature matrix, or an iterable over terms. If not\n given, a vocabulary is determined from the input documents. Indices\n in the mapping should not be repeated and should not have any gap\n between 0 and the largest index.\n\n binary : bool, default=False\n If True, all non zero counts are set to 1. This is useful for discrete\n probabilistic models that model binary events rather than integer\n counts.\n\n dtype : type, default=np.int64\n Type of the matrix returned by fit_transform() or transform().\n\n Attributes\n ----------\n vocabulary_ : dict\n A mapping of terms to feature indices.\n\n fixed_vocabulary_: boolean\n True if a fixed vocabulary of term to indices mapping\n is provided by the user\n\n stop_words_ : set\n Terms that were ignored because they either:\n\n - occurred in too many documents (`max_df`)\n - occurred in too few documents (`min_df`)\n - were cut off by feature selection (`max_features`).\n\n This is only available if no vocabulary was given.\n\n Examples\n --------\n >>> from sklearn.feature_extraction.text import CountVectorizer\n >>> corpus = [\n ... 'This is the first document.',\n ... 
'This document is the second document.',\n ... 'And this is the third one.',\n ... 'Is this the first document?',\n ... ]\n >>> vectorizer = CountVectorizer()\n >>> X = vectorizer.fit_transform(corpus)\n >>> print(vectorizer.get_feature_names())\n ['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']\n >>> print(X.toarray())\n [[0 1 1 1 0 0 1 0 1]\n [0 2 0 1 0 1 1 0 1]\n [1 0 0 1 1 0 1 1 1]\n [0 1 1 1 0 0 1 0 1]]\n >>> vectorizer2 = CountVectorizer(analyzer='word', ngram_range=(2, 2))\n >>> X2 = vectorizer2.fit_transform(corpus)\n >>> print(vectorizer2.get_feature_names())\n ['and this', 'document is', 'first document', 'is the', 'is this',\n 'second document', 'the first', 'the second', 'the third', 'third one',\n 'this document', 'this is', 'this the']\n >>> print(X2.toarray())\n [[0 0 1 1 0 0 1 0 0 0 0 1 0]\n [0 1 0 1 0 1 0 1 0 0 1 0 0]\n [1 0 0 1 0 0 0 0 1 1 0 1 0]\n [0 0 1 0 1 0 1 0 0 0 0 0 1]]\n\n See Also\n --------\n HashingVectorizer, TfidfVectorizer\n\n Notes\n -----\n The ``stop_words_`` attribute can get large and increase the model size\n when pickling. This attribute is provided only for introspection and can\n be safely removed using delattr or set to None before pickling.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, input='content', encoding='utf-8',\n decode_error='strict', strip_accents=None,\n lowercase=True, preprocessor=None, tokenizer=None,\n stop_words=None, token_pattern=r\"(?u)\\b\\w\\w+\\b\",\n ngram_range=(1, 1), analyzer='word',\n max_df=1.0, min_df=1, max_features=None,\n vocabulary=None, binary=False, dtype=np.int64):\n self.input = input\n self.encoding = encoding\n self.decode_error = decode_error\n self.strip_accents = strip_accents\n self.preprocessor = preprocessor\n self.tokenizer = tokenizer\n self.analyzer = analyzer\n self.lowercase = lowercase\n self.token_pattern = token_pattern\n self.stop_words = stop_words\n self.max_df = max_df\n self.min_df = min_df\n if max_df < 0 or min_df < 0:\n raise ValueError(\"negative value for max_df or min_df\")\n self.max_features = max_features\n if max_features is not None:\n if (not isinstance(max_features, numbers.Integral) or\n max_features <= 0):\n raise ValueError(\n \"max_features=%r, neither a positive integer nor None\"\n % max_features)\n self.ngram_range = ngram_range\n self.vocabulary = vocabulary\n self.binary = binary\n self.dtype = dtype\n\n def _sort_features(self, X, vocabulary):\n \"\"\"Sort features by name\n\n Returns a reordered matrix and modifies the vocabulary in place\n \"\"\"\n sorted_features = sorted(vocabulary.items())\n map_index = np.empty(len(sorted_features), dtype=X.indices.dtype)\n for new_val, (term, old_val) in enumerate(sorted_features):\n vocabulary[term] = new_val\n map_index[old_val] = new_val\n\n X.indices = map_index.take(X.indices, mode='clip')\n return X\n\n def _limit_features(self, X, vocabulary, high=None, low=None,\n limit=None):\n \"\"\"Remove too rare or too common features.\n\n Prune features that are non zero in more samples than high or less\n documents than low, modifying the vocabulary, and restricting it to\n at most the limit most frequent.\n\n This does not prune samples with zero features.\n \"\"\"\n if high is None and low is None and limit is None:\n return X, set()\n\n # Calculate a mask based on document frequencies\n dfs = _document_frequency(X)\n mask = np.ones(len(dfs), dtype=bool)\n if high is not None:\n mask &= dfs <= high\n if low is not None:\n mask &= dfs >= low\n if limit is not None and mask.sum() > 
limit:\n tfs = np.asarray(X.sum(axis=0)).ravel()\n mask_inds = (-tfs[mask]).argsort()[:limit]\n new_mask = np.zeros(len(dfs), dtype=bool)\n new_mask[np.where(mask)[0][mask_inds]] = True\n mask = new_mask\n\n new_indices = np.cumsum(mask) - 1 # maps old indices to new\n removed_terms = set()\n for term, old_index in list(vocabulary.items()):\n if mask[old_index]:\n vocabulary[term] = new_indices[old_index]\n else:\n del vocabulary[term]\n removed_terms.add(term)\n kept_indices = np.where(mask)[0]\n if len(kept_indices) == 0:\n raise ValueError(\"After pruning, no terms remain. Try a lower\"\n \" min_df or a higher max_df.\")\n return X[:, kept_indices], removed_terms\n\n def _count_vocab(self, raw_documents, fixed_vocab):\n \"\"\"Create sparse feature matrix, and vocabulary where fixed_vocab=False\n \"\"\"\n if fixed_vocab:\n vocabulary = self.vocabulary_\n else:\n # Add a new value when a new vocabulary item is seen\n vocabulary = defaultdict()\n vocabulary.default_factory = vocabulary.__len__\n\n analyze = self.build_analyzer()\n j_indices = []\n indptr = []\n\n values = _make_int_array()\n indptr.append(0)\n for doc in raw_documents:\n feature_counter = {}\n for feature in analyze(doc):\n try:\n feature_idx = vocabulary[feature]\n if feature_idx not in feature_counter:\n feature_counter[feature_idx] = 1\n else:\n feature_counter[feature_idx] += 1\n except KeyError:\n # Ignore out-of-vocabulary items for fixed_vocab=True\n continue\n\n j_indices.extend(feature_counter.keys())\n values.extend(feature_counter.values())\n indptr.append(len(j_indices))\n\n if not fixed_vocab:\n # disable defaultdict behaviour\n vocabulary = dict(vocabulary)\n if not vocabulary:\n raise ValueError(\"empty vocabulary; perhaps the documents only\"\n \" contain stop words\")\n\n if indptr[-1] > np.iinfo(np.int32).max: # = 2**31 - 1\n if _IS_32BIT:\n raise ValueError(('sparse CSR array has {} non-zero '\n 'elements and requires 64 bit indexing, '\n 'which is unsupported with 32 bit Python.')\n .format(indptr[-1]))\n indices_dtype = np.int64\n\n else:\n indices_dtype = np.int32\n j_indices = np.asarray(j_indices, dtype=indices_dtype)\n indptr = np.asarray(indptr, dtype=indices_dtype)\n values = np.frombuffer(values, dtype=np.intc)\n\n X = sp.csr_matrix((values, j_indices, indptr),\n shape=(len(indptr) - 1, len(vocabulary)),\n dtype=self.dtype)\n X.sort_indices()\n return vocabulary, X\n\n def fit(self, raw_documents, y=None):\n \"\"\"Learn a vocabulary dictionary of all tokens in the raw documents.\n\n Parameters\n ----------\n raw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\n Returns\n -------\n self\n \"\"\"\n self._warn_for_unused_params()\n self.fit_transform(raw_documents)\n return self\n\n def fit_transform(self, raw_documents, y=None):\n \"\"\"Learn the vocabulary dictionary and return document-term matrix.\n\n This is equivalent to fit followed by transform, but more efficiently\n implemented.\n\n Parameters\n ----------\n raw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\n Returns\n -------\n X : array of shape (n_samples, n_features)\n Document-term matrix.\n \"\"\"\n # We intentionally don't call the transform method to make\n # fit_transform overridable without unwanted side effects in\n # TfidfVectorizer.\n if isinstance(raw_documents, str):\n raise ValueError(\n \"Iterable over raw text documents expected, \"\n \"string object received.\")\n\n self._validate_params()\n self._validate_vocabulary()\n max_df = 
self.max_df\n min_df = self.min_df\n max_features = self.max_features\n\n vocabulary, X = self._count_vocab(raw_documents,\n self.fixed_vocabulary_)\n\n if self.binary:\n X.data.fill(1)\n\n if not self.fixed_vocabulary_:\n n_doc = X.shape[0]\n max_doc_count = (max_df\n if isinstance(max_df, numbers.Integral)\n else max_df * n_doc)\n min_doc_count = (min_df\n if isinstance(min_df, numbers.Integral)\n else min_df * n_doc)\n if max_doc_count < min_doc_count:\n raise ValueError(\n \"max_df corresponds to < documents than min_df\")\n if max_features is not None:\n X = self._sort_features(X, vocabulary)\n X, self.stop_words_ = self._limit_features(X, vocabulary,\n max_doc_count,\n min_doc_count,\n max_features)\n if max_features is None:\n X = self._sort_features(X, vocabulary)\n self.vocabulary_ = vocabulary\n\n return X\n\n def transform(self, raw_documents):\n \"\"\"Transform documents to document-term matrix.\n\n Extract token counts out of raw text documents using the vocabulary\n fitted with fit or the one provided to the constructor.\n\n Parameters\n ----------\n raw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\n Returns\n -------\n X : sparse matrix of shape (n_samples, n_features)\n Document-term matrix.\n \"\"\"\n if isinstance(raw_documents, str):\n raise ValueError(\n \"Iterable over raw text documents expected, \"\n \"string object received.\")\n self._check_vocabulary()\n\n # use the same matrix-building strategy as fit_transform\n _, X = self._count_vocab(raw_documents, fixed_vocab=True)\n if self.binary:\n X.data.fill(1)\n return X\n\n def inverse_transform(self, X):\n \"\"\"Return terms per document with nonzero entries in X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document-term matrix.\n\n Returns\n -------\n X_inv : list of arrays of shape (n_samples,)\n List of arrays of terms.\n \"\"\"\n self._check_vocabulary()\n # We need CSR format for fast row manipulations.\n X = check_array(X, accept_sparse='csr')\n n_samples = X.shape[0]\n\n terms = np.array(list(self.vocabulary_.keys()))\n indices = np.array(list(self.vocabulary_.values()))\n inverse_vocabulary = terms[np.argsort(indices)]\n\n if sp.issparse(X):\n return [inverse_vocabulary[X[i, :].nonzero()[1]].ravel()\n for i in range(n_samples)]\n else:\n return [inverse_vocabulary[np.flatnonzero(X[i, :])].ravel()\n for i in range(n_samples)]\n\n def get_feature_names(self):\n \"\"\"Array mapping from feature integer indices to feature name.\n\n Returns\n -------\n feature_names : list\n A list of feature names.\n \"\"\"\n\n self._check_vocabulary()\n\n return [t for t, i in sorted(self.vocabulary_.items(),\n key=itemgetter(1))]\n\n def _more_tags(self):\n return {'X_types': ['string']}", + "instance_attributes": [ + { + "name": "input", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "encoding", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "decode_error", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "analyzer", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "lowercase", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "token_pattern", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "max_df", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "min_df", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "ngram_range", + 
"types": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "name": "binary", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "dtype", + "types": { + "kind": "NamedType", + "name": "type" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer", + "name": "HashingVectorizer", + "qname": "sklearn.feature_extraction.text.HashingVectorizer", + "decorators": [], + "superclasses": ["TransformerMixin", "_VectorizerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__", + "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/partial_fit", + "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/fit", + "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/transform", + "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/fit_transform", + "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/_get_hasher", + "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "Convert a collection of text documents to a matrix of token occurrences\n\nIt turns a collection of text documents into a scipy.sparse matrix holding\ntoken occurrence counts (or binary occurrence information), possibly\nnormalized as token frequencies if norm='l1' or projected on the euclidean\nunit sphere if norm='l2'.\n\nThis text vectorizer implementation uses the hashing trick to find the\ntoken string name to feature integer index mapping.\n\nThis strategy has several advantages:\n\n- it is very low memory scalable to large datasets as there is no need to\n store a vocabulary dictionary in memory\n\n- it is fast to pickle and un-pickle as it holds no state besides the\n constructor parameters\n\n- it can be used in a streaming (partial fit) or parallel pipeline as there\n is no state computed during fit.\n\nThere are also a couple of cons (vs using a CountVectorizer with an\nin-memory vocabulary):\n\n- there is no way to compute the inverse transform (from feature indices to\n string feature names) which can be a problem when trying to introspect\n which features are most important to a model.\n\n- there can be collisions: distinct tokens can be mapped to the same\n feature index. However in practice this is rarely an issue if n_features\n is large enough (e.g. 
2 ** 18 for text classification problems).\n\n- no IDF weighting as this would render the transformer stateful.\n\nThe hash function employed is the signed 32-bit version of Murmurhash3.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Convert a collection of text documents to a matrix of token occurrences\n\nIt turns a collection of text documents into a scipy.sparse matrix holding\ntoken occurrence counts (or binary occurrence information), possibly\nnormalized as token frequencies if norm='l1' or projected on the euclidean\nunit sphere if norm='l2'.\n\nThis text vectorizer implementation uses the hashing trick to find the\ntoken string name to feature integer index mapping.\n\nThis strategy has several advantages:\n\n- it is very low memory scalable to large datasets as there is no need to\n store a vocabulary dictionary in memory\n\n- it is fast to pickle and un-pickle as it holds no state besides the\n constructor parameters\n\n- it can be used in a streaming (partial fit) or parallel pipeline as there\n is no state computed during fit.\n\nThere are also a couple of cons (vs using a CountVectorizer with an\nin-memory vocabulary):\n\n- there is no way to compute the inverse transform (from feature indices to\n string feature names) which can be a problem when trying to introspect\n which features are most important to a model.\n\n- there can be collisions: distinct tokens can be mapped to the same\n feature index. However in practice this is rarely an issue if n_features\n is large enough (e.g. 2 ** 18 for text classification problems).\n\n- no IDF weighting as this would render the transformer stateful.\n\nThe hash function employed is the signed 32-bit version of Murmurhash3.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\ninput : {'filename', 'file', 'content'}, default='content'\n - If `'filename'`, the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n - If `'file'`, the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n - If `'content'`, the input is expected to be a sequence of items that\n can be of type string or byte.\n\nencoding : string, default='utf-8'\n If bytes or files are given to analyze, this encoding is used to\n decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. By default, it is\n 'strict', meaning that a UnicodeDecodeError will be raised. 
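The collision trade-off described above is easy to provoke with a deliberately tiny n_features; the exact columns depend on MurmurHash3 and are not guaranteed, so this sketch is an illustration only::

    from sklearn.feature_extraction.text import HashingVectorizer

    hv = HashingVectorizer(n_features=2 ** 4, alternate_sign=False, norm=None)
    X = hv.transform(["red green blue purple orange cyan"])
    # With six tokens spread over only 16 buckets, two tokens may share a
    # column, in which case X.nnz < 6 and the collided counts are summed.
    print(X.nnz)
    # The transformer keeps no vocabulary_, which is why an inverse transform
    # from column indices back to tokens is impossible.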
Other\n values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'}, default=None\n Remove accents and perform other character normalization\n during the preprocessing step.\n 'ascii' is a fast method that only works on characters that have\n a direct ASCII mapping.\n 'unicode' is a slightly slower method that works on any characters.\n None (default) does nothing.\n\n Both 'ascii' and 'unicode' use NFKD normalization from\n :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n Override the preprocessing (string transformation) stage while\n preserving the tokenizing and n-grams generation steps.\n Only applies if ``analyzer is not callable``.\n\ntokenizer : callable, default=None\n Override the string tokenization step while preserving the\n preprocessing and n-grams generation steps.\n Only applies if ``analyzer == 'word'``.\n\nstop_words : {'english'}, list, default=None\n If 'english', a built-in stop word list for English is used.\n There are several known issues with 'english' and you should\n consider an alternative (see :ref:`stop_words`).\n\n If a list, that list is assumed to contain stop words, all of which\n will be removed from the resulting tokens.\n Only applies if ``analyzer == 'word'``.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n Regular expression denoting what constitutes a \"token\", only used\n if ``analyzer == 'word'``. The default regexp selects tokens of 2\n or more alphanumeric characters (punctuation is completely ignored\n and always treated as a token separator).\n\n If there is a capturing group in token_pattern then the\n captured group content, not the entire match, becomes the token.\n At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n The lower and upper boundary of the range of n-values for different\n n-grams to be extracted. All values of n such that min_n <= n <= max_n\n will be used. For example an ``ngram_range`` of ``(1, 1)`` means only\n unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n only bigrams.\n Only applies if ``analyzer is not callable``.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n Whether the feature should be made of word or character n-grams.\n Option 'char_wb' creates character n-grams only from text inside\n word boundaries; n-grams at the edges of words are padded with space.\n\n If a callable is passed it is used to extract the sequence of features\n out of the raw, unprocessed input.\n\n .. versionchanged:: 0.21\n Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n is first read from the file and then passed to the given callable\n analyzer.\n\nn_features : int, default=(2 ** 20)\n The number of features (columns) in the output matrices. Small numbers\n of features are likely to cause hash collisions, but large numbers\n will cause larger coefficient dimensions in linear learners.\n\nbinary : bool, default=False.\n If True, all non zero counts are set to 1. This is useful for discrete\n probabilistic models that model binary events rather than integer\n counts.\n\nnorm : {'l1', 'l2'}, default='l2'\n Norm used to normalize term vectors. None for no normalization.\n\nalternate_sign : bool, default=True\n When True, an alternating sign is added to the features as to\n approximately conserve the inner product in the hashed space even for\n small n_features. 
This approach is similar to sparse random projection.\n\n .. versionadded:: 0.19\n\ndtype : type, default=np.float64\n Type of the matrix returned by fit_transform() or transform().\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import HashingVectorizer\n>>> corpus = [\n... 'This is the first document.',\n... 'This document is the second document.',\n... 'And this is the third one.',\n... 'Is this the first document?',\n... ]\n>>> vectorizer = HashingVectorizer(n_features=2**4)\n>>> X = vectorizer.fit_transform(corpus)\n>>> print(X.shape)\n(4, 16)\n\nSee Also\n--------\nCountVectorizer, TfidfVectorizer", + "code": "class HashingVectorizer(TransformerMixin, _VectorizerMixin, BaseEstimator):\n r\"\"\"Convert a collection of text documents to a matrix of token occurrences\n\n It turns a collection of text documents into a scipy.sparse matrix holding\n token occurrence counts (or binary occurrence information), possibly\n normalized as token frequencies if norm='l1' or projected on the euclidean\n unit sphere if norm='l2'.\n\n This text vectorizer implementation uses the hashing trick to find the\n token string name to feature integer index mapping.\n\n This strategy has several advantages:\n\n - it is very low memory scalable to large datasets as there is no need to\n store a vocabulary dictionary in memory\n\n - it is fast to pickle and un-pickle as it holds no state besides the\n constructor parameters\n\n - it can be used in a streaming (partial fit) or parallel pipeline as there\n is no state computed during fit.\n\n There are also a couple of cons (vs using a CountVectorizer with an\n in-memory vocabulary):\n\n - there is no way to compute the inverse transform (from feature indices to\n string feature names) which can be a problem when trying to introspect\n which features are most important to a model.\n\n - there can be collisions: distinct tokens can be mapped to the same\n feature index. However in practice this is rarely an issue if n_features\n is large enough (e.g. 2 ** 18 for text classification problems).\n\n - no IDF weighting as this would render the transformer stateful.\n\n The hash function employed is the signed 32-bit version of Murmurhash3.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n\n input : {'filename', 'file', 'content'}, default='content'\n - If `'filename'`, the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n - If `'file'`, the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n - If `'content'`, the input is expected to be a sequence of items that\n can be of type string or byte.\n\n encoding : string, default='utf-8'\n If bytes or files are given to analyze, this encoding is used to\n decode.\n\n decode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. By default, it is\n 'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n values are 'ignore' and 'replace'.\n\n strip_accents : {'ascii', 'unicode'}, default=None\n Remove accents and perform other character normalization\n during the preprocessing step.\n 'ascii' is a fast method that only works on characters that have\n a direct ASCII mapping.\n 'unicode' is a slightly slower method that works on any characters.\n None (default) does nothing.\n\n Both 'ascii' and 'unicode' use NFKD normalization from\n :func:`unicodedata.normalize`.\n\n lowercase : bool, default=True\n Convert all characters to lowercase before tokenizing.\n\n preprocessor : callable, default=None\n Override the preprocessing (string transformation) stage while\n preserving the tokenizing and n-grams generation steps.\n Only applies if ``analyzer is not callable``.\n\n tokenizer : callable, default=None\n Override the string tokenization step while preserving the\n preprocessing and n-grams generation steps.\n Only applies if ``analyzer == 'word'``.\n\n stop_words : {'english'}, list, default=None\n If 'english', a built-in stop word list for English is used.\n There are several known issues with 'english' and you should\n consider an alternative (see :ref:`stop_words`).\n\n If a list, that list is assumed to contain stop words, all of which\n will be removed from the resulting tokens.\n Only applies if ``analyzer == 'word'``.\n\n token_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n Regular expression denoting what constitutes a \"token\", only used\n if ``analyzer == 'word'``. The default regexp selects tokens of 2\n or more alphanumeric characters (punctuation is completely ignored\n and always treated as a token separator).\n\n If there is a capturing group in token_pattern then the\n captured group content, not the entire match, becomes the token.\n At most one capturing group is permitted.\n\n ngram_range : tuple (min_n, max_n), default=(1, 1)\n The lower and upper boundary of the range of n-values for different\n n-grams to be extracted. All values of n such that min_n <= n <= max_n\n will be used. For example an ``ngram_range`` of ``(1, 1)`` means only\n unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n only bigrams.\n Only applies if ``analyzer is not callable``.\n\n analyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n Whether the feature should be made of word or character n-grams.\n Option 'char_wb' creates character n-grams only from text inside\n word boundaries; n-grams at the edges of words are padded with space.\n\n If a callable is passed it is used to extract the sequence of features\n out of the raw, unprocessed input.\n\n .. versionchanged:: 0.21\n Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n is first read from the file and then passed to the given callable\n analyzer.\n\n n_features : int, default=(2 ** 20)\n The number of features (columns) in the output matrices. Small numbers\n of features are likely to cause hash collisions, but large numbers\n will cause larger coefficient dimensions in linear learners.\n\n binary : bool, default=False.\n If True, all non zero counts are set to 1. This is useful for discrete\n probabilistic models that model binary events rather than integer\n counts.\n\n norm : {'l1', 'l2'}, default='l2'\n Norm used to normalize term vectors. None for no normalization.\n\n alternate_sign : bool, default=True\n When True, an alternating sign is added to the features as to\n approximately conserve the inner product in the hashed space even for\n small n_features. 
This approach is similar to sparse random projection.\n\n .. versionadded:: 0.19\n\n dtype : type, default=np.float64\n Type of the matrix returned by fit_transform() or transform().\n\n Examples\n --------\n >>> from sklearn.feature_extraction.text import HashingVectorizer\n >>> corpus = [\n ... 'This is the first document.',\n ... 'This document is the second document.',\n ... 'And this is the third one.',\n ... 'Is this the first document?',\n ... ]\n >>> vectorizer = HashingVectorizer(n_features=2**4)\n >>> X = vectorizer.fit_transform(corpus)\n >>> print(X.shape)\n (4, 16)\n\n See Also\n --------\n CountVectorizer, TfidfVectorizer\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, input='content', encoding='utf-8',\n decode_error='strict', strip_accents=None,\n lowercase=True, preprocessor=None, tokenizer=None,\n stop_words=None, token_pattern=r\"(?u)\\b\\w\\w+\\b\",\n ngram_range=(1, 1), analyzer='word', n_features=(2 ** 20),\n binary=False, norm='l2', alternate_sign=True,\n dtype=np.float64):\n self.input = input\n self.encoding = encoding\n self.decode_error = decode_error\n self.strip_accents = strip_accents\n self.preprocessor = preprocessor\n self.tokenizer = tokenizer\n self.analyzer = analyzer\n self.lowercase = lowercase\n self.token_pattern = token_pattern\n self.stop_words = stop_words\n self.n_features = n_features\n self.ngram_range = ngram_range\n self.binary = binary\n self.norm = norm\n self.alternate_sign = alternate_sign\n self.dtype = dtype\n\n def partial_fit(self, X, y=None):\n \"\"\"Does nothing: this transformer is stateless.\n\n This method is just there to mark the fact that this transformer\n can work in a streaming setup.\n\n Parameters\n ----------\n X : ndarray of shape [n_samples, n_features]\n Training data.\n \"\"\"\n return self\n\n def fit(self, X, y=None):\n \"\"\"Does nothing: this transformer is stateless.\n\n Parameters\n ----------\n X : ndarray of shape [n_samples, n_features]\n Training data.\n \"\"\"\n # triggers a parameter validation\n if isinstance(X, str):\n raise ValueError(\n \"Iterable over raw text documents expected, \"\n \"string object received.\")\n\n self._warn_for_unused_params()\n self._validate_params()\n\n self._get_hasher().fit(X, y=y)\n return self\n\n def transform(self, X):\n \"\"\"Transform a sequence of documents to a document-term matrix.\n\n Parameters\n ----------\n X : iterable over raw text documents, length = n_samples\n Samples. Each sample must be a text document (either bytes or\n unicode strings, file name or file object depending on the\n constructor argument) which will be tokenized and hashed.\n\n Returns\n -------\n X : sparse matrix of shape (n_samples, n_features)\n Document-term matrix.\n \"\"\"\n if isinstance(X, str):\n raise ValueError(\n \"Iterable over raw text documents expected, \"\n \"string object received.\")\n\n self._validate_params()\n\n analyzer = self.build_analyzer()\n X = self._get_hasher().transform(analyzer(doc) for doc in X)\n if self.binary:\n X.data.fill(1)\n if self.norm is not None:\n X = normalize(X, norm=self.norm, copy=False)\n return X\n\n def fit_transform(self, X, y=None):\n \"\"\"Transform a sequence of documents to a document-term matrix.\n\n Parameters\n ----------\n X : iterable over raw text documents, length = n_samples\n Samples. Each sample must be a text document (either bytes or\n unicode strings, file name or file object depending on the\n constructor argument) which will be tokenized and hashed.\n y : any\n Ignored. 
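Because fit computes nothing, the streaming setup that partial_fit advertises reduces to transforming each batch independently. A sketch under invented data, pairing the stateless vectorizer with an incrementally trained classifier::

    from sklearn.feature_extraction.text import HashingVectorizer
    from sklearn.linear_model import SGDClassifier

    hv = HashingVectorizer(n_features=2 ** 18)
    clf = SGDClassifier()
    batches = [(["good movie", "bad film"], [1, 0]),
               (["great plot", "awful acting"], [1, 0])]
    for docs, y in batches:
        # hashing needs no fitted state, so each batch is vectorized alone
        clf.partial_fit(hv.transform(docs), y, classes=[0, 1])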
This parameter exists only for compatibility with\n sklearn.pipeline.Pipeline.\n\n Returns\n -------\n X : sparse matrix of shape (n_samples, n_features)\n Document-term matrix.\n \"\"\"\n return self.fit(X, y).transform(X)\n\n def _get_hasher(self):\n return FeatureHasher(n_features=self.n_features,\n input_type='string', dtype=self.dtype,\n alternate_sign=self.alternate_sign)\n\n def _more_tags(self):\n return {'X_types': ['string']}", + "instance_attributes": [ + { + "name": "input", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "encoding", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "decode_error", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "analyzer", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "lowercase", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "token_pattern", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_features", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "ngram_range", + "types": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "name": "binary", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "norm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "alternate_sign", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "dtype", + "types": { + "kind": "NamedType", + "name": "type" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer", + "name": "TfidfTransformer", + "qname": "sklearn.feature_extraction.text.TfidfTransformer", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/__init__", + "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/fit", + "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/transform", + "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/idf_@getter", + "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/idf_@setter", + "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "Transform a count matrix to a normalized tf or tf-idf representation\n\nTf means term-frequency while tf-idf means term-frequency times inverse\ndocument-frequency. This is a common term weighting scheme in information\nretrieval, that has also found good use in document classification.\n\nThe goal of using tf-idf instead of the raw frequencies of occurrence of a\ntoken in a given document is to scale down the impact of tokens that occur\nvery frequently in a given corpus and that are hence empirically less\ninformative than features that occur in a small fraction of the training\ncorpus.\n\nThe formula that is used to compute the tf-idf for a term t of a document d\nin a document set is tf-idf(t, d) = tf(t, d) * idf(t), and the idf is\ncomputed as idf(t) = log [ n / df(t) ] + 1 (if ``smooth_idf=False``), where\nn is the total number of documents in the document set and df(t) is the\ndocument frequency of t; the document frequency is the number of documents\nin the document set that contain the term t. 
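The non-smoothed formula just stated can be checked numerically; the counts here (n = 4 documents, df(t) = 2) are invented for illustration::

    import numpy as np

    n, df = 4, 2
    idf = np.log(n / df) + 1   # idf(t) = log[n / df(t)] + 1, smooth_idf=False
    print(idf)                 # 1.6931..., i.e. ln(2) + 1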
The effect of adding \"1\" to\nthe idf in the equation above is that terms with zero idf, i.e., terms\nthat occur in all documents in a training set, will not be entirely\nignored.\n(Note that the idf formula above differs from the standard textbook\nnotation that defines the idf as\nidf(t) = log [ n / (df(t) + 1) ]).\n\nIf ``smooth_idf=True`` (the default), the constant \"1\" is added to the\nnumerator and denominator of the idf as if an extra document was seen\ncontaining every term in the collection exactly once, which prevents\nzero divisions: idf(t) = log [ (1 + n) / (1 + df(t)) ] + 1.\n\nFurthermore, the formulas used to compute tf and idf depend\non parameter settings that correspond to the SMART notation used in IR\nas follows:\n\nTf is \"n\" (natural) by default, \"l\" (logarithmic) when\n``sublinear_tf=True``.\nIdf is \"t\" when use_idf is given, \"n\" (none) otherwise.\nNormalization is \"c\" (cosine) when ``norm='l2'``, \"n\" (none)\nwhen ``norm=None``.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Transform a count matrix to a normalized tf or tf-idf representation\n\nTf means term-frequency while tf-idf means term-frequency times inverse\ndocument-frequency. This is a common term weighting scheme in information\nretrieval, that has also found good use in document classification.\n\nThe goal of using tf-idf instead of the raw frequencies of occurrence of a\ntoken in a given document is to scale down the impact of tokens that occur\nvery frequently in a given corpus and that are hence empirically less\ninformative than features that occur in a small fraction of the training\ncorpus.\n\nThe formula that is used to compute the tf-idf for a term t of a document d\nin a document set is tf-idf(t, d) = tf(t, d) * idf(t), and the idf is\ncomputed as idf(t) = log [ n / df(t) ] + 1 (if ``smooth_idf=False``), where\nn is the total number of documents in the document set and df(t) is the\ndocument frequency of t; the document frequency is the number of documents\nin the document set that contain the term t. The effect of adding \"1\" to\nthe idf in the equation above is that terms with zero idf, i.e., terms\nthat occur in all documents in a training set, will not be entirely\nignored.\n(Note that the idf formula above differs from the standard textbook\nnotation that defines the idf as\nidf(t) = log [ n / (df(t) + 1) ]).\n\nIf ``smooth_idf=True`` (the default), the constant \"1\" is added to the\nnumerator and denominator of the idf as if an extra document was seen\ncontaining every term in the collection exactly once, which prevents\nzero divisions: idf(t) = log [ (1 + n) / (1 + df(t)) ] + 1.\n\nFurthermore, the formulas used to compute tf and idf depend\non parameter settings that correspond to the SMART notation used in IR\nas follows:\n\nTf is \"n\" (natural) by default, \"l\" (logarithmic) when\n``sublinear_tf=True``.\nIdf is \"t\" when use_idf is given, \"n\" (none) otherwise.\nNormalization is \"c\" (cosine) when ``norm='l2'``, \"n\" (none)\nwhen ``norm=None``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nnorm : {'l1', 'l2'}, default='l2'\n Each output row will have unit norm, either:\n * 'l2': Sum of squares of vector elements is 1. 
The cosine\n similarity between two vectors is their dot product when l2 norm has\n been applied.\n * 'l1': Sum of absolute values of vector elements is 1.\n See :func:`preprocessing.normalize`\n\nuse_idf : bool, default=True\n Enable inverse-document-frequency reweighting.\n\nsmooth_idf : bool, default=True\n Smooth idf weights by adding one to document frequencies, as if an\n extra document was seen containing every term in the collection\n exactly once. Prevents zero divisions.\n\nsublinear_tf : bool, default=False\n Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).\n\nAttributes\n----------\nidf_ : array of shape (n_features)\n The inverse document frequency (IDF) vector; only defined\n if ``use_idf`` is True.\n\n .. versionadded:: 0.20\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import TfidfTransformer\n>>> from sklearn.feature_extraction.text import CountVectorizer\n>>> from sklearn.pipeline import Pipeline\n>>> import numpy as np\n>>> corpus = ['this is the first document',\n... 'this document is the second document',\n... 'and this is the third one',\n... 'is this the first document']\n>>> vocabulary = ['this', 'document', 'first', 'is', 'second', 'the',\n... 'and', 'one']\n>>> pipe = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)),\n... ('tfid', TfidfTransformer())]).fit(corpus)\n>>> pipe['count'].transform(corpus).toarray()\narray([[1, 1, 1, 1, 0, 1, 0, 0],\n [1, 2, 0, 1, 1, 1, 0, 0],\n [1, 0, 0, 1, 0, 1, 1, 1],\n [1, 1, 1, 1, 0, 1, 0, 0]])\n>>> pipe['tfid'].idf_\narray([1. , 1.22314355, 1.51082562, 1. , 1.91629073,\n 1. , 1.91629073, 1.91629073])\n>>> pipe.transform(corpus).shape\n(4, 8)\n\nReferences\n----------\n\n.. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern\n Information Retrieval. Addison Wesley, pp. 68-74.\n\n.. [MRS2008] C.D. Manning, P. Raghavan and H. Sch\u00fctze (2008).\n Introduction to Information Retrieval. Cambridge University\n Press, pp. 118-120.", + "code": "class TfidfTransformer(TransformerMixin, BaseEstimator):\n \"\"\"Transform a count matrix to a normalized tf or tf-idf representation\n\n Tf means term-frequency while tf-idf means term-frequency times inverse\n document-frequency. This is a common term weighting scheme in information\n retrieval, that has also found good use in document classification.\n\n The goal of using tf-idf instead of the raw frequencies of occurrence of a\n token in a given document is to scale down the impact of tokens that occur\n very frequently in a given corpus and that are hence empirically less\n informative than features that occur in a small fraction of the training\n corpus.\n\n The formula that is used to compute the tf-idf for a term t of a document d\n in a document set is tf-idf(t, d) = tf(t, d) * idf(t), and the idf is\n computed as idf(t) = log [ n / df(t) ] + 1 (if ``smooth_idf=False``), where\n n is the total number of documents in the document set and df(t) is the\n document frequency of t; the document frequency is the number of documents\n in the document set that contain the term t. 
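The idf_ array in the pipeline example above follows directly from the smoothed formula; a numeric check assuming the per-term document frequencies of that corpus and vocabulary order::

    import numpy as np

    n = 4                                    # documents in the corpus
    df = np.array([4, 3, 2, 4, 1, 4, 1, 1])  # df per vocabulary term
    idf = np.log((1 + n) / (1 + df)) + 1     # smooth_idf=True
    print(idf)   # [1. 1.22314355 1.51082562 1. 1.91629073 1. 1.91629073
                 #  1.91629073], matching pipe['tfid'].idf_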
The effect of adding \"1\" to\n the idf in the equation above is that terms with zero idf, i.e., terms\n that occur in all documents in a training set, will not be entirely\n ignored.\n (Note that the idf formula above differs from the standard textbook\n notation that defines the idf as\n idf(t) = log [ n / (df(t) + 1) ]).\n\n If ``smooth_idf=True`` (the default), the constant \"1\" is added to the\n numerator and denominator of the idf as if an extra document was seen\n containing every term in the collection exactly once, which prevents\n zero divisions: idf(t) = log [ (1 + n) / (1 + df(t)) ] + 1.\n\n Furthermore, the formulas used to compute tf and idf depend\n on parameter settings that correspond to the SMART notation used in IR\n as follows:\n\n Tf is \"n\" (natural) by default, \"l\" (logarithmic) when\n ``sublinear_tf=True``.\n Idf is \"t\" when use_idf is given, \"n\" (none) otherwise.\n Normalization is \"c\" (cosine) when ``norm='l2'``, \"n\" (none)\n when ``norm=None``.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n norm : {'l1', 'l2'}, default='l2'\n Each output row will have unit norm, either:\n * 'l2': Sum of squares of vector elements is 1. The cosine\n similarity between two vectors is their dot product when l2 norm has\n been applied.\n * 'l1': Sum of absolute values of vector elements is 1.\n See :func:`preprocessing.normalize`\n\n use_idf : bool, default=True\n Enable inverse-document-frequency reweighting.\n\n smooth_idf : bool, default=True\n Smooth idf weights by adding one to document frequencies, as if an\n extra document was seen containing every term in the collection\n exactly once. Prevents zero divisions.\n\n sublinear_tf : bool, default=False\n Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).\n\n Attributes\n ----------\n idf_ : array of shape (n_features)\n The inverse document frequency (IDF) vector; only defined\n if ``use_idf`` is True.\n\n .. versionadded:: 0.20\n\n Examples\n --------\n >>> from sklearn.feature_extraction.text import TfidfTransformer\n >>> from sklearn.feature_extraction.text import CountVectorizer\n >>> from sklearn.pipeline import Pipeline\n >>> import numpy as np\n >>> corpus = ['this is the first document',\n ... 'this document is the second document',\n ... 'and this is the third one',\n ... 'is this the first document']\n >>> vocabulary = ['this', 'document', 'first', 'is', 'second', 'the',\n ... 'and', 'one']\n >>> pipe = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)),\n ... ('tfid', TfidfTransformer())]).fit(corpus)\n >>> pipe['count'].transform(corpus).toarray()\n array([[1, 1, 1, 1, 0, 1, 0, 0],\n [1, 2, 0, 1, 1, 1, 0, 0],\n [1, 0, 0, 1, 0, 1, 1, 1],\n [1, 1, 1, 1, 0, 1, 0, 0]])\n >>> pipe['tfid'].idf_\n array([1. , 1.22314355, 1.51082562, 1. , 1.91629073,\n 1. , 1.91629073, 1.91629073])\n >>> pipe.transform(corpus).shape\n (4, 8)\n\n References\n ----------\n\n .. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern\n Information Retrieval. Addison Wesley, pp. 68-74.\n\n .. [MRS2008] C.D. Manning, P. Raghavan and H. Sch\u00fctze (2008).\n Introduction to Information Retrieval. Cambridge University\n Press, pp. 
118-120.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, norm='l2', use_idf=True, smooth_idf=True,\n sublinear_tf=False):\n self.norm = norm\n self.use_idf = use_idf\n self.smooth_idf = smooth_idf\n self.sublinear_tf = sublinear_tf\n\n def fit(self, X, y=None):\n \"\"\"Learn the idf vector (global term weights).\n\n Parameters\n ----------\n X : sparse matrix of shape n_samples, n_features)\n A matrix of term/token counts.\n \"\"\"\n X = check_array(X, accept_sparse=('csr', 'csc'))\n if not sp.issparse(X):\n X = sp.csr_matrix(X)\n dtype = X.dtype if X.dtype in FLOAT_DTYPES else np.float64\n\n if self.use_idf:\n n_samples, n_features = X.shape\n df = _document_frequency(X)\n df = df.astype(dtype, **_astype_copy_false(df))\n\n # perform idf smoothing if required\n df += int(self.smooth_idf)\n n_samples += int(self.smooth_idf)\n\n # log+1 instead of log makes sure terms with zero idf don't get\n # suppressed entirely.\n idf = np.log(n_samples / df) + 1\n self._idf_diag = sp.diags(idf, offsets=0,\n shape=(n_features, n_features),\n format='csr',\n dtype=dtype)\n\n return self\n\n def transform(self, X, copy=True):\n \"\"\"Transform a count matrix to a tf or tf-idf representation\n\n Parameters\n ----------\n X : sparse matrix of (n_samples, n_features)\n a matrix of term/token counts\n\n copy : bool, default=True\n Whether to copy X and operate on the copy or perform in-place\n operations.\n\n Returns\n -------\n vectors : sparse matrix of shape (n_samples, n_features)\n \"\"\"\n X = check_array(X, accept_sparse='csr', dtype=FLOAT_DTYPES, copy=copy)\n if not sp.issparse(X):\n X = sp.csr_matrix(X, dtype=np.float64)\n\n n_samples, n_features = X.shape\n\n if self.sublinear_tf:\n np.log(X.data, X.data)\n X.data += 1\n\n if self.use_idf:\n # idf_ being a property, the automatic attributes detection\n # does not work as usual and we need to specify the attribute\n # name:\n check_is_fitted(self, attributes=[\"idf_\"],\n msg='idf vector is not fitted')\n\n expected_n_features = self._idf_diag.shape[0]\n if n_features != expected_n_features:\n raise ValueError(\"Input has n_features=%d while the model\"\n \" has been trained with n_features=%d\" % (\n n_features, expected_n_features))\n # *= doesn't work\n X = X * self._idf_diag\n\n if self.norm:\n X = normalize(X, norm=self.norm, copy=False)\n\n return X\n\n @property\n def idf_(self):\n # if _idf_diag is not set, this will raise an attribute error,\n # which means hasattr(self, \"idf_\") is False\n return np.ravel(self._idf_diag.sum(axis=0))\n\n @idf_.setter\n def idf_(self, value):\n value = np.asarray(value, dtype=np.float64)\n n_features = value.shape[0]\n self._idf_diag = sp.spdiags(value, diags=0, m=n_features,\n n=n_features, format='csr')\n\n def _more_tags(self):\n return {'X_types': 'sparse'}", + "instance_attributes": [ + { + "name": "norm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "use_idf", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "smooth_idf", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "sublinear_tf", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "_idf_diag", + "types": { + "kind": "NamedType", + "name": "dia_matrix" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer", + "name": "TfidfVectorizer", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer", + "decorators": [], + "superclasses": ["CountVectorizer"], + "methods": [ + 
"scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/norm@getter", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/norm@setter", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/use_idf@getter", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/use_idf@setter", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/smooth_idf@getter", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/smooth_idf@setter", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/sublinear_tf@getter", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/sublinear_tf@setter", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/idf_@getter", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/idf_@setter", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/_check_params", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/fit", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/fit_transform", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/transform", + "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "Convert a collection of raw documents to a matrix of TF-IDF features.\n\nEquivalent to :class:`CountVectorizer` followed by\n:class:`TfidfTransformer`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Convert a collection of raw documents to a matrix of TF-IDF features.\n\nEquivalent to :class:`CountVectorizer` followed by\n:class:`TfidfTransformer`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ninput : {'filename', 'file', 'content'}, default='content'\n - If `'filename'`, the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n - If `'file'`, the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n - If `'content'`, the input is expected to be a sequence of items that\n can be of type string or byte.\n\nencoding : str, default='utf-8'\n If bytes or files are given to analyze, this encoding is used to\n decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. By default, it is\n 'strict', meaning that a UnicodeDecodeError will be raised. 
Other\n values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'}, default=None\n Remove accents and perform other character normalization\n during the preprocessing step.\n 'ascii' is a fast method that only works on characters that have\n a direct ASCII mapping.\n 'unicode' is a slightly slower method that works on any characters.\n None (default) does nothing.\n\n Both 'ascii' and 'unicode' use NFKD normalization from\n :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n Override the preprocessing (string transformation) stage while\n preserving the tokenizing and n-grams generation steps.\n Only applies if ``analyzer is not callable``.\n\ntokenizer : callable, default=None\n Override the string tokenization step while preserving the\n preprocessing and n-grams generation steps.\n Only applies if ``analyzer == 'word'``.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n Whether the feature should be made of word or character n-grams.\n Option 'char_wb' creates character n-grams only from text inside\n word boundaries; n-grams at the edges of words are padded with space.\n\n If a callable is passed it is used to extract the sequence of features\n out of the raw, unprocessed input.\n\n .. versionchanged:: 0.21\n Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n is first read from the file and then passed to the given callable\n analyzer.\n\nstop_words : {'english'}, list, default=None\n If a string, it is passed to _check_stop_list and the appropriate stop\n list is returned. 'english' is currently the only supported string\n value.\n There are several known issues with 'english' and you should\n consider an alternative (see :ref:`stop_words`).\n\n If a list, that list is assumed to contain stop words, all of which\n will be removed from the resulting tokens.\n Only applies if ``analyzer == 'word'``.\n\n If None, no stop words will be used. max_df can be set to a value\n in the range [0.7, 1.0) to automatically detect and filter stop\n words based on intra corpus document frequency of terms.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n Regular expression denoting what constitutes a \"token\", only used\n if ``analyzer == 'word'``. The default regexp selects tokens of 2\n or more alphanumeric characters (punctuation is completely ignored\n and always treated as a token separator).\n\n If there is a capturing group in token_pattern then the\n captured group content, not the entire match, becomes the token.\n At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n The lower and upper boundary of the range of n-values for different\n n-grams to be extracted. All values of n such that min_n <= n <= max_n\n will be used. 
For example an ``ngram_range`` of ``(1, 1)`` means only\n unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n only bigrams.\n Only applies if ``analyzer is not callable``.\n\nmax_df : float or int, default=1.0\n When building the vocabulary ignore terms that have a document\n frequency strictly higher than the given threshold (corpus-specific\n stop words).\n If float in range [0.0, 1.0], the parameter represents a proportion of\n documents, integer absolute counts.\n This parameter is ignored if vocabulary is not None.\n\nmin_df : float or int, default=1\n When building the vocabulary ignore terms that have a document\n frequency strictly lower than the given threshold. This value is also\n called cut-off in the literature.\n If float in range of [0.0, 1.0], the parameter represents a proportion\n of documents, integer absolute counts.\n This parameter is ignored if vocabulary is not None.\n\nmax_features : int, default=None\n If not None, build a vocabulary that only consider the top\n max_features ordered by term frequency across the corpus.\n\n This parameter is ignored if vocabulary is not None.\n\nvocabulary : Mapping or iterable, default=None\n Either a Mapping (e.g., a dict) where keys are terms and values are\n indices in the feature matrix, or an iterable over terms. If not\n given, a vocabulary is determined from the input documents.\n\nbinary : bool, default=False\n If True, all non-zero term counts are set to 1. This does not mean\n outputs will have only 0/1 values, only that the tf term in tf-idf\n is binary. (Set idf and normalization to False to get 0/1 outputs).\n\ndtype : dtype, default=float64\n Type of the matrix returned by fit_transform() or transform().\n\nnorm : {'l1', 'l2'}, default='l2'\n Each output row will have unit norm, either:\n * 'l2': Sum of squares of vector elements is 1. The cosine\n similarity between two vectors is their dot product when l2 norm has\n been applied.\n * 'l1': Sum of absolute values of vector elements is 1.\n See :func:`preprocessing.normalize`.\n\nuse_idf : bool, default=True\n Enable inverse-document-frequency reweighting.\n\nsmooth_idf : bool, default=True\n Smooth idf weights by adding one to document frequencies, as if an\n extra document was seen containing every term in the collection\n exactly once. Prevents zero divisions.\n\nsublinear_tf : bool, default=False\n Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).\n\nAttributes\n----------\nvocabulary_ : dict\n A mapping of terms to feature indices.\n\nfixed_vocabulary_: bool\n True if a fixed vocabulary of term to indices mapping\n is provided by the user\n\nidf_ : array of shape (n_features,)\n The inverse document frequency (IDF) vector; only defined\n if ``use_idf`` is True.\n\nstop_words_ : set\n Terms that were ignored because they either:\n\n - occurred in too many documents (`max_df`)\n - occurred in too few documents (`min_df`)\n - were cut off by feature selection (`max_features`).\n\n This is only available if no vocabulary was given.\n\nSee Also\n--------\nCountVectorizer : Transforms text into a sparse matrix of n-gram counts.\n\nTfidfTransformer : Performs the TF-IDF transformation from a provided\n matrix of counts.\n\nNotes\n-----\nThe ``stop_words_`` attribute can get large and increase the model size\nwhen pickling. 
This attribute is provided only for introspection and can\nbe safely removed using delattr or set to None before pickling.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import TfidfVectorizer\n>>> corpus = [\n... 'This is the first document.',\n... 'This document is the second document.',\n... 'And this is the third one.',\n... 'Is this the first document?',\n... ]\n>>> vectorizer = TfidfVectorizer()\n>>> X = vectorizer.fit_transform(corpus)\n>>> print(vectorizer.get_feature_names())\n['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']\n>>> print(X.shape)\n(4, 9)", + "code": "class TfidfVectorizer(CountVectorizer):\n r\"\"\"Convert a collection of raw documents to a matrix of TF-IDF features.\n\n Equivalent to :class:`CountVectorizer` followed by\n :class:`TfidfTransformer`.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n input : {'filename', 'file', 'content'}, default='content'\n - If `'filename'`, the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n - If `'file'`, the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n - If `'content'`, the input is expected to be a sequence of items that\n can be of type string or byte.\n\n encoding : str, default='utf-8'\n If bytes or files are given to analyze, this encoding is used to\n decode.\n\n decode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. By default, it is\n 'strict', meaning that a UnicodeDecodeError will be raised. Other\n values are 'ignore' and 'replace'.\n\n strip_accents : {'ascii', 'unicode'}, default=None\n Remove accents and perform other character normalization\n during the preprocessing step.\n 'ascii' is a fast method that only works on characters that have\n a direct ASCII mapping.\n 'unicode' is a slightly slower method that works on any characters.\n None (default) does nothing.\n\n Both 'ascii' and 'unicode' use NFKD normalization from\n :func:`unicodedata.normalize`.\n\n lowercase : bool, default=True\n Convert all characters to lowercase before tokenizing.\n\n preprocessor : callable, default=None\n Override the preprocessing (string transformation) stage while\n preserving the tokenizing and n-grams generation steps.\n Only applies if ``analyzer is not callable``.\n\n tokenizer : callable, default=None\n Override the string tokenization step while preserving the\n preprocessing and n-grams generation steps.\n Only applies if ``analyzer == 'word'``.\n\n analyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n Whether the feature should be made of word or character n-grams.\n Option 'char_wb' creates character n-grams only from text inside\n word boundaries; n-grams at the edges of words are padded with space.\n\n If a callable is passed it is used to extract the sequence of features\n out of the raw, unprocessed input.\n\n .. versionchanged:: 0.21\n Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n is first read from the file and then passed to the given callable\n analyzer.\n\n stop_words : {'english'}, list, default=None\n If a string, it is passed to _check_stop_list and the appropriate stop\n list is returned. 
'english' is currently the only supported string\n value.\n There are several known issues with 'english' and you should\n consider an alternative (see :ref:`stop_words`).\n\n If a list, that list is assumed to contain stop words, all of which\n will be removed from the resulting tokens.\n Only applies if ``analyzer == 'word'``.\n\n If None, no stop words will be used. max_df can be set to a value\n in the range [0.7, 1.0) to automatically detect and filter stop\n words based on intra corpus document frequency of terms.\n\n token_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n Regular expression denoting what constitutes a \"token\", only used\n if ``analyzer == 'word'``. The default regexp selects tokens of 2\n or more alphanumeric characters (punctuation is completely ignored\n and always treated as a token separator).\n\n If there is a capturing group in token_pattern then the\n captured group content, not the entire match, becomes the token.\n At most one capturing group is permitted.\n\n ngram_range : tuple (min_n, max_n), default=(1, 1)\n The lower and upper boundary of the range of n-values for different\n n-grams to be extracted. All values of n such that min_n <= n <= max_n\n will be used. For example an ``ngram_range`` of ``(1, 1)`` means only\n unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n only bigrams.\n Only applies if ``analyzer is not callable``.\n\n max_df : float or int, default=1.0\n When building the vocabulary ignore terms that have a document\n frequency strictly higher than the given threshold (corpus-specific\n stop words).\n If float in range [0.0, 1.0], the parameter represents a proportion of\n documents, integer absolute counts.\n This parameter is ignored if vocabulary is not None.\n\n min_df : float or int, default=1\n When building the vocabulary ignore terms that have a document\n frequency strictly lower than the given threshold. This value is also\n called cut-off in the literature.\n If float in range of [0.0, 1.0], the parameter represents a proportion\n of documents, integer absolute counts.\n This parameter is ignored if vocabulary is not None.\n\n max_features : int, default=None\n If not None, build a vocabulary that only consider the top\n max_features ordered by term frequency across the corpus.\n\n This parameter is ignored if vocabulary is not None.\n\n vocabulary : Mapping or iterable, default=None\n Either a Mapping (e.g., a dict) where keys are terms and values are\n indices in the feature matrix, or an iterable over terms. If not\n given, a vocabulary is determined from the input documents.\n\n binary : bool, default=False\n If True, all non-zero term counts are set to 1. This does not mean\n outputs will have only 0/1 values, only that the tf term in tf-idf\n is binary. (Set idf and normalization to False to get 0/1 outputs).\n\n dtype : dtype, default=float64\n Type of the matrix returned by fit_transform() or transform().\n\n norm : {'l1', 'l2'}, default='l2'\n Each output row will have unit norm, either:\n * 'l2': Sum of squares of vector elements is 1. 
The cosine\n similarity between two vectors is their dot product when l2 norm has\n been applied.\n * 'l1': Sum of absolute values of vector elements is 1.\n See :func:`preprocessing.normalize`.\n\n use_idf : bool, default=True\n Enable inverse-document-frequency reweighting.\n\n smooth_idf : bool, default=True\n Smooth idf weights by adding one to document frequencies, as if an\n extra document was seen containing every term in the collection\n exactly once. Prevents zero divisions.\n\n sublinear_tf : bool, default=False\n Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).\n\n Attributes\n ----------\n vocabulary_ : dict\n A mapping of terms to feature indices.\n\n fixed_vocabulary_: bool\n True if a fixed vocabulary of term to indices mapping\n is provided by the user\n\n idf_ : array of shape (n_features,)\n The inverse document frequency (IDF) vector; only defined\n if ``use_idf`` is True.\n\n stop_words_ : set\n Terms that were ignored because they either:\n\n - occurred in too many documents (`max_df`)\n - occurred in too few documents (`min_df`)\n - were cut off by feature selection (`max_features`).\n\n This is only available if no vocabulary was given.\n\n See Also\n --------\n CountVectorizer : Transforms text into a sparse matrix of n-gram counts.\n\n TfidfTransformer : Performs the TF-IDF transformation from a provided\n matrix of counts.\n\n Notes\n -----\n The ``stop_words_`` attribute can get large and increase the model size\n when pickling. This attribute is provided only for introspection and can\n be safely removed using delattr or set to None before pickling.\n\n Examples\n --------\n >>> from sklearn.feature_extraction.text import TfidfVectorizer\n >>> corpus = [\n ... 'This is the first document.',\n ... 'This document is the second document.',\n ... 'And this is the third one.',\n ... 'Is this the first document?',\n ... 
]\n >>> vectorizer = TfidfVectorizer()\n >>> X = vectorizer.fit_transform(corpus)\n >>> print(vectorizer.get_feature_names())\n ['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']\n >>> print(X.shape)\n (4, 9)\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, input='content', encoding='utf-8',\n decode_error='strict', strip_accents=None, lowercase=True,\n preprocessor=None, tokenizer=None, analyzer='word',\n stop_words=None, token_pattern=r\"(?u)\\b\\w\\w+\\b\",\n ngram_range=(1, 1), max_df=1.0, min_df=1,\n max_features=None, vocabulary=None, binary=False,\n dtype=np.float64, norm='l2', use_idf=True, smooth_idf=True,\n sublinear_tf=False):\n\n super().__init__(\n input=input, encoding=encoding, decode_error=decode_error,\n strip_accents=strip_accents, lowercase=lowercase,\n preprocessor=preprocessor, tokenizer=tokenizer, analyzer=analyzer,\n stop_words=stop_words, token_pattern=token_pattern,\n ngram_range=ngram_range, max_df=max_df, min_df=min_df,\n max_features=max_features, vocabulary=vocabulary, binary=binary,\n dtype=dtype)\n\n self._tfidf = TfidfTransformer(norm=norm, use_idf=use_idf,\n smooth_idf=smooth_idf,\n sublinear_tf=sublinear_tf)\n\n # Broadcast the TF-IDF parameters to the underlying transformer instance\n # for easy grid search and repr\n\n @property\n def norm(self):\n return self._tfidf.norm\n\n @norm.setter\n def norm(self, value):\n self._tfidf.norm = value\n\n @property\n def use_idf(self):\n return self._tfidf.use_idf\n\n @use_idf.setter\n def use_idf(self, value):\n self._tfidf.use_idf = value\n\n @property\n def smooth_idf(self):\n return self._tfidf.smooth_idf\n\n @smooth_idf.setter\n def smooth_idf(self, value):\n self._tfidf.smooth_idf = value\n\n @property\n def sublinear_tf(self):\n return self._tfidf.sublinear_tf\n\n @sublinear_tf.setter\n def sublinear_tf(self, value):\n self._tfidf.sublinear_tf = value\n\n @property\n def idf_(self):\n return self._tfidf.idf_\n\n @idf_.setter\n def idf_(self, value):\n self._validate_vocabulary()\n if hasattr(self, 'vocabulary_'):\n if len(self.vocabulary_) != len(value):\n raise ValueError(\"idf length = %d must be equal \"\n \"to vocabulary size = %d\" %\n (len(value), len(self.vocabulary_)))\n self._tfidf.idf_ = value\n\n def _check_params(self):\n if self.dtype not in FLOAT_DTYPES:\n warnings.warn(\"Only {} 'dtype' should be used. 
{} 'dtype' will \"\n \"be converted to np.float64.\"\n .format(FLOAT_DTYPES, self.dtype),\n UserWarning)\n\n def fit(self, raw_documents, y=None):\n \"\"\"Learn vocabulary and idf from training set.\n\n Parameters\n ----------\n raw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n y : None\n This parameter is not needed to compute tfidf.\n\n Returns\n -------\n self : object\n Fitted vectorizer.\n \"\"\"\n self._check_params()\n self._warn_for_unused_params()\n X = super().fit_transform(raw_documents)\n self._tfidf.fit(X)\n return self\n\n def fit_transform(self, raw_documents, y=None):\n \"\"\"Learn vocabulary and idf, return document-term matrix.\n\n This is equivalent to fit followed by transform, but more efficiently\n implemented.\n\n Parameters\n ----------\n raw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n y : None\n This parameter is ignored.\n\n Returns\n -------\n X : sparse matrix of (n_samples, n_features)\n Tf-idf-weighted document-term matrix.\n \"\"\"\n self._check_params()\n X = super().fit_transform(raw_documents)\n self._tfidf.fit(X)\n # X is already a transformed view of raw_documents so\n # we set copy to False\n return self._tfidf.transform(X, copy=False)\n\n def transform(self, raw_documents):\n \"\"\"Transform documents to document-term matrix.\n\n Uses the vocabulary and document frequencies (df) learned by fit (or\n fit_transform).\n\n Parameters\n ----------\n raw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\n Returns\n -------\n X : sparse matrix of (n_samples, n_features)\n Tf-idf-weighted document-term matrix.\n \"\"\"\n check_is_fitted(self, msg='The TF-IDF vectorizer is not fitted')\n\n X = super().transform(raw_documents)\n return self._tfidf.transform(X, copy=False)\n\n def _more_tags(self):\n return {'X_types': ['string'], '_skip_test': True}", + "instance_attributes": [ + { + "name": "_tfidf", + "types": { + "kind": "NamedType", + "name": "TfidfTransformer" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin", + "name": "_VectorizerMixin", + "qname": "sklearn.feature_extraction.text._VectorizerMixin", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/decode", + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_word_ngrams", + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_char_ngrams", + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_char_wb_ngrams", + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/build_preprocessor", + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/build_tokenizer", + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/get_stop_words", + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_check_stop_words_consistency", + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/build_analyzer", + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_vocabulary", + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_check_vocabulary", + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_params", + "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_warn_for_unused_params" + ], + "is_public": false, + "reexported_by": [], + "description": "Provides common code for text vectorizers (tokenization logic).", + "docstring": "Provides common 
code for text vectorizers (tokenization logic).", + "code": "class _VectorizerMixin:\n \"\"\"Provides common code for text vectorizers (tokenization logic).\"\"\"\n\n _white_spaces = re.compile(r\"\\s\\s+\")\n\n def decode(self, doc):\n \"\"\"Decode the input into a string of unicode symbols.\n\n The decoding strategy depends on the vectorizer parameters.\n\n Parameters\n ----------\n doc : str\n The string to decode.\n\n Returns\n -------\n doc: str\n A string of unicode symbols.\n \"\"\"\n if self.input == 'filename':\n with open(doc, 'rb') as fh:\n doc = fh.read()\n\n elif self.input == 'file':\n doc = doc.read()\n\n if isinstance(doc, bytes):\n doc = doc.decode(self.encoding, self.decode_error)\n\n if doc is np.nan:\n raise ValueError(\"np.nan is an invalid document, expected byte or \"\n \"unicode string.\")\n\n return doc\n\n def _word_ngrams(self, tokens, stop_words=None):\n \"\"\"Turn tokens into a sequence of n-grams after stop words filtering\"\"\"\n # handle stop words\n if stop_words is not None:\n tokens = [w for w in tokens if w not in stop_words]\n\n # handle token n-grams\n min_n, max_n = self.ngram_range\n if max_n != 1:\n original_tokens = tokens\n if min_n == 1:\n # no need to do any slicing for unigrams\n # just iterate through the original tokens\n tokens = list(original_tokens)\n min_n += 1\n else:\n tokens = []\n\n n_original_tokens = len(original_tokens)\n\n # bind method outside of loop to reduce overhead\n tokens_append = tokens.append\n space_join = \" \".join\n\n for n in range(min_n,\n min(max_n + 1, n_original_tokens + 1)):\n for i in range(n_original_tokens - n + 1):\n tokens_append(space_join(original_tokens[i: i + n]))\n\n return tokens\n\n def _char_ngrams(self, text_document):\n \"\"\"Tokenize text_document into a sequence of character n-grams\"\"\"\n # normalize white spaces\n text_document = self._white_spaces.sub(\" \", text_document)\n\n text_len = len(text_document)\n min_n, max_n = self.ngram_range\n if min_n == 1:\n # no need to do any slicing for unigrams\n # iterate through the string\n ngrams = list(text_document)\n min_n += 1\n else:\n ngrams = []\n\n # bind method outside of loop to reduce overhead\n ngrams_append = ngrams.append\n\n for n in range(min_n, min(max_n + 1, text_len + 1)):\n for i in range(text_len - n + 1):\n ngrams_append(text_document[i: i + n])\n return ngrams\n\n def _char_wb_ngrams(self, text_document):\n \"\"\"Whitespace sensitive char-n-gram tokenization.\n\n Tokenize text_document into a sequence of character n-grams\n operating only inside word boundaries. 
n-grams at the edges\n of words are padded with space.\"\"\"\n # normalize white spaces\n text_document = self._white_spaces.sub(\" \", text_document)\n\n min_n, max_n = self.ngram_range\n ngrams = []\n\n # bind method outside of loop to reduce overhead\n ngrams_append = ngrams.append\n\n for w in text_document.split():\n w = ' ' + w + ' '\n w_len = len(w)\n for n in range(min_n, max_n + 1):\n offset = 0\n ngrams_append(w[offset:offset + n])\n while offset + n < w_len:\n offset += 1\n ngrams_append(w[offset:offset + n])\n if offset == 0: # count a short word (w_len < n) only once\n break\n return ngrams\n\n def build_preprocessor(self):\n \"\"\"Return a function to preprocess the text before tokenization.\n\n Returns\n -------\n preprocessor: callable\n A function to preprocess the text before tokenization.\n \"\"\"\n if self.preprocessor is not None:\n return self.preprocessor\n\n # accent stripping\n if not self.strip_accents:\n strip_accents = None\n elif callable(self.strip_accents):\n strip_accents = self.strip_accents\n elif self.strip_accents == 'ascii':\n strip_accents = strip_accents_ascii\n elif self.strip_accents == 'unicode':\n strip_accents = strip_accents_unicode\n else:\n raise ValueError('Invalid value for \"strip_accents\": %s' %\n self.strip_accents)\n\n return partial(\n _preprocess, accent_function=strip_accents, lower=self.lowercase\n )\n\n def build_tokenizer(self):\n \"\"\"Return a function that splits a string into a sequence of tokens.\n\n Returns\n -------\n tokenizer: callable\n A function to split a string into a sequence of tokens.\n \"\"\"\n if self.tokenizer is not None:\n return self.tokenizer\n token_pattern = re.compile(self.token_pattern)\n\n if token_pattern.groups > 1:\n raise ValueError(\n \"More than 1 capturing group in token pattern. Only a single \"\n \"group should be captured.\"\n )\n\n return token_pattern.findall\n\n def get_stop_words(self):\n \"\"\"Build or fetch the effective stop words list.\n\n Returns\n -------\n stop_words: list or None\n A list of stop words.\n \"\"\"\n return _check_stop_list(self.stop_words)\n\n def _check_stop_words_consistency(self, stop_words, preprocess, tokenize):\n \"\"\"Check if stop words are consistent\n\n Returns\n -------\n is_consistent : True if stop words are consistent with the preprocessor\n and tokenizer, False if they are not, None if the check\n was previously performed, \"error\" if it could not be\n performed (e.g. because of the use of a custom\n preprocessor / tokenizer)\n \"\"\"\n if id(self.stop_words) == getattr(self, '_stop_words_id', None):\n # Stop words were previously validated\n return None\n\n # NB: stop_words is validated, unlike self.stop_words\n try:\n inconsistent = set()\n for w in stop_words or ():\n tokens = list(tokenize(preprocess(w)))\n for token in tokens:\n if token not in stop_words:\n inconsistent.add(token)\n self._stop_words_id = id(self.stop_words)\n\n if inconsistent:\n warnings.warn('Your stop_words may be inconsistent with '\n 'your preprocessing. Tokenizing the stop '\n 'words generated tokens %r not in '\n 'stop_words.' % sorted(inconsistent))\n return not inconsistent\n except Exception:\n # Failed to check stop words consistency (e.g. 
because a custom\n # preprocessor or tokenizer was used)\n self._stop_words_id = id(self.stop_words)\n return 'error'\n\n def build_analyzer(self):\n \"\"\"Return a callable that handles preprocessing, tokenization\n and n-grams generation.\n\n Returns\n -------\n analyzer: callable\n A function to handle preprocessing, tokenization\n and n-grams generation.\n \"\"\"\n\n if callable(self.analyzer):\n return partial(\n _analyze, analyzer=self.analyzer, decoder=self.decode\n )\n\n preprocess = self.build_preprocessor()\n\n if self.analyzer == 'char':\n return partial(_analyze, ngrams=self._char_ngrams,\n preprocessor=preprocess, decoder=self.decode)\n\n elif self.analyzer == 'char_wb':\n\n return partial(_analyze, ngrams=self._char_wb_ngrams,\n preprocessor=preprocess, decoder=self.decode)\n\n elif self.analyzer == 'word':\n stop_words = self.get_stop_words()\n tokenize = self.build_tokenizer()\n self._check_stop_words_consistency(stop_words, preprocess,\n tokenize)\n return partial(_analyze, ngrams=self._word_ngrams,\n tokenizer=tokenize, preprocessor=preprocess,\n decoder=self.decode, stop_words=stop_words)\n\n else:\n raise ValueError('%s is not a valid tokenization scheme/analyzer' %\n self.analyzer)\n\n def _validate_vocabulary(self):\n vocabulary = self.vocabulary\n if vocabulary is not None:\n if isinstance(vocabulary, set):\n vocabulary = sorted(vocabulary)\n if not isinstance(vocabulary, Mapping):\n vocab = {}\n for i, t in enumerate(vocabulary):\n if vocab.setdefault(t, i) != i:\n msg = \"Duplicate term in vocabulary: %r\" % t\n raise ValueError(msg)\n vocabulary = vocab\n else:\n indices = set(vocabulary.values())\n if len(indices) != len(vocabulary):\n raise ValueError(\"Vocabulary contains repeated indices.\")\n for i in range(len(vocabulary)):\n if i not in indices:\n msg = (\"Vocabulary of size %d doesn't contain index \"\n \"%d.\" % (len(vocabulary), i))\n raise ValueError(msg)\n if not vocabulary:\n raise ValueError(\"empty vocabulary passed to fit\")\n self.fixed_vocabulary_ = True\n self.vocabulary_ = dict(vocabulary)\n else:\n self.fixed_vocabulary_ = False\n\n def _check_vocabulary(self):\n \"\"\"Check if vocabulary is empty or missing (not fitted)\"\"\"\n if not hasattr(self, 'vocabulary_'):\n self._validate_vocabulary()\n if not self.fixed_vocabulary_:\n raise NotFittedError(\"Vocabulary not fitted or provided\")\n\n if len(self.vocabulary_) == 0:\n raise ValueError(\"Vocabulary is empty\")\n\n def _validate_params(self):\n \"\"\"Check validity of ngram_range parameter\"\"\"\n min_n, max_m = self.ngram_range\n if min_n > max_m:\n raise ValueError(\n \"Invalid value for ngram_range=%s \"\n \"lower boundary larger than the upper boundary.\"\n % str(self.ngram_range))\n\n def _warn_for_unused_params(self):\n\n if self.tokenizer is not None and self.token_pattern is not None:\n warnings.warn(\"The parameter 'token_pattern' will not be used\"\n \" since 'tokenizer' is not None\")\n\n if self.preprocessor is not None and callable(self.analyzer):\n warnings.warn(\"The parameter 'preprocessor' will not be used\"\n \" since 'analyzer' is callable\")\n\n if (self.ngram_range != (1, 1) and self.ngram_range is not None\n and callable(self.analyzer)):\n warnings.warn(\"The parameter 'ngram_range' will not be used\"\n \" since 'analyzer' is callable\")\n if self.analyzer != 'word' or callable(self.analyzer):\n if self.stop_words is not None:\n warnings.warn(\"The parameter 'stop_words' will not be used\"\n \" since 'analyzer' != 'word'\")\n if self.token_pattern is not None and 
\\\n self.token_pattern != r\"(?u)\\b\\w\\w+\\b\":\n warnings.warn(\"The parameter 'token_pattern' will not be used\"\n \" since 'analyzer' != 'word'\")\n if self.tokenizer is not None:\n warnings.warn(\"The parameter 'tokenizer' will not be used\"\n \" since 'analyzer' != 'word'\")", + "instance_attributes": [ + { + "name": "fixed_vocabulary_", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "vocabulary_", + "types": { + "kind": "NamedType", + "name": "dict" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection._base/SelectorMixin", + "name": "SelectorMixin", + "qname": "sklearn.feature_selection._base.SelectorMixin", + "decorators": [], + "superclasses": ["TransformerMixin"], + "methods": [ + "scikit-learn/sklearn.feature_selection._base/SelectorMixin/get_support", + "scikit-learn/sklearn.feature_selection._base/SelectorMixin/_get_support_mask", + "scikit-learn/sklearn.feature_selection._base/SelectorMixin/transform", + "scikit-learn/sklearn.feature_selection._base/SelectorMixin/inverse_transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Transformer mixin that performs feature selection given a support mask\n\nThis mixin provides a feature selector implementation with `transform` and\n`inverse_transform` functionality given an implementation of\n`_get_support_mask`.", + "docstring": "Transformer mixin that performs feature selection given a support mask\n\nThis mixin provides a feature selector implementation with `transform` and\n`inverse_transform` functionality given an implementation of\n`_get_support_mask`.", + "code": "class SelectorMixin(TransformerMixin, metaclass=ABCMeta):\n \"\"\"\n Transformer mixin that performs feature selection given a support mask\n\n This mixin provides a feature selector implementation with `transform` and\n `inverse_transform` functionality given an implementation of\n `_get_support_mask`.\n \"\"\"\n\n def get_support(self, indices=False):\n \"\"\"\n Get a mask, or integer index, of the features selected\n\n Parameters\n ----------\n indices : bool, default=False\n If True, the return value will be an array of integers, rather\n than a boolean mask.\n\n Returns\n -------\n support : array\n An index that selects the retained features from a feature vector.\n If `indices` is False, this is a boolean array of shape\n [# input features], in which an element is True iff its\n corresponding feature is selected for retention. 
If `indices` is\n True, this is an integer array of shape [# output features] whose\n values are indices into the input feature vector.\n \"\"\"\n mask = self._get_support_mask()\n return mask if not indices else np.where(mask)[0]\n\n @abstractmethod\n def _get_support_mask(self):\n \"\"\"\n Get the boolean mask indicating which features are selected\n\n Returns\n -------\n support : boolean array of shape [# input features]\n An element is True iff its corresponding feature is selected for\n retention.\n \"\"\"\n\n def transform(self, X):\n \"\"\"Reduce X to the selected features.\n\n Parameters\n ----------\n X : array of shape [n_samples, n_features]\n The input samples.\n\n Returns\n -------\n X_r : array of shape [n_samples, n_selected_features]\n The input samples with only the selected features.\n \"\"\"\n # note: we use _safe_tags instead of _get_tags because this is a\n # public Mixin.\n X = check_array(\n X,\n dtype=None,\n accept_sparse=\"csr\",\n force_all_finite=not _safe_tags(self, key=\"allow_nan\"),\n )\n mask = self.get_support()\n if not mask.any():\n warn(\"No features were selected: either the data is\"\n \" too noisy or the selection test too strict.\",\n UserWarning)\n return np.empty(0).reshape((X.shape[0], 0))\n if len(mask) != X.shape[1]:\n raise ValueError(\"X has a different shape than during fitting.\")\n return X[:, safe_mask(X, mask)]\n\n def inverse_transform(self, X):\n \"\"\"\n Reverse the transformation operation\n\n Parameters\n ----------\n X : array of shape [n_samples, n_selected_features]\n The input samples.\n\n Returns\n -------\n X_r : array of shape [n_samples, n_original_features]\n `X` with columns of zeros inserted where features would have\n been removed by :meth:`transform`.\n \"\"\"\n if issparse(X):\n X = X.tocsc()\n # insert additional entries in indptr:\n # e.g. 
if transform changed indptr from [0 2 6 7] to [0 2 3]\n # col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]\n it = self.inverse_transform(np.diff(X.indptr).reshape(1, -1))\n col_nonzeros = it.ravel()\n indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])\n Xt = csc_matrix((X.data, X.indices, indptr),\n shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)\n return Xt\n\n support = self.get_support()\n X = check_array(X, dtype=None)\n if support.sum() != X.shape[1]:\n raise ValueError(\"X has a different shape than during fitting.\")\n\n if X.ndim == 1:\n X = X[None, :]\n Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype)\n Xt[:, support] = X\n return Xt", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel", + "name": "SelectFromModel", + "qname": "sklearn.feature_selection._from_model.SelectFromModel", + "decorators": [], + "superclasses": ["MetaEstimatorMixin", "SelectorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/__init__", + "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/_get_support_mask", + "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/fit", + "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/threshold_@getter", + "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/partial_fit", + "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/n_features_in_@getter", + "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Meta-transformer for selecting features based on importance weights.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.", + "docstring": "Meta-transformer for selecting features based on importance weights.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : object\n The base estimator from which the transformer is built.\n This can be either a fitted (if ``prefit`` is set to True)\n or a non-fitted estimator. The estimator should have a\n ``feature_importances_`` or ``coef_`` attribute after fitting.\n Otherwise, the ``importance_getter`` parameter should be used.\n\nthreshold : string or float, default=None\n The threshold value to use for feature selection. Features whose\n importance is greater or equal are kept while the others are\n discarded. If \"median\" (resp. \"mean\"), then the ``threshold`` value is\n the median (resp. the mean) of the feature importances. A scaling\n factor (e.g., \"1.25*mean\") may also be used. If None and if the\n estimator has a parameter penalty set to l1, either explicitly\n or implicitly (e.g., Lasso), the threshold used is 1e-5.\n Otherwise, \"mean\" is used by default.\n\nprefit : bool, default=False\n Whether a prefit model is expected to be passed into the constructor\n directly or not. 
If True, ``transform`` must be called directly\n and SelectFromModel cannot be used with ``cross_val_score``,\n ``GridSearchCV`` and similar utilities that clone the estimator.\n Otherwise train the model using ``fit`` and then ``transform`` to do\n feature selection.\n\nnorm_order : non-zero int, inf, -inf, default=1\n Order of the norm used to filter the vectors of coefficients below\n ``threshold`` in the case where the ``coef_`` attribute of the\n estimator is of dimension 2.\n\nmax_features : int, default=None\n The maximum number of features to select.\n To only select based on ``max_features``, set ``threshold=-np.inf``.\n\n .. versionadded:: 0.20\n\nimportance_getter : str or callable, default='auto'\n If 'auto', uses the feature importance either through a ``coef_``\n attribute or ``feature_importances_`` attribute of the estimator.\n\n Also accepts a string that specifies an attribute name/path\n for extracting feature importance (implemented with `attrgetter`).\n For example, give `regressor_.coef_` in case of\n :class:`~sklearn.compose.TransformedTargetRegressor` or\n `named_steps.clf.feature_importances_` in case of\n :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n If `callable`, overrides the default feature importance getter.\n The callable is passed with the fitted estimator and it should\n return importance for each feature.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nestimator_ : an estimator\n The base estimator from which the transformer is built.\n This is stored only when a non-fitted estimator is passed to the\n ``SelectFromModel``, i.e. when prefit is False.\n\nthreshold_ : float\n The threshold value used for feature selection.\n\nNotes\n-----\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nExamples\n--------\n>>> from sklearn.feature_selection import SelectFromModel\n>>> from sklearn.linear_model import LogisticRegression\n>>> X = [[ 0.87, -1.34, 0.31 ],\n... [-2.79, -0.02, -0.85 ],\n... [-1.34, -0.48, -2.55 ],\n... [ 1.92, 1.48, 0.65 ]]\n>>> y = [0, 1, 0, 1]\n>>> selector = SelectFromModel(estimator=LogisticRegression()).fit(X, y)\n>>> selector.estimator_.coef_\narray([[-0.3252302 , 0.83462377, 0.49750423]])\n>>> selector.threshold_\n0.55245...\n>>> selector.get_support()\narray([False, True, False])\n>>> selector.transform(X)\narray([[-1.34],\n [-0.02],\n [-0.48],\n [ 1.48]])\n\nSee Also\n--------\nRFE : Recursive feature elimination based on importance weights.\nRFECV : Recursive feature elimination with built-in cross-validated\n selection of the best number of features.\nSequentialFeatureSelector : Sequential cross-validation based feature\n selection. Does not rely on importance weights.", + "code": "class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator):\n \"\"\"Meta-transformer for selecting features based on importance weights.\n\n .. versionadded:: 0.17\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : object\n The base estimator from which the transformer is built.\n This can be either a fitted (if ``prefit`` is set to True)\n or a non-fitted estimator. The estimator should have a\n ``feature_importances_`` or ``coef_`` attribute after fitting.\n Otherwise, the ``importance_getter`` parameter should be used.\n\n threshold : string or float, default=None\n The threshold value to use for feature selection. Features whose\n importance is greater or equal are kept while the others are\n discarded. If \"median\" (resp. 
\"mean\"), then the ``threshold`` value is\n the median (resp. the mean) of the feature importances. A scaling\n factor (e.g., \"1.25*mean\") may also be used. If None and if the\n estimator has a parameter penalty set to l1, either explicitly\n or implicitly (e.g, Lasso), the threshold used is 1e-5.\n Otherwise, \"mean\" is used by default.\n\n prefit : bool, default=False\n Whether a prefit model is expected to be passed into the constructor\n directly or not. If True, ``transform`` must be called directly\n and SelectFromModel cannot be used with ``cross_val_score``,\n ``GridSearchCV`` and similar utilities that clone the estimator.\n Otherwise train the model using ``fit`` and then ``transform`` to do\n feature selection.\n\n norm_order : non-zero int, inf, -inf, default=1\n Order of the norm used to filter the vectors of coefficients below\n ``threshold`` in the case where the ``coef_`` attribute of the\n estimator is of dimension 2.\n\n max_features : int, default=None\n The maximum number of features to select.\n To only select based on ``max_features``, set ``threshold=-np.inf``.\n\n .. versionadded:: 0.20\n\n importance_getter : str or callable, default='auto'\n If 'auto', uses the feature importance either through a ``coef_``\n attribute or ``feature_importances_`` attribute of estimator.\n\n Also accepts a string that specifies an attribute name/path\n for extracting feature importance (implemented with `attrgetter`).\n For example, give `regressor_.coef_` in case of\n :class:`~sklearn.compose.TransformedTargetRegressor` or\n `named_steps.clf.feature_importances_` in case of\n :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n If `callable`, overrides the default feature importance getter.\n The callable is passed with the fitted estimator and it should\n return importance for each feature.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n estimator_ : an estimator\n The base estimator from which the transformer is built.\n This is stored only when a non-fitted estimator is passed to the\n ``SelectFromModel``, i.e when prefit is False.\n\n threshold_ : float\n The threshold value used for feature selection.\n\n Notes\n -----\n Allows NaN/Inf in the input if the underlying estimator does as well.\n\n Examples\n --------\n >>> from sklearn.feature_selection import SelectFromModel\n >>> from sklearn.linear_model import LogisticRegression\n >>> X = [[ 0.87, -1.34, 0.31 ],\n ... [-2.79, -0.02, -0.85 ],\n ... [-1.34, -0.48, -2.55 ],\n ... [ 1.92, 1.48, 0.65 ]]\n >>> y = [0, 1, 0, 1]\n >>> selector = SelectFromModel(estimator=LogisticRegression()).fit(X, y)\n >>> selector.estimator_.coef_\n array([[-0.3252302 , 0.83462377, 0.49750423]])\n >>> selector.threshold_\n 0.55245...\n >>> selector.get_support()\n array([False, True, False])\n >>> selector.transform(X)\n array([[-1.34],\n [-0.02],\n [-0.48],\n [ 1.48]])\n\n See Also\n --------\n RFE : Recursive feature elimination based on importance weights.\n RFECV : Recursive feature elimination with built-in cross-validated\n selection of the best number of features.\n SequentialFeatureSelector : Sequential cross-validation based feature\n selection. 
Does not rely on importance weights.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, estimator, *, threshold=None, prefit=False,\n norm_order=1, max_features=None,\n importance_getter='auto'):\n self.estimator = estimator\n self.threshold = threshold\n self.prefit = prefit\n self.importance_getter = importance_getter\n self.norm_order = norm_order\n self.max_features = max_features\n\n def _get_support_mask(self):\n # SelectFromModel can directly call on transform.\n if self.prefit:\n estimator = self.estimator\n elif hasattr(self, 'estimator_'):\n estimator = self.estimator_\n else:\n raise ValueError('Either fit the model before transform or set'\n ' \"prefit=True\" while passing the fitted'\n ' estimator to the constructor.')\n scores = _get_feature_importances(\n estimator=estimator, getter=self.importance_getter,\n transform_func='norm', norm_order=self.norm_order)\n threshold = _calculate_threshold(estimator, scores, self.threshold)\n if self.max_features is not None:\n mask = np.zeros_like(scores, dtype=bool)\n candidate_indices = \\\n np.argsort(-scores, kind='mergesort')[:self.max_features]\n mask[candidate_indices] = True\n else:\n mask = np.ones_like(scores, dtype=bool)\n mask[scores < threshold] = False\n return mask\n\n def fit(self, X, y=None, **fit_params):\n \"\"\"Fit the SelectFromModel meta-transformer.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The training input samples.\n\n y : array-like of shape (n_samples,), default=None\n The target values (integers that correspond to classes in\n classification, real numbers in regression).\n\n **fit_params : Other estimator specific parameters\n\n Returns\n -------\n self : object\n \"\"\"\n if self.max_features is not None:\n if not isinstance(self.max_features, numbers.Integral):\n raise TypeError(\"'max_features' should be an integer between\"\n \" 0 and {} features. 
Got {!r} instead.\"\n .format(X.shape[1], self.max_features))\n elif self.max_features < 0 or self.max_features > X.shape[1]:\n raise ValueError(\"'max_features' should be 0 and {} features.\"\n \"Got {} instead.\"\n .format(X.shape[1], self.max_features))\n\n if self.prefit:\n raise NotFittedError(\n \"Since 'prefit=True', call transform directly\")\n self.estimator_ = clone(self.estimator)\n self.estimator_.fit(X, y, **fit_params)\n return self\n\n @property\n def threshold_(self):\n scores = _get_feature_importances(estimator=self.estimator_,\n getter=self.importance_getter,\n transform_func='norm',\n norm_order=self.norm_order)\n return _calculate_threshold(self.estimator, scores, self.threshold)\n\n @if_delegate_has_method('estimator')\n def partial_fit(self, X, y=None, **fit_params):\n \"\"\"Fit the SelectFromModel meta-transformer only once.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The training input samples.\n\n y : array-like of shape (n_samples,), default=None\n The target values (integers that correspond to classes in\n classification, real numbers in regression).\n\n **fit_params : Other estimator specific parameters\n\n Returns\n -------\n self : object\n \"\"\"\n if self.prefit:\n raise NotFittedError(\n \"Since 'prefit=True', call transform directly\")\n if not hasattr(self, \"estimator_\"):\n self.estimator_ = clone(self.estimator)\n self.estimator_.partial_fit(X, y, **fit_params)\n return self\n\n @property\n def n_features_in_(self):\n # For consistency with other estimators we raise a AttributeError so\n # that hasattr() fails if the estimator isn't fitted.\n try:\n check_is_fitted(self)\n except NotFittedError as nfe:\n raise AttributeError(\n \"{} object has no n_features_in_ attribute.\"\n .format(self.__class__.__name__)\n ) from nfe\n\n return self.estimator_.n_features_in_\n\n def _more_tags(self):\n return {\n 'allow_nan': _safe_tags(self.estimator, key=\"allow_nan\")\n }", + "instance_attributes": [ + { + "name": "prefit", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "importance_getter", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "norm_order", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE", + "name": "RFE", + "qname": "sklearn.feature_selection._rfe.RFE", + "decorators": [], + "superclasses": ["SelectorMixin", "MetaEstimatorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.feature_selection._rfe/RFE/__init__", + "scikit-learn/sklearn.feature_selection._rfe/RFE/_estimator_type@getter", + "scikit-learn/sklearn.feature_selection._rfe/RFE/classes_@getter", + "scikit-learn/sklearn.feature_selection._rfe/RFE/fit", + "scikit-learn/sklearn.feature_selection._rfe/RFE/_fit", + "scikit-learn/sklearn.feature_selection._rfe/RFE/predict", + "scikit-learn/sklearn.feature_selection._rfe/RFE/score", + "scikit-learn/sklearn.feature_selection._rfe/RFE/_get_support_mask", + "scikit-learn/sklearn.feature_selection._rfe/RFE/decision_function", + "scikit-learn/sklearn.feature_selection._rfe/RFE/predict_proba", + "scikit-learn/sklearn.feature_selection._rfe/RFE/predict_log_proba", + "scikit-learn/sklearn.feature_selection._rfe/RFE/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Feature ranking with recursive feature elimination.\n\nGiven an external estimator that assigns weights to features (e.g., the\ncoefficients of a linear model), the goal of recursive 
feature elimination\n(RFE) is to select features by recursively considering smaller and smaller\nsets of features. First, the estimator is trained on the initial set of\nfeatures and the importance of each feature is obtained either through\nany specific attribute or callable.\nThen, the least important features are pruned from the current set of features.\nThat procedure is recursively repeated on the pruned set until the desired\nnumber of features to select is eventually reached.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Feature ranking with recursive feature elimination.\n\nGiven an external estimator that assigns weights to features (e.g., the\ncoefficients of a linear model), the goal of recursive feature elimination\n(RFE) is to select features by recursively considering smaller and smaller\nsets of features. First, the estimator is trained on the initial set of\nfeatures and the importance of each feature is obtained either through\nany specific attribute or callable.\nThen, the least important features are pruned from the current set of features.\nThat procedure is recursively repeated on the pruned set until the desired\nnumber of features to select is eventually reached.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : ``Estimator`` instance\n A supervised learning estimator with a ``fit`` method that provides\n information about feature importance\n (e.g. `coef_`, `feature_importances_`).\n\nn_features_to_select : int or float, default=None\n The number of features to select. If `None`, half of the features are\n selected. If integer, the parameter is the absolute number of features\n to select. If float between 0 and 1, it is the fraction of features to\n select.\n\n .. versionchanged:: 0.24\n Added float values for fractions.\n\nstep : int or float, default=1\n If greater than or equal to 1, then ``step`` corresponds to the\n (integer) number of features to remove at each iteration.\n If within (0.0, 1.0), then ``step`` corresponds to the percentage\n (rounded down) of features to remove at each iteration.\n\nverbose : int, default=0\n Controls verbosity of output.\n\nimportance_getter : str or callable, default='auto'\n If 'auto', uses the feature importance either through a `coef_`\n or `feature_importances_` attributes of estimator.\n\n Also accepts a string that specifies an attribute name/path\n for extracting feature importance (implemented with `attrgetter`).\n For example, give `regressor_.coef_` in case of\n :class:`~sklearn.compose.TransformedTargetRegressor` or\n `named_steps.clf.feature_importances_` in case of\n :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n If `callable`, overrides the default feature importance getter.\n The callable is passed with the fitted estimator and it should\n return importance for each feature.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nestimator_ : ``Estimator`` instance\n The fitted estimator used to select features.\n\nn_features_ : int\n The number of selected features.\n\nranking_ : ndarray of shape (n_features,)\n The feature ranking, such that ``ranking_[i]`` corresponds to the\n ranking position of the i-th feature. 
Selected (i.e., estimated\n best) features are assigned rank 1.\n\nsupport_ : ndarray of shape (n_features,)\n The mask of selected features.\n\nExamples\n--------\nThe following example shows how to retrieve the 5 most informative\nfeatures in the Friedman #1 dataset.\n\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.feature_selection import RFE\n>>> from sklearn.svm import SVR\n>>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n>>> estimator = SVR(kernel=\"linear\")\n>>> selector = RFE(estimator, n_features_to_select=5, step=1)\n>>> selector = selector.fit(X, y)\n>>> selector.support_\narray([ True, True, True, True, True, False, False, False, False,\n False])\n>>> selector.ranking_\narray([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])\n\nNotes\n-----\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nSee Also\n--------\nRFECV : Recursive feature elimination with built-in cross-validated\n selection of the best number of features.\nSelectFromModel : Feature selection based on thresholds of importance\n weights.\nSequentialFeatureSelector : Sequential cross-validation based feature\n selection. Does not rely on importance weights.\n\nReferences\n----------\n\n.. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n for cancer classification using support vector machines\",\n Mach. Learn., 46(1-3), 389--422, 2002.", + "code": "class RFE(SelectorMixin, MetaEstimatorMixin, BaseEstimator):\n \"\"\"Feature ranking with recursive feature elimination.\n\n Given an external estimator that assigns weights to features (e.g., the\n coefficients of a linear model), the goal of recursive feature elimination\n (RFE) is to select features by recursively considering smaller and smaller\n sets of features. First, the estimator is trained on the initial set of\n features and the importance of each feature is obtained either through\n any specific attribute or callable.\n Then, the least important features are pruned from the current set of features.\n That procedure is recursively repeated on the pruned set until the desired\n number of features to select is eventually reached.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : ``Estimator`` instance\n A supervised learning estimator with a ``fit`` method that provides\n information about feature importance\n (e.g. `coef_`, `feature_importances_`).\n\n n_features_to_select : int or float, default=None\n The number of features to select. If `None`, half of the features are\n selected. If integer, the parameter is the absolute number of features\n to select. If float between 0 and 1, it is the fraction of features to\n select.\n\n .. 
versionchanged:: 0.24\n Added float values for fractions.\n\n step : int or float, default=1\n If greater than or equal to 1, then ``step`` corresponds to the\n (integer) number of features to remove at each iteration.\n If within (0.0, 1.0), then ``step`` corresponds to the percentage\n (rounded down) of features to remove at each iteration.\n\n verbose : int, default=0\n Controls verbosity of output.\n\n importance_getter : str or callable, default='auto'\n If 'auto', uses the feature importance either through a `coef_`\n or `feature_importances_` attributes of estimator.\n\n Also accepts a string that specifies an attribute name/path\n for extracting feature importance (implemented with `attrgetter`).\n For example, give `regressor_.coef_` in case of\n :class:`~sklearn.compose.TransformedTargetRegressor` or\n `named_steps.clf.feature_importances_` in case of\n :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n If `callable`, overrides the default feature importance getter.\n The callable is passed with the fitted estimator and it should\n return importance for each feature.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n estimator_ : ``Estimator`` instance\n The fitted estimator used to select features.\n\n n_features_ : int\n The number of selected features.\n\n ranking_ : ndarray of shape (n_features,)\n The feature ranking, such that ``ranking_[i]`` corresponds to the\n ranking position of the i-th feature. Selected (i.e., estimated\n best) features are assigned rank 1.\n\n support_ : ndarray of shape (n_features,)\n The mask of selected features.\n\n Examples\n --------\n The following example shows how to retrieve the 5 most informative\n features in the Friedman #1 dataset.\n\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.feature_selection import RFE\n >>> from sklearn.svm import SVR\n >>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n >>> estimator = SVR(kernel=\"linear\")\n >>> selector = RFE(estimator, n_features_to_select=5, step=1)\n >>> selector = selector.fit(X, y)\n >>> selector.support_\n array([ True, True, True, True, True, False, False, False, False,\n False])\n >>> selector.ranking_\n array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])\n\n Notes\n -----\n Allows NaN/Inf in the input if the underlying estimator does as well.\n\n See Also\n --------\n RFECV : Recursive feature elimination with built-in cross-validated\n selection of the best number of features.\n SelectFromModel : Feature selection based on thresholds of importance\n weights.\n SequentialFeatureSelector : Sequential cross-validation based feature\n selection. Does not rely on importance weights.\n\n References\n ----------\n\n .. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n for cancer classification using support vector machines\",\n Mach. 
Learn., 46(1-3), 389--422, 2002.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, estimator, *, n_features_to_select=None, step=1,\n verbose=0, importance_getter='auto'):\n self.estimator = estimator\n self.n_features_to_select = n_features_to_select\n self.step = step\n self.importance_getter = importance_getter\n self.verbose = verbose\n\n @property\n def _estimator_type(self):\n return self.estimator._estimator_type\n\n @property\n def classes_(self):\n return self.estimator_.classes_\n\n def fit(self, X, y):\n \"\"\"Fit the RFE model and then the underlying estimator on the selected\n features.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\n y : array-like of shape (n_samples,)\n The target values.\n \"\"\"\n return self._fit(X, y)\n\n def _fit(self, X, y, step_score=None):\n # Parameter step_score controls the calculation of self.scores_\n # step_score is not exposed to users\n # and is used when implementing RFECV\n # self.scores_ will not be calculated when calling _fit through fit\n\n tags = self._get_tags()\n X, y = self._validate_data(\n X, y, accept_sparse=\"csc\",\n ensure_min_features=2,\n force_all_finite=not tags.get(\"allow_nan\", True),\n multi_output=True\n )\n error_msg = (\"n_features_to_select must be either None, a \"\n \"positive integer representing the absolute \"\n \"number of features or a float in (0.0, 1.0] \"\n \"representing a percentage of features to \"\n f\"select. Got {self.n_features_to_select}\")\n\n # Initialization\n n_features = X.shape[1]\n if self.n_features_to_select is None:\n n_features_to_select = n_features // 2\n elif self.n_features_to_select < 0:\n raise ValueError(error_msg)\n elif isinstance(self.n_features_to_select, numbers.Integral): # int\n n_features_to_select = self.n_features_to_select\n elif self.n_features_to_select > 1.0: # float > 1\n raise ValueError(error_msg)\n else: # float\n n_features_to_select = int(n_features * self.n_features_to_select)\n\n if 0.0 < self.step < 1.0:\n step = int(max(1, self.step * n_features))\n else:\n step = int(self.step)\n if step <= 0:\n raise ValueError(\"Step must be >0\")\n\n support_ = np.ones(n_features, dtype=bool)\n ranking_ = np.ones(n_features, dtype=int)\n\n if step_score:\n self.scores_ = []\n\n # Elimination\n while np.sum(support_) > n_features_to_select:\n # Remaining features\n features = np.arange(n_features)[support_]\n\n # Rank the remaining features\n estimator = clone(self.estimator)\n if self.verbose > 0:\n print(\"Fitting estimator with %d features.\" % np.sum(support_))\n\n estimator.fit(X[:, features], y)\n\n # Get importance and rank them\n importances = _get_feature_importances(\n estimator, self.importance_getter, transform_func=\"square\",\n )\n ranks = np.argsort(importances)\n\n # for sparse case ranks is matrix\n ranks = np.ravel(ranks)\n\n # Eliminate the worst features\n threshold = min(step, np.sum(support_) - n_features_to_select)\n\n # Compute step score on the previous selection iteration\n # because 'estimator' must use features\n # that have not been eliminated yet\n if step_score:\n self.scores_.append(step_score(estimator, features))\n support_[features[ranks][:threshold]] = False\n ranking_[np.logical_not(support_)] += 1\n\n # Set final attributes\n features = np.arange(n_features)[support_]\n self.estimator_ = clone(self.estimator)\n self.estimator_.fit(X[:, features], y)\n\n # Compute step score when only n_features_to_select features left\n if step_score:\n 
self.scores_.append(step_score(self.estimator_, features))\n self.n_features_ = support_.sum()\n self.support_ = support_\n self.ranking_ = ranking_\n\n return self\n\n @if_delegate_has_method(delegate='estimator')\n def predict(self, X):\n \"\"\"Reduce X to the selected features and then predict using the\n underlying estimator.\n\n Parameters\n ----------\n X : array of shape [n_samples, n_features]\n The input samples.\n\n Returns\n -------\n y : array of shape [n_samples]\n The predicted target values.\n \"\"\"\n check_is_fitted(self)\n return self.estimator_.predict(self.transform(X))\n\n @if_delegate_has_method(delegate='estimator')\n def score(self, X, y):\n \"\"\"Reduce X to the selected features and then return the score of the\n underlying estimator.\n\n Parameters\n ----------\n X : array of shape [n_samples, n_features]\n The input samples.\n\n y : array of shape [n_samples]\n The target values.\n \"\"\"\n check_is_fitted(self)\n return self.estimator_.score(self.transform(X), y)\n\n def _get_support_mask(self):\n check_is_fitted(self)\n return self.support_\n\n @if_delegate_has_method(delegate='estimator')\n def decision_function(self, X):\n \"\"\"Compute the decision function of ``X``.\n\n Parameters\n ----------\n X : {array-like or sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n score : array, shape = [n_samples, n_classes] or [n_samples]\n The decision function of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification produce an array of shape\n [n_samples].\n \"\"\"\n check_is_fitted(self)\n return self.estimator_.decision_function(self.transform(X))\n\n @if_delegate_has_method(delegate='estimator')\n def predict_proba(self, X):\n \"\"\"Predict class probabilities for X.\n\n Parameters\n ----------\n X : {array-like or sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n p : array of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n return self.estimator_.predict_proba(self.transform(X))\n\n @if_delegate_has_method(delegate='estimator')\n def predict_log_proba(self, X):\n \"\"\"Predict class log-probabilities for X.\n\n Parameters\n ----------\n X : array of shape [n_samples, n_features]\n The input samples.\n\n Returns\n -------\n p : array of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. 
The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n return self.estimator_.predict_log_proba(self.transform(X))\n\n def _more_tags(self):\n return {\n 'poor_score': True,\n 'allow_nan': _safe_tags(self.estimator, key='allow_nan'),\n 'requires_y': True,\n }", + "instance_attributes": [ + { + "name": "step", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "importance_getter", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "scores_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "n_features_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "support_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "ranking_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV", + "name": "RFECV", + "qname": "sklearn.feature_selection._rfe.RFECV", + "decorators": [], + "superclasses": ["RFE"], + "methods": [ + "scikit-learn/sklearn.feature_selection._rfe/RFECV/__init__", + "scikit-learn/sklearn.feature_selection._rfe/RFECV/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Feature ranking with recursive feature elimination and cross-validated\nselection of the best number of features.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Feature ranking with recursive feature elimination and cross-validated\nselection of the best number of features.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : ``Estimator`` instance\n A supervised learning estimator with a ``fit`` method that provides\n information about feature importance either through a ``coef_``\n attribute or through a ``feature_importances_`` attribute.\n\nstep : int or float, default=1\n If greater than or equal to 1, then ``step`` corresponds to the\n (integer) number of features to remove at each iteration.\n If within (0.0, 1.0), then ``step`` corresponds to the percentage\n (rounded down) of features to remove at each iteration.\n Note that the last iteration may remove fewer than ``step`` features in\n order to reach ``min_features_to_select``.\n\nmin_features_to_select : int, default=1\n The minimum number of features to be selected. This number of features\n will always be scored, even if the difference between the original\n feature count and ``min_features_to_select`` isn't divisible by\n ``step``.\n\n .. versionadded:: 0.20\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if ``y`` is binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used. If the\n estimator is a classifier or if ``y`` is neither binary nor multiclass,\n :class:`~sklearn.model_selection.KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value of None changed from 3-fold to 5-fold.\n\nscoring : string, callable or None, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\nverbose : int, default=0\n Controls verbosity of output.\n\nn_jobs : int or None, default=None\n Number of cores to run in parallel while fitting across folds.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.18\n\nimportance_getter : str or callable, default='auto'\n If 'auto', uses the feature importance either through a `coef_`\n or `feature_importances_` attributes of estimator.\n\n Also accepts a string that specifies an attribute name/path\n for extracting feature importance.\n For example, give `regressor_.coef_` in case of\n :class:`~sklearn.compose.TransformedTargetRegressor` or\n `named_steps.clf.feature_importances_` in case of\n :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n If `callable`, overrides the default feature importance getter.\n The callable is passed with the fitted estimator and it should\n return importance for each feature.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nestimator_ : ``Estimator`` instance\n The fitted estimator used to select features.\n\ngrid_scores_ : ndarray of shape (n_subsets_of_features,)\n The cross-validation scores such that\n ``grid_scores_[i]`` corresponds to\n the CV score of the i-th subset of features.\n\nn_features_ : int\n The number of selected features with cross-validation.\n\nranking_ : ndarray of shape (n_features,)\n The feature ranking, such that `ranking_[i]`\n corresponds to the ranking\n position of the i-th feature.\n Selected (i.e., estimated best)\n features are assigned rank 1.\n\nsupport_ : ndarray of shape (n_features,)\n The mask of selected features.\n\nNotes\n-----\nThe size of ``grid_scores_`` is equal to\n``ceil((n_features - min_features_to_select) / step) + 1``,\nwhere step is the number of features removed at each iteration.\n\nAllows NaN/Inf in the input if the underlying estimator does as well.\n\nExamples\n--------\nThe following example shows how to retrieve the 5 informative features,\nnot known a priori, in the Friedman #1 dataset.\n\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.feature_selection import RFECV\n>>> from sklearn.svm import SVR\n>>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n>>> estimator = SVR(kernel=\"linear\")\n>>> selector = RFECV(estimator, step=1, cv=5)\n>>> selector = selector.fit(X, y)\n>>> selector.support_\narray([ True, True, True, True, True, False, False, False, False,\n False])\n>>> selector.ranking_\narray([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])\n\nSee Also\n--------\nRFE : Recursive feature elimination.\n\nReferences\n----------\n\n.. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n for cancer classification using support vector machines\",\n Mach. 
Learn., 46(1-3), 389--422, 2002.", + "code": "class RFECV(RFE):\n \"\"\"Feature ranking with recursive feature elimination and cross-validated\n selection of the best number of features.\n\n See glossary entry for :term:`cross-validation estimator`.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : ``Estimator`` instance\n A supervised learning estimator with a ``fit`` method that provides\n information about feature importance either through a ``coef_``\n attribute or through a ``feature_importances_`` attribute.\n\n step : int or float, default=1\n If greater than or equal to 1, then ``step`` corresponds to the\n (integer) number of features to remove at each iteration.\n If within (0.0, 1.0), then ``step`` corresponds to the percentage\n (rounded down) of features to remove at each iteration.\n Note that the last iteration may remove fewer than ``step`` features in\n order to reach ``min_features_to_select``.\n\n min_features_to_select : int, default=1\n The minimum number of features to be selected. This number of features\n will always be scored, even if the difference between the original\n feature count and ``min_features_to_select`` isn't divisible by\n ``step``.\n\n .. versionadded:: 0.20\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if ``y`` is binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used. If the\n estimator is a classifier or if ``y`` is neither binary nor multiclass,\n :class:`~sklearn.model_selection.KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value of None changed from 3-fold to 5-fold.\n\n scoring : string, callable or None, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\n verbose : int, default=0\n Controls verbosity of output.\n\n n_jobs : int or None, default=None\n Number of cores to run in parallel while fitting across folds.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.18\n\n importance_getter : str or callable, default='auto'\n If 'auto', uses the feature importance either through a `coef_`\n or `feature_importances_` attributes of estimator.\n\n Also accepts a string that specifies an attribute name/path\n for extracting feature importance.\n For example, give `regressor_.coef_` in case of\n :class:`~sklearn.compose.TransformedTargetRegressor` or\n `named_steps.clf.feature_importances_` in case of\n :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\n If `callable`, overrides the default feature importance getter.\n The callable is passed with the fitted estimator and it should\n return importance for each feature.\n\n .. 
versionadded:: 0.24\n\n Attributes\n ----------\n estimator_ : ``Estimator`` instance\n The fitted estimator used to select features.\n\n grid_scores_ : ndarray of shape (n_subsets_of_features,)\n The cross-validation scores such that\n ``grid_scores_[i]`` corresponds to\n the CV score of the i-th subset of features.\n\n n_features_ : int\n The number of selected features with cross-validation.\n\n ranking_ : ndarray of shape (n_features,)\n The feature ranking, such that `ranking_[i]`\n corresponds to the ranking\n position of the i-th feature.\n Selected (i.e., estimated best)\n features are assigned rank 1.\n\n support_ : ndarray of shape (n_features,)\n The mask of selected features.\n\n Notes\n -----\n The size of ``grid_scores_`` is equal to\n ``ceil((n_features - min_features_to_select) / step) + 1``,\n where step is the number of features removed at each iteration.\n\n Allows NaN/Inf in the input if the underlying estimator does as well.\n\n Examples\n --------\n The following example shows how to retrieve the 5 informative features,\n not known a priori, in the Friedman #1 dataset.\n\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.feature_selection import RFECV\n >>> from sklearn.svm import SVR\n >>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)\n >>> estimator = SVR(kernel=\"linear\")\n >>> selector = RFECV(estimator, step=1, cv=5)\n >>> selector = selector.fit(X, y)\n >>> selector.support_\n array([ True, True, True, True, True, False, False, False, False,\n False])\n >>> selector.ranking_\n array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])\n\n See Also\n --------\n RFE : Recursive feature elimination.\n\n References\n ----------\n\n .. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., \"Gene selection\n for cancer classification using support vector machines\",\n Mach. Learn., 46(1-3), 389--422, 2002.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, estimator, *, step=1, min_features_to_select=1,\n cv=None, scoring=None, verbose=0, n_jobs=None,\n importance_getter='auto'):\n self.estimator = estimator\n self.step = step\n self.importance_getter = importance_getter\n self.cv = cv\n self.scoring = scoring\n self.verbose = verbose\n self.n_jobs = n_jobs\n self.min_features_to_select = min_features_to_select\n\n def fit(self, X, y, groups=None):\n \"\"\"Fit the RFE model and automatically tune the number of selected\n features.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where `n_samples` is the number of samples and\n `n_features` is the total number of features.\n\n y : array-like of shape (n_samples,)\n Target values (integers for classification, real numbers for\n regression).\n\n groups : array-like of shape (n_samples,) or None, default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n .. 
versionadded:: 0.20\n \"\"\"\n tags = self._get_tags()\n X, y = self._validate_data(\n X, y, accept_sparse=\"csr\", ensure_min_features=2,\n force_all_finite=not tags.get('allow_nan', True),\n multi_output=True\n )\n\n # Initialization\n cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))\n scorer = check_scoring(self.estimator, scoring=self.scoring)\n n_features = X.shape[1]\n\n if 0.0 < self.step < 1.0:\n step = int(max(1, self.step * n_features))\n else:\n step = int(self.step)\n if step <= 0:\n raise ValueError(\"Step must be >0\")\n\n # Build an RFE object, which will evaluate and score each possible\n # feature count, down to self.min_features_to_select\n rfe = RFE(estimator=self.estimator,\n n_features_to_select=self.min_features_to_select,\n importance_getter=self.importance_getter,\n step=self.step, verbose=self.verbose)\n\n # Determine the number of subsets of features by fitting across\n # the train folds and choosing the \"features_to_select\" parameter\n # that gives the least averaged error across all folds.\n\n # Note that joblib raises a non-picklable error for bound methods\n # even if n_jobs is set to 1 with the default multiprocessing\n # backend.\n # This branching is done to\n # make sure that user code that sets n_jobs to 1\n # and provides bound methods as scorers is not broken with the\n # addition of the n_jobs parameter in version 0.18.\n\n if effective_n_jobs(self.n_jobs) == 1:\n parallel, func = list, _rfe_single_fit\n else:\n parallel = Parallel(n_jobs=self.n_jobs)\n func = delayed(_rfe_single_fit)\n\n scores = parallel(\n func(rfe, self.estimator, X, y, train, test, scorer)\n for train, test in cv.split(X, y, groups))\n\n scores = np.sum(scores, axis=0)\n scores_rev = scores[::-1]\n argmax_idx = len(scores) - np.argmax(scores_rev) - 1\n n_features_to_select = max(\n n_features - (argmax_idx * step),\n self.min_features_to_select)\n\n # Re-execute an elimination with best_k over the whole set\n rfe = RFE(estimator=self.estimator,\n n_features_to_select=n_features_to_select, step=self.step,\n importance_getter=self.importance_getter,\n verbose=self.verbose)\n\n rfe.fit(X, y)\n\n # Set final attributes\n self.support_ = rfe.support_\n self.n_features_ = rfe.n_features_\n self.ranking_ = rfe.ranking_\n self.estimator_ = clone(self.estimator)\n self.estimator_.fit(self.transform(X), y)\n\n # Fixing a normalization error, n is equal to get_n_splits(X, y) - 1\n # here, the scores are normalized by get_n_splits(X, y)\n self.grid_scores_ = scores[::-1] / cv.get_n_splits(X, y, groups)\n return self", + "instance_attributes": [ + { + "name": "step", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "importance_getter", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_features_to_select", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "support_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "n_features_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "ranking_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector", + "name": "SequentialFeatureSelector", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector", + "decorators": [], + "superclasses": ["SelectorMixin", "MetaEstimatorMixin", 
"BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/__init__", + "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/fit", + "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/_get_best_new_feature", + "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/_get_support_mask", + "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Transformer that performs Sequential Feature Selection.\n\nThis Sequential Feature Selector adds (forward selection) or\nremoves (backward selection) features to form a feature subset in a\ngreedy fashion. At each stage, this estimator chooses the best feature to\nadd or remove based on the cross-validation score of an estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24", + "docstring": "Transformer that performs Sequential Feature Selection.\n\nThis Sequential Feature Selector adds (forward selection) or\nremoves (backward selection) features to form a feature subset in a\ngreedy fashion. At each stage, this estimator chooses the best feature to\nadd or remove based on the cross-validation score of an estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nestimator : estimator instance\n An unfitted estimator.\n\nn_features_to_select : int or float, default=None\n The number of features to select. If `None`, half of the features are\n selected. If integer, the parameter is the absolute number of features\n to select. If float between 0 and 1, it is the fraction of features to\n select.\n\ndirection : {'forward', 'backward'}, default='forward'\n Whether to perform forward selection or backward selection.\n\nscoring : str, callable, list/tuple or dict, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n NOTE that when using custom scorers, each scorer should return a single\n value. Metric functions returning a list/array of values can be wrapped\n into multiple scorers that return one value each.\n\n If None, the estimator's score method is used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. When evaluating a new feature to\n add or remove, the cross-validation procedure is parallel over the\n folds.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nn_features_to_select_ : int\n The number of features that were selected.\n\nsupport_ : ndarray of shape (n_features,), dtype=bool\n The mask of selected features.\n\nSee Also\n--------\nRFE : Recursive feature elimination based on importance weights.\nRFECV : Recursive feature elimination based on importance weights, with\n automatic selection of the number of features.\nSelectFromModel : Feature selection based on thresholds of importance\n weights.\n\nExamples\n--------\n>>> from sklearn.feature_selection import SequentialFeatureSelector\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> knn = KNeighborsClassifier(n_neighbors=3)\n>>> sfs = SequentialFeatureSelector(knn, n_features_to_select=3)\n>>> sfs.fit(X, y)\nSequentialFeatureSelector(estimator=KNeighborsClassifier(n_neighbors=3),\n n_features_to_select=3)\n>>> sfs.get_support()\narray([ True, False, True, True])\n>>> sfs.transform(X).shape\n(150, 3)", + "code": "class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin,\n BaseEstimator):\n \"\"\"Transformer that performs Sequential Feature Selection.\n\n This Sequential Feature Selector adds (forward selection) or\n removes (backward selection) features to form a feature subset in a\n greedy fashion. At each stage, this estimator chooses the best feature to\n add or remove based on the cross-validation score of an estimator.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n estimator : estimator instance\n An unfitted estimator.\n\n n_features_to_select : int or float, default=None\n The number of features to select. If `None`, half of the features are\n selected. If integer, the parameter is the absolute number of features\n to select. If float between 0 and 1, it is the fraction of features to\n select.\n\n direction : {'forward', 'backward'}, default='forward'\n Whether to perform forward selection or backward selection.\n\n scoring : str, callable, list/tuple or dict, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n NOTE that when using custom scorers, each scorer should return a single\n value. Metric functions returning a list/array of values can be wrapped\n into multiple scorers that return one value each.\n\n If None, the estimator's score method is used.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n n_jobs : int, default=None\n Number of jobs to run in parallel. 
When evaluating a new feature to\n add or remove, the cross-validation procedure is parallel over the\n folds.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n n_features_to_select_ : int\n The number of features that were selected.\n\n support_ : ndarray of shape (n_features,), dtype=bool\n The mask of selected features.\n\n See Also\n --------\n RFE : Recursive feature elimination based on importance weights.\n RFECV : Recursive feature elimination based on importance weights, with\n automatic selection of the number of features.\n SelectFromModel : Feature selection based on thresholds of importance\n weights.\n\n Examples\n --------\n >>> from sklearn.feature_selection import SequentialFeatureSelector\n >>> from sklearn.neighbors import KNeighborsClassifier\n >>> from sklearn.datasets import load_iris\n >>> X, y = load_iris(return_X_y=True)\n >>> knn = KNeighborsClassifier(n_neighbors=3)\n >>> sfs = SequentialFeatureSelector(knn, n_features_to_select=3)\n >>> sfs.fit(X, y)\n SequentialFeatureSelector(estimator=KNeighborsClassifier(n_neighbors=3),\n n_features_to_select=3)\n >>> sfs.get_support()\n array([ True, False, True, True])\n >>> sfs.transform(X).shape\n (150, 3)\n \"\"\"\n def __init__(self, estimator, *, n_features_to_select=None,\n direction='forward', scoring=None, cv=5, n_jobs=None):\n\n self.estimator = estimator\n self.n_features_to_select = n_features_to_select\n self.direction = direction\n self.scoring = scoring\n self.cv = cv\n self.n_jobs = n_jobs\n\n def fit(self, X, y):\n \"\"\"Learn the features to select.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vectors.\n y : array-like of shape (n_samples,)\n Target values.\n\n Returns\n -------\n self : object\n \"\"\"\n tags = self._get_tags()\n X, y = self._validate_data(\n X, y, accept_sparse=\"csc\",\n ensure_min_features=2,\n force_all_finite=not tags.get(\"allow_nan\", True),\n multi_output=True\n )\n n_features = X.shape[1]\n\n error_msg = (\"n_features_to_select must be either None, an \"\n \"integer in [1, n_features - 1] \"\n \"representing the absolute \"\n \"number of features, or a float in (0, 1] \"\n \"representing a percentage of features to \"\n f\"select. Got {self.n_features_to_select}\")\n if self.n_features_to_select is None:\n self.n_features_to_select_ = n_features // 2\n elif isinstance(self.n_features_to_select, numbers.Integral):\n if not 0 < self.n_features_to_select < n_features:\n raise ValueError(error_msg)\n self.n_features_to_select_ = self.n_features_to_select\n elif isinstance(self.n_features_to_select, numbers.Real):\n if not 0 < self.n_features_to_select <= 1:\n raise ValueError(error_msg)\n self.n_features_to_select_ = int(n_features *\n self.n_features_to_select)\n else:\n raise ValueError(error_msg)\n\n if self.direction not in ('forward', 'backward'):\n raise ValueError(\n \"direction must be either 'forward' or 'backward'. 
\"\n f\"Got {self.direction}.\"\n )\n\n cloned_estimator = clone(self.estimator)\n\n # the current mask corresponds to the set of features:\n # - that we have already *selected* if we do forward selection\n # - that we have already *excluded* if we do backward selection\n current_mask = np.zeros(shape=n_features, dtype=bool)\n n_iterations = (\n self.n_features_to_select_ if self.direction == 'forward'\n else n_features - self.n_features_to_select_\n )\n for _ in range(n_iterations):\n new_feature_idx = self._get_best_new_feature(cloned_estimator, X,\n y, current_mask)\n current_mask[new_feature_idx] = True\n\n if self.direction == 'backward':\n current_mask = ~current_mask\n self.support_ = current_mask\n\n return self\n\n def _get_best_new_feature(self, estimator, X, y, current_mask):\n # Return the best new feature to add to the current_mask, i.e. return\n # the best new feature to add (resp. remove) when doing forward\n # selection (resp. backward selection)\n candidate_feature_indices = np.flatnonzero(~current_mask)\n scores = {}\n for feature_idx in candidate_feature_indices:\n candidate_mask = current_mask.copy()\n candidate_mask[feature_idx] = True\n if self.direction == 'backward':\n candidate_mask = ~candidate_mask\n X_new = X[:, candidate_mask]\n scores[feature_idx] = cross_val_score(\n estimator, X_new, y, cv=self.cv, scoring=self.scoring,\n n_jobs=self.n_jobs).mean()\n return max(scores, key=lambda feature_idx: scores[feature_idx])\n\n def _get_support_mask(self):\n check_is_fitted(self)\n return self.support_\n\n def _more_tags(self):\n return {\n 'allow_nan': _safe_tags(self.estimator, key=\"allow_nan\"),\n 'requires_y': True,\n }", + "instance_attributes": [ + { + "name": "direction", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "cv", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "support_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect", + "name": "GenericUnivariateSelect", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect", + "decorators": [], + "superclasses": ["_BaseFilter"], + "methods": [ + "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/__init__", + "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/_make_selector", + "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/_check_params", + "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/_get_support_mask" + ], + "is_public": false, + "reexported_by": [], + "description": "Univariate feature selector with configurable strategy.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Univariate feature selector with configurable strategy.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues). 
For modes 'percentile' or 'k_best' it can return\n a single array of scores.\n\nmode : {'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}, default='percentile'\n Feature selection mode.\n\nparam : float or int depending on the feature selection mode, default=1e-5\n Parameter of the corresponding mode.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores, None if `score_func` returned scores only.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import GenericUnivariateSelect, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> transformer = GenericUnivariateSelect(chi2, mode='k_best', param=20)\n>>> X_new = transformer.fit_transform(X, y)\n>>> X_new.shape\n(569, 20)\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.", + "code": "class GenericUnivariateSelect(_BaseFilter):\n \"\"\"Univariate feature selector with configurable strategy.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n score_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues). 
For modes 'percentile' or 'k_best' it can return\n a single array of scores.\n\n mode : {'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}, default='percentile'\n Feature selection mode.\n\n param : float or int depending on the feature selection mode, default=1e-5\n Parameter of the corresponding mode.\n\n Attributes\n ----------\n scores_ : array-like of shape (n_features,)\n Scores of features.\n\n pvalues_ : array-like of shape (n_features,)\n p-values of feature scores, None if `score_func` returned scores only.\n\n Examples\n --------\n >>> from sklearn.datasets import load_breast_cancer\n >>> from sklearn.feature_selection import GenericUnivariateSelect, chi2\n >>> X, y = load_breast_cancer(return_X_y=True)\n >>> X.shape\n (569, 30)\n >>> transformer = GenericUnivariateSelect(chi2, mode='k_best', param=20)\n >>> X_new = transformer.fit_transform(X, y)\n >>> X_new.shape\n (569, 20)\n\n See Also\n --------\n f_classif : ANOVA F-value between label/feature for classification tasks.\n mutual_info_classif : Mutual information for a discrete target.\n chi2 : Chi-squared stats of non-negative features for classification tasks.\n f_regression : F-value between label/feature for regression tasks.\n mutual_info_regression : Mutual information for a continuous target.\n SelectPercentile : Select features based on percentile of the highest\n scores.\n SelectKBest : Select features based on the k highest scores.\n SelectFpr : Select features based on a false positive rate test.\n SelectFdr : Select features based on an estimated false discovery rate.\n SelectFwe : Select features based on family-wise error rate.\n \"\"\"\n\n _selection_modes = {'percentile': SelectPercentile,\n 'k_best': SelectKBest,\n 'fpr': SelectFpr,\n 'fdr': SelectFdr,\n 'fwe': SelectFwe}\n\n @_deprecate_positional_args\n def __init__(self, score_func=f_classif, *, mode='percentile', param=1e-5):\n super().__init__(score_func=score_func)\n self.mode = mode\n self.param = param\n\n def _make_selector(self):\n selector = self._selection_modes[self.mode](score_func=self.score_func)\n\n # Now perform some acrobatics to set the right named parameter in\n # the selector\n possible_params = selector._get_param_names()\n possible_params.remove('score_func')\n selector.set_params(**{possible_params[0]: self.param})\n\n return selector\n\n def _check_params(self, X, y):\n if self.mode not in self._selection_modes:\n raise ValueError(\"The mode passed should be one of %s, %r,\"\n \" (type %s) was passed.\"\n % (self._selection_modes.keys(), self.mode,\n type(self.mode)))\n\n self._make_selector()._check_params(X, y)\n\n def _get_support_mask(self):\n check_is_fitted(self)\n\n selector = self._make_selector()\n selector.pvalues_ = self.pvalues_\n selector.scores_ = self.scores_\n return selector._get_support_mask()", + "instance_attributes": [ + { + "name": "mode", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "param", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFdr", + "name": "SelectFdr", + "qname": "sklearn.feature_selection._univariate_selection.SelectFdr", + "decorators": [], + "superclasses": ["_BaseFilter"], + "methods": [ + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFdr/__init__", + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFdr/_get_support_mask" + ], + "is_public": false, + "reexported_by": [], + "description": "Filter: Select the p-values for an 
estimated false discovery rate\n\nThis uses the Benjamini-Hochberg procedure. ``alpha`` is an upper bound\non the expected false discovery rate.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Filter: Select the p-values for an estimated false discovery rate\n\nThis uses the Benjamini-Hochberg procedure. ``alpha`` is an upper bound\non the expected false discovery rate.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues).\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\nalpha : float, default=5e-2\n The highest uncorrected p-value for features to keep.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import SelectFdr, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> X_new = SelectFdr(chi2, alpha=0.01).fit_transform(X, y)\n>>> X_new.shape\n(569, 16)\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores.\n\nReferences\n----------\nhttps://en.wikipedia.org/wiki/False_discovery_rate\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode.", + "code": "class SelectFdr(_BaseFilter):\n \"\"\"Filter: Select the p-values for an estimated false discovery rate\n\n This uses the Benjamini-Hochberg procedure. ``alpha`` is an upper bound\n on the expected false discovery rate.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n score_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues).\n Default is f_classif (see below \"See Also\"). 
The default function only\n works with classification tasks.\n\n alpha : float, default=5e-2\n The highest uncorrected p-value for features to keep.\n\n Examples\n --------\n >>> from sklearn.datasets import load_breast_cancer\n >>> from sklearn.feature_selection import SelectFdr, chi2\n >>> X, y = load_breast_cancer(return_X_y=True)\n >>> X.shape\n (569, 30)\n >>> X_new = SelectFdr(chi2, alpha=0.01).fit_transform(X, y)\n >>> X_new.shape\n (569, 16)\n\n Attributes\n ----------\n scores_ : array-like of shape (n_features,)\n Scores of features.\n\n pvalues_ : array-like of shape (n_features,)\n p-values of feature scores.\n\n References\n ----------\n https://en.wikipedia.org/wiki/False_discovery_rate\n\n See Also\n --------\n f_classif : ANOVA F-value between label/feature for classification tasks.\n mutual_info_classif : Mutual information for a discrete target.\n chi2 : Chi-squared stats of non-negative features for classification tasks.\n f_regression : F-value between label/feature for regression tasks.\n mutual_info_regression : Mutual information for a continuous target.\n SelectPercentile : Select features based on percentile of the highest\n scores.\n SelectKBest : Select features based on the k highest scores.\n SelectFpr : Select features based on a false positive rate test.\n SelectFwe : Select features based on family-wise error rate.\n GenericUnivariateSelect : Univariate feature selector with configurable\n mode.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, score_func=f_classif, *, alpha=5e-2):\n super().__init__(score_func=score_func)\n self.alpha = alpha\n\n def _get_support_mask(self):\n check_is_fitted(self)\n\n n_features = len(self.pvalues_)\n sv = np.sort(self.pvalues_)\n selected = sv[sv <= float(self.alpha) / n_features *\n np.arange(1, n_features + 1)]\n if selected.size == 0:\n return np.zeros_like(self.pvalues_, dtype=bool)\n return self.pvalues_ <= selected.max()", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFpr", + "name": "SelectFpr", + "qname": "sklearn.feature_selection._univariate_selection.SelectFpr", + "decorators": [], + "superclasses": ["_BaseFilter"], + "methods": [ + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFpr/__init__", + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFpr/_get_support_mask" + ], + "is_public": false, + "reexported_by": [], + "description": "Filter: Select the pvalues below alpha based on a FPR test.\n\nFPR test stands for False Positive Rate test. It controls the total\namount of false detections.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Filter: Select the pvalues below alpha based on a FPR test.\n\nFPR test stands for False Positive Rate test. It controls the total\namount of false detections.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues).\n Default is f_classif (see below \"See Also\"). 
The default function only\n works with classification tasks.\n\nalpha : float, default=5e-2\n The highest p-value for features to be kept.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import SelectFpr, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> X_new = SelectFpr(chi2, alpha=0.01).fit_transform(X, y)\n>>> X_new.shape\n(569, 16)\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nmutual_info_classif: Mutual information for a discrete target.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode.", + "code": "class SelectFpr(_BaseFilter):\n \"\"\"Filter: Select the pvalues below alpha based on a FPR test.\n\n FPR test stands for False Positive Rate test. It controls the total\n amount of false detections.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n score_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues).\n Default is f_classif (see below \"See Also\"). 
The default function only\n works with classification tasks.\n\n alpha : float, default=5e-2\n The highest p-value for features to be kept.\n\n Attributes\n ----------\n scores_ : array-like of shape (n_features,)\n Scores of features.\n\n pvalues_ : array-like of shape (n_features,)\n p-values of feature scores.\n\n Examples\n --------\n >>> from sklearn.datasets import load_breast_cancer\n >>> from sklearn.feature_selection import SelectFpr, chi2\n >>> X, y = load_breast_cancer(return_X_y=True)\n >>> X.shape\n (569, 30)\n >>> X_new = SelectFpr(chi2, alpha=0.01).fit_transform(X, y)\n >>> X_new.shape\n (569, 16)\n\n See Also\n --------\n f_classif : ANOVA F-value between label/feature for classification tasks.\n chi2 : Chi-squared stats of non-negative features for classification tasks.\n mutual_info_classif : Mutual information for a discrete target.\n f_regression : F-value between label/feature for regression tasks.\n mutual_info_regression : Mutual information for a continuous target.\n SelectPercentile : Select features based on percentile of the highest\n scores.\n SelectKBest : Select features based on the k highest scores.\n SelectFdr : Select features based on an estimated false discovery rate.\n SelectFwe : Select features based on family-wise error rate.\n GenericUnivariateSelect : Univariate feature selector with configurable\n mode.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, score_func=f_classif, *, alpha=5e-2):\n super().__init__(score_func=score_func)\n self.alpha = alpha\n\n def _get_support_mask(self):\n check_is_fitted(self)\n\n return self.pvalues_ < self.alpha", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFwe", + "name": "SelectFwe", + "qname": "sklearn.feature_selection._univariate_selection.SelectFwe", + "decorators": [], + "superclasses": ["_BaseFilter"], + "methods": [ + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFwe/__init__", + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFwe/_get_support_mask" + ], + "is_public": false, + "reexported_by": [], + "description": "Filter: Select the p-values corresponding to Family-wise error rate\n\nRead more in the :ref:`User Guide `.", + "docstring": "Filter: Select the p-values corresponding to Family-wise error rate\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues).\n Default is f_classif (see below \"See Also\"). 
The default function only\n works with classification tasks.\n\nalpha : float, default=5e-2\n The highest uncorrected p-value for features to keep.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.feature_selection import SelectFwe, chi2\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> X.shape\n(569, 30)\n>>> X_new = SelectFwe(chi2, alpha=0.01).fit_transform(X, y)\n>>> X_new.shape\n(569, 15)\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores.\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode.", + "code": "class SelectFwe(_BaseFilter):\n \"\"\"Filter: Select the p-values corresponding to Family-wise error rate\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n score_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues).\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\n alpha : float, default=5e-2\n The highest uncorrected p-value for features to keep.\n\n Examples\n --------\n >>> from sklearn.datasets import load_breast_cancer\n >>> from sklearn.feature_selection import SelectFwe, chi2\n >>> X, y = load_breast_cancer(return_X_y=True)\n >>> X.shape\n (569, 30)\n >>> X_new = SelectFwe(chi2, alpha=0.01).fit_transform(X, y)\n >>> X_new.shape\n (569, 15)\n\n Attributes\n ----------\n scores_ : array-like of shape (n_features,)\n Scores of features.\n\n pvalues_ : array-like of shape (n_features,)\n p-values of feature scores.\n\n See Also\n --------\n f_classif : ANOVA F-value between label/feature for classification tasks.\n chi2 : Chi-squared stats of non-negative features for classification tasks.\n f_regression : F-value between label/feature for regression tasks.\n SelectPercentile : Select features based on percentile of the highest\n scores.\n SelectKBest : Select features based on the k highest scores.\n SelectFpr : Select features based on a false positive rate test.\n SelectFdr : Select features based on an estimated false discovery rate.\n GenericUnivariateSelect : Univariate feature selector with configurable\n mode.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, score_func=f_classif, *, alpha=5e-2):\n super().__init__(score_func=score_func)\n self.alpha = alpha\n\n def _get_support_mask(self):\n check_is_fitted(self)\n\n return (self.pvalues_ < self.alpha / len(self.pvalues_))", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest", + "name": "SelectKBest", + "qname": "sklearn.feature_selection._univariate_selection.SelectKBest", + "decorators": [], + "superclasses": ["_BaseFilter"], + "methods": [ + 
"scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest/__init__", + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest/_check_params", + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest/_get_support_mask" + ], + "is_public": false, + "reexported_by": [], + "description": "Select features according to the k highest scores.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Select features according to the k highest scores.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues) or a single array with scores.\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\n .. versionadded:: 0.18\n\nk : int or \"all\", default=10\n Number of top features to select.\n The \"all\" option bypasses selection, for use in a parameter search.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores, None if `score_func` returned only scores.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.feature_selection import SelectKBest, chi2\n>>> X, y = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> X_new = SelectKBest(chi2, k=20).fit_transform(X, y)\n>>> X_new.shape\n(1797, 20)\n\nNotes\n-----\nTies between features with equal scores will be broken in an unspecified\nway.\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectPercentile : Select features based on percentile of the highest\n scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode.", + "code": "class SelectKBest(_BaseFilter):\n \"\"\"Select features according to the k highest scores.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n score_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues) or a single array with scores.\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\n .. 
versionadded:: 0.18\n\n k : int or \"all\", default=10\n Number of top features to select.\n The \"all\" option bypasses selection, for use in a parameter search.\n\n Attributes\n ----------\n scores_ : array-like of shape (n_features,)\n Scores of features.\n\n pvalues_ : array-like of shape (n_features,)\n p-values of feature scores, None if `score_func` returned only scores.\n\n Examples\n --------\n >>> from sklearn.datasets import load_digits\n >>> from sklearn.feature_selection import SelectKBest, chi2\n >>> X, y = load_digits(return_X_y=True)\n >>> X.shape\n (1797, 64)\n >>> X_new = SelectKBest(chi2, k=20).fit_transform(X, y)\n >>> X_new.shape\n (1797, 20)\n\n Notes\n -----\n Ties between features with equal scores will be broken in an unspecified\n way.\n\n See Also\n --------\n f_classif : ANOVA F-value between label/feature for classification tasks.\n mutual_info_classif : Mutual information for a discrete target.\n chi2 : Chi-squared stats of non-negative features for classification tasks.\n f_regression : F-value between label/feature for regression tasks.\n mutual_info_regression : Mutual information for a continuous target.\n SelectPercentile : Select features based on percentile of the highest\n scores.\n SelectFpr : Select features based on a false positive rate test.\n SelectFdr : Select features based on an estimated false discovery rate.\n SelectFwe : Select features based on family-wise error rate.\n GenericUnivariateSelect : Univariate feature selector with configurable\n mode.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, score_func=f_classif, *, k=10):\n super().__init__(score_func=score_func)\n self.k = k\n\n def _check_params(self, X, y):\n if not (self.k == \"all\" or 0 <= self.k <= X.shape[1]):\n raise ValueError(\"k should be >=0, <= n_features = %d; got %r. \"\n \"Use k='all' to return all features.\"\n % (X.shape[1], self.k))\n\n def _get_support_mask(self):\n check_is_fitted(self)\n\n if self.k == 'all':\n return np.ones(self.scores_.shape, dtype=bool)\n elif self.k == 0:\n return np.zeros(self.scores_.shape, dtype=bool)\n else:\n scores = _clean_nans(self.scores_)\n mask = np.zeros(scores.shape, dtype=bool)\n\n # Request a stable sort. 
Mergesort takes more memory (~40MB per\n # megafeature on x86-64).\n mask[np.argsort(scores, kind=\"mergesort\")[-self.k:]] = 1\n return mask", + "instance_attributes": [ + { + "name": "k", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile", + "name": "SelectPercentile", + "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile", + "decorators": [], + "superclasses": ["_BaseFilter"], + "methods": [ + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile/__init__", + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile/_check_params", + "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile/_get_support_mask" + ], + "is_public": false, + "reexported_by": [], + "description": "Select features according to a percentile of the highest scores.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Select features according to a percentile of the highest scores.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues) or a single array with scores.\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\n .. versionadded:: 0.18\n\npercentile : int, default=10\n Percent of features to keep.\n\nAttributes\n----------\nscores_ : array-like of shape (n_features,)\n Scores of features.\n\npvalues_ : array-like of shape (n_features,)\n p-values of feature scores, None if `score_func` returned only scores.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.feature_selection import SelectPercentile, chi2\n>>> X, y = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)\n>>> X_new.shape\n(1797, 7)\n\nNotes\n-----\nTies between features with equal scores will be broken in an unspecified\nway.\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nmutual_info_classif : Mutual information for a discrete target.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.\nmutual_info_regression : Mutual information for a continuous target.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nGenericUnivariateSelect : Univariate feature selector with configurable\n mode.", + "code": "class SelectPercentile(_BaseFilter):\n \"\"\"Select features according to a percentile of the highest scores.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n score_func : callable, default=f_classif\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues) or a single array with scores.\n Default is f_classif (see below \"See Also\"). The default function only\n works with classification tasks.\n\n .. 
versionadded:: 0.18\n\n percentile : int, default=10\n Percent of features to keep.\n\n Attributes\n ----------\n scores_ : array-like of shape (n_features,)\n Scores of features.\n\n pvalues_ : array-like of shape (n_features,)\n p-values of feature scores, None if `score_func` returned only scores.\n\n Examples\n --------\n >>> from sklearn.datasets import load_digits\n >>> from sklearn.feature_selection import SelectPercentile, chi2\n >>> X, y = load_digits(return_X_y=True)\n >>> X.shape\n (1797, 64)\n >>> X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)\n >>> X_new.shape\n (1797, 7)\n\n Notes\n -----\n Ties between features with equal scores will be broken in an unspecified\n way.\n\n See Also\n --------\n f_classif : ANOVA F-value between label/feature for classification tasks.\n mutual_info_classif : Mutual information for a discrete target.\n chi2 : Chi-squared stats of non-negative features for classification tasks.\n f_regression : F-value between label/feature for regression tasks.\n mutual_info_regression : Mutual information for a continuous target.\n SelectKBest : Select features based on the k highest scores.\n SelectFpr : Select features based on a false positive rate test.\n SelectFdr : Select features based on an estimated false discovery rate.\n SelectFwe : Select features based on family-wise error rate.\n GenericUnivariateSelect : Univariate feature selector with configurable\n mode.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, score_func=f_classif, *, percentile=10):\n super().__init__(score_func=score_func)\n self.percentile = percentile\n\n def _check_params(self, X, y):\n if not 0 <= self.percentile <= 100:\n raise ValueError(\"percentile should be >=0, <=100; got %r\"\n % self.percentile)\n\n def _get_support_mask(self):\n check_is_fitted(self)\n\n # Cater for NaNs\n if self.percentile == 100:\n return np.ones(len(self.scores_), dtype=bool)\n elif self.percentile == 0:\n return np.zeros(len(self.scores_), dtype=bool)\n\n scores = _clean_nans(self.scores_)\n threshold = np.percentile(scores, 100 - self.percentile)\n mask = scores > threshold\n ties = np.where(scores == threshold)[0]\n if len(ties):\n max_feats = int(len(scores) * self.percentile / 100)\n kept_ties = ties[:max_feats - mask.sum()]\n mask[kept_ties] = True\n return mask", + "instance_attributes": [ + { + "name": "percentile", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter", + "name": "_BaseFilter", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter", + "decorators": [], + "superclasses": ["SelectorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/__init__", + "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/fit", + "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/_check_params", + "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Initialize the univariate feature selection.", + "docstring": "Initialize the univariate feature selection.\n\nParameters\n----------\nscore_func : callable\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues) or a single array with scores.", + "code": "class _BaseFilter(SelectorMixin, BaseEstimator):\n \"\"\"Initialize the univariate feature selection.\n\n 
Parameters\n ----------\n score_func : callable\n Function taking two arrays X and y, and returning a pair of arrays\n (scores, pvalues) or a single array with scores.\n \"\"\"\n\n def __init__(self, score_func):\n self.score_func = score_func\n\n def fit(self, X, y):\n \"\"\"Run score function on (X, y) and get the appropriate features.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The training input samples.\n\n y : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'],\n multi_output=True)\n\n if not callable(self.score_func):\n raise TypeError(\"The score function should be a callable, %s (%s) \"\n \"was passed.\"\n % (self.score_func, type(self.score_func)))\n\n self._check_params(X, y)\n score_func_ret = self.score_func(X, y)\n if isinstance(score_func_ret, (list, tuple)):\n self.scores_, self.pvalues_ = score_func_ret\n self.pvalues_ = np.asarray(self.pvalues_)\n else:\n self.scores_ = score_func_ret\n self.pvalues_ = None\n\n self.scores_ = np.asarray(self.scores_)\n\n return self\n\n def _check_params(self, X, y):\n pass\n\n def _more_tags(self):\n return {'requires_y': True}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold", + "name": "VarianceThreshold", + "qname": "sklearn.feature_selection._variance_threshold.VarianceThreshold", + "decorators": [], + "superclasses": ["SelectorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/__init__", + "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/fit", + "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/_get_support_mask", + "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Feature selector that removes all low-variance features.\n\nThis feature selection algorithm looks only at the features (X), not the\ndesired outputs (y), and can thus be used for unsupervised learning.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Feature selector that removes all low-variance features.\n\nThis feature selection algorithm looks only at the features (X), not the\ndesired outputs (y), and can thus be used for unsupervised learning.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nthreshold : float, default=0\n Features with a training-set variance lower than this threshold will\n be removed. The default is to keep all features with non-zero variance,\n i.e. remove the features that have the same value in all samples.\n\nAttributes\n----------\nvariances_ : array, shape (n_features,)\n Variances of individual features.\n\nNotes\n-----\nAllows NaN in the input.\nRaises ValueError if no feature in X meets the variance threshold.\n\nExamples\n--------\nThe following dataset has integer features, two of which are the same\nin every sample. 
These are removed with the default setting for threshold::\n\n >>> X = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]\n >>> selector = VarianceThreshold()\n >>> selector.fit_transform(X)\n array([[2, 0],\n [1, 4],\n [1, 1]])", + "code": "class VarianceThreshold(SelectorMixin, BaseEstimator):\n \"\"\"Feature selector that removes all low-variance features.\n\n This feature selection algorithm looks only at the features (X), not the\n desired outputs (y), and can thus be used for unsupervised learning.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n threshold : float, default=0\n Features with a training-set variance lower than this threshold will\n be removed. The default is to keep all features with non-zero variance,\n i.e. remove the features that have the same value in all samples.\n\n Attributes\n ----------\n variances_ : array, shape (n_features,)\n Variances of individual features.\n\n Notes\n -----\n Allows NaN in the input.\n Raises ValueError if no feature in X meets the variance threshold.\n\n Examples\n --------\n The following dataset has integer features, two of which are the same\n in every sample. These are removed with the default setting for threshold::\n\n >>> X = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]\n >>> selector = VarianceThreshold()\n >>> selector.fit_transform(X)\n array([[2, 0],\n [1, 4],\n [1, 1]])\n \"\"\"\n\n def __init__(self, threshold=0.):\n self.threshold = threshold\n\n def fit(self, X, y=None):\n \"\"\"Learn empirical variances from X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Sample vectors from which to compute variances.\n\n y : any, default=None\n Ignored. This parameter exists only for compatibility with\n sklearn.pipeline.Pipeline.\n\n Returns\n -------\n self\n \"\"\"\n X = self._validate_data(X, accept_sparse=('csr', 'csc'),\n dtype=np.float64,\n force_all_finite='allow-nan')\n\n if hasattr(X, \"toarray\"): # sparse matrix\n _, self.variances_ = mean_variance_axis(X, axis=0)\n if self.threshold == 0:\n mins, maxes = min_max_axis(X, axis=0)\n peak_to_peaks = maxes - mins\n else:\n self.variances_ = np.nanvar(X, axis=0)\n if self.threshold == 0:\n peak_to_peaks = np.ptp(X, axis=0)\n\n if self.threshold == 0:\n # Use peak-to-peak to avoid numeric precision issues\n # for constant features\n compare_arr = np.array([self.variances_, peak_to_peaks])\n self.variances_ = np.nanmin(compare_arr, axis=0)\n\n if np.all(~np.isfinite(self.variances_) |\n (self.variances_ <= self.threshold)):\n msg = \"No feature in X meets the variance threshold {0:.5f}\"\n if X.shape[0] == 1:\n msg += \" (X contains only one sample)\"\n raise ValueError(msg.format(self.threshold))\n\n return self\n\n def _get_support_mask(self):\n check_is_fitted(self)\n\n return self.variances_ > self.threshold\n\n def _more_tags(self):\n return {'allow_nan': True}", + "instance_attributes": [ + { + "name": "threshold", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier", + "name": "GaussianProcessClassifier", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier", + "decorators": [], + "superclasses": ["ClassifierMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/__init__", + "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/fit", + "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/predict", + 
"scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/predict_proba", + "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/kernel_@getter", + "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/log_marginal_likelihood" + ], + "is_public": false, + "reexported_by": [], + "description": "Gaussian process classification (GPC) based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\nGaussian Processes for Machine Learning (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction. For multi-class classification, several binary one-versus rest\nclassifiers are fitted. Note that this class thus does not implement\na true multi-class Laplace approximation.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Gaussian process classification (GPC) based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\nGaussian Processes for Machine Learning (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction. For multi-class classification, several binary one-versus rest\nclassifiers are fitted. Note that this class thus does not implement\na true multi-class Laplace approximation.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nkernel : kernel instance, default=None\n The kernel specifying the covariance function of the GP. If None is\n passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n the kernel's hyperparameters are optimized during fitting.\n\noptimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n Can either be one of the internally supported optimizers for optimizing\n the kernel's parameters, specified by a string, or an externally\n defined optimizer passed as a callable. If a callable is passed, it\n must have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be maximized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\n Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n is used. If None is passed, the kernel's parameters are kept fixed.\n Available internal optimizers are::\n\n 'fmin_l_bfgs_b'\n\nn_restarts_optimizer : int, default=0\n The number of restarts of the optimizer for finding the kernel's\n parameters which maximize the log-marginal likelihood. The first run\n of the optimizer is performed from the kernel's initial parameters,\n the remaining ones (if any) from thetas sampled log-uniform randomly\n from the space of allowed theta-values. If greater than 0, all bounds\n must be finite. 
Note that n_restarts_optimizer=0 implies that one\n run is performed.\n\nmax_iter_predict : int, default=100\n The maximum number of iterations in Newton's method for approximating\n the posterior during predict. Smaller values will reduce computation\n time at the cost of worse results.\n\nwarm_start : bool, default=False\n If warm-starts are enabled, the solution of the last Newton iteration\n on the Laplace approximation of the posterior mode is used as\n initialization for the next call of _posterior_mode(). This can speed\n up convergence when _posterior_mode is called several times on similar\n problems as in hyperparameter optimization. See :term:`the Glossary\n `.\n\ncopy_X_train : bool, default=True\n If True, a persistent copy of the training data is stored in the\n object. Otherwise, just a reference to the training data is stored,\n which might cause predictions to change if the data is modified\n externally.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nmulti_class : {'one_vs_rest', 'one_vs_one'}, default='one_vs_rest'\n Specifies how multi-class classification problems are handled.\n Supported are 'one_vs_rest' and 'one_vs_one'. In 'one_vs_rest',\n one binary Gaussian process classifier is fitted for each class, which\n is trained to separate this class from the rest. In 'one_vs_one', one\n binary Gaussian process classifier is fitted for each pair of classes,\n which is trained to separate these two classes. The predictions of\n these binary predictors are combined into multi-class predictions.\n Note that 'one_vs_one' does not support predicting probability\n estimates.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the specified\n multiclass problems are computed in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nbase_estimator_ : ``Estimator`` instance\n The estimator instance that defines the likelihood function\n using the observed data.\n\nkernel_ : kernel instance\n The kernel used for prediction. In case of binary classification,\n the structure of the kernel is the same as the one passed as parameter\n but with optimized hyperparameters. In case of multi-class\n classification, a CompoundKernel is returned which consists of the\n different kernels used in the one-versus-rest classifiers.\n\nlog_marginal_likelihood_value_ : float\n The log-marginal-likelihood of ``self.kernel_.theta``\n\nclasses_ : array-like of shape (n_classes,)\n Unique class labels.\n\nn_classes_ : int\n The number of classes in the training data\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import RBF\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * RBF(1.0)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.83548752, 0.03228706, 0.13222543],\n [0.79064206, 0.06525643, 0.14410151]])\n\n.. 
versionadded:: 0.18", + "code": "class GaussianProcessClassifier(ClassifierMixin, BaseEstimator):\n \"\"\"Gaussian process classification (GPC) based on Laplace approximation.\n\n The implementation is based on Algorithm 3.1, 3.2, and 5.1 of\n Gaussian Processes for Machine Learning (GPML) by Rasmussen and\n Williams.\n\n Internally, the Laplace approximation is used for approximating the\n non-Gaussian posterior by a Gaussian.\n\n Currently, the implementation is restricted to using the logistic link\n function. For multi-class classification, several binary one-versus rest\n classifiers are fitted. Note that this class thus does not implement\n a true multi-class Laplace approximation.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n kernel : kernel instance, default=None\n The kernel specifying the covariance function of the GP. If None is\n passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n the kernel's hyperparameters are optimized during fitting.\n\n optimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n Can either be one of the internally supported optimizers for optimizing\n the kernel's parameters, specified by a string, or an externally\n defined optimizer passed as a callable. If a callable is passed, it\n must have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be maximized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\n Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n is used. If None is passed, the kernel's parameters are kept fixed.\n Available internal optimizers are::\n\n 'fmin_l_bfgs_b'\n\n n_restarts_optimizer : int, default=0\n The number of restarts of the optimizer for finding the kernel's\n parameters which maximize the log-marginal likelihood. The first run\n of the optimizer is performed from the kernel's initial parameters,\n the remaining ones (if any) from thetas sampled log-uniform randomly\n from the space of allowed theta-values. If greater than 0, all bounds\n must be finite. Note that n_restarts_optimizer=0 implies that one\n run is performed.\n\n max_iter_predict : int, default=100\n The maximum number of iterations in Newton's method for approximating\n the posterior during predict. Smaller values will reduce computation\n time at the cost of worse results.\n\n warm_start : bool, default=False\n If warm-starts are enabled, the solution of the last Newton iteration\n on the Laplace approximation of the posterior mode is used as\n initialization for the next call of _posterior_mode(). This can speed\n up convergence when _posterior_mode is called several times on similar\n problems as in hyperparameter optimization. See :term:`the Glossary\n `.\n\n copy_X_train : bool, default=True\n If True, a persistent copy of the training data is stored in the\n object. 
Otherwise, just a reference to the training data is stored,\n which might cause predictions to change if the data is modified\n externally.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\n multi_class : {'one_vs_rest', 'one_vs_one'}, default='one_vs_rest'\n Specifies how multi-class classification problems are handled.\n Supported are 'one_vs_rest' and 'one_vs_one'. In 'one_vs_rest',\n one binary Gaussian process classifier is fitted for each class, which\n is trained to separate this class from the rest. In 'one_vs_one', one\n binary Gaussian process classifier is fitted for each pair of classes,\n which is trained to separate these two classes. The predictions of\n these binary predictors are combined into multi-class predictions.\n Note that 'one_vs_one' does not support predicting probability\n estimates.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation: the specified\n multiclass problems are computed in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n base_estimator_ : ``Estimator`` instance\n The estimator instance that defines the likelihood function\n using the observed data.\n\n kernel_ : kernel instance\n The kernel used for prediction. In case of binary classification,\n the structure of the kernel is the same as the one passed as parameter\n but with optimized hyperparameters. In case of multi-class\n classification, a CompoundKernel is returned which consists of the\n different kernels used in the one-versus-rest classifiers.\n\n log_marginal_likelihood_value_ : float\n The log-marginal-likelihood of ``self.kernel_.theta``\n\n classes_ : array-like of shape (n_classes,)\n Unique class labels.\n\n n_classes_ : int\n The number of classes in the training data\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.gaussian_process import GaussianProcessClassifier\n >>> from sklearn.gaussian_process.kernels import RBF\n >>> X, y = load_iris(return_X_y=True)\n >>> kernel = 1.0 * RBF(1.0)\n >>> gpc = GaussianProcessClassifier(kernel=kernel,\n ... random_state=0).fit(X, y)\n >>> gpc.score(X, y)\n 0.9866...\n >>> gpc.predict_proba(X[:2,:])\n array([[0.83548752, 0.03228706, 0.13222543],\n [0.79064206, 0.06525643, 0.14410151]])\n\n .. 
versionadded:: 0.18\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, kernel=None, *, optimizer=\"fmin_l_bfgs_b\",\n n_restarts_optimizer=0, max_iter_predict=100,\n warm_start=False, copy_X_train=True, random_state=None,\n multi_class=\"one_vs_rest\", n_jobs=None):\n self.kernel = kernel\n self.optimizer = optimizer\n self.n_restarts_optimizer = n_restarts_optimizer\n self.max_iter_predict = max_iter_predict\n self.warm_start = warm_start\n self.copy_X_train = copy_X_train\n self.random_state = random_state\n self.multi_class = multi_class\n self.n_jobs = n_jobs\n\n def fit(self, X, y):\n \"\"\"Fit Gaussian process classification model\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\n y : array-like of shape (n_samples,)\n Target values, must be binary\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n if self.kernel is None or self.kernel.requires_vector_input:\n X, y = self._validate_data(X, y, multi_output=False,\n ensure_2d=True, dtype=\"numeric\")\n else:\n X, y = self._validate_data(X, y, multi_output=False,\n ensure_2d=False, dtype=None)\n\n self.base_estimator_ = _BinaryGaussianProcessClassifierLaplace(\n kernel=self.kernel,\n optimizer=self.optimizer,\n n_restarts_optimizer=self.n_restarts_optimizer,\n max_iter_predict=self.max_iter_predict,\n warm_start=self.warm_start,\n copy_X_train=self.copy_X_train,\n random_state=self.random_state)\n\n self.classes_ = np.unique(y)\n self.n_classes_ = self.classes_.size\n if self.n_classes_ == 1:\n raise ValueError(\"GaussianProcessClassifier requires 2 or more \"\n \"distinct classes; got %d class (only class %s \"\n \"is present)\"\n % (self.n_classes_, self.classes_[0]))\n if self.n_classes_ > 2:\n if self.multi_class == \"one_vs_rest\":\n self.base_estimator_ = \\\n OneVsRestClassifier(self.base_estimator_,\n n_jobs=self.n_jobs)\n elif self.multi_class == \"one_vs_one\":\n self.base_estimator_ = \\\n OneVsOneClassifier(self.base_estimator_,\n n_jobs=self.n_jobs)\n else:\n raise ValueError(\"Unknown multi-class mode %s\"\n % self.multi_class)\n\n self.base_estimator_.fit(X, y)\n\n if self.n_classes_ > 2:\n self.log_marginal_likelihood_value_ = np.mean(\n [estimator.log_marginal_likelihood()\n for estimator in self.base_estimator_.estimators_])\n else:\n self.log_marginal_likelihood_value_ = \\\n self.base_estimator_.log_marginal_likelihood()\n\n return self\n\n def predict(self, X):\n \"\"\"Perform classification on an array of test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\n Returns\n -------\n C : ndarray of shape (n_samples,)\n Predicted target values for X, values are from ``classes_``\n \"\"\"\n check_is_fitted(self)\n\n if self.kernel is None or self.kernel.requires_vector_input:\n X = check_array(X, ensure_2d=True, dtype=\"numeric\")\n else:\n X = check_array(X, ensure_2d=False, dtype=None)\n\n return self.base_estimator_.predict(X)\n\n def predict_proba(self, X):\n \"\"\"Return probability estimates for the test vector X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\n Returns\n -------\n C : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. 
The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n if self.n_classes_ > 2 and self.multi_class == \"one_vs_one\":\n raise ValueError(\"one_vs_one multi-class mode does not support \"\n \"predicting probability estimates. Use \"\n \"one_vs_rest mode instead.\")\n\n if self.kernel is None or self.kernel.requires_vector_input:\n X = check_array(X, ensure_2d=True, dtype=\"numeric\")\n else:\n X = check_array(X, ensure_2d=False, dtype=None)\n\n return self.base_estimator_.predict_proba(X)\n\n @property\n def kernel_(self):\n if self.n_classes_ == 2:\n return self.base_estimator_.kernel_\n else:\n return CompoundKernel(\n [estimator.kernel_\n for estimator in self.base_estimator_.estimators_])\n\n def log_marginal_likelihood(self, theta=None, eval_gradient=False,\n clone_kernel=True):\n \"\"\"Returns log-marginal likelihood of theta for training data.\n\n In the case of multi-class classification, the mean log-marginal\n likelihood of the one-versus-rest classifiers is returned.\n\n Parameters\n ----------\n theta : array-like of shape (n_kernel_params,), default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. In the case of multi-class classification, theta may\n be the hyperparameters of the compound kernel or of an individual\n kernel. In the latter case, all individual kernels get assigned the\n same theta values. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\n eval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. Note that gradient computation is not supported\n for non-binary classification. If True, theta must not be None.\n\n clone_kernel : bool, default=True\n If True, the kernel attribute is copied. If False, the kernel\n attribute is modified, but may result in a performance improvement.\n\n Returns\n -------\n log_likelihood : float\n Log-marginal likelihood of theta for training data.\n\n log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when `eval_gradient` is True.\n \"\"\"\n check_is_fitted(self)\n\n if theta is None:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated for theta!=None\")\n return self.log_marginal_likelihood_value_\n\n theta = np.asarray(theta)\n if self.n_classes_ == 2:\n return self.base_estimator_.log_marginal_likelihood(\n theta, eval_gradient, clone_kernel=clone_kernel)\n else:\n if eval_gradient:\n raise NotImplementedError(\n \"Gradient of log-marginal-likelihood not implemented for \"\n \"multi-class GPC.\")\n estimators = self.base_estimator_.estimators_\n n_dims = estimators[0].kernel_.n_dims\n if theta.shape[0] == n_dims: # use same theta for all sub-kernels\n return np.mean(\n [estimator.log_marginal_likelihood(\n theta, clone_kernel=clone_kernel)\n for i, estimator in enumerate(estimators)])\n elif theta.shape[0] == n_dims * self.classes_.shape[0]:\n # theta for compound kernel\n return np.mean(\n [estimator.log_marginal_likelihood(\n theta[n_dims * i:n_dims * (i + 1)],\n clone_kernel=clone_kernel)\n for i, estimator in enumerate(estimators)])\n else:\n raise ValueError(\"Shape of theta must be either %d or %d. 
\"\n \"Obtained theta with shape %d.\"\n % (n_dims, n_dims * self.classes_.shape[0],\n theta.shape[0]))", + "instance_attributes": [ + { + "name": "optimizer", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_restarts_optimizer", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "max_iter_predict", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "warm_start", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy_X_train", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "multi_class", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "base_estimator_", + "types": { + "kind": "NamedType", + "name": "_BinaryGaussianProcessClassifierLaplace" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace", + "name": "_BinaryGaussianProcessClassifierLaplace", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace", + "decorators": [], + "superclasses": ["BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/__init__", + "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/fit", + "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/predict", + "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/predict_proba", + "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/log_marginal_likelihood", + "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/_posterior_mode", + "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/_constrained_optimization" + ], + "is_public": false, + "reexported_by": [], + "description": "Binary Gaussian process classification based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\n``Gaussian Processes for Machine Learning'' (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction.\n\n.. versionadded:: 0.18", + "docstring": "Binary Gaussian process classification based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\n``Gaussian Processes for Machine Learning'' (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : kernel instance, default=None\n The kernel specifying the covariance function of the GP. If None is\n passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n the kernel's hyperparameters are optimized during fitting.\n\noptimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n Can either be one of the internally supported optimizers for optimizing\n the kernel's parameters, specified by a string, or an externally\n defined optimizer passed as a callable. 
If a callable is passed, it\n must have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be maximized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\n Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n is used. If None is passed, the kernel's parameters are kept fixed.\n Available internal optimizers are::\n\n 'fmin_l_bfgs_b'\n\nn_restarts_optimizer : int, default=0\n The number of restarts of the optimizer for finding the kernel's\n parameters which maximize the log-marginal likelihood. The first run\n of the optimizer is performed from the kernel's initial parameters,\n the remaining ones (if any) from thetas sampled log-uniform randomly\n from the space of allowed theta-values. If greater than 0, all bounds\n must be finite. Note that n_restarts_optimizer=0 implies that one\n run is performed.\n\nmax_iter_predict : int, default=100\n The maximum number of iterations in Newton's method for approximating\n the posterior during predict. Smaller values will reduce computation\n time at the cost of worse results.\n\nwarm_start : bool, default=False\n If warm-starts are enabled, the solution of the last Newton iteration\n on the Laplace approximation of the posterior mode is used as\n initialization for the next call of _posterior_mode(). This can speed\n up convergence when _posterior_mode is called several times on similar\n problems as in hyperparameter optimization. See :term:`the Glossary\n `.\n\ncopy_X_train : bool, default=True\n If True, a persistent copy of the training data is stored in the\n object. Otherwise, just a reference to the training data is stored,\n which might cause predictions to change if the data is modified\n externally.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\nX_train_ : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data (also\n required for prediction).\n\ny_train_ : array-like of shape (n_samples,)\n Target values in training data (also required for prediction)\n\nclasses_ : array-like of shape (n_classes,)\n Unique class labels.\n\nkernel_ : kernel instance\n The kernel used for prediction. The structure of the kernel is the\n same as the one passed as parameter but with optimized hyperparameters\n\nL_ : array-like of shape (n_samples, n_samples)\n Lower-triangular Cholesky decomposition of the kernel in X_train_\n\npi_ : array-like of shape (n_samples,)\n The probabilities of the positive class for the training points\n X_train_\n\nW_sr_ : array-like of shape (n_samples,)\n Square root of W, the Hessian of log-likelihood of the latent function\n values for the observed labels. 
Since W is diagonal, only the diagonal\n of sqrt(W) is stored.\n\nlog_marginal_likelihood_value_ : float\n The log-marginal-likelihood of ``self.kernel_.theta``", + "code": "class _BinaryGaussianProcessClassifierLaplace(BaseEstimator):\n \"\"\"Binary Gaussian process classification based on Laplace approximation.\n\n The implementation is based on Algorithm 3.1, 3.2, and 5.1 of\n ``Gaussian Processes for Machine Learning'' (GPML) by Rasmussen and\n Williams.\n\n Internally, the Laplace approximation is used for approximating the\n non-Gaussian posterior by a Gaussian.\n\n Currently, the implementation is restricted to using the logistic link\n function.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n kernel : kernel instance, default=None\n The kernel specifying the covariance function of the GP. If None is\n passed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\n the kernel's hyperparameters are optimized during fitting.\n\n optimizer : 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'\n Can either be one of the internally supported optimizers for optimizing\n the kernel's parameters, specified by a string, or an externally\n defined optimizer passed as a callable. If a callable is passed, it\n must have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be maximized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\n Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n is used. If None is passed, the kernel's parameters are kept fixed.\n Available internal optimizers are::\n\n 'fmin_l_bfgs_b'\n\n n_restarts_optimizer : int, default=0\n The number of restarts of the optimizer for finding the kernel's\n parameters which maximize the log-marginal likelihood. The first run\n of the optimizer is performed from the kernel's initial parameters,\n the remaining ones (if any) from thetas sampled log-uniform randomly\n from the space of allowed theta-values. If greater than 0, all bounds\n must be finite. Note that n_restarts_optimizer=0 implies that one\n run is performed.\n\n max_iter_predict : int, default=100\n The maximum number of iterations in Newton's method for approximating\n the posterior during predict. Smaller values will reduce computation\n time at the cost of worse results.\n\n warm_start : bool, default=False\n If warm-starts are enabled, the solution of the last Newton iteration\n on the Laplace approximation of the posterior mode is used as\n initialization for the next call of _posterior_mode(). This can speed\n up convergence when _posterior_mode is called several times on similar\n problems as in hyperparameter optimization. See :term:`the Glossary\n `.\n\n copy_X_train : bool, default=True\n If True, a persistent copy of the training data is stored in the\n object. 
Otherwise, just a reference to the training data is stored,\n which might cause predictions to change if the data is modified\n externally.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\n Attributes\n ----------\n X_train_ : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data (also\n required for prediction).\n\n y_train_ : array-like of shape (n_samples,)\n Target values in training data (also required for prediction)\n\n classes_ : array-like of shape (n_classes,)\n Unique class labels.\n\n kernel_ : kernel instance\n The kernel used for prediction. The structure of the kernel is the\n same as the one passed as parameter but with optimized hyperparameters\n\n L_ : array-like of shape (n_samples, n_samples)\n Lower-triangular Cholesky decomposition of the kernel in X_train_\n\n pi_ : array-like of shape (n_samples,)\n The probabilities of the positive class for the training points\n X_train_\n\n W_sr_ : array-like of shape (n_samples,)\n Square root of W, the Hessian of log-likelihood of the latent function\n values for the observed labels. Since W is diagonal, only the diagonal\n of sqrt(W) is stored.\n\n log_marginal_likelihood_value_ : float\n The log-marginal-likelihood of ``self.kernel_.theta``\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, kernel=None, *, optimizer=\"fmin_l_bfgs_b\",\n n_restarts_optimizer=0, max_iter_predict=100,\n warm_start=False, copy_X_train=True, random_state=None):\n self.kernel = kernel\n self.optimizer = optimizer\n self.n_restarts_optimizer = n_restarts_optimizer\n self.max_iter_predict = max_iter_predict\n self.warm_start = warm_start\n self.copy_X_train = copy_X_train\n self.random_state = random_state\n\n def fit(self, X, y):\n \"\"\"Fit Gaussian process classification model\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\n y : array-like of shape (n_samples,)\n Target values, must be binary\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n if self.kernel is None: # Use an RBF kernel as default\n self.kernel_ = C(1.0, constant_value_bounds=\"fixed\") \\\n * RBF(1.0, length_scale_bounds=\"fixed\")\n else:\n self.kernel_ = clone(self.kernel)\n\n self.rng = check_random_state(self.random_state)\n\n self.X_train_ = np.copy(X) if self.copy_X_train else X\n\n # Encode class labels and check that it is a binary classification\n # problem\n label_encoder = LabelEncoder()\n self.y_train_ = label_encoder.fit_transform(y)\n self.classes_ = label_encoder.classes_\n if self.classes_.size > 2:\n raise ValueError(\"%s supports only binary classification. 
\"\n \"y contains classes %s\"\n % (self.__class__.__name__, self.classes_))\n elif self.classes_.size == 1:\n raise ValueError(\"{0:s} requires 2 classes; got {1:d} class\"\n .format(self.__class__.__name__,\n self.classes_.size))\n\n if self.optimizer is not None and self.kernel_.n_dims > 0:\n # Choose hyperparameters based on maximizing the log-marginal\n # likelihood (potentially starting from several initial values)\n def obj_func(theta, eval_gradient=True):\n if eval_gradient:\n lml, grad = self.log_marginal_likelihood(\n theta, eval_gradient=True, clone_kernel=False)\n return -lml, -grad\n else:\n return -self.log_marginal_likelihood(theta,\n clone_kernel=False)\n\n # First optimize starting from theta specified in kernel\n optima = [self._constrained_optimization(obj_func,\n self.kernel_.theta,\n self.kernel_.bounds)]\n\n # Additional runs are performed from log-uniform chosen initial\n # theta\n if self.n_restarts_optimizer > 0:\n if not np.isfinite(self.kernel_.bounds).all():\n raise ValueError(\n \"Multiple optimizer restarts (n_restarts_optimizer>0) \"\n \"requires that all bounds are finite.\")\n bounds = self.kernel_.bounds\n for iteration in range(self.n_restarts_optimizer):\n theta_initial = np.exp(self.rng.uniform(bounds[:, 0],\n bounds[:, 1]))\n optima.append(\n self._constrained_optimization(obj_func, theta_initial,\n bounds))\n # Select result from run with minimal (negative) log-marginal\n # likelihood\n lml_values = list(map(itemgetter(1), optima))\n self.kernel_.theta = optima[np.argmin(lml_values)][0]\n self.kernel_._check_bounds_params()\n\n self.log_marginal_likelihood_value_ = -np.min(lml_values)\n else:\n self.log_marginal_likelihood_value_ = \\\n self.log_marginal_likelihood(self.kernel_.theta)\n\n # Precompute quantities required for predictions which are independent\n # of actual query points\n K = self.kernel_(self.X_train_)\n\n _, (self.pi_, self.W_sr_, self.L_, _, _) = \\\n self._posterior_mode(K, return_temporaries=True)\n\n return self\n\n def predict(self, X):\n \"\"\"Perform classification on an array of test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\n Returns\n -------\n C : ndarray of shape (n_samples,)\n Predicted target values for X, values are from ``classes_``\n \"\"\"\n check_is_fitted(self)\n\n # As discussed on Section 3.4.2 of GPML, for making hard binary\n # decisions, it is enough to compute the MAP of the posterior and\n # pass it through the link function\n K_star = self.kernel_(self.X_train_, X) # K_star =k(x_star)\n f_star = K_star.T.dot(self.y_train_ - self.pi_) # Algorithm 3.2,Line 4\n\n return np.where(f_star > 0, self.classes_[1], self.classes_[0])\n\n def predict_proba(self, X):\n \"\"\"Return probability estimates for the test vector X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\n Returns\n -------\n C : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. 
The columns correspond to the classes in sorted\n order, as they appear in the attribute ``classes_``.\n \"\"\"\n check_is_fitted(self)\n\n # Based on Algorithm 3.2 of GPML\n K_star = self.kernel_(self.X_train_, X) # K_star =k(x_star)\n f_star = K_star.T.dot(self.y_train_ - self.pi_) # Line 4\n v = solve(self.L_, self.W_sr_[:, np.newaxis] * K_star) # Line 5\n # Line 6 (compute np.diag(v.T.dot(v)) via einsum)\n var_f_star = self.kernel_.diag(X) - np.einsum(\"ij,ij->j\", v, v)\n\n # Line 7:\n # Approximate \\int log(z) * N(z | f_star, var_f_star)\n # Approximation is due to Williams & Barber, \"Bayesian Classification\n # with Gaussian Processes\", Appendix A: Approximate the logistic\n # sigmoid by a linear combination of 5 error functions.\n # For information on how this integral can be computed see\n # blitiri.blogspot.de/2012/11/gaussian-integral-of-error-function.html\n alpha = 1 / (2 * var_f_star)\n gamma = LAMBDAS * f_star\n integrals = np.sqrt(np.pi / alpha) \\\n * erf(gamma * np.sqrt(alpha / (alpha + LAMBDAS**2))) \\\n / (2 * np.sqrt(var_f_star * 2 * np.pi))\n pi_star = (COEFS * integrals).sum(axis=0) + .5 * COEFS.sum()\n\n return np.vstack((1 - pi_star, pi_star)).T\n\n def log_marginal_likelihood(self, theta=None, eval_gradient=False,\n clone_kernel=True):\n \"\"\"Returns log-marginal likelihood of theta for training data.\n\n Parameters\n ----------\n theta : array-like of shape (n_kernel_params,), default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\n eval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. If True, theta must not be None.\n\n clone_kernel : bool, default=True\n If True, the kernel attribute is copied. 
If False, the kernel\n attribute is modified, which may result in a performance improvement.\n\n Returns\n -------\n log_likelihood : float\n Log-marginal likelihood of theta for training data.\n\n log_likelihood_gradient : ndarray of shape (n_kernel_params,), \\\n optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when `eval_gradient` is True.\n \"\"\"\n if theta is None:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated for theta!=None\")\n return self.log_marginal_likelihood_value_\n\n if clone_kernel:\n kernel = self.kernel_.clone_with_theta(theta)\n else:\n kernel = self.kernel_\n kernel.theta = theta\n\n if eval_gradient:\n K, K_gradient = kernel(self.X_train_, eval_gradient=True)\n else:\n K = kernel(self.X_train_)\n\n # Compute log-marginal-likelihood Z and also store some temporaries\n # which can be reused for computing Z's gradient\n Z, (pi, W_sr, L, b, a) = \\\n self._posterior_mode(K, return_temporaries=True)\n\n if not eval_gradient:\n return Z\n\n # Compute gradient based on Algorithm 5.1 of GPML\n d_Z = np.empty(theta.shape[0])\n # XXX: Get rid of the np.diag() in the next line\n R = W_sr[:, np.newaxis] * cho_solve((L, True), np.diag(W_sr)) # Line 7\n C = solve(L, W_sr[:, np.newaxis] * K) # Line 8\n # Line 9: (use einsum to compute np.diag(C.T.dot(C))))\n s_2 = -0.5 * (np.diag(K) - np.einsum('ij, ij -> j', C, C)) \\\n * (pi * (1 - pi) * (1 - 2 * pi)) # third derivative\n\n for j in range(d_Z.shape[0]):\n C = K_gradient[:, :, j] # Line 11\n # Line 12: (R.T.ravel().dot(C.ravel()) = np.trace(R.dot(C)))\n s_1 = .5 * a.T.dot(C).dot(a) - .5 * R.T.ravel().dot(C.ravel())\n\n b = C.dot(self.y_train_ - pi) # Line 13\n s_3 = b - K.dot(R.dot(b)) # Line 14\n\n d_Z[j] = s_1 + s_2.T.dot(s_3) # Line 15\n\n return Z, d_Z\n\n def _posterior_mode(self, K, return_temporaries=False):\n \"\"\"Mode-finding for binary Laplace GPC and fixed kernel.\n\n This approximates the posterior of the latent function values for given\n inputs and target observations with a Gaussian approximation and uses\n Newton's iteration to find the mode of this approximation.\n \"\"\"\n # Based on Algorithm 3.1 of GPML\n\n # If warm_start is enabled, we reuse the last solution for the\n # posterior mode as initialization; otherwise, we initialize with 0\n if self.warm_start and hasattr(self, \"f_cached\") \\\n and self.f_cached.shape == self.y_train_.shape:\n f = self.f_cached\n else:\n f = np.zeros_like(self.y_train_, dtype=np.float64)\n\n # Use Newton's iteration method to find mode of Laplace approximation\n log_marginal_likelihood = -np.inf\n for _ in range(self.max_iter_predict):\n # Line 4\n pi = expit(f)\n W = pi * (1 - pi)\n # Line 5\n W_sr = np.sqrt(W)\n W_sr_K = W_sr[:, np.newaxis] * K\n B = np.eye(W.shape[0]) + W_sr_K * W_sr\n L = cholesky(B, lower=True)\n # Line 6\n b = W * f + (self.y_train_ - pi)\n # Line 7\n a = b - W_sr * cho_solve((L, True), W_sr_K.dot(b))\n # Line 8\n f = K.dot(a)\n\n # Line 10: Compute log marginal likelihood in loop and use as\n # convergence criterion\n lml = -0.5 * a.T.dot(f) \\\n - np.log1p(np.exp(-(self.y_train_ * 2 - 1) * f)).sum() \\\n - np.log(np.diag(L)).sum()\n # Check if we have converged (log marginal likelihood does\n # not decrease)\n # XXX: more complex convergence criterion\n if lml - log_marginal_likelihood < 1e-10:\n break\n log_marginal_likelihood = lml\n\n self.f_cached = f # Remember solution for later warm-starts\n if return_temporaries:\n return
log_marginal_likelihood, (pi, W_sr, L, b, a)\n else:\n return log_marginal_likelihood\n\n def _constrained_optimization(self, obj_func, initial_theta, bounds):\n if self.optimizer == \"fmin_l_bfgs_b\":\n opt_res = scipy.optimize.minimize(\n obj_func, initial_theta, method=\"L-BFGS-B\", jac=True,\n bounds=bounds)\n _check_optimize_result(\"lbfgs\", opt_res)\n theta_opt, func_min = opt_res.x, opt_res.fun\n elif callable(self.optimizer):\n theta_opt, func_min = \\\n self.optimizer(obj_func, initial_theta, bounds=bounds)\n else:\n raise ValueError(\"Unknown optimizer %s.\" % self.optimizer)\n\n return theta_opt, func_min", + "instance_attributes": [ + { + "name": "optimizer", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_restarts_optimizer", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "max_iter_predict", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "warm_start", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy_X_train", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "kernel_", + "types": { + "kind": "NamedType", + "name": "Product" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor", + "name": "GaussianProcessRegressor", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor", + "decorators": [], + "superclasses": ["MultiOutputMixin", "RegressorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/__init__", + "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/fit", + "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/predict", + "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/sample_y", + "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/log_marginal_likelihood", + "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/_constrained_optimization", + "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of Gaussian Processes\nfor Machine Learning (GPML) by Rasmussen and Williams.\n\nIn addition to standard scikit-learn estimator API,\nGaussianProcessRegressor:\n\n * allows prediction without prior fitting (based on the GP prior)\n * provides an additional method sample_y(X), which evaluates samples\n drawn from the GPR (prior or posterior) at given inputs\n * exposes a method log_marginal_likelihood(theta), which can be used\n externally for other ways of selecting hyperparameters, e.g., via\n Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of Gaussian Processes\nfor Machine Learning (GPML) by Rasmussen and Williams.\n\nIn addition to standard scikit-learn estimator API,\nGaussianProcessRegressor:\n\n * allows prediction without prior fitting (based on the GP prior)\n * provides an additional method sample_y(X), which evaluates samples\n drawn from the GPR (prior or posterior) at given inputs\n * exposes a method log_marginal_likelihood(theta), which can be used\n externally for other ways of selecting hyperparameters, e.g., via\n Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18\n\nParameters\n----------\nkernel : kernel instance, default=None\n The kernel specifying the covariance function of the GP. If None is\n passed, the kernel ``ConstantKernel(1.0, constant_value_bounds=\"fixed\")\n * RBF(1.0, length_scale_bounds=\"fixed\")`` is used as default. Note that\n the kernel hyperparameters are optimized during fitting unless the\n bounds are marked as \"fixed\".\n\nalpha : float or ndarray of shape (n_samples,), default=1e-10\n Value added to the diagonal of the kernel matrix during fitting.\n This can prevent a potential numerical issue during fitting, by\n ensuring that the calculated values form a positive definite matrix.\n It can also be interpreted as the variance of additional Gaussian\n measurement noise on the training observations. Note that this is\n different from using a `WhiteKernel`. If an array is passed, it must\n have the same number of entries as the data used for fitting and is\n used as datapoint-dependent noise level. Allowing one to specify the\n noise level directly as a parameter is mainly for convenience and\n for consistency with Ridge.\n\noptimizer : \"fmin_l_bfgs_b\" or callable, default=\"fmin_l_bfgs_b\"\n Can either be one of the internally supported optimizers for optimizing\n the kernel's parameters, specified by a string, or an externally\n defined optimizer passed as a callable. If a callable is passed, it\n must have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be minimized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\n Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n is used. If None is passed, the kernel's parameters are kept fixed.\n Available internal optimizers are::\n\n 'fmin_l_bfgs_b'\n\nn_restarts_optimizer : int, default=0\n The number of restarts of the optimizer for finding the kernel's\n parameters which maximize the log-marginal likelihood. The first run\n of the optimizer is performed from the kernel's initial parameters,\n the remaining ones (if any) from thetas sampled log-uniform randomly\n from the space of allowed theta-values. If greater than 0, all bounds\n must be finite. Note that n_restarts_optimizer == 0 implies that one\n run is performed.\n\nnormalize_y : bool, default=False\n Whether the target values y are normalized: the mean and variance of\n the target values are set equal to 0 and 1 respectively. This is\n recommended for cases where zero-mean, unit-variance priors are used.\n Note that, in this implementation, the normalisation is reversed\n before the GP predictions are reported.\n\n .. versionchanged:: 0.23\n\ncopy_X_train : bool, default=True\n If True, a persistent copy of the training data is stored in the\n object.
Otherwise, just a reference to the training data is stored,\n which might cause predictions to change if the data is modified\n externally.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\nX_train_ : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data (also\n required for prediction).\n\ny_train_ : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values in training data (also required for prediction)\n\nkernel_ : kernel instance\n The kernel used for prediction. The structure of the kernel is the\n same as the one passed as parameter but with optimized hyperparameters\n\nL_ : array-like of shape (n_samples, n_samples)\n Lower-triangular Cholesky decomposition of the kernel in ``X_train_``\n\nalpha_ : array-like of shape (n_samples,)\n Dual coefficients of training data points in kernel space\n\nlog_marginal_likelihood_value_ : float\n The log-marginal-likelihood of ``self.kernel_.theta``\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel()\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1...]), array([316.6..., 316.6...]))", + "code": "class GaussianProcessRegressor(MultiOutputMixin,\n RegressorMixin, BaseEstimator):\n \"\"\"Gaussian process regression (GPR).\n\n The implementation is based on Algorithm 2.1 of Gaussian Processes\n for Machine Learning (GPML) by Rasmussen and Williams.\n\n In addition to standard scikit-learn estimator API,\n GaussianProcessRegressor:\n\n * allows prediction without prior fitting (based on the GP prior)\n * provides an additional method sample_y(X), which evaluates samples\n drawn from the GPR (prior or posterior) at given inputs\n * exposes a method log_marginal_likelihood(theta), which can be used\n externally for other ways of selecting hyperparameters, e.g., via\n Markov chain Monte Carlo.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n kernel : kernel instance, default=None\n The kernel specifying the covariance function of the GP. If None is\n passed, the kernel ``ConstantKernel(1.0, constant_value_bounds=\"fixed\")\n * RBF(1.0, length_scale_bounds=\"fixed\")`` is used as default. Note that\n the kernel hyperparameters are optimized during fitting unless the\n bounds are marked as \"fixed\".\n\n alpha : float or ndarray of shape (n_samples,), default=1e-10\n Value added to the diagonal of the kernel matrix during fitting.\n This can prevent a potential numerical issue during fitting, by\n ensuring that the calculated values form a positive definite matrix.\n It can also be interpreted as the variance of additional Gaussian\n measurement noise on the training observations. Note that this is\n different from using a `WhiteKernel`. If an array is passed, it must\n have the same number of entries as the data used for fitting and is\n used as datapoint-dependent noise level.
Allowing one to specify the\n noise level directly as a parameter is mainly for convenience and\n for consistency with Ridge.\n\n optimizer : \"fmin_l_bfgs_b\" or callable, default=\"fmin_l_bfgs_b\"\n Can either be one of the internally supported optimizers for optimizing\n the kernel's parameters, specified by a string, or an externally\n defined optimizer passed as a callable. If a callable is passed, it\n must have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be minimized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\n Per default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\n is used. If None is passed, the kernel's parameters are kept fixed.\n Available internal optimizers are::\n\n 'fmin_l_bfgs_b'\n\n n_restarts_optimizer : int, default=0\n The number of restarts of the optimizer for finding the kernel's\n parameters which maximize the log-marginal likelihood. The first run\n of the optimizer is performed from the kernel's initial parameters,\n the remaining ones (if any) from thetas sampled log-uniform randomly\n from the space of allowed theta-values. If greater than 0, all bounds\n must be finite. Note that n_restarts_optimizer == 0 implies that one\n run is performed.\n\n normalize_y : bool, default=False\n Whether the target values y are normalized: the mean and variance of\n the target values are set equal to 0 and 1 respectively. This is\n recommended for cases where zero-mean, unit-variance priors are used.\n Note that, in this implementation, the normalisation is reversed\n before the GP predictions are reported.\n\n .. versionchanged:: 0.23\n\n copy_X_train : bool, default=True\n If True, a persistent copy of the training data is stored in the\n object. Otherwise, just a reference to the training data is stored,\n which might cause predictions to change if the data is modified\n externally.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\n Attributes\n ----------\n X_train_ : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data (also\n required for prediction).\n\n y_train_ : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values in training data (also required for prediction)\n\n kernel_ : kernel instance\n The kernel used for prediction.
The structure of the kernel is the\n same as the one passed as parameter but with optimized hyperparameters\n\n L_ : array-like of shape (n_samples, n_samples)\n Lower-triangular Cholesky decomposition of the kernel in ``X_train_``\n\n alpha_ : array-like of shape (n_samples,)\n Dual coefficients of training data points in kernel space\n\n log_marginal_likelihood_value_ : float\n The log-marginal-likelihood of ``self.kernel_.theta``\n\n Examples\n --------\n >>> from sklearn.datasets import make_friedman2\n >>> from sklearn.gaussian_process import GaussianProcessRegressor\n >>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n >>> kernel = DotProduct() + WhiteKernel()\n >>> gpr = GaussianProcessRegressor(kernel=kernel,\n ... random_state=0).fit(X, y)\n >>> gpr.score(X, y)\n 0.3680...\n >>> gpr.predict(X[:2,:], return_std=True)\n (array([653.0..., 592.1...]), array([316.6..., 316.6...]))\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, kernel=None, *, alpha=1e-10,\n optimizer=\"fmin_l_bfgs_b\", n_restarts_optimizer=0,\n normalize_y=False, copy_X_train=True, random_state=None):\n self.kernel = kernel\n self.alpha = alpha\n self.optimizer = optimizer\n self.n_restarts_optimizer = n_restarts_optimizer\n self.normalize_y = normalize_y\n self.copy_X_train = copy_X_train\n self.random_state = random_state\n\n def fit(self, X, y):\n \"\"\"Fit Gaussian process regression model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n if self.kernel is None: # Use an RBF kernel as default\n self.kernel_ = C(1.0, constant_value_bounds=\"fixed\") \\\n * RBF(1.0, length_scale_bounds=\"fixed\")\n else:\n self.kernel_ = clone(self.kernel)\n\n self._rng = check_random_state(self.random_state)\n\n if self.kernel_.requires_vector_input:\n X, y = self._validate_data(X, y, multi_output=True, y_numeric=True,\n ensure_2d=True, dtype=\"numeric\")\n else:\n X, y = self._validate_data(X, y, multi_output=True, y_numeric=True,\n ensure_2d=False, dtype=None)\n\n # Normalize target value\n if self.normalize_y:\n self._y_train_mean = np.mean(y, axis=0)\n self._y_train_std = _handle_zeros_in_scale(\n np.std(y, axis=0), copy=False\n )\n\n # Remove mean and make unit variance\n y = (y - self._y_train_mean) / self._y_train_std\n\n else:\n self._y_train_mean = np.zeros(1)\n self._y_train_std = 1\n\n if np.iterable(self.alpha) \\\n and self.alpha.shape[0] != y.shape[0]:\n if self.alpha.shape[0] == 1:\n self.alpha = self.alpha[0]\n else:\n raise ValueError(\"alpha must be a scalar or an array\"\n \" with same number of entries as y.(%d != %d)\"\n % (self.alpha.shape[0], y.shape[0]))\n\n self.X_train_ = np.copy(X) if self.copy_X_train else X\n self.y_train_ = np.copy(y) if self.copy_X_train else y\n\n if self.optimizer is not None and self.kernel_.n_dims > 0:\n # Choose hyperparameters based on maximizing the log-marginal\n # likelihood (potentially starting from several initial values)\n def obj_func(theta, eval_gradient=True):\n if eval_gradient:\n lml, grad = self.log_marginal_likelihood(\n theta, eval_gradient=True, clone_kernel=False)\n return -lml, -grad\n else:\n return -self.log_marginal_likelihood(theta,\n clone_kernel=False)\n\n # First optimize starting from theta 
specified in kernel\n optima = [(self._constrained_optimization(obj_func,\n self.kernel_.theta,\n self.kernel_.bounds))]\n\n # Additional runs are performed from log-uniform chosen initial\n # theta\n if self.n_restarts_optimizer > 0:\n if not np.isfinite(self.kernel_.bounds).all():\n raise ValueError(\n \"Multiple optimizer restarts (n_restarts_optimizer>0) \"\n \"requires that all bounds are finite.\")\n bounds = self.kernel_.bounds\n for iteration in range(self.n_restarts_optimizer):\n theta_initial = \\\n self._rng.uniform(bounds[:, 0], bounds[:, 1])\n optima.append(\n self._constrained_optimization(obj_func, theta_initial,\n bounds))\n # Select result from run with minimal (negative) log-marginal\n # likelihood\n lml_values = list(map(itemgetter(1), optima))\n self.kernel_.theta = optima[np.argmin(lml_values)][0]\n self.kernel_._check_bounds_params()\n\n self.log_marginal_likelihood_value_ = -np.min(lml_values)\n else:\n self.log_marginal_likelihood_value_ = \\\n self.log_marginal_likelihood(self.kernel_.theta,\n clone_kernel=False)\n\n # Precompute quantities required for predictions which are independent\n # of actual query points\n K = self.kernel_(self.X_train_)\n K[np.diag_indices_from(K)] += self.alpha\n try:\n self.L_ = cholesky(K, lower=True) # Line 2\n except np.linalg.LinAlgError as exc:\n exc.args = (\"The kernel, %s, is not returning a \"\n \"positive definite matrix. Try gradually \"\n \"increasing the 'alpha' parameter of your \"\n \"GaussianProcessRegressor estimator.\"\n % self.kernel_,) + exc.args\n raise\n self.alpha_ = cho_solve((self.L_, True), self.y_train_) # Line 3\n return self\n\n def predict(self, X, return_std=False, return_cov=False):\n \"\"\"Predict using the Gaussian process regression model\n\n We can also predict based on an unfitted model by using the GP prior.\n In addition to the mean of the predictive distribution, its\n standard deviation (return_std=True) or covariance (return_cov=True)\n can also be returned. Note that at most one of the two can be requested.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated.\n\n return_std : bool, default=False\n If True, the standard-deviation of the predictive distribution at\n the query points is returned along with the mean.\n\n return_cov : bool, default=False\n If True, the covariance of the joint predictive distribution at\n the query points is returned along with the mean.\n\n Returns\n -------\n y_mean : ndarray of shape (n_samples, [n_output_dims])\n Mean of predictive distribution at query points.\n\n y_std : ndarray of shape (n_samples,), optional\n Standard deviation of predictive distribution at query points.\n Only returned when `return_std` is True.\n\n y_cov : ndarray of shape (n_samples, n_samples), optional\n Covariance of joint predictive distribution at query points.\n Only returned when `return_cov` is True.\n \"\"\"\n if return_std and return_cov:\n raise RuntimeError(\n \"Not returning standard deviation of predictions when \"\n \"returning full covariance.\")\n\n if self.kernel is None or self.kernel.requires_vector_input:\n X = check_array(X, ensure_2d=True, dtype=\"numeric\")\n else:\n X = check_array(X, ensure_2d=False, dtype=None)\n\n if not hasattr(self, \"X_train_\"): # Unfitted; predict based on GP prior\n if self.kernel is None:\n kernel = (C(1.0, constant_value_bounds=\"fixed\") *\n RBF(1.0, length_scale_bounds=\"fixed\"))\n else:\n kernel = self.kernel\n y_mean = np.zeros(X.shape[0])\n if return_cov:\n y_cov =
kernel(X)\n return y_mean, y_cov\n elif return_std:\n y_var = kernel.diag(X)\n return y_mean, np.sqrt(y_var)\n else:\n return y_mean\n else: # Predict based on GP posterior\n K_trans = self.kernel_(X, self.X_train_)\n y_mean = K_trans.dot(self.alpha_) # Line 4 (y_mean = f_star)\n # undo normalisation\n y_mean = self._y_train_std * y_mean + self._y_train_mean\n\n if return_cov:\n # Solve K @ V = K_trans.T\n V = cho_solve((self.L_, True), K_trans.T) # Line 5\n y_cov = self.kernel_(X) - K_trans.dot(V) # Line 6\n\n # undo normalisation\n y_cov = y_cov * self._y_train_std**2\n\n return y_mean, y_cov\n elif return_std:\n # Solve K @ V = K_trans.T\n V = cho_solve((self.L_, True), K_trans.T) # Line 5\n\n # Compute variance of predictive distribution\n # Use einsum to avoid explicitly forming the large matrix\n # K_trans @ V just to extract its diagonal afterward.\n y_var = self.kernel_.diag(X)\n y_var -= np.einsum(\"ij,ji->i\", K_trans, V)\n\n # Check if any of the variances is negative because of\n # numerical issues. If yes: set the variance to 0.\n y_var_negative = y_var < 0\n if np.any(y_var_negative):\n warnings.warn(\"Predicted variances smaller than 0. \"\n \"Setting those variances to 0.\")\n y_var[y_var_negative] = 0.0\n\n # undo normalisation\n y_var = y_var * self._y_train_std**2\n\n return y_mean, np.sqrt(y_var)\n else:\n return y_mean\n\n def sample_y(self, X, n_samples=1, random_state=0):\n \"\"\"Draw samples from Gaussian process and evaluate at X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated.\n\n n_samples : int, default=1\n The number of samples drawn from the Gaussian process.\n\n random_state : int, RandomState instance or None, default=0\n Determines random number generation to randomly draw samples.\n Pass an int for reproducible results across multiple function\n calls.\n See :term: `Glossary `.\n\n Returns\n -------\n y_samples : ndarray of shape (n_samples_X, [n_output_dims], n_samples)\n Values of n_samples samples drawn from Gaussian process and\n evaluated at query points.\n \"\"\"\n rng = check_random_state(random_state)\n\n y_mean, y_cov = self.predict(X, return_cov=True)\n if y_mean.ndim == 1:\n y_samples = rng.multivariate_normal(y_mean, y_cov, n_samples).T\n else:\n y_samples = \\\n [rng.multivariate_normal(y_mean[:, i], y_cov,\n n_samples).T[:, np.newaxis]\n for i in range(y_mean.shape[1])]\n y_samples = np.hstack(y_samples)\n return y_samples\n\n def log_marginal_likelihood(self, theta=None, eval_gradient=False,\n clone_kernel=True):\n \"\"\"Returns log-marginal likelihood of theta for training data.\n\n Parameters\n ----------\n theta : array-like of shape (n_kernel_params,), default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\n eval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. If True, theta must not be None.\n\n clone_kernel : bool, default=True\n If True, the kernel attribute is copied.
If False, the kernel\n attribute is modified, which may result in a performance improvement.\n\n Returns\n -------\n log_likelihood : float\n Log-marginal likelihood of theta for training data.\n\n log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when eval_gradient is True.\n \"\"\"\n if theta is None:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated for theta!=None\")\n return self.log_marginal_likelihood_value_\n\n if clone_kernel:\n kernel = self.kernel_.clone_with_theta(theta)\n else:\n kernel = self.kernel_\n kernel.theta = theta\n\n if eval_gradient:\n K, K_gradient = kernel(self.X_train_, eval_gradient=True)\n else:\n K = kernel(self.X_train_)\n\n K[np.diag_indices_from(K)] += self.alpha\n try:\n L = cholesky(K, lower=True) # Line 2\n except np.linalg.LinAlgError:\n return (-np.inf, np.zeros_like(theta)) \\\n if eval_gradient else -np.inf\n\n # Support multi-dimensional output of self.y_train_\n y_train = self.y_train_\n if y_train.ndim == 1:\n y_train = y_train[:, np.newaxis]\n\n alpha = cho_solve((L, True), y_train) # Line 3\n\n # Compute log-likelihood (compare line 7)\n log_likelihood_dims = -0.5 * np.einsum(\"ik,ik->k\", y_train, alpha)\n log_likelihood_dims -= np.log(np.diag(L)).sum()\n log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)\n log_likelihood = log_likelihood_dims.sum(-1) # sum over dimensions\n\n if eval_gradient: # compare Equation 5.9 from GPML\n tmp = np.einsum(\"ik,jk->ijk\", alpha, alpha) # k: output-dimension\n tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis]\n # Compute \"0.5 * trace(tmp.dot(K_gradient))\" without\n # constructing the full matrix tmp.dot(K_gradient) since only\n # its diagonal is required\n log_likelihood_gradient_dims = \\\n 0.5 * np.einsum(\"ijl,jik->kl\", tmp, K_gradient)\n log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)\n\n if eval_gradient:\n return log_likelihood, log_likelihood_gradient\n else:\n return log_likelihood\n\n def _constrained_optimization(self, obj_func, initial_theta, bounds):\n if self.optimizer == \"fmin_l_bfgs_b\":\n opt_res = scipy.optimize.minimize(\n obj_func, initial_theta, method=\"L-BFGS-B\", jac=True,\n bounds=bounds)\n _check_optimize_result(\"lbfgs\", opt_res)\n theta_opt, func_min = opt_res.x, opt_res.fun\n elif callable(self.optimizer):\n theta_opt, func_min = \\\n self.optimizer(obj_func, initial_theta, bounds=bounds)\n else:\n raise ValueError(\"Unknown optimizer %s.\" % self.optimizer)\n\n return theta_opt, func_min\n\n def _more_tags(self):\n return {'requires_fit': False}", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "optimizer", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_restarts_optimizer", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "normalize_y", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy_X_train", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "kernel_", + "types": { + "kind": "NamedType", + "name": "Product" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel", + "name": "CompoundKernel", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel", + "decorators": [], + "superclasses": ["Kernel"], + "methods": [ +
"scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__init__", + "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/get_params", + "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/theta@getter", + "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/theta@setter", + "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/bounds@getter", + "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__call__", + "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__eq__", + "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/is_stationary", + "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/requires_vector_input@getter", + "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/diag" + ], + "is_public": true, + "reexported_by": [], + "description": "Kernel which is composed of a set of other kernels.\n\n.. versionadded:: 0.18", + "docstring": "Kernel which is composed of a set of other kernels.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernels : list of Kernels\n The other kernels\n\nExamples\n--------\n>>> from sklearn.gaussian_process.kernels import WhiteKernel\n>>> from sklearn.gaussian_process.kernels import RBF\n>>> from sklearn.gaussian_process.kernels import CompoundKernel\n>>> kernel = CompoundKernel(\n... [WhiteKernel(noise_level=3.0), RBF(length_scale=2.0)])\n>>> print(kernel.bounds)\n[[-11.51292546 11.51292546]\n [-11.51292546 11.51292546]]\n>>> print(kernel.n_dims)\n2\n>>> print(kernel.theta)\n[1.09861229 0.69314718]", + "code": "class CompoundKernel(Kernel):\n \"\"\"Kernel which is composed of a set of other kernels.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n kernels : list of Kernels\n The other kernels\n\n Examples\n --------\n >>> from sklearn.gaussian_process.kernels import WhiteKernel\n >>> from sklearn.gaussian_process.kernels import RBF\n >>> from sklearn.gaussian_process.kernels import CompoundKernel\n >>> kernel = CompoundKernel(\n ... 
[WhiteKernel(noise_level=3.0), RBF(length_scale=2.0)])\n >>> print(kernel.bounds)\n [[-11.51292546 11.51292546]\n [-11.51292546 11.51292546]]\n >>> print(kernel.n_dims)\n 2\n >>> print(kernel.theta)\n [1.09861229 0.69314718]\n \"\"\"\n\n def __init__(self, kernels):\n self.kernels = kernels\n\n def get_params(self, deep=True):\n \"\"\"Get parameters of this kernel.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : dict\n Parameter names mapped to their values.\n \"\"\"\n return dict(kernels=self.kernels)\n\n @property\n def theta(self):\n \"\"\"Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\n Note that theta are typically the log-transformed values of the\n kernel's hyperparameters as this representation of the search space\n is more amenable for hyperparameter search, as hyperparameters like\n length-scales naturally live on a log-scale.\n\n Returns\n -------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n return np.hstack([kernel.theta for kernel in self.kernels])\n\n @theta.setter\n def theta(self, theta):\n \"\"\"Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\n Parameters\n ----------\n theta : array of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n # CompoundKernel has no attribute k1; use the first kernel and\n # assume all kernels share the same number of dimensions\n k_dims = self.kernels[0].n_dims\n for i, kernel in enumerate(self.kernels):\n kernel.theta = theta[i * k_dims:(i + 1) * k_dims]\n\n @property\n def bounds(self):\n \"\"\"Returns the log-transformed bounds on the theta.\n\n Returns\n -------\n bounds : array of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta\n \"\"\"\n return np.vstack([kernel.bounds for kernel in self.kernels])\n\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Note that this compound kernel returns the results of all simple kernels\n stacked along an additional axis.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object, \\\n default=None\n Left argument of the returned kernel k(X, Y)\n\n Y : array-like of shape (n_samples_X, n_features) or list of object, \\\n default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of the\n kernel hyperparameter is computed.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y, n_kernels)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape \\\n (n_samples_X, n_samples_X, n_dims, n_kernels), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.\n \"\"\"\n if eval_gradient:\n K = []\n K_grad = []\n for kernel in self.kernels:\n K_single, K_grad_single = kernel(X, Y, eval_gradient)\n K.append(K_single)\n K_grad.append(K_grad_single[..., np.newaxis])\n return np.dstack(K), np.concatenate(K_grad, 3)\n else:\n return np.dstack([kernel(X, Y, eval_gradient)\n for kernel in self.kernels])\n\n def __eq__(self, b):\n if type(self) != type(b) or len(self.kernels) != len(b.kernels):\n return False\n return np.all([self.kernels[i] == b.kernels[i]\n for i in range(len(self.kernels))])\n\n def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary.
\"\"\"\n return np.all([kernel.is_stationary() for kernel in self.kernels])\n\n @property\n def requires_vector_input(self):\n \"\"\"Returns whether the kernel is defined on discrete structures. \"\"\"\n return np.any([kernel.requires_vector_input\n for kernel in self.kernels])\n\n def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to `np.diag(self(X))`; however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X, n_kernels)\n Diagonal of kernel k(X, X)\n \"\"\"\n return np.vstack([kernel.diag(X) for kernel in self.kernels]).T", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel", + "name": "ConstantKernel", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel", + "decorators": [], + "superclasses": ["StationaryKernelMixin", "GenericKernelMixin", "Kernel"], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__init__", + "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/hyperparameter_constant_value@getter", + "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__call__", + "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/diag", + "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__repr__" + ], + "is_public": true, + "reexported_by": [], + "description": "Constant kernel.\n\nCan be used as part of a product-kernel where it scales the magnitude of\nthe other factor (kernel) or as part of a sum-kernel, where it modifies\nthe mean of the Gaussian process.\n\n.. math::\n k(x_1, x_2) = constant\\_value \\;\\forall\\; x_1, x_2\n\nAdding a constant kernel is equivalent to adding a constant::\n\n kernel = RBF() + ConstantKernel(constant_value=2)\n\nis the same as::\n\n kernel = RBF() + 2\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "Constant kernel.\n\nCan be used as part of a product-kernel where it scales the magnitude of\nthe other factor (kernel) or as part of a sum-kernel, where it modifies\nthe mean of the Gaussian process.\n\n.. math::\n k(x_1, x_2) = constant\\_value \\;\\forall\\; x_1, x_2\n\nAdding a constant kernel is equivalent to adding a constant::\n\n kernel = RBF() + ConstantKernel(constant_value=2)\n\nis the same as::\n\n kernel = RBF() + 2\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nconstant_value : float, default=1.0\n The constant value which defines the covariance:\n k(x_1, x_2) = constant_value\n\nconstant_value_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on `constant_value`.\n If set to \"fixed\", `constant_value` cannot be changed during\n hyperparameter tuning.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import RBF, ConstantKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = RBF() + ConstantKernel(constant_value=2)\n>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n... 
random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3696...\n>>> gpr.predict(X[:1,:], return_std=True)\n(array([606.1...]), array([0.24...]))", + "code": "class ConstantKernel(StationaryKernelMixin, GenericKernelMixin,\n Kernel):\n \"\"\"Constant kernel.\n\n Can be used as part of a product-kernel where it scales the magnitude of\n the other factor (kernel) or as part of a sum-kernel, where it modifies\n the mean of the Gaussian process.\n\n .. math::\n k(x_1, x_2) = constant\\\\_value \\\\;\\\\forall\\\\; x_1, x_2\n\n Adding a constant kernel is equivalent to adding a constant::\n\n kernel = RBF() + ConstantKernel(constant_value=2)\n\n is the same as::\n\n kernel = RBF() + 2\n\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n constant_value : float, default=1.0\n The constant value which defines the covariance:\n k(x_1, x_2) = constant_value\n\n constant_value_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on `constant_value`.\n If set to \"fixed\", `constant_value` cannot be changed during\n hyperparameter tuning.\n\n Examples\n --------\n >>> from sklearn.datasets import make_friedman2\n >>> from sklearn.gaussian_process import GaussianProcessRegressor\n >>> from sklearn.gaussian_process.kernels import RBF, ConstantKernel\n >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n >>> kernel = RBF() + ConstantKernel(constant_value=2)\n >>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n ... random_state=0).fit(X, y)\n >>> gpr.score(X, y)\n 0.3696...\n >>> gpr.predict(X[:1,:], return_std=True)\n (array([606.1...]), array([0.24...]))\n \"\"\"\n\n def __init__(self, constant_value=1.0, constant_value_bounds=(1e-5, 1e5)):\n self.constant_value = constant_value\n self.constant_value_bounds = constant_value_bounds\n\n @property\n def hyperparameter_constant_value(self):\n return Hyperparameter(\n \"constant_value\", \"numeric\", self.constant_value_bounds)\n\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\n Y : array-like of shape (n_samples_X, n_features) or list of object, \\\n default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when eval_gradient\n is True.\n \"\"\"\n if Y is None:\n Y = X\n elif eval_gradient:\n raise ValueError(\"Gradient can only be evaluated when Y is None.\")\n\n K = np.full((_num_samples(X), _num_samples(Y)), self.constant_value,\n dtype=np.array(self.constant_value).dtype)\n if eval_gradient:\n if not self.hyperparameter_constant_value.fixed:\n return (K, np.full((_num_samples(X), _num_samples(X), 1),\n self.constant_value,\n dtype=np.array(self.constant_value).dtype))\n else:\n return K, np.empty((_num_samples(X), _num_samples(X), 0))\n else:\n return K\n\n def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n return np.full(_num_samples(X), self.constant_value,\n dtype=np.array(self.constant_value).dtype)\n\n def __repr__(self):\n return \"{0:.3g}**2\".format(np.sqrt(self.constant_value))", + "instance_attributes": [ + { + "name": "constant_value", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "constant_value_bounds", + "types": { + "kind": "NamedType", + "name": "tuple" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct", + "name": "DotProduct", + "qname": "sklearn.gaussian_process.kernels.DotProduct", + "decorators": [], + "superclasses": ["Kernel"], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__init__", + "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/hyperparameter_sigma_0@getter", + "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__call__", + "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/diag", + "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/is_stationary", + "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__repr__" + ], + "is_public": true, + "reexported_by": [], + "description": "Dot-Product kernel.\n\nThe DotProduct kernel is non-stationary and can be obtained from linear\nregression by putting :math:`N(0, 1)` priors on the coefficients\nof :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \\sigma_0^2)`\non the bias. The DotProduct kernel is invariant to a rotation of\nthe coordinates about the origin, but not translations.\nIt is parameterized by a parameter sigma_0 :math:`\\sigma`\nwhich controls the inhomogeneity of the kernel. For :math:`\\sigma_0^2 =0`,\nthe kernel is called the homogeneous linear kernel, otherwise\nit is inhomogeneous. The kernel is given by\n\n.. math::\n k(x_i, x_j) = \\sigma_0 ^ 2 + x_i \\cdot x_j\n\nThe DotProduct kernel is commonly combined with exponentiation.\n\nSee [1]_, Chapter 4, Section 4.2, for further details regarding the\nDotProduct kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "Dot-Product kernel.\n\nThe DotProduct kernel is non-stationary and can be obtained from linear\nregression by putting :math:`N(0, 1)` priors on the coefficients\nof :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \\sigma_0^2)`\non the bias. The DotProduct kernel is invariant to a rotation of\nthe coordinates about the origin, but not translations.\nIt is parameterized by a parameter sigma_0 :math:`\\sigma`\nwhich controls the inhomogeneity of the kernel.
For :math:`\sigma_0^2 =0`,\nthe kernel is called the homogeneous linear kernel, otherwise\nit is inhomogeneous. The kernel is given by\n\n.. math::\n k(x_i, x_j) = \sigma_0 ^ 2 + x_i \cdot x_j\n\nThe DotProduct kernel is commonly combined with exponentiation.\n\nSee [1]_, Chapter 4, Section 4.2, for further details regarding the\nDotProduct kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nsigma_0 : float >= 0, default=1.0\n Parameter controlling the inhomogeneity of the kernel. If sigma_0=0,\n the kernel is homogeneous.\n\nsigma_0_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'sigma_0'.\n If set to \"fixed\", 'sigma_0' cannot be changed during\n hyperparameter tuning.\n\nReferences\n----------\n.. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". The MIT Press.\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel()\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1...]), array([316.6..., 316.6...]))", + "code": "class DotProduct(Kernel):\n r\"\"\"Dot-Product kernel.\n\n The DotProduct kernel is non-stationary and can be obtained from linear\n regression by putting :math:`N(0, 1)` priors on the coefficients\n of :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \\sigma_0^2)`\n on the bias. The DotProduct kernel is invariant to a rotation of\n the coordinates about the origin, but not translations.\n It is parameterized by a parameter sigma_0 :math:`\\sigma`\n which controls the inhomogeneity of the kernel. For :math:`\\sigma_0^2 =0`,\n the kernel is called the homogeneous linear kernel, otherwise\n it is inhomogeneous. The kernel is given by\n\n .. math::\n k(x_i, x_j) = \\sigma_0 ^ 2 + x_i \\cdot x_j\n\n The DotProduct kernel is commonly combined with exponentiation.\n\n See [1]_, Chapter 4, Section 4.2, for further details regarding the\n DotProduct kernel.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n sigma_0 : float >= 0, default=1.0\n Parameter controlling the inhomogeneity of the kernel. If sigma_0=0,\n the kernel is homogeneous.\n\n sigma_0_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'sigma_0'.\n If set to \"fixed\", 'sigma_0' cannot be changed during\n hyperparameter tuning.\n\n References\n ----------\n .. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". The MIT Press.\n `_\n\n Examples\n --------\n >>> from sklearn.datasets import make_friedman2\n >>> from sklearn.gaussian_process import GaussianProcessRegressor\n >>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n >>> kernel = DotProduct() + WhiteKernel()\n >>> gpr = GaussianProcessRegressor(kernel=kernel,\n ...
random_state=0).fit(X, y)\n >>> gpr.score(X, y)\n 0.3680...\n >>> gpr.predict(X[:2,:], return_std=True)\n (array([653.0..., 592.1...]), array([316.6..., 316.6...]))\n \"\"\"\n def __init__(self, sigma_0=1.0, sigma_0_bounds=(1e-5, 1e5)):\n self.sigma_0 = sigma_0\n self.sigma_0_bounds = sigma_0_bounds\n\n @property\n def hyperparameter_sigma_0(self):\n return Hyperparameter(\"sigma_0\", \"numeric\", self.sigma_0_bounds)\n\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.\n \"\"\"\n X = np.atleast_2d(X)\n if Y is None:\n K = np.inner(X, X) + self.sigma_0 ** 2\n else:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated when Y is None.\")\n K = np.inner(X, Y) + self.sigma_0 ** 2\n\n if eval_gradient:\n if not self.hyperparameter_sigma_0.fixed:\n K_gradient = np.empty((K.shape[0], K.shape[1], 1))\n K_gradient[..., 0] = 2 * self.sigma_0 ** 2\n return K, K_gradient\n else:\n return K, np.empty((X.shape[0], X.shape[0], 0))\n else:\n return K\n\n def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y).\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X).\n \"\"\"\n return np.einsum('ij,ij->i', X, X) + self.sigma_0 ** 2\n\n def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary.
\"\"\"\n return False\n\n def __repr__(self):\n return \"{0}(sigma_0={1:.3g})\".format(\n self.__class__.__name__, self.sigma_0)", + "instance_attributes": [ + { + "name": "sigma_0", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "sigma_0_bounds", + "types": { + "kind": "NamedType", + "name": "tuple" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared", + "name": "ExpSineSquared", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared", + "decorators": [], + "superclasses": ["StationaryKernelMixin", "NormalizedKernelMixin", "Kernel"], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__init__", + "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_length_scale@getter", + "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_periodicity@getter", + "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__call__", + "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__repr__" + ], + "is_public": true, + "reexported_by": [], + "description": "Exp-Sine-Squared kernel (aka periodic kernel).\n\nThe ExpSineSquared kernel allows one to model functions which repeat\nthemselves exactly. It is parameterized by a length scale\nparameter :math:`l>0` and a periodicity parameter :math:`p>0`.\nOnly the isotropic variant where :math:`l` is a scalar is\nsupported at the moment. The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\text{exp}\\left(-\n \\frac{ 2\\sin^2(\\pi d(x_i, x_j)/p) }{ l^ 2} \\right)\n\nwhere :math:`l` is the length scale of the kernel, :math:`p` the\nperiodicity of the kernel and :math:`d(\\\\cdot,\\\\cdot)` is the\nEuclidean distance.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "Exp-Sine-Squared kernel (aka periodic kernel).\n\nThe ExpSineSquared kernel allows one to model functions which repeat\nthemselves exactly. It is parameterized by a length scale\nparameter :math:`l>0` and a periodicity parameter :math:`p>0`.\nOnly the isotropic variant where :math:`l` is a scalar is\nsupported at the moment. The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\text{exp}\\left(-\n \\frac{ 2\\sin^2(\\pi d(x_i, x_j)/p) }{ l^ 2} \\right)\n\nwhere :math:`l` is the length scale of the kernel, :math:`p` the\nperiodicity of the kernel and :math:`d(\\\\cdot,\\\\cdot)` is the\nEuclidean distance.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\n\nlength_scale : float > 0, default=1.0\n The length scale of the kernel.\n\nperiodicity : float > 0, default=1.0\n The periodicity of the kernel.\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nperiodicity_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'periodicity'.\n If set to \"fixed\", 'periodicity' cannot be changed during\n hyperparameter tuning.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import ExpSineSquared\n>>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)\n>>> kernel = ExpSineSquared(length_scale=1, periodicity=1)\n>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n... 
random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.0144...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([425.6..., 457.5...]), array([0.3894..., 0.3467...]))", + "code": "class ExpSineSquared(StationaryKernelMixin, NormalizedKernelMixin, Kernel):\n r\"\"\"Exp-Sine-Squared kernel (aka periodic kernel).\n\n The ExpSineSquared kernel allows one to model functions which repeat\n themselves exactly. It is parameterized by a length scale\n parameter :math:`l>0` and a periodicity parameter :math:`p>0`.\n Only the isotropic variant where :math:`l` is a scalar is\n supported at the moment. The kernel is given by:\n\n .. math::\n k(x_i, x_j) = \\text{exp}\\left(-\n \\frac{ 2\\sin^2(\\pi d(x_i, x_j)/p) }{ l^ 2} \\right)\n\n where :math:`l` is the length scale of the kernel, :math:`p` the\n periodicity of the kernel and :math:`d(\\\\cdot,\\\\cdot)` is the\n Euclidean distance.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n\n length_scale : float > 0, default=1.0\n The length scale of the kernel.\n\n periodicity : float > 0, default=1.0\n The periodicity of the kernel.\n\n length_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\n periodicity_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'periodicity'.\n If set to \"fixed\", 'periodicity' cannot be changed during\n hyperparameter tuning.\n\n Examples\n --------\n >>> from sklearn.datasets import make_friedman2\n >>> from sklearn.gaussian_process import GaussianProcessRegressor\n >>> from sklearn.gaussian_process.kernels import ExpSineSquared\n >>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)\n >>> kernel = ExpSineSquared(length_scale=1, periodicity=1)\n >>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n ... random_state=0).fit(X, y)\n >>> gpr.score(X, y)\n 0.0144...\n >>> gpr.predict(X[:2,:], return_std=True)\n (array([425.6..., 457.5...]), array([0.3894..., 0.3467...]))\n \"\"\"\n def __init__(self, length_scale=1.0, periodicity=1.0,\n length_scale_bounds=(1e-5, 1e5),\n periodicity_bounds=(1e-5, 1e5)):\n self.length_scale = length_scale\n self.periodicity = periodicity\n self.length_scale_bounds = length_scale_bounds\n self.periodicity_bounds = periodicity_bounds\n\n @property\n def hyperparameter_length_scale(self):\n \"\"\"Returns the length scale\"\"\"\n return Hyperparameter(\n \"length_scale\", \"numeric\", self.length_scale_bounds)\n\n @property\n def hyperparameter_periodicity(self):\n return Hyperparameter(\n \"periodicity\", \"numeric\", self.periodicity_bounds)\n\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). 
If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.\n \"\"\"\n X = np.atleast_2d(X)\n if Y is None:\n dists = squareform(pdist(X, metric='euclidean'))\n arg = np.pi * dists / self.periodicity\n sin_of_arg = np.sin(arg)\n K = np.exp(- 2 * (sin_of_arg / self.length_scale) ** 2)\n else:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated when Y is None.\")\n dists = cdist(X, Y, metric='euclidean')\n K = np.exp(- 2 * (np.sin(np.pi / self.periodicity * dists)\n / self.length_scale) ** 2)\n\n if eval_gradient:\n cos_of_arg = np.cos(arg)\n # gradient with respect to length_scale\n if not self.hyperparameter_length_scale.fixed:\n length_scale_gradient = \\\n 4 / self.length_scale**2 * sin_of_arg**2 * K\n length_scale_gradient = length_scale_gradient[:, :, np.newaxis]\n else: # length_scale is kept fixed\n length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))\n # gradient with respect to p\n if not self.hyperparameter_periodicity.fixed:\n periodicity_gradient = \\\n 4 * arg / self.length_scale**2 * cos_of_arg \\\n * sin_of_arg * K\n periodicity_gradient = periodicity_gradient[:, :, np.newaxis]\n else: # p is kept fixed\n periodicity_gradient = np.empty((K.shape[0], K.shape[1], 0))\n\n return K, np.dstack((length_scale_gradient, periodicity_gradient))\n else:\n return K\n\n def __repr__(self):\n return \"{0}(length_scale={1:.3g}, periodicity={2:.3g})\".format(\n self.__class__.__name__, self.length_scale, self.periodicity)", + "instance_attributes": [ + { + "name": "length_scale", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "periodicity", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "length_scale_bounds", + "types": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "name": "periodicity_bounds", + "types": { + "kind": "NamedType", + "name": "tuple" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation", + "name": "Exponentiation", + "qname": "sklearn.gaussian_process.kernels.Exponentiation", + "decorators": [], + "superclasses": ["Kernel"], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__init__", + "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/get_params", + "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/hyperparameters@getter", + "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/theta@getter", + "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/theta@setter", + "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/bounds@getter", + "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__eq__", + "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__call__", + "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/diag", + "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__repr__", + "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/is_stationary", + "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/requires_vector_input@getter" + ], 
+ "is_public": true, + "reexported_by": [], + "description": "The Exponentiation kernel takes one base kernel and a scalar parameter\n:math:`p` and combines them via\n\n.. math::\n k_{exp}(X, Y) = k(X, Y) ^p\n\nNote that the `__pow__` magic method is overridden, so\n`Exponentiation(RBF(), 2)` is equivalent to using the ** operator\nwith `RBF() ** 2`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "The Exponentiation kernel takes one base kernel and a scalar parameter\n:math:`p` and combines them via\n\n.. math::\n k_{exp}(X, Y) = k(X, Y) ^p\n\nNote that the `__pow__` magic method is overridden, so\n`Exponentiation(RBF(), 2)` is equivalent to using the ** operator\nwith `RBF() ** 2`.\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : Kernel\n The base kernel\n\nexponent : float\n The exponent for the base kernel\n\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import (RationalQuadratic,\n... Exponentiation)\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = Exponentiation(RationalQuadratic(), exponent=2)\n>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.419...\n>>> gpr.predict(X[:1,:], return_std=True)\n(array([635.5...]), array([0.559...]))", + "code": "class Exponentiation(Kernel):\n \"\"\"The Exponentiation kernel takes one base kernel and a scalar parameter\n :math:`p` and combines them via\n\n .. math::\n k_{exp}(X, Y) = k(X, Y) ^p\n\n Note that the `__pow__` magic method is overridden, so\n `Exponentiation(RBF(), 2)` is equivalent to using the ** operator\n with `RBF() ** 2`.\n\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n kernel : Kernel\n The base kernel\n\n exponent : float\n The exponent for the base kernel\n\n\n Examples\n --------\n >>> from sklearn.datasets import make_friedman2\n >>> from sklearn.gaussian_process import GaussianProcessRegressor\n >>> from sklearn.gaussian_process.kernels import (RationalQuadratic,\n ... Exponentiation)\n >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n >>> kernel = Exponentiation(RationalQuadratic(), exponent=2)\n >>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n ... 
random_state=0).fit(X, y)\n >>> gpr.score(X, y)\n 0.419...\n >>> gpr.predict(X[:1,:], return_std=True)\n (array([635.5...]), array([0.559...]))\n \"\"\"\n\n def __init__(self, kernel, exponent):\n self.kernel = kernel\n self.exponent = exponent\n\n def get_params(self, deep=True):\n \"\"\"Get parameters of this kernel.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : dict\n Parameter names mapped to their values.\n \"\"\"\n params = dict(kernel=self.kernel, exponent=self.exponent)\n if deep:\n deep_items = self.kernel.get_params().items()\n params.update(('kernel__' + k, val) for k, val in deep_items)\n return params\n\n @property\n def hyperparameters(self):\n \"\"\"Returns a list of all hyperparameters.\"\"\"\n r = []\n for hyperparameter in self.kernel.hyperparameters:\n r.append(Hyperparameter(\"kernel__\" + hyperparameter.name,\n hyperparameter.value_type,\n hyperparameter.bounds,\n hyperparameter.n_elements))\n return r\n\n @property\n def theta(self):\n \"\"\"Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\n Note that theta are typically the log-transformed values of the\n kernel's hyperparameters as this representation of the search space\n is more amenable for hyperparameter search, as hyperparameters like\n length-scales naturally live on a log-scale.\n\n Returns\n -------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n return self.kernel.theta\n\n @theta.setter\n def theta(self, theta):\n \"\"\"Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\n Parameters\n ----------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n self.kernel.theta = theta\n\n @property\n def bounds(self):\n \"\"\"Returns the log-transformed bounds on the theta.\n\n Returns\n -------\n bounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta\n \"\"\"\n return self.kernel.bounds\n\n def __eq__(self, b):\n if type(self) != type(b):\n return False\n return (self.kernel == b.kernel and self.exponent == b.exponent)\n\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\n Y : array-like of shape (n_samples_Y, n_features) or list of object,\\\n default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when `eval_gradient`\n is True.\n \"\"\"\n if eval_gradient:\n K, K_gradient = self.kernel(X, Y, eval_gradient=True)\n K_gradient *= \\\n self.exponent * K[:, :, np.newaxis] ** (self.exponent - 1)\n return K ** self.exponent, K_gradient\n else:\n K = self.kernel(X, Y, eval_gradient=False)\n return K ** self.exponent\n\n def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n return self.kernel.diag(X) ** self.exponent\n\n def __repr__(self):\n return \"{0} ** {1}\".format(self.kernel, self.exponent)\n\n def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary. \"\"\"\n return self.kernel.is_stationary()\n\n @property\n def requires_vector_input(self):\n \"\"\"Returns whether the kernel is defined on fixed-length feature vectors. \"\"\"\n return self.kernel.requires_vector_input", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/GenericKernelMixin", + "name": "GenericKernelMixin", + "qname": "sklearn.gaussian_process.kernels.GenericKernelMixin", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/GenericKernelMixin/requires_vector_input@getter" + ], + "is_public": true, + "reexported_by": [], + "description": "Mixin for kernels which operate on generic objects such as variable-\nlength sequences, trees, and graphs.\n\n.. versionadded:: 0.22", + "docstring": "Mixin for kernels which operate on generic objects such as variable-\nlength sequences, trees, and graphs.\n\n.. versionadded:: 0.22", + "code": "class GenericKernelMixin:\n \"\"\"Mixin for kernels which operate on generic objects such as variable-\n length sequences, trees, and graphs.\n\n .. versionadded:: 0.22\n \"\"\"\n\n @property\n def requires_vector_input(self):\n \"\"\"Whether the kernel works only on fixed-length feature vectors.\"\"\"\n return False", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter", + "name": "Hyperparameter", + "qname": "sklearn.gaussian_process.kernels.Hyperparameter", + "decorators": [], + "superclasses": ["namedtuple('Hyperparameter', ('name', 'value_type', 'bounds', 'n_elements', 'fixed'))"], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter/__new__", + "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter/__eq__" + ], + "is_public": true, + "reexported_by": [], + "description": "A kernel hyperparameter's specification in form of a namedtuple.\n\n.. versionadded:: 0.18", + "docstring": "A kernel hyperparameter's specification in form of a namedtuple.\n\n.. versionadded:: 0.18\n\nAttributes\n----------\nname : str\n The name of the hyperparameter. Note that a kernel using a\n hyperparameter with name \"x\" must have the attributes self.x and\n self.x_bounds\n\nvalue_type : str\n The type of the hyperparameter. Currently, only \"numeric\"\n hyperparameters are supported.\n\nbounds : pair of floats >= 0 or \"fixed\"\n The lower and upper bound on the parameter. If n_elements>1, a pair\n of 1d arrays with n_elements each may be given alternatively. 
If\n the string \"fixed\" is passed as bounds, the hyperparameter's value\n cannot be changed.\n\nn_elements : int, default=1\n The number of elements of the hyperparameter value. Defaults to 1,\n which corresponds to a scalar hyperparameter. n_elements > 1\n corresponds to a hyperparameter which is vector-valued,\n such as, e.g., anisotropic length-scales.\n\nfixed : bool, default=None\n Whether the value of this hyperparameter is fixed, i.e., cannot be\n changed during hyperparameter tuning. If None is passed, the \"fixed\" flag is\n derived from the given bounds.\n\nExamples\n--------\n>>> from sklearn.gaussian_process.kernels import ConstantKernel\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import Hyperparameter\n>>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)\n>>> kernel = ConstantKernel(constant_value=1.0,\n... constant_value_bounds=(0.0, 10.0))\n\nWe can access each hyperparameter:\n\n>>> for hyperparameter in kernel.hyperparameters:\n... print(hyperparameter)\nHyperparameter(name='constant_value', value_type='numeric',\nbounds=array([[ 0., 10.]]), n_elements=1, fixed=False)\n\n>>> params = kernel.get_params()\n>>> for key in sorted(params): print(f\"{key} : {params[key]}\")\nconstant_value : 1.0\nconstant_value_bounds : (0.0, 10.0)", + "code": "class Hyperparameter(namedtuple('Hyperparameter',\n ('name', 'value_type', 'bounds',\n 'n_elements', 'fixed'))):\n \"\"\"A kernel hyperparameter's specification in form of a namedtuple.\n\n .. versionadded:: 0.18\n\n Attributes\n ----------\n name : str\n The name of the hyperparameter. Note that a kernel using a\n hyperparameter with name \"x\" must have the attributes self.x and\n self.x_bounds\n\n value_type : str\n The type of the hyperparameter. Currently, only \"numeric\"\n hyperparameters are supported.\n\n bounds : pair of floats >= 0 or \"fixed\"\n The lower and upper bound on the parameter. If n_elements>1, a pair\n of 1d arrays with n_elements each may be given alternatively. If\n the string \"fixed\" is passed as bounds, the hyperparameter's value\n cannot be changed.\n\n n_elements : int, default=1\n The number of elements of the hyperparameter value. Defaults to 1,\n which corresponds to a scalar hyperparameter. n_elements > 1\n corresponds to a hyperparameter which is vector-valued,\n such as, e.g., anisotropic length-scales.\n\n fixed : bool, default=None\n Whether the value of this hyperparameter is fixed, i.e., cannot be\n changed during hyperparameter tuning. If None is passed, the \"fixed\" flag is\n derived from the given bounds.\n\n Examples\n --------\n >>> from sklearn.gaussian_process.kernels import ConstantKernel\n >>> from sklearn.datasets import make_friedman2\n >>> from sklearn.gaussian_process import GaussianProcessRegressor\n >>> from sklearn.gaussian_process.kernels import Hyperparameter\n >>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)\n >>> kernel = ConstantKernel(constant_value=1.0,\n ... constant_value_bounds=(0.0, 10.0))\n\n We can access each hyperparameter:\n\n >>> for hyperparameter in kernel.hyperparameters:\n ... 
print(hyperparameter)\n Hyperparameter(name='constant_value', value_type='numeric',\n bounds=array([[ 0., 10.]]), n_elements=1, fixed=False)\n\n >>> params = kernel.get_params()\n >>> for key in sorted(params): print(f\"{key} : {params[key]}\")\n constant_value : 1.0\n constant_value_bounds : (0.0, 10.0)\n \"\"\"\n\n # A raw namedtuple is very memory efficient as it packs the attributes\n # in a struct to get rid of the __dict__ of attributes; in particular it\n # does not copy the string for the keys on each instance.\n # Deriving a namedtuple subclass just to introduce the __init__ method\n # would also reintroduce the __dict__ on the instance, so we tell the\n # Python interpreter that this subclass uses static __slots__ instead of\n # dynamic attributes. Furthermore we don't need any additional slot in the\n # subclass so we set __slots__ to the empty tuple.\n __slots__ = ()\n\n def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None):\n if not isinstance(bounds, str) or bounds != \"fixed\":\n bounds = np.atleast_2d(bounds)\n if n_elements > 1: # vector-valued parameter\n if bounds.shape[0] == 1:\n bounds = np.repeat(bounds, n_elements, 0)\n elif bounds.shape[0] != n_elements:\n raise ValueError(\"Bounds on %s should have either 1 or \"\n \"%d dimensions. Given are %d\"\n % (name, n_elements, bounds.shape[0]))\n\n if fixed is None:\n fixed = isinstance(bounds, str) and bounds == \"fixed\"\n return super(Hyperparameter, cls).__new__(\n cls, name, value_type, bounds, n_elements, fixed)\n\n # This is mainly a testing utility to check that two hyperparameters\n # are equal.\n def __eq__(self, other):\n return (self.name == other.name and\n self.value_type == other.value_type and\n np.all(self.bounds == other.bounds) and\n self.n_elements == other.n_elements and\n self.fixed == other.fixed)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel", + "name": "Kernel", + "qname": "sklearn.gaussian_process.kernels.Kernel", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/get_params", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/set_params", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/clone_with_theta", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/n_dims@getter", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/hyperparameters@getter", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/theta@getter", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/theta@setter", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/bounds@getter", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__add__", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__radd__", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__mul__", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__rmul__", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__pow__", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__eq__", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__repr__", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__call__", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/diag", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/is_stationary", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/requires_vector_input@getter", + "scikit-learn/sklearn.gaussian_process.kernels/Kernel/_check_bounds_params" + ], + "is_public": true, + "reexported_by": [], + "description": "Base 
class for all kernels.\n\n.. versionadded:: 0.18", + "docstring": "Base class for all kernels.\n\n.. versionadded:: 0.18", + "code": "class Kernel(metaclass=ABCMeta):\n \"\"\"Base class for all kernels.\n\n .. versionadded:: 0.18\n \"\"\"\n\n def get_params(self, deep=True):\n \"\"\"Get parameters of this kernel.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : dict\n Parameter names mapped to their values.\n \"\"\"\n params = dict()\n\n # introspect the constructor arguments to find the model parameters\n # to represent\n cls = self.__class__\n init = getattr(cls.__init__, 'deprecated_original', cls.__init__)\n init_sign = signature(init)\n args, varargs = [], []\n for parameter in init_sign.parameters.values():\n if (parameter.kind != parameter.VAR_KEYWORD and\n parameter.name != 'self'):\n args.append(parameter.name)\n if parameter.kind == parameter.VAR_POSITIONAL:\n varargs.append(parameter.name)\n\n if len(varargs) != 0:\n raise RuntimeError(\"scikit-learn kernels should always \"\n \"specify their parameters in the signature\"\n \" of their __init__ (no varargs).\"\n \" %s doesn't follow this convention.\"\n % (cls, ))\n for arg in args:\n params[arg] = getattr(self, arg)\n\n return params\n\n def set_params(self, **params):\n \"\"\"Set the parameters of this kernel.\n\n The method works on simple kernels as well as on nested kernels.\n The latter have parameters of the form ``__``\n so that it's possible to update each component of a nested object.\n\n Returns\n -------\n self\n \"\"\"\n if not params:\n # Simple optimisation to gain speed (inspect is slow)\n return self\n valid_params = self.get_params(deep=True)\n for key, value in params.items():\n split = key.split('__', 1)\n if len(split) > 1:\n # nested objects case\n name, sub_name = split\n if name not in valid_params:\n raise ValueError('Invalid parameter %s for kernel %s. '\n 'Check the list of available parameters '\n 'with `kernel.get_params().keys()`.' %\n (name, self))\n sub_object = valid_params[name]\n sub_object.set_params(**{sub_name: value})\n else:\n # simple objects case\n if key not in valid_params:\n raise ValueError('Invalid parameter %s for kernel %s. '\n 'Check the list of available parameters '\n 'with `kernel.get_params().keys()`.' 
%\n (key, self.__class__.__name__))\n setattr(self, key, value)\n return self\n\n def clone_with_theta(self, theta):\n \"\"\"Returns a clone of self with given hyperparameters theta.\n\n Parameters\n ----------\n theta : ndarray of shape (n_dims,)\n The hyperparameters\n \"\"\"\n cloned = clone(self)\n cloned.theta = theta\n return cloned\n\n @property\n def n_dims(self):\n \"\"\"Returns the number of non-fixed hyperparameters of the kernel.\"\"\"\n return self.theta.shape[0]\n\n @property\n def hyperparameters(self):\n \"\"\"Returns a list of all hyperparameter specifications.\"\"\"\n r = [getattr(self, attr) for attr in dir(self)\n if attr.startswith(\"hyperparameter_\")]\n return r\n\n @property\n def theta(self):\n \"\"\"Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\n Note that theta are typically the log-transformed values of the\n kernel's hyperparameters as this representation of the search space\n is more amenable for hyperparameter search, as hyperparameters like\n length-scales naturally live on a log-scale.\n\n Returns\n -------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n theta = []\n params = self.get_params()\n for hyperparameter in self.hyperparameters:\n if not hyperparameter.fixed:\n theta.append(params[hyperparameter.name])\n if len(theta) > 0:\n return np.log(np.hstack(theta))\n else:\n return np.array([])\n\n @theta.setter\n def theta(self, theta):\n \"\"\"Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\n Parameters\n ----------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n params = self.get_params()\n i = 0\n for hyperparameter in self.hyperparameters:\n if hyperparameter.fixed:\n continue\n if hyperparameter.n_elements > 1:\n # vector-valued parameter\n params[hyperparameter.name] = np.exp(\n theta[i:i + hyperparameter.n_elements])\n i += hyperparameter.n_elements\n else:\n params[hyperparameter.name] = np.exp(theta[i])\n i += 1\n\n if i != len(theta):\n raise ValueError(\"theta has not the correct number of entries.\"\n \" Should be %d; given are %d\"\n % (i, len(theta)))\n self.set_params(**params)\n\n @property\n def bounds(self):\n \"\"\"Returns the log-transformed bounds on the theta.\n\n Returns\n -------\n bounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta\n \"\"\"\n bounds = [hyperparameter.bounds\n for hyperparameter in self.hyperparameters\n if not hyperparameter.fixed]\n if len(bounds) > 0:\n return np.log(np.vstack(bounds))\n else:\n return np.array([])\n\n def __add__(self, b):\n if not isinstance(b, Kernel):\n return Sum(self, ConstantKernel(b))\n return Sum(self, b)\n\n def __radd__(self, b):\n if not isinstance(b, Kernel):\n return Sum(ConstantKernel(b), self)\n return Sum(b, self)\n\n def __mul__(self, b):\n if not isinstance(b, Kernel):\n return Product(self, ConstantKernel(b))\n return Product(self, b)\n\n def __rmul__(self, b):\n if not isinstance(b, Kernel):\n return Product(ConstantKernel(b), self)\n return Product(b, self)\n\n def __pow__(self, b):\n return Exponentiation(self, b)\n\n def __eq__(self, b):\n if type(self) != type(b):\n return False\n params_a = self.get_params()\n params_b = b.get_params()\n for key in set(list(params_a.keys()) + list(params_b.keys())):\n if np.any(params_a.get(key, None) != params_b.get(key, None)):\n return False\n return True\n\n def __repr__(self):\n return 
\"{0}({1})\".format(self.__class__.__name__,\n \", \".join(map(\"{0:.3g}\".format, self.theta)))\n\n @abstractmethod\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Evaluate the kernel.\"\"\"\n\n @abstractmethod\n def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples,)\n Left argument of the returned kernel k(X, Y)\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n\n @abstractmethod\n def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary. \"\"\"\n\n @property\n def requires_vector_input(self):\n \"\"\"Returns whether the kernel is defined on fixed-length feature\n vectors or generic objects. Defaults to True for backward\n compatibility.\"\"\"\n return True\n\n def _check_bounds_params(self):\n \"\"\"Called after fitting to warn if bounds may have been too tight.\"\"\"\n list_close = np.isclose(self.bounds,\n np.atleast_2d(self.theta).T)\n idx = 0\n for hyp in self.hyperparameters:\n if hyp.fixed:\n continue\n for dim in range(hyp.n_elements):\n if list_close[idx, 0]:\n warnings.warn(\"The optimal value found for \"\n \"dimension %s of parameter %s is \"\n \"close to the specified lower \"\n \"bound %s. Decreasing the bound and\"\n \" calling fit again may find a \"\n \"better value.\" %\n (dim, hyp.name, hyp.bounds[dim][0]),\n ConvergenceWarning)\n elif list_close[idx, 1]:\n warnings.warn(\"The optimal value found for \"\n \"dimension %s of parameter %s is \"\n \"close to the specified upper \"\n \"bound %s. Increasing the bound and\"\n \" calling fit again may find a \"\n \"better value.\" %\n (dim, hyp.name, hyp.bounds[dim][1]),\n ConvergenceWarning)\n idx += 1", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator", + "name": "KernelOperator", + "qname": "sklearn.gaussian_process.kernels.KernelOperator", + "decorators": [], + "superclasses": ["Kernel"], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/__init__", + "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/get_params", + "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/hyperparameters@getter", + "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/theta@getter", + "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/theta@setter", + "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/bounds@getter", + "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/__eq__", + "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/is_stationary", + "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/requires_vector_input@getter" + ], + "is_public": true, + "reexported_by": [], + "description": "Base class for all kernel operators.\n\n.. versionadded:: 0.18", + "docstring": "Base class for all kernel operators.\n\n.. versionadded:: 0.18", + "code": "class KernelOperator(Kernel):\n \"\"\"Base class for all kernel operators.\n\n .. 
versionadded:: 0.18\n \"\"\"\n\n def __init__(self, k1, k2):\n self.k1 = k1\n self.k2 = k2\n\n def get_params(self, deep=True):\n \"\"\"Get parameters of this kernel.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : dict\n Parameter names mapped to their values.\n \"\"\"\n params = dict(k1=self.k1, k2=self.k2)\n if deep:\n deep_items = self.k1.get_params().items()\n params.update(('k1__' + k, val) for k, val in deep_items)\n deep_items = self.k2.get_params().items()\n params.update(('k2__' + k, val) for k, val in deep_items)\n\n return params\n\n @property\n def hyperparameters(self):\n \"\"\"Returns a list of all hyperparameters.\"\"\"\n r = [Hyperparameter(\"k1__\" + hyperparameter.name,\n hyperparameter.value_type,\n hyperparameter.bounds, hyperparameter.n_elements)\n for hyperparameter in self.k1.hyperparameters]\n\n for hyperparameter in self.k2.hyperparameters:\n r.append(Hyperparameter(\"k2__\" + hyperparameter.name,\n hyperparameter.value_type,\n hyperparameter.bounds,\n hyperparameter.n_elements))\n return r\n\n @property\n def theta(self):\n \"\"\"Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\n Note that theta are typically the log-transformed values of the\n kernel's hyperparameters as this representation of the search space\n is more amenable for hyperparameter search, as hyperparameters like\n length-scales naturally live on a log-scale.\n\n Returns\n -------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n return np.append(self.k1.theta, self.k2.theta)\n\n @theta.setter\n def theta(self, theta):\n \"\"\"Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\n Parameters\n ----------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n k1_dims = self.k1.n_dims\n self.k1.theta = theta[:k1_dims]\n self.k2.theta = theta[k1_dims:]\n\n @property\n def bounds(self):\n \"\"\"Returns the log-transformed bounds on the theta.\n\n Returns\n -------\n bounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta\n \"\"\"\n if self.k1.bounds.size == 0:\n return self.k2.bounds\n if self.k2.bounds.size == 0:\n return self.k1.bounds\n return np.vstack((self.k1.bounds, self.k2.bounds))\n\n def __eq__(self, b):\n if type(self) != type(b):\n return False\n return (self.k1 == b.k1 and self.k2 == b.k2) \\\n or (self.k1 == b.k2 and self.k2 == b.k1)\n\n def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary. \"\"\"\n return self.k1.is_stationary() and self.k2.is_stationary()\n\n @property\n def requires_vector_input(self):\n \"\"\"Returns whether the kernel is defined on fixed-length feature vectors. 
\"\"\"\n return (self.k1.requires_vector_input or\n self.k2.requires_vector_input)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Matern", + "name": "Matern", + "qname": "sklearn.gaussian_process.kernels.Matern", + "decorators": [], + "superclasses": ["RBF"], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/Matern/__init__", + "scikit-learn/sklearn.gaussian_process.kernels/Matern/__call__", + "scikit-learn/sklearn.gaussian_process.kernels/Matern/__repr__" + ], + "is_public": true, + "reexported_by": [], + "description": "Matern kernel.\n\nThe class of Matern kernels is a generalization of the :class:`RBF`.\nIt has an additional parameter :math:`\\nu` which controls the\nsmoothness of the resulting function. The smaller :math:`\\nu`,\nthe less smooth the approximated function is.\nAs :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to\nthe :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Mat\u00e9rn kernel\nbecomes identical to the absolute exponential kernel.\nImportant intermediate values are\n:math:`\\nu=1.5` (once differentiable functions)\nand :math:`\\nu=2.5` (twice differentiable functions).\n\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\n \\Bigg)^\\nu K_\\nu\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)\n\nwhere :math:`d(\\cdot,\\cdot)` is the Euclidean distance,\n:math:`K_{\\nu}(\\cdot)` is a modified Bessel function and\n:math:`\\Gamma(\\cdot)` is the gamma function.\nSee [1]_, Chapter 4, Section 4.2, for details regarding the different\nvariants of the Matern kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "Matern kernel.\n\nThe class of Matern kernels is a generalization of the :class:`RBF`.\nIt has an additional parameter :math:`\\nu` which controls the\nsmoothness of the resulting function. The smaller :math:`\\nu`,\nthe less smooth the approximated function is.\nAs :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to\nthe :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Mat\u00e9rn kernel\nbecomes identical to the absolute exponential kernel.\nImportant intermediate values are\n:math:`\\nu=1.5` (once differentiable functions)\nand :math:`\\nu=2.5` (twice differentiable functions).\n\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\n \\Bigg)^\\nu K_\\nu\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)\n\n\n\nwhere :math:`d(\\cdot,\\cdot)` is the Euclidean distance,\n:math:`K_{\\nu}(\\cdot)` is a modified Bessel function and\n:math:`\\Gamma(\\cdot)` is the gamma function.\nSee [1]_, Chapter 4, Section 4.2, for details regarding the different\nvariants of the Matern kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nlength_scale : float or ndarray of shape (n_features,), default=1.0\n The length scale of the kernel. If a float, an isotropic kernel is\n used. 
If an array, an anisotropic kernel is used where each dimension\n of l defines the length-scale of the respective feature dimension.\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nnu : float, default=1.5\n The parameter nu controlling the smoothness of the learned function.\n The smaller nu, the less smooth the approximated function is.\n For nu=inf, the kernel becomes equivalent to the RBF kernel and for\n nu=0.5 to the absolute exponential kernel. Important intermediate\n values are nu=1.5 (once differentiable functions) and nu=2.5\n (twice differentiable functions). Note that values of nu not in\n [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost\n (appr. 10 times higher) since they require evaluating the modified\n Bessel function. Furthermore, in contrast to l, nu is kept fixed to\n its initial value and not optimized.\n\nReferences\n----------\n.. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". The MIT Press.\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import Matern\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8513..., 0.0368..., 0.1117...],\n [0.8086..., 0.0693..., 0.1220...]])", + "code": "class Matern(RBF):\n \"\"\" Matern kernel.\n\n The class of Matern kernels is a generalization of the :class:`RBF`.\n It has an additional parameter :math:`\\\\nu` which controls the\n smoothness of the resulting function. The smaller :math:`\\\\nu`,\n the less smooth the approximated function is.\n As :math:`\\\\nu\\\\rightarrow\\\\infty`, the kernel becomes equivalent to\n the :class:`RBF` kernel. When :math:`\\\\nu = 1/2`, the Mat\u00e9rn kernel\n becomes identical to the absolute exponential kernel.\n Important intermediate values are\n :math:`\\\\nu=1.5` (once differentiable functions)\n and :math:`\\\\nu=2.5` (twice differentiable functions).\n\n The kernel is given by:\n\n .. math::\n k(x_i, x_j) = \\\\frac{1}{\\\\Gamma(\\\\nu)2^{\\\\nu-1}}\\\\Bigg(\n \\\\frac{\\\\sqrt{2\\\\nu}}{l} d(x_i , x_j )\n \\\\Bigg)^\\\\nu K_\\\\nu\\\\Bigg(\n \\\\frac{\\\\sqrt{2\\\\nu}}{l} d(x_i , x_j )\\\\Bigg)\n\n\n\n where :math:`d(\\\\cdot,\\\\cdot)` is the Euclidean distance,\n :math:`K_{\\\\nu}(\\\\cdot)` is a modified Bessel function and\n :math:`\\\\Gamma(\\\\cdot)` is the gamma function.\n See [1]_, Chapter 4, Section 4.2, for details regarding the different\n variants of the Matern kernel.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n length_scale : float or ndarray of shape (n_features,), default=1.0\n The length scale of the kernel. If a float, an isotropic kernel is\n used. 
If an array, an anisotropic kernel is used where each dimension\n of l defines the length-scale of the respective feature dimension.\n\n length_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\n nu : float, default=1.5\n The parameter nu controlling the smoothness of the learned function.\n The smaller nu, the less smooth the approximated function is.\n For nu=inf, the kernel becomes equivalent to the RBF kernel and for\n nu=0.5 to the absolute exponential kernel. Important intermediate\n values are nu=1.5 (once differentiable functions) and nu=2.5\n (twice differentiable functions). Note that values of nu not in\n [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost\n (appr. 10 times higher) since they require evaluating the modified\n Bessel function. Furthermore, in contrast to l, nu is kept fixed to\n its initial value and not optimized.\n\n References\n ----------\n .. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". The MIT Press.\n `_\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.gaussian_process import GaussianProcessClassifier\n >>> from sklearn.gaussian_process.kernels import Matern\n >>> X, y = load_iris(return_X_y=True)\n >>> kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)\n >>> gpc = GaussianProcessClassifier(kernel=kernel,\n ... random_state=0).fit(X, y)\n >>> gpc.score(X, y)\n 0.9866...\n >>> gpc.predict_proba(X[:2,:])\n array([[0.8513..., 0.0368..., 0.1117...],\n [0.8086..., 0.0693..., 0.1220...]])\n \"\"\"\n def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5),\n nu=1.5):\n super().__init__(length_scale, length_scale_bounds)\n self.nu = nu\n\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.\n \"\"\"\n X = np.atleast_2d(X)\n length_scale = _check_length_scale(X, self.length_scale)\n if Y is None:\n dists = pdist(X / length_scale, metric='euclidean')\n else:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated when Y is None.\")\n dists = cdist(X / length_scale, Y / length_scale,\n metric='euclidean')\n\n if self.nu == 0.5:\n K = np.exp(-dists)\n elif self.nu == 1.5:\n K = dists * math.sqrt(3)\n K = (1. + K) * np.exp(-K)\n elif self.nu == 2.5:\n K = dists * math.sqrt(5)\n K = (1. 
+ K + K ** 2 / 3.0) * np.exp(-K)\n elif self.nu == np.inf:\n K = np.exp(-dists ** 2 / 2.0)\n else: # general case; expensive to evaluate\n K = dists\n K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan\n tmp = (math.sqrt(2 * self.nu) * K)\n K.fill((2 ** (1. - self.nu)) / gamma(self.nu))\n K *= tmp ** self.nu\n K *= kv(self.nu, tmp)\n\n if Y is None:\n # convert from upper-triangular matrix to square matrix\n K = squareform(K)\n np.fill_diagonal(K, 1)\n\n if eval_gradient:\n if self.hyperparameter_length_scale.fixed:\n # Hyperparameter l kept fixed\n K_gradient = np.empty((X.shape[0], X.shape[0], 0))\n return K, K_gradient\n\n # We need to recompute the pairwise dimension-wise distances\n if self.anisotropic:\n D = (X[:, np.newaxis, :] - X[np.newaxis, :, :])**2 \\\n / (length_scale ** 2)\n else:\n D = squareform(dists**2)[:, :, np.newaxis]\n\n if self.nu == 0.5:\n K_gradient = K[..., np.newaxis] * D \\\n / np.sqrt(D.sum(2))[:, :, np.newaxis]\n K_gradient[~np.isfinite(K_gradient)] = 0\n elif self.nu == 1.5:\n K_gradient = \\\n 3 * D * np.exp(-np.sqrt(3 * D.sum(-1)))[..., np.newaxis]\n elif self.nu == 2.5:\n tmp = np.sqrt(5 * D.sum(-1))[..., np.newaxis]\n K_gradient = 5.0 / 3.0 * D * (tmp + 1) * np.exp(-tmp)\n elif self.nu == np.inf:\n K_gradient = D * K[..., np.newaxis]\n else:\n # approximate gradient numerically\n def f(theta): # helper function\n return self.clone_with_theta(theta)(X, Y)\n return K, _approx_fprime(self.theta, f, 1e-10)\n\n if not self.anisotropic:\n return K, K_gradient[:, :].sum(-1)[:, :, np.newaxis]\n else:\n return K, K_gradient\n else:\n return K\n\n def __repr__(self):\n if self.anisotropic:\n return \"{0}(length_scale=[{1}], nu={2:.3g})\".format(\n self.__class__.__name__,\n \", \".join(map(\"{0:.3g}\".format, self.length_scale)),\n self.nu)\n else:\n return \"{0}(length_scale={1:.3g}, nu={2:.3g})\".format(\n self.__class__.__name__, np.ravel(self.length_scale)[0],\n self.nu)", + "instance_attributes": [ + { + "name": "nu", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/NormalizedKernelMixin", + "name": "NormalizedKernelMixin", + "qname": "sklearn.gaussian_process.kernels.NormalizedKernelMixin", + "decorators": [], + "superclasses": [], + "methods": ["scikit-learn/sklearn.gaussian_process.kernels/NormalizedKernelMixin/diag"], + "is_public": true, + "reexported_by": [], + "description": "Mixin for kernels which are normalized: k(X, X)=1.\n\n.. versionadded:: 0.18", + "docstring": "Mixin for kernels which are normalized: k(X, X)=1.\n\n.. versionadded:: 0.18", + "code": "class NormalizedKernelMixin:\n \"\"\"Mixin for kernels which are normalized: k(X, X)=1.\n\n .. 
versionadded:: 0.18\n \"\"\"\n\n def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n return np.ones(X.shape[0])", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel", + "name": "PairwiseKernel", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel", + "decorators": [], + "superclasses": ["Kernel"], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__init__", + "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/hyperparameter_gamma@getter", + "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__call__", + "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/diag", + "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/is_stationary", + "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__repr__" + ], + "is_public": true, + "reexported_by": [], + "description": "Wrapper for kernels in sklearn.metrics.pairwise.\n\nA thin wrapper around the functionality of the kernels in\nsklearn.metrics.pairwise.\n\nNote: Evaluation of eval_gradient is not analytic but numeric and all\n kernels support only isotropic distances. The parameter gamma is\n considered to be a hyperparameter and may be optimized. The other\n kernel parameters are set directly at initialization and are kept\n fixed.\n\n.. versionadded:: 0.18", + "docstring": "Wrapper for kernels in sklearn.metrics.pairwise.\n\nA thin wrapper around the functionality of the kernels in\nsklearn.metrics.pairwise.\n\nNote: Evaluation of eval_gradient is not analytic but numeric and all\n kernels support only isotropic distances. The parameter gamma is\n considered to be a hyperparameter and may be optimized. The other\n kernel parameters are set directly at initialization and are kept\n fixed.\n\n.. versionadded:: 0.18\n\nParameters\n----------\ngamma : float, default=1.0\n Parameter gamma of the pairwise kernel specified by metric. It should\n be positive.\n\ngamma_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'gamma'.\n If set to \"fixed\", 'gamma' cannot be changed during\n hyperparameter tuning.\n\nmetric : {\"linear\", \"additive_chi2\", \"chi2\", \"poly\", \"polynomial\", \"rbf\", \"laplacian\", \"sigmoid\", \"cosine\"} or callable, default=\"linear\"\n The metric to use when calculating kernel between instances in a\n feature array. If metric is a string, it must be one of the metrics\n in pairwise.PAIRWISE_KERNEL_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. 
The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\npairwise_kernels_kwargs : dict, default=None\n All entries of this dict (if any) are passed as keyword arguments to\n the pairwise kernel function.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import PairwiseKernel\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = PairwiseKernel(metric='rbf')\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9733...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8880..., 0.05663..., 0.05532...],\n [0.8676..., 0.07073..., 0.06165...]])", + "code": "class PairwiseKernel(Kernel):\n \"\"\"Wrapper for kernels in sklearn.metrics.pairwise.\n\n A thin wrapper around the functionality of the kernels in\n sklearn.metrics.pairwise.\n\n Note: Evaluation of eval_gradient is not analytic but numeric and all\n kernels support only isotropic distances. The parameter gamma is\n considered to be a hyperparameter and may be optimized. The other\n kernel parameters are set directly at initialization and are kept\n fixed.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n gamma : float, default=1.0\n Parameter gamma of the pairwise kernel specified by metric. It should\n be positive.\n\n gamma_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'gamma'.\n If set to \"fixed\", 'gamma' cannot be changed during\n hyperparameter tuning.\n\n metric : {\"linear\", \"additive_chi2\", \"chi2\", \"poly\", \"polynomial\", \\\n \"rbf\", \"laplacian\", \"sigmoid\", \"cosine\"} or callable, \\\n default=\"linear\"\n The metric to use when calculating kernel between instances in a\n feature array. If metric is a string, it must be one of the metrics\n in pairwise.PAIRWISE_KERNEL_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\n pairwise_kernels_kwargs : dict, default=None\n All entries of this dict (if any) are passed as keyword arguments to\n the pairwise kernel function.\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.gaussian_process import GaussianProcessClassifier\n >>> from sklearn.gaussian_process.kernels import PairwiseKernel\n >>> X, y = load_iris(return_X_y=True)\n >>> kernel = PairwiseKernel(metric='rbf')\n >>> gpc = GaussianProcessClassifier(kernel=kernel,\n ... 
random_state=0).fit(X, y)\n >>> gpc.score(X, y)\n 0.9733...\n >>> gpc.predict_proba(X[:2,:])\n array([[0.8880..., 0.05663..., 0.05532...],\n [0.8676..., 0.07073..., 0.06165...]])\n \"\"\"\n def __init__(self, gamma=1.0, gamma_bounds=(1e-5, 1e5), metric=\"linear\",\n pairwise_kernels_kwargs=None):\n self.gamma = gamma\n self.gamma_bounds = gamma_bounds\n self.metric = metric\n self.pairwise_kernels_kwargs = pairwise_kernels_kwargs\n\n @property\n def hyperparameter_gamma(self):\n return Hyperparameter(\"gamma\", \"numeric\", self.gamma_bounds)\n\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.\n \"\"\"\n pairwise_kernels_kwargs = self.pairwise_kernels_kwargs\n if self.pairwise_kernels_kwargs is None:\n pairwise_kernels_kwargs = {}\n\n X = np.atleast_2d(X)\n K = pairwise_kernels(X, Y, metric=self.metric, gamma=self.gamma,\n filter_params=True,\n **pairwise_kernels_kwargs)\n if eval_gradient:\n if self.hyperparameter_gamma.fixed:\n return K, np.empty((X.shape[0], X.shape[0], 0))\n else:\n # approximate gradient numerically\n def f(gamma): # helper function\n return pairwise_kernels(\n X, Y, metric=self.metric, gamma=np.exp(gamma),\n filter_params=True, **pairwise_kernels_kwargs)\n return K, _approx_fprime(self.theta, f, 1e-10)\n else:\n return K\n\n def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n # We have to fall back to the slow way of computing the diagonal\n return np.apply_along_axis(self, 1, X).ravel()\n\n def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary. 
\"\"\"\n return self.metric in [\"rbf\"]\n\n def __repr__(self):\n return \"{0}(gamma={1}, metric={2})\".format(\n self.__class__.__name__, self.gamma, self.metric)", + "instance_attributes": [ + { + "name": "gamma", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "gamma_bounds", + "types": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "name": "metric", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Product", + "name": "Product", + "qname": "sklearn.gaussian_process.kernels.Product", + "decorators": [], + "superclasses": ["KernelOperator"], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/Product/__call__", + "scikit-learn/sklearn.gaussian_process.kernels/Product/diag", + "scikit-learn/sklearn.gaussian_process.kernels/Product/__repr__" + ], + "is_public": true, + "reexported_by": [], + "description": "The `Product` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. math::\n k_{prod}(X, Y) = k_1(X, Y) * k_2(X, Y)\n\nNote that the `__mul__` magic method is overridden, so\n`Product(RBF(), RBF())` is equivalent to using the * operator\nwith `RBF() * RBF()`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "The `Product` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. math::\n k_{prod}(X, Y) = k_1(X, Y) * k_2(X, Y)\n\nNote that the `__mul__` magic method is overridden, so\n`Product(RBF(), RBF())` is equivalent to using the * operator\nwith `RBF() * RBF()`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nk1 : Kernel\n The first base-kernel of the product-kernel\n\nk2 : Kernel\n The second base-kernel of the product-kernel\n\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import (RBF, Product,\n... ConstantKernel)\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = Product(ConstantKernel(2), RBF())\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n1.0\n>>> kernel\n1.41**2 * RBF(length_scale=1)", + "code": "class Product(KernelOperator):\n \"\"\"The `Product` kernel takes two kernels :math:`k_1` and :math:`k_2`\n and combines them via\n\n .. math::\n k_{prod}(X, Y) = k_1(X, Y) * k_2(X, Y)\n\n Note that the `__mul__` magic method is overridden, so\n `Product(RBF(), RBF())` is equivalent to using the * operator\n with `RBF() * RBF()`.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n k1 : Kernel\n The first base-kernel of the product-kernel\n\n k2 : Kernel\n The second base-kernel of the product-kernel\n\n\n Examples\n --------\n >>> from sklearn.datasets import make_friedman2\n >>> from sklearn.gaussian_process import GaussianProcessRegressor\n >>> from sklearn.gaussian_process.kernels import (RBF, Product,\n ... ConstantKernel)\n >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n >>> kernel = Product(ConstantKernel(2), RBF())\n >>> gpr = GaussianProcessRegressor(kernel=kernel,\n ... 
random_state=0).fit(X, y)\n >>> gpr.score(X, y)\n 1.0\n >>> kernel\n 1.41**2 * RBF(length_scale=1)\n \"\"\"\n\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\n Y : array-like of shape (n_samples_Y, n_features) or list of object,\\\n default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.\n \"\"\"\n if eval_gradient:\n K1, K1_gradient = self.k1(X, Y, eval_gradient=True)\n K2, K2_gradient = self.k2(X, Y, eval_gradient=True)\n return K1 * K2, np.dstack((K1_gradient * K2[:, :, np.newaxis],\n K2_gradient * K1[:, :, np.newaxis]))\n else:\n return self.k1(X, Y) * self.k2(X, Y)\n\n def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n return self.k1.diag(X) * self.k2.diag(X)\n\n def __repr__(self):\n return \"{0} * {1}\".format(self.k1, self.k2)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF", + "name": "RBF", + "qname": "sklearn.gaussian_process.kernels.RBF", + "decorators": [], + "superclasses": ["StationaryKernelMixin", "NormalizedKernelMixin", "Kernel"], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/RBF/__init__", + "scikit-learn/sklearn.gaussian_process.kernels/RBF/anisotropic@getter", + "scikit-learn/sklearn.gaussian_process.kernels/RBF/hyperparameter_length_scale@getter", + "scikit-learn/sklearn.gaussian_process.kernels/RBF/__call__", + "scikit-learn/sklearn.gaussian_process.kernels/RBF/__repr__" + ], + "is_public": true, + "reexported_by": [], + "description": "Radial-basis function kernel (aka squared-exponential kernel).\n\nThe RBF kernel is a stationary kernel. It is also known as the\n\"squared exponential\" kernel. It is parameterized by a length scale\nparameter :math:`l>0`, which can either be a scalar (isotropic variant\nof the kernel) or a vector with the same number of dimensions as the inputs\nX (anisotropic variant of the kernel). The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\exp\\left(- \\frac{d(x_i, x_j)^2}{2l^2} \\right)\n\nwhere :math:`l` is the length scale of the kernel and\n:math:`d(\\cdot,\\cdot)` is the Euclidean distance.\nFor advice on how to set the length scale parameter, see e.g. [1]_.\n\nThis kernel is infinitely differentiable, which implies that GPs with this\nkernel as covariance function have mean square derivatives of all orders,\nand are thus very smooth.\nSee [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18", + "docstring": "Radial-basis function kernel (aka squared-exponential kernel).\n\nThe RBF kernel is a stationary kernel. It is also known as the\n\"squared exponential\" kernel. It is parameterized by a length scale\nparameter :math:`l>0`, which can either be a scalar (isotropic variant\nof the kernel) or a vector with the same number of dimensions as the inputs\nX (anisotropic variant of the kernel). The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\exp\\left(- \\frac{d(x_i, x_j)^2}{2l^2} \\right)\n\nwhere :math:`l` is the length scale of the kernel and\n:math:`d(\\cdot,\\cdot)` is the Euclidean distance.\nFor advice on how to set the length scale parameter, see e.g. [1]_.\n\nThis kernel is infinitely differentiable, which implies that GPs with this\nkernel as covariance function have mean square derivatives of all orders,\nand are thus very smooth.\nSee [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nlength_scale : float or ndarray of shape (n_features,), default=1.0\n The length scale of the kernel. If a float, an isotropic kernel is\n used. If an array, an anisotropic kernel is used where each dimension\n of l defines the length-scale of the respective feature dimension.\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nReferences\n----------\n.. [1] `David Duvenaud (2014). \"The Kernel Cookbook:\n Advice on Covariance functions\".\n `_\n\n.. [2] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". The MIT Press.\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import RBF\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * RBF(1.0)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8354..., 0.03228..., 0.1322...],\n [0.7906..., 0.0652..., 0.1441...]])", + "code": "class RBF(StationaryKernelMixin, NormalizedKernelMixin, Kernel):\n \"\"\"Radial-basis function kernel (aka squared-exponential kernel).\n\n The RBF kernel is a stationary kernel. It is also known as the\n \"squared exponential\" kernel. It is parameterized by a length scale\n parameter :math:`l>0`, which can either be a scalar (isotropic variant\n of the kernel) or a vector with the same number of dimensions as the inputs\n X (anisotropic variant of the kernel). The kernel is given by:\n\n .. math::\n k(x_i, x_j) = \\\\exp\\\\left(- \\\\frac{d(x_i, x_j)^2}{2l^2} \\\\right)\n\n where :math:`l` is the length scale of the kernel and\n :math:`d(\\\\cdot,\\\\cdot)` is the Euclidean distance.\n For advice on how to set the length scale parameter, see e.g. [1]_.\n\n This kernel is infinitely differentiable, which implies that GPs with this\n kernel as covariance function have mean square derivatives of all orders,\n and are thus very smooth.\n See [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n length_scale : float or ndarray of shape (n_features,), default=1.0\n The length scale of the kernel. 
If a float, an isotropic kernel is\n used. If an array, an anisotropic kernel is used where each dimension\n of l defines the length-scale of the respective feature dimension.\n\n length_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\n References\n ----------\n .. [1] `David Duvenaud (2014). \"The Kernel Cookbook:\n Advice on Covariance functions\".\n `_\n\n .. [2] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". The MIT Press.\n `_\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.gaussian_process import GaussianProcessClassifier\n >>> from sklearn.gaussian_process.kernels import RBF\n >>> X, y = load_iris(return_X_y=True)\n >>> kernel = 1.0 * RBF(1.0)\n >>> gpc = GaussianProcessClassifier(kernel=kernel,\n ... random_state=0).fit(X, y)\n >>> gpc.score(X, y)\n 0.9866...\n >>> gpc.predict_proba(X[:2,:])\n array([[0.8354..., 0.03228..., 0.1322...],\n [0.7906..., 0.0652..., 0.1441...]])\n \"\"\"\n def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)):\n self.length_scale = length_scale\n self.length_scale_bounds = length_scale_bounds\n\n @property\n def anisotropic(self):\n return np.iterable(self.length_scale) and len(self.length_scale) > 1\n\n @property\n def hyperparameter_length_scale(self):\n if self.anisotropic:\n return Hyperparameter(\"length_scale\", \"numeric\",\n self.length_scale_bounds,\n len(self.length_scale))\n return Hyperparameter(\n \"length_scale\", \"numeric\", self.length_scale_bounds)\n\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when `eval_gradient`\n is True.\n \"\"\"\n X = np.atleast_2d(X)\n length_scale = _check_length_scale(X, self.length_scale)\n if Y is None:\n dists = pdist(X / length_scale, metric='sqeuclidean')\n K = np.exp(-.5 * dists)\n # convert from upper-triangular matrix to square matrix\n K = squareform(K)\n np.fill_diagonal(K, 1)\n else:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated when Y is None.\")\n dists = cdist(X / length_scale, Y / length_scale,\n metric='sqeuclidean')\n K = np.exp(-.5 * dists)\n\n if eval_gradient:\n if self.hyperparameter_length_scale.fixed:\n # Hyperparameter l kept fixed\n return K, np.empty((X.shape[0], X.shape[0], 0))\n elif not self.anisotropic or length_scale.shape[0] == 1:\n K_gradient = \\\n (K * squareform(dists))[:, :, np.newaxis]\n return K, K_gradient\n elif self.anisotropic:\n # We need to recompute the pairwise dimension-wise distances\n K_gradient = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 \\\n / (length_scale ** 2)\n K_gradient *= K[..., np.newaxis]\n return K, K_gradient\n else:\n return K\n\n def __repr__(self):\n if self.anisotropic:\n return \"{0}(length_scale=[{1}])\".format(\n self.__class__.__name__, \", \".join(map(\"{0:.3g}\".format,\n self.length_scale)))\n else: # isotropic\n return \"{0}(length_scale={1:.3g})\".format(\n self.__class__.__name__, np.ravel(self.length_scale)[0])", + "instance_attributes": [ + { + "name": "length_scale", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "length_scale_bounds", + "types": { + "kind": "NamedType", + "name": "tuple" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic", + "name": "RationalQuadratic", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic", + "decorators": [], + "superclasses": ["StationaryKernelMixin", "NormalizedKernelMixin", "Kernel"], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__init__", + "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_length_scale@getter", + "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_alpha@getter", + "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__call__", + "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__repr__" + ], + "is_public": true, + "reexported_by": [], + "description": "Rational Quadratic kernel.\n\nThe RationalQuadratic kernel can be seen as a scale mixture (an infinite\nsum) of RBF kernels with different characteristic length scales. It is\nparameterized by a length scale parameter :math:`l>0` and a scale\nmixture parameter :math:`\\alpha>0`. Only the isotropic variant\nwhere length_scale :math:`l` is a scalar is supported at the moment.\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\left(\n 1 + \\frac{d(x_i, x_j)^2 }{ 2\\alpha l^2}\\right)^{-\\alpha}\n\nwhere :math:`\\alpha` is the scale mixture parameter, :math:`l` is\nthe length scale of the kernel and :math:`d(\\cdot,\\cdot)` is the\nEuclidean distance.\nFor advice on how to set the parameters, see e.g. [1]_.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "Rational Quadratic kernel.\n\nThe RationalQuadratic kernel can be seen as a scale mixture (an infinite\nsum) of RBF kernels with different characteristic length scales. It is\nparameterized by a length scale parameter :math:`l>0` and a scale\nmixture parameter :math:`\\alpha>0`. 
Only the isotropic variant\nwhere length_scale :math:`l` is a scalar is supported at the moment.\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\left(\n 1 + \\frac{d(x_i, x_j)^2 }{ 2\\alpha l^2}\\right)^{-\\alpha}\n\nwhere :math:`\\alpha` is the scale mixture parameter, :math:`l` is\nthe length scale of the kernel and :math:`d(\\cdot,\\cdot)` is the\nEuclidean distance.\nFor advice on how to set the parameters, see e.g. [1]_.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nlength_scale : float > 0, default=1.0\n The length scale of the kernel.\n\nalpha : float > 0, default=1.0\n Scale mixture parameter\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nalpha_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'alpha'.\n If set to \"fixed\", 'alpha' cannot be changed during\n hyperparameter tuning.\n\nReferences\n----------\n.. [1] `David Duvenaud (2014). \"The Kernel Cookbook:\n Advice on Covariance functions\".\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import RationalQuadratic\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = RationalQuadratic(length_scale=1.0, alpha=1.5)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9733...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8881..., 0.0566..., 0.05518...],\n [0.8678..., 0.0707... , 0.0614...]])", + "code": "class RationalQuadratic(StationaryKernelMixin, NormalizedKernelMixin, Kernel):\n \"\"\"Rational Quadratic kernel.\n\n The RationalQuadratic kernel can be seen as a scale mixture (an infinite\n sum) of RBF kernels with different characteristic length scales. It is\n parameterized by a length scale parameter :math:`l>0` and a scale\n mixture parameter :math:`\\\alpha>0`. Only the isotropic variant\n where length_scale :math:`l` is a scalar is supported at the moment.\n The kernel is given by:\n\n .. math::\n k(x_i, x_j) = \\\left(\n 1 + \\\frac{d(x_i, x_j)^2 }{ 2\\\alpha l^2}\\\right)^{-\\\alpha}\n\n where :math:`\\\alpha` is the scale mixture parameter, :math:`l` is\n the length scale of the kernel and :math:`d(\\\cdot,\\\cdot)` is the\n Euclidean distance.\n For advice on how to set the parameters, see e.g. [1]_.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n length_scale : float > 0, default=1.0\n The length scale of the kernel.\n\n alpha : float > 0, default=1.0\n Scale mixture parameter\n\n length_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\n alpha_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'alpha'.\n If set to \"fixed\", 'alpha' cannot be changed during\n hyperparameter tuning.\n\n References\n ----------\n .. [1] `David Duvenaud (2014). 
\"The Kernel Cookbook:\n Advice on Covariance functions\".\n `_\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.gaussian_process import GaussianProcessClassifier\n >>> from sklearn.gaussian_process.kernels import Matern\n >>> X, y = load_iris(return_X_y=True)\n >>> kernel = RationalQuadratic(length_scale=1.0, alpha=1.5)\n >>> gpc = GaussianProcessClassifier(kernel=kernel,\n ... random_state=0).fit(X, y)\n >>> gpc.score(X, y)\n 0.9733...\n >>> gpc.predict_proba(X[:2,:])\n array([[0.8881..., 0.0566..., 0.05518...],\n [0.8678..., 0.0707... , 0.0614...]])\n \"\"\"\n def __init__(self, length_scale=1.0, alpha=1.0,\n length_scale_bounds=(1e-5, 1e5), alpha_bounds=(1e-5, 1e5)):\n self.length_scale = length_scale\n self.alpha = alpha\n self.length_scale_bounds = length_scale_bounds\n self.alpha_bounds = alpha_bounds\n\n @property\n def hyperparameter_length_scale(self):\n return Hyperparameter(\n \"length_scale\", \"numeric\", self.length_scale_bounds)\n\n @property\n def hyperparameter_alpha(self):\n return Hyperparameter(\"alpha\", \"numeric\", self.alpha_bounds)\n\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims)\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when eval_gradient\n is True.\n \"\"\"\n if len(np.atleast_1d(self.length_scale)) > 1:\n raise AttributeError(\n \"RationalQuadratic kernel only supports isotropic version, \"\n \"please use a single scalar for length_scale\")\n X = np.atleast_2d(X)\n if Y is None:\n dists = squareform(pdist(X, metric='sqeuclidean'))\n tmp = dists / (2 * self.alpha * self.length_scale ** 2)\n base = (1 + tmp)\n K = base ** -self.alpha\n np.fill_diagonal(K, 1)\n else:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated when Y is None.\")\n dists = cdist(X, Y, metric='sqeuclidean')\n K = (1 + dists / (2 * self.alpha * self.length_scale ** 2)) \\\n ** -self.alpha\n\n if eval_gradient:\n # gradient with respect to length_scale\n if not self.hyperparameter_length_scale.fixed:\n length_scale_gradient = \\\n dists * K / (self.length_scale ** 2 * base)\n length_scale_gradient = length_scale_gradient[:, :, np.newaxis]\n else: # l is kept fixed\n length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))\n\n # gradient with respect to alpha\n if not self.hyperparameter_alpha.fixed:\n alpha_gradient = \\\n K * (-self.alpha * np.log(base)\n + dists / (2 * self.length_scale ** 2 * base))\n alpha_gradient = alpha_gradient[:, :, np.newaxis]\n else: # alpha is kept fixed\n alpha_gradient = np.empty((K.shape[0], K.shape[1], 0))\n\n return K, np.dstack((alpha_gradient, length_scale_gradient))\n else:\n return K\n\n def __repr__(self):\n return \"{0}(alpha={1:.3g}, length_scale={2:.3g})\".format(\n self.__class__.__name__, self.alpha, self.length_scale)", + "instance_attributes": [ + { + "name": "length_scale", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "length_scale_bounds", + "types": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "name": "alpha_bounds", + "types": { + "kind": "NamedType", + "name": "tuple" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/StationaryKernelMixin", + "name": "StationaryKernelMixin", + "qname": "sklearn.gaussian_process.kernels.StationaryKernelMixin", + "decorators": [], + "superclasses": [], + "methods": ["scikit-learn/sklearn.gaussian_process.kernels/StationaryKernelMixin/is_stationary"], + "is_public": true, + "reexported_by": [], + "description": "Mixin for kernels which are stationary: k(X, Y)= f(X-Y).\n\n.. versionadded:: 0.18", + "docstring": "Mixin for kernels which are stationary: k(X, Y)= f(X-Y).\n\n.. versionadded:: 0.18", + "code": "class StationaryKernelMixin:\n \"\"\"Mixin for kernels which are stationary: k(X, Y)= f(X-Y).\n\n .. versionadded:: 0.18\n \"\"\"\n\n def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary. \"\"\"\n return True", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Sum", + "name": "Sum", + "qname": "sklearn.gaussian_process.kernels.Sum", + "decorators": [], + "superclasses": ["KernelOperator"], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/Sum/__call__", + "scikit-learn/sklearn.gaussian_process.kernels/Sum/diag", + "scikit-learn/sklearn.gaussian_process.kernels/Sum/__repr__" + ], + "is_public": true, + "reexported_by": [], + "description": "The `Sum` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. 
math::\n k_{sum}(X, Y) = k_1(X, Y) + k_2(X, Y)\n\nNote that the `__add__` magic method is overridden, so\n`Sum(RBF(), RBF())` is equivalent to using the + operator\nwith `RBF() + RBF()`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "The `Sum` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. math::\n k_{sum}(X, Y) = k_1(X, Y) + k_2(X, Y)\n\nNote that the `__add__` magic method is overridden, so\n`Sum(RBF(), RBF())` is equivalent to using the + operator\nwith `RBF() + RBF()`.\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nk1 : Kernel\n The first base-kernel of the sum-kernel\n\nk2 : Kernel\n The second base-kernel of the sum-kernel\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import RBF, Sum, ConstantKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = Sum(ConstantKernel(2), RBF())\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n1.0\n>>> kernel\n1.41**2 + RBF(length_scale=1)", + "code": "class Sum(KernelOperator):\n \"\"\"The `Sum` kernel takes two kernels :math:`k_1` and :math:`k_2`\n and combines them via\n\n .. math::\n k_{sum}(X, Y) = k_1(X, Y) + k_2(X, Y)\n\n Note that the `__add__` magic method is overridden, so\n `Sum(RBF(), RBF())` is equivalent to using the + operator\n with `RBF() + RBF()`.\n\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n k1 : Kernel\n The first base-kernel of the sum-kernel\n\n k2 : Kernel\n The second base-kernel of the sum-kernel\n\n Examples\n --------\n >>> from sklearn.datasets import make_friedman2\n >>> from sklearn.gaussian_process import GaussianProcessRegressor\n >>> from sklearn.gaussian_process.kernels import RBF, Sum, ConstantKernel\n >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n >>> kernel = Sum(ConstantKernel(2), RBF())\n >>> gpr = GaussianProcessRegressor(kernel=kernel,\n ... random_state=0).fit(X, y)\n >>> gpr.score(X, y)\n 1.0\n >>> kernel\n 1.41**2 + RBF(length_scale=1)\n \"\"\"\n\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\n Y : array-like of shape (n_samples_Y, n_features) or list of object,\\\n default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when `eval_gradient`\n is True.\n \"\"\"\n if eval_gradient:\n K1, K1_gradient = self.k1(X, Y, eval_gradient=True)\n K2, K2_gradient = self.k2(X, Y, eval_gradient=True)\n return K1 + K2, np.dstack((K1_gradient, K2_gradient))\n else:\n return self.k1(X, Y) + self.k2(X, Y)\n\n def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to `np.diag(self(X))`; however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n return self.k1.diag(X) + self.k2.diag(X)\n\n def __repr__(self):\n return \"{0} + {1}\".format(self.k1, self.k2)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel", + "name": "WhiteKernel", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel", + "decorators": [], + "superclasses": ["StationaryKernelMixin", "GenericKernelMixin", "Kernel"], + "methods": [ + "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__init__", + "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/hyperparameter_noise_level@getter", + "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__call__", + "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/diag", + "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__repr__" + ], + "is_public": true, + "reexported_by": [], + "description": "White kernel.\n\nThe main use-case of this kernel is as part of a sum-kernel where it\nexplains the noise of the signal as independently and identically\nnormally-distributed. The parameter noise_level equals the variance of this\nnoise.\n\n.. math::\n k(x_1, x_2) = noise\\_level \\text{ if } x_i == x_j \\text{ else } 0\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "White kernel.\n\nThe main use-case of this kernel is as part of a sum-kernel where it\nexplains the noise of the signal as independently and identically\nnormally-distributed. The parameter noise_level equals the variance of this\nnoise.\n\n.. math::\n k(x_1, x_2) = noise\\_level \\text{ if } x_i == x_j \\text{ else } 0\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nnoise_level : float, default=1.0\n Parameter controlling the noise level (variance)\n\nnoise_level_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'noise_level'.\n If set to \"fixed\", 'noise_level' cannot be changed during\n hyperparameter tuning.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel(noise_level=0.5)\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1... ]), array([316.6..., 316.6...]))", + "code": "class WhiteKernel(StationaryKernelMixin, GenericKernelMixin,\n Kernel):\n \"\"\"White kernel.\n\n The main use-case of this kernel is as part of a sum-kernel where it\n explains the noise of the signal as independently and identically\n normally-distributed. 
The parameter noise_level equals the variance of this\n noise.\n\n .. math::\n k(x_1, x_2) = noise\\\_level \\\text{ if } x_i == x_j \\\text{ else } 0\n\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n noise_level : float, default=1.0\n Parameter controlling the noise level (variance)\n\n noise_level_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'noise_level'.\n If set to \"fixed\", 'noise_level' cannot be changed during\n hyperparameter tuning.\n\n Examples\n --------\n >>> from sklearn.datasets import make_friedman2\n >>> from sklearn.gaussian_process import GaussianProcessRegressor\n >>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n >>> kernel = DotProduct() + WhiteKernel(noise_level=0.5)\n >>> gpr = GaussianProcessRegressor(kernel=kernel,\n ... random_state=0).fit(X, y)\n >>> gpr.score(X, y)\n 0.3680...\n >>> gpr.predict(X[:2,:], return_std=True)\n (array([653.0..., 592.1... ]), array([316.6..., 316.6...]))\n \"\"\"\n def __init__(self, noise_level=1.0, noise_level_bounds=(1e-5, 1e5)):\n self.noise_level = noise_level\n self.noise_level_bounds = noise_level_bounds\n\n @property\n def hyperparameter_noise_level(self):\n return Hyperparameter(\n \"noise_level\", \"numeric\", self.noise_level_bounds)\n\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\n Y : array-like of shape (n_samples_Y, n_features) or list of object,\\\n default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when eval_gradient\n is True.\n \"\"\"\n if Y is not None and eval_gradient:\n raise ValueError(\"Gradient can only be evaluated when Y is None.\")\n\n if Y is None:\n K = self.noise_level * np.eye(_num_samples(X))\n if eval_gradient:\n if not self.hyperparameter_noise_level.fixed:\n return (K, self.noise_level\n * np.eye(_num_samples(X))[:, :, np.newaxis])\n else:\n return K, np.empty((_num_samples(X), _num_samples(X), 0))\n else:\n return K\n else:\n return np.zeros((_num_samples(X), _num_samples(Y)))\n\n def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n return np.full(_num_samples(X), self.noise_level,\n dtype=np.array(self.noise_level).dtype)\n\n def __repr__(self):\n return \"{0}(noise_level={1:.3g})\".format(self.__class__.__name__,\n self.noise_level)", + "instance_attributes": [ + { + "name": "noise_level", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "noise_level_bounds", + "types": { + "kind": "NamedType", + "name": "tuple" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator", + "name": "MissingIndicator", + "qname": "sklearn.impute._base.MissingIndicator", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.impute._base/MissingIndicator/__init__", + "scikit-learn/sklearn.impute._base/MissingIndicator/_get_missing_features_info", + "scikit-learn/sklearn.impute._base/MissingIndicator/_validate_input", + "scikit-learn/sklearn.impute._base/MissingIndicator/_fit", + "scikit-learn/sklearn.impute._base/MissingIndicator/fit", + "scikit-learn/sklearn.impute._base/MissingIndicator/transform", + "scikit-learn/sklearn.impute._base/MissingIndicator/fit_transform", + "scikit-learn/sklearn.impute._base/MissingIndicator/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Binary indicators for missing values.\n\nNote that this component typically should not be used in a vanilla\n:class:`Pipeline` consisting of transformers and a classifier, but rather\ncould be added using a :class:`FeatureUnion` or :class:`ColumnTransformer`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "docstring": "Binary indicators for missing values.\n\nNote that this component typically should not be used in a vanilla\n:class:`Pipeline` consisting of transformers and a classifier, but rather\ncould be added using a :class:`FeatureUnion` or :class:`ColumnTransformer`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nmissing_values : int, float, string, np.nan or None, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. 
For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\nfeatures : {'missing-only', 'all'}, default='missing-only'\n Whether the imputer mask should represent all or a subset of\n features.\n\n - If 'missing-only' (default), the imputer mask will only represent\n features containing missing values during fit time.\n - If 'all', the imputer mask will represent all features.\n\nsparse : bool or 'auto', default='auto'\n Whether the imputer mask format should be sparse or dense.\n\n - If 'auto' (default), the imputer mask will be of same type as\n input.\n - If True, the imputer mask will be a sparse matrix.\n - If False, the imputer mask will be a numpy array.\n\nerror_on_new : bool, default=True\n If True, transform will raise an error when there are features with\n missing values in transform that have no missing values in fit. This is\n applicable only when `features='missing-only'`.\n\nAttributes\n----------\nfeatures_ : ndarray, shape (n_missing_features,) or (n_features,)\n The features indices which will be returned when calling ``transform``.\n They are computed during ``fit``. For ``features='all'``, it is equal\n to ``range(n_features)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import MissingIndicator\n>>> X1 = np.array([[np.nan, 1, 3],\n... [4, 0, np.nan],\n... [8, 1, 0]])\n>>> X2 = np.array([[5, 1, np.nan],\n... [np.nan, 2, 3],\n... [2, 4, 0]])\n>>> indicator = MissingIndicator()\n>>> indicator.fit(X1)\nMissingIndicator()\n>>> X2_tr = indicator.transform(X2)\n>>> X2_tr\narray([[False, True],\n [ True, False],\n [False, False]])", + "code": "class MissingIndicator(TransformerMixin, BaseEstimator):\n \"\"\"Binary indicators for missing values.\n\n Note that this component typically should not be used in a vanilla\n :class:`Pipeline` consisting of transformers and a classifier, but rather\n could be added using a :class:`FeatureUnion` or :class:`ColumnTransformer`.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.20\n\n Parameters\n ----------\n missing_values : int, float, string, np.nan or None, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\n features : {'missing-only', 'all'}, default='missing-only'\n Whether the imputer mask should represent all or a subset of\n features.\n\n - If 'missing-only' (default), the imputer mask will only represent\n features containing missing values during fit time.\n - If 'all', the imputer mask will represent all features.\n\n sparse : bool or 'auto', default='auto'\n Whether the imputer mask format should be sparse or dense.\n\n - If 'auto' (default), the imputer mask will be of same type as\n input.\n - If True, the imputer mask will be a sparse matrix.\n - If False, the imputer mask will be a numpy array.\n\n error_on_new : bool, default=True\n If True, transform will raise an error when there are features with\n missing values in transform that have no missing values in fit. This is\n applicable only when `features='missing-only'`.\n\n Attributes\n ----------\n features_ : ndarray, shape (n_missing_features,) or (n_features,)\n The features indices which will be returned when calling ``transform``.\n They are computed during ``fit``. 
For ``features='all'``, it is equal\n to ``range(n_features)``.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.impute import MissingIndicator\n >>> X1 = np.array([[np.nan, 1, 3],\n ... [4, 0, np.nan],\n ... [8, 1, 0]])\n >>> X2 = np.array([[5, 1, np.nan],\n ... [np.nan, 2, 3],\n ... [2, 4, 0]])\n >>> indicator = MissingIndicator()\n >>> indicator.fit(X1)\n MissingIndicator()\n >>> X2_tr = indicator.transform(X2)\n >>> X2_tr\n array([[False, True],\n [ True, False],\n [False, False]])\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, missing_values=np.nan, features=\"missing-only\",\n sparse=\"auto\", error_on_new=True):\n self.missing_values = missing_values\n self.features = features\n self.sparse = sparse\n self.error_on_new = error_on_new\n\n def _get_missing_features_info(self, X):\n \"\"\"Compute the imputer mask and the indices of the features\n containing missing values.\n\n Parameters\n ----------\n X : {ndarray or sparse matrix}, shape (n_samples, n_features)\n The input data with missing values. Note that ``X`` has been\n checked in ``fit`` and ``transform`` before to call this function.\n\n Returns\n -------\n imputer_mask : {ndarray or sparse matrix}, shape \\\n (n_samples, n_features)\n The imputer mask of the original data.\n\n features_with_missing : ndarray, shape (n_features_with_missing)\n The features containing missing values.\n\n \"\"\"\n if not self._precomputed:\n imputer_mask = _get_mask(X, self.missing_values)\n else:\n imputer_mask = X\n\n if sp.issparse(X):\n imputer_mask.eliminate_zeros()\n\n if self.features == 'missing-only':\n n_missing = imputer_mask.getnnz(axis=0)\n\n if self.sparse is False:\n imputer_mask = imputer_mask.toarray()\n elif imputer_mask.format == 'csr':\n imputer_mask = imputer_mask.tocsc()\n else:\n if not self._precomputed:\n imputer_mask = _get_mask(X, self.missing_values)\n else:\n imputer_mask = X\n\n if self.features == 'missing-only':\n n_missing = imputer_mask.sum(axis=0)\n\n if self.sparse is True:\n imputer_mask = sp.csc_matrix(imputer_mask)\n\n if self.features == 'all':\n features_indices = np.arange(X.shape[1])\n else:\n features_indices = np.flatnonzero(n_missing)\n\n return imputer_mask, features_indices\n\n def _validate_input(self, X, in_fit):\n if not is_scalar_nan(self.missing_values):\n force_all_finite = True\n else:\n force_all_finite = \"allow-nan\"\n X = self._validate_data(X, reset=in_fit,\n accept_sparse=('csc', 'csr'), dtype=None,\n force_all_finite=force_all_finite)\n _check_inputs_dtype(X, self.missing_values)\n if X.dtype.kind not in (\"i\", \"u\", \"f\", \"O\"):\n raise ValueError(\"MissingIndicator does not support data with \"\n \"dtype {0}. Please provide either a numeric array\"\n \" (with a floating point or integer dtype) or \"\n \"categorical data represented either as an array \"\n \"with integer dtype or an array of string values \"\n \"with an object dtype.\".format(X.dtype))\n\n if sp.issparse(X) and self.missing_values == 0:\n # missing_values = 0 not allowed with sparse data as it would\n # force densification\n raise ValueError(\"Sparse input with missing_values=0 is \"\n \"not supported. 
Provide a dense \"\n \"array instead.\")\n\n return X\n\n def _fit(self, X, y=None, precomputed=False):\n \"\"\"Fit the transformer on X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n If `precomputed` is True, then `X` is a mask of the\n input data.\n\n precomputed : bool\n Whether the input data is a mask.\n\n Returns\n -------\n imputer_mask : {ndarray or sparse matrix}, shape (n_samples, \\\n n_features)\n The imputer mask of the original data.\n\n \"\"\"\n if precomputed:\n if not (hasattr(X, 'dtype') and X.dtype.kind == 'b'):\n raise ValueError(\"precomputed is True but the input data is \"\n \"not a mask\")\n self._precomputed = True\n else:\n self._precomputed = False\n\n # Need not validate X again as it would have already been validated\n # in the Imputer calling MissingIndicator\n if not self._precomputed:\n X = self._validate_input(X, in_fit=True)\n\n self._n_features = X.shape[1]\n\n if self.features not in ('missing-only', 'all'):\n raise ValueError(\"'features' has to be either 'missing-only' or \"\n \"'all'. Got {} instead.\".format(self.features))\n\n if not ((isinstance(self.sparse, str) and\n self.sparse == \"auto\") or isinstance(self.sparse, bool)):\n raise ValueError(\"'sparse' has to be a boolean or 'auto'. \"\n \"Got {!r} instead.\".format(self.sparse))\n\n missing_features_info = self._get_missing_features_info(X)\n self.features_ = missing_features_info[1]\n\n return missing_features_info[0]\n\n def fit(self, X, y=None):\n \"\"\"Fit the transformer on X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\n Returns\n -------\n self : object\n Returns self.\n \"\"\"\n self._fit(X, y)\n\n return self\n\n def transform(self, X):\n \"\"\"Generate missing values indicator for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete.\n\n Returns\n -------\n Xt : {ndarray or sparse matrix}, shape (n_samples, n_features) \\\n or (n_samples, n_features_with_missing)\n The missing indicator for input data. 
The data type of ``Xt``\n will be boolean.\n\n \"\"\"\n check_is_fitted(self)\n\n # Need not validate X again as it would have already been validated\n # in the Imputer calling MissingIndicator\n if not self._precomputed:\n X = self._validate_input(X, in_fit=False)\n else:\n if not (hasattr(X, 'dtype') and X.dtype.kind == 'b'):\n raise ValueError(\"precomputed is True but the input data is \"\n \"not a mask\")\n\n imputer_mask, features = self._get_missing_features_info(X)\n\n if self.features == \"missing-only\":\n features_diff_fit_trans = np.setdiff1d(features, self.features_)\n if (self.error_on_new and features_diff_fit_trans.size > 0):\n raise ValueError(\"The features {} have missing values \"\n \"in transform but have no missing values \"\n \"in fit.\".format(features_diff_fit_trans))\n\n if self.features_.size < self._n_features:\n imputer_mask = imputer_mask[:, self.features_]\n\n return imputer_mask\n\n def fit_transform(self, X, y=None):\n \"\"\"Generate missing values indicator for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete.\n\n Returns\n -------\n Xt : {ndarray or sparse matrix}, shape (n_samples, n_features) \\\n or (n_samples, n_features_with_missing)\n The missing indicator for input data. The data type of ``Xt``\n will be boolean.\n\n \"\"\"\n imputer_mask = self._fit(X, y)\n\n if self.features_.size < self._n_features:\n imputer_mask = imputer_mask[:, self.features_]\n\n return imputer_mask\n\n def _more_tags(self):\n return {\n \"allow_nan\": True,\n \"X_types\": [\"2darray\", \"string\"],\n \"preserves_dtype\": [],\n }", + "instance_attributes": [ + { + "name": "features", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "sparse", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "error_on_new", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "_precomputed", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer", + "name": "SimpleImputer", + "qname": "sklearn.impute._base.SimpleImputer", + "decorators": [], + "superclasses": ["_BaseImputer"], + "methods": [ + "scikit-learn/sklearn.impute._base/SimpleImputer/__init__", + "scikit-learn/sklearn.impute._base/SimpleImputer/_validate_input", + "scikit-learn/sklearn.impute._base/SimpleImputer/fit", + "scikit-learn/sklearn.impute._base/SimpleImputer/_sparse_fit", + "scikit-learn/sklearn.impute._base/SimpleImputer/_dense_fit", + "scikit-learn/sklearn.impute._base/SimpleImputer/transform", + "scikit-learn/sklearn.impute._base/SimpleImputer/inverse_transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Imputation transformer for completing missing values.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n estimator which is now removed.", + "docstring": "Imputation transformer for completing missing values.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n estimator which is now removed.\n\nParameters\n----------\nmissing_values : int, float, str, np.nan or None, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. 
For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\nstrategy : string, default='mean'\n The imputation strategy.\n\n - If \"mean\", then replace missing values using the mean along\n each column. Can only be used with numeric data.\n - If \"median\", then replace missing values using the median along\n each column. Can only be used with numeric data.\n - If \"most_frequent\", then replace missing using the most frequent\n value along each column. Can be used with strings or numeric data.\n If there is more than one such value, only the smallest is returned.\n - If \"constant\", then replace missing values with fill_value. Can be\n used with strings or numeric data.\n\n .. versionadded:: 0.20\n strategy=\"constant\" for fixed value imputation.\n\nfill_value : string or numerical value, default=None\n When strategy == \"constant\", fill_value is used to replace all\n occurrences of missing_values.\n If left to the default, fill_value will be 0 when imputing numerical\n data and \"missing_value\" for strings or object data types.\n\nverbose : integer, default=0\n Controls the verbosity of the imputer.\n\ncopy : boolean, default=True\n If True, a copy of X will be created. If False, imputation will\n be done in-place whenever possible. Note that, in the following cases,\n a new copy will always be made, even if `copy=False`:\n\n - If X is not an array of floating values;\n - If X is encoded as a CSR matrix;\n - If add_indicator=True.\n\nadd_indicator : boolean, default=False\n If True, a :class:`MissingIndicator` transform will stack onto output\n of the imputer's transform. This allows a predictive estimator\n to account for missingness despite imputation. If a feature has no\n missing values at fit/train time, the feature won't appear on\n the missing indicator even if there are missing values at\n transform/test time.\n\nAttributes\n----------\nstatistics_ : array of shape (n_features,)\n The imputation fill value for each feature.\n Computing statistics can result in `np.nan` values.\n During :meth:`transform`, features corresponding to `np.nan`\n statistics will be discarded.\n\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n ``None`` if add_indicator is False.\n\nSee Also\n--------\nIterativeImputer : Multivariate imputation of missing values.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import SimpleImputer\n>>> imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n>>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\nSimpleImputer()\n>>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n>>> print(imp_mean.transform(X))\n[[ 7. 2. 3. ]\n [ 4. 3.5 6. ]\n [10. 3.5 9. ]]\n\nNotes\n-----\nColumns which only contained missing values at :meth:`fit` are discarded\nupon :meth:`transform` if strategy is not \"constant\".", + "code": "class SimpleImputer(_BaseImputer):\n \"\"\"Imputation transformer for completing missing values.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.20\n `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n estimator which is now removed.\n\n Parameters\n ----------\n missing_values : int, float, str, np.nan or None, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. 
For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\n strategy : string, default='mean'\n The imputation strategy.\n\n - If \"mean\", then replace missing values using the mean along\n each column. Can only be used with numeric data.\n - If \"median\", then replace missing values using the median along\n each column. Can only be used with numeric data.\n - If \"most_frequent\", then replace missing using the most frequent\n value along each column. Can be used with strings or numeric data.\n If there is more than one such value, only the smallest is returned.\n - If \"constant\", then replace missing values with fill_value. Can be\n used with strings or numeric data.\n\n .. versionadded:: 0.20\n strategy=\"constant\" for fixed value imputation.\n\n fill_value : string or numerical value, default=None\n When strategy == \"constant\", fill_value is used to replace all\n occurrences of missing_values.\n If left to the default, fill_value will be 0 when imputing numerical\n data and \"missing_value\" for strings or object data types.\n\n verbose : integer, default=0\n Controls the verbosity of the imputer.\n\n copy : boolean, default=True\n If True, a copy of X will be created. If False, imputation will\n be done in-place whenever possible. Note that, in the following cases,\n a new copy will always be made, even if `copy=False`:\n\n - If X is not an array of floating values;\n - If X is encoded as a CSR matrix;\n - If add_indicator=True.\n\n add_indicator : boolean, default=False\n If True, a :class:`MissingIndicator` transform will stack onto output\n of the imputer's transform. This allows a predictive estimator\n to account for missingness despite imputation. If a feature has no\n missing values at fit/train time, the feature won't appear on\n the missing indicator even if there are missing values at\n transform/test time.\n\n Attributes\n ----------\n statistics_ : array of shape (n_features,)\n The imputation fill value for each feature.\n Computing statistics can result in `np.nan` values.\n During :meth:`transform`, features corresponding to `np.nan`\n statistics will be discarded.\n\n indicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n ``None`` if add_indicator is False.\n\n See Also\n --------\n IterativeImputer : Multivariate imputation of missing values.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.impute import SimpleImputer\n >>> imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n >>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\n SimpleImputer()\n >>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n >>> print(imp_mean.transform(X))\n [[ 7. 2. 3. ]\n [ 4. 3.5 6. ]\n [10. 3.5 9. 
]]\n\n Notes\n -----\n Columns which only contained missing values at :meth:`fit` are discarded\n upon :meth:`transform` if strategy is not \"constant\".\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, missing_values=np.nan, strategy=\"mean\",\n fill_value=None, verbose=0, copy=True, add_indicator=False):\n super().__init__(\n missing_values=missing_values,\n add_indicator=add_indicator\n )\n self.strategy = strategy\n self.fill_value = fill_value\n self.verbose = verbose\n self.copy = copy\n\n def _validate_input(self, X, in_fit):\n allowed_strategies = [\"mean\", \"median\", \"most_frequent\", \"constant\"]\n if self.strategy not in allowed_strategies:\n raise ValueError(\"Can only use these strategies: {0} \"\n \" got strategy={1}\".format(allowed_strategies,\n self.strategy))\n\n if self.strategy in (\"most_frequent\", \"constant\"):\n # If input is a list of strings, dtype = object.\n # Otherwise ValueError is raised in SimpleImputer\n # with strategy='most_frequent' or 'constant'\n # because the list is converted to Unicode numpy array\n if isinstance(X, list) and \\\n any(isinstance(elem, str) for row in X for elem in row):\n dtype = object\n else:\n dtype = None\n else:\n dtype = FLOAT_DTYPES\n\n if not is_scalar_nan(self.missing_values):\n force_all_finite = True\n else:\n force_all_finite = \"allow-nan\"\n\n try:\n X = self._validate_data(X, reset=in_fit,\n accept_sparse='csc', dtype=dtype,\n force_all_finite=force_all_finite,\n copy=self.copy)\n except ValueError as ve:\n if \"could not convert\" in str(ve):\n new_ve = ValueError(\"Cannot use {} strategy with non-numeric \"\n \"data:\\n{}\".format(self.strategy, ve))\n raise new_ve from None\n else:\n raise ve\n\n _check_inputs_dtype(X, self.missing_values)\n if X.dtype.kind not in (\"i\", \"u\", \"f\", \"O\"):\n raise ValueError(\"SimpleImputer does not support data with dtype \"\n \"{0}. Please provide either a numeric array (with\"\n \" a floating point or integer dtype) or \"\n \"categorical data represented either as an array \"\n \"with integer dtype or an array of string values \"\n \"with an object dtype.\".format(X.dtype))\n\n return X\n\n def fit(self, X, y=None):\n \"\"\"Fit the imputer on X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\n Returns\n -------\n self : SimpleImputer\n \"\"\"\n X = self._validate_input(X, in_fit=True)\n\n # default fill_value is 0 for numerical input and \"missing_value\"\n # otherwise\n if self.fill_value is None:\n if X.dtype.kind in (\"i\", \"u\", \"f\"):\n fill_value = 0\n else:\n fill_value = \"missing_value\"\n else:\n fill_value = self.fill_value\n\n # fill_value should be numerical in case of numerical input\n if (self.strategy == \"constant\" and\n X.dtype.kind in (\"i\", \"u\", \"f\") and\n not isinstance(fill_value, numbers.Real)):\n raise ValueError(\"'fill_value'={0} is invalid. Expected a \"\n \"numerical value when imputing numerical \"\n \"data\".format(fill_value))\n\n if sp.issparse(X):\n # missing_values = 0 not allowed with sparse data as it would\n # force densification\n if self.missing_values == 0:\n raise ValueError(\"Imputation not possible when missing_values \"\n \"== 0 and input is sparse. 
Provide a dense \"\n \"array instead.\")\n else:\n self.statistics_ = self._sparse_fit(X,\n self.strategy,\n self.missing_values,\n fill_value)\n\n else:\n self.statistics_ = self._dense_fit(X,\n self.strategy,\n self.missing_values,\n fill_value)\n\n return self\n\n def _sparse_fit(self, X, strategy, missing_values, fill_value):\n \"\"\"Fit the transformer on sparse data.\"\"\"\n missing_mask = _get_mask(X, missing_values)\n mask_data = missing_mask.data\n n_implicit_zeros = X.shape[0] - np.diff(X.indptr)\n\n statistics = np.empty(X.shape[1])\n\n if strategy == \"constant\":\n # for constant strategy, self.statistcs_ is used to store\n # fill_value in each column\n statistics.fill(fill_value)\n else:\n for i in range(X.shape[1]):\n column = X.data[X.indptr[i]:X.indptr[i + 1]]\n mask_column = mask_data[X.indptr[i]:X.indptr[i + 1]]\n column = column[~mask_column]\n\n # combine explicit and implicit zeros\n mask_zeros = _get_mask(column, 0)\n column = column[~mask_zeros]\n n_explicit_zeros = mask_zeros.sum()\n n_zeros = n_implicit_zeros[i] + n_explicit_zeros\n\n if strategy == \"mean\":\n s = column.size + n_zeros\n statistics[i] = np.nan if s == 0 else column.sum() / s\n\n elif strategy == \"median\":\n statistics[i] = _get_median(column,\n n_zeros)\n\n elif strategy == \"most_frequent\":\n statistics[i] = _most_frequent(column,\n 0,\n n_zeros)\n super()._fit_indicator(missing_mask)\n\n return statistics\n\n def _dense_fit(self, X, strategy, missing_values, fill_value):\n \"\"\"Fit the transformer on dense data.\"\"\"\n missing_mask = _get_mask(X, missing_values)\n masked_X = ma.masked_array(X, mask=missing_mask)\n\n super()._fit_indicator(missing_mask)\n\n # Mean\n if strategy == \"mean\":\n mean_masked = np.ma.mean(masked_X, axis=0)\n # Avoid the warning \"Warning: converting a masked element to nan.\"\n mean = np.ma.getdata(mean_masked)\n mean[np.ma.getmask(mean_masked)] = np.nan\n\n return mean\n\n # Median\n elif strategy == \"median\":\n median_masked = np.ma.median(masked_X, axis=0)\n # Avoid the warning \"Warning: converting a masked element to nan.\"\n median = np.ma.getdata(median_masked)\n median[np.ma.getmaskarray(median_masked)] = np.nan\n\n return median\n\n # Most frequent\n elif strategy == \"most_frequent\":\n # Avoid use of scipy.stats.mstats.mode due to the required\n # additional overhead and slow benchmarking performance.\n # See Issue 14325 and PR 14399 for full discussion.\n\n # To be able access the elements by columns\n X = X.transpose()\n mask = missing_mask.transpose()\n\n if X.dtype.kind == \"O\":\n most_frequent = np.empty(X.shape[0], dtype=object)\n else:\n most_frequent = np.empty(X.shape[0])\n\n for i, (row, row_mask) in enumerate(zip(X[:], mask[:])):\n row_mask = np.logical_not(row_mask).astype(bool)\n row = row[row_mask]\n most_frequent[i] = _most_frequent(row, np.nan, 0)\n\n return most_frequent\n\n # Constant\n elif strategy == \"constant\":\n # for constant strategy, self.statistcs_ is used to store\n # fill_value in each column\n return np.full(X.shape[1], fill_value, dtype=X.dtype)\n\n def transform(self, X):\n \"\"\"Impute all missing values in X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_input(X, in_fit=False)\n statistics = self.statistics_\n\n if X.shape[1] != statistics.shape[0]:\n raise ValueError(\"X has %d features per sample, expected %d\"\n % (X.shape[1], self.statistics_.shape[0]))\n\n # compute mask before 
eliminating invalid features\n missing_mask = _get_mask(X, self.missing_values)\n\n # Delete the invalid columns if strategy is not constant\n if self.strategy == \"constant\":\n valid_statistics = statistics\n valid_statistics_indexes = None\n else:\n # same as np.isnan but also works for object dtypes\n invalid_mask = _get_mask(statistics, np.nan)\n valid_mask = np.logical_not(invalid_mask)\n valid_statistics = statistics[valid_mask]\n valid_statistics_indexes = np.flatnonzero(valid_mask)\n\n if invalid_mask.any():\n missing = np.arange(X.shape[1])[invalid_mask]\n if self.verbose:\n warnings.warn(\"Deleting features without \"\n \"observed values: %s\" % missing)\n X = X[:, valid_statistics_indexes]\n\n # Do actual imputation\n if sp.issparse(X):\n if self.missing_values == 0:\n raise ValueError(\"Imputation not possible when missing_values \"\n \"== 0 and input is sparse. Provide a dense \"\n \"array instead.\")\n else:\n # if no invalid statistics are found, use the mask computed\n # before, else recompute mask\n if valid_statistics_indexes is None:\n mask = missing_mask.data\n else:\n mask = _get_mask(X.data, self.missing_values)\n indexes = np.repeat(\n np.arange(len(X.indptr) - 1, dtype=int),\n np.diff(X.indptr))[mask]\n\n X.data[mask] = valid_statistics[indexes].astype(X.dtype,\n copy=False)\n else:\n # use mask computed before eliminating invalid mask\n if valid_statistics_indexes is None:\n mask_valid_features = missing_mask\n else:\n mask_valid_features = missing_mask[:, valid_statistics_indexes]\n n_missing = np.sum(mask_valid_features, axis=0)\n values = np.repeat(valid_statistics, n_missing)\n coordinates = np.where(mask_valid_features.transpose())[::-1]\n\n X[coordinates] = values\n\n X_indicator = super()._transform_indicator(missing_mask)\n\n return super()._concatenate_indicator(X, X_indicator)\n\n def inverse_transform(self, X):\n \"\"\"Convert the data back to the original representation.\n\n Inverts the `transform` operation performed on an array.\n This operation can only be performed after :class:`SimpleImputer` is\n instantiated with `add_indicator=True`.\n\n Note that ``inverse_transform`` can only invert the transform in\n features that have binary indicators for missing values. If a feature\n has no missing values at ``fit`` time, the feature won't have a binary\n indicator, and the imputation done at ``transform`` time won't be\n inverted.\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n X : array-like of shape \\\n (n_samples, n_features + n_features_missing_indicator)\n The imputed data to be reverted to original data. It has to be\n an augmented array of imputed data and the missing indicator mask.\n\n Returns\n -------\n X_original : ndarray of shape (n_samples, n_features)\n The original X with missing values as it was prior\n to imputation.\n \"\"\"\n check_is_fitted(self)\n\n if not self.add_indicator:\n raise ValueError(\"'inverse_transform' works only when \"\n \"'SimpleImputer' is instantiated with \"\n \"'add_indicator=True'. 
\"\n f\"Got 'add_indicator={self.add_indicator}' \"\n \"instead.\")\n\n n_features_missing = len(self.indicator_.features_)\n non_empty_feature_count = X.shape[1] - n_features_missing\n array_imputed = X[:, :non_empty_feature_count].copy()\n missing_mask = X[:, non_empty_feature_count:].astype(bool)\n\n n_features_original = len(self.statistics_)\n shape_original = (X.shape[0], n_features_original)\n X_original = np.zeros(shape_original)\n X_original[:, self.indicator_.features_] = missing_mask\n full_mask = X_original.astype(bool)\n\n imputed_idx, original_idx = 0, 0\n while imputed_idx < len(array_imputed.T):\n if not np.all(X_original[:, original_idx]):\n X_original[:, original_idx] = array_imputed.T[imputed_idx]\n imputed_idx += 1\n original_idx += 1\n else:\n original_idx += 1\n\n X_original[full_mask] = self.missing_values\n return X_original", + "instance_attributes": [ + { + "name": "strategy", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "statistics_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer", + "name": "_BaseImputer", + "qname": "sklearn.impute._base._BaseImputer", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.impute._base/_BaseImputer/__init__", + "scikit-learn/sklearn.impute._base/_BaseImputer/_fit_indicator", + "scikit-learn/sklearn.impute._base/_BaseImputer/_transform_indicator", + "scikit-learn/sklearn.impute._base/_BaseImputer/_concatenate_indicator", + "scikit-learn/sklearn.impute._base/_BaseImputer/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for all imputers.\n\nIt adds automatically support for `add_indicator`.", + "docstring": "Base class for all imputers.\n\nIt adds automatically support for `add_indicator`.", + "code": "class _BaseImputer(TransformerMixin, BaseEstimator):\n \"\"\"Base class for all imputers.\n\n It adds automatically support for `add_indicator`.\n \"\"\"\n\n def __init__(self, *, missing_values=np.nan, add_indicator=False):\n self.missing_values = missing_values\n self.add_indicator = add_indicator\n\n def _fit_indicator(self, X):\n \"\"\"Fit a MissingIndicator.\"\"\"\n if self.add_indicator:\n self.indicator_ = MissingIndicator(\n missing_values=self.missing_values, error_on_new=False)\n self.indicator_._fit(X, precomputed=True)\n else:\n self.indicator_ = None\n\n def _transform_indicator(self, X):\n \"\"\"Compute the indicator mask.'\n\n Note that X must be the original data as passed to the imputer before\n any imputation, since imputation may be done inplace in some cases.\n \"\"\"\n if self.add_indicator:\n if not hasattr(self, 'indicator_'):\n raise ValueError(\n \"Make sure to call _fit_indicator before \"\n \"_transform_indicator\"\n )\n return self.indicator_.transform(X)\n\n def _concatenate_indicator(self, X_imputed, X_indicator):\n \"\"\"Concatenate indicator mask with the imputed data.\"\"\"\n if not self.add_indicator:\n return X_imputed\n\n hstack = sp.hstack if sp.issparse(X_imputed) else np.hstack\n if X_indicator is None:\n raise ValueError(\n \"Data from the missing indicator are not provided. 
Call \"\n \"_fit_indicator and _transform_indicator in the imputer \"\n \"implementation.\"\n )\n\n return hstack((X_imputed, X_indicator))\n\n def _more_tags(self):\n return {'allow_nan': is_scalar_nan(self.missing_values)}", + "instance_attributes": [ + { + "name": "add_indicator", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "indicator_", + "types": { + "kind": "NamedType", + "name": "MissingIndicator" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer", + "name": "IterativeImputer", + "qname": "sklearn.impute._iterative.IterativeImputer", + "decorators": [], + "superclasses": ["_BaseImputer"], + "methods": [ + "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__", + "scikit-learn/sklearn.impute._iterative/IterativeImputer/_impute_one_feature", + "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_neighbor_feat_idx", + "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_ordered_idx", + "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_abs_corr_mat", + "scikit-learn/sklearn.impute._iterative/IterativeImputer/_initial_imputation", + "scikit-learn/sklearn.impute._iterative/IterativeImputer/_validate_limit", + "scikit-learn/sklearn.impute._iterative/IterativeImputer/fit_transform", + "scikit-learn/sklearn.impute._iterative/IterativeImputer/transform", + "scikit-learn/sklearn.impute._iterative/IterativeImputer/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_iterative_imputer``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_iterative_imputer # noqa\n >>> # now you can import normally from sklearn.impute\n >>> from sklearn.impute import IterativeImputer", + "docstring": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_iterative_imputer``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_iterative_imputer # noqa\n >>> # now you can import normally from sklearn.impute\n >>> from sklearn.impute import IterativeImputer\n\nParameters\n----------\nestimator : estimator object, default=BayesianRidge()\n The estimator to use at each step of the round-robin imputation.\n If ``sample_posterior`` is True, the estimator must support\n ``return_std`` in its ``predict`` method.\n\nmissing_values : int, np.nan, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. 
For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\nsample_posterior : boolean, default=False\n Whether to sample from the (Gaussian) predictive posterior of the\n fitted estimator for each imputation. Estimator must support\n ``return_std`` in its ``predict`` method if set to ``True``. Set to\n ``True`` if using ``IterativeImputer`` for multiple imputations.\n\nmax_iter : int, default=10\n Maximum number of imputation rounds to perform before returning the\n imputations computed during the final round. A round is a single\n imputation of each feature with missing values. The stopping criterion\n is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals])) < tol`,\n where `X_t` is `X` at iteration `t`. Note that early stopping is only\n applied if ``sample_posterior=False``.\n\ntol : float, default=1e-3\n Tolerance of the stopping condition.\n\nn_nearest_features : int, default=None\n Number of other features to use to estimate the missing values of\n each feature column. Nearness between features is measured using\n the absolute correlation coefficient between each feature pair (after\n initial imputation). To ensure coverage of features throughout the\n imputation process, the neighbor features are not necessarily nearest,\n but are drawn with probability proportional to correlation for each\n imputed target feature. Can provide significant speed-up when the\n number of features is huge. If ``None``, all features will be used.\n\ninitial_strategy : str, default='mean'\n Which strategy to use to initialize the missing values. Same as the\n ``strategy`` parameter in :class:`~sklearn.impute.SimpleImputer`\n Valid values: {\"mean\", \"median\", \"most_frequent\", or \"constant\"}.\n\nimputation_order : str, default='ascending'\n The order in which the features will be imputed. Possible values:\n\n \"ascending\"\n From features with fewest missing values to most.\n \"descending\"\n From features with most missing values to fewest.\n \"roman\"\n Left to right.\n \"arabic\"\n Right to left.\n \"random\"\n A random order for each round.\n\nskip_complete : boolean, default=False\n If ``True`` then features with missing values during ``transform``\n which did not have any missing values during ``fit`` will be imputed\n with the initial imputation method only. Set to ``True`` if you have\n many features with no missing values at both ``fit`` and ``transform``\n time to save compute.\n\nmin_value : float or array-like of shape (n_features,), default=-np.inf\n Minimum possible imputed value. Broadcast to shape (n_features,) if\n scalar. If array-like, expects shape (n_features,), one min value for\n each feature. The default is `-np.inf`.\n\n .. versionchanged:: 0.23\n Added support for array-like.\n\nmax_value : float or array-like of shape (n_features,), default=np.inf\n Maximum possible imputed value. Broadcast to shape (n_features,) if\n scalar. If array-like, expects shape (n_features,), one max value for\n each feature. The default is `np.inf`.\n\n .. versionchanged:: 0.23\n Added support for array-like.\n\nverbose : int, default=0\n Verbosity flag, controls the debug messages that are issued\n as functions are evaluated. The higher, the more verbose. Can be 0, 1,\n or 2.\n\nrandom_state : int, RandomState instance or None, default=None\n The seed of the pseudo random number generator to use. 
Randomizes\n selection of estimator features if n_nearest_features is not None, the\n ``imputation_order`` if ``random``, and the sampling from posterior if\n ``sample_posterior`` is True. Use an integer for determinism.\n See :term:`the Glossary `.\n\nadd_indicator : boolean, default=False\n If True, a :class:`MissingIndicator` transform will stack onto output\n of the imputer's transform. This allows a predictive estimator\n to account for missingness despite imputation. If a feature has no\n missing values at fit/train time, the feature won't appear on\n the missing indicator even if there are missing values at\n transform/test time.\n\nAttributes\n----------\ninitial_imputer_ : object of type :class:`~sklearn.impute.SimpleImputer`\n Imputer used to initialize the missing values.\n\nimputation_sequence_ : list of tuples\n Each tuple has ``(feat_idx, neighbor_feat_idx, estimator)``, where\n ``feat_idx`` is the current feature to be imputed,\n ``neighbor_feat_idx`` is the array of other features used to impute the\n current feature, and ``estimator`` is the trained estimator used for\n the imputation. Length is ``self.n_features_with_missing_ *\n self.n_iter_``.\n\nn_iter_ : int\n Number of iteration rounds that occurred. Will be less than\n ``self.max_iter`` if early stopping criterion was reached.\n\nn_features_with_missing_ : int\n Number of features with missing values.\n\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n ``None`` if add_indicator is False.\n\nrandom_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generator or by `np.random`.\n\nSee Also\n--------\nSimpleImputer : Univariate imputation of missing values.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.experimental import enable_iterative_imputer\n>>> from sklearn.impute import IterativeImputer\n>>> imp_mean = IterativeImputer(random_state=0)\n>>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\nIterativeImputer(random_state=0)\n>>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n>>> imp_mean.transform(X)\narray([[ 6.9584..., 2. , 3. ],\n [ 4. , 2.6000..., 6. ],\n [10. , 4.9999..., 9. ]])\n\nNotes\n-----\nTo support imputation in inductive mode we store each feature's estimator\nduring the ``fit`` phase, and predict without refitting (in order) during\nthe ``transform`` phase.\n\nFeatures which contain all missing values at ``fit`` are discarded upon\n``transform``.\n\nReferences\n----------\n.. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). \"mice:\n Multivariate Imputation by Chained Equations in R\". Journal of\n Statistical Software 45: 1-67.\n `_\n\n.. [2] `S. F. Buck, (1960). \"A Method of Estimation of Missing Values in\n Multivariate Data Suitable for use with an Electronic Computer\".\n Journal of the Royal Statistical Society 22(2): 302-306.\n `_", + "code": "class IterativeImputer(_BaseImputer):\n \"\"\"Multivariate imputer that estimates each feature from all the others.\n\n A strategy for imputing missing values by modeling each feature with\n missing values as a function of other features in a round-robin fashion.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.21\n\n .. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. 
To use it,\n you need to explicitly import ``enable_iterative_imputer``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_iterative_imputer # noqa\n >>> # now you can import normally from sklearn.impute\n >>> from sklearn.impute import IterativeImputer\n\n Parameters\n ----------\n estimator : estimator object, default=BayesianRidge()\n The estimator to use at each step of the round-robin imputation.\n If ``sample_posterior`` is True, the estimator must support\n ``return_std`` in its ``predict`` method.\n\n missing_values : int, np.nan, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\n sample_posterior : boolean, default=False\n Whether to sample from the (Gaussian) predictive posterior of the\n fitted estimator for each imputation. Estimator must support\n ``return_std`` in its ``predict`` method if set to ``True``. Set to\n ``True`` if using ``IterativeImputer`` for multiple imputations.\n\n max_iter : int, default=10\n Maximum number of imputation rounds to perform before returning the\n imputations computed during the final round. A round is a single\n imputation of each feature with missing values. The stopping criterion\n is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals])) < tol`,\n where `X_t` is `X` at iteration `t`. Note that early stopping is only\n applied if ``sample_posterior=False``.\n\n tol : float, default=1e-3\n Tolerance of the stopping condition.\n\n n_nearest_features : int, default=None\n Number of other features to use to estimate the missing values of\n each feature column. Nearness between features is measured using\n the absolute correlation coefficient between each feature pair (after\n initial imputation). To ensure coverage of features throughout the\n imputation process, the neighbor features are not necessarily nearest,\n but are drawn with probability proportional to correlation for each\n imputed target feature. Can provide significant speed-up when the\n number of features is huge. If ``None``, all features will be used.\n\n initial_strategy : str, default='mean'\n Which strategy to use to initialize the missing values. Same as the\n ``strategy`` parameter in :class:`~sklearn.impute.SimpleImputer`\n Valid values: {\"mean\", \"median\", \"most_frequent\", or \"constant\"}.\n\n imputation_order : str, default='ascending'\n The order in which the features will be imputed. Possible values:\n\n \"ascending\"\n From features with fewest missing values to most.\n \"descending\"\n From features with most missing values to fewest.\n \"roman\"\n Left to right.\n \"arabic\"\n Right to left.\n \"random\"\n A random order for each round.\n\n skip_complete : boolean, default=False\n If ``True`` then features with missing values during ``transform``\n which did not have any missing values during ``fit`` will be imputed\n with the initial imputation method only. Set to ``True`` if you have\n many features with no missing values at both ``fit`` and ``transform``\n time to save compute.\n\n min_value : float or array-like of shape (n_features,), default=-np.inf\n Minimum possible imputed value. Broadcast to shape (n_features,) if\n scalar. If array-like, expects shape (n_features,), one min value for\n each feature. The default is `-np.inf`.\n\n .. 
versionchanged:: 0.23\n Added support for array-like.\n\n max_value : float or array-like of shape (n_features,), default=np.inf\n Maximum possible imputed value. Broadcast to shape (n_features,) if\n scalar. If array-like, expects shape (n_features,), one max value for\n each feature. The default is `np.inf`.\n\n .. versionchanged:: 0.23\n Added support for array-like.\n\n verbose : int, default=0\n Verbosity flag, controls the debug messages that are issued\n as functions are evaluated. The higher, the more verbose. Can be 0, 1,\n or 2.\n\n random_state : int, RandomState instance or None, default=None\n The seed of the pseudo random number generator to use. Randomizes\n selection of estimator features if n_nearest_features is not None, the\n ``imputation_order`` if ``random``, and the sampling from posterior if\n ``sample_posterior`` is True. Use an integer for determinism.\n See :term:`the Glossary `.\n\n add_indicator : boolean, default=False\n If True, a :class:`MissingIndicator` transform will stack onto output\n of the imputer's transform. This allows a predictive estimator\n to account for missingness despite imputation. If a feature has no\n missing values at fit/train time, the feature won't appear on\n the missing indicator even if there are missing values at\n transform/test time.\n\n Attributes\n ----------\n initial_imputer_ : object of type :class:`~sklearn.impute.SimpleImputer`\n Imputer used to initialize the missing values.\n\n imputation_sequence_ : list of tuples\n Each tuple has ``(feat_idx, neighbor_feat_idx, estimator)``, where\n ``feat_idx`` is the current feature to be imputed,\n ``neighbor_feat_idx`` is the array of other features used to impute the\n current feature, and ``estimator`` is the trained estimator used for\n the imputation. Length is ``self.n_features_with_missing_ *\n self.n_iter_``.\n\n n_iter_ : int\n Number of iteration rounds that occurred. Will be less than\n ``self.max_iter`` if early stopping criterion was reached.\n\n n_features_with_missing_ : int\n Number of features with missing values.\n\n indicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n ``None`` if add_indicator is False.\n\n random_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generator or by `np.random`.\n\n See Also\n --------\n SimpleImputer : Univariate imputation of missing values.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.experimental import enable_iterative_imputer\n >>> from sklearn.impute import IterativeImputer\n >>> imp_mean = IterativeImputer(random_state=0)\n >>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\n IterativeImputer(random_state=0)\n >>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n >>> imp_mean.transform(X)\n array([[ 6.9584..., 2. , 3. ],\n [ 4. , 2.6000..., 6. ],\n [10. , 4.9999..., 9. ]])\n\n Notes\n -----\n To support imputation in inductive mode we store each feature's estimator\n during the ``fit`` phase, and predict without refitting (in order) during\n the ``transform`` phase.\n\n Features which contain all missing values at ``fit`` are discarded upon\n ``transform``.\n\n References\n ----------\n .. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). \"mice:\n Multivariate Imputation by Chained Equations in R\". Journal of\n Statistical Software 45: 1-67.\n `_\n\n .. [2] `S. F. Buck, (1960). 
\"A Method of Estimation of Missing Values in\n Multivariate Data Suitable for use with an Electronic Computer\".\n Journal of the Royal Statistical Society 22(2): 302-306.\n `_\n \"\"\"\n def __init__(self,\n estimator=None, *,\n missing_values=np.nan,\n sample_posterior=False,\n max_iter=10,\n tol=1e-3,\n n_nearest_features=None,\n initial_strategy=\"mean\",\n imputation_order='ascending',\n skip_complete=False,\n min_value=-np.inf,\n max_value=np.inf,\n verbose=0,\n random_state=None,\n add_indicator=False):\n super().__init__(\n missing_values=missing_values,\n add_indicator=add_indicator\n )\n\n self.estimator = estimator\n self.sample_posterior = sample_posterior\n self.max_iter = max_iter\n self.tol = tol\n self.n_nearest_features = n_nearest_features\n self.initial_strategy = initial_strategy\n self.imputation_order = imputation_order\n self.skip_complete = skip_complete\n self.min_value = min_value\n self.max_value = max_value\n self.verbose = verbose\n self.random_state = random_state\n\n def _impute_one_feature(self,\n X_filled,\n mask_missing_values,\n feat_idx,\n neighbor_feat_idx,\n estimator=None,\n fit_mode=True):\n \"\"\"Impute a single feature from the others provided.\n\n This function predicts the missing values of one of the features using\n the current estimates of all the other features. The ``estimator`` must\n support ``return_std=True`` in its ``predict`` method for this function\n to work.\n\n Parameters\n ----------\n X_filled : ndarray\n Input data with the most recent imputations.\n\n mask_missing_values : ndarray\n Input data's missing indicator matrix.\n\n feat_idx : int\n Index of the feature currently being imputed.\n\n neighbor_feat_idx : ndarray\n Indices of the features to be used in imputing ``feat_idx``.\n\n estimator : object\n The estimator to use at this step of the round-robin imputation.\n If ``sample_posterior`` is True, the estimator must support\n ``return_std`` in its ``predict`` method.\n If None, it will be cloned from self._estimator.\n\n fit_mode : boolean, default=True\n Whether to fit and predict with the estimator or just predict.\n\n Returns\n -------\n X_filled : ndarray\n Input data with ``X_filled[missing_row_mask, feat_idx]`` updated.\n\n estimator : estimator with sklearn API\n The fitted estimator used to impute\n ``X_filled[missing_row_mask, feat_idx]``.\n \"\"\"\n if estimator is None and fit_mode is False:\n raise ValueError(\"If fit_mode is False, then an already-fitted \"\n \"estimator should be passed in.\")\n\n if estimator is None:\n estimator = clone(self._estimator)\n\n missing_row_mask = mask_missing_values[:, feat_idx]\n if fit_mode:\n X_train = _safe_indexing(X_filled[:, neighbor_feat_idx],\n ~missing_row_mask)\n y_train = _safe_indexing(X_filled[:, feat_idx],\n ~missing_row_mask)\n estimator.fit(X_train, y_train)\n\n # if no missing values, don't predict\n if np.sum(missing_row_mask) == 0:\n return X_filled, estimator\n\n # get posterior samples if there is at least one missing value\n X_test = _safe_indexing(X_filled[:, neighbor_feat_idx],\n missing_row_mask)\n if self.sample_posterior:\n mus, sigmas = estimator.predict(X_test, return_std=True)\n imputed_values = np.zeros(mus.shape, dtype=X_filled.dtype)\n # two types of problems: (1) non-positive sigmas\n # (2) mus outside legal range of min_value and max_value\n # (results in inf sample)\n positive_sigmas = sigmas > 0\n imputed_values[~positive_sigmas] = mus[~positive_sigmas]\n mus_too_low = mus < self._min_value[feat_idx]\n imputed_values[mus_too_low] = 
self._min_value[feat_idx]\n mus_too_high = mus > self._max_value[feat_idx]\n imputed_values[mus_too_high] = self._max_value[feat_idx]\n # the rest can be sampled without statistical issues\n inrange_mask = positive_sigmas & ~mus_too_low & ~mus_too_high\n mus = mus[inrange_mask]\n sigmas = sigmas[inrange_mask]\n a = (self._min_value[feat_idx] - mus) / sigmas\n b = (self._max_value[feat_idx] - mus) / sigmas\n\n truncated_normal = stats.truncnorm(a=a, b=b,\n loc=mus, scale=sigmas)\n imputed_values[inrange_mask] = truncated_normal.rvs(\n random_state=self.random_state_)\n else:\n imputed_values = estimator.predict(X_test)\n imputed_values = np.clip(imputed_values,\n self._min_value[feat_idx],\n self._max_value[feat_idx])\n\n # update the feature\n X_filled[missing_row_mask, feat_idx] = imputed_values\n return X_filled, estimator\n\n def _get_neighbor_feat_idx(self,\n n_features,\n feat_idx,\n abs_corr_mat):\n \"\"\"Get a list of other features to predict ``feat_idx``.\n\n If self.n_nearest_features is less than or equal to the total\n number of features, then use a probability proportional to the absolute\n correlation between ``feat_idx`` and each other feature to randomly\n choose a subsample of the other features (without replacement).\n\n Parameters\n ----------\n n_features : int\n Number of features in ``X``.\n\n feat_idx : int\n Index of the feature currently being imputed.\n\n abs_corr_mat : ndarray, shape (n_features, n_features)\n Absolute correlation matrix of ``X``. The diagonal has been zeroed\n out and each feature has been normalized to sum to 1. Can be None.\n\n Returns\n -------\n neighbor_feat_idx : array-like\n The features to use to impute ``feat_idx``.\n \"\"\"\n if (self.n_nearest_features is not None and\n self.n_nearest_features < n_features):\n p = abs_corr_mat[:, feat_idx]\n neighbor_feat_idx = self.random_state_.choice(\n np.arange(n_features), self.n_nearest_features, replace=False,\n p=p)\n else:\n inds_left = np.arange(feat_idx)\n inds_right = np.arange(feat_idx + 1, n_features)\n neighbor_feat_idx = np.concatenate((inds_left, inds_right))\n return neighbor_feat_idx\n\n def _get_ordered_idx(self, mask_missing_values):\n \"\"\"Decide in what order we will update the features.\n\n As a homage to the MICE R package, we will have 4 main options of\n how to order the updates, and use a random order if anything else\n is specified.\n\n Also, this function skips features which have no missing values.\n\n Parameters\n ----------\n mask_missing_values : array-like, shape (n_samples, n_features)\n Input data's missing indicator matrix, where \"n_samples\" is the\n number of samples and \"n_features\" is the number of features.\n\n Returns\n -------\n ordered_idx : ndarray, shape (n_features,)\n The order in which to impute the features.\n \"\"\"\n frac_of_missing_values = mask_missing_values.mean(axis=0)\n if self.skip_complete:\n missing_values_idx = np.flatnonzero(frac_of_missing_values)\n else:\n missing_values_idx = np.arange(np.shape(frac_of_missing_values)[0])\n if self.imputation_order == 'roman':\n ordered_idx = missing_values_idx\n elif self.imputation_order == 'arabic':\n ordered_idx = missing_values_idx[::-1]\n elif self.imputation_order == 'ascending':\n n = len(frac_of_missing_values) - len(missing_values_idx)\n ordered_idx = np.argsort(frac_of_missing_values,\n kind='mergesort')[n:]\n elif self.imputation_order == 'descending':\n n = len(frac_of_missing_values) - len(missing_values_idx)\n ordered_idx = np.argsort(frac_of_missing_values,\n 
kind='mergesort')[n:][::-1]\n elif self.imputation_order == 'random':\n ordered_idx = missing_values_idx\n self.random_state_.shuffle(ordered_idx)\n else:\n raise ValueError(\"Got an invalid imputation order: '{0}'. It must \"\n \"be one of the following: 'roman', 'arabic', \"\n \"'ascending', 'descending', or \"\n \"'random'.\".format(self.imputation_order))\n return ordered_idx\n\n def _get_abs_corr_mat(self, X_filled, tolerance=1e-6):\n \"\"\"Get absolute correlation matrix between features.\n\n Parameters\n ----------\n X_filled : ndarray, shape (n_samples, n_features)\n Input data with the most recent imputations.\n\n tolerance : float, default=1e-6\n ``abs_corr_mat`` can have nans, which will be replaced\n with ``tolerance``.\n\n Returns\n -------\n abs_corr_mat : ndarray, shape (n_features, n_features)\n Absolute correlation matrix of ``X`` at the beginning of the\n current round. The diagonal has been zeroed out and each feature's\n absolute correlations with all others have been normalized to sum\n to 1.\n \"\"\"\n n_features = X_filled.shape[1]\n if (self.n_nearest_features is None or\n self.n_nearest_features >= n_features):\n return None\n with np.errstate(invalid='ignore'):\n # if a feature in the neighborhood has only a single value\n # (e.g., categorical feature), the std. dev. will be null and\n # np.corrcoef will raise a warning due to a division by zero\n abs_corr_mat = np.abs(np.corrcoef(X_filled.T))\n # np.corrcoef is not defined for features with zero std\n abs_corr_mat[np.isnan(abs_corr_mat)] = tolerance\n # ensures exploration, i.e. at least some probability of sampling\n np.clip(abs_corr_mat, tolerance, None, out=abs_corr_mat)\n # features are not their own neighbors\n np.fill_diagonal(abs_corr_mat, 0)\n # needs to sum to 1 for np.random.choice sampling\n abs_corr_mat = normalize(abs_corr_mat, norm='l1', axis=0, copy=False)\n return abs_corr_mat\n\n def _initial_imputation(self, X, in_fit=False):\n \"\"\"Perform initial imputation for input X.\n\n Parameters\n ----------\n X : ndarray, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\n in_fit : bool, default=False\n Whether function is called in fit.\n\n Returns\n -------\n Xt : ndarray, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\n X_filled : ndarray, shape (n_samples, n_features)\n Input data with the most recent imputations.\n\n mask_missing_values : ndarray, shape (n_samples, n_features)\n Input data's missing indicator matrix, where \"n_samples\" is the\n number of samples and \"n_features\" is the number of features.\n\n X_missing_mask : ndarray, shape (n_samples, n_features)\n Input data's mask matrix indicating missing datapoints, where\n \"n_samples\" is the number of samples and \"n_features\" is the\n number of features.\n \"\"\"\n if is_scalar_nan(self.missing_values):\n force_all_finite = \"allow-nan\"\n else:\n force_all_finite = True\n\n X = self._validate_data(X, dtype=FLOAT_DTYPES, order=\"F\", reset=in_fit,\n force_all_finite=force_all_finite)\n _check_inputs_dtype(X, self.missing_values)\n\n X_missing_mask = _get_mask(X, self.missing_values)\n mask_missing_values = X_missing_mask.copy()\n if self.initial_imputer_ is None:\n self.initial_imputer_ = SimpleImputer(\n missing_values=self.missing_values,\n strategy=self.initial_strategy\n )\n X_filled = self.initial_imputer_.fit_transform(X)\n else:\n X_filled = 
self.initial_imputer_.transform(X)\n\n valid_mask = np.flatnonzero(np.logical_not(\n np.isnan(self.initial_imputer_.statistics_)))\n Xt = X[:, valid_mask]\n mask_missing_values = mask_missing_values[:, valid_mask]\n\n return Xt, X_filled, mask_missing_values, X_missing_mask\n\n @staticmethod\n def _validate_limit(limit, limit_type, n_features):\n \"\"\"Validate the limits (min/max) of the feature values.\n Converts scalar min/max limits to vectors of shape (n_features,)\n\n Parameters\n ----------\n limit: scalar or array-like\n The user-specified limit (i.e., min_value or max_value)\n limit_type: string, \"max\" or \"min\"\n n_features: Number of features in the dataset\n\n Returns\n -------\n limit: ndarray, shape (n_features,)\n Array of limits, one for each feature\n \"\"\"\n limit_bound = np.inf if limit_type == \"max\" else -np.inf\n limit = limit_bound if limit is None else limit\n if np.isscalar(limit):\n limit = np.full(n_features, limit)\n limit = check_array(\n limit, force_all_finite=False, copy=False, ensure_2d=False\n )\n if not limit.shape[0] == n_features:\n raise ValueError(\n f\"'{limit_type}_value' should be of \"\n f\"shape ({n_features},) when an array-like \"\n f\"is provided. Got {limit.shape}, instead.\"\n )\n return limit\n\n def fit_transform(self, X, y=None):\n \"\"\"Fit the imputer on X and return the transformed X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\n y : ignored.\n\n Returns\n -------\n Xt : array-like, shape (n_samples, n_features)\n The imputed input data.\n \"\"\"\n self.random_state_ = getattr(self, \"random_state_\",\n check_random_state(self.random_state))\n\n if self.max_iter < 0:\n raise ValueError(\n \"'max_iter' should be a non-negative integer. Got {} instead.\"\n .format(self.max_iter))\n\n if self.tol < 0:\n raise ValueError(\n \"'tol' should be a non-negative float. Got {} instead.\"\n .format(self.tol)\n )\n\n if self.estimator is None:\n from ..linear_model import BayesianRidge\n self._estimator = BayesianRidge()\n else:\n self._estimator = clone(self.estimator)\n\n self.imputation_sequence_ = []\n\n self.initial_imputer_ = None\n\n X, Xt, mask_missing_values, complete_mask = (\n self._initial_imputation(X, in_fit=True))\n\n super()._fit_indicator(complete_mask)\n X_indicator = super()._transform_indicator(complete_mask)\n\n if self.max_iter == 0 or np.all(mask_missing_values):\n self.n_iter_ = 0\n return super()._concatenate_indicator(Xt, X_indicator)\n\n # Edge case: a single feature. 
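(with one feature there are no neighbor features to regress on, so the round-robin updates below could not improve on the initial imputation) 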
We return the initial ...\n if Xt.shape[1] == 1:\n self.n_iter_ = 0\n return super()._concatenate_indicator(Xt, X_indicator)\n\n self._min_value = self._validate_limit(\n self.min_value, \"min\", X.shape[1])\n self._max_value = self._validate_limit(\n self.max_value, \"max\", X.shape[1])\n\n if not np.all(np.greater(self._max_value, self._min_value)):\n raise ValueError(\n \"One (or more) features have min_value >= max_value.\")\n\n # order in which to impute\n # note this is probably too slow for large feature data (d > 100000)\n # and a better way would be good.\n # see: https://goo.gl/KyCNwj and subsequent comments\n ordered_idx = self._get_ordered_idx(mask_missing_values)\n self.n_features_with_missing_ = len(ordered_idx)\n\n abs_corr_mat = self._get_abs_corr_mat(Xt)\n\n n_samples, n_features = Xt.shape\n if self.verbose > 0:\n print(\"[IterativeImputer] Completing matrix with shape %s\"\n % (X.shape,))\n start_t = time()\n if not self.sample_posterior:\n Xt_previous = Xt.copy()\n normalized_tol = self.tol * np.max(\n np.abs(X[~mask_missing_values])\n )\n for self.n_iter_ in range(1, self.max_iter + 1):\n if self.imputation_order == 'random':\n ordered_idx = self._get_ordered_idx(mask_missing_values)\n\n for feat_idx in ordered_idx:\n neighbor_feat_idx = self._get_neighbor_feat_idx(n_features,\n feat_idx,\n abs_corr_mat)\n Xt, estimator = self._impute_one_feature(\n Xt, mask_missing_values, feat_idx, neighbor_feat_idx,\n estimator=None, fit_mode=True)\n estimator_triplet = _ImputerTriplet(feat_idx,\n neighbor_feat_idx,\n estimator)\n self.imputation_sequence_.append(estimator_triplet)\n\n if self.verbose > 1:\n print('[IterativeImputer] Ending imputation round '\n '%d/%d, elapsed time %0.2f'\n % (self.n_iter_, self.max_iter, time() - start_t))\n\n if not self.sample_posterior:\n inf_norm = np.linalg.norm(Xt - Xt_previous, ord=np.inf,\n axis=None)\n if self.verbose > 0:\n print('[IterativeImputer] '\n 'Change: {}, scaled tolerance: {} '.format(\n inf_norm, normalized_tol))\n if inf_norm < normalized_tol:\n if self.verbose > 0:\n print('[IterativeImputer] Early stopping criterion '\n 'reached.')\n break\n Xt_previous = Xt.copy()\n else:\n if not self.sample_posterior:\n warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n \" reached.\", ConvergenceWarning)\n Xt[~mask_missing_values] = X[~mask_missing_values]\n return super()._concatenate_indicator(Xt, X_indicator)\n\n def transform(self, X):\n \"\"\"Impute all missing values in X.\n\n Note that this is stochastic, and that if random_state is not fixed,\n repeated calls or permuted input will yield different results.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input data to complete.\n\n Returns\n -------\n Xt : array-like, shape (n_samples, n_features)\n The imputed input data.\n \"\"\"\n check_is_fitted(self)\n\n X, Xt, mask_missing_values, complete_mask = self._initial_imputation(X)\n\n X_indicator = super()._transform_indicator(complete_mask)\n\n if self.n_iter_ == 0 or np.all(mask_missing_values):\n return super()._concatenate_indicator(Xt, X_indicator)\n\n imputations_per_round = len(self.imputation_sequence_) // self.n_iter_\n i_rnd = 0\n if self.verbose > 0:\n print(\"[IterativeImputer] Completing matrix with shape %s\"\n % (X.shape,))\n start_t = time()\n for it, estimator_triplet in enumerate(self.imputation_sequence_):\n Xt, _ = self._impute_one_feature(\n Xt,\n mask_missing_values,\n estimator_triplet.feat_idx,\n estimator_triplet.neighbor_feat_idx,\n 
estimator=estimator_triplet.estimator,\n fit_mode=False\n )\n if not (it + 1) % imputations_per_round:\n if self.verbose > 1:\n print('[IterativeImputer] Ending imputation round '\n '%d/%d, elapsed time %0.2f'\n % (i_rnd + 1, self.n_iter_, time() - start_t))\n i_rnd += 1\n\n Xt[~mask_missing_values] = X[~mask_missing_values]\n\n return super()._concatenate_indicator(Xt, X_indicator)\n\n def fit(self, X, y=None):\n \"\"\"Fit the imputer on X and return self.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\n y : ignored\n\n Returns\n -------\n self : object\n Returns self.\n \"\"\"\n self.fit_transform(X)\n return self", + "instance_attributes": [ + { + "name": "sample_posterior", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "initial_strategy", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "imputation_order", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "skip_complete", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "initial_imputer_", + "types": { + "kind": "NamedType", + "name": "SimpleImputer" + } + }, + { + "name": "imputation_sequence_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "n_iter_", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer", + "name": "KNNImputer", + "qname": "sklearn.impute._knn.KNNImputer", + "decorators": [], + "superclasses": ["_BaseImputer"], + "methods": [ + "scikit-learn/sklearn.impute._knn/KNNImputer/__init__", + "scikit-learn/sklearn.impute._knn/KNNImputer/_calc_impute", + "scikit-learn/sklearn.impute._knn/KNNImputer/fit", + "scikit-learn/sklearn.impute._knn/KNNImputer/transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Imputation for completing missing values using k-Nearest Neighbors.\n\nEach sample's missing values are imputed using the mean value from\n`n_neighbors` nearest neighbors found in the training set. Two samples are\nclose if the features that neither is missing are close.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22", + "docstring": "Imputation for completing missing values using k-Nearest Neighbors.\n\nEach sample's missing values are imputed using the mean value from\n`n_neighbors` nearest neighbors found in the training set. Two samples are\nclose if the features that neither is missing are close.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nmissing_values : int, float, str, np.nan or None, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to np.nan, since `pd.NA` will be converted to np.nan.\n\nn_neighbors : int, default=5\n Number of neighboring samples to use for imputation.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n Weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. 
All points in each neighborhood are\n weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n In this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - callable : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\nmetric : {'nan_euclidean'} or callable, default='nan_euclidean'\n Distance metric for searching neighbors. Possible values:\n\n - 'nan_euclidean'\n - callable : a user-defined function which conforms to the definition\n of ``_pairwise_callable(X, Y, metric, **kwds)``. The function\n accepts two arrays, X and Y, and a `missing_values` keyword in\n `kwds` and returns a scalar distance value.\n\ncopy : bool, default=True\n If True, a copy of X will be created. If False, imputation will\n be done in-place whenever possible.\n\nadd_indicator : bool, default=False\n If True, a :class:`MissingIndicator` transform will stack onto the\n output of the imputer's transform. This allows a predictive estimator\n to account for missingness despite imputation. If a feature has no\n missing values at fit/train time, the feature won't appear on the\n missing indicator even if there are missing values at transform/test\n time.\n\nAttributes\n----------\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n ``None`` if add_indicator is False.\n\nReferences\n----------\n* Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, Trevor\n Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, Missing\n value estimation methods for DNA microarrays, BIOINFORMATICS Vol. 17\n no. 6, 2001 Pages 520-525.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import KNNImputer\n>>> X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]\n>>> imputer = KNNImputer(n_neighbors=2)\n>>> imputer.fit_transform(X)\narray([[1. , 2. , 4. ],\n [3. , 4. , 3. ],\n [5.5, 6. , 5. ],\n [8. , 8. , 7. ]])", + "code": "class KNNImputer(_BaseImputer):\n \"\"\"Imputation for completing missing values using k-Nearest Neighbors.\n\n Each sample's missing values are imputed using the mean value from\n `n_neighbors` nearest neighbors found in the training set. Two samples are\n close if the features that neither is missing are close.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.22\n\n Parameters\n ----------\n missing_values : int, float, str, np.nan or None, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to np.nan, since `pd.NA` will be converted to np.nan.\n\n n_neighbors : int, default=5\n Number of neighboring samples to use for imputation.\n\n weights : {'uniform', 'distance'} or callable, default='uniform'\n Weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. 
All points in each neighborhood are\n weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n In this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - callable : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n metric : {'nan_euclidean'} or callable, default='nan_euclidean'\n Distance metric for searching neighbors. Possible values:\n\n - 'nan_euclidean'\n - callable : a user-defined function which conforms to the definition\n of ``_pairwise_callable(X, Y, metric, **kwds)``. The function\n accepts two arrays, X and Y, and a `missing_values` keyword in\n `kwds` and returns a scalar distance value.\n\n copy : bool, default=True\n If True, a copy of X will be created. If False, imputation will\n be done in-place whenever possible.\n\n add_indicator : bool, default=False\n If True, a :class:`MissingIndicator` transform will stack onto the\n output of the imputer's transform. This allows a predictive estimator\n to account for missingness despite imputation. If a feature has no\n missing values at fit/train time, the feature won't appear on the\n missing indicator even if there are missing values at transform/test\n time.\n\n Attributes\n ----------\n indicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n ``None`` if add_indicator is False.\n\n References\n ----------\n * Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, Trevor\n Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, Missing\n value estimation methods for DNA microarrays, BIOINFORMATICS Vol. 17\n no. 6, 2001 Pages 520-525.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.impute import KNNImputer\n >>> X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]\n >>> imputer = KNNImputer(n_neighbors=2)\n >>> imputer.fit_transform(X)\n array([[1. , 2. , 4. ],\n [3. , 4. , 3. ],\n [5.5, 6. , 5. ],\n [8. , 8. , 7. ]])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, missing_values=np.nan, n_neighbors=5,\n weights=\"uniform\", metric=\"nan_euclidean\", copy=True,\n add_indicator=False):\n super().__init__(\n missing_values=missing_values,\n add_indicator=add_indicator\n )\n self.n_neighbors = n_neighbors\n self.weights = weights\n self.metric = metric\n self.copy = copy\n\n def _calc_impute(self, dist_pot_donors, n_neighbors,\n fit_X_col, mask_fit_X_col):\n \"\"\"Helper function to impute a single column.\n\n Parameters\n ----------\n dist_pot_donors : ndarray of shape (n_receivers, n_potential_donors)\n Distance matrix between the receivers and potential donors from\n training set. 
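With the default nan_euclidean metric, a distance is nan when a receiver and a potential donor have no commonly observed features. 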
There must be at least one non-nan distance between\n a receiver and a potential donor.\n\n n_neighbors : int\n Number of neighbors to consider.\n\n fit_X_col : ndarray of shape (n_potential_donors,)\n Column of potential donors from training set.\n\n mask_fit_X_col : ndarray of shape (n_potential_donors,)\n Missing mask for fit_X_col.\n\n Returns\n -------\n imputed_values: ndarray of shape (n_receivers,)\n Imputed values for receiver.\n \"\"\"\n # Get donors\n donors_idx = np.argpartition(dist_pot_donors, n_neighbors - 1,\n axis=1)[:, :n_neighbors]\n\n # Get weight matrix from distance matrix\n donors_dist = dist_pot_donors[\n np.arange(donors_idx.shape[0])[:, None], donors_idx]\n\n weight_matrix = _get_weights(donors_dist, self.weights)\n\n # fill nans with zeros\n if weight_matrix is not None:\n weight_matrix[np.isnan(weight_matrix)] = 0.0\n\n # Retrieve donor values and calculate kNN average\n donors = fit_X_col.take(donors_idx)\n donors_mask = mask_fit_X_col.take(donors_idx)\n donors = np.ma.array(donors, mask=donors_mask)\n\n return np.ma.average(donors, axis=1, weights=weight_matrix).data\n\n def fit(self, X, y=None):\n \"\"\"Fit the imputer on X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n Returns\n -------\n self : object\n \"\"\"\n # Check data integrity and calling arguments\n if not is_scalar_nan(self.missing_values):\n force_all_finite = True\n else:\n force_all_finite = \"allow-nan\"\n if self.metric not in _NAN_METRICS and not callable(self.metric):\n raise ValueError(\n \"The selected metric does not support NaN values\")\n if self.n_neighbors <= 0:\n raise ValueError(\n \"Expected n_neighbors > 0. Got {}\".format(self.n_neighbors))\n\n X = self._validate_data(X, accept_sparse=False, dtype=FLOAT_DTYPES,\n force_all_finite=force_all_finite,\n copy=self.copy)\n\n _check_weights(self.weights)\n self._fit_X = X\n self._mask_fit_X = _get_mask(self._fit_X, self.missing_values)\n\n super()._fit_indicator(self._mask_fit_X)\n\n return self\n\n def transform(self, X):\n \"\"\"Impute all missing values in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input data to complete.\n\n Returns\n -------\n X : array-like of shape (n_samples, n_output_features)\n The imputed dataset. 
`n_output_features` is the number of features\n that is not always missing during `fit`.\n \"\"\"\n\n check_is_fitted(self)\n if not is_scalar_nan(self.missing_values):\n force_all_finite = True\n else:\n force_all_finite = \"allow-nan\"\n X = self._validate_data(X, accept_sparse=False, dtype=FLOAT_DTYPES,\n force_all_finite=force_all_finite,\n copy=self.copy, reset=False)\n\n mask = _get_mask(X, self.missing_values)\n mask_fit_X = self._mask_fit_X\n valid_mask = ~np.all(mask_fit_X, axis=0)\n\n X_indicator = super()._transform_indicator(mask)\n\n # Removes columns where the training data is all nan\n if not np.any(mask):\n # No missing values in X\n # Remove columns where the training data is all nan\n return X[:, valid_mask]\n\n row_missing_idx = np.flatnonzero(mask.any(axis=1))\n\n non_missing_fix_X = np.logical_not(mask_fit_X)\n\n # Maps from indices from X to indices in dist matrix\n dist_idx_map = np.zeros(X.shape[0], dtype=int)\n dist_idx_map[row_missing_idx] = np.arange(row_missing_idx.shape[0])\n\n def process_chunk(dist_chunk, start):\n row_missing_chunk = row_missing_idx[start:start + len(dist_chunk)]\n\n # Find and impute missing by column\n for col in range(X.shape[1]):\n if not valid_mask[col]:\n # column was all missing during training\n continue\n\n col_mask = mask[row_missing_chunk, col]\n if not np.any(col_mask):\n # column has no missing values\n continue\n\n potential_donors_idx, = np.nonzero(non_missing_fix_X[:, col])\n\n # receivers_idx are indices in X\n receivers_idx = row_missing_chunk[np.flatnonzero(col_mask)]\n\n # distances for samples that needed imputation for column\n dist_subset = (dist_chunk[dist_idx_map[receivers_idx] - start]\n [:, potential_donors_idx])\n\n # receivers with all nan distances impute with mean\n all_nan_dist_mask = np.isnan(dist_subset).all(axis=1)\n all_nan_receivers_idx = receivers_idx[all_nan_dist_mask]\n\n if all_nan_receivers_idx.size:\n col_mean = np.ma.array(self._fit_X[:, col],\n mask=mask_fit_X[:, col]).mean()\n X[all_nan_receivers_idx, col] = col_mean\n\n if len(all_nan_receivers_idx) == len(receivers_idx):\n # all receivers imputed with mean\n continue\n\n # receivers with at least one defined distance\n receivers_idx = receivers_idx[~all_nan_dist_mask]\n dist_subset = (dist_chunk[dist_idx_map[receivers_idx]\n - start]\n [:, potential_donors_idx])\n\n n_neighbors = min(self.n_neighbors, len(potential_donors_idx))\n value = self._calc_impute(\n dist_subset,\n n_neighbors,\n self._fit_X[potential_donors_idx, col],\n mask_fit_X[potential_donors_idx, col])\n X[receivers_idx, col] = value\n\n # process in fixed-memory chunks\n gen = pairwise_distances_chunked(\n X[row_missing_idx, :],\n self._fit_X,\n metric=self.metric,\n missing_values=self.missing_values,\n force_all_finite=force_all_finite,\n reduce_func=process_chunk)\n for chunk in gen:\n # process_chunk modifies X in place. 
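It was passed as reduce_func to pairwise_distances_chunked, so consuming the generator drives the imputation. 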
No return value.\n pass\n\n return super()._concatenate_indicator(X[:, valid_mask], X_indicator)", + "instance_attributes": [ + { + "name": "n_neighbors", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "weights", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "metric", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay", + "name": "PartialDependenceDisplay", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/__init__", + "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_get_sample_count", + "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_ice_lines", + "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_average_dependence", + "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence", + "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_two_way_partial_dependence", + "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot" + ], + "is_public": false, + "reexported_by": [], + "description": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.plot_partial_dependence` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide `.\n\n .. versionadded:: 0.22", + "docstring": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.plot_partial_dependence` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide `.\n\n .. versionadded:: 0.22\n\nParameters\n----------\npd_results : list of Bunch\n Results of :func:`~sklearn.inspection.partial_dependence` for\n ``features``.\n\nfeatures : list of (int,) or list of (int, int)\n Indices of features for a given plot. A tuple of one integer will plot\n a partial dependence curve of one feature. A tuple of two integers will\n plot a two-way partial dependence curve as a contour plot.\n\nfeature_names : list of str\n Feature names corresponding to the indices in ``features``.\n\ntarget_idx : int\n\n - In a multiclass setting, specifies the class for which the PDPs\n should be computed. 
Note that for binary classification, the\n positive class (index 1) is always used.\n - In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\n Ignored in binary classification or classical regression settings.\n\npdp_lim : dict\n Global min and max average predictions, such that all plots will have\n the same scale and y limits. `pdp_lim[1]` is the global min and max for\n single partial dependence curves. `pdp_lim[2]` is the global min and\n max for two-way partial dependence curves.\n\ndeciles : dict\n Deciles for feature indices in ``features``.\n\nkind : {'average', 'individual', 'both'}, default='average'\n Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot.\n\n Note that the fast ``method='recursion'`` option is only available for\n ``kind='average'``. Plotting individual dependencies requires using the\n slower ``method='brute'`` option.\n\n .. versionadded:: 0.24\n\nsubsample : float, int or None, default=1000\n Sampling for ICE curves when `kind` is 'individual' or 'both'.\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to be used to plot ICE curves. If int, represents the\n maximum absolute number of samples to use.\n\n Note that the full dataset is still used to calculate partial\n dependence when `kind='both'`.\n\n .. versionadded:: 0.24\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the selected samples when subsamples is not\n `None`. See :term:`Glossary ` for details.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nbounding_ax_ : matplotlib Axes or None\n If `ax` is an axes or None, the `bounding_ax_` is the axes where the\n grid of partial dependence plots are drawn. If `ax` is a list of axes\n or a numpy array of axes, `bounding_ax_` is None.\n\naxes_ : ndarray of matplotlib Axes\n If `ax` is an axes or None, `axes_[i, j]` is the axes on the i-th row\n and j-th column. If `ax` is a list of axes, `axes_[i]` is the i-th item\n in `ax`. Elements that are None correspond to a nonexisting axes in\n that position.\n\nlines_ : ndarray of matplotlib Artists\n If `ax` is an axes or None, `lines_[i, j]` is the partial dependence\n curve on the i-th row and j-th column. If `ax` is a list of axes,\n `lines_[i]` is the partial dependence curve corresponding to the i-th\n item in `ax`. Elements that are None correspond to a nonexisting axes\n or an axes that does not include a line plot.\n\ndeciles_vlines_ : ndarray of matplotlib LineCollection\n If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n representing the x axis deciles of the i-th row and j-th column. If\n `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n `ax`. Elements that are None correspond to a nonexisting axes or an\n axes that does not include a PDP plot.\n\n .. versionadded:: 0.23\n\ndeciles_hlines_ : ndarray of matplotlib LineCollection\n If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n representing the y axis deciles of the i-th row and j-th column. If\n `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n `ax`. Elements that are None correspond to a nonexisting axes or an\n axes that does not include a 2-way plot.\n\n .. 
versionadded:: 0.23\n\ncontours_ : ndarray of matplotlib Artists\n If `ax` is an axes or None, `contours_[i, j]` is the partial dependence\n plot on the i-th row and j-th column. If `ax` is a list of axes,\n `contours_[i]` is the partial dependence plot corresponding to the i-th\n item in `ax`. Elements that are None correspond to a nonexisting axes\n or an axes that does not include a contour plot.\n\nfigure_ : matplotlib Figure\n Figure containing partial dependence plots.\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\nplot_partial_dependence : Plot Partial Dependence.", + "code": "class PartialDependenceDisplay:\n \"\"\"Partial Dependence Plot (PDP).\n\n This can also display individual partial dependencies which are often\n referred to as: Individual Condition Expectation (ICE).\n\n It is recommended to use\n :func:`~sklearn.inspection.plot_partial_dependence` to create a\n :class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\n stored as attributes.\n\n Read more in\n :ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\n and the :ref:`User Guide `.\n\n .. versionadded:: 0.22\n\n Parameters\n ----------\n pd_results : list of Bunch\n Results of :func:`~sklearn.inspection.partial_dependence` for\n ``features``.\n\n features : list of (int,) or list of (int, int)\n Indices of features for a given plot. A tuple of one integer will plot\n a partial dependence curve of one feature. A tuple of two integers will\n plot a two-way partial dependence curve as a contour plot.\n\n feature_names : list of str\n Feature names corresponding to the indices in ``features``.\n\n target_idx : int\n\n - In a multiclass setting, specifies the class for which the PDPs\n should be computed. Note that for binary classification, the\n positive class (index 1) is always used.\n - In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\n Ignored in binary classification or classical regression settings.\n\n pdp_lim : dict\n Global min and max average predictions, such that all plots will have\n the same scale and y limits. `pdp_lim[1]` is the global min and max for\n single partial dependence curves. `pdp_lim[2]` is the global min and\n max for two-way partial dependence curves.\n\n deciles : dict\n Deciles for feature indices in ``features``.\n\n kind : {'average', 'individual', 'both'}, default='average'\n Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot.\n\n Note that the fast ``method='recursion'`` option is only available for\n ``kind='average'``. Plotting individual dependencies requires using the\n slower ``method='brute'`` option.\n\n .. versionadded:: 0.24\n\n subsample : float, int or None, default=1000\n Sampling for ICE curves when `kind` is 'individual' or 'both'.\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to be used to plot ICE curves. If int, represents the\n maximum absolute number of samples to use.\n\n Note that the full dataset is still used to calculate partial\n dependence when `kind='both'`.\n\n .. versionadded:: 0.24\n\n random_state : int, RandomState instance or None, default=None\n Controls the randomness of the selected samples when subsamples is not\n `None`. See :term:`Glossary ` for details.\n\n .. 
versionadded:: 0.24\n\n Attributes\n ----------\n bounding_ax_ : matplotlib Axes or None\n If `ax` is an axes or None, the `bounding_ax_` is the axes where the\n grid of partial dependence plots are drawn. If `ax` is a list of axes\n or a numpy array of axes, `bounding_ax_` is None.\n\n axes_ : ndarray of matplotlib Axes\n If `ax` is an axes or None, `axes_[i, j]` is the axes on the i-th row\n and j-th column. If `ax` is a list of axes, `axes_[i]` is the i-th item\n in `ax`. Elements that are None correspond to a nonexisting axes in\n that position.\n\n lines_ : ndarray of matplotlib Artists\n If `ax` is an axes or None, `lines_[i, j]` is the partial dependence\n curve on the i-th row and j-th column. If `ax` is a list of axes,\n `lines_[i]` is the partial dependence curve corresponding to the i-th\n item in `ax`. Elements that are None correspond to a nonexisting axes\n or an axes that does not include a line plot.\n\n deciles_vlines_ : ndarray of matplotlib LineCollection\n If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n representing the x axis deciles of the i-th row and j-th column. If\n `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n `ax`. Elements that are None correspond to a nonexisting axes or an\n axes that does not include a PDP plot.\n\n .. versionadded:: 0.23\n\n deciles_hlines_ : ndarray of matplotlib LineCollection\n If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n representing the y axis deciles of the i-th row and j-th column. If\n `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n `ax`. Elements that are None correspond to a nonexisting axes or an\n axes that does not include a 2-way plot.\n\n .. versionadded:: 0.23\n\n contours_ : ndarray of matplotlib Artists\n If `ax` is an axes or None, `contours_[i, j]` is the partial dependence\n plot on the i-th row and j-th column. If `ax` is a list of axes,\n `contours_[i]` is the partial dependence plot corresponding to the i-th\n item in `ax`. 
Elements that are None correspond to a nonexisting axes\n or an axes that does not include a contour plot.\n\n figure_ : matplotlib Figure\n Figure containing partial dependence plots.\n\n See Also\n --------\n partial_dependence : Compute Partial Dependence values.\n plot_partial_dependence : Plot Partial Dependence.\n \"\"\"\n @_deprecate_positional_args\n def __init__(\n self,\n pd_results,\n *,\n features,\n feature_names,\n target_idx,\n pdp_lim,\n deciles,\n kind=\"average\",\n subsample=1000,\n random_state=None,\n ):\n self.pd_results = pd_results\n self.features = features\n self.feature_names = feature_names\n self.target_idx = target_idx\n self.pdp_lim = pdp_lim\n self.deciles = deciles\n self.kind = kind\n self.subsample = subsample\n self.random_state = random_state\n\n def _get_sample_count(self, n_samples):\n \"\"\"Compute the number of samples as an integer.\"\"\"\n if isinstance(self.subsample, numbers.Integral):\n if self.subsample < n_samples:\n return self.subsample\n return n_samples\n elif isinstance(self.subsample, numbers.Real):\n return ceil(n_samples * self.subsample)\n return n_samples\n\n def _plot_ice_lines(\n self, preds, feature_values, n_ice_to_plot,\n ax, pd_plot_idx, n_total_lines_by_plot, individual_line_kw\n ):\n \"\"\"Plot the ICE lines.\n\n Parameters\n ----------\n preds : ndarray of shape \\\n (n_instances, n_grid_points)\n The predictions computed for all points of `feature_values` for a\n given feature for all samples in `X`.\n feature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\n n_ice_to_plot : int\n The number of ICE lines to plot.\n ax : Matplotlib axes\n The axis on which to plot the ICE lines.\n pd_plot_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\n n_total_lines_by_plot : int\n The total number of lines expected to be plot on the axis.\n individual_line_kw : dict\n Dict with keywords passed when plotting the ICE lines.\n \"\"\"\n rng = check_random_state(self.random_state)\n # subsample ice\n ice_lines_idx = rng.choice(\n preds.shape[0], n_ice_to_plot, replace=False,\n )\n ice_lines_subsampled = preds[ice_lines_idx, :]\n # plot the subsampled ice\n for ice_idx, ice in enumerate(ice_lines_subsampled):\n line_idx = np.unravel_index(\n pd_plot_idx * n_total_lines_by_plot + ice_idx,\n self.lines_.shape\n )\n self.lines_[line_idx] = ax.plot(\n feature_values, ice.ravel(), **individual_line_kw\n )[0]\n\n def _plot_average_dependence(\n self,\n avg_preds,\n feature_values,\n ax,\n pd_line_idx,\n line_kw,\n ):\n \"\"\"Plot the average partial dependence.\n\n Parameters\n ----------\n avg_preds : ndarray of shape (n_grid_points,)\n The average predictions for all points of `feature_values` for a\n given feature for all samples in `X`.\n feature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\n ax : Matplotlib axes\n The axis on which to plot the ICE lines.\n pd_line_idx : int\n The sequential index of the plot. 
It will be unraveled to find the\n matching 2D position in the grid layout.\n line_kw : dict\n Dict with keywords passed when plotting the PD plot.\n \"\"\"\n line_idx = np.unravel_index(pd_line_idx, self.lines_.shape)\n self.lines_[line_idx] = ax.plot(\n feature_values,\n avg_preds,\n **line_kw,\n )[0]\n\n def _plot_one_way_partial_dependence(\n self,\n preds,\n avg_preds,\n feature_values,\n feature_idx,\n n_ice_lines,\n ax,\n n_cols,\n pd_plot_idx,\n n_lines,\n individual_line_kw,\n line_kw,\n ):\n \"\"\"Plot 1-way partial dependence: ICE and PDP.\n\n Parameters\n ----------\n preds : ndarray of shape \\\n (n_instances, n_grid_points) or None\n The predictions computed for all points of `feature_values` for a\n given feature for all samples in `X`.\n avg_preds : ndarray of shape (n_grid_points,)\n The average predictions for all points of `feature_values` for a\n given feature for all samples in `X`.\n feature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\n feature_idx : int\n The index corresponding to the target feature.\n n_ice_lines : int\n The number of ICE lines to plot.\n ax : Matplotlib axes\n The axis on which to plot the ICE and PDP lines.\n n_cols : int or None\n The number of column in the axis.\n pd_plot_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\n n_lines : int\n The total number of lines expected to be plot on the axis.\n individual_line_kw : dict\n Dict with keywords passed when plotting the ICE lines.\n line_kw : dict\n Dict with keywords passed when plotting the PD plot.\n \"\"\"\n from matplotlib import transforms # noqa\n\n if self.kind in (\"individual\", \"both\"):\n self._plot_ice_lines(\n preds[self.target_idx],\n feature_values,\n n_ice_lines,\n ax,\n pd_plot_idx,\n n_lines,\n individual_line_kw,\n )\n\n if self.kind in (\"average\", \"both\"):\n # the average is stored as the last line\n if self.kind == \"average\":\n pd_line_idx = pd_plot_idx\n else:\n pd_line_idx = pd_plot_idx * n_lines + n_ice_lines\n self._plot_average_dependence(\n avg_preds[self.target_idx].ravel(),\n feature_values,\n ax,\n pd_line_idx,\n line_kw,\n )\n\n trans = transforms.blended_transform_factory(\n ax.transData, ax.transAxes\n )\n # create the decile line for the vertical axis\n vlines_idx = np.unravel_index(pd_plot_idx, self.deciles_vlines_.shape)\n self.deciles_vlines_[vlines_idx] = ax.vlines(\n self.deciles[feature_idx[0]],\n 0,\n 0.05,\n transform=trans,\n color=\"k\",\n )\n # reset ylim which was overwritten by vlines\n ax.set_ylim(self.pdp_lim[1])\n\n # Set xlabel if it is not already set\n if not ax.get_xlabel():\n ax.set_xlabel(self.feature_names[feature_idx[0]])\n\n if n_cols is None or pd_plot_idx % n_cols == 0:\n if not ax.get_ylabel():\n ax.set_ylabel('Partial dependence')\n else:\n ax.set_yticklabels([])\n\n if line_kw.get(\"label\", None) and self.kind != 'individual':\n ax.legend()\n\n def _plot_two_way_partial_dependence(\n self,\n avg_preds,\n feature_values,\n feature_idx,\n ax,\n pd_plot_idx,\n Z_level,\n contour_kw,\n ):\n \"\"\"Plot 2-way partial dependence.\n\n Parameters\n ----------\n avg_preds : ndarray of shape \\\n (n_instances, n_grid_points, n_grid_points)\n The average predictions for all points of `feature_values[0]` and\n `feature_values[1]` for some given features for all samples in `X`.\n feature_values : seq of 1d array\n A sequence of array of the feature values for which the predictions\n have been 
computed.\n feature_idx : tuple of int\n The indices of the target features\n ax : Matplotlib axes\n The axis on which to plot the ICE and PDP lines.\n pd_plot_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\n Z_level : ndarray of shape (8, 8)\n The Z-level used to encode the average predictions.\n contour_kw : dict\n Dict with keywords passed when plotting the contours.\n \"\"\"\n from matplotlib import transforms # noqa\n\n XX, YY = np.meshgrid(feature_values[0], feature_values[1])\n Z = avg_preds[self.target_idx].T\n CS = ax.contour(XX, YY, Z, levels=Z_level, linewidths=0.5, colors=\"k\")\n contour_idx = np.unravel_index(pd_plot_idx, self.contours_.shape)\n self.contours_[contour_idx] = ax.contourf(\n XX,\n YY,\n Z,\n levels=Z_level,\n vmax=Z_level[-1],\n vmin=Z_level[0],\n **contour_kw,\n )\n ax.clabel(CS, fmt=\"%2.2f\", colors=\"k\", fontsize=10, inline=True)\n\n trans = transforms.blended_transform_factory(\n ax.transData, ax.transAxes\n )\n # create the decile line for the vertical axis\n xlim, ylim = ax.get_xlim(), ax.get_ylim()\n vlines_idx = np.unravel_index(pd_plot_idx, self.deciles_vlines_.shape)\n self.deciles_vlines_[vlines_idx] = ax.vlines(\n self.deciles[feature_idx[0]], 0, 0.05, transform=trans, color=\"k\",\n )\n # create the decile line for the horizontal axis\n hlines_idx = np.unravel_index(pd_plot_idx, self.deciles_hlines_.shape)\n self.deciles_hlines_[hlines_idx] = ax.hlines(\n self.deciles[feature_idx[1]], 0, 0.05, transform=trans, color=\"k\",\n )\n # reset xlim and ylim since they are overwritten by hlines and vlines\n ax.set_xlim(xlim)\n ax.set_ylim(ylim)\n\n # set xlabel if it is not already set\n if not ax.get_xlabel():\n ax.set_xlabel(self.feature_names[feature_idx[0]])\n ax.set_ylabel(self.feature_names[feature_idx[1]])\n\n @_deprecate_positional_args(version=\"1.1\")\n def plot(self, *, ax=None, n_cols=3, line_kw=None, contour_kw=None):\n \"\"\"Plot partial dependence plots.\n\n Parameters\n ----------\n ax : Matplotlib axes or array-like of Matplotlib axes, default=None\n - If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. The `n_cols` parameter controls the number of\n columns in the grid.\n - If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n - If `None`, a figure and a bounding axes is created and treated\n as the single axes case.\n\n n_cols : int, default=3\n The maximum number of columns in the grid plot. 
Only active when\n `ax` is a single axes or `None`.\n\n line_kw : dict, default=None\n Dict with keywords passed to the `matplotlib.pyplot.plot` call.\n For one-way partial dependence plots.\n\n contour_kw : dict, default=None\n Dict with keywords passed to the `matplotlib.pyplot.contourf`\n call for two-way partial dependence plots.\n\n Returns\n -------\n display : :class:`~sklearn.inspection.PartialDependenceDisplay`\n \"\"\"\n\n check_matplotlib_support(\"plot_partial_dependence\")\n import matplotlib.pyplot as plt # noqa\n from matplotlib.gridspec import GridSpecFromSubplotSpec # noqa\n\n if line_kw is None:\n line_kw = {}\n if contour_kw is None:\n contour_kw = {}\n\n if ax is None:\n _, ax = plt.subplots()\n\n default_contour_kws = {\"alpha\": 0.75}\n contour_kw = {**default_contour_kws, **contour_kw}\n\n default_line_kws = {\n \"color\": \"C0\",\n \"label\": \"average\" if self.kind == \"both\" else None,\n }\n line_kw = {**default_line_kws, **line_kw}\n\n individual_line_kw = line_kw.copy()\n del individual_line_kw[\"label\"]\n\n if self.kind == 'individual' or self.kind == 'both':\n individual_line_kw['alpha'] = 0.3\n individual_line_kw['linewidth'] = 0.5\n\n n_features = len(self.features)\n if self.kind in (\"individual\", \"both\"):\n n_ice_lines = self._get_sample_count(\n len(self.pd_results[0].individual[0])\n )\n if self.kind == \"individual\":\n n_lines = n_ice_lines\n else:\n n_lines = n_ice_lines + 1\n else:\n n_ice_lines = 0\n n_lines = 1\n\n if isinstance(ax, plt.Axes):\n # If ax was set off, it has most likely been set to off\n # by a previous call to plot.\n if not ax.axison:\n raise ValueError(\"The ax was already used in another plot \"\n \"function, please set ax=display.axes_ \"\n \"instead\")\n\n ax.set_axis_off()\n self.bounding_ax_ = ax\n self.figure_ = ax.figure\n\n n_cols = min(n_cols, n_features)\n n_rows = int(np.ceil(n_features / float(n_cols)))\n\n self.axes_ = np.empty((n_rows, n_cols), dtype=object)\n if self.kind == 'average':\n self.lines_ = np.empty((n_rows, n_cols), dtype=object)\n else:\n self.lines_ = np.empty((n_rows, n_cols, n_lines), dtype=object)\n self.contours_ = np.empty((n_rows, n_cols), dtype=object)\n\n axes_ravel = self.axes_.ravel()\n\n gs = GridSpecFromSubplotSpec(n_rows, n_cols,\n subplot_spec=ax.get_subplotspec())\n for i, spec in zip(range(n_features), gs):\n axes_ravel[i] = self.figure_.add_subplot(spec)\n\n else: # array-like\n ax = np.asarray(ax, dtype=object)\n if ax.size != n_features:\n raise ValueError(\"Expected ax to have {} axes, got {}\"\n .format(n_features, ax.size))\n\n if ax.ndim == 2:\n n_cols = ax.shape[1]\n else:\n n_cols = None\n\n self.bounding_ax_ = None\n self.figure_ = ax.ravel()[0].figure\n self.axes_ = ax\n if self.kind == 'average':\n self.lines_ = np.empty_like(ax, dtype=object)\n else:\n self.lines_ = np.empty(ax.shape + (n_lines,), dtype=object)\n self.contours_ = np.empty_like(ax, dtype=object)\n\n # create contour levels for two-way plots\n if 2 in self.pdp_lim:\n Z_level = np.linspace(*self.pdp_lim[2], num=8)\n\n self.deciles_vlines_ = np.empty_like(self.axes_, dtype=object)\n self.deciles_hlines_ = np.empty_like(self.axes_, dtype=object)\n\n for pd_plot_idx, (axi, feature_idx, pd_result) in enumerate(\n zip(self.axes_.ravel(), self.features, self.pd_results)\n ):\n avg_preds = None\n preds = None\n feature_values = pd_result[\"values\"]\n if self.kind == 'individual':\n preds = pd_result.individual\n elif self.kind == 'average':\n avg_preds = pd_result.average\n else: # kind='both'\n avg_preds = 
pd_result.average\n preds = pd_result.individual\n\n if len(feature_values) == 1:\n self._plot_one_way_partial_dependence(\n preds,\n avg_preds,\n feature_values[0],\n feature_idx,\n n_ice_lines,\n axi,\n n_cols,\n pd_plot_idx,\n n_lines,\n individual_line_kw,\n line_kw,\n )\n else:\n self._plot_two_way_partial_dependence(\n avg_preds,\n feature_values,\n feature_idx,\n axi,\n pd_plot_idx,\n Z_level,\n contour_kw,\n )\n\n return self", + "instance_attributes": [ + { + "name": "kind", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "subsample", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "axes_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "lines_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "contours_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "deciles_vlines_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "deciles_hlines_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression", + "name": "IsotonicRegression", + "qname": "sklearn.isotonic.IsotonicRegression", + "decorators": [], + "superclasses": ["RegressorMixin", "TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.isotonic/IsotonicRegression/__init__", + "scikit-learn/sklearn.isotonic/IsotonicRegression/_check_input_data_shape", + "scikit-learn/sklearn.isotonic/IsotonicRegression/_build_f", + "scikit-learn/sklearn.isotonic/IsotonicRegression/_build_y", + "scikit-learn/sklearn.isotonic/IsotonicRegression/fit", + "scikit-learn/sklearn.isotonic/IsotonicRegression/transform", + "scikit-learn/sklearn.isotonic/IsotonicRegression/predict", + "scikit-learn/sklearn.isotonic/IsotonicRegression/__getstate__", + "scikit-learn/sklearn.isotonic/IsotonicRegression/__setstate__", + "scikit-learn/sklearn.isotonic/IsotonicRegression/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "Isotonic regression model.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "docstring": "Isotonic regression model.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\ny_min : float, default=None\n Lower bound on the lowest predicted value (the minimum value may\n still be higher). If not set, defaults to -inf.\n\ny_max : float, default=None\n Upper bound on the highest predicted value (the maximum may still be\n lower). If not set, defaults to +inf.\n\nincreasing : bool or 'auto', default=True\n Determines whether the predictions should be constrained to increase\n or decrease with `X`. 'auto' will decide based on the Spearman\n correlation estimate's sign.\n\nout_of_bounds : {'nan', 'clip', 'raise'}, default='nan'\n Handles how `X` values outside of the training domain are handled\n during prediction.\n\n - 'nan', predictions will be NaN.\n - 'clip', predictions will be set to the value corresponding to\n the nearest train interval endpoint.\n - 'raise', a `ValueError` is raised.\n\nAttributes\n----------\nX_min_ : float\n Minimum value of input array `X_` for left bound.\n\nX_max_ : float\n Maximum value of input array `X_` for right bound.\n\nX_thresholds_ : ndarray of shape (n_thresholds,)\n Unique ascending `X` values used to interpolate\n the y = f(X) monotonic function.\n\n .. 
versionadded:: 0.24\n\ny_thresholds_ : ndarray of shape (n_thresholds,)\n De-duplicated `y` values suitable to interpolate the y = f(X)\n monotonic function.\n\n .. versionadded:: 0.24\n\nf_ : function\n The stepwise interpolating function that covers the input domain ``X``.\n\nincreasing_ : bool\n Inferred value for ``increasing``.\n\nNotes\n-----\nTies are broken using the secondary method from de Leeuw, 1977.\n\nReferences\n----------\nIsotonic Median Regression: A Linear Programming Approach\nNilotpal Chakravarti\nMathematics of Operations Research\nVol. 14, No. 2 (May, 1989), pp. 303-308\n\nIsotone Optimization in R : Pool-Adjacent-Violators\nAlgorithm (PAVA) and Active Set Methods\nde Leeuw, Hornik, Mair\nJournal of Statistical Software 2009\n\nCorrectness of Kruskal's algorithms for monotone regression with ties\nde Leeuw, Psychometrika, 1977\n\nExamples\n--------\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.isotonic import IsotonicRegression\n>>> X, y = make_regression(n_samples=10, n_features=1, random_state=41)\n>>> iso_reg = IsotonicRegression().fit(X, y)\n>>> iso_reg.predict([.1, .2])\narray([1.8628..., 3.7256...])", + "code": "class IsotonicRegression(RegressorMixin, TransformerMixin, BaseEstimator):\n \"\"\"Isotonic regression model.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.13\n\n Parameters\n ----------\n y_min : float, default=None\n Lower bound on the lowest predicted value (the minimum value may\n still be higher). If not set, defaults to -inf.\n\n y_max : float, default=None\n Upper bound on the highest predicted value (the maximum may still be\n lower). If not set, defaults to +inf.\n\n increasing : bool or 'auto', default=True\n Determines whether the predictions should be constrained to increase\n or decrease with `X`. 'auto' will decide based on the Spearman\n correlation estimate's sign.\n\n out_of_bounds : {'nan', 'clip', 'raise'}, default='nan'\n Handles how `X` values outside of the training domain are handled\n during prediction.\n\n - 'nan', predictions will be NaN.\n - 'clip', predictions will be set to the value corresponding to\n the nearest train interval endpoint.\n - 'raise', a `ValueError` is raised.\n\n Attributes\n ----------\n X_min_ : float\n Minimum value of input array `X_` for left bound.\n\n X_max_ : float\n Maximum value of input array `X_` for right bound.\n\n X_thresholds_ : ndarray of shape (n_thresholds,)\n Unique ascending `X` values used to interpolate\n the y = f(X) monotonic function.\n\n .. versionadded:: 0.24\n\n y_thresholds_ : ndarray of shape (n_thresholds,)\n De-duplicated `y` values suitable to interpolate the y = f(X)\n monotonic function.\n\n .. versionadded:: 0.24\n\n f_ : function\n The stepwise interpolating function that covers the input domain ``X``.\n\n increasing_ : bool\n Inferred value for ``increasing``.\n\n Notes\n -----\n Ties are broken using the secondary method from de Leeuw, 1977.\n\n References\n ----------\n Isotonic Median Regression: A Linear Programming Approach\n Nilotpal Chakravarti\n Mathematics of Operations Research\n Vol. 14, No. 2 (May, 1989), pp. 
303-308\n\n Isotone Optimization in R : Pool-Adjacent-Violators\n Algorithm (PAVA) and Active Set Methods\n de Leeuw, Hornik, Mair\n Journal of Statistical Software 2009\n\n Correctness of Kruskal's algorithms for monotone regression with ties\n de Leeuw, Psychometrika, 1977\n\n Examples\n --------\n >>> from sklearn.datasets import make_regression\n >>> from sklearn.isotonic import IsotonicRegression\n >>> X, y = make_regression(n_samples=10, n_features=1, random_state=41)\n >>> iso_reg = IsotonicRegression().fit(X, y)\n >>> iso_reg.predict([.1, .2])\n array([1.8628..., 3.7256...])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, y_min=None, y_max=None, increasing=True,\n out_of_bounds='nan'):\n self.y_min = y_min\n self.y_max = y_max\n self.increasing = increasing\n self.out_of_bounds = out_of_bounds\n\n def _check_input_data_shape(self, X):\n if not (X.ndim == 1 or (X.ndim == 2 and X.shape[1] == 1)):\n msg = \"Isotonic regression input X should be a 1d array or \" \\\n \"2d array with 1 feature\"\n raise ValueError(msg)\n\n def _build_f(self, X, y):\n \"\"\"Build the f_ interp1d function.\"\"\"\n\n # Handle the out_of_bounds argument by setting bounds_error\n if self.out_of_bounds not in [\"raise\", \"nan\", \"clip\"]:\n raise ValueError(\"The argument ``out_of_bounds`` must be in \"\n \"'nan', 'clip', 'raise'; got {0}\"\n .format(self.out_of_bounds))\n\n bounds_error = self.out_of_bounds == \"raise\"\n if len(y) == 1:\n # single y, constant prediction\n self.f_ = lambda x: y.repeat(x.shape)\n else:\n self.f_ = interpolate.interp1d(X, y, kind='linear',\n bounds_error=bounds_error)\n\n def _build_y(self, X, y, sample_weight, trim_duplicates=True):\n \"\"\"Build the y_ IsotonicRegression.\"\"\"\n self._check_input_data_shape(X)\n X = X.reshape(-1) # use 1d view\n\n # Determine increasing if auto-determination requested\n if self.increasing == 'auto':\n self.increasing_ = check_increasing(X, y)\n else:\n self.increasing_ = self.increasing\n\n # If sample_weight is passed, remove zero-weight values and clean\n # order\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n mask = sample_weight > 0\n X, y, sample_weight = X[mask], y[mask], sample_weight[mask]\n\n order = np.lexsort((y, X))\n X, y, sample_weight = [array[order] for array in [X, y, sample_weight]]\n unique_X, unique_y, unique_sample_weight = _make_unique(\n X, y, sample_weight)\n\n X = unique_X\n y = isotonic_regression(unique_y, sample_weight=unique_sample_weight,\n y_min=self.y_min, y_max=self.y_max,\n increasing=self.increasing_)\n\n # Handle the left and right bounds on X\n self.X_min_, self.X_max_ = np.min(X), np.max(X)\n\n if trim_duplicates:\n # Remove unnecessary points for faster prediction\n keep_data = np.ones((len(y),), dtype=bool)\n # Aside from the 1st and last point, remove points whose y values\n # are equal to both the point before and the point after it.\n keep_data[1:-1] = np.logical_or(\n np.not_equal(y[1:-1], y[:-2]),\n np.not_equal(y[1:-1], y[2:])\n )\n return X[keep_data], y[keep_data]\n else:\n # The ability to turn off trim_duplicates is only used to make it\n # easier to unit test that removing duplicates in y does not have\n # any impact on the resulting interpolation function (besides\n # prediction speed).\n return X, y\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples,) or (n_samples, 1)\n Training data.\n\n .. 
versionchanged:: 0.24\n Also accepts 2d array with 1 feature.\n\n y : array-like of shape (n_samples,)\n Training target.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights. If set to None, all weights will be set to 1 (equal\n weights).\n\n Returns\n -------\n self : object\n Returns an instance of self.\n\n Notes\n -----\n X is stored for future use, as :meth:`transform` needs X to interpolate\n new input data.\n \"\"\"\n check_params = dict(accept_sparse=False, ensure_2d=False)\n X = check_array(X, dtype=[np.float64, np.float32], **check_params)\n y = check_array(y, dtype=X.dtype, **check_params)\n check_consistent_length(X, y, sample_weight)\n\n # Transform y by running the isotonic regression algorithm and\n # transform X accordingly.\n X, y = self._build_y(X, y, sample_weight)\n\n # It is necessary to store the non-redundant part of the training set\n # on the model to make it possible to support model persistence via\n # the pickle module as the object built by scipy.interp1d is not\n # picklable directly.\n self.X_thresholds_, self.y_thresholds_ = X, y\n\n # Build the interpolation function\n self._build_f(X, y)\n return self\n\n def transform(self, T):\n \"\"\"Transform new data by linear interpolation\n\n Parameters\n ----------\n T : array-like of shape (n_samples,) or (n_samples, 1)\n Data to transform.\n\n .. versionchanged:: 0.24\n Also accepts 2d array with 1 feature.\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,)\n The transformed data\n \"\"\"\n\n if hasattr(self, 'X_thresholds_'):\n dtype = self.X_thresholds_.dtype\n else:\n dtype = np.float64\n\n T = check_array(T, dtype=dtype, ensure_2d=False)\n\n self._check_input_data_shape(T)\n T = T.reshape(-1) # use 1d view\n\n # Handle the out_of_bounds argument by clipping if needed\n if self.out_of_bounds not in [\"raise\", \"nan\", \"clip\"]:\n raise ValueError(\"The argument ``out_of_bounds`` must be in \"\n \"'nan', 'clip', 'raise'; got {0}\"\n .format(self.out_of_bounds))\n\n if self.out_of_bounds == \"clip\":\n T = np.clip(T, self.X_min_, self.X_max_)\n\n res = self.f_(T)\n\n # on scipy 0.17, interp1d up-casts to float64, so we cast back\n res = res.astype(T.dtype)\n\n return res\n\n def predict(self, T):\n \"\"\"Predict new data by linear interpolation.\n\n Parameters\n ----------\n T : array-like of shape (n_samples,) or (n_samples, 1)\n Data to transform.\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,)\n Transformed data.\n \"\"\"\n return self.transform(T)\n\n def __getstate__(self):\n \"\"\"Pickle-protocol - return state of the estimator. 
\"\"\"\n state = super().__getstate__()\n # remove interpolation method\n state.pop('f_', None)\n return state\n\n def __setstate__(self, state):\n \"\"\"Pickle-protocol - set state of the estimator.\n\n We need to rebuild the interpolation function.\n \"\"\"\n super().__setstate__(state)\n if hasattr(self, 'X_thresholds_') and hasattr(self, 'y_thresholds_'):\n self._build_f(self.X_thresholds_, self.y_thresholds_)\n\n def _more_tags(self):\n return {'X_types': ['1darray']}", + "instance_attributes": [ + { + "name": "increasing", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "out_of_bounds", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "f_", + "types": { + "kind": "NamedType", + "name": "Callable" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler", + "name": "AdditiveChi2Sampler", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/__init__", + "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/fit", + "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/transform", + "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/_transform_dense", + "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/_transform_sparse", + "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "Approximate feature map for additive chi2 kernel.\n\nUses sampling the fourier transform of the kernel characteristic\nat regular intervals.\n\nSince the kernel that is to be approximated is additive, the components of\nthe input vectors can be treated separately. Each entry in the original\nspace is transformed into 2*sample_steps+1 features, where sample_steps is\na parameter of the method. Typical values of sample_steps include 1, 2 and\n3.\n\nOptimal choices for the sampling interval for certain data ranges can be\ncomputed (see the reference). The default values should be reasonable.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Approximate feature map for additive chi2 kernel.\n\nUses sampling the fourier transform of the kernel characteristic\nat regular intervals.\n\nSince the kernel that is to be approximated is additive, the components of\nthe input vectors can be treated separately. Each entry in the original\nspace is transformed into 2*sample_steps+1 features, where sample_steps is\na parameter of the method. Typical values of sample_steps include 1, 2 and\n3.\n\nOptimal choices for the sampling interval for certain data ranges can be\ncomputed (see the reference). The default values should be reasonable.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsample_steps : int, default=2\n Gives the number of (complex) sampling points.\nsample_interval : float, default=None\n Sampling interval. Must be specified when sample_steps not in {1,2,3}.\n\nAttributes\n----------\nsample_interval_ : float\n Stored sampling interval. 
Specified as a parameter if sample_steps not\n in {1,2,3}.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.linear_model import SGDClassifier\n>>> from sklearn.kernel_approximation import AdditiveChi2Sampler\n>>> X, y = load_digits(return_X_y=True)\n>>> chi2sampler = AdditiveChi2Sampler(sample_steps=2)\n>>> X_transformed = chi2sampler.fit_transform(X, y)\n>>> clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)\n>>> clf.fit(X_transformed, y)\nSGDClassifier(max_iter=5, random_state=0)\n>>> clf.score(X_transformed, y)\n0.9499...\n\nNotes\n-----\nThis estimator approximates a slightly different version of the additive\nchi squared kernel than ``metric.additive_chi2`` computes.\n\nSee Also\n--------\nSkewedChi2Sampler : A Fourier-approximation to a non-additive variant of\n the chi squared kernel.\n\nsklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.\n\nsklearn.metrics.pairwise.additive_chi2_kernel : The exact additive chi\n squared kernel.\n\nReferences\n----------\nSee `\"Efficient additive kernels via explicit feature maps\"\n`_\nA. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence,\n2011", + "code": "class AdditiveChi2Sampler(TransformerMixin, BaseEstimator):\n \"\"\"Approximate feature map for additive chi2 kernel.\n\n Uses sampling of the Fourier transform of the kernel characteristic\n at regular intervals.\n\n Since the kernel that is to be approximated is additive, the components of\n the input vectors can be treated separately. Each entry in the original\n space is transformed into 2*sample_steps+1 features, where sample_steps is\n a parameter of the method. Typical values of sample_steps include 1, 2 and\n 3.\n\n Optimal choices for the sampling interval for certain data ranges can be\n computed (see the reference). The default values should be reasonable.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n sample_steps : int, default=2\n Gives the number of (complex) sampling points.\n sample_interval : float, default=None\n Sampling interval. Must be specified when sample_steps not in {1,2,3}.\n\n Attributes\n ----------\n sample_interval_ : float\n Stored sampling interval. Specified as a parameter if sample_steps not\n in {1,2,3}.\n\n Examples\n --------\n >>> from sklearn.datasets import load_digits\n >>> from sklearn.linear_model import SGDClassifier\n >>> from sklearn.kernel_approximation import AdditiveChi2Sampler\n >>> X, y = load_digits(return_X_y=True)\n >>> chi2sampler = AdditiveChi2Sampler(sample_steps=2)\n >>> X_transformed = chi2sampler.fit_transform(X, y)\n >>> clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)\n >>> clf.fit(X_transformed, y)\n SGDClassifier(max_iter=5, random_state=0)\n >>> clf.score(X_transformed, y)\n 0.9499...\n\n Notes\n -----\n This estimator approximates a slightly different version of the additive\n chi squared kernel than ``metric.additive_chi2`` computes.\n\n See Also\n --------\n SkewedChi2Sampler : A Fourier-approximation to a non-additive variant of\n the chi squared kernel.\n\n sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.\n\n sklearn.metrics.pairwise.additive_chi2_kernel : The exact additive chi\n squared kernel.\n\n References\n ----------\n See `\"Efficient additive kernels via explicit feature maps\"\n `_\n A. Vedaldi and A. 
Zisserman, Pattern Analysis and Machine Intelligence,\n 2011\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, sample_steps=2, sample_interval=None):\n self.sample_steps = sample_steps\n self.sample_interval = sample_interval\n\n def fit(self, X, y=None):\n \"\"\"Set the parameters\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n self : object\n Returns the transformer.\n \"\"\"\n X = self._validate_data(X, accept_sparse='csr')\n check_non_negative(X, 'X in AdditiveChi2Sampler.fit')\n\n if self.sample_interval is None:\n # See reference, figure 2 c)\n if self.sample_steps == 1:\n self.sample_interval_ = 0.8\n elif self.sample_steps == 2:\n self.sample_interval_ = 0.5\n elif self.sample_steps == 3:\n self.sample_interval_ = 0.4\n else:\n raise ValueError(\"If sample_steps is not in [1, 2, 3],\"\n \" you need to provide sample_interval\")\n else:\n self.sample_interval_ = self.sample_interval\n return self\n\n def transform(self, X):\n \"\"\"Apply approximate feature map to X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n\n Returns\n -------\n X_new : {ndarray, sparse matrix}, \\\n shape = (n_samples, n_features * (2*sample_steps + 1))\n Whether the return value is an array or sparse matrix depends on\n the type of the input X.\n \"\"\"\n msg = (\"%(name)s is not fitted. Call fit to set the parameters before\"\n \" calling transform\")\n check_is_fitted(self, msg=msg)\n\n X = self._validate_data(X, accept_sparse='csr', reset=False)\n check_non_negative(X, 'X in AdditiveChi2Sampler.transform')\n sparse = sp.issparse(X)\n\n # zeroth component\n # 1/cosh = sech\n # cosh(0) = 1.0\n\n transf = self._transform_sparse if sparse else self._transform_dense\n return transf(X)\n\n def _transform_dense(self, X):\n non_zero = (X != 0.0)\n X_nz = X[non_zero]\n\n X_step = np.zeros_like(X)\n X_step[non_zero] = np.sqrt(X_nz * self.sample_interval_)\n\n X_new = [X_step]\n\n log_step_nz = self.sample_interval_ * np.log(X_nz)\n step_nz = 2 * X_nz * self.sample_interval_\n\n for j in range(1, self.sample_steps):\n factor_nz = np.sqrt(step_nz /\n np.cosh(np.pi * j * self.sample_interval_))\n\n X_step = np.zeros_like(X)\n X_step[non_zero] = factor_nz * np.cos(j * log_step_nz)\n X_new.append(X_step)\n\n X_step = np.zeros_like(X)\n X_step[non_zero] = factor_nz * np.sin(j * log_step_nz)\n X_new.append(X_step)\n\n return np.hstack(X_new)\n\n def _transform_sparse(self, X):\n indices = X.indices.copy()\n indptr = X.indptr.copy()\n\n data_step = np.sqrt(X.data * self.sample_interval_)\n X_step = sp.csr_matrix((data_step, indices, indptr),\n shape=X.shape, dtype=X.dtype, copy=False)\n X_new = [X_step]\n\n log_step_nz = self.sample_interval_ * np.log(X.data)\n step_nz = 2 * X.data * self.sample_interval_\n\n for j in range(1, self.sample_steps):\n factor_nz = np.sqrt(step_nz /\n np.cosh(np.pi * j * self.sample_interval_))\n\n data_step = factor_nz * np.cos(j * log_step_nz)\n X_step = sp.csr_matrix((data_step, indices, indptr),\n shape=X.shape, dtype=X.dtype, copy=False)\n X_new.append(X_step)\n\n data_step = factor_nz * np.sin(j * log_step_nz)\n X_step = sp.csr_matrix((data_step, indices, indptr),\n shape=X.shape, dtype=X.dtype, copy=False)\n X_new.append(X_step)\n\n return sp.hstack(X_new)\n\n def _more_tags(self):\n return {'stateless': True,\n 'requires_positive_X': True}", + "instance_attributes": [ + { + 
"name": "sample_steps", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "sample_interval_", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem", + "name": "Nystroem", + "qname": "sklearn.kernel_approximation.Nystroem", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.kernel_approximation/Nystroem/__init__", + "scikit-learn/sklearn.kernel_approximation/Nystroem/fit", + "scikit-learn/sklearn.kernel_approximation/Nystroem/transform", + "scikit-learn/sklearn.kernel_approximation/Nystroem/_get_kernel_params", + "scikit-learn/sklearn.kernel_approximation/Nystroem/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "Approximate a kernel map using a subset of the training data.\n\nConstructs an approximate feature map for an arbitrary kernel\nusing a subset of the data as basis.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "docstring": "Approximate a kernel map using a subset of the training data.\n\nConstructs an approximate feature map for an arbitrary kernel\nusing a subset of the data as basis.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nkernel : string or callable, default='rbf'\n Kernel map to be approximated. A callable should accept two arguments\n and the keyword arguments passed to this object as kernel_params, and\n should return a floating point number.\n\ngamma : float, default=None\n Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n and sigmoid kernels. Interpretation of the default value is left to\n the kernel; see the documentation for sklearn.metrics.pairwise.\n Ignored by other kernels.\n\ncoef0 : float, default=None\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\ndegree : float, default=None\n Degree of the polynomial kernel. Ignored by other kernels.\n\nkernel_params : dict, default=None\n Additional parameters (keyword arguments) for kernel function passed\n as callable object.\n\nn_components : int, default=100\n Number of features to construct.\n How many data points will be used to construct the mapping.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the uniform sampling without\n replacement of n_components of the training data to construct the basis\n kernel.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the kernel matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Subset of training points used to construct the feature map.\n\ncomponent_indices_ : ndarray of shape (n_components)\n Indices of ``components_`` in the training set.\n\nnormalization_ : ndarray of shape (n_components, n_components)\n Normalization matrix needed for embedding.\n Square root of the kernel matrix on ``components_``.\n\nExamples\n--------\n>>> from sklearn import datasets, svm\n>>> from sklearn.kernel_approximation import Nystroem\n>>> X, y = datasets.load_digits(n_class=9, return_X_y=True)\n>>> data = X / 16.\n>>> clf = svm.LinearSVC()\n>>> feature_map_nystroem = Nystroem(gamma=.2,\n... random_state=1,\n... n_components=300)\n>>> data_transformed = feature_map_nystroem.fit_transform(data)\n>>> clf.fit(data_transformed, y)\nLinearSVC()\n>>> clf.score(data_transformed, y)\n0.9987...\n\nReferences\n----------\n* Williams, C.K.I. and Seeger, M.\n \"Using the Nystroem method to speed up kernel machines\",\n Advances in neural information processing systems 2001\n\n* T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. Zhou\n \"Nystroem Method vs Random Fourier Features: A Theoretical and Empirical\n Comparison\",\n Advances in Neural Information Processing Systems 2012\n\n\nSee Also\n--------\nRBFSampler : An approximation to the RBF kernel using random Fourier\n features.\n\nsklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.", + "code": "class Nystroem(TransformerMixin, BaseEstimator):\n \"\"\"Approximate a kernel map using a subset of the training data.\n\n Constructs an approximate feature map for an arbitrary kernel\n using a subset of the data as basis.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.13\n\n Parameters\n ----------\n kernel : string or callable, default='rbf'\n Kernel map to be approximated. A callable should accept two arguments\n and the keyword arguments passed to this object as kernel_params, and\n should return a floating point number.\n\n gamma : float, default=None\n Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n and sigmoid kernels. Interpretation of the default value is left to\n the kernel; see the documentation for sklearn.metrics.pairwise.\n Ignored by other kernels.\n\n coef0 : float, default=None\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\n degree : float, default=None\n Degree of the polynomial kernel. Ignored by other kernels.\n\n kernel_params : dict, default=None\n Additional parameters (keyword arguments) for kernel function passed\n as callable object.\n\n n_components : int, default=100\n Number of features to construct.\n How many data points will be used to construct the mapping.\n\n random_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the uniform sampling without\n replacement of n_components of the training data to construct the basis\n kernel.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the kernel matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. 
versionadded:: 0.24\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n Subset of training points used to construct the feature map.\n\n component_indices_ : ndarray of shape (n_components)\n Indices of ``components_`` in the training set.\n\n normalization_ : ndarray of shape (n_components, n_components)\n Normalization matrix needed for embedding.\n Square root of the kernel matrix on ``components_``.\n\n Examples\n --------\n >>> from sklearn import datasets, svm\n >>> from sklearn.kernel_approximation import Nystroem\n >>> X, y = datasets.load_digits(n_class=9, return_X_y=True)\n >>> data = X / 16.\n >>> clf = svm.LinearSVC()\n >>> feature_map_nystroem = Nystroem(gamma=.2,\n ... random_state=1,\n ... n_components=300)\n >>> data_transformed = feature_map_nystroem.fit_transform(data)\n >>> clf.fit(data_transformed, y)\n LinearSVC()\n >>> clf.score(data_transformed, y)\n 0.9987...\n\n References\n ----------\n * Williams, C.K.I. and Seeger, M.\n \"Using the Nystroem method to speed up kernel machines\",\n Advances in neural information processing systems 2001\n\n * T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. Zhou\n \"Nystroem Method vs Random Fourier Features: A Theoretical and Empirical\n Comparison\",\n Advances in Neural Information Processing Systems 2012\n\n\n See Also\n --------\n RBFSampler : An approximation to the RBF kernel using random Fourier\n features.\n\n sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, kernel=\"rbf\", *, gamma=None, coef0=None, degree=None,\n kernel_params=None, n_components=100, random_state=None,\n n_jobs=None):\n\n self.kernel = kernel\n self.gamma = gamma\n self.coef0 = coef0\n self.degree = degree\n self.kernel_params = kernel_params\n self.n_components = n_components\n self.random_state = random_state\n self.n_jobs = n_jobs\n\n def fit(self, X, y=None):\n \"\"\"Fit estimator to data.\n\n Samples a subset of training points, computes kernel\n on these and computes normalization matrix.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n \"\"\"\n X = self._validate_data(X, accept_sparse='csr')\n rnd = check_random_state(self.random_state)\n n_samples = X.shape[0]\n\n # get basis vectors\n if self.n_components > n_samples:\n # XXX should we just bail?\n n_components = n_samples\n warnings.warn(\"n_components > n_samples. 
This is not possible.\\n\"\n \"n_components was set to n_samples, which results\"\n \" in inefficient evaluation of the full kernel.\")\n\n else:\n n_components = self.n_components\n n_components = min(n_samples, n_components)\n inds = rnd.permutation(n_samples)\n basis_inds = inds[:n_components]\n basis = X[basis_inds]\n\n basis_kernel = pairwise_kernels(basis, metric=self.kernel,\n filter_params=True,\n n_jobs=self.n_jobs,\n **self._get_kernel_params())\n\n # sqrt of kernel matrix on basis vectors\n U, S, V = svd(basis_kernel)\n S = np.maximum(S, 1e-12)\n self.normalization_ = np.dot(U / np.sqrt(S), V)\n self.components_ = basis\n self.component_indices_ = inds\n return self\n\n def transform(self, X):\n \"\"\"Apply feature map to X.\n\n Computes an approximate feature map using the kernel\n between some training points and X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data to transform.\n\n Returns\n -------\n X_transformed : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse='csr', reset=False)\n\n kernel_params = self._get_kernel_params()\n embedded = pairwise_kernels(X, self.components_,\n metric=self.kernel,\n filter_params=True,\n n_jobs=self.n_jobs,\n **kernel_params)\n return np.dot(embedded, self.normalization_.T)\n\n def _get_kernel_params(self):\n params = self.kernel_params\n if params is None:\n params = {}\n if not callable(self.kernel) and self.kernel != 'precomputed':\n for param in (KERNEL_PARAMS[self.kernel]):\n if getattr(self, param) is not None:\n params[param] = getattr(self, param)\n else:\n if (self.gamma is not None or\n self.coef0 is not None or\n self.degree is not None):\n raise ValueError(\"Don't pass gamma, coef0 or degree to \"\n \"Nystroem if using a callable \"\n \"or precomputed kernel\")\n\n return params\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_transformer_preserve_dtypes':\n 'dtypes are preserved but not at a close enough precision',\n },\n 'preserves_dtype': [np.float64, np.float32]\n }", + "instance_attributes": [ + { + "name": "kernel", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "normalization_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch", + "name": "PolynomialCountSketch", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch", + "decorators": [], + "superclasses": ["BaseEstimator", "TransformerMixin"], + "methods": [ + "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/__init__", + "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/fit", + "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/transform" + ], + "is_public": true, + "reexported_by": [], + "description": "Polynomial kernel approximation via Tensor Sketch.\n\nImplements Tensor Sketch, which approximates the feature map\nof the polynomial kernel::\n\n K(X, Y) = (gamma * + coef0)^degree\n\nby efficiently computing a Count Sketch of the outer product of a\nvector with itself using Fast Fourier Transforms (FFT). Read more in the\n:ref:`User Guide `.\n\n.. 
versionadded:: 0.24", + "docstring": "Polynomial kernel approximation via Tensor Sketch.\n\nImplements Tensor Sketch, which approximates the feature map\nof the polynomial kernel::\n\n K(X, Y) = (gamma * + coef0)^degree\n\nby efficiently computing a Count Sketch of the outer product of a\nvector with itself using Fast Fourier Transforms (FFT). Read more in the\n:ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\ngamma : float, default=1.0\n Parameter of the polynomial kernel whose feature map\n will be approximated.\n\ndegree : int, default=2\n Degree of the polynomial kernel whose feature map\n will be approximated.\n\ncoef0 : int, default=0\n Constant term of the polynomial kernel whose feature map\n will be approximated.\n\nn_components : int, default=100\n Dimensionality of the output feature space. Usually, n_components\n should be greater than the number of features in input samples in\n order to achieve good performance. The optimal score / run time\n balance is typically achieved around n_components = 10 * n_features,\n but this depends on the specific dataset being used.\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for indexHash and bitHash\n initialization. Pass an int for reproducible results across multiple\n function calls. See :term:`Glossary `.\n\nAttributes\n----------\nindexHash_ : ndarray of shape (degree, n_features), dtype=int64\n Array of indexes in range [0, n_components) used to represent\n the 2-wise independent hash functions for Count Sketch computation.\n\nbitHash_ : ndarray of shape (degree, n_features), dtype=float32\n Array with random entries in {+1, -1}, used to represent\n the 2-wise independent hash functions for Count Sketch computation.\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import PolynomialCountSketch\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> ps = PolynomialCountSketch(degree=3, random_state=1)\n>>> X_features = ps.fit_transform(X)\n>>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=10)\n>>> clf.score(X_features, y)\n1.0", + "code": "class PolynomialCountSketch(BaseEstimator, TransformerMixin):\n \"\"\"Polynomial kernel approximation via Tensor Sketch.\n\n Implements Tensor Sketch, which approximates the feature map\n of the polynomial kernel::\n\n K(X, Y) = (gamma * + coef0)^degree\n\n by efficiently computing a Count Sketch of the outer product of a\n vector with itself using Fast Fourier Transforms (FFT). Read more in the\n :ref:`User Guide `.\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n gamma : float, default=1.0\n Parameter of the polynomial kernel whose feature map\n will be approximated.\n\n degree : int, default=2\n Degree of the polynomial kernel whose feature map\n will be approximated.\n\n coef0 : int, default=0\n Constant term of the polynomial kernel whose feature map\n will be approximated.\n\n n_components : int, default=100\n Dimensionality of the output feature space. Usually, n_components\n should be greater than the number of features in input samples in\n order to achieve good performance. The optimal score / run time\n balance is typically achieved around n_components = 10 * n_features,\n but this depends on the specific dataset being used.\n\n random_state : int, RandomState instance, default=None\n Determines random number generation for indexHash and bitHash\n initialization. 
Pass an int for reproducible results across multiple\n function calls. See :term:`Glossary `.\n\n Attributes\n ----------\n indexHash_ : ndarray of shape (degree, n_features), dtype=int64\n Array of indexes in range [0, n_components) used to represent\n the 2-wise independent hash functions for Count Sketch computation.\n\n bitHash_ : ndarray of shape (degree, n_features), dtype=float32\n Array with random entries in {+1, -1}, used to represent\n the 2-wise independent hash functions for Count Sketch computation.\n\n Examples\n --------\n >>> from sklearn.kernel_approximation import PolynomialCountSketch\n >>> from sklearn.linear_model import SGDClassifier\n >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n >>> y = [0, 0, 1, 1]\n >>> ps = PolynomialCountSketch(degree=3, random_state=1)\n >>> X_features = ps.fit_transform(X)\n >>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n >>> clf.fit(X_features, y)\n SGDClassifier(max_iter=10)\n >>> clf.score(X_features, y)\n 1.0\n \"\"\"\n\n def __init__(self, *, gamma=1., degree=2, coef0=0, n_components=100,\n random_state=None):\n self.gamma = gamma\n self.degree = degree\n self.coef0 = coef0\n self.n_components = n_components\n self.random_state = random_state\n\n def fit(self, X, y=None):\n \"\"\"Fit the model with X.\n\n Initializes the internal variables. The method needs no information\n about the distribution of data, so we only care about n_features in X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n self : object\n Returns the transformer.\n \"\"\"\n if not self.degree >= 1:\n raise ValueError(f\"degree={self.degree} should be >=1.\")\n\n X = self._validate_data(X, accept_sparse=\"csc\")\n random_state = check_random_state(self.random_state)\n\n n_features = X.shape[1]\n if self.coef0 != 0:\n n_features += 1\n\n self.indexHash_ = random_state.randint(0, high=self.n_components,\n size=(self.degree, n_features))\n\n self.bitHash_ = random_state.choice(a=[-1, 1],\n size=(self.degree, n_features))\n return self\n\n def transform(self, X):\n \"\"\"Generate the feature map approximation for X.\n\n Parameters\n ----------\n X : {array-like}, shape (n_samples, n_features)\n New data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n X_new : array-like, shape (n_samples, n_components)\n \"\"\"\n\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse=\"csc\", reset=False)\n\n X_gamma = np.sqrt(self.gamma) * X\n\n if sp.issparse(X_gamma) and self.coef0 != 0:\n X_gamma = sp.hstack([X_gamma, np.sqrt(self.coef0) *\n np.ones((X_gamma.shape[0], 1))],\n format=\"csc\")\n\n elif not sp.issparse(X_gamma) and self.coef0 != 0:\n X_gamma = np.hstack([X_gamma, np.sqrt(self.coef0) *\n np.ones((X_gamma.shape[0], 1))])\n\n if X_gamma.shape[1] != self.indexHash_.shape[1]:\n raise ValueError(\"Number of features of test samples does not\"\n \" match that of training samples.\")\n\n count_sketches = np.zeros(\n (X_gamma.shape[0], self.degree, self.n_components))\n\n if sp.issparse(X_gamma):\n for j in range(X_gamma.shape[1]):\n for d in range(self.degree):\n iHashIndex = self.indexHash_[d, j]\n iHashBit = self.bitHash_[d, j]\n count_sketches[:, d, iHashIndex] += \\\n (iHashBit * X_gamma[:, j]).toarray().ravel()\n\n else:\n for j in range(X_gamma.shape[1]):\n for d in range(self.degree):\n iHashIndex = self.indexHash_[d, j]\n iHashBit = 
self.bitHash_[d, j]\n count_sketches[:, d, iHashIndex] += \\\n iHashBit * X_gamma[:, j]\n\n # For each sample, compute a count sketch of phi(x) using the polynomial\n # multiplication (via FFT) of p count sketches of x.\n count_sketches_fft = fft(count_sketches, axis=2, overwrite_x=True)\n count_sketches_fft_prod = np.prod(count_sketches_fft, axis=1)\n data_sketch = np.real(ifft(count_sketches_fft_prod, overwrite_x=True))\n\n return data_sketch", + "instance_attributes": [ + { + "name": "gamma", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "degree", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "coef0", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/RBFSampler", + "name": "RBFSampler", + "qname": "sklearn.kernel_approximation.RBFSampler", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.kernel_approximation/RBFSampler/__init__", + "scikit-learn/sklearn.kernel_approximation/RBFSampler/fit", + "scikit-learn/sklearn.kernel_approximation/RBFSampler/transform" + ], + "is_public": true, + "reexported_by": [], + "description": "Approximates feature map of an RBF kernel by Monte Carlo approximation\nof its Fourier transform.\n\nIt implements a variant of Random Kitchen Sinks.[1]\n\nRead more in the :ref:`User Guide `.", + "docstring": "Approximates feature map of an RBF kernel by Monte Carlo approximation\nof its Fourier transform.\n\nIt implements a variant of Random Kitchen Sinks.[1]\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ngamma : float, default=1.0\n Parameter of RBF kernel: exp(-gamma * x^2)\n\nn_components : int, default=100\n Number of Monte Carlo samples per original feature.\n Equals the dimensionality of the computed feature space.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n weights and random offset when fitting the training data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nrandom_offset_ : ndarray of shape (n_components,), dtype=float64\n Random offset used to compute the projection in the `n_components`\n dimensions of the feature space.\n\nrandom_weights_ : ndarray of shape (n_features, n_components), dtype=float64\n Random projection directions drawn from the Fourier transform\n of the RBF kernel.\n\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import RBFSampler\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> rbf_feature = RBFSampler(gamma=1, random_state=1)\n>>> X_features = rbf_feature.fit_transform(X)\n>>> clf = SGDClassifier(max_iter=5, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=5)\n>>> clf.score(X_features, y)\n1.0\n\nNotes\n-----\nSee \"Random Features for Large-Scale Kernel Machines\" by A. Rahimi and\nBenjamin Recht.\n\n[1] \"Weighted Sums of Random Kitchen Sinks: Replacing\nminimization with randomization in learning\" by A. 
Rahimi and\nBenjamin Recht.\n(https://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf)", + "code": "class RBFSampler(TransformerMixin, BaseEstimator):\n \"\"\"Approximates feature map of an RBF kernel by Monte Carlo approximation\n of its Fourier transform.\n\n It implements a variant of Random Kitchen Sinks.[1]\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n gamma : float, default=1.0\n Parameter of RBF kernel: exp(-gamma * x^2)\n\n n_components : int, default=100\n Number of Monte Carlo samples per original feature.\n Equals the dimensionality of the computed feature space.\n\n random_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n weights and random offset when fitting the training data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n random_offset_ : ndarray of shape (n_components,), dtype=float64\n Random offset used to compute the projection in the `n_components`\n dimensions of the feature space.\n\n random_weights_ : ndarray of shape (n_features, n_components),\\\n dtype=float64\n Random projection directions drawn from the Fourier transform\n of the RBF kernel.\n\n\n Examples\n --------\n >>> from sklearn.kernel_approximation import RBFSampler\n >>> from sklearn.linear_model import SGDClassifier\n >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n >>> y = [0, 0, 1, 1]\n >>> rbf_feature = RBFSampler(gamma=1, random_state=1)\n >>> X_features = rbf_feature.fit_transform(X)\n >>> clf = SGDClassifier(max_iter=5, tol=1e-3)\n >>> clf.fit(X_features, y)\n SGDClassifier(max_iter=5)\n >>> clf.score(X_features, y)\n 1.0\n\n Notes\n -----\n See \"Random Features for Large-Scale Kernel Machines\" by A. Rahimi and\n Benjamin Recht.\n\n [1] \"Weighted Sums of Random Kitchen Sinks: Replacing\n minimization with randomization in learning\" by A. 
Rahimi and\n Benjamin Recht.\n (https://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf)\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, gamma=1., n_components=100, random_state=None):\n self.gamma = gamma\n self.n_components = n_components\n self.random_state = random_state\n\n def fit(self, X, y=None):\n \"\"\"Fit the model with X.\n\n Samples random projection according to n_features.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n self : object\n Returns the transformer.\n \"\"\"\n\n X = self._validate_data(X, accept_sparse='csr')\n random_state = check_random_state(self.random_state)\n n_features = X.shape[1]\n\n self.random_weights_ = (np.sqrt(2 * self.gamma) * random_state.normal(\n size=(n_features, self.n_components)))\n\n self.random_offset_ = random_state.uniform(0, 2 * np.pi,\n size=self.n_components)\n return self\n\n def transform(self, X):\n \"\"\"Apply the approximate feature map to X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n New data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n X_new : array-like, shape (n_samples, n_components)\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, accept_sparse='csr', reset=False)\n projection = safe_sparse_dot(X, self.random_weights_)\n projection += self.random_offset_\n np.cos(projection, projection)\n projection *= np.sqrt(2.) / np.sqrt(self.n_components)\n return projection", + "instance_attributes": [ + { + "name": "gamma", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler", + "name": "SkewedChi2Sampler", + "qname": "sklearn.kernel_approximation.SkewedChi2Sampler", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/__init__", + "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/fit", + "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/transform" + ], + "is_public": true, + "reexported_by": [], + "description": "Approximates feature map of the \"skewed chi-squared\" kernel by Monte\nCarlo approximation of its Fourier transform.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Approximates feature map of the \"skewed chi-squared\" kernel by Monte\nCarlo approximation of its Fourier transform.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nskewedness : float, default=1.0\n \"skewedness\" parameter of the kernel. 
Needs to be cross-validated.\n\nn_components : int, default=100\n number of Monte Carlo samples per original feature.\n Equals the dimensionality of the computed feature space.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n weights and random offset when fitting the training data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nrandom_weights_ : ndarray of shape (n_features, n_components)\n Weight array, sampled from a secant hyperbolic distribution, which will\n be used to linearly transform the log of the data.\n\nrandom_offset_ : ndarray of shape (n_features, n_components)\n Bias term, which will be added to the data. It is uniformly distributed\n between 0 and 2*pi.\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import SkewedChi2Sampler\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> chi2_feature = SkewedChi2Sampler(skewedness=.01,\n... n_components=10,\n... random_state=0)\n>>> X_features = chi2_feature.fit_transform(X, y)\n>>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=10)\n>>> clf.score(X_features, y)\n1.0\n\nReferences\n----------\nSee \"Random Fourier Approximations for Skewed Multiplicative Histogram\nKernels\" by Fuxin Li, Catalin Ionescu and Cristian Sminchisescu.\n\nSee Also\n--------\nAdditiveChi2Sampler : A different approach for approximating an additive\n variant of the chi squared kernel.\n\nsklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.", + "code": "class SkewedChi2Sampler(TransformerMixin, BaseEstimator):\n \"\"\"Approximates feature map of the \"skewed chi-squared\" kernel by Monte\n Carlo approximation of its Fourier transform.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n skewedness : float, default=1.0\n \"skewedness\" parameter of the kernel. Needs to be cross-validated.\n\n n_components : int, default=100\n number of Monte Carlo samples per original feature.\n Equals the dimensionality of the computed feature space.\n\n random_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n weights and random offset when fitting the training data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n random_weights_ : ndarray of shape (n_features, n_components)\n Weight array, sampled from a secant hyperbolic distribution, which will\n be used to linearly transform the log of the data.\n\n random_offset_ : ndarray of shape (n_features, n_components)\n Bias term, which will be added to the data. It is uniformly distributed\n between 0 and 2*pi.\n\n Examples\n --------\n >>> from sklearn.kernel_approximation import SkewedChi2Sampler\n >>> from sklearn.linear_model import SGDClassifier\n >>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n >>> y = [0, 0, 1, 1]\n >>> chi2_feature = SkewedChi2Sampler(skewedness=.01,\n ... n_components=10,\n ... 
random_state=0)\n >>> X_features = chi2_feature.fit_transform(X, y)\n >>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n >>> clf.fit(X_features, y)\n SGDClassifier(max_iter=10)\n >>> clf.score(X_features, y)\n 1.0\n\n References\n ----------\n See \"Random Fourier Approximations for Skewed Multiplicative Histogram\n Kernels\" by Fuxin Li, Catalin Ionescu and Cristian Sminchisescu.\n\n See Also\n --------\n AdditiveChi2Sampler : A different approach for approximating an additive\n variant of the chi squared kernel.\n\n sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, skewedness=1., n_components=100, random_state=None):\n self.skewedness = skewedness\n self.n_components = n_components\n self.random_state = random_state\n\n def fit(self, X, y=None):\n \"\"\"Fit the model with X.\n\n Samples random projection according to n_features.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n self : object\n Returns the transformer.\n \"\"\"\n\n X = self._validate_data(X)\n random_state = check_random_state(self.random_state)\n n_features = X.shape[1]\n uniform = random_state.uniform(size=(n_features, self.n_components))\n # transform by inverse CDF of sech\n self.random_weights_ = (1. / np.pi\n * np.log(np.tan(np.pi / 2. * uniform)))\n self.random_offset_ = random_state.uniform(0, 2 * np.pi,\n size=self.n_components)\n return self\n\n def transform(self, X):\n \"\"\"Apply the approximate feature map to X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n New data, where n_samples is the number of samples\n and n_features is the number of features. All values of X must be\n strictly greater than \"-skewedness\".\n\n Returns\n -------\n X_new : array-like, shape (n_samples, n_components)\n \"\"\"\n check_is_fitted(self)\n\n X = as_float_array(X, copy=True)\n X = self._validate_data(X, copy=False, reset=False)\n if (X <= -self.skewedness).any():\n raise ValueError(\"X may not contain entries smaller than\"\n \" -skewedness.\")\n\n X += self.skewedness\n np.log(X, X)\n projection = safe_sparse_dot(X, self.random_weights_)\n projection += self.random_offset_\n np.cos(projection, projection)\n projection *= np.sqrt(2.) / np.sqrt(self.n_components)\n return projection", + "instance_attributes": [ + { + "name": "skewedness", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge", + "name": "KernelRidge", + "qname": "sklearn.kernel_ridge.KernelRidge", + "decorators": [], + "superclasses": ["MultiOutputMixin", "RegressorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.kernel_ridge/KernelRidge/__init__", + "scikit-learn/sklearn.kernel_ridge/KernelRidge/_get_kernel", + "scikit-learn/sklearn.kernel_ridge/KernelRidge/_more_tags", + "scikit-learn/sklearn.kernel_ridge/KernelRidge/_pairwise@getter", + "scikit-learn/sklearn.kernel_ridge/KernelRidge/fit", + "scikit-learn/sklearn.kernel_ridge/KernelRidge/predict" + ], + "is_public": true, + "reexported_by": [], + "description": "Kernel ridge regression.\n\nKernel ridge regression (KRR) combines ridge regression (linear least\nsquares with l2-norm regularization) with the kernel trick. 
It thus\nlearns a linear function in the space induced by the respective kernel and\nthe data. For non-linear kernels, this corresponds to a non-linear\nfunction in the original space.\n\nThe form of the model learned by KRR is identical to support vector\nregression (SVR). However, different loss functions are used: KRR uses\nsquared error loss while support vector regression uses epsilon-insensitive\nloss, both combined with l2 regularization. In contrast to SVR, fitting a\nKRR model can be done in closed-form and is typically faster for\nmedium-sized datasets. On the other hand, the learned model is non-sparse\nand thus slower than SVR, which learns a sparse model for epsilon > 0, at\nprediction-time.\n\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape [n_samples, n_targets]).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Kernel ridge regression.\n\nKernel ridge regression (KRR) combines ridge regression (linear least\nsquares with l2-norm regularization) with the kernel trick. It thus\nlearns a linear function in the space induced by the respective kernel and\nthe data. For non-linear kernels, this corresponds to a non-linear\nfunction in the original space.\n\nThe form of the model learned by KRR is identical to support vector\nregression (SVR). However, different loss functions are used: KRR uses\nsquared error loss while support vector regression uses epsilon-insensitive\nloss, both combined with l2 regularization. In contrast to SVR, fitting a\nKRR model can be done in closed-form and is typically faster for\nmedium-sized datasets. On the other hand, the learned model is non-sparse\nand thus slower than SVR, which learns a sparse model for epsilon > 0, at\nprediction-time.\n\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape [n_samples, n_targets]).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float or array-like of shape (n_targets,), default=1.0\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n assumed to be specific to the targets. Hence they must correspond in\n number. See :ref:`ridge_regression` for formula.\n\nkernel : string or callable, default=\"linear\"\n Kernel mapping used internally. This parameter is directly passed to\n :class:`~sklearn.metrics.pairwise.pairwise_kernel`.\n If `kernel` is a string, it must be one of the metrics\n in `pairwise.PAIRWISE_KERNEL_FUNCTIONS`.\n If `kernel` is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if `kernel` is a callable function, it is called on\n each pair of instances (rows) and the resulting value recorded. The\n callable should take two rows from X as input and return the\n corresponding kernel value as a single number. This means that\n callables from :mod:`sklearn.metrics.pairwise` are not allowed, as\n they operate on matrices, not single samples. Use the string\n identifying the kernel instead.\n\ngamma : float, default=None\n Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n and sigmoid kernels. 
Interpretation of the default value is left to\n the kernel; see the documentation for sklearn.metrics.pairwise.\n Ignored by other kernels.\n\ndegree : float, default=3\n Degree of the polynomial kernel. Ignored by other kernels.\n\ncoef0 : float, default=1\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\nkernel_params : mapping of string to any, default=None\n Additional parameters (keyword arguments) for kernel function passed\n as callable object.\n\nAttributes\n----------\ndual_coef_ : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Representation of weight vector(s) in kernel space\n\nX_fit_ : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data, which is also required for prediction. If\n kernel == \"precomputed\" this is instead the precomputed\n training matrix, of shape (n_samples, n_samples).\n\nReferences\n----------\n* Kevin P. Murphy\n \"Machine Learning: A Probabilistic Perspective\", The MIT Press\n chapter 14.4.3, pp. 492-493\n\nSee Also\n--------\nsklearn.linear_model.Ridge : Linear ridge regression.\nsklearn.svm.SVR : Support Vector Regression implemented using libsvm.\n\nExamples\n--------\n>>> from sklearn.kernel_ridge import KernelRidge\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> clf = KernelRidge(alpha=1.0)\n>>> clf.fit(X, y)\nKernelRidge(alpha=1.0)", + "code": "class KernelRidge(MultiOutputMixin, RegressorMixin, BaseEstimator):\n \"\"\"Kernel ridge regression.\n\n Kernel ridge regression (KRR) combines ridge regression (linear least\n squares with l2-norm regularization) with the kernel trick. It thus\n learns a linear function in the space induced by the respective kernel and\n the data. For non-linear kernels, this corresponds to a non-linear\n function in the original space.\n\n The form of the model learned by KRR is identical to support vector\n regression (SVR). However, different loss functions are used: KRR uses\n squared error loss while support vector regression uses epsilon-insensitive\n loss, both combined with l2 regularization. In contrast to SVR, fitting a\n KRR model can be done in closed-form and is typically faster for\n medium-sized datasets. On the other hand, the learned model is non-sparse\n and thus slower than SVR, which learns a sparse model for epsilon > 0, at\n prediction-time.\n\n This estimator has built-in support for multi-variate regression\n (i.e., when y is a 2d-array of shape [n_samples, n_targets]).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n alpha : float or array-like of shape (n_targets,), default=1.0\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n assumed to be specific to the targets. Hence they must correspond in\n number. See :ref:`ridge_regression` for formula.\n\n kernel : string or callable, default=\"linear\"\n Kernel mapping used internally. 
This parameter is directly passed to\n :class:`~sklearn.metrics.pairwise.pairwise_kernel`.\n If `kernel` is a string, it must be one of the metrics\n in `pairwise.PAIRWISE_KERNEL_FUNCTIONS`.\n If `kernel` is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if `kernel` is a callable function, it is called on\n each pair of instances (rows) and the resulting value recorded. The\n callable should take two rows from X as input and return the\n corresponding kernel value as a single number. This means that\n callables from :mod:`sklearn.metrics.pairwise` are not allowed, as\n they operate on matrices, not single samples. Use the string\n identifying the kernel instead.\n\n gamma : float, default=None\n Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n and sigmoid kernels. Interpretation of the default value is left to\n the kernel; see the documentation for sklearn.metrics.pairwise.\n Ignored by other kernels.\n\n degree : float, default=3\n Degree of the polynomial kernel. Ignored by other kernels.\n\n coef0 : float, default=1\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\n kernel_params : mapping of string to any, default=None\n Additional parameters (keyword arguments) for kernel function passed\n as callable object.\n\n Attributes\n ----------\n dual_coef_ : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Representation of weight vector(s) in kernel space\n\n X_fit_ : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data, which is also required for prediction. If\n kernel == \"precomputed\" this is instead the precomputed\n training matrix, of shape (n_samples, n_samples).\n\n References\n ----------\n * Kevin P. Murphy\n \"Machine Learning: A Probabilistic Perspective\", The MIT Press\n chapter 14.4.3, pp. 492-493\n\n See Also\n --------\n sklearn.linear_model.Ridge : Linear ridge regression.\n sklearn.svm.SVR : Support Vector Regression implemented using libsvm.\n\n Examples\n --------\n >>> from sklearn.kernel_ridge import KernelRidge\n >>> import numpy as np\n >>> n_samples, n_features = 10, 5\n >>> rng = np.random.RandomState(0)\n >>> y = rng.randn(n_samples)\n >>> X = rng.randn(n_samples, n_features)\n >>> clf = KernelRidge(alpha=1.0)\n >>> clf.fit(X, y)\n KernelRidge(alpha=1.0)\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, alpha=1, *, kernel=\"linear\", gamma=None, degree=3,\n coef0=1, kernel_params=None):\n self.alpha = alpha\n self.kernel = kernel\n self.gamma = gamma\n self.degree = degree\n self.coef0 = coef0\n self.kernel_params = kernel_params\n\n def _get_kernel(self, X, Y=None):\n if callable(self.kernel):\n params = self.kernel_params or {}\n else:\n params = {\"gamma\": self.gamma,\n \"degree\": self.degree,\n \"coef0\": self.coef0}\n return pairwise_kernels(X, Y, metric=self.kernel,\n filter_params=True, **params)\n\n def _more_tags(self):\n return {'pairwise': self.kernel == 'precomputed'}\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n return self.kernel == \"precomputed\"\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Kernel Ridge regression model\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. 
If kernel == \"precomputed\" this is instead\n a precomputed kernel matrix, of shape (n_samples, n_samples).\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\n sample_weight : float or array-like of shape (n_samples,), default=None\n Individual weights for each sample, ignored if None is passed.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n # Convert data\n X, y = self._validate_data(X, y, accept_sparse=(\"csr\", \"csc\"),\n multi_output=True, y_numeric=True)\n if sample_weight is not None and not isinstance(sample_weight, float):\n sample_weight = _check_sample_weight(sample_weight, X)\n\n K = self._get_kernel(X)\n alpha = np.atleast_1d(self.alpha)\n\n ravel = False\n if len(y.shape) == 1:\n y = y.reshape(-1, 1)\n ravel = True\n\n copy = self.kernel == \"precomputed\"\n self.dual_coef_ = _solve_cholesky_kernel(K, y, alpha,\n sample_weight,\n copy)\n if ravel:\n self.dual_coef_ = self.dual_coef_.ravel()\n\n self.X_fit_ = X\n\n return self\n\n def predict(self, X):\n \"\"\"Predict using the kernel ridge model\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples. If kernel == \"precomputed\" this is instead a\n precomputed kernel matrix, shape = [n_samples,\n n_samples_fitted], where n_samples_fitted is the number of\n samples used in the fitting for this estimator.\n\n Returns\n -------\n C : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Returns predicted values.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse=(\"csr\", \"csc\"), reset=False)\n K = self._get_kernel(X, self.X_fit_)\n return np.dot(K, self.dual_coef_)", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "kernel", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "degree", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "coef0", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin", + "name": "LinearClassifierMixin", + "qname": "sklearn.linear_model._base.LinearClassifierMixin", + "decorators": [], + "superclasses": ["ClassifierMixin"], + "methods": [ + "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin/decision_function", + "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin/predict", + "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin/_predict_proba_lr" + ], + "is_public": false, + "reexported_by": [], + "description": "Mixin for linear classifiers.\n\nHandles prediction for sparse and dense X.", + "docstring": "Mixin for linear classifiers.\n\nHandles prediction for sparse and dense X.", + "code": "class LinearClassifierMixin(ClassifierMixin):\n \"\"\"Mixin for linear classifiers.\n\n Handles prediction for sparse and dense X.\n \"\"\"\n\n def decision_function(self, X):\n \"\"\"\n Predict confidence scores for samples.\n\n The confidence score for a sample is proportional to the signed\n distance of that sample to the hyperplane.\n\n Parameters\n ----------\n X : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\n Returns\n -------\n array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)\n Confidence scores per (sample, class) combination. 
In the binary\n case, confidence score for self.classes_[1] where >0 means this\n class would be predicted.\n \"\"\"\n check_is_fitted(self)\n\n X = check_array(X, accept_sparse='csr')\n\n n_features = self.coef_.shape[1]\n if X.shape[1] != n_features:\n raise ValueError(\"X has %d features per sample; expecting %d\"\n % (X.shape[1], n_features))\n\n scores = safe_sparse_dot(X, self.coef_.T,\n dense_output=True) + self.intercept_\n return scores.ravel() if scores.shape[1] == 1 else scores\n\n def predict(self, X):\n \"\"\"\n Predict class labels for samples in X.\n\n Parameters\n ----------\n X : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\n Returns\n -------\n C : array, shape [n_samples]\n Predicted class label per sample.\n \"\"\"\n scores = self.decision_function(X)\n if len(scores.shape) == 1:\n indices = (scores > 0).astype(int)\n else:\n indices = scores.argmax(axis=1)\n return self.classes_[indices]\n\n def _predict_proba_lr(self, X):\n \"\"\"Probability estimation for OvR logistic regression.\n\n Positive class probabilities are computed as\n 1. / (1. + np.exp(-self.decision_function(X)));\n multiclass is handled by normalizing that over all classes.\n \"\"\"\n prob = self.decision_function(X)\n expit(prob, out=prob)\n if prob.ndim == 1:\n return np.vstack([1 - prob, prob]).T\n else:\n # OvR normalization, like LibLinear's predict_probability\n prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))\n return prob", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel", + "name": "LinearModel", + "qname": "sklearn.linear_model._base.LinearModel", + "decorators": [], + "superclasses": ["BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.linear_model._base/LinearModel/fit", + "scikit-learn/sklearn.linear_model._base/LinearModel/_decision_function", + "scikit-learn/sklearn.linear_model._base/LinearModel/predict", + "scikit-learn/sklearn.linear_model._base/LinearModel/_set_intercept", + "scikit-learn/sklearn.linear_model._base/LinearModel/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for Linear Models", + "docstring": "Base class for Linear Models", + "code": "class LinearModel(BaseEstimator, metaclass=ABCMeta):\n \"\"\"Base class for Linear Models\"\"\"\n\n @abstractmethod\n def fit(self, X, y):\n \"\"\"Fit model.\"\"\"\n\n def _decision_function(self, X):\n check_is_fitted(self)\n\n X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])\n return safe_sparse_dot(X, self.coef_.T,\n dense_output=True) + self.intercept_\n\n def predict(self, X):\n \"\"\"\n Predict using the linear model.\n\n Parameters\n ----------\n X : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\n Returns\n -------\n C : array, shape (n_samples,)\n Returns predicted values.\n \"\"\"\n return self._decision_function(X)\n\n _preprocess_data = staticmethod(_preprocess_data)\n\n def _set_intercept(self, X_offset, y_offset, X_scale):\n \"\"\"Set the intercept_\n \"\"\"\n if self.fit_intercept:\n self.coef_ = self.coef_ / X_scale\n self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T)\n else:\n self.intercept_ = 0.\n\n def _more_tags(self):\n return {'requires_y': True}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearRegression", + "name": "LinearRegression", + "qname": "sklearn.linear_model._base.LinearRegression", + "decorators": [], + "superclasses": ["MultiOutputMixin", "RegressorMixin", "LinearModel"], + "methods": [ + 
"scikit-learn/sklearn.linear_model._base/LinearRegression/__init__", + "scikit-learn/sklearn.linear_model._base/LinearRegression/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Ordinary least squares Linear Regression.\n\nLinearRegression fits a linear model with coefficients w = (w1, ..., wp)\nto minimize the residual sum of squares between the observed targets in\nthe dataset, and the targets predicted by the linear approximation.", + "docstring": "Ordinary least squares Linear Regression.\n\nLinearRegression fits a linear model with coefficients w = (w1, ..., wp)\nto minimize the residual sum of squares between the observed targets in\nthe dataset, and the targets predicted by the linear approximation.\n\nParameters\n----------\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to False, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This will only provide\n speedup for n_targets > 1 and sufficient large problems.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive. This\n option is only supported for dense arrays.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncoef_ : array of shape (n_features, ) or (n_targets, n_features)\n Estimated coefficients for the linear regression problem.\n If multiple targets are passed during the fit (y 2D), this\n is a 2D array of shape (n_targets, n_features), while if only\n one target is passed, this is a 1D array of length n_features.\n\nrank_ : int\n Rank of matrix `X`. Only available when `X` is dense.\n\nsingular_ : array of shape (min(X, y),)\n Singular values of `X`. Only available when `X` is dense.\n\nintercept_ : float or array of shape (n_targets,)\n Independent term in the linear model. 
Set to 0.0 if\n `fit_intercept = False`.\n\nSee Also\n--------\nRidge : Ridge regression addresses some of the\n problems of Ordinary Least Squares by imposing a penalty on the\n size of the coefficients with l2 regularization.\nLasso : The Lasso is a linear model that estimates\n sparse coefficients with l1 regularization.\nElasticNet : Elastic-Net is a linear regression\n model trained with both l1 and l2 -norm regularization of the\n coefficients.\n\nNotes\n-----\nFrom the implementation point of view, this is just plain Ordinary\nLeast Squares (scipy.linalg.lstsq) or Non Negative Least Squares\n(scipy.optimize.nnls) wrapped as a predictor object.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])\n>>> # y = 1 * x_0 + 2 * x_1 + 3\n>>> y = np.dot(X, np.array([1, 2])) + 3\n>>> reg = LinearRegression().fit(X, y)\n>>> reg.score(X, y)\n1.0\n>>> reg.coef_\narray([1., 2.])\n>>> reg.intercept_\n3.0...\n>>> reg.predict(np.array([[3, 5]]))\narray([16.])", + "code": "class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):\n \"\"\"\n Ordinary least squares Linear Regression.\n\n LinearRegression fits a linear model with coefficients w = (w1, ..., wp)\n to minimize the residual sum of squares between the observed targets in\n the dataset, and the targets predicted by the linear approximation.\n\n Parameters\n ----------\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to False, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n copy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation. This will only provide\n speedup for n_targets > 1 and sufficiently large problems.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n positive : bool, default=False\n When set to ``True``, forces the coefficients to be positive. This\n option is only supported for dense arrays.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n coef_ : array of shape (n_features, ) or (n_targets, n_features)\n Estimated coefficients for the linear regression problem.\n If multiple targets are passed during the fit (y 2D), this\n is a 2D array of shape (n_targets, n_features), while if only\n one target is passed, this is a 1D array of length n_features.\n\n rank_ : int\n Rank of matrix `X`. Only available when `X` is dense.\n\n singular_ : array of shape (min(X, y),)\n Singular values of `X`. Only available when `X` is dense.\n\n intercept_ : float or array of shape (n_targets,)\n Independent term in the linear model. 
Set to 0.0 if\n `fit_intercept = False`.\n\n See Also\n --------\n Ridge : Ridge regression addresses some of the\n problems of Ordinary Least Squares by imposing a penalty on the\n size of the coefficients with l2 regularization.\n Lasso : The Lasso is a linear model that estimates\n sparse coefficients with l1 regularization.\n ElasticNet : Elastic-Net is a linear regression\n model trained with both l1 and l2 -norm regularization of the\n coefficients.\n\n Notes\n -----\n From the implementation point of view, this is just plain Ordinary\n Least Squares (scipy.linalg.lstsq) or Non Negative Least Squares\n (scipy.optimize.nnls) wrapped as a predictor object.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.linear_model import LinearRegression\n >>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])\n >>> # y = 1 * x_0 + 2 * x_1 + 3\n >>> y = np.dot(X, np.array([1, 2])) + 3\n >>> reg = LinearRegression().fit(X, y)\n >>> reg.score(X, y)\n 1.0\n >>> reg.coef_\n array([1., 2.])\n >>> reg.intercept_\n 3.0...\n >>> reg.predict(np.array([[3, 5]]))\n array([16.])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, fit_intercept=True, normalize=False, copy_X=True,\n n_jobs=None, positive=False):\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.copy_X = copy_X\n self.n_jobs = n_jobs\n self.positive = positive\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"\n Fit linear model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary\n\n sample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample\n\n .. 
versionadded:: 0.17\n parameter *sample_weight* support to LinearRegression.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n\n n_jobs_ = self.n_jobs\n\n accept_sparse = False if self.positive else ['csr', 'csc', 'coo']\n\n X, y = self._validate_data(X, y, accept_sparse=accept_sparse,\n y_numeric=True, multi_output=True)\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=X.dtype)\n\n X, y, X_offset, y_offset, X_scale = self._preprocess_data(\n X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,\n copy=self.copy_X, sample_weight=sample_weight,\n return_mean=True)\n\n if sample_weight is not None:\n # Sample weight can be implemented via a simple rescaling.\n X, y = _rescale_data(X, y, sample_weight)\n\n if self.positive:\n if y.ndim < 2:\n self.coef_, self._residues = optimize.nnls(X, y)\n else:\n # scipy.optimize.nnls cannot handle y with shape (M, K)\n outs = Parallel(n_jobs=n_jobs_)(\n delayed(optimize.nnls)(X, y[:, j])\n for j in range(y.shape[1]))\n self.coef_, self._residues = map(np.vstack, zip(*outs))\n elif sp.issparse(X):\n X_offset_scale = X_offset / X_scale\n\n def matvec(b):\n return X.dot(b) - b.dot(X_offset_scale)\n\n def rmatvec(b):\n return X.T.dot(b) - X_offset_scale * np.sum(b)\n\n X_centered = sparse.linalg.LinearOperator(shape=X.shape,\n matvec=matvec,\n rmatvec=rmatvec)\n\n if y.ndim < 2:\n out = sparse_lsqr(X_centered, y)\n self.coef_ = out[0]\n self._residues = out[3]\n else:\n # sparse_lstsq cannot handle y with shape (M, K)\n outs = Parallel(n_jobs=n_jobs_)(\n delayed(sparse_lsqr)(X_centered, y[:, j].ravel())\n for j in range(y.shape[1]))\n self.coef_ = np.vstack([out[0] for out in outs])\n self._residues = np.vstack([out[3] for out in outs])\n else:\n self.coef_, self._residues, self.rank_, self.singular_ = \\\n linalg.lstsq(X, y)\n self.coef_ = self.coef_.T\n\n if y.ndim == 1:\n self.coef_ = np.ravel(self.coef_)\n self._set_intercept(X_offset, y_offset, X_scale)\n return self", + "instance_attributes": [ + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "positive", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._base/SparseCoefMixin", + "name": "SparseCoefMixin", + "qname": "sklearn.linear_model._base.SparseCoefMixin", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.linear_model._base/SparseCoefMixin/densify", + "scikit-learn/sklearn.linear_model._base/SparseCoefMixin/sparsify" + ], + "is_public": false, + "reexported_by": [], + "description": "Mixin for converting coef_ to and from CSR format.\n\nL1-regularizing estimators should inherit this.", + "docstring": "Mixin for converting coef_ to and from CSR format.\n\nL1-regularizing estimators should inherit this.", + "code": "class SparseCoefMixin:\n \"\"\"Mixin for converting coef_ to and from CSR format.\n\n L1-regularizing estimators should inherit this.\n \"\"\"\n\n def densify(self):\n \"\"\"\n Convert coefficient matrix to dense array format.\n\n Converts the ``coef_`` member (back) to a numpy.ndarray. 
This is the\n default format of ``coef_`` and is required for fitting, so calling\n this method is only required on models that have previously been\n sparsified; otherwise, it is a no-op.\n\n Returns\n -------\n self\n Fitted estimator.\n \"\"\"\n msg = \"Estimator, %(name)s, must be fitted before densifying.\"\n check_is_fitted(self, msg=msg)\n if sp.issparse(self.coef_):\n self.coef_ = self.coef_.toarray()\n return self\n\n def sparsify(self):\n \"\"\"\n Convert coefficient matrix to sparse format.\n\n Converts the ``coef_`` member to a scipy.sparse matrix, which for\n L1-regularized models can be much more memory- and storage-efficient\n than the usual numpy.ndarray representation.\n\n The ``intercept_`` member is not converted.\n\n Returns\n -------\n self\n Fitted estimator.\n\n Notes\n -----\n For non-sparse models, i.e. when there are not many zeros in ``coef_``,\n this may actually *increase* memory usage, so use this method with\n care. A rule of thumb is that the number of zero elements, which can\n be computed with ``(coef_ == 0).sum()``, must be more than 50% for this\n to provide significant benefits.\n\n After calling this method, further fitting with the partial_fit\n method (if any) will not work until you call densify.\n \"\"\"\n msg = \"Estimator, %(name)s, must be fitted before sparsifying.\"\n check_is_fitted(self, msg=msg)\n self.coef_ = sp.csr_matrix(self.coef_)\n return self", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression", + "name": "ARDRegression", + "qname": "sklearn.linear_model._bayes.ARDRegression", + "decorators": [], + "superclasses": ["RegressorMixin", "LinearModel"], + "methods": [ + "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__", + "scikit-learn/sklearn.linear_model._bayes/ARDRegression/fit", + "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma_woodbury", + "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma", + "scikit-learn/sklearn.linear_model._bayes/ARDRegression/predict" + ], + "is_public": false, + "reexported_by": [], + "description": "Bayesian ARD regression.\n\nFit the weights of a regression model, using an ARD prior. The weights of\nthe regression model are assumed to be in Gaussian distributions.\nAlso estimate the parameters lambda (precisions of the distributions of the\nweights) and alpha (precision of the distribution of the noise).\nThe estimation is done by an iterative procedure (Evidence Maximization)\n\nRead more in the :ref:`User Guide `.", + "docstring": "Bayesian ARD regression.\n\nFit the weights of a regression model, using an ARD prior. 
The weights of\nthe regression model are assumed to be in Gaussian distributions.\nAlso estimate the parameters lambda (precisions of the distributions of the\nweights) and alpha (precision of the distribution of the noise).\nThe estimation is done by an iterative procedure (Evidence Maximization)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_iter : int, default=300\n Maximum number of iterations.\n\ntol : float, default=1e-3\n Stop the algorithm if w has converged.\n\nalpha_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the alpha parameter.\n\nalpha_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the alpha parameter.\n\nlambda_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the lambda parameter.\n\nlambda_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the lambda parameter.\n\ncompute_score : bool, default=False\n If True, compute the objective function at each step of the model.\n\nthreshold_lambda : float, default=10 000\n threshold for removing (pruning) weights with high precision from\n the computation.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nverbose : bool, default=False\n Verbose mode when fitting the model.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n Coefficients of the regression model (mean of distribution)\n\nalpha_ : float\n estimated precision of the noise.\n\nlambda_ : array-like of shape (n_features,)\n estimated precisions of the weights.\n\nsigma_ : array-like of shape (n_features, n_features)\n estimated variance-covariance matrix of the weights\n\nscores_ : float\n if computed, value of the objective function (to be maximized)\n\nintercept_ : float\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nX_offset_ : float\n If `normalize=True`, offset subtracted for centering data to a\n zero mean.\n\nX_scale_ : float\n If `normalize=True`, parameter used to scale data to a unit\n standard deviation.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.ARDRegression()\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nARDRegression()\n>>> clf.predict([[1, 1]])\narray([1.])\n\nNotes\n-----\nFor an example, see :ref:`examples/linear_model/plot_ard.py\n`.\n\nReferences\n----------\nD. J. C. MacKay, Bayesian nonlinear modeling for the prediction\ncompetition, ASHRAE Transactions, 1994.\n\nR. 
Salakhutdinov, Lecture notes on Statistical Machine Learning,\nhttp://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15\nTheir beta is our ``self.alpha_``\nTheir alpha is our ``self.lambda_``\nARD is a little different than the slide: only dimensions/features for\nwhich ``self.lambda_ < self.threshold_lambda`` are kept and the rest are\ndiscarded.", + "code": "class ARDRegression(RegressorMixin, LinearModel):\n \"\"\"Bayesian ARD regression.\n\n Fit the weights of a regression model, using an ARD prior. The weights of\n the regression model are assumed to be in Gaussian distributions.\n Also estimate the parameters lambda (precisions of the distributions of the\n weights) and alpha (precision of the distribution of the noise).\n The estimation is done by an iterative procedure (Evidence Maximization)\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_iter : int, default=300\n Maximum number of iterations.\n\n tol : float, default=1e-3\n Stop the algorithm if w has converged.\n\n alpha_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the alpha parameter.\n\n alpha_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the alpha parameter.\n\n lambda_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the lambda parameter.\n\n lambda_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the lambda parameter.\n\n compute_score : bool, default=False\n If True, compute the objective function at each step of the model.\n\n threshold_lambda : float, default=10 000\n threshold for removing (pruning) weights with high precision from\n the computation.\n\n fit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n copy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\n verbose : bool, default=False\n Verbose mode when fitting the model.\n\n Attributes\n ----------\n coef_ : array-like of shape (n_features,)\n Coefficients of the regression model (mean of distribution)\n\n alpha_ : float\n estimated precision of the noise.\n\n lambda_ : array-like of shape (n_features,)\n estimated precisions of the weights.\n\n sigma_ : array-like of shape (n_features, n_features)\n estimated variance-covariance matrix of the weights\n\n scores_ : float\n if computed, value of the objective function (to be maximized)\n\n intercept_ : float\n Independent term in decision function. 
Set to 0.0 if\n ``fit_intercept = False``.\n\n X_offset_ : float\n If `normalize=True`, offset subtracted for centering data to a\n zero mean.\n\n X_scale_ : float\n If `normalize=True`, parameter used to scale data to a unit\n standard deviation.\n\n Examples\n --------\n >>> from sklearn import linear_model\n >>> clf = linear_model.ARDRegression()\n >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\n ARDRegression()\n >>> clf.predict([[1, 1]])\n array([1.])\n\n Notes\n -----\n For an example, see :ref:`examples/linear_model/plot_ard.py\n `.\n\n References\n ----------\n D. J. C. MacKay, Bayesian nonlinear modeling for the prediction\n competition, ASHRAE Transactions, 1994.\n\n R. Salakhutdinov, Lecture notes on Statistical Machine Learning,\n http://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15\n Their beta is our ``self.alpha_``\n Their alpha is our ``self.lambda_``\n ARD is a little different than the slide: only dimensions/features for\n which ``self.lambda_ < self.threshold_lambda`` are kept and the rest are\n discarded.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,\n lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False,\n threshold_lambda=1.e+4, fit_intercept=True, normalize=False,\n copy_X=True, verbose=False):\n self.n_iter = n_iter\n self.tol = tol\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.alpha_1 = alpha_1\n self.alpha_2 = alpha_2\n self.lambda_1 = lambda_1\n self.lambda_2 = lambda_2\n self.compute_score = compute_score\n self.threshold_lambda = threshold_lambda\n self.copy_X = copy_X\n self.verbose = verbose\n\n def fit(self, X, y):\n \"\"\"Fit the ARDRegression model according to the given training data\n and parameters.\n\n Iterative procedure to maximize the evidence\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n y : array-like of shape (n_samples,)\n Target values (integers). Will be cast to X's dtype if necessary\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n X, y = self._validate_data(X, y, dtype=np.float64, y_numeric=True,\n ensure_min_samples=2)\n\n n_samples, n_features = X.shape\n coef_ = np.zeros(n_features)\n\n X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data(\n X, y, self.fit_intercept, self.normalize, self.copy_X)\n\n self.X_offset_ = X_offset_\n self.X_scale_ = X_scale_\n\n # Launch the convergence loop\n keep_lambda = np.ones(n_features, dtype=bool)\n\n lambda_1 = self.lambda_1\n lambda_2 = self.lambda_2\n alpha_1 = self.alpha_1\n alpha_2 = self.alpha_2\n verbose = self.verbose\n\n # Initialization of the values of the parameters\n eps = np.finfo(np.float64).eps\n # Add `eps` in the denominator to omit division by zero if `np.var(y)`\n # is zero\n alpha_ = 1. 
/ (np.var(y) + eps)\n lambda_ = np.ones(n_features)\n\n self.scores_ = list()\n coef_old_ = None\n\n def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):\n coef_[keep_lambda] = alpha_ * np.linalg.multi_dot([\n sigma_, X[:, keep_lambda].T, y])\n return coef_\n\n update_sigma = (self._update_sigma if n_samples >= n_features\n else self._update_sigma_woodbury)\n # Iterative procedure of ARDRegression\n for iter_ in range(self.n_iter):\n sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)\n coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)\n\n # Update alpha and lambda\n rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)\n gamma_ = 1. - lambda_[keep_lambda] * np.diag(sigma_)\n lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1) /\n ((coef_[keep_lambda]) ** 2 +\n 2. * lambda_2))\n alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1) /\n (rmse_ + 2. * alpha_2))\n\n # Prune the weights with a precision over a threshold\n keep_lambda = lambda_ < self.threshold_lambda\n coef_[~keep_lambda] = 0\n\n # Compute the objective function\n if self.compute_score:\n s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum()\n s += alpha_1 * log(alpha_) - alpha_2 * alpha_\n s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_) +\n np.sum(np.log(lambda_)))\n s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_ ** 2).sum())\n self.scores_.append(s)\n\n # Check for convergence\n if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:\n if verbose:\n print(\"Converged after %s iterations\" % iter_)\n break\n coef_old_ = np.copy(coef_)\n\n if not keep_lambda.any():\n break\n\n if keep_lambda.any():\n # update sigma and mu using updated params from the last iteration\n sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)\n coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)\n else:\n sigma_ = np.array([]).reshape(0, 0)\n\n self.coef_ = coef_\n self.alpha_ = alpha_\n self.sigma_ = sigma_\n self.lambda_ = lambda_\n self._set_intercept(X_offset_, y_offset_, X_scale_)\n return self\n\n def _update_sigma_woodbury(self, X, alpha_, lambda_, keep_lambda):\n # See slides as referenced in the docstring note\n # this function is used when n_samples < n_features and will invert\n # a matrix of shape (n_samples, n_samples) making use of the\n # woodbury formula:\n # https://en.wikipedia.org/wiki/Woodbury_matrix_identity\n n_samples = X.shape[0]\n X_keep = X[:, keep_lambda]\n inv_lambda = 1 / lambda_[keep_lambda].reshape(1, -1)\n sigma_ = pinvh(\n np.eye(n_samples) / alpha_ + np.dot(X_keep * inv_lambda, X_keep.T)\n )\n sigma_ = np.dot(sigma_, X_keep * inv_lambda)\n sigma_ = - np.dot(inv_lambda.reshape(-1, 1) * X_keep.T, sigma_)\n sigma_[np.diag_indices(sigma_.shape[1])] += 1. 
/ lambda_[keep_lambda]\n return sigma_\n\n def _update_sigma(self, X, alpha_, lambda_, keep_lambda):\n # See slides as referenced in the docstring note\n # this function is used when n_samples >= n_features and will\n # invert a matrix of shape (n_features, n_features)\n X_keep = X[:, keep_lambda]\n gram = np.dot(X_keep.T, X_keep)\n eye = np.eye(gram.shape[0])\n sigma_inv = lambda_[keep_lambda] * eye + alpha_ * gram\n sigma_ = pinvh(sigma_inv)\n return sigma_\n\n def predict(self, X, return_std=False):\n \"\"\"Predict using the linear model.\n\n In addition to the mean of the predictive distribution, also its\n standard deviation can be returned.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\n return_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n\n Returns\n -------\n y_mean : array-like of shape (n_samples,)\n Mean of predictive distribution of query points.\n\n y_std : array-like of shape (n_samples,)\n Standard deviation of predictive distribution of query points.\n \"\"\"\n y_mean = self._decision_function(X)\n if return_std is False:\n return y_mean\n else:\n if self.normalize:\n X = (X - self.X_offset_) / self.X_scale_\n X = X[:, self.lambda_ < self.threshold_lambda]\n sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)\n y_std = np.sqrt(sigmas_squared_data + (1. / self.alpha_))\n return y_mean, y_std", + "instance_attributes": [ + { + "name": "n_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "alpha_1", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "alpha_2", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "lambda_1", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "lambda_2", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "compute_score", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "threshold_lambda", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "scores_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "coef_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "sigma_", + "types": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "name": "lambda_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge", + "name": "BayesianRidge", + "qname": "sklearn.linear_model._bayes.BayesianRidge", + "decorators": [], + "superclasses": ["RegressorMixin", "LinearModel"], + "methods": [ + "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__", + "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/fit", + "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/predict", + "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_", + "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_log_marginal_likelihood" + ], + "is_public": false, + "reexported_by": 
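A minimal usage sketch of the pruning behaviour the `ARDRegression` entry above documents: features whose estimated precision `lambda_` crosses `threshold_lambda` get their coefficient set to exactly zero, and `predict` can also return the posterior standard deviation. The toy data below is illustrative only (assumes scikit-learn 0.24.x and NumPy):

```python
# Sketch: ARD pruning on a toy problem where only one feature is informative.
import numpy as np
from sklearn.linear_model import ARDRegression

rng = np.random.RandomState(0)
X = rng.randn(50, 3)
y = 2.0 * X[:, 0] + 0.01 * rng.randn(50)  # features 1 and 2 are pure noise

reg = ARDRegression().fit(X, y)
kept = reg.lambda_ < reg.threshold_lambda  # mask of non-pruned features
print(reg.coef_.round(2), kept)            # pruned coefficients are exactly 0

y_mean, y_std = reg.predict(X[:2], return_std=True)  # posterior mean and std
```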
[], + "description": "Bayesian ridge regression.\n\nFit a Bayesian ridge model. See the Notes section for details on this\nimplementation and the optimization of the regularization parameters\nlambda (precision of the weights) and alpha (precision of the noise).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Bayesian ridge regression.\n\nFit a Bayesian ridge model. See the Notes section for details on this\nimplementation and the optimization of the regularization parameters\nlambda (precision of the weights) and alpha (precision of the noise).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_iter : int, default=300\n Maximum number of iterations. Should be greater than or equal to 1.\n\ntol : float, default=1e-3\n Stop the algorithm if w has converged.\n\nalpha_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the alpha parameter.\n\nalpha_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the alpha parameter.\n\nlambda_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the lambda parameter.\n\nlambda_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the lambda parameter.\n\nalpha_init : float, default=None\n Initial value for alpha (precision of the noise).\n If not set, alpha_init is 1/Var(y).\n\n .. versionadded:: 0.22\n\nlambda_init : float, default=None\n Initial value for lambda (precision of the weights).\n If not set, lambda_init is 1.\n\n .. versionadded:: 0.22\n\ncompute_score : bool, default=False\n If True, compute the log marginal likelihood at each iteration of the\n optimization.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model.\n The intercept is not treated as a probabilistic parameter\n and thus has no associated variance. If set\n to False, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nverbose : bool, default=False\n Verbose mode when fitting the model.\n\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n Coefficients of the regression model (mean of distribution)\n\nintercept_ : float\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nalpha_ : float\n Estimated precision of the noise.\n\nlambda_ : float\n Estimated precision of the weights.\n\nsigma_ : array-like of shape (n_features, n_features)\n Estimated variance-covariance matrix of the weights\n\nscores_ : array-like of shape (n_iter_+1,)\n If computed_score is True, value of the log marginal likelihood (to be\n maximized) at each iteration of the optimization. 
The array starts\n with the value of the log marginal likelihood obtained for the initial\n values of alpha and lambda and ends with the value obtained for the\n estimated alpha and lambda.\n\nn_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n\nX_offset_ : float\n If `normalize=True`, offset subtracted for centering data to a\n zero mean.\n\nX_scale_ : float\n If `normalize=True`, parameter used to scale data to a unit\n standard deviation.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.BayesianRidge()\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nBayesianRidge()\n>>> clf.predict([[1, 1]])\narray([1.])\n\nNotes\n-----\nThere exist several strategies to perform Bayesian ridge regression. This\nimplementation is based on the algorithm described in Appendix A of\n(Tipping, 2001) where updates of the regularization parameters are done as\nsuggested in (MacKay, 1992). Note that according to A New\nView of Automatic Relevance Determination (Wipf and Nagarajan, 2008) these\nupdate rules do not guarantee that the marginal likelihood is increasing\nbetween two consecutive iterations of the optimization.\n\nReferences\n----------\nD. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems,\nVol. 4, No. 3, 1992.\n\nM. E. Tipping, Sparse Bayesian Learning and the Relevance Vector Machine,\nJournal of Machine Learning Research, Vol. 1, 2001.", + "code": "class BayesianRidge(RegressorMixin, LinearModel):\n \"\"\"Bayesian ridge regression.\n\n Fit a Bayesian ridge model. See the Notes section for details on this\n implementation and the optimization of the regularization parameters\n lambda (precision of the weights) and alpha (precision of the noise).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_iter : int, default=300\n Maximum number of iterations. Should be greater than or equal to 1.\n\n tol : float, default=1e-3\n Stop the algorithm if w has converged.\n\n alpha_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the alpha parameter.\n\n alpha_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the alpha parameter.\n\n lambda_1 : float, default=1e-6\n Hyper-parameter : shape parameter for the Gamma distribution prior\n over the lambda parameter.\n\n lambda_2 : float, default=1e-6\n Hyper-parameter : inverse scale parameter (rate parameter) for the\n Gamma distribution prior over the lambda parameter.\n\n alpha_init : float, default=None\n Initial value for alpha (precision of the noise).\n If not set, alpha_init is 1/Var(y).\n\n .. versionadded:: 0.22\n\n lambda_init : float, default=None\n Initial value for lambda (precision of the weights).\n If not set, lambda_init is 1.\n\n .. versionadded:: 0.22\n\n compute_score : bool, default=False\n If True, compute the log marginal likelihood at each iteration of the\n optimization.\n\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model.\n The intercept is not treated as a probabilistic parameter\n and thus has no associated variance. If set\n to False, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n copy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\n verbose : bool, default=False\n Verbose mode when fitting the model.\n\n\n Attributes\n ----------\n coef_ : array-like of shape (n_features,)\n Coefficients of the regression model (mean of distribution)\n\n intercept_ : float\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\n alpha_ : float\n Estimated precision of the noise.\n\n lambda_ : float\n Estimated precision of the weights.\n\n sigma_ : array-like of shape (n_features, n_features)\n Estimated variance-covariance matrix of the weights\n\n scores_ : array-like of shape (n_iter_+1,)\n If compute_score is True, value of the log marginal likelihood (to be\n maximized) at each iteration of the optimization. The array starts\n with the value of the log marginal likelihood obtained for the initial\n values of alpha and lambda and ends with the value obtained for the\n estimated alpha and lambda.\n\n n_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n\n X_offset_ : float\n If `normalize=True`, offset subtracted for centering data to a\n zero mean.\n\n X_scale_ : float\n If `normalize=True`, parameter used to scale data to a unit\n standard deviation.\n\n Examples\n --------\n >>> from sklearn import linear_model\n >>> clf = linear_model.BayesianRidge()\n >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\n BayesianRidge()\n >>> clf.predict([[1, 1]])\n array([1.])\n\n Notes\n -----\n There exist several strategies to perform Bayesian ridge regression. This\n implementation is based on the algorithm described in Appendix A of\n (Tipping, 2001) where updates of the regularization parameters are done as\n suggested in (MacKay, 1992). Note that according to A New\n View of Automatic Relevance Determination (Wipf and Nagarajan, 2008) these\n update rules do not guarantee that the marginal likelihood is increasing\n between two consecutive iterations of the optimization.\n\n References\n ----------\n D. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems,\n Vol. 4, No. 3, 1992.\n\n M. E. Tipping, Sparse Bayesian Learning and the Relevance Vector Machine,\n Journal of Machine Learning Research, Vol. 1, 2001.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,\n lambda_1=1.e-6, lambda_2=1.e-6, alpha_init=None,\n lambda_init=None, compute_score=False, fit_intercept=True,\n normalize=False, copy_X=True, verbose=False):\n self.n_iter = n_iter\n self.tol = tol\n self.alpha_1 = alpha_1\n self.alpha_2 = alpha_2\n self.lambda_1 = lambda_1\n self.lambda_2 = lambda_2\n self.alpha_init = alpha_init\n self.lambda_init = lambda_init\n self.compute_score = compute_score\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.copy_X = copy_X\n self.verbose = verbose\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Training data\n y : ndarray of shape (n_samples,)\n Target values.
Will be cast to X's dtype if necessary\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Individual weights for each sample\n\n .. versionadded:: 0.20\n parameter *sample_weight* support to BayesianRidge.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n\n if self.n_iter < 1:\n raise ValueError('n_iter should be greater than or equal to 1.'\n ' Got {!r}.'.format(self.n_iter))\n\n X, y = self._validate_data(X, y, dtype=np.float64, y_numeric=True)\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=X.dtype)\n\n X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data(\n X, y, self.fit_intercept, self.normalize, self.copy_X,\n sample_weight=sample_weight)\n\n if sample_weight is not None:\n # Sample weight can be implemented via a simple rescaling.\n X, y = _rescale_data(X, y, sample_weight)\n\n self.X_offset_ = X_offset_\n self.X_scale_ = X_scale_\n n_samples, n_features = X.shape\n\n # Initialization of the values of the parameters\n eps = np.finfo(np.float64).eps\n # Add `eps` in the denominator to omit division by zero if `np.var(y)`\n # is zero\n alpha_ = self.alpha_init\n lambda_ = self.lambda_init\n if alpha_ is None:\n alpha_ = 1. / (np.var(y) + eps)\n if lambda_ is None:\n lambda_ = 1.\n\n verbose = self.verbose\n lambda_1 = self.lambda_1\n lambda_2 = self.lambda_2\n alpha_1 = self.alpha_1\n alpha_2 = self.alpha_2\n\n self.scores_ = list()\n coef_old_ = None\n\n XT_y = np.dot(X.T, y)\n U, S, Vh = linalg.svd(X, full_matrices=False)\n eigen_vals_ = S ** 2\n\n # Convergence loop of the bayesian ridge regression\n for iter_ in range(self.n_iter):\n\n # update posterior mean coef_ based on alpha_ and lambda_ and\n # compute corresponding rmse\n coef_, rmse_ = self._update_coef_(X, y, n_samples, n_features,\n XT_y, U, Vh, eigen_vals_,\n alpha_, lambda_)\n if self.compute_score:\n # compute the log marginal likelihood\n s = self._log_marginal_likelihood(n_samples, n_features,\n eigen_vals_,\n alpha_, lambda_,\n coef_, rmse_)\n self.scores_.append(s)\n\n # Update alpha and lambda according to (MacKay, 1992)\n gamma_ = np.sum((alpha_ * eigen_vals_) /\n (lambda_ + alpha_ * eigen_vals_))\n lambda_ = ((gamma_ + 2 * lambda_1) /\n (np.sum(coef_ ** 2) + 2 * lambda_2))\n alpha_ = ((n_samples - gamma_ + 2 * alpha_1) /\n (rmse_ + 2 * alpha_2))\n\n # Check for convergence\n if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:\n if verbose:\n print(\"Convergence after \", str(iter_), \" iterations\")\n break\n coef_old_ = np.copy(coef_)\n\n self.n_iter_ = iter_ + 1\n\n # return regularization parameters and corresponding posterior mean,\n # log marginal likelihood and posterior covariance\n self.alpha_ = alpha_\n self.lambda_ = lambda_\n self.coef_, rmse_ = self._update_coef_(X, y, n_samples, n_features,\n XT_y, U, Vh, eigen_vals_,\n alpha_, lambda_)\n if self.compute_score:\n # compute the log marginal likelihood\n s = self._log_marginal_likelihood(n_samples, n_features,\n eigen_vals_,\n alpha_, lambda_,\n coef_, rmse_)\n self.scores_.append(s)\n self.scores_ = np.array(self.scores_)\n\n # posterior covariance is given by 1/alpha_ * scaled_sigma_\n scaled_sigma_ = np.dot(Vh.T,\n Vh / (eigen_vals_ +\n lambda_ / alpha_)[:, np.newaxis])\n self.sigma_ = (1. 
/ alpha_) * scaled_sigma_\n\n self._set_intercept(X_offset_, y_offset_, X_scale_)\n\n return self\n\n def predict(self, X, return_std=False):\n \"\"\"Predict using the linear model.\n\n In addition to the mean of the predictive distribution, also its\n standard deviation can be returned.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\n return_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n\n Returns\n -------\n y_mean : array-like of shape (n_samples,)\n Mean of predictive distribution of query points.\n\n y_std : array-like of shape (n_samples,)\n Standard deviation of predictive distribution of query points.\n \"\"\"\n y_mean = self._decision_function(X)\n if return_std is False:\n return y_mean\n else:\n if self.normalize:\n X = (X - self.X_offset_) / self.X_scale_\n sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)\n y_std = np.sqrt(sigmas_squared_data + (1. / self.alpha_))\n return y_mean, y_std\n\n def _update_coef_(self, X, y, n_samples, n_features, XT_y, U, Vh,\n eigen_vals_, alpha_, lambda_):\n \"\"\"Update posterior mean and compute corresponding rmse.\n\n Posterior mean is given by coef_ = scaled_sigma_ * X.T * y where\n scaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)\n + np.dot(X.T, X))^-1\n \"\"\"\n\n if n_samples > n_features:\n coef_ = np.linalg.multi_dot([Vh.T,\n Vh / (eigen_vals_ + lambda_ /\n alpha_)[:, np.newaxis],\n XT_y])\n else:\n coef_ = np.linalg.multi_dot([X.T,\n U / (eigen_vals_ + lambda_ /\n alpha_)[None, :],\n U.T, y])\n\n rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)\n\n return coef_, rmse_\n\n def _log_marginal_likelihood(self, n_samples, n_features, eigen_vals,\n alpha_, lambda_, coef, rmse):\n \"\"\"Log marginal likelihood.\"\"\"\n alpha_1 = self.alpha_1\n alpha_2 = self.alpha_2\n lambda_1 = self.lambda_1\n lambda_2 = self.lambda_2\n\n # compute the log of the determinant of the posterior covariance.\n # posterior covariance is given by\n # sigma = (lambda_ * np.eye(n_features) + alpha_ * np.dot(X.T, X))^-1\n if n_samples > n_features:\n logdet_sigma = - np.sum(np.log(lambda_ + alpha_ * eigen_vals))\n else:\n logdet_sigma = np.full(n_features, lambda_,\n dtype=np.array(lambda_).dtype)\n logdet_sigma[:n_samples] += alpha_ * eigen_vals\n logdet_sigma = - np.sum(np.log(logdet_sigma))\n\n score = lambda_1 * log(lambda_) - lambda_2 * lambda_\n score += alpha_1 * log(alpha_) - alpha_2 * alpha_\n score += 0.5 * (n_features * log(lambda_) +\n n_samples * log(alpha_) -\n alpha_ * rmse -\n lambda_ * np.sum(coef ** 2) +\n logdet_sigma -\n n_samples * log(2 * np.pi))\n\n return score", + "instance_attributes": [ + { + "name": "n_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "alpha_1", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "alpha_2", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "lambda_1", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "lambda_2", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "compute_score", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy_X", + "types": { + "kind": 
"NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "scores_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet", + "name": "ElasticNet", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet", + "decorators": [], + "superclasses": ["MultiOutputMixin", "RegressorMixin", "LinearModel"], + "methods": [ + "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__", + "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/fit", + "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/sparse_coef_@getter", + "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/_decision_function" + ], + "is_public": false, + "reexported_by": [], + "description": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n a * ||w||_1 + 0.5 * b * ||w||_2^2\n\nwhere::\n\n alpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n a * ||w||_1 + 0.5 * b * ||w||_2^2\n\nwhere::\n\n alpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the penalty terms. Defaults to 1.0.\n See the notes for the exact mathematical meaning of this\n parameter. ``alpha = 0`` is equivalent to an ordinary least square,\n solved by the :class:`LinearRegression` object. For numerical\n reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\n Given this, you should use the :class:`LinearRegression` object.\n\nl1_ratio : float, default=0.5\n The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For\n ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it\n is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a\n combination of L1 and L2.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. 
If ``False``, the\n data is assumed to be already centered.\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool or array-like of shape (n_features, n_features), default=False\n Whether to use a precomputed Gram matrix to speed up\n calculations. The Gram matrix can also be passed as argument.\n For sparse input this option is always ``False`` to preserve sparsity.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\nsparse_coef_ : sparse matrix of shape (n_features,) or (n_tasks, n_features)\n Sparse representation of the `coef_`.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : list of int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n Given param alpha, the dual gaps at the end of the optimization,\n same shape as each observation of y.\n\nExamples\n--------\n>>> from sklearn.linear_model import ElasticNet\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=2, random_state=0)\n>>> regr = ElasticNet(random_state=0)\n>>> regr.fit(X, y)\nElasticNet(random_state=0)\n>>> print(regr.coef_)\n[18.83816048 64.55968825]\n>>> print(regr.intercept_)\n1.451...\n>>> print(regr.predict([[0, 0]]))\n[1.451...]\n\n\nNotes\n-----\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nSee Also\n--------\nElasticNetCV : Elastic net model with best model selection by\n cross-validation.\nSGDRegressor : Implements elastic net regression with incremental training.\nSGDClassifier : Implements logistic regression with elastic net penalty\n (``SGDClassifier(loss=\"log\", penalty=\"elasticnet\")``).", + "code": "class ElasticNet(MultiOutputMixin, RegressorMixin, LinearModel):\n \"\"\"Linear regression with combined L1 and L2 priors as regularizer.\n\n Minimizes the objective function::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\n If you are interested in controlling the L1 and L2 penalty\n separately, keep in mind that this is equivalent to::\n\n a * ||w||_1 + 0.5 * b * ||w||_2^2\n\n where::\n\n alpha = a + b and l1_ratio = a / (a + b)\n\n The parameter l1_ratio corresponds to alpha in the glmnet R package while\n alpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n = 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\n unless you supply your own sequence of alpha.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n alpha : float, default=1.0\n Constant that multiplies the penalty terms. Defaults to 1.0.\n See the notes for the exact mathematical meaning of this\n parameter. ``alpha = 0`` is equivalent to an ordinary least square,\n solved by the :class:`LinearRegression` object. For numerical\n reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\n Given this, you should use the :class:`LinearRegression` object.\n\n l1_ratio : float, default=0.5\n The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For\n ``l1_ratio = 0`` the penalty is an L2 penalty. For ``l1_ratio = 1`` it\n is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a\n combination of L1 and L2.\n\n fit_intercept : bool, default=True\n Whether the intercept should be estimated or not.
If ``False``, the\n data is assumed to be already centered.\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n precompute : bool or array-like of shape (n_features, n_features),\\\n default=False\n Whether to use a precomputed Gram matrix to speed up\n calculations. The Gram matrix can also be passed as argument.\n For sparse input this option is always ``False`` to preserve sparsity.\n\n max_iter : int, default=1000\n The maximum number of iterations.\n\n copy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\n tol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\n warm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n positive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\n random_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n selection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\n Attributes\n ----------\n coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\n sparse_coef_ : sparse matrix of shape (n_features,) or \\\n (n_tasks, n_features)\n Sparse representation of the `coef_`.\n\n intercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\n n_iter_ : list of int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\n dual_gap_ : float or ndarray of shape (n_targets,)\n Given param alpha, the dual gaps at the end of the optimization,\n same shape as each observation of y.\n\n Examples\n --------\n >>> from sklearn.linear_model import ElasticNet\n >>> from sklearn.datasets import make_regression\n\n >>> X, y = make_regression(n_features=2, random_state=0)\n >>> regr = ElasticNet(random_state=0)\n >>> regr.fit(X, y)\n ElasticNet(random_state=0)\n >>> print(regr.coef_)\n [18.83816048 64.55968825]\n >>> print(regr.intercept_)\n 1.451...\n >>> print(regr.predict([[0, 0]]))\n [1.451...]\n\n\n Notes\n -----\n To avoid unnecessary memory duplication the X argument of the fit method\n should be directly passed as a Fortran-contiguous numpy array.\n\n See Also\n --------\n ElasticNetCV : Elastic net model with best model selection by\n cross-validation.\n SGDRegressor : Implements elastic net regression with incremental training.\n SGDClassifier : Implements logistic regression with elastic net penalty\n (``SGDClassifier(loss=\"log\", penalty=\"elasticnet\")``).\n \"\"\"\n path = staticmethod(enet_path)\n\n @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, l1_ratio=0.5, fit_intercept=True,\n normalize=False, precompute=False, max_iter=1000,\n copy_X=True, tol=1e-4, warm_start=False, positive=False,\n random_state=None, selection='cyclic'):\n self.alpha = alpha\n self.l1_ratio = l1_ratio\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.precompute = precompute\n self.max_iter = max_iter\n self.copy_X = copy_X\n self.tol = tol\n self.warm_start = warm_start\n self.positive = positive\n self.random_state = random_state\n self.selection = selection\n\n def fit(self, X, y, sample_weight=None, check_input=True):\n \"\"\"Fit model with coordinate descent.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of (n_samples, n_features)\n Data.\n\n y : {ndarray, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_targets)\n Target. Will be cast to X's dtype if necessary.\n\n sample_weight : float or array-like of shape (n_samples,), default=None\n Sample weight.\n\n .. versionadded:: 0.23\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n Notes\n -----\n\n Coordinate descent is an algorithm that considers each column of\n data at a time hence it will automatically convert the X input\n as a Fortran-contiguous numpy array if necessary.\n\n To avoid memory re-allocation it is advised to allocate the\n initial data in memory directly using that format.\n \"\"\"\n\n if self.alpha == 0:\n warnings.warn(\"With alpha=0, this algorithm does not converge \"\n \"well. You are advised to use the LinearRegression \"\n \"estimator\", stacklevel=2)\n\n if isinstance(self.precompute, str):\n raise ValueError('precompute should be one of True, False or'\n ' array-like. 
Got %r' % self.precompute)\n\n if (not isinstance(self.l1_ratio, numbers.Number) or\n self.l1_ratio < 0 or self.l1_ratio > 1):\n raise ValueError(\"l1_ratio must be between 0 and 1; \"\n f\"got l1_ratio={self.l1_ratio}\")\n\n # Remember if X is copied\n X_copied = False\n # We expect X and y to be float64 or float32 Fortran ordered arrays\n # when bypassing checks\n if check_input:\n X_copied = self.copy_X and self.fit_intercept\n X, y = self._validate_data(X, y, accept_sparse='csc',\n order='F',\n dtype=[np.float64, np.float32],\n copy=X_copied, multi_output=True,\n y_numeric=True)\n y = check_array(y, order='F', copy=False, dtype=X.dtype.type,\n ensure_2d=False)\n\n n_samples, n_features = X.shape\n alpha = self.alpha\n\n if isinstance(sample_weight, numbers.Number):\n sample_weight = None\n if sample_weight is not None:\n if check_input:\n if sparse.issparse(X):\n raise ValueError(\"Sample weights do not (yet) support \"\n \"sparse matrices.\")\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=X.dtype)\n # simplify things by rescaling sw to sum up to n_samples\n # => np.average(x, weights=sw) = np.mean(sw * x)\n sample_weight *= (n_samples / np.sum(sample_weight))\n # Objective function is:\n # 1/2 * np.average(squared error, weights=sw) + alpha * penalty\n # but coordinate descent minimizes:\n # 1/2 * sum(squared error) + alpha * penalty\n # enet_path therefore sets alpha = n_samples * alpha\n # With sw, enet_path should set alpha = sum(sw) * alpha\n # Therefore, we rescale alpha = sum(sw) / n_samples * alpha\n # Note: As we rescaled sample_weights to sum up to n_samples,\n # we don't need this\n # alpha *= np.sum(sample_weight) / n_samples\n\n # Ensure copying happens only once, don't do it again if done above.\n # X and y will be rescaled if sample_weight is not None, order='F'\n # ensures that the returned X and y are still F-contiguous.\n should_copy = self.copy_X and not X_copied\n X, y, X_offset, y_offset, X_scale, precompute, Xy = \\\n _pre_fit(X, y, None, self.precompute, self.normalize,\n self.fit_intercept, copy=should_copy,\n check_input=check_input, sample_weight=sample_weight)\n # coordinate descent needs F-ordered arrays and _pre_fit might have\n # called _rescale_data\n if check_input or sample_weight is not None:\n X, y = _set_order(X, y, order='F')\n if y.ndim == 1:\n y = y[:, np.newaxis]\n if Xy is not None and Xy.ndim == 1:\n Xy = Xy[:, np.newaxis]\n\n n_targets = y.shape[1]\n\n if self.selection not in ['cyclic', 'random']:\n raise ValueError(\"selection should be either random or cyclic.\")\n\n if not self.warm_start or not hasattr(self, \"coef_\"):\n coef_ = np.zeros((n_targets, n_features), dtype=X.dtype,\n order='F')\n else:\n coef_ = self.coef_\n if coef_.ndim == 1:\n coef_ = coef_[np.newaxis, :]\n\n dual_gaps_ = np.zeros(n_targets, dtype=X.dtype)\n self.n_iter_ = []\n\n for k in range(n_targets):\n if Xy is not None:\n this_Xy = Xy[:, k]\n else:\n this_Xy = None\n _, this_coef, this_dual_gap, this_iter = \\\n self.path(X, y[:, k],\n l1_ratio=self.l1_ratio, eps=None,\n n_alphas=None, alphas=[alpha],\n precompute=precompute, Xy=this_Xy,\n fit_intercept=False, normalize=False, copy_X=True,\n verbose=False, tol=self.tol, positive=self.positive,\n X_offset=X_offset, X_scale=X_scale,\n return_n_iter=True, coef_init=coef_[k],\n max_iter=self.max_iter,\n random_state=self.random_state,\n selection=self.selection,\n check_input=False)\n coef_[k] = this_coef[:, 0]\n dual_gaps_[k] = this_dual_gap[0]\n self.n_iter_.append(this_iter[0])\n\n if n_targets == 
1:\n self.n_iter_ = self.n_iter_[0]\n self.coef_ = coef_[0]\n self.dual_gap_ = dual_gaps_[0]\n else:\n self.coef_ = coef_\n self.dual_gap_ = dual_gaps_\n\n self._set_intercept(X_offset, y_offset, X_scale)\n\n # workaround since _set_intercept will cast self.coef_ into X.dtype\n self.coef_ = np.asarray(self.coef_, dtype=X.dtype)\n\n # return self for chaining fit and predict calls\n return self\n\n @property\n def sparse_coef_(self):\n \"\"\"Sparse representation of the fitted `coef_`.\"\"\"\n return sparse.csr_matrix(self.coef_)\n\n def _decision_function(self, X):\n \"\"\"Decision function of the linear model.\n\n Parameters\n ----------\n X : numpy array or scipy.sparse matrix of shape (n_samples, n_features)\n\n Returns\n -------\n T : ndarray of shape (n_samples,)\n The predicted decision function.\n \"\"\"\n check_is_fitted(self)\n if sparse.isspmatrix(X):\n return safe_sparse_dot(X, self.coef_.T,\n dense_output=True) + self.intercept_\n else:\n return super()._decision_function(X)", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "l1_ratio", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "precompute", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "warm_start", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "positive", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "selection", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_iter_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV", + "name": "ElasticNetCV", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV", + "decorators": [], + "superclasses": ["RegressorMixin", "LinearModelCV"], + "methods": [ + "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__", + "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/_get_estimator", + "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/_is_multitask", + "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Elastic Net model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Elastic Net model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nl1_ratio : float or list of float, default=0.5\n float between 0 and 1 passed to ElasticNet (scaling between\n l1 and l2 penalties). For ``l1_ratio = 0``\n the penalty is an L2 penalty. 
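The `a`/`b` reparameterisation in the `ElasticNet` docstring above maps a separate L1 strength `a` and L2 strength `b` onto `alpha = a + b` and `l1_ratio = a / (a + b)`. A small sketch of applying that mapping (assumes scikit-learn 0.24.x; the data comes from `make_regression` as in the docstring example):

```python
# Sketch: choose separate L1/L2 strengths, then convert them to ElasticNet's
# (alpha, l1_ratio) parameterisation as described in the docstring.
from sklearn.datasets import make_regression
from sklearn.linear_model import ElasticNet

a, b = 0.7, 0.3                       # desired L1 and L2 strengths
alpha, l1_ratio = a + b, a / (a + b)  # alpha=1.0, l1_ratio=0.7

X, y = make_regression(n_features=2, random_state=0)
regr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=0).fit(X, y)
print(regr.coef_, regr.intercept_)
```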
For ``l1_ratio = 1`` it is an L1 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2\n This parameter can be a list, in which case the different\n values are tested by cross-validation and the one giving the best\n prediction score is used. Note that a good choice of list of\n values for l1_ratio is often to put more values close to 1\n (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n .9, .95, .99, 1]``.\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path, used for each l1_ratio.\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If None alphas are set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nverbose : bool or int, default=0\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\nl1_ratio_ : float\n The compromise between l1 and l2 penalization chosen by\n cross validation.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\nintercept_ : float or ndarray of shape (n_targets, n_features)\n Independent term in the decision function.\n\nmse_path_ : ndarray of shape (n_l1_ratio, n_alpha, n_folds)\n Mean square error for the test set on each fold, varying l1_ratio and\n alpha.\n\nalphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n The grid of alphas used for fitting, for each l1_ratio.\n\ndual_gap_ : float\n The dual gaps at the end of the optimization for the optimal alpha.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\nExamples\n--------\n>>> from sklearn.linear_model import ElasticNetCV\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=2, random_state=0)\n>>> regr = ElasticNetCV(cv=5, random_state=0)\n>>> regr.fit(X, y)\nElasticNetCV(cv=5, random_state=0)\n>>> print(regr.alpha_)\n0.199...\n>>> print(regr.intercept_)\n0.398...\n>>> print(regr.predict([[0, 0]]))\n[0.398...]\n\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_model_selection.py\n`.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package\nwhile alpha corresponds to the lambda parameter in glmnet.\nMore specifically, the optimization objective is::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n a * L1 + b * L2\n\nfor::\n\n alpha = a + b and l1_ratio = a / (a + b).\n\nSee Also\n--------\nenet_path\nElasticNet", + "code": "class ElasticNetCV(RegressorMixin, LinearModelCV):\n \"\"\"Elastic Net model with iterative fitting along a regularization path.\n\n See glossary entry for :term:`cross-validation estimator`.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n l1_ratio : float or list of float, default=0.5\n float between 0 and 1 passed to ElasticNet (scaling between\n l1 and l2 penalties). For ``l1_ratio = 0``\n the penalty is an L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2\n This parameter can be a list, in which case the different\n values are tested by cross-validation and the one giving the best\n prediction score is used. Note that a good choice of list of\n values for l1_ratio is often to put more values close to 1\n (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n .9, .95, .99, 1]``.\n\n eps : float, default=1e-3\n Length of the path. 
``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\n n_alphas : int, default=100\n Number of alphas along the regularization path, used for each l1_ratio.\n\n alphas : ndarray, default=None\n List of alphas where to compute the models.\n If None alphas are set automatically.\n\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n precompute : 'auto', bool or array-like of shape (n_features, n_features),\\\n default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\n max_iter : int, default=1000\n The maximum number of iterations.\n\n tol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\n cv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n copy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\n verbose : bool or int, default=0\n Amount of verbosity.\n\n n_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n positive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\n random_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n selection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\n Attributes\n ----------\n alpha_ : float\n The amount of penalization chosen by cross validation.\n\n l1_ratio_ : float\n The compromise between l1 and l2 penalization chosen by\n cross validation.\n\n coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\n intercept_ : float or ndarray of shape (n_targets, n_features)\n Independent term in the decision function.\n\n mse_path_ : ndarray of shape (n_l1_ratio, n_alpha, n_folds)\n Mean square error for the test set on each fold, varying l1_ratio and\n alpha.\n\n alphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n The grid of alphas used for fitting, for each l1_ratio.\n\n dual_gap_ : float\n The dual gaps at the end of the optimization for the optimal alpha.\n\n n_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\n Examples\n --------\n >>> from sklearn.linear_model import ElasticNetCV\n >>> from sklearn.datasets import make_regression\n\n >>> X, y = make_regression(n_features=2, random_state=0)\n >>> regr = ElasticNetCV(cv=5, random_state=0)\n >>> regr.fit(X, y)\n ElasticNetCV(cv=5, random_state=0)\n >>> print(regr.alpha_)\n 0.199...\n >>> print(regr.intercept_)\n 0.398...\n >>> print(regr.predict([[0, 0]]))\n [0.398...]\n\n\n Notes\n -----\n For an example, see\n :ref:`examples/linear_model/plot_lasso_model_selection.py\n `.\n\n To avoid unnecessary memory duplication the X argument of the fit method\n should be directly passed as a Fortran-contiguous numpy array.\n\n The parameter l1_ratio corresponds to alpha in the glmnet R package\n while alpha corresponds to the lambda parameter in glmnet.\n More specifically, the optimization objective is::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\n If you are interested in controlling the L1 and L2 penalty\n separately, keep in mind that this is equivalent to::\n\n a * L1 + b * L2\n\n for::\n\n alpha = a + b and l1_ratio = a / (a + b).\n\n See Also\n --------\n enet_path\n ElasticNet\n\n \"\"\"\n path = staticmethod(enet_path)\n\n @_deprecate_positional_args\n def __init__(self, *, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,\n fit_intercept=True, normalize=False, precompute='auto',\n max_iter=1000, tol=1e-4, cv=None, copy_X=True,\n verbose=0, n_jobs=None, positive=False, random_state=None,\n selection='cyclic'):\n self.l1_ratio = l1_ratio\n self.eps = eps\n self.n_alphas = n_alphas\n self.alphas = alphas\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.precompute = precompute\n self.max_iter = max_iter\n self.tol = tol\n self.cv = cv\n self.copy_X = copy_X\n self.verbose = verbose\n self.n_jobs = n_jobs\n self.positive = positive\n self.random_state = random_state\n self.selection = selection\n\n def _get_estimator(self):\n return ElasticNet()\n\n def _is_multitask(self):\n return False\n\n def _more_tags(self):\n return {'multioutput': False}", + "instance_attributes": [ + { + "name": "l1_ratio", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "eps", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "n_alphas", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + 
} + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "precompute", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "positive", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "selection", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso", + "name": "Lasso", + "qname": "sklearn.linear_model._coordinate_descent.Lasso", + "decorators": [], + "superclasses": ["ElasticNet"], + "methods": ["scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Linear Model trained with L1 prior as regularizer (aka the Lasso)\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Linear Model trained with L1 prior as regularizer (aka the Lasso)\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the L1 term. Defaults to 1.0.\n ``alpha = 0`` is equivalent to an ordinary least square, solved\n by the :class:`LinearRegression` object. For numerical\n reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\n Given this, you should use the :class:`LinearRegression` object.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to False, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool or array-like of shape (n_features, n_features), default=False\n Whether to use a precomputed Gram matrix to speed up\n calculations. 
The Gram matrix can also be passed as argument.\n For sparse input this option is always ``False`` to preserve sparsity.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n Given param alpha, the dual gaps at the end of the optimization,\n same shape as each observation of y.\n\nsparse_coef_ : sparse matrix of shape (n_features, 1) or (n_targets, n_features)\n Readonly property derived from ``coef_``.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : int or list of int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.Lasso(alpha=0.1)\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nLasso(alpha=0.1)\n>>> print(clf.coef_)\n[0.85 0. ]\n>>> print(clf.intercept_)\n0.15...\n\nSee Also\n--------\nlars_path\nlasso_path\nLassoLars\nLassoCV\nLassoLarsCV\nsklearn.decomposition.sparse_encode\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.", + "code": "class Lasso(ElasticNet):\n \"\"\"Linear Model trained with L1 prior as regularizer (aka the Lasso)\n\n The optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n Technically the Lasso model is optimizing the same objective function as\n the Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n alpha : float, default=1.0\n Constant that multiplies the L1 term. Defaults to 1.0.\n ``alpha = 0`` is equivalent to an ordinary least square, solved\n by the :class:`LinearRegression` object. For numerical\n reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\n Given this, you should use the :class:`LinearRegression` object.\n\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to False, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n precompute : bool or array-like of shape (n_features, n_features),\\\n default=False\n Whether to use a precomputed Gram matrix to speed up\n calculations. The Gram matrix can also be passed as argument.\n For sparse input this option is always ``False`` to preserve sparsity.\n\n copy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\n max_iter : int, default=1000\n The maximum number of iterations.\n\n tol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\n warm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n positive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\n random_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n selection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\n Attributes\n ----------\n coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\n dual_gap_ : float or ndarray of shape (n_targets,)\n Given param alpha, the dual gaps at the end of the optimization,\n same shape as each observation of y.\n\n sparse_coef_ : sparse matrix of shape (n_features, 1) or \\\n (n_targets, n_features)\n Readonly property derived from ``coef_``.\n\n intercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\n n_iter_ : int or list of int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\n Examples\n --------\n >>> from sklearn import linear_model\n >>> clf = linear_model.Lasso(alpha=0.1)\n >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\n Lasso(alpha=0.1)\n >>> print(clf.coef_)\n [0.85 0. 
]\n >>> print(clf.intercept_)\n 0.15...\n\n See Also\n --------\n lars_path\n lasso_path\n LassoLars\n LassoCV\n LassoLarsCV\n sklearn.decomposition.sparse_encode\n\n Notes\n -----\n The algorithm used to fit the model is coordinate descent.\n\n To avoid unnecessary memory duplication the X argument of the fit method\n should be directly passed as a Fortran-contiguous numpy array.\n \"\"\"\n path = staticmethod(enet_path)\n\n @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,\n precompute=False, copy_X=True, max_iter=1000,\n tol=1e-4, warm_start=False, positive=False,\n random_state=None, selection='cyclic'):\n super().__init__(\n alpha=alpha, l1_ratio=1.0, fit_intercept=fit_intercept,\n normalize=normalize, precompute=precompute, copy_X=copy_X,\n max_iter=max_iter, tol=tol, warm_start=warm_start,\n positive=positive, random_state=random_state,\n selection=selection)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV", + "name": "LassoCV", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV", + "decorators": [], + "superclasses": ["RegressorMixin", "LinearModelCV"], + "methods": [ + "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__", + "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/_get_estimator", + "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/_is_multitask", + "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.", + "docstring": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If ``None`` alphas are set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. 
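A minimal sketch (not from the dump) checking two claims made in the Lasso entry above: it optimizes the same objective as ElasticNet with ``l1_ratio=1.0``, and passing X as a Fortran-contiguous array lets ``fit`` skip an internal copy.

```python
# Minimal sketch, assuming scikit-learn 0.24 as documented above.
import numpy as np
from sklearn.linear_model import ElasticNet, Lasso

rng = np.random.RandomState(0)
X = np.asfortranarray(rng.randn(50, 5))  # Fortran order: no extra copy in fit
y = rng.randn(50)

lasso = Lasso(alpha=0.1).fit(X, y)
enet = ElasticNet(alpha=0.1, l1_ratio=1.0).fit(X, y)  # same objective
print(np.allclose(lasso.coef_, enet.coef_))  # True
```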
The Gram\n matrix can also be passed as argument.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npositive : bool, default=False\n If positive, restrict regression coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,)\n The grid of alphas used for fitting.\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n The dual gap at the end of the optimization for the optimal alpha\n (``alpha_``).\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\nExamples\n--------\n>>> from sklearn.linear_model import LassoCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4, random_state=0)\n>>> reg = LassoCV(cv=5, random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9993...\n>>> reg.predict(X[:1,])\narray([-78.4951...])\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_model_selection.py\n`.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nSee Also\n--------\nlars_path\nlasso_path\nLassoLars\nLasso\nLassoLarsCV", + "code": "class LassoCV(RegressorMixin, LinearModelCV):\n \"\"\"Lasso linear model with iterative fitting along a regularization path.\n\n See glossary entry for :term:`cross-validation estimator`.\n\n The best model is selected by cross-validation.\n\n The optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n eps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\n n_alphas : int, default=100\n Number of alphas along the regularization path.\n\n alphas : ndarray, default=None\n List of alphas where to compute the models.\n If ``None`` alphas are set automatically.\n\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n precompute : 'auto', bool or array-like of shape (n_features, n_features),\\\n default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. 
The Gram\n matrix can also be passed as argument.\n\n max_iter : int, default=1000\n The maximum number of iterations.\n\n tol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\n copy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\n cv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n verbose : bool or int, default=False\n Amount of verbosity.\n\n n_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n positive : bool, default=False\n If positive, restrict regression coefficients to be positive.\n\n random_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n selection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\n Attributes\n ----------\n alpha_ : float\n The amount of penalization chosen by cross validation.\n\n coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\n intercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\n mse_path_ : ndarray of shape (n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\n alphas_ : ndarray of shape (n_alphas,)\n The grid of alphas used for fitting.\n\n dual_gap_ : float or ndarray of shape (n_targets,)\n The dual gap at the end of the optimization for the optimal alpha\n (``alpha_``).\n\n n_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\n Examples\n --------\n >>> from sklearn.linear_model import LassoCV\n >>> from sklearn.datasets import make_regression\n >>> X, y = make_regression(noise=4, random_state=0)\n >>> reg = LassoCV(cv=5, random_state=0).fit(X, y)\n >>> reg.score(X, y)\n 0.9993...\n >>> reg.predict(X[:1,])\n array([-78.4951...])\n\n Notes\n -----\n For an example, see\n :ref:`examples/linear_model/plot_lasso_model_selection.py\n `.\n\n To avoid unnecessary memory duplication the X argument of the fit method\n should be directly passed as a Fortran-contiguous numpy array.\n\n See Also\n --------\n lars_path\n lasso_path\n LassoLars\n Lasso\n LassoLarsCV\n \"\"\"\n path = staticmethod(lasso_path)\n\n @_deprecate_positional_args\n def __init__(self, *, eps=1e-3, n_alphas=100, alphas=None,\n fit_intercept=True,\n normalize=False, precompute='auto', max_iter=1000, tol=1e-4,\n copy_X=True, cv=None, verbose=False, n_jobs=None,\n positive=False, random_state=None, selection='cyclic'):\n super().__init__(\n eps=eps, n_alphas=n_alphas, alphas=alphas,\n fit_intercept=fit_intercept, normalize=normalize,\n precompute=precompute, max_iter=max_iter, tol=tol, copy_X=copy_X,\n cv=cv, verbose=verbose, n_jobs=n_jobs, positive=positive,\n random_state=random_state, selection=selection)\n\n def _get_estimator(self):\n return Lasso()\n\n def _is_multitask(self):\n return False\n\n def _more_tags(self):\n return {'multioutput': False}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV", + "name": "LinearModelCV", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV", + "decorators": [], + "superclasses": ["MultiOutputMixin", "LinearModel"], + "methods": [ + "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__", + "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/_get_estimator", + "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/_is_multitask", + "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for iterative model fitting along a regularization path.", + "docstring": "Base class for iterative model fitting along a regularization path.", + "code": "class LinearModelCV(MultiOutputMixin, LinearModel, metaclass=ABCMeta):\n \"\"\"Base class for iterative model fitting along a regularization path.\"\"\"\n\n @abstractmethod\n def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True,\n normalize=False, precompute='auto', max_iter=1000, tol=1e-4,\n copy_X=True, cv=None, 
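A short sketch, under the LassoCV defaults documented above (``n_alphas=100``, ``eps=1e-3``), showing how the fitted estimator exposes the alpha grid and the cross-validated choice:

```python
# Sketch based on the docstring example above; shapes follow the
# documented attributes alphas_ and mse_path_.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LassoCV

X, y = make_regression(noise=4, random_state=0)
reg = LassoCV(cv=5, random_state=0).fit(X, y)

print(reg.alpha_)            # penalization chosen by cross-validation
print(reg.alphas_.shape)     # (100,): the fitted alpha grid
print(reg.mse_path_.shape)   # (100, 5): test MSE per alpha and fold
# eps=1e-3 fixes the span of the grid: alpha_min / alpha_max = 1e-3
print(np.isclose(reg.alphas_.min() / reg.alphas_.max(), 1e-3))  # True
```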
verbose=False, n_jobs=None,\n positive=False, random_state=None, selection='cyclic'):\n self.eps = eps\n self.n_alphas = n_alphas\n self.alphas = alphas\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.precompute = precompute\n self.max_iter = max_iter\n self.tol = tol\n self.copy_X = copy_X\n self.cv = cv\n self.verbose = verbose\n self.n_jobs = n_jobs\n self.positive = positive\n self.random_state = random_state\n self.selection = selection\n\n @abstractmethod\n def _get_estimator(self):\n \"\"\"Model to be fitted after the best alpha has been determined.\"\"\"\n\n @abstractmethod\n def _is_multitask(self):\n \"\"\"Bool indicating if class is meant for multidimensional target.\"\"\"\n\n def fit(self, X, y):\n \"\"\"Fit linear model with coordinate descent.\n\n Fit is on grid of alphas and best alpha estimated by cross-validation.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data\n to avoid unnecessary memory duplication. If y is mono-output,\n X can be sparse.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n \"\"\"\n # This makes sure that there is no duplication in memory.\n # Dealing right with copy_X is important in the following:\n # Multiple functions touch X and subsamples of X and can induce a\n # lot of duplication of memory\n copy_X = self.copy_X and self.fit_intercept\n\n check_y_params = dict(copy=False, dtype=[np.float64, np.float32],\n ensure_2d=False)\n if isinstance(X, np.ndarray) or sparse.isspmatrix(X):\n # Keep a reference to X\n reference_to_old_X = X\n # Let us not impose fortran ordering so far: it is\n # not useful for the cross-validation loop and will be done\n # by the model fitting itself\n\n # Need to validate separately here.\n # We can't pass multi_output=True because that would allow y to be\n # csr. We also want to allow y to be 64 or 32 but check_X_y only\n # allows to convert for 64.\n check_X_params = dict(accept_sparse='csc',\n dtype=[np.float64, np.float32], copy=False)\n X, y = self._validate_data(X, y,\n validate_separately=(check_X_params,\n check_y_params))\n if sparse.isspmatrix(X):\n if (hasattr(reference_to_old_X, \"data\") and\n not np.may_share_memory(reference_to_old_X.data, X.data)):\n # X is a sparse matrix and has been copied\n copy_X = False\n elif not np.may_share_memory(reference_to_old_X, X):\n # X has been copied\n copy_X = False\n del reference_to_old_X\n else:\n # Need to validate separately here.\n # We can't pass multi_output=True because that would allow y to be\n # csr. 
We also want to allow y to be 64 or 32 but check_X_y only\n # allows to convert for 64.\n check_X_params = dict(accept_sparse='csc',\n dtype=[np.float64, np.float32], order='F',\n copy=copy_X)\n X, y = self._validate_data(X, y,\n validate_separately=(check_X_params,\n check_y_params))\n copy_X = False\n\n if y.shape[0] == 0:\n raise ValueError(\"y has 0 samples: %r\" % y)\n\n if not self._is_multitask():\n if y.ndim > 1 and y.shape[1] > 1:\n raise ValueError(\"For multi-task outputs, use \"\n \"MultiTask%s\" % self.__class__.__name__)\n y = column_or_1d(y, warn=True)\n else:\n if sparse.isspmatrix(X):\n raise TypeError(\"X should be dense but a sparse matrix was \"\n \"passed\")\n elif y.ndim == 1:\n raise ValueError(\"For mono-task outputs, use \"\n \"%sCV\" % self.__class__.__name__[9:])\n\n model = self._get_estimator()\n\n if self.selection not in [\"random\", \"cyclic\"]:\n raise ValueError(\"selection should be either random or cyclic.\")\n\n if X.shape[0] != y.shape[0]:\n raise ValueError(\"X and y have inconsistent dimensions (%d != %d)\"\n % (X.shape[0], y.shape[0]))\n\n # All LinearModelCV parameters except 'cv' are acceptable\n path_params = self.get_params()\n if 'l1_ratio' in path_params:\n l1_ratios = np.atleast_1d(path_params['l1_ratio'])\n # For the first path, we need to set l1_ratio\n path_params['l1_ratio'] = l1_ratios[0]\n else:\n l1_ratios = [1, ]\n path_params.pop('cv', None)\n path_params.pop('n_jobs', None)\n\n alphas = self.alphas\n n_l1_ratio = len(l1_ratios)\n if alphas is None:\n alphas = [_alpha_grid(X, y, l1_ratio=l1_ratio,\n fit_intercept=self.fit_intercept,\n eps=self.eps, n_alphas=self.n_alphas,\n normalize=self.normalize, copy_X=self.copy_X)\n for l1_ratio in l1_ratios]\n else:\n # Making sure alphas is properly ordered.\n alphas = np.tile(np.sort(alphas)[::-1], (n_l1_ratio, 1))\n # We want n_alphas to be the number of alphas used for each l1_ratio.\n n_alphas = len(alphas[0])\n path_params.update({'n_alphas': n_alphas})\n\n path_params['copy_X'] = copy_X\n # We are not computing in parallel, we can modify X\n # inplace in the folds\n if effective_n_jobs(self.n_jobs) > 1:\n path_params['copy_X'] = False\n\n # init cross-validation generator\n cv = check_cv(self.cv)\n\n # Compute path for all folds and compute MSE to get the best alpha\n folds = list(cv.split(X, y))\n best_mse = np.inf\n\n # We do a double for loop folded in one, in order to be able to\n # iterate in parallel on l1_ratio and folds\n jobs = (delayed(_path_residuals)(X, y, train, test, self.path,\n path_params, alphas=this_alphas,\n l1_ratio=this_l1_ratio, X_order='F',\n dtype=X.dtype.type)\n for this_l1_ratio, this_alphas in zip(l1_ratios, alphas)\n for train, test in folds)\n mse_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(prefer=\"threads\"))(jobs)\n mse_paths = np.reshape(mse_paths, (n_l1_ratio, len(folds), -1))\n mean_mse = np.mean(mse_paths, axis=1)\n self.mse_path_ = np.squeeze(np.rollaxis(mse_paths, 2, 1))\n for l1_ratio, l1_alphas, mse_alphas in zip(l1_ratios, alphas,\n mean_mse):\n i_best_alpha = np.argmin(mse_alphas)\n this_best_mse = mse_alphas[i_best_alpha]\n if this_best_mse < best_mse:\n best_alpha = l1_alphas[i_best_alpha]\n best_l1_ratio = l1_ratio\n best_mse = this_best_mse\n\n self.l1_ratio_ = best_l1_ratio\n self.alpha_ = best_alpha\n if self.alphas is None:\n self.alphas_ = np.asarray(alphas)\n if n_l1_ratio == 1:\n self.alphas_ = self.alphas_[0]\n # Remove duplicate alphas in case alphas is provided.\n else:\n self.alphas_ = 
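The selection step in ``LinearModelCV.fit`` above reduces to: average the per-fold MSE paths, then take the (l1_ratio, alpha) pair with the smallest mean MSE. A paraphrased numpy sketch follows; ``select_best`` is a hypothetical helper, not library code.

```python
# Paraphrase of the selection loop in fit() above.
import numpy as np

def select_best(mse_paths, l1_ratios, alphas):
    # mse_paths: (n_l1_ratio, n_folds, n_alphas); alphas: (n_l1_ratio, n_alphas)
    mean_mse = mse_paths.mean(axis=1)  # average over folds
    best_mse = np.inf
    for l1_ratio, l1_alphas, mse_alphas in zip(l1_ratios, alphas, mean_mse):
        i = np.argmin(mse_alphas)
        if mse_alphas[i] < best_mse:
            best_mse = mse_alphas[i]
            best_alpha, best_l1_ratio = l1_alphas[i], l1_ratio
    return best_alpha, best_l1_ratio

rng = np.random.RandomState(0)
alphas = np.tile(np.logspace(0, -3, 4), (2, 1))  # descending grid per l1_ratio
print(select_best(rng.rand(2, 5, 4), [0.5, 1.0], alphas))
```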
np.asarray(alphas[0])\n\n # Refit the model with the parameters selected\n common_params = {name: value\n for name, value in self.get_params().items()\n if name in model.get_params()}\n model.set_params(**common_params)\n model.alpha = best_alpha\n model.l1_ratio = best_l1_ratio\n model.copy_X = copy_X\n precompute = getattr(self, \"precompute\", None)\n if isinstance(precompute, str) and precompute == \"auto\":\n model.precompute = False\n model.fit(X, y)\n if not hasattr(self, 'l1_ratio'):\n del self.l1_ratio_\n self.coef_ = model.coef_\n self.intercept_ = model.intercept_\n self.dual_gap_ = model.dual_gap_\n self.n_iter_ = model.n_iter_\n return self", + "instance_attributes": [ + { + "name": "eps", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "n_alphas", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "precompute", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "positive", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "selection", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet", + "name": "MultiTaskElasticNet", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet", + "decorators": [], + "superclasses": ["Lasso"], + "methods": [ + "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__", + "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/fit", + "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as\nregularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. the sum of norms of each row.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as\nregularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. the sum of norms of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\nl1_ratio : float, default=0.5\n The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n is an L2 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. 
If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_tasks,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_tasks, n_features)\n Parameter vector (W in the cost function formula). If a 1D y is\n passed in at fit (non multi-task usage), ``coef_`` is then a 1D array.\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\ndual_gap_ : float\n The dual gap at the end of the optimization.\n\neps_ : float\n The tolerance scaled by the variance of the target `y`.\n\nsparse_coef_ : sparse matrix of shape (n_features,) or (n_tasks, n_features)\n Sparse representation of the `coef_`.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskElasticNet(alpha=0.1)\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])\nMultiTaskElasticNet(alpha=0.1)\n>>> print(clf.coef_)\n[[0.45663524 0.45612256]\n [0.45663524 0.45612256]]\n>>> print(clf.intercept_)\n[0.0872422 0.0872422]\n\nSee Also\n--------\nMultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n cross-validation.\nElasticNet\nMultiTaskLasso\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays.", + "code": "class MultiTaskElasticNet(Lasso):\n \"\"\"Multi-task ElasticNet model trained with L1/L2 mixed-norm as\n regularizer.\n\n The optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\n Where::\n\n ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\n i.e. 
the sum of norms of each row.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n alpha : float, default=1.0\n Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\n l1_ratio : float, default=0.5\n The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n is an L2 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n copy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\n max_iter : int, default=1000\n The maximum number of iterations.\n\n tol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\n warm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n random_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n selection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\n Attributes\n ----------\n intercept_ : ndarray of shape (n_tasks,)\n Independent term in decision function.\n\n coef_ : ndarray of shape (n_tasks, n_features)\n Parameter vector (W in the cost function formula). 
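A small numeric illustration (plain numpy, not from the dump) of the mixed norm in the objective above: the L2 norm of each row of W, summed over rows. Penalizing it drives entire rows to zero, so a feature is kept or dropped jointly across all tasks.

```python
# Plain-numpy illustration of ||W||_21 = sum_i sqrt(sum_j W_ij^2).
import numpy as np

W = np.array([[3.0, 4.0],    # row norm 5
              [0.0, 0.0],    # row norm 0 -> feature dropped in every task
              [1.0, 0.0]])   # row norm 1
print(np.sqrt((W ** 2).sum(axis=1)).sum())  # 6.0
```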
If a 1D y is\n passed in at fit (non multi-task usage), ``coef_`` is then a 1D array.\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\n n_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\n dual_gap_ : float\n The dual gap at the end of the optimization.\n\n eps_ : float\n The tolerance scaled by the variance of the target `y`.\n\n sparse_coef_ : sparse matrix of shape (n_features,) or \\\n (n_tasks, n_features)\n Sparse representation of the `coef_`.\n\n Examples\n --------\n >>> from sklearn import linear_model\n >>> clf = linear_model.MultiTaskElasticNet(alpha=0.1)\n >>> clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])\n MultiTaskElasticNet(alpha=0.1)\n >>> print(clf.coef_)\n [[0.45663524 0.45612256]\n [0.45663524 0.45612256]]\n >>> print(clf.intercept_)\n [0.0872422 0.0872422]\n\n See Also\n --------\n MultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n cross-validation.\n ElasticNet\n MultiTaskLasso\n\n Notes\n -----\n The algorithm used to fit the model is coordinate descent.\n\n To avoid unnecessary memory duplication the X and y arguments of the fit\n method should be directly passed as Fortran-contiguous numpy arrays.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, l1_ratio=0.5, fit_intercept=True,\n normalize=False, copy_X=True, max_iter=1000, tol=1e-4,\n warm_start=False, random_state=None, selection='cyclic'):\n self.l1_ratio = l1_ratio\n self.alpha = alpha\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.max_iter = max_iter\n self.copy_X = copy_X\n self.tol = tol\n self.warm_start = warm_start\n self.random_state = random_state\n self.selection = selection\n\n def fit(self, X, y):\n \"\"\"Fit MultiTaskElasticNet model with coordinate descent\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Data.\n y : ndarray of shape (n_samples, n_tasks)\n Target. 
Will be cast to X's dtype if necessary.\n\n Notes\n -----\n\n Coordinate descent is an algorithm that considers each column of\n data at a time hence it will automatically convert the X input\n as a Fortran-contiguous numpy array if necessary.\n\n To avoid memory re-allocation it is advised to allocate the\n initial data in memory directly using that format.\n \"\"\"\n # Need to validate separately here.\n # We can't pass multi_output=True because that would allow y to be csr.\n check_X_params = dict(dtype=[np.float64, np.float32], order='F',\n copy=self.copy_X and self.fit_intercept)\n check_y_params = dict(ensure_2d=False, order='F')\n X, y = self._validate_data(X, y, validate_separately=(check_X_params,\n check_y_params))\n y = y.astype(X.dtype)\n\n if hasattr(self, 'l1_ratio'):\n model_str = 'ElasticNet'\n else:\n model_str = 'Lasso'\n if y.ndim == 1:\n raise ValueError(\"For mono-task outputs, use %s\" % model_str)\n\n n_samples, n_features = X.shape\n _, n_tasks = y.shape\n\n if n_samples != y.shape[0]:\n raise ValueError(\"X and y have inconsistent dimensions (%d != %d)\"\n % (n_samples, y.shape[0]))\n\n X, y, X_offset, y_offset, X_scale = _preprocess_data(\n X, y, self.fit_intercept, self.normalize, copy=False)\n\n if not self.warm_start or not hasattr(self, \"coef_\"):\n self.coef_ = np.zeros((n_tasks, n_features), dtype=X.dtype.type,\n order='F')\n\n l1_reg = self.alpha * self.l1_ratio * n_samples\n l2_reg = self.alpha * (1.0 - self.l1_ratio) * n_samples\n\n self.coef_ = np.asfortranarray(self.coef_) # coef contiguous in memory\n\n if self.selection not in ['random', 'cyclic']:\n raise ValueError(\"selection should be either random or cyclic.\")\n random = (self.selection == 'random')\n\n self.coef_, self.dual_gap_, self.eps_, self.n_iter_ = \\\n cd_fast.enet_coordinate_descent_multi_task(\n self.coef_, l1_reg, l2_reg, X, y, self.max_iter, self.tol,\n check_random_state(self.random_state), random)\n\n self._set_intercept(X_offset, y_offset, X_scale)\n\n # return self for chaining fit and predict calls\n return self\n\n def _more_tags(self):\n return {'multioutput_only': True}", + "instance_attributes": [ + { + "name": "l1_ratio", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "warm_start", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "selection", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "coef_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV", + "name": "MultiTaskElasticNetCV", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV", + "decorators": [], + "superclasses": ["RegressorMixin", "LinearModelCV"], + "methods": [ + "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__", + "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/_get_estimator", + 
"scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/_is_multitask", + "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15", + "docstring": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15\n\nParameters\n----------\nl1_ratio : float or list of float, default=0.5\n The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n is an L2 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n This parameter can be a list, in which case the different\n values are tested by cross-validation and the one giving the best\n prediction score is used. Note that a good choice of list of\n values for l1_ratio is often to put more values close to 1\n (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n .9, .95, .99, 1]``\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : array-like, default=None\n List of alphas where to compute the models.\n If not provided, set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nverbose : bool or int, default=0\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation. Note that this is\n used only if multiple values for l1_ratio are given.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_tasks,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_tasks, n_features)\n Parameter vector (W in the cost function formula).\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds) or (n_l1_ratio, n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n The grid of alphas used for fitting, for each l1_ratio.\n\nl1_ratio_ : float\n Best l1_ratio obtained by cross-validation.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\ndual_gap_ : float\n The dual gap at the end of the optimization for the optimal alpha.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskElasticNetCV(cv=3)\n>>> clf.fit([[0,0], [1, 1], [2, 2]],\n... [[0, 0], [1, 1], [2, 2]])\nMultiTaskElasticNetCV(cv=3)\n>>> print(clf.coef_)\n[[0.52875032 0.46958558]\n [0.52875032 0.46958558]]\n>>> print(clf.intercept_)\n[0.00166409 0.00166409]\n\nSee Also\n--------\nMultiTaskElasticNet\nElasticNetCV\nMultiTaskLassoCV\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays.", + "code": "class MultiTaskElasticNetCV(RegressorMixin, LinearModelCV):\n \"\"\"Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\n See glossary entry for :term:`cross-validation estimator`.\n\n The optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\n Where::\n\n ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n i.e. the sum of norm of each row.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.15\n\n Parameters\n ----------\n l1_ratio : float or list of float, default=0.5\n The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n For l1_ratio = 1 the penalty is an L1/L2 penalty. 
For l1_ratio = 0 it\n is an L2 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n This parameter can be a list, in which case the different\n values are tested by cross-validation and the one giving the best\n prediction score is used. Note that a good choice of list of\n values for l1_ratio is often to put more values close to 1\n (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n .9, .95, .99, 1]``\n\n eps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\n n_alphas : int, default=100\n Number of alphas along the regularization path.\n\n alphas : array-like, default=None\n List of alphas where to compute the models.\n If not provided, set automatically.\n\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n max_iter : int, default=1000\n The maximum number of iterations.\n\n tol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\n cv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n copy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\n verbose : bool or int, default=0\n Amount of verbosity.\n\n n_jobs : int, default=None\n Number of CPUs to use during the cross validation. Note that this is\n used only if multiple values for l1_ratio are given.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n random_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n selection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\n Attributes\n ----------\n intercept_ : ndarray of shape (n_tasks,)\n Independent term in decision function.\n\n coef_ : ndarray of shape (n_tasks, n_features)\n Parameter vector (W in the cost function formula).\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\n alpha_ : float\n The amount of penalization chosen by cross validation.\n\n mse_path_ : ndarray of shape (n_alphas, n_folds) or \\\n (n_l1_ratio, n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\n alphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n The grid of alphas used for fitting, for each l1_ratio.\n\n l1_ratio_ : float\n Best l1_ratio obtained by cross-validation.\n\n n_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\n dual_gap_ : float\n The dual gap at the end of the optimization for the optimal alpha.\n\n Examples\n --------\n >>> from sklearn import linear_model\n >>> clf = linear_model.MultiTaskElasticNetCV(cv=3)\n >>> clf.fit([[0,0], [1, 1], [2, 2]],\n ... [[0, 0], [1, 1], [2, 2]])\n MultiTaskElasticNetCV(cv=3)\n >>> print(clf.coef_)\n [[0.52875032 0.46958558]\n [0.52875032 0.46958558]]\n >>> print(clf.intercept_)\n [0.00166409 0.00166409]\n\n See Also\n --------\n MultiTaskElasticNet\n ElasticNetCV\n MultiTaskLassoCV\n\n Notes\n -----\n The algorithm used to fit the model is coordinate descent.\n\n To avoid unnecessary memory duplication the X and y arguments of the fit\n method should be directly passed as Fortran-contiguous numpy arrays.\n \"\"\"\n path = staticmethod(enet_path)\n\n @_deprecate_positional_args\n def __init__(self, *, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,\n fit_intercept=True, normalize=False,\n max_iter=1000, tol=1e-4, cv=None, copy_X=True,\n verbose=0, n_jobs=None, random_state=None,\n selection='cyclic'):\n self.l1_ratio = l1_ratio\n self.eps = eps\n self.n_alphas = n_alphas\n self.alphas = alphas\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.max_iter = max_iter\n self.tol = tol\n self.cv = cv\n self.copy_X = copy_X\n self.verbose = verbose\n self.n_jobs = n_jobs\n self.random_state = random_state\n self.selection = selection\n\n def _get_estimator(self):\n return MultiTaskElasticNet()\n\n def _is_multitask(self):\n return True\n\n def _more_tags(self):\n return {'multioutput_only': True}", + "instance_attributes": [ + { + "name": "l1_ratio", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "eps", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "n_alphas", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "selection", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso", + "name": 
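A hedged usage sketch based on the MultiTaskElasticNetCV docstring above: passing a list for ``l1_ratio`` cross-validates it jointly with ``alpha``, and values near 1 are the recommended grid. Toy data is reused from the docstring example.

```python
# Sketch under the documented defaults (n_alphas=100); tiny toy data.
from sklearn.linear_model import MultiTaskElasticNetCV

X = [[0, 0], [1, 1], [2, 2]]
Y = [[0, 0], [1, 1], [2, 2]]
clf = MultiTaskElasticNetCV(cv=3, l1_ratio=[.5, .9, .99, 1]).fit(X, Y)
print(clf.l1_ratio_, clf.alpha_)  # best pair found by cross-validation
print(clf.mse_path_.shape)        # (n_l1_ratio, n_alphas, n_folds) = (4, 100, 3)
```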
"MultiTaskLasso", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLasso", + "decorators": [], + "superclasses": ["MultiTaskElasticNet"], + "methods": ["scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_tasks, n_features)\n    Parameter vector (W in the cost function formula).\n    Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nintercept_ : ndarray of shape (n_tasks,)\n    Independent term in decision function.\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance.\n\ndual_gap_ : ndarray of shape (n_alphas,)\n    The dual gaps at the end of the optimization for each alpha.\n\neps_ : float\n    The tolerance scaled by the variance of the target `y`.\n\nsparse_coef_ : sparse matrix of shape (n_features,) or (n_tasks, n_features)\n    Sparse representation of the `coef_`.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskLasso(alpha=0.1)\n>>> clf.fit([[0, 1], [1, 2], [2, 4]], [[0, 0], [1, 1], [2, 3]])\nMultiTaskLasso(alpha=0.1)\n>>> print(clf.coef_)\n[[0. 0.60809415]\n[0. 0.94592424]]\n>>> print(clf.intercept_)\n[-0.41888636 -0.87382323]\n\nSee Also\n--------\nMultiTaskLassoCV : Multi-task L1/L2 Lasso with built-in cross-validation.\nLasso\nMultiTaskElasticNet\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays.", + "code": "class MultiTaskLasso(MultiTaskElasticNet):\n    \"\"\"Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\n    The optimization objective for Lasso is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\n    Where::\n\n        ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n    i.e. the sum of norm of each row.\n\n    Read more in the :ref:`User Guide `.\n\n    Parameters\n    ----------\n    alpha : float, default=1.0\n        Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n    copy_X : bool, default=True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    max_iter : int, default=1000\n        The maximum number of iterations.\n\n    tol : float, default=1e-4\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    warm_start : bool, default=False\n        When set to ``True``, reuse the solution of the previous call to fit as\n        initialization, otherwise, just erase the previous solution.\n        See :term:`the Glossary `.\n\n    random_state : int, RandomState instance, default=None\n        The seed of the pseudo random number generator that selects a random\n        feature to update. 
Used when ``selection`` == 'random'.\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary `.\n\n    selection : {'cyclic', 'random'}, default='cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    coef_ : ndarray of shape (n_tasks, n_features)\n        Parameter vector (W in the cost function formula).\n        Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\n    intercept_ : ndarray of shape (n_tasks,)\n        Independent term in decision function.\n\n    n_iter_ : int\n        Number of iterations run by the coordinate descent solver to reach\n        the specified tolerance.\n\n    dual_gap_ : ndarray of shape (n_alphas,)\n        The dual gaps at the end of the optimization for each alpha.\n\n    eps_ : float\n        The tolerance scaled by the variance of the target `y`.\n\n    sparse_coef_ : sparse matrix of shape (n_features,) or \\\n        (n_tasks, n_features)\n        Sparse representation of the `coef_`.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.MultiTaskLasso(alpha=0.1)\n    >>> clf.fit([[0, 1], [1, 2], [2, 4]], [[0, 0], [1, 1], [2, 3]])\n    MultiTaskLasso(alpha=0.1)\n    >>> print(clf.coef_)\n    [[0. 0.60809415]\n    [0. 0.94592424]]\n    >>> print(clf.intercept_)\n    [-0.41888636 -0.87382323]\n\n    See Also\n    --------\n    MultiTaskLassoCV : Multi-task L1/L2 Lasso with built-in cross-validation.\n    Lasso\n    MultiTaskElasticNet\n\n    Notes\n    -----\n    The algorithm used to fit the model is coordinate descent.\n\n    To avoid unnecessary memory duplication the X and y arguments of the fit\n    method should be directly passed as Fortran-contiguous numpy arrays.\n    \"\"\"\n    @_deprecate_positional_args\n    def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,\n                 copy_X=True, max_iter=1000, tol=1e-4, warm_start=False,\n                 random_state=None, selection='cyclic'):\n        self.alpha = alpha\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.max_iter = max_iter\n        self.copy_X = copy_X\n        self.tol = tol\n        self.warm_start = warm_start\n        self.l1_ratio = 1.0\n        self.random_state = random_state\n        self.selection = selection", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "warm_start", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "l1_ratio", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "selection", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV", + "name": "MultiTaskLassoCV", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV", + "decorators": [], + "superclasses": ["RegressorMixin", "LinearModelCV"], + "methods": [ + "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__", + 
"scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/_get_estimator", + "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/_is_multitask", + "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15", + "docstring": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15\n\nParameters\n----------\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : array-like, default=None\n List of alphas where to compute the models.\n If not provided, set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation. Note that this is\n used only if multiple values for l1_ratio are given.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n    for more details.\n\nrandom_state : int, RandomState instance, default=None\n    The seed of the pseudo random number generator that selects a random\n    feature to update. Used when ``selection`` == 'random'.\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n    If set to 'random', a random coefficient is updated every iteration\n    rather than looping over features sequentially by default. This\n    (setting to 'random') often leads to significantly faster convergence\n    especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_tasks,)\n    Independent term in decision function.\n\ncoef_ : ndarray of shape (n_tasks, n_features)\n    Parameter vector (W in the cost function formula).\n    Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nalpha_ : float\n    The amount of penalization chosen by cross validation.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds)\n    Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,)\n    The grid of alphas used for fitting.\n\nn_iter_ : int\n    Number of iterations run by the coordinate descent solver to reach\n    the specified tolerance for the optimal alpha.\n\ndual_gap_ : float\n    The dual gap at the end of the optimization for the optimal alpha.\n\nExamples\n--------\n>>> from sklearn.linear_model import MultiTaskLassoCV\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.metrics import r2_score\n>>> X, y = make_regression(n_targets=2, noise=4, random_state=0)\n>>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)\n>>> r2_score(y, reg.predict(X))\n0.9994...\n>>> reg.alpha_\n0.5713...\n>>> reg.predict(X[:1,])\narray([[153.7971..., 94.9015...]])\n\nSee Also\n--------\nMultiTaskElasticNet\nElasticNetCV\nMultiTaskElasticNetCV\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays.", + "code": "class MultiTaskLassoCV(RegressorMixin, LinearModelCV):\n    \"\"\"Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    The optimization objective for MultiTaskLasso is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\n    Where::\n\n        ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n    i.e. the sum of norm of each row.\n\n    Read more in the :ref:`User Guide `.\n\n    .. versionadded:: 0.15\n\n    Parameters\n    ----------\n    eps : float, default=1e-3\n        Length of the path. ``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``.\n\n    n_alphas : int, default=100\n        Number of alphas along the regularization path.\n\n    alphas : array-like, default=None\n        List of alphas where to compute the models.\n        If not provided, set automatically.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. 
data is expected to be centered).\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n max_iter : int, default=1000\n The maximum number of iterations.\n\n tol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\n copy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\n cv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n verbose : bool or int, default=False\n Amount of verbosity.\n\n n_jobs : int, default=None\n Number of CPUs to use during the cross validation. Note that this is\n used only if multiple values for l1_ratio are given.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n random_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n selection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. 
This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\n Attributes\n ----------\n intercept_ : ndarray of shape (n_tasks,)\n Independent term in decision function.\n\n coef_ : ndarray of shape (n_tasks, n_features)\n Parameter vector (W in the cost function formula).\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\n alpha_ : float\n The amount of penalization chosen by cross validation.\n\n mse_path_ : ndarray of shape (n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\n alphas_ : ndarray of shape (n_alphas,)\n The grid of alphas used for fitting.\n\n n_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\n dual_gap_ : float\n The dual gap at the end of the optimization for the optimal alpha.\n\n Examples\n --------\n >>> from sklearn.linear_model import MultiTaskLassoCV\n >>> from sklearn.datasets import make_regression\n >>> from sklearn.metrics import r2_score\n >>> X, y = make_regression(n_targets=2, noise=4, random_state=0)\n >>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)\n >>> r2_score(y, reg.predict(X))\n 0.9994...\n >>> reg.alpha_\n 0.5713...\n >>> reg.predict(X[:1,])\n array([[153.7971..., 94.9015...]])\n\n See Also\n --------\n MultiTaskElasticNet\n ElasticNetCV\n MultiTaskElasticNetCV\n\n Notes\n -----\n The algorithm used to fit the model is coordinate descent.\n\n To avoid unnecessary memory duplication the X and y arguments of the fit\n method should be directly passed as Fortran-contiguous numpy arrays.\n \"\"\"\n path = staticmethod(lasso_path)\n\n @_deprecate_positional_args\n def __init__(self, *, eps=1e-3, n_alphas=100, alphas=None,\n fit_intercept=True,\n normalize=False, max_iter=1000, tol=1e-4, copy_X=True,\n cv=None, verbose=False, n_jobs=None, random_state=None,\n selection='cyclic'):\n super().__init__(\n eps=eps, n_alphas=n_alphas, alphas=alphas,\n fit_intercept=fit_intercept, normalize=normalize,\n max_iter=max_iter, tol=tol, copy_X=copy_X,\n cv=cv, verbose=verbose, n_jobs=n_jobs, random_state=random_state,\n selection=selection)\n\n def _get_estimator(self):\n return MultiTaskLasso()\n\n def _is_multitask(self):\n return True\n\n def _more_tags(self):\n return {'multioutput_only': True}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor", + "name": "GammaRegressor", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor", + "decorators": [], + "superclasses": ["GeneralizedLinearRegressor"], + "methods": [ + "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/__init__", + "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/family@getter", + "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/family@setter" + ], + "is_public": false, + "reexported_by": [], + "description": "Generalized Linear Model with a Gamma distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23", + "docstring": "Generalized Linear Model with a Gamma distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n Constant that multiplies the penalty term and thus determines the\n regularization strength. ``alpha = 0`` is equivalent to unpenalized\n GLMs. 
In this case, the design matrix `X` must have full column rank\n (no collinearities).\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the linear predictor (X @ coef + intercept).\n\nmax_iter : int, default=100\n The maximal number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. For the lbfgs solver,\n the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n where ``g_j`` is the j-th component of the gradient (derivative) of\n the objective function.\n\nwarm_start : bool, default=False\n If set to ``True``, reuse the solution of the previous call to ``fit``\n as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n For the lbfgs solver set verbose to any positive number for verbosity.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n Estimated coefficients for the linear predictor (`X * coef_ +\n intercept_`) in the GLM.\n\nintercept_ : float\n Intercept (a.k.a. bias) added to linear predictor.\n\nn_iter_ : int\n Actual number of iterations used in the solver.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.GammaRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [19, 26, 33, 30]\n>>> clf.fit(X, y)\nGammaRegressor()\n>>> clf.score(X, y)\n0.773...\n>>> clf.coef_\narray([0.072..., 0.066...])\n>>> clf.intercept_\n2.896...\n>>> clf.predict([[1, 0], [2, 8]])\narray([19.483..., 35.795...])", + "code": "class GammaRegressor(GeneralizedLinearRegressor):\n \"\"\"Generalized Linear Model with a Gamma distribution.\n\n This regressor uses the 'log' link function.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.23\n\n Parameters\n ----------\n alpha : float, default=1\n Constant that multiplies the penalty term and thus determines the\n regularization strength. ``alpha = 0`` is equivalent to unpenalized\n GLMs. In this case, the design matrix `X` must have full column rank\n (no collinearities).\n\n fit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the linear predictor (X @ coef + intercept).\n\n max_iter : int, default=100\n The maximal number of iterations for the solver.\n\n tol : float, default=1e-4\n Stopping criterion. For the lbfgs solver,\n the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n where ``g_j`` is the j-th component of the gradient (derivative) of\n the objective function.\n\n warm_start : bool, default=False\n If set to ``True``, reuse the solution of the previous call to ``fit``\n as initialization for ``coef_`` and ``intercept_`` .\n\n verbose : int, default=0\n For the lbfgs solver set verbose to any positive number for verbosity.\n\n Attributes\n ----------\n coef_ : array of shape (n_features,)\n Estimated coefficients for the linear predictor (`X * coef_ +\n intercept_`) in the GLM.\n\n intercept_ : float\n Intercept (a.k.a. 
bias) added to linear predictor.\n\n    n_iter_ : int\n        Actual number of iterations used in the solver.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.GammaRegressor()\n    >>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n    >>> y = [19, 26, 33, 30]\n    >>> clf.fit(X, y)\n    GammaRegressor()\n    >>> clf.score(X, y)\n    0.773...\n    >>> clf.coef_\n    array([0.072..., 0.066...])\n    >>> clf.intercept_\n    2.896...\n    >>> clf.predict([[1, 0], [2, 8]])\n    array([19.483..., 35.795...])\n    \"\"\"\n    def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100,\n                 tol=1e-4, warm_start=False, verbose=0):\n\n        super().__init__(alpha=alpha, fit_intercept=fit_intercept,\n                         family=\"gamma\", link='log', max_iter=max_iter,\n                         tol=tol, warm_start=warm_start, verbose=verbose)\n\n    @property\n    def family(self):\n        # Make this attribute read-only to avoid mis-uses e.g. in GridSearch.\n        return \"gamma\"\n\n    @family.setter\n    def family(self, value):\n        if value != \"gamma\":\n            raise ValueError(\"GammaRegressor.family must be 'gamma'!\")", + "instance_attributes": [] }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor", + "name": "GeneralizedLinearRegressor", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor", + "decorators": [], + "superclasses": ["RegressorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/__init__", + "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/fit", + "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/_linear_predictor", + "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/predict", + "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/score", + "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at\nfitting and predicting the mean of the target y as y_pred=h(X*w).\nTherefore, the fit minimizes the following objective function with L2\npriors as regularizer::\n\n    1/(2*sum(s)) * deviance(y, h(X*w); s)\n    + 1/2 * alpha * |w|_2^2\n\nwith inverse link function h and s=sample_weight.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23", + "docstring": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at\nfitting and predicting the mean of the target y as y_pred=h(X*w).\nTherefore, the fit minimizes the following objective function with L2\npriors as regularizer::\n\n    1/(2*sum(s)) * deviance(y, h(X*w); s)\n    + 1/2 * alpha * |w|_2^2\n\nwith inverse link function h and s=sample_weight.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n    Constant that multiplies the penalty term and thus determines the\n    regularization strength. ``alpha = 0`` is equivalent to unpenalized\n    GLMs. In this case, the design matrix `X` must have full column rank\n    (no collinearities).\n\nfit_intercept : bool, default=True\n    Specifies if a constant (a.k.a. 
bias or intercept) should be\n    added to the linear predictor (X @ coef + intercept).\n\nfamily : {'normal', 'poisson', 'gamma', 'inverse-gaussian'} or an ExponentialDispersionModel instance, default='normal'\n    The distributional assumption of the GLM, i.e. which distribution from\n    the EDM, specifies the loss function to be minimized.\n\nlink : {'auto', 'identity', 'log'} or an instance of class BaseLink, default='auto'\n    The link function of the GLM, i.e. mapping from linear predictor\n    `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets\n    the link depending on the chosen family as follows:\n\n    - 'identity' for Normal distribution\n    - 'log' for Poisson, Gamma and Inverse Gaussian distributions\n\nsolver : 'lbfgs', default='lbfgs'\n    Algorithm to use in the optimization problem:\n\n    'lbfgs'\n        Calls scipy's L-BFGS-B optimizer.\n\nmax_iter : int, default=100\n    The maximal number of iterations for the solver.\n\ntol : float, default=1e-4\n    Stopping criterion. For the lbfgs solver,\n    the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n    where ``g_j`` is the j-th component of the gradient (derivative) of\n    the objective function.\n\nwarm_start : bool, default=False\n    If set to ``True``, reuse the solution of the previous call to ``fit``\n    as initialization for ``coef_`` and ``intercept_``.\n\nverbose : int, default=0\n    For the lbfgs solver set verbose to any positive number for verbosity.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n    Estimated coefficients for the linear predictor (`X @ coef_ +\n    intercept_`) in the GLM.\n\nintercept_ : float\n    Intercept (a.k.a. bias) added to linear predictor.\n\nn_iter_ : int\n    Actual number of iterations used in the solver.", + "code": "class GeneralizedLinearRegressor(RegressorMixin, BaseEstimator):\n    \"\"\"Regression via a penalized Generalized Linear Model (GLM).\n\n    GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at\n    fitting and predicting the mean of the target y as y_pred=h(X*w).\n    Therefore, the fit minimizes the following objective function with L2\n    priors as regularizer::\n\n            1/(2*sum(s)) * deviance(y, h(X*w); s)\n            + 1/2 * alpha * |w|_2^2\n\n    with inverse link function h and s=sample_weight.\n    The parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\n    Read more in the :ref:`User Guide `.\n\n    .. versionadded:: 0.23\n\n    Parameters\n    ----------\n    alpha : float, default=1\n        Constant that multiplies the penalty term and thus determines the\n        regularization strength. ``alpha = 0`` is equivalent to unpenalized\n        GLMs. In this case, the design matrix `X` must have full column rank\n        (no collinearities).\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. bias or intercept) should be\n        added to the linear predictor (X @ coef + intercept).\n\n    family : {'normal', 'poisson', 'gamma', 'inverse-gaussian'} \\\n            or an ExponentialDispersionModel instance, default='normal'\n        The distributional assumption of the GLM, i.e. which distribution from\n        the EDM, specifies the loss function to be minimized.\n\n    link : {'auto', 'identity', 'log'} or an instance of class BaseLink, \\\n            default='auto'\n        The link function of the GLM, i.e. mapping from linear predictor\n        `X @ coeff + intercept` to prediction `y_pred`. 
Option 'auto' sets\n the link depending on the chosen family as follows:\n\n - 'identity' for Normal distribution\n - 'log' for Poisson, Gamma and Inverse Gaussian distributions\n\n solver : 'lbfgs', default='lbfgs'\n Algorithm to use in the optimization problem:\n\n 'lbfgs'\n Calls scipy's L-BFGS-B optimizer.\n\n max_iter : int, default=100\n The maximal number of iterations for the solver.\n\n tol : float, default=1e-4\n Stopping criterion. For the lbfgs solver,\n the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n where ``g_j`` is the j-th component of the gradient (derivative) of\n the objective function.\n\n warm_start : bool, default=False\n If set to ``True``, reuse the solution of the previous call to ``fit``\n as initialization for ``coef_`` and ``intercept_``.\n\n verbose : int, default=0\n For the lbfgs solver set verbose to any positive number for verbosity.\n\n Attributes\n ----------\n coef_ : array of shape (n_features,)\n Estimated coefficients for the linear predictor (`X @ coef_ +\n intercept_`) in the GLM.\n\n intercept_ : float\n Intercept (a.k.a. bias) added to linear predictor.\n\n n_iter_ : int\n Actual number of iterations used in the solver.\n \"\"\"\n def __init__(self, *, alpha=1.0,\n fit_intercept=True, family='normal', link='auto',\n solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False,\n verbose=0):\n self.alpha = alpha\n self.fit_intercept = fit_intercept\n self.family = family\n self.link = link\n self.solver = solver\n self.max_iter = max_iter\n self.tol = tol\n self.warm_start = warm_start\n self.verbose = verbose\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit a Generalized Linear Model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n if isinstance(self.family, ExponentialDispersionModel):\n self._family_instance = self.family\n elif self.family in EDM_DISTRIBUTIONS:\n self._family_instance = EDM_DISTRIBUTIONS[self.family]()\n else:\n raise ValueError(\n \"The family must be an instance of class\"\n \" ExponentialDispersionModel or an element of\"\n \" ['normal', 'poisson', 'gamma', 'inverse-gaussian']\"\n \"; got (family={0})\".format(self.family))\n\n # Guarantee that self._link_instance is set to an instance of\n # class BaseLink\n if isinstance(self.link, BaseLink):\n self._link_instance = self.link\n else:\n if self.link == 'auto':\n if isinstance(self._family_instance, TweedieDistribution):\n if self._family_instance.power <= 0:\n self._link_instance = IdentityLink()\n if self._family_instance.power >= 1:\n self._link_instance = LogLink()\n else:\n raise ValueError(\"No default link known for the \"\n \"specified distribution family. Please \"\n \"set link manually, i.e. 
not to 'auto'; \"\n \"got (link='auto', family={})\"\n .format(self.family))\n elif self.link == 'identity':\n self._link_instance = IdentityLink()\n elif self.link == 'log':\n self._link_instance = LogLink()\n else:\n raise ValueError(\n \"The link must be an instance of class Link or \"\n \"an element of ['auto', 'identity', 'log']; \"\n \"got (link={0})\".format(self.link))\n\n if not isinstance(self.alpha, numbers.Number) or self.alpha < 0:\n raise ValueError(\"Penalty term must be a non-negative number;\"\n \" got (alpha={0})\".format(self.alpha))\n if not isinstance(self.fit_intercept, bool):\n raise ValueError(\"The argument fit_intercept must be bool;\"\n \" got {0}\".format(self.fit_intercept))\n if self.solver not in ['lbfgs']:\n raise ValueError(\"GeneralizedLinearRegressor supports only solvers\"\n \"'lbfgs'; got {0}\".format(self.solver))\n solver = self.solver\n if (not isinstance(self.max_iter, numbers.Integral)\n or self.max_iter <= 0):\n raise ValueError(\"Maximum number of iteration must be a positive \"\n \"integer;\"\n \" got (max_iter={0!r})\".format(self.max_iter))\n if not isinstance(self.tol, numbers.Number) or self.tol <= 0:\n raise ValueError(\"Tolerance for stopping criteria must be \"\n \"positive; got (tol={0!r})\".format(self.tol))\n if not isinstance(self.warm_start, bool):\n raise ValueError(\"The argument warm_start must be bool;\"\n \" got {0}\".format(self.warm_start))\n\n family = self._family_instance\n link = self._link_instance\n\n X, y = check_X_y(X, y, accept_sparse=['csc', 'csr'],\n dtype=[np.float64, np.float32],\n y_numeric=True, multi_output=False)\n\n weights = _check_sample_weight(sample_weight, X)\n\n _, n_features = X.shape\n\n if not np.all(family.in_y_range(y)):\n raise ValueError(\"Some value(s) of y are out of the valid \"\n \"range for family {0}\"\n .format(family.__class__.__name__))\n # TODO: if alpha=0 check that X is not rank deficient\n\n # rescaling of sample_weight\n #\n # IMPORTANT NOTE: Since we want to minimize\n # 1/(2*sum(sample_weight)) * deviance + L2,\n # deviance = sum(sample_weight * unit_deviance),\n # we rescale weights such that sum(weights) = 1 and this becomes\n # 1/2*deviance + L2 with deviance=sum(weights * unit_deviance)\n weights = weights / weights.sum()\n\n if self.warm_start and hasattr(self, 'coef_'):\n if self.fit_intercept:\n coef = np.concatenate((np.array([self.intercept_]),\n self.coef_))\n else:\n coef = self.coef_\n else:\n if self.fit_intercept:\n coef = np.zeros(n_features+1)\n coef[0] = link(np.average(y, weights=weights))\n else:\n coef = np.zeros(n_features)\n\n # algorithms for optimization\n\n if solver == 'lbfgs':\n def func(coef, X, y, weights, alpha, family, link):\n y_pred, devp = _y_pred_deviance_derivative(\n coef, X, y, weights, family, link\n )\n dev = family.deviance(y, y_pred, weights)\n # offset if coef[0] is intercept\n offset = 1 if self.fit_intercept else 0\n coef_scaled = alpha * coef[offset:]\n obj = 0.5 * dev + 0.5 * (coef[offset:] @ coef_scaled)\n objp = 0.5 * devp\n objp[offset:] += coef_scaled\n return obj, objp\n\n args = (X, y, weights, self.alpha, family, link)\n\n opt_res = scipy.optimize.minimize(\n func, coef, method=\"L-BFGS-B\", jac=True,\n options={\n \"maxiter\": self.max_iter,\n \"iprint\": (self.verbose > 0) - 1,\n \"gtol\": self.tol,\n \"ftol\": 1e3*np.finfo(float).eps,\n },\n args=args)\n self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res)\n coef = opt_res.x\n\n if self.fit_intercept:\n self.intercept_ = coef[0]\n self.coef_ = coef[1:]\n else:\n # set 
intercept to zero as the other linear models do\n self.intercept_ = 0.\n self.coef_ = coef\n\n return self\n\n def _linear_predictor(self, X):\n \"\"\"Compute the linear_predictor = `X @ coef_ + intercept_`.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\n Returns\n -------\n y_pred : array of shape (n_samples,)\n Returns predicted values of linear predictor.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],\n dtype=[np.float64, np.float32], ensure_2d=True,\n allow_nd=False)\n return X @ self.coef_ + self.intercept_\n\n def predict(self, X):\n \"\"\"Predict using GLM with feature matrix X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\n Returns\n -------\n y_pred : array of shape (n_samples,)\n Returns predicted values.\n \"\"\"\n # check_array is done in _linear_predictor\n eta = self._linear_predictor(X)\n y_pred = self._link_instance.inverse(eta)\n return y_pred\n\n def score(self, X, y, sample_weight=None):\n \"\"\"Compute D^2, the percentage of deviance explained.\n\n D^2 is a generalization of the coefficient of determination R^2.\n R^2 uses squared error and D^2 deviance. Note that those two are equal\n for ``family='normal'``.\n\n D^2 is defined as\n :math:`D^2 = 1-\\\\frac{D(y_{true},y_{pred})}{D_{null}}`,\n :math:`D_{null}` is the null deviance, i.e. the deviance of a model\n with intercept alone, which corresponds to :math:`y_{pred} = \\\\bar{y}`.\n The mean :math:`\\\\bar{y}` is averaged by sample_weight.\n Best possible score is 1.0 and it can be negative (because the model\n can be arbitrarily worse).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Test samples.\n\n y : array-like of shape (n_samples,)\n True values of target.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n D^2 of self.predict(X) w.r.t. 
y.\n \"\"\"\n # Note, default score defined in RegressorMixin is R^2 score.\n # TODO: make D^2 a score function in module metrics (and thereby get\n # input validation and so on)\n weights = _check_sample_weight(sample_weight, X)\n y_pred = self.predict(X)\n dev = self._family_instance.deviance(y, y_pred, weights=weights)\n y_mean = np.average(y, weights=weights)\n dev_null = self._family_instance.deviance(y, y_mean, weights=weights)\n return 1 - dev / dev_null\n\n def _more_tags(self):\n # create the _family_instance if fit wasn't called yet.\n if hasattr(self, '_family_instance'):\n _family_instance = self._family_instance\n elif isinstance(self.family, ExponentialDispersionModel):\n _family_instance = self.family\n elif self.family in EDM_DISTRIBUTIONS:\n _family_instance = EDM_DISTRIBUTIONS[self.family]()\n else:\n raise ValueError\n return {\"requires_positive_y\": not _family_instance.in_y_range(-1.0)}", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "family", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "link", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "solver", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "warm_start", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "_family_instance", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "_link_instance", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor", + "name": "PoissonRegressor", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor", + "decorators": [], + "superclasses": ["GeneralizedLinearRegressor"], + "methods": [ + "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/__init__", + "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/family@getter", + "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/family@setter" + ], + "is_public": false, + "reexported_by": [], + "description": "Generalized Linear Model with a Poisson distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23", + "docstring": "Generalized Linear Model with a Poisson distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n Constant that multiplies the penalty term and thus determines the\n regularization strength. ``alpha = 0`` is equivalent to unpenalized\n GLMs. In this case, the design matrix `X` must have full column rank\n (no collinearities).\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the linear predictor (X @ coef + intercept).\n\nmax_iter : int, default=100\n The maximal number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. 
For the lbfgs solver,\n    the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n    where ``g_j`` is the j-th component of the gradient (derivative) of\n    the objective function.\n\nwarm_start : bool, default=False\n    If set to ``True``, reuse the solution of the previous call to ``fit``\n    as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n    For the lbfgs solver set verbose to any positive number for verbosity.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n    Estimated coefficients for the linear predictor (`X @ coef_ +\n    intercept_`) in the GLM.\n\nintercept_ : float\n    Intercept (a.k.a. bias) added to linear predictor.\n\nn_iter_ : int\n    Actual number of iterations used in the solver.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.PoissonRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [12, 17, 22, 21]\n>>> clf.fit(X, y)\nPoissonRegressor()\n>>> clf.score(X, y)\n0.990...\n>>> clf.coef_\narray([0.121..., 0.158...])\n>>> clf.intercept_\n2.088...\n>>> clf.predict([[1, 1], [3, 4]])\narray([10.676..., 21.875...])", + "code": "class PoissonRegressor(GeneralizedLinearRegressor):\n    \"\"\"Generalized Linear Model with a Poisson distribution.\n\n    This regressor uses the 'log' link function.\n\n    Read more in the :ref:`User Guide `.\n\n    .. versionadded:: 0.23\n\n    Parameters\n    ----------\n    alpha : float, default=1\n        Constant that multiplies the penalty term and thus determines the\n        regularization strength. ``alpha = 0`` is equivalent to unpenalized\n        GLMs. In this case, the design matrix `X` must have full column rank\n        (no collinearities).\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. bias or intercept) should be\n        added to the linear predictor (X @ coef + intercept).\n\n    max_iter : int, default=100\n        The maximal number of iterations for the solver.\n\n    tol : float, default=1e-4\n        Stopping criterion. For the lbfgs solver,\n        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n        where ``g_j`` is the j-th component of the gradient (derivative) of\n        the objective function.\n\n    warm_start : bool, default=False\n        If set to ``True``, reuse the solution of the previous call to ``fit``\n        as initialization for ``coef_`` and ``intercept_`` .\n\n    verbose : int, default=0\n        For the lbfgs solver set verbose to any positive number for verbosity.\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features,)\n        Estimated coefficients for the linear predictor (`X @ coef_ +\n        intercept_`) in the GLM.\n\n    intercept_ : float\n        Intercept (a.k.a. bias) added to linear predictor.\n\n    n_iter_ : int\n        Actual number of iterations used in the solver.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.PoissonRegressor()\n    >>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n    >>> y = [12, 17, 22, 21]\n    >>> clf.fit(X, y)\n    PoissonRegressor()\n    >>> clf.score(X, y)\n    0.990...\n    >>> clf.coef_\n    array([0.121..., 0.158...])\n    >>> clf.intercept_\n    2.088...\n    >>> clf.predict([[1, 1], [3, 4]])\n    array([10.676..., 21.875...])\n    \"\"\"\n    def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100,\n                 tol=1e-4, warm_start=False, verbose=0):\n\n        super().__init__(alpha=alpha, fit_intercept=fit_intercept,\n                         family=\"poisson\", link='log', max_iter=max_iter,\n                         tol=tol, warm_start=warm_start, verbose=verbose)\n\n    @property\n    def family(self):\n        # Make this attribute read-only to avoid mis-uses e.g. 
in GridSearch.\n return \"poisson\"\n\n @family.setter\n def family(self, value):\n if value != \"poisson\":\n raise ValueError(\"PoissonRegressor.family must be 'poisson'!\")", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor", + "name": "TweedieRegressor", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor", + "decorators": [], + "superclasses": ["GeneralizedLinearRegressor"], + "methods": [ + "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/__init__", + "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/family@getter", + "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/family@setter" + ], + "is_public": false, + "reexported_by": [], + "description": "Generalized Linear Model with a Tweedie distribution.\n\nThis estimator can be used to model different GLMs depending on the\n``power`` parameter, which determines the underlying distribution.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23", + "docstring": "Generalized Linear Model with a Tweedie distribution.\n\nThis estimator can be used to model different GLMs depending on the\n``power`` parameter, which determines the underlying distribution.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23\n\nParameters\n----------\npower : float, default=0\n The power determines the underlying target distribution according\n to the following table:\n\n +-------+------------------------+\n | Power | Distribution |\n +=======+========================+\n | 0 | Normal |\n +-------+------------------------+\n | 1 | Poisson |\n +-------+------------------------+\n | (1,2) | Compound Poisson Gamma |\n +-------+------------------------+\n | 2 | Gamma |\n +-------+------------------------+\n | 3 | Inverse Gaussian |\n +-------+------------------------+\n\n For ``0 < power < 1``, no distribution exists.\n\nalpha : float, default=1\n Constant that multiplies the penalty term and thus determines the\n regularization strength. ``alpha = 0`` is equivalent to unpenalized\n GLMs. In this case, the design matrix `X` must have full column rank\n (no collinearities).\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the linear predictor (X @ coef + intercept).\n\nlink : {'auto', 'identity', 'log'}, default='auto'\n The link function of the GLM, i.e. mapping from linear predictor\n `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets\n the link depending on the chosen family as follows:\n\n - 'identity' for Normal distribution\n - 'log' for Poisson, Gamma and Inverse Gaussian distributions\n\nmax_iter : int, default=100\n The maximal number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. For the lbfgs solver,\n the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n where ``g_j`` is the j-th component of the gradient (derivative) of\n the objective function.\n\nwarm_start : bool, default=False\n If set to ``True``, reuse the solution of the previous call to ``fit``\n as initialization for ``coef_`` and ``intercept_`` .\n\nverbose : int, default=0\n For the lbfgs solver set verbose to any positive number for verbosity.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n Estimated coefficients for the linear predictor (`X @ coef_ +\n intercept_`) in the GLM.\n\nintercept_ : float\n Intercept (a.k.a. 
bias) added to linear predictor.\n\nn_iter_ : int\n    Actual number of iterations used in the solver.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.TweedieRegressor()\n>>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n>>> y = [2, 3.5, 5, 5.5]\n>>> clf.fit(X, y)\nTweedieRegressor()\n>>> clf.score(X, y)\n0.839...\n>>> clf.coef_\narray([0.599..., 0.299...])\n>>> clf.intercept_\n1.600...\n>>> clf.predict([[1, 1], [3, 4]])\narray([2.500..., 4.599...])", + "code": "class TweedieRegressor(GeneralizedLinearRegressor):\n    \"\"\"Generalized Linear Model with a Tweedie distribution.\n\n    This estimator can be used to model different GLMs depending on the\n    ``power`` parameter, which determines the underlying distribution.\n\n    Read more in the :ref:`User Guide `.\n\n    .. versionadded:: 0.23\n\n    Parameters\n    ----------\n    power : float, default=0\n        The power determines the underlying target distribution according\n        to the following table:\n\n        +-------+------------------------+\n        | Power | Distribution           |\n        +=======+========================+\n        | 0     | Normal                 |\n        +-------+------------------------+\n        | 1     | Poisson                |\n        +-------+------------------------+\n        | (1,2) | Compound Poisson Gamma |\n        +-------+------------------------+\n        | 2     | Gamma                  |\n        +-------+------------------------+\n        | 3     | Inverse Gaussian       |\n        +-------+------------------------+\n\n        For ``0 < power < 1``, no distribution exists.\n\n    alpha : float, default=1\n        Constant that multiplies the penalty term and thus determines the\n        regularization strength. ``alpha = 0`` is equivalent to unpenalized\n        GLMs. In this case, the design matrix `X` must have full column rank\n        (no collinearities).\n\n    fit_intercept : bool, default=True\n        Specifies if a constant (a.k.a. bias or intercept) should be\n        added to the linear predictor (X @ coef + intercept).\n\n    link : {'auto', 'identity', 'log'}, default='auto'\n        The link function of the GLM, i.e. mapping from linear predictor\n        `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets\n        the link depending on the chosen family as follows:\n\n        - 'identity' for Normal distribution\n        - 'log' for Poisson, Gamma and Inverse Gaussian distributions\n\n    max_iter : int, default=100\n        The maximal number of iterations for the solver.\n\n    tol : float, default=1e-4\n        Stopping criterion. For the lbfgs solver,\n        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n        where ``g_j`` is the j-th component of the gradient (derivative) of\n        the objective function.\n\n    warm_start : bool, default=False\n        If set to ``True``, reuse the solution of the previous call to ``fit``\n        as initialization for ``coef_`` and ``intercept_`` .\n\n    verbose : int, default=0\n        For the lbfgs solver set verbose to any positive number for verbosity.\n\n    Attributes\n    ----------\n    coef_ : array of shape (n_features,)\n        Estimated coefficients for the linear predictor (`X @ coef_ +\n        intercept_`) in the GLM.\n\n    intercept_ : float\n        Intercept (a.k.a. 
bias) added to linear predictor.\n\n    n_iter_ : int\n        Actual number of iterations used in the solver.\n\n    Examples\n    --------\n    >>> from sklearn import linear_model\n    >>> clf = linear_model.TweedieRegressor()\n    >>> X = [[1, 2], [2, 3], [3, 4], [4, 3]]\n    >>> y = [2, 3.5, 5, 5.5]\n    >>> clf.fit(X, y)\n    TweedieRegressor()\n    >>> clf.score(X, y)\n    0.839...\n    >>> clf.coef_\n    array([0.599..., 0.299...])\n    >>> clf.intercept_\n    1.600...\n    >>> clf.predict([[1, 1], [3, 4]])\n    array([2.500..., 4.599...])\n    \"\"\"\n    def __init__(self, *, power=0.0, alpha=1.0, fit_intercept=True,\n                 link='auto', max_iter=100, tol=1e-4,\n                 warm_start=False, verbose=0):\n\n        super().__init__(alpha=alpha, fit_intercept=fit_intercept,\n                         family=TweedieDistribution(power=power), link=link,\n                         max_iter=max_iter, tol=tol,\n                         warm_start=warm_start, verbose=verbose)\n\n    @property\n    def family(self):\n        # We use a property with a setter to make sure that the family is\n        # always a Tweedie distribution, and that self.power and\n        # self.family.power are identical by construction.\n        dist = TweedieDistribution(power=self.power)\n        # TODO: make the returned object immutable\n        return dist\n\n    @family.setter\n    def family(self, value):\n        if isinstance(value, TweedieDistribution):\n            self.power = value.power\n        else:\n            raise TypeError(\"TweedieRegressor.family must be of type \"\n                            \"TweedieDistribution!\")", + "instance_attributes": [] }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/BaseLink", + "name": "BaseLink", + "qname": "sklearn.linear_model._glm.link.BaseLink", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.linear_model._glm.link/BaseLink/__call__", + "scikit-learn/sklearn.linear_model._glm.link/BaseLink/derivative", + "scikit-learn/sklearn.linear_model._glm.link/BaseLink/inverse", + "scikit-learn/sklearn.linear_model._glm.link/BaseLink/inverse_derivative" + ], + "is_public": false, + "reexported_by": [], + "description": "Abstract base class for Link functions.", + "docstring": "Abstract base class for Link functions.", + "code": "class BaseLink(metaclass=ABCMeta):\n    \"\"\"Abstract base class for Link functions.\"\"\"\n\n    @abstractmethod\n    def __call__(self, y_pred):\n        \"\"\"Compute the link function g(y_pred).\n\n        The link function links the mean y_pred=E[Y] to the so called linear\n        predictor (X*w), i.e. g(y_pred) = linear predictor.\n\n        Parameters\n        ----------\n        y_pred : array of shape (n_samples,)\n            Usually the (predicted) mean.\n        \"\"\"\n\n    @abstractmethod\n    def derivative(self, y_pred):\n        \"\"\"Compute the derivative of the link g'(y_pred).\n\n        Parameters\n        ----------\n        y_pred : array of shape (n_samples,)\n            Usually the (predicted) mean.\n        \"\"\"\n\n    @abstractmethod\n    def inverse(self, lin_pred):\n        \"\"\"Compute the inverse link function h(lin_pred).\n\n        Gives the inverse relationship between linear predictor and the mean\n        y_pred=E[Y], i.e. 
h(linear predictor) = y_pred.\n\n Parameters\n ----------\n lin_pred : array of shape (n_samples,)\n Usually the (fitted) linear predictor.\n \"\"\"\n\n @abstractmethod\n def inverse_derivative(self, lin_pred):\n \"\"\"Compute the derivative of the inverse link function h'(lin_pred).\n\n Parameters\n ----------\n lin_pred : array of shape (n_samples,)\n Usually the (fitted) linear predictor.\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/IdentityLink", + "name": "IdentityLink", + "qname": "sklearn.linear_model._glm.link.IdentityLink", + "decorators": [], + "superclasses": ["BaseLink"], + "methods": [ + "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/__call__", + "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/derivative", + "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/inverse", + "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/inverse_derivative" + ], + "is_public": false, + "reexported_by": [], + "description": "The identity link function g(x)=x.", + "docstring": "The identity link function g(x)=x.", + "code": "class IdentityLink(BaseLink):\n \"\"\"The identity link function g(x)=x.\"\"\"\n\n def __call__(self, y_pred):\n return y_pred\n\n def derivative(self, y_pred):\n return np.ones_like(y_pred)\n\n def inverse(self, lin_pred):\n return lin_pred\n\n def inverse_derivative(self, lin_pred):\n return np.ones_like(lin_pred)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogLink", + "name": "LogLink", + "qname": "sklearn.linear_model._glm.link.LogLink", + "decorators": [], + "superclasses": ["BaseLink"], + "methods": [ + "scikit-learn/sklearn.linear_model._glm.link/LogLink/__call__", + "scikit-learn/sklearn.linear_model._glm.link/LogLink/derivative", + "scikit-learn/sklearn.linear_model._glm.link/LogLink/inverse", + "scikit-learn/sklearn.linear_model._glm.link/LogLink/inverse_derivative" + ], + "is_public": false, + "reexported_by": [], + "description": "The log link function g(x)=log(x).", + "docstring": "The log link function g(x)=log(x).", + "code": "class LogLink(BaseLink):\n \"\"\"The log link function g(x)=log(x).\"\"\"\n\n def __call__(self, y_pred):\n return np.log(y_pred)\n\n def derivative(self, y_pred):\n return 1 / y_pred\n\n def inverse(self, lin_pred):\n return np.exp(lin_pred)\n\n def inverse_derivative(self, lin_pred):\n return np.exp(lin_pred)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogitLink", + "name": "LogitLink", + "qname": "sklearn.linear_model._glm.link.LogitLink", + "decorators": [], + "superclasses": ["BaseLink"], + "methods": [ + "scikit-learn/sklearn.linear_model._glm.link/LogitLink/__call__", + "scikit-learn/sklearn.linear_model._glm.link/LogitLink/derivative", + "scikit-learn/sklearn.linear_model._glm.link/LogitLink/inverse", + "scikit-learn/sklearn.linear_model._glm.link/LogitLink/inverse_derivative" + ], + "is_public": false, + "reexported_by": [], + "description": "The logit link function g(x)=logit(x).", + "docstring": "The logit link function g(x)=logit(x).", + "code": "class LogitLink(BaseLink):\n \"\"\"The logit link function g(x)=logit(x).\"\"\"\n\n def __call__(self, y_pred):\n return logit(y_pred)\n\n def derivative(self, y_pred):\n return 1 / (y_pred * (1 - y_pred))\n\n def inverse(self, lin_pred):\n return expit(lin_pred)\n\n def inverse_derivative(self, lin_pred):\n ep = expit(lin_pred)\n return ep * (1 - ep)", + "instance_attributes": [] + }, + { + "id": 
"scikit-learn/sklearn.linear_model._huber/HuberRegressor", + "name": "HuberRegressor", + "qname": "sklearn.linear_model._huber.HuberRegressor", + "decorators": [], + "superclasses": ["LinearModel", "RegressorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.linear_model._huber/HuberRegressor/__init__", + "scikit-learn/sklearn.linear_model._huber/HuberRegressor/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Linear regression model that is robust to outliers.\n\nThe Huber Regressor optimizes the squared loss for the samples where\n``|(y - X'w) / sigma| < epsilon`` and the absolute loss for the samples\nwhere ``|(y - X'w) / sigma| > epsilon``, where w and sigma are parameters\nto be optimized. The parameter sigma makes sure that if y is scaled up\nor down by a certain factor, one does not need to rescale epsilon to\nachieve the same robustness. Note that this does not take into account\nthe fact that the different features of X may be of different scales.\n\nThis makes sure that the loss function is not heavily influenced by the\noutliers while not completely ignoring their effect.\n\nRead more in the :ref:`User Guide `\n\n.. versionadded:: 0.18", + "docstring": "Linear regression model that is robust to outliers.\n\nThe Huber Regressor optimizes the squared loss for the samples where\n``|(y - X'w) / sigma| < epsilon`` and the absolute loss for the samples\nwhere ``|(y - X'w) / sigma| > epsilon``, where w and sigma are parameters\nto be optimized. The parameter sigma makes sure that if y is scaled up\nor down by a certain factor, one does not need to rescale epsilon to\nachieve the same robustness. Note that this does not take into account\nthe fact that the different features of X may be of different scales.\n\nThis makes sure that the loss function is not heavily influenced by the\noutliers while not completely ignoring their effect.\n\nRead more in the :ref:`User Guide `\n\n.. versionadded:: 0.18\n\nParameters\n----------\nepsilon : float, greater than 1.0, default=1.35\n The parameter epsilon controls the number of samples that should be\n classified as outliers. The smaller the epsilon, the more robust it is\n to outliers.\n\nmax_iter : int, default=100\n Maximum number of iterations that\n ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` should run for.\n\nalpha : float, default=0.0001\n Regularization parameter.\n\nwarm_start : bool, default=False\n This is useful if the stored attributes of a previously used model\n has to be reused. If set to False, then the coefficients will\n be rewritten for every call to fit.\n See :term:`the Glossary `.\n\nfit_intercept : bool, default=True\n Whether or not to fit the intercept. This can be set to False\n if the data is already centered around the origin.\n\ntol : float, default=1e-05\n The iteration will stop when\n ``max{|proj g_i | i = 1, ..., n}`` <= ``tol``\n where pg_i is the i-th component of the projected gradient.\n\nAttributes\n----------\ncoef_ : array, shape (n_features,)\n Features got by optimizing the Huber loss.\n\nintercept_ : float\n Bias.\n\nscale_ : float\n The value by which ``|y - X'w - c|`` is scaled down.\n\nn_iter_ : int\n Number of iterations that\n ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` has run for.\n\n .. versionchanged:: 0.20\n\n In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n ``max_iter``. 
``n_iter_`` will now report at most ``max_iter``.\n\noutliers_ : array, shape (n_samples,)\n A boolean mask which is set to True where the samples are identified\n as outliers.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import HuberRegressor, LinearRegression\n>>> from sklearn.datasets import make_regression\n>>> rng = np.random.RandomState(0)\n>>> X, y, coef = make_regression(\n... n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)\n>>> X[:4] = rng.uniform(10, 20, (4, 2))\n>>> y[:4] = rng.uniform(10, 20, 4)\n>>> huber = HuberRegressor().fit(X, y)\n>>> huber.score(X, y)\n-7.284...\n>>> huber.predict(X[:1,])\narray([806.7200...])\n>>> linear = LinearRegression().fit(X, y)\n>>> print(\"True coefficients:\", coef)\nTrue coefficients: [20.4923... 34.1698...]\n>>> print(\"Huber coefficients:\", huber.coef_)\nHuber coefficients: [17.7906... 31.0106...]\n>>> print(\"Linear Regression coefficients:\", linear.coef_)\nLinear Regression coefficients: [-1.9221... 7.0226...]\n\nReferences\n----------\n.. [1] Peter J. Huber, Elvezio M. Ronchetti, Robust Statistics\n Concomitant scale estimates, pg 172\n.. [2] Art B. Owen (2006), A robust hybrid of lasso and ridge regression.\n https://statweb.stanford.edu/~owen/reports/hhu.pdf", + "code": "class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator):\n \"\"\"Linear regression model that is robust to outliers.\n\n The Huber Regressor optimizes the squared loss for the samples where\n ``|(y - X'w) / sigma| < epsilon`` and the absolute loss for the samples\n where ``|(y - X'w) / sigma| > epsilon``, where w and sigma are parameters\n to be optimized. The parameter sigma makes sure that if y is scaled up\n or down by a certain factor, one does not need to rescale epsilon to\n achieve the same robustness. Note that this does not take into account\n the fact that the different features of X may be of different scales.\n\n This makes sure that the loss function is not heavily influenced by the\n outliers while not completely ignoring their effect.\n\n Read more in the :ref:`User Guide `\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n epsilon : float, greater than 1.0, default=1.35\n The parameter epsilon controls the number of samples that should be\n classified as outliers. The smaller the epsilon, the more robust it is\n to outliers.\n\n max_iter : int, default=100\n Maximum number of iterations that\n ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` should run for.\n\n alpha : float, default=0.0001\n Regularization parameter.\n\n warm_start : bool, default=False\n This is useful if the stored attributes of a previously used model\n has to be reused. If set to False, then the coefficients will\n be rewritten for every call to fit.\n See :term:`the Glossary `.\n\n fit_intercept : bool, default=True\n Whether or not to fit the intercept. This can be set to False\n if the data is already centered around the origin.\n\n tol : float, default=1e-05\n The iteration will stop when\n ``max{|proj g_i | i = 1, ..., n}`` <= ``tol``\n where pg_i is the i-th component of the projected gradient.\n\n Attributes\n ----------\n coef_ : array, shape (n_features,)\n Features got by optimizing the Huber loss.\n\n intercept_ : float\n Bias.\n\n scale_ : float\n The value by which ``|y - X'w - c|`` is scaled down.\n\n n_iter_ : int\n Number of iterations that\n ``scipy.optimize.minimize(method=\"L-BFGS-B\")`` has run for.\n\n .. 
versionchanged:: 0.20\n\n In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.\n\n outliers_ : array, shape (n_samples,)\n A boolean mask which is set to True where the samples are identified\n as outliers.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.linear_model import HuberRegressor, LinearRegression\n >>> from sklearn.datasets import make_regression\n >>> rng = np.random.RandomState(0)\n >>> X, y, coef = make_regression(\n ... n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)\n >>> X[:4] = rng.uniform(10, 20, (4, 2))\n >>> y[:4] = rng.uniform(10, 20, 4)\n >>> huber = HuberRegressor().fit(X, y)\n >>> huber.score(X, y)\n -7.284...\n >>> huber.predict(X[:1,])\n array([806.7200...])\n >>> linear = LinearRegression().fit(X, y)\n >>> print(\"True coefficients:\", coef)\n True coefficients: [20.4923... 34.1698...]\n >>> print(\"Huber coefficients:\", huber.coef_)\n Huber coefficients: [17.7906... 31.0106...]\n >>> print(\"Linear Regression coefficients:\", linear.coef_)\n Linear Regression coefficients: [-1.9221... 7.0226...]\n\n References\n ----------\n .. [1] Peter J. Huber, Elvezio M. Ronchetti, Robust Statistics\n Concomitant scale estimates, pg 172\n .. [2] Art B. Owen (2006), A robust hybrid of lasso and ridge regression.\n https://statweb.stanford.edu/~owen/reports/hhu.pdf\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, epsilon=1.35, max_iter=100, alpha=0.0001,\n warm_start=False, fit_intercept=True, tol=1e-05):\n self.epsilon = epsilon\n self.max_iter = max_iter\n self.alpha = alpha\n self.warm_start = warm_start\n self.fit_intercept = fit_intercept\n self.tol = tol\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model according to the given training data.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\n y : array-like, shape (n_samples,)\n Target vector relative to X.\n\n sample_weight : array-like, shape (n_samples,)\n Weight given to each sample.\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._validate_data(\n X, y, copy=False, accept_sparse=['csr'], y_numeric=True,\n dtype=[np.float64, np.float32])\n\n sample_weight = _check_sample_weight(sample_weight, X)\n\n if self.epsilon < 1.0:\n raise ValueError(\n \"epsilon should be greater than or equal to 1.0, got %f\"\n % self.epsilon)\n\n if self.warm_start and hasattr(self, 'coef_'):\n parameters = np.concatenate(\n (self.coef_, [self.intercept_, self.scale_]))\n else:\n if self.fit_intercept:\n parameters = np.zeros(X.shape[1] + 2)\n else:\n parameters = np.zeros(X.shape[1] + 1)\n # Make sure to initialize the scale parameter to a strictly\n # positive value:\n parameters[-1] = 1\n\n # Sigma or the scale factor should be non-negative.\n # Setting it to be zero might cause undefined bounds hence we set it\n # to a value close to zero.\n bounds = np.tile([-np.inf, np.inf], (parameters.shape[0], 1))\n bounds[-1][0] = np.finfo(np.float64).eps * 10\n\n opt_res = optimize.minimize(\n _huber_loss_and_gradient, parameters, method=\"L-BFGS-B\", jac=True,\n args=(X, y, self.epsilon, self.alpha, sample_weight),\n options={\"maxiter\": self.max_iter, \"gtol\": self.tol, \"iprint\": -1},\n bounds=bounds)\n\n parameters = opt_res.x\n\n if opt_res.status == 2:\n raise ValueError(\"HuberRegressor convergence failed:\"\n \" l-BFGS-b solver terminated with %s\"\n % 
opt_res.message)\n self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res, self.max_iter)\n self.scale_ = parameters[-1]\n if self.fit_intercept:\n self.intercept_ = parameters[-2]\n else:\n self.intercept_ = 0.0\n self.coef_ = parameters[:X.shape[1]]\n\n residual = np.abs(\n y - safe_sparse_dot(X, self.coef_) - self.intercept_)\n self.outliers_ = residual > self.scale_ * self.epsilon\n return self", + "instance_attributes": [ + { + "name": "epsilon", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "warm_start", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars", + "name": "Lars", + "qname": "sklearn.linear_model._least_angle.Lars", + "decorators": [], + "superclasses": ["MultiOutputMixin", "RegressorMixin", "LinearModel"], + "methods": [ + "scikit-learn/sklearn.linear_model._least_angle/Lars/__init__", + "scikit-learn/sklearn.linear_model._least_angle/Lars/_get_gram", + "scikit-learn/sklearn.linear_model._least_angle/Lars/_fit", + "scikit-learn/sklearn.linear_model._least_angle/Lars/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Least Angle Regression model a.k.a. LAR\n\nRead more in the :ref:`User Guide `.", + "docstring": "Least Angle Regression model a.k.a. LAR\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool, 'auto' or array-like , default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nn_nonzero_coefs : int, default=500\n Target number of non-zero coefficients. Use ``np.inf`` for no limit.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. 
Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nfit_path : bool, default=True\n If True the full path is stored in the ``coef_path_`` attribute.\n If you compute the solution for a large problem or many targets,\n setting ``fit_path`` to ``False`` will lead to a speedup, especially\n with a small alpha.\n\njitter : float, default=None\n Upper bound on a uniform noise parameter to be added to the\n `y` values, to satisfy the model's assumption of\n one-at-a-time computations. Might help with stability.\n\n .. versionadded:: 0.23\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for jittering. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `. Ignored if `jitter` is None.\n\n .. versionadded:: 0.23\n\nAttributes\n----------\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller. If this is a list of array-like, the length of the outer\n list is `n_targets`.\n\nactive_ : list of shape (n_alphas,) or list of such lists\n Indices of active variables at the end of the path.\n If this is a list of list, the length of the outer list is `n_targets`.\n\ncoef_path_ : array-like of shape (n_features, n_alphas + 1) or list of such arrays\n The varying values of the coefficients along the path. It is not\n present if the ``fit_path`` parameter is ``False``. If this is a list\n of array-like, the length of the outer list is `n_targets`.\n\ncoef_ : array-like of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the formulation formula).\n\nintercept_ : float or array-like of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : array-like or int\n The number of iterations taken by lars_path to find the\n grid of alphas for each target.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.Lars(n_nonzero_coefs=1)\n>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])\nLars(n_nonzero_coefs=1)\n>>> print(reg.coef_)\n[ 0. -1.11...]\n\nSee Also\n--------\nlars_path, LarsCV\nsklearn.decomposition.sparse_encode", + "code": "class Lars(MultiOutputMixin, RegressorMixin, LinearModel):\n \"\"\"Least Angle Regression model a.k.a. LAR\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n verbose : bool or int, default=False\n Sets the verbosity amount.\n\n normalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n precompute : bool, 'auto' or array-like , default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. 
The Gram\n matrix can also be passed as argument.\n\n n_nonzero_coefs : int, default=500\n Target number of non-zero coefficients. Use ``np.inf`` for no limit.\n\n eps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\n copy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\n fit_path : bool, default=True\n If True the full path is stored in the ``coef_path_`` attribute.\n If you compute the solution for a large problem or many targets,\n setting ``fit_path`` to ``False`` will lead to a speedup, especially\n with a small alpha.\n\n jitter : float, default=None\n Upper bound on a uniform noise parameter to be added to the\n `y` values, to satisfy the model's assumption of\n one-at-a-time computations. Might help with stability.\n\n .. versionadded:: 0.23\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for jittering. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `. Ignored if `jitter` is None.\n\n .. versionadded:: 0.23\n\n Attributes\n ----------\n alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller. If this is a list of array-like, the length of the outer\n list is `n_targets`.\n\n active_ : list of shape (n_alphas,) or list of such lists\n Indices of active variables at the end of the path.\n If this is a list of list, the length of the outer list is `n_targets`.\n\n coef_path_ : array-like of shape (n_features, n_alphas + 1) or list \\\n of such arrays\n The varying values of the coefficients along the path. It is not\n present if the ``fit_path`` parameter is ``False``. If this is a list\n of array-like, the length of the outer list is `n_targets`.\n\n coef_ : array-like of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the formulation formula).\n\n intercept_ : float or array-like of shape (n_targets,)\n Independent term in decision function.\n\n n_iter_ : array-like or int\n The number of iterations taken by lars_path to find the\n grid of alphas for each target.\n\n Examples\n --------\n >>> from sklearn import linear_model\n >>> reg = linear_model.Lars(n_nonzero_coefs=1)\n >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])\n Lars(n_nonzero_coefs=1)\n >>> print(reg.coef_)\n [ 0. 
-1.11...]\n\n See Also\n --------\n lars_path, LarsCV\n sklearn.decomposition.sparse_encode\n\n \"\"\"\n\n method = \"lar\"\n positive = False\n\n @_deprecate_positional_args\n def __init__(self, *, fit_intercept=True, verbose=False, normalize=True,\n precompute='auto', n_nonzero_coefs=500,\n eps=np.finfo(float).eps, copy_X=True, fit_path=True,\n jitter=None, random_state=None):\n self.fit_intercept = fit_intercept\n self.verbose = verbose\n self.normalize = normalize\n self.precompute = precompute\n self.n_nonzero_coefs = n_nonzero_coefs\n self.eps = eps\n self.copy_X = copy_X\n self.fit_path = fit_path\n self.jitter = jitter\n self.random_state = random_state\n\n @staticmethod\n def _get_gram(precompute, X, y):\n if (not hasattr(precompute, '__array__')) and (\n (precompute is True) or\n (precompute == 'auto' and X.shape[0] > X.shape[1]) or\n (precompute == 'auto' and y.shape[1] > 1)):\n precompute = np.dot(X.T, X)\n\n return precompute\n\n def _fit(self, X, y, max_iter, alpha, fit_path, Xy=None):\n \"\"\"Auxiliary method to fit the model using X, y as training data\"\"\"\n n_features = X.shape[1]\n\n X, y, X_offset, y_offset, X_scale = self._preprocess_data(\n X, y, self.fit_intercept, self.normalize, self.copy_X)\n\n if y.ndim == 1:\n y = y[:, np.newaxis]\n\n n_targets = y.shape[1]\n\n Gram = self._get_gram(self.precompute, X, y)\n\n self.alphas_ = []\n self.n_iter_ = []\n self.coef_ = np.empty((n_targets, n_features))\n\n if fit_path:\n self.active_ = []\n self.coef_path_ = []\n for k in range(n_targets):\n this_Xy = None if Xy is None else Xy[:, k]\n alphas, active, coef_path, n_iter_ = lars_path(\n X, y[:, k], Gram=Gram, Xy=this_Xy, copy_X=self.copy_X,\n copy_Gram=True, alpha_min=alpha, method=self.method,\n verbose=max(0, self.verbose - 1), max_iter=max_iter,\n eps=self.eps, return_path=True,\n return_n_iter=True, positive=self.positive)\n self.alphas_.append(alphas)\n self.active_.append(active)\n self.n_iter_.append(n_iter_)\n self.coef_path_.append(coef_path)\n self.coef_[k] = coef_path[:, -1]\n\n if n_targets == 1:\n self.alphas_, self.active_, self.coef_path_, self.coef_ = [\n a[0] for a in (self.alphas_, self.active_, self.coef_path_,\n self.coef_)]\n self.n_iter_ = self.n_iter_[0]\n else:\n for k in range(n_targets):\n this_Xy = None if Xy is None else Xy[:, k]\n alphas, _, self.coef_[k], n_iter_ = lars_path(\n X, y[:, k], Gram=Gram, Xy=this_Xy, copy_X=self.copy_X,\n copy_Gram=True, alpha_min=alpha, method=self.method,\n verbose=max(0, self.verbose - 1), max_iter=max_iter,\n eps=self.eps, return_path=False, return_n_iter=True,\n positive=self.positive)\n self.alphas_.append(alphas)\n self.n_iter_.append(n_iter_)\n if n_targets == 1:\n self.alphas_ = self.alphas_[0]\n self.n_iter_ = self.n_iter_[0]\n\n self._set_intercept(X_offset, y_offset, X_scale)\n return self\n\n def fit(self, X, y, Xy=None):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\n Xy : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n default=None\n Xy = np.dot(X.T, y) that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\n Returns\n -------\n self : object\n returns an instance of self.\n \"\"\"\n X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)\n\n alpha = getattr(self, 'alpha', 0.)\n if hasattr(self, 'n_nonzero_coefs'):\n alpha = 0. 
# n_nonzero_coefs parametrization takes priority\n max_iter = self.n_nonzero_coefs\n else:\n max_iter = self.max_iter\n\n if self.jitter is not None:\n rng = check_random_state(self.random_state)\n\n noise = rng.uniform(high=self.jitter, size=len(y))\n y = y + noise\n\n self._fit(X, y, max_iter=max_iter, alpha=alpha, fit_path=self.fit_path,\n Xy=Xy)\n\n return self", + "instance_attributes": [ + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "precompute", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_nonzero_coefs", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "fit_path", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "alphas_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "n_iter_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "coef_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "active_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "coef_path_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV", + "name": "LarsCV", + "qname": "sklearn.linear_model._least_angle.LarsCV", + "decorators": [], + "superclasses": ["Lars"], + "methods": [ + "scikit-learn/sklearn.linear_model._least_angle/LarsCV/__init__", + "scikit-learn/sklearn.linear_model._least_angle/LarsCV/_more_tags", + "scikit-learn/sklearn.linear_model._least_angle/LarsCV/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Cross-validated Least Angle Regression model.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Cross-validated Least Angle Regression model.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool, 'auto' or array-like , default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. 
The Gram matrix\n cannot be passed as argument since we will use only subsets of X.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nmax_n_alphas : int, default=1000\n The maximum number of points on the path used to compute the\n residuals in the cross-validation\n\nn_jobs : int or None, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nAttributes\n----------\nactive_ : list of length n_alphas or list of such lists\n Indices of active variables at the end of the path.\n If this is a list of lists, the outer list length is `n_targets`.\n\ncoef_ : array-like of shape (n_features,)\n parameter vector (w in the formulation formula)\n\nintercept_ : float\n independent term in decision function\n\ncoef_path_ : array-like of shape (n_features, n_alphas)\n the varying values of the coefficients along the path\n\nalpha_ : float\n the estimated regularization parameter alpha\n\nalphas_ : array-like of shape (n_alphas,)\n the different values of alpha along the path\n\ncv_alphas_ : array-like of shape (n_cv_alphas,)\n all the values of alpha along the path for the different folds\n\nmse_path_ : array-like of shape (n_folds, n_cv_alphas)\n the mean square error on left-out for each fold along the path\n (alpha values given by ``cv_alphas``)\n\nn_iter_ : array-like or int\n the number of iterations run by Lars with the optimal alpha.\n\nExamples\n--------\n>>> from sklearn.linear_model import LarsCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_samples=200, noise=4.0, random_state=0)\n>>> reg = LarsCV(cv=5).fit(X, y)\n>>> reg.score(X, y)\n0.9996...\n>>> reg.alpha_\n0.0254...\n>>> reg.predict(X[:1,])\narray([154.0842...])\n\nSee Also\n--------\nlars_path, LassoLars, LassoLarsCV", + "code": "class LarsCV(Lars):\n \"\"\"Cross-validated Least Angle Regression model.\n\n See glossary entry for :term:`cross-validation estimator`.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n fit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\n verbose : bool or int, default=False\n Sets the verbosity amount.\n\n max_iter : int, default=500\n Maximum number of iterations to perform.\n\n normalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n precompute : bool, 'auto' or array-like , default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram matrix\n cannot be passed as argument since we will use only subsets of X.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n max_n_alphas : int, default=1000\n The maximum number of points on the path used to compute the\n residuals in the cross-validation\n\n n_jobs : int or None, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n eps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. 
Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\n copy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\n Attributes\n ----------\n active_ : list of length n_alphas or list of such lists\n Indices of active variables at the end of the path.\n If this is a list of lists, the outer list length is `n_targets`.\n\n coef_ : array-like of shape (n_features,)\n parameter vector (w in the formulation formula)\n\n intercept_ : float\n independent term in decision function\n\n coef_path_ : array-like of shape (n_features, n_alphas)\n the varying values of the coefficients along the path\n\n alpha_ : float\n the estimated regularization parameter alpha\n\n alphas_ : array-like of shape (n_alphas,)\n the different values of alpha along the path\n\n cv_alphas_ : array-like of shape (n_cv_alphas,)\n all the values of alpha along the path for the different folds\n\n mse_path_ : array-like of shape (n_folds, n_cv_alphas)\n the mean square error on left-out for each fold along the path\n (alpha values given by ``cv_alphas``)\n\n n_iter_ : array-like or int\n the number of iterations run by Lars with the optimal alpha.\n\n Examples\n --------\n >>> from sklearn.linear_model import LarsCV\n >>> from sklearn.datasets import make_regression\n >>> X, y = make_regression(n_samples=200, noise=4.0, random_state=0)\n >>> reg = LarsCV(cv=5).fit(X, y)\n >>> reg.score(X, y)\n 0.9996...\n >>> reg.alpha_\n 0.0254...\n >>> reg.predict(X[:1,])\n array([154.0842...])\n\n See Also\n --------\n lars_path, LassoLars, LassoLarsCV\n \"\"\"\n\n method = \"lar\"\n\n @_deprecate_positional_args\n def __init__(self, *, fit_intercept=True, verbose=False, max_iter=500,\n normalize=True, precompute='auto', cv=None,\n max_n_alphas=1000, n_jobs=None, eps=np.finfo(float).eps,\n copy_X=True):\n self.max_iter = max_iter\n self.cv = cv\n self.max_n_alphas = max_n_alphas\n self.n_jobs = n_jobs\n super().__init__(fit_intercept=fit_intercept,\n verbose=verbose, normalize=normalize,\n precompute=precompute,\n n_nonzero_coefs=500,\n eps=eps, copy_X=copy_X, fit_path=True)\n\n def _more_tags(self):\n return {'multioutput': False}\n\n def fit(self, X, y):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n Returns\n -------\n self : object\n returns an instance of self.\n \"\"\"\n X, y = self._validate_data(X, y, y_numeric=True)\n X = as_float_array(X, copy=self.copy_X)\n y = as_float_array(y, copy=self.copy_X)\n\n # init cross-validation generator\n cv = check_cv(self.cv, classifier=False)\n\n # As we use cross-validation, the Gram matrix is not precomputed here\n Gram = self.precompute\n if hasattr(Gram, '__array__'):\n warnings.warn('Parameter \"precompute\" cannot be an array in '\n '%s. 
Automatically switch to \"auto\" instead.'\n % self.__class__.__name__)\n Gram = 'auto'\n\n cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n delayed(_lars_path_residues)(\n X[train], y[train], X[test], y[test], Gram=Gram, copy=False,\n method=self.method, verbose=max(0, self.verbose - 1),\n normalize=self.normalize, fit_intercept=self.fit_intercept,\n max_iter=self.max_iter, eps=self.eps, positive=self.positive)\n for train, test in cv.split(X, y))\n all_alphas = np.concatenate(list(zip(*cv_paths))[0])\n # Unique also sorts\n all_alphas = np.unique(all_alphas)\n # Take at most max_n_alphas values\n stride = int(max(1, int(len(all_alphas) / float(self.max_n_alphas))))\n all_alphas = all_alphas[::stride]\n\n mse_path = np.empty((len(all_alphas), len(cv_paths)))\n for index, (alphas, _, _, residues) in enumerate(cv_paths):\n alphas = alphas[::-1]\n residues = residues[::-1]\n if alphas[0] != 0:\n alphas = np.r_[0, alphas]\n residues = np.r_[residues[0, np.newaxis], residues]\n if alphas[-1] != all_alphas[-1]:\n alphas = np.r_[alphas, all_alphas[-1]]\n residues = np.r_[residues, residues[-1, np.newaxis]]\n this_residues = interpolate.interp1d(alphas,\n residues,\n axis=0)(all_alphas)\n this_residues **= 2\n mse_path[:, index] = np.mean(this_residues, axis=-1)\n\n mask = np.all(np.isfinite(mse_path), axis=-1)\n all_alphas = all_alphas[mask]\n mse_path = mse_path[mask]\n # Select the alpha that minimizes left-out error\n i_best_alpha = np.argmin(mse_path.mean(axis=-1))\n best_alpha = all_alphas[i_best_alpha]\n\n # Store our parameters\n self.alpha_ = best_alpha\n self.cv_alphas_ = all_alphas\n self.mse_path_ = mse_path\n\n # Now compute the full model\n # it will call a lasso internally when self if LassoLarsCV\n # as self.method == 'lasso'\n self._fit(X, y, max_iter=self.max_iter, alpha=best_alpha,\n Xy=None, fit_path=True)\n return self", + "instance_attributes": [ + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "max_n_alphas", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars", + "name": "LassoLars", + "qname": "sklearn.linear_model._least_angle.LassoLars", + "decorators": [], + "superclasses": ["Lars"], + "methods": ["scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Lasso model fit with Least Angle Regression a.k.a. Lars\n\nIt is a Linear Model trained with an L1 prior as regularizer.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.", + "docstring": "Lasso model fit with Least Angle Regression a.k.a. Lars\n\nIt is a Linear Model trained with an L1 prior as regularizer.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the penalty term. Defaults to 1.0.\n ``alpha = 0`` is equivalent to an ordinary least square, solved\n by :class:`LinearRegression`. For numerical reasons, using\n ``alpha = 0`` with the LassoLars object is not advised and you\n should prefer the LinearRegression object.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool, 'auto' or array-like, default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nfit_path : bool, default=True\n If ``True`` the full path is stored in the ``coef_path_`` attribute.\n If you compute the solution for a large problem or many targets,\n setting ``fit_path`` to ``False`` will lead to a speedup, especially\n with a small alpha.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0. Be aware that you might want to\n remove fit_intercept which is set True by default.\n Under the positive restriction the model coefficients will not converge\n to the ordinary-least-squares solution for small values of alpha.\n Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n algorithm are typically in congruence with the solution of the\n coordinate descent Lasso estimator.\n\njitter : float, default=None\n Upper bound on a uniform noise parameter to be added to the\n `y` values, to satisfy the model's assumption of\n one-at-a-time computations. Might help with stability.\n\n .. versionadded:: 0.23\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for jittering. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `. Ignored if `jitter` is None.\n\n .. versionadded:: 0.23\n\nAttributes\n----------\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller. If this is a list of array-like, the length of the outer\n list is `n_targets`.\n\nactive_ : list of length n_alphas or list of such lists\n Indices of active variables at the end of the path.\n If this is a list of list, the length of the outer list is `n_targets`.\n\ncoef_path_ : array-like of shape (n_features, n_alphas + 1) or list of such arrays\n If a list is passed it's expected to be one of n_targets such arrays.\n The varying values of the coefficients along the path. It is not\n present if the ``fit_path`` parameter is ``False``. 
If this is a list\n of array-like, the length of the outer list is `n_targets`.\n\ncoef_ : array-like of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the formulation formula).\n\nintercept_ : float or array-like of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : array-like or int\n The number of iterations taken by lars_path to find the\n grid of alphas for each target.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.LassoLars(alpha=0.01)\n>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])\nLassoLars(alpha=0.01)\n>>> print(reg.coef_)\n[ 0. -0.963257...]\n\nSee Also\n--------\nlars_path\nlasso_path\nLasso\nLassoCV\nLassoLarsCV\nLassoLarsIC\nsklearn.decomposition.sparse_encode", + "code": "class LassoLars(Lars):\n \"\"\"Lasso model fit with Least Angle Regression a.k.a. Lars\n\n It is a Linear Model trained with an L1 prior as regularizer.\n\n The optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n alpha : float, default=1.0\n Constant that multiplies the penalty term. Defaults to 1.0.\n ``alpha = 0`` is equivalent to an ordinary least square, solved\n by :class:`LinearRegression`. For numerical reasons, using\n ``alpha = 0`` with the LassoLars object is not advised and you\n should prefer the LinearRegression object.\n\n fit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n verbose : bool or int, default=False\n Sets the verbosity amount.\n\n normalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n precompute : bool, 'auto' or array-like, default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\n max_iter : int, default=500\n Maximum number of iterations to perform.\n\n eps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\n copy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\n fit_path : bool, default=True\n If ``True`` the full path is stored in the ``coef_path_`` attribute.\n If you compute the solution for a large problem or many targets,\n setting ``fit_path`` to ``False`` will lead to a speedup, especially\n with a small alpha.\n\n positive : bool, default=False\n Restrict coefficients to be >= 0. 
Be aware that you might want to\n remove fit_intercept which is set True by default.\n Under the positive restriction the model coefficients will not converge\n to the ordinary-least-squares solution for small values of alpha.\n Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n algorithm are typically in congruence with the solution of the\n coordinate descent Lasso estimator.\n\n jitter : float, default=None\n Upper bound on a uniform noise parameter to be added to the\n `y` values, to satisfy the model's assumption of\n one-at-a-time computations. Might help with stability.\n\n .. versionadded:: 0.23\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for jittering. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `. Ignored if `jitter` is None.\n\n .. versionadded:: 0.23\n\n Attributes\n ----------\n alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller. If this is a list of array-like, the length of the outer\n list is `n_targets`.\n\n active_ : list of length n_alphas or list of such lists\n Indices of active variables at the end of the path.\n If this is a list of list, the length of the outer list is `n_targets`.\n\n coef_path_ : array-like of shape (n_features, n_alphas + 1) or list \\\n of such arrays\n If a list is passed it's expected to be one of n_targets such arrays.\n The varying values of the coefficients along the path. It is not\n present if the ``fit_path`` parameter is ``False``. If this is a list\n of array-like, the length of the outer list is `n_targets`.\n\n coef_ : array-like of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the formulation formula).\n\n intercept_ : float or array-like of shape (n_targets,)\n Independent term in decision function.\n\n n_iter_ : array-like or int\n The number of iterations taken by lars_path to find the\n grid of alphas for each target.\n\n Examples\n --------\n >>> from sklearn import linear_model\n >>> reg = linear_model.LassoLars(alpha=0.01)\n >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])\n LassoLars(alpha=0.01)\n >>> print(reg.coef_)\n [ 0. 
-0.963257...]\n\n See Also\n --------\n lars_path\n lasso_path\n Lasso\n LassoCV\n LassoLarsCV\n LassoLarsIC\n sklearn.decomposition.sparse_encode\n\n \"\"\"\n method = 'lasso'\n\n @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, fit_intercept=True, verbose=False,\n normalize=True, precompute='auto', max_iter=500,\n eps=np.finfo(float).eps, copy_X=True, fit_path=True,\n positive=False, jitter=None, random_state=None):\n self.alpha = alpha\n self.fit_intercept = fit_intercept\n self.max_iter = max_iter\n self.verbose = verbose\n self.normalize = normalize\n self.positive = positive\n self.precompute = precompute\n self.copy_X = copy_X\n self.eps = eps\n self.fit_path = fit_path\n self.jitter = jitter\n self.random_state = random_state", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "positive", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "precompute", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "fit_path", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV", + "name": "LassoLarsCV", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV", + "decorators": [], + "superclasses": ["LarsCV"], + "methods": ["scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Cross-validated Lasso, using the LARS algorithm.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.", + "docstring": "Cross-validated Lasso, using the LARS algorithm.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool or 'auto' , default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. 
The Gram matrix\n cannot be passed as argument since we will use only subsets of X.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nmax_n_alphas : int, default=1000\n The maximum number of points on the path used to compute the\n residuals in the cross-validation\n\nn_jobs : int or None, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0. Be aware that you might want to\n remove fit_intercept which is set True by default.\n Under the positive restriction the model coefficients do not converge\n to the ordinary-least-squares solution for small values of alpha.\n Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n algorithm are typically in congruence with the solution of the\n coordinate descent Lasso estimator.\n As a consequence using LassoLarsCV only makes sense for problems where\n a sparse solution is expected and/or reached.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n parameter vector (w in the formulation formula)\n\nintercept_ : float\n independent term in decision function.\n\ncoef_path_ : array-like of shape (n_features, n_alphas)\n the varying values of the coefficients along the path\n\nalpha_ : float\n the estimated regularization parameter alpha\n\nalphas_ : array-like of shape (n_alphas,)\n the different values of alpha along the path\n\ncv_alphas_ : array-like of shape (n_cv_alphas,)\n all the values of alpha along the path for the different folds\n\nmse_path_ : array-like of shape (n_folds, n_cv_alphas)\n the mean square error on left-out for each fold along the path\n (alpha values given by ``cv_alphas``)\n\nn_iter_ : array-like or int\n the number of iterations run by Lars with the optimal alpha.\n\nactive_ : list of int\n Indices of active variables at the end of the path.\n\nExamples\n--------\n>>> from sklearn.linear_model import LassoLarsCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4.0, random_state=0)\n>>> reg = LassoLarsCV(cv=5).fit(X, y)\n>>> reg.score(X, y)\n0.9992...\n>>> reg.alpha_\n0.0484...\n>>> reg.predict(X[:1,])\narray([-77.8723...])\n\nNotes\n-----\n\nThe object solves the same problem as the LassoCV object. 
However,\nunlike the LassoCV, it finds the relevant alpha values by itself.\nIn general, because of this property, it will be more stable.\nHowever, it is more fragile to heavily multicollinear datasets.\n\nIt is more efficient than the LassoCV if only a small number of\nfeatures are selected compared to the total number, for instance if\nthere are very few samples compared to the number of features.\n\nSee Also\n--------\nlars_path, LassoLars, LarsCV, LassoCV", + "code": "class LassoLarsCV(LarsCV):\n \"\"\"Cross-validated Lasso, using the LARS algorithm.\n\n See glossary entry for :term:`cross-validation estimator`.\n\n The optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n fit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n verbose : bool or int, default=False\n Sets the verbosity amount.\n\n max_iter : int, default=500\n Maximum number of iterations to perform.\n\n normalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n precompute : bool or 'auto' , default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram matrix\n cannot be passed as argument since we will use only subsets of X.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n max_n_alphas : int, default=1000\n The maximum number of points on the path used to compute the\n residuals in the cross-validation\n\n n_jobs : int or None, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n eps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\n copy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\n positive : bool, default=False\n Restrict coefficients to be >= 0. 
Be aware that you might want to\n remove fit_intercept which is set True by default.\n Under the positive restriction the model coefficients do not converge\n to the ordinary-least-squares solution for small values of alpha.\n Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n algorithm are typically in congruence with the solution of the\n coordinate descent Lasso estimator.\n As a consequence using LassoLarsCV only makes sense for problems where\n a sparse solution is expected and/or reached.\n\n Attributes\n ----------\n coef_ : array-like of shape (n_features,)\n parameter vector (w in the formulation formula)\n\n intercept_ : float\n independent term in decision function.\n\n coef_path_ : array-like of shape (n_features, n_alphas)\n the varying values of the coefficients along the path\n\n alpha_ : float\n the estimated regularization parameter alpha\n\n alphas_ : array-like of shape (n_alphas,)\n the different values of alpha along the path\n\n cv_alphas_ : array-like of shape (n_cv_alphas,)\n all the values of alpha along the path for the different folds\n\n mse_path_ : array-like of shape (n_folds, n_cv_alphas)\n the mean square error on left-out for each fold along the path\n (alpha values given by ``cv_alphas``)\n\n n_iter_ : array-like or int\n the number of iterations run by Lars with the optimal alpha.\n\n active_ : list of int\n Indices of active variables at the end of the path.\n\n Examples\n --------\n >>> from sklearn.linear_model import LassoLarsCV\n >>> from sklearn.datasets import make_regression\n >>> X, y = make_regression(noise=4.0, random_state=0)\n >>> reg = LassoLarsCV(cv=5).fit(X, y)\n >>> reg.score(X, y)\n 0.9992...\n >>> reg.alpha_\n 0.0484...\n >>> reg.predict(X[:1,])\n array([-77.8723...])\n\n Notes\n -----\n\n The object solves the same problem as the LassoCV object. 
However,\n unlike the LassoCV, it finds the relevant alpha values by itself.\n In general, because of this property, it will be more stable.\n However, it is more fragile to heavily multicollinear datasets.\n\n It is more efficient than the LassoCV if only a small number of\n features are selected compared to the total number, for instance if\n there are very few samples compared to the number of features.\n\n See Also\n --------\n lars_path, LassoLars, LarsCV, LassoCV\n \"\"\"\n\n method = 'lasso'\n\n @_deprecate_positional_args\n def __init__(self, *, fit_intercept=True, verbose=False, max_iter=500,\n normalize=True, precompute='auto', cv=None,\n max_n_alphas=1000, n_jobs=None, eps=np.finfo(float).eps,\n copy_X=True, positive=False):\n self.fit_intercept = fit_intercept\n self.verbose = verbose\n self.max_iter = max_iter\n self.normalize = normalize\n self.precompute = precompute\n self.cv = cv\n self.max_n_alphas = max_n_alphas\n self.n_jobs = n_jobs\n self.eps = eps\n self.copy_X = copy_X\n self.positive = positive", + "instance_attributes": [ + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "precompute", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "max_n_alphas", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "positive", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC", + "name": "LassoLarsIC", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC", + "decorators": [], + "superclasses": ["LassoLars"], + "methods": [ + "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/__init__", + "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/_more_tags", + "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Lasso model fit with Lars using BIC or AIC for model selection\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nAIC is the Akaike information criterion and BIC is the Bayes\nInformation criterion. Such criteria are useful to select the value\nof the regularization parameter by making a trade-off between the\ngoodness of fit and the complexity of the model. A good model should\nexplain well the data while being simple.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Lasso model fit with Lars using BIC or AIC for model selection\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nAIC is the Akaike information criterion and BIC is the Bayes\nInformation criterion. Such criteria are useful to select the value\nof the regularization parameter by making a trade-off between the\ngoodness of fit and the complexity of the model. A good model should\nexplain well the data while being simple.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {'bic' , 'aic'}, default='aic'\n The type of criterion to use.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. 
If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : bool, 'auto' or array-like, default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform. Can be used for\n early stopping.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0. Be aware that you might want to\n remove fit_intercept which is set True by default.\n Under the positive restriction the model coefficients do not converge\n to the ordinary-least-squares solution for small values of alpha.\n Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n algorithm are typically in congruence with the solution of the\n coordinate descent Lasso estimator.\n As a consequence using LassoLarsIC only makes sense for problems where\n a sparse solution is expected and/or reached.\n\nAttributes\n----------\ncoef_ : array-like of shape (n_features,)\n parameter vector (w in the formulation formula)\n\nintercept_ : float\n independent term in decision function.\n\nalpha_ : float\n the alpha parameter chosen by the information criterion\n\nalphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller. If a list, it will be of length `n_targets`.\n\nn_iter_ : int\n number of iterations run by lars_path to find the grid of\n alphas.\n\ncriterion_ : array-like of shape (n_alphas,)\n The value of the information criteria ('aic', 'bic') across all\n alphas. The alpha which has the smallest information criterion is\n chosen. This value is larger by a factor of ``n_samples`` compared to\n Eqns. 2.15 and 2.16 in (Zou et al, 2007).\n\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> reg = linear_model.LassoLarsIC(criterion='bic')\n>>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])\nLassoLarsIC(criterion='bic')\n>>> print(reg.coef_)\n[ 0. -1.11...]\n\nNotes\n-----\nThe estimation of the number of degrees of freedom is given by:\n\n\"On the degrees of freedom of the lasso\"\nHui Zou, Trevor Hastie, and Robert Tibshirani\nAnn. Statist. 
Volume 35, Number 5 (2007), 2173-2192.\n\nhttps://en.wikipedia.org/wiki/Akaike_information_criterion\nhttps://en.wikipedia.org/wiki/Bayesian_information_criterion\n\nSee Also\n--------\nlars_path, LassoLars, LassoLarsCV", + "code": "class LassoLarsIC(LassoLars):\n \"\"\"Lasso model fit with Lars using BIC or AIC for model selection\n\n The optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n AIC is the Akaike information criterion and BIC is the Bayes\n Information criterion. Such criteria are useful to select the value\n of the regularization parameter by making a trade-off between the\n goodness of fit and the complexity of the model. A good model should\n explain well the data while being simple.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n criterion : {'bic' , 'aic'}, default='aic'\n The type of criterion to use.\n\n fit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n verbose : bool or int, default=False\n Sets the verbosity amount.\n\n normalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n precompute : bool, 'auto' or array-like, default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\n max_iter : int, default=500\n Maximum number of iterations to perform. Can be used for\n early stopping.\n\n eps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\n copy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\n positive : bool, default=False\n Restrict coefficients to be >= 0. Be aware that you might want to\n remove fit_intercept which is set True by default.\n Under the positive restriction the model coefficients do not converge\n to the ordinary-least-squares solution for small values of alpha.\n Only coefficients up to the smallest alpha value (``alphas_[alphas_ >\n 0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\n algorithm are typically in congruence with the solution of the\n coordinate descent Lasso estimator.\n As a consequence using LassoLarsIC only makes sense for problems where\n a sparse solution is expected and/or reached.\n\n Attributes\n ----------\n coef_ : array-like of shape (n_features,)\n parameter vector (w in the formulation formula)\n\n intercept_ : float\n independent term in decision function.\n\n alpha_ : float\n the alpha parameter chosen by the information criterion\n\n alphas_ : array-like of shape (n_alphas + 1,) or list of such arrays\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller. 
If a list, it will be of length `n_targets`.\n\n n_iter_ : int\n number of iterations run by lars_path to find the grid of\n alphas.\n\n criterion_ : array-like of shape (n_alphas,)\n The value of the information criteria ('aic', 'bic') across all\n alphas. The alpha which has the smallest information criterion is\n chosen. This value is larger by a factor of ``n_samples`` compared to\n Eqns. 2.15 and 2.16 in (Zou et al, 2007).\n\n\n Examples\n --------\n >>> from sklearn import linear_model\n >>> reg = linear_model.LassoLarsIC(criterion='bic')\n >>> reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])\n LassoLarsIC(criterion='bic')\n >>> print(reg.coef_)\n [ 0. -1.11...]\n\n Notes\n -----\n The estimation of the number of degrees of freedom is given by:\n\n \"On the degrees of freedom of the lasso\"\n Hui Zou, Trevor Hastie, and Robert Tibshirani\n Ann. Statist. Volume 35, Number 5 (2007), 2173-2192.\n\n https://en.wikipedia.org/wiki/Akaike_information_criterion\n https://en.wikipedia.org/wiki/Bayesian_information_criterion\n\n See Also\n --------\n lars_path, LassoLars, LassoLarsCV\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, criterion='aic', *, fit_intercept=True, verbose=False,\n normalize=True, precompute='auto', max_iter=500,\n eps=np.finfo(float).eps, copy_X=True, positive=False):\n self.criterion = criterion\n self.fit_intercept = fit_intercept\n self.positive = positive\n self.max_iter = max_iter\n self.verbose = verbose\n self.normalize = normalize\n self.copy_X = copy_X\n self.precompute = precompute\n self.eps = eps\n self.fit_path = True\n\n def _more_tags(self):\n return {'multioutput': False}\n\n def fit(self, X, y, copy_X=None):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n training data.\n\n y : array-like of shape (n_samples,)\n target values. Will be cast to X's dtype if necessary\n\n copy_X : bool, default=None\n If provided, this parameter will override the choice\n of copy_X made at instance creation.\n If ``True``, X will be copied; else, it may be overwritten.\n\n Returns\n -------\n self : object\n returns an instance of self.\n \"\"\"\n if copy_X is None:\n copy_X = self.copy_X\n X, y = self._validate_data(X, y, y_numeric=True)\n\n X, y, Xmean, ymean, Xstd = LinearModel._preprocess_data(\n X, y, self.fit_intercept, self.normalize, copy_X)\n\n Gram = self.precompute\n\n alphas_, _, coef_path_, self.n_iter_ = lars_path(\n X, y, Gram=Gram, copy_X=copy_X, copy_Gram=True, alpha_min=0.0,\n method='lasso', verbose=self.verbose, max_iter=self.max_iter,\n eps=self.eps, return_n_iter=True, positive=self.positive)\n\n n_samples = X.shape[0]\n\n if self.criterion == 'aic':\n K = 2 # AIC\n elif self.criterion == 'bic':\n K = log(n_samples) # BIC\n else:\n raise ValueError('criterion should be either bic or aic')\n\n R = y[:, np.newaxis] - np.dot(X, coef_path_) # residuals\n mean_squared_error = np.mean(R ** 2, axis=0)\n sigma2 = np.var(y)\n\n df = np.zeros(coef_path_.shape[1], dtype=int) # Degrees of freedom\n for k, coef in enumerate(coef_path_.T):\n mask = np.abs(coef) > np.finfo(coef.dtype).eps\n if not np.any(mask):\n continue\n # get the number of degrees of freedom equal to:\n # Xc = X[:, mask]\n # Trace(Xc * inv(Xc.T, Xc) * Xc.T) ie the number of non-zero coefs\n df[k] = np.sum(mask)\n\n self.alphas_ = alphas_\n eps64 = np.finfo('float64').eps\n self.criterion_ = (n_samples * mean_squared_error / (sigma2 + eps64) +\n K * df) # Eqns. 
2.15--16 in (Zou et al, 2007)\n n_best = np.argmin(self.criterion_)\n\n self.alpha_ = alphas_[n_best]\n self.coef_ = coef_path_[:, n_best]\n self._set_intercept(Xmean, ymean, Xstd)\n return self", + "instance_attributes": [ + { + "name": "criterion", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "positive", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "precompute", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "fit_path", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "alphas_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression", + "name": "LogisticRegression", + "qname": "sklearn.linear_model._logistic.LogisticRegression", + "decorators": [], + "superclasses": ["LinearClassifierMixin", "SparseCoefMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__", + "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/fit", + "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/predict_proba", + "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/predict_log_proba" + ], + "is_public": false, + "reexported_by": [], + "description": "Logistic Regression (aka logit, MaxEnt) classifier.\n\nIn the multiclass case, the training algorithm uses the one-vs-rest (OvR)\nscheme if the 'multi_class' option is set to 'ovr', and uses the\ncross-entropy loss if the 'multi_class' option is set to 'multinomial'.\n(Currently the 'multinomial' option is supported only by the 'lbfgs',\n'sag', 'saga' and 'newton-cg' solvers.)\n\nThis class implements regularized logistic regression using the\n'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note\nthat regularization is applied by default**. It can handle both dense\nand sparse input. Use C-ordered arrays or CSR matrices containing 64-bit\nfloats for optimal performance; any other input format will be converted\n(and copied).\n\nThe 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\nwith primal formulation, or no regularization. The 'liblinear' solver\nsupports both L1 and L2 regularization, with a dual formulation only for\nthe L2 penalty. The Elastic-Net regularization is only supported by the\n'saga' solver.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Logistic Regression (aka logit, MaxEnt) classifier.\n\nIn the multiclass case, the training algorithm uses the one-vs-rest (OvR)\nscheme if the 'multi_class' option is set to 'ovr', and uses the\ncross-entropy loss if the 'multi_class' option is set to 'multinomial'.\n(Currently the 'multinomial' option is supported only by the 'lbfgs',\n'sag', 'saga' and 'newton-cg' solvers.)\n\nThis class implements regularized logistic regression using the\n'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note\nthat regularization is applied by default**. It can handle both dense\nand sparse input. 
Use C-ordered arrays or CSR matrices containing 64-bit\nfloats for optimal performance; any other input format will be converted\n(and copied).\n\nThe 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\nwith primal formulation, or no regularization. The 'liblinear' solver\nsupports both L1 and L2 regularization, with a dual formulation only for\nthe L2 penalty. The Elastic-Net regularization is only supported by the\n'saga' solver.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npenalty : {'l1', 'l2', 'elasticnet', 'none'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver. If 'none' (not supported by the\n liblinear solver), no regularization is applied.\n\n .. versionadded:: 0.19\n l1 penalty with SAGA solver (allowing 'multinomial' + L1)\n\ndual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nC : float, default=1.0\n Inverse of regularization strength; must be a positive float.\n Like in support vector machines, smaller values specify stronger\n regularization.\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the decision function.\n\nintercept_scaling : float, default=1\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equal to\n intercept_scaling is appended to the instance vector.\n The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n .. versionadded:: 0.17\n *class_weight='balanced'*\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n data. 
See :term:`Glossary ` for details.\n\nsolver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, default='lbfgs'\n\n Algorithm to use in the optimization problem.\n\n - For small datasets, 'liblinear' is a good choice, whereas 'sag' and\n 'saga' are faster for large ones.\n - For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs'\n handle multinomial loss; 'liblinear' is limited to one-versus-rest\n schemes.\n - 'newton-cg', 'lbfgs', 'sag' and 'saga' handle L2 or no penalty\n - 'liblinear' and 'saga' also handle L1 penalty\n - 'saga' also supports 'elasticnet' penalty\n - 'liblinear' does not support setting ``penalty='none'``\n\n Note that 'sag' and 'saga' fast convergence is only guaranteed on\n features with approximately the same scale. You can\n preprocess the data with a scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n .. versionchanged:: 0.22\n The default solver changed from 'liblinear' to 'lbfgs' in 0.22.\n\nmax_iter : int, default=100\n Maximum number of iterations taken for the solvers to converge.\n\nmulti_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n and otherwise selects 'multinomial'.\n\n .. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n .. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22.\n\nverbose : int, default=0\n For the liblinear and lbfgs solvers set verbose to any positive\n number for verbosity.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n Useless for liblinear solver. See :term:`the Glossary `.\n\n .. versionadded:: 0.17\n *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers.\n\nn_jobs : int, default=None\n Number of CPU cores used when parallelizing over classes if\n multi_class='ovr'. This parameter is ignored when the ``solver`` is\n set to 'liblinear' regardless of whether 'multi_class' is specified or\n not. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors.\n See :term:`Glossary ` for more details.\n\nl1_ratio : float, default=None\n The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n combination of L1 and L2.\n\nAttributes\n----------\n\nclasses_ : ndarray of shape (n_classes, )\n A list of class labels known to the classifier.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function.\n\n `coef_` is of shape (1, n_features) when the given problem is binary.\n In particular, when `multi_class='multinomial'`, `coef_` corresponds\n to outcome 1 (True) and `-coef_` corresponds to outcome 0 (False).\n\nintercept_ : ndarray of shape (1,) or (n_classes,)\n Intercept (a.k.a. 
bias) added to the decision function.\n\n If `fit_intercept` is set to False, the intercept is set to zero.\n `intercept_` is of shape (1,) when the given problem is binary.\n In particular, when `multi_class='multinomial'`, `intercept_`\n corresponds to outcome 1 (True) and `-intercept_` corresponds to\n outcome 0 (False).\n\nn_iter_ : ndarray of shape (n_classes,) or (1, )\n Actual number of iterations for all classes. If binary or multinomial,\n it returns only 1 element. For liblinear solver, only the maximum\n number of iterations across all classes is given.\n\n .. versionchanged:: 0.20\n\n In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.\n\nSee Also\n--------\nSGDClassifier : Incrementally trained logistic regression (when given\n the parameter ``loss=\"log\"``).\nLogisticRegressionCV : Logistic regression with built-in cross validation.\n\nNotes\n-----\nThe underlying C implementation uses a random number generator to\nselect features when fitting the model. It is thus not uncommon\nto have slightly different results for the same input data. If\nthat happens, try with a smaller tol parameter.\n\nPredict output may not match that of standalone liblinear in certain\ncases. See :ref:`differences from liblinear `\nin the narrative documentation.\n\nReferences\n----------\n\nL-BFGS-B -- Software for Large-scale Bound-constrained Optimization\n Ciyou Zhu, Richard Byrd, Jorge Nocedal and Jose Luis Morales.\n http://users.iems.northwestern.edu/~nocedal/lbfgsb.html\n\nLIBLINEAR -- A Library for Large Linear Classification\n https://www.csie.ntu.edu.tw/~cjlin/liblinear/\n\nSAG -- Mark Schmidt, Nicolas Le Roux, and Francis Bach\n Minimizing Finite Sums with the Stochastic Average Gradient\n https://hal.inria.fr/hal-00860051/document\n\nSAGA -- Defazio, A., Bach F. & Lacoste-Julien S. (2014).\n SAGA: A Fast Incremental Gradient Method With Support\n for Non-Strongly Convex Composite Objectives\n https://arxiv.org/abs/1407.0202\n\nHsiang-Fu Yu, Fang-Lan Huang, Chih-Jen Lin (2011). Dual coordinate descent\n methods for logistic regression and maximum entropy models.\n Machine Learning 85(1-2):41-75.\n https://www.csie.ntu.edu.tw/~cjlin/papers/maxent_dual.pdf\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegression\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegression(random_state=0).fit(X, y)\n>>> clf.predict(X[:2, :])\narray([0, 0])\n>>> clf.predict_proba(X[:2, :])\narray([[9.8...e-01, 1.8...e-02, 1.4...e-08],\n [9.7...e-01, 2.8...e-02, ...e-08]])\n>>> clf.score(X, y)\n0.97...", + "code": "class LogisticRegression(LinearClassifierMixin,\n SparseCoefMixin,\n BaseEstimator):\n \"\"\"\n Logistic Regression (aka logit, MaxEnt) classifier.\n\n In the multiclass case, the training algorithm uses the one-vs-rest (OvR)\n scheme if the 'multi_class' option is set to 'ovr', and uses the\n cross-entropy loss if the 'multi_class' option is set to 'multinomial'.\n (Currently the 'multinomial' option is supported only by the 'lbfgs',\n 'sag', 'saga' and 'newton-cg' solvers.)\n\n This class implements regularized logistic regression using the\n 'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note\n that regularization is applied by default**. It can handle both dense\n and sparse input. 
Use C-ordered arrays or CSR matrices containing 64-bit\n floats for optimal performance; any other input format will be converted\n (and copied).\n\n The 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\n with primal formulation, or no regularization. The 'liblinear' solver\n supports both L1 and L2 regularization, with a dual formulation only for\n the L2 penalty. The Elastic-Net regularization is only supported by the\n 'saga' solver.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n penalty : {'l1', 'l2', 'elasticnet', 'none'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver. If 'none' (not supported by the\n liblinear solver), no regularization is applied.\n\n .. versionadded:: 0.19\n l1 penalty with SAGA solver (allowing 'multinomial' + L1)\n\n dual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\n tol : float, default=1e-4\n Tolerance for stopping criteria.\n\n C : float, default=1.0\n Inverse of regularization strength; must be a positive float.\n Like in support vector machines, smaller values specify stronger\n regularization.\n\n fit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the decision function.\n\n intercept_scaling : float, default=1\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equal to\n intercept_scaling is appended to the instance vector.\n The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\n class_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n .. versionadded:: 0.17\n *class_weight='balanced'*\n\n random_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n data. 
See :term:`Glossary ` for details.\n\n solver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, \\\n default='lbfgs'\n\n Algorithm to use in the optimization problem.\n\n - For small datasets, 'liblinear' is a good choice, whereas 'sag' and\n 'saga' are faster for large ones.\n - For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs'\n handle multinomial loss; 'liblinear' is limited to one-versus-rest\n schemes.\n - 'newton-cg', 'lbfgs', 'sag' and 'saga' handle L2 or no penalty\n - 'liblinear' and 'saga' also handle L1 penalty\n - 'saga' also supports 'elasticnet' penalty\n - 'liblinear' does not support setting ``penalty='none'``\n\n Note that 'sag' and 'saga' fast convergence is only guaranteed on\n features with approximately the same scale. You can\n preprocess the data with a scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n .. versionchanged:: 0.22\n The default solver changed from 'liblinear' to 'lbfgs' in 0.22.\n\n max_iter : int, default=100\n Maximum number of iterations taken for the solvers to converge.\n\n multi_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n and otherwise selects 'multinomial'.\n\n .. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n .. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22.\n\n verbose : int, default=0\n For the liblinear and lbfgs solvers set verbose to any positive\n number for verbosity.\n\n warm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n Useless for liblinear solver. See :term:`the Glossary `.\n\n .. versionadded:: 0.17\n *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers.\n\n n_jobs : int, default=None\n Number of CPU cores used when parallelizing over classes if\n multi_class='ovr'. This parameter is ignored when the ``solver`` is\n set to 'liblinear' regardless of whether 'multi_class' is specified or\n not. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n context. ``-1`` means using all processors.\n See :term:`Glossary ` for more details.\n\n l1_ratio : float, default=None\n The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n combination of L1 and L2.\n\n Attributes\n ----------\n\n classes_ : ndarray of shape (n_classes, )\n A list of class labels known to the classifier.\n\n coef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function.\n\n `coef_` is of shape (1, n_features) when the given problem is binary.\n In particular, when `multi_class='multinomial'`, `coef_` corresponds\n to outcome 1 (True) and `-coef_` corresponds to outcome 0 (False).\n\n intercept_ : ndarray of shape (1,) or (n_classes,)\n Intercept (a.k.a. 
bias) added to the decision function.\n\n If `fit_intercept` is set to False, the intercept is set to zero.\n `intercept_` is of shape (1,) when the given problem is binary.\n In particular, when `multi_class='multinomial'`, `intercept_`\n corresponds to outcome 1 (True) and `-intercept_` corresponds to\n outcome 0 (False).\n\n n_iter_ : ndarray of shape (n_classes,) or (1, )\n Actual number of iterations for all classes. If binary or multinomial,\n it returns only 1 element. For liblinear solver, only the maximum\n number of iterations across all classes is given.\n\n .. versionchanged:: 0.20\n\n In SciPy <= 1.0.0 the number of lbfgs iterations may exceed\n ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.\n\n See Also\n --------\n SGDClassifier : Incrementally trained logistic regression (when given\n the parameter ``loss=\"log\"``).\n LogisticRegressionCV : Logistic regression with built-in cross validation.\n\n Notes\n -----\n The underlying C implementation uses a random number generator to\n select features when fitting the model. It is thus not uncommon\n to have slightly different results for the same input data. If\n that happens, try with a smaller tol parameter.\n\n Predict output may not match that of standalone liblinear in certain\n cases. See :ref:`differences from liblinear `\n in the narrative documentation.\n\n References\n ----------\n\n L-BFGS-B -- Software for Large-scale Bound-constrained Optimization\n Ciyou Zhu, Richard Byrd, Jorge Nocedal and Jose Luis Morales.\n http://users.iems.northwestern.edu/~nocedal/lbfgsb.html\n\n LIBLINEAR -- A Library for Large Linear Classification\n https://www.csie.ntu.edu.tw/~cjlin/liblinear/\n\n SAG -- Mark Schmidt, Nicolas Le Roux, and Francis Bach\n Minimizing Finite Sums with the Stochastic Average Gradient\n https://hal.inria.fr/hal-00860051/document\n\n SAGA -- Defazio, A., Bach F. & Lacoste-Julien S. (2014).\n SAGA: A Fast Incremental Gradient Method With Support\n for Non-Strongly Convex Composite Objectives\n https://arxiv.org/abs/1407.0202\n\n Hsiang-Fu Yu, Fang-Lan Huang, Chih-Jen Lin (2011). 
Dual coordinate descent\n methods for logistic regression and maximum entropy models.\n Machine Learning 85(1-2):41-75.\n https://www.csie.ntu.edu.tw/~cjlin/papers/maxent_dual.pdf\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.linear_model import LogisticRegression\n >>> X, y = load_iris(return_X_y=True)\n >>> clf = LogisticRegression(random_state=0).fit(X, y)\n >>> clf.predict(X[:2, :])\n array([0, 0])\n >>> clf.predict_proba(X[:2, :])\n array([[9.8...e-01, 1.8...e-02, 1.4...e-08],\n [9.7...e-01, 2.8...e-02, ...e-08]])\n >>> clf.score(X, y)\n 0.97...\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, penalty='l2', *, dual=False, tol=1e-4, C=1.0,\n fit_intercept=True, intercept_scaling=1, class_weight=None,\n random_state=None, solver='lbfgs', max_iter=100,\n multi_class='auto', verbose=0, warm_start=False, n_jobs=None,\n l1_ratio=None):\n\n self.penalty = penalty\n self.dual = dual\n self.tol = tol\n self.C = C\n self.fit_intercept = fit_intercept\n self.intercept_scaling = intercept_scaling\n self.class_weight = class_weight\n self.random_state = random_state\n self.solver = solver\n self.max_iter = max_iter\n self.multi_class = multi_class\n self.verbose = verbose\n self.warm_start = warm_start\n self.n_jobs = n_jobs\n self.l1_ratio = l1_ratio\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"\n Fit the model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target vector relative to X.\n\n sample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\n .. versionadded:: 0.17\n *sample_weight* support to LogisticRegression.\n\n Returns\n -------\n self\n Fitted estimator.\n\n Notes\n -----\n The SAGA solver supports both float64 and float32 bit arrays.\n \"\"\"\n solver = _check_solver(self.solver, self.penalty, self.dual)\n\n if not isinstance(self.C, numbers.Number) or self.C < 0:\n raise ValueError(\"Penalty term must be positive; got (C=%r)\"\n % self.C)\n if self.penalty == 'elasticnet':\n if (not isinstance(self.l1_ratio, numbers.Number) or\n self.l1_ratio < 0 or self.l1_ratio > 1):\n raise ValueError(\"l1_ratio must be between 0 and 1;\"\n \" got (l1_ratio=%r)\" % self.l1_ratio)\n elif self.l1_ratio is not None:\n warnings.warn(\"l1_ratio parameter is only used when penalty is \"\n \"'elasticnet'. 
Got \"\n \"(penalty={})\".format(self.penalty))\n if self.penalty == 'none':\n if self.C != 1.0: # default values\n warnings.warn(\n \"Setting penalty='none' will ignore the C and l1_ratio \"\n \"parameters\"\n )\n # Note that check for l1_ratio is done right above\n C_ = np.inf\n penalty = 'l2'\n else:\n C_ = self.C\n penalty = self.penalty\n if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0:\n raise ValueError(\"Maximum number of iteration must be positive;\"\n \" got (max_iter=%r)\" % self.max_iter)\n if not isinstance(self.tol, numbers.Number) or self.tol < 0:\n raise ValueError(\"Tolerance for stopping criteria must be \"\n \"positive; got (tol=%r)\" % self.tol)\n\n if solver == 'lbfgs':\n _dtype = np.float64\n else:\n _dtype = [np.float64, np.float32]\n\n X, y = self._validate_data(X, y, accept_sparse='csr', dtype=_dtype,\n order=\"C\",\n accept_large_sparse=solver != 'liblinear')\n check_classification_targets(y)\n self.classes_ = np.unique(y)\n\n multi_class = _check_multi_class(self.multi_class, solver,\n len(self.classes_))\n\n if solver == 'liblinear':\n if effective_n_jobs(self.n_jobs) != 1:\n warnings.warn(\"'n_jobs' > 1 does not have any effect when\"\n \" 'solver' is set to 'liblinear'. Got 'n_jobs'\"\n \" = {}.\".format(effective_n_jobs(self.n_jobs)))\n self.coef_, self.intercept_, n_iter_ = _fit_liblinear(\n X, y, self.C, self.fit_intercept, self.intercept_scaling,\n self.class_weight, self.penalty, self.dual, self.verbose,\n self.max_iter, self.tol, self.random_state,\n sample_weight=sample_weight)\n self.n_iter_ = np.array([n_iter_])\n return self\n\n if solver in ['sag', 'saga']:\n max_squared_sum = row_norms(X, squared=True).max()\n else:\n max_squared_sum = None\n\n n_classes = len(self.classes_)\n classes_ = self.classes_\n if n_classes < 2:\n raise ValueError(\"This solver needs samples of at least 2 classes\"\n \" in the data, but the data contains only one\"\n \" class: %r\" % classes_[0])\n\n if len(self.classes_) == 2:\n n_classes = 1\n classes_ = classes_[1:]\n\n if self.warm_start:\n warm_start_coef = getattr(self, 'coef_', None)\n else:\n warm_start_coef = None\n if warm_start_coef is not None and self.fit_intercept:\n warm_start_coef = np.append(warm_start_coef,\n self.intercept_[:, np.newaxis],\n axis=1)\n\n # Hack so that we iterate only once for the multinomial case.\n if multi_class == 'multinomial':\n classes_ = [None]\n warm_start_coef = [warm_start_coef]\n if warm_start_coef is None:\n warm_start_coef = [None] * n_classes\n\n path_func = delayed(_logistic_regression_path)\n\n # The SAG solver releases the GIL so it's more efficient to use\n # threads for this solver.\n if solver in ['sag', 'saga']:\n prefer = 'threads'\n else:\n prefer = 'processes'\n fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(prefer=prefer))(\n path_func(X, y, pos_class=class_, Cs=[C_],\n l1_ratio=self.l1_ratio, fit_intercept=self.fit_intercept,\n tol=self.tol, verbose=self.verbose, solver=solver,\n multi_class=multi_class, max_iter=self.max_iter,\n class_weight=self.class_weight, check_input=False,\n random_state=self.random_state, coef=warm_start_coef_,\n penalty=penalty, max_squared_sum=max_squared_sum,\n sample_weight=sample_weight)\n for class_, warm_start_coef_ in zip(classes_, warm_start_coef))\n\n fold_coefs_, _, n_iter_ = zip(*fold_coefs_)\n self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0]\n\n n_features = X.shape[1]\n if multi_class == 'multinomial':\n self.coef_ = fold_coefs_[0][0]\n else:\n 
self.coef_ = np.asarray(fold_coefs_)\n self.coef_ = self.coef_.reshape(n_classes, n_features +\n int(self.fit_intercept))\n\n if self.fit_intercept:\n self.intercept_ = self.coef_[:, -1]\n self.coef_ = self.coef_[:, :-1]\n else:\n self.intercept_ = np.zeros(n_classes)\n\n return self\n\n def predict_proba(self, X):\n \"\"\"\n Probability estimates.\n\n The returned estimates for all classes are ordered by the\n label of classes.\n\n For a multi_class problem, if multi_class is set to be \"multinomial\"\n the softmax function is used to find the predicted probability of\n each class.\n Else use a one-vs-rest approach, i.e. calculate the probability\n of each class assuming it to be positive using the logistic function,\n and normalize these values across all the classes.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Vector to be scored, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n Returns\n -------\n T : array-like of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in ``self.classes_``.\n \"\"\"\n check_is_fitted(self)\n\n ovr = (self.multi_class in [\"ovr\", \"warn\"] or\n (self.multi_class == 'auto' and (self.classes_.size <= 2 or\n self.solver == 'liblinear')))\n if ovr:\n return super()._predict_proba_lr(X)\n else:\n decision = self.decision_function(X)\n if decision.ndim == 1:\n # Workaround for multi_class=\"multinomial\" and binary outcomes\n # which requires softmax prediction with only a 1D decision.\n decision_2d = np.c_[-decision, decision]\n else:\n decision_2d = decision\n return softmax(decision_2d, copy=False)\n\n def predict_log_proba(self, X):\n \"\"\"\n Predict logarithm of probability estimates.\n\n The returned estimates for all classes are ordered by the\n label of classes.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Vector to be scored, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n Returns\n -------\n T : array-like of shape (n_samples, n_classes)\n Returns the log-probability of the sample for each class in the\n model, where classes are ordered as they are in ``self.classes_``.\n \"\"\"\n return np.log(self.predict_proba(X))", + "instance_attributes": [ + { + "name": "classes_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "penalty", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "dual", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "C", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "intercept_scaling", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "solver", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "multi_class", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "warm_start", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "n_iter_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": 
"scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV", + "name": "LogisticRegressionCV", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV", + "decorators": [], + "superclasses": ["LogisticRegression", "LinearClassifierMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__", + "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/fit", + "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/score", + "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Logistic Regression CV (aka logit, MaxEnt) classifier.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThis class implements logistic regression using liblinear, newton-cg, sag\nof lbfgs optimizer. The newton-cg, sag and lbfgs solvers support only L2\nregularization with primal formulation. The liblinear solver supports both\nL1 and L2 regularization, with a dual formulation only for the L2 penalty.\nElastic-Net penalty is only supported by the saga solver.\n\nFor the grid of `Cs` values and `l1_ratios` values, the best hyperparameter\nis selected by the cross-validator\n:class:`~sklearn.model_selection.StratifiedKFold`, but it can be changed\nusing the :term:`cv` parameter. The 'newton-cg', 'sag', 'saga' and 'lbfgs'\nsolvers can warm-start the coefficients (see :term:`Glossary`).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Logistic Regression CV (aka logit, MaxEnt) classifier.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThis class implements logistic regression using liblinear, newton-cg, sag\nof lbfgs optimizer. The newton-cg, sag and lbfgs solvers support only L2\nregularization with primal formulation. The liblinear solver supports both\nL1 and L2 regularization, with a dual formulation only for the L2 penalty.\nElastic-Net penalty is only supported by the saga solver.\n\nFor the grid of `Cs` values and `l1_ratios` values, the best hyperparameter\nis selected by the cross-validator\n:class:`~sklearn.model_selection.StratifiedKFold`, but it can be changed\nusing the :term:`cv` parameter. The 'newton-cg', 'sag', 'saga' and 'lbfgs'\nsolvers can warm-start the coefficients (see :term:`Glossary`).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nCs : int or list of floats, default=10\n Each of the values in Cs describes the inverse of regularization\n strength. If Cs is as an int, then a grid of Cs values are chosen\n in a logarithmic scale between 1e-4 and 1e4.\n Like in support vector machines, smaller values specify stronger\n regularization.\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the decision function.\n\ncv : int or cross-validation generator, default=None\n The default cross-validation generator used is Stratified K-Folds.\n If an integer is provided, then it is the number of folds used.\n See the module :mod:`sklearn.model_selection` module for the\n list of possible cross-validation objects.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ndual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n Used to specify the norm used in the penalization. 
The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver.\n\nscoring : str or callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``. For a list of scoring functions\n that can be used, look at :mod:`sklearn.metrics`. The\n default scoring option used is 'accuracy'.\n\nsolver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, default='lbfgs'\n\n Algorithm to use in the optimization problem.\n\n - For small datasets, 'liblinear' is a good choice, whereas 'sag' and\n 'saga' are faster for large ones.\n - For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs'\n handle multinomial loss; 'liblinear' is limited to one-versus-rest\n schemes.\n - 'newton-cg', 'lbfgs' and 'sag' only handle L2 penalty, whereas\n 'liblinear' and 'saga' handle L1 penalty.\n - 'liblinear' might be slower in LogisticRegressionCV because it does\n not handle warm-starting.\n\n Note that 'sag' and 'saga' fast convergence is only guaranteed on\n features with approximately the same scale. You can preprocess the data\n with a scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nmax_iter : int, default=100\n Maximum number of iterations of the optimization algorithm.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n .. versionadded:: 0.17\n class_weight == 'balanced'\n\nn_jobs : int, default=None\n Number of CPU cores used during the cross-validation loop.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n For the 'liblinear', 'sag' and 'lbfgs' solvers set verbose to any\n positive number for verbosity.\n\nrefit : bool, default=True\n If set to True, the scores are averaged across all folds, and the\n coefs and the C that corresponds to the best score is taken, and a\n final refit is done using these parameters.\n Otherwise the coefs, intercepts and C that correspond to the\n best scores across folds are averaged.\n\nintercept_scaling : float, default=1\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equal to\n intercept_scaling is appended to the instance vector.\n The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. 
For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n and otherwise selects 'multinomial'.\n\n .. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n .. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22.\n\nrandom_state : int, RandomState instance, default=None\n Used when `solver='sag'`, 'saga' or 'liblinear' to shuffle the data.\n Note that this only applies to the solver and not the cross-validation\n generator. See :term:`Glossary ` for details.\n\nl1_ratios : list of float, default=None\n The list of Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``.\n Only used if ``penalty='elasticnet'``. A value of 0 is equivalent to\n using ``penalty='l2'``, while 1 is equivalent to using\n ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination\n of L1 and L2.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes, )\n A list of class labels known to the classifier.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function.\n\n `coef_` is of shape (1, n_features) when the given problem\n is binary.\n\nintercept_ : ndarray of shape (1,) or (n_classes,)\n Intercept (a.k.a. bias) added to the decision function.\n\n If `fit_intercept` is set to False, the intercept is set to zero.\n `intercept_` is of shape(1,) when the problem is binary.\n\nCs_ : ndarray of shape (n_cs)\n Array of C i.e. inverse of regularization parameter values used\n for cross-validation.\n\nl1_ratios_ : ndarray of shape (n_l1_ratios)\n Array of l1_ratios used for cross-validation. If no l1_ratio is used\n (i.e. penalty is not 'elasticnet'), this is set to ``[None]``\n\ncoefs_paths_ : ndarray of shape (n_folds, n_cs, n_features) or (n_folds, n_cs, n_features + 1)\n dict with classes as the keys, and the path of coefficients obtained\n during cross-validating across each fold and then across each Cs\n after doing an OvR for the corresponding class as values.\n If the 'multi_class' option is set to 'multinomial', then\n the coefs_paths are the coefficients corresponding to each class.\n Each dict value has shape ``(n_folds, n_cs, n_features)`` or\n ``(n_folds, n_cs, n_features + 1)`` depending on whether the\n intercept is fit or not. If ``penalty='elasticnet'``, the shape is\n ``(n_folds, n_cs, n_l1_ratios_, n_features)`` or\n ``(n_folds, n_cs, n_l1_ratios_, n_features + 1)``.\n\nscores_ : dict\n dict with classes as the keys, and the values as the\n grid of scores obtained during cross-validating each fold, after doing\n an OvR for the corresponding class. If the 'multi_class' option\n given is 'multinomial' then the same scores are repeated across\n all classes, since this is the multinomial class. Each dict value\n has shape ``(n_folds, n_cs`` or ``(n_folds, n_cs, n_l1_ratios)`` if\n ``penalty='elasticnet'``.\n\nC_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n Array of C that maps to the best scores across every class. If refit is\n set to False, then for each class, the best C is the average of the\n C's that correspond to the best scores for each fold.\n `C_` is of shape(n_classes,) when the problem is binary.\n\nl1_ratio_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n Array of l1_ratio that maps to the best scores across every class. 
If\n refit is set to False, then for each class, the best l1_ratio is the\n average of the l1_ratio's that correspond to the best scores for each\n fold. `l1_ratio_` is of shape (n_classes,) when the problem is binary.\n\nn_iter_ : ndarray of shape (n_classes, n_folds, n_cs) or (1, n_folds, n_cs)\n Actual number of iterations for all classes, folds and Cs.\n In the binary or multinomial cases, the first dimension is equal to 1.\n If ``penalty='elasticnet'``, the shape is ``(n_classes, n_folds,\n n_cs, n_l1_ratios)`` or ``(1, n_folds, n_cs, n_l1_ratios)``.\n\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegressionCV\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegressionCV(cv=5, random_state=0).fit(X, y)\n>>> clf.predict(X[:2, :])\narray([0, 0])\n>>> clf.predict_proba(X[:2, :]).shape\n(2, 3)\n>>> clf.score(X, y)\n0.98...\n\nSee Also\n--------\nLogisticRegression", + "code": "class LogisticRegressionCV(LogisticRegression,\n LinearClassifierMixin,\n BaseEstimator):\n \"\"\"Logistic Regression CV (aka logit, MaxEnt) classifier.\n\n See glossary entry for :term:`cross-validation estimator`.\n\n This class implements logistic regression using liblinear, newton-cg, sag\n or lbfgs optimizers. The newton-cg, sag and lbfgs solvers support only L2\n regularization with primal formulation. The liblinear solver supports both\n L1 and L2 regularization, with a dual formulation only for the L2 penalty.\n Elastic-Net penalty is only supported by the saga solver.\n\n For the grid of `Cs` values and `l1_ratios` values, the best hyperparameter\n is selected by the cross-validator\n :class:`~sklearn.model_selection.StratifiedKFold`, but it can be changed\n using the :term:`cv` parameter. The 'newton-cg', 'sag', 'saga' and 'lbfgs'\n solvers can warm-start the coefficients (see :term:`Glossary`).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n Cs : int or list of floats, default=10\n Each of the values in Cs describes the inverse of regularization\n strength. If Cs is an int, then a grid of Cs values is chosen\n in a logarithmic scale between 1e-4 and 1e4.\n Like in support vector machines, smaller values specify stronger\n regularization.\n\n fit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the decision function.\n\n cv : int or cross-validation generator, default=None\n The default cross-validation generator used is Stratified K-Folds.\n If an integer is provided, then it is the number of folds used.\n See the :mod:`sklearn.model_selection` module for the\n list of possible cross-validation objects.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n dual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\n penalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver.\n\n scoring : str or callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``. For a list of scoring functions\n that can be used, look at :mod:`sklearn.metrics`. 
The\n default scoring option used is 'accuracy'.\n\n solver : {'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}, \\\n default='lbfgs'\n\n Algorithm to use in the optimization problem.\n\n - For small datasets, 'liblinear' is a good choice, whereas 'sag' and\n 'saga' are faster for large ones.\n - For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs'\n handle multinomial loss; 'liblinear' is limited to one-versus-rest\n schemes.\n - 'newton-cg', 'lbfgs' and 'sag' only handle L2 penalty, whereas\n 'liblinear' and 'saga' handle L1 penalty.\n - 'liblinear' might be slower in LogisticRegressionCV because it does\n not handle warm-starting.\n\n Note that 'sag' and 'saga' fast convergence is only guaranteed on\n features with approximately the same scale. You can preprocess the data\n with a scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\n tol : float, default=1e-4\n Tolerance for stopping criteria.\n\n max_iter : int, default=100\n Maximum number of iterations of the optimization algorithm.\n\n class_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n .. versionadded:: 0.17\n class_weight == 'balanced'\n\n n_jobs : int, default=None\n Number of CPU cores used during the cross-validation loop.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n verbose : int, default=0\n For the 'liblinear', 'sag' and 'lbfgs' solvers set verbose to any\n positive number for verbosity.\n\n refit : bool, default=True\n If set to True, the scores are averaged across all folds, and the\n coefs and the C that corresponds to the best score is taken, and a\n final refit is done using these parameters.\n Otherwise the coefs, intercepts and C that correspond to the\n best scores across folds are averaged.\n\n intercept_scaling : float, default=1\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equal to\n intercept_scaling is appended to the instance vector.\n The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n Note! The synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\n multi_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n and otherwise selects 'multinomial'.\n\n .. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n .. 
versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22.\n\n random_state : int, RandomState instance, default=None\n Used when `solver='sag'`, 'saga' or 'liblinear' to shuffle the data.\n Note that this only applies to the solver and not the cross-validation\n generator. See :term:`Glossary ` for details.\n\n l1_ratios : list of float, default=None\n The list of Elastic-Net mixing parameters, with ``0 <= l1_ratio <= 1``.\n Only used if ``penalty='elasticnet'``. A value of 0 is equivalent to\n using ``penalty='l2'``, while 1 is equivalent to using\n ``penalty='l1'``. For ``0 < l1_ratio < 1``, the penalty is a combination\n of L1 and L2.\n\n Attributes\n ----------\n classes_ : ndarray of shape (n_classes, )\n A list of class labels known to the classifier.\n\n coef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function.\n\n `coef_` is of shape (1, n_features) when the given problem\n is binary.\n\n intercept_ : ndarray of shape (1,) or (n_classes,)\n Intercept (a.k.a. bias) added to the decision function.\n\n If `fit_intercept` is set to False, the intercept is set to zero.\n `intercept_` is of shape (1,) when the problem is binary.\n\n Cs_ : ndarray of shape (n_cs)\n Array of C i.e. inverse of regularization parameter values used\n for cross-validation.\n\n l1_ratios_ : ndarray of shape (n_l1_ratios)\n Array of l1_ratios used for cross-validation. If no l1_ratio is used\n (i.e. penalty is not 'elasticnet'), this is set to ``[None]``.\n\n coefs_paths_ : ndarray of shape (n_folds, n_cs, n_features) or \\\n (n_folds, n_cs, n_features + 1)\n dict with classes as the keys, and the path of coefficients obtained\n during cross-validating across each fold and then across each Cs\n after doing an OvR for the corresponding class as values.\n If the 'multi_class' option is set to 'multinomial', then\n the coefs_paths are the coefficients corresponding to each class.\n Each dict value has shape ``(n_folds, n_cs, n_features)`` or\n ``(n_folds, n_cs, n_features + 1)`` depending on whether the\n intercept is fit or not. If ``penalty='elasticnet'``, the shape is\n ``(n_folds, n_cs, n_l1_ratios_, n_features)`` or\n ``(n_folds, n_cs, n_l1_ratios_, n_features + 1)``.\n\n scores_ : dict\n dict with classes as the keys, and the values as the\n grid of scores obtained during cross-validating each fold, after doing\n an OvR for the corresponding class. If the 'multi_class' option\n given is 'multinomial' then the same scores are repeated across\n all classes, since this is the multinomial class. Each dict value\n has shape ``(n_folds, n_cs)`` or ``(n_folds, n_cs, n_l1_ratios)`` if\n ``penalty='elasticnet'``.\n\n C_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n Array of C that maps to the best scores across every class. If refit is\n set to False, then for each class, the best C is the average of the\n C's that correspond to the best scores for each fold.\n `C_` is of shape (n_classes,) when the problem is binary.\n\n l1_ratio_ : ndarray of shape (n_classes,) or (n_classes - 1,)\n Array of l1_ratio that maps to the best scores across every class. If\n refit is set to False, then for each class, the best l1_ratio is the\n average of the l1_ratio's that correspond to the best scores for each\n fold. 
`l1_ratio_` is of shape (n_classes,) when the problem is binary.\n\n n_iter_ : ndarray of shape (n_classes, n_folds, n_cs) or (1, n_folds, n_cs)\n Actual number of iterations for all classes, folds and Cs.\n In the binary or multinomial cases, the first dimension is equal to 1.\n If ``penalty='elasticnet'``, the shape is ``(n_classes, n_folds,\n n_cs, n_l1_ratios)`` or ``(1, n_folds, n_cs, n_l1_ratios)``.\n\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.linear_model import LogisticRegressionCV\n >>> X, y = load_iris(return_X_y=True)\n >>> clf = LogisticRegressionCV(cv=5, random_state=0).fit(X, y)\n >>> clf.predict(X[:2, :])\n array([0, 0])\n >>> clf.predict_proba(X[:2, :]).shape\n (2, 3)\n >>> clf.score(X, y)\n 0.98...\n\n See Also\n --------\n LogisticRegression\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, Cs=10, fit_intercept=True, cv=None, dual=False,\n penalty='l2', scoring=None, solver='lbfgs', tol=1e-4,\n max_iter=100, class_weight=None, n_jobs=None, verbose=0,\n refit=True, intercept_scaling=1., multi_class='auto',\n random_state=None, l1_ratios=None):\n self.Cs = Cs\n self.fit_intercept = fit_intercept\n self.cv = cv\n self.dual = dual\n self.penalty = penalty\n self.scoring = scoring\n self.tol = tol\n self.max_iter = max_iter\n self.class_weight = class_weight\n self.n_jobs = n_jobs\n self.verbose = verbose\n self.solver = solver\n self.refit = refit\n self.intercept_scaling = intercept_scaling\n self.multi_class = multi_class\n self.random_state = random_state\n self.l1_ratios = l1_ratios\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target vector relative to X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\n Returns\n -------\n self : object\n \"\"\"\n solver = _check_solver(self.solver, self.penalty, self.dual)\n\n if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0:\n raise ValueError(\"Maximum number of iterations must be positive;\"\n \" got (max_iter=%r)\" % self.max_iter)\n if not isinstance(self.tol, numbers.Number) or self.tol < 0:\n raise ValueError(\"Tolerance for stopping criteria must be \"\n \"positive; got (tol=%r)\" % self.tol)\n if self.penalty == 'elasticnet':\n if self.l1_ratios is None or len(self.l1_ratios) == 0 or any(\n (not isinstance(l1_ratio, numbers.Number) or l1_ratio < 0\n or l1_ratio > 1) for l1_ratio in self.l1_ratios):\n raise ValueError(\"l1_ratios must be a list of numbers between \"\n \"0 and 1; got (l1_ratios=%r)\" %\n self.l1_ratios)\n l1_ratios_ = self.l1_ratios\n else:\n if self.l1_ratios is not None:\n warnings.warn(\"l1_ratios parameter is only used when penalty \"\n \"is 'elasticnet'. 
Got (penalty={})\".format(\n self.penalty))\n\n l1_ratios_ = [None]\n\n if self.penalty == 'none':\n raise ValueError(\n \"penalty='none' is not useful and not supported by \"\n \"LogisticRegressionCV.\"\n )\n\n X, y = self._validate_data(X, y, accept_sparse='csr', dtype=np.float64,\n order=\"C\",\n accept_large_sparse=solver != 'liblinear')\n check_classification_targets(y)\n\n class_weight = self.class_weight\n\n # Encode for string labels\n label_encoder = LabelEncoder().fit(y)\n y = label_encoder.transform(y)\n if isinstance(class_weight, dict):\n class_weight = {label_encoder.transform([cls])[0]: v\n for cls, v in class_weight.items()}\n\n # The original class labels\n classes = self.classes_ = label_encoder.classes_\n encoded_labels = label_encoder.transform(label_encoder.classes_)\n\n multi_class = _check_multi_class(self.multi_class, solver,\n len(classes))\n\n if solver in ['sag', 'saga']:\n max_squared_sum = row_norms(X, squared=True).max()\n else:\n max_squared_sum = None\n\n # init cross-validation generator\n cv = check_cv(self.cv, y, classifier=True)\n folds = list(cv.split(X, y))\n\n # Use the label encoded classes\n n_classes = len(encoded_labels)\n\n if n_classes < 2:\n raise ValueError(\"This solver needs samples of at least 2 classes\"\n \" in the data, but the data contains only one\"\n \" class: %r\" % classes[0])\n\n if n_classes == 2:\n # OvR in case of binary problems is as good as fitting\n # the higher label\n n_classes = 1\n encoded_labels = encoded_labels[1:]\n classes = classes[1:]\n\n # We need this hack to iterate only once over labels, in the case of\n # multi_class = multinomial, without changing the value of the labels.\n if multi_class == 'multinomial':\n iter_encoded_labels = iter_classes = [None]\n else:\n iter_encoded_labels = encoded_labels\n iter_classes = classes\n\n # compute the class weights for the entire dataset y\n if class_weight == \"balanced\":\n class_weight = compute_class_weight(\n class_weight, classes=np.arange(len(self.classes_)), y=y)\n class_weight = dict(enumerate(class_weight))\n\n path_func = delayed(_log_reg_scoring_path)\n\n # The SAG solver releases the GIL so it's more efficient to use\n # threads for this solver.\n if self.solver in ['sag', 'saga']:\n prefer = 'threads'\n else:\n prefer = 'processes'\n\n fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(prefer=prefer))(\n path_func(X, y, train, test, pos_class=label, Cs=self.Cs,\n fit_intercept=self.fit_intercept, penalty=self.penalty,\n dual=self.dual, solver=solver, tol=self.tol,\n max_iter=self.max_iter, verbose=self.verbose,\n class_weight=class_weight, scoring=self.scoring,\n multi_class=multi_class,\n intercept_scaling=self.intercept_scaling,\n random_state=self.random_state,\n max_squared_sum=max_squared_sum,\n sample_weight=sample_weight,\n l1_ratio=l1_ratio\n )\n for label in iter_encoded_labels\n for train, test in folds\n for l1_ratio in l1_ratios_)\n\n # _log_reg_scoring_path will output different shapes depending on the\n # multi_class param, so we need to reshape the outputs accordingly.\n # Cs is of shape (n_classes . n_folds . n_l1_ratios, n_Cs) and all the\n # rows are equal, so we just take the first one.\n # After reshaping,\n # - scores is of shape (n_classes, n_folds, n_Cs . n_l1_ratios)\n # - coefs_paths is of shape\n # (n_classes, n_folds, n_Cs . n_l1_ratios, n_features)\n # - n_iter is of shape\n # (n_classes, n_folds, n_Cs . n_l1_ratios) or\n # (1, n_folds, n_Cs . 
n_l1_ratios)\n coefs_paths, Cs, scores, n_iter_ = zip(*fold_coefs_)\n self.Cs_ = Cs[0]\n if multi_class == 'multinomial':\n coefs_paths = np.reshape(\n coefs_paths,\n (len(folds), len(l1_ratios_) * len(self.Cs_), n_classes, -1)\n )\n # equiv to coefs_paths = np.moveaxis(coefs_paths, (0, 1, 2, 3),\n # (1, 2, 0, 3))\n coefs_paths = np.swapaxes(coefs_paths, 0, 1)\n coefs_paths = np.swapaxes(coefs_paths, 0, 2)\n self.n_iter_ = np.reshape(\n n_iter_,\n (1, len(folds), len(self.Cs_) * len(l1_ratios_))\n )\n # repeat same scores across all classes\n scores = np.tile(scores, (n_classes, 1, 1))\n else:\n coefs_paths = np.reshape(\n coefs_paths,\n (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_),\n -1)\n )\n self.n_iter_ = np.reshape(\n n_iter_,\n (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_))\n )\n scores = np.reshape(scores, (n_classes, len(folds), -1))\n self.scores_ = dict(zip(classes, scores))\n self.coefs_paths_ = dict(zip(classes, coefs_paths))\n\n self.C_ = list()\n self.l1_ratio_ = list()\n self.coef_ = np.empty((n_classes, X.shape[1]))\n self.intercept_ = np.zeros(n_classes)\n for index, (cls, encoded_label) in enumerate(\n zip(iter_classes, iter_encoded_labels)):\n\n if multi_class == 'ovr':\n scores = self.scores_[cls]\n coefs_paths = self.coefs_paths_[cls]\n else:\n # For multinomial, all scores are the same across classes\n scores = scores[0]\n # coefs_paths will keep its original shape because\n # logistic_regression_path expects it this way\n\n if self.refit:\n # best_index is between 0 and (n_Cs . n_l1_ratios - 1)\n # for example, with n_cs=2 and n_l1_ratios=3\n # the layout of scores is\n # [c1, c2, c1, c2, c1, c2]\n # l1_1 , l1_2 , l1_3\n best_index = scores.sum(axis=0).argmax()\n\n best_index_C = best_index % len(self.Cs_)\n C_ = self.Cs_[best_index_C]\n self.C_.append(C_)\n\n best_index_l1 = best_index // len(self.Cs_)\n l1_ratio_ = l1_ratios_[best_index_l1]\n self.l1_ratio_.append(l1_ratio_)\n\n if multi_class == 'multinomial':\n coef_init = np.mean(coefs_paths[:, :, best_index, :],\n axis=1)\n else:\n coef_init = np.mean(coefs_paths[:, best_index, :], axis=0)\n\n # Note that y is label encoded and hence pos_class must be\n # the encoded label / None (for 'multinomial')\n w, _, _ = _logistic_regression_path(\n X, y, pos_class=encoded_label, Cs=[C_], solver=solver,\n fit_intercept=self.fit_intercept, coef=coef_init,\n max_iter=self.max_iter, tol=self.tol,\n penalty=self.penalty,\n class_weight=class_weight,\n multi_class=multi_class,\n verbose=max(0, self.verbose - 1),\n random_state=self.random_state,\n check_input=False, max_squared_sum=max_squared_sum,\n sample_weight=sample_weight,\n l1_ratio=l1_ratio_)\n w = w[0]\n\n else:\n # Take the best scores across every fold and the average of\n # all coefficients corresponding to the best scores.\n best_indices = np.argmax(scores, axis=1)\n if multi_class == 'ovr':\n w = np.mean([coefs_paths[i, best_indices[i], :]\n for i in range(len(folds))], axis=0)\n else:\n w = np.mean([coefs_paths[:, i, best_indices[i], :]\n for i in range(len(folds))], axis=0)\n\n best_indices_C = best_indices % len(self.Cs_)\n self.C_.append(np.mean(self.Cs_[best_indices_C]))\n\n if self.penalty == 'elasticnet':\n best_indices_l1 = best_indices // len(self.Cs_)\n self.l1_ratio_.append(np.mean(l1_ratios_[best_indices_l1]))\n else:\n self.l1_ratio_.append(None)\n\n if multi_class == 'multinomial':\n self.C_ = np.tile(self.C_, n_classes)\n self.l1_ratio_ = np.tile(self.l1_ratio_, n_classes)\n self.coef_ = w[:, :X.shape[1]]\n if 
self.fit_intercept:\n self.intercept_ = w[:, -1]\n else:\n self.coef_[index] = w[: X.shape[1]]\n if self.fit_intercept:\n self.intercept_[index] = w[-1]\n\n self.C_ = np.asarray(self.C_)\n self.l1_ratio_ = np.asarray(self.l1_ratio_)\n self.l1_ratios_ = np.asarray(l1_ratios_)\n # if elasticnet was used, add the l1_ratios dimension to some\n # attributes\n if self.l1_ratios is not None:\n # with n_cs=2 and n_l1_ratios=3\n # the layout of scores is\n # [c1, c2, c1, c2, c1, c2]\n # l1_1 , l1_2 , l1_3\n # To get a 2d array with the following layout\n # l1_1, l1_2, l1_3\n # c1 [[ . , . , . ],\n # c2 [ . , . , . ]]\n # We need to first reshape and then transpose.\n # The same goes for the other arrays\n for cls, coefs_path in self.coefs_paths_.items():\n self.coefs_paths_[cls] = coefs_path.reshape(\n (len(folds), self.l1_ratios_.size, self.Cs_.size, -1))\n self.coefs_paths_[cls] = np.transpose(self.coefs_paths_[cls],\n (0, 2, 1, 3))\n for cls, score in self.scores_.items():\n self.scores_[cls] = score.reshape(\n (len(folds), self.l1_ratios_.size, self.Cs_.size))\n self.scores_[cls] = np.transpose(self.scores_[cls], (0, 2, 1))\n\n self.n_iter_ = self.n_iter_.reshape(\n (-1, len(folds), self.l1_ratios_.size, self.Cs_.size))\n self.n_iter_ = np.transpose(self.n_iter_, (0, 1, 3, 2))\n\n return self\n\n def score(self, X, y, sample_weight=None):\n \"\"\"Returns the score using the `scoring` option on the given\n test data and labels.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test samples.\n\n y : array-like of shape (n_samples,)\n True labels for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Score of self.predict(X) wrt. y.\n\n \"\"\"\n scoring = self.scoring or 'accuracy'\n scoring = get_scorer(scoring)\n\n return scoring(self, X, y, sample_weight=sample_weight)\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [ + { + "name": "Cs", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "dual", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "penalty", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "solver", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "refit", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "intercept_scaling", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "multi_class", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "scores_", + "types": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "name": "coefs_paths_", + "types": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "name": "C_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "l1_ratio_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "coef_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "intercept_", + "types": { + "kind": 
"NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit", + "name": "OrthogonalMatchingPursuit", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuit", + "decorators": [], + "superclasses": ["MultiOutputMixin", "RegressorMixin", "LinearModel"], + "methods": [ + "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/__init__", + "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Orthogonal Matching Pursuit model (OMP).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Orthogonal Matching Pursuit model (OMP).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_nonzero_coefs : int, default=None\n Desired number of non-zero entries in the solution. If None (by\n default) this value is set to 10% of n_features.\n\ntol : float, default=None\n Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nprecompute : 'auto' or bool, default='auto'\n Whether to use a precomputed Gram and Xy matrix to speed up\n calculations. Improves performance when :term:`n_targets` or\n :term:`n_samples` is very large. Note that if you already have such\n matrices, you can pass them directly to the fit method.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the formula).\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : int or array-like\n Number of active features across every target.\n\nn_nonzero_coefs_ : int\n The number of non-zero coefficients in the solution. If\n `n_nonzero_coefs` is None and `tol` is None this value is either set\n to 10% of `n_features` or 1, whichever is greater.\n\nExamples\n--------\n>>> from sklearn.linear_model import OrthogonalMatchingPursuit\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4, random_state=0)\n>>> reg = OrthogonalMatchingPursuit().fit(X, y)\n>>> reg.score(X, y)\n0.9991...\n>>> reg.predict(X[:1,])\narray([-78.3854...])\n\nNotes\n-----\nOrthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\nMatching pursuits with time-frequency dictionaries, IEEE Transactions on\nSignal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)\n\nThis implementation is based on Rubinstein, R., Zibulevsky, M. 
and Elad,\nM., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\nMatching Pursuit Technical Report - CS Technion, April 2008.\nhttps://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf\n\nSee Also\n--------\northogonal_mp\northogonal_mp_gram\nlars_path\nLars\nLassoLars\nsklearn.decomposition.sparse_encode\nOrthogonalMatchingPursuitCV", + "code": "class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel):\n \"\"\"Orthogonal Matching Pursuit model (OMP).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_nonzero_coefs : int, default=None\n Desired number of non-zero entries in the solution. If None (by\n default) this value is set to 10% of n_features.\n\n tol : float, default=None\n Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\n fit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n normalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n precompute : 'auto' or bool, default='auto'\n Whether to use a precomputed Gram and Xy matrix to speed up\n calculations. Improves performance when :term:`n_targets` or\n :term:`n_samples` is very large. Note that if you already have such\n matrices, you can pass them directly to the fit method.\n\n Attributes\n ----------\n coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the formula).\n\n intercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\n n_iter_ : int or array-like\n Number of active features across every target.\n\n n_nonzero_coefs_ : int\n The number of non-zero coefficients in the solution. If\n `n_nonzero_coefs` is None and `tol` is None this value is either set\n to 10% of `n_features` or 1, whichever is greater.\n\n Examples\n --------\n >>> from sklearn.linear_model import OrthogonalMatchingPursuit\n >>> from sklearn.datasets import make_regression\n >>> X, y = make_regression(noise=4, random_state=0)\n >>> reg = OrthogonalMatchingPursuit().fit(X, y)\n >>> reg.score(X, y)\n 0.9991...\n >>> reg.predict(X[:1,])\n array([-78.3854...])\n\n Notes\n -----\n Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\n Matching pursuits with time-frequency dictionaries, IEEE Transactions on\n Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n (http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)\n\n This implementation is based on Rubinstein, R., Zibulevsky, M. 
and Elad,\n M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\n Matching Pursuit Technical Report - CS Technion, April 2008.\n https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf\n\n See Also\n --------\n orthogonal_mp\n orthogonal_mp_gram\n lars_path\n Lars\n LassoLars\n sklearn.decomposition.sparse_encode\n OrthogonalMatchingPursuitCV\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, n_nonzero_coefs=None, tol=None, fit_intercept=True,\n normalize=True, precompute='auto'):\n self.n_nonzero_coefs = n_nonzero_coefs\n self.tol = tol\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.precompute = precompute\n\n def fit(self, X, y):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary\n\n\n Returns\n -------\n self : object\n returns an instance of self.\n \"\"\"\n X, y = self._validate_data(X, y, multi_output=True, y_numeric=True)\n n_features = X.shape[1]\n\n X, y, X_offset, y_offset, X_scale, Gram, Xy = \\\n _pre_fit(X, y, None, self.precompute, self.normalize,\n self.fit_intercept, copy=True)\n\n if y.ndim == 1:\n y = y[:, np.newaxis]\n\n if self.n_nonzero_coefs is None and self.tol is None:\n # default for n_nonzero_coefs is 0.1 * n_features\n # but at least one.\n self.n_nonzero_coefs_ = max(int(0.1 * n_features), 1)\n else:\n self.n_nonzero_coefs_ = self.n_nonzero_coefs\n\n if Gram is False:\n coef_, self.n_iter_ = orthogonal_mp(\n X, y, n_nonzero_coefs=self.n_nonzero_coefs_, tol=self.tol,\n precompute=False, copy_X=True,\n return_n_iter=True)\n else:\n norms_sq = np.sum(y ** 2, axis=0) if self.tol is not None else None\n\n coef_, self.n_iter_ = orthogonal_mp_gram(\n Gram, Xy=Xy, n_nonzero_coefs=self.n_nonzero_coefs_,\n tol=self.tol, norms_squared=norms_sq,\n copy_Gram=True, copy_Xy=True,\n return_n_iter=True)\n self.coef_ = coef_.T\n self._set_intercept(X_offset, y_offset, X_scale)\n return self", + "instance_attributes": [ + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "precompute", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV", + "name": "OrthogonalMatchingPursuitCV", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV", + "decorators": [], + "superclasses": ["RegressorMixin", "LinearModel"], + "methods": [ + "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/__init__", + "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Cross-validated Orthogonal Matching Pursuit model (OMP).\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Cross-validated Orthogonal Matching Pursuit model (OMP).\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncopy : bool, default=True\n Whether the design matrix X must be copied by the algorithm. 
A false\n value is only helpful if X is already Fortran-ordered, otherwise a\n copy is made anyway.\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=None\n Maximum number of iterations to perform, and therefore the maximum\n number of features to include. Defaults to 10% of ``n_features`` but\n at least 5 if available.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, :class:`KFold` is used.\n\n Refer to the :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool or int, default=False\n Sets the verbosity amount.\n\nAttributes\n----------\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the problem formulation).\n\nn_nonzero_coefs_ : int\n Estimated number of non-zero coefficients giving the best mean squared\n error over the cross-validation folds.\n\nn_iter_ : int or array-like\n Number of active features across every target for the model refit with\n the best hyperparameters got by cross-validating across all folds.\n\nExamples\n--------\n>>> from sklearn.linear_model import OrthogonalMatchingPursuitCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=100, n_informative=10,\n... noise=4, random_state=0)\n>>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)\n>>> reg.score(X, y)\n0.9991...\n>>> reg.n_nonzero_coefs_\n10\n>>> reg.predict(X[:1,])\narray([-78.3854...])\n\nSee Also\n--------\northogonal_mp\northogonal_mp_gram\nlars_path\nLars\nLassoLars\nOrthogonalMatchingPursuit\nLarsCV\nLassoLarsCV\nsklearn.decomposition.sparse_encode", + "code": "class OrthogonalMatchingPursuitCV(RegressorMixin, LinearModel):\n \"\"\"Cross-validated Orthogonal Matching Pursuit model (OMP).\n\n See glossary entry for :term:`cross-validation estimator`.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n copy : bool, default=True\n Whether the design matrix X must be copied by the algorithm. A false\n value is only helpful if X is already Fortran-ordered, otherwise a\n copy is made anyway.\n\n fit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\n normalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n max_iter : int, default=None\n Maximum number of iterations to perform, and therefore the maximum\n number of features to include. Defaults to 10% of ``n_features`` but\n at least 5 if available.\n\n cv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, :class:`KFold` is used.\n\n Refer to the :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n n_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n verbose : bool or int, default=False\n Sets the verbosity amount.\n\n Attributes\n ----------\n intercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\n coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the problem formulation).\n\n n_nonzero_coefs_ : int\n Estimated number of non-zero coefficients giving the best mean squared\n error over the cross-validation folds.\n\n n_iter_ : int or array-like\n Number of active features across every target for the model refit with\n the best hyperparameters got by cross-validating across all folds.\n\n Examples\n --------\n >>> from sklearn.linear_model import OrthogonalMatchingPursuitCV\n >>> from sklearn.datasets import make_regression\n >>> X, y = make_regression(n_features=100, n_informative=10,\n ... noise=4, random_state=0)\n >>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)\n >>> reg.score(X, y)\n 0.9991...\n >>> reg.n_nonzero_coefs_\n 10\n >>> reg.predict(X[:1,])\n array([-78.3854...])\n\n See Also\n --------\n orthogonal_mp\n orthogonal_mp_gram\n lars_path\n Lars\n LassoLars\n OrthogonalMatchingPursuit\n LarsCV\n LassoLarsCV\n sklearn.decomposition.sparse_encode\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, copy=True, fit_intercept=True, normalize=True,\n max_iter=None, cv=None, n_jobs=None, verbose=False):\n self.copy = copy\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.max_iter = max_iter\n self.cv = cv\n self.n_jobs = n_jobs\n self.verbose = verbose\n\n def fit(self, X, y):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,)\n Target values. 
Will be cast to X's dtype if necessary.\n\n Returns\n -------\n self : object\n returns an instance of self.\n \"\"\"\n X, y = self._validate_data(X, y, y_numeric=True, ensure_min_features=2,\n estimator=self)\n X = as_float_array(X, copy=False, force_all_finite=False)\n cv = check_cv(self.cv, classifier=False)\n max_iter = (min(max(int(0.1 * X.shape[1]), 5), X.shape[1])\n if not self.max_iter\n else self.max_iter)\n cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n delayed(_omp_path_residues)(\n X[train], y[train], X[test], y[test], self.copy,\n self.fit_intercept, self.normalize, max_iter)\n for train, test in cv.split(X))\n\n min_early_stop = min(fold.shape[0] for fold in cv_paths)\n mse_folds = np.array([(fold[:min_early_stop] ** 2).mean(axis=1)\n for fold in cv_paths])\n best_n_nonzero_coefs = np.argmin(mse_folds.mean(axis=0)) + 1\n self.n_nonzero_coefs_ = best_n_nonzero_coefs\n omp = OrthogonalMatchingPursuit(n_nonzero_coefs=best_n_nonzero_coefs,\n fit_intercept=self.fit_intercept,\n normalize=self.normalize)\n omp.fit(X, y)\n self.coef_ = omp.coef_\n self.intercept_ = omp.intercept_\n self.n_iter_ = omp.n_iter_\n return self", + "instance_attributes": [ + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "n_iter_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier", + "name": "PassiveAggressiveClassifier", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier", + "decorators": [], + "superclasses": ["BaseSGDClassifier"], + "methods": [ + "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__", + "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/partial_fit", + "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Passive Aggressive Classifier\n\nRead more in the :ref:`User Guide `.", + "docstring": "Passive Aggressive Classifier\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nC : float, default=1.0\n Maximum step size (regularization). Defaults to 1.0.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float or None, default=1e-3\n The stopping criterion. If it is not None, the iterations will stop\n when (loss > previous_loss - tol).\n\n .. versionadded:: 0.19\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to True, it will automatically set aside\n a stratified fraction of training data as validation and terminate\n training when validation score is not improving by at least tol for\n n_iter_no_change consecutive epochs.\n\n .. 
versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True.\n\n .. versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. versionadded:: 0.20\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : integer, default=0\n The verbosity level\n\nloss : string, default=\"hinge\"\n The loss function to be used:\n hinge: equivalent to PA-I in the reference paper.\n squared_hinge: equivalent to PA-II in the reference paper.\n\nn_jobs : int or None, default=None\n The number of CPUs to use to do the OVA (One Versus All, for\n multi-class problems) computation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n Used to shuffle the training data, when ``shuffle`` is set to\n ``True``. Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n\nclass_weight : dict, {class_label: weight} or \"balanced\" or None, default=None\n Preset for the class_weight fit parameter.\n\n Weights associated with classes. If not given, all classes\n are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n .. versionadded:: 0.17\n parameter *class_weight* to automatically weight samples.\n\naverage : bool or int, default=False\n When set to True, computes the averaged SGD weights and stores the\n result in the ``coef_`` attribute. If set to an int greater than 1,\n averaging will begin once the total number of samples seen reaches\n average. So average=10 will begin averaging after seeing 10 samples.\n\n .. versionadded:: 0.19\n parameter *average* to use weights averaging in SGD\n\nAttributes\n----------\ncoef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes, n_features]\n Weights assigned to the features.\n\nintercept_ : array, shape = [1] if n_classes == 2 else [n_classes]\n Constants in decision function.\n\nn_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n For multiclass fits, it is the maximum over every binary fit.\n\nclasses_ : array of shape (n_classes,)\n The unique classes labels.\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nloss_function_ : callable\n Loss function used by the algorithm.\n\nExamples\n--------\n>>> from sklearn.linear_model import PassiveAggressiveClassifier\n>>> from sklearn.datasets import make_classification\n\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,\n... 
tol=1e-3)\n>>> clf.fit(X, y)\nPassiveAggressiveClassifier(random_state=0)\n>>> print(clf.coef_)\n[[0.26642044 0.45070924 0.67251877 0.64185414]]\n>>> print(clf.intercept_)\n[1.84127814]\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]\n\nSee Also\n--------\nSGDClassifier\nPerceptron\n\nReferences\n----------\nOnline Passive-Aggressive Algorithms\n\nK. Crammer, O. Dekel, J. Keshet, S. Shalev-Shwartz, Y. Singer - JMLR (2006)", + "code": "class PassiveAggressiveClassifier(BaseSGDClassifier):\n \"\"\"Passive Aggressive Classifier\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n\n C : float, default=1.0\n Maximum step size (regularization). Defaults to 1.0.\n\n fit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered.\n\n max_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\n tol : float or None, default=1e-3\n The stopping criterion. If it is not None, the iterations will stop\n when (loss > previous_loss - tol).\n\n .. versionadded:: 0.19\n\n early_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to True, it will automatically set aside\n a stratified fraction of training data as validation and terminate\n training when validation score is not improving by at least tol for\n n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n\n validation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True.\n\n .. versionadded:: 0.20\n\n n_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. versionadded:: 0.20\n\n shuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\n verbose : integer, default=0\n The verbosity level\n\n loss : string, default=\"hinge\"\n The loss function to be used:\n hinge: equivalent to PA-I in the reference paper.\n squared_hinge: equivalent to PA-II in the reference paper.\n\n n_jobs : int or None, default=None\n The number of CPUs to use to do the OVA (One Versus All, for\n multi-class problems) computation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n random_state : int, RandomState instance, default=None\n Used to shuffle the training data, when ``shuffle`` is set to\n ``True``. Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\n warm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n\n class_weight : dict, {class_label: weight} or \"balanced\" or None, \\\n default=None\n Preset for the class_weight fit parameter.\n\n Weights associated with classes. 
If not given, all classes\n are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n .. versionadded:: 0.17\n parameter *class_weight* to automatically weight samples.\n\n average : bool or int, default=False\n When set to True, computes the averaged SGD weights and stores the\n result in the ``coef_`` attribute. If set to an int greater than 1,\n averaging will begin once the total number of samples seen reaches\n average. So average=10 will begin averaging after seeing 10 samples.\n\n .. versionadded:: 0.19\n parameter *average* to use weights averaging in SGD\n\n Attributes\n ----------\n coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\\\n n_features]\n Weights assigned to the features.\n\n intercept_ : array, shape = [1] if n_classes == 2 else [n_classes]\n Constants in decision function.\n\n n_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n For multiclass fits, it is the maximum over every binary fit.\n\n classes_ : array of shape (n_classes,)\n The unique classes labels.\n\n t_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\n loss_function_ : callable\n Loss function used by the algorithm.\n\n Examples\n --------\n >>> from sklearn.linear_model import PassiveAggressiveClassifier\n >>> from sklearn.datasets import make_classification\n\n >>> X, y = make_classification(n_features=4, random_state=0)\n >>> clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,\n ... tol=1e-3)\n >>> clf.fit(X, y)\n PassiveAggressiveClassifier(random_state=0)\n >>> print(clf.coef_)\n [[0.26642044 0.45070924 0.67251877 0.64185414]]\n >>> print(clf.intercept_)\n [1.84127814]\n >>> print(clf.predict([[0, 0, 0, 0]]))\n [1]\n\n See Also\n --------\n SGDClassifier\n Perceptron\n\n References\n ----------\n Online Passive-Aggressive Algorithms\n \n K. Crammer, O. Dekel, J. Keshet, S. Shalev-Shwartz, Y. 
Singer - JMLR (2006)\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, C=1.0, fit_intercept=True, max_iter=1000, tol=1e-3,\n early_stopping=False, validation_fraction=0.1,\n n_iter_no_change=5, shuffle=True, verbose=0, loss=\"hinge\",\n n_jobs=None, random_state=None, warm_start=False,\n class_weight=None, average=False):\n super().__init__(\n penalty=None,\n fit_intercept=fit_intercept,\n max_iter=max_iter,\n tol=tol,\n early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change,\n shuffle=shuffle,\n verbose=verbose,\n random_state=random_state,\n eta0=1.0,\n warm_start=warm_start,\n class_weight=class_weight,\n average=average,\n n_jobs=n_jobs)\n\n self.C = C\n self.loss = loss\n\n def partial_fit(self, X, y, classes=None):\n \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Subset of the training data\n\n y : numpy array of shape [n_samples]\n Subset of the target values\n\n classes : array, shape = [n_classes]\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._validate_params(for_partial_fit=True)\n if self.class_weight == 'balanced':\n raise ValueError(\"class_weight 'balanced' is not supported for \"\n \"partial_fit. For 'balanced' weights, use \"\n \"`sklearn.utils.compute_class_weight` with \"\n \"`class_weight='balanced'`. In place of y you \"\n \"can use a large enough subset of the full \"\n \"training set target to properly estimate the \"\n \"class frequency distributions. 
Pass the \"\n \"resulting weights as the class_weight \"\n \"parameter.\")\n lr = \"pa1\" if self.loss == \"hinge\" else \"pa2\"\n return self._partial_fit(X, y, alpha=1.0, C=self.C,\n loss=\"hinge\", learning_rate=lr, max_iter=1,\n classes=classes, sample_weight=None,\n coef_init=None, intercept_init=None)\n\n def fit(self, X, y, coef_init=None, intercept_init=None):\n \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\n y : numpy array of shape [n_samples]\n Target values\n\n coef_init : array, shape = [n_classes,n_features]\n The initial coefficients to warm-start the optimization.\n\n intercept_init : array, shape = [n_classes]\n The initial intercept to warm-start the optimization.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._validate_params()\n lr = \"pa1\" if self.loss == \"hinge\" else \"pa2\"\n return self._fit(X, y, alpha=1.0, C=self.C,\n loss=\"hinge\", learning_rate=lr,\n coef_init=coef_init, intercept_init=intercept_init)", + "instance_attributes": [ + { + "name": "C", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "loss", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor", + "name": "PassiveAggressiveRegressor", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor", + "decorators": [], + "superclasses": ["BaseSGDRegressor"], + "methods": [ + "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__", + "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/partial_fit", + "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Passive Aggressive Regressor\n\nRead more in the :ref:`User Guide `.", + "docstring": "Passive Aggressive Regressor\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nC : float, default=1.0\n Maximum step size (regularization). Defaults to 1.0.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered. Defaults to True.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float or None, default=1e-3\n The stopping criterion. If it is not None, the iterations will stop\n when (loss > previous_loss - tol).\n\n .. versionadded:: 0.19\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation.\n score is not improving. If set to True, it will automatically set aside\n a fraction of training data as validation and terminate\n training when validation score is not improving by at least tol for\n n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True.\n\n .. versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. 
versionadded:: 0.20\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : integer, default=0\n The verbosity level\n\nloss : string, default=\"epsilon_insensitive\"\n The loss function to be used:\n epsilon_insensitive: equivalent to PA-I in the reference paper.\n squared_epsilon_insensitive: equivalent to PA-II in the reference\n paper.\n\nepsilon : float, default=0.1\n If the difference between the current prediction and the correct label\n is below this threshold, the model is not updated.\n\nrandom_state : int, RandomState instance, default=None\n Used to shuffle the training data, when ``shuffle`` is set to\n ``True``. Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n\naverage : bool or int, default=False\n When set to True, computes the averaged SGD weights and stores the\n result in the ``coef_`` attribute. If set to an int greater than 1,\n averaging will begin once the total number of samples seen reaches\n average. So average=10 will begin averaging after seeing 10 samples.\n\n .. versionadded:: 0.19\n parameter *average* to use weights averaging in SGD\n\nAttributes\n----------\ncoef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes, n_features]\n Weights assigned to the features.\n\nintercept_ : array, shape = [1] if n_classes == 2 else [n_classes]\n Constants in decision function.\n\nn_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nExamples\n--------\n>>> from sklearn.linear_model import PassiveAggressiveRegressor\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=4, random_state=0)\n>>> regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,\n... tol=1e-3)\n>>> regr.fit(X, y)\nPassiveAggressiveRegressor(max_iter=100, random_state=0)\n>>> print(regr.coef_)\n[20.48736655 34.18818427 67.59122734 87.94731329]\n>>> print(regr.intercept_)\n[-0.02306214]\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-0.02306214]\n\nSee Also\n--------\nSGDRegressor\n\nReferences\n----------\nOnline Passive-Aggressive Algorithms\n\nK. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006)", + "code": "class PassiveAggressiveRegressor(BaseSGDRegressor):\n \"\"\"Passive Aggressive Regressor\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n\n C : float, default=1.0\n Maximum step size (regularization). Defaults to 1.0.\n\n fit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered. Defaults to True.\n\n max_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\n tol : float or None, default=1e-3\n The stopping criterion. If it is not None, the iterations will stop\n when (loss > previous_loss - tol).\n\n .. 
versionadded:: 0.19\n\n early_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to True, it will automatically set aside\n a fraction of training data as validation and terminate\n training when validation score is not improving by at least tol for\n n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n\n validation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True.\n\n .. versionadded:: 0.20\n\n n_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. versionadded:: 0.20\n\n shuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\n verbose : integer, default=0\n The verbosity level\n\n loss : string, default=\"epsilon_insensitive\"\n The loss function to be used:\n epsilon_insensitive: equivalent to PA-I in the reference paper.\n squared_epsilon_insensitive: equivalent to PA-II in the reference\n paper.\n\n epsilon : float, default=0.1\n If the difference between the current prediction and the correct label\n is below this threshold, the model is not updated.\n\n random_state : int, RandomState instance, default=None\n Used to shuffle the training data, when ``shuffle`` is set to\n ``True``. Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\n warm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n\n average : bool or int, default=False\n When set to True, computes the averaged SGD weights and stores the\n result in the ``coef_`` attribute. If set to an int greater than 1,\n averaging will begin once the total number of samples seen reaches\n average. So average=10 will begin averaging after seeing 10 samples.\n\n .. versionadded:: 0.19\n parameter *average* to use weights averaging in SGD\n\n Attributes\n ----------\n coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\\\n n_features]\n Weights assigned to the features.\n\n intercept_ : array, shape = [1] if n_classes == 2 else [n_classes]\n Constants in decision function.\n\n n_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n\n t_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\n Examples\n --------\n >>> from sklearn.linear_model import PassiveAggressiveRegressor\n >>> from sklearn.datasets import make_regression\n\n >>> X, y = make_regression(n_features=4, random_state=0)\n >>> regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,\n ... tol=1e-3)\n >>> regr.fit(X, y)\n PassiveAggressiveRegressor(max_iter=100, random_state=0)\n >>> print(regr.coef_)\n [20.48736655 34.18818427 67.59122734 87.94731329]\n >>> print(regr.intercept_)\n [-0.02306214]\n >>> print(regr.predict([[0, 0, 0, 0]]))\n [-0.02306214]\n\n See Also\n --------\n SGDRegressor\n\n References\n ----------\n Online Passive-Aggressive Algorithms\n \n K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. 
Singer - JMLR (2006)\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, C=1.0, fit_intercept=True, max_iter=1000, tol=1e-3,\n early_stopping=False, validation_fraction=0.1,\n n_iter_no_change=5, shuffle=True, verbose=0,\n loss=\"epsilon_insensitive\", epsilon=DEFAULT_EPSILON,\n random_state=None, warm_start=False,\n average=False):\n super().__init__(\n penalty=None,\n l1_ratio=0,\n epsilon=epsilon,\n eta0=1.0,\n fit_intercept=fit_intercept,\n max_iter=max_iter,\n tol=tol,\n early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change,\n shuffle=shuffle,\n verbose=verbose,\n random_state=random_state,\n warm_start=warm_start,\n average=average)\n self.C = C\n self.loss = loss\n\n def partial_fit(self, X, y):\n \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Subset of training data\n\n y : numpy array of shape [n_samples]\n Subset of target values\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._validate_params(for_partial_fit=True)\n lr = \"pa1\" if self.loss == \"epsilon_insensitive\" else \"pa2\"\n return self._partial_fit(X, y, alpha=1.0, C=self.C,\n loss=\"epsilon_insensitive\",\n learning_rate=lr, max_iter=1,\n sample_weight=None,\n coef_init=None, intercept_init=None)\n\n def fit(self, X, y, coef_init=None, intercept_init=None):\n \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\n y : numpy array of shape [n_samples]\n Target values\n\n coef_init : array, shape = [n_features]\n The initial coefficients to warm-start the optimization.\n\n intercept_init : array, shape = [1]\n The initial intercept to warm-start the optimization.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._validate_params()\n lr = \"pa1\" if self.loss == \"epsilon_insensitive\" else \"pa2\"\n return self._fit(X, y, alpha=1.0, C=self.C,\n loss=\"epsilon_insensitive\",\n learning_rate=lr,\n coef_init=coef_init,\n intercept_init=intercept_init)", + "instance_attributes": [ + { + "name": "C", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "loss", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron", + "name": "Perceptron", + "qname": "sklearn.linear_model._perceptron.Perceptron", + "decorators": [], + "superclasses": ["BaseSGDClassifier"], + "methods": ["scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Perceptron\n\nRead more in the :ref:`User Guide `.", + "docstring": "Perceptron\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\npenalty : {'l2','l1','elasticnet'}, default=None\n The penalty (aka regularization term) to be used.\n\nalpha : float, default=0.0001\n Constant that multiplies the regularization term if regularization is\n used.\n\nl1_ratio : float, default=0.15\n The Elastic Net mixing parameter, with `0 <= l1_ratio <= 1`.\n `l1_ratio=0` corresponds to L2 penalty, `l1_ratio=1` to L1.\n Only used if `penalty='elasticnet'`.\n\n .. versionadded:: 0.24\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. 
If False, the\n data is assumed to be already centered.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-3\n The stopping criterion. If it is not None, the iterations will stop\n when (loss > previous_loss - tol).\n\n .. versionadded:: 0.19\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n The verbosity level\n\neta0 : double, default=1\n Constant by which the updates are multiplied.\n\nn_jobs : int, default=None\n The number of CPUs to use to do the OVA (One Versus All, for\n multi-class problems) computation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n Used to shuffle the training data, when ``shuffle`` is set to\n ``True``. Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to True, it will automatically set aside\n a stratified fraction of training data as validation and terminate\n training when validation score is not improving by at least tol for\n n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True.\n\n .. versionadded:: 0.20\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. versionadded:: 0.20\n\nclass_weight : dict, {class_label: weight} or \"balanced\", default=None\n Preset for the class_weight fit parameter.\n\n Weights associated with classes. If not given, all classes\n are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution. See\n :term:`the Glossary `.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n The unique class labels.\n\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else (n_classes, n_features)\n Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n Constants in decision function.\n\nloss_function_ : concrete LossFunction\n The function that determines the loss, or difference between the\n output of the algorithm and the target values.\n\nn_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n For multiclass fits, it is the maximum over every binary fit.\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nNotes\n-----\n\n``Perceptron`` is a classification algorithm which shares the same\nunderlying implementation with ``SGDClassifier``. 
In fact,\n``Perceptron()`` is equivalent to `SGDClassifier(loss=\"perceptron\",\neta0=1, learning_rate=\"constant\", penalty=None)`.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.linear_model import Perceptron\n>>> X, y = load_digits(return_X_y=True)\n>>> clf = Perceptron(tol=1e-3, random_state=0)\n>>> clf.fit(X, y)\nPerceptron()\n>>> clf.score(X, y)\n0.939...\n\nSee Also\n--------\nSGDClassifier\n\nReferences\n----------\n\nhttps://en.wikipedia.org/wiki/Perceptron and references therein.", + "code": "class Perceptron(BaseSGDClassifier):\n \"\"\"Perceptron\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n\n penalty : {'l2','l1','elasticnet'}, default=None\n The penalty (aka regularization term) to be used.\n\n alpha : float, default=0.0001\n Constant that multiplies the regularization term if regularization is\n used.\n\n l1_ratio : float, default=0.15\n The Elastic Net mixing parameter, with `0 <= l1_ratio <= 1`.\n `l1_ratio=0` corresponds to L2 penalty, `l1_ratio=1` to L1.\n Only used if `penalty='elasticnet'`.\n\n .. versionadded:: 0.24\n\n fit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered.\n\n max_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\n tol : float, default=1e-3\n The stopping criterion. If it is not None, the iterations will stop\n when (loss > previous_loss - tol).\n\n .. versionadded:: 0.19\n\n shuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\n verbose : int, default=0\n The verbosity level\n\n eta0 : double, default=1\n Constant by which the updates are multiplied.\n\n n_jobs : int, default=None\n The number of CPUs to use to do the OVA (One Versus All, for\n multi-class problems) computation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n random_state : int, RandomState instance, default=None\n Used to shuffle the training data, when ``shuffle`` is set to\n ``True``. Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\n early_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to True, it will automatically set aside\n a stratified fraction of training data as validation and terminate\n training when validation score is not improving by at least tol for\n n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n\n validation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True.\n\n .. versionadded:: 0.20\n\n n_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before early stopping.\n\n .. versionadded:: 0.20\n\n class_weight : dict, {class_label: weight} or \"balanced\", default=None\n Preset for the class_weight fit parameter.\n\n Weights associated with classes. 
If not given, all classes\n are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n warm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution. See\n :term:`the Glossary `.\n\n Attributes\n ----------\n classes_ : ndarray of shape (n_classes,)\n The unique class labels.\n\n coef_ : ndarray of shape (1, n_features) if n_classes == 2 else \\\n (n_classes, n_features)\n Weights assigned to the features.\n\n intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n Constants in decision function.\n\n loss_function_ : concrete LossFunction\n The function that determines the loss, or difference between the\n output of the algorithm and the target values.\n\n n_iter_ : int\n The actual number of iterations to reach the stopping criterion.\n For multiclass fits, it is the maximum over every binary fit.\n\n t_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\n Notes\n -----\n\n ``Perceptron`` is a classification algorithm which shares the same\n underlying implementation with ``SGDClassifier``. In fact,\n ``Perceptron()`` is equivalent to `SGDClassifier(loss=\"perceptron\",\n eta0=1, learning_rate=\"constant\", penalty=None)`.\n\n Examples\n --------\n >>> from sklearn.datasets import load_digits\n >>> from sklearn.linear_model import Perceptron\n >>> X, y = load_digits(return_X_y=True)\n >>> clf = Perceptron(tol=1e-3, random_state=0)\n >>> clf.fit(X, y)\n Perceptron()\n >>> clf.score(X, y)\n 0.939...\n\n See Also\n --------\n SGDClassifier\n\n References\n ----------\n\n https://en.wikipedia.org/wiki/Perceptron and references therein.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, penalty=None, alpha=0.0001, l1_ratio=0.15,\n fit_intercept=True,\n max_iter=1000, tol=1e-3, shuffle=True, verbose=0, eta0=1.0,\n n_jobs=None, random_state=0, early_stopping=False,\n validation_fraction=0.1, n_iter_no_change=5,\n class_weight=None, warm_start=False):\n super().__init__(\n loss=\"perceptron\", penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,\n fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,\n shuffle=shuffle, verbose=verbose, random_state=random_state,\n learning_rate=\"constant\", eta0=eta0, early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, power_t=0.5,\n warm_start=warm_start, class_weight=class_weight, n_jobs=n_jobs)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor", + "name": "RANSACRegressor", + "qname": "sklearn.linear_model._ransac.RANSACRegressor", + "decorators": [], + "superclasses": ["MetaEstimatorMixin", "RegressorMixin", "MultiOutputMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__", + "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/fit", + "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/predict", + "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/score", + "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "RANSAC (RANdom SAmple Consensus) algorithm.\n\nRANSAC is an iterative algorithm for the robust estimation of parameters\nfrom 
a subset of inliers from the complete data set.\n\nRead more in the :ref:`User Guide `.", + "docstring": "RANSAC (RANdom SAmple Consensus) algorithm.\n\nRANSAC is an iterative algorithm for the robust estimation of parameters\nfrom a subset of inliers from the complete data set.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbase_estimator : object, default=None\n Base estimator object which implements the following methods:\n\n * `fit(X, y)`: Fit model to given training data and target values.\n * `score(X, y)`: Returns the mean accuracy on the given test data,\n which is used for the stop criterion defined by `stop_score`.\n Additionally, the score is used to decide which of two equally\n large consensus sets is chosen as the better one.\n * `predict(X)`: Returns predicted values using the linear model,\n which is used to compute residual error using loss function.\n\n If `base_estimator` is None, then\n :class:`~sklearn.linear_model.LinearRegression` is used for\n target values of dtype float.\n\n Note that the current implementation only supports regression\n estimators.\n\nmin_samples : int (>= 1) or float ([0, 1]), default=None\n Minimum number of samples chosen randomly from original data. Treated\n as an absolute number of samples for `min_samples >= 1`, treated as a\n relative number `ceil(min_samples * X.shape[0]`) for\n `min_samples < 1`. This is typically chosen as the minimal number of\n samples necessary to estimate the given `base_estimator`. By default a\n ``sklearn.linear_model.LinearRegression()`` estimator is assumed and\n `min_samples` is chosen as ``X.shape[1] + 1``.\n\nresidual_threshold : float, default=None\n Maximum residual for a data sample to be classified as an inlier.\n By default the threshold is chosen as the MAD (median absolute\n deviation) of the target values `y`.\n\nis_data_valid : callable, default=None\n This function is called with the randomly selected data before the\n model is fitted to it: `is_data_valid(X, y)`. If its return value is\n False the current randomly chosen sub-sample is skipped.\n\nis_model_valid : callable, default=None\n This function is called with the estimated model and the randomly\n selected data: `is_model_valid(model, X, y)`. If its return value is\n False the current randomly chosen sub-sample is skipped.\n Rejecting samples with this function is computationally costlier than\n with `is_data_valid`. `is_model_valid` should therefore only be used if\n the estimated model is needed for making the rejection decision.\n\nmax_trials : int, default=100\n Maximum number of iterations for random sample selection.\n\nmax_skips : int, default=np.inf\n Maximum number of iterations that can be skipped due to finding zero\n inliers or invalid data defined by ``is_data_valid`` or invalid models\n defined by ``is_model_valid``.\n\n .. versionadded:: 0.19\n\nstop_n_inliers : int, default=np.inf\n Stop iteration if at least this number of inliers are found.\n\nstop_score : float, default=np.inf\n Stop iteration if score is greater equal than this threshold.\n\nstop_probability : float in range [0, 1], default=0.99\n RANSAC iteration stops if at least one outlier-free set of the training\n data is sampled in RANSAC. 
This requires to generate at least N\n samples (iterations)::\n\n N >= log(1 - probability) / log(1 - e**m)\n\n where the probability (confidence) is typically set to high value such\n as 0.99 (the default) and e is the current fraction of inliers w.r.t.\n the total number of samples.\n\nloss : string, callable, default='absolute_loss'\n String inputs, \"absolute_loss\" and \"squared_loss\" are supported which\n find the absolute loss and squared loss per sample\n respectively.\n\n If ``loss`` is a callable, then it should be a function that takes\n two arrays as inputs, the true and predicted value and returns a 1-D\n array with the i-th value of the array corresponding to the loss\n on ``X[i]``.\n\n If the loss on a sample is greater than the ``residual_threshold``,\n then this sample is classified as an outlier.\n\n .. versionadded:: 0.18\n\nrandom_state : int, RandomState instance, default=None\n The generator used to initialize the centers.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nestimator_ : object\n Best fitted model (copy of the `base_estimator` object).\n\nn_trials_ : int\n Number of random selection trials until one of the stop criteria is\n met. It is always ``<= max_trials``.\n\ninlier_mask_ : bool array of shape [n_samples]\n Boolean mask of inliers classified as ``True``.\n\nn_skips_no_inliers_ : int\n Number of iterations skipped due to finding zero inliers.\n\n .. versionadded:: 0.19\n\nn_skips_invalid_data_ : int\n Number of iterations skipped due to invalid data defined by\n ``is_data_valid``.\n\n .. versionadded:: 0.19\n\nn_skips_invalid_model_ : int\n Number of iterations skipped due to an invalid model defined by\n ``is_model_valid``.\n\n .. versionadded:: 0.19\n\nExamples\n--------\n>>> from sklearn.linear_model import RANSACRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(\n... n_samples=200, n_features=2, noise=4.0, random_state=0)\n>>> reg = RANSACRegressor(random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9885...\n>>> reg.predict(X[:1,])\narray([-31.9417...])\n\nReferences\n----------\n.. [1] https://en.wikipedia.org/wiki/RANSAC\n.. [2] https://www.sri.com/sites/default/files/publications/ransac-publication.pdf\n.. 
[3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf", + "code": "class RANSACRegressor(MetaEstimatorMixin, RegressorMixin,\n MultiOutputMixin, BaseEstimator):\n \"\"\"RANSAC (RANdom SAmple Consensus) algorithm.\n\n RANSAC is an iterative algorithm for the robust estimation of parameters\n from a subset of inliers from the complete data set.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n base_estimator : object, default=None\n Base estimator object which implements the following methods:\n\n * `fit(X, y)`: Fit model to given training data and target values.\n * `score(X, y)`: Returns the mean accuracy on the given test data,\n which is used for the stop criterion defined by `stop_score`.\n Additionally, the score is used to decide which of two equally\n large consensus sets is chosen as the better one.\n * `predict(X)`: Returns predicted values using the linear model,\n which is used to compute residual error using loss function.\n\n If `base_estimator` is None, then\n :class:`~sklearn.linear_model.LinearRegression` is used for\n target values of dtype float.\n\n Note that the current implementation only supports regression\n estimators.\n\n min_samples : int (>= 1) or float ([0, 1]), default=None\n Minimum number of samples chosen randomly from original data. Treated\n as an absolute number of samples for `min_samples >= 1`, treated as a\n relative number `ceil(min_samples * X.shape[0]`) for\n `min_samples < 1`. This is typically chosen as the minimal number of\n samples necessary to estimate the given `base_estimator`. By default a\n ``sklearn.linear_model.LinearRegression()`` estimator is assumed and\n `min_samples` is chosen as ``X.shape[1] + 1``.\n\n residual_threshold : float, default=None\n Maximum residual for a data sample to be classified as an inlier.\n By default the threshold is chosen as the MAD (median absolute\n deviation) of the target values `y`.\n\n is_data_valid : callable, default=None\n This function is called with the randomly selected data before the\n model is fitted to it: `is_data_valid(X, y)`. If its return value is\n False the current randomly chosen sub-sample is skipped.\n\n is_model_valid : callable, default=None\n This function is called with the estimated model and the randomly\n selected data: `is_model_valid(model, X, y)`. If its return value is\n False the current randomly chosen sub-sample is skipped.\n Rejecting samples with this function is computationally costlier than\n with `is_data_valid`. `is_model_valid` should therefore only be used if\n the estimated model is needed for making the rejection decision.\n\n max_trials : int, default=100\n Maximum number of iterations for random sample selection.\n\n max_skips : int, default=np.inf\n Maximum number of iterations that can be skipped due to finding zero\n inliers or invalid data defined by ``is_data_valid`` or invalid models\n defined by ``is_model_valid``.\n\n .. versionadded:: 0.19\n\n stop_n_inliers : int, default=np.inf\n Stop iteration if at least this number of inliers are found.\n\n stop_score : float, default=np.inf\n Stop iteration if score is greater equal than this threshold.\n\n stop_probability : float in range [0, 1], default=0.99\n RANSAC iteration stops if at least one outlier-free set of the training\n data is sampled in RANSAC. 
This requires to generate at least N\n samples (iterations)::\n\n N >= log(1 - probability) / log(1 - e**m)\n\n where the probability (confidence) is typically set to high value such\n as 0.99 (the default) and e is the current fraction of inliers w.r.t.\n the total number of samples.\n\n loss : string, callable, default='absolute_loss'\n String inputs, \"absolute_loss\" and \"squared_loss\" are supported which\n find the absolute loss and squared loss per sample\n respectively.\n\n If ``loss`` is a callable, then it should be a function that takes\n two arrays as inputs, the true and predicted value and returns a 1-D\n array with the i-th value of the array corresponding to the loss\n on ``X[i]``.\n\n If the loss on a sample is greater than the ``residual_threshold``,\n then this sample is classified as an outlier.\n\n .. versionadded:: 0.18\n\n random_state : int, RandomState instance, default=None\n The generator used to initialize the centers.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n estimator_ : object\n Best fitted model (copy of the `base_estimator` object).\n\n n_trials_ : int\n Number of random selection trials until one of the stop criteria is\n met. It is always ``<= max_trials``.\n\n inlier_mask_ : bool array of shape [n_samples]\n Boolean mask of inliers classified as ``True``.\n\n n_skips_no_inliers_ : int\n Number of iterations skipped due to finding zero inliers.\n\n .. versionadded:: 0.19\n\n n_skips_invalid_data_ : int\n Number of iterations skipped due to invalid data defined by\n ``is_data_valid``.\n\n .. versionadded:: 0.19\n\n n_skips_invalid_model_ : int\n Number of iterations skipped due to an invalid model defined by\n ``is_model_valid``.\n\n .. versionadded:: 0.19\n\n Examples\n --------\n >>> from sklearn.linear_model import RANSACRegressor\n >>> from sklearn.datasets import make_regression\n >>> X, y = make_regression(\n ... n_samples=200, n_features=2, noise=4.0, random_state=0)\n >>> reg = RANSACRegressor(random_state=0).fit(X, y)\n >>> reg.score(X, y)\n 0.9885...\n >>> reg.predict(X[:1,])\n array([-31.9417...])\n\n References\n ----------\n .. [1] https://en.wikipedia.org/wiki/RANSAC\n .. [2] https://www.sri.com/sites/default/files/publications/ransac-publication.pdf\n .. 
[3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, base_estimator=None, *, min_samples=None,\n residual_threshold=None, is_data_valid=None,\n is_model_valid=None, max_trials=100, max_skips=np.inf,\n stop_n_inliers=np.inf, stop_score=np.inf,\n stop_probability=0.99, loss='absolute_loss',\n random_state=None):\n\n self.base_estimator = base_estimator\n self.min_samples = min_samples\n self.residual_threshold = residual_threshold\n self.is_data_valid = is_data_valid\n self.is_model_valid = is_model_valid\n self.max_trials = max_trials\n self.max_skips = max_skips\n self.stop_n_inliers = stop_n_inliers\n self.stop_score = stop_score\n self.stop_probability = stop_probability\n self.random_state = random_state\n self.loss = loss\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit estimator using RANSAC algorithm.\n\n Parameters\n ----------\n X : array-like or sparse matrix, shape [n_samples, n_features]\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample\n raises error if sample_weight is passed and base_estimator\n fit method does not support it.\n\n .. versionadded:: 0.18\n\n Raises\n ------\n ValueError\n If no valid consensus set could be found. This occurs if\n `is_data_valid` and `is_model_valid` return False for all\n `max_trials` randomly chosen sub-samples.\n\n \"\"\"\n # Need to validate separately here.\n # We can't pass multi_ouput=True because that would allow y to be csr.\n check_X_params = dict(accept_sparse='csr')\n check_y_params = dict(ensure_2d=False)\n X, y = self._validate_data(X, y, validate_separately=(check_X_params,\n check_y_params))\n check_consistent_length(X, y)\n\n if self.base_estimator is not None:\n base_estimator = clone(self.base_estimator)\n else:\n base_estimator = LinearRegression()\n\n if self.min_samples is None:\n # assume linear model by default\n min_samples = X.shape[1] + 1\n elif 0 < self.min_samples < 1:\n min_samples = np.ceil(self.min_samples * X.shape[0])\n elif self.min_samples >= 1:\n if self.min_samples % 1 != 0:\n raise ValueError(\"Absolute number of samples must be an \"\n \"integer value.\")\n min_samples = self.min_samples\n else:\n raise ValueError(\"Value for `min_samples` must be scalar and \"\n \"positive.\")\n if min_samples > X.shape[0]:\n raise ValueError(\"`min_samples` may not be larger than number \"\n \"of samples: n_samples = %d.\" % (X.shape[0]))\n\n if self.stop_probability < 0 or self.stop_probability > 1:\n raise ValueError(\"`stop_probability` must be in range [0, 1].\")\n\n if self.residual_threshold is None:\n # MAD (median absolute deviation)\n residual_threshold = np.median(np.abs(y - np.median(y)))\n else:\n residual_threshold = self.residual_threshold\n\n if self.loss == \"absolute_loss\":\n if y.ndim == 1:\n loss_function = lambda y_true, y_pred: np.abs(y_true - y_pred)\n else:\n loss_function = lambda \\\n y_true, y_pred: np.sum(np.abs(y_true - y_pred), axis=1)\n\n elif self.loss == \"squared_loss\":\n if y.ndim == 1:\n loss_function = lambda y_true, y_pred: (y_true - y_pred) ** 2\n else:\n loss_function = lambda \\\n y_true, y_pred: np.sum((y_true - y_pred) ** 2, axis=1)\n\n elif callable(self.loss):\n loss_function = self.loss\n\n else:\n raise ValueError(\n \"loss should be 'absolute_loss', 'squared_loss' or a callable.\"\n \"Got %s. 
\" % self.loss)\n\n\n random_state = check_random_state(self.random_state)\n\n try: # Not all estimator accept a random_state\n base_estimator.set_params(random_state=random_state)\n except ValueError:\n pass\n\n estimator_fit_has_sample_weight = has_fit_parameter(base_estimator,\n \"sample_weight\")\n estimator_name = type(base_estimator).__name__\n if (sample_weight is not None and not\n estimator_fit_has_sample_weight):\n raise ValueError(\"%s does not support sample_weight. Samples\"\n \" weights are only used for the calibration\"\n \" itself.\" % estimator_name)\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n\n n_inliers_best = 1\n score_best = -np.inf\n inlier_mask_best = None\n X_inlier_best = None\n y_inlier_best = None\n inlier_best_idxs_subset = None\n self.n_skips_no_inliers_ = 0\n self.n_skips_invalid_data_ = 0\n self.n_skips_invalid_model_ = 0\n\n # number of data samples\n n_samples = X.shape[0]\n sample_idxs = np.arange(n_samples)\n\n self.n_trials_ = 0\n max_trials = self.max_trials\n while self.n_trials_ < max_trials:\n self.n_trials_ += 1\n\n if (self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +\n self.n_skips_invalid_model_) > self.max_skips:\n break\n\n # choose random sample set\n subset_idxs = sample_without_replacement(n_samples, min_samples,\n random_state=random_state)\n X_subset = X[subset_idxs]\n y_subset = y[subset_idxs]\n\n # check if random sample set is valid\n if (self.is_data_valid is not None\n and not self.is_data_valid(X_subset, y_subset)):\n self.n_skips_invalid_data_ += 1\n continue\n\n # fit model for current random sample set\n if sample_weight is None:\n base_estimator.fit(X_subset, y_subset)\n else:\n base_estimator.fit(X_subset, y_subset,\n sample_weight=sample_weight[subset_idxs])\n\n # check if estimated model is valid\n if (self.is_model_valid is not None and not\n self.is_model_valid(base_estimator, X_subset, y_subset)):\n self.n_skips_invalid_model_ += 1\n continue\n\n # residuals of all data for current random sample model\n y_pred = base_estimator.predict(X)\n residuals_subset = loss_function(y, y_pred)\n\n # classify data into inliers and outliers\n inlier_mask_subset = residuals_subset < residual_threshold\n n_inliers_subset = np.sum(inlier_mask_subset)\n\n # less inliers -> skip current random sample\n if n_inliers_subset < n_inliers_best:\n self.n_skips_no_inliers_ += 1\n continue\n\n # extract inlier data set\n inlier_idxs_subset = sample_idxs[inlier_mask_subset]\n X_inlier_subset = X[inlier_idxs_subset]\n y_inlier_subset = y[inlier_idxs_subset]\n\n # score of inlier data set\n score_subset = base_estimator.score(X_inlier_subset,\n y_inlier_subset)\n\n # same number of inliers but worse score -> skip current random\n # sample\n if (n_inliers_subset == n_inliers_best\n and score_subset < score_best):\n continue\n\n # save current random sample as best sample\n n_inliers_best = n_inliers_subset\n score_best = score_subset\n inlier_mask_best = inlier_mask_subset\n X_inlier_best = X_inlier_subset\n y_inlier_best = y_inlier_subset\n inlier_best_idxs_subset = inlier_idxs_subset\n\n max_trials = min(\n max_trials,\n _dynamic_max_trials(n_inliers_best, n_samples,\n min_samples, self.stop_probability))\n\n # break if sufficient number of inliers or score is reached\n if n_inliers_best >= self.stop_n_inliers or \\\n score_best >= self.stop_score:\n break\n\n # if none of the iterations met the required criteria\n if inlier_mask_best is None:\n if ((self.n_skips_no_inliers_ + 
self.n_skips_invalid_data_ +\n self.n_skips_invalid_model_) > self.max_skips):\n raise ValueError(\n \"RANSAC skipped more iterations than `max_skips` without\"\n \" finding a valid consensus set. Iterations were skipped\"\n \" because each randomly chosen sub-sample failed the\"\n \" passing criteria. See estimator attributes for\"\n \" diagnostics (n_skips*).\")\n else:\n raise ValueError(\n \"RANSAC could not find a valid consensus set. All\"\n \" `max_trials` iterations were skipped because each\"\n \" randomly chosen sub-sample failed the passing criteria.\"\n \" See estimator attributes for diagnostics (n_skips*).\")\n else:\n if (self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +\n self.n_skips_invalid_model_) > self.max_skips:\n warnings.warn(\"RANSAC found a valid consensus set but exited\"\n \" early due to skipping more iterations than\"\n \" `max_skips`. See estimator attributes for\"\n \" diagnostics (n_skips*).\",\n ConvergenceWarning)\n\n # estimate final model using all inliers\n if sample_weight is None:\n base_estimator.fit(X_inlier_best, y_inlier_best)\n else:\n base_estimator.fit(\n X_inlier_best,\n y_inlier_best,\n sample_weight=sample_weight[inlier_best_idxs_subset])\n\n self.estimator_ = base_estimator\n self.inlier_mask_ = inlier_mask_best\n return self\n\n def predict(self, X):\n \"\"\"Predict using the estimated model.\n\n This is a wrapper for `estimator_.predict(X)`.\n\n Parameters\n ----------\n X : numpy array of shape [n_samples, n_features]\n\n Returns\n -------\n y : array, shape = [n_samples] or [n_samples, n_targets]\n Returns predicted values.\n \"\"\"\n check_is_fitted(self)\n\n return self.estimator_.predict(X)\n\n def score(self, X, y):\n \"\"\"Returns the score of the prediction.\n\n This is a wrapper for `estimator_.score(X, y)`.\n\n Parameters\n ----------\n X : numpy array or sparse matrix of shape [n_samples, n_features]\n Training data.\n\n y : array, shape = [n_samples] or [n_samples, n_targets]\n Target values.\n\n Returns\n -------\n z : float\n Score of the prediction.\n \"\"\"\n check_is_fitted(self)\n\n return self.estimator_.score(X, y)\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [ + { + "name": "max_trials", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "stop_probability", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "loss", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_skips_no_inliers_", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_skips_invalid_data_", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_skips_invalid_model_", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_trials_", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge", + "name": "Ridge", + "qname": "sklearn.linear_model._ridge.Ridge", + "decorators": [], + "superclasses": ["MultiOutputMixin", "RegressorMixin", "_BaseRidge"], + "methods": [ + "scikit-learn/sklearn.linear_model._ridge/Ridge/__init__", + "scikit-learn/sklearn.linear_model._ridge/Ridge/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Linear least squares with l2 regularization.\n\nMinimizes the objective function::\n\n||y - Xw||^2_2 + alpha * ||w||^2_2\n\nThis model solves a 
regression model where the loss function is\nthe linear least squares function and regularization is given by\nthe l2-norm. Also known as Ridge Regression or Tikhonov regularization.\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape (n_samples, n_targets)).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Linear least squares with l2 regularization.\n\nMinimizes the objective function::\n\n||y - Xw||^2_2 + alpha * ||w||^2_2\n\nThis model solves a regression model where the loss function is\nthe linear least squares function and regularization is given by\nthe l2-norm. Also known as Ridge Regression or Tikhonov regularization.\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape (n_samples, n_targets)).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : {float, ndarray of shape (n_targets,)}, default=1.0\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n assumed to be specific to the targets. Hence they must correspond in\n number.\n\nfit_intercept : bool, default=True\n Whether to fit the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. ``X`` and ``y`` are expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=None\n Maximum number of iterations for conjugate gradient solver.\n For 'sparse_cg' and 'lsqr' solvers, the default value is determined\n by scipy.sparse.linalg. For 'sag' solver, the default value is 1000.\n\ntol : float, default=1e-3\n Precision of the solution.\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, default='auto'\n Solver to use in the computational routines:\n\n - 'auto' chooses the solver automatically based on the type of data.\n\n - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. More stable for singular matrices than 'cholesky'.\n\n - 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution.\n\n - 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n - 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n procedure.\n\n - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its improved, unbiased version named SAGA. Both methods also use an\n iterative procedure, and are often faster than other solvers when\n both n_samples and n_features are large. 
Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n All last five solvers support both dense and sparse data. However, only\n 'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is\n True.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n See :term:`Glossary ` for details.\n\n .. versionadded:: 0.17\n `random_state` to support Stochastic Average Gradient.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Weight vector(s).\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nn_iter_ : None or ndarray of shape (n_targets,)\n Actual number of iterations for each target. Available only for\n sag and lsqr solvers. Other solvers will return None.\n\n .. versionadded:: 0.17\n\nSee Also\n--------\nRidgeClassifier : Ridge classifier.\nRidgeCV : Ridge regression with built-in cross validation.\n:class:`~sklearn.kernel_ridge.KernelRidge` : Kernel ridge regression\n combines ridge regression with the kernel trick.\n\nExamples\n--------\n>>> from sklearn.linear_model import Ridge\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> clf = Ridge(alpha=1.0)\n>>> clf.fit(X, y)\nRidge()", + "code": "class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):\n \"\"\"Linear least squares with l2 regularization.\n\n Minimizes the objective function::\n\n ||y - Xw||^2_2 + alpha * ||w||^2_2\n\n This model solves a regression model where the loss function is\n the linear least squares function and regularization is given by\n the l2-norm. Also known as Ridge Regression or Tikhonov regularization.\n This estimator has built-in support for multi-variate regression\n (i.e., when y is a 2d-array of shape (n_samples, n_targets)).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n alpha : {float, ndarray of shape (n_targets,)}, default=1.0\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n assumed to be specific to the targets. Hence they must correspond in\n number.\n\n fit_intercept : bool, default=True\n Whether to fit the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
``X`` and ``y`` are expected to be centered).\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n copy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\n max_iter : int, default=None\n Maximum number of iterations for conjugate gradient solver.\n For 'sparse_cg' and 'lsqr' solvers, the default value is determined\n by scipy.sparse.linalg. For 'sag' solver, the default value is 1000.\n\n tol : float, default=1e-3\n Precision of the solution.\n\n solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, \\\n default='auto'\n Solver to use in the computational routines:\n\n - 'auto' chooses the solver automatically based on the type of data.\n\n - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. More stable for singular matrices than 'cholesky'.\n\n - 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution.\n\n - 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n - 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n procedure.\n\n - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its improved, unbiased version named SAGA. Both methods also use an\n iterative procedure, and are often faster than other solvers when\n both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n All last five solvers support both dense and sparse data. However, only\n 'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is\n True.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\n random_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n See :term:`Glossary ` for details.\n\n .. versionadded:: 0.17\n `random_state` to support Stochastic Average Gradient.\n\n Attributes\n ----------\n coef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Weight vector(s).\n\n intercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\n n_iter_ : None or ndarray of shape (n_targets,)\n Actual number of iterations for each target. Available only for\n sag and lsqr solvers. Other solvers will return None.\n\n .. 
versionadded:: 0.17\n\n See Also\n --------\n RidgeClassifier : Ridge classifier.\n RidgeCV : Ridge regression with built-in cross validation.\n :class:`~sklearn.kernel_ridge.KernelRidge` : Kernel ridge regression\n combines ridge regression with the kernel trick.\n\n Examples\n --------\n >>> from sklearn.linear_model import Ridge\n >>> import numpy as np\n >>> n_samples, n_features = 10, 5\n >>> rng = np.random.RandomState(0)\n >>> y = rng.randn(n_samples)\n >>> X = rng.randn(n_samples, n_features)\n >>> clf = Ridge(alpha=1.0)\n >>> clf.fit(X, y)\n Ridge()\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,\n copy_X=True, max_iter=None, tol=1e-3, solver=\"auto\",\n random_state=None):\n super().__init__(\n alpha=alpha, fit_intercept=fit_intercept,\n normalize=normalize, copy_X=copy_X,\n max_iter=max_iter, tol=tol, solver=solver,\n random_state=random_state)\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Ridge regression model.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data\n\n y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\n sample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n return super().fit(X, y, sample_weight=sample_weight)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeCV", + "name": "RidgeCV", + "qname": "sklearn.linear_model._ridge.RidgeCV", + "decorators": [], + "superclasses": ["MultiOutputMixin", "RegressorMixin", "_BaseRidgeCV"], + "methods": [], + "is_public": false, + "reexported_by": [], + "description": "Ridge regression with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs efficient Leave-One-Out Cross-Validation.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Ridge regression with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs efficient Leave-One-Out Cross-Validation.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n Array of alpha values to try.\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`.\n If using Leave-One-Out cross-validation, alphas must be positive.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nscoring : string, callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n If None, the negative mean squared error if cv is 'auto' or None\n (i.e. when using leave-one-out cross-validation), and r2 score\n otherwise.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the efficient Leave-One-Out cross-validation\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if ``y`` is binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used, else,\n :class:`~sklearn.model_selection.KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\ngcv_mode : {'auto', 'svd', 'eigen'}, default='auto'\n Flag indicating which strategy to use when performing\n Leave-One-Out Cross-Validation. Options are::\n\n 'auto' : use 'svd' if n_samples > n_features, otherwise use 'eigen'\n 'svd' : force use of singular value decomposition of X when X is\n dense, eigenvalue decomposition of X^T.X when X is sparse.\n 'eigen' : force computation via eigendecomposition of X.X^T\n\n The 'auto' mode is the default and is intended to pick the cheaper\n option of the two depending on the shape of the training data.\n\nstore_cv_values : bool, default=False\n Flag indicating if the cross-validation values corresponding to\n each alpha should be stored in the ``cv_values_`` attribute (see\n below). This flag is only compatible with ``cv=None`` (i.e. using\n Leave-One-Out Cross-Validation).\n\nalpha_per_target : bool, default=False\n Flag indicating whether to optimize the alpha value (picked from the\n `alphas` parameter list) for each target separately (for multi-output\n settings: multiple prediction targets). When set to `True`, after\n fitting, the `alpha_` attribute will contain a value for each target.\n When set to `False`, a single alpha is used for all targets.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncv_values_ : ndarray of shape (n_samples, n_alphas) or shape (n_samples, n_targets, n_alphas), optional\n Cross-validation values for each alpha (only available if\n ``store_cv_values=True`` and ``cv=None``). After ``fit()`` has been\n called, this attribute will contain the mean squared errors\n (by default) or the values of the ``{loss,score}_func`` function\n (if provided in the constructor).\n\ncoef_ : ndarray of shape (n_features) or (n_targets, n_features)\n Weight vector(s).\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. 
Set to 0.0 if\n    ``fit_intercept = False``.\n\nalpha_ : float or ndarray of shape (n_targets,)\n    Estimated regularization parameter, or, if ``alpha_per_target=True``,\n    the estimated regularization parameter for each target.\n\nbest_score_ : float or ndarray of shape (n_targets,)\n    Score of base estimator with best alpha, or, if\n    ``alpha_per_target=True``, a score for each target.\n\n    .. versionadded:: 0.23\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.linear_model import RidgeCV\n>>> X, y = load_diabetes(return_X_y=True)\n>>> clf = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n>>> clf.score(X, y)\n0.5166...\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifier : Ridge classifier.\nRidgeClassifierCV : Ridge classifier with built-in cross validation.", + "code": "class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):\n    \"\"\"Ridge regression with built-in cross-validation.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    By default, it performs efficient Leave-One-Out Cross-Validation.\n\n    Read more in the :ref:`User Guide `.\n\n    Parameters\n    ----------\n    alphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n        Array of alpha values to try.\n        Regularization strength; must be a positive float. Regularization\n        improves the conditioning of the problem and reduces the variance of\n        the estimates. Larger values specify stronger regularization.\n        Alpha corresponds to ``1 / (2C)`` in other linear models such as\n        :class:`~sklearn.linear_model.LogisticRegression` or\n        :class:`~sklearn.svm.LinearSVC`.\n        If using Leave-One-Out cross-validation, alphas must be positive.\n\n    fit_intercept : bool, default=True\n        Whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (i.e. data is expected to be centered).\n\n    normalize : bool, default=False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n    scoring : string, callable, default=None\n        A string (see model evaluation documentation) or\n        a scorer callable object / function with signature\n        ``scorer(estimator, X, y)``.\n        If None, the negative mean squared error if cv is 'auto' or None\n        (i.e. when using leave-one-out cross-validation), and r2 score\n        otherwise.\n\n    cv : int, cross-validation generator or an iterable, default=None\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the efficient Leave-One-Out cross-validation\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, if ``y`` is binary or multiclass,\n        :class:`~sklearn.model_selection.StratifiedKFold` is used, else,\n        :class:`~sklearn.model_selection.KFold` is used.\n\n        Refer :ref:`User Guide ` for the various\n        cross-validation strategies that can be used here.\n\n    gcv_mode : {'auto', 'svd', 'eigen'}, default='auto'\n        Flag indicating which strategy to use when performing\n        Leave-One-Out Cross-Validation. 
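A minimal doctest-style sketch of how the `gcv_mode` and `store_cv_values` parameters documented above interact (it reuses the diabetes data from the RidgeCV example; the alpha grid is arbitrary, and the shape comment follows the `cv_values_` attribute description):

>>> from sklearn.datasets import load_diabetes
>>> from sklearn.linear_model import RidgeCV
>>> X, y = load_diabetes(return_X_y=True)
>>> reg = RidgeCV(alphas=[0.1, 1.0, 10.0], gcv_mode='svd',
...               store_cv_values=True).fit(X, y)
>>> reg.cv_values_.shape  # (n_samples, n_alphas) for a single target
(442, 3)
>>> reg.alpha_ in (0.1, 1.0, 10.0)  # best alpha picked from the grid
True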
Options are::\n\n 'auto' : use 'svd' if n_samples > n_features, otherwise use 'eigen'\n 'svd' : force use of singular value decomposition of X when X is\n dense, eigenvalue decomposition of X^T.X when X is sparse.\n 'eigen' : force computation via eigendecomposition of X.X^T\n\n The 'auto' mode is the default and is intended to pick the cheaper\n option of the two depending on the shape of the training data.\n\n store_cv_values : bool, default=False\n Flag indicating if the cross-validation values corresponding to\n each alpha should be stored in the ``cv_values_`` attribute (see\n below). This flag is only compatible with ``cv=None`` (i.e. using\n Leave-One-Out Cross-Validation).\n\n alpha_per_target : bool, default=False\n Flag indicating whether to optimize the alpha value (picked from the\n `alphas` parameter list) for each target separately (for multi-output\n settings: multiple prediction targets). When set to `True`, after\n fitting, the `alpha_` attribute will contain a value for each target.\n When set to `False`, a single alpha is used for all targets.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n cv_values_ : ndarray of shape (n_samples, n_alphas) or \\\n shape (n_samples, n_targets, n_alphas), optional\n Cross-validation values for each alpha (only available if\n ``store_cv_values=True`` and ``cv=None``). After ``fit()`` has been\n called, this attribute will contain the mean squared errors\n (by default) or the values of the ``{loss,score}_func`` function\n (if provided in the constructor).\n\n coef_ : ndarray of shape (n_features) or (n_targets, n_features)\n Weight vector(s).\n\n intercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\n alpha_ : float or ndarray of shape (n_targets,)\n Estimated regularization parameter, or, if ``alpha_per_target=True``,\n the estimated regularization parameter for each target.\n\n best_score_ : float or ndarray of shape (n_targets,)\n Score of base estimator with best alpha, or, if\n ``alpha_per_target=True``, a score for each target.\n\n .. 
versionadded:: 0.23\n\n Examples\n --------\n >>> from sklearn.datasets import load_diabetes\n >>> from sklearn.linear_model import RidgeCV\n >>> X, y = load_diabetes(return_X_y=True)\n >>> clf = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n >>> clf.score(X, y)\n 0.5166...\n\n See Also\n --------\n Ridge : Ridge regression.\n RidgeClassifier : Ridge classifier.\n RidgeClassifierCV : Ridge classifier with built-in cross validation.\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier", + "name": "RidgeClassifier", + "qname": "sklearn.linear_model._ridge.RidgeClassifier", + "decorators": [], + "superclasses": ["LinearClassifierMixin", "_BaseRidge"], + "methods": [ + "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/__init__", + "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/fit", + "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/classes_@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Classifier using Ridge regression.\n\nThis classifier first converts the target values into ``{-1, 1}`` and\nthen treats the problem as a regression task (multi-output regression in\nthe multiclass case).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Classifier using Ridge regression.\n\nThis classifier first converts the target values into ``{-1, 1}`` and\nthen treats the problem as a regression task (multi-output regression in\nthe multiclass case).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set to false, no\n intercept will be used in calculations (e.g. data is expected to be\n already centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=None\n Maximum number of iterations for conjugate gradient solver.\n The default value is determined by scipy.sparse.linalg.\n\ntol : float, default=1e-3\n Precision of the solution.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, default='auto'\n Solver to use in the computational routines:\n\n - 'auto' chooses the solver automatically based on the type of data.\n\n - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. 
More stable for singular matrices than 'cholesky'.\n\n - 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution.\n\n - 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n - 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n procedure.\n\n - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its unbiased and more flexible version named SAGA. Both methods\n use an iterative procedure, and are often faster than other solvers\n when both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n See :term:`Glossary ` for details.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function.\n\n ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nn_iter_ : None or ndarray of shape (n_targets,)\n Actual number of iterations for each target. Available only for\n sag and lsqr solvers. Other solvers will return None.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifierCV : Ridge classifier with built-in cross validation.\n\nNotes\n-----\nFor multi-class classification, n_class classifiers are trained in\na one-versus-all approach. Concretely, this is implemented by taking\nadvantage of the multi-variate response support in Ridge.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import RidgeClassifier\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = RidgeClassifier().fit(X, y)\n>>> clf.score(X, y)\n0.9595...", + "code": "class RidgeClassifier(LinearClassifierMixin, _BaseRidge):\n \"\"\"Classifier using Ridge regression.\n\n This classifier first converts the target values into ``{-1, 1}`` and\n then treats the problem as a regression task (multi-output regression in\n the multiclass case).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n alpha : float, default=1.0\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`.\n\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set to false, no\n intercept will be used in calculations (e.g. 
data is expected to be\n already centered).\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n copy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\n max_iter : int, default=None\n Maximum number of iterations for conjugate gradient solver.\n The default value is determined by scipy.sparse.linalg.\n\n tol : float, default=1e-3\n Precision of the solution.\n\n class_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, \\\n default='auto'\n Solver to use in the computational routines:\n\n - 'auto' chooses the solver automatically based on the type of data.\n\n - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. More stable for singular matrices than 'cholesky'.\n\n - 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution.\n\n - 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n - 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n procedure.\n\n - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its unbiased and more flexible version named SAGA. Both methods\n use an iterative procedure, and are often faster than other solvers\n when both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\n random_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n See :term:`Glossary ` for details.\n\n Attributes\n ----------\n coef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function.\n\n ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\n intercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\n n_iter_ : None or ndarray of shape (n_targets,)\n Actual number of iterations for each target. Available only for\n sag and lsqr solvers. Other solvers will return None.\n\n classes_ : ndarray of shape (n_classes,)\n The classes labels.\n\n See Also\n --------\n Ridge : Ridge regression.\n RidgeClassifierCV : Ridge classifier with built-in cross validation.\n\n Notes\n -----\n For multi-class classification, n_class classifiers are trained in\n a one-versus-all approach. 
Concretely, this is implemented by taking\n advantage of the multi-variate response support in Ridge.\n\n Examples\n --------\n >>> from sklearn.datasets import load_breast_cancer\n >>> from sklearn.linear_model import RidgeClassifier\n >>> X, y = load_breast_cancer(return_X_y=True)\n >>> clf = RidgeClassifier().fit(X, y)\n >>> clf.score(X, y)\n 0.9595...\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,\n copy_X=True, max_iter=None, tol=1e-3, class_weight=None,\n solver=\"auto\", random_state=None):\n super().__init__(\n alpha=alpha, fit_intercept=fit_intercept, normalize=normalize,\n copy_X=copy_X, max_iter=max_iter, tol=tol, solver=solver,\n random_state=random_state)\n self.class_weight = class_weight\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Ridge classifier model.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : ndarray of shape (n_samples,)\n Target values.\n\n sample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\n .. versionadded:: 0.17\n *sample_weight* support to Classifier.\n\n Returns\n -------\n self : object\n Instance of the estimator.\n \"\"\"\n _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X),\n self.solver)\n X, y = self._validate_data(X, y, accept_sparse=_accept_sparse,\n multi_output=True, y_numeric=False)\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)\n Y = self._label_binarizer.fit_transform(y)\n if not self._label_binarizer.y_type_.startswith('multilabel'):\n y = column_or_1d(y, warn=True)\n else:\n # we don't (yet) support multi-label classification in Ridge\n raise ValueError(\n \"%s doesn't support multi-label classification\" % (\n self.__class__.__name__))\n\n if self.class_weight:\n # modify the sample weights with the corresponding class weight\n sample_weight = (sample_weight *\n compute_sample_weight(self.class_weight, y))\n\n super().fit(X, Y, sample_weight=sample_weight)\n return self\n\n @property\n def classes_(self):\n return self._label_binarizer.classes_", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV", + "name": "RidgeClassifierCV", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV", + "decorators": [], + "superclasses": ["LinearClassifierMixin", "_BaseRidgeCV"], + "methods": [ + "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/__init__", + "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/fit", + "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/classes_@getter", + "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Ridge classifier with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs Leave-One-Out Cross-Validation. Currently,\nonly the n_features > n_samples case is handled efficiently.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Ridge classifier with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs Leave-One-Out Cross-Validation. 
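The ``{-1, 1}`` conversion performed by ``RidgeClassifier.fit`` above can be reproduced by hand with a multi-output ``Ridge``; a small sketch (iris is an arbitrary multiclass choice, and exact agreement assumes the default solver on both sides):

>>> import numpy as np
>>> from sklearn.datasets import load_iris
>>> from sklearn.linear_model import Ridge, RidgeClassifier
>>> from sklearn.preprocessing import LabelBinarizer
>>> X, y = load_iris(return_X_y=True)
>>> clf = RidgeClassifier(alpha=1.0).fit(X, y)
>>> Y = LabelBinarizer(pos_label=1, neg_label=-1).fit_transform(y)
>>> reg = Ridge(alpha=1.0).fit(X, Y)  # one regression output per class
>>> np.array_equal(clf.predict(X), reg.predict(X).argmax(axis=1))
True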
Currently,\nonly the n_features > n_samples case is handled efficiently.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n Array of alpha values to try.\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nscoring : string, callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the efficient Leave-One-Out cross-validation\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\nstore_cv_values : bool, default=False\n Flag indicating if the cross-validation values corresponding to\n each alpha should be stored in the ``cv_values_`` attribute (see\n below). This flag is only compatible with ``cv=None`` (i.e. using\n Leave-One-Out Cross-Validation).\n\nAttributes\n----------\ncv_values_ : ndarray of shape (n_samples, n_targets, n_alphas), optional\n Cross-validation values for each alpha (if ``store_cv_values=True`` and\n ``cv=None``). After ``fit()`` has been called, this attribute will\n contain the mean squared errors (by default) or the values of the\n ``{loss,score}_func`` function (if provided in the constructor). This\n attribute exists only when ``store_cv_values`` is True.\n\ncoef_ : ndarray of shape (1, n_features) or (n_targets, n_features)\n Coefficient of the features in the decision function.\n\n ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\nalpha_ : float\n Estimated regularization parameter.\n\nbest_score_ : float\n Score of base estimator with best alpha.\n\n .. 
versionadded:: 0.23\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\nExamples\n--------\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import RidgeClassifierCV\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n>>> clf.score(X, y)\n0.9630...\n\nSee Also\n--------\nRidge : Ridge regression.\nRidgeClassifier : Ridge classifier.\nRidgeCV : Ridge regression with built-in cross validation.\n\nNotes\n-----\nFor multi-class classification, n_class classifiers are trained in\na one-versus-all approach. Concretely, this is implemented by taking\nadvantage of the multi-variate response support in Ridge.", + "code": "class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):\n \"\"\"Ridge classifier with built-in cross-validation.\n\n See glossary entry for :term:`cross-validation estimator`.\n\n By default, it performs Leave-One-Out Cross-Validation. Currently,\n only the n_features > n_samples case is handled efficiently.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n alphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)\n Array of alpha values to try.\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`.\n\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n scoring : string, callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the efficient Leave-One-Out cross-validation\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n class_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n store_cv_values : bool, default=False\n Flag indicating if the cross-validation values corresponding to\n each alpha should be stored in the ``cv_values_`` attribute (see\n below). This flag is only compatible with ``cv=None`` (i.e. 
using\n Leave-One-Out Cross-Validation).\n\n Attributes\n ----------\n cv_values_ : ndarray of shape (n_samples, n_targets, n_alphas), optional\n Cross-validation values for each alpha (if ``store_cv_values=True`` and\n ``cv=None``). After ``fit()`` has been called, this attribute will\n contain the mean squared errors (by default) or the values of the\n ``{loss,score}_func`` function (if provided in the constructor). This\n attribute exists only when ``store_cv_values`` is True.\n\n coef_ : ndarray of shape (1, n_features) or (n_targets, n_features)\n Coefficient of the features in the decision function.\n\n ``coef_`` is of shape (1, n_features) when the given problem is binary.\n\n intercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function. Set to 0.0 if\n ``fit_intercept = False``.\n\n alpha_ : float\n Estimated regularization parameter.\n\n best_score_ : float\n Score of base estimator with best alpha.\n\n .. versionadded:: 0.23\n\n classes_ : ndarray of shape (n_classes,)\n The classes labels.\n\n Examples\n --------\n >>> from sklearn.datasets import load_breast_cancer\n >>> from sklearn.linear_model import RidgeClassifierCV\n >>> X, y = load_breast_cancer(return_X_y=True)\n >>> clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)\n >>> clf.score(X, y)\n 0.9630...\n\n See Also\n --------\n Ridge : Ridge regression.\n RidgeClassifier : Ridge classifier.\n RidgeCV : Ridge regression with built-in cross validation.\n\n Notes\n -----\n For multi-class classification, n_class classifiers are trained in\n a one-versus-all approach. Concretely, this is implemented by taking\n advantage of the multi-variate response support in Ridge.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, alphas=(0.1, 1.0, 10.0), *, fit_intercept=True,\n normalize=False, scoring=None, cv=None, class_weight=None,\n store_cv_values=False):\n super().__init__(\n alphas=alphas, fit_intercept=fit_intercept, normalize=normalize,\n scoring=scoring, cv=cv, store_cv_values=store_cv_values)\n self.class_weight = class_weight\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Ridge classifier with cv.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features. When using GCV,\n will be cast to float64 if necessary.\n\n y : ndarray of shape (n_samples,)\n Target values. Will be cast to X's dtype if necessary.\n\n sample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. 
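A short sketch tying together the ``class_weight`` and ``store_cv_values`` parameters documented above (breast cancer data as in the docstring example; the alpha grid is illustrative, and the shape follows the ``cv_values_`` description, with a single target column in the binary case):

>>> from sklearn.datasets import load_breast_cancer
>>> from sklearn.linear_model import RidgeClassifierCV
>>> X, y = load_breast_cancer(return_X_y=True)
>>> clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1],
...                         class_weight='balanced',
...                         store_cv_values=True).fit(X, y)
>>> clf.cv_values_.shape  # (n_samples, n_targets, n_alphas)
(569, 1, 4)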
If given a float, every sample\n will have the same weight.\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'],\n multi_output=True, y_numeric=False)\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)\n Y = self._label_binarizer.fit_transform(y)\n if not self._label_binarizer.y_type_.startswith('multilabel'):\n y = column_or_1d(y, warn=True)\n\n if self.class_weight:\n # modify the sample weights with the corresponding class weight\n sample_weight = (sample_weight *\n compute_sample_weight(self.class_weight, y))\n\n target = Y if self.cv is None else y\n _BaseRidgeCV.fit(self, X, target, sample_weight=sample_weight)\n return self\n\n @property\n def classes_(self):\n return self._label_binarizer.classes_\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge", + "name": "_BaseRidge", + "qname": "sklearn.linear_model._ridge._BaseRidge", + "decorators": [], + "superclasses": ["LinearModel"], + "methods": [ + "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/__init__", + "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _BaseRidge(LinearModel, metaclass=ABCMeta):\n @abstractmethod\n @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,\n copy_X=True, max_iter=None, tol=1e-3, solver=\"auto\",\n random_state=None):\n self.alpha = alpha\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.copy_X = copy_X\n self.max_iter = max_iter\n self.tol = tol\n self.solver = solver\n self.random_state = random_state\n\n def fit(self, X, y, sample_weight=None):\n\n # all other solvers work at both float precision levels\n _dtype = [np.float64, np.float32]\n _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X),\n self.solver)\n X, y = self._validate_data(X, y,\n accept_sparse=_accept_sparse,\n dtype=_dtype,\n multi_output=True, y_numeric=True)\n if sparse.issparse(X) and self.fit_intercept:\n if self.solver not in ['auto', 'sparse_cg', 'sag']:\n raise ValueError(\n \"solver='{}' does not support fitting the intercept \"\n \"on sparse data. Please set the solver to 'auto' or \"\n \"'sparse_cg', 'sag', or set `fit_intercept=False`\"\n .format(self.solver))\n if (self.solver == 'sag' and self.max_iter is None and\n self.tol > 1e-4):\n warnings.warn(\n '\"sag\" solver requires many iterations to fit '\n 'an intercept with sparse inputs. 
Either set the '\n 'solver to \"auto\" or \"sparse_cg\", or set a low '\n '\"tol\" and a high \"max_iter\" (especially if inputs are '\n 'not standardized).')\n solver = 'sag'\n else:\n solver = 'sparse_cg'\n else:\n solver = self.solver\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=X.dtype)\n\n # when X is sparse we only remove offset from y\n X, y, X_offset, y_offset, X_scale = self._preprocess_data(\n X, y, self.fit_intercept, self.normalize, self.copy_X,\n sample_weight=sample_weight, return_mean=True)\n\n if solver == 'sag' and sparse.issparse(X) and self.fit_intercept:\n self.coef_, self.n_iter_, self.intercept_ = _ridge_regression(\n X, y, alpha=self.alpha, sample_weight=sample_weight,\n max_iter=self.max_iter, tol=self.tol, solver='sag',\n random_state=self.random_state, return_n_iter=True,\n return_intercept=True, check_input=False)\n # add the offset which was subtracted by _preprocess_data\n self.intercept_ += y_offset\n\n else:\n if sparse.issparse(X) and self.fit_intercept:\n # required to fit intercept with sparse_cg solver\n params = {'X_offset': X_offset, 'X_scale': X_scale}\n else:\n # for dense matrices or when intercept is set to 0\n params = {}\n\n self.coef_, self.n_iter_ = _ridge_regression(\n X, y, alpha=self.alpha, sample_weight=sample_weight,\n max_iter=self.max_iter, tol=self.tol, solver=solver,\n random_state=self.random_state, return_n_iter=True,\n return_intercept=False, check_input=False, **params)\n self._set_intercept(X_offset, y_offset, X_scale)\n\n return self", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "solver", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV", + "name": "_BaseRidgeCV", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV", + "decorators": [], + "superclasses": ["LinearModel"], + "methods": [ + "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__", + "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _BaseRidgeCV(LinearModel):\n @_deprecate_positional_args\n def __init__(self, alphas=(0.1, 1.0, 10.0), *,\n fit_intercept=True, normalize=False, scoring=None,\n cv=None, gcv_mode=None, store_cv_values=False,\n alpha_per_target=False):\n self.alphas = np.asarray(alphas)\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.scoring = scoring\n self.cv = cv\n self.gcv_mode = gcv_mode\n self.store_cv_values = store_cv_values\n self.alpha_per_target = alpha_per_target\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Ridge regression model with cv.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Training data. If using GCV, will be cast to float64\n if necessary.\n\n y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary.\n\n sample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. 
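The sparse-input guard in ``_BaseRidge.fit`` above can be exercised directly; a minimal sketch (random sparse data, sizes arbitrary): a solver that cannot fit an intercept on sparse input raises, while 'auto' silently falls back to 'sparse_cg':

>>> import numpy as np
>>> from scipy import sparse
>>> from sklearn.linear_model import Ridge
>>> rng = np.random.RandomState(0)
>>> X = sparse.random(30, 5, density=0.5, format='csr', random_state=rng)
>>> y = rng.randn(30)
>>> Ridge(solver='auto').fit(X, y)  # internally resolved to 'sparse_cg'
Ridge()
>>> try:
...     Ridge(solver='cholesky').fit(X, y)
... except ValueError:
...     print('cholesky cannot fit an intercept on sparse data')
cholesky cannot fit an intercept on sparse data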
If given a float, every sample\n will have the same weight.\n\n Returns\n -------\n self : object\n\n Notes\n -----\n When sample_weight is provided, the selected hyperparameter may depend\n on whether we use leave-one-out cross-validation (cv=None or cv='auto')\n or another form of cross-validation, because only leave-one-out\n cross-validation takes the sample weights into account when computing\n the validation score.\n \"\"\"\n cv = self.cv\n if cv is None:\n estimator = _RidgeGCV(self.alphas,\n fit_intercept=self.fit_intercept,\n normalize=self.normalize,\n scoring=self.scoring,\n gcv_mode=self.gcv_mode,\n store_cv_values=self.store_cv_values,\n is_clf=is_classifier(self),\n alpha_per_target=self.alpha_per_target)\n estimator.fit(X, y, sample_weight=sample_weight)\n self.alpha_ = estimator.alpha_\n self.best_score_ = estimator.best_score_\n if self.store_cv_values:\n self.cv_values_ = estimator.cv_values_\n else:\n if self.store_cv_values:\n raise ValueError(\"cv!=None and store_cv_values=True\"\n \" are incompatible\")\n if self.alpha_per_target:\n raise ValueError(\"cv!=None and alpha_per_target=True\"\n \" are incompatible\")\n parameters = {'alpha': self.alphas}\n solver = 'sparse_cg' if sparse.issparse(X) else 'auto'\n model = RidgeClassifier if is_classifier(self) else Ridge\n gs = GridSearchCV(model(fit_intercept=self.fit_intercept,\n normalize=self.normalize,\n solver=solver),\n parameters, cv=cv, scoring=self.scoring)\n gs.fit(X, y, sample_weight=sample_weight)\n estimator = gs.best_estimator_\n self.alpha_ = gs.best_estimator_.alpha\n self.best_score_ = gs.best_score_\n\n self.coef_ = estimator.coef_\n self.intercept_ = estimator.intercept_\n self.n_features_in_ = estimator.n_features_in_\n\n return self", + "instance_attributes": [ + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "store_cv_values", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "alpha_per_target", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "cv_values_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityClassifier", + "name": "_IdentityClassifier", + "qname": "sklearn.linear_model._ridge._IdentityClassifier", + "decorators": [], + "superclasses": ["LinearClassifierMixin"], + "methods": [ + "scikit-learn/sklearn.linear_model._ridge/_IdentityClassifier/__init__", + "scikit-learn/sklearn.linear_model._ridge/_IdentityClassifier/decision_function" + ], + "is_public": false, + "reexported_by": [], + "description": "Fake classifier which will directly output the prediction.\n\nWe inherit from LinearClassifierMixin to get the proper shape for the\noutput `y`.", + "docstring": "Fake classifier which will directly output the prediction.\n\nWe inherit from LinearClassifierMixin to get the proper shape for the\noutput `y`.", + "code": "class _IdentityClassifier(LinearClassifierMixin):\n \"\"\"Fake classifier which will directly output the prediction.\n\n We inherit from LinearClassifierMixin to get the proper shape for the\n output `y`.\n \"\"\"\n def __init__(self, classes):\n self.classes_ = classes\n\n def decision_function(self, y_predict):\n return y_predict", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityRegressor", + "name": "_IdentityRegressor", + "qname": 
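As the ``cv is None`` dispatch in ``_BaseRidgeCV.fit`` above shows, a non-None ``cv`` simply delegates to a grid search over ``alphas``; a sketch of that equivalence (diabetes data and 5 folds chosen arbitrarily):

>>> from sklearn.datasets import load_diabetes
>>> from sklearn.linear_model import Ridge, RidgeCV
>>> from sklearn.model_selection import GridSearchCV
>>> X, y = load_diabetes(return_X_y=True)
>>> ridge_cv = RidgeCV(alphas=[0.1, 1.0, 10.0], cv=5).fit(X, y)
>>> gs = GridSearchCV(Ridge(), {'alpha': [0.1, 1.0, 10.0]}, cv=5).fit(X, y)
>>> ridge_cv.alpha_ == gs.best_estimator_.alpha  # same model selected
True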
"sklearn.linear_model._ridge._IdentityRegressor", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.linear_model._ridge/_IdentityRegressor/decision_function", + "scikit-learn/sklearn.linear_model._ridge/_IdentityRegressor/predict" + ], + "is_public": false, + "reexported_by": [], + "description": "Fake regressor which will directly output the prediction.", + "docstring": "Fake regressor which will directly output the prediction.", + "code": "class _IdentityRegressor:\n \"\"\"Fake regressor which will directly output the prediction.\"\"\"\n\n def decision_function(self, y_predict):\n return y_predict\n\n def predict(self, y_predict):\n return y_predict", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV", + "name": "_RidgeGCV", + "qname": "sklearn.linear_model._ridge._RidgeGCV", + "decorators": [], + "superclasses": ["LinearModel"], + "methods": [ + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/__init__", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_decomp_diag", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_diag_dot", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_compute_gram", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_compute_covariance", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_sparse_multidot_diag", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_eigen_decompose_gram", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_gram", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_eigen_decompose_covariance", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_no_intercept", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_intercept", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_svd_decompose_design_matrix", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_svd_design_matrix", + "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Ridge regression with built-in Leave-one-out Cross-Validation.\n\nThis class is not intended to be used directly. Use RidgeCV instead.", + "docstring": "Ridge regression with built-in Leave-one-out Cross-Validation.\n\nThis class is not intended to be used directly. 
Use RidgeCV instead.\n\nNotes\n-----\n\nWe want to solve (K + alpha*Id)c = y,\nwhere K = X X^T is the kernel matrix.\n\nLet G = (K + alpha*Id).\n\nDual solution: c = G^-1y\nPrimal solution: w = X^T c\n\nCompute eigendecomposition K = Q V Q^T.\nThen G^-1 = Q (V + alpha*Id)^-1 Q^T,\nwhere (V + alpha*Id) is diagonal.\nIt is thus inexpensive to invert for many alphas.\n\nLet loov be the vector of prediction values for each example\nwhen the model was fitted with all examples but this example.\n\nloov = (KG^-1Y - diag(KG^-1)Y) / diag(I-KG^-1)\n\nLet looe be the vector of prediction errors for each example\nwhen the model was fitted with all examples but this example.\n\nlooe = y - loov = c / diag(G^-1)\n\nThe best score (negative mean squared error or user-provided scoring) is\nstored in the `best_score_` attribute, and the selected hyperparameter in\n`alpha_`.\n\nReferences\n----------\nhttp://cbcl.mit.edu/publications/ps/MIT-CSAIL-TR-2007-025.pdf\nhttps://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf", + "code": "class _RidgeGCV(LinearModel):\n    \"\"\"Ridge regression with built-in Leave-one-out Cross-Validation.\n\n    This class is not intended to be used directly. Use RidgeCV instead.\n\n    Notes\n    -----\n\n    We want to solve (K + alpha*Id)c = y,\n    where K = X X^T is the kernel matrix.\n\n    Let G = (K + alpha*Id).\n\n    Dual solution: c = G^-1y\n    Primal solution: w = X^T c\n\n    Compute eigendecomposition K = Q V Q^T.\n    Then G^-1 = Q (V + alpha*Id)^-1 Q^T,\n    where (V + alpha*Id) is diagonal.\n    It is thus inexpensive to invert for many alphas.\n\n    Let loov be the vector of prediction values for each example\n    when the model was fitted with all examples but this example.\n\n    loov = (KG^-1Y - diag(KG^-1)Y) / diag(I-KG^-1)\n\n    Let looe be the vector of prediction errors for each example\n    when the model was fitted with all examples but this example.\n\n    looe = y - loov = c / diag(G^-1)\n\n    The best score (negative mean squared error or user-provided scoring) is\n    stored in the `best_score_` attribute, and the selected hyperparameter in\n    `alpha_`.\n\n    References\n    ----------\n    http://cbcl.mit.edu/publications/ps/MIT-CSAIL-TR-2007-025.pdf\n    https://www.mit.edu/~9.520/spring07/Classes/rlsslides.pdf\n    \"\"\"\n    @_deprecate_positional_args\n    def __init__(self, alphas=(0.1, 1.0, 10.0), *,\n                 fit_intercept=True, normalize=False,\n                 scoring=None, copy_X=True,\n                 gcv_mode=None, store_cv_values=False,\n                 is_clf=False, alpha_per_target=False):\n        self.alphas = np.asarray(alphas)\n        self.fit_intercept = fit_intercept\n        self.normalize = normalize\n        self.scoring = scoring\n        self.copy_X = copy_X\n        self.gcv_mode = gcv_mode\n        self.store_cv_values = store_cv_values\n        self.is_clf = is_clf\n        self.alpha_per_target = alpha_per_target\n\n    @staticmethod\n    def _decomp_diag(v_prime, Q):\n        # compute diagonal of the matrix: dot(Q, dot(diag(v_prime), Q^T))\n        return (v_prime * Q ** 2).sum(axis=-1)\n\n    @staticmethod\n    def _diag_dot(D, B):\n        # compute dot(diag(D), B)\n        if len(B.shape) > 1:\n            # handle case where B is > 1-d\n            D = D[(slice(None), ) + (np.newaxis, ) * (len(B.shape) - 1)]\n        return D * B\n\n    def _compute_gram(self, X, sqrt_sw):\n        \"\"\"Computes the Gram matrix XX^T with possible centering.\n\n        Parameters\n        ----------\n        X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n            The preprocessed design matrix.\n\n        sqrt_sw : ndarray of shape (n_samples,)\n            square roots of sample weights\n\n        Returns\n        -------\n        gram : ndarray of shape (n_samples, n_samples)\n            The Gram matrix.\n        X_mean : ndarray of shape (n_features,)\n            The 
weighted mean of ``X`` for each feature.\n\n        Notes\n        -----\n        When X is dense the centering has been done in preprocessing\n        so the mean is 0 and we just compute XX^T.\n\n        When X is sparse it has not been centered in preprocessing, but it has\n        been scaled by sqrt(sample weights).\n\n        When self.fit_intercept is False no centering is done.\n\n        The centered X is never actually computed because centering would break\n        the sparsity of X.\n        \"\"\"\n        center = self.fit_intercept and sparse.issparse(X)\n        if not center:\n            # in this case centering has been done in preprocessing\n            # or we are not fitting an intercept.\n            X_mean = np.zeros(X.shape[1], dtype=X.dtype)\n            return safe_sparse_dot(X, X.T, dense_output=True), X_mean\n        # X is sparse\n        n_samples = X.shape[0]\n        sample_weight_matrix = sparse.dia_matrix(\n            (sqrt_sw, 0), shape=(n_samples, n_samples))\n        X_weighted = sample_weight_matrix.dot(X)\n        X_mean, _ = mean_variance_axis(X_weighted, axis=0)\n        X_mean *= n_samples / sqrt_sw.dot(sqrt_sw)\n        X_mX = sqrt_sw[:, None] * safe_sparse_dot(\n            X_mean, X.T, dense_output=True)\n        X_mX_m = np.outer(sqrt_sw, sqrt_sw) * np.dot(X_mean, X_mean)\n        return (safe_sparse_dot(X, X.T, dense_output=True) + X_mX_m\n                - X_mX - X_mX.T, X_mean)\n\n    def _compute_covariance(self, X, sqrt_sw):\n        \"\"\"Computes covariance matrix X^TX with possible centering.\n\n        Parameters\n        ----------\n        X : sparse matrix of shape (n_samples, n_features)\n            The preprocessed design matrix.\n\n        sqrt_sw : ndarray of shape (n_samples,)\n            square roots of sample weights\n\n        Returns\n        -------\n        covariance : ndarray of shape (n_features, n_features)\n            The covariance matrix.\n        X_mean : ndarray of shape (n_features,)\n            The weighted mean of ``X`` for each feature.\n\n        Notes\n        -----\n        Since X is sparse it has not been centered in preprocessing, but it has\n        been scaled by sqrt(sample weights).\n\n        When self.fit_intercept is False no centering is done.\n\n        The centered X is never actually computed because centering would break\n        the sparsity of X.\n        \"\"\"\n        if not self.fit_intercept:\n            # in this case centering has been done in preprocessing\n            # or we are not fitting an intercept.\n            X_mean = np.zeros(X.shape[1], dtype=X.dtype)\n            return safe_sparse_dot(X.T, X, dense_output=True), X_mean\n        # this function only gets called for sparse X\n        n_samples = X.shape[0]\n        sample_weight_matrix = sparse.dia_matrix(\n            (sqrt_sw, 0), shape=(n_samples, n_samples))\n        X_weighted = sample_weight_matrix.dot(X)\n        X_mean, _ = mean_variance_axis(X_weighted, axis=0)\n        X_mean = X_mean * n_samples / sqrt_sw.dot(sqrt_sw)\n        weight_sum = sqrt_sw.dot(sqrt_sw)\n        return (safe_sparse_dot(X.T, X, dense_output=True) -\n                weight_sum * np.outer(X_mean, X_mean),\n                X_mean)\n\n    def _sparse_multidot_diag(self, X, A, X_mean, sqrt_sw):\n        \"\"\"Compute the diagonal of (X - X_mean).dot(A).dot((X - X_mean).T)\n        without explicitly centering X or computing X.dot(A)\n        when X is sparse.\n\n        Parameters\n        ----------\n        X : sparse matrix of shape (n_samples, n_features)\n\n        A : ndarray of shape (n_features, n_features)\n\n        X_mean : ndarray of shape (n_features,)\n\n        sqrt_sw : ndarray of shape (n_samples,)\n            square roots of sample weights\n\n        Returns\n        -------\n        diag : np.ndarray, shape (n_samples,)\n            The computed diagonal.\n        \"\"\"\n        intercept_col = scale = sqrt_sw\n        batch_size = X.shape[1]\n        diag = np.empty(X.shape[0], dtype=X.dtype)\n        for start in range(0, X.shape[0], batch_size):\n            batch = slice(start, min(X.shape[0], start + batch_size), 1)\n            X_batch = np.empty(\n                (X[batch].shape[0], X.shape[1] + 
self.fit_intercept),\n dtype=X.dtype\n )\n if self.fit_intercept:\n X_batch[:, :-1] = X[batch].A - X_mean * scale[batch][:, None]\n X_batch[:, -1] = intercept_col[batch]\n else:\n X_batch = X[batch].A\n diag[batch] = (X_batch.dot(A) * X_batch).sum(axis=1)\n return diag\n\n def _eigen_decompose_gram(self, X, y, sqrt_sw):\n \"\"\"Eigendecomposition of X.X^T, used when n_samples <= n_features.\"\"\"\n # if X is dense it has already been centered in preprocessing\n K, X_mean = self._compute_gram(X, sqrt_sw)\n if self.fit_intercept:\n # to emulate centering X with sample weights,\n # ie removing the weighted average, we add a column\n # containing the square roots of the sample weights.\n # by centering, it is orthogonal to the other columns\n K += np.outer(sqrt_sw, sqrt_sw)\n eigvals, Q = linalg.eigh(K)\n QT_y = np.dot(Q.T, y)\n return X_mean, eigvals, Q, QT_y\n\n def _solve_eigen_gram(self, alpha, y, sqrt_sw, X_mean, eigvals, Q, QT_y):\n \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n Used when we have a decomposition of X.X^T (n_samples <= n_features).\n \"\"\"\n w = 1. / (eigvals + alpha)\n if self.fit_intercept:\n # the vector containing the square roots of the sample weights (1\n # when no sample weights) is the eigenvector of XX^T which\n # corresponds to the intercept; we cancel the regularization on\n # this dimension. the corresponding eigenvalue is\n # sum(sample_weight).\n normalized_sw = sqrt_sw / np.linalg.norm(sqrt_sw)\n intercept_dim = _find_smallest_angle(normalized_sw, Q)\n w[intercept_dim] = 0 # cancel regularization for the intercept\n\n c = np.dot(Q, self._diag_dot(w, QT_y))\n G_inverse_diag = self._decomp_diag(w, Q)\n # handle case where y is 2-d\n if len(y.shape) != 1:\n G_inverse_diag = G_inverse_diag[:, np.newaxis]\n return G_inverse_diag, c\n\n def _eigen_decompose_covariance(self, X, y, sqrt_sw):\n \"\"\"Eigendecomposition of X^T.X, used when n_samples > n_features\n and X is sparse.\n \"\"\"\n n_samples, n_features = X.shape\n cov = np.empty((n_features + 1, n_features + 1), dtype=X.dtype)\n cov[:-1, :-1], X_mean = self._compute_covariance(X, sqrt_sw)\n if not self.fit_intercept:\n cov = cov[:-1, :-1]\n # to emulate centering X with sample weights,\n # ie removing the weighted average, we add a column\n # containing the square roots of the sample weights.\n # by centering, it is orthogonal to the other columns\n # when all samples have the same weight we add a column of 1\n else:\n cov[-1] = 0\n cov[:, -1] = 0\n cov[-1, -1] = sqrt_sw.dot(sqrt_sw)\n nullspace_dim = max(0, n_features - n_samples)\n eigvals, V = linalg.eigh(cov)\n # remove eigenvalues and vectors in the null space of X^T.X\n eigvals = eigvals[nullspace_dim:]\n V = V[:, nullspace_dim:]\n return X_mean, eigvals, V, X\n\n def _solve_eigen_covariance_no_intercept(\n self, alpha, y, sqrt_sw, X_mean, eigvals, V, X):\n \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n Used when we have a decomposition of X^T.X\n (n_samples > n_features and X is sparse), and not fitting an intercept.\n \"\"\"\n w = 1 / (eigvals + alpha)\n A = (V * w).dot(V.T)\n AXy = A.dot(safe_sparse_dot(X.T, y, dense_output=True))\n y_hat = safe_sparse_dot(X, AXy, dense_output=True)\n hat_diag = self._sparse_multidot_diag(X, A, X_mean, sqrt_sw)\n if len(y.shape) != 1:\n # handle case where y is 2-d\n hat_diag = hat_diag[:, np.newaxis]\n return (1 - hat_diag) / alpha, (y - y_hat) / alpha\n\n def _solve_eigen_covariance_intercept(\n self, alpha, y, sqrt_sw, X_mean, eigvals, V, X):\n \"\"\"Compute dual coefficients and diagonal 
of G^-1.\n\n Used when we have a decomposition of X^T.X\n (n_samples > n_features and X is sparse),\n and we are fitting an intercept.\n \"\"\"\n # the vector [0, 0, ..., 0, 1]\n # is the eigenvector of X^TX which\n # corresponds to the intercept; we cancel the regularization on\n # this dimension. the corresponding eigenvalue is\n # sum(sample_weight), e.g. n when uniform sample weights.\n intercept_sv = np.zeros(V.shape[0])\n intercept_sv[-1] = 1\n intercept_dim = _find_smallest_angle(intercept_sv, V)\n w = 1 / (eigvals + alpha)\n w[intercept_dim] = 1 / eigvals[intercept_dim]\n A = (V * w).dot(V.T)\n # add a column to X containing the square roots of sample weights\n X_op = _X_CenterStackOp(X, X_mean, sqrt_sw)\n AXy = A.dot(X_op.T.dot(y))\n y_hat = X_op.dot(AXy)\n hat_diag = self._sparse_multidot_diag(X, A, X_mean, sqrt_sw)\n # return (1 - hat_diag), (y - y_hat)\n if len(y.shape) != 1:\n # handle case where y is 2-d\n hat_diag = hat_diag[:, np.newaxis]\n return (1 - hat_diag) / alpha, (y - y_hat) / alpha\n\n def _solve_eigen_covariance(\n self, alpha, y, sqrt_sw, X_mean, eigvals, V, X):\n \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n Used when we have a decomposition of X^T.X\n (n_samples > n_features and X is sparse).\n \"\"\"\n if self.fit_intercept:\n return self._solve_eigen_covariance_intercept(\n alpha, y, sqrt_sw, X_mean, eigvals, V, X)\n return self._solve_eigen_covariance_no_intercept(\n alpha, y, sqrt_sw, X_mean, eigvals, V, X)\n\n def _svd_decompose_design_matrix(self, X, y, sqrt_sw):\n # X already centered\n X_mean = np.zeros(X.shape[1], dtype=X.dtype)\n if self.fit_intercept:\n # to emulate fit_intercept=True situation, add a column\n # containing the square roots of the sample weights\n # by centering, the other columns are orthogonal to that one\n intercept_column = sqrt_sw[:, None]\n X = np.hstack((X, intercept_column))\n U, singvals, _ = linalg.svd(X, full_matrices=0)\n singvals_sq = singvals ** 2\n UT_y = np.dot(U.T, y)\n return X_mean, singvals_sq, U, UT_y\n\n def _solve_svd_design_matrix(\n self, alpha, y, sqrt_sw, X_mean, singvals_sq, U, UT_y):\n \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n Used when we have an SVD decomposition of X\n (n_samples > n_features and X is dense).\n \"\"\"\n w = ((singvals_sq + alpha) ** -1) - (alpha ** -1)\n if self.fit_intercept:\n # detect intercept column\n normalized_sw = sqrt_sw / np.linalg.norm(sqrt_sw)\n intercept_dim = _find_smallest_angle(normalized_sw, U)\n # cancel the regularization for the intercept\n w[intercept_dim] = - (alpha ** -1)\n c = np.dot(U, self._diag_dot(w, UT_y)) + (alpha ** -1) * y\n G_inverse_diag = self._decomp_diag(w, U) + (alpha ** -1)\n if len(y.shape) != 1:\n # handle case where y is 2-d\n G_inverse_diag = G_inverse_diag[:, np.newaxis]\n return G_inverse_diag, c\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Ridge regression model with gcv.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data. Will be cast to float64 if necessary.\n\n y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to float64 if necessary.\n\n sample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. 
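The leave-one-out identity from the Notes above (looe = y - loov = c / diag(G^-1)) means the squared errors that ``fit`` accumulates below should match explicit leave-one-out refits exactly; a small numeric check (random dense data, a single fixed alpha):

>>> import numpy as np
>>> from sklearn.linear_model import Ridge, RidgeCV
>>> rng = np.random.RandomState(0)
>>> X, y = rng.randn(20, 3), rng.randn(20)
>>> loo = RidgeCV(alphas=[1.0], store_cv_values=True).fit(X, y)
>>> errors = [
...     (y[i] - Ridge(alpha=1.0).fit(np.delete(X, i, axis=0),
...                                  np.delete(y, i)).predict(X[i:i+1])[0]) ** 2
...     for i in range(20)]  # one explicit refit per held-out sample
>>> np.allclose(loo.cv_values_.ravel(), errors)
True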
If given a float, every sample\n will have the same weight.\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'],\n dtype=[np.float64],\n multi_output=True, y_numeric=True)\n\n # alpha_per_target cannot be used in classifier mode. All subclasses\n # of _RidgeGCV that are classifiers keep alpha_per_target at its\n # default value: False, so the condition below should never happen.\n assert not (self.is_clf and self.alpha_per_target)\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=X.dtype)\n\n if np.any(self.alphas <= 0):\n raise ValueError(\n \"alphas must be positive. Got {} containing some \"\n \"negative or null value instead.\".format(self.alphas))\n\n X, y, X_offset, y_offset, X_scale = LinearModel._preprocess_data(\n X, y, self.fit_intercept, self.normalize, self.copy_X,\n sample_weight=sample_weight)\n\n gcv_mode = _check_gcv_mode(X, self.gcv_mode)\n\n if gcv_mode == 'eigen':\n decompose = self._eigen_decompose_gram\n solve = self._solve_eigen_gram\n elif gcv_mode == 'svd':\n if sparse.issparse(X):\n decompose = self._eigen_decompose_covariance\n solve = self._solve_eigen_covariance\n else:\n decompose = self._svd_decompose_design_matrix\n solve = self._solve_svd_design_matrix\n\n n_samples = X.shape[0]\n\n if sample_weight is not None:\n X, y = _rescale_data(X, y, sample_weight)\n sqrt_sw = np.sqrt(sample_weight)\n else:\n sqrt_sw = np.ones(n_samples, dtype=X.dtype)\n\n X_mean, *decomposition = decompose(X, y, sqrt_sw)\n\n scorer = check_scoring(self, scoring=self.scoring, allow_none=True)\n error = scorer is None\n\n n_y = 1 if len(y.shape) == 1 else y.shape[1]\n n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas)\n\n if self.store_cv_values:\n self.cv_values_ = np.empty(\n (n_samples * n_y, n_alphas), dtype=X.dtype)\n\n best_coef, best_score, best_alpha = None, None, None\n\n for i, alpha in enumerate(np.atleast_1d(self.alphas)):\n G_inverse_diag, c = solve(\n float(alpha), y, sqrt_sw, X_mean, *decomposition)\n if error:\n squared_errors = (c / G_inverse_diag) ** 2\n if self.alpha_per_target:\n alpha_score = -squared_errors.mean(axis=0)\n else:\n alpha_score = -squared_errors.mean()\n if self.store_cv_values:\n self.cv_values_[:, i] = squared_errors.ravel()\n else:\n predictions = y - (c / G_inverse_diag)\n if self.store_cv_values:\n self.cv_values_[:, i] = predictions.ravel()\n\n if self.is_clf:\n identity_estimator = _IdentityClassifier(\n classes=np.arange(n_y)\n )\n alpha_score = scorer(identity_estimator,\n predictions, y.argmax(axis=1))\n else:\n identity_estimator = _IdentityRegressor()\n if self.alpha_per_target:\n alpha_score = np.array([\n scorer(identity_estimator,\n predictions[:, j], y[:, j])\n for j in range(n_y)\n ])\n else:\n alpha_score = scorer(identity_estimator,\n predictions.ravel(), y.ravel())\n\n # Keep track of the best model\n if best_score is None:\n # initialize\n if self.alpha_per_target and n_y > 1:\n best_coef = c\n best_score = np.atleast_1d(alpha_score)\n best_alpha = np.full(n_y, alpha)\n else:\n best_coef = c\n best_score = alpha_score\n best_alpha = alpha\n else:\n # update\n if self.alpha_per_target and n_y > 1:\n to_update = alpha_score > best_score\n best_coef[:, to_update] = c[:, to_update]\n best_score[to_update] = alpha_score[to_update]\n best_alpha[to_update] = alpha\n elif alpha_score > best_score:\n best_coef, best_score, best_alpha = c, alpha_score, alpha\n\n self.alpha_ = best_alpha\n self.best_score_ = 
best_score\n self.dual_coef_ = best_coef\n self.coef_ = safe_sparse_dot(self.dual_coef_.T, X)\n\n X_offset += X_mean * X_scale\n self._set_intercept(X_offset, y_offset, X_scale)\n\n if self.store_cv_values:\n if len(y.shape) == 1:\n cv_values_shape = n_samples, n_alphas\n else:\n cv_values_shape = n_samples, n_y, n_alphas\n self.cv_values_ = self.cv_values_.reshape(cv_values_shape)\n\n return self", + "instance_attributes": [ + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "normalize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "store_cv_values", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "is_clf", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "alpha_per_target", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "cv_values_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp", + "name": "_XT_CenterStackOp", + "qname": "sklearn.linear_model._ridge._XT_CenterStackOp", + "decorators": [], + "superclasses": ["sparse.linalg.LinearOperator"], + "methods": [ + "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/__init__", + "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/_matvec", + "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/_matmat" + ], + "is_public": false, + "reexported_by": [], + "description": "Behaves as transposed centered and scaled X with an intercept column.\n\nThis operator behaves as\nnp.hstack([X - sqrt_sw[:, None] * X_mean, sqrt_sw[:, None]]).T", + "docstring": "Behaves as transposed centered and scaled X with an intercept column.\n\nThis operator behaves as\nnp.hstack([X - sqrt_sw[:, None] * X_mean, sqrt_sw[:, None]]).T", + "code": "class _XT_CenterStackOp(sparse.linalg.LinearOperator):\n \"\"\"Behaves as transposed centered and scaled X with an intercept column.\n\n This operator behaves as\n np.hstack([X - sqrt_sw[:, None] * X_mean, sqrt_sw[:, None]]).T\n \"\"\"\n\n def __init__(self, X, X_mean, sqrt_sw):\n n_samples, n_features = X.shape\n super().__init__(X.dtype, (n_features + 1, n_samples))\n self.X = X\n self.X_mean = X_mean\n self.sqrt_sw = sqrt_sw\n\n def _matvec(self, v):\n v = v.ravel()\n n_features = self.shape[0]\n res = np.empty(n_features, dtype=self.X.dtype)\n res[:-1] = (\n safe_sparse_dot(self.X.T, v, dense_output=True) -\n (self.X_mean * self.sqrt_sw.dot(v))\n )\n res[-1] = np.dot(v, self.sqrt_sw)\n return res\n\n def _matmat(self, v):\n n_features = self.shape[0]\n res = np.empty((n_features, v.shape[1]), dtype=self.X.dtype)\n res[:-1] = (\n safe_sparse_dot(self.X.T, v, dense_output=True) -\n self.X_mean[:, None] * self.sqrt_sw.dot(v)\n )\n res[-1] = np.dot(self.sqrt_sw, v)\n return res", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp", + "name": "_X_CenterStackOp", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp", + "decorators": [], + "superclasses": ["sparse.linalg.LinearOperator"], + "methods": [ + "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/__init__", + "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/_matvec", + "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/_matmat", + "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/_transpose" + ], + "is_public": 
false, + "reexported_by": [], + "description": "Behaves as centered and scaled X with an added intercept column.\n\nThis operator behaves as\nnp.hstack([X - sqrt_sw[:, None] * X_mean, sqrt_sw[:, None]])", + "docstring": "Behaves as centered and scaled X with an added intercept column.\n\nThis operator behaves as\nnp.hstack([X - sqrt_sw[:, None] * X_mean, sqrt_sw[:, None]])", + "code": "class _X_CenterStackOp(sparse.linalg.LinearOperator):\n \"\"\"Behaves as centered and scaled X with an added intercept column.\n\n This operator behaves as\n np.hstack([X - sqrt_sw[:, None] * X_mean, sqrt_sw[:, None]])\n \"\"\"\n\n def __init__(self, X, X_mean, sqrt_sw):\n n_samples, n_features = X.shape\n super().__init__(X.dtype, (n_samples, n_features + 1))\n self.X = X\n self.X_mean = X_mean\n self.sqrt_sw = sqrt_sw\n\n def _matvec(self, v):\n v = v.ravel()\n return safe_sparse_dot(\n self.X, v[:-1], dense_output=True\n ) - self.sqrt_sw * self.X_mean.dot(v[:-1]) + v[-1] * self.sqrt_sw\n\n def _matmat(self, v):\n return (\n safe_sparse_dot(self.X, v[:-1], dense_output=True) -\n self.sqrt_sw[:, None] * self.X_mean.dot(v[:-1]) + v[-1] *\n self.sqrt_sw[:, None])\n\n def _transpose(self):\n return _XT_CenterStackOp(self.X, self.X_mean, self.sqrt_sw)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD", + "name": "BaseSGD", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD", + "decorators": [], + "superclasses": ["SparseCoefMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/set_params", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/fit", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_validate_params", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_loss_function", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_learning_rate_type", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_penalty_type", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_allocate_parameter_mem", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_split", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_score_cb", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/standard_coef_@getter", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/standard_intercept_@getter", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/average_coef_@getter", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/average_intercept_@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for SGD classification and regression.", + "docstring": "Base class for SGD classification and regression.", + "code": "class BaseSGD(SparseCoefMixin, BaseEstimator, metaclass=ABCMeta):\n \"\"\"Base class for SGD classification and regression.\"\"\"\n @_deprecate_positional_args\n def __init__(self, loss, *, penalty='l2', alpha=0.0001, C=1.0,\n l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=1e-3,\n shuffle=True, verbose=0, epsilon=0.1, random_state=None,\n learning_rate=\"optimal\", eta0=0.0, power_t=0.5,\n early_stopping=False, validation_fraction=0.1,\n n_iter_no_change=5, warm_start=False, average=False):\n self.loss = loss\n self.penalty = penalty\n self.learning_rate = learning_rate\n 
self.epsilon = epsilon\n self.alpha = alpha\n self.C = C\n self.l1_ratio = l1_ratio\n self.fit_intercept = fit_intercept\n self.shuffle = shuffle\n self.random_state = random_state\n self.verbose = verbose\n self.eta0 = eta0\n self.power_t = power_t\n self.early_stopping = early_stopping\n self.validation_fraction = validation_fraction\n self.n_iter_no_change = n_iter_no_change\n self.warm_start = warm_start\n self.average = average\n self.max_iter = max_iter\n self.tol = tol\n # current tests expect init to do parameter validation\n # but we are not allowed to set attributes\n self._validate_params()\n\n def set_params(self, **kwargs):\n \"\"\"Set and validate the parameters of estimator.\n\n Parameters\n ----------\n **kwargs : dict\n Estimator parameters.\n\n Returns\n -------\n self : object\n Estimator instance.\n \"\"\"\n super().set_params(**kwargs)\n self._validate_params()\n return self\n\n @abstractmethod\n def fit(self, X, y):\n \"\"\"Fit model.\"\"\"\n\n def _validate_params(self, for_partial_fit=False):\n \"\"\"Validate input params. \"\"\"\n if not isinstance(self.shuffle, bool):\n raise ValueError(\"shuffle must be either True or False\")\n if not isinstance(self.early_stopping, bool):\n raise ValueError(\"early_stopping must be either True or False\")\n if self.early_stopping and for_partial_fit:\n raise ValueError(\"early_stopping should be False with partial_fit\")\n if self.max_iter is not None and self.max_iter <= 0:\n raise ValueError(\"max_iter must be > zero. Got %f\" % self.max_iter)\n if not (0.0 <= self.l1_ratio <= 1.0):\n raise ValueError(\"l1_ratio must be in [0, 1]\")\n if self.alpha < 0.0:\n raise ValueError(\"alpha must be >= 0\")\n if self.n_iter_no_change < 1:\n raise ValueError(\"n_iter_no_change must be >= 1\")\n if not (0.0 < self.validation_fraction < 1.0):\n raise ValueError(\"validation_fraction must be in range (0, 1)\")\n if self.learning_rate in (\"constant\", \"invscaling\", \"adaptive\"):\n if self.eta0 <= 0.0:\n raise ValueError(\"eta0 must be > 0\")\n if self.learning_rate == \"optimal\" and self.alpha == 0:\n raise ValueError(\"alpha must be > 0 since \"\n \"learning_rate is 'optimal'. alpha is used \"\n \"to compute the optimal learning rate.\")\n\n # raises ValueError if not registered\n self._get_penalty_type(self.penalty)\n self._get_learning_rate_type(self.learning_rate)\n\n if self.loss not in self.loss_functions:\n raise ValueError(\"The loss %s is not supported. \" % self.loss)\n\n def _get_loss_function(self, loss):\n \"\"\"Get concrete ``LossFunction`` object for str ``loss``. \"\"\"\n try:\n loss_ = self.loss_functions[loss]\n loss_class, args = loss_[0], loss_[1:]\n if loss in ('huber', 'epsilon_insensitive',\n 'squared_epsilon_insensitive'):\n args = (self.epsilon, )\n return loss_class(*args)\n except KeyError as e:\n raise ValueError(\"The loss %s is not supported. \" % loss) from e\n\n def _get_learning_rate_type(self, learning_rate):\n try:\n return LEARNING_RATE_TYPES[learning_rate]\n except KeyError as e:\n raise ValueError(\"learning rate %s \"\n \"is not supported. \" % learning_rate) from e\n\n def _get_penalty_type(self, penalty):\n penalty = str(penalty).lower()\n try:\n return PENALTY_TYPES[penalty]\n except KeyError as e:\n raise ValueError(\"Penalty %s is not supported. 
\" % penalty) from e\n\n def _allocate_parameter_mem(self, n_classes, n_features, coef_init=None,\n intercept_init=None):\n \"\"\"Allocate mem for parameters; initialize if provided.\"\"\"\n if n_classes > 2:\n # allocate coef_ for multi-class\n if coef_init is not None:\n coef_init = np.asarray(coef_init, order=\"C\")\n if coef_init.shape != (n_classes, n_features):\n raise ValueError(\"Provided ``coef_`` does not match \"\n \"dataset. \")\n self.coef_ = coef_init\n else:\n self.coef_ = np.zeros((n_classes, n_features),\n dtype=np.float64, order=\"C\")\n\n # allocate intercept_ for multi-class\n if intercept_init is not None:\n intercept_init = np.asarray(intercept_init, order=\"C\")\n if intercept_init.shape != (n_classes, ):\n raise ValueError(\"Provided intercept_init \"\n \"does not match dataset.\")\n self.intercept_ = intercept_init\n else:\n self.intercept_ = np.zeros(n_classes, dtype=np.float64,\n order=\"C\")\n else:\n # allocate coef_ for binary problem\n if coef_init is not None:\n coef_init = np.asarray(coef_init, dtype=np.float64,\n order=\"C\")\n coef_init = coef_init.ravel()\n if coef_init.shape != (n_features,):\n raise ValueError(\"Provided coef_init does not \"\n \"match dataset.\")\n self.coef_ = coef_init\n else:\n self.coef_ = np.zeros(n_features,\n dtype=np.float64,\n order=\"C\")\n\n # allocate intercept_ for binary problem\n if intercept_init is not None:\n intercept_init = np.asarray(intercept_init, dtype=np.float64)\n if intercept_init.shape != (1,) and intercept_init.shape != ():\n raise ValueError(\"Provided intercept_init \"\n \"does not match dataset.\")\n self.intercept_ = intercept_init.reshape(1,)\n else:\n self.intercept_ = np.zeros(1, dtype=np.float64, order=\"C\")\n\n # initialize average parameters\n if self.average > 0:\n self._standard_coef = self.coef_\n self._standard_intercept = self.intercept_\n self._average_coef = np.zeros(self.coef_.shape,\n dtype=np.float64,\n order=\"C\")\n self._average_intercept = np.zeros(self._standard_intercept.shape,\n dtype=np.float64,\n order=\"C\")\n\n def _make_validation_split(self, y):\n \"\"\"Split the dataset between training set and validation set.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples, )\n Target values.\n\n Returns\n -------\n validation_mask : ndarray of shape (n_samples, )\n Equal to 1 on the validation set, 0 on the training set.\n \"\"\"\n n_samples = y.shape[0]\n validation_mask = np.zeros(n_samples, dtype=np.uint8)\n if not self.early_stopping:\n # use the full set for training, with an empty validation set\n return validation_mask\n\n if is_classifier(self):\n splitter_type = StratifiedShuffleSplit\n else:\n splitter_type = ShuffleSplit\n cv = splitter_type(test_size=self.validation_fraction,\n random_state=self.random_state)\n idx_train, idx_val = next(cv.split(np.zeros(shape=(y.shape[0], 1)), y))\n if idx_train.shape[0] == 0 or idx_val.shape[0] == 0:\n raise ValueError(\n \"Splitting %d samples into a train set and a validation set \"\n \"with validation_fraction=%r led to an empty set (%d and %d \"\n \"samples). 
Please either change validation_fraction, increase \"\n \"number of samples, or disable early_stopping.\"\n % (n_samples, self.validation_fraction, idx_train.shape[0],\n idx_val.shape[0]))\n\n validation_mask[idx_val] = 1\n return validation_mask\n\n def _make_validation_score_cb(self, validation_mask, X, y, sample_weight,\n classes=None):\n if not self.early_stopping:\n return None\n\n return _ValidationScoreCallback(\n self, X[validation_mask], y[validation_mask],\n sample_weight[validation_mask], classes=classes)\n\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute standard_coef_ was deprecated \" # type: ignore\n \"in version 0.23 and will be removed in 1.0 \"\n \"(renaming of 0.25).\")\n @property\n def standard_coef_(self):\n return self._standard_coef\n\n # mypy error: Decorated property not supported\n @deprecated( # type: ignore\n \"Attribute standard_intercept_ was deprecated \"\n \"in version 0.23 and will be removed in 1.0 (renaming of 0.25).\"\n )\n @property\n def standard_intercept_(self):\n return self._standard_intercept\n\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute average_coef_ was deprecated \" # type: ignore\n \"in version 0.23 and will be removed in 1.0 \"\n \"(renaming of 0.25).\")\n @property\n def average_coef_(self):\n return self._average_coef\n\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute average_intercept_ was deprecated \" # type: ignore\n \"in version 0.23 and will be removed in 1.0 \"\n \"(renaming of 0.25).\")\n @property\n def average_intercept_(self):\n return self._average_intercept", + "instance_attributes": [ + { + "name": "penalty", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "learning_rate", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "epsilon", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "C", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "l1_ratio", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "shuffle", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "eta0", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "power_t", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "early_stopping", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "validation_fraction", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "n_iter_no_change", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "warm_start", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "average", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "_average_coef", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "_average_intercept", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier", + 
"name": "BaseSGDClassifier", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier", + "decorators": [], + "superclasses": ["LinearClassifierMixin", "BaseSGD"], + "methods": [ + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_binary", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_multiclass", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/partial_fit", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class BaseSGDClassifier(LinearClassifierMixin, BaseSGD, metaclass=ABCMeta):\n\n loss_functions = {\n \"hinge\": (Hinge, 1.0),\n \"squared_hinge\": (SquaredHinge, 1.0),\n \"perceptron\": (Hinge, 0.0),\n \"log\": (Log, ),\n \"modified_huber\": (ModifiedHuber, ),\n \"squared_loss\": (SquaredLoss, ),\n \"huber\": (Huber, DEFAULT_EPSILON),\n \"epsilon_insensitive\": (EpsilonInsensitive, DEFAULT_EPSILON),\n \"squared_epsilon_insensitive\": (SquaredEpsilonInsensitive,\n DEFAULT_EPSILON),\n }\n\n @abstractmethod\n @_deprecate_positional_args\n def __init__(self, loss=\"hinge\", *, penalty='l2', alpha=0.0001,\n l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=1e-3,\n shuffle=True, verbose=0, epsilon=DEFAULT_EPSILON, n_jobs=None,\n random_state=None, learning_rate=\"optimal\", eta0=0.0,\n power_t=0.5, early_stopping=False,\n validation_fraction=0.1, n_iter_no_change=5,\n class_weight=None, warm_start=False, average=False):\n\n super().__init__(\n loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,\n fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,\n shuffle=shuffle, verbose=verbose, epsilon=epsilon,\n random_state=random_state, learning_rate=learning_rate, eta0=eta0,\n power_t=power_t, early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, warm_start=warm_start,\n average=average)\n self.class_weight = class_weight\n self.n_jobs = n_jobs\n\n def _partial_fit(self, X, y, alpha, C,\n loss, learning_rate, max_iter,\n classes, sample_weight,\n coef_init, intercept_init):\n X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64,\n order=\"C\", accept_large_sparse=False)\n\n n_samples, n_features = X.shape\n\n _check_partial_fit_first_call(self, classes)\n\n n_classes = self.classes_.shape[0]\n\n # Allocate datastructures from input arguments\n self._expanded_class_weight = compute_class_weight(\n self.class_weight, classes=self.classes_, y=y)\n sample_weight = _check_sample_weight(sample_weight, X)\n\n if getattr(self, \"coef_\", None) is None or coef_init is not None:\n self._allocate_parameter_mem(n_classes, n_features,\n coef_init, intercept_init)\n elif n_features != self.coef_.shape[-1]:\n raise ValueError(\"Number of features %d does not match previous \"\n \"data %d.\" % (n_features, self.coef_.shape[-1]))\n\n self.loss_function_ = self._get_loss_function(loss)\n if not hasattr(self, \"t_\"):\n self.t_ = 1.0\n\n # delegate to concrete training procedure\n if n_classes > 2:\n self._fit_multiclass(X, y, alpha=alpha, C=C,\n learning_rate=learning_rate,\n sample_weight=sample_weight,\n max_iter=max_iter)\n elif n_classes == 
2:\n self._fit_binary(X, y, alpha=alpha, C=C,\n learning_rate=learning_rate,\n sample_weight=sample_weight,\n max_iter=max_iter)\n else:\n raise ValueError(\n \"The number of classes has to be greater than one;\"\n \" got %d class\" % n_classes)\n\n return self\n\n def _fit(self, X, y, alpha, C, loss, learning_rate, coef_init=None,\n intercept_init=None, sample_weight=None):\n self._validate_params()\n if hasattr(self, \"classes_\"):\n self.classes_ = None\n\n X, y = self._validate_data(X, y, accept_sparse='csr',\n dtype=np.float64, order=\"C\",\n accept_large_sparse=False)\n\n # labels can be encoded as float, int, or string literals\n # np.unique sorts in asc order; largest class id is positive class\n classes = np.unique(y)\n\n if self.warm_start and hasattr(self, \"coef_\"):\n if coef_init is None:\n coef_init = self.coef_\n if intercept_init is None:\n intercept_init = self.intercept_\n else:\n self.coef_ = None\n self.intercept_ = None\n\n if self.average > 0:\n self._standard_coef = self.coef_\n self._standard_intercept = self.intercept_\n self._average_coef = None\n self._average_intercept = None\n\n # Clear iteration count for multiple calls to fit.\n self.t_ = 1.0\n\n self._partial_fit(X, y, alpha, C, loss, learning_rate, self.max_iter,\n classes, sample_weight, coef_init, intercept_init)\n\n if (self.tol is not None and self.tol > -np.inf\n and self.n_iter_ == self.max_iter):\n warnings.warn(\"Maximum number of iterations reached before \"\n \"convergence. Consider increasing max_iter to \"\n \"improve the fit.\",\n ConvergenceWarning)\n return self\n\n def _fit_binary(self, X, y, alpha, C, sample_weight,\n learning_rate, max_iter):\n \"\"\"Fit a binary classifier on X and y. \"\"\"\n coef, intercept, n_iter_ = fit_binary(self, 1, X, y, alpha, C,\n learning_rate, max_iter,\n self._expanded_class_weight[1],\n self._expanded_class_weight[0],\n sample_weight,\n random_state=self.random_state)\n\n self.t_ += n_iter_ * X.shape[0]\n self.n_iter_ = n_iter_\n\n # need to be 2d\n if self.average > 0:\n if self.average <= self.t_ - 1:\n self.coef_ = self._average_coef.reshape(1, -1)\n self.intercept_ = self._average_intercept\n else:\n self.coef_ = self._standard_coef.reshape(1, -1)\n self._standard_intercept = np.atleast_1d(intercept)\n self.intercept_ = self._standard_intercept\n else:\n self.coef_ = coef.reshape(1, -1)\n # intercept is a float, need to convert it to an array of length 1\n self.intercept_ = np.atleast_1d(intercept)\n\n def _fit_multiclass(self, X, y, alpha, C, learning_rate,\n sample_weight, max_iter):\n \"\"\"Fit a multi-class classifier by combining binary classifiers\n\n Each binary classifier predicts one class versus all others. 
This\n strategy is called OvA (One versus All) or OvR (One versus Rest).\n \"\"\"\n # Precompute the validation split using the multiclass labels\n # to ensure proper balancing of the classes.\n validation_mask = self._make_validation_split(y)\n\n # Use joblib to fit OvA in parallel.\n # Pick the random seed for each job outside of fit_binary to avoid\n # sharing the estimator random state between threads which could lead\n # to non-deterministic behavior\n random_state = check_random_state(self.random_state)\n seeds = random_state.randint(MAX_INT, size=len(self.classes_))\n result = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(require=\"sharedmem\"))(\n delayed(fit_binary)(self, i, X, y, alpha, C, learning_rate,\n max_iter, self._expanded_class_weight[i],\n 1., sample_weight,\n validation_mask=validation_mask,\n random_state=seed)\n for i, seed in enumerate(seeds))\n\n # take the maximum of n_iter_ over every binary fit\n n_iter_ = 0.\n for i, (_, intercept, n_iter_i) in enumerate(result):\n self.intercept_[i] = intercept\n n_iter_ = max(n_iter_, n_iter_i)\n\n self.t_ += n_iter_ * X.shape[0]\n self.n_iter_ = n_iter_\n\n if self.average > 0:\n if self.average <= self.t_ - 1.0:\n self.coef_ = self._average_coef\n self.intercept_ = self._average_intercept\n else:\n self.coef_ = self._standard_coef\n self._standard_intercept = np.atleast_1d(self.intercept_)\n self.intercept_ = self._standard_intercept\n\n def partial_fit(self, X, y, classes=None, sample_weight=None):\n \"\"\"Perform one epoch of stochastic gradient descent on given samples.\n\n Internally, this method uses ``max_iter = 1``. Therefore, it is not\n guaranteed that a minimum of the cost function is reached after calling\n it once. Matters such as objective convergence and early stopping\n should be handled by the user.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Subset of the training data.\n\n y : ndarray of shape (n_samples,)\n Subset of the target values.\n\n classes : ndarray of shape (n_classes,), default=None\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\n sample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed.\n\n Returns\n -------\n self :\n Returns an instance of self.\n \"\"\"\n self._validate_params(for_partial_fit=True)\n if self.class_weight in ['balanced']:\n raise ValueError(\"class_weight '{0}' is not supported for \"\n \"partial_fit. In order to use 'balanced' weights,\"\n \" use compute_class_weight('{0}', \"\n \"classes=classes, y=y). \"\n \"In place of y you can use a large enough sample \"\n \"of the full training set target to properly \"\n \"estimate the class frequency distributions. 
\"\n \"Pass the resulting weights as the class_weight \"\n \"parameter.\".format(self.class_weight))\n return self._partial_fit(X, y, alpha=self.alpha, C=1.0, loss=self.loss,\n learning_rate=self.learning_rate, max_iter=1,\n classes=classes, sample_weight=sample_weight,\n coef_init=None, intercept_init=None)\n\n def fit(self, X, y, coef_init=None, intercept_init=None,\n sample_weight=None):\n \"\"\"Fit linear model with Stochastic Gradient Descent.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data.\n\n y : ndarray of shape (n_samples,)\n Target values.\n\n coef_init : ndarray of shape (n_classes, n_features), default=None\n The initial coefficients to warm-start the optimization.\n\n intercept_init : ndarray of shape (n_classes,), default=None\n The initial intercept to warm-start the optimization.\n\n sample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed. These weights will\n be multiplied with class_weight (passed through the\n constructor) if class_weight is specified.\n\n Returns\n -------\n self :\n Returns an instance of self.\n \"\"\"\n return self._fit(X, y, alpha=self.alpha, C=1.0,\n loss=self.loss, learning_rate=self.learning_rate,\n coef_init=coef_init, intercept_init=intercept_init,\n sample_weight=sample_weight)", + "instance_attributes": [ + { + "name": "t_", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor", + "name": "BaseSGDRegressor", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor", + "decorators": [], + "superclasses": ["RegressorMixin", "BaseSGD"], + "methods": [ + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/partial_fit", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/fit", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_decision_function", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/predict", + "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit_regressor" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class BaseSGDRegressor(RegressorMixin, BaseSGD):\n\n loss_functions = {\n \"squared_loss\": (SquaredLoss, ),\n \"huber\": (Huber, DEFAULT_EPSILON),\n \"epsilon_insensitive\": (EpsilonInsensitive, DEFAULT_EPSILON),\n \"squared_epsilon_insensitive\": (SquaredEpsilonInsensitive,\n DEFAULT_EPSILON),\n }\n\n @abstractmethod\n @_deprecate_positional_args\n def __init__(self, loss=\"squared_loss\", *, penalty=\"l2\", alpha=0.0001,\n l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=1e-3,\n shuffle=True, verbose=0, epsilon=DEFAULT_EPSILON,\n random_state=None, learning_rate=\"invscaling\", eta0=0.01,\n power_t=0.25, early_stopping=False, validation_fraction=0.1,\n n_iter_no_change=5, warm_start=False, average=False):\n super().__init__(\n loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,\n fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,\n shuffle=shuffle, verbose=verbose, epsilon=epsilon,\n random_state=random_state, 
learning_rate=learning_rate, eta0=eta0,\n power_t=power_t, early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, warm_start=warm_start,\n average=average)\n\n def _partial_fit(self, X, y, alpha, C, loss, learning_rate,\n max_iter, sample_weight, coef_init, intercept_init):\n X, y = self._validate_data(X, y, accept_sparse=\"csr\", copy=False,\n order='C', dtype=np.float64,\n accept_large_sparse=False)\n y = y.astype(np.float64, copy=False)\n\n n_samples, n_features = X.shape\n\n sample_weight = _check_sample_weight(sample_weight, X)\n\n # Allocate datastructures from input arguments\n if getattr(self, \"coef_\", None) is None:\n self._allocate_parameter_mem(1, n_features, coef_init,\n intercept_init)\n elif n_features != self.coef_.shape[-1]:\n raise ValueError(\"Number of features %d does not match previous \"\n \"data %d.\" % (n_features, self.coef_.shape[-1]))\n if self.average > 0 and getattr(self, \"_average_coef\", None) is None:\n self._average_coef = np.zeros(n_features,\n dtype=np.float64,\n order=\"C\")\n self._average_intercept = np.zeros(1, dtype=np.float64, order=\"C\")\n\n self._fit_regressor(X, y, alpha, C, loss, learning_rate,\n sample_weight, max_iter)\n\n return self\n\n def partial_fit(self, X, y, sample_weight=None):\n \"\"\"Perform one epoch of stochastic gradient descent on given samples.\n\n Internally, this method uses ``max_iter = 1``. Therefore, it is not\n guaranteed that a minimum of the cost function is reached after calling\n it once. Matters such as objective convergence and early stopping\n should be handled by the user.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Subset of training data\n\n y : numpy array of shape (n_samples,)\n Subset of target values\n\n sample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._validate_params(for_partial_fit=True)\n return self._partial_fit(X, y, self.alpha, C=1.0,\n loss=self.loss,\n learning_rate=self.learning_rate, max_iter=1,\n sample_weight=sample_weight, coef_init=None,\n intercept_init=None)\n\n def _fit(self, X, y, alpha, C, loss, learning_rate, coef_init=None,\n intercept_init=None, sample_weight=None):\n self._validate_params()\n if self.warm_start and getattr(self, \"coef_\", None) is not None:\n if coef_init is None:\n coef_init = self.coef_\n if intercept_init is None:\n intercept_init = self.intercept_\n else:\n self.coef_ = None\n self.intercept_ = None\n\n # Clear iteration count for multiple calls to fit.\n self.t_ = 1.0\n\n self._partial_fit(X, y, alpha, C, loss, learning_rate,\n self.max_iter, sample_weight, coef_init,\n intercept_init)\n\n if (self.tol is not None and self.tol > -np.inf\n and self.n_iter_ == self.max_iter):\n warnings.warn(\"Maximum number of iterations reached before \"\n \"convergence. 
Consider increasing max_iter to \"\n \"improve the fit.\",\n ConvergenceWarning)\n\n return self\n\n def fit(self, X, y, coef_init=None, intercept_init=None,\n sample_weight=None):\n \"\"\"Fit linear model with Stochastic Gradient Descent.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data\n\n y : ndarray of shape (n_samples,)\n Target values\n\n coef_init : ndarray of shape (n_features,), default=None\n The initial coefficients to warm-start the optimization.\n\n intercept_init : ndarray of shape (1,), default=None\n The initial intercept to warm-start the optimization.\n\n sample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n return self._fit(X, y, alpha=self.alpha, C=1.0,\n loss=self.loss, learning_rate=self.learning_rate,\n coef_init=coef_init,\n intercept_init=intercept_init,\n sample_weight=sample_weight)\n\n def _decision_function(self, X):\n \"\"\"Predict using the linear model\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n\n Returns\n -------\n ndarray of shape (n_samples,)\n Predicted target values per element in X.\n \"\"\"\n check_is_fitted(self)\n\n X = check_array(X, accept_sparse='csr')\n\n scores = safe_sparse_dot(X, self.coef_.T,\n dense_output=True) + self.intercept_\n return scores.ravel()\n\n def predict(self, X):\n \"\"\"Predict using the linear model\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n\n Returns\n -------\n ndarray of shape (n_samples,)\n Predicted target values per element in X.\n \"\"\"\n return self._decision_function(X)\n\n def _fit_regressor(self, X, y, alpha, C, loss, learning_rate,\n sample_weight, max_iter):\n dataset, intercept_decay = make_dataset(X, y, sample_weight)\n\n loss_function = self._get_loss_function(loss)\n penalty_type = self._get_penalty_type(self.penalty)\n learning_rate_type = self._get_learning_rate_type(learning_rate)\n\n if not hasattr(self, \"t_\"):\n self.t_ = 1.0\n\n validation_mask = self._make_validation_split(y)\n validation_score_cb = self._make_validation_score_cb(\n validation_mask, X, y, sample_weight)\n\n random_state = check_random_state(self.random_state)\n # numpy mtrand expects a C long which is a signed 32 bit integer under\n # Windows\n seed = random_state.randint(0, np.iinfo(np.int32).max)\n\n tol = self.tol if self.tol is not None else -np.inf\n\n if self.average:\n coef = self._standard_coef\n intercept = self._standard_intercept\n average_coef = self._average_coef\n average_intercept = self._average_intercept\n else:\n coef = self.coef_\n intercept = self.intercept_\n average_coef = None # Not used\n average_intercept = [0] # Not used\n\n coef, intercept, average_coef, average_intercept, self.n_iter_ = \\\n _plain_sgd(coef,\n intercept[0],\n average_coef,\n average_intercept[0],\n loss_function,\n penalty_type,\n alpha, C,\n self.l1_ratio,\n dataset,\n validation_mask, self.early_stopping,\n validation_score_cb,\n int(self.n_iter_no_change),\n max_iter, tol,\n int(self.fit_intercept),\n int(self.verbose),\n int(self.shuffle),\n seed,\n 1.0, 1.0,\n learning_rate_type,\n self.eta0, self.power_t, self.t_,\n intercept_decay, self.average)\n\n self.t_ += self.n_iter_ * X.shape[0]\n\n if self.average > 0:\n self._average_intercept = np.atleast_1d(average_intercept)\n self._standard_intercept = np.atleast_1d(intercept)\n\n if 
self.average <= self.t_ - 1.0:\n # made enough updates for averaging to be taken into account\n self.coef_ = average_coef\n self.intercept_ = np.atleast_1d(average_intercept)\n else:\n self.coef_ = coef\n self.intercept_ = np.atleast_1d(intercept)\n\n else:\n self.intercept_ = np.atleast_1d(intercept)", + "instance_attributes": [ + { + "name": "_average_coef", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "_average_intercept", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "t_", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier", + "name": "SGDClassifier", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier", + "decorators": [], + "superclasses": ["BaseSGDClassifier"], + "methods": [ + "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__", + "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_check_proba", + "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/predict_proba@getter", + "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_predict_proba", + "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/predict_log_proba@getter", + "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_predict_log_proba", + "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Linear classifiers (SVM, logistic regression, etc.) with SGD training.\n\nThis estimator implements regularized linear models with stochastic\ngradient descent (SGD) learning: the gradient of the loss is estimated\none sample at a time and the model is updated along the way with a\ndecreasing strength schedule (aka learning rate). SGD allows minibatch\n(online/out-of-core) learning via the `partial_fit` method.\nFor best results using the default learning rate schedule, the data should\nhave zero mean and unit variance.\n\nThis implementation works with data represented as dense or sparse arrays\nof floating point values for the features. The model it fits can be\ncontrolled with the loss parameter; by default, it fits a linear support\nvector machine (SVM).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nRead more in the :ref:`User Guide <sgd>`.", + "docstring": "Linear classifiers (SVM, logistic regression, etc.) with SGD training.\n\nThis estimator implements regularized linear models with stochastic\ngradient descent (SGD) learning: the gradient of the loss is estimated\none sample at a time and the model is updated along the way with a\ndecreasing strength schedule (aka learning rate). SGD allows minibatch\n(online/out-of-core) learning via the `partial_fit` method.\nFor best results using the default learning rate schedule, the data should\nhave zero mean and unit variance.\n\nThis implementation works with data represented as dense or sparse arrays\nof floating point values for the features. 
The model it fits can be\ncontrolled with the loss parameter; by default, it fits a linear support\nvector machine (SVM).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nRead more in the :ref:`User Guide <sgd>`.\n\nParameters\n----------\nloss : str, default='hinge'\n The loss function to be used. Defaults to 'hinge', which gives a\n linear SVM.\n\n The possible options are 'hinge', 'log', 'modified_huber',\n 'squared_hinge', 'perceptron', or a regression loss: 'squared_loss',\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n\n The 'log' loss gives logistic regression, a probabilistic classifier.\n 'modified_huber' is another smooth loss that brings tolerance to\n outliers as well as probability estimates.\n 'squared_hinge' is like hinge but is quadratically penalized.\n 'perceptron' is the linear loss used by the perceptron algorithm.\n The other losses are designed for regression but can be useful in\n classification as well; see\n :class:`~sklearn.linear_model.SGDRegressor` for a description.\n\n More details about the loss formulas can be found in the\n :ref:`User Guide <sgd_mathematical_formulation>`.\n\npenalty : {'l2', 'l1', 'elasticnet'}, default='l2'\n The penalty (aka regularization term) to be used. Defaults to 'l2'\n which is the standard regularizer for linear SVM models. 'l1' and\n 'elasticnet' might bring sparsity to the model (feature selection)\n not achievable with 'l2'.\n\nalpha : float, default=0.0001\n Constant that multiplies the regularization term. The higher the\n value, the stronger the regularization.\n Also used to compute the learning rate when `learning_rate` is\n set to 'optimal'.\n\nl1_ratio : float, default=0.15\n The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n Only used if `penalty` is 'elasticnet'.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-3\n The stopping criterion. If it is not None, training will stop\n when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n epochs.\n Convergence is checked against the training loss or the\n validation loss depending on the `early_stopping` parameter.\n\n .. 
versionadded:: 0.19\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n The verbosity level.\n\nepsilon : float, default=0.1\n Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n For 'huber', determines the threshold at which it becomes less\n important to get the prediction exactly right.\n For epsilon-insensitive, any differences between the current prediction\n and the correct label are ignored if they are less than this threshold.\n\nn_jobs : int, default=None\n The number of CPUs to use to do the OVA (One Versus All, for\n multi-class problems) computation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n Used for shuffling the data, when ``shuffle`` is set to ``True``.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary <random_state>`.\n\nlearning_rate : str, default='optimal'\n The learning rate schedule:\n\n - 'constant': `eta = eta0`\n - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n where t0 is chosen by a heuristic proposed by Leon Bottou.\n - 'invscaling': `eta = eta0 / pow(t, power_t)`\n - 'adaptive': eta = eta0, as long as the training loss keeps decreasing.\n Each time n_iter_no_change consecutive epochs fail to decrease the\n training loss by tol or fail to increase validation score by tol if\n early_stopping is True, the current learning rate is divided by 5.\n\n .. versionadded:: 0.20\n Added 'adaptive' option\n\neta0 : double, default=0.0\n The initial learning rate for the 'constant', 'invscaling' or\n 'adaptive' schedules. The default value is 0.0 as eta0 is not used by\n the default schedule 'optimal'.\n\npower_t : double, default=0.5\n The exponent for inverse scaling learning rate [default 0.5].\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to True, it will automatically set aside\n a stratified fraction of training data as validation and terminate\n training when validation score returned by the `score` method is not\n improving by at least tol for n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n Added 'early_stopping' option\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if `early_stopping` is True.\n\n .. versionadded:: 0.20\n Added 'validation_fraction' option\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before stopping\n fitting.\n Convergence is checked against the training loss or the\n validation loss depending on the `early_stopping` parameter.\n\n .. versionadded:: 0.20\n Added 'n_iter_no_change' option\n\nclass_weight : dict, {class_label: weight} or \"balanced\", default=None\n Preset for the class_weight fit parameter.\n\n Weights associated with classes. 
If not given, all classes\n are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary <warm_start>`.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n If a dynamic learning rate is used, the learning rate is adapted\n depending on the number of samples already seen. Calling ``fit`` resets\n this counter, while ``partial_fit`` will result in increasing the\n existing counter.\n\naverage : bool or int, default=False\n When set to True, computes the averaged SGD weights across all\n updates and stores the result in the ``coef_`` attribute. If set to\n an int greater than 1, averaging will begin once the total number of\n samples seen reaches `average`. So ``average=10`` will begin\n averaging after seeing 10 samples.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else (n_classes, n_features)\n Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n Constants in decision function.\n\nn_iter_ : int\n The actual number of iterations before reaching the stopping criterion.\n For multiclass fits, it is the maximum over every binary fit.\n\nloss_function_ : concrete ``LossFunction``\n\nclasses_ : array of shape (n_classes,)\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nSee Also\n--------\nsklearn.svm.LinearSVC : Linear support vector classification.\nLogisticRegression : Logistic regression.\nPerceptron : Inherits from SGDClassifier. ``Perceptron()`` is equivalent to\n ``SGDClassifier(loss=\"perceptron\", eta0=1, learning_rate=\"constant\",\n penalty=None)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import SGDClassifier\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.pipeline import make_pipeline\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> Y = np.array([1, 1, 2, 2])\n>>> # Always scale the input. The most convenient way is to use a pipeline.\n>>> clf = make_pipeline(StandardScaler(),\n... SGDClassifier(max_iter=1000, tol=1e-3))\n>>> clf.fit(X, Y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('sgdclassifier', SGDClassifier())])\n>>> print(clf.predict([[-0.8, -1]]))\n[1]", + "code": "class SGDClassifier(BaseSGDClassifier):\n \"\"\"Linear classifiers (SVM, logistic regression, etc.) with SGD training.\n\n This estimator implements regularized linear models with stochastic\n gradient descent (SGD) learning: the gradient of the loss is estimated\n one sample at a time and the model is updated along the way with a\n decreasing strength schedule (aka learning rate). SGD allows minibatch\n (online/out-of-core) learning via the `partial_fit` method.\n For best results using the default learning rate schedule, the data should\n have zero mean and unit variance.\n\n This implementation works with data represented as dense or sparse arrays\n of floating point values for the features. 
The model it fits can be\n controlled with the loss parameter; by default, it fits a linear support\n vector machine (SVM).\n\n The regularizer is a penalty added to the loss function that shrinks model\n parameters towards the zero vector using either the squared euclidean norm\n L2 or the absolute norm L1 or a combination of both (Elastic Net). If the\n parameter update crosses the 0.0 value because of the regularizer, the\n update is truncated to 0.0 to allow for learning sparse models and achieve\n online feature selection.\n\n Read more in the :ref:`User Guide <sgd>`.\n\n Parameters\n ----------\n loss : str, default='hinge'\n The loss function to be used. Defaults to 'hinge', which gives a\n linear SVM.\n\n The possible options are 'hinge', 'log', 'modified_huber',\n 'squared_hinge', 'perceptron', or a regression loss: 'squared_loss',\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n\n The 'log' loss gives logistic regression, a probabilistic classifier.\n 'modified_huber' is another smooth loss that brings tolerance to\n outliers as well as probability estimates.\n 'squared_hinge' is like hinge but is quadratically penalized.\n 'perceptron' is the linear loss used by the perceptron algorithm.\n The other losses are designed for regression but can be useful in\n classification as well; see\n :class:`~sklearn.linear_model.SGDRegressor` for a description.\n\n More details about the loss formulas can be found in the\n :ref:`User Guide <sgd_mathematical_formulation>`.\n\n penalty : {'l2', 'l1', 'elasticnet'}, default='l2'\n The penalty (aka regularization term) to be used. Defaults to 'l2'\n which is the standard regularizer for linear SVM models. 'l1' and\n 'elasticnet' might bring sparsity to the model (feature selection)\n not achievable with 'l2'.\n\n alpha : float, default=0.0001\n Constant that multiplies the regularization term. The higher the\n value, the stronger the regularization.\n Also used to compute the learning rate when `learning_rate` is\n set to 'optimal'.\n\n l1_ratio : float, default=0.15\n The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n Only used if `penalty` is 'elasticnet'.\n\n fit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered.\n\n max_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\n tol : float, default=1e-3\n The stopping criterion. If it is not None, training will stop\n when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n epochs.\n Convergence is checked against the training loss or the\n validation loss depending on the `early_stopping` parameter.\n\n .. 
versionadded:: 0.19\n\n shuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\n verbose : int, default=0\n The verbosity level.\n\n epsilon : float, default=0.1\n Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n For 'huber', determines the threshold at which it becomes less\n important to get the prediction exactly right.\n For epsilon-insensitive, any differences between the current prediction\n and the correct label are ignored if they are less than this threshold.\n\n n_jobs : int, default=None\n The number of CPUs to use to do the OVA (One Versus All, for\n multi-class problems) computation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n for more details.\n\n random_state : int, RandomState instance, default=None\n Used for shuffling the data, when ``shuffle`` is set to ``True``.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary <random_state>`.\n\n learning_rate : str, default='optimal'\n The learning rate schedule:\n\n - 'constant': `eta = eta0`\n - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n where t0 is chosen by a heuristic proposed by Leon Bottou.\n - 'invscaling': `eta = eta0 / pow(t, power_t)`\n - 'adaptive': eta = eta0, as long as the training loss keeps decreasing.\n Each time n_iter_no_change consecutive epochs fail to decrease the\n training loss by tol or fail to increase validation score by tol if\n early_stopping is True, the current learning rate is divided by 5.\n\n .. versionadded:: 0.20\n Added 'adaptive' option\n\n eta0 : double, default=0.0\n The initial learning rate for the 'constant', 'invscaling' or\n 'adaptive' schedules. The default value is 0.0 as eta0 is not used by\n the default schedule 'optimal'.\n\n power_t : double, default=0.5\n The exponent for inverse scaling learning rate [default 0.5].\n\n early_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to True, it will automatically set aside\n a stratified fraction of training data as validation and terminate\n training when validation score returned by the `score` method is not\n improving by at least tol for n_iter_no_change consecutive epochs.\n\n .. versionadded:: 0.20\n Added 'early_stopping' option\n\n validation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if `early_stopping` is True.\n\n .. versionadded:: 0.20\n Added 'validation_fraction' option\n\n n_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before stopping\n fitting.\n Convergence is checked against the training loss or the\n validation loss depending on the `early_stopping` parameter.\n\n .. versionadded:: 0.20\n Added 'n_iter_no_change' option\n\n class_weight : dict, {class_label: weight} or \"balanced\", default=None\n Preset for the class_weight fit parameter.\n\n Weights associated with classes. 
If not given, all classes\n are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n warm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary <warm_start>`.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n If a dynamic learning rate is used, the learning rate is adapted\n depending on the number of samples already seen. Calling ``fit`` resets\n this counter, while ``partial_fit`` will result in increasing the\n existing counter.\n\n average : bool or int, default=False\n When set to True, computes the averaged SGD weights across all\n updates and stores the result in the ``coef_`` attribute. If set to\n an int greater than 1, averaging will begin once the total number of\n samples seen reaches `average`. So ``average=10`` will begin\n averaging after seeing 10 samples.\n\n Attributes\n ----------\n coef_ : ndarray of shape (1, n_features) if n_classes == 2 else \\\n (n_classes, n_features)\n Weights assigned to the features.\n\n intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n Constants in decision function.\n\n n_iter_ : int\n The actual number of iterations before reaching the stopping criterion.\n For multiclass fits, it is the maximum over every binary fit.\n\n loss_function_ : concrete ``LossFunction``\n\n classes_ : array of shape (n_classes,)\n\n t_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\n See Also\n --------\n sklearn.svm.LinearSVC : Linear support vector classification.\n LogisticRegression : Logistic regression.\n Perceptron : Inherits from SGDClassifier. ``Perceptron()`` is equivalent to\n ``SGDClassifier(loss=\"perceptron\", eta0=1, learning_rate=\"constant\",\n penalty=None)``.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.linear_model import SGDClassifier\n >>> from sklearn.preprocessing import StandardScaler\n >>> from sklearn.pipeline import make_pipeline\n >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n >>> Y = np.array([1, 1, 2, 2])\n >>> # Always scale the input. The most convenient way is to use a pipeline.\n >>> clf = make_pipeline(StandardScaler(),\n ... 
SGDClassifier(max_iter=1000, tol=1e-3))\n >>> clf.fit(X, Y)\n Pipeline(steps=[('standardscaler', StandardScaler()),\n ('sgdclassifier', SGDClassifier())])\n >>> print(clf.predict([[-0.8, -1]]))\n [1]\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, loss=\"hinge\", *, penalty='l2', alpha=0.0001,\n l1_ratio=0.15,\n fit_intercept=True, max_iter=1000, tol=1e-3, shuffle=True,\n verbose=0, epsilon=DEFAULT_EPSILON, n_jobs=None,\n random_state=None, learning_rate=\"optimal\", eta0=0.0,\n power_t=0.5, early_stopping=False, validation_fraction=0.1,\n n_iter_no_change=5, class_weight=None, warm_start=False,\n average=False):\n super().__init__(\n loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,\n fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,\n shuffle=shuffle, verbose=verbose, epsilon=epsilon, n_jobs=n_jobs,\n random_state=random_state, learning_rate=learning_rate, eta0=eta0,\n power_t=power_t, early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, class_weight=class_weight,\n warm_start=warm_start, average=average)\n\n def _check_proba(self):\n if self.loss not in (\"log\", \"modified_huber\"):\n raise AttributeError(\"probability estimates are not available for\"\n \" loss=%r\" % self.loss)\n\n @property\n def predict_proba(self):\n \"\"\"Probability estimates.\n\n This method is only available for log loss and modified Huber loss.\n\n Multiclass probability estimates are derived from binary (one-vs.-rest)\n estimates by simple normalization, as recommended by Zadrozny and\n Elkan.\n\n Binary probability estimates for loss=\"modified_huber\" are given by\n (clip(decision_function(X), -1, 1) + 1) / 2. For other loss functions\n it is necessary to perform proper probability calibration by wrapping\n the classifier with\n :class:`~sklearn.calibration.CalibratedClassifierCV` instead.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data for prediction.\n\n Returns\n -------\n ndarray of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in `self.classes_`.\n\n References\n ----------\n Zadrozny and Elkan, \"Transforming classifier scores into multiclass\n probability estimates\", SIGKDD'02,\n http://www.research.ibm.com/people/z/zadrozny/kdd2002-Transf.pdf\n\n The justification for the formula in the loss=\"modified_huber\"\n case is in the appendix B in:\n http://jmlr.csail.mit.edu/papers/volume2/zhang02c/zhang02c.pdf\n \"\"\"\n self._check_proba()\n return self._predict_proba\n\n def _predict_proba(self, X):\n check_is_fitted(self)\n\n if self.loss == \"log\":\n return self._predict_proba_lr(X)\n\n elif self.loss == \"modified_huber\":\n binary = (len(self.classes_) == 2)\n scores = self.decision_function(X)\n\n if binary:\n prob2 = np.ones((scores.shape[0], 2))\n prob = prob2[:, 1]\n else:\n prob = scores\n\n np.clip(scores, -1, 1, prob)\n prob += 1.\n prob /= 2.\n\n if binary:\n prob2[:, 0] -= prob\n prob = prob2\n else:\n # the above might assign zero to all classes, which doesn't\n # normalize neatly; work around this to produce uniform\n # probabilities\n prob_sum = prob.sum(axis=1)\n all_zero = (prob_sum == 0)\n if np.any(all_zero):\n prob[all_zero, :] = 1\n prob_sum[all_zero] = len(self.classes_)\n\n # normalize\n prob /= prob_sum.reshape((prob.shape[0], -1))\n\n return prob\n\n else:\n raise NotImplementedError(\"predict_(log_)proba only supported when\"\n \" 
loss='log' or loss='modified_huber' \"\n \"(%r given)\" % self.loss)\n\n @property\n def predict_log_proba(self):\n \"\"\"Log of probability estimates.\n\n This method is only available for log loss and modified Huber loss.\n\n When loss=\"modified_huber\", probability estimates may be hard zeros\n and ones, so taking the logarithm is not possible.\n\n See ``predict_proba`` for details.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data for prediction.\n\n Returns\n -------\n T : array-like, shape (n_samples, n_classes)\n Returns the log-probability of the sample for each class in the\n model, where classes are ordered as they are in\n `self.classes_`.\n \"\"\"\n self._check_proba()\n return self._predict_log_proba\n\n def _predict_log_proba(self, X):\n return np.log(self.predict_proba(X))\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor", + "name": "SGDRegressor", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor", + "decorators": [], + "superclasses": ["BaseSGDRegressor"], + "methods": [ + "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__", + "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Linear model fitted by minimizing a regularized empirical loss with SGD\n\nSGD stands for Stochastic Gradient Descent: the gradient of the loss is\nestimated each sample at a time and the model is updated along the way with\na decreasing strength schedule (aka learning rate).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nThis implementation works with data represented as dense numpy arrays of\nfloating point values for the features.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Linear model fitted by minimizing a regularized empirical loss with SGD\n\nSGD stands for Stochastic Gradient Descent: the gradient of the loss is\nestimated each sample at a time and the model is updated along the way with\na decreasing strength schedule (aka learning rate).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nThis implementation works with data represented as dense numpy arrays of\nfloating point values for the features.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nloss : str, default='squared_loss'\n The loss function to be used. 
The possible values are 'squared_loss',\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'\n\n The 'squared_loss' refers to the ordinary least squares fit.\n 'huber' modifies 'squared_loss' to focus less on getting outliers\n correct by switching from squared to linear loss past a distance of\n epsilon. 'epsilon_insensitive' ignores errors less than epsilon and is\n linear past that; this is the loss function used in SVR.\n 'squared_epsilon_insensitive' is the same but becomes squared loss past\n a tolerance of epsilon.\n\n More details about the losses formulas can be found in the\n :ref:`User Guide `.\n\npenalty : {'l2', 'l1', 'elasticnet'}, default='l2'\n The penalty (aka regularization term) to be used. Defaults to 'l2'\n which is the standard regularizer for linear SVM models. 'l1' and\n 'elasticnet' might bring sparsity to the model (feature selection)\n not achievable with 'l2'.\n\nalpha : float, default=0.0001\n Constant that multiplies the regularization term. The higher the\n value, the stronger the regularization.\n Also used to compute the learning rate when `learning_rate` is\n set to 'optimal'.\n\nl1_ratio : float, default=0.15\n The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n Only used if `penalty` is 'elasticnet'.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered.\n\nmax_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-3\n The stopping criterion. If it is not None, training will stop\n when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n epochs.\n Convergence is checked against the training loss or the\n validation loss depending on the `early_stopping` parameter.\n\n .. versionadded:: 0.19\n\nshuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\nverbose : int, default=0\n The verbosity level.\n\nepsilon : float, default=0.1\n Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n For 'huber', determines the threshold at which it becomes less\n important to get the prediction exactly right.\n For epsilon-insensitive, any differences between the current prediction\n and the correct label are ignored if they are less than this threshold.\n\nrandom_state : int, RandomState instance, default=None\n Used for shuffling the data, when ``shuffle`` is set to ``True``.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nlearning_rate : string, default='invscaling'\n The learning rate schedule:\n\n - 'constant': `eta = eta0`\n - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n where t0 is chosen by a heuristic proposed by Leon Bottou.\n - 'invscaling': `eta = eta0 / pow(t, power_t)`\n - 'adaptive': eta = eta0, as long as the training loss keeps decreasing.\n Each time n_iter_no_change consecutive epochs fail to decrease the\n training loss by tol or fail to increase validation score by tol if\n early_stopping is True, the current learning rate is divided by 5.\n\n .. 
versionadded:: 0.20\n Added 'adaptive' option\n\neta0 : double, default=0.01\n The initial learning rate for the 'constant', 'invscaling' or\n 'adaptive' schedules. The default value is 0.01.\n\npower_t : double, default=0.25\n The exponent for inverse scaling learning rate.\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to True, it will automatically set aside\n a fraction of training data as validation and terminate\n training when validation score returned by the `score` method is not\n improving by at least `tol` for `n_iter_no_change` consecutive\n epochs.\n\n .. versionadded:: 0.20\n Added 'early_stopping' option\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if `early_stopping` is True.\n\n .. versionadded:: 0.20\n Added 'validation_fraction' option\n\nn_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before stopping\n fitting.\n Convergence is checked against the training loss or the\n validation loss depending on the `early_stopping` parameter.\n\n .. versionadded:: 0.20\n Added 'n_iter_no_change' option\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n If a dynamic learning rate is used, the learning rate is adapted\n depending on the number of samples already seen. Calling ``fit`` resets\n this counter, while ``partial_fit`` will result in increasing the\n existing counter.\n\naverage : bool or int, default=False\n When set to True, computes the averaged SGD weights across all\n updates and stores the result in the ``coef_`` attribute. If set to\n an int greater than 1, averaging will begin once the total number of\n samples seen reaches `average`. So ``average=10`` will begin\n averaging after seeing 10 samples.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,)\n Weights assigned to the features.\n\nintercept_ : ndarray of shape (1,)\n The intercept term.\n\naverage_coef_ : ndarray of shape (n_features,)\n Averaged weights assigned to the features. Only available\n if ``average=True``.\n\n .. deprecated:: 0.23\n Attribute ``average_coef_`` was deprecated\n in version 0.23 and will be removed in 1.0 (renaming of 0.25).\n\naverage_intercept_ : ndarray of shape (1,)\n The averaged intercept term. Only available if ``average=True``.\n\n .. deprecated:: 0.23\n Attribute ``average_intercept_`` was deprecated\n in version 0.23 and will be removed in 1.0 (renaming of 0.25).\n\nn_iter_ : int\n The actual number of iterations before reaching the stopping criterion.\n\nt_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import SGDRegressor\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> # Always scale the input. 
The most convenient way is to use a pipeline.\n>>> reg = make_pipeline(StandardScaler(),\n... SGDRegressor(max_iter=1000, tol=1e-3))\n>>> reg.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('sgdregressor', SGDRegressor())])\n\nSee Also\n--------\nRidge, ElasticNet, Lasso, sklearn.svm.SVR", + "code": "class SGDRegressor(BaseSGDRegressor):\n \"\"\"Linear model fitted by minimizing a regularized empirical loss with SGD\n\n SGD stands for Stochastic Gradient Descent: the gradient of the loss is\n estimated each sample at a time and the model is updated along the way with\n a decreasing strength schedule (aka learning rate).\n\n The regularizer is a penalty added to the loss function that shrinks model\n parameters towards the zero vector using either the squared euclidean norm\n L2 or the absolute norm L1 or a combination of both (Elastic Net). If the\n parameter update crosses the 0.0 value because of the regularizer, the\n update is truncated to 0.0 to allow for learning sparse models and achieve\n online feature selection.\n\n This implementation works with data represented as dense numpy arrays of\n floating point values for the features.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n loss : str, default='squared_loss'\n The loss function to be used. The possible values are 'squared_loss',\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'\n\n The 'squared_loss' refers to the ordinary least squares fit.\n 'huber' modifies 'squared_loss' to focus less on getting outliers\n correct by switching from squared to linear loss past a distance of\n epsilon. 'epsilon_insensitive' ignores errors less than epsilon and is\n linear past that; this is the loss function used in SVR.\n 'squared_epsilon_insensitive' is the same but becomes squared loss past\n a tolerance of epsilon.\n\n More details about the losses formulas can be found in the\n :ref:`User Guide `.\n\n penalty : {'l2', 'l1', 'elasticnet'}, default='l2'\n The penalty (aka regularization term) to be used. Defaults to 'l2'\n which is the standard regularizer for linear SVM models. 'l1' and\n 'elasticnet' might bring sparsity to the model (feature selection)\n not achievable with 'l2'.\n\n alpha : float, default=0.0001\n Constant that multiplies the regularization term. The higher the\n value, the stronger the regularization.\n Also used to compute the learning rate when `learning_rate` is\n set to 'optimal'.\n\n l1_ratio : float, default=0.15\n The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\n l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\n Only used if `penalty` is 'elasticnet'.\n\n fit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If False, the\n data is assumed to be already centered.\n\n max_iter : int, default=1000\n The maximum number of passes over the training data (aka epochs).\n It only impacts the behavior in the ``fit`` method, and not the\n :meth:`partial_fit` method.\n\n .. versionadded:: 0.19\n\n tol : float, default=1e-3\n The stopping criterion. If it is not None, training will stop\n when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\n epochs.\n Convergence is checked against the training loss or the\n validation loss depending on the `early_stopping` parameter.\n\n .. 
versionadded:: 0.19\n\n shuffle : bool, default=True\n Whether or not the training data should be shuffled after each epoch.\n\n verbose : int, default=0\n The verbosity level.\n\n epsilon : float, default=0.1\n Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n For 'huber', determines the threshold at which it becomes less\n important to get the prediction exactly right.\n For epsilon-insensitive, any differences between the current prediction\n and the correct label are ignored if they are less than this threshold.\n\n random_state : int, RandomState instance, default=None\n Used for shuffling the data, when ``shuffle`` is set to ``True``.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n learning_rate : string, default='invscaling'\n The learning rate schedule:\n\n - 'constant': `eta = eta0`\n - 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n where t0 is chosen by a heuristic proposed by Leon Bottou.\n - 'invscaling': `eta = eta0 / pow(t, power_t)`\n - 'adaptive': eta = eta0, as long as the training loss keeps decreasing.\n Each time n_iter_no_change consecutive epochs fail to decrease the\n training loss by tol or fail to increase validation score by tol if\n early_stopping is True, the current learning rate is divided by 5.\n\n .. versionadded:: 0.20\n Added 'adaptive' option\n\n eta0 : double, default=0.01\n The initial learning rate for the 'constant', 'invscaling' or\n 'adaptive' schedules. The default value is 0.01.\n\n power_t : double, default=0.25\n The exponent for inverse scaling learning rate.\n\n early_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to True, it will automatically set aside\n a fraction of training data as validation and terminate\n training when validation score returned by the `score` method is not\n improving by at least `tol` for `n_iter_no_change` consecutive\n epochs.\n\n .. versionadded:: 0.20\n Added 'early_stopping' option\n\n validation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if `early_stopping` is True.\n\n .. versionadded:: 0.20\n Added 'validation_fraction' option\n\n n_iter_no_change : int, default=5\n Number of iterations with no improvement to wait before stopping\n fitting.\n Convergence is checked against the training loss or the\n validation loss depending on the `early_stopping` parameter.\n\n .. versionadded:: 0.20\n Added 'n_iter_no_change' option\n\n warm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\n Repeatedly calling fit or partial_fit when warm_start is True can\n result in a different solution than when calling fit a single time\n because of the way the data is shuffled.\n If a dynamic learning rate is used, the learning rate is adapted\n depending on the number of samples already seen. Calling ``fit`` resets\n this counter, while ``partial_fit`` will result in increasing the\n existing counter.\n\n average : bool or int, default=False\n When set to True, computes the averaged SGD weights across all\n updates and stores the result in the ``coef_`` attribute. 
If set to\n an int greater than 1, averaging will begin once the total number of\n samples seen reaches `average`. So ``average=10`` will begin\n averaging after seeing 10 samples.\n\n Attributes\n ----------\n coef_ : ndarray of shape (n_features,)\n Weights assigned to the features.\n\n intercept_ : ndarray of shape (1,)\n The intercept term.\n\n average_coef_ : ndarray of shape (n_features,)\n Averaged weights assigned to the features. Only available\n if ``average=True``.\n\n .. deprecated:: 0.23\n Attribute ``average_coef_`` was deprecated\n in version 0.23 and will be removed in 1.0 (renaming of 0.25).\n\n average_intercept_ : ndarray of shape (1,)\n The averaged intercept term. Only available if ``average=True``.\n\n .. deprecated:: 0.23\n Attribute ``average_intercept_`` was deprecated\n in version 0.23 and will be removed in 1.0 (renaming of 0.25).\n\n n_iter_ : int\n The actual number of iterations before reaching the stopping criterion.\n\n t_ : int\n Number of weight updates performed during training.\n Same as ``(n_iter_ * n_samples)``.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.linear_model import SGDRegressor\n >>> from sklearn.pipeline import make_pipeline\n >>> from sklearn.preprocessing import StandardScaler\n >>> n_samples, n_features = 10, 5\n >>> rng = np.random.RandomState(0)\n >>> y = rng.randn(n_samples)\n >>> X = rng.randn(n_samples, n_features)\n >>> # Always scale the input. The most convenient way is to use a pipeline.\n >>> reg = make_pipeline(StandardScaler(),\n ... SGDRegressor(max_iter=1000, tol=1e-3))\n >>> reg.fit(X, y)\n Pipeline(steps=[('standardscaler', StandardScaler()),\n ('sgdregressor', SGDRegressor())])\n\n See Also\n --------\n Ridge, ElasticNet, Lasso, sklearn.svm.SVR\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, loss=\"squared_loss\", *, penalty=\"l2\", alpha=0.0001,\n l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=1e-3,\n shuffle=True, verbose=0, epsilon=DEFAULT_EPSILON,\n random_state=None, learning_rate=\"invscaling\", eta0=0.01,\n power_t=0.25, early_stopping=False, validation_fraction=0.1,\n n_iter_no_change=5, warm_start=False, average=False):\n super().__init__(\n loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,\n fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,\n shuffle=shuffle, verbose=verbose, epsilon=epsilon,\n random_state=random_state, learning_rate=learning_rate, eta0=eta0,\n power_t=power_t, early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, warm_start=warm_start,\n average=average)\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback", + "name": "_ValidationScoreCallback", + "qname": "sklearn.linear_model._stochastic_gradient._ValidationScoreCallback", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback/__init__", + "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback/__call__" + ], + "is_public": false, + "reexported_by": [], + "description": "Callback for early stopping based on validation score", + "docstring": "Callback for early stopping based on validation score", + "code": "class _ValidationScoreCallback:\n \"\"\"Callback for early stopping based 
on validation score\"\"\"\n\n def __init__(self, estimator, X_val, y_val, sample_weight_val,\n classes=None):\n self.estimator = clone(estimator)\n self.estimator.t_ = 1 # to pass check_is_fitted\n if classes is not None:\n self.estimator.classes_ = classes\n self.X_val = X_val\n self.y_val = y_val\n self.sample_weight_val = sample_weight_val\n\n def __call__(self, coef, intercept):\n est = self.estimator\n est.coef_ = coef.reshape(1, -1)\n est.intercept_ = np.atleast_1d(intercept)\n return est.score(self.X_val, self.y_val, self.sample_weight_val)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor", + "name": "TheilSenRegressor", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor", + "decorators": [], + "superclasses": ["RegressorMixin", "LinearModel"], + "methods": [ + "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/__init__", + "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/_check_subparams", + "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Theil-Sen Estimator: robust multivariate regression model.\n\nThe algorithm calculates least square solutions on subsets with size\nn_subsamples of the samples in X. Any value of n_subsamples between the\nnumber of features and samples leads to an estimator with a compromise\nbetween robustness and efficiency. Since the number of least square\nsolutions is \"n_samples choose n_subsamples\", it can be extremely large\nand can therefore be limited with max_subpopulation. If this limit is\nreached, the subsets are chosen randomly. In a final step, the spatial\nmedian (or L1 median) is calculated of all least square solutions.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Theil-Sen Estimator: robust multivariate regression model.\n\nThe algorithm calculates least square solutions on subsets with size\nn_subsamples of the samples in X. Any value of n_subsamples between the\nnumber of features and samples leads to an estimator with a compromise\nbetween robustness and efficiency. Since the number of least square\nsolutions is \"n_samples choose n_subsamples\", it can be extremely large\nand can therefore be limited with max_subpopulation. If this limit is\nreached, the subsets are chosen randomly. In a final step, the spatial\nmedian (or L1 median) is calculated of all least square solutions.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations.\n\ncopy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\nmax_subpopulation : int, default=1e4\n Instead of computing with a set of cardinality 'n choose k', where n is\n the number of samples and k is the number of subsamples (at least\n number of features), consider only a stochastic subpopulation of a\n given maximal size if 'n choose k' is larger than max_subpopulation.\n For other than small problem sizes this parameter will determine\n memory usage and runtime if n_subsamples is not changed.\n\nn_subsamples : int, default=None\n Number of samples to calculate the parameters. This is at least the\n number of features (plus 1 if fit_intercept=True) and the number of\n samples as a maximum. 
A lower number leads to a higher breakdown\n point and a low efficiency while a high number leads to a low\n breakdown point and a high efficiency. If None, take the\n minimum number of subsamples leading to maximal robustness.\n If n_subsamples is set to n_samples, Theil-Sen is identical to least\n squares.\n\nmax_iter : int, default=300\n Maximum number of iterations for the calculation of spatial median.\n\ntol : float, default=1.e-3\n Tolerance when calculating spatial median.\n\nrandom_state : int, RandomState instance or None, default=None\n A random number generator instance to define the state of the random\n permutations generator. Pass an int for reproducible output across\n multiple function calls.\n See :term:`Glossary `\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n Verbose mode when fitting the model.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,)\n Coefficients of the regression model (median of distribution).\n\nintercept_ : float\n Estimated intercept of regression model.\n\nbreakdown_ : float\n Approximated breakdown point.\n\nn_iter_ : int\n Number of iterations needed for the spatial median.\n\nn_subpopulation_ : int\n Number of combinations taken into account from 'n choose k', where n is\n the number of samples and k is the number of subsamples.\n\nExamples\n--------\n>>> from sklearn.linear_model import TheilSenRegressor\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(\n... n_samples=200, n_features=2, noise=4.0, random_state=0)\n>>> reg = TheilSenRegressor(random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9884...\n>>> reg.predict(X[:1,])\narray([-31.5871...])\n\nReferences\n----------\n- Theil-Sen Estimators in a Multiple Linear Regression Model, 2009\n Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang\n http://home.olemiss.edu/~xdang/papers/MTSE.pdf", + "code": "class TheilSenRegressor(RegressorMixin, LinearModel):\n \"\"\"Theil-Sen Estimator: robust multivariate regression model.\n\n The algorithm calculates least square solutions on subsets with size\n n_subsamples of the samples in X. Any value of n_subsamples between the\n number of features and samples leads to an estimator with a compromise\n between robustness and efficiency. Since the number of least square\n solutions is \"n_samples choose n_subsamples\", it can be extremely large\n and can therefore be limited with max_subpopulation. If this limit is\n reached, the subsets are chosen randomly. In a final step, the spatial\n median (or L1 median) is calculated of all least square solutions.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model. 
If set\n to false, no intercept will be used in calculations.\n\n copy_X : bool, default=True\n If True, X will be copied; else, it may be overwritten.\n\n max_subpopulation : int, default=1e4\n Instead of computing with a set of cardinality 'n choose k', where n is\n the number of samples and k is the number of subsamples (at least\n number of features), consider only a stochastic subpopulation of a\n given maximal size if 'n choose k' is larger than max_subpopulation.\n For other than small problem sizes this parameter will determine\n memory usage and runtime if n_subsamples is not changed.\n\n n_subsamples : int, default=None\n Number of samples to calculate the parameters. This is at least the\n number of features (plus 1 if fit_intercept=True) and the number of\n samples as a maximum. A lower number leads to a higher breakdown\n point and a low efficiency while a high number leads to a low\n breakdown point and a high efficiency. If None, take the\n minimum number of subsamples leading to maximal robustness.\n If n_subsamples is set to n_samples, Theil-Sen is identical to least\n squares.\n\n max_iter : int, default=300\n Maximum number of iterations for the calculation of spatial median.\n\n tol : float, default=1.e-3\n Tolerance when calculating spatial median.\n\n random_state : int, RandomState instance or None, default=None\n A random number generator instance to define the state of the random\n permutations generator. Pass an int for reproducible output across\n multiple function calls.\n See :term:`Glossary `\n\n n_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n verbose : bool, default=False\n Verbose mode when fitting the model.\n\n Attributes\n ----------\n coef_ : ndarray of shape (n_features,)\n Coefficients of the regression model (median of distribution).\n\n intercept_ : float\n Estimated intercept of regression model.\n\n breakdown_ : float\n Approximated breakdown point.\n\n n_iter_ : int\n Number of iterations needed for the spatial median.\n\n n_subpopulation_ : int\n Number of combinations taken into account from 'n choose k', where n is\n the number of samples and k is the number of subsamples.\n\n Examples\n --------\n >>> from sklearn.linear_model import TheilSenRegressor\n >>> from sklearn.datasets import make_regression\n >>> X, y = make_regression(\n ... 
n_samples=200, n_features=2, noise=4.0, random_state=0)\n >>> reg = TheilSenRegressor(random_state=0).fit(X, y)\n >>> reg.score(X, y)\n 0.9884...\n >>> reg.predict(X[:1,])\n array([-31.5871...])\n\n References\n ----------\n - Theil-Sen Estimators in a Multiple Linear Regression Model, 2009\n Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang\n http://home.olemiss.edu/~xdang/papers/MTSE.pdf\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, fit_intercept=True, copy_X=True,\n max_subpopulation=1e4, n_subsamples=None, max_iter=300,\n tol=1.e-3, random_state=None, n_jobs=None, verbose=False):\n self.fit_intercept = fit_intercept\n self.copy_X = copy_X\n self.max_subpopulation = int(max_subpopulation)\n self.n_subsamples = n_subsamples\n self.max_iter = max_iter\n self.tol = tol\n self.random_state = random_state\n self.n_jobs = n_jobs\n self.verbose = verbose\n\n def _check_subparams(self, n_samples, n_features):\n n_subsamples = self.n_subsamples\n\n if self.fit_intercept:\n n_dim = n_features + 1\n else:\n n_dim = n_features\n\n if n_subsamples is not None:\n if n_subsamples > n_samples:\n raise ValueError(\"Invalid parameter since n_subsamples > \"\n \"n_samples ({0} > {1}).\".format(n_subsamples,\n n_samples))\n if n_samples >= n_features:\n if n_dim > n_subsamples:\n plus_1 = \"+1\" if self.fit_intercept else \"\"\n raise ValueError(\"Invalid parameter since n_features{0} \"\n \"> n_subsamples ({1} > {2}).\"\n \"\".format(plus_1, n_dim, n_subsamples))\n else: # if n_samples < n_features\n if n_subsamples != n_samples:\n raise ValueError(\"Invalid parameter since n_subsamples != \"\n \"n_samples ({0} != {1}) while n_samples \"\n \"< n_features.\".format(n_subsamples,\n n_samples))\n else:\n n_subsamples = min(n_dim, n_samples)\n\n if self.max_subpopulation <= 0:\n raise ValueError(\"Subpopulation must be strictly positive \"\n \"({0} <= 0).\".format(self.max_subpopulation))\n\n all_combinations = max(1, np.rint(binom(n_samples, n_subsamples)))\n n_subpopulation = int(min(self.max_subpopulation, all_combinations))\n\n return n_subsamples, n_subpopulation\n\n def fit(self, X, y):\n \"\"\"Fit linear model.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Training data.\n y : ndarray of shape (n_samples,)\n Target values.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n random_state = check_random_state(self.random_state)\n X, y = self._validate_data(X, y, y_numeric=True)\n n_samples, n_features = X.shape\n n_subsamples, self.n_subpopulation_ = self._check_subparams(n_samples,\n n_features)\n self.breakdown_ = _breakdown_point(n_samples, n_subsamples)\n\n if self.verbose:\n print(\"Breakdown point: {0}\".format(self.breakdown_))\n print(\"Number of samples: {0}\".format(n_samples))\n tol_outliers = int(self.breakdown_ * n_samples)\n print(\"Tolerable outliers: {0}\".format(tol_outliers))\n print(\"Number of subpopulations: {0}\".format(\n self.n_subpopulation_))\n\n # Determine indices of subpopulation\n if np.rint(binom(n_samples, n_subsamples)) <= self.max_subpopulation:\n indices = list(combinations(range(n_samples), n_subsamples))\n else:\n indices = [random_state.choice(n_samples, size=n_subsamples,\n replace=False)\n for _ in range(self.n_subpopulation_)]\n\n n_jobs = effective_n_jobs(self.n_jobs)\n index_list = np.array_split(indices, n_jobs)\n weights = Parallel(n_jobs=n_jobs,\n verbose=self.verbose)(\n delayed(_lstsq)(X, y, index_list[job], self.fit_intercept)\n for job in range(n_jobs))\n weights = np.vstack(weights)\n 
self.n_iter_, coefs = _spatial_median(weights,\n max_iter=self.max_iter,\n tol=self.tol)\n\n if self.fit_intercept:\n self.intercept_ = coefs[0]\n self.coef_ = coefs[1:]\n else:\n self.intercept_ = 0.\n self.coef_ = coefs\n\n return self", + "instance_attributes": [ + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy_X", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "max_subpopulation", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap", + "name": "Isomap", + "qname": "sklearn.manifold._isomap.Isomap", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.manifold._isomap/Isomap/__init__", + "scikit-learn/sklearn.manifold._isomap/Isomap/_fit_transform", + "scikit-learn/sklearn.manifold._isomap/Isomap/reconstruction_error", + "scikit-learn/sklearn.manifold._isomap/Isomap/fit", + "scikit-learn/sklearn.manifold._isomap/Isomap/fit_transform", + "scikit-learn/sklearn.manifold._isomap/Isomap/transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Isomap Embedding\n\nNon-linear dimensionality reduction through Isometric Mapping\n\nRead more in the :ref:`User Guide `.", + "docstring": "Isomap Embedding\n\nNon-linear dimensionality reduction through Isometric Mapping\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_neighbors : int, default=5\n number of neighbors to consider for each point.\n\nn_components : int, default=2\n number of coordinates for the manifold\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n 'auto' : Attempt to choose the most efficient solver\n for the given problem.\n\n 'arpack' : Use Arnoldi decomposition to find the eigenvalues\n and eigenvectors.\n\n 'dense' : Use a direct solver (i.e. LAPACK)\n for the eigenvalue decomposition.\n\ntol : float, default=0\n Convergence tolerance passed to arpack or lobpcg.\n not used if eigen_solver == 'dense'.\n\nmax_iter : int, default=None\n Maximum number of iterations for the arpack solver.\n not used if eigen_solver == 'dense'.\n\npath_method : {'auto', 'FW', 'D'}, default='auto'\n Method to use in finding shortest path.\n\n 'auto' : attempt to choose the best algorithm automatically.\n\n 'FW' : Floyd-Warshall algorithm.\n\n 'D' : Dijkstra's algorithm.\n\nneighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, default='auto'\n Algorithm to use for nearest neighbors search,\n passed to neighbors.NearestNeighbors instance.\n\nn_jobs : int or None, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nmetric : string, or callable, default=\"minkowski\"\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square. X may be a :term:`Glossary `.\n\n .. 
versionadded:: 0.22\n\np : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n .. versionadded:: 0.22\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nembedding_ : array-like, shape (n_samples, n_components)\n Stores the embedding vectors.\n\nkernel_pca_ : object\n :class:`~sklearn.decomposition.KernelPCA` object used to implement the\n embedding.\n\nnbrs_ : sklearn.neighbors.NearestNeighbors instance\n Stores nearest neighbors instance, including BallTree or KDtree\n if applicable.\n\ndist_matrix_ : array-like, shape (n_samples, n_samples)\n Stores the geodesic distance matrix of training data.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import Isomap\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = Isomap(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)\n\nReferences\n----------\n\n.. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric\n framework for nonlinear dimensionality reduction. Science 290 (5500)", + "code": "class Isomap(TransformerMixin, BaseEstimator):\n \"\"\"Isomap Embedding\n\n Non-linear dimensionality reduction through Isometric Mapping\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_neighbors : int, default=5\n number of neighbors to consider for each point.\n\n n_components : int, default=2\n number of coordinates for the manifold\n\n eigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n 'auto' : Attempt to choose the most efficient solver\n for the given problem.\n\n 'arpack' : Use Arnoldi decomposition to find the eigenvalues\n and eigenvectors.\n\n 'dense' : Use a direct solver (i.e. LAPACK)\n for the eigenvalue decomposition.\n\n tol : float, default=0\n Convergence tolerance passed to arpack or lobpcg.\n not used if eigen_solver == 'dense'.\n\n max_iter : int, default=None\n Maximum number of iterations for the arpack solver.\n not used if eigen_solver == 'dense'.\n\n path_method : {'auto', 'FW', 'D'}, default='auto'\n Method to use in finding shortest path.\n\n 'auto' : attempt to choose the best algorithm automatically.\n\n 'FW' : Floyd-Warshall algorithm.\n\n 'D' : Dijkstra's algorithm.\n\n neighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, \\\n default='auto'\n Algorithm to use for nearest neighbors search,\n passed to neighbors.NearestNeighbors instance.\n\n n_jobs : int or None, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n metric : string, or callable, default=\"minkowski\"\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square. X may be a :term:`Glossary `.\n\n .. versionadded:: 0.22\n\n p : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. 
When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n .. versionadded:: 0.22\n\n metric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. versionadded:: 0.22\n\n Attributes\n ----------\n embedding_ : array-like, shape (n_samples, n_components)\n Stores the embedding vectors.\n\n kernel_pca_ : object\n :class:`~sklearn.decomposition.KernelPCA` object used to implement the\n embedding.\n\n nbrs_ : sklearn.neighbors.NearestNeighbors instance\n Stores nearest neighbors instance, including BallTree or KDtree\n if applicable.\n\n dist_matrix_ : array-like, shape (n_samples, n_samples)\n Stores the geodesic distance matrix of training data.\n\n Examples\n --------\n >>> from sklearn.datasets import load_digits\n >>> from sklearn.manifold import Isomap\n >>> X, _ = load_digits(return_X_y=True)\n >>> X.shape\n (1797, 64)\n >>> embedding = Isomap(n_components=2)\n >>> X_transformed = embedding.fit_transform(X[:100])\n >>> X_transformed.shape\n (100, 2)\n\n References\n ----------\n\n .. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric\n framework for nonlinear dimensionality reduction. Science 290 (5500)\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, n_neighbors=5, n_components=2, eigen_solver='auto',\n tol=0, max_iter=None, path_method='auto',\n neighbors_algorithm='auto', n_jobs=None, metric='minkowski',\n p=2, metric_params=None):\n self.n_neighbors = n_neighbors\n self.n_components = n_components\n self.eigen_solver = eigen_solver\n self.tol = tol\n self.max_iter = max_iter\n self.path_method = path_method\n self.neighbors_algorithm = neighbors_algorithm\n self.n_jobs = n_jobs\n self.metric = metric\n self.p = p\n self.metric_params = metric_params\n\n def _fit_transform(self, X):\n self.nbrs_ = NearestNeighbors(n_neighbors=self.n_neighbors,\n algorithm=self.neighbors_algorithm,\n metric=self.metric, p=self.p,\n metric_params=self.metric_params,\n n_jobs=self.n_jobs)\n self.nbrs_.fit(X)\n self.n_features_in_ = self.nbrs_.n_features_in_\n\n self.kernel_pca_ = KernelPCA(n_components=self.n_components,\n kernel=\"precomputed\",\n eigen_solver=self.eigen_solver,\n tol=self.tol, max_iter=self.max_iter,\n n_jobs=self.n_jobs)\n\n kng = kneighbors_graph(self.nbrs_, self.n_neighbors,\n metric=self.metric, p=self.p,\n metric_params=self.metric_params,\n mode='distance', n_jobs=self.n_jobs)\n\n self.dist_matrix_ = graph_shortest_path(kng,\n method=self.path_method,\n directed=False)\n G = self.dist_matrix_ ** 2\n G *= -0.5\n\n self.embedding_ = self.kernel_pca_.fit_transform(G)\n\n def reconstruction_error(self):\n \"\"\"Compute the reconstruction error for the embedding.\n\n Returns\n -------\n reconstruction_error : float\n\n Notes\n -----\n The cost function of an isomap embedding is\n\n ``E = frobenius_norm[K(D) - K(D_fit)] / n_samples``\n\n Where D is the matrix of distances for the input data X,\n D_fit is the matrix of distances for the output embedding X_fit,\n and K is the isomap kernel:\n\n ``K(D) = -0.5 * (I - 1/n_samples) * D^2 * (I - 1/n_samples)``\n \"\"\"\n G = -0.5 * self.dist_matrix_ ** 2\n G_center = KernelCenterer().fit_transform(G)\n evals = self.kernel_pca_.lambdas_\n return np.sqrt(np.sum(G_center ** 2) - np.sum(evals ** 2)) / G.shape[0]\n\n def fit(self, X, y=None):\n \"\"\"Compute the embedding vectors for data X\n\n Parameters\n ----------\n X : {array-like, sparse graph, BallTree, KDTree, 
NearestNeighbors}\n Sample data, shape = (n_samples, n_features), in the form of a\n numpy array, sparse graph, precomputed tree, or NearestNeighbors\n object.\n\n y : Ignored\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._fit_transform(X)\n return self\n\n def fit_transform(self, X, y=None):\n \"\"\"Fit the model from data in X and transform X.\n\n Parameters\n ----------\n X : {array-like, sparse graph, BallTree, KDTree}\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n X_new : array-like, shape (n_samples, n_components)\n \"\"\"\n self._fit_transform(X)\n return self.embedding_\n\n def transform(self, X):\n \"\"\"Transform X.\n\n This is implemented by linking the points X into the graph of geodesic\n distances of the training data. First the `n_neighbors` nearest\n neighbors of X are found in the training data, and from these the\n shortest geodesic distances from each point in X to each point in\n the training data are computed in order to construct the kernel.\n The embedding of X is the projection of this kernel onto the\n embedding vectors of the training set.\n\n Parameters\n ----------\n X : array-like, shape (n_queries, n_features)\n If neighbors_algorithm='precomputed', X is assumed to be a\n distance matrix or a sparse graph of shape\n (n_queries, n_samples_fit).\n\n Returns\n -------\n X_new : array-like, shape (n_queries, n_components)\n \"\"\"\n check_is_fitted(self)\n distances, indices = self.nbrs_.kneighbors(X, return_distance=True)\n\n # Create the graph of shortest distances from X to\n # training data via the nearest neighbors of X.\n # This can be done as a single array operation, but it potentially\n # takes a lot of memory. 
To avoid that, use a loop:\n\n n_samples_fit = self.nbrs_.n_samples_fit_\n n_queries = distances.shape[0]\n G_X = np.zeros((n_queries, n_samples_fit))\n for i in range(n_queries):\n G_X[i] = np.min(self.dist_matrix_[indices[i]] +\n distances[i][:, None], 0)\n\n G_X **= 2\n G_X *= -0.5\n\n return self.kernel_pca_.transform(G_X)", + "instance_attributes": [ + { + "name": "n_neighbors", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "eigen_solver", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "path_method", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "neighbors_algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "metric", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "p", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "kernel_pca_", + "types": { + "kind": "NamedType", + "name": "KernelPCA" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding", + "name": "LocallyLinearEmbedding", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding", + "decorators": [], + "superclasses": ["TransformerMixin", "_UnstableArchMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__", + "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/_fit_transform", + "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/fit", + "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/fit_transform", + "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Locally Linear Embedding\n\nRead more in the :ref:`User Guide `.", + "docstring": "Locally Linear Embedding\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_neighbors : int, default=5\n number of neighbors to consider for each point.\n\nn_components : int, default=2\n number of coordinates for the manifold\n\nreg : float, default=1e-3\n regularization constant, multiplies the trace of the local covariance\n matrix of the distances.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n auto : algorithm will attempt to choose the best method for input data\n\n arpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n\n dense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems.\n\ntol : float, default=1e-6\n Tolerance for 'arpack' method\n Not used if eigen_solver=='dense'.\n\nmax_iter : int, default=100\n maximum number of iterations for the arpack solver.\n Not used if eigen_solver=='dense'.\n\nmethod : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n - `standard`: use the standard locally linear embedding algorithm. see\n reference [1]_\n - `hessian`: use the Hessian eigenmap method. This method requires\n ``n_neighbors > n_components * (1 + (n_components + 1) / 2)``. 
see\n reference [2]_\n - `modified`: use the modified locally linear embedding algorithm.\n see reference [3]_\n - `ltsa`: use local tangent space alignment algorithm. see\n reference [4]_\n\nhessian_tol : float, default=1e-4\n Tolerance for Hessian eigenmapping method.\n Only used if ``method == 'hessian'``\n\nmodified_tol : float, default=1e-12\n Tolerance for modified LLE method.\n Only used if ``method == 'modified'``\n\nneighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, default='auto'\n algorithm to use for nearest neighbors search,\n passed to neighbors.NearestNeighbors instance\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator when\n ``eigen_solver`` == 'arpack'. Pass an int for reproducible results\n across multiple function calls. See :term:`Glossary `.\n\nn_jobs : int or None, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nembedding_ : array-like, shape [n_samples, n_components]\n Stores the embedding vectors\n\nreconstruction_error_ : float\n Reconstruction error associated with `embedding_`\n\nnbrs_ : NearestNeighbors object\n Stores nearest neighbors instance, including BallTree or KDtree\n if applicable.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import LocallyLinearEmbedding\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = LocallyLinearEmbedding(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)\n\nReferences\n----------\n\n.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n by locally linear embedding. Science 290:2323 (2000).\n.. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally\n linear embedding techniques for high-dimensional data.\n Proc Natl Acad Sci U S A. 100:5591 (2003).\n.. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n Embedding Using Multiple Weights.\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382\n.. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n dimensionality reduction via tangent space alignment.\n Journal of Shanghai Univ. 8:406 (2004)", + "code": "class LocallyLinearEmbedding(TransformerMixin,\n _UnstableArchMixin, BaseEstimator):\n \"\"\"Locally Linear Embedding\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_neighbors : int, default=5\n number of neighbors to consider for each point.\n\n n_components : int, default=2\n number of coordinates for the manifold\n\n reg : float, default=1e-3\n regularization constant, multiplies the trace of the local covariance\n matrix of the distances.\n\n eigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n auto : algorithm will attempt to choose the best method for input data\n\n arpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n\n dense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. 
This method should be avoided for\n large problems.\n\n tol : float, default=1e-6\n Tolerance for 'arpack' method\n Not used if eigen_solver=='dense'.\n\n max_iter : int, default=100\n maximum number of iterations for the arpack solver.\n Not used if eigen_solver=='dense'.\n\n method : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n - `standard`: use the standard locally linear embedding algorithm. see\n reference [1]_\n - `hessian`: use the Hessian eigenmap method. This method requires\n ``n_neighbors > n_components * (1 + (n_components + 1) / 2)``. see\n reference [2]_\n - `modified`: use the modified locally linear embedding algorithm.\n see reference [3]_\n - `ltsa`: use local tangent space alignment algorithm. see\n reference [4]_\n\n hessian_tol : float, default=1e-4\n Tolerance for Hessian eigenmapping method.\n Only used if ``method == 'hessian'``\n\n modified_tol : float, default=1e-12\n Tolerance for modified LLE method.\n Only used if ``method == 'modified'``\n\n neighbors_algorithm : {'auto', 'brute', 'kd_tree', 'ball_tree'}, \\\n default='auto'\n algorithm to use for nearest neighbors search,\n passed to neighbors.NearestNeighbors instance\n\n random_state : int, RandomState instance, default=None\n Determines the random number generator when\n ``eigen_solver`` == 'arpack'. Pass an int for reproducible results\n across multiple function calls. See :term:`Glossary `.\n\n n_jobs : int or None, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n embedding_ : array-like, shape [n_samples, n_components]\n Stores the embedding vectors\n\n reconstruction_error_ : float\n Reconstruction error associated with `embedding_`\n\n nbrs_ : NearestNeighbors object\n Stores nearest neighbors instance, including BallTree or KDtree\n if applicable.\n\n Examples\n --------\n >>> from sklearn.datasets import load_digits\n >>> from sklearn.manifold import LocallyLinearEmbedding\n >>> X, _ = load_digits(return_X_y=True)\n >>> X.shape\n (1797, 64)\n >>> embedding = LocallyLinearEmbedding(n_components=2)\n >>> X_transformed = embedding.fit_transform(X[:100])\n >>> X_transformed.shape\n (100, 2)\n\n References\n ----------\n\n .. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n by locally linear embedding. Science 290:2323 (2000).\n .. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally\n linear embedding techniques for high-dimensional data.\n Proc Natl Acad Sci U S A. 100:5591 (2003).\n .. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n Embedding Using Multiple Weights.\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382\n .. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n dimensionality reduction via tangent space alignment.\n Journal of Shanghai Univ. 
8:406 (2004)\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, n_neighbors=5, n_components=2, reg=1E-3,\n eigen_solver='auto', tol=1E-6, max_iter=100,\n method='standard', hessian_tol=1E-4, modified_tol=1E-12,\n neighbors_algorithm='auto', random_state=None, n_jobs=None):\n self.n_neighbors = n_neighbors\n self.n_components = n_components\n self.reg = reg\n self.eigen_solver = eigen_solver\n self.tol = tol\n self.max_iter = max_iter\n self.method = method\n self.hessian_tol = hessian_tol\n self.modified_tol = modified_tol\n self.random_state = random_state\n self.neighbors_algorithm = neighbors_algorithm\n self.n_jobs = n_jobs\n\n def _fit_transform(self, X):\n self.nbrs_ = NearestNeighbors(n_neighbors=self.n_neighbors,\n algorithm=self.neighbors_algorithm,\n n_jobs=self.n_jobs)\n\n random_state = check_random_state(self.random_state)\n X = self._validate_data(X, dtype=float)\n self.nbrs_.fit(X)\n self.embedding_, self.reconstruction_error_ = \\\n locally_linear_embedding(\n X=self.nbrs_, n_neighbors=self.n_neighbors,\n n_components=self.n_components,\n eigen_solver=self.eigen_solver, tol=self.tol,\n max_iter=self.max_iter, method=self.method,\n hessian_tol=self.hessian_tol, modified_tol=self.modified_tol,\n random_state=random_state, reg=self.reg, n_jobs=self.n_jobs)\n\n def fit(self, X, y=None):\n \"\"\"Compute the embedding vectors for data X\n\n Parameters\n ----------\n X : array-like of shape [n_samples, n_features]\n training set.\n\n y : Ignored\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._fit_transform(X)\n return self\n\n def fit_transform(self, X, y=None):\n \"\"\"Compute the embedding vectors for data X and transform X.\n\n Parameters\n ----------\n X : array-like of shape [n_samples, n_features]\n training set.\n\n y : Ignored\n\n Returns\n -------\n X_new : array-like, shape (n_samples, n_components)\n \"\"\"\n self._fit_transform(X)\n return self.embedding_\n\n def transform(self, X):\n \"\"\"\n Transform new points into embedding space.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n X_new : array, shape = [n_samples, n_components]\n\n Notes\n -----\n Because of scaling performed by this method, it is discouraged to use\n it together with methods that are not scale-invariant (like SVMs)\n \"\"\"\n check_is_fitted(self)\n\n X = check_array(X)\n ind = self.nbrs_.kneighbors(X, n_neighbors=self.n_neighbors,\n return_distance=False)\n weights = barycenter_weights(X, self.nbrs_._fit_X, ind, reg=self.reg)\n X_new = np.empty((X.shape[0], self.n_components))\n for i in range(X.shape[0]):\n X_new[i] = np.dot(self.embedding_[ind[i]].T, weights[i])\n return X_new", + "instance_attributes": [ + { + "name": "n_neighbors", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "reg", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "eigen_solver", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "method", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "hessian_tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "modified_tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": 
"neighbors_algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS", + "name": "MDS", + "qname": "sklearn.manifold._mds.MDS", + "decorators": [], + "superclasses": ["BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.manifold._mds/MDS/__init__", + "scikit-learn/sklearn.manifold._mds/MDS/_more_tags", + "scikit-learn/sklearn.manifold._mds/MDS/_pairwise@getter", + "scikit-learn/sklearn.manifold._mds/MDS/fit", + "scikit-learn/sklearn.manifold._mds/MDS/fit_transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Multidimensional scaling.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Multidimensional scaling.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n Number of dimensions in which to immerse the dissimilarities.\n\nmetric : bool, default=True\n If ``True``, perform metric MDS; otherwise, perform nonmetric MDS.\n\nn_init : int, default=4\n Number of times the SMACOF algorithm will be run with different\n initializations. The final results will be the best output of the runs,\n determined by the run with the smallest final stress.\n\nmax_iter : int, default=300\n Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n Level of verbosity.\n\neps : float, default=1e-3\n Relative tolerance with respect to stress at which to declare\n convergence.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. If multiple\n initializations are used (``n_init``), each run of the algorithm is\n computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\ndissimilarity : {'euclidean', 'precomputed'}, default='euclidean'\n Dissimilarity measure to use:\n\n - 'euclidean':\n Pairwise Euclidean distances between points in the dataset.\n\n - 'precomputed':\n Pre-computed dissimilarities are passed directly to ``fit`` and\n ``fit_transform``.\n\nAttributes\n----------\nembedding_ : ndarray of shape (n_samples, n_components)\n Stores the position of the dataset in the embedding space.\n\nstress_ : float\n The final value of the stress (sum of squared distance of the\n disparities and the distances for all constrained points).\n\ndissimilarity_matrix_ : ndarray of shape (n_samples, n_samples)\n Pairwise dissimilarities between the points. Symmetric matrix that:\n\n - either uses a custom dissimilarity matrix by setting `dissimilarity`\n to 'precomputed';\n - or constructs a dissimilarity matrix from data using\n Euclidean distances.\n\nn_iter_ : int\n The number of iterations corresponding to the best stress.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import MDS\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = MDS(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)\n\nReferences\n----------\n\"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\nGroenen P. 
Springer Series in Statistics (1997)\n\n\"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\nPsychometrika, 29 (1964)\n\n\"Multidimensional scaling by optimizing goodness of fit to a nonmetric\nhypothesis\" Kruskal, J. Psychometrika, 29, (1964)", + "code": "class MDS(BaseEstimator):\n \"\"\"Multidimensional scaling.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=2\n Number of dimensions in which to immerse the dissimilarities.\n\n metric : bool, default=True\n If ``True``, perform metric MDS; otherwise, perform nonmetric MDS.\n\n n_init : int, default=4\n Number of times the SMACOF algorithm will be run with different\n initializations. The final results will be the best output of the runs,\n determined by the run with the smallest final stress.\n\n max_iter : int, default=300\n Maximum number of iterations of the SMACOF algorithm for a single run.\n\n verbose : int, default=0\n Level of verbosity.\n\n eps : float, default=1e-3\n Relative tolerance with respect to stress at which to declare\n convergence.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation. If multiple\n initializations are used (``n_init``), each run of the algorithm is\n computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n random_state : int, RandomState instance or None, default=None\n Determines the random number generator used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\n dissimilarity : {'euclidean', 'precomputed'}, default='euclidean'\n Dissimilarity measure to use:\n\n - 'euclidean':\n Pairwise Euclidean distances between points in the dataset.\n\n - 'precomputed':\n Pre-computed dissimilarities are passed directly to ``fit`` and\n ``fit_transform``.\n\n Attributes\n ----------\n embedding_ : ndarray of shape (n_samples, n_components)\n Stores the position of the dataset in the embedding space.\n\n stress_ : float\n The final value of the stress (sum of squared distance of the\n disparities and the distances for all constrained points).\n\n dissimilarity_matrix_ : ndarray of shape (n_samples, n_samples)\n Pairwise dissimilarities between the points. Symmetric matrix that:\n\n - either uses a custom dissimilarity matrix by setting `dissimilarity`\n to 'precomputed';\n - or constructs a dissimilarity matrix from data using\n Euclidean distances.\n\n n_iter_ : int\n The number of iterations corresponding to the best stress.\n\n Examples\n --------\n >>> from sklearn.datasets import load_digits\n >>> from sklearn.manifold import MDS\n >>> X, _ = load_digits(return_X_y=True)\n >>> X.shape\n (1797, 64)\n >>> embedding = MDS(n_components=2)\n >>> X_transformed = embedding.fit_transform(X[:100])\n >>> X_transformed.shape\n (100, 2)\n\n References\n ----------\n \"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\n Groenen P. Springer Series in Statistics (1997)\n\n \"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\n Psychometrika, 29 (1964)\n\n \"Multidimensional scaling by optimizing goodness of fit to a nonmetric\n hypothesis\" Kruskal, J. 
Psychometrika, 29, (1964)\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=2, *, metric=True, n_init=4,\n max_iter=300, verbose=0, eps=1e-3, n_jobs=None,\n random_state=None, dissimilarity=\"euclidean\"):\n self.n_components = n_components\n self.dissimilarity = dissimilarity\n self.metric = metric\n self.n_init = n_init\n self.max_iter = max_iter\n self.eps = eps\n self.verbose = verbose\n self.n_jobs = n_jobs\n self.random_state = random_state\n\n def _more_tags(self):\n return {'pairwise': self.dissimilarity == 'precomputed'}\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n return self.dissimilarity == \"precomputed\"\n\n def fit(self, X, y=None, init=None):\n \"\"\"\n Computes the position of the points in the embedding space.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or \\\n (n_samples, n_samples)\n Input data. If ``dissimilarity=='precomputed'``, the input should\n be the dissimilarity matrix.\n\n y : Ignored\n\n init : ndarray of shape (n_samples,), default=None\n Starting configuration of the embedding to initialize the SMACOF\n algorithm. By default, the algorithm is initialized with a randomly\n chosen array.\n \"\"\"\n self.fit_transform(X, init=init)\n return self\n\n def fit_transform(self, X, y=None, init=None):\n \"\"\"\n Fit the data from X, and return the embedded coordinates.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or \\\n (n_samples, n_samples)\n Input data. If ``dissimilarity=='precomputed'``, the input should\n be the dissimilarity matrix.\n\n y : Ignored\n\n init : ndarray of shape (n_samples,), default=None\n Starting configuration of the embedding to initialize the SMACOF\n algorithm. By default, the algorithm is initialized with a randomly\n chosen array.\n \"\"\"\n X = self._validate_data(X)\n if X.shape[0] == X.shape[1] and self.dissimilarity != \"precomputed\":\n warnings.warn(\"The MDS API has changed. ``fit`` now constructs a\"\n \" dissimilarity matrix from data. 
To use a custom \"\n \"dissimilarity matrix, set \"\n \"``dissimilarity='precomputed'``.\")\n\n if self.dissimilarity == \"precomputed\":\n self.dissimilarity_matrix_ = X\n elif self.dissimilarity == \"euclidean\":\n self.dissimilarity_matrix_ = euclidean_distances(X)\n else:\n raise ValueError(\"Proximity must be 'precomputed' or 'euclidean'.\"\n \" Got %s instead\" % str(self.dissimilarity))\n\n self.embedding_, self.stress_, self.n_iter_ = smacof(\n self.dissimilarity_matrix_, metric=self.metric,\n n_components=self.n_components, init=init, n_init=self.n_init,\n n_jobs=self.n_jobs, max_iter=self.max_iter, verbose=self.verbose,\n eps=self.eps, random_state=self.random_state,\n return_n_iter=True)\n\n return self.embedding_", + "instance_attributes": [ + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "dissimilarity", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "metric", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "n_init", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "eps", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding", + "name": "SpectralEmbedding", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding", + "decorators": [], + "superclasses": ["BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/__init__", + "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_more_tags", + "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_pairwise@getter", + "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_get_affinity_matrix", + "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/fit", + "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/fit_transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Spectral embedding for non-linear dimensionality reduction.\n\nForms an affinity matrix given by the specified function and\napplies spectral decomposition to the corresponding graph laplacian.\nThe resulting transformation is given by the value of the\neigenvectors for each data point.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Spectral embedding for non-linear dimensionality reduction.\n\nForms an affinity matrix given by the specified function and\napplies spectral decomposition to the corresponding graph laplacian.\nThe resulting transformation is given by the value of the\neigenvectors for each data point.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n The dimension of the projected subspace.\n\naffinity : {'nearest_neighbors', 'rbf', 'precomputed', 'precomputed_nearest_neighbors'} or callable, default='nearest_neighbors'\n How to construct the affinity matrix.\n - 'nearest_neighbors' : construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf' : construct the affinity matrix by computing a radial basis\n function (RBF) kernel.\n - 'precomputed' : interpret ``X`` as a 
precomputed affinity matrix.\n - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\n of precomputed nearest neighbors, and constructs the affinity matrix\n by selecting the ``n_neighbors`` nearest neighbors.\n - callable : use passed in function as affinity\n the function takes in data matrix (n_samples, n_features)\n and return affinity matrix (n_samples, n_samples).\n\ngamma : float, default=None\n Kernel coefficient for rbf kernel. If None, gamma will be set to\n 1/n_features.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used for the initialization of\n the lobpcg eigenvectors when ``solver`` == 'amg'. Pass an int for\n reproducible results across multiple function calls.\n See :term: `Glossary `.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems.\n If None, then ``'arpack'`` is used.\n\nn_neighbors : int, default=None\n Number of nearest neighbors for nearest_neighbors graph building.\n If None, n_neighbors will be set to max(n_samples/10, 1).\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nembedding_ : ndarray of shape (n_samples, n_components)\n Spectral embedding of the training matrix.\n\naffinity_matrix_ : ndarray of shape (n_samples, n_samples)\n Affinity_matrix constructed from samples or precomputed.\n\nn_neighbors_ : int\n Number of nearest neighbors effectively used.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.manifold import SpectralEmbedding\n>>> X, _ = load_digits(return_X_y=True)\n>>> X.shape\n(1797, 64)\n>>> embedding = SpectralEmbedding(n_components=2)\n>>> X_transformed = embedding.fit_transform(X[:100])\n>>> X_transformed.shape\n(100, 2)\n\nReferences\n----------\n\n- A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n- On Spectral Clustering: Analysis and an algorithm, 2001\n Andrew Y. Ng, Michael I. 
Jordan, Yair Weiss\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8100\n\n- Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324", + "code": "class SpectralEmbedding(BaseEstimator):\n \"\"\"Spectral embedding for non-linear dimensionality reduction.\n\n Forms an affinity matrix given by the specified function and\n applies spectral decomposition to the corresponding graph laplacian.\n The resulting transformation is given by the value of the\n eigenvectors for each data point.\n\n Note : Laplacian Eigenmaps is the actual algorithm implemented here.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=2\n The dimension of the projected subspace.\n\n affinity : {'nearest_neighbors', 'rbf', 'precomputed', \\\n 'precomputed_nearest_neighbors'} or callable, \\\n default='nearest_neighbors'\n How to construct the affinity matrix.\n - 'nearest_neighbors' : construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf' : construct the affinity matrix by computing a radial basis\n function (RBF) kernel.\n - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.\n - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\n of precomputed nearest neighbors, and constructs the affinity matrix\n by selecting the ``n_neighbors`` nearest neighbors.\n - callable : use passed in function as affinity\n the function takes in data matrix (n_samples, n_features)\n and return affinity matrix (n_samples, n_samples).\n\n gamma : float, default=None\n Kernel coefficient for rbf kernel. If None, gamma will be set to\n 1/n_features.\n\n random_state : int, RandomState instance or None, default=None\n Determines the random number generator used for the initialization of\n the lobpcg eigenvectors when ``solver`` == 'amg'. Pass an int for\n reproducible results across multiple function calls.\n See :term: `Glossary `.\n\n eigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems.\n If None, then ``'arpack'`` is used.\n\n n_neighbors : int, default=None\n Number of nearest neighbors for nearest_neighbors graph building.\n If None, n_neighbors will be set to max(n_samples/10, 1).\n\n n_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n embedding_ : ndarray of shape (n_samples, n_components)\n Spectral embedding of the training matrix.\n\n affinity_matrix_ : ndarray of shape (n_samples, n_samples)\n Affinity_matrix constructed from samples or precomputed.\n\n n_neighbors_ : int\n Number of nearest neighbors effectively used.\n\n Examples\n --------\n >>> from sklearn.datasets import load_digits\n >>> from sklearn.manifold import SpectralEmbedding\n >>> X, _ = load_digits(return_X_y=True)\n >>> X.shape\n (1797, 64)\n >>> embedding = SpectralEmbedding(n_components=2)\n >>> X_transformed = embedding.fit_transform(X[:100])\n >>> X_transformed.shape\n (100, 2)\n\n References\n ----------\n\n - A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n - On Spectral Clustering: Analysis and an algorithm, 2001\n Andrew Y. Ng, Michael I. 
Jordan, Yair Weiss\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8100\n\n - Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=2, *, affinity=\"nearest_neighbors\",\n gamma=None, random_state=None, eigen_solver=None,\n n_neighbors=None, n_jobs=None):\n self.n_components = n_components\n self.affinity = affinity\n self.gamma = gamma\n self.random_state = random_state\n self.eigen_solver = eigen_solver\n self.n_neighbors = n_neighbors\n self.n_jobs = n_jobs\n\n def _more_tags(self):\n return {'pairwise': self.affinity in [\"precomputed\",\n \"precomputed_nearest_neighbors\"]}\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n return self.affinity in [\"precomputed\",\n \"precomputed_nearest_neighbors\"]\n\n def _get_affinity_matrix(self, X, Y=None):\n \"\"\"Calculate the affinity matrix from data\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : array-like of shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\n Y: Ignored\n\n Returns\n -------\n affinity_matrix of shape (n_samples, n_samples)\n \"\"\"\n if self.affinity == 'precomputed':\n self.affinity_matrix_ = X\n return self.affinity_matrix_\n if self.affinity == 'precomputed_nearest_neighbors':\n estimator = NearestNeighbors(n_neighbors=self.n_neighbors,\n n_jobs=self.n_jobs,\n metric=\"precomputed\").fit(X)\n connectivity = estimator.kneighbors_graph(X=X, mode='connectivity')\n self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n return self.affinity_matrix_\n if self.affinity == 'nearest_neighbors':\n if sparse.issparse(X):\n warnings.warn(\"Nearest neighbors affinity currently does \"\n \"not support sparse input, falling back to \"\n \"rbf affinity\")\n self.affinity = \"rbf\"\n else:\n self.n_neighbors_ = (self.n_neighbors\n if self.n_neighbors is not None\n else max(int(X.shape[0] / 10), 1))\n self.affinity_matrix_ = kneighbors_graph(X, self.n_neighbors_,\n include_self=True,\n n_jobs=self.n_jobs)\n # currently only symmetric affinity_matrix supported\n self.affinity_matrix_ = 0.5 * (self.affinity_matrix_ +\n self.affinity_matrix_.T)\n return self.affinity_matrix_\n if self.affinity == 'rbf':\n self.gamma_ = (self.gamma\n if self.gamma is not None else 1.0 / X.shape[1])\n self.affinity_matrix_ = rbf_kernel(X, gamma=self.gamma_)\n return self.affinity_matrix_\n self.affinity_matrix_ = self.affinity(X)\n return self.affinity_matrix_\n\n def fit(self, X, y=None):\n \"\"\"Fit the model from data in X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : {array-like, sparse matrix}, shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n\n X = self._validate_data(X, accept_sparse='csr', ensure_min_samples=2,\n 
estimator=self)\n\n random_state = check_random_state(self.random_state)\n if isinstance(self.affinity, str):\n if self.affinity not in {\"nearest_neighbors\", \"rbf\", \"precomputed\",\n \"precomputed_nearest_neighbors\"}:\n raise ValueError((\"%s is not a valid affinity. Expected \"\n \"'precomputed', 'rbf', 'nearest_neighbors' \"\n \"or a callable.\") % self.affinity)\n elif not callable(self.affinity):\n raise ValueError((\"'affinity' is expected to be an affinity \"\n \"name or a callable. Got: %s\") % self.affinity)\n\n affinity_matrix = self._get_affinity_matrix(X)\n self.embedding_ = spectral_embedding(affinity_matrix,\n n_components=self.n_components,\n eigen_solver=self.eigen_solver,\n random_state=random_state)\n return self\n\n def fit_transform(self, X, y=None):\n \"\"\"Fit the model from data in X and transform X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : {array-like, sparse matrix} of shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\n y : Ignored\n\n Returns\n -------\n X_new : array-like of shape (n_samples, n_components)\n \"\"\"\n self.fit(X)\n return self.embedding_", + "instance_attributes": [ + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "affinity", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE", + "name": "TSNE", + "qname": "sklearn.manifold._t_sne.TSNE", + "decorators": [], + "superclasses": ["BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__", + "scikit-learn/sklearn.manifold._t_sne/TSNE/_fit", + "scikit-learn/sklearn.manifold._t_sne/TSNE/_tsne", + "scikit-learn/sklearn.manifold._t_sne/TSNE/fit_transform", + "scikit-learn/sklearn.manifold._t_sne/TSNE/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "t-distributed Stochastic Neighbor Embedding.\n\nt-SNE [1] is a tool to visualize high-dimensional data. It converts\nsimilarities between data points to joint probabilities and tries\nto minimize the Kullback-Leibler divergence between the joint\nprobabilities of the low-dimensional embedding and the\nhigh-dimensional data. t-SNE has a cost function that is not convex,\ni.e. with different initializations we can get different results.\n\nIt is highly recommended to use another dimensionality reduction\nmethod (e.g. PCA for dense data or TruncatedSVD for sparse data)\nto reduce the number of dimensions to a reasonable amount (e.g. 50)\nif the number of features is very high. This will suppress some\nnoise and speed up the computation of pairwise distances between\nsamples. For more tips see Laurens van der Maaten's FAQ [2].\n\nRead more in the :ref:`User Guide `.", + "docstring": "t-distributed Stochastic Neighbor Embedding.\n\nt-SNE [1] is a tool to visualize high-dimensional data. It converts\nsimilarities between data points to joint probabilities and tries\nto minimize the Kullback-Leibler divergence between the joint\nprobabilities of the low-dimensional embedding and the\nhigh-dimensional data. t-SNE has a cost function that is not convex,\ni.e. with different initializations we can get different results.\n\nIt is highly recommended to use another dimensionality reduction\nmethod (e.g. 
PCA for dense data or TruncatedSVD for sparse data)\nto reduce the number of dimensions to a reasonable amount (e.g. 50)\nif the number of features is very high. This will suppress some\nnoise and speed up the computation of pairwise distances between\nsamples. For more tips see Laurens van der Maaten's FAQ [2].\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n Dimension of the embedded space.\n\nperplexity : float, default=30.0\n The perplexity is related to the number of nearest neighbors that\n is used in other manifold learning algorithms. Larger datasets\n usually require a larger perplexity. Consider selecting a value\n between 5 and 50. Different values can result in significantly\n different results.\n\nearly_exaggeration : float, default=12.0\n Controls how tight natural clusters in the original space are in\n the embedded space and how much space will be between them. For\n larger values, the space between natural clusters will be larger\n in the embedded space. Again, the choice of this parameter is not\n very critical. If the cost function increases during initial\n optimization, the early exaggeration factor or the learning rate\n might be too high.\n\nlearning_rate : float, default=200.0\n The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n the learning rate is too high, the data may look like a 'ball' with any\n point approximately equidistant from its nearest neighbours. If the\n learning rate is too low, most points may look compressed in a dense\n cloud with few outliers. If the cost function gets stuck in a bad local\n minimum increasing the learning rate may help.\n\nn_iter : int, default=1000\n Maximum number of iterations for the optimization. Should be at\n least 250.\n\nn_iter_without_progress : int, default=300\n Maximum number of iterations without progress before we abort the\n optimization, used after 250 initial iterations with early\n exaggeration. Note that progress is only checked every 50 iterations so\n this value is rounded to the next multiple of 50.\n\n .. versionadded:: 0.17\n parameter *n_iter_without_progress* to control stopping criteria.\n\nmin_grad_norm : float, default=1e-7\n If the gradient norm is below this threshold, the optimization will\n be stopped.\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by scipy.spatial.distance.pdist for its metric parameter, or\n a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a distance matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them. The default is \"euclidean\" which is\n interpreted as squared euclidean distance.\n\ninit : {'random', 'pca'} or ndarray of shape (n_samples, n_components), default='random'\n Initialization of embedding. Possible options are 'random', 'pca',\n and a numpy array of shape (n_samples, n_components).\n PCA initialization cannot be used with precomputed distances and is\n usually more globally stable than random initialization.\n\nverbose : int, default=0\n Verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator. 
Pass an int for reproducible\n results across multiple function calls. Note that different\n initializations might result in different local minima of the cost\n function. See :term: `Glossary `.\n\nmethod : str, default='barnes_hut'\n By default the gradient calculation algorithm uses Barnes-Hut\n approximation running in O(NlogN) time. method='exact'\n will run on the slower, but exact, algorithm in O(N^2) time. The\n exact algorithm should be used when nearest-neighbor errors need\n to be better than 3%. However, the exact method cannot scale to\n millions of examples.\n\n .. versionadded:: 0.17\n Approximate optimization *method* via the Barnes-Hut.\n\nangle : float, default=0.5\n Only used if method='barnes_hut'\n This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n 'angle' is the angular size (referred to as theta in [3]) of a distant\n node as measured from a point. If this size is below 'angle' then it is\n used as a summary node of all points contained within it.\n This method is not very sensitive to changes in this parameter\n in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing\n computation time and angle greater 0.8 has quickly increasing error.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search. This parameter\n has no impact when ``metric=\"precomputed\"`` or\n (``metric=\"euclidean\"`` and ``method=\"exact\"``).\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.22\n\nsquare_distances : True or 'legacy', default='legacy'\n Whether TSNE should square the distance values. ``'legacy'`` means\n that distance values are squared only when ``metric=\"euclidean\"``.\n ``True`` means that distance values are squared for all metrics.\n\n .. versionadded:: 0.24\n Added to provide backward compatibility during deprecation of\n legacy squaring behavior.\n .. deprecated:: 0.24\n Legacy squaring behavior was deprecated in 0.24. The ``'legacy'``\n value will be removed in 1.1 (renaming of 0.26), at which point the\n default value will change to ``True``.\n\nAttributes\n----------\nembedding_ : array-like of shape (n_samples, n_components)\n Stores the embedding vectors.\n\nkl_divergence_ : float\n Kullback-Leibler divergence after optimization.\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n\n>>> import numpy as np\n>>> from sklearn.manifold import TSNE\n>>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n>>> X_embedded = TSNE(n_components=2).fit_transform(X)\n>>> X_embedded.shape\n(4, 2)\n\nReferences\n----------\n\n[1] van der Maaten, L.J.P.; Hinton, G.E. Visualizing High-Dimensional Data\n Using t-SNE. Journal of Machine Learning Research 9:2579-2605, 2008.\n\n[2] van der Maaten, L.J.P. t-Distributed Stochastic Neighbor Embedding\n https://lvdmaaten.github.io/tsne/\n\n[3] L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms.\n Journal of Machine Learning Research 15(Oct):3221-3245, 2014.\n https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf", + "code": "class TSNE(BaseEstimator):\n \"\"\"t-distributed Stochastic Neighbor Embedding.\n\n t-SNE [1] is a tool to visualize high-dimensional data. It converts\n similarities between data points to joint probabilities and tries\n to minimize the Kullback-Leibler divergence between the joint\n probabilities of the low-dimensional embedding and the\n high-dimensional data. 
t-SNE has a cost function that is not convex,\n i.e. with different initializations we can get different results.\n\n It is highly recommended to use another dimensionality reduction\n method (e.g. PCA for dense data or TruncatedSVD for sparse data)\n to reduce the number of dimensions to a reasonable amount (e.g. 50)\n if the number of features is very high. This will suppress some\n noise and speed up the computation of pairwise distances between\n samples. For more tips see Laurens van der Maaten's FAQ [2].\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=2\n Dimension of the embedded space.\n\n perplexity : float, default=30.0\n The perplexity is related to the number of nearest neighbors that\n is used in other manifold learning algorithms. Larger datasets\n usually require a larger perplexity. Consider selecting a value\n between 5 and 50. Different values can result in significantly\n different results.\n\n early_exaggeration : float, default=12.0\n Controls how tight natural clusters in the original space are in\n the embedded space and how much space will be between them. For\n larger values, the space between natural clusters will be larger\n in the embedded space. Again, the choice of this parameter is not\n very critical. If the cost function increases during initial\n optimization, the early exaggeration factor or the learning rate\n might be too high.\n\n learning_rate : float, default=200.0\n The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n the learning rate is too high, the data may look like a 'ball' with any\n point approximately equidistant from its nearest neighbours. If the\n learning rate is too low, most points may look compressed in a dense\n cloud with few outliers. If the cost function gets stuck in a bad local\n minimum increasing the learning rate may help.\n\n n_iter : int, default=1000\n Maximum number of iterations for the optimization. Should be at\n least 250.\n\n n_iter_without_progress : int, default=300\n Maximum number of iterations without progress before we abort the\n optimization, used after 250 initial iterations with early\n exaggeration. Note that progress is only checked every 50 iterations so\n this value is rounded to the next multiple of 50.\n\n .. versionadded:: 0.17\n parameter *n_iter_without_progress* to control stopping criteria.\n\n min_grad_norm : float, default=1e-7\n If the gradient norm is below this threshold, the optimization will\n be stopped.\n\n metric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by scipy.spatial.distance.pdist for its metric parameter, or\n a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a distance matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them. The default is \"euclidean\" which is\n interpreted as squared euclidean distance.\n\n init : {'random', 'pca'} or ndarray of shape (n_samples, n_components), \\\n default='random'\n Initialization of embedding. 
Possible options are 'random', 'pca',\n and a numpy array of shape (n_samples, n_components).\n PCA initialization cannot be used with precomputed distances and is\n usually more globally stable than random initialization.\n\n verbose : int, default=0\n Verbosity level.\n\n random_state : int, RandomState instance or None, default=None\n Determines the random number generator. Pass an int for reproducible\n results across multiple function calls. Note that different\n initializations might result in different local minima of the cost\n function. See :term: `Glossary `.\n\n method : str, default='barnes_hut'\n By default the gradient calculation algorithm uses Barnes-Hut\n approximation running in O(NlogN) time. method='exact'\n will run on the slower, but exact, algorithm in O(N^2) time. The\n exact algorithm should be used when nearest-neighbor errors need\n to be better than 3%. However, the exact method cannot scale to\n millions of examples.\n\n .. versionadded:: 0.17\n Approximate optimization *method* via the Barnes-Hut.\n\n angle : float, default=0.5\n Only used if method='barnes_hut'\n This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n 'angle' is the angular size (referred to as theta in [3]) of a distant\n node as measured from a point. If this size is below 'angle' then it is\n used as a summary node of all points contained within it.\n This method is not very sensitive to changes in this parameter\n in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing\n computation time and angle greater 0.8 has quickly increasing error.\n\n n_jobs : int, default=None\n The number of parallel jobs to run for neighbors search. This parameter\n has no impact when ``metric=\"precomputed\"`` or\n (``metric=\"euclidean\"`` and ``method=\"exact\"``).\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.22\n\n square_distances : True or 'legacy', default='legacy'\n Whether TSNE should square the distance values. ``'legacy'`` means\n that distance values are squared only when ``metric=\"euclidean\"``.\n ``True`` means that distance values are squared for all metrics.\n\n .. versionadded:: 0.24\n Added to provide backward compatibility during deprecation of\n legacy squaring behavior.\n .. deprecated:: 0.24\n Legacy squaring behavior was deprecated in 0.24. The ``'legacy'``\n value will be removed in 1.1 (renaming of 0.26), at which point the\n default value will change to ``True``.\n\n Attributes\n ----------\n embedding_ : array-like of shape (n_samples, n_components)\n Stores the embedding vectors.\n\n kl_divergence_ : float\n Kullback-Leibler divergence after optimization.\n\n n_iter_ : int\n Number of iterations run.\n\n Examples\n --------\n\n >>> import numpy as np\n >>> from sklearn.manifold import TSNE\n >>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n >>> X_embedded = TSNE(n_components=2).fit_transform(X)\n >>> X_embedded.shape\n (4, 2)\n\n References\n ----------\n\n [1] van der Maaten, L.J.P.; Hinton, G.E. Visualizing High-Dimensional Data\n Using t-SNE. Journal of Machine Learning Research 9:2579-2605, 2008.\n\n [2] van der Maaten, L.J.P. t-Distributed Stochastic Neighbor Embedding\n https://lvdmaaten.github.io/tsne/\n\n [3] L.J.P. van der Maaten. 
Accelerating t-SNE using Tree-Based Algorithms.\n Journal of Machine Learning Research 15(Oct):3221-3245, 2014.\n https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf\n \"\"\"\n # Control the number of exploration iterations with early_exaggeration on\n _EXPLORATION_N_ITER = 250\n\n # Control the number of iterations between progress checks\n _N_ITER_CHECK = 50\n\n @_deprecate_positional_args\n def __init__(self, n_components=2, *, perplexity=30.0,\n early_exaggeration=12.0, learning_rate=200.0, n_iter=1000,\n n_iter_without_progress=300, min_grad_norm=1e-7,\n metric=\"euclidean\", init=\"random\", verbose=0,\n random_state=None, method='barnes_hut', angle=0.5,\n n_jobs=None, square_distances='legacy'):\n self.n_components = n_components\n self.perplexity = perplexity\n self.early_exaggeration = early_exaggeration\n self.learning_rate = learning_rate\n self.n_iter = n_iter\n self.n_iter_without_progress = n_iter_without_progress\n self.min_grad_norm = min_grad_norm\n self.metric = metric\n self.init = init\n self.verbose = verbose\n self.random_state = random_state\n self.method = method\n self.angle = angle\n self.n_jobs = n_jobs\n # TODO Revisit deprecation of square_distances for 1.1-1.3 (#12401)\n self.square_distances = square_distances\n\n def _fit(self, X, skip_num_points=0):\n \"\"\"Private function to fit the model using X as training data.\"\"\"\n\n if self.method not in ['barnes_hut', 'exact']:\n raise ValueError(\"'method' must be 'barnes_hut' or 'exact'\")\n if self.angle < 0.0 or self.angle > 1.0:\n raise ValueError(\"'angle' must be between 0.0 - 1.0\")\n if self.square_distances not in [True, 'legacy']:\n raise ValueError(\"'square_distances' must be True or 'legacy'.\")\n if self.metric != \"euclidean\" and self.square_distances is not True:\n warnings.warn(\n \"'square_distances' has been introduced in 0.24 to help phase \"\n \"out legacy squaring behavior. The 'legacy' setting will be \"\n \"removed in 1.1 (renaming of 0.26), and the default setting \"\n \"will be changed to True. In 1.3, 'square_distances' will be \"\n \"removed altogether, and distances will be squared by \"\n \"default. Set 'square_distances'=True to silence this \"\n \"warning.\",\n FutureWarning\n )\n if self.method == 'barnes_hut':\n X = self._validate_data(X, accept_sparse=['csr'],\n ensure_min_samples=2,\n dtype=[np.float32, np.float64])\n else:\n X = self._validate_data(X, accept_sparse=['csr', 'csc', 'coo'],\n dtype=[np.float32, np.float64])\n if self.metric == \"precomputed\":\n if isinstance(self.init, str) and self.init == 'pca':\n raise ValueError(\"The parameter init=\\\"pca\\\" cannot be \"\n \"used with metric=\\\"precomputed\\\".\")\n if X.shape[0] != X.shape[1]:\n raise ValueError(\"X should be a square distance matrix\")\n\n check_non_negative(X, \"TSNE.fit(). With metric='precomputed', X \"\n \"should contain positive distances.\")\n\n if self.method == \"exact\" and issparse(X):\n raise TypeError(\n 'TSNE with method=\"exact\" does not accept sparse '\n 'precomputed distance matrix. 
Use method=\"barnes_hut\" '\n 'or provide the dense distance matrix.')\n\n if self.method == 'barnes_hut' and self.n_components > 3:\n raise ValueError(\"'n_components' should be inferior to 4 for the \"\n \"barnes_hut algorithm as it relies on \"\n \"quad-tree or oct-tree.\")\n random_state = check_random_state(self.random_state)\n\n if self.early_exaggeration < 1.0:\n raise ValueError(\"early_exaggeration must be at least 1, but is {}\"\n .format(self.early_exaggeration))\n\n if self.n_iter < 250:\n raise ValueError(\"n_iter should be at least 250\")\n\n n_samples = X.shape[0]\n\n neighbors_nn = None\n if self.method == \"exact\":\n # Retrieve the distance matrix, either using the precomputed one or\n # computing it.\n if self.metric == \"precomputed\":\n distances = X\n else:\n if self.verbose:\n print(\"[t-SNE] Computing pairwise distances...\")\n\n if self.metric == \"euclidean\":\n # Euclidean is squared here, rather than using **= 2,\n # because euclidean_distances already calculates\n # squared distances, and returns np.sqrt(dist) for\n # squared=False.\n # Also, Euclidean is slower for n_jobs>1, so don't set here\n distances = pairwise_distances(X, metric=self.metric,\n squared=True)\n else:\n distances = pairwise_distances(X, metric=self.metric,\n n_jobs=self.n_jobs)\n\n if np.any(distances < 0):\n raise ValueError(\"All distances should be positive, the \"\n \"metric given is not correct\")\n\n if self.metric != \"euclidean\" and self.square_distances is True:\n distances **= 2\n\n # compute the joint probability distribution for the input space\n P = _joint_probabilities(distances, self.perplexity, self.verbose)\n assert np.all(np.isfinite(P)), \"All probabilities should be finite\"\n assert np.all(P >= 0), \"All probabilities should be non-negative\"\n assert np.all(P <= 1), (\"All probabilities should be less \"\n \"than or equal to one\")\n\n else:\n # Compute the number of nearest neighbors to find.\n # LvdM uses 3 * perplexity as the number of neighbors.\n # In the event that we have very small # of points\n # set the neighbors to n - 1.\n n_neighbors = min(n_samples - 1, int(3. * self.perplexity + 1))\n\n if self.verbose:\n print(\"[t-SNE] Computing {} nearest neighbors...\"\n .format(n_neighbors))\n\n # Find the nearest neighbors for every point\n knn = NearestNeighbors(algorithm='auto',\n n_jobs=self.n_jobs,\n n_neighbors=n_neighbors,\n metric=self.metric)\n t0 = time()\n knn.fit(X)\n duration = time() - t0\n if self.verbose:\n print(\"[t-SNE] Indexed {} samples in {:.3f}s...\".format(\n n_samples, duration))\n\n t0 = time()\n distances_nn = knn.kneighbors_graph(mode='distance')\n duration = time() - t0\n if self.verbose:\n print(\"[t-SNE] Computed neighbors for {} samples \"\n \"in {:.3f}s...\".format(n_samples, duration))\n\n # Free the memory used by the ball_tree\n del knn\n\n if self.square_distances is True or self.metric == \"euclidean\":\n # knn returns the euclidean distance but we need it squared\n # to be consistent with the 'exact' method. Note that the\n # method was derived using the euclidean metric in the\n # input space. 
Not sure of the implication of using a different\n # metric.\n distances_nn.data **= 2\n\n # compute the joint probability distribution for the input space\n P = _joint_probabilities_nn(distances_nn, self.perplexity,\n self.verbose)\n\n if isinstance(self.init, np.ndarray):\n X_embedded = self.init\n elif self.init == 'pca':\n pca = PCA(n_components=self.n_components, svd_solver='randomized',\n random_state=random_state)\n X_embedded = pca.fit_transform(X).astype(np.float32, copy=False)\n elif self.init == 'random':\n # The embedding is initialized with iid samples from Gaussians with\n # standard deviation 1e-4.\n X_embedded = 1e-4 * random_state.randn(\n n_samples, self.n_components).astype(np.float32)\n else:\n raise ValueError(\"'init' must be 'pca', 'random', or \"\n \"a numpy array\")\n\n # Degrees of freedom of the Student's t-distribution. The suggestion\n # degrees_of_freedom = n_components - 1 comes from\n # \"Learning a Parametric Embedding by Preserving Local Structure\"\n # Laurens van der Maaten, 2009.\n degrees_of_freedom = max(self.n_components - 1, 1)\n\n return self._tsne(P, degrees_of_freedom, n_samples,\n X_embedded=X_embedded,\n neighbors=neighbors_nn,\n skip_num_points=skip_num_points)\n\n def _tsne(self, P, degrees_of_freedom, n_samples, X_embedded,\n neighbors=None, skip_num_points=0):\n \"\"\"Runs t-SNE.\"\"\"\n # t-SNE minimizes the Kullback-Leiber divergence of the Gaussians P\n # and the Student's t-distributions Q. The optimization algorithm that\n # we use is batch gradient descent with two stages:\n # * initial optimization with early exaggeration and momentum at 0.5\n # * final optimization with momentum at 0.8\n params = X_embedded.ravel()\n\n opt_args = {\n \"it\": 0,\n \"n_iter_check\": self._N_ITER_CHECK,\n \"min_grad_norm\": self.min_grad_norm,\n \"learning_rate\": self.learning_rate,\n \"verbose\": self.verbose,\n \"kwargs\": dict(skip_num_points=skip_num_points),\n \"args\": [P, degrees_of_freedom, n_samples, self.n_components],\n \"n_iter_without_progress\": self._EXPLORATION_N_ITER,\n \"n_iter\": self._EXPLORATION_N_ITER,\n \"momentum\": 0.5,\n }\n if self.method == 'barnes_hut':\n obj_func = _kl_divergence_bh\n opt_args['kwargs']['angle'] = self.angle\n # Repeat verbose argument for _kl_divergence_bh\n opt_args['kwargs']['verbose'] = self.verbose\n # Get the number of threads for gradient computation here to\n # avoid recomputing it at each iteration.\n opt_args['kwargs']['num_threads'] = _openmp_effective_n_threads()\n else:\n obj_func = _kl_divergence\n\n # Learning schedule (part 1): do 250 iteration with lower momentum but\n # higher learning rate controlled via the early exaggeration parameter\n P *= self.early_exaggeration\n params, kl_divergence, it = _gradient_descent(obj_func, params,\n **opt_args)\n if self.verbose:\n print(\"[t-SNE] KL divergence after %d iterations with early \"\n \"exaggeration: %f\" % (it + 1, kl_divergence))\n\n # Learning schedule (part 2): disable early exaggeration and finish\n # optimization with a higher momentum at 0.8\n P /= self.early_exaggeration\n remaining = self.n_iter - self._EXPLORATION_N_ITER\n if it < self._EXPLORATION_N_ITER or remaining > 0:\n opt_args['n_iter'] = self.n_iter\n opt_args['it'] = it + 1\n opt_args['momentum'] = 0.8\n opt_args['n_iter_without_progress'] = self.n_iter_without_progress\n params, kl_divergence, it = _gradient_descent(obj_func, params,\n **opt_args)\n\n # Save the final number of iterations\n self.n_iter_ = it\n\n if self.verbose:\n print(\"[t-SNE] KL divergence after %d 
iterations: %f\"\n % (it + 1, kl_divergence))\n\n X_embedded = params.reshape(n_samples, self.n_components)\n self.kl_divergence_ = kl_divergence\n\n return X_embedded\n\n def fit_transform(self, X, y=None):\n \"\"\"Fit X into an embedded space and return that transformed\n output.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row. If the method\n is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n or 'coo'. If the method is 'barnes_hut' and the metric is\n 'precomputed', X may be a precomputed sparse graph.\n\n y : Ignored\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Embedding of the training data in low-dimensional space.\n \"\"\"\n embedding = self._fit(X)\n self.embedding_ = embedding\n return self.embedding_\n\n def fit(self, X, y=None):\n \"\"\"Fit X into an embedded space.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row. If the method\n is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n or 'coo'. If the method is 'barnes_hut' and the metric is\n 'precomputed', X may be a precomputed sparse graph.\n\n y : Ignored\n \"\"\"\n self.fit_transform(X)\n return self", + "instance_attributes": [ + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "perplexity", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "early_exaggeration", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "learning_rate", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "n_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_iter_without_progress", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_grad_norm", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "metric", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "init", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "method", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "angle", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "square_distances", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay", + "name": "ConfusionMatrixDisplay", + "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/__init__", + "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/plot" + ], + "is_public": false, + "reexported_by": [], + "description": "Confusion Matrix visualization.\n\nIt is recommended to use :func:`~sklearn.metrics.plot_confusion_matrix` to\ncreate a :class:`ConfusionMatrixDisplay`. 
All parameters are stored as\nattributes.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Confusion Matrix visualization.\n\nIt is recommended to use :func:`~sklearn.metrics.plot_confusion_matrix` to\ncreate a :class:`ConfusionMatrixDisplay`. All parameters are stored as\nattributes.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nconfusion_matrix : ndarray of shape (n_classes, n_classes)\n Confusion matrix.\n\ndisplay_labels : ndarray of shape (n_classes,), default=None\n Display labels for plot. If None, display labels are set from 0 to\n `n_classes - 1`.\n\nAttributes\n----------\nim_ : matplotlib AxesImage\n Image representing the confusion matrix.\n\ntext_ : ndarray of shape (n_classes, n_classes), dtype=matplotlib Text, or None\n Array of matplotlib Text objects. `None` if `include_values` is false.\n\nax_ : matplotlib Axes\n Axes with confusion matrix.\n\nfigure_ : matplotlib Figure\n Figure containing the confusion matrix.\n\nSee Also\n--------\nconfusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n classification.\nConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n given an estimator, the data, and the label.\nConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n given the true and predicted labels.\n\nExamples\n--------\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> predictions = clf.predict(X_test)\n>>> cm = confusion_matrix(y_test, predictions, labels=clf.classes_)\n>>> disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n... display_labels=clf.classes_)\n>>> disp.plot() # doctest: +SKIP", + "code": "class ConfusionMatrixDisplay:\n \"\"\"Confusion Matrix visualization.\n\n It is recommended to use :func:`~sklearn.metrics.plot_confusion_matrix` to\n create a :class:`ConfusionMatrixDisplay`. All parameters are stored as\n attributes.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n confusion_matrix : ndarray of shape (n_classes, n_classes)\n Confusion matrix.\n\n display_labels : ndarray of shape (n_classes,), default=None\n Display labels for plot. If None, display labels are set from 0 to\n `n_classes - 1`.\n\n Attributes\n ----------\n im_ : matplotlib AxesImage\n Image representing the confusion matrix.\n\n text_ : ndarray of shape (n_classes, n_classes), dtype=matplotlib Text, \\\n or None\n Array of matplotlib Text objects. 
`None` if `include_values` is false.\n\n ax_ : matplotlib Axes\n Axes with confusion matrix.\n\n figure_ : matplotlib Figure\n Figure containing the confusion matrix.\n\n See Also\n --------\n confusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n classification.\n ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n given an estimator, the data, and the label.\n ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n given the true and predicted labels.\n\n Examples\n --------\n >>> from sklearn.datasets import make_classification\n >>> from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n >>> from sklearn.model_selection import train_test_split\n >>> from sklearn.svm import SVC\n >>> X, y = make_classification(random_state=0)\n >>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n ... random_state=0)\n >>> clf = SVC(random_state=0)\n >>> clf.fit(X_train, y_train)\n SVC(random_state=0)\n >>> predictions = clf.predict(X_test)\n >>> cm = confusion_matrix(y_test, predictions, labels=clf.classes_)\n >>> disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n ... display_labels=clf.classes_)\n >>> disp.plot() # doctest: +SKIP\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, confusion_matrix, *, display_labels=None):\n self.confusion_matrix = confusion_matrix\n self.display_labels = display_labels\n\n @_deprecate_positional_args\n def plot(self, *, include_values=True, cmap='viridis',\n xticks_rotation='horizontal', values_format=None,\n ax=None, colorbar=True):\n \"\"\"Plot visualization.\n\n Parameters\n ----------\n include_values : bool, default=True\n Includes values in confusion matrix.\n\n cmap : str or matplotlib Colormap, default='viridis'\n Colormap recognized by matplotlib.\n\n xticks_rotation : {'vertical', 'horizontal'} or float, \\\n default='horizontal'\n Rotation of xtick labels.\n\n values_format : str, default=None\n Format specification for values in confusion matrix. If `None`,\n the format specification is 'd' or '.2g' whichever is shorter.\n\n ax : matplotlib axes, default=None\n Axes object to plot on. 
If `None`, a new figure and axes is\n created.\n\n colorbar : bool, default=True\n Whether or not to add a colorbar to the plot.\n\n Returns\n -------\n display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n \"\"\"\n check_matplotlib_support(\"ConfusionMatrixDisplay.plot\")\n import matplotlib.pyplot as plt\n\n if ax is None:\n fig, ax = plt.subplots()\n else:\n fig = ax.figure\n\n cm = self.confusion_matrix\n n_classes = cm.shape[0]\n self.im_ = ax.imshow(cm, interpolation='nearest', cmap=cmap)\n self.text_ = None\n cmap_min, cmap_max = self.im_.cmap(0), self.im_.cmap(256)\n\n if include_values:\n self.text_ = np.empty_like(cm, dtype=object)\n\n # print text with appropriate color depending on background\n thresh = (cm.max() + cm.min()) / 2.0\n\n for i, j in product(range(n_classes), range(n_classes)):\n color = cmap_max if cm[i, j] < thresh else cmap_min\n\n if values_format is None:\n text_cm = format(cm[i, j], '.2g')\n if cm.dtype.kind != 'f':\n text_d = format(cm[i, j], 'd')\n if len(text_d) < len(text_cm):\n text_cm = text_d\n else:\n text_cm = format(cm[i, j], values_format)\n\n self.text_[i, j] = ax.text(\n j, i, text_cm,\n ha=\"center\", va=\"center\",\n color=color)\n\n if self.display_labels is None:\n display_labels = np.arange(n_classes)\n else:\n display_labels = self.display_labels\n if colorbar:\n fig.colorbar(self.im_, ax=ax)\n ax.set(xticks=np.arange(n_classes),\n yticks=np.arange(n_classes),\n xticklabels=display_labels,\n yticklabels=display_labels,\n ylabel=\"True label\",\n xlabel=\"Predicted label\")\n\n ax.set_ylim((n_classes - 0.5, -0.5))\n plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)\n\n self.figure_ = fig\n self.ax_ = ax\n return self", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay", + "name": "DetCurveDisplay", + "qname": "sklearn.metrics._plot.det_curve.DetCurveDisplay", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay/__init__", + "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay/plot" + ], + "is_public": false, + "reexported_by": [], + "description": "DET curve visualization.\n\nIt is recommended to use :func:`~sklearn.metrics.plot_det_curve` to create a\nvisualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24", + "docstring": "DET curve visualization.\n\nIt is recommended to use :func:`~sklearn.metrics.plot_det_curve` to create a\nvisualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nfpr : ndarray\n False positive rate.\n\nfnr : ndarray\n False negative rate.\n\nestimator_name : str, default=None\n Name of estimator. 
If None, the estimator name is not shown.\n\npos_label : str or int, default=None\n The label of the positive class.\n\nAttributes\n----------\nline_ : matplotlib Artist\n DET Curve.\n\nax_ : matplotlib Axes\n Axes with DET Curve.\n\nfigure_ : matplotlib Figure\n Figure containing the curve.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nplot_det_curve : Plot detection error tradeoff (DET) curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([0, 0, 1, 1])\n>>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, fnr, thresholds = metrics.det_curve(y, pred)\n>>> display = metrics.DetCurveDisplay(\n... fpr=fpr, fnr=fnr, estimator_name='example estimator'\n... )\n>>> display.plot() # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP", + "code": "class DetCurveDisplay:\n \"\"\"DET curve visualization.\n\n It is recommended to use :func:`~sklearn.metrics.plot_det_curve` to create a\n visualizer. All parameters are stored as attributes.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n fpr : ndarray\n False positive rate.\n\n fnr : ndarray\n False negative rate.\n\n estimator_name : str, default=None\n Name of estimator. If None, the estimator name is not shown.\n\n pos_label : str or int, default=None\n The label of the positive class.\n\n Attributes\n ----------\n line_ : matplotlib Artist\n DET Curve.\n\n ax_ : matplotlib Axes\n Axes with DET Curve.\n\n figure_ : matplotlib Figure\n Figure containing the curve.\n\n See Also\n --------\n det_curve : Compute error rates for different probability thresholds.\n plot_det_curve : Plot detection error tradeoff (DET) curve.\n\n Examples\n --------\n >>> import matplotlib.pyplot as plt # doctest: +SKIP\n >>> import numpy as np\n >>> from sklearn import metrics\n >>> y = np.array([0, 0, 1, 1])\n >>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n >>> fpr, fnr, thresholds = metrics.det_curve(y, pred)\n >>> display = metrics.DetCurveDisplay(\n ... fpr=fpr, fnr=fnr, estimator_name='example estimator'\n ... )\n >>> display.plot() # doctest: +SKIP\n >>> plt.show() # doctest: +SKIP\n \"\"\"\n def __init__(self, *, fpr, fnr, estimator_name=None, pos_label=None):\n self.fpr = fpr\n self.fnr = fnr\n self.estimator_name = estimator_name\n self.pos_label = pos_label\n\n def plot(self, ax=None, *, name=None, **kwargs):\n \"\"\"Plot visualization.\n\n Parameters\n ----------\n ax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\n name : str, default=None\n Name of DET curve for labeling. 
If `None`, use the name of the\n estimator.\n\n Returns\n -------\n display : :class:`~sklearn.metrics.plot.DetCurveDisplay`\n Object that stores computed values.\n \"\"\"\n check_matplotlib_support('DetCurveDisplay.plot')\n\n name = self.estimator_name if name is None else name\n line_kwargs = {} if name is None else {\"label\": name}\n line_kwargs.update(**kwargs)\n\n import matplotlib.pyplot as plt\n\n if ax is None:\n _, ax = plt.subplots()\n\n self.line_, = ax.plot(\n sp.stats.norm.ppf(self.fpr),\n sp.stats.norm.ppf(self.fnr),\n **line_kwargs,\n )\n info_pos_label = (f\" (Positive label: {self.pos_label})\"\n if self.pos_label is not None else \"\")\n\n xlabel = \"False Positive Rate\" + info_pos_label\n ylabel = \"False Negative Rate\" + info_pos_label\n ax.set(xlabel=xlabel, ylabel=ylabel)\n\n if \"label\" in line_kwargs:\n ax.legend(loc=\"lower right\")\n\n ticks = [0.001, 0.01, 0.05, 0.20, 0.5, 0.80, 0.95, 0.99, 0.999]\n tick_locations = sp.stats.norm.ppf(ticks)\n tick_labels = [\n '{:.0%}'.format(s) if (100*s).is_integer() else '{:.1%}'.format(s)\n for s in ticks\n ]\n ax.set_xticks(tick_locations)\n ax.set_xticklabels(tick_labels)\n ax.set_xlim(-3, 3)\n ax.set_yticks(tick_locations)\n ax.set_yticklabels(tick_labels)\n ax.set_ylim(-3, 3)\n\n self.ax_ = ax\n self.figure_ = ax.figure\n return self", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay", + "name": "PrecisionRecallDisplay", + "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__", + "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/plot" + ], + "is_public": false, + "reexported_by": [], + "description": "Precision Recall visualization.\n\nIt is recommended to use :func:`~sklearn.metrics.plot_precision_recall_curve`\nto create a visualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Precision Recall visualization.\n\nIt is recommended to use :func:`~sklearn.metrics.plot_precision_recall_curve`\nto create a visualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nprecision : ndarray\n Precision values.\n\nrecall : ndarray\n Recall values.\n\naverage_precision : float, default=None\n Average precision. If None, the average precision is not shown.\n\nestimator_name : str, default=None\n Name of estimator. If None, then the estimator name is not shown.\n\npos_label : str or int, default=None\n The class considered as the positive class. If None, the class will not\n be shown in the legend.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nline_ : matplotlib Artist\n Precision recall curve.\n\nax_ : matplotlib Axes\n Axes with precision recall curve.\n\nfigure_ : matplotlib Figure\n Figure containing the curve.\n\nSee Also\n--------\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\nplot_precision_recall_curve : Plot Precision Recall Curve for binary\n classifiers.\n\nExamples\n--------\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import (precision_recall_curve,\n... 
PrecisionRecallDisplay)\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> predictions = clf.predict(X_test)\n>>> precision, recall, _ = precision_recall_curve(y_test, predictions)\n>>> disp = PrecisionRecallDisplay(precision=precision, recall=recall)\n>>> disp.plot() # doctest: +SKIP", + "code": "class PrecisionRecallDisplay:\n \"\"\"Precision Recall visualization.\n\n It is recommended to use :func:`~sklearn.metrics.plot_precision_recall_curve`\n to create a visualizer. All parameters are stored as attributes.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n precision : ndarray\n Precision values.\n\n recall : ndarray\n Recall values.\n\n average_precision : float, default=None\n Average precision. If None, the average precision is not shown.\n\n estimator_name : str, default=None\n Name of estimator. If None, then the estimator name is not shown.\n\n pos_label : str or int, default=None\n The class considered as the positive class. If None, the class will not\n be shown in the legend.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n line_ : matplotlib Artist\n Precision recall curve.\n\n ax_ : matplotlib Axes\n Axes with precision recall curve.\n\n figure_ : matplotlib Figure\n Figure containing the curve.\n\n See Also\n --------\n precision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\n plot_precision_recall_curve : Plot Precision Recall Curve for binary\n classifiers.\n\n Examples\n --------\n >>> from sklearn.datasets import make_classification\n >>> from sklearn.metrics import (precision_recall_curve,\n ... PrecisionRecallDisplay)\n >>> from sklearn.model_selection import train_test_split\n >>> from sklearn.svm import SVC\n >>> X, y = make_classification(random_state=0)\n >>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n ... random_state=0)\n >>> clf = SVC(random_state=0)\n >>> clf.fit(X_train, y_train)\n SVC(random_state=0)\n >>> predictions = clf.predict(X_test)\n >>> precision, recall, _ = precision_recall_curve(y_test, predictions)\n >>> disp = PrecisionRecallDisplay(precision=precision, recall=recall)\n >>> disp.plot() # doctest: +SKIP\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, precision, recall, *,\n average_precision=None, estimator_name=None, pos_label=None):\n self.estimator_name = estimator_name\n self.precision = precision\n self.recall = recall\n self.average_precision = average_precision\n self.pos_label = pos_label\n\n @_deprecate_positional_args\n def plot(self, ax=None, *, name=None, **kwargs):\n \"\"\"Plot visualization.\n\n Extra keyword arguments will be passed to matplotlib's `plot`.\n\n Parameters\n ----------\n ax : Matplotlib Axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\n name : str, default=None\n Name of precision recall curve for labeling. 
If `None`, use the\n name of the estimator.\n\n **kwargs : dict\n Keyword arguments to be passed to matplotlib's `plot`.\n\n Returns\n -------\n display : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n Object that stores computed values.\n \"\"\"\n check_matplotlib_support(\"PrecisionRecallDisplay.plot\")\n\n name = self.estimator_name if name is None else name\n\n line_kwargs = {\"drawstyle\": \"steps-post\"}\n if self.average_precision is not None and name is not None:\n line_kwargs[\"label\"] = (f\"{name} (AP = \"\n f\"{self.average_precision:0.2f})\")\n elif self.average_precision is not None:\n line_kwargs[\"label\"] = (f\"AP = \"\n f\"{self.average_precision:0.2f}\")\n elif name is not None:\n line_kwargs[\"label\"] = name\n line_kwargs.update(**kwargs)\n\n import matplotlib.pyplot as plt\n\n if ax is None:\n fig, ax = plt.subplots()\n\n self.line_, = ax.plot(self.recall, self.precision, **line_kwargs)\n info_pos_label = (f\" (Positive label: {self.pos_label})\"\n if self.pos_label is not None else \"\")\n\n xlabel = \"Recall\" + info_pos_label\n ylabel = \"Precision\" + info_pos_label\n ax.set(xlabel=xlabel, ylabel=ylabel)\n\n if \"label\" in line_kwargs:\n ax.legend(loc=\"lower left\")\n\n self.ax_ = ax\n self.figure_ = ax.figure\n return self", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay", + "name": "RocCurveDisplay", + "qname": "sklearn.metrics._plot.roc_curve.RocCurveDisplay", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/__init__", + "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/plot" + ], + "is_public": false, + "reexported_by": [], + "description": "ROC Curve visualization.\n\nIt is recommended to use :func:`~sklearn.metrics.plot_roc_curve` to create a\nvisualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.", + "docstring": "ROC Curve visualization.\n\nIt is recommended to use :func:`~sklearn.metrics.plot_roc_curve` to create a\nvisualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfpr : ndarray\n False positive rate.\n\ntpr : ndarray\n True positive rate.\n\nroc_auc : float, default=None\n Area under ROC curve. If None, the roc_auc score is not shown.\n\nestimator_name : str, default=None\n Name of estimator. If None, the estimator name is not shown.\n\npos_label : str or int, default=None\n The class considered as the positive class when computing the roc auc\n metrics. By default, `estimator.classes_[1]` is considered\n as the positive class.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\nline_ : matplotlib Artist\n ROC Curve.\n\nax_ : matplotlib Axes\n Axes with ROC Curve.\n\nfigure_ : matplotlib Figure\n Figure containing the curve.\n\nSee Also\n--------\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\nroc_auc_score : Compute the area under the ROC curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([0, 0, 1, 1])\n>>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, tpr, thresholds = metrics.roc_curve(y, pred)\n>>> roc_auc = metrics.auc(fpr, tpr)\n>>> display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='example estimator')\n>>> display.plot() # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP", + "code": "class RocCurveDisplay:\n \"\"\"ROC Curve visualization.\n\n It is recommended to use :func:`~sklearn.metrics.plot_roc_curve` to create a\n visualizer. All parameters are stored as attributes.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n fpr : ndarray\n False positive rate.\n\n tpr : ndarray\n True positive rate.\n\n roc_auc : float, default=None\n Area under ROC curve. If None, the roc_auc score is not shown.\n\n estimator_name : str, default=None\n Name of estimator. If None, the estimator name is not shown.\n\n pos_label : str or int, default=None\n The class considered as the positive class when computing the roc auc\n metrics. By default, `estimator.classes_[1]` is considered\n as the positive class.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n line_ : matplotlib Artist\n ROC Curve.\n\n ax_ : matplotlib Axes\n Axes with ROC Curve.\n\n figure_ : matplotlib Figure\n Figure containing the curve.\n\n See Also\n --------\n roc_curve : Compute Receiver operating characteristic (ROC) curve.\n plot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n roc_auc_score : Compute the area under the ROC curve.\n\n Examples\n --------\n >>> import matplotlib.pyplot as plt # doctest: +SKIP\n >>> import numpy as np\n >>> from sklearn import metrics\n >>> y = np.array([0, 0, 1, 1])\n >>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n >>> fpr, tpr, thresholds = metrics.roc_curve(y, pred)\n >>> roc_auc = metrics.auc(fpr, tpr)\n >>> display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,\\\n estimator_name='example estimator')\n >>> display.plot() # doctest: +SKIP\n >>> plt.show() # doctest: +SKIP\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, fpr, tpr,\n roc_auc=None, estimator_name=None, pos_label=None):\n self.estimator_name = estimator_name\n self.fpr = fpr\n self.tpr = tpr\n self.roc_auc = roc_auc\n self.pos_label = pos_label\n\n @_deprecate_positional_args\n def plot(self, ax=None, *, name=None, **kwargs):\n \"\"\"Plot visualization.\n\n Extra keyword arguments will be passed to matplotlib's ``plot``.\n\n Parameters\n ----------\n ax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\n name : str, default=None\n Name of ROC Curve for labeling. 
If `None`, use the name of the\n estimator.\n\n Returns\n -------\n display : :class:`~sklearn.metrics.plot.RocCurveDisplay`\n Object that stores computed values.\n \"\"\"\n check_matplotlib_support('RocCurveDisplay.plot')\n\n name = self.estimator_name if name is None else name\n\n line_kwargs = {}\n if self.roc_auc is not None and name is not None:\n line_kwargs[\"label\"] = f\"{name} (AUC = {self.roc_auc:0.2f})\"\n elif self.roc_auc is not None:\n line_kwargs[\"label\"] = f\"AUC = {self.roc_auc:0.2f}\"\n elif name is not None:\n line_kwargs[\"label\"] = name\n\n line_kwargs.update(**kwargs)\n\n import matplotlib.pyplot as plt\n\n if ax is None:\n fig, ax = plt.subplots()\n\n self.line_, = ax.plot(self.fpr, self.tpr, **line_kwargs)\n info_pos_label = (f\" (Positive label: {self.pos_label})\"\n if self.pos_label is not None else \"\")\n\n xlabel = \"False Positive Rate\" + info_pos_label\n ylabel = \"True Positive Rate\" + info_pos_label\n ax.set(xlabel=xlabel, ylabel=ylabel)\n\n if \"label\" in line_kwargs:\n ax.legend(loc=\"lower right\")\n\n self.ax_ = ax\n self.figure_ = ax.figure\n return self", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer", + "name": "_BaseScorer", + "qname": "sklearn.metrics._scorer._BaseScorer", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__init__", + "scikit-learn/sklearn.metrics._scorer/_BaseScorer/_check_pos_label", + "scikit-learn/sklearn.metrics._scorer/_BaseScorer/_select_proba_binary", + "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__repr__", + "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__call__", + "scikit-learn/sklearn.metrics._scorer/_BaseScorer/_factory_args" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _BaseScorer:\n def __init__(self, score_func, sign, kwargs):\n self._kwargs = kwargs\n self._score_func = score_func\n self._sign = sign\n\n @staticmethod\n def _check_pos_label(pos_label, classes):\n if pos_label not in list(classes):\n raise ValueError(\n f\"pos_label={pos_label} is not a valid label: {classes}\"\n )\n\n def _select_proba_binary(self, y_pred, classes):\n \"\"\"Select the column of the positive label in `y_pred` when\n probabilities are provided.\n\n Parameters\n ----------\n y_pred : ndarray of shape (n_samples, n_classes)\n The prediction given by `predict_proba`.\n\n classes : ndarray of shape (n_classes,)\n The class labels for the estimator.\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,)\n Probability predictions of the positive class.\n \"\"\"\n if y_pred.shape[1] == 2:\n pos_label = self._kwargs.get(\"pos_label\", classes[1])\n self._check_pos_label(pos_label, classes)\n col_idx = np.flatnonzero(classes == pos_label)[0]\n return y_pred[:, col_idx]\n\n err_msg = (\n f\"Got predict_proba of shape {y_pred.shape}, but need \"\n f\"classifier with two classes for {self._score_func.__name__} \"\n f\"scoring\"\n )\n raise ValueError(err_msg)\n\n def __repr__(self):\n kwargs_string = \"\".join([\", %s=%s\" % (str(k), str(v))\n for k, v in self._kwargs.items()])\n return (\"make_scorer(%s%s%s%s)\"\n % (self._score_func.__name__,\n \"\" if self._sign > 0 else \", greater_is_better=False\",\n self._factory_args(), kwargs_string))\n\n def __call__(self, estimator, X, y_true, sample_weight=None):\n \"\"\"Evaluate predicted target values for X relative to y_true.\n\n Parameters\n ----------\n estimator : object\n Trained estimator to use for 
scoring. Must have a predict_proba\n method; the output of that is used to compute the score.\n\n X : {array-like, sparse matrix}\n Test data that will be fed to estimator.predict.\n\n y_true : array-like\n Gold standard target values for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Score function applied to prediction of estimator on X.\n \"\"\"\n return self._score(partial(_cached_call, None), estimator, X, y_true,\n sample_weight=sample_weight)\n\n def _factory_args(self):\n \"\"\"Return non-default make_scorer arguments for repr.\"\"\"\n return \"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer", + "name": "_MultimetricScorer", + "qname": "sklearn.metrics._scorer._MultimetricScorer", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/__init__", + "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/__call__", + "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/_use_cache" + ], + "is_public": false, + "reexported_by": [], + "description": "Callable for multimetric scoring used to avoid repeated calls\nto `predict_proba`, `predict`, and `decision_function`.\n\n`_MultimetricScorer` will return a dictionary of scores corresponding to\nthe scorers in the dictionary. Note that `_MultimetricScorer` can be\ncreated with a dictionary with one key (i.e. only one actual scorer).", + "docstring": "Callable for multimetric scoring used to avoid repeated calls\nto `predict_proba`, `predict`, and `decision_function`.\n\n`_MultimetricScorer` will return a dictionary of scores corresponding to\nthe scorers in the dictionary. Note that `_MultimetricScorer` can be\ncreated with a dictionary with one key (i.e. only one actual scorer).\n\nParameters\n----------\nscorers : dict\n Dictionary mapping names to callable scorers.", + "code": "class _MultimetricScorer:\n \"\"\"Callable for multimetric scoring used to avoid repeated calls\n to `predict_proba`, `predict`, and `decision_function`.\n\n `_MultimetricScorer` will return a dictionary of scores corresponding to\n the scorers in the dictionary. Note that `_MultimetricScorer` can be\n created with a dictionary with one key (i.e. 
only one actual scorer).\n\n Parameters\n ----------\n scorers : dict\n Dictionary mapping names to callable scorers.\n \"\"\"\n def __init__(self, **scorers):\n self._scorers = scorers\n\n def __call__(self, estimator, *args, **kwargs):\n \"\"\"Evaluate predicted target values.\"\"\"\n scores = {}\n cache = {} if self._use_cache(estimator) else None\n cached_call = partial(_cached_call, cache)\n\n for name, scorer in self._scorers.items():\n if isinstance(scorer, _BaseScorer):\n score = scorer._score(cached_call, estimator,\n *args, **kwargs)\n else:\n score = scorer(estimator, *args, **kwargs)\n scores[name] = score\n return scores\n\n def _use_cache(self, estimator):\n \"\"\"Return True if using a cache is beneficial.\n\n Caching may be beneficial when one of these conditions holds:\n - `_ProbaScorer` will be called twice.\n - `_PredictScorer` will be called twice.\n - `_ThresholdScorer` will be called twice.\n - `_ThresholdScorer` and `_PredictScorer` are called and\n estimator is a regressor.\n - `_ThresholdScorer` and `_ProbaScorer` are called and\n estimator does not have a `decision_function` attribute.\n\n \"\"\"\n if len(self._scorers) == 1: # Only one scorer\n return False\n\n counter = Counter([type(v) for v in self._scorers.values()])\n\n if any(counter[known_type] > 1 for known_type in\n [_PredictScorer, _ProbaScorer, _ThresholdScorer]):\n return True\n\n if counter[_ThresholdScorer]:\n if is_regressor(estimator) and counter[_PredictScorer]:\n return True\n elif (counter[_ProbaScorer] and\n not hasattr(estimator, \"decision_function\")):\n return True\n return False", + "instance_attributes": [ + { + "name": "_scorers", + "types": { + "kind": "NamedType", + "name": "dict" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_PredictScorer", + "name": "_PredictScorer", + "qname": "sklearn.metrics._scorer._PredictScorer", + "decorators": [], + "superclasses": ["_BaseScorer"], + "methods": ["scikit-learn/sklearn.metrics._scorer/_PredictScorer/_score"], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _PredictScorer(_BaseScorer):\n def _score(self, method_caller, estimator, X, y_true, sample_weight=None):\n \"\"\"Evaluate predicted target values for X relative to y_true.\n\n Parameters\n ----------\n method_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\n estimator : object\n Trained estimator to use for scoring. 
Must have a `predict`\n method; the output of that is used to compute the score.\n\n X : {array-like, sparse matrix}\n Test data that will be fed to estimator.predict.\n\n y_true : array-like\n Gold standard target values for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Score function applied to prediction of estimator on X.\n \"\"\"\n\n y_pred = method_caller(estimator, \"predict\", X)\n if sample_weight is not None:\n return self._sign * self._score_func(y_true, y_pred,\n sample_weight=sample_weight,\n **self._kwargs)\n else:\n return self._sign * self._score_func(y_true, y_pred,\n **self._kwargs)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ProbaScorer", + "name": "_ProbaScorer", + "qname": "sklearn.metrics._scorer._ProbaScorer", + "decorators": [], + "superclasses": ["_BaseScorer"], + "methods": [ + "scikit-learn/sklearn.metrics._scorer/_ProbaScorer/_score", + "scikit-learn/sklearn.metrics._scorer/_ProbaScorer/_factory_args" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _ProbaScorer(_BaseScorer):\n def _score(self, method_caller, clf, X, y, sample_weight=None):\n \"\"\"Evaluate predicted probabilities for X relative to y_true.\n\n Parameters\n ----------\n method_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\n clf : object\n Trained classifier to use for scoring. Must have a `predict_proba`\n method; the output of that is used to compute the score.\n\n X : {array-like, sparse matrix}\n Test data that will be fed to clf.predict_proba.\n\n y : array-like\n Gold standard target values for X. These must be class labels,\n not probabilities.\n\n sample_weight : array-like, default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Score function applied to prediction of estimator on X.\n \"\"\"\n\n y_type = type_of_target(y)\n y_pred = method_caller(clf, \"predict_proba\", X)\n if y_type == \"binary\" and y_pred.shape[1] <= 2:\n # `y_type` could be equal to \"binary\" even in a multi-class\n # problem: (when only 2 class are given to `y_true` during scoring)\n # Thus, we need to check for the shape of `y_pred`.\n y_pred = self._select_proba_binary(y_pred, clf.classes_)\n if sample_weight is not None:\n return self._sign * self._score_func(y, y_pred,\n sample_weight=sample_weight,\n **self._kwargs)\n else:\n return self._sign * self._score_func(y, y_pred, **self._kwargs)\n\n def _factory_args(self):\n return \", needs_proba=True\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ThresholdScorer", + "name": "_ThresholdScorer", + "qname": "sklearn.metrics._scorer._ThresholdScorer", + "decorators": [], + "superclasses": ["_BaseScorer"], + "methods": [ + "scikit-learn/sklearn.metrics._scorer/_ThresholdScorer/_score", + "scikit-learn/sklearn.metrics._scorer/_ThresholdScorer/_factory_args" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _ThresholdScorer(_BaseScorer):\n def _score(self, method_caller, clf, X, y, sample_weight=None):\n \"\"\"Evaluate decision function output for X relative to y_true.\n\n Parameters\n ----------\n method_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\n clf : object\n Trained classifier to use for scoring. 
Must have either a\n decision_function method or a predict_proba method; the output of\n that is used to compute the score.\n\n X : {array-like, sparse matrix}\n Test data that will be fed to clf.decision_function or\n clf.predict_proba.\n\n y : array-like\n Gold standard target values for X. These must be class labels,\n not decision function values.\n\n sample_weight : array-like, default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Score function applied to prediction of estimator on X.\n \"\"\"\n\n y_type = type_of_target(y)\n if y_type not in (\"binary\", \"multilabel-indicator\"):\n raise ValueError(\"{0} format is not supported\".format(y_type))\n\n if is_regressor(clf):\n y_pred = method_caller(clf, \"predict\", X)\n else:\n try:\n y_pred = method_caller(clf, \"decision_function\", X)\n\n if isinstance(y_pred, list):\n # For multi-output multi-class estimator\n y_pred = np.vstack([p for p in y_pred]).T\n elif y_type == \"binary\" and \"pos_label\" in self._kwargs:\n self._check_pos_label(\n self._kwargs[\"pos_label\"], clf.classes_\n )\n if self._kwargs[\"pos_label\"] == clf.classes_[0]:\n # The implicit positive class of the binary classifier\n # does not match `pos_label`: we need to invert the\n # predictions\n y_pred *= -1\n\n except (NotImplementedError, AttributeError):\n y_pred = method_caller(clf, \"predict_proba\", X)\n\n if y_type == \"binary\":\n y_pred = self._select_proba_binary(y_pred, clf.classes_)\n elif isinstance(y_pred, list):\n y_pred = np.vstack([p[:, -1] for p in y_pred]).T\n\n if sample_weight is not None:\n return self._sign * self._score_func(y, y_pred,\n sample_weight=sample_weight,\n **self._kwargs)\n else:\n return self._sign * self._score_func(y, y_pred, **self._kwargs)\n\n def _factory_args(self):\n return \", needs_threshold=True\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture", + "name": "BaseMixture", + "qname": "sklearn.mixture._base.BaseMixture", + "decorators": [], + "superclasses": ["DensityMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.mixture._base/BaseMixture/__init__", + "scikit-learn/sklearn.mixture._base/BaseMixture/_check_initial_parameters", + "scikit-learn/sklearn.mixture._base/BaseMixture/_check_parameters", + "scikit-learn/sklearn.mixture._base/BaseMixture/_initialize_parameters", + "scikit-learn/sklearn.mixture._base/BaseMixture/_initialize", + "scikit-learn/sklearn.mixture._base/BaseMixture/fit", + "scikit-learn/sklearn.mixture._base/BaseMixture/fit_predict", + "scikit-learn/sklearn.mixture._base/BaseMixture/_e_step", + "scikit-learn/sklearn.mixture._base/BaseMixture/_m_step", + "scikit-learn/sklearn.mixture._base/BaseMixture/_get_parameters", + "scikit-learn/sklearn.mixture._base/BaseMixture/_set_parameters", + "scikit-learn/sklearn.mixture._base/BaseMixture/score_samples", + "scikit-learn/sklearn.mixture._base/BaseMixture/score", + "scikit-learn/sklearn.mixture._base/BaseMixture/predict", + "scikit-learn/sklearn.mixture._base/BaseMixture/predict_proba", + "scikit-learn/sklearn.mixture._base/BaseMixture/sample", + "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_weighted_log_prob", + "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_log_weights", + "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_log_prob", + "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_log_prob_resp", + "scikit-learn/sklearn.mixture._base/BaseMixture/_print_verbose_msg_init_beg", + 
"scikit-learn/sklearn.mixture._base/BaseMixture/_print_verbose_msg_iter_end", + "scikit-learn/sklearn.mixture._base/BaseMixture/_print_verbose_msg_init_end" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for mixture models.\n\nThis abstract class specifies an interface for all mixture classes and\nprovides basic common methods for mixture models.", + "docstring": "Base class for mixture models.\n\nThis abstract class specifies an interface for all mixture classes and\nprovides basic common methods for mixture models.", + "code": "class BaseMixture(DensityMixin, BaseEstimator, metaclass=ABCMeta):\n \"\"\"Base class for mixture models.\n\n This abstract class specifies an interface for all mixture classes and\n provides basic common methods for mixture models.\n \"\"\"\n\n def __init__(self, n_components, tol, reg_covar,\n max_iter, n_init, init_params, random_state, warm_start,\n verbose, verbose_interval):\n self.n_components = n_components\n self.tol = tol\n self.reg_covar = reg_covar\n self.max_iter = max_iter\n self.n_init = n_init\n self.init_params = init_params\n self.random_state = random_state\n self.warm_start = warm_start\n self.verbose = verbose\n self.verbose_interval = verbose_interval\n\n def _check_initial_parameters(self, X):\n \"\"\"Check values of the basic parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n \"\"\"\n if self.n_components < 1:\n raise ValueError(\"Invalid value for 'n_components': %d \"\n \"Estimation requires at least one component\"\n % self.n_components)\n\n if self.tol < 0.:\n raise ValueError(\"Invalid value for 'tol': %.5f \"\n \"Tolerance used by the EM must be non-negative\"\n % self.tol)\n\n if self.n_init < 1:\n raise ValueError(\"Invalid value for 'n_init': %d \"\n \"Estimation requires at least one run\"\n % self.n_init)\n\n if self.max_iter < 1:\n raise ValueError(\"Invalid value for 'max_iter': %d \"\n \"Estimation requires at least one iteration\"\n % self.max_iter)\n\n if self.reg_covar < 0.:\n raise ValueError(\"Invalid value for 'reg_covar': %.5f \"\n \"regularization on covariance must be \"\n \"non-negative\"\n % self.reg_covar)\n\n # Check all the parameters values of the derived class\n self._check_parameters(X)\n\n @abstractmethod\n def _check_parameters(self, X):\n \"\"\"Check initial parameters of the derived class.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n \"\"\"\n pass\n\n def _initialize_parameters(self, X, random_state):\n \"\"\"Initialize the model parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n random_state : RandomState\n A random number generator instance that controls the random seed\n used for the method chosen to initialize the parameters.\n \"\"\"\n n_samples, _ = X.shape\n\n if self.init_params == 'kmeans':\n resp = np.zeros((n_samples, self.n_components))\n label = cluster.KMeans(n_clusters=self.n_components, n_init=1,\n random_state=random_state).fit(X).labels_\n resp[np.arange(n_samples), label] = 1\n elif self.init_params == 'random':\n resp = random_state.rand(n_samples, self.n_components)\n resp /= resp.sum(axis=1)[:, np.newaxis]\n else:\n raise ValueError(\"Unimplemented initialization method '%s'\"\n % self.init_params)\n\n self._initialize(X, resp)\n\n @abstractmethod\n def _initialize(self, X, resp):\n \"\"\"Initialize the model parameters of the derived class.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n resp : 
array-like of shape (n_samples, n_components)\n \"\"\"\n pass\n\n def fit(self, X, y=None):\n \"\"\"Estimate model parameters with the EM algorithm.\n\n The method fits the model ``n_init`` times and sets the parameters with\n which the model has the largest likelihood or lower bound. Within each\n trial, the method iterates between E-step and M-step for ``max_iter``\n times until the change of likelihood or lower bound is less than\n ``tol``, otherwise, a ``ConvergenceWarning`` is raised.\n If ``warm_start`` is ``True``, then ``n_init`` is ignored and a single\n initialization is performed upon the first call. Upon consecutive\n calls, training starts where it left off.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\n Returns\n -------\n self\n \"\"\"\n self.fit_predict(X, y)\n return self\n\n def fit_predict(self, X, y=None):\n \"\"\"Estimate model parameters using X and predict the labels for X.\n\n The method fits the model n_init times and sets the parameters with\n which the model has the largest likelihood or lower bound. Within each\n trial, the method iterates between E-step and M-step for `max_iter`\n times until the change of likelihood or lower bound is less than\n `tol`, otherwise, a :class:`~sklearn.exceptions.ConvergenceWarning` is\n raised. After fitting, it predicts the most probable label for the\n input data points.\n\n .. versionadded:: 0.20\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\n Returns\n -------\n labels : array, shape (n_samples,)\n Component labels.\n \"\"\"\n X = _check_X(X, self.n_components, ensure_min_samples=2)\n self._check_n_features(X, reset=True)\n self._check_initial_parameters(X)\n\n # if we enable warm_start, we will have a unique initialisation\n do_init = not(self.warm_start and hasattr(self, 'converged_'))\n n_init = self.n_init if do_init else 1\n\n max_lower_bound = -np.infty\n self.converged_ = False\n\n random_state = check_random_state(self.random_state)\n\n n_samples, _ = X.shape\n for init in range(n_init):\n self._print_verbose_msg_init_beg(init)\n\n if do_init:\n self._initialize_parameters(X, random_state)\n\n lower_bound = (-np.infty if do_init else self.lower_bound_)\n\n for n_iter in range(1, self.max_iter + 1):\n prev_lower_bound = lower_bound\n\n log_prob_norm, log_resp = self._e_step(X)\n self._m_step(X, log_resp)\n lower_bound = self._compute_lower_bound(\n log_resp, log_prob_norm)\n\n change = lower_bound - prev_lower_bound\n self._print_verbose_msg_iter_end(n_iter, change)\n\n if abs(change) < self.tol:\n self.converged_ = True\n break\n\n self._print_verbose_msg_init_end(lower_bound)\n\n if lower_bound > max_lower_bound:\n max_lower_bound = lower_bound\n best_params = self._get_parameters()\n best_n_iter = n_iter\n\n if not self.converged_:\n warnings.warn('Initialization %d did not converge. 
'\n 'Try different init parameters, '\n 'or increase max_iter, tol '\n 'or check for degenerate data.'\n % (init + 1), ConvergenceWarning)\n\n self._set_parameters(best_params)\n self.n_iter_ = best_n_iter\n self.lower_bound_ = max_lower_bound\n\n # Always do a final e-step to guarantee that the labels returned by\n # fit_predict(X) are always consistent with fit(X).predict(X)\n # for any value of max_iter and tol (and any random_state).\n _, log_resp = self._e_step(X)\n\n return log_resp.argmax(axis=1)\n\n def _e_step(self, X):\n \"\"\"E step.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n log_prob_norm : float\n Mean of the logarithms of the probabilities of each sample in X\n\n log_responsibility : array, shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.\n \"\"\"\n log_prob_norm, log_resp = self._estimate_log_prob_resp(X)\n return np.mean(log_prob_norm), log_resp\n\n @abstractmethod\n def _m_step(self, X, log_resp):\n \"\"\"M step.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n log_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.\n \"\"\"\n pass\n\n @abstractmethod\n def _get_parameters(self):\n pass\n\n @abstractmethod\n def _set_parameters(self, params):\n pass\n\n def score_samples(self, X):\n \"\"\"Compute the weighted log probabilities for each sample.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\n Returns\n -------\n log_prob : array, shape (n_samples,)\n Log probabilities of each data point in X.\n \"\"\"\n check_is_fitted(self)\n X = _check_X(X, None, self.means_.shape[1])\n\n return logsumexp(self._estimate_weighted_log_prob(X), axis=1)\n\n def score(self, X, y=None):\n \"\"\"Compute the per-sample average log-likelihood of the given data X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_dimensions)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\n Returns\n -------\n log_likelihood : float\n Log likelihood of the Gaussian mixture given X.\n \"\"\"\n return self.score_samples(X).mean()\n\n def predict(self, X):\n \"\"\"Predict the labels for the data samples in X using trained model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\n Returns\n -------\n labels : array, shape (n_samples,)\n Component labels.\n \"\"\"\n check_is_fitted(self)\n X = _check_X(X, None, self.means_.shape[1])\n return self._estimate_weighted_log_prob(X).argmax(axis=1)\n\n def predict_proba(self, X):\n \"\"\"Predict posterior probability of each component given the data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. 
Each row\n corresponds to a single data point.\n\n Returns\n -------\n resp : array, shape (n_samples, n_components)\n Returns the probability each Gaussian (state) in\n the model given each sample.\n \"\"\"\n check_is_fitted(self)\n X = _check_X(X, None, self.means_.shape[1])\n _, log_resp = self._estimate_log_prob_resp(X)\n return np.exp(log_resp)\n\n def sample(self, n_samples=1):\n \"\"\"Generate random samples from the fitted Gaussian distribution.\n\n Parameters\n ----------\n n_samples : int, default=1\n Number of samples to generate.\n\n Returns\n -------\n X : array, shape (n_samples, n_features)\n Randomly generated sample\n\n y : array, shape (n_samples,)\n Component labels\n\n \"\"\"\n check_is_fitted(self)\n\n if n_samples < 1:\n raise ValueError(\n \"Invalid value for 'n_samples': %d. The sampling requires at \"\n \"least one sample.\" % (n_samples))\n\n _, n_features = self.means_.shape\n rng = check_random_state(self.random_state)\n n_samples_comp = rng.multinomial(n_samples, self.weights_)\n\n if self.covariance_type == 'full':\n X = np.vstack([\n rng.multivariate_normal(mean, covariance, int(sample))\n for (mean, covariance, sample) in zip(\n self.means_, self.covariances_, n_samples_comp)])\n elif self.covariance_type == \"tied\":\n X = np.vstack([\n rng.multivariate_normal(mean, self.covariances_, int(sample))\n for (mean, sample) in zip(\n self.means_, n_samples_comp)])\n else:\n X = np.vstack([\n mean + rng.randn(sample, n_features) * np.sqrt(covariance)\n for (mean, covariance, sample) in zip(\n self.means_, self.covariances_, n_samples_comp)])\n\n y = np.concatenate([np.full(sample, j, dtype=int)\n for j, sample in enumerate(n_samples_comp)])\n\n return (X, y)\n\n def _estimate_weighted_log_prob(self, X):\n \"\"\"Estimate the weighted log-probabilities, log P(X | Z) + log weights.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n weighted_log_prob : array, shape (n_samples, n_components)\n \"\"\"\n return self._estimate_log_prob(X) + self._estimate_log_weights()\n\n @abstractmethod\n def _estimate_log_weights(self):\n \"\"\"Estimate log-weights in EM algorithm, E[ log pi ] in VB algorithm.\n\n Returns\n -------\n log_weight : array, shape (n_components, )\n \"\"\"\n pass\n\n @abstractmethod\n def _estimate_log_prob(self, X):\n \"\"\"Estimate the log-probabilities log P(X | Z).\n\n Compute the log-probabilities per each component for each sample.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n log_prob : array, shape (n_samples, n_components)\n \"\"\"\n pass\n\n def _estimate_log_prob_resp(self, X):\n \"\"\"Estimate log probabilities and responsibilities for each sample.\n\n Compute the log probabilities, weighted log probabilities per\n component and responsibilities for each sample in X with respect to\n the current state of the model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n log_prob_norm : array, shape (n_samples,)\n log p(X)\n\n log_responsibilities : array, shape (n_samples, n_components)\n logarithm of the responsibilities\n \"\"\"\n weighted_log_prob = self._estimate_weighted_log_prob(X)\n log_prob_norm = logsumexp(weighted_log_prob, axis=1)\n with np.errstate(under='ignore'):\n # ignore underflow\n log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis]\n return log_prob_norm, log_resp\n\n def _print_verbose_msg_init_beg(self, n_init):\n \"\"\"Print verbose message on 
initialization.\"\"\"\n if self.verbose == 1:\n print(\"Initialization %d\" % n_init)\n elif self.verbose >= 2:\n print(\"Initialization %d\" % n_init)\n self._init_prev_time = time()\n self._iter_prev_time = self._init_prev_time\n\n def _print_verbose_msg_iter_end(self, n_iter, diff_ll):\n \"\"\"Print verbose message on the end of iteration.\"\"\"\n if n_iter % self.verbose_interval == 0:\n if self.verbose == 1:\n print(\" Iteration %d\" % n_iter)\n elif self.verbose >= 2:\n cur_time = time()\n print(\" Iteration %d\\t time lapse %.5fs\\t ll change %.5f\" % (\n n_iter, cur_time - self._iter_prev_time, diff_ll))\n self._iter_prev_time = cur_time\n\n def _print_verbose_msg_init_end(self, ll):\n \"\"\"Print verbose message on the end of initialization.\"\"\"\n if self.verbose == 1:\n print(\"Initialization converged: %s\" % self.converged_)\n elif self.verbose >= 2:\n print(\"Initialization converged: %s\\t time lapse %.5fs\\t ll %.5f\" %\n (self.converged_, time() - self._init_prev_time, ll))", + "instance_attributes": [ + { + "name": "converged_", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture", + "name": "BayesianGaussianMixture", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture", + "decorators": [], + "superclasses": ["BaseMixture"], + "methods": [ + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_parameters", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_weights_parameters", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_means_parameters", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_precision_parameters", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_checkcovariance_prior_parameter", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_initialize", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_weights", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_means", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_precisions", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_full", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_tied", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_diag", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_spherical", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_m_step", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_log_weights", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_log_prob", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_compute_lower_bound", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_get_parameters", + "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_set_parameters" + ], + "is_public": false, + "reexported_by": [], + "description": "Variational Bayesian estimation of a Gaussian mixture.\n\nThis class allows to infer an approximate posterior distribution over the\nparameters of a Gaussian mixture 
distribution. The effective number of\ncomponents can be inferred from the data.\n\nThis class implements two types of prior for the weights distribution: a\nfinite mixture model with Dirichlet distribution and an infinite mixture\nmodel with the Dirichlet Process. In practice Dirichlet Process inference\nalgorithm is approximated and uses a truncated distribution with a fixed\nmaximum number of components (called the Stick-breaking representation).\nThe number of components actually used almost always depends on the data.\n\n.. versionadded:: 0.18\n\nRead more in the :ref:`User Guide `.", + "docstring": "Variational Bayesian estimation of a Gaussian mixture.\n\nThis class allows to infer an approximate posterior distribution over the\nparameters of a Gaussian mixture distribution. The effective number of\ncomponents can be inferred from the data.\n\nThis class implements two types of prior for the weights distribution: a\nfinite mixture model with Dirichlet distribution and an infinite mixture\nmodel with the Dirichlet Process. In practice Dirichlet Process inference\nalgorithm is approximated and uses a truncated distribution with a fixed\nmaximum number of components (called the Stick-breaking representation).\nThe number of components actually used almost always depends on the data.\n\n.. versionadded:: 0.18\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=1\n The number of mixture components. Depending on the data and the value\n of the `weight_concentration_prior` the model can decide to not use\n all the components by setting some component `weights_` to values very\n close to zero. The number of effective components is therefore smaller\n than n_components.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n String describing the type of covariance parameters to use.\n Must be one of::\n\n 'full' (each component has its own general covariance matrix),\n 'tied' (all components share the same general covariance matrix),\n 'diag' (each component has its own diagonal covariance matrix),\n 'spherical' (each component has its own single variance).\n\ntol : float, default=1e-3\n The convergence threshold. EM iterations will stop when the\n lower bound average gain on the likelihood (of the training data with\n respect to the model) is below this threshold.\n\nreg_covar : float, default=1e-6\n Non-negative regularization added to the diagonal of covariance.\n Allows to assure that the covariance matrices are all positive.\n\nmax_iter : int, default=100\n The number of EM iterations to perform.\n\nn_init : int, default=1\n The number of initializations to perform. The result with the highest\n lower bound value on the likelihood is kept.\n\ninit_params : {'kmeans', 'random'}, default='kmeans'\n The method used to initialize the weights, the means and the\n covariances.\n Must be one of::\n\n 'kmeans' : responsibilities are initialized using kmeans.\n 'random' : responsibilities are initialized randomly.\n\nweight_concentration_prior_type : str, default='dirichlet_process'\n String describing the type of the weight concentration prior.\n Must be one of::\n\n 'dirichlet_process' (using the Stick-breaking representation),\n 'dirichlet_distribution' (can favor more uniform weights).\n\nweight_concentration_prior : float | None, default=None.\n The dirichlet concentration of each component on the weight\n distribution (Dirichlet). This is commonly called gamma in the\n literature. 
The higher concentration puts more mass in\n the center and will lead to more components being active, while a lower\n concentration parameter will lead to more mass at the edge of the\n mixture weights simplex. The value of the parameter must be greater\n than 0. If it is None, it's set to ``1. / n_components``.\n\nmean_precision_prior : float | None, default=None.\n The precision prior on the mean distribution (Gaussian).\n Controls the extent of where means can be placed. Larger\n values concentrate the cluster means around `mean_prior`.\n The value of the parameter must be greater than 0.\n If it is None, it is set to 1.\n\nmean_prior : array-like, shape (n_features,), default=None.\n The prior on the mean distribution (Gaussian).\n If it is None, it is set to the mean of X.\n\ndegrees_of_freedom_prior : float | None, default=None.\n The prior of the number of degrees of freedom on the covariance\n distributions (Wishart). If it is None, it's set to `n_features`.\n\ncovariance_prior : float or array-like, default=None.\n The prior on the covariance distribution (Wishart).\n If it is None, the emiprical covariance prior is initialized using the\n covariance of X. The shape depends on `covariance_type`::\n\n (n_features, n_features) if 'full',\n (n_features, n_features) if 'tied',\n (n_features) if 'diag',\n float if 'spherical'\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the random seed given to the method chosen to initialize the\n parameters (see `init_params`).\n In addition, it controls the generation of random samples from the\n fitted distribution (see the method `sample`).\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nwarm_start : bool, default=False\n If 'warm_start' is True, the solution of the last fitting is used as\n initialization for the next call of fit(). This can speed up\n convergence when fit is called several times on similar problems.\n See :term:`the Glossary `.\n\nverbose : int, default=0\n Enable verbose output. If 1 then it prints the current\n initialization and each iteration step. If greater than 1 then\n it prints also the log probability and the time needed\n for each step.\n\nverbose_interval : int, default=10\n Number of iteration done before the next print.\n\nAttributes\n----------\nweights_ : array-like of shape (n_components,)\n The weights of each mixture components.\n\nmeans_ : array-like of shape (n_components, n_features)\n The mean of each mixture component.\n\ncovariances_ : array-like\n The covariance of each mixture component.\n The shape depends on `covariance_type`::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'\n\nprecisions_ : array-like\n The precision matrices for each component in the mixture. A precision\n matrix is the inverse of a covariance matrix. A covariance matrix is\n symmetric positive definite so the mixture of Gaussian can be\n equivalently parameterized by the precision matrices. 
Storing the\n    precision matrices instead of the covariance matrices makes it more\n    efficient to compute the log-likelihood of new samples at test time.\n    The shape depends on ``covariance_type``::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nprecisions_cholesky_ : array-like\n    The cholesky decomposition of the precision matrices of each mixture\n    component. A precision matrix is the inverse of a covariance matrix.\n    A covariance matrix is symmetric positive definite so the mixture of\n    Gaussian can be equivalently parameterized by the precision matrices.\n    Storing the precision matrices instead of the covariance matrices makes\n    it more efficient to compute the log-likelihood of new samples at test\n    time. The shape depends on ``covariance_type``::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nconverged_ : bool\n    True when convergence was reached in fit(), False otherwise.\n\nn_iter_ : int\n    Number of steps used by the best fit of inference to reach the\n    convergence.\n\nlower_bound_ : float\n    Lower bound value on the likelihood (of the training data with\n    respect to the model) of the best fit of inference.\n\nweight_concentration_prior_ : tuple or float\n    The dirichlet concentration of each component on the weight\n    distribution (Dirichlet). The type depends on\n    ``weight_concentration_prior_type``::\n\n        (float, float) if 'dirichlet_process' (Beta parameters),\n        float          if 'dirichlet_distribution' (Dirichlet parameters).\n\n    The higher concentration puts more mass in\n    the center and will lead to more components being active, while a lower\n    concentration parameter will lead to more mass at the edge of the\n    simplex.\n\nweight_concentration_ : array-like of shape (n_components,)\n    The dirichlet concentration of each component on the weight\n    distribution (Dirichlet).\n\nmean_precision_prior_ : float\n    The precision prior on the mean distribution (Gaussian).\n    Controls the extent of where means can be placed.\n    Larger values concentrate the cluster means around `mean_prior`.\n    If mean_precision_prior is set to None, `mean_precision_prior_` is set\n    to 1.\n\nmean_precision_ : array-like of shape (n_components,)\n    The precision of each component on the mean distribution (Gaussian).\n\nmean_prior_ : array-like of shape (n_features,)\n    The prior on the mean distribution (Gaussian).\n\ndegrees_of_freedom_prior_ : float\n    The prior of the number of degrees of freedom on the covariance\n    distributions (Wishart).\n\ndegrees_of_freedom_ : array-like of shape (n_components,)\n    The number of degrees of freedom of each component in the model.\n\ncovariance_prior_ : float or array-like\n    The prior on the covariance distribution (Wishart).\n    The shape depends on `covariance_type`::\n\n        (n_features, n_features) if 'full',\n        (n_features, n_features) if 'tied',\n        (n_features)             if 'diag',\n        float                    if 'spherical'\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.mixture import BayesianGaussianMixture\n>>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]])\n>>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X)\n>>> bgm.means_\narray([[2.49... , 2.29...],\n       [8.45..., 4.52... 
]])\n>>> bgm.predict([[0, 0], [9, 3]])\narray([0, 1])\n\nSee Also\n--------\nGaussianMixture : Finite Gaussian mixture fit with EM.\n\nReferences\n----------\n\n.. [1] `Bishop, Christopher M. (2006). \"Pattern recognition and machine\n learning\". Vol. 4 No. 4. New York: Springer.\n `_\n\n.. [2] `Hagai Attias. (2000). \"A Variational Bayesian Framework for\n Graphical Models\". In Advances in Neural Information Processing\n Systems 12.\n `_\n\n.. [3] `Blei, David M. and Michael I. Jordan. (2006). \"Variational\n inference for Dirichlet process mixtures\". Bayesian analysis 1.1\n `_", + "code": "class BayesianGaussianMixture(BaseMixture):\n \"\"\"Variational Bayesian estimation of a Gaussian mixture.\n\n This class allows to infer an approximate posterior distribution over the\n parameters of a Gaussian mixture distribution. The effective number of\n components can be inferred from the data.\n\n This class implements two types of prior for the weights distribution: a\n finite mixture model with Dirichlet distribution and an infinite mixture\n model with the Dirichlet Process. In practice Dirichlet Process inference\n algorithm is approximated and uses a truncated distribution with a fixed\n maximum number of components (called the Stick-breaking representation).\n The number of components actually used almost always depends on the data.\n\n .. versionadded:: 0.18\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=1\n The number of mixture components. Depending on the data and the value\n of the `weight_concentration_prior` the model can decide to not use\n all the components by setting some component `weights_` to values very\n close to zero. The number of effective components is therefore smaller\n than n_components.\n\n covariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n String describing the type of covariance parameters to use.\n Must be one of::\n\n 'full' (each component has its own general covariance matrix),\n 'tied' (all components share the same general covariance matrix),\n 'diag' (each component has its own diagonal covariance matrix),\n 'spherical' (each component has its own single variance).\n\n tol : float, default=1e-3\n The convergence threshold. EM iterations will stop when the\n lower bound average gain on the likelihood (of the training data with\n respect to the model) is below this threshold.\n\n reg_covar : float, default=1e-6\n Non-negative regularization added to the diagonal of covariance.\n Allows to assure that the covariance matrices are all positive.\n\n max_iter : int, default=100\n The number of EM iterations to perform.\n\n n_init : int, default=1\n The number of initializations to perform. The result with the highest\n lower bound value on the likelihood is kept.\n\n init_params : {'kmeans', 'random'}, default='kmeans'\n The method used to initialize the weights, the means and the\n covariances.\n Must be one of::\n\n 'kmeans' : responsibilities are initialized using kmeans.\n 'random' : responsibilities are initialized randomly.\n\n weight_concentration_prior_type : str, default='dirichlet_process'\n String describing the type of the weight concentration prior.\n Must be one of::\n\n 'dirichlet_process' (using the Stick-breaking representation),\n 'dirichlet_distribution' (can favor more uniform weights).\n\n weight_concentration_prior : float | None, default=None.\n The dirichlet concentration of each component on the weight\n distribution (Dirichlet). 
This is commonly called gamma in the\n        literature. The higher concentration puts more mass in\n        the center and will lead to more components being active, while a lower\n        concentration parameter will lead to more mass at the edge of the\n        mixture weights simplex. The value of the parameter must be greater\n        than 0. If it is None, it's set to ``1. / n_components``.\n\n    mean_precision_prior : float | None, default=None.\n        The precision prior on the mean distribution (Gaussian).\n        Controls the extent of where means can be placed. Larger\n        values concentrate the cluster means around `mean_prior`.\n        The value of the parameter must be greater than 0.\n        If it is None, it is set to 1.\n\n    mean_prior : array-like, shape (n_features,), default=None.\n        The prior on the mean distribution (Gaussian).\n        If it is None, it is set to the mean of X.\n\n    degrees_of_freedom_prior : float | None, default=None.\n        The prior of the number of degrees of freedom on the covariance\n        distributions (Wishart). If it is None, it's set to `n_features`.\n\n    covariance_prior : float or array-like, default=None.\n        The prior on the covariance distribution (Wishart).\n        If it is None, the empirical covariance prior is initialized using the\n        covariance of X. The shape depends on `covariance_type`::\n\n            (n_features, n_features) if 'full',\n            (n_features, n_features) if 'tied',\n            (n_features)             if 'diag',\n            float                    if 'spherical'\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given to the method chosen to initialize the\n        parameters (see `init_params`).\n        In addition, it controls the generation of random samples from the\n        fitted distribution (see the method `sample`).\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary `.\n\n    warm_start : bool, default=False\n        If 'warm_start' is True, the solution of the last fitting is used as\n        initialization for the next call of fit(). This can speed up\n        convergence when fit is called several times on similar problems.\n        See :term:`the Glossary `.\n\n    verbose : int, default=0\n        Enable verbose output. If 1 then it prints the current\n        initialization and each iteration step. If greater than 1 then\n        it prints also the log probability and the time needed\n        for each step.\n\n    verbose_interval : int, default=10\n        Number of iterations done before the next print.\n\n    Attributes\n    ----------\n    weights_ : array-like of shape (n_components,)\n        The weights of each mixture component.\n\n    means_ : array-like of shape (n_components, n_features)\n        The mean of each mixture component.\n\n    covariances_ : array-like\n        The covariance of each mixture component.\n        The shape depends on `covariance_type`::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    precisions_ : array-like\n        The precision matrices for each component in the mixture. A precision\n        matrix is the inverse of a covariance matrix. A covariance matrix is\n        symmetric positive definite so the mixture of Gaussian can be\n        equivalently parameterized by the precision matrices. 
Storing the\n        precision matrices instead of the covariance matrices makes it more\n        efficient to compute the log-likelihood of new samples at test time.\n        The shape depends on ``covariance_type``::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    precisions_cholesky_ : array-like\n        The cholesky decomposition of the precision matrices of each mixture\n        component. A precision matrix is the inverse of a covariance matrix.\n        A covariance matrix is symmetric positive definite so the mixture of\n        Gaussian can be equivalently parameterized by the precision matrices.\n        Storing the precision matrices instead of the covariance matrices makes\n        it more efficient to compute the log-likelihood of new samples at test\n        time. The shape depends on ``covariance_type``::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    converged_ : bool\n        True when convergence was reached in fit(), False otherwise.\n\n    n_iter_ : int\n        Number of steps used by the best fit of inference to reach the\n        convergence.\n\n    lower_bound_ : float\n        Lower bound value on the likelihood (of the training data with\n        respect to the model) of the best fit of inference.\n\n    weight_concentration_prior_ : tuple or float\n        The dirichlet concentration of each component on the weight\n        distribution (Dirichlet). The type depends on\n        ``weight_concentration_prior_type``::\n\n            (float, float) if 'dirichlet_process' (Beta parameters),\n            float          if 'dirichlet_distribution' (Dirichlet parameters).\n\n        The higher concentration puts more mass in\n        the center and will lead to more components being active, while a lower\n        concentration parameter will lead to more mass at the edge of the\n        simplex.\n\n    weight_concentration_ : array-like of shape (n_components,)\n        The dirichlet concentration of each component on the weight\n        distribution (Dirichlet).\n\n    mean_precision_prior_ : float\n        The precision prior on the mean distribution (Gaussian).\n        Controls the extent of where means can be placed.\n        Larger values concentrate the cluster means around `mean_prior`.\n        If mean_precision_prior is set to None, `mean_precision_prior_` is set\n        to 1.\n\n    mean_precision_ : array-like of shape (n_components,)\n        The precision of each component on the mean distribution (Gaussian).\n\n    mean_prior_ : array-like of shape (n_features,)\n        The prior on the mean distribution (Gaussian).\n\n    degrees_of_freedom_prior_ : float\n        The prior of the number of degrees of freedom on the covariance\n        distributions (Wishart).\n\n    degrees_of_freedom_ : array-like of shape (n_components,)\n        The number of degrees of freedom of each component in the model.\n\n    covariance_prior_ : float or array-like\n        The prior on the covariance distribution (Wishart).\n        The shape depends on `covariance_type`::\n\n            (n_features, n_features) if 'full',\n            (n_features, n_features) if 'tied',\n            (n_features)             if 'diag',\n            float                    if 'spherical'\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.mixture import BayesianGaussianMixture\n    >>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]])\n    >>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X)\n    >>> bgm.means_\n    array([[2.49... , 2.29...],\n           [8.45..., 4.52... 
]])\n >>> bgm.predict([[0, 0], [9, 3]])\n array([0, 1])\n\n See Also\n --------\n GaussianMixture : Finite Gaussian mixture fit with EM.\n\n References\n ----------\n\n .. [1] `Bishop, Christopher M. (2006). \"Pattern recognition and machine\n learning\". Vol. 4 No. 4. New York: Springer.\n `_\n\n .. [2] `Hagai Attias. (2000). \"A Variational Bayesian Framework for\n Graphical Models\". In Advances in Neural Information Processing\n Systems 12.\n `_\n\n .. [3] `Blei, David M. and Michael I. Jordan. (2006). \"Variational\n inference for Dirichlet process mixtures\". Bayesian analysis 1.1\n `_\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, n_components=1, covariance_type='full', tol=1e-3,\n reg_covar=1e-6, max_iter=100, n_init=1, init_params='kmeans',\n weight_concentration_prior_type='dirichlet_process',\n weight_concentration_prior=None,\n mean_precision_prior=None, mean_prior=None,\n degrees_of_freedom_prior=None, covariance_prior=None,\n random_state=None, warm_start=False, verbose=0,\n verbose_interval=10):\n super().__init__(\n n_components=n_components, tol=tol, reg_covar=reg_covar,\n max_iter=max_iter, n_init=n_init, init_params=init_params,\n random_state=random_state, warm_start=warm_start,\n verbose=verbose, verbose_interval=verbose_interval)\n\n self.covariance_type = covariance_type\n self.weight_concentration_prior_type = weight_concentration_prior_type\n self.weight_concentration_prior = weight_concentration_prior\n self.mean_precision_prior = mean_precision_prior\n self.mean_prior = mean_prior\n self.degrees_of_freedom_prior = degrees_of_freedom_prior\n self.covariance_prior = covariance_prior\n\n def _check_parameters(self, X):\n \"\"\"Check that the parameters are well defined.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n \"\"\"\n if self.covariance_type not in ['spherical', 'tied', 'diag', 'full']:\n raise ValueError(\"Invalid value for 'covariance_type': %s \"\n \"'covariance_type' should be in \"\n \"['spherical', 'tied', 'diag', 'full']\"\n % self.covariance_type)\n\n if (self.weight_concentration_prior_type not in\n ['dirichlet_process', 'dirichlet_distribution']):\n raise ValueError(\n \"Invalid value for 'weight_concentration_prior_type': %s \"\n \"'weight_concentration_prior_type' should be in \"\n \"['dirichlet_process', 'dirichlet_distribution']\"\n % self.weight_concentration_prior_type)\n\n self._check_weights_parameters()\n self._check_means_parameters(X)\n self._check_precision_parameters(X)\n self._checkcovariance_prior_parameter(X)\n\n def _check_weights_parameters(self):\n \"\"\"Check the parameter of the Dirichlet distribution.\"\"\"\n if self.weight_concentration_prior is None:\n self.weight_concentration_prior_ = 1. 
/ self.n_components\n elif self.weight_concentration_prior > 0.:\n self.weight_concentration_prior_ = (\n self.weight_concentration_prior)\n else:\n raise ValueError(\"The parameter 'weight_concentration_prior' \"\n \"should be greater than 0., but got %.3f.\"\n % self.weight_concentration_prior)\n\n def _check_means_parameters(self, X):\n \"\"\"Check the parameters of the Gaussian distribution.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n \"\"\"\n _, n_features = X.shape\n\n if self.mean_precision_prior is None:\n self.mean_precision_prior_ = 1.\n elif self.mean_precision_prior > 0.:\n self.mean_precision_prior_ = self.mean_precision_prior\n else:\n raise ValueError(\"The parameter 'mean_precision_prior' should be \"\n \"greater than 0., but got %.3f.\"\n % self.mean_precision_prior)\n\n if self.mean_prior is None:\n self.mean_prior_ = X.mean(axis=0)\n else:\n self.mean_prior_ = check_array(self.mean_prior,\n dtype=[np.float64, np.float32],\n ensure_2d=False)\n _check_shape(self.mean_prior_, (n_features, ), 'means')\n\n def _check_precision_parameters(self, X):\n \"\"\"Check the prior parameters of the precision distribution.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n \"\"\"\n _, n_features = X.shape\n\n if self.degrees_of_freedom_prior is None:\n self.degrees_of_freedom_prior_ = n_features\n elif self.degrees_of_freedom_prior > n_features - 1.:\n self.degrees_of_freedom_prior_ = self.degrees_of_freedom_prior\n else:\n raise ValueError(\"The parameter 'degrees_of_freedom_prior' \"\n \"should be greater than %d, but got %.3f.\"\n % (n_features - 1, self.degrees_of_freedom_prior))\n\n def _checkcovariance_prior_parameter(self, X):\n \"\"\"Check the `covariance_prior_`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n \"\"\"\n _, n_features = X.shape\n\n if self.covariance_prior is None:\n self.covariance_prior_ = {\n 'full': np.atleast_2d(np.cov(X.T)),\n 'tied': np.atleast_2d(np.cov(X.T)),\n 'diag': np.var(X, axis=0, ddof=1),\n 'spherical': np.var(X, axis=0, ddof=1).mean()\n }[self.covariance_type]\n\n elif self.covariance_type in ['full', 'tied']:\n self.covariance_prior_ = check_array(\n self.covariance_prior, dtype=[np.float64, np.float32],\n ensure_2d=False)\n _check_shape(self.covariance_prior_, (n_features, n_features),\n '%s covariance_prior' % self.covariance_type)\n _check_precision_matrix(self.covariance_prior_,\n self.covariance_type)\n elif self.covariance_type == 'diag':\n self.covariance_prior_ = check_array(\n self.covariance_prior, dtype=[np.float64, np.float32],\n ensure_2d=False)\n _check_shape(self.covariance_prior_, (n_features,),\n '%s covariance_prior' % self.covariance_type)\n _check_precision_positivity(self.covariance_prior_,\n self.covariance_type)\n # spherical case\n elif self.covariance_prior > 0.:\n self.covariance_prior_ = self.covariance_prior\n else:\n raise ValueError(\"The parameter 'spherical covariance_prior' \"\n \"should be greater than 0., but got %.3f.\"\n % self.covariance_prior)\n\n def _initialize(self, X, resp):\n \"\"\"Initialization of the mixture parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n resp : array-like of shape (n_samples, n_components)\n \"\"\"\n nk, xk, sk = _estimate_gaussian_parameters(X, resp, self.reg_covar,\n self.covariance_type)\n\n self._estimate_weights(nk)\n self._estimate_means(nk, xk)\n self._estimate_precisions(nk, xk, sk)\n\n def _estimate_weights(self, nk):\n 
\"\"\"Estimate the parameters of the Dirichlet distribution.\n\n Parameters\n ----------\n nk : array-like of shape (n_components,)\n \"\"\"\n if self.weight_concentration_prior_type == 'dirichlet_process':\n # For dirichlet process weight_concentration will be a tuple\n # containing the two parameters of the beta distribution\n self.weight_concentration_ = (\n 1. + nk,\n (self.weight_concentration_prior_ +\n np.hstack((np.cumsum(nk[::-1])[-2::-1], 0))))\n else:\n # case Variationnal Gaussian mixture with dirichlet distribution\n self.weight_concentration_ = self.weight_concentration_prior_ + nk\n\n def _estimate_means(self, nk, xk):\n \"\"\"Estimate the parameters of the Gaussian distribution.\n\n Parameters\n ----------\n nk : array-like of shape (n_components,)\n\n xk : array-like of shape (n_components, n_features)\n \"\"\"\n self.mean_precision_ = self.mean_precision_prior_ + nk\n self.means_ = ((self.mean_precision_prior_ * self.mean_prior_ +\n nk[:, np.newaxis] * xk) /\n self.mean_precision_[:, np.newaxis])\n\n def _estimate_precisions(self, nk, xk, sk):\n \"\"\"Estimate the precisions parameters of the precision distribution.\n\n Parameters\n ----------\n nk : array-like of shape (n_components,)\n\n xk : array-like of shape (n_components, n_features)\n\n sk : array-like\n The shape depends of `covariance_type`:\n 'full' : (n_components, n_features, n_features)\n 'tied' : (n_features, n_features)\n 'diag' : (n_components, n_features)\n 'spherical' : (n_components,)\n \"\"\"\n {\"full\": self._estimate_wishart_full,\n \"tied\": self._estimate_wishart_tied,\n \"diag\": self._estimate_wishart_diag,\n \"spherical\": self._estimate_wishart_spherical\n }[self.covariance_type](nk, xk, sk)\n\n self.precisions_cholesky_ = _compute_precision_cholesky(\n self.covariances_, self.covariance_type)\n\n def _estimate_wishart_full(self, nk, xk, sk):\n \"\"\"Estimate the full Wishart distribution parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n nk : array-like of shape (n_components,)\n\n xk : array-like of shape (n_components, n_features)\n\n sk : array-like of shape (n_components, n_features, n_features)\n \"\"\"\n _, n_features = xk.shape\n\n # Warning : in some Bishop book, there is a typo on the formula 10.63\n # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk` is\n # the correct formula\n self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk\n\n self.covariances_ = np.empty((self.n_components, n_features,\n n_features))\n\n for k in range(self.n_components):\n diff = xk[k] - self.mean_prior_\n self.covariances_[k] = (self.covariance_prior_ + nk[k] * sk[k] +\n nk[k] * self.mean_precision_prior_ /\n self.mean_precision_[k] * np.outer(diff,\n diff))\n\n # Contrary to the original bishop book, we normalize the covariances\n self.covariances_ /= (\n self.degrees_of_freedom_[:, np.newaxis, np.newaxis])\n\n def _estimate_wishart_tied(self, nk, xk, sk):\n \"\"\"Estimate the tied Wishart distribution parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n nk : array-like of shape (n_components,)\n\n xk : array-like of shape (n_components, n_features)\n\n sk : array-like of shape (n_features, n_features)\n \"\"\"\n _, n_features = xk.shape\n\n # Warning : in some Bishop book, there is a typo on the formula 10.63\n # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`\n # is the correct formula\n self.degrees_of_freedom_ = (\n self.degrees_of_freedom_prior_ + nk.sum() / self.n_components)\n\n diff = xk - 
self.mean_prior_\n        self.covariances_ = (\n            self.covariance_prior_ + sk * nk.sum() / self.n_components +\n            self.mean_precision_prior_ / self.n_components * np.dot(\n                (nk / self.mean_precision_) * diff.T, diff))\n\n        # Contrary to the original bishop book, we normalize the covariances\n        self.covariances_ /= self.degrees_of_freedom_\n\n    def _estimate_wishart_diag(self, nk, xk, sk):\n        \"\"\"Estimate the diag Wishart distribution parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n\n        sk : array-like of shape (n_components, n_features)\n        \"\"\"\n        _, n_features = xk.shape\n\n        # Warning : in some Bishop book, there is a typo on the formula 10.63\n        # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`\n        # is the correct formula\n        self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk\n\n        diff = xk - self.mean_prior_\n        self.covariances_ = (\n            self.covariance_prior_ + nk[:, np.newaxis] * (\n                sk + (self.mean_precision_prior_ /\n                      self.mean_precision_)[:, np.newaxis] * np.square(diff)))\n\n        # Contrary to the original bishop book, we normalize the covariances\n        self.covariances_ /= self.degrees_of_freedom_[:, np.newaxis]\n\n    def _estimate_wishart_spherical(self, nk, xk, sk):\n        \"\"\"Estimate the spherical Wishart distribution parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        nk : array-like of shape (n_components,)\n\n        xk : array-like of shape (n_components, n_features)\n\n        sk : array-like of shape (n_components,)\n        \"\"\"\n        _, n_features = xk.shape\n\n        # Warning : in some Bishop book, there is a typo on the formula 10.63\n        # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`\n        # is the correct formula\n        self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk\n\n        diff = xk - self.mean_prior_\n        self.covariances_ = (\n            self.covariance_prior_ + nk * (\n                sk + self.mean_precision_prior_ / self.mean_precision_ *\n                np.mean(np.square(diff), 1)))\n\n        # Contrary to the original bishop book, we normalize the covariances\n        self.covariances_ /= self.degrees_of_freedom_\n\n    def _m_step(self, X, log_resp):\n        \"\"\"M step.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        log_resp : array-like of shape (n_samples, n_components)\n            Logarithm of the posterior probabilities (or responsibilities) of\n            the point of each sample in X.\n        \"\"\"\n        n_samples, _ = X.shape\n\n        nk, xk, sk = _estimate_gaussian_parameters(\n            X, np.exp(log_resp), self.reg_covar, self.covariance_type)\n        self._estimate_weights(nk)\n        self._estimate_means(nk, xk)\n        self._estimate_precisions(nk, xk, sk)\n\n    def _estimate_log_weights(self):\n        if self.weight_concentration_prior_type == 'dirichlet_process':\n            digamma_sum = digamma(self.weight_concentration_[0] +\n                                  self.weight_concentration_[1])\n            digamma_a = digamma(self.weight_concentration_[0])\n            digamma_b = digamma(self.weight_concentration_[1])\n            return (digamma_a - digamma_sum +\n                    np.hstack((0, np.cumsum(digamma_b - digamma_sum)[:-1])))\n        else:\n            # case Variational Gaussian mixture with dirichlet distribution\n            return (digamma(self.weight_concentration_) -\n                    digamma(np.sum(self.weight_concentration_)))\n\n    def _estimate_log_prob(self, X):\n        _, n_features = X.shape\n        # We remove `n_features * np.log(self.degrees_of_freedom_)` because\n        # the precision matrix is normalized\n        log_gauss = (_estimate_log_gaussian_prob(\n            X, self.means_, self.precisions_cholesky_, self.covariance_type) 
-\n .5 * n_features * np.log(self.degrees_of_freedom_))\n\n log_lambda = n_features * np.log(2.) + np.sum(digamma(\n .5 * (self.degrees_of_freedom_ -\n np.arange(0, n_features)[:, np.newaxis])), 0)\n\n return log_gauss + .5 * (log_lambda -\n n_features / self.mean_precision_)\n\n def _compute_lower_bound(self, log_resp, log_prob_norm):\n \"\"\"Estimate the lower bound of the model.\n\n The lower bound on the likelihood (of the training data with respect to\n the model) is used to detect the convergence and has to increase at\n each iteration.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n log_resp : array, shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.\n\n log_prob_norm : float\n Logarithm of the probability of each sample in X.\n\n Returns\n -------\n lower_bound : float\n \"\"\"\n # Contrary to the original formula, we have done some simplification\n # and removed all the constant terms.\n n_features, = self.mean_prior_.shape\n\n # We removed `.5 * n_features * np.log(self.degrees_of_freedom_)`\n # because the precision matrix is normalized.\n log_det_precisions_chol = (_compute_log_det_cholesky(\n self.precisions_cholesky_, self.covariance_type, n_features) -\n .5 * n_features * np.log(self.degrees_of_freedom_))\n\n if self.covariance_type == 'tied':\n log_wishart = self.n_components * np.float64(_log_wishart_norm(\n self.degrees_of_freedom_, log_det_precisions_chol, n_features))\n else:\n log_wishart = np.sum(_log_wishart_norm(\n self.degrees_of_freedom_, log_det_precisions_chol, n_features))\n\n if self.weight_concentration_prior_type == 'dirichlet_process':\n log_norm_weight = -np.sum(betaln(self.weight_concentration_[0],\n self.weight_concentration_[1]))\n else:\n log_norm_weight = _log_dirichlet_norm(self.weight_concentration_)\n\n return (-np.sum(np.exp(log_resp) * log_resp) -\n log_wishart - log_norm_weight -\n 0.5 * n_features * np.sum(np.log(self.mean_precision_)))\n\n def _get_parameters(self):\n return (self.weight_concentration_,\n self.mean_precision_, self.means_,\n self.degrees_of_freedom_, self.covariances_,\n self.precisions_cholesky_)\n\n def _set_parameters(self, params):\n (self.weight_concentration_, self.mean_precision_, self.means_,\n self.degrees_of_freedom_, self.covariances_,\n self.precisions_cholesky_) = params\n\n # Weights computation\n if self.weight_concentration_prior_type == \"dirichlet_process\":\n weight_dirichlet_sum = (self.weight_concentration_[0] +\n self.weight_concentration_[1])\n tmp = self.weight_concentration_[1] / weight_dirichlet_sum\n self.weights_ = (\n self.weight_concentration_[0] / weight_dirichlet_sum *\n np.hstack((1, np.cumprod(tmp[:-1]))))\n self.weights_ /= np.sum(self.weights_)\n else:\n self. 
weights_ = (self.weight_concentration_ /\n np.sum(self.weight_concentration_))\n\n # Precisions matrices computation\n if self.covariance_type == 'full':\n self.precisions_ = np.array([\n np.dot(prec_chol, prec_chol.T)\n for prec_chol in self.precisions_cholesky_])\n\n elif self.covariance_type == 'tied':\n self.precisions_ = np.dot(self.precisions_cholesky_,\n self.precisions_cholesky_.T)\n else:\n self.precisions_ = self.precisions_cholesky_ ** 2", + "instance_attributes": [ + { + "name": "covariance_type", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "weight_concentration_prior_type", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "mean_precision_prior_", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "weight_concentration_", + "types": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "name": "covariances_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "precisions_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture", + "name": "GaussianMixture", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture", + "decorators": [], + "superclasses": ["BaseMixture"], + "methods": [ + "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__", + "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_check_parameters", + "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_initialize", + "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_m_step", + "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_estimate_log_prob", + "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_estimate_log_weights", + "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_compute_lower_bound", + "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_get_parameters", + "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_set_parameters", + "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_n_parameters", + "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/bic", + "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/aic" + ], + "is_public": false, + "reexported_by": [], + "description": "Gaussian Mixture.\n\nRepresentation of a Gaussian mixture model probability distribution.\nThis class allows to estimate the parameters of a Gaussian mixture\ndistribution.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "Gaussian Mixture.\n\nRepresentation of a Gaussian mixture model probability distribution.\nThis class allows to estimate the parameters of a Gaussian mixture\ndistribution.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_components : int, default=1\n The number of mixture components.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n String describing the type of covariance parameters to use.\n Must be one of:\n\n 'full'\n each component has its own general covariance matrix\n 'tied'\n all components share the same general covariance matrix\n 'diag'\n each component has its own diagonal covariance matrix\n 'spherical'\n each component has its own single variance\n\ntol : float, default=1e-3\n The convergence threshold. 
EM iterations will stop when the\n    lower bound average gain is below this threshold.\n\nreg_covar : float, default=1e-6\n    Non-negative regularization added to the diagonal of covariance.\n    Allows to assure that the covariance matrices are all positive.\n\nmax_iter : int, default=100\n    The number of EM iterations to perform.\n\nn_init : int, default=1\n    The number of initializations to perform. The best results are kept.\n\ninit_params : {'kmeans', 'random'}, default='kmeans'\n    The method used to initialize the weights, the means and the\n    precisions.\n    Must be one of::\n\n        'kmeans' : responsibilities are initialized using kmeans.\n        'random' : responsibilities are initialized randomly.\n\nweights_init : array-like of shape (n_components, ), default=None\n    The user-provided initial weights.\n    If it is None, weights are initialized using the `init_params` method.\n\nmeans_init : array-like of shape (n_components, n_features), default=None\n    The user-provided initial means.\n    If it is None, means are initialized using the `init_params` method.\n\nprecisions_init : array-like, default=None\n    The user-provided initial precisions (inverse of the covariance\n    matrices).\n    If it is None, precisions are initialized using the 'init_params'\n    method.\n    The shape depends on 'covariance_type'::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nrandom_state : int, RandomState instance or None, default=None\n    Controls the random seed given to the method chosen to initialize the\n    parameters (see `init_params`).\n    In addition, it controls the generation of random samples from the\n    fitted distribution (see the method `sample`).\n    Pass an int for reproducible output across multiple function calls.\n    See :term:`Glossary `.\n\nwarm_start : bool, default=False\n    If 'warm_start' is True, the solution of the last fitting is used as\n    initialization for the next call of fit(). This can speed up\n    convergence when fit is called several times on similar problems.\n    In that case, 'n_init' is ignored and only a single initialization\n    occurs upon the first call.\n    See :term:`the Glossary `.\n\nverbose : int, default=0\n    Enable verbose output. If 1 then it prints the current\n    initialization and each iteration step. If greater than 1 then\n    it prints also the log probability and the time needed\n    for each step.\n\nverbose_interval : int, default=10\n    Number of iterations done before the next print.\n\nAttributes\n----------\nweights_ : array-like of shape (n_components,)\n    The weights of each mixture component.\n\nmeans_ : array-like of shape (n_components, n_features)\n    The mean of each mixture component.\n\ncovariances_ : array-like\n    The covariance of each mixture component.\n    The shape depends on `covariance_type`::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nprecisions_ : array-like\n    The precision matrices for each component in the mixture. A precision\n    matrix is the inverse of a covariance matrix. A covariance matrix is\n    symmetric positive definite so the mixture of Gaussian can be\n    equivalently parameterized by the precision matrices. 
Storing the\n    precision matrices instead of the covariance matrices makes it more\n    efficient to compute the log-likelihood of new samples at test time.\n    The shape depends on `covariance_type`::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nprecisions_cholesky_ : array-like\n    The cholesky decomposition of the precision matrices of each mixture\n    component. A precision matrix is the inverse of a covariance matrix.\n    A covariance matrix is symmetric positive definite so the mixture of\n    Gaussian can be equivalently parameterized by the precision matrices.\n    Storing the precision matrices instead of the covariance matrices makes\n    it more efficient to compute the log-likelihood of new samples at test\n    time. The shape depends on `covariance_type`::\n\n        (n_components,)                        if 'spherical',\n        (n_features, n_features)               if 'tied',\n        (n_components, n_features)             if 'diag',\n        (n_components, n_features, n_features) if 'full'\n\nconverged_ : bool\n    True when convergence was reached in fit(), False otherwise.\n\nn_iter_ : int\n    Number of steps used by the best fit of EM to reach the convergence.\n\nlower_bound_ : float\n    Lower bound value on the log-likelihood (of the training data with\n    respect to the model) of the best fit of EM.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.mixture import GaussianMixture\n>>> X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])\n>>> gm = GaussianMixture(n_components=2, random_state=0).fit(X)\n>>> gm.means_\narray([[10.,  2.],\n       [ 1.,  2.]])\n>>> gm.predict([[0, 0], [12, 3]])\narray([1, 0])\n\nSee Also\n--------\nBayesianGaussianMixture : Gaussian mixture model fit with a variational\n    inference.", + "code": "class GaussianMixture(BaseMixture):\n    \"\"\"Gaussian Mixture.\n\n    Representation of a Gaussian mixture model probability distribution.\n    This class allows to estimate the parameters of a Gaussian mixture\n    distribution.\n\n    Read more in the :ref:`User Guide `.\n\n    .. versionadded:: 0.18\n\n    Parameters\n    ----------\n    n_components : int, default=1\n        The number of mixture components.\n\n    covariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'\n        String describing the type of covariance parameters to use.\n        Must be one of:\n\n        'full'\n            each component has its own general covariance matrix\n        'tied'\n            all components share the same general covariance matrix\n        'diag'\n            each component has its own diagonal covariance matrix\n        'spherical'\n            each component has its own single variance\n\n    tol : float, default=1e-3\n        The convergence threshold. EM iterations will stop when the\n        lower bound average gain is below this threshold.\n\n    reg_covar : float, default=1e-6\n        Non-negative regularization added to the diagonal of covariance.\n        Allows to assure that the covariance matrices are all positive.\n\n    max_iter : int, default=100\n        The number of EM iterations to perform.\n\n    n_init : int, default=1\n        The number of initializations to perform. 
The best results are kept.\n\n    init_params : {'kmeans', 'random'}, default='kmeans'\n        The method used to initialize the weights, the means and the\n        precisions.\n        Must be one of::\n\n            'kmeans' : responsibilities are initialized using kmeans.\n            'random' : responsibilities are initialized randomly.\n\n    weights_init : array-like of shape (n_components, ), default=None\n        The user-provided initial weights.\n        If it is None, weights are initialized using the `init_params` method.\n\n    means_init : array-like of shape (n_components, n_features), default=None\n        The user-provided initial means.\n        If it is None, means are initialized using the `init_params` method.\n\n    precisions_init : array-like, default=None\n        The user-provided initial precisions (inverse of the covariance\n        matrices).\n        If it is None, precisions are initialized using the 'init_params'\n        method.\n        The shape depends on 'covariance_type'::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    random_state : int, RandomState instance or None, default=None\n        Controls the random seed given to the method chosen to initialize the\n        parameters (see `init_params`).\n        In addition, it controls the generation of random samples from the\n        fitted distribution (see the method `sample`).\n        Pass an int for reproducible output across multiple function calls.\n        See :term:`Glossary `.\n\n    warm_start : bool, default=False\n        If 'warm_start' is True, the solution of the last fitting is used as\n        initialization for the next call of fit(). This can speed up\n        convergence when fit is called several times on similar problems.\n        In that case, 'n_init' is ignored and only a single initialization\n        occurs upon the first call.\n        See :term:`the Glossary `.\n\n    verbose : int, default=0\n        Enable verbose output. If 1 then it prints the current\n        initialization and each iteration step. If greater than 1 then\n        it prints also the log probability and the time needed\n        for each step.\n\n    verbose_interval : int, default=10\n        Number of iterations done before the next print.\n\n    Attributes\n    ----------\n    weights_ : array-like of shape (n_components,)\n        The weights of each mixture component.\n\n    means_ : array-like of shape (n_components, n_features)\n        The mean of each mixture component.\n\n    covariances_ : array-like\n        The covariance of each mixture component.\n        The shape depends on `covariance_type`::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    precisions_ : array-like\n        The precision matrices for each component in the mixture. A precision\n        matrix is the inverse of a covariance matrix. A covariance matrix is\n        symmetric positive definite so the mixture of Gaussian can be\n        equivalently parameterized by the precision matrices. Storing the\n        precision matrices instead of the covariance matrices makes it more\n        efficient to compute the log-likelihood of new samples at test time.\n        The shape depends on `covariance_type`::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    precisions_cholesky_ : array-like\n        The cholesky decomposition of the precision matrices of each mixture\n        component. 
A precision matrix is the inverse of a covariance matrix.\n        A covariance matrix is symmetric positive definite so the mixture of\n        Gaussian can be equivalently parameterized by the precision matrices.\n        Storing the precision matrices instead of the covariance matrices makes\n        it more efficient to compute the log-likelihood of new samples at test\n        time. The shape depends on `covariance_type`::\n\n            (n_components,)                        if 'spherical',\n            (n_features, n_features)               if 'tied',\n            (n_components, n_features)             if 'diag',\n            (n_components, n_features, n_features) if 'full'\n\n    converged_ : bool\n        True when convergence was reached in fit(), False otherwise.\n\n    n_iter_ : int\n        Number of steps used by the best fit of EM to reach the convergence.\n\n    lower_bound_ : float\n        Lower bound value on the log-likelihood (of the training data with\n        respect to the model) of the best fit of EM.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.mixture import GaussianMixture\n    >>> X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])\n    >>> gm = GaussianMixture(n_components=2, random_state=0).fit(X)\n    >>> gm.means_\n    array([[10.,  2.],\n           [ 1.,  2.]])\n    >>> gm.predict([[0, 0], [12, 3]])\n    array([1, 0])\n\n    See Also\n    --------\n    BayesianGaussianMixture : Gaussian mixture model fit with a variational\n        inference.\n    \"\"\"\n    @_deprecate_positional_args\n    def __init__(self, n_components=1, *, covariance_type='full', tol=1e-3,\n                 reg_covar=1e-6, max_iter=100, n_init=1, init_params='kmeans',\n                 weights_init=None, means_init=None, precisions_init=None,\n                 random_state=None, warm_start=False,\n                 verbose=0, verbose_interval=10):\n        super().__init__(\n            n_components=n_components, tol=tol, reg_covar=reg_covar,\n            max_iter=max_iter, n_init=n_init, init_params=init_params,\n            random_state=random_state, warm_start=warm_start,\n            verbose=verbose, verbose_interval=verbose_interval)\n\n        self.covariance_type = covariance_type\n        self.weights_init = weights_init\n        self.means_init = means_init\n        self.precisions_init = precisions_init\n\n    def _check_parameters(self, X):\n        \"\"\"Check the Gaussian mixture parameters are well defined.\"\"\"\n        _, n_features = X.shape\n        if self.covariance_type not in ['spherical', 'tied', 'diag', 'full']:\n            raise ValueError(\"Invalid value for 'covariance_type': %s \"\n                             \"'covariance_type' should be in \"\n                             \"['spherical', 'tied', 'diag', 'full']\"\n                             % self.covariance_type)\n\n        if self.weights_init is not None:\n            self.weights_init = _check_weights(self.weights_init,\n                                               self.n_components)\n\n        if self.means_init is not None:\n            self.means_init = _check_means(self.means_init,\n                                           self.n_components, n_features)\n\n        if self.precisions_init is not None:\n            self.precisions_init = _check_precisions(self.precisions_init,\n                                                     self.covariance_type,\n                                                     self.n_components,\n                                                     n_features)\n\n    def _initialize(self, X, resp):\n        \"\"\"Initialization of the Gaussian mixture parameters.\n\n        Parameters\n        ----------\n        X : array-like of shape (n_samples, n_features)\n\n        resp : array-like of shape (n_samples, n_components)\n        \"\"\"\n        n_samples, _ = X.shape\n\n        weights, means, covariances = _estimate_gaussian_parameters(\n            X, resp, self.reg_covar, self.covariance_type)\n        weights /= n_samples\n\n        self.weights_ = (weights if self.weights_init is None\n                         else self.weights_init)\n        self.means_ = means if self.means_init is None else self.means_init\n\n        if self.precisions_init is None:\n            self.covariances_ = covariances\n            self.precisions_cholesky_ = _compute_precision_cholesky(\n                covariances, self.covariance_type)\n        elif 
self.covariance_type == 'full':\n self.precisions_cholesky_ = np.array(\n [linalg.cholesky(prec_init, lower=True)\n for prec_init in self.precisions_init])\n elif self.covariance_type == 'tied':\n self.precisions_cholesky_ = linalg.cholesky(self.precisions_init,\n lower=True)\n else:\n self.precisions_cholesky_ = self.precisions_init\n\n def _m_step(self, X, log_resp):\n \"\"\"M step.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n log_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.\n \"\"\"\n n_samples, _ = X.shape\n self.weights_, self.means_, self.covariances_ = (\n _estimate_gaussian_parameters(X, np.exp(log_resp), self.reg_covar,\n self.covariance_type))\n self.weights_ /= n_samples\n self.precisions_cholesky_ = _compute_precision_cholesky(\n self.covariances_, self.covariance_type)\n\n def _estimate_log_prob(self, X):\n return _estimate_log_gaussian_prob(\n X, self.means_, self.precisions_cholesky_, self.covariance_type)\n\n def _estimate_log_weights(self):\n return np.log(self.weights_)\n\n def _compute_lower_bound(self, _, log_prob_norm):\n return log_prob_norm\n\n def _get_parameters(self):\n return (self.weights_, self.means_, self.covariances_,\n self.precisions_cholesky_)\n\n def _set_parameters(self, params):\n (self.weights_, self.means_, self.covariances_,\n self.precisions_cholesky_) = params\n\n # Attributes computation\n _, n_features = self.means_.shape\n\n if self.covariance_type == 'full':\n self.precisions_ = np.empty(self.precisions_cholesky_.shape)\n for k, prec_chol in enumerate(self.precisions_cholesky_):\n self.precisions_[k] = np.dot(prec_chol, prec_chol.T)\n\n elif self.covariance_type == 'tied':\n self.precisions_ = np.dot(self.precisions_cholesky_,\n self.precisions_cholesky_.T)\n else:\n self.precisions_ = self.precisions_cholesky_ ** 2\n\n def _n_parameters(self):\n \"\"\"Return the number of free parameters in the model.\"\"\"\n _, n_features = self.means_.shape\n if self.covariance_type == 'full':\n cov_params = self.n_components * n_features * (n_features + 1) / 2.\n elif self.covariance_type == 'diag':\n cov_params = self.n_components * n_features\n elif self.covariance_type == 'tied':\n cov_params = n_features * (n_features + 1) / 2.\n elif self.covariance_type == 'spherical':\n cov_params = self.n_components\n mean_params = n_features * self.n_components\n return int(cov_params + mean_params + self.n_components - 1)\n\n def bic(self, X):\n \"\"\"Bayesian information criterion for the current model on the input X.\n\n Parameters\n ----------\n X : array of shape (n_samples, n_dimensions)\n\n Returns\n -------\n bic : float\n The lower the better.\n \"\"\"\n return (-2 * self.score(X) * X.shape[0] +\n self._n_parameters() * np.log(X.shape[0]))\n\n def aic(self, X):\n \"\"\"Akaike information criterion for the current model on the input X.\n\n Parameters\n ----------\n X : array of shape (n_samples, n_dimensions)\n\n Returns\n -------\n aic : float\n The lower the better.\n \"\"\"\n return -2 * self.score(X) * X.shape[0] + 2 * self._n_parameters()", + "instance_attributes": [ + { + "name": "covariance_type", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "precisions_cholesky_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "precisions_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": 
"scikit-learn/sklearn.model_selection._search/BaseSearchCV", + "name": "BaseSearchCV", + "qname": "sklearn.model_selection._search.BaseSearchCV", + "decorators": [], + "superclasses": ["MetaEstimatorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/__init__", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_estimator_type@getter", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_more_tags", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_pairwise@getter", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/score", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/score_samples", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_check_is_fitted", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/predict", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/predict_proba", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/predict_log_proba", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/decision_function", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/transform", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/inverse_transform", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/n_features_in_@getter", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/classes_@getter", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_run_search", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_check_refit_for_multimetric", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/fit", + "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_format_results" + ], + "is_public": false, + "reexported_by": [], + "description": "Abstract base class for hyper parameter search with cross-validation.", + "docstring": "Abstract base class for hyper parameter search with cross-validation.\n ", + "code": "class BaseSearchCV(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta):\n \"\"\"Abstract base class for hyper parameter search with cross-validation.\n \"\"\"\n\n @abstractmethod\n @_deprecate_positional_args\n def __init__(self, estimator, *, scoring=None, n_jobs=None,\n refit=True, cv=None, verbose=0,\n pre_dispatch='2*n_jobs', error_score=np.nan,\n return_train_score=True):\n\n self.scoring = scoring\n self.estimator = estimator\n self.n_jobs = n_jobs\n self.refit = refit\n self.cv = cv\n self.verbose = verbose\n self.pre_dispatch = pre_dispatch\n self.error_score = error_score\n self.return_train_score = return_train_score\n\n @property\n def _estimator_type(self):\n return self.estimator._estimator_type\n\n def _more_tags(self):\n # allows cross-validation to see 'precomputed' metrics\n return {\n 'pairwise': _safe_tags(self.estimator, \"pairwise\"),\n \"_xfail_checks\": {\"check_supervised_y_2d\":\n \"DataConversionWarning not caught\"},\n }\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n # allows cross-validation to see 'precomputed' metrics\n return getattr(self.estimator, '_pairwise', False)\n\n def score(self, X, y=None):\n \"\"\"Returns the score on the given data, if the estimator has been refit.\n\n This uses the score defined by ``scoring`` where provided, and the\n ``best_estimator_.score`` method otherwise.\n\n Parameters\n ----------\n X : array-like 
of shape (n_samples, n_features)\n Input data, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples, n_output) \\\n or (n_samples,), default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n Returns\n -------\n score : float\n \"\"\"\n self._check_is_fitted('score')\n if self.scorer_ is None:\n raise ValueError(\"No score function explicitly defined, \"\n \"and the estimator doesn't provide one %s\"\n % self.best_estimator_)\n if isinstance(self.scorer_, dict):\n if self.multimetric_:\n scorer = self.scorer_[self.refit]\n else:\n scorer = self.scorer_\n return scorer(self.best_estimator_, X, y)\n\n # callable\n score = self.scorer_(self.best_estimator_, X, y)\n if self.multimetric_:\n score = score[self.refit]\n return score\n\n @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def score_samples(self, X):\n \"\"\"Call score_samples on the estimator with the best found parameters.\n\n Only available if ``refit=True`` and the underlying estimator supports\n ``score_samples``.\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n X : iterable\n Data to predict on. Must fulfill input requirements\n of the underlying estimator.\n\n Returns\n -------\n y_score : ndarray of shape (n_samples,)\n \"\"\"\n self._check_is_fitted('score_samples')\n return self.best_estimator_.score_samples(X)\n\n def _check_is_fitted(self, method_name):\n if not self.refit:\n raise NotFittedError('This %s instance was initialized '\n 'with refit=False. %s is '\n 'available only after refitting on the best '\n 'parameters. You can refit an estimator '\n 'manually using the ``best_params_`` '\n 'attribute'\n % (type(self).__name__, method_name))\n else:\n check_is_fitted(self)\n\n @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def predict(self, X):\n \"\"\"Call predict on the estimator with the best found parameters.\n\n Only available if ``refit=True`` and the underlying estimator supports\n ``predict``.\n\n Parameters\n ----------\n X : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.\n\n \"\"\"\n self._check_is_fitted('predict')\n return self.best_estimator_.predict(X)\n\n @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def predict_proba(self, X):\n \"\"\"Call predict_proba on the estimator with the best found parameters.\n\n Only available if ``refit=True`` and the underlying estimator supports\n ``predict_proba``.\n\n Parameters\n ----------\n X : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.\n\n \"\"\"\n self._check_is_fitted('predict_proba')\n return self.best_estimator_.predict_proba(X)\n\n @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def predict_log_proba(self, X):\n \"\"\"Call predict_log_proba on the estimator with the best found parameters.\n\n Only available if ``refit=True`` and the underlying estimator supports\n ``predict_log_proba``.\n\n Parameters\n ----------\n X : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.\n\n \"\"\"\n self._check_is_fitted('predict_log_proba')\n return self.best_estimator_.predict_log_proba(X)\n\n @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def decision_function(self, X):\n \"\"\"Call decision_function on the estimator with the best found parameters.\n\n Only available if ``refit=True`` and the underlying 
estimator supports\n ``decision_function``.\n\n Parameters\n ----------\n X : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.\n\n \"\"\"\n self._check_is_fitted('decision_function')\n return self.best_estimator_.decision_function(X)\n\n @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def transform(self, X):\n \"\"\"Call transform on the estimator with the best found parameters.\n\n Only available if the underlying estimator supports ``transform`` and\n ``refit=True``.\n\n Parameters\n ----------\n X : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.\n\n \"\"\"\n self._check_is_fitted('transform')\n return self.best_estimator_.transform(X)\n\n @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def inverse_transform(self, Xt):\n \"\"\"Call inverse_transform on the estimator with the best found params.\n\n Only available if the underlying estimator implements\n ``inverse_transform`` and ``refit=True``.\n\n Parameters\n ----------\n Xt : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.\n\n \"\"\"\n self._check_is_fitted('inverse_transform')\n return self.best_estimator_.inverse_transform(Xt)\n\n @property\n def n_features_in_(self):\n # For consistency with other estimators we raise an AttributeError so\n # that hasattr() fails if the search estimator isn't fitted.\n try:\n check_is_fitted(self)\n except NotFittedError as nfe:\n raise AttributeError(\n \"{} object has no n_features_in_ attribute.\"\n .format(self.__class__.__name__)\n ) from nfe\n\n return self.best_estimator_.n_features_in_\n\n @property\n def classes_(self):\n self._check_is_fitted(\"classes_\")\n return self.best_estimator_.classes_\n\n def _run_search(self, evaluate_candidates):\n \"\"\"Repeatedly calls `evaluate_candidates` to conduct a search.\n\n This method, implemented in sub-classes, makes it possible to\n customize the scheduling of evaluations: GridSearchCV and\n RandomizedSearchCV schedule evaluations for their whole parameter\n search space at once but other more sequential approaches are also\n possible: for instance it is possible to iteratively schedule\n evaluations for new regions of the parameter search space based on\n previously collected evaluation results. This makes it possible to\n implement Bayesian optimization or more generally sequential model-based\n optimization by deriving from the BaseSearchCV abstract base class.\n For example, Successive Halving is implemented by calling\n `evaluate_candidates` multiple times (once per iteration of the SH\n process), each time passing a different set of candidates with `X`\n and `y` of increasing sizes.\n\n Parameters\n ----------\n evaluate_candidates : callable\n This callback accepts:\n - a list of candidates, where each candidate is a dict of\n parameter settings.\n - an optional `cv` parameter which can be used to e.g.\n evaluate candidates on different dataset splits, or\n evaluate candidates on subsampled data (as done in the\n SuccessiveHalving estimators). By default, the original `cv`\n parameter is used, and it is available as a private\n `_checked_cv_orig` attribute.\n - an optional `more_results` dict. Each key will be added to\n the `cv_results_` attribute. 
Values should be lists of\n length `n_candidates`\n\n It returns a dict of all results so far, formatted like\n ``cv_results_``.\n\n Important note (relevant whether the default cv is used or not):\n in randomized splitters, and unless the random_state parameter of\n cv was set to an int, calling cv.split() multiple times will\n yield different splits. Since cv.split() is called in\n evaluate_candidates, this means that candidates will be evaluated\n on different splits each time evaluate_candidates is called. This\n might be a methodological issue depending on the search strategy\n that you're implementing. To prevent randomized splitters from\n being used, you may use _split._yields_constant_splits().\n\n Examples\n --------\n\n ::\n\n def _run_search(self, evaluate_candidates):\n 'Try C=0.1 only if C=1 is better than C=10'\n all_results = evaluate_candidates([{'C': 1}, {'C': 10}])\n score = all_results['mean_test_score']\n if score[0] < score[1]:\n evaluate_candidates([{'C': 0.1}])\n \"\"\"\n raise NotImplementedError(\"_run_search not implemented.\")\n\n def _check_refit_for_multimetric(self, scores):\n \"\"\"Check that `refit` is compatible with `scores`\"\"\"\n multimetric_refit_msg = (\n \"For multi-metric scoring, the parameter refit must be set to a \"\n \"scorer key or a callable to refit an estimator with the best \"\n \"parameter setting on the whole data and make the best_* \"\n \"attributes available for that metric. If this is not needed, \"\n f\"refit should be set to False explicitly. {self.refit!r} was \"\n \"passed.\")\n\n valid_refit_dict = (isinstance(self.refit, str) and\n self.refit in scores)\n\n if (self.refit is not False and not valid_refit_dict\n and not callable(self.refit)):\n raise ValueError(multimetric_refit_msg)\n\n @_deprecate_positional_args\n def fit(self, X, y=None, *, groups=None, **fit_params):\n \"\"\"Run fit with all sets of parameters.\n\n Parameters\n ----------\n\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples, n_output) \\\n or (n_samples,), default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
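A minimal usage sketch of the ``groups`` parameter just described, paired with a group-aware splitter; this is an illustrative addition (synthetic toy data, arbitrary parameter values), assuming scikit-learn 0.24::

    >>> import numpy as np
    >>> from sklearn.model_selection import GridSearchCV, GroupKFold
    >>> from sklearn.svm import SVC
    >>> # illustrative toy data only: 8 samples in 4 groups of 2
    >>> X = np.random.RandomState(0).rand(8, 2)
    >>> y = [0, 1] * 4
    >>> groups = [0, 0, 1, 1, 2, 2, 3, 3]
    >>> search = GridSearchCV(SVC(), {'C': [1, 10]},
    ...                       cv=GroupKFold(n_splits=2))
    >>> _ = search.fit(X, y, groups=groups)  # groups is routed to cv.split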
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n **fit_params : dict of str -> object\n Parameters passed to the ``fit`` method of the estimator.\n \"\"\"\n estimator = self.estimator\n refit_metric = \"score\"\n\n if callable(self.scoring):\n scorers = self.scoring\n elif self.scoring is None or isinstance(self.scoring, str):\n scorers = check_scoring(self.estimator, self.scoring)\n else:\n scorers = _check_multimetric_scoring(self.estimator, self.scoring)\n self._check_refit_for_multimetric(scorers)\n refit_metric = self.refit\n\n X, y, groups = indexable(X, y, groups)\n fit_params = _check_fit_params(X, fit_params)\n\n cv_orig = check_cv(self.cv, y, classifier=is_classifier(estimator))\n n_splits = cv_orig.get_n_splits(X, y, groups)\n\n base_estimator = clone(self.estimator)\n\n parallel = Parallel(n_jobs=self.n_jobs,\n pre_dispatch=self.pre_dispatch)\n\n fit_and_score_kwargs = dict(scorer=scorers,\n fit_params=fit_params,\n return_train_score=self.return_train_score,\n return_n_test_samples=True,\n return_times=True,\n return_parameters=False,\n error_score=self.error_score,\n verbose=self.verbose)\n results = {}\n with parallel:\n all_candidate_params = []\n all_out = []\n all_more_results = defaultdict(list)\n\n def evaluate_candidates(candidate_params, cv=None,\n more_results=None):\n cv = cv or cv_orig\n candidate_params = list(candidate_params)\n n_candidates = len(candidate_params)\n\n if self.verbose > 0:\n print(\"Fitting {0} folds for each of {1} candidates,\"\n \" totalling {2} fits\".format(\n n_splits, n_candidates, n_candidates * n_splits))\n\n out = parallel(delayed(_fit_and_score)(clone(base_estimator),\n X, y,\n train=train, test=test,\n parameters=parameters,\n split_progress=(\n split_idx,\n n_splits),\n candidate_progress=(\n cand_idx,\n n_candidates),\n **fit_and_score_kwargs)\n for (cand_idx, parameters),\n (split_idx, (train, test)) in product(\n enumerate(candidate_params),\n enumerate(cv.split(X, y, groups))))\n\n if len(out) < 1:\n raise ValueError('No fits were performed. '\n 'Was the CV iterator empty? '\n 'Were there no candidates?')\n elif len(out) != n_candidates * n_splits:\n raise ValueError('cv.split and cv.get_n_splits returned '\n 'inconsistent results. Expected {} '\n 'splits, got {}'\n .format(n_splits,\n len(out) // n_candidates))\n\n # For callable self.scoring, the return type is only known after\n # calling. If the return type is a dictionary, the error scores\n # can now be inserted with the correct key. 
The type checking\n # of out will be done in `_insert_error_scores`.\n if callable(self.scoring):\n _insert_error_scores(out, self.error_score)\n all_candidate_params.extend(candidate_params)\n all_out.extend(out)\n if more_results is not None:\n for key, value in more_results.items():\n all_more_results[key].extend(value)\n\n nonlocal results\n results = self._format_results(\n all_candidate_params, n_splits, all_out,\n all_more_results)\n\n return results\n\n self._run_search(evaluate_candidates)\n\n # multimetric is determined here because in the case of a callable\n # self.scoring the return type is only known after calling\n first_test_score = all_out[0]['test_scores']\n self.multimetric_ = isinstance(first_test_score, dict)\n\n # check refit_metric now for a callable scorer that is multimetric\n if callable(self.scoring) and self.multimetric_:\n self._check_refit_for_multimetric(first_test_score)\n refit_metric = self.refit\n\n # For multi-metric evaluation, store the best_index_, best_params_ and\n # best_score_ iff refit is one of the scorer names\n # In single metric evaluation, refit_metric is \"score\"\n if self.refit or not self.multimetric_:\n # If callable, refit is expected to return the index of the best\n # parameter set.\n if callable(self.refit):\n self.best_index_ = self.refit(results)\n if not isinstance(self.best_index_, numbers.Integral):\n raise TypeError('best_index_ returned is not an integer')\n if (self.best_index_ < 0 or\n self.best_index_ >= len(results[\"params\"])):\n raise IndexError('best_index_ index out of range')\n else:\n self.best_index_ = results[\"rank_test_%s\"\n % refit_metric].argmin()\n self.best_score_ = results[\"mean_test_%s\" % refit_metric][\n self.best_index_]\n self.best_params_ = results[\"params\"][self.best_index_]\n\n if self.refit:\n # we clone again after setting params in case some\n # of the params are estimators as well.\n self.best_estimator_ = clone(clone(base_estimator).set_params(\n **self.best_params_))\n refit_start_time = time.time()\n if y is not None:\n self.best_estimator_.fit(X, y, **fit_params)\n else:\n self.best_estimator_.fit(X, **fit_params)\n refit_end_time = time.time()\n self.refit_time_ = refit_end_time - refit_start_time\n\n # Store the only scorer not as a dict for single metric evaluation\n self.scorer_ = scorers\n\n self.cv_results_ = results\n self.n_splits_ = n_splits\n\n return self\n\n def _format_results(self, candidate_params, n_splits, out,\n more_results=None):\n n_candidates = len(candidate_params)\n out = _aggregate_score_dicts(out)\n\n results = dict(more_results or {})\n for key, val in results.items():\n # each value is a list (as per evaluate_candidates' convention)\n # we convert it to an array for consistency with the other keys\n results[key] = np.asarray(val)\n\n def _store(key_name, array, weights=None, splits=False, rank=False):\n \"\"\"A small helper to store the scores/times to the cv_results_\"\"\"\n # When iterated first by splits, then by parameters\n # We want `array` to have `n_candidates` rows and `n_splits` cols.\n array = np.array(array, dtype=np.float64).reshape(n_candidates,\n n_splits)\n if splits:\n for split_idx in range(n_splits):\n # Uses closure to alter the results\n results[\"split%d_%s\"\n % (split_idx, key_name)] = array[:, split_idx]\n\n array_means = np.average(array, axis=1, weights=weights)\n results['mean_%s' % key_name] = array_means\n\n if (key_name.startswith((\"train_\", \"test_\")) and\n np.any(~np.isfinite(array_means))):\n warnings.warn(\n f\"One or more 
of the {key_name.split('_')[0]} scores \"\n f\"are non-finite: {array_means}\",\n category=UserWarning\n )\n\n # Weighted std is not directly available in numpy\n array_stds = np.sqrt(np.average((array -\n array_means[:, np.newaxis]) ** 2,\n axis=1, weights=weights))\n results['std_%s' % key_name] = array_stds\n\n if rank:\n results[\"rank_%s\" % key_name] = np.asarray(\n rankdata(-array_means, method='min'), dtype=np.int32)\n\n _store('fit_time', out[\"fit_time\"])\n _store('score_time', out[\"score_time\"])\n # Use one MaskedArray and mask all the places where the param is not\n # applicable for that candidate. Use defaultdict as each candidate may\n # not contain all the params\n param_results = defaultdict(partial(MaskedArray,\n np.empty(n_candidates,),\n mask=True,\n dtype=object))\n for cand_idx, params in enumerate(candidate_params):\n for name, value in params.items():\n # An all masked empty array gets created for the key\n # `\"param_%s\" % name` at the first occurrence of `name`.\n # Setting the value at an index also unmasks that index\n param_results[\"param_%s\" % name][cand_idx] = value\n\n results.update(param_results)\n # Store a list of param dicts at the key 'params'\n results['params'] = candidate_params\n\n test_scores_dict = _normalize_score_results(out[\"test_scores\"])\n if self.return_train_score:\n train_scores_dict = _normalize_score_results(out[\"train_scores\"])\n\n for scorer_name in test_scores_dict:\n # Compute the (weighted) mean and std for test scores alone\n _store('test_%s' % scorer_name, test_scores_dict[scorer_name],\n splits=True, rank=True,\n weights=None)\n if self.return_train_score:\n _store('train_%s' % scorer_name,\n train_scores_dict[scorer_name],\n splits=True)\n\n return results", + "instance_attributes": [ + { + "name": "refit", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "pre_dispatch", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "return_train_score", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "cv_results_", + "types": { + "kind": "NamedType", + "name": "dict" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV", + "name": "GridSearchCV", + "qname": "sklearn.model_selection._search.GridSearchCV", + "decorators": [], + "superclasses": ["BaseSearchCV"], + "methods": [ + "scikit-learn/sklearn.model_selection._search/GridSearchCV/__init__", + "scikit-learn/sklearn.model_selection._search/GridSearchCV/_run_search" + ], + "is_public": false, + "reexported_by": [], + "description": "Exhaustive search over specified parameter values for an estimator.\n\nImportant members are fit, predict.\n\nGridSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated grid-search over a parameter grid.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Exhaustive search over specified parameter values for an estimator.\n\nImportant members are fit, predict.\n\nGridSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they 
are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated grid-search over a parameter grid.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_grid : dict or list of dictionaries\n Dictionary with parameters names (`str`) as keys and lists of\n parameter settings to try as values, or a list of such\n dictionaries, in which case the grids spanned by each dictionary\n in the list are explored. This enables searching over any sequence\n of parameter settings.\n\nscoring : str, callable, list, tuple or dict, default=None\n Strategy to evaluate the performance of the cross-validated model on\n the test set.\n\n If `scoring` represents a single score, one can use:\n\n - a single string (see :ref:`scoring_parameter`);\n - a callable (see :ref:`scoring`) that returns a single value.\n\n If `scoring` represents multiple scores, one can use:\n\n - a list or tuple of unique strings;\n - a callable returning a dictionary where the keys are the metric\n names and the values are the metric scores;\n - a dictionary with metric names as keys and callables as values.\n\n See :ref:`multimetric_grid_search` for an example.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nrefit : bool, str, or callable, default=True\n Refit an estimator using the best found parameters on the whole\n dataset.\n\n For multiple metric evaluation, this needs to be a `str` denoting the\n scorer that would be used to find the best parameters for refitting\n the estimator at the end.\n\n Where there are considerations other than maximum score in\n choosing a best estimator, ``refit`` can be set to a function which\n returns the selected ``best_index_`` given ``cv_results_``. In that\n case, the ``best_estimator_`` and ``best_params_`` will be set\n according to the returned ``best_index_`` while the ``best_score_``\n attribute will not be available.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``GridSearchCV`` instance.\n\n Also for multiple metric evaluation, the attributes ``best_index_``,\n ``best_score_`` and ``best_params_`` will only be available if\n ``refit`` is set and all of them will be determined w.r.t this specific\n scorer.\n\n See ``scoring`` parameter to know more about multiple metric\n evaluation.\n\n .. versionchanged:: 0.20\n Support for callable added.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. 
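A minimal sketch of the multi-metric ``scoring`` plus string ``refit`` behavior described above; illustrative only (iris data, arbitrary grid values), assuming scikit-learn 0.24::

    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import GridSearchCV
    >>> from sklearn.svm import SVC
    >>> X, y = load_iris(return_X_y=True)
    >>> # two scorers; the refit key decides which one drives the best_* attributes
    >>> search = GridSearchCV(SVC(), {'C': [1, 10]},
    ...                       scoring=['accuracy', 'f1_macro'],
    ...                       refit='accuracy')
    >>> _ = search.fit(X, y)
    >>> search.best_score_ == search.cv_results_['mean_test_accuracy'][search.best_index_]
    True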
These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\n - >1 : the computation time for each fold and parameter candidate is\n displayed;\n - >2 : the score is also displayed;\n - >3 : the fold and candidate parameter indexes are also displayed\n together with the starting time of the computation.\n\npre_dispatch : int, or str, default=n_jobs\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error.\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n .. versionadded:: 0.19\n\n .. 
versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``\n\n\nExamples\n--------\n>>> from sklearn import svm, datasets\n>>> from sklearn.model_selection import GridSearchCV\n>>> iris = datasets.load_iris()\n>>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}\n>>> svc = svm.SVC()\n>>> clf = GridSearchCV(svc, parameters)\n>>> clf.fit(iris.data, iris.target)\nGridSearchCV(estimator=SVC(),\n param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')})\n>>> sorted(clf.cv_results_.keys())\n['mean_fit_time', 'mean_score_time', 'mean_test_score',...\n 'param_C', 'param_kernel', 'params',...\n 'rank_test_score', 'split0_test_score',...\n 'split2_test_score', ...\n 'std_fit_time', 'std_score_time', 'std_test_score']\n\nAttributes\n----------\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``.\n\n For instance the below given table\n\n +------------+-----------+------------+-----------------+---+---------+\n |param_kernel|param_gamma|param_degree|split0_test_score|...|rank_t...|\n +============+===========+============+=================+===+=========+\n | 'poly' | -- | 2 | 0.80 |...| 2 |\n +------------+-----------+------------+-----------------+---+---------+\n | 'poly' | -- | 3 | 0.70 |...| 4 |\n +------------+-----------+------------+-----------------+---+---------+\n | 'rbf' | 0.1 | -- | 0.80 |...| 3 |\n +------------+-----------+------------+-----------------+---+---------+\n | 'rbf' | 0.2 | -- | 0.93 |...| 1 |\n +------------+-----------+------------+-----------------+---+---------+\n\n will be represented by a ``cv_results_`` dict of::\n\n {\n 'param_kernel': masked_array(data = ['poly', 'poly', 'rbf', 'rbf'],\n mask = [False False False False]...)\n 'param_gamma': masked_array(data = [-- -- 0.1 0.2],\n mask = [ True True False False]...),\n 'param_degree': masked_array(data = [2.0 3.0 -- --],\n mask = [False False True True]...),\n 'split0_test_score' : [0.80, 0.70, 0.80, 0.93],\n 'split1_test_score' : [0.82, 0.50, 0.70, 0.78],\n 'mean_test_score' : [0.81, 0.60, 0.75, 0.85],\n 'std_test_score' : [0.01, 0.10, 0.05, 0.08],\n 'rank_test_score' : [2, 4, 3, 1],\n 'split0_train_score' : [0.80, 0.92, 0.70, 0.93],\n 'split1_train_score' : [0.82, 0.55, 0.70, 0.87],\n 'mean_train_score' : [0.81, 0.74, 0.70, 0.90],\n 'std_train_score' : [0.01, 0.19, 0.00, 0.03],\n 'mean_fit_time' : [0.73, 0.63, 0.43, 0.49],\n 'std_fit_time' : [0.01, 0.02, 0.01, 0.01],\n 'mean_score_time' : [0.01, 0.06, 0.04, 0.04],\n 'std_score_time' : [0.00, 0.00, 0.00, 0.01],\n 'params' : [{'kernel': 'poly', 'degree': 2}, ...],\n }\n\n NOTE\n\n The key ``'params'`` is used to store a list of parameter\n settings dicts for all the parameter candidates.\n\n The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and\n ``std_score_time`` are all in seconds.\n\n For multi-metric evaluation, the scores for all the scorers are\n available in the ``cv_results_`` dict at the keys ending with that\n scorer's name (``'_'``) instead of ``'_score'`` shown\n above. ('split0_test_precision', 'mean_train_precision' etc.)\n\nbest_estimator_ : estimator\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. 
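The ``cv_results_`` dict shown above loads directly into a pandas ``DataFrame``; a short illustrative sketch (an editorial addition; assumes pandas is installed, and the grid values are arbitrary)::

    >>> import pandas as pd
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import GridSearchCV
    >>> from sklearn.svm import SVC
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = GridSearchCV(SVC(), {'kernel': ('linear', 'rbf'), 'C': [1, 10]})
    >>> _ = clf.fit(X, y)
    >>> df = pd.DataFrame(clf.cv_results_)  # one row per parameter candidate
    >>> {'param_C', 'mean_test_score', 'rank_test_score'} <= set(df.columns)
    True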
Not available if ``refit=False``.\n\n See ``refit`` parameter for more information on allowed values.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator\n\n For multi-metric evaluation, this is present only if ``refit`` is\n specified.\n\n This attribute is not available if ``refit`` is a function.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\n For multi-metric evaluation, this is present only if ``refit`` is\n specified.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\n For multi-metric evaluation, this is present only if ``refit`` is\n specified.\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\n For multi-metric evaluation, this attribute holds the validated\n ``scoring`` dict which maps the scorer key to the scorer callable.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\n .. versionadded:: 0.20\n\nmultimetric_ : bool\n Whether or not the scorers compute several metrics.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the left out\ndata, unless an explicit score is passed in which case it is used instead.\n\nIf `n_jobs` was set to a value higher than one, the data is copied for each\npoint in the grid (and not `n_jobs` times). This is done for efficiency\nreasons if individual jobs take very little time, but may raise errors if\nthe dataset is large and not enough memory is available. A workaround in\nthis case is to set `pre_dispatch`. Then, the memory is copied only\n`pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *\nn_jobs`.\n\nSee Also\n---------\nParameterGrid : Generates all the combinations of a hyperparameter grid.\ntrain_test_split : Utility function to split the data into a development\n set usable for fitting a GridSearchCV instance and an evaluation set\n for its final evaluation.\nsklearn.metrics.make_scorer : Make a scorer from a performance metric or\n loss function.", + "code": "class GridSearchCV(BaseSearchCV):\n \"\"\"Exhaustive search over specified parameter values for an estimator.\n\n Important members are fit, predict.\n\n GridSearchCV implements a \"fit\" and a \"score\" method.\n It also implements \"score_samples\", \"predict\", \"predict_proba\",\n \"decision_function\", \"transform\" and \"inverse_transform\" if they are\n implemented in the estimator used.\n\n The parameters of the estimator used to apply these methods are optimized\n by cross-validated grid-search over a parameter grid.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : estimator object.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\n param_grid : dict or list of dictionaries\n Dictionary with parameters names (`str`) as keys and lists of\n parameter settings to try as values, or a list of such\n dictionaries, in which case the grids spanned by each dictionary\n in the list are explored. 
This enables searching over any sequence\n of parameter settings.\n\n scoring : str, callable, list, tuple or dict, default=None\n Strategy to evaluate the performance of the cross-validated model on\n the test set.\n\n If `scoring` represents a single score, one can use:\n\n - a single string (see :ref:`scoring_parameter`);\n - a callable (see :ref:`scoring`) that returns a single value.\n\n If `scoring` represents multiple scores, one can use:\n\n - a list or tuple of unique strings;\n - a callable returning a dictionary where the keys are the metric\n names and the values are the metric scores;\n - a dictionary with metric names as keys and callables as values.\n\n See :ref:`multimetric_grid_search` for an example.\n\n n_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\n refit : bool, str, or callable, default=True\n Refit an estimator using the best found parameters on the whole\n dataset.\n\n For multiple metric evaluation, this needs to be a `str` denoting the\n scorer that would be used to find the best parameters for refitting\n the estimator at the end.\n\n Where there are considerations other than maximum score in\n choosing a best estimator, ``refit`` can be set to a function which\n returns the selected ``best_index_`` given ``cv_results_``. In that\n case, the ``best_estimator_`` and ``best_params_`` will be set\n according to the returned ``best_index_`` while the ``best_score_``\n attribute will not be available.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``GridSearchCV`` instance.\n\n Also for multiple metric evaluation, the attributes ``best_index_``,\n ``best_score_`` and ``best_params_`` will only be available if\n ``refit`` is set and all of them will be determined w.r.t this specific\n scorer.\n\n See ``scoring`` parameter to know more about multiple metric\n evaluation.\n\n .. versionchanged:: 0.20\n Support for callable added.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n verbose : int\n Controls the verbosity: the higher, the more messages.\n\n - >1 : the computation time for each fold and parameter candidate is\n displayed;\n - >2 : the score is also displayed;\n - >3 : the fold and candidate parameter indexes are also displayed\n together with the starting time of the computation.\n\n pre_dispatch : int, or str, default=n_jobs\n Controls the number of jobs that get dispatched during parallel\n execution. 
Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\n error_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error.\n\n return_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n .. versionadded:: 0.19\n\n .. versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``\n\n\n Examples\n --------\n >>> from sklearn import svm, datasets\n >>> from sklearn.model_selection import GridSearchCV\n >>> iris = datasets.load_iris()\n >>> parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}\n >>> svc = svm.SVC()\n >>> clf = GridSearchCV(svc, parameters)\n >>> clf.fit(iris.data, iris.target)\n GridSearchCV(estimator=SVC(),\n param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')})\n >>> sorted(clf.cv_results_.keys())\n ['mean_fit_time', 'mean_score_time', 'mean_test_score',...\n 'param_C', 'param_kernel', 'params',...\n 'rank_test_score', 'split0_test_score',...\n 'split2_test_score', ...\n 'std_fit_time', 'std_score_time', 'std_test_score']\n\n Attributes\n ----------\n cv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``.\n\n For instance the below given table\n\n +------------+-----------+------------+-----------------+---+---------+\n |param_kernel|param_gamma|param_degree|split0_test_score|...|rank_t...|\n +============+===========+============+=================+===+=========+\n | 'poly' | -- | 2 | 0.80 |...| 2 |\n +------------+-----------+------------+-----------------+---+---------+\n | 'poly' | -- | 3 | 0.70 |...| 4 |\n +------------+-----------+------------+-----------------+---+---------+\n | 'rbf' | 0.1 | -- | 0.80 |...| 3 |\n +------------+-----------+------------+-----------------+---+---------+\n | 'rbf' | 0.2 | -- | 0.93 |...| 1 |\n +------------+-----------+------------+-----------------+---+---------+\n\n will be represented by a ``cv_results_`` dict of::\n\n {\n 'param_kernel': masked_array(data = ['poly', 'poly', 'rbf', 'rbf'],\n mask = [False False False False]...)\n 'param_gamma': masked_array(data = [-- -- 0.1 0.2],\n mask = [ True True False False]...),\n 'param_degree': masked_array(data = [2.0 3.0 -- --],\n mask = [False False True True]...),\n 'split0_test_score' : [0.80, 0.70, 0.80, 0.93],\n 'split1_test_score' : [0.82, 0.50, 0.70, 0.78],\n 'mean_test_score' : [0.81, 0.60, 0.75, 0.85],\n 'std_test_score' : [0.01, 0.10, 0.05, 0.08],\n 'rank_test_score' : [2, 4, 3, 1],\n 'split0_train_score' : 
[0.80, 0.92, 0.70, 0.93],\n 'split1_train_score' : [0.82, 0.55, 0.70, 0.87],\n 'mean_train_score' : [0.81, 0.74, 0.70, 0.90],\n 'std_train_score' : [0.01, 0.19, 0.00, 0.03],\n 'mean_fit_time' : [0.73, 0.63, 0.43, 0.49],\n 'std_fit_time' : [0.01, 0.02, 0.01, 0.01],\n 'mean_score_time' : [0.01, 0.06, 0.04, 0.04],\n 'std_score_time' : [0.00, 0.00, 0.00, 0.01],\n 'params' : [{'kernel': 'poly', 'degree': 2}, ...],\n }\n\n NOTE\n\n The key ``'params'`` is used to store a list of parameter\n settings dicts for all the parameter candidates.\n\n The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and\n ``std_score_time`` are all in seconds.\n\n For multi-metric evaluation, the scores for all the scorers are\n available in the ``cv_results_`` dict at the keys ending with that\n scorer's name (``'_'``) instead of ``'_score'`` shown\n above. ('split0_test_precision', 'mean_train_precision' etc.)\n\n best_estimator_ : estimator\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. Not available if ``refit=False``.\n\n See ``refit`` parameter for more information on allowed values.\n\n best_score_ : float\n Mean cross-validated score of the best_estimator\n\n For multi-metric evaluation, this is present only if ``refit`` is\n specified.\n\n This attribute is not available if ``refit`` is a function.\n\n best_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\n For multi-metric evaluation, this is present only if ``refit`` is\n specified.\n\n best_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\n For multi-metric evaluation, this is present only if ``refit`` is\n specified.\n\n scorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\n For multi-metric evaluation, this attribute holds the validated\n ``scoring`` dict which maps the scorer key to the scorer callable.\n\n n_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\n refit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\n .. versionadded:: 0.20\n\n multimetric_ : bool\n Whether or not the scorers compute several metrics.\n\n Notes\n -----\n The parameters selected are those that maximize the score of the left out\n data, unless an explicit score is passed in which case it is used instead.\n\n If `n_jobs` was set to a value higher than one, the data is copied for each\n point in the grid (and not `n_jobs` times). This is done for efficiency\n reasons if individual jobs take very little time, but may raise errors if\n the dataset is large and not enough memory is available. A workaround in\n this case is to set `pre_dispatch`. Then, the memory is copied only\n `pre_dispatch` many times. 
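A small illustrative sketch of the ``pre_dispatch`` workaround from the Notes (an editorial addition; estimator and grid are placeholders, and only construction is shown), capping dispatched jobs at twice ``n_jobs``::

    >>> from sklearn.model_selection import GridSearchCV
    >>> from sklearn.svm import SVC
    >>> # illustrative only: at most 2 * n_jobs fits are dispatched at a time
    >>> search = GridSearchCV(SVC(), {'C': [1, 10, 100]},
    ...                       n_jobs=2, pre_dispatch='2*n_jobs')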
A reasonable value for `pre_dispatch` is `2 *\n n_jobs`.\n\n See Also\n ---------\n ParameterGrid : Generates all the combinations of a hyperparameter grid.\n train_test_split : Utility function to split the data into a development\n set usable for fitting a GridSearchCV instance and an evaluation set\n for its final evaluation.\n sklearn.metrics.make_scorer : Make a scorer from a performance metric or\n loss function.\n\n \"\"\"\n _required_parameters = [\"estimator\", \"param_grid\"]\n\n @_deprecate_positional_args\n def __init__(self, estimator, param_grid, *, scoring=None,\n n_jobs=None, refit=True, cv=None,\n verbose=0, pre_dispatch='2*n_jobs',\n error_score=np.nan, return_train_score=False):\n super().__init__(\n estimator=estimator, scoring=scoring,\n n_jobs=n_jobs, refit=refit, cv=cv, verbose=verbose,\n pre_dispatch=pre_dispatch, error_score=error_score,\n return_train_score=return_train_score)\n self.param_grid = param_grid\n _check_param_grid(param_grid)\n\n def _run_search(self, evaluate_candidates):\n \"\"\"Search all candidates in param_grid\"\"\"\n evaluate_candidates(ParameterGrid(self.param_grid))", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterGrid", + "name": "ParameterGrid", + "qname": "sklearn.model_selection._search.ParameterGrid", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.model_selection._search/ParameterGrid/__init__", + "scikit-learn/sklearn.model_selection._search/ParameterGrid/__iter__", + "scikit-learn/sklearn.model_selection._search/ParameterGrid/__len__", + "scikit-learn/sklearn.model_selection._search/ParameterGrid/__getitem__" + ], + "is_public": false, + "reexported_by": [], + "description": "Grid of parameters with a discrete number of values for each.\n\nCan be used to iterate over parameter value combinations with the\nPython built-in function iter.\nThe order of the generated parameter combinations is deterministic.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Grid of parameters with a discrete number of values for each.\n\nCan be used to iterate over parameter value combinations with the\nPython built-in function iter.\nThe order of the generated parameter combinations is deterministic.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nparam_grid : dict of str to sequence, or sequence of such\n The parameter grid to explore, as a dictionary mapping estimator\n parameters to sequences of allowed values.\n\n An empty dict signifies default parameters.\n\n A sequence of dicts signifies a sequence of grids to search, and is\n useful to avoid exploring parameter combinations that make no sense\n or have no effect. See the examples below.\n\nExamples\n--------\n>>> from sklearn.model_selection import ParameterGrid\n>>> param_grid = {'a': [1, 2], 'b': [True, False]}\n>>> list(ParameterGrid(param_grid)) == (\n... [{'a': 1, 'b': True}, {'a': 1, 'b': False},\n... {'a': 2, 'b': True}, {'a': 2, 'b': False}])\nTrue\n\n>>> grid = [{'kernel': ['linear']}, {'kernel': ['rbf'], 'gamma': [1, 10]}]\n>>> list(ParameterGrid(grid)) == [{'kernel': 'linear'},\n... {'kernel': 'rbf', 'gamma': 1},\n... 
{'kernel': 'rbf', 'gamma': 10}]\nTrue\n>>> ParameterGrid(grid)[1] == {'kernel': 'rbf', 'gamma': 1}\nTrue\n\nSee Also\n--------\nGridSearchCV : Uses :class:`ParameterGrid` to perform a full parallelized\n parameter search.", + "code": "class ParameterGrid:\n \"\"\"Grid of parameters with a discrete number of values for each.\n\n Can be used to iterate over parameter value combinations with the\n Python built-in function iter.\n The order of the generated parameter combinations is deterministic.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n param_grid : dict of str to sequence, or sequence of such\n The parameter grid to explore, as a dictionary mapping estimator\n parameters to sequences of allowed values.\n\n An empty dict signifies default parameters.\n\n A sequence of dicts signifies a sequence of grids to search, and is\n useful to avoid exploring parameter combinations that make no sense\n or have no effect. See the examples below.\n\n Examples\n --------\n >>> from sklearn.model_selection import ParameterGrid\n >>> param_grid = {'a': [1, 2], 'b': [True, False]}\n >>> list(ParameterGrid(param_grid)) == (\n ... [{'a': 1, 'b': True}, {'a': 1, 'b': False},\n ... {'a': 2, 'b': True}, {'a': 2, 'b': False}])\n True\n\n >>> grid = [{'kernel': ['linear']}, {'kernel': ['rbf'], 'gamma': [1, 10]}]\n >>> list(ParameterGrid(grid)) == [{'kernel': 'linear'},\n ... {'kernel': 'rbf', 'gamma': 1},\n ... {'kernel': 'rbf', 'gamma': 10}]\n True\n >>> ParameterGrid(grid)[1] == {'kernel': 'rbf', 'gamma': 1}\n True\n\n See Also\n --------\n GridSearchCV : Uses :class:`ParameterGrid` to perform a full parallelized\n parameter search.\n \"\"\"\n\n def __init__(self, param_grid):\n if not isinstance(param_grid, (Mapping, Iterable)):\n raise TypeError('Parameter grid is not a dict or '\n 'a list ({!r})'.format(param_grid))\n\n if isinstance(param_grid, Mapping):\n # wrap dictionary in a singleton list to support either dict\n # or list of dicts\n param_grid = [param_grid]\n\n # check if all entries are dictionaries of lists\n for grid in param_grid:\n if not isinstance(grid, dict):\n raise TypeError('Parameter grid is not a '\n 'dict ({!r})'.format(grid))\n for key in grid:\n if not isinstance(grid[key], Iterable):\n raise TypeError('Parameter grid value is not iterable '\n '(key={!r}, value={!r})'\n .format(key, grid[key]))\n\n self.param_grid = param_grid\n\n def __iter__(self):\n \"\"\"Iterate over the points in the grid.\n\n Returns\n -------\n params : iterator over dict of str to any\n Yields dictionaries mapping each estimator parameter to one of its\n allowed values.\n \"\"\"\n for p in self.param_grid:\n # Always sort the keys of a dictionary, for reproducibility\n items = sorted(p.items())\n if not items:\n yield {}\n else:\n keys, values = zip(*items)\n for v in product(*values):\n params = dict(zip(keys, v))\n yield params\n\n def __len__(self):\n \"\"\"Number of points on the grid.\"\"\"\n # Product function that can handle iterables (np.product can't).\n product = partial(reduce, operator.mul)\n return sum(product(len(v) for v in p.values()) if p else 1\n for p in self.param_grid)\n\n def __getitem__(self, ind):\n \"\"\"Get the parameters that would be ``ind``th in iteration\n\n Parameters\n ----------\n ind : int\n The iteration index\n\n Returns\n -------\n params : dict of str to any\n Equal to list(self)[ind]\n \"\"\"\n # This is used to make discrete sampling without replacement memory\n # efficient.\n for sub_grid in self.param_grid:\n # XXX: could memoize information 
used here\n if not sub_grid:\n if ind == 0:\n return {}\n else:\n ind -= 1\n continue\n\n # Reverse so most frequent cycling parameter comes first\n keys, values_lists = zip(*sorted(sub_grid.items())[::-1])\n sizes = [len(v_list) for v_list in values_lists]\n total = np.product(sizes)\n\n if ind >= total:\n # Try the next grid\n ind -= total\n else:\n out = {}\n for key, v_list, n in zip(keys, values_lists, sizes):\n ind, offset = divmod(ind, n)\n out[key] = v_list[offset]\n return out\n\n raise IndexError('ParameterGrid index out of range')", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterSampler", + "name": "ParameterSampler", + "qname": "sklearn.model_selection._search.ParameterSampler", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.model_selection._search/ParameterSampler/__init__", + "scikit-learn/sklearn.model_selection._search/ParameterSampler/_is_all_lists", + "scikit-learn/sklearn.model_selection._search/ParameterSampler/__iter__", + "scikit-learn/sklearn.model_selection._search/ParameterSampler/__len__" + ], + "is_public": false, + "reexported_by": [], + "description": "Generator on parameters sampled from given distributions.\n\nNon-deterministic iterable over random candidate combinations for hyper-\nparameter search. If all parameters are presented as a list,\nsampling without replacement is performed. If at least one parameter\nis given as a distribution, sampling with replacement is used.\nIt is highly recommended to use continuous distributions for continuous\nparameters.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generator on parameters sampled from given distributions.\n\nNon-deterministic iterable over random candidate combinations for hyper-\nparameter search. If all parameters are presented as a list,\nsampling without replacement is performed. If at least one parameter\nis given as a distribution, sampling with replacement is used.\nIt is highly recommended to use continuous distributions for continuous\nparameters.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nparam_distributions : dict\n Dictionary with parameters names (`str`) as keys and distributions\n or lists of parameters to try. Distributions must provide a ``rvs``\n method for sampling (such as those from scipy.stats.distributions).\n If a list is given, it is sampled uniformly.\n If a list of dicts is given, first a dict is sampled uniformly, and\n then a parameter is sampled using that dict as above.\n\nn_iter : int\n Number of parameter settings that are produced.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for random uniform sampling\n from lists of possible values instead of scipy.stats distributions.\n Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nparams : dict of str to any\n **Yields** dictionaries mapping each estimator parameter to\n a sampled value.\n\nExamples\n--------\n>>> from sklearn.model_selection import ParameterSampler\n>>> from scipy.stats.distributions import expon\n>>> import numpy as np\n>>> rng = np.random.RandomState(0)\n>>> param_grid = {'a':[1, 2], 'b': expon()}\n>>> param_list = list(ParameterSampler(param_grid, n_iter=4,\n... random_state=rng))\n>>> rounded_list = [dict((k, round(v, 6)) for (k, v) in d.items())\n... for d in param_list]\n>>> rounded_list == [{'b': 0.89856, 'a': 1},\n... {'b': 0.923223, 'a': 1},\n... 
{'b': 1.878964, 'a': 2},\n... {'b': 1.038159, 'a': 2}]\nTrue", + "code": "class ParameterSampler:\n \"\"\"Generator on parameters sampled from given distributions.\n\n Non-deterministic iterable over random candidate combinations for hyper-\n parameter search. If all parameters are presented as a list,\n sampling without replacement is performed. If at least one parameter\n is given as a distribution, sampling with replacement is used.\n It is highly recommended to use continuous distributions for continuous\n parameters.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n param_distributions : dict\n Dictionary with parameters names (`str`) as keys and distributions\n or lists of parameters to try. Distributions must provide a ``rvs``\n method for sampling (such as those from scipy.stats.distributions).\n If a list is given, it is sampled uniformly.\n If a list of dicts is given, first a dict is sampled uniformly, and\n then a parameter is sampled using that dict as above.\n\n n_iter : int\n Number of parameter settings that are produced.\n\n random_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for random uniform sampling\n from lists of possible values instead of scipy.stats distributions.\n Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n params : dict of str to any\n **Yields** dictionaries mapping each estimator parameter to\n a sampled value.\n\n Examples\n --------\n >>> from sklearn.model_selection import ParameterSampler\n >>> from scipy.stats.distributions import expon\n >>> import numpy as np\n >>> rng = np.random.RandomState(0)\n >>> param_grid = {'a':[1, 2], 'b': expon()}\n >>> param_list = list(ParameterSampler(param_grid, n_iter=4,\n ... random_state=rng))\n >>> rounded_list = [dict((k, round(v, 6)) for (k, v) in d.items())\n ... for d in param_list]\n >>> rounded_list == [{'b': 0.89856, 'a': 1},\n ... {'b': 0.923223, 'a': 1},\n ... {'b': 1.878964, 'a': 2},\n ... 
{'b': 1.038159, 'a': 2}]\n True\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, param_distributions, n_iter, *, random_state=None):\n if not isinstance(param_distributions, (Mapping, Iterable)):\n raise TypeError('Parameter distribution is not a dict or '\n 'a list ({!r})'.format(param_distributions))\n\n if isinstance(param_distributions, Mapping):\n # wrap dictionary in a singleton list to support either dict\n # or list of dicts\n param_distributions = [param_distributions]\n\n for dist in param_distributions:\n if not isinstance(dist, dict):\n raise TypeError('Parameter distribution is not a '\n 'dict ({!r})'.format(dist))\n for key in dist:\n if (not isinstance(dist[key], Iterable)\n and not hasattr(dist[key], 'rvs')):\n raise TypeError('Parameter value is not iterable '\n 'or distribution (key={!r}, value={!r})'\n .format(key, dist[key]))\n self.n_iter = n_iter\n self.random_state = random_state\n self.param_distributions = param_distributions\n\n def _is_all_lists(self):\n return all(\n all(not hasattr(v, \"rvs\") for v in dist.values())\n for dist in self.param_distributions\n )\n\n def __iter__(self):\n rng = check_random_state(self.random_state)\n\n # if all distributions are given as lists, we want to sample without\n # replacement\n if self._is_all_lists():\n # look up sampled parameter settings in parameter grid\n param_grid = ParameterGrid(self.param_distributions)\n grid_size = len(param_grid)\n n_iter = self.n_iter\n\n if grid_size < n_iter:\n warnings.warn(\n 'The total space of parameters %d is smaller '\n 'than n_iter=%d. Running %d iterations. For exhaustive '\n 'searches, use GridSearchCV.'\n % (grid_size, self.n_iter, grid_size), UserWarning)\n n_iter = grid_size\n for i in sample_without_replacement(grid_size, n_iter,\n random_state=rng):\n yield param_grid[i]\n\n else:\n for _ in range(self.n_iter):\n dist = rng.choice(self.param_distributions)\n # Always sort the keys of a dictionary, for reproducibility\n items = sorted(dist.items())\n params = dict()\n for k, v in items:\n if hasattr(v, \"rvs\"):\n params[k] = v.rvs(random_state=rng)\n else:\n params[k] = v[rng.randint(len(v))]\n yield params\n\n def __len__(self):\n \"\"\"Number of points that will be sampled.\"\"\"\n if self._is_all_lists():\n grid_size = len(ParameterGrid(self.param_distributions))\n return min(self.n_iter, grid_size)\n else:\n return self.n_iter", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV", + "name": "RandomizedSearchCV", + "qname": "sklearn.model_selection._search.RandomizedSearchCV", + "decorators": [], + "superclasses": ["BaseSearchCV"], + "methods": [ + "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__", + "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/_run_search" + ], + "is_public": false, + "reexported_by": [], + "description": "Randomized search on hyper parameters.\n\nRandomizedSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated search over parameter settings.\n\nIn contrast to GridSearchCV, not all parameter values are tried out, but\nrather a fixed number of parameter settings is sampled from the specified\ndistributions. 
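A small illustrative sketch of the all-lists case handled by ``_is_all_lists`` above, where sampling is performed without replacement and capped at the grid size (an editorial addition with toy values, assuming scikit-learn 0.24)::

    >>> from sklearn.model_selection import ParameterSampler
    >>> # illustrative only: a 2 x 2 all-list space of size 4
    >>> sampler = ParameterSampler({'a': [1, 2], 'b': [True, False]},
    ...                            n_iter=3, random_state=0)
    >>> len(sampler)  # min(n_iter, grid size of 4)
    3
    >>> len({tuple(sorted(d.items())) for d in sampler})  # no repeats
    3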
The number of parameter settings that are tried is\ngiven by n_iter.\n\nIf all parameters are presented as a list,\nsampling without replacement is performed. If at least one parameter\nis given as a distribution, sampling with replacement is used.\nIt is highly recommended to use continuous distributions for continuous\nparameters.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.14", + "docstring": "Randomized search on hyper parameters.\n\nRandomizedSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated search over parameter settings.\n\nIn contrast to GridSearchCV, not all parameter values are tried out, but\nrather a fixed number of parameter settings is sampled from the specified\ndistributions. The number of parameter settings that are tried is\ngiven by n_iter.\n\nIf all parameters are presented as a list,\nsampling without replacement is performed. If at least one parameter\nis given as a distribution, sampling with replacement is used.\nIt is highly recommended to use continuous distributions for continuous\nparameters.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.14\n\nParameters\n----------\nestimator : estimator object.\n An object of that type is instantiated for each grid point.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_distributions : dict or list of dicts\n Dictionary with parameters names (`str`) as keys and distributions\n or lists of parameters to try. Distributions must provide a ``rvs``\n method for sampling (such as those from scipy.stats.distributions).\n If a list is given, it is sampled uniformly.\n If a list of dicts is given, first a dict is sampled uniformly, and\n then a parameter is sampled using that dict as above.\n\nn_iter : int, default=10\n Number of parameter settings that are sampled. n_iter trades\n off runtime vs quality of the solution.\n\nscoring : str, callable, list, tuple or dict, default=None\n Strategy to evaluate the performance of the cross-validated model on\n the test set.\n\n If `scoring` represents a single score, one can use:\n\n - a single string (see :ref:`scoring_parameter`);\n - a callable (see :ref:`scoring`) that returns a single value.\n\n If `scoring` represents multiple scores, one can use:\n\n - a list or tuple of unique strings;\n - a callable returning a dictionary where the keys are the metric\n names and the values are the metric scores;\n - a dictionary with metric names as keys and callables as values.\n\n See :ref:`multimetric_grid_search` for an example.\n\n If None, the estimator's score method is used.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. 
versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nrefit : bool, str, or callable, default=True\n Refit an estimator using the best found parameters on the whole\n dataset.\n\n For multiple metric evaluation, this needs to be a `str` denoting the\n scorer that would be used to find the best parameters for refitting\n the estimator at the end.\n\n Where there are considerations other than maximum score in\n choosing a best estimator, ``refit`` can be set to a function which\n returns the selected ``best_index_`` given the ``cv_results``. In that\n case, the ``best_estimator_`` and ``best_params_`` will be set\n according to the returned ``best_index_`` while the ``best_score_``\n attribute will not be available.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``RandomizedSearchCV`` instance.\n\n Also for multiple metric evaluation, the attributes ``best_index_``,\n ``best_score_`` and ``best_params_`` will only be available if\n ``refit`` is set and all of them will be determined w.r.t this specific\n scorer.\n\n See ``scoring`` parameter to know more about multiple metric\n evaluation.\n\n .. versionchanged:: 0.20\n Support for callable added.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\npre_dispatch : int, or str, default=None\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for random uniform sampling\n from lists of possible values instead of scipy.stats distributions.\n Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. 
This parameter does not affect the refit\n step, which will always raise the error.\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n .. versionadded:: 0.19\n\n .. versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``\n\nAttributes\n----------\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``.\n\n For instance the below given table\n\n +--------------+-------------+-------------------+---+---------------+\n | param_kernel | param_gamma | split0_test_score |...|rank_test_score|\n +==============+=============+===================+===+===============+\n | 'rbf' | 0.1 | 0.80 |...| 1 |\n +--------------+-------------+-------------------+---+---------------+\n | 'rbf' | 0.2 | 0.84 |...| 3 |\n +--------------+-------------+-------------------+---+---------------+\n | 'rbf' | 0.3 | 0.70 |...| 2 |\n +--------------+-------------+-------------------+---+---------------+\n\n will be represented by a ``cv_results_`` dict of::\n\n {\n 'param_kernel' : masked_array(data = ['rbf', 'rbf', 'rbf'],\n mask = False),\n 'param_gamma' : masked_array(data = [0.1 0.2 0.3], mask = False),\n 'split0_test_score' : [0.80, 0.84, 0.70],\n 'split1_test_score' : [0.82, 0.50, 0.70],\n 'mean_test_score' : [0.81, 0.67, 0.70],\n 'std_test_score' : [0.01, 0.24, 0.00],\n 'rank_test_score' : [1, 3, 2],\n 'split0_train_score' : [0.80, 0.92, 0.70],\n 'split1_train_score' : [0.82, 0.55, 0.70],\n 'mean_train_score' : [0.81, 0.74, 0.70],\n 'std_train_score' : [0.01, 0.19, 0.00],\n 'mean_fit_time' : [0.73, 0.63, 0.43],\n 'std_fit_time' : [0.01, 0.02, 0.01],\n 'mean_score_time' : [0.01, 0.06, 0.04],\n 'std_score_time' : [0.00, 0.00, 0.00],\n 'params' : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...],\n }\n\n NOTE\n\n The key ``'params'`` is used to store a list of parameter\n settings dicts for all the parameter candidates.\n\n The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and\n ``std_score_time`` are all in seconds.\n\n For multi-metric evaluation, the scores for all the scorers are\n available in the ``cv_results_`` dict at the keys ending with that\n scorer's name (``'_'``) instead of ``'_score'`` shown\n above. ('split0_test_precision', 'mean_train_precision' etc.)\n\nbest_estimator_ : estimator\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. Not available if ``refit=False``.\n\n For multi-metric evaluation, this attribute is present only if\n ``refit`` is specified.\n\n See ``refit`` parameter for more information on allowed values.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator.\n\n For multi-metric evaluation, this is not available if ``refit`` is\n ``False``. See ``refit`` parameter for more information.\n\n This attribute is not available if ``refit`` is a function.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\n For multi-metric evaluation, this is not available if ``refit`` is\n ``False``. 
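Since `cv_results_` is documented above as a dict of equal-length columns, it loads directly into a pandas `DataFrame`. A minimal runnable sketch (the one-parameter search space is illustrative, not a recommendation):

```python
# Fit a small search, then view cv_results_ as a DataFrame with the key
# columns described above (params, mean/std test score, rank).
import pandas as pd
from scipy.stats import uniform
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV

X, y = load_iris(return_X_y=True)
logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200,
                              random_state=0)
search = RandomizedSearchCV(logistic, {'C': uniform(0, 4)}, n_iter=5,
                            random_state=0).fit(X, y)

results = pd.DataFrame(search.cv_results_)  # dict of columns -> DataFrame
cols = ['params', 'mean_test_score', 'std_test_score', 'rank_test_score']
print(results.sort_values('rank_test_score')[cols])
```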
See ``refit`` parameter for more information.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\n For multi-metric evaluation, this is not available if ``refit`` is\n ``False``. See ``refit`` parameter for more information.\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\n For multi-metric evaluation, this attribute holds the validated\n ``scoring`` dict which maps the scorer key to the scorer callable.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\n .. versionadded:: 0.20\n\nmultimetric_ : bool\n Whether or not the scorers compute several metrics.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nIf `n_jobs` was set to a value higher than one, the data is copied for each\nparameter setting(and not `n_jobs` times). This is done for efficiency\nreasons if individual jobs take very little time, but may raise errors if\nthe dataset is large and not enough memory is available. A workaround in\nthis case is to set `pre_dispatch`. Then, the memory is copied only\n`pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *\nn_jobs`.\n\nSee Also\n--------\nGridSearchCV : Does exhaustive search over a grid of parameters.\nParameterSampler : A generator over parameter settings, constructed from\n param_distributions.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.model_selection import RandomizedSearchCV\n>>> from scipy.stats import uniform\n>>> iris = load_iris()\n>>> logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200,\n... random_state=0)\n>>> distributions = dict(C=uniform(loc=0, scale=4),\n... penalty=['l2', 'l1'])\n>>> clf = RandomizedSearchCV(logistic, distributions, random_state=0)\n>>> search = clf.fit(iris.data, iris.target)\n>>> search.best_params_\n{'C': 2..., 'penalty': 'l1'}", + "code": "class RandomizedSearchCV(BaseSearchCV):\n \"\"\"Randomized search on hyper parameters.\n\n RandomizedSearchCV implements a \"fit\" and a \"score\" method.\n It also implements \"score_samples\", \"predict\", \"predict_proba\",\n \"decision_function\", \"transform\" and \"inverse_transform\" if they are\n implemented in the estimator used.\n\n The parameters of the estimator used to apply these methods are optimized\n by cross-validated search over parameter settings.\n\n In contrast to GridSearchCV, not all parameter values are tried out, but\n rather a fixed number of parameter settings is sampled from the specified\n distributions. The number of parameter settings that are tried is\n given by n_iter.\n\n If all parameters are presented as a list,\n sampling without replacement is performed. If at least one parameter\n is given as a distribution, sampling with replacement is used.\n It is highly recommended to use continuous distributions for continuous\n parameters.\n\n Read more in the :ref:`User Guide `.\n\n .. 
versionadded:: 0.14\n\n    Parameters\n    ----------\n    estimator : estimator object.\n        An object of that type is instantiated for each grid point.\n        This is assumed to implement the scikit-learn estimator interface.\n        Either estimator needs to provide a ``score`` function,\n        or ``scoring`` must be passed.\n\n    param_distributions : dict or list of dicts\n        Dictionary with parameters names (`str`) as keys and distributions\n        or lists of parameters to try. Distributions must provide a ``rvs``\n        method for sampling (such as those from scipy.stats.distributions).\n        If a list is given, it is sampled uniformly.\n        If a list of dicts is given, first a dict is sampled uniformly, and\n        then a parameter is sampled using that dict as above.\n\n    n_iter : int, default=10\n        Number of parameter settings that are sampled. n_iter trades\n        off runtime vs quality of the solution.\n\n    scoring : str, callable, list, tuple or dict, default=None\n        Strategy to evaluate the performance of the cross-validated model on\n        the test set.\n\n        If `scoring` represents a single score, one can use:\n\n        - a single string (see :ref:`scoring_parameter`);\n        - a callable (see :ref:`scoring`) that returns a single value.\n\n        If `scoring` represents multiple scores, one can use:\n\n        - a list or tuple of unique strings;\n        - a callable returning a dictionary where the keys are the metric\n          names and the values are the metric scores;\n        - a dictionary with metric names as keys and callables as values.\n\n        See :ref:`multimetric_grid_search` for an example.\n\n        If None, the estimator's score method is used.\n\n    n_jobs : int, default=None\n        Number of jobs to run in parallel.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary `\n        for more details.\n\n        .. versionchanged:: v0.20\n           `n_jobs` default changed from 1 to None\n\n    refit : bool, str, or callable, default=True\n        Refit an estimator using the best found parameters on the whole\n        dataset.\n\n        For multiple metric evaluation, this needs to be a `str` denoting the\n        scorer that would be used to find the best parameters for refitting\n        the estimator at the end.\n\n        Where there are considerations other than maximum score in\n        choosing a best estimator, ``refit`` can be set to a function which\n        returns the selected ``best_index_`` given the ``cv_results``. In that\n        case, the ``best_estimator_`` and ``best_params_`` will be set\n        according to the returned ``best_index_`` while the ``best_score_``\n        attribute will not be available.\n\n        The refitted estimator is made available at the ``best_estimator_``\n        attribute and permits using ``predict`` directly on this\n        ``RandomizedSearchCV`` instance.\n\n        Also for multiple metric evaluation, the attributes ``best_index_``,\n        ``best_score_`` and ``best_params_`` will only be available if\n        ``refit`` is set and all of them will be determined w.r.t this specific\n        scorer.\n\n        See ``scoring`` parameter to know more about multiple metric\n        evaluation.\n\n        .. 
versionchanged:: 0.20\n Support for callable added.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n verbose : int\n Controls the verbosity: the higher, the more messages.\n\n pre_dispatch : int, or str, default=None\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\n random_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for random uniform sampling\n from lists of possible values instead of scipy.stats distributions.\n Pass an int for reproducible output across multiple\n function calls.\n See :term:`Glossary `.\n\n error_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error.\n\n return_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n .. versionadded:: 0.19\n\n .. 
versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``\n\n Attributes\n ----------\n cv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``.\n\n For instance the below given table\n\n +--------------+-------------+-------------------+---+---------------+\n | param_kernel | param_gamma | split0_test_score |...|rank_test_score|\n +==============+=============+===================+===+===============+\n | 'rbf' | 0.1 | 0.80 |...| 1 |\n +--------------+-------------+-------------------+---+---------------+\n | 'rbf' | 0.2 | 0.84 |...| 3 |\n +--------------+-------------+-------------------+---+---------------+\n | 'rbf' | 0.3 | 0.70 |...| 2 |\n +--------------+-------------+-------------------+---+---------------+\n\n will be represented by a ``cv_results_`` dict of::\n\n {\n 'param_kernel' : masked_array(data = ['rbf', 'rbf', 'rbf'],\n mask = False),\n 'param_gamma' : masked_array(data = [0.1 0.2 0.3], mask = False),\n 'split0_test_score' : [0.80, 0.84, 0.70],\n 'split1_test_score' : [0.82, 0.50, 0.70],\n 'mean_test_score' : [0.81, 0.67, 0.70],\n 'std_test_score' : [0.01, 0.24, 0.00],\n 'rank_test_score' : [1, 3, 2],\n 'split0_train_score' : [0.80, 0.92, 0.70],\n 'split1_train_score' : [0.82, 0.55, 0.70],\n 'mean_train_score' : [0.81, 0.74, 0.70],\n 'std_train_score' : [0.01, 0.19, 0.00],\n 'mean_fit_time' : [0.73, 0.63, 0.43],\n 'std_fit_time' : [0.01, 0.02, 0.01],\n 'mean_score_time' : [0.01, 0.06, 0.04],\n 'std_score_time' : [0.00, 0.00, 0.00],\n 'params' : [{'kernel' : 'rbf', 'gamma' : 0.1}, ...],\n }\n\n NOTE\n\n The key ``'params'`` is used to store a list of parameter\n settings dicts for all the parameter candidates.\n\n The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and\n ``std_score_time`` are all in seconds.\n\n For multi-metric evaluation, the scores for all the scorers are\n available in the ``cv_results_`` dict at the keys ending with that\n scorer's name (``'_'``) instead of ``'_score'`` shown\n above. ('split0_test_precision', 'mean_train_precision' etc.)\n\n best_estimator_ : estimator\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. Not available if ``refit=False``.\n\n For multi-metric evaluation, this attribute is present only if\n ``refit`` is specified.\n\n See ``refit`` parameter for more information on allowed values.\n\n best_score_ : float\n Mean cross-validated score of the best_estimator.\n\n For multi-metric evaluation, this is not available if ``refit`` is\n ``False``. See ``refit`` parameter for more information.\n\n This attribute is not available if ``refit`` is a function.\n\n best_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\n For multi-metric evaluation, this is not available if ``refit`` is\n ``False``. See ``refit`` parameter for more information.\n\n best_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\n For multi-metric evaluation, this is not available if ``refit`` is\n ``False``. 
See ``refit`` parameter for more information.\n\n scorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\n For multi-metric evaluation, this attribute holds the validated\n ``scoring`` dict which maps the scorer key to the scorer callable.\n\n n_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\n refit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\n .. versionadded:: 0.20\n\n multimetric_ : bool\n Whether or not the scorers compute several metrics.\n\n Notes\n -----\n The parameters selected are those that maximize the score of the held-out\n data, according to the scoring parameter.\n\n If `n_jobs` was set to a value higher than one, the data is copied for each\n parameter setting(and not `n_jobs` times). This is done for efficiency\n reasons if individual jobs take very little time, but may raise errors if\n the dataset is large and not enough memory is available. A workaround in\n this case is to set `pre_dispatch`. Then, the memory is copied only\n `pre_dispatch` many times. A reasonable value for `pre_dispatch` is `2 *\n n_jobs`.\n\n See Also\n --------\n GridSearchCV : Does exhaustive search over a grid of parameters.\n ParameterSampler : A generator over parameter settings, constructed from\n param_distributions.\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.linear_model import LogisticRegression\n >>> from sklearn.model_selection import RandomizedSearchCV\n >>> from scipy.stats import uniform\n >>> iris = load_iris()\n >>> logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200,\n ... random_state=0)\n >>> distributions = dict(C=uniform(loc=0, scale=4),\n ... 
penalty=['l2', 'l1'])\n >>> clf = RandomizedSearchCV(logistic, distributions, random_state=0)\n >>> search = clf.fit(iris.data, iris.target)\n >>> search.best_params_\n {'C': 2..., 'penalty': 'l1'}\n \"\"\"\n _required_parameters = [\"estimator\", \"param_distributions\"]\n\n @_deprecate_positional_args\n def __init__(self, estimator, param_distributions, *, n_iter=10,\n scoring=None, n_jobs=None, refit=True,\n cv=None, verbose=0, pre_dispatch='2*n_jobs',\n random_state=None, error_score=np.nan,\n return_train_score=False):\n self.param_distributions = param_distributions\n self.n_iter = n_iter\n self.random_state = random_state\n super().__init__(\n estimator=estimator, scoring=scoring,\n n_jobs=n_jobs, refit=refit, cv=cv, verbose=verbose,\n pre_dispatch=pre_dispatch, error_score=error_score,\n return_train_score=return_train_score)\n\n def _run_search(self, evaluate_candidates):\n \"\"\"Search n_iter candidates from param_distributions\"\"\"\n evaluate_candidates(ParameterSampler(\n self.param_distributions, self.n_iter,\n random_state=self.random_state))", + "instance_attributes": [ + { + "name": "n_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving", + "name": "BaseSuccessiveHalving", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving", + "decorators": [], + "superclasses": ["BaseSearchCV"], + "methods": [ + "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__", + "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_check_input_parameters", + "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/fit", + "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_run_search", + "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_generate_candidate_params" + ], + "is_public": false, + "reexported_by": [], + "description": "Implements successive halving.\n\nRef:\nAlmost optimal exploration in multi-armed bandits, ICML 13\nZohar Karnin, Tomer Koren, Oren Somekh", + "docstring": "Implements successive halving.\n\nRef:\nAlmost optimal exploration in multi-armed bandits, ICML 13\nZohar Karnin, Tomer Koren, Oren Somekh", + "code": "class BaseSuccessiveHalving(BaseSearchCV):\n \"\"\"Implements successive halving.\n\n Ref:\n Almost optimal exploration in multi-armed bandits, ICML 13\n Zohar Karnin, Tomer Koren, Oren Somekh\n \"\"\"\n def __init__(self, estimator, *, scoring=None,\n n_jobs=None, refit=True, cv=5, verbose=0, random_state=None,\n error_score=np.nan, return_train_score=True,\n max_resources='auto', min_resources='exhaust',\n resource='n_samples', factor=3, aggressive_elimination=False):\n\n refit = _refit_callable if refit else False\n super().__init__(estimator, scoring=scoring,\n n_jobs=n_jobs, refit=refit, cv=cv,\n verbose=verbose,\n error_score=error_score,\n return_train_score=return_train_score)\n\n self.random_state = random_state\n self.max_resources = max_resources\n self.resource = resource\n self.factor = factor\n self.min_resources = min_resources\n self.aggressive_elimination = aggressive_elimination\n\n def _check_input_parameters(self, X, y, groups):\n\n if self.scoring is not None and not (isinstance(self.scoring, str)\n or callable(self.scoring)):\n raise ValueError('scoring parameter must be a string, '\n 'a callable or None. 
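A hedged sketch of the callable-`refit` contract documented above: the callable receives `cv_results` and must return an integer `best_index_`, after which `best_params_` is set accordingly and `best_score_` is not available. `refit_lowest_std` is a hypothetical helper, not part of scikit-learn:

```python
import numpy as np
from scipy.stats import uniform
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV

def refit_lowest_std(cv_results):
    # Hypothetical policy: among candidates within 0.01 of the best mean
    # test score, pick the one with the most stable (lowest-std) score.
    means = np.asarray(cv_results['mean_test_score'])
    stds = np.asarray(cv_results['std_test_score'])
    idx = np.flatnonzero(means >= means.max() - 0.01)
    return int(idx[np.argmin(stds[idx])])  # must return an integer index

X, y = load_iris(return_X_y=True)
logistic = LogisticRegression(solver='saga', tol=1e-2, max_iter=200,
                              random_state=0)
search = RandomizedSearchCV(logistic, {'C': uniform(0, 4)}, n_iter=10,
                            refit=refit_lowest_std, random_state=0).fit(X, y)
print(search.best_index_, search.best_params_)  # best_score_ is not set here
```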
Multimetric scoring is not '\n 'supported.')\n\n # We need to enforce that successive calls to cv.split() yield the same\n # splits: see https://github.com/scikit-learn/scikit-learn/issues/15149\n if not _yields_constant_splits(self._checked_cv_orig):\n raise ValueError(\n \"The cv parameter must yield consistent folds across \"\n \"calls to split(). Set its random_state to an int, or set \"\n \"shuffle=False.\"\n )\n\n if (self.resource != 'n_samples'\n and self.resource not in self.estimator.get_params()):\n raise ValueError(\n f'Cannot use resource={self.resource} which is not supported '\n f'by estimator {self.estimator.__class__.__name__}'\n )\n\n if (isinstance(self.max_resources, str) and\n self.max_resources != 'auto'):\n raise ValueError(\n \"max_resources must be either 'auto' or a positive integer\"\n )\n if self.max_resources != 'auto' and (\n not isinstance(self.max_resources, Integral) or\n self.max_resources <= 0):\n raise ValueError(\n \"max_resources must be either 'auto' or a positive integer\"\n )\n\n if self.min_resources not in ('smallest', 'exhaust') and (\n not isinstance(self.min_resources, Integral) or\n self.min_resources <= 0):\n raise ValueError(\n \"min_resources must be either 'smallest', 'exhaust', \"\n \"or a positive integer \"\n \"no greater than max_resources.\"\n )\n\n if isinstance(self, HalvingRandomSearchCV):\n if self.min_resources == self.n_candidates == 'exhaust':\n # for n_candidates=exhaust to work, we need to know what\n # min_resources is. Similarly min_resources=exhaust needs to\n # know the actual number of candidates.\n raise ValueError(\n \"n_candidates and min_resources cannot be both set to \"\n \"'exhaust'.\"\n )\n if self.n_candidates != 'exhaust' and (\n not isinstance(self.n_candidates, Integral) or\n self.n_candidates <= 0):\n raise ValueError(\n \"n_candidates must be either 'exhaust' \"\n \"or a positive integer\"\n )\n\n self.min_resources_ = self.min_resources\n if self.min_resources_ in ('smallest', 'exhaust'):\n if self.resource == 'n_samples':\n n_splits = self._checked_cv_orig.get_n_splits(X, y, groups)\n # please see https://gph.is/1KjihQe for a justification\n magic_factor = 2\n self.min_resources_ = n_splits * magic_factor\n if is_classifier(self.estimator):\n n_classes = np.unique(y).shape[0]\n self.min_resources_ *= n_classes\n else:\n self.min_resources_ = 1\n # if 'exhaust', min_resources_ might be set to a higher value later\n # in _run_search\n\n self.max_resources_ = self.max_resources\n if self.max_resources_ == 'auto':\n if not self.resource == 'n_samples':\n raise ValueError(\n \"max_resources can only be 'auto' if resource='n_samples'\")\n self.max_resources_ = _num_samples(X)\n\n if self.min_resources_ > self.max_resources_:\n raise ValueError(\n f'min_resources_={self.min_resources_} is greater '\n f'than max_resources_={self.max_resources_}.'\n )\n\n def fit(self, X, y=None, groups=None, **fit_params):\n \"\"\"Run fit with all sets of parameters.\n\n Parameters\n ----------\n\n X : array-like, shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like, shape (n_samples,) or (n_samples, n_output), optional\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n **fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of the estimator\n \"\"\"\n self._checked_cv_orig = check_cv(\n self.cv, y, classifier=is_classifier(self.estimator))\n\n self._check_input_parameters(\n X=X,\n y=y,\n groups=groups,\n )\n\n self._n_samples_orig = _num_samples(X)\n\n super().fit(X, y=y, groups=groups, **fit_params)\n\n # Set best_score_: BaseSearchCV does not set it, as refit is a callable\n self.best_score_ = (\n self.cv_results_['mean_test_score'][self.best_index_])\n\n return self\n\n def _run_search(self, evaluate_candidates):\n candidate_params = self._generate_candidate_params()\n\n if self.resource != 'n_samples' and any(\n self.resource in candidate for candidate in candidate_params):\n # Can only check this now since we need the candidates list\n raise ValueError(\n f\"Cannot use parameter {self.resource} as the resource since \"\n \"it is part of the searched parameters.\"\n )\n\n # n_required_iterations is the number of iterations needed so that the\n # last iterations evaluates less than `factor` candidates.\n n_required_iterations = 1 + floor(log(len(candidate_params),\n self.factor))\n\n if self.min_resources == 'exhaust':\n # To exhaust the resources, we want to start with the biggest\n # min_resources possible so that the last (required) iteration\n # uses as many resources as possible\n last_iteration = n_required_iterations - 1\n self.min_resources_ = max(\n self.min_resources_,\n self.max_resources_ // self.factor**last_iteration\n )\n\n # n_possible_iterations is the number of iterations that we can\n # actually do starting from min_resources and without exceeding\n # max_resources. Depending on max_resources and the number of\n # candidates, this may be higher or smaller than\n # n_required_iterations.\n n_possible_iterations = 1 + floor(log(\n self.max_resources_ // self.min_resources_, self.factor))\n\n if self.aggressive_elimination:\n n_iterations = n_required_iterations\n else:\n n_iterations = min(n_possible_iterations, n_required_iterations)\n\n if self.verbose:\n print(f'n_iterations: {n_iterations}')\n print(f'n_required_iterations: {n_required_iterations}')\n print(f'n_possible_iterations: {n_possible_iterations}')\n print(f'min_resources_: {self.min_resources_}')\n print(f'max_resources_: {self.max_resources_}')\n print(f'aggressive_elimination: {self.aggressive_elimination}')\n print(f'factor: {self.factor}')\n\n self.n_resources_ = []\n self.n_candidates_ = []\n\n for itr in range(n_iterations):\n\n power = itr # default\n if self.aggressive_elimination:\n # this will set n_resources to the initial value (i.e. 
the\n # value of n_resources at the first iteration) for as many\n # iterations as needed (while candidates are being\n # eliminated), and then go on as usual.\n power = max(\n 0,\n itr - n_required_iterations + n_possible_iterations\n )\n\n n_resources = int(self.factor**power * self.min_resources_)\n # guard, probably not needed\n n_resources = min(n_resources, self.max_resources_)\n self.n_resources_.append(n_resources)\n\n n_candidates = len(candidate_params)\n self.n_candidates_.append(n_candidates)\n\n if self.verbose:\n print('-' * 10)\n print(f'iter: {itr}')\n print(f'n_candidates: {n_candidates}')\n print(f'n_resources: {n_resources}')\n\n if self.resource == 'n_samples':\n # subsampling will be done in cv.split()\n cv = _SubsampleMetaSplitter(\n base_cv=self._checked_cv_orig,\n fraction=n_resources / self._n_samples_orig,\n subsample_test=True,\n random_state=self.random_state\n )\n\n else:\n # Need copy so that the n_resources of next iteration does\n # not overwrite\n candidate_params = [c.copy() for c in candidate_params]\n for candidate in candidate_params:\n candidate[self.resource] = n_resources\n cv = self._checked_cv_orig\n\n more_results = {'iter': [itr] * n_candidates,\n 'n_resources': [n_resources] * n_candidates}\n\n results = evaluate_candidates(candidate_params, cv,\n more_results=more_results)\n\n n_candidates_to_keep = ceil(n_candidates / self.factor)\n candidate_params = _top_k(results, n_candidates_to_keep, itr)\n\n self.n_remaining_candidates_ = len(candidate_params)\n self.n_required_iterations_ = n_required_iterations\n self.n_possible_iterations_ = n_possible_iterations\n self.n_iterations_ = n_iterations\n\n @abstractmethod\n def _generate_candidate_params(self):\n pass", + "instance_attributes": [ + { + "name": "max_resources", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "resource", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "factor", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "min_resources", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "aggressive_elimination", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "min_resources_", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "max_resources_", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "n_resources_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "n_candidates_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV", + "name": "HalvingGridSearchCV", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV", + "decorators": [], + "superclasses": ["BaseSuccessiveHalving"], + "methods": [ + "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__", + "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/_generate_candidate_params" + ], + "is_public": false, + "reexported_by": [], + "description": "Search over specified parameter values with successive halving.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using\nmore and more resources.\n\nRead more in the :ref:`User guide `.\n\n.. 
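The schedule arithmetic in `_run_search` above can be reproduced standalone. Below is a sketch for the default case (no aggressive elimination, `min_resources` already resolved); `halving_schedule` is a hypothetical helper, not part of scikit-learn:

```python
# Mirrors the iteration counts and per-iteration resource budget computed
# in _run_search: n_required, n_possible, and n_resources = factor**itr * r0.
from math import ceil, floor, log

def halving_schedule(n_candidates, min_resources, max_resources, factor=3):
    # Iterations needed so the last one evaluates fewer than `factor` candidates.
    n_required = 1 + floor(log(n_candidates, factor))
    # Iterations affordable between min_resources and max_resources.
    n_possible = 1 + floor(log(max_resources // min_resources, factor))
    for itr in range(min(n_required, n_possible)):
        n_resources = min(factor**itr * min_resources, max_resources)
        print(f"iter {itr}: {n_candidates} candidates, {n_resources} resources")
        n_candidates = ceil(n_candidates / factor)  # keep the top 1/factor

halving_schedule(n_candidates=16, min_resources=20, max_resources=1000)
```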
note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingGridSearchCV", + "docstring": "Search over specified parameter values with successive halving.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using\nmore and more resources.\n\nRead more in the :ref:`User guide `.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingGridSearchCV\n\nParameters\n----------\nestimator : estimator object.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_grid : dict or list of dictionaries\n Dictionary with parameters names (string) as keys and lists of\n parameter settings to try as values, or a list of such\n dictionaries, in which case the grids spanned by each dictionary\n in the list are explored. This enables searching over any sequence\n of parameter settings.\n\nfactor : int or float, default=3\n The 'halving' parameter, which determines the proportion of candidates\n that are selected for each subsequent iteration. For example,\n ``factor=3`` means that only one third of the candidates are selected.\n\nresource : ``'n_samples'`` or str, default='n_samples'\n Defines the resource that increases with each iteration. By default,\n the resource is the number of samples. It can also be set to any\n parameter of the base estimator that accepts positive integer\n values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n boosting estimator. In this case ``max_resources`` cannot be 'auto'\n and must be set explicitly.\n\nmax_resources : int, default='auto'\n The maximum amount of resource that any candidate is allowed to use\n for a given iteration. By default, this is set to ``n_samples`` when\n ``resource='n_samples'`` (default), else an error is raised.\n\nmin_resources : {'exhaust', 'smallest'} or int, default='exhaust'\n The minimum amount of resource that any candidate is allowed to use\n for a given iteration. Equivalently, this defines the amount of\n resources `r0` that are allocated for each candidate at the first\n iteration.\n\n - 'smallest' is a heuristic that sets `r0` to a small value:\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n - 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. 
In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming.\n\n Note that the amount of resources used at each iteration is always a\n multiple of ``min_resources``.\n\naggressive_elimination : bool, default=False\n This is only relevant in cases where there isn't enough resources to\n reduce the remaining candidates to at most `factor` after the last\n iteration. If ``True``, then the search process will 'replay' the\n first iteration for as long as needed until the number of candidates\n is small enough. This is ``False`` by default, which means that the\n last iteration may evaluate more than ``factor`` candidates. See\n :ref:`aggressive_elimination` for more details.\n\ncv : int, cross-validation generator or iterable, default=5\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer.\n\nscoring : string, callable, or None, default=None\n A single string (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n If None, the estimator's score method is used.\n\nrefit : bool, default=True\n If True, refit an estimator using the best found parameters on the\n whole dataset.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``HalvingGridSearchCV`` instance.\n\nerror_score : 'raise' or numeric\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error. Default is ``np.nan``\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for subsampling the dataset\n when `resources != 'n_samples'`. Ignored otherwise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int or None, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\nAttributes\n----------\nn_resources_ : list of int\n The amount of resources used at each iteration.\n\nn_candidates_ : list of int\n The number of candidate parameters that were evaluated at each\n iteration.\n\nn_remaining_candidates_ : int\n The number of candidate parameters that are left after the last\n iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\nmax_resources_ : int\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. Note that since the number of resources used\n at each iteration must be a multiple of ``min_resources_``, the\n actual number of resources used at the last iteration may be smaller\n than ``max_resources_``.\n\nmin_resources_ : int\n The amount of resources that are allocated for each candidate at the\n first iteration.\n\nn_iterations_ : int\n The actual number of iterations that were run. This is equal to\n ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n Else, this is equal to ``min(n_possible_iterations_,\n n_required_iterations_)``.\n\nn_possible_iterations_ : int\n The number of iterations that are possible starting with\n ``min_resources_`` resources and without exceeding\n ``max_resources_``.\n\nn_required_iterations_ : int\n The number of iterations that are required to end up with less than\n ``factor`` candidates at the last iteration, starting with\n ``min_resources_`` resources. This will be smaller than\n ``n_possible_iterations_`` when there isn't enough resources.\n\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``. It contains much information for\n analysing the results of a search.\n Please refer to the :ref:`User guide`\n for details.\n\nbest_estimator_ : estimator or dict\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. 
Not available if ``refit=False``.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\nSee Also\n--------\n:class:`HalvingRandomSearchCV`:\n Random search over a set of parameters using successive halving.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.experimental import enable_halving_search_cv # noqa\n>>> from sklearn.model_selection import HalvingGridSearchCV\n...\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = RandomForestClassifier(random_state=0)\n...\n>>> param_grid = {\"max_depth\": [3, None],\n... \"min_samples_split\": [5, 10]}\n>>> search = HalvingGridSearchCV(clf, param_grid, resource='n_estimators',\n... max_resources=10,\n... random_state=0).fit(X, y)\n>>> search.best_params_ # doctest: +SKIP\n{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}", + "code": "class HalvingGridSearchCV(BaseSuccessiveHalving):\n \"\"\"Search over specified parameter values with successive halving.\n\n The search strategy starts evaluating all the candidates with a small\n amount of resources and iteratively selects the best candidates, using\n more and more resources.\n\n Read more in the :ref:`User guide `.\n\n .. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingGridSearchCV\n\n Parameters\n ----------\n estimator : estimator object.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\n param_grid : dict or list of dictionaries\n Dictionary with parameters names (string) as keys and lists of\n parameter settings to try as values, or a list of such\n dictionaries, in which case the grids spanned by each dictionary\n in the list are explored. This enables searching over any sequence\n of parameter settings.\n\n factor : int or float, default=3\n The 'halving' parameter, which determines the proportion of candidates\n that are selected for each subsequent iteration. For example,\n ``factor=3`` means that only one third of the candidates are selected.\n\n resource : ``'n_samples'`` or str, default='n_samples'\n Defines the resource that increases with each iteration. 
By default,\n the resource is the number of samples. It can also be set to any\n parameter of the base estimator that accepts positive integer\n values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n boosting estimator. In this case ``max_resources`` cannot be 'auto'\n and must be set explicitly.\n\n max_resources : int, default='auto'\n The maximum amount of resource that any candidate is allowed to use\n for a given iteration. By default, this is set to ``n_samples`` when\n ``resource='n_samples'`` (default), else an error is raised.\n\n min_resources : {'exhaust', 'smallest'} or int, default='exhaust'\n The minimum amount of resource that any candidate is allowed to use\n for a given iteration. Equivalently, this defines the amount of\n resources `r0` that are allocated for each candidate at the first\n iteration.\n\n - 'smallest' is a heuristic that sets `r0` to a small value:\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n - 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming.\n\n Note that the amount of resources used at each iteration is always a\n multiple of ``min_resources``.\n\n aggressive_elimination : bool, default=False\n This is only relevant in cases where there isn't enough resources to\n reduce the remaining candidates to at most `factor` after the last\n iteration. If ``True``, then the search process will 'replay' the\n first iteration for as long as needed until the number of candidates\n is small enough. This is ``False`` by default, which means that the\n last iteration may evaluate more than ``factor`` candidates. See\n :ref:`aggressive_elimination` for more details.\n\n cv : int, cross-validation generator or iterable, default=5\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. 
For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer.\n\n scoring : string, callable, or None, default=None\n A single string (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n If None, the estimator's score method is used.\n\n refit : bool, default=True\n If True, refit an estimator using the best found parameters on the\n whole dataset.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``HalvingGridSearchCV`` instance.\n\n error_score : 'raise' or numeric\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error. Default is ``np.nan``\n\n return_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n random_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for subsampling the dataset\n when `resources != 'n_samples'`. Ignored otherwise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n n_jobs : int or None, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n verbose : int\n Controls the verbosity: the higher, the more messages.\n\n Attributes\n ----------\n n_resources_ : list of int\n The amount of resources used at each iteration.\n\n n_candidates_ : list of int\n The number of candidate parameters that were evaluated at each\n iteration.\n\n n_remaining_candidates_ : int\n The number of candidate parameters that are left after the last\n iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\n max_resources_ : int\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. Note that since the number of resources used\n at each iteration must be a multiple of ``min_resources_``, the\n actual number of resources used at the last iteration may be smaller\n than ``max_resources_``.\n\n min_resources_ : int\n The amount of resources that are allocated for each candidate at the\n first iteration.\n\n n_iterations_ : int\n The actual number of iterations that were run. This is equal to\n ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n Else, this is equal to ``min(n_possible_iterations_,\n n_required_iterations_)``.\n\n n_possible_iterations_ : int\n The number of iterations that are possible starting with\n ``min_resources_`` resources and without exceeding\n ``max_resources_``.\n\n n_required_iterations_ : int\n The number of iterations that are required to end up with less than\n ``factor`` candidates at the last iteration, starting with\n ``min_resources_`` resources. 
This will be smaller than\n        ``n_possible_iterations_`` when there isn't enough resources.\n\n    cv_results_ : dict of numpy (masked) ndarrays\n        A dict with keys as column headers and values as columns, that can be\n        imported into a pandas ``DataFrame``. It contains much information for\n        analysing the results of a search.\n        Please refer to the :ref:`User guide`\n        for details.\n\n    best_estimator_ : estimator or dict\n        Estimator that was chosen by the search, i.e. estimator\n        which gave highest score (or smallest loss if specified)\n        on the left out data. Not available if ``refit=False``.\n\n    best_score_ : float\n        Mean cross-validated score of the best_estimator.\n\n    best_params_ : dict\n        Parameter setting that gave the best results on the hold out data.\n\n    best_index_ : int\n        The index (of the ``cv_results_`` arrays) which corresponds to the best\n        candidate parameter setting.\n\n        The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n        the parameter setting for the best model, that gives the highest\n        mean score (``search.best_score_``).\n\n    scorer_ : function or a dict\n        Scorer function used on the held out data to choose the best\n        parameters for the model.\n\n    n_splits_ : int\n        The number of cross-validation splits (folds/iterations).\n\n    refit_time_ : float\n        Seconds used for refitting the best model on the whole dataset.\n\n        This is present only if ``refit`` is not False.\n\n    See Also\n    --------\n    :class:`HalvingRandomSearchCV`:\n        Random search over a set of parameters using successive halving.\n\n    Notes\n    -----\n    The parameters selected are those that maximize the score of the held-out\n    data, according to the scoring parameter.\n\n    Examples\n    --------\n\n    >>> from sklearn.datasets import load_iris\n    >>> from sklearn.ensemble import RandomForestClassifier\n    >>> from sklearn.experimental import enable_halving_search_cv  # noqa\n    >>> from sklearn.model_selection import HalvingGridSearchCV\n    ...\n    >>> X, y = load_iris(return_X_y=True)\n    >>> clf = RandomForestClassifier(random_state=0)\n    ...\n    >>> param_grid = {\"max_depth\": [3, None],\n    ...               \"min_samples_split\": [5, 10]}\n    >>> search = HalvingGridSearchCV(clf, param_grid, resource='n_estimators',\n    ...                              max_resources=10,\n    ... 
random_state=0).fit(X, y)\n >>> search.best_params_ # doctest: +SKIP\n {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}\n \"\"\"\n _required_parameters = [\"estimator\", \"param_grid\"]\n\n def __init__(self, estimator, param_grid, *,\n factor=3, resource='n_samples', max_resources='auto',\n min_resources='exhaust', aggressive_elimination=False,\n cv=5, scoring=None, refit=True, error_score=np.nan,\n return_train_score=True, random_state=None, n_jobs=None,\n verbose=0):\n super().__init__(estimator, scoring=scoring,\n n_jobs=n_jobs, refit=refit, verbose=verbose, cv=cv,\n random_state=random_state, error_score=error_score,\n return_train_score=return_train_score,\n max_resources=max_resources, resource=resource,\n factor=factor, min_resources=min_resources,\n aggressive_elimination=aggressive_elimination)\n self.param_grid = param_grid\n _check_param_grid(self.param_grid)\n\n def _generate_candidate_params(self):\n return ParameterGrid(self.param_grid)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV", + "name": "HalvingRandomSearchCV", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV", + "decorators": [], + "superclasses": ["BaseSuccessiveHalving"], + "methods": [ + "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__", + "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/_generate_candidate_params" + ], + "is_public": false, + "reexported_by": [], + "description": "Randomized search on hyper parameters.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using more\nand more resources.\n\nThe candidates are sampled at random from the parameter space and the\nnumber of sampled candidates is determined by ``n_candidates``.\n\nRead more in the :ref:`User guide`.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingRandomSearchCV", + "docstring": "Randomized search on hyper parameters.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using more\nand more resources.\n\nThe candidates are sampled at random from the parameter space and the\nnumber of sampled candidates is determined by ``n_candidates``.\n\nRead more in the :ref:`User guide`.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. 
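A runnable sketch extending the documented example above to inspect the per-iteration schedule recorded in `n_candidates_` and `n_resources_` after fitting; the `factor=2` setting is illustrative:

```python
# Fit a small halving grid search and print the schedule attributes
# described above (experimental API in 0.24, hence the explicit enable).
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingGridSearchCV

X, y = load_iris(return_X_y=True)
param_grid = {'max_depth': [3, None], 'min_samples_split': [5, 10]}
search = HalvingGridSearchCV(RandomForestClassifier(random_state=0),
                             param_grid, factor=2, random_state=0).fit(X, y)
# One entry per halving iteration: candidates kept and resources allocated.
print(search.n_candidates_, search.n_resources_)
print(search.n_iterations_, search.n_remaining_candidates_)
```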
To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingRandomSearchCV\n\nParameters\n----------\nestimator : estimator object.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_distributions : dict\n Dictionary with parameters names (string) as keys and distributions\n or lists of parameters to try. Distributions must provide a ``rvs``\n method for sampling (such as those from scipy.stats.distributions).\n If a list is given, it is sampled uniformly.\n\nn_candidates : int, default='exhaust'\n The number of candidate parameters to sample, at the first\n iteration. Using 'exhaust' will sample enough candidates so that the\n last iteration uses as many resources as possible, based on\n `min_resources`, `max_resources` and `factor`. In this case,\n `min_resources` cannot be 'exhaust'.\n\nfactor : int or float, default=3\n The 'halving' parameter, which determines the proportion of candidates\n that are selected for each subsequent iteration. For example,\n ``factor=3`` means that only one third of the candidates are selected.\n\nresource : ``'n_samples'`` or str, default='n_samples'\n Defines the resource that increases with each iteration. By default,\n the resource is the number of samples. It can also be set to any\n parameter of the base estimator that accepts positive integer\n values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n boosting estimator. In this case ``max_resources`` cannot be 'auto'\n and must be set explicitly.\n\nmax_resources : int, default='auto'\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. By default, this is set ``n_samples`` when\n ``resource='n_samples'`` (default), else an error is raised.\n\nmin_resources : {'exhaust', 'smallest'} or int, default='smallest'\n The minimum amount of resource that any candidate is allowed to use\n for a given iteration. Equivalently, this defines the amount of\n resources `r0` that are allocated for each candidate at the first\n iteration.\n\n - 'smallest' is a heuristic that sets `r0` to a small value:\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n - 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming. 'exhaust' isn't available when `n_candidates='exhaust'`.\n\n Note that the amount of resources used at each iteration is always a\n multiple of ``min_resources``.\n\naggressive_elimination : bool, default=False\n This is only relevant in cases where there isn't enough resources to\n reduce the remaining candidates to at most `factor` after the last\n iteration. If ``True``, then the search process will 'replay' the\n first iteration for as long as needed until the number of candidates\n is small enough. 
This is ``False`` by default, which means that the\n last iteration may evaluate more than ``factor`` candidates. See\n :ref:`aggressive_elimination` for more details.\n\ncv : int, cross-validation generator or an iterable, default=5\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer.\n\nscoring : string, callable, or None, default=None\n A single string (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n If None, the estimator's score method is used.\n\nrefit : bool, default=True\n If True, refit an estimator using the best found parameters on the\n whole dataset.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``HalvingRandomSearchCV`` instance.\n\nerror_score : 'raise' or numeric\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error. Default is ``np.nan``\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for subsampling the dataset\n when `resources != 'n_samples'`. Also used for random uniform\n sampling from lists of possible values instead of scipy.stats\n distributions.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int or None, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\nAttributes\n----------\nn_resources_ : list of int\n The amount of resources used at each iteration.\n\nn_candidates_ : list of int\n The number of candidate parameters that were evaluated at each\n iteration.\n\nn_remaining_candidates_ : int\n The number of candidate parameters that are left after the last\n iteration. 
It corresponds to `ceil(n_candidates[-1] / factor)`\n\nmax_resources_ : int\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. Note that since the number of resources used at\n each iteration must be a multiple of ``min_resources_``, the actual\n number of resources used at the last iteration may be smaller than\n ``max_resources_``.\n\nmin_resources_ : int\n The amount of resources that are allocated for each candidate at the\n first iteration.\n\nn_iterations_ : int\n The actual number of iterations that were run. This is equal to\n ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n Else, this is equal to ``min(n_possible_iterations_,\n n_required_iterations_)``.\n\nn_possible_iterations_ : int\n The number of iterations that are possible starting with\n ``min_resources_`` resources and without exceeding\n ``max_resources_``.\n\nn_required_iterations_ : int\n The number of iterations that are required to end up with less than\n ``factor`` candidates at the last iteration, starting with\n ``min_resources_`` resources. This will be smaller than\n ``n_possible_iterations_`` when there isn't enough resources.\n\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``. It contains many informations for\n analysing the results of a search.\n Please refer to the :ref:`User guide`\n for details.\n\nbest_estimator_ : estimator or dict\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. Not available if ``refit=False``.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\nSee Also\n--------\n:class:`HalvingGridSearchCV`:\n Search over a grid of parameters using successive halving.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.experimental import enable_halving_search_cv # noqa\n>>> from sklearn.model_selection import HalvingRandomSearchCV\n>>> from scipy.stats import randint\n...\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = RandomForestClassifier(random_state=0)\n>>> np.random.seed(0)\n...\n>>> param_distributions = {\"max_depth\": [3, None],\n... \"min_samples_split\": randint(2, 11)}\n>>> search = HalvingRandomSearchCV(clf, param_distributions,\n... resource='n_estimators',\n... max_resources=10,\n... 
random_state=0).fit(X, y)\n>>> search.best_params_ # doctest: +SKIP\n{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}", + "code": "class HalvingRandomSearchCV(BaseSuccessiveHalving):\n \"\"\"Randomized search on hyper parameters.\n\n The search strategy starts evaluating all the candidates with a small\n amount of resources and iteratively selects the best candidates, using more\n and more resources.\n\n The candidates are sampled at random from the parameter space and the\n number of sampled candidates is determined by ``n_candidates``.\n\n Read more in the :ref:`User guide`.\n\n .. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingRandomSearchCV\n\n Parameters\n ----------\n estimator : estimator object.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\n param_distributions : dict\n Dictionary with parameters names (string) as keys and distributions\n or lists of parameters to try. Distributions must provide a ``rvs``\n method for sampling (such as those from scipy.stats.distributions).\n If a list is given, it is sampled uniformly.\n\n n_candidates : int, default='exhaust'\n The number of candidate parameters to sample, at the first\n iteration. Using 'exhaust' will sample enough candidates so that the\n last iteration uses as many resources as possible, based on\n `min_resources`, `max_resources` and `factor`. In this case,\n `min_resources` cannot be 'exhaust'.\n\n factor : int or float, default=3\n The 'halving' parameter, which determines the proportion of candidates\n that are selected for each subsequent iteration. For example,\n ``factor=3`` means that only one third of the candidates are selected.\n\n resource : ``'n_samples'`` or str, default='n_samples'\n Defines the resource that increases with each iteration. By default,\n the resource is the number of samples. It can also be set to any\n parameter of the base estimator that accepts positive integer\n values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n boosting estimator. In this case ``max_resources`` cannot be 'auto'\n and must be set explicitly.\n\n max_resources : int, default='auto'\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. By default, this is set ``n_samples`` when\n ``resource='n_samples'`` (default), else an error is raised.\n\n min_resources : {'exhaust', 'smallest'} or int, default='smallest'\n The minimum amount of resource that any candidate is allowed to use\n for a given iteration. Equivalently, this defines the amount of\n resources `r0` that are allocated for each candidate at the first\n iteration.\n\n - 'smallest' is a heuristic that sets `r0` to a small value:\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n - 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. 
Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming. 'exhaust' isn't available when `n_candidates='exhaust'`.\n\n Note that the amount of resources used at each iteration is always a\n multiple of ``min_resources``.\n\n aggressive_elimination : bool, default=False\n This is only relevant in cases where there isn't enough resources to\n reduce the remaining candidates to at most `factor` after the last\n iteration. If ``True``, then the search process will 'replay' the\n first iteration for as long as needed until the number of candidates\n is small enough. This is ``False`` by default, which means that the\n last iteration may evaluate more than ``factor`` candidates. See\n :ref:`aggressive_elimination` for more details.\n\n cv : int, cross-validation generator or an iterable, default=5\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer.\n\n scoring : string, callable, or None, default=None\n A single string (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n If None, the estimator's score method is used.\n\n refit : bool, default=True\n If True, refit an estimator using the best found parameters on the\n whole dataset.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``HalvingRandomSearchCV`` instance.\n\n error_score : 'raise' or numeric\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error. Default is ``np.nan``\n\n return_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n random_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for subsampling the dataset\n when `resources != 'n_samples'`. 
Also used for random uniform\n sampling from lists of possible values instead of scipy.stats\n distributions.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n n_jobs : int or None, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n verbose : int\n Controls the verbosity: the higher, the more messages.\n\n Attributes\n ----------\n n_resources_ : list of int\n The amount of resources used at each iteration.\n\n n_candidates_ : list of int\n The number of candidate parameters that were evaluated at each\n iteration.\n\n n_remaining_candidates_ : int\n The number of candidate parameters that are left after the last\n iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\n max_resources_ : int\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. Note that since the number of resources used at\n each iteration must be a multiple of ``min_resources_``, the actual\n number of resources used at the last iteration may be smaller than\n ``max_resources_``.\n\n min_resources_ : int\n The amount of resources that are allocated for each candidate at the\n first iteration.\n\n n_iterations_ : int\n The actual number of iterations that were run. This is equal to\n ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n Else, this is equal to ``min(n_possible_iterations_,\n n_required_iterations_)``.\n\n n_possible_iterations_ : int\n The number of iterations that are possible starting with\n ``min_resources_`` resources and without exceeding\n ``max_resources_``.\n\n n_required_iterations_ : int\n The number of iterations that are required to end up with less than\n ``factor`` candidates at the last iteration, starting with\n ``min_resources_`` resources. This will be smaller than\n ``n_possible_iterations_`` when there isn't enough resources.\n\n cv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``. It contains many informations for\n analysing the results of a search.\n Please refer to the :ref:`User guide`\n for details.\n\n best_estimator_ : estimator or dict\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. 
Not available if ``refit=False``.\n\n best_score_ : float\n Mean cross-validated score of the best_estimator.\n\n best_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\n best_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\n scorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\n n_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\n refit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\n See Also\n --------\n :class:`HalvingGridSearchCV`:\n Search over a grid of parameters using successive halving.\n\n Notes\n -----\n The parameters selected are those that maximize the score of the held-out\n data, according to the scoring parameter.\n\n Examples\n --------\n\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.ensemble import RandomForestClassifier\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> from sklearn.model_selection import HalvingRandomSearchCV\n >>> from scipy.stats import randint\n ...\n >>> X, y = load_iris(return_X_y=True)\n >>> clf = RandomForestClassifier(random_state=0)\n >>> np.random.seed(0)\n ...\n >>> param_distributions = {\"max_depth\": [3, None],\n ... \"min_samples_split\": randint(2, 11)}\n >>> search = HalvingRandomSearchCV(clf, param_distributions,\n ... resource='n_estimators',\n ... max_resources=10,\n ... random_state=0).fit(X, y)\n >>> search.best_params_ # doctest: +SKIP\n {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}\n \"\"\"\n _required_parameters = [\"estimator\", \"param_distributions\"]\n\n def __init__(self, estimator, param_distributions, *,\n n_candidates='exhaust', factor=3, resource='n_samples',\n max_resources='auto', min_resources='smallest',\n aggressive_elimination=False, cv=5, scoring=None,\n refit=True, error_score=np.nan, return_train_score=True,\n random_state=None, n_jobs=None, verbose=0):\n super().__init__(estimator, scoring=scoring,\n n_jobs=n_jobs, refit=refit, verbose=verbose, cv=cv,\n random_state=random_state, error_score=error_score,\n return_train_score=return_train_score,\n max_resources=max_resources, resource=resource,\n factor=factor, min_resources=min_resources,\n aggressive_elimination=aggressive_elimination)\n self.param_distributions = param_distributions\n self.n_candidates = n_candidates\n\n def _generate_candidate_params(self):\n n_candidates_first_iter = self.n_candidates\n if n_candidates_first_iter == 'exhaust':\n # This will generate enough candidate so that the last iteration\n # uses as much resources as possible\n n_candidates_first_iter = (\n self.max_resources_ // self.min_resources_)\n return ParameterSampler(self.param_distributions,\n n_candidates_first_iter,\n random_state=self.random_state)", + "instance_attributes": [ + { + "name": "n_candidates", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter", + "name": "_SubsampleMetaSplitter", + "qname": "sklearn.model_selection._search_successive_halving._SubsampleMetaSplitter", + "decorators": [], + "superclasses": [], + 
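[Editor's sketch, not part of the dump] The `'exhaust'` branch of `_generate_candidate_params` above sizes the first iteration as `max_resources_ // min_resources_`, so the halving schedule can spend close to the full budget by the last round. The budget numbers below are illustrative assumptions; in `HalvingRandomSearchCV` they are computed during `fit`:

import math

max_resources, min_resources, factor = 1000, 40, 3  # assumed budget

n = max_resources // min_resources  # 'exhaust' rule from the code: 25
r = min_resources
while r * factor <= max_resources and n > 1:
    print(f"{n:2d} candidates, {r:4d} resources each")
    n = math.ceil(n / factor)  # keep roughly the best 1/factor candidates
    r *= factor                # give survivors factor x the budget
print(f"{n:2d} candidates, {r:4d} resources each  (last iteration)")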
"methods": [ + "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter/__init__", + "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter/split" + ], + "is_public": false, + "reexported_by": [], + "description": "Splitter that subsamples a given fraction of the dataset", + "docstring": "Splitter that subsamples a given fraction of the dataset", + "code": "class _SubsampleMetaSplitter:\n \"\"\"Splitter that subsamples a given fraction of the dataset\"\"\"\n def __init__(self, *, base_cv, fraction, subsample_test, random_state):\n self.base_cv = base_cv\n self.fraction = fraction\n self.subsample_test = subsample_test\n self.random_state = random_state\n\n def split(self, X, y, groups=None):\n for train_idx, test_idx in self.base_cv.split(X, y, groups):\n train_idx = resample(\n train_idx, replace=False, random_state=self.random_state,\n n_samples=int(self.fraction * train_idx.shape[0])\n )\n if self.subsample_test:\n test_idx = resample(\n test_idx, replace=False, random_state=self.random_state,\n n_samples=int(self.fraction * test_idx.shape[0])\n )\n yield train_idx, test_idx", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator", + "name": "BaseCrossValidator", + "qname": "sklearn.model_selection._split.BaseCrossValidator", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/split", + "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/_iter_test_masks", + "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/_iter_test_indices", + "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/get_n_splits", + "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/__repr__" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for all cross-validators\n\nImplementations must define `_iter_test_masks` or `_iter_test_indices`.", + "docstring": "Base class for all cross-validators\n\nImplementations must define `_iter_test_masks` or `_iter_test_indices`.", + "code": "class BaseCrossValidator(metaclass=ABCMeta):\n \"\"\"Base class for all cross-validators\n\n Implementations must define `_iter_test_masks` or `_iter_test_indices`.\n \"\"\"\n def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n X, y, groups = indexable(X, y, groups)\n indices = np.arange(_num_samples(X))\n for test_index in self._iter_test_masks(X, y, groups):\n train_index = indices[np.logical_not(test_index)]\n test_index = indices[test_index]\n yield train_index, test_index\n\n # Since subclasses must implement either _iter_test_masks or\n # _iter_test_indices, neither can be abstract.\n def _iter_test_masks(self, X=None, y=None, groups=None):\n \"\"\"Generates boolean masks corresponding to test sets.\n\n By default, delegates to _iter_test_indices(X, y, 
groups)\n \"\"\"\n for test_index in self._iter_test_indices(X, y, groups):\n test_mask = np.zeros(_num_samples(X), dtype=bool)\n test_mask[test_index] = True\n yield test_mask\n\n def _iter_test_indices(self, X=None, y=None, groups=None):\n \"\"\"Generates integer indices corresponding to test sets.\"\"\"\n raise NotImplementedError\n\n @abstractmethod\n def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\"\"\"\n\n def __repr__(self):\n return _build_repr(self)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit", + "name": "BaseShuffleSplit", + "qname": "sklearn.model_selection._split.BaseShuffleSplit", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/__init__", + "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/split", + "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/_iter_indices", + "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/get_n_splits", + "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/__repr__" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for ShuffleSplit and StratifiedShuffleSplit", + "docstring": "Base class for ShuffleSplit and StratifiedShuffleSplit", + "code": "class BaseShuffleSplit(metaclass=ABCMeta):\n \"\"\"Base class for ShuffleSplit and StratifiedShuffleSplit\"\"\"\n @_deprecate_positional_args\n def __init__(self, n_splits=10, *, test_size=None, train_size=None,\n random_state=None):\n self.n_splits = n_splits\n self.test_size = test_size\n self.train_size = train_size\n self.random_state = random_state\n self._default_test_size = 0.1\n\n def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n\n Notes\n -----\n Randomized CV splitters may return different results for each call of\n split. 
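[Editor's sketch, not part of the dump] The `BaseCrossValidator` contract above is small: a subclass supplies `_iter_test_indices` (or `_iter_test_masks`) plus `get_n_splits`, and inherits `split`. A toy subclass, assuming `BaseCrossValidator` is re-exported from `sklearn.model_selection` as in this version:

import numpy as np
from sklearn.model_selection import BaseCrossValidator

class EvenOddSplit(BaseCrossValidator):
    """Toy 2-fold splitter: test on even indices, then on odd ones."""

    def _iter_test_indices(self, X=None, y=None, groups=None):
        n = len(X)
        yield np.arange(0, n, 2)
        yield np.arange(1, n, 2)

    def get_n_splits(self, X=None, y=None, groups=None):
        return 2

for train, test in EvenOddSplit().split(np.zeros((6, 1))):
    print(train, test)
# [1 3 5] [0 2 4]
# [0 2 4] [1 3 5]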
You can make the results identical by setting `random_state`\n to an integer.\n \"\"\"\n X, y, groups = indexable(X, y, groups)\n for train, test in self._iter_indices(X, y, groups):\n yield train, test\n\n @abstractmethod\n def _iter_indices(self, X, y=None, groups=None):\n \"\"\"Generate (train, test) indices\"\"\"\n\n def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n return self.n_splits\n\n def __repr__(self):\n return _build_repr(self)", + "instance_attributes": [ + { + "name": "n_splits", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "_default_test_size", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold", + "name": "GroupKFold", + "qname": "sklearn.model_selection._split.GroupKFold", + "decorators": [], + "superclasses": ["_BaseKFold"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/GroupKFold/__init__", + "scikit-learn/sklearn.model_selection._split/GroupKFold/_iter_test_indices", + "scikit-learn/sklearn.model_selection._split/GroupKFold/split" + ], + "is_public": false, + "reexported_by": [], + "description": "K-fold iterator variant with non-overlapping groups.\n\nThe same group will not appear in two different folds (the number of\ndistinct groups has to be at least equal to the number of folds).\n\nThe folds are approximately balanced in the sense that the number of\ndistinct groups is approximately the same in each fold.\n\nRead more in the :ref:`User Guide `.", + "docstring": "K-fold iterator variant with non-overlapping groups.\n\nThe same group will not appear in two different folds (the number of\ndistinct groups has to be at least equal to the number of folds).\n\nThe folds are approximately balanced in the sense that the number of\ndistinct groups is approximately the same in each fold.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import GroupKFold\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 3, 4])\n>>> groups = np.array([0, 0, 2, 2])\n>>> group_kfold = GroupKFold(n_splits=2)\n>>> group_kfold.get_n_splits(X, y, groups)\n2\n>>> print(group_kfold)\nGroupKFold(n_splits=2)\n>>> for train_index, test_index in group_kfold.split(X, y, groups):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n... 
print(X_train, X_test, y_train, y_test)\n...\nTRAIN: [0 1] TEST: [2 3]\n[[1 2]\n [3 4]] [[5 6]\n [7 8]] [1 2] [3 4]\nTRAIN: [2 3] TEST: [0 1]\n[[5 6]\n [7 8]] [[1 2]\n [3 4]] [3 4] [1 2]\n\nSee Also\n--------\nLeaveOneGroupOut : For splitting the data according to explicit\n domain-specific stratification of the dataset.", + "code": "class GroupKFold(_BaseKFold):\n \"\"\"K-fold iterator variant with non-overlapping groups.\n\n The same group will not appear in two different folds (the number of\n distinct groups has to be at least equal to the number of folds).\n\n The folds are approximately balanced in the sense that the number of\n distinct groups is approximately the same in each fold.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_splits : int, default=5\n Number of folds. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import GroupKFold\n >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n >>> y = np.array([1, 2, 3, 4])\n >>> groups = np.array([0, 0, 2, 2])\n >>> group_kfold = GroupKFold(n_splits=2)\n >>> group_kfold.get_n_splits(X, y, groups)\n 2\n >>> print(group_kfold)\n GroupKFold(n_splits=2)\n >>> for train_index, test_index in group_kfold.split(X, y, groups):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... X_train, X_test = X[train_index], X[test_index]\n ... y_train, y_test = y[train_index], y[test_index]\n ... print(X_train, X_test, y_train, y_test)\n ...\n TRAIN: [0 1] TEST: [2 3]\n [[1 2]\n [3 4]] [[5 6]\n [7 8]] [1 2] [3 4]\n TRAIN: [2 3] TEST: [0 1]\n [[5 6]\n [7 8]] [[1 2]\n [3 4]] [3 4] [1 2]\n\n See Also\n --------\n LeaveOneGroupOut : For splitting the data according to explicit\n domain-specific stratification of the dataset.\n \"\"\"\n def __init__(self, n_splits=5):\n super().__init__(n_splits, shuffle=False, random_state=None)\n\n def _iter_test_indices(self, X, y, groups):\n if groups is None:\n raise ValueError(\"The 'groups' parameter should not be None.\")\n groups = check_array(groups, ensure_2d=False, dtype=None)\n\n unique_groups, groups = np.unique(groups, return_inverse=True)\n n_groups = len(unique_groups)\n\n if self.n_splits > n_groups:\n raise ValueError(\"Cannot have number of splits n_splits=%d greater\"\n \" than the number of groups: %d.\"\n % (self.n_splits, n_groups))\n\n # Weight groups by their number of occurrences\n n_samples_per_group = np.bincount(groups)\n\n # Distribute the most frequent groups first\n indices = np.argsort(n_samples_per_group)[::-1]\n n_samples_per_group = n_samples_per_group[indices]\n\n # Total weight of each fold\n n_samples_per_fold = np.zeros(self.n_splits)\n\n # Mapping from group index to fold index\n group_to_fold = np.zeros(len(unique_groups))\n\n # Distribute samples by adding the largest weight to the lightest fold\n for group_index, weight in enumerate(n_samples_per_group):\n lightest_fold = np.argmin(n_samples_per_fold)\n n_samples_per_fold[lightest_fold] += weight\n group_to_fold[indices[group_index]] = lightest_fold\n\n indices = group_to_fold[groups]\n\n for f in range(self.n_splits):\n yield np.where(indices == f)[0]\n\n def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of 
shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n return super().split(X, y, groups)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit", + "name": "GroupShuffleSplit", + "qname": "sklearn.model_selection._split.GroupShuffleSplit", + "decorators": [], + "superclasses": ["ShuffleSplit"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/__init__", + "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/_iter_indices", + "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/split" + ], + "is_public": false, + "reexported_by": [], + "description": "Shuffle-Group(s)-Out cross-validation iterator\n\nProvides randomized train/test indices to split data according to a\nthird-party provided group. This group information can be used to encode\narbitrary domain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and GroupShuffleSplit is that\nthe former generates splits using all subsets of size ``p`` unique groups,\nwhereas GroupShuffleSplit generates a user-determined number of random\ntest splits, each with a user-determined fraction of unique groups.\n\nFor example, a less computationally intensive alternative to\n``LeavePGroupsOut(p=10)`` would be\n``GroupShuffleSplit(test_size=10, n_splits=100)``.\n\nNote: The parameters ``test_size`` and ``train_size`` refer to groups, and\nnot to samples, as in ShuffleSplit.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Shuffle-Group(s)-Out cross-validation iterator\n\nProvides randomized train/test indices to split data according to a\nthird-party provided group. This group information can be used to encode\narbitrary domain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and GroupShuffleSplit is that\nthe former generates splits using all subsets of size ``p`` unique groups,\nwhereas GroupShuffleSplit generates a user-determined number of random\ntest splits, each with a user-determined fraction of unique groups.\n\nFor example, a less computationally intensive alternative to\n``LeavePGroupsOut(p=10)`` would be\n``GroupShuffleSplit(test_size=10, n_splits=100)``.\n\nNote: The parameters ``test_size`` and ``train_size`` refer to groups, and\nnot to samples, as in ShuffleSplit.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of re-shuffling & splitting iterations.\n\ntest_size : float, int, default=0.2\n If float, should be between 0.0 and 1.0 and represent the proportion\n of groups to include in the test split (rounded up). If int,\n represents the absolute number of test groups. If None, the value is\n set to the complement of the train size.\n The default will change in version 0.21. 
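[Editor's sketch, not part of the dump] `GroupKFold._iter_test_indices` above balances folds greedily: groups are sorted by size, largest first, and each is assigned to the currently lightest fold. The same idea in isolation, with assumed group sizes:

import numpy as np

sizes = np.array([5, 3, 3, 2, 1])      # samples per group (assumed)
n_splits = 2
fold_weight = np.zeros(n_splits)
fold_of_group = np.empty(len(sizes), dtype=int)
for g in np.argsort(sizes)[::-1]:      # largest group first
    lightest = np.argmin(fold_weight)  # currently lightest fold
    fold_weight[lightest] += sizes[g]
    fold_of_group[g] = lightest
print(fold_weight)    # [7. 7.] -- folds balanced by sample count
print(fold_of_group)  # [0 1 1 0 1]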
It will remain 0.2 only\n if ``train_size`` is unspecified, otherwise it will complement\n the specified ``train_size``.\n\ntrain_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the groups to include in the train split. If\n int, represents the absolute number of train groups. If None,\n the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the training and testing indices produced.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import GroupShuffleSplit\n>>> X = np.ones(shape=(8, 2))\n>>> y = np.ones(shape=(8, 1))\n>>> groups = np.array([1, 1, 2, 2, 2, 3, 3, 3])\n>>> print(groups.shape)\n(8,)\n>>> gss = GroupShuffleSplit(n_splits=2, train_size=.7, random_state=42)\n>>> gss.get_n_splits()\n2\n>>> for train_idx, test_idx in gss.split(X, y, groups):\n... print(\"TRAIN:\", train_idx, \"TEST:\", test_idx)\nTRAIN: [2 3 4 5 6 7] TEST: [0 1]\nTRAIN: [0 1 5 6 7] TEST: [2 3 4]", + "code": "class GroupShuffleSplit(ShuffleSplit):\n '''Shuffle-Group(s)-Out cross-validation iterator\n\n Provides randomized train/test indices to split data according to a\n third-party provided group. This group information can be used to encode\n arbitrary domain specific stratifications of the samples as integers.\n\n For instance the groups could be the year of collection of the samples\n and thus allow for cross-validation against time-based splits.\n\n The difference between LeavePGroupsOut and GroupShuffleSplit is that\n the former generates splits using all subsets of size ``p`` unique groups,\n whereas GroupShuffleSplit generates a user-determined number of random\n test splits, each with a user-determined fraction of unique groups.\n\n For example, a less computationally intensive alternative to\n ``LeavePGroupsOut(p=10)`` would be\n ``GroupShuffleSplit(test_size=10, n_splits=100)``.\n\n Note: The parameters ``test_size`` and ``train_size`` refer to groups, and\n not to samples, as in ShuffleSplit.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_splits : int, default=5\n Number of re-shuffling & splitting iterations.\n\n test_size : float, int, default=0.2\n If float, should be between 0.0 and 1.0 and represent the proportion\n of groups to include in the test split (rounded up). If int,\n represents the absolute number of test groups. If None, the value is\n set to the complement of the train size.\n The default will change in version 0.21. It will remain 0.2 only\n if ``train_size`` is unspecified, otherwise it will complement\n the specified ``train_size``.\n\n train_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the groups to include in the train split. If\n int, represents the absolute number of train groups. 
If None,\n the value is automatically set to the complement of the test size.\n\n random_state : int, RandomState instance or None, default=None\n Controls the randomness of the training and testing indices produced.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import GroupShuffleSplit\n >>> X = np.ones(shape=(8, 2))\n >>> y = np.ones(shape=(8, 1))\n >>> groups = np.array([1, 1, 2, 2, 2, 3, 3, 3])\n >>> print(groups.shape)\n (8,)\n >>> gss = GroupShuffleSplit(n_splits=2, train_size=.7, random_state=42)\n >>> gss.get_n_splits()\n 2\n >>> for train_idx, test_idx in gss.split(X, y, groups):\n ... print(\"TRAIN:\", train_idx, \"TEST:\", test_idx)\n TRAIN: [2 3 4 5 6 7] TEST: [0 1]\n TRAIN: [0 1 5 6 7] TEST: [2 3 4]\n '''\n @_deprecate_positional_args\n def __init__(self, n_splits=5, *, test_size=None, train_size=None,\n random_state=None):\n super().__init__(\n n_splits=n_splits,\n test_size=test_size,\n train_size=train_size,\n random_state=random_state)\n self._default_test_size = 0.2\n\n def _iter_indices(self, X, y, groups):\n if groups is None:\n raise ValueError(\"The 'groups' parameter should not be None.\")\n groups = check_array(groups, ensure_2d=False, dtype=None)\n classes, group_indices = np.unique(groups, return_inverse=True)\n for group_train, group_test in super()._iter_indices(X=classes):\n # these are the indices of classes in the partition\n # invert them into data indices\n\n train = np.flatnonzero(np.in1d(group_indices, group_train))\n test = np.flatnonzero(np.in1d(group_indices, group_test))\n\n yield train, test\n\n def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n\n Notes\n -----\n Randomized CV splitters may return different results for each call of\n split. You can make the results identical by setting `random_state`\n to an integer.\n \"\"\"\n return super().split(X, y, groups)", + "instance_attributes": [ + { + "name": "_default_test_size", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/KFold", + "name": "KFold", + "qname": "sklearn.model_selection._split.KFold", + "decorators": [], + "superclasses": ["_BaseKFold"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/KFold/__init__", + "scikit-learn/sklearn.model_selection._split/KFold/_iter_test_indices" + ], + "is_public": false, + "reexported_by": [], + "description": "K-Folds cross-validator\n\nProvides train/test indices to split data in train/test sets. Split\ndataset into k consecutive folds (without shuffling by default).\n\nEach fold is then used once as a validation while the k - 1 remaining\nfolds form the training set.\n\nRead more in the :ref:`User Guide `.", + "docstring": "K-Folds cross-validator\n\nProvides train/test indices to split data in train/test sets. 
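[Editor's sketch, not part of the dump] `GroupShuffleSplit._iter_indices` above splits at the group level and then maps the chosen groups back to sample indices with `np.in1d`. That mapping step in isolation, using the `groups` array from the docstring example:

import numpy as np

groups = np.array([1, 1, 2, 2, 2, 3, 3, 3])
classes, group_idx = np.unique(groups, return_inverse=True)
group_test = np.array([0])  # suppose group '1' (class index 0) is drawn as test
test = np.flatnonzero(np.in1d(group_idx, group_test))
train = np.flatnonzero(~np.in1d(group_idx, group_test))
print(train, test)  # [2 3 4 5 6 7] [0 1]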
Split\ndataset into k consecutive folds (without shuffling by default).\n\nEach fold is then used once as a validation while the k - 1 remaining\nfolds form the training set.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\nshuffle : bool, default=False\n Whether to shuffle the data before splitting into batches.\n Note that the samples within each split will not be shuffled.\n\nrandom_state : int, RandomState instance or None, default=None\n When `shuffle` is True, `random_state` affects the ordering of the\n indices, which controls the randomness of each fold. Otherwise, this\n parameter has no effect.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import KFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([1, 2, 3, 4])\n>>> kf = KFold(n_splits=2)\n>>> kf.get_n_splits(X)\n2\n>>> print(kf)\nKFold(n_splits=2, random_state=None, shuffle=False)\n>>> for train_index, test_index in kf.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [2 3] TEST: [0 1]\nTRAIN: [0 1] TEST: [2 3]\n\nNotes\n-----\nThe first ``n_samples % n_splits`` folds have size\n``n_samples // n_splits + 1``, other folds have size\n``n_samples // n_splits``, where ``n_samples`` is the number of samples.\n\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nStratifiedKFold : Takes group information into account to avoid building\n folds with imbalanced class distributions (for binary or multiclass\n classification tasks).\n\nGroupKFold : K-fold iterator variant with non-overlapping groups.\n\nRepeatedKFold : Repeats K-Fold n times.", + "code": "class KFold(_BaseKFold):\n \"\"\"K-Folds cross-validator\n\n Provides train/test indices to split data in train/test sets. Split\n dataset into k consecutive folds (without shuffling by default).\n\n Each fold is then used once as a validation while the k - 1 remaining\n folds form the training set.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_splits : int, default=5\n Number of folds. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\n shuffle : bool, default=False\n Whether to shuffle the data before splitting into batches.\n Note that the samples within each split will not be shuffled.\n\n random_state : int, RandomState instance or None, default=None\n When `shuffle` is True, `random_state` affects the ordering of the\n indices, which controls the randomness of each fold. Otherwise, this\n parameter has no effect.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import KFold\n >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n >>> y = np.array([1, 2, 3, 4])\n >>> kf = KFold(n_splits=2)\n >>> kf.get_n_splits(X)\n 2\n >>> print(kf)\n KFold(n_splits=2, random_state=None, shuffle=False)\n >>> for train_index, test_index in kf.split(X):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... 
X_train, X_test = X[train_index], X[test_index]\n ... y_train, y_test = y[train_index], y[test_index]\n TRAIN: [2 3] TEST: [0 1]\n TRAIN: [0 1] TEST: [2 3]\n\n Notes\n -----\n The first ``n_samples % n_splits`` folds have size\n ``n_samples // n_splits + 1``, other folds have size\n ``n_samples // n_splits``, where ``n_samples`` is the number of samples.\n\n Randomized CV splitters may return different results for each call of\n split. You can make the results identical by setting `random_state`\n to an integer.\n\n See Also\n --------\n StratifiedKFold : Takes group information into account to avoid building\n folds with imbalanced class distributions (for binary or multiclass\n classification tasks).\n\n GroupKFold : K-fold iterator variant with non-overlapping groups.\n\n RepeatedKFold : Repeats K-Fold n times.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_splits=5, *, shuffle=False,\n random_state=None):\n super().__init__(n_splits=n_splits, shuffle=shuffle,\n random_state=random_state)\n\n def _iter_test_indices(self, X, y=None, groups=None):\n n_samples = _num_samples(X)\n indices = np.arange(n_samples)\n if self.shuffle:\n check_random_state(self.random_state).shuffle(indices)\n\n n_splits = self.n_splits\n fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=int)\n fold_sizes[:n_samples % n_splits] += 1\n current = 0\n for fold_size in fold_sizes:\n start, stop = current, current + fold_size\n yield indices[start:stop]\n current = stop", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut", + "name": "LeaveOneGroupOut", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut", + "decorators": [], + "superclasses": ["BaseCrossValidator"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/_iter_test_masks", + "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/get_n_splits", + "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/split" + ], + "is_public": false, + "reexported_by": [], + "description": "Leave One Group Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Leave One Group Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nRead more in the :ref:`User Guide `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeaveOneGroupOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 1, 2])\n>>> groups = np.array([1, 1, 2, 2])\n>>> logo = LeaveOneGroupOut()\n>>> logo.get_n_splits(X, y, groups)\n2\n>>> logo.get_n_splits(groups=groups) # 'groups' is always required\n2\n>>> print(logo)\nLeaveOneGroupOut()\n>>> for train_index, test_index in logo.split(X, y, groups):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... 
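[Editor's sketch, not part of the dump] The fold sizing in `KFold._iter_test_indices` above matches the Notes section: every fold gets `n_samples // n_splits` samples and the first `n_samples % n_splits` folds absorb the remainder:

import numpy as np

n_samples, n_splits = 10, 3
fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=int)
fold_sizes[:n_samples % n_splits] += 1
print(fold_sizes, fold_sizes.sum())  # [4 3 3] 10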
y_train, y_test = y[train_index], y[test_index]\n... print(X_train, X_test, y_train, y_test)\nTRAIN: [2 3] TEST: [0 1]\n[[5 6]\n [7 8]] [[1 2]\n [3 4]] [1 2] [1 2]\nTRAIN: [0 1] TEST: [2 3]\n[[1 2]\n [3 4]] [[5 6]\n [7 8]] [1 2] [1 2]", + "code": "class LeaveOneGroupOut(BaseCrossValidator):\n \"\"\"Leave One Group Out cross-validator\n\n Provides train/test indices to split data according to a third-party\n provided group. This group information can be used to encode arbitrary\n domain specific stratifications of the samples as integers.\n\n For instance the groups could be the year of collection of the samples\n and thus allow for cross-validation against time-based splits.\n\n Read more in the :ref:`User Guide `.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import LeaveOneGroupOut\n >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n >>> y = np.array([1, 2, 1, 2])\n >>> groups = np.array([1, 1, 2, 2])\n >>> logo = LeaveOneGroupOut()\n >>> logo.get_n_splits(X, y, groups)\n 2\n >>> logo.get_n_splits(groups=groups) # 'groups' is always required\n 2\n >>> print(logo)\n LeaveOneGroupOut()\n >>> for train_index, test_index in logo.split(X, y, groups):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... X_train, X_test = X[train_index], X[test_index]\n ... y_train, y_test = y[train_index], y[test_index]\n ... print(X_train, X_test, y_train, y_test)\n TRAIN: [2 3] TEST: [0 1]\n [[5 6]\n [7 8]] [[1 2]\n [3 4]] [1 2] [1 2]\n TRAIN: [0 1] TEST: [2 3]\n [[1 2]\n [3 4]] [[5 6]\n [7 8]] [1 2] [1 2]\n\n \"\"\"\n\n def _iter_test_masks(self, X, y, groups):\n if groups is None:\n raise ValueError(\"The 'groups' parameter should not be None.\")\n # We make a copy of groups to avoid side-effects during iteration\n groups = check_array(groups, copy=True, ensure_2d=False, dtype=None)\n unique_groups = np.unique(groups)\n if len(unique_groups) <= 1:\n raise ValueError(\n \"The groups parameter contains fewer than 2 unique groups \"\n \"(%s). LeaveOneGroupOut expects at least 2.\" % unique_groups)\n for i in unique_groups:\n yield groups == i\n\n def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set. 
This 'groups' parameter must always be specified to\n calculate the number of splits, though the other parameters can be\n omitted.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n if groups is None:\n raise ValueError(\"The 'groups' parameter should not be None.\")\n groups = check_array(groups, ensure_2d=False, dtype=None)\n return len(np.unique(groups))\n\n def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n return super().split(X, y, groups)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneOut", + "name": "LeaveOneOut", + "qname": "sklearn.model_selection._split.LeaveOneOut", + "decorators": [], + "superclasses": ["BaseCrossValidator"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/LeaveOneOut/_iter_test_indices", + "scikit-learn/sklearn.model_selection._split/LeaveOneOut/get_n_splits" + ], + "is_public": false, + "reexported_by": [], + "description": "Leave-One-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. Each\nsample is used once as a test set (singleton) while the remaining\nsamples form the training set.\n\nNote: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and\n``LeavePOut(p=1)`` where ``n`` is the number of samples.\n\nDue to the high number of test sets (which is the same as the\nnumber of samples) this cross-validation method can be very costly.\nFor large datasets one should favor :class:`KFold`, :class:`ShuffleSplit`\nor :class:`StratifiedKFold`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Leave-One-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. Each\nsample is used once as a test set (singleton) while the remaining\nsamples form the training set.\n\nNote: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and\n``LeavePOut(p=1)`` where ``n`` is the number of samples.\n\nDue to the high number of test sets (which is the same as the\nnumber of samples) this cross-validation method can be very costly.\nFor large datasets one should favor :class:`KFold`, :class:`ShuffleSplit`\nor :class:`StratifiedKFold`.\n\nRead more in the :ref:`User Guide `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeaveOneOut\n>>> X = np.array([[1, 2], [3, 4]])\n>>> y = np.array([1, 2])\n>>> loo = LeaveOneOut()\n>>> loo.get_n_splits(X)\n2\n>>> print(loo)\nLeaveOneOut()\n>>> for train_index, test_index in loo.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n... 
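[Editor's sketch, not part of the dump] `LeaveOneGroupOut._iter_test_masks` above is just one boolean mask per unique group. The same masks computed directly, reproducing the splits from the docstring example:

import numpy as np

groups = np.array([1, 1, 2, 2])
for g in np.unique(groups):
    test_mask = groups == g
    print(np.flatnonzero(~test_mask), np.flatnonzero(test_mask))
# [2 3] [0 1]
# [0 1] [2 3]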
print(X_train, X_test, y_train, y_test)\nTRAIN: [1] TEST: [0]\n[[3 4]] [[1 2]] [2] [1]\nTRAIN: [0] TEST: [1]\n[[1 2]] [[3 4]] [1] [2]\n\nSee Also\n--------\nLeaveOneGroupOut : For splitting the data according to explicit,\n domain-specific stratification of the dataset.\nGroupKFold : K-fold iterator variant with non-overlapping groups.", + "code": "class LeaveOneOut(BaseCrossValidator):\n \"\"\"Leave-One-Out cross-validator\n\n Provides train/test indices to split data in train/test sets. Each\n sample is used once as a test set (singleton) while the remaining\n samples form the training set.\n\n Note: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and\n ``LeavePOut(p=1)`` where ``n`` is the number of samples.\n\n Due to the high number of test sets (which is the same as the\n number of samples) this cross-validation method can be very costly.\n For large datasets one should favor :class:`KFold`, :class:`ShuffleSplit`\n or :class:`StratifiedKFold`.\n\n Read more in the :ref:`User Guide `.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import LeaveOneOut\n >>> X = np.array([[1, 2], [3, 4]])\n >>> y = np.array([1, 2])\n >>> loo = LeaveOneOut()\n >>> loo.get_n_splits(X)\n 2\n >>> print(loo)\n LeaveOneOut()\n >>> for train_index, test_index in loo.split(X):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... X_train, X_test = X[train_index], X[test_index]\n ... y_train, y_test = y[train_index], y[test_index]\n ... print(X_train, X_test, y_train, y_test)\n TRAIN: [1] TEST: [0]\n [[3 4]] [[1 2]] [2] [1]\n TRAIN: [0] TEST: [1]\n [[1 2]] [[3 4]] [1] [2]\n\n See Also\n --------\n LeaveOneGroupOut : For splitting the data according to explicit,\n domain-specific stratification of the dataset.\n GroupKFold : K-fold iterator variant with non-overlapping groups.\n \"\"\"\n\n def _iter_test_indices(self, X, y=None, groups=None):\n n_samples = _num_samples(X)\n if n_samples <= 1:\n raise ValueError(\n 'Cannot perform LeaveOneOut with n_samples={}.'.format(\n n_samples)\n )\n return range(n_samples)\n\n def get_n_splits(self, X, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n if X is None:\n raise ValueError(\"The 'X' parameter should not be None.\")\n return _num_samples(X)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut", + "name": "LeavePGroupsOut", + "qname": "sklearn.model_selection._split.LeavePGroupsOut", + "decorators": [], + "superclasses": ["BaseCrossValidator"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/__init__", + "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/_iter_test_masks", + "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/get_n_splits", + "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/split" + ], + "is_public": false, + "reexported_by": [], + "description": "Leave P Group(s) Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. 
This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and LeaveOneGroupOut is that\nthe former builds the test sets with all the samples assigned to\n``p`` different values of the groups while the latter uses samples\nall assigned the same groups.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Leave P Group(s) Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and LeaveOneGroupOut is that\nthe former builds the test sets with all the samples assigned to\n``p`` different values of the groups while the latter uses samples\nall assigned the same groups.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_groups : int\n Number of groups (``p``) to leave out in the test split.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeavePGroupsOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6]])\n>>> y = np.array([1, 2, 1])\n>>> groups = np.array([1, 2, 3])\n>>> lpgo = LeavePGroupsOut(n_groups=2)\n>>> lpgo.get_n_splits(X, y, groups)\n3\n>>> lpgo.get_n_splits(groups=groups) # 'groups' is always required\n3\n>>> print(lpgo)\nLeavePGroupsOut(n_groups=2)\n>>> for train_index, test_index in lpgo.split(X, y, groups):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n... print(X_train, X_test, y_train, y_test)\nTRAIN: [2] TEST: [0 1]\n[[5 6]] [[1 2]\n [3 4]] [1] [1 2]\nTRAIN: [1] TEST: [0 2]\n[[3 4]] [[1 2]\n [5 6]] [2] [1 1]\nTRAIN: [0] TEST: [1 2]\n[[1 2]] [[3 4]\n [5 6]] [1] [2 1]\n\nSee Also\n--------\nGroupKFold : K-fold iterator variant with non-overlapping groups.", + "code": "class LeavePGroupsOut(BaseCrossValidator):\n \"\"\"Leave P Group(s) Out cross-validator\n\n Provides train/test indices to split data according to a third-party\n provided group. 
This group information can be used to encode arbitrary\n domain specific stratifications of the samples as integers.\n\n For instance the groups could be the year of collection of the samples\n and thus allow for cross-validation against time-based splits.\n\n The difference between LeavePGroupsOut and LeaveOneGroupOut is that\n the former builds the test sets with all the samples assigned to\n ``p`` different values of the groups while the latter uses samples\n all assigned the same groups.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_groups : int\n Number of groups (``p``) to leave out in the test split.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import LeavePGroupsOut\n >>> X = np.array([[1, 2], [3, 4], [5, 6]])\n >>> y = np.array([1, 2, 1])\n >>> groups = np.array([1, 2, 3])\n >>> lpgo = LeavePGroupsOut(n_groups=2)\n >>> lpgo.get_n_splits(X, y, groups)\n 3\n >>> lpgo.get_n_splits(groups=groups) # 'groups' is always required\n 3\n >>> print(lpgo)\n LeavePGroupsOut(n_groups=2)\n >>> for train_index, test_index in lpgo.split(X, y, groups):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... X_train, X_test = X[train_index], X[test_index]\n ... y_train, y_test = y[train_index], y[test_index]\n ... print(X_train, X_test, y_train, y_test)\n TRAIN: [2] TEST: [0 1]\n [[5 6]] [[1 2]\n [3 4]] [1] [1 2]\n TRAIN: [1] TEST: [0 2]\n [[3 4]] [[1 2]\n [5 6]] [2] [1 1]\n TRAIN: [0] TEST: [1 2]\n [[1 2]] [[3 4]\n [5 6]] [1] [2 1]\n\n See Also\n --------\n GroupKFold : K-fold iterator variant with non-overlapping groups.\n \"\"\"\n\n def __init__(self, n_groups):\n self.n_groups = n_groups\n\n def _iter_test_masks(self, X, y, groups):\n if groups is None:\n raise ValueError(\"The 'groups' parameter should not be None.\")\n groups = check_array(groups, copy=True, ensure_2d=False, dtype=None)\n unique_groups = np.unique(groups)\n if self.n_groups >= len(unique_groups):\n raise ValueError(\n \"The groups parameter contains fewer than (or equal to) \"\n \"n_groups (%d) numbers of unique groups (%s). LeavePGroupsOut \"\n \"expects that at least n_groups + 1 (%d) unique groups be \"\n \"present\" % (self.n_groups, unique_groups, self.n_groups + 1))\n combi = combinations(range(len(unique_groups)), self.n_groups)\n for indices in combi:\n test_index = np.zeros(_num_samples(X), dtype=bool)\n for l in unique_groups[np.array(indices)]:\n test_index[groups == l] = True\n yield test_index\n\n def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set. 
This 'groups' parameter must always be specified to\n calculate the number of splits, though the other parameters can be\n omitted.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n if groups is None:\n raise ValueError(\"The 'groups' parameter should not be None.\")\n groups = check_array(groups, ensure_2d=False, dtype=None)\n return int(comb(len(np.unique(groups)), self.n_groups, exact=True))\n\n def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n return super().split(X, y, groups)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut", + "name": "LeavePOut", + "qname": "sklearn.model_selection._split.LeavePOut", + "decorators": [], + "superclasses": ["BaseCrossValidator"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/LeavePOut/__init__", + "scikit-learn/sklearn.model_selection._split/LeavePOut/_iter_test_indices", + "scikit-learn/sklearn.model_selection._split/LeavePOut/get_n_splits" + ], + "is_public": false, + "reexported_by": [], + "description": "Leave-P-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. This results\nin testing on all distinct samples of size p, while the remaining n - p\nsamples form the training set in each iteration.\n\nNote: ``LeavePOut(p)`` is NOT equivalent to\n``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.\n\nDue to the high number of iterations which grows combinatorically with the\nnumber of samples this cross-validation method can be very costly. For\nlarge datasets one should favor :class:`KFold`, :class:`StratifiedKFold`\nor :class:`ShuffleSplit`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Leave-P-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. This results\nin testing on all distinct samples of size p, while the remaining n - p\nsamples form the training set in each iteration.\n\nNote: ``LeavePOut(p)`` is NOT equivalent to\n``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.\n\nDue to the high number of iterations which grows combinatorically with the\nnumber of samples this cross-validation method can be very costly. For\nlarge datasets one should favor :class:`KFold`, :class:`StratifiedKFold`\nor :class:`ShuffleSplit`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\np : int\n Size of the test sets. Must be strictly less than the number of\n samples.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import LeavePOut\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n>>> y = np.array([1, 2, 3, 4])\n>>> lpo = LeavePOut(2)\n>>> lpo.get_n_splits(X)\n6\n>>> print(lpo)\nLeavePOut(p=2)\n>>> for train_index, test_index in lpo.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... 
X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [2 3] TEST: [0 1]\nTRAIN: [1 3] TEST: [0 2]\nTRAIN: [1 2] TEST: [0 3]\nTRAIN: [0 3] TEST: [1 2]\nTRAIN: [0 2] TEST: [1 3]\nTRAIN: [0 1] TEST: [2 3]", + "code": "class LeavePOut(BaseCrossValidator):\n \"\"\"Leave-P-Out cross-validator\n\n Provides train/test indices to split data in train/test sets. This results\n in testing on all distinct samples of size p, while the remaining n - p\n samples form the training set in each iteration.\n\n Note: ``LeavePOut(p)`` is NOT equivalent to\n ``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.\n\n Due to the high number of iterations which grows combinatorically with the\n number of samples this cross-validation method can be very costly. For\n large datasets one should favor :class:`KFold`, :class:`StratifiedKFold`\n or :class:`ShuffleSplit`.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n p : int\n Size of the test sets. Must be strictly less than the number of\n samples.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import LeavePOut\n >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])\n >>> y = np.array([1, 2, 3, 4])\n >>> lpo = LeavePOut(2)\n >>> lpo.get_n_splits(X)\n 6\n >>> print(lpo)\n LeavePOut(p=2)\n >>> for train_index, test_index in lpo.split(X):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... X_train, X_test = X[train_index], X[test_index]\n ... y_train, y_test = y[train_index], y[test_index]\n TRAIN: [2 3] TEST: [0 1]\n TRAIN: [1 3] TEST: [0 2]\n TRAIN: [1 2] TEST: [0 3]\n TRAIN: [0 3] TEST: [1 2]\n TRAIN: [0 2] TEST: [1 3]\n TRAIN: [0 1] TEST: [2 3]\n \"\"\"\n\n def __init__(self, p):\n self.p = p\n\n def _iter_test_indices(self, X, y=None, groups=None):\n n_samples = _num_samples(X)\n if n_samples <= self.p:\n raise ValueError(\n 'p={} must be strictly less than the number of '\n 'samples={}'.format(self.p, n_samples)\n )\n for combination in combinations(range(n_samples), self.p):\n yield np.array(combination)\n\n def get_n_splits(self, X, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n \"\"\"\n if X is None:\n raise ValueError(\"The 'X' parameter should not be None.\")\n return int(comb(_num_samples(X), self.p, exact=True))", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit", + "name": "PredefinedSplit", + "qname": "sklearn.model_selection._split.PredefinedSplit", + "decorators": [], + "superclasses": ["BaseCrossValidator"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/PredefinedSplit/__init__", + "scikit-learn/sklearn.model_selection._split/PredefinedSplit/split", + "scikit-learn/sklearn.model_selection._split/PredefinedSplit/_iter_test_masks", + "scikit-learn/sklearn.model_selection._split/PredefinedSplit/get_n_splits" + ], + "is_public": false, + "reexported_by": [], + "description": "Predefined split cross-validator\n\nProvides train/test indices to split data into train/test sets using a\npredefined scheme specified by the user with the ``test_fold`` parameter.\n\nRead more in the :ref:`User Guide 
`.\n\n.. versionadded:: 0.16", + "docstring": "Predefined split cross-validator\n\nProvides train/test indices to split data into train/test sets using a\npredefined scheme specified by the user with the ``test_fold`` parameter.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16\n\nParameters\n----------\ntest_fold : array-like of shape (n_samples,)\n The entry ``test_fold[i]`` represents the index of the test set that\n sample ``i`` belongs to. It is possible to exclude sample ``i`` from\n any test set (i.e. include sample ``i`` in every training set) by\n setting ``test_fold[i]`` equal to -1.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import PredefinedSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> test_fold = [0, 1, -1, 1]\n>>> ps = PredefinedSplit(test_fold)\n>>> ps.get_n_splits()\n2\n>>> print(ps)\nPredefinedSplit(test_fold=array([ 0, 1, -1, 1]))\n>>> for train_index, test_index in ps.split():\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [1 2 3] TEST: [0]\nTRAIN: [0 2] TEST: [1 3]", + "code": "class PredefinedSplit(BaseCrossValidator):\n \"\"\"Predefined split cross-validator\n\n Provides train/test indices to split data into train/test sets using a\n predefined scheme specified by the user with the ``test_fold`` parameter.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.16\n\n Parameters\n ----------\n test_fold : array-like of shape (n_samples,)\n The entry ``test_fold[i]`` represents the index of the test set that\n sample ``i`` belongs to. It is possible to exclude sample ``i`` from\n any test set (i.e. include sample ``i`` in every training set) by\n setting ``test_fold[i]`` equal to -1.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import PredefinedSplit\n >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n >>> y = np.array([0, 0, 1, 1])\n >>> test_fold = [0, 1, -1, 1]\n >>> ps = PredefinedSplit(test_fold)\n >>> ps.get_n_splits()\n 2\n >>> print(ps)\n PredefinedSplit(test_fold=array([ 0, 1, -1, 1]))\n >>> for train_index, test_index in ps.split():\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... X_train, X_test = X[train_index], X[test_index]\n ... 
y_train, y_test = y[train_index], y[test_index]\n TRAIN: [1 2 3] TEST: [0]\n TRAIN: [0 2] TEST: [1 3]\n \"\"\"\n\n def __init__(self, test_fold):\n self.test_fold = np.array(test_fold, dtype=int)\n self.test_fold = column_or_1d(self.test_fold)\n self.unique_folds = np.unique(self.test_fold)\n self.unique_folds = self.unique_folds[self.unique_folds != -1]\n\n def split(self, X=None, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n ind = np.arange(len(self.test_fold))\n for test_index in self._iter_test_masks():\n train_index = ind[np.logical_not(test_index)]\n test_index = ind[test_index]\n yield train_index, test_index\n\n def _iter_test_masks(self):\n \"\"\"Generates boolean masks corresponding to test sets.\"\"\"\n for f in self.unique_folds:\n test_index = np.where(self.test_fold == f)[0]\n test_mask = np.zeros(len(self.test_fold), dtype=bool)\n test_mask[test_index] = True\n yield test_mask\n\n def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n return len(self.unique_folds)", + "instance_attributes": [ + { + "name": "test_fold", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/RepeatedKFold", + "name": "RepeatedKFold", + "qname": "sklearn.model_selection._split.RepeatedKFold", + "decorators": [], + "superclasses": ["_RepeatedSplits"], + "methods": ["scikit-learn/sklearn.model_selection._split/RepeatedKFold/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Repeated K-Fold cross validator.\n\nRepeats K-Fold n times with different randomization in each repetition.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Repeated K-Fold cross validator.\n\nRepeats K-Fold n times with different randomization in each repetition.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\nn_repeats : int, default=10\n Number of times cross-validator needs to be repeated.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of each repeated cross-validation instance.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import RepeatedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)\n>>> for train_index, test_index in rkf.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... 
y_train, y_test = y[train_index], y[test_index]\n...\nTRAIN: [0 1] TEST: [2 3]\nTRAIN: [2 3] TEST: [0 1]\nTRAIN: [1 2] TEST: [0 3]\nTRAIN: [0 3] TEST: [1 2]\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nRepeatedStratifiedKFold : Repeats Stratified K-Fold n times.", + "code": "class RepeatedKFold(_RepeatedSplits):\n \"\"\"Repeated K-Fold cross validator.\n\n Repeats K-Fold n times with different randomization in each repetition.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_splits : int, default=5\n Number of folds. Must be at least 2.\n\n n_repeats : int, default=10\n Number of times cross-validator needs to be repeated.\n\n random_state : int, RandomState instance or None, default=None\n Controls the randomness of each repeated cross-validation instance.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import RepeatedKFold\n >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n >>> y = np.array([0, 0, 1, 1])\n >>> rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)\n >>> for train_index, test_index in rkf.split(X):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... X_train, X_test = X[train_index], X[test_index]\n ... y_train, y_test = y[train_index], y[test_index]\n ...\n TRAIN: [0 1] TEST: [2 3]\n TRAIN: [2 3] TEST: [0 1]\n TRAIN: [1 2] TEST: [0 3]\n TRAIN: [0 3] TEST: [1 2]\n\n Notes\n -----\n Randomized CV splitters may return different results for each call of\n split. You can make the results identical by setting `random_state`\n to an integer.\n\n See Also\n --------\n RepeatedStratifiedKFold : Repeats Stratified K-Fold n times.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, n_splits=5, n_repeats=10, random_state=None):\n super().__init__(\n KFold, n_repeats=n_repeats,\n random_state=random_state, n_splits=n_splits)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/RepeatedStratifiedKFold", + "name": "RepeatedStratifiedKFold", + "qname": "sklearn.model_selection._split.RepeatedStratifiedKFold", + "decorators": [], + "superclasses": ["_RepeatedSplits"], + "methods": ["scikit-learn/sklearn.model_selection._split/RepeatedStratifiedKFold/__init__"], + "is_public": false, + "reexported_by": [], + "description": "Repeated Stratified K-Fold cross validator.\n\nRepeats Stratified K-Fold n times with different randomization in each\nrepetition.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Repeated Stratified K-Fold cross validator.\n\nRepeats Stratified K-Fold n times with different randomization in each\nrepetition.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. 
Must be at least 2.\n\nn_repeats : int, default=10\n Number of times cross-validator needs to be repeated.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the generation of the random states for each repetition.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import RepeatedStratifiedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=2,\n... random_state=36851234)\n>>> for train_index, test_index in rskf.split(X, y):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\n...\nTRAIN: [1 2] TEST: [0 3]\nTRAIN: [0 3] TEST: [1 2]\nTRAIN: [1 3] TEST: [0 2]\nTRAIN: [0 2] TEST: [1 3]\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.\n\nSee Also\n--------\nRepeatedKFold : Repeats K-Fold n times.", + "code": "class RepeatedStratifiedKFold(_RepeatedSplits):\n \"\"\"Repeated Stratified K-Fold cross validator.\n\n Repeats Stratified K-Fold n times with different randomization in each\n repetition.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_splits : int, default=5\n Number of folds. Must be at least 2.\n\n n_repeats : int, default=10\n Number of times cross-validator needs to be repeated.\n\n random_state : int, RandomState instance or None, default=None\n Controls the generation of the random states for each repetition.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import RepeatedStratifiedKFold\n >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n >>> y = np.array([0, 0, 1, 1])\n >>> rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=2,\n ... random_state=36851234)\n >>> for train_index, test_index in rskf.split(X, y):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... X_train, X_test = X[train_index], X[test_index]\n ... y_train, y_test = y[train_index], y[test_index]\n ...\n TRAIN: [1 2] TEST: [0 3]\n TRAIN: [0 3] TEST: [1 2]\n TRAIN: [1 3] TEST: [0 2]\n TRAIN: [0 2] TEST: [1 3]\n\n Notes\n -----\n Randomized CV splitters may return different results for each call of\n split. 
You can make the results identical by setting `random_state`\n to an integer.\n\n See Also\n --------\n RepeatedKFold : Repeats K-Fold n times.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, n_splits=5, n_repeats=10, random_state=None):\n super().__init__(\n StratifiedKFold, n_repeats=n_repeats, random_state=random_state,\n n_splits=n_splits)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/ShuffleSplit", + "name": "ShuffleSplit", + "qname": "sklearn.model_selection._split.ShuffleSplit", + "decorators": [], + "superclasses": ["BaseShuffleSplit"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/ShuffleSplit/__init__", + "scikit-learn/sklearn.model_selection._split/ShuffleSplit/_iter_indices" + ], + "is_public": false, + "reexported_by": [], + "description": "Random permutation cross-validator\n\nYields indices to split data into training and test sets.\n\nNote: contrary to other cross-validation strategies, random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Random permutation cross-validator\n\nYields indices to split data into training and test sets.\n\nNote: contrary to other cross-validation strategies, random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=10\n Number of re-shuffling & splitting iterations.\n\ntest_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to include in the test split. If int, represents the\n absolute number of test samples. If None, the value is set to the\n complement of the train size. If ``train_size`` is also None, it will\n be set to 0.1.\n\ntrain_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the dataset to include in the train split. If\n int, represents the absolute number of train samples. If None,\n the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the training and testing indices produced.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import ShuffleSplit\n>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [3, 4], [5, 6]])\n>>> y = np.array([1, 2, 1, 2, 1, 2])\n>>> rs = ShuffleSplit(n_splits=5, test_size=.25, random_state=0)\n>>> rs.get_n_splits(X)\n5\n>>> print(rs)\nShuffleSplit(n_splits=5, random_state=0, test_size=0.25, train_size=None)\n>>> for train_index, test_index in rs.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\nTRAIN: [1 3 0 4] TEST: [5 2]\nTRAIN: [4 0 2 5] TEST: [1 3]\nTRAIN: [1 2 4 0] TEST: [3 5]\nTRAIN: [3 4 1 0] TEST: [5 2]\nTRAIN: [3 5 1 0] TEST: [2 4]\n>>> rs = ShuffleSplit(n_splits=5, train_size=0.5, test_size=.25,\n... random_state=0)\n>>> for train_index, test_index in rs.split(X):\n... 
print(\"TRAIN:\", train_index, \"TEST:\", test_index)\nTRAIN: [1 3 0] TEST: [5 2]\nTRAIN: [4 0 2] TEST: [1 3]\nTRAIN: [1 2 4] TEST: [3 5]\nTRAIN: [3 4 1] TEST: [5 2]\nTRAIN: [3 5 1] TEST: [2 4]", + "code": "class ShuffleSplit(BaseShuffleSplit):\n \"\"\"Random permutation cross-validator\n\n Yields indices to split data into training and test sets.\n\n Note: contrary to other cross-validation strategies, random splits\n do not guarantee that all folds will be different, although this is\n still very likely for sizeable datasets.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_splits : int, default=10\n Number of re-shuffling & splitting iterations.\n\n test_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to include in the test split. If int, represents the\n absolute number of test samples. If None, the value is set to the\n complement of the train size. If ``train_size`` is also None, it will\n be set to 0.1.\n\n train_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the dataset to include in the train split. If\n int, represents the absolute number of train samples. If None,\n the value is automatically set to the complement of the test size.\n\n random_state : int, RandomState instance or None, default=None\n Controls the randomness of the training and testing indices produced.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import ShuffleSplit\n >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [3, 4], [5, 6]])\n >>> y = np.array([1, 2, 1, 2, 1, 2])\n >>> rs = ShuffleSplit(n_splits=5, test_size=.25, random_state=0)\n >>> rs.get_n_splits(X)\n 5\n >>> print(rs)\n ShuffleSplit(n_splits=5, random_state=0, test_size=0.25, train_size=None)\n >>> for train_index, test_index in rs.split(X):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n TRAIN: [1 3 0 4] TEST: [5 2]\n TRAIN: [4 0 2 5] TEST: [1 3]\n TRAIN: [1 2 4 0] TEST: [3 5]\n TRAIN: [3 4 1 0] TEST: [5 2]\n TRAIN: [3 5 1 0] TEST: [2 4]\n >>> rs = ShuffleSplit(n_splits=5, train_size=0.5, test_size=.25,\n ... random_state=0)\n >>> for train_index, test_index in rs.split(X):\n ... 
print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n TRAIN: [1 3 0] TEST: [5 2]\n TRAIN: [4 0 2] TEST: [1 3]\n TRAIN: [1 2 4] TEST: [3 5]\n TRAIN: [3 4 1] TEST: [5 2]\n TRAIN: [3 5 1] TEST: [2 4]\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_splits=10, *, test_size=None, train_size=None,\n random_state=None):\n super().__init__(\n n_splits=n_splits,\n test_size=test_size,\n train_size=train_size,\n random_state=random_state)\n self._default_test_size = 0.1\n\n def _iter_indices(self, X, y=None, groups=None):\n n_samples = _num_samples(X)\n n_train, n_test = _validate_shuffle_split(\n n_samples, self.test_size, self.train_size,\n default_test_size=self._default_test_size)\n\n rng = check_random_state(self.random_state)\n for i in range(self.n_splits):\n # random partition\n permutation = rng.permutation(n_samples)\n ind_test = permutation[:n_test]\n ind_train = permutation[n_test:(n_test + n_train)]\n yield ind_train, ind_test", + "instance_attributes": [ + { + "name": "_default_test_size", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold", + "name": "StratifiedKFold", + "qname": "sklearn.model_selection._split.StratifiedKFold", + "decorators": [], + "superclasses": ["_BaseKFold"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/StratifiedKFold/__init__", + "scikit-learn/sklearn.model_selection._split/StratifiedKFold/_make_test_folds", + "scikit-learn/sklearn.model_selection._split/StratifiedKFold/_iter_test_masks", + "scikit-learn/sklearn.model_selection._split/StratifiedKFold/split" + ], + "is_public": false, + "reexported_by": [], + "description": "Stratified K-Folds cross-validator.\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a variation of KFold that returns\nstratified folds. The folds are made by preserving the percentage of\nsamples for each class.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Stratified K-Folds cross-validator.\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a variation of KFold that returns\nstratified folds. The folds are made by preserving the percentage of\nsamples for each class.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=5\n Number of folds. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\nshuffle : bool, default=False\n Whether to shuffle each class's samples before splitting into batches.\n Note that the samples within each split will not be shuffled.\n\nrandom_state : int, RandomState instance or None, default=None\n When `shuffle` is True, `random_state` affects the ordering of the\n indices, which controls the randomness of each fold for each class.\n Otherwise, leave `random_state` as `None`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import StratifiedKFold\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 1, 1])\n>>> skf = StratifiedKFold(n_splits=2)\n>>> skf.get_n_splits(X, y)\n2\n>>> print(skf)\nStratifiedKFold(n_splits=2, random_state=None, shuffle=False)\n>>> for train_index, test_index in skf.split(X, y):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... 
y_train, y_test = y[train_index], y[test_index]\nTRAIN: [1 3] TEST: [0 2]\nTRAIN: [0 2] TEST: [1 3]\n\nNotes\n-----\nThe implementation is designed to:\n\n* Generate test sets such that all contain the same distribution of\n classes, or as close as possible.\n* Be invariant to class label: relabelling ``y = [\"Happy\", \"Sad\"]`` to\n ``y = [1, 0]`` should not change the indices generated.\n* Preserve order dependencies in the dataset ordering, when\n ``shuffle=False``: all samples from class k in some test set were\n contiguous in y, or separated in y by samples from classes other than k.\n* Generate test sets where the smallest and largest differ by at most one\n sample.\n\n.. versionchanged:: 0.22\n The previous implementation did not follow the last constraint.\n\nSee Also\n--------\nRepeatedStratifiedKFold : Repeats Stratified K-Fold n times.", + "code": "class StratifiedKFold(_BaseKFold):\n \"\"\"Stratified K-Folds cross-validator.\n\n Provides train/test indices to split data in train/test sets.\n\n This cross-validation object is a variation of KFold that returns\n stratified folds. The folds are made by preserving the percentage of\n samples for each class.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_splits : int, default=5\n Number of folds. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\n shuffle : bool, default=False\n Whether to shuffle each class's samples before splitting into batches.\n Note that the samples within each split will not be shuffled.\n\n random_state : int, RandomState instance or None, default=None\n When `shuffle` is True, `random_state` affects the ordering of the\n indices, which controls the randomness of each fold for each class.\n Otherwise, leave `random_state` as `None`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import StratifiedKFold\n >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])\n >>> y = np.array([0, 0, 1, 1])\n >>> skf = StratifiedKFold(n_splits=2)\n >>> skf.get_n_splits(X, y)\n 2\n >>> print(skf)\n StratifiedKFold(n_splits=2, random_state=None, shuffle=False)\n >>> for train_index, test_index in skf.split(X, y):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... X_train, X_test = X[train_index], X[test_index]\n ... y_train, y_test = y[train_index], y[test_index]\n TRAIN: [1 3] TEST: [0 2]\n TRAIN: [0 2] TEST: [1 3]\n\n Notes\n -----\n The implementation is designed to:\n\n * Generate test sets such that all contain the same distribution of\n classes, or as close as possible.\n * Be invariant to class label: relabelling ``y = [\"Happy\", \"Sad\"]`` to\n ``y = [1, 0]`` should not change the indices generated.\n * Preserve order dependencies in the dataset ordering, when\n ``shuffle=False``: all samples from class k in some test set were\n contiguous in y, or separated in y by samples from classes other than k.\n * Generate test sets where the smallest and largest differ by at most one\n sample.\n\n .. 
versionchanged:: 0.22\n The previous implementation did not follow the last constraint.\n\n See Also\n --------\n RepeatedStratifiedKFold : Repeats Stratified K-Fold n times.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_splits=5, *, shuffle=False, random_state=None):\n super().__init__(n_splits=n_splits, shuffle=shuffle,\n random_state=random_state)\n\n def _make_test_folds(self, X, y=None):\n rng = check_random_state(self.random_state)\n y = np.asarray(y)\n type_of_target_y = type_of_target(y)\n allowed_target_types = ('binary', 'multiclass')\n if type_of_target_y not in allowed_target_types:\n raise ValueError(\n 'Supported target types are: {}. Got {!r} instead.'.format(\n allowed_target_types, type_of_target_y))\n\n y = column_or_1d(y)\n\n _, y_idx, y_inv = np.unique(y, return_index=True, return_inverse=True)\n # y_inv encodes y according to lexicographic order. We invert y_idx to\n # map the classes so that they are encoded by order of appearance:\n # 0 represents the first label appearing in y, 1 the second, etc.\n _, class_perm = np.unique(y_idx, return_inverse=True)\n y_encoded = class_perm[y_inv]\n\n n_classes = len(y_idx)\n y_counts = np.bincount(y_encoded)\n min_groups = np.min(y_counts)\n if np.all(self.n_splits > y_counts):\n raise ValueError(\"n_splits=%d cannot be greater than the\"\n \" number of members in each class.\"\n % (self.n_splits))\n if self.n_splits > min_groups:\n warnings.warn((\"The least populated class in y has only %d\"\n \" members, which is less than n_splits=%d.\"\n % (min_groups, self.n_splits)), UserWarning)\n\n # Determine the optimal number of samples from each class in each fold,\n # using round robin over the sorted y. (This can be done direct from\n # counts, but that code is unreadable.)\n y_order = np.sort(y_encoded)\n allocation = np.asarray(\n [np.bincount(y_order[i::self.n_splits], minlength=n_classes)\n for i in range(self.n_splits)])\n\n # To maintain the data order dependencies as best as possible within\n # the stratification constraint, we assign samples from each class in\n # blocks (and then mess that up when shuffle=True).\n test_folds = np.empty(len(y), dtype='i')\n for k in range(n_classes):\n # since the kth column of allocation stores the number of samples\n # of class k in each test set, this generates blocks of fold\n # indices corresponding to the allocation for class k.\n folds_for_class = np.arange(self.n_splits).repeat(allocation[:, k])\n if self.shuffle:\n rng.shuffle(folds_for_class)\n test_folds[y_encoded == k] = folds_for_class\n return test_folds\n\n def _iter_test_masks(self, X, y=None, groups=None):\n test_folds = self._make_test_folds(X, y)\n for i in range(self.n_splits):\n yield test_folds == i\n\n def split(self, X, y, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Note that providing ``y`` is sufficient to generate the splits and\n hence ``np.zeros(n_samples)`` may be used as a placeholder for\n ``X`` instead of actual training data.\n\n y : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n Stratification is done based on the y labels.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n\n 
Notes\n -----\n Randomized CV splitters may return different results for each call of\n split. You can make the results identical by setting `random_state`\n to an integer.\n \"\"\"\n y = check_array(y, ensure_2d=False, dtype=None)\n return super().split(X, y, groups)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit", + "name": "StratifiedShuffleSplit", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit", + "decorators": [], + "superclasses": ["BaseShuffleSplit"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/__init__", + "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/_iter_indices", + "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/split" + ], + "is_public": false, + "reexported_by": [], + "description": "Stratified ShuffleSplit cross-validator\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a merge of StratifiedKFold and\nShuffleSplit, which returns stratified randomized folds. The folds\nare made by preserving the percentage of samples for each class.\n\nNote: like the ShuffleSplit strategy, stratified random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Stratified ShuffleSplit cross-validator\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a merge of StratifiedKFold and\nShuffleSplit, which returns stratified randomized folds. The folds\nare made by preserving the percentage of samples for each class.\n\nNote: like the ShuffleSplit strategy, stratified random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_splits : int, default=10\n Number of re-shuffling & splitting iterations.\n\ntest_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to include in the test split. If int, represents the\n absolute number of test samples. If None, the value is set to the\n complement of the train size. If ``train_size`` is also None, it will\n be set to 0.1.\n\ntrain_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the dataset to include in the train split. If\n int, represents the absolute number of train samples. If None,\n the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the training and testing indices produced.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import StratifiedShuffleSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([0, 0, 0, 1, 1, 1])\n>>> sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=0)\n>>> sss.get_n_splits(X, y)\n5\n>>> print(sss)\nStratifiedShuffleSplit(n_splits=5, random_state=0, ...)\n>>> for train_index, test_index in sss.split(X, y):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... 
y_train, y_test = y[train_index], y[test_index]\nTRAIN: [5 2 3] TEST: [4 1 0]\nTRAIN: [5 1 4] TEST: [0 2 3]\nTRAIN: [5 0 2] TEST: [4 3 1]\nTRAIN: [4 1 0] TEST: [2 3 5]\nTRAIN: [0 5 1] TEST: [3 4 2]", + "code": "class StratifiedShuffleSplit(BaseShuffleSplit):\n \"\"\"Stratified ShuffleSplit cross-validator\n\n Provides train/test indices to split data in train/test sets.\n\n This cross-validation object is a merge of StratifiedKFold and\n ShuffleSplit, which returns stratified randomized folds. The folds\n are made by preserving the percentage of samples for each class.\n\n Note: like the ShuffleSplit strategy, stratified random splits\n do not guarantee that all folds will be different, although this is\n still very likely for sizeable datasets.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_splits : int, default=10\n Number of re-shuffling & splitting iterations.\n\n test_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to include in the test split. If int, represents the\n absolute number of test samples. If None, the value is set to the\n complement of the train size. If ``train_size`` is also None, it will\n be set to 0.1.\n\n train_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the dataset to include in the train split. If\n int, represents the absolute number of train samples. If None,\n the value is automatically set to the complement of the test size.\n\n random_state : int, RandomState instance or None, default=None\n Controls the randomness of the training and testing indices produced.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import StratifiedShuffleSplit\n >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n >>> y = np.array([0, 0, 0, 1, 1, 1])\n >>> sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=0)\n >>> sss.get_n_splits(X, y)\n 5\n >>> print(sss)\n StratifiedShuffleSplit(n_splits=5, random_state=0, ...)\n >>> for train_index, test_index in sss.split(X, y):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... X_train, X_test = X[train_index], X[test_index]\n ... 
y_train, y_test = y[train_index], y[test_index]\n TRAIN: [5 2 3] TEST: [4 1 0]\n TRAIN: [5 1 4] TEST: [0 2 3]\n TRAIN: [5 0 2] TEST: [4 3 1]\n TRAIN: [4 1 0] TEST: [2 3 5]\n TRAIN: [0 5 1] TEST: [3 4 2]\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_splits=10, *, test_size=None, train_size=None,\n random_state=None):\n super().__init__(\n n_splits=n_splits,\n test_size=test_size,\n train_size=train_size,\n random_state=random_state)\n self._default_test_size = 0.1\n\n def _iter_indices(self, X, y, groups=None):\n n_samples = _num_samples(X)\n y = check_array(y, ensure_2d=False, dtype=None)\n n_train, n_test = _validate_shuffle_split(\n n_samples, self.test_size, self.train_size,\n default_test_size=self._default_test_size)\n\n if y.ndim == 2:\n # for multi-label y, map each distinct row to a string repr\n # using join because str(row) uses an ellipsis if len(row) > 1000\n y = np.array([' '.join(row.astype('str')) for row in y])\n\n classes, y_indices = np.unique(y, return_inverse=True)\n n_classes = classes.shape[0]\n\n class_counts = np.bincount(y_indices)\n if np.min(class_counts) < 2:\n raise ValueError(\"The least populated class in y has only 1\"\n \" member, which is too few. The minimum\"\n \" number of groups for any class cannot\"\n \" be less than 2.\")\n\n if n_train < n_classes:\n raise ValueError('The train_size = %d should be greater or '\n 'equal to the number of classes = %d' %\n (n_train, n_classes))\n if n_test < n_classes:\n raise ValueError('The test_size = %d should be greater or '\n 'equal to the number of classes = %d' %\n (n_test, n_classes))\n\n # Find the sorted list of instances for each class:\n # (np.unique above performs a sort, so code is O(n logn) already)\n class_indices = np.split(np.argsort(y_indices, kind='mergesort'),\n np.cumsum(class_counts)[:-1])\n\n rng = check_random_state(self.random_state)\n\n for _ in range(self.n_splits):\n # if there are ties in the class-counts, we want\n # to make sure to break them anew in each iteration\n n_i = _approximate_mode(class_counts, n_train, rng)\n class_counts_remaining = class_counts - n_i\n t_i = _approximate_mode(class_counts_remaining, n_test, rng)\n\n train = []\n test = []\n\n for i in range(n_classes):\n permutation = rng.permutation(class_counts[i])\n perm_indices_class_i = class_indices[i].take(permutation,\n mode='clip')\n\n train.extend(perm_indices_class_i[:n_i[i]])\n test.extend(perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]])\n\n train = rng.permutation(train)\n test = rng.permutation(test)\n\n yield train, test\n\n def split(self, X, y, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Note that providing ``y`` is sufficient to generate the splits and\n hence ``np.zeros(n_samples)`` may be used as a placeholder for\n ``X`` instead of actual training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_labels)\n The target variable for supervised learning problems.\n Stratification is done based on the y labels.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n\n Notes\n -----\n Randomized CV splitters may return different results for each call of\n split. 
You can make the results identical by setting `random_state`\n to an integer.\n \"\"\"\n y = check_array(y, ensure_2d=False, dtype=None)\n return super().split(X, y, groups)", + "instance_attributes": [ + { + "name": "_default_test_size", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit", + "name": "TimeSeriesSplit", + "qname": "sklearn.model_selection._split.TimeSeriesSplit", + "decorators": [], + "superclasses": ["_BaseKFold"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit/__init__", + "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit/split" + ], + "is_public": false, + "reexported_by": [], + "description": "Time Series cross-validator\n\nProvides train/test indices to split time series data samples\nthat are observed at fixed time intervals, in train/test sets.\nIn each split, test indices must be higher than before, and thus shuffling\nin cross validator is inappropriate.\n\nThis cross-validation object is a variation of :class:`KFold`.\nIn the kth split, it returns first k folds as train set and the\n(k+1)th fold as test set.\n\nNote that unlike standard cross-validation methods, successive\ntraining sets are supersets of those that come before them.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "Time Series cross-validator\n\nProvides train/test indices to split time series data samples\nthat are observed at fixed time intervals, in train/test sets.\nIn each split, test indices must be higher than before, and thus shuffling\nin cross validator is inappropriate.\n\nThis cross-validation object is a variation of :class:`KFold`.\nIn the kth split, it returns first k folds as train set and the\n(k+1)th fold as test set.\n\nNote that unlike standard cross-validation methods, successive\ntraining sets are supersets of those that come before them.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nn_splits : int, default=5\n Number of splits. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\nmax_train_size : int, default=None\n Maximum size for a single training set.\n\ntest_size : int, default=None\n Used to limit the size of the test set. Defaults to\n ``n_samples // (n_splits + 1)``, which is the maximum allowed value\n with ``gap=0``.\n\n .. versionadded:: 0.24\n\ngap : int, default=0\n Number of samples to exclude from the end of each train set before\n the test set.\n\n .. versionadded:: 0.24\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import TimeSeriesSplit\n>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> tscv = TimeSeriesSplit()\n>>> print(tscv)\nTimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None)\n>>> for train_index, test_index in tscv.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [0] TEST: [1]\nTRAIN: [0 1] TEST: [2]\nTRAIN: [0 1 2] TEST: [3]\nTRAIN: [0 1 2 3] TEST: [4]\nTRAIN: [0 1 2 3 4] TEST: [5]\n>>> # Fix test_size to 2 with 12 samples\n>>> X = np.random.randn(12, 2)\n>>> y = np.random.randint(0, 2, 12)\n>>> tscv = TimeSeriesSplit(n_splits=3, test_size=2)\n>>> for train_index, test_index in tscv.split(X):\n... 
print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [0 1 2 3 4 5] TEST: [6 7]\nTRAIN: [0 1 2 3 4 5 6 7] TEST: [8 9]\nTRAIN: [0 1 2 3 4 5 6 7 8 9] TEST: [10 11]\n>>> # Add in a 2 period gap\n>>> tscv = TimeSeriesSplit(n_splits=3, test_size=2, gap=2)\n>>> for train_index, test_index in tscv.split(X):\n... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n... X_train, X_test = X[train_index], X[test_index]\n... y_train, y_test = y[train_index], y[test_index]\nTRAIN: [0 1 2 3] TEST: [6 7]\nTRAIN: [0 1 2 3 4 5] TEST: [8 9]\nTRAIN: [0 1 2 3 4 5 6 7] TEST: [10 11]\n\nNotes\n-----\nThe training set has size ``i * n_samples // (n_splits + 1)\n+ n_samples % (n_splits + 1)`` in the ``i`` th split,\nwith a test set of size ``n_samples//(n_splits + 1)`` by default,\nwhere ``n_samples`` is the number of samples.", + "code": "class TimeSeriesSplit(_BaseKFold):\n \"\"\"Time Series cross-validator\n\n Provides train/test indices to split time series data samples\n that are observed at fixed time intervals, in train/test sets.\n In each split, test indices must be higher than before, and thus shuffling\n in cross validator is inappropriate.\n\n This cross-validation object is a variation of :class:`KFold`.\n In the kth split, it returns first k folds as train set and the\n (k+1)th fold as test set.\n\n Note that unlike standard cross-validation methods, successive\n training sets are supersets of those that come before them.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n n_splits : int, default=5\n Number of splits. Must be at least 2.\n\n .. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5.\n\n max_train_size : int, default=None\n Maximum size for a single training set.\n\n test_size : int, default=None\n Used to limit the size of the test set. Defaults to\n ``n_samples // (n_splits + 1)``, which is the maximum allowed value\n with ``gap=0``.\n\n .. versionadded:: 0.24\n\n gap : int, default=0\n Number of samples to exclude from the end of each train set before\n the test set.\n\n .. versionadded:: 0.24\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import TimeSeriesSplit\n >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])\n >>> y = np.array([1, 2, 3, 4, 5, 6])\n >>> tscv = TimeSeriesSplit()\n >>> print(tscv)\n TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None)\n >>> for train_index, test_index in tscv.split(X):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... X_train, X_test = X[train_index], X[test_index]\n ... y_train, y_test = y[train_index], y[test_index]\n TRAIN: [0] TEST: [1]\n TRAIN: [0 1] TEST: [2]\n TRAIN: [0 1 2] TEST: [3]\n TRAIN: [0 1 2 3] TEST: [4]\n TRAIN: [0 1 2 3 4] TEST: [5]\n >>> # Fix test_size to 2 with 12 samples\n >>> X = np.random.randn(12, 2)\n >>> y = np.random.randint(0, 2, 12)\n >>> tscv = TimeSeriesSplit(n_splits=3, test_size=2)\n >>> for train_index, test_index in tscv.split(X):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... X_train, X_test = X[train_index], X[test_index]\n ... 
y_train, y_test = y[train_index], y[test_index]\n TRAIN: [0 1 2 3 4 5] TEST: [6 7]\n TRAIN: [0 1 2 3 4 5 6 7] TEST: [8 9]\n TRAIN: [0 1 2 3 4 5 6 7 8 9] TEST: [10 11]\n >>> # Add in a 2 period gap\n >>> tscv = TimeSeriesSplit(n_splits=3, test_size=2, gap=2)\n >>> for train_index, test_index in tscv.split(X):\n ... print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n ... X_train, X_test = X[train_index], X[test_index]\n ... y_train, y_test = y[train_index], y[test_index]\n TRAIN: [0 1 2 3] TEST: [6 7]\n TRAIN: [0 1 2 3 4 5] TEST: [8 9]\n TRAIN: [0 1 2 3 4 5 6 7] TEST: [10 11]\n\n Notes\n -----\n The training set has size ``i * n_samples // (n_splits + 1)\n + n_samples % (n_splits + 1)`` in the ``i`` th split,\n with a test set of size ``n_samples//(n_splits + 1)`` by default,\n where ``n_samples`` is the number of samples.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self,\n n_splits=5,\n *,\n max_train_size=None,\n test_size=None,\n gap=0):\n super().__init__(n_splits, shuffle=False, random_state=None)\n self.max_train_size = max_train_size\n self.test_size = test_size\n self.gap = gap\n\n def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Always ignored, exists for compatibility.\n\n groups : array-like of shape (n_samples,)\n Always ignored, exists for compatibility.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n X, y, groups = indexable(X, y, groups)\n n_samples = _num_samples(X)\n n_splits = self.n_splits\n n_folds = n_splits + 1\n gap = self.gap\n test_size = self.test_size if self.test_size is not None \\\n else n_samples // n_folds\n\n # Make sure we have enough samples for the given split parameters\n if n_folds > n_samples:\n raise ValueError(\n (f\"Cannot have number of folds={n_folds} greater\"\n f\" than the number of samples={n_samples}.\"))\n if n_samples - gap - (test_size * n_splits) <= 0:\n raise ValueError(\n (f\"Too many splits={n_splits} for number of samples\"\n f\"={n_samples} with test_size={test_size} and gap={gap}.\"))\n\n indices = np.arange(n_samples)\n test_starts = range(n_samples - n_splits * test_size,\n n_samples, test_size)\n\n for test_start in test_starts:\n train_end = test_start - gap\n if self.max_train_size and self.max_train_size < train_end:\n yield (indices[train_end - self.max_train_size:train_end],\n indices[test_start:test_start + test_size])\n else:\n yield (indices[:train_end],\n indices[test_start:test_start + test_size])", + "instance_attributes": [ + { + "name": "gap", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold", + "name": "_BaseKFold", + "qname": "sklearn.model_selection._split._BaseKFold", + "decorators": [], + "superclasses": ["BaseCrossValidator"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/_BaseKFold/__init__", + "scikit-learn/sklearn.model_selection._split/_BaseKFold/split", + "scikit-learn/sklearn.model_selection._split/_BaseKFold/get_n_splits" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for KFold, GroupKFold, and StratifiedKFold", + "docstring": "Base class for KFold, GroupKFold, and 
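Editor's note: the `TimeSeriesSplit` entry above serializes both the docstring and the splitter's source. As a cross-check of the documented fold layout, here is a minimal runnable sketch (illustrative only, not part of the dumped JSON), assuming scikit-learn 0.24 and NumPy are installed:

```python
# Reproduces the gap/test_size fold layout from the serialized docstring:
# with 12 samples, n_splits=3, test_size=2, gap=2, each train set ends
# two samples before its test window.
import numpy as np
from sklearn.model_selection import TimeSeriesSplit

X = np.arange(24).reshape(12, 2)  # 12 samples, 2 features
tscv = TimeSeriesSplit(n_splits=3, test_size=2, gap=2)
for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
# TRAIN: [0 1 2 3] TEST: [6 7]
# TRAIN: [0 1 2 3 4 5] TEST: [8 9]
# TRAIN: [0 1 2 3 4 5 6 7] TEST: [10 11]
```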
StratifiedKFold", + "code": "class _BaseKFold(BaseCrossValidator, metaclass=ABCMeta):\n \"\"\"Base class for KFold, GroupKFold, and StratifiedKFold\"\"\"\n\n @abstractmethod\n @_deprecate_positional_args\n def __init__(self, n_splits, *, shuffle, random_state):\n if not isinstance(n_splits, numbers.Integral):\n raise ValueError('The number of folds must be of Integral type. '\n '%s of type %s was passed.'\n % (n_splits, type(n_splits)))\n n_splits = int(n_splits)\n\n if n_splits <= 1:\n raise ValueError(\n \"k-fold cross-validation requires at least one\"\n \" train/test split by setting n_splits=2 or more,\"\n \" got n_splits={0}.\".format(n_splits))\n\n if not isinstance(shuffle, bool):\n raise TypeError(\"shuffle must be True or False;\"\n \" got {0}\".format(shuffle))\n\n if not shuffle and random_state is not None: # None is the default\n raise ValueError(\n 'Setting a random_state has no effect since shuffle is '\n 'False. You should leave '\n 'random_state to its default (None), or set shuffle=True.',\n )\n\n self.n_splits = n_splits\n self.shuffle = shuffle\n self.random_state = random_state\n\n def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n X, y, groups = indexable(X, y, groups)\n n_samples = _num_samples(X)\n if self.n_splits > n_samples:\n raise ValueError(\n (\"Cannot have number of splits n_splits={0} greater\"\n \" than the number of samples: n_samples={1}.\")\n .format(self.n_splits, n_samples))\n\n for train, test in super().split(X, y, groups):\n yield train, test\n\n def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n return self.n_splits", + "instance_attributes": [ + { + "name": "n_splits", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper", + "name": "_CVIterableWrapper", + "qname": "sklearn.model_selection._split._CVIterableWrapper", + "decorators": [], + "superclasses": ["BaseCrossValidator"], + "methods": [ + "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/__init__", + "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/get_n_splits", + "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/split" + ], + "is_public": false, + "reexported_by": [], + "description": "Wrapper class for old style cv objects and iterables.", + "docstring": "Wrapper class for old style cv objects and iterables.", + "code": "class _CVIterableWrapper(BaseCrossValidator):\n \"\"\"Wrapper class for old style cv objects and 
iterables.\"\"\"\n def __init__(self, cv):\n self.cv = list(cv)\n\n def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n return len(self.cv)\n\n def split(self, X=None, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n for train, test in self.cv:\n yield train, test", + "instance_attributes": [ + { + "name": "cv", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits", + "name": "_RepeatedSplits", + "qname": "sklearn.model_selection._split._RepeatedSplits", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/__init__", + "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/split", + "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/get_n_splits", + "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/__repr__" + ], + "is_public": false, + "reexported_by": [], + "description": "Repeated splits for an arbitrary randomized CV splitter.\n\nRepeats splits for cross-validators n times with different randomization\nin each repetition.", + "docstring": "Repeated splits for an arbitrary randomized CV splitter.\n\nRepeats splits for cross-validators n times with different randomization\nin each repetition.\n\nParameters\n----------\ncv : callable\n Cross-validator class.\n\nn_repeats : int, default=10\n Number of times cross-validator needs to be repeated.\n\nrandom_state : int, RandomState instance or None, default=None\n Passes `random_state` to the arbitrary repeating cross validator.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n**cvargs : additional params\n Constructor parameters for cv. Must not contain random_state\n and shuffle.", + "code": "class _RepeatedSplits(metaclass=ABCMeta):\n \"\"\"Repeated splits for an arbitrary randomized CV splitter.\n\n Repeats splits for cross-validators n times with different randomization\n in each repetition.\n\n Parameters\n ----------\n cv : callable\n Cross-validator class.\n\n n_repeats : int, default=10\n Number of times cross-validator needs to be repeated.\n\n random_state : int, RandomState instance or None, default=None\n Passes `random_state` to the arbitrary repeating cross validator.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n **cvargs : additional params\n Constructor parameters for cv. 
Must not contain random_state\n and shuffle.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, cv, *, n_repeats=10, random_state=None, **cvargs):\n if not isinstance(n_repeats, numbers.Integral):\n raise ValueError(\"Number of repetitions must be of Integral type.\")\n\n if n_repeats <= 0:\n raise ValueError(\"Number of repetitions must be greater than 0.\")\n\n if any(key in cvargs for key in ('random_state', 'shuffle')):\n raise ValueError(\n \"cvargs must not contain random_state or shuffle.\")\n\n self.cv = cv\n self.n_repeats = n_repeats\n self.random_state = random_state\n self.cvargs = cvargs\n\n def split(self, X, y=None, groups=None):\n \"\"\"Generates indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n n_repeats = self.n_repeats\n rng = check_random_state(self.random_state)\n\n for idx in range(n_repeats):\n cv = self.cv(random_state=rng, shuffle=True,\n **self.cvargs)\n for train_index, test_index in cv.split(X, y, groups):\n yield train_index, test_index\n\n def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n ``np.zeros(n_samples)`` may be used as a placeholder.\n\n y : object\n Always ignored, exists for compatibility.\n ``np.zeros(n_samples)`` may be used as a placeholder.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n rng = check_random_state(self.random_state)\n cv = self.cv(random_state=rng, shuffle=True,\n **self.cvargs)\n return cv.get_n_splits(X, y, groups) * self.n_repeats\n\n def __repr__(self):\n return _build_repr(self)", + "instance_attributes": [ + { + "name": "n_repeats", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "cvargs", + "types": { + "kind": "NamedType", + "name": "dict" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier", + "name": "OneVsOneClassifier", + "qname": "sklearn.multiclass.OneVsOneClassifier", + "decorators": [], + "superclasses": ["MetaEstimatorMixin", "ClassifierMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.multiclass/OneVsOneClassifier/__init__", + "scikit-learn/sklearn.multiclass/OneVsOneClassifier/fit", + "scikit-learn/sklearn.multiclass/OneVsOneClassifier/partial_fit", + "scikit-learn/sklearn.multiclass/OneVsOneClassifier/predict", + "scikit-learn/sklearn.multiclass/OneVsOneClassifier/decision_function", + "scikit-learn/sklearn.multiclass/OneVsOneClassifier/n_classes_@getter", + "scikit-learn/sklearn.multiclass/OneVsOneClassifier/_pairwise@getter", + "scikit-learn/sklearn.multiclass/OneVsOneClassifier/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "One-vs-one multiclass 
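Editor's note: `_RepeatedSplits` re-runs a shuffled splitter `n_repeats` times with fresh randomization; `RepeatedKFold` and `RepeatedStratifiedKFold` are its public subclasses. A minimal sketch of the repeat arithmetic (illustrative, assuming scikit-learn 0.24 and NumPy):

```python
# get_n_splits multiplies the wrapped splitter's fold count by n_repeats,
# matching the _RepeatedSplits.get_n_splits code serialized above.
import numpy as np
from sklearn.model_selection import RepeatedKFold

X = np.arange(8).reshape(4, 2)
rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=0)
print(rkf.get_n_splits(X))  # 4 == 2 folds * 2 repeats
for train, test in rkf.split(X):
    print("TRAIN:", train, "TEST:", test)
```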
strategy\n\nThis strategy consists in fitting one classifier per class pair.\nAt prediction time, the class which received the most votes is selected.\nSince it requires to fit `n_classes * (n_classes - 1) / 2` classifiers,\nthis method is usually slower than one-vs-the-rest, due to its\nO(n_classes^2) complexity. However, this method may be advantageous for\nalgorithms such as kernel algorithms which don't scale well with\n`n_samples`. This is because each individual learning problem only involves\na small subset of the data whereas, with one-vs-the-rest, the complete\ndataset is used `n_classes` times.\n\nRead more in the :ref:`User Guide `.", + "docstring": "One-vs-one multiclass strategy\n\nThis strategy consists in fitting one classifier per class pair.\nAt prediction time, the class which received the most votes is selected.\nSince it requires to fit `n_classes * (n_classes - 1) / 2` classifiers,\nthis method is usually slower than one-vs-the-rest, due to its\nO(n_classes^2) complexity. However, this method may be advantageous for\nalgorithms such as kernel algorithms which don't scale well with\n`n_samples`. This is because each individual learning problem only involves\na small subset of the data whereas, with one-vs-the-rest, the complete\ndataset is used `n_classes` times.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the `n_classes * (\n n_classes - 1) / 2` OVO problems are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nestimators_ : list of ``n_classes * (n_classes - 1) / 2`` estimators\n Estimators used for predictions.\n\nclasses_ : numpy array of shape [n_classes]\n Array containing labels.\n\nn_classes_ : int\n Number of classes\n\npairwise_indices_ : list, length = ``len(estimators_)``, or ``None``\n Indices of samples used when training the estimators.\n ``None`` when ``estimator``'s `pairwise` tag is False.\n\n .. deprecated:: 0.24\n\n The _pairwise attribute is deprecated in 0.24. From 1.1\n (renaming of 0.25) and onward, `pairwise_indices_` will use the\n pairwise estimator tag instead.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.multiclass import OneVsOneClassifier\n>>> from sklearn.svm import LinearSVC\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, test_size=0.33, shuffle=True, random_state=0)\n>>> clf = OneVsOneClassifier(\n... LinearSVC(random_state=0)).fit(X_train, y_train)\n>>> clf.predict(X_test[:10])\narray([2, 1, 0, 2, 0, 2, 0, 1, 1, 1])", + "code": "class OneVsOneClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator):\n \"\"\"One-vs-one multiclass strategy\n\n This strategy consists in fitting one classifier per class pair.\n At prediction time, the class which received the most votes is selected.\n Since it requires to fit `n_classes * (n_classes - 1) / 2` classifiers,\n this method is usually slower than one-vs-the-rest, due to its\n O(n_classes^2) complexity. However, this method may be advantageous for\n algorithms such as kernel algorithms which don't scale well with\n `n_samples`. 
This is because each individual learning problem only involves\n a small subset of the data whereas, with one-vs-the-rest, the complete\n dataset is used `n_classes` times.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation: the `n_classes * (\n n_classes - 1) / 2` OVO problems are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n estimators_ : list of ``n_classes * (n_classes - 1) / 2`` estimators\n Estimators used for predictions.\n\n classes_ : numpy array of shape [n_classes]\n Array containing labels.\n\n n_classes_ : int\n Number of classes\n\n pairwise_indices_ : list, length = ``len(estimators_)``, or ``None``\n Indices of samples used when training the estimators.\n ``None`` when ``estimator``'s `pairwise` tag is False.\n\n .. deprecated:: 0.24\n\n The _pairwise attribute is deprecated in 0.24. From 1.1\n (renaming of 0.25) and onward, `pairwise_indices_` will use the\n pairwise estimator tag instead.\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.model_selection import train_test_split\n >>> from sklearn.multiclass import OneVsOneClassifier\n >>> from sklearn.svm import LinearSVC\n >>> X, y = load_iris(return_X_y=True)\n >>> X_train, X_test, y_train, y_test = train_test_split(\n ... X, y, test_size=0.33, shuffle=True, random_state=0)\n >>> clf = OneVsOneClassifier(\n ... LinearSVC(random_state=0)).fit(X_train, y_train)\n >>> clf.predict(X_test[:10])\n array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, estimator, *, n_jobs=None):\n self.estimator = estimator\n self.n_jobs = n_jobs\n\n def fit(self, X, y):\n \"\"\"Fit underlying estimators.\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n y : array-like of shape (n_samples,)\n Multi-class targets.\n\n Returns\n -------\n self\n \"\"\"\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'],\n force_all_finite=False)\n check_classification_targets(y)\n\n self.classes_ = np.unique(y)\n if len(self.classes_) == 1:\n raise ValueError(\"OneVsOneClassifier can not be fit when only one\"\n \" class is present.\")\n n_classes = self.classes_.shape[0]\n estimators_indices = list(zip(*(Parallel(n_jobs=self.n_jobs)(\n delayed(_fit_ovo_binary)\n (self.estimator, X, y, self.classes_[i], self.classes_[j])\n for i in range(n_classes) for j in range(i + 1, n_classes)))))\n\n self.estimators_ = estimators_indices[0]\n\n pairwise = _is_pairwise(self)\n self.pairwise_indices_ = (\n estimators_indices[1] if pairwise else None)\n\n return self\n\n @if_delegate_has_method(delegate='estimator')\n def partial_fit(self, X, y, classes=None):\n \"\"\"Partially fit underlying estimators\n\n Should be used when memory is inefficient to train all data. 
Chunks\n of data can be passed in several iteration, where the first call\n should have an array of all target variables.\n\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n y : array-like of shape (n_samples,)\n Multi-class targets.\n\n classes : array, shape (n_classes, )\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is only required in the first call of partial_fit\n and can be omitted in the subsequent calls.\n\n Returns\n -------\n self\n \"\"\"\n if _check_partial_fit_first_call(self, classes):\n self.estimators_ = [clone(self.estimator) for _ in\n range(self.n_classes_ *\n (self.n_classes_ - 1) // 2)]\n\n if len(np.setdiff1d(y, self.classes_)):\n raise ValueError(\"Mini-batch contains {0} while it \"\n \"must be subset of {1}\".format(np.unique(y),\n self.classes_))\n\n X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'],\n force_all_finite=False)\n check_classification_targets(y)\n combinations = itertools.combinations(range(self.n_classes_), 2)\n self.estimators_ = Parallel(\n n_jobs=self.n_jobs)(\n delayed(_partial_fit_ovo_binary)(\n estimator, X, y, self.classes_[i], self.classes_[j])\n for estimator, (i, j) in zip(self.estimators_,\n (combinations)))\n\n self.pairwise_indices_ = None\n\n return self\n\n def predict(self, X):\n \"\"\"Estimate the best class label for each sample in X.\n\n This is implemented as ``argmax(decision_function(X), axis=1)`` which\n will return the label of the class with most votes by estimators\n predicting the outcome of a decision for each possible class pair.\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n Returns\n -------\n y : numpy array of shape [n_samples]\n Predicted multi-class targets.\n \"\"\"\n Y = self.decision_function(X)\n if self.n_classes_ == 2:\n return self.classes_[(Y > 0).astype(int)]\n return self.classes_[Y.argmax(axis=1)]\n\n def decision_function(self, X):\n \"\"\"Decision function for the OneVsOneClassifier.\n\n The decision values for the samples are computed by adding the\n normalized sum of pair-wise classification confidence levels to the\n votes in order to disambiguate between the decision values when the\n votes for all the classes are equal leading to a tie.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n Y : array-like of shape (n_samples, n_classes) or (n_samples,) for \\\n binary classification.\n\n .. 
versionchanged:: 0.19\n output shape changed to ``(n_samples,)`` to conform to\n scikit-learn conventions for binary classification.\n \"\"\"\n check_is_fitted(self)\n\n indices = self.pairwise_indices_\n if indices is None:\n Xs = [X] * len(self.estimators_)\n else:\n Xs = [X[:, idx] for idx in indices]\n\n predictions = np.vstack([est.predict(Xi)\n for est, Xi in zip(self.estimators_, Xs)]).T\n confidences = np.vstack([_predict_binary(est, Xi)\n for est, Xi in zip(self.estimators_, Xs)]).T\n Y = _ovr_decision_function(predictions,\n confidences, len(self.classes_))\n if self.n_classes_ == 2:\n return Y[:, 1]\n return Y\n\n @property\n def n_classes_(self):\n return len(self.classes_)\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n \"\"\"Indicate if wrapped estimator is using a precomputed Gram matrix\"\"\"\n return getattr(self.estimator, \"_pairwise\", False)\n\n def _more_tags(self):\n \"\"\"Indicate if wrapped estimator is using a precomputed Gram matrix\"\"\"\n return {\n 'pairwise': _safe_tags(self.estimator, key=\"pairwise\")\n }", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier", + "name": "OneVsRestClassifier", + "qname": "sklearn.multiclass.OneVsRestClassifier", + "decorators": [], + "superclasses": ["MultiOutputMixin", "ClassifierMixin", "MetaEstimatorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/__init__", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/fit", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/partial_fit", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/predict", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/predict_proba", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/decision_function", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/multilabel_@getter", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/n_classes_@getter", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/coef_@getter", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/intercept_@getter", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/_pairwise@getter", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/_more_tags", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/_first_estimator@getter", + "scikit-learn/sklearn.multiclass/OneVsRestClassifier/n_features_in_@getter" + ], + "is_public": true, + "reexported_by": [], + "description": "One-vs-the-rest (OvR) multiclass strategy.\n\nAlso known as one-vs-all, this strategy consists in fitting one classifier\nper class. For each classifier, the class is fitted against all the other\nclasses. In addition to its computational efficiency (only `n_classes`\nclassifiers are needed), one advantage of this approach is its\ninterpretability. Since each class is represented by one and one classifier\nonly, it is possible to gain knowledge about the class by inspecting its\ncorresponding classifier. This is the most commonly used strategy for\nmulticlass classification and is a fair default choice.\n\nOneVsRestClassifier can also be used for multilabel classification. To use\nthis feature, provide an indicator matrix for the target `y` when calling\n`.fit`. 
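Editor's note: the `OneVsOneClassifier` entry above states that `n_classes * (n_classes - 1) / 2` pairwise estimators are fit. A quick sketch confirming the count on iris (illustrative, assuming scikit-learn 0.24):

```python
# Iris has 3 classes, so OvO trains 3 * 2 / 2 == 3 pairwise estimators;
# predict() then picks the class with the most pairwise votes.
from sklearn.datasets import load_iris
from sklearn.multiclass import OneVsOneClassifier
from sklearn.svm import LinearSVC

X, y = load_iris(return_X_y=True)
clf = OneVsOneClassifier(LinearSVC(random_state=0)).fit(X, y)
print(clf.n_classes_, len(clf.estimators_))  # 3 3
```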
In other words, the target labels should be formatted as a 2D\nbinary (0/1) matrix, where [i, j] == 1 indicates the presence of label j\nin sample i. This estimator uses the binary relevance method to perform\nmultilabel classification, which involves training one binary classifier\nindependently for each label.\n\nRead more in the :ref:`User Guide `.", + "docstring": "One-vs-the-rest (OvR) multiclass strategy.\n\nAlso known as one-vs-all, this strategy consists in fitting one classifier\nper class. For each classifier, the class is fitted against all the other\nclasses. In addition to its computational efficiency (only `n_classes`\nclassifiers are needed), one advantage of this approach is its\ninterpretability. Since each class is represented by one and one classifier\nonly, it is possible to gain knowledge about the class by inspecting its\ncorresponding classifier. This is the most commonly used strategy for\nmulticlass classification and is a fair default choice.\n\nOneVsRestClassifier can also be used for multilabel classification. To use\nthis feature, provide an indicator matrix for the target `y` when calling\n`.fit`. In other words, the target labels should be formatted as a 2D\nbinary (0/1) matrix, where [i, j] == 1 indicates the presence of label j\nin sample i. This estimator uses the binary relevance method to perform\nmultilabel classification, which involves training one binary classifier\nindependently for each label.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the `n_classes`\n one-vs-rest problems are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nAttributes\n----------\nestimators_ : list of `n_classes` estimators\n Estimators used for predictions.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function. This attribute\n exists only if the ``estimators_`` defines ``coef_``.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will\n be removed in 1.1 (renaming of 0.26). If you use this attribute\n in :class:`~sklearn.feature_selection.RFE` or\n :class:`~sklearn.feature_selection.SelectFromModel`,\n you may pass a callable to the `importance_getter`\n parameter that extracts feature the importances\n from `estimators_`.\n\nintercept_ : ndarray of shape (1, 1) or (n_classes, 1)\n If ``y`` is binary, the shape is ``(1, 1)`` else ``(n_classes, 1)``\n This attribute exists only if the ``estimators_`` defines\n ``intercept_``.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will\n be removed in 1.1 (renaming of 0.26). 
If you use this attribute\n in :class:`~sklearn.feature_selection.RFE` or\n :class:`~sklearn.feature_selection.SelectFromModel`,\n you may pass a callable to the `importance_getter`\n parameter that extracts feature the importances\n from `estimators_`.\n\nclasses_ : array, shape = [`n_classes`]\n Class labels.\n\nn_classes_ : int\n Number of classes.\n\nlabel_binarizer_ : LabelBinarizer object\n Object used to transform multiclass labels to binary labels and\n vice-versa.\n\nmultilabel_ : boolean\n Whether a OneVsRestClassifier is a multilabel classifier.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.multiclass import OneVsRestClassifier\n>>> from sklearn.svm import SVC\n>>> X = np.array([\n... [10, 10],\n... [8, 10],\n... [-5, 5.5],\n... [-5.4, 5.5],\n... [-20, -20],\n... [-15, -20]\n... ])\n>>> y = np.array([0, 0, 1, 1, 2, 2])\n>>> clf = OneVsRestClassifier(SVC()).fit(X, y)\n>>> clf.predict([[-19, -20], [9, 9], [-5, 5]])\narray([2, 0, 1])\n\nSee Also\n--------\nsklearn.multioutput.MultiOutputClassifier : Alternate way of extending an\n estimator for multilabel classification.\nsklearn.preprocessing.MultiLabelBinarizer : Transform iterable of iterables\n to binary indicator matrix.", + "code": "class OneVsRestClassifier(MultiOutputMixin, ClassifierMixin,\n MetaEstimatorMixin, BaseEstimator):\n \"\"\"One-vs-the-rest (OvR) multiclass strategy.\n\n Also known as one-vs-all, this strategy consists in fitting one classifier\n per class. For each classifier, the class is fitted against all the other\n classes. In addition to its computational efficiency (only `n_classes`\n classifiers are needed), one advantage of this approach is its\n interpretability. Since each class is represented by one and one classifier\n only, it is possible to gain knowledge about the class by inspecting its\n corresponding classifier. This is the most commonly used strategy for\n multiclass classification and is a fair default choice.\n\n OneVsRestClassifier can also be used for multilabel classification. To use\n this feature, provide an indicator matrix for the target `y` when calling\n `.fit`. In other words, the target labels should be formatted as a 2D\n binary (0/1) matrix, where [i, j] == 1 indicates the presence of label j\n in sample i. This estimator uses the binary relevance method to perform\n multilabel classification, which involves training one binary classifier\n independently for each label.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation: the `n_classes`\n one-vs-rest problems are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\n Attributes\n ----------\n estimators_ : list of `n_classes` estimators\n Estimators used for predictions.\n\n coef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function. This attribute\n exists only if the ``estimators_`` defines ``coef_``.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will\n be removed in 1.1 (renaming of 0.26). 
If you use this attribute\n in :class:`~sklearn.feature_selection.RFE` or\n :class:`~sklearn.feature_selection.SelectFromModel`,\n you may pass a callable to the `importance_getter`\n parameter that extracts feature the importances\n from `estimators_`.\n\n intercept_ : ndarray of shape (1, 1) or (n_classes, 1)\n If ``y`` is binary, the shape is ``(1, 1)`` else ``(n_classes, 1)``\n This attribute exists only if the ``estimators_`` defines\n ``intercept_``.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will\n be removed in 1.1 (renaming of 0.26). If you use this attribute\n in :class:`~sklearn.feature_selection.RFE` or\n :class:`~sklearn.feature_selection.SelectFromModel`,\n you may pass a callable to the `importance_getter`\n parameter that extracts feature the importances\n from `estimators_`.\n\n classes_ : array, shape = [`n_classes`]\n Class labels.\n\n n_classes_ : int\n Number of classes.\n\n label_binarizer_ : LabelBinarizer object\n Object used to transform multiclass labels to binary labels and\n vice-versa.\n\n multilabel_ : boolean\n Whether a OneVsRestClassifier is a multilabel classifier.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.multiclass import OneVsRestClassifier\n >>> from sklearn.svm import SVC\n >>> X = np.array([\n ... [10, 10],\n ... [8, 10],\n ... [-5, 5.5],\n ... [-5.4, 5.5],\n ... [-20, -20],\n ... [-15, -20]\n ... ])\n >>> y = np.array([0, 0, 1, 1, 2, 2])\n >>> clf = OneVsRestClassifier(SVC()).fit(X, y)\n >>> clf.predict([[-19, -20], [9, 9], [-5, 5]])\n array([2, 0, 1])\n\n See Also\n --------\n sklearn.multioutput.MultiOutputClassifier : Alternate way of extending an\n estimator for multilabel classification.\n sklearn.preprocessing.MultiLabelBinarizer : Transform iterable of iterables\n to binary indicator matrix.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, estimator, *, n_jobs=None):\n self.estimator = estimator\n self.n_jobs = n_jobs\n\n def fit(self, X, y):\n \"\"\"Fit underlying estimators.\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Multi-class targets. An indicator matrix turns on multilabel\n classification.\n\n Returns\n -------\n self\n \"\"\"\n # A sparse LabelBinarizer, with sparse_output=True, has been shown to\n # outperform or match a dense label binarizer in all cases and has also\n # resulted in less or equal memory consumption in the fit_ovr function\n # overall.\n self.label_binarizer_ = LabelBinarizer(sparse_output=True)\n Y = self.label_binarizer_.fit_transform(y)\n Y = Y.tocsc()\n self.classes_ = self.label_binarizer_.classes_\n columns = (col.toarray().ravel() for col in Y.T)\n # In cases where individual estimators are very fast to train setting\n # n_jobs > 1 in can results in slower performance due to the overhead\n # of spawning threads. 
See joblib issue #112.\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(delayed(_fit_binary)(\n self.estimator, X, column, classes=[\n \"not %s\" % self.label_binarizer_.classes_[i],\n self.label_binarizer_.classes_[i]])\n for i, column in enumerate(columns))\n\n return self\n\n @if_delegate_has_method('estimator')\n def partial_fit(self, X, y, classes=None):\n \"\"\"Partially fit underlying estimators\n\n Should be used when memory is inefficient to train all data.\n Chunks of data can be passed in several iteration.\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Multi-class targets. An indicator matrix turns on multilabel\n classification.\n\n classes : array, shape (n_classes, )\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is only required in the first call of partial_fit\n and can be omitted in the subsequent calls.\n\n Returns\n -------\n self\n \"\"\"\n if _check_partial_fit_first_call(self, classes):\n if not hasattr(self.estimator, \"partial_fit\"):\n raise ValueError((\"Base estimator {0}, doesn't have \"\n \"partial_fit method\").format(self.estimator))\n self.estimators_ = [clone(self.estimator) for _ in range\n (self.n_classes_)]\n\n # A sparse LabelBinarizer, with sparse_output=True, has been\n # shown to outperform or match a dense label binarizer in all\n # cases and has also resulted in less or equal memory consumption\n # in the fit_ovr function overall.\n self.label_binarizer_ = LabelBinarizer(sparse_output=True)\n self.label_binarizer_.fit(self.classes_)\n\n if len(np.setdiff1d(y, self.classes_)):\n raise ValueError((\"Mini-batch contains {0} while classes \" +\n \"must be subset of {1}\").format(np.unique(y),\n self.classes_))\n\n Y = self.label_binarizer_.transform(y)\n Y = Y.tocsc()\n columns = (col.toarray().ravel() for col in Y.T)\n\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n delayed(_partial_fit_binary)(estimator, X, column)\n for estimator, column in zip(self.estimators_, columns))\n\n return self\n\n def predict(self, X):\n \"\"\"Predict multi-class targets using underlying estimators.\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n Returns\n -------\n y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Predicted multi-class targets.\n \"\"\"\n check_is_fitted(self)\n\n n_samples = _num_samples(X)\n if self.label_binarizer_.y_type_ == \"multiclass\":\n maxima = np.empty(n_samples, dtype=float)\n maxima.fill(-np.inf)\n argmaxima = np.zeros(n_samples, dtype=int)\n for i, e in enumerate(self.estimators_):\n pred = _predict_binary(e, X)\n np.maximum(maxima, pred, out=maxima)\n argmaxima[maxima == pred] = i\n return self.classes_[argmaxima]\n else:\n if (hasattr(self.estimators_[0], \"decision_function\") and\n is_classifier(self.estimators_[0])):\n thresh = 0\n else:\n thresh = .5\n indices = array.array('i')\n indptr = array.array('i', [0])\n for e in self.estimators_:\n indices.extend(np.where(_predict_binary(e, X) > thresh)[0])\n indptr.append(len(indices))\n data = np.ones(len(indices), dtype=int)\n indicator = sp.csc_matrix((data, indices, indptr),\n shape=(n_samples, len(self.estimators_)))\n return self.label_binarizer_.inverse_transform(indicator)\n\n @if_delegate_has_method(['_first_estimator', 'estimator'])\n def predict_proba(self, X):\n 
\"\"\"Probability estimates.\n\n The returned estimates for all classes are ordered by label of classes.\n\n Note that in the multilabel case, each sample can have any number of\n labels. This returns the marginal probability that the given sample has\n the label in question. For example, it is entirely consistent that two\n labels both have a 90% probability of applying to a given sample.\n\n In the single label multiclass case, the rows of the returned matrix\n sum to 1.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n T : (sparse) array-like of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in `self.classes_`.\n \"\"\"\n check_is_fitted(self)\n # Y[i, j] gives the probability that sample i has the label j.\n # In the multi-label case, these are not disjoint.\n Y = np.array([e.predict_proba(X)[:, 1] for e in self.estimators_]).T\n\n if len(self.estimators_) == 1:\n # Only one estimator, but we still want to return probabilities\n # for two classes.\n Y = np.concatenate(((1 - Y), Y), axis=1)\n\n if not self.multilabel_:\n # Then, probabilities should be normalized to 1.\n Y /= np.sum(Y, axis=1)[:, np.newaxis]\n return Y\n\n @if_delegate_has_method(['_first_estimator', 'estimator'])\n def decision_function(self, X):\n \"\"\"Returns the distance of each sample from the decision boundary for\n each class. This can only be used with estimators which implement the\n decision_function method.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n T : array-like of shape (n_samples, n_classes) or (n_samples,) for \\\n binary classification.\n\n .. versionchanged:: 0.19\n output shape changed to ``(n_samples,)`` to conform to\n scikit-learn conventions for binary classification.\n \"\"\"\n check_is_fitted(self)\n if len(self.estimators_) == 1:\n return self.estimators_[0].decision_function(X)\n return np.array([est.decision_function(X).ravel()\n for est in self.estimators_]).T\n\n @property\n def multilabel_(self):\n \"\"\"Whether this is a multilabel classifier\"\"\"\n return self.label_binarizer_.y_type_.startswith('multilabel')\n\n @property\n def n_classes_(self):\n return len(self.classes_)\n\n # TODO: Remove coef_ attribute in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute coef_ was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26). \"\n \"If you observe this warning while using RFE \"\n \"or SelectFromModel, use the importance_getter \"\n \"parameter instead.\")\n @property\n def coef_(self):\n check_is_fitted(self)\n if not hasattr(self.estimators_[0], \"coef_\"):\n raise AttributeError(\n \"Base estimator doesn't have a coef_ attribute.\")\n coefs = [e.coef_ for e in self.estimators_]\n if sp.issparse(coefs[0]):\n return sp.vstack(coefs)\n return np.vstack(coefs)\n\n # TODO: Remove intercept_ attribute in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute intercept_ was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26). 
\"\n \"If you observe this warning while using RFE \"\n \"or SelectFromModel, use the importance_getter \"\n \"parameter instead.\")\n @property\n def intercept_(self):\n check_is_fitted(self)\n if not hasattr(self.estimators_[0], \"intercept_\"):\n raise AttributeError(\n \"Base estimator doesn't have an intercept_ attribute.\")\n return np.array([e.intercept_.ravel() for e in self.estimators_])\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n \"\"\"Indicate if wrapped estimator is using a precomputed Gram matrix\"\"\"\n return getattr(self.estimator, \"_pairwise\", False)\n\n def _more_tags(self):\n \"\"\"Indicate if wrapped estimator is using a precomputed Gram matrix\"\"\"\n return {'pairwise': _safe_tags(self.estimator, key=\"pairwise\")}\n\n @property\n def _first_estimator(self):\n return self.estimators_[0]\n\n @property\n def n_features_in_(self):\n # For consistency with other estimators we raise a AttributeError so\n # that hasattr() fails if the OVR estimator isn't fitted.\n try:\n check_is_fitted(self)\n except NotFittedError as nfe:\n raise AttributeError(\n \"{} object has no n_features_in_ attribute.\"\n .format(self.__class__.__name__)\n ) from nfe\n return self.estimators_[0].n_features_in_", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.multiclass/OutputCodeClassifier", + "name": "OutputCodeClassifier", + "qname": "sklearn.multiclass.OutputCodeClassifier", + "decorators": [], + "superclasses": ["MetaEstimatorMixin", "ClassifierMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.multiclass/OutputCodeClassifier/__init__", + "scikit-learn/sklearn.multiclass/OutputCodeClassifier/fit", + "scikit-learn/sklearn.multiclass/OutputCodeClassifier/predict" + ], + "is_public": true, + "reexported_by": [], + "description": "(Error-Correcting) Output-Code multiclass strategy\n\nOutput-code based strategies consist in representing each class with a\nbinary code (an array of 0s and 1s). At fitting time, one binary\nclassifier per bit in the code book is fitted. At prediction time, the\nclassifiers are used to project new points in the class space and the class\nclosest to the points is chosen. The main advantage of these strategies is\nthat the number of classifiers used can be controlled by the user, either\nfor compressing the model (0 < code_size < 1) or for making the model more\nrobust to errors (code_size > 1). See the documentation for more details.\n\nRead more in the :ref:`User Guide `.", + "docstring": "(Error-Correcting) Output-Code multiclass strategy\n\nOutput-code based strategies consist in representing each class with a\nbinary code (an array of 0s and 1s). At fitting time, one binary\nclassifier per bit in the code book is fitted. At prediction time, the\nclassifiers are used to project new points in the class space and the class\nclosest to the points is chosen. The main advantage of these strategies is\nthat the number of classifiers used can be controlled by the user, either\nfor compressing the model (0 < code_size < 1) or for making the model more\nrobust to errors (code_size > 1). 
See the documentation for more details.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\ncode_size : float\n Percentage of the number of classes to be used to create the code book.\n A number between 0 and 1 will require fewer classifiers than\n one-vs-the-rest. A number greater than 1 will require more classifiers\n than one-vs-the-rest.\n\nrandom_state : int, RandomState instance, default=None\n The generator used to initialize the codebook.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the multiclass problems\n are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nestimators_ : list of `int(n_classes * code_size)` estimators\n Estimators used for predictions.\n\nclasses_ : numpy array of shape [n_classes]\n Array containing labels.\n\ncode_book_ : numpy array of shape [n_classes, code_size]\n Binary array containing the code of each class.\n\nExamples\n--------\n>>> from sklearn.multiclass import OutputCodeClassifier\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=100, n_features=4,\n... n_informative=2, n_redundant=0,\n... random_state=0, shuffle=False)\n>>> clf = OutputCodeClassifier(\n... estimator=RandomForestClassifier(random_state=0),\n... random_state=0).fit(X, y)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])\n\nReferences\n----------\n\n.. [1] \"Solving multiclass learning problems via error-correcting output\n codes\",\n Dietterich T., Bakiri G.,\n Journal of Artificial Intelligence Research 2,\n 1995.\n\n.. [2] \"The error coding method and PICTs\",\n James G., Hastie T.,\n Journal of Computational and Graphical statistics 7,\n 1998.\n\n.. [3] \"The Elements of Statistical Learning\",\n Hastie T., Tibshirani R., Friedman J., page 606 (second-edition)\n 2008.", + "code": "class OutputCodeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseEstimator):\n \"\"\"(Error-Correcting) Output-Code multiclass strategy\n\n Output-code based strategies consist in representing each class with a\n binary code (an array of 0s and 1s). At fitting time, one binary\n classifier per bit in the code book is fitted. At prediction time, the\n classifiers are used to project new points in the class space and the class\n closest to the points is chosen. The main advantage of these strategies is\n that the number of classifiers used can be controlled by the user, either\n for compressing the model (0 < code_size < 1) or for making the model more\n robust to errors (code_size > 1). See the documentation for more details.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\n code_size : float\n Percentage of the number of classes to be used to create the code book.\n A number between 0 and 1 will require fewer classifiers than\n one-vs-the-rest. 
A number greater than 1 will require more classifiers\n than one-vs-the-rest.\n\n random_state : int, RandomState instance, default=None\n The generator used to initialize the codebook.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation: the multiclass problems\n are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n estimators_ : list of `int(n_classes * code_size)` estimators\n Estimators used for predictions.\n\n classes_ : numpy array of shape [n_classes]\n Array containing labels.\n\n code_book_ : numpy array of shape [n_classes, code_size]\n Binary array containing the code of each class.\n\n Examples\n --------\n >>> from sklearn.multiclass import OutputCodeClassifier\n >>> from sklearn.ensemble import RandomForestClassifier\n >>> from sklearn.datasets import make_classification\n >>> X, y = make_classification(n_samples=100, n_features=4,\n ... n_informative=2, n_redundant=0,\n ... random_state=0, shuffle=False)\n >>> clf = OutputCodeClassifier(\n ... estimator=RandomForestClassifier(random_state=0),\n ... random_state=0).fit(X, y)\n >>> clf.predict([[0, 0, 0, 0]])\n array([1])\n\n References\n ----------\n\n .. [1] \"Solving multiclass learning problems via error-correcting output\n codes\",\n Dietterich T., Bakiri G.,\n Journal of Artificial Intelligence Research 2,\n 1995.\n\n .. [2] \"The error coding method and PICTs\",\n James G., Hastie T.,\n Journal of Computational and Graphical statistics 7,\n 1998.\n\n .. [3] \"The Elements of Statistical Learning\",\n Hastie T., Tibshirani R., Friedman J., page 606 (second-edition)\n 2008.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, estimator, *, code_size=1.5, random_state=None,\n n_jobs=None):\n self.estimator = estimator\n self.code_size = code_size\n self.random_state = random_state\n self.n_jobs = n_jobs\n\n def fit(self, X, y):\n \"\"\"Fit underlying estimators.\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n y : numpy array of shape [n_samples]\n Multi-class targets.\n\n Returns\n -------\n self\n \"\"\"\n X, y = self._validate_data(X, y, accept_sparse=True)\n if self.code_size <= 0:\n raise ValueError(\"code_size should be greater than 0, got {0}\"\n \"\".format(self.code_size))\n\n _check_estimator(self.estimator)\n random_state = check_random_state(self.random_state)\n check_classification_targets(y)\n\n self.classes_ = np.unique(y)\n n_classes = self.classes_.shape[0]\n code_size_ = int(n_classes * self.code_size)\n\n # FIXME: there are more elaborate methods than generating the codebook\n # randomly.\n self.code_book_ = random_state.random_sample((n_classes, code_size_))\n self.code_book_[self.code_book_ > 0.5] = 1\n\n if hasattr(self.estimator, \"decision_function\"):\n self.code_book_[self.code_book_ != 1] = -1\n else:\n self.code_book_[self.code_book_ != 1] = 0\n\n classes_index = {c: i for i, c in enumerate(self.classes_)}\n\n Y = np.array([self.code_book_[classes_index[y[i]]]\n for i in range(X.shape[0])], dtype=int)\n\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n delayed(_fit_binary)(self.estimator, X, Y[:, i])\n for i in range(Y.shape[1]))\n\n return self\n\n def predict(self, X):\n \"\"\"Predict multi-class targets using underlying estimators.\n\n Parameters\n ----------\n 
X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n Returns\n -------\n y : numpy array of shape [n_samples]\n Predicted multi-class targets.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse=True)\n Y = np.array([_predict_binary(e, X) for e in self.estimators_]).T\n pred = euclidean_distances(Y, self.code_book_).argmin(axis=1)\n return self.classes_[pred]", + "instance_attributes": [ + { + "name": "code_size", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor", + "name": "_ConstantPredictor", + "qname": "sklearn.multiclass._ConstantPredictor", + "decorators": [], + "superclasses": ["BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.multiclass/_ConstantPredictor/fit", + "scikit-learn/sklearn.multiclass/_ConstantPredictor/predict", + "scikit-learn/sklearn.multiclass/_ConstantPredictor/decision_function", + "scikit-learn/sklearn.multiclass/_ConstantPredictor/predict_proba" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _ConstantPredictor(BaseEstimator):\n\n def fit(self, X, y):\n self.y_ = y\n return self\n\n def predict(self, X):\n check_is_fitted(self)\n\n return np.repeat(self.y_, X.shape[0])\n\n def decision_function(self, X):\n check_is_fitted(self)\n\n return np.repeat(self.y_, X.shape[0])\n\n def predict_proba(self, X):\n check_is_fitted(self)\n\n return np.repeat([np.hstack([1 - self.y_, self.y_])],\n X.shape[0], axis=0)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.multioutput/ClassifierChain", + "name": "ClassifierChain", + "qname": "sklearn.multioutput.ClassifierChain", + "decorators": [], + "superclasses": ["MetaEstimatorMixin", "ClassifierMixin", "_BaseChain"], + "methods": [ + "scikit-learn/sklearn.multioutput/ClassifierChain/fit", + "scikit-learn/sklearn.multioutput/ClassifierChain/predict_proba", + "scikit-learn/sklearn.multioutput/ClassifierChain/decision_function", + "scikit-learn/sklearn.multioutput/ClassifierChain/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "A multi-label model that arranges binary classifiers into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.19", + "docstring": "A multi-label model that arranges binary classifiers into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.19\n\nParameters\n----------\nbase_estimator : estimator\n The base estimator from which the classifier chain is built.\n\norder : array-like of shape (n_outputs,) or 'random', default=None\n If None, the order will be determined by the order of columns in\n the label matrix Y.::\n\n order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n The order of the chain can be explicitly set by providing a list of\n integers. 
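Editor's note: per the `fit` code serialized above, `OutputCodeClassifier` draws a random codebook with `int(n_classes * code_size)` columns and fits one binary estimator per column. A sketch of that arithmetic (illustrative, assuming scikit-learn 0.24):

```python
# code_size > 1 over-determines the code, trading extra estimators for
# error-correcting redundancy: 3 classes * 2.0 -> 6 codebook columns.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OutputCodeClassifier

X, y = make_classification(n_samples=100, n_features=4, n_informative=3,
                           n_redundant=0, n_classes=3, random_state=0)
clf = OutputCodeClassifier(RandomForestClassifier(random_state=0),
                           code_size=2.0, random_state=0).fit(X, y)
print(len(clf.estimators_), clf.code_book_.shape)  # 6 (3, 6)
```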
For example, for a chain of length 5.::\n\n order = [1, 3, 2, 4, 0]\n\n means that the first model in the chain will make predictions for\n column 1 in the Y matrix, the second model will make predictions\n for column 3, etc.\n\n If order is 'random' a random ordering will be used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines whether to use cross validated predictions or true\n labels for the results of previous estimators in the chain.\n Possible inputs for cv are:\n\n - None, to use true labels when fitting,\n - integer, to specify the number of folds in a (Stratified)KFold,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\nrandom_state : int, RandomState instance or None, optional (default=None)\n If ``order='random'``, determines random number generation for the\n chain order.\n In addition, it controls the random seed given at each `base_estimator`\n at each chaining iteration. Thus, it is only used when `base_estimator`\n exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nclasses_ : list\n A list of arrays of length ``len(estimators_)`` containing the\n class labels for each estimator in the chain.\n\nestimators_ : list\n A list of clones of base_estimator.\n\norder_ : list\n The order of labels in the classifier chain.\n\nExamples\n--------\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.multioutput import ClassifierChain\n>>> X, Y = make_multilabel_classification(\n... n_samples=12, n_classes=3, random_state=0\n... )\n>>> X_train, X_test, Y_train, Y_test = train_test_split(\n... X, Y, random_state=0\n... )\n>>> base_lr = LogisticRegression(solver='lbfgs', random_state=0)\n>>> chain = ClassifierChain(base_lr, order='random', random_state=0)\n>>> chain.fit(X_train, Y_train).predict(X_test)\narray([[1., 1., 0.],\n [1., 0., 0.],\n [0., 1., 0.]])\n>>> chain.predict_proba(X_test)\narray([[0.8387..., 0.9431..., 0.4576...],\n [0.8878..., 0.3684..., 0.2640...],\n [0.0321..., 0.9935..., 0.0625...]])\n\nSee Also\n--------\nRegressorChain : Equivalent for regression.\nMultioutputClassifier : Classifies each output independently rather than\n chaining.\n\nReferences\n----------\nJesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, \"Classifier\nChains for Multi-label Classification\", 2009.", + "code": "class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain):\n \"\"\"A multi-label model that arranges binary classifiers into a chain.\n\n Each model makes a prediction in the order specified by the chain using\n all of the available features provided to the model plus the predictions\n of models that are earlier in the chain.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.19\n\n Parameters\n ----------\n base_estimator : estimator\n The base estimator from which the classifier chain is built.\n\n order : array-like of shape (n_outputs,) or 'random', default=None\n If None, the order will be determined by the order of columns in\n the label matrix Y.::\n\n order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n The order of the chain can be explicitly set by providing a list of\n integers. 
For example, for a chain of length 5.::\n\n order = [1, 3, 2, 4, 0]\n\n means that the first model in the chain will make predictions for\n column 1 in the Y matrix, the second model will make predictions\n for column 3, etc.\n\n If order is 'random' a random ordering will be used.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines whether to use cross validated predictions or true\n labels for the results of previous estimators in the chain.\n Possible inputs for cv are:\n\n - None, to use true labels when fitting,\n - integer, to specify the number of folds in a (Stratified)KFold,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n random_state : int, RandomState instance or None, optional (default=None)\n If ``order='random'``, determines random number generation for the\n chain order.\n In addition, it controls the random seed given at each `base_estimator`\n at each chaining iteration. Thus, it is only used when `base_estimator`\n exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n classes_ : list\n A list of arrays of length ``len(estimators_)`` containing the\n class labels for each estimator in the chain.\n\n estimators_ : list\n A list of clones of base_estimator.\n\n order_ : list\n The order of labels in the classifier chain.\n\n Examples\n --------\n >>> from sklearn.datasets import make_multilabel_classification\n >>> from sklearn.linear_model import LogisticRegression\n >>> from sklearn.model_selection import train_test_split\n >>> from sklearn.multioutput import ClassifierChain\n >>> X, Y = make_multilabel_classification(\n ... n_samples=12, n_classes=3, random_state=0\n ... )\n >>> X_train, X_test, Y_train, Y_test = train_test_split(\n ... X, Y, random_state=0\n ... 
)\n >>> base_lr = LogisticRegression(solver='lbfgs', random_state=0)\n >>> chain = ClassifierChain(base_lr, order='random', random_state=0)\n >>> chain.fit(X_train, Y_train).predict(X_test)\n array([[1., 1., 0.],\n [1., 0., 0.],\n [0., 1., 0.]])\n >>> chain.predict_proba(X_test)\n array([[0.8387..., 0.9431..., 0.4576...],\n [0.8878..., 0.3684..., 0.2640...],\n [0.0321..., 0.9935..., 0.0625...]])\n\n See Also\n --------\n RegressorChain : Equivalent for regression.\n MultiOutputClassifier : Classifies each output independently rather than\n chaining.\n\n References\n ----------\n Jesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, \"Classifier\n Chains for Multi-label Classification\", 2009.\n \"\"\"\n\n def fit(self, X, Y):\n \"\"\"Fit the model to data matrix X and targets Y.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n Y : array-like of shape (n_samples, n_classes)\n The target values.\n\n Returns\n -------\n self : object\n \"\"\"\n super().fit(X, Y)\n self.classes_ = [estimator.classes_\n for chain_idx, estimator\n in enumerate(self.estimators_)]\n return self\n\n @if_delegate_has_method('base_estimator')\n def predict_proba(self, X):\n \"\"\"Predict probability estimates.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n\n Returns\n -------\n Y_prob : array-like of shape (n_samples, n_classes)\n \"\"\"\n X = check_array(X, accept_sparse=True)\n Y_prob_chain = np.zeros((X.shape[0], len(self.estimators_)))\n Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))\n for chain_idx, estimator in enumerate(self.estimators_):\n previous_predictions = Y_pred_chain[:, :chain_idx]\n if sp.issparse(X):\n X_aug = sp.hstack((X, previous_predictions))\n else:\n X_aug = np.hstack((X, previous_predictions))\n Y_prob_chain[:, chain_idx] = estimator.predict_proba(X_aug)[:, 1]\n Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)\n inv_order = np.empty_like(self.order_)\n inv_order[self.order_] = np.arange(len(self.order_))\n Y_prob = Y_prob_chain[:, inv_order]\n\n return Y_prob\n\n @if_delegate_has_method('base_estimator')\n def decision_function(self, X):\n \"\"\"Evaluate the decision_function of the models in the chain.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n Y_decision : array-like of shape (n_samples, n_classes)\n Returns the decision function of the sample for each model\n in the chain.\n \"\"\"\n Y_decision_chain = np.zeros((X.shape[0], len(self.estimators_)))\n Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))\n for chain_idx, estimator in enumerate(self.estimators_):\n previous_predictions = Y_pred_chain[:, :chain_idx]\n if sp.issparse(X):\n X_aug = sp.hstack((X, previous_predictions))\n else:\n X_aug = np.hstack((X, previous_predictions))\n Y_decision_chain[:, chain_idx] = estimator.decision_function(X_aug)\n Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)\n\n inv_order = np.empty_like(self.order_)\n inv_order[self.order_] = np.arange(len(self.order_))\n Y_decision = Y_decision_chain[:, inv_order]\n\n return Y_decision\n\n def _more_tags(self):\n return {'_skip_test': True,\n 'multioutput_only': True}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier", + "name": "MultiOutputClassifier", + "qname": "sklearn.multioutput.MultiOutputClassifier", + "decorators": [], + "superclasses": ["ClassifierMixin", "_MultiOutputEstimator"], + 
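A minimal usage sketch of the ClassifierChain API documented in the entry above; the sample count, the train/test split, and the closing remark on cv are illustrative assumptions, not part of the dumped metadata:

from sklearn.datasets import make_multilabel_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.multioutput import ClassifierChain

X, Y = make_multilabel_classification(n_samples=100, n_classes=3, random_state=0)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

# order='random' permutes the three label columns; each link is trained on
# X augmented with the earlier targets, and predict_proba maps the chained
# outputs back to the original column order via inv_order (see code above).
chain = ClassifierChain(LogisticRegression(), order='random', random_state=0)
chain.fit(X_train, Y_train)
print(chain.order_)                       # fitted permutation of [0, 1, 2]
print(chain.predict_proba(X_test).shape)  # (n_test_samples, 3)

# Passing cv=3 instead would train later links on out-of-fold predictions
# rather than the true earlier labels (the cv branch of _BaseChain.fit below).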
"methods": [ + "scikit-learn/sklearn.multioutput/MultiOutputClassifier/__init__", + "scikit-learn/sklearn.multioutput/MultiOutputClassifier/fit", + "scikit-learn/sklearn.multioutput/MultiOutputClassifier/predict_proba@getter", + "scikit-learn/sklearn.multioutput/MultiOutputClassifier/_predict_proba", + "scikit-learn/sklearn.multioutput/MultiOutputClassifier/score", + "scikit-learn/sklearn.multioutput/MultiOutputClassifier/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "Multi target classification\n\nThis strategy consists of fitting one classifier per target. This is a\nsimple strategy for extending classifiers that do not natively support\nmulti-target classification", + "docstring": "Multi target classification\n\nThis strategy consists of fitting one classifier per target. This is a\nsimple strategy for extending classifiers that do not natively support\nmulti-target classification\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit`, :term:`score` and\n :term:`predict_proba`.\n\nn_jobs : int or None, optional (default=None)\n The number of jobs to run in parallel.\n :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n by the passed estimator) will be parallelized for each target.\n\n When individual estimators are fast to train or predict,\n using ``n_jobs > 1`` can result in slower performance due\n to the parallelism overhead.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all available processes / threads.\n See :term:`Glossary ` for more details.\n\n .. versionchanged:: 0.20\n `n_jobs` default changed from 1 to None\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n Class labels.\n\nestimators_ : list of ``n_output`` estimators\n Estimators used for predictions.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.multioutput import MultiOutputClassifier\n>>> from sklearn.neighbors import KNeighborsClassifier\n\n>>> X, y = make_multilabel_classification(n_classes=3, random_state=0)\n>>> clf = MultiOutputClassifier(KNeighborsClassifier()).fit(X, y)\n>>> clf.predict(X[-2:])\narray([[1, 1, 0], [1, 1, 1]])", + "code": "class MultiOutputClassifier(ClassifierMixin, _MultiOutputEstimator):\n \"\"\"Multi target classification\n\n This strategy consists of fitting one classifier per target. This is a\n simple strategy for extending classifiers that do not natively support\n multi-target classification\n\n Parameters\n ----------\n estimator : estimator object\n An estimator object implementing :term:`fit`, :term:`score` and\n :term:`predict_proba`.\n\n n_jobs : int or None, optional (default=None)\n The number of jobs to run in parallel.\n :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n by the passed estimator) will be parallelized for each target.\n\n When individual estimators are fast to train or predict,\n using ``n_jobs > 1`` can result in slower performance due\n to the parallelism overhead.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all available processes / threads.\n See :term:`Glossary ` for more details.\n\n .. 
versionchanged:: 0.20\n `n_jobs` default changed from 1 to None\n\n Attributes\n ----------\n classes_ : ndarray of shape (n_classes,)\n Class labels.\n\n estimators_ : list of ``n_output`` estimators\n Estimators used for predictions.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.datasets import make_multilabel_classification\n >>> from sklearn.multioutput import MultiOutputClassifier\n >>> from sklearn.neighbors import KNeighborsClassifier\n\n >>> X, y = make_multilabel_classification(n_classes=3, random_state=0)\n >>> clf = MultiOutputClassifier(KNeighborsClassifier()).fit(X, y)\n >>> clf.predict(X[-2:])\n array([[1, 1, 0], [1, 1, 1]])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, estimator, *, n_jobs=None):\n super().__init__(estimator, n_jobs=n_jobs)\n\n def fit(self, X, Y, sample_weight=None, **fit_params):\n \"\"\"Fit the model to data matrix X and targets Y.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n Y : array-like of shape (n_samples, n_classes)\n The target values.\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying classifier supports sample\n weights.\n **fit_params : dict of string -> object\n Parameters passed to the ``estimator.fit`` method of each step.\n\n .. versionadded:: 0.23\n\n Returns\n -------\n self : object\n \"\"\"\n super().fit(X, Y, sample_weight, **fit_params)\n self.classes_ = [estimator.classes_ for estimator in self.estimators_]\n return self\n\n @property\n def predict_proba(self):\n \"\"\"Probability estimates.\n Returns prediction probabilities for each class of each output.\n\n This method will raise a ``ValueError`` if any of the\n estimators do not have ``predict_proba``.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data\n\n Returns\n -------\n p : array of shape (n_samples, n_classes), or a list of n_outputs \\\n such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n\n .. 
versionchanged:: 0.19\n This function now returns a list of arrays where the length of\n the list is ``n_outputs``, and each array is (``n_samples``,\n ``n_classes``) for that particular output.\n \"\"\"\n check_is_fitted(self)\n if not all([hasattr(estimator, \"predict_proba\")\n for estimator in self.estimators_]):\n raise AttributeError(\"The base estimator should \"\n \"implement predict_proba method\")\n return self._predict_proba\n\n def _predict_proba(self, X):\n results = [estimator.predict_proba(X) for estimator in\n self.estimators_]\n return results\n\n def score(self, X, y):\n \"\"\"Returns the mean accuracy on the given test data and labels.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test samples\n\n y : array-like of shape (n_samples, n_outputs)\n True values for X\n\n Returns\n -------\n scores : float\n accuracy_score of self.predict(X) versus y\n \"\"\"\n check_is_fitted(self)\n n_outputs_ = len(self.estimators_)\n if y.ndim == 1:\n raise ValueError(\"y must have at least two dimensions for \"\n \"multi target classification but has only one\")\n if y.shape[1] != n_outputs_:\n raise ValueError(\"The number of outputs of Y for fit {0} and\"\n \" score {1} should be same\".\n format(n_outputs_, y.shape[1]))\n y_pred = self.predict(X)\n return np.mean(np.all(y == y_pred, axis=1))\n\n def _more_tags(self):\n # FIXME\n return {'_skip_test': True}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputRegressor", + "name": "MultiOutputRegressor", + "qname": "sklearn.multioutput.MultiOutputRegressor", + "decorators": [], + "superclasses": ["RegressorMixin", "_MultiOutputEstimator"], + "methods": [ + "scikit-learn/sklearn.multioutput/MultiOutputRegressor/__init__", + "scikit-learn/sklearn.multioutput/MultiOutputRegressor/partial_fit" + ], + "is_public": true, + "reexported_by": [], + "description": "Multi target regression\n\nThis strategy consists of fitting one regressor per target. This is a\nsimple strategy for extending regressors that do not natively support\nmulti-target regression.\n\n.. versionadded:: 0.18", + "docstring": "Multi target regression\n\nThis strategy consists of fitting one regressor per target. This is a\nsimple strategy for extending regressors that do not natively support\nmulti-target regression.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and :term:`predict`.\n\nn_jobs : int or None, optional (default=None)\n The number of jobs to run in parallel.\n :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n by the passed estimator) will be parallelized for each target.\n\n When individual estimators are fast to train or predict,\n using ``n_jobs > 1`` can result in slower performance due\n to the parallelism overhead.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all available processes / threads.\n See :term:`Glossary ` for more details.\n\n .. 
versionchanged:: 0.20\n `n_jobs` default changed from 1 to None\n\nAttributes\n----------\nestimators_ : list of ``n_output`` estimators\n Estimators used for predictions.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import load_linnerud\n>>> from sklearn.multioutput import MultiOutputRegressor\n>>> from sklearn.linear_model import Ridge\n>>> X, y = load_linnerud(return_X_y=True)\n>>> clf = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)\n>>> clf.predict(X[[0]])\narray([[176..., 35..., 57...]])", + "code": "class MultiOutputRegressor(RegressorMixin, _MultiOutputEstimator):\n \"\"\"Multi target regression\n\n This strategy consists of fitting one regressor per target. This is a\n simple strategy for extending regressors that do not natively support\n multi-target regression.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n estimator : estimator object\n An estimator object implementing :term:`fit` and :term:`predict`.\n\n n_jobs : int or None, optional (default=None)\n The number of jobs to run in parallel.\n :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n by the passed estimator) will be parallelized for each target.\n\n When individual estimators are fast to train or predict,\n using ``n_jobs > 1`` can result in slower performance due\n to the parallelism overhead.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all available processes / threads.\n See :term:`Glossary ` for more details.\n\n .. versionchanged:: 0.20\n `n_jobs` default changed from 1 to None\n\n Attributes\n ----------\n estimators_ : list of ``n_output`` estimators\n Estimators used for predictions.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.datasets import load_linnerud\n >>> from sklearn.multioutput import MultiOutputRegressor\n >>> from sklearn.linear_model import Ridge\n >>> X, y = load_linnerud(return_X_y=True)\n >>> clf = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)\n >>> clf.predict(X[[0]])\n array([[176..., 35..., 57...]])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, estimator, *, n_jobs=None):\n super().__init__(estimator, n_jobs=n_jobs)\n\n @if_delegate_has_method('estimator')\n def partial_fit(self, X, y, sample_weight=None):\n \"\"\"Incrementally fit the model to data.\n Fit a separate model for each output variable.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\n y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\n Returns\n -------\n self : object\n \"\"\"\n super().partial_fit(\n X, y, sample_weight=sample_weight)\n return self", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.multioutput/RegressorChain", + "name": "RegressorChain", + "qname": "sklearn.multioutput.RegressorChain", + "decorators": [], + "superclasses": ["MetaEstimatorMixin", "RegressorMixin", "_BaseChain"], + "methods": [ + "scikit-learn/sklearn.multioutput/RegressorChain/fit", + "scikit-learn/sklearn.multioutput/RegressorChain/_more_tags" + ], + "is_public": true, + "reexported_by": [], + "description": "A multi-label model that arranges regressions into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "docstring": "A multi-label model that arranges regressions into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nbase_estimator : estimator\n The base estimator from which the classifier chain is built.\n\norder : array-like of shape (n_outputs,) or 'random', default=None\n If None, the order will be determined by the order of columns in\n the label matrix Y.::\n\n order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n The order of the chain can be explicitly set by providing a list of\n integers. For example, for a chain of length 5.::\n\n order = [1, 3, 2, 4, 0]\n\n means that the first model in the chain will make predictions for\n column 1 in the Y matrix, the second model will make predictions\n for column 3, etc.\n\n If order is 'random' a random ordering will be used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines whether to use cross validated predictions or true\n labels for the results of previous estimators in the chain.\n Possible inputs for cv are:\n\n - None, to use true labels when fitting,\n - integer, to specify the number of folds in a (Stratified)KFold,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\nrandom_state : int, RandomState instance or None, optional (default=None)\n If ``order='random'``, determines random number generation for the\n chain order.\n In addition, it controls the random seed given at each `base_estimator`\n at each chaining iteration. 
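MultiOutputRegressor's partial_fit above is delegated once per target column; a small out-of-core sketch, where the SGDRegressor choice, data shapes, and batch split are illustrative assumptions:

import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.multioutput import MultiOutputRegressor

rng = np.random.RandomState(0)
X = rng.randn(40, 4)
y = rng.randn(40, 3)                      # three regression targets

# partial_fit is only exposed because SGDRegressor itself has one
# (if_delegate_has_method above); one clone per target column is updated.
reg = MultiOutputRegressor(SGDRegressor(max_iter=5, tol=None))
for idx in np.array_split(np.arange(40), 4):
    reg.partial_fit(X[idx], y[idx])
print(reg.predict(X[:2]).shape)           # (2, 3)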
Thus, it is only used when `base_estimator`\n exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nestimators_ : list\n A list of clones of base_estimator.\n\norder_ : list\n The order of labels in the classifier chain.\n\nExamples\n--------\n>>> from sklearn.multioutput import RegressorChain\n>>> from sklearn.linear_model import LogisticRegression\n>>> logreg = LogisticRegression(solver='lbfgs',multi_class='multinomial')\n>>> X, Y = [[1, 0], [0, 1], [1, 1]], [[0, 2], [1, 1], [2, 0]]\n>>> chain = RegressorChain(base_estimator=logreg, order=[0, 1]).fit(X, Y)\n>>> chain.predict(X)\narray([[0., 2.],\n [1., 1.],\n [2., 0.]])\n\nSee Also\n--------\nClassifierChain : Equivalent for classification.\nMultiOutputRegressor : Learns each output independently rather than\n chaining.", + "code": "class RegressorChain(MetaEstimatorMixin, RegressorMixin, _BaseChain):\n \"\"\"A multi-label model that arranges regressions into a chain.\n\n Each model makes a prediction in the order specified by the chain using\n all of the available features provided to the model plus the predictions\n of models that are earlier in the chain.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.20\n\n Parameters\n ----------\n base_estimator : estimator\n The base estimator from which the classifier chain is built.\n\n order : array-like of shape (n_outputs,) or 'random', default=None\n If None, the order will be determined by the order of columns in\n the label matrix Y.::\n\n order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n The order of the chain can be explicitly set by providing a list of\n integers. For example, for a chain of length 5.::\n\n order = [1, 3, 2, 4, 0]\n\n means that the first model in the chain will make predictions for\n column 1 in the Y matrix, the second model will make predictions\n for column 3, etc.\n\n If order is 'random' a random ordering will be used.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines whether to use cross validated predictions or true\n labels for the results of previous estimators in the chain.\n Possible inputs for cv are:\n\n - None, to use true labels when fitting,\n - integer, to specify the number of folds in a (Stratified)KFold,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n random_state : int, RandomState instance or None, optional (default=None)\n If ``order='random'``, determines random number generation for the\n chain order.\n In addition, it controls the random seed given at each `base_estimator`\n at each chaining iteration. 
Thus, it is only used when `base_estimator`\n exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n estimators_ : list\n A list of clones of base_estimator.\n\n order_ : list\n The order of labels in the classifier chain.\n\n Examples\n --------\n >>> from sklearn.multioutput import RegressorChain\n >>> from sklearn.linear_model import LogisticRegression\n >>> logreg = LogisticRegression(solver='lbfgs',multi_class='multinomial')\n >>> X, Y = [[1, 0], [0, 1], [1, 1]], [[0, 2], [1, 1], [2, 0]]\n >>> chain = RegressorChain(base_estimator=logreg, order=[0, 1]).fit(X, Y)\n >>> chain.predict(X)\n array([[0., 2.],\n [1., 1.],\n [2., 0.]])\n\n See Also\n --------\n ClassifierChain : Equivalent for classification.\n MultiOutputRegressor : Learns each output independently rather than\n chaining.\n\n \"\"\"\n\n def fit(self, X, Y, **fit_params):\n \"\"\"Fit the model to data matrix X and targets Y.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n Y : array-like of shape (n_samples, n_classes)\n The target values.\n\n **fit_params : dict of string -> object\n Parameters passed to the `fit` method at each step\n of the regressor chain.\n\n .. versionadded:: 0.23\n\n Returns\n -------\n self : object\n \"\"\"\n super().fit(X, Y, **fit_params)\n return self\n\n def _more_tags(self):\n return {'multioutput_only': True}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain", + "name": "_BaseChain", + "qname": "sklearn.multioutput._BaseChain", + "decorators": [], + "superclasses": ["BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.multioutput/_BaseChain/__init__", + "scikit-learn/sklearn.multioutput/_BaseChain/fit", + "scikit-learn/sklearn.multioutput/_BaseChain/predict" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _BaseChain(BaseEstimator, metaclass=ABCMeta):\n @_deprecate_positional_args\n def __init__(self, base_estimator, *, order=None, cv=None,\n random_state=None):\n self.base_estimator = base_estimator\n self.order = order\n self.cv = cv\n self.random_state = random_state\n\n @abstractmethod\n def fit(self, X, Y, **fit_params):\n \"\"\"Fit the model to data matrix X and targets Y.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n Y : array-like of shape (n_samples, n_classes)\n The target values.\n **fit_params : dict of string -> object\n Parameters passed to the `fit` method of each step.\n\n .. 
versionadded:: 0.23\n\n Returns\n -------\n self : object\n \"\"\"\n X, Y = self._validate_data(X, Y, multi_output=True, accept_sparse=True)\n\n random_state = check_random_state(self.random_state)\n check_array(X, accept_sparse=True)\n self.order_ = self.order\n if isinstance(self.order_, tuple):\n self.order_ = np.array(self.order_)\n\n if self.order_ is None:\n self.order_ = np.array(range(Y.shape[1]))\n elif isinstance(self.order_, str):\n if self.order_ == 'random':\n self.order_ = random_state.permutation(Y.shape[1])\n elif sorted(self.order_) != list(range(Y.shape[1])):\n raise ValueError(\"invalid order\")\n\n self.estimators_ = [clone(self.base_estimator)\n for _ in range(Y.shape[1])]\n\n if self.cv is None:\n Y_pred_chain = Y[:, self.order_]\n if sp.issparse(X):\n X_aug = sp.hstack((X, Y_pred_chain), format='lil')\n X_aug = X_aug.tocsr()\n else:\n X_aug = np.hstack((X, Y_pred_chain))\n\n elif sp.issparse(X):\n Y_pred_chain = sp.lil_matrix((X.shape[0], Y.shape[1]))\n X_aug = sp.hstack((X, Y_pred_chain), format='lil')\n\n else:\n Y_pred_chain = np.zeros((X.shape[0], Y.shape[1]))\n X_aug = np.hstack((X, Y_pred_chain))\n\n del Y_pred_chain\n\n for chain_idx, estimator in enumerate(self.estimators_):\n y = Y[:, self.order_[chain_idx]]\n estimator.fit(X_aug[:, :(X.shape[1] + chain_idx)], y,\n **fit_params)\n if self.cv is not None and chain_idx < len(self.estimators_) - 1:\n col_idx = X.shape[1] + chain_idx\n cv_result = cross_val_predict(\n self.base_estimator, X_aug[:, :col_idx],\n y=y, cv=self.cv)\n if sp.issparse(X_aug):\n X_aug[:, col_idx] = np.expand_dims(cv_result, 1)\n else:\n X_aug[:, col_idx] = cv_result\n\n return self\n\n def predict(self, X):\n \"\"\"Predict on the data matrix X using the ClassifierChain model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n Y_pred : array-like of shape (n_samples, n_classes)\n The predicted values.\n\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse=True)\n Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))\n for chain_idx, estimator in enumerate(self.estimators_):\n previous_predictions = Y_pred_chain[:, :chain_idx]\n if sp.issparse(X):\n if chain_idx == 0:\n X_aug = X\n else:\n X_aug = sp.hstack((X, previous_predictions))\n else:\n X_aug = np.hstack((X, previous_predictions))\n Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)\n\n inv_order = np.empty_like(self.order_)\n inv_order[self.order_] = np.arange(len(self.order_))\n Y_pred = Y_pred_chain[:, inv_order]\n\n return Y_pred", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator", + "name": "_MultiOutputEstimator", + "qname": "sklearn.multioutput._MultiOutputEstimator", + "decorators": [], + "superclasses": ["MetaEstimatorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/__init__", + "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/partial_fit", + "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/fit", + "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/predict", + "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _MultiOutputEstimator(MetaEstimatorMixin,\n BaseEstimator,\n metaclass=ABCMeta):\n @abstractmethod\n @_deprecate_positional_args\n def __init__(self, estimator, *, n_jobs=None):\n self.estimator = estimator\n 
self.n_jobs = n_jobs\n\n @if_delegate_has_method('estimator')\n def partial_fit(self, X, y, classes=None, sample_weight=None):\n \"\"\"Incrementally fit the model to data.\n Fit a separate model for each output variable.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\n y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets.\n\n classes : list of ndarray of shape (n_outputs,)\n Each array holds the unique classes for one output (str or int).\n Can be obtained via\n ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where y is the\n target matrix of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = check_X_y(X, y,\n force_all_finite=False,\n multi_output=True,\n accept_sparse=True)\n\n if y.ndim == 1:\n raise ValueError(\"y must have at least two dimensions for \"\n \"multi-output regression but has only one.\")\n\n if (sample_weight is not None and\n not has_fit_parameter(self.estimator, 'sample_weight')):\n raise ValueError(\"Underlying estimator does not support\"\n \" sample weights.\")\n\n first_time = not hasattr(self, 'estimators_')\n\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n delayed(_partial_fit_estimator)(\n self.estimators_[i] if not first_time else self.estimator,\n X, y[:, i],\n classes[i] if classes is not None else None,\n sample_weight, first_time) for i in range(y.shape[1]))\n return self\n\n def fit(self, X, y, sample_weight=None, **fit_params):\n \"\"\" Fit the model to data.\n Fit a separate model for each output variable.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\n y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets. An indicator matrix turns on multilabel\n estimation.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\n **fit_params : dict of string -> object\n Parameters passed to the ``estimator.fit`` method of each step.\n\n .. 
versionadded:: 0.23\n\n Returns\n -------\n self : object\n \"\"\"\n\n if not hasattr(self.estimator, \"fit\"):\n raise ValueError(\"The base estimator should implement\"\n \" a fit method\")\n\n X, y = self._validate_data(X, y,\n force_all_finite=False,\n multi_output=True, accept_sparse=True)\n\n if is_classifier(self):\n check_classification_targets(y)\n\n if y.ndim == 1:\n raise ValueError(\"y must have at least two dimensions for \"\n \"multi-output regression but has only one.\")\n\n if (sample_weight is not None and\n not has_fit_parameter(self.estimator, 'sample_weight')):\n raise ValueError(\"Underlying estimator does not support\"\n \" sample weights.\")\n\n fit_params_validated = _check_fit_params(X, fit_params)\n\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n delayed(_fit_estimator)(\n self.estimator, X, y[:, i], sample_weight,\n **fit_params_validated)\n for i in range(y.shape[1]))\n return self\n\n def predict(self, X):\n \"\"\"Predict multi-output variable using a model\n trained for each target variable.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\n Returns\n -------\n y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets predicted across multiple predictors.\n Note: Separate models are generated for each predictor.\n \"\"\"\n check_is_fitted(self)\n if not hasattr(self.estimators_[0], \"predict\"):\n raise ValueError(\"The base estimator should implement\"\n \" a predict method\")\n\n X = check_array(X, force_all_finite=False, accept_sparse=True)\n\n y = Parallel(n_jobs=self.n_jobs)(\n delayed(e.predict)(X)\n for e in self.estimators_)\n\n return np.asarray(y).T\n\n def _more_tags(self):\n return {'multioutput_only': True}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB", + "name": "BernoulliNB", + "qname": "sklearn.naive_bayes.BernoulliNB", + "decorators": [], + "superclasses": ["_BaseDiscreteNB"], + "methods": [ + "scikit-learn/sklearn.naive_bayes/BernoulliNB/__init__", + "scikit-learn/sklearn.naive_bayes/BernoulliNB/_check_X", + "scikit-learn/sklearn.naive_bayes/BernoulliNB/_check_X_y", + "scikit-learn/sklearn.naive_bayes/BernoulliNB/_count", + "scikit-learn/sklearn.naive_bayes/BernoulliNB/_update_feature_log_prob", + "scikit-learn/sklearn.naive_bayes/BernoulliNB/_joint_log_likelihood" + ], + "is_public": true, + "reexported_by": [], + "description": "Naive Bayes classifier for multivariate Bernoulli models.\n\nLike MultinomialNB, this classifier is suitable for discrete data. The\ndifference is that while MultinomialNB works with occurrence counts,\nBernoulliNB is designed for binary/boolean features.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Naive Bayes classifier for multivariate Bernoulli models.\n\nLike MultinomialNB, this classifier is suitable for discrete data. 
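The partial_fit contract above requires classes on the first call; a sketch through the public MultiOutputClassifier wrapper, where SGDClassifier and the label layout are illustrative assumptions:

import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.multioutput import MultiOutputClassifier

rng = np.random.RandomState(0)
X = rng.randn(30, 4)
y = np.column_stack([rng.randint(0, 2, 30), rng.randint(0, 3, 30)])

# classes lists every label per output, derived exactly as the
# partial_fit docstring above suggests; later calls may omit it.
classes = [np.unique(y[:, i]) for i in range(y.shape[1])]
clf = MultiOutputClassifier(SGDClassifier(random_state=0))
clf.partial_fit(X[:15], y[:15], classes=classes)
clf.partial_fit(X[15:], y[15:])
print(clf.predict(X[:3]))                 # shape (3, 2)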
The\ndifference is that while MultinomialNB works with occurrence counts,\nBernoulliNB is designed for binary/boolean features.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\nbinarize : float or None, default=0.0\n Threshold for binarizing (mapping to booleans) of sample features.\n If None, input is presumed to already consist of binary vectors.\n\nfit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes)\n Log probability of each class (smoothed).\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\ncoef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `BernoulliNB`\n as a linear model.\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature)\n during fitting. This value is weighted by the sample weight when\n provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical log probability of features given a class, P(x_i|y).\n\nintercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `BernoulliNB`\n as a linear model.\n\nn_features_ : int\n Number of features of each sample.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> Y = np.array([1, 2, 3, 4, 4, 5])\n>>> from sklearn.naive_bayes import BernoulliNB\n>>> clf = BernoulliNB()\n>>> clf.fit(X, Y)\nBernoulliNB()\n>>> print(clf.predict(X[2:3]))\n[3]\n\nReferences\n----------\nC.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\nInformation Retrieval. Cambridge University Press, pp. 234-265.\nhttps://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html\n\nA. McCallum and K. Nigam (1998). A comparison of event models for naive\nBayes text classification. Proc. AAAI/ICML-98 Workshop on Learning for\nText Categorization, pp. 41-48.\n\nV. Metsis, I. Androutsopoulos and G. Paliouras (2006). Spam filtering with\nnaive Bayes -- Which naive Bayes? 3rd Conf. on Email and Anti-Spam (CEAS).", + "code": "class BernoulliNB(_BaseDiscreteNB):\n \"\"\"Naive Bayes classifier for multivariate Bernoulli models.\n\n Like MultinomialNB, this classifier is suitable for discrete data. 
The\n difference is that while MultinomialNB works with occurrence counts,\n BernoulliNB is designed for binary/boolean features.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n alpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\n binarize : float or None, default=0.0\n Threshold for binarizing (mapping to booleans) of sample features.\n If None, input is presumed to already consist of binary vectors.\n\n fit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\n class_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\n Attributes\n ----------\n class_count_ : ndarray of shape (n_classes)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\n class_log_prior_ : ndarray of shape (n_classes)\n Log probability of each class (smoothed).\n\n classes_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\n coef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `BernoulliNB`\n as a linear model.\n\n feature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature)\n during fitting. This value is weighted by the sample weight when\n provided.\n\n feature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical log probability of features given a class, P(x_i|y).\n\n intercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `BernoulliNB`\n as a linear model.\n\n n_features_ : int\n Number of features of each sample.\n\n Examples\n --------\n >>> import numpy as np\n >>> rng = np.random.RandomState(1)\n >>> X = rng.randint(5, size=(6, 100))\n >>> Y = np.array([1, 2, 3, 4, 4, 5])\n >>> from sklearn.naive_bayes import BernoulliNB\n >>> clf = BernoulliNB()\n >>> clf.fit(X, Y)\n BernoulliNB()\n >>> print(clf.predict(X[2:3]))\n [3]\n\n References\n ----------\n C.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\n Information Retrieval. Cambridge University Press, pp. 234-265.\n https://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html\n\n A. McCallum and K. Nigam (1998). A comparison of event models for naive\n Bayes text classification. Proc. AAAI/ICML-98 Workshop on Learning for\n Text Categorization, pp. 41-48.\n\n V. Metsis, I. Androutsopoulos and G. Paliouras (2006). Spam filtering with\n naive Bayes -- Which naive Bayes? 3rd Conf. 
on Email and Anti-Spam (CEAS).\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, alpha=1.0, binarize=.0, fit_prior=True,\n class_prior=None):\n self.alpha = alpha\n self.binarize = binarize\n self.fit_prior = fit_prior\n self.class_prior = class_prior\n\n def _check_X(self, X):\n X = super()._check_X(X)\n if self.binarize is not None:\n X = binarize(X, threshold=self.binarize)\n return X\n\n def _check_X_y(self, X, y):\n X, y = super()._check_X_y(X, y)\n if self.binarize is not None:\n X = binarize(X, threshold=self.binarize)\n return X, y\n\n def _count(self, X, Y):\n \"\"\"Count and smooth feature occurrences.\"\"\"\n self.feature_count_ += safe_sparse_dot(Y.T, X)\n self.class_count_ += Y.sum(axis=0)\n\n def _update_feature_log_prob(self, alpha):\n \"\"\"Apply smoothing to raw counts and recompute log probabilities\"\"\"\n smoothed_fc = self.feature_count_ + alpha\n smoothed_cc = self.class_count_ + alpha * 2\n\n self.feature_log_prob_ = (np.log(smoothed_fc) -\n np.log(smoothed_cc.reshape(-1, 1)))\n\n def _joint_log_likelihood(self, X):\n \"\"\"Calculate the posterior log probability of the samples X\"\"\"\n n_classes, n_features = self.feature_log_prob_.shape\n n_samples, n_features_X = X.shape\n\n if n_features_X != n_features:\n raise ValueError(\"Expected input with %d features, got %d instead\"\n % (n_features, n_features_X))\n\n neg_prob = np.log(1 - np.exp(self.feature_log_prob_))\n # Compute neg_prob \u00b7 (1 - X).T as \u2211neg_prob - X \u00b7 neg_prob\n jll = safe_sparse_dot(X, (self.feature_log_prob_ - neg_prob).T)\n jll += self.class_log_prior_ + neg_prob.sum(axis=1)\n\n return jll", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "binarize", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_prior", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB", + "name": "CategoricalNB", + "qname": "sklearn.naive_bayes.CategoricalNB", + "decorators": [], + "superclasses": ["_BaseDiscreteNB"], + "methods": [ + "scikit-learn/sklearn.naive_bayes/CategoricalNB/__init__", + "scikit-learn/sklearn.naive_bayes/CategoricalNB/fit", + "scikit-learn/sklearn.naive_bayes/CategoricalNB/partial_fit", + "scikit-learn/sklearn.naive_bayes/CategoricalNB/_more_tags", + "scikit-learn/sklearn.naive_bayes/CategoricalNB/_check_X", + "scikit-learn/sklearn.naive_bayes/CategoricalNB/_check_X_y", + "scikit-learn/sklearn.naive_bayes/CategoricalNB/_init_counters", + "scikit-learn/sklearn.naive_bayes/CategoricalNB/_validate_n_categories", + "scikit-learn/sklearn.naive_bayes/CategoricalNB/_count", + "scikit-learn/sklearn.naive_bayes/CategoricalNB/_update_feature_log_prob", + "scikit-learn/sklearn.naive_bayes/CategoricalNB/_joint_log_likelihood" + ], + "is_public": true, + "reexported_by": [], + "description": "Naive Bayes classifier for categorical features\n\nThe categorical Naive Bayes classifier is suitable for classification with\ndiscrete features that are categorically distributed. The categories of\neach feature are drawn from a categorical distribution.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Naive Bayes classifier for categorical features\n\nThe categorical Naive Bayes classifier is suitable for classification with\ndiscrete features that are categorically distributed. 
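BernoulliNB's _joint_log_likelihood above uses the identity neg_prob . (1 - X).T = sum(neg_prob) - X . neg_prob; a small check of that algebra against predict, with illustrative data shapes:

import numpy as np
from sklearn.naive_bayes import BernoulliNB

rng = np.random.RandomState(1)
X = rng.randint(2, size=(6, 10))          # already binary, binarize is a no-op
y = np.array([1, 1, 2, 2, 3, 3])

clf = BernoulliNB(alpha=1.0).fit(X, y)
log_p = clf.feature_log_prob_             # log P(x_i = 1 | y)
log_q = np.log(1 - np.exp(log_p))         # log P(x_i = 0 | y)
# Same quantity the method computes before taking the argmax.
jll = X @ (log_p - log_q).T + clf.class_log_prior_ + log_q.sum(axis=1)
print((clf.classes_[jll.argmax(axis=1)] == clf.predict(X)).all())   # True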
The categories of\neach feature are drawn from a categorical distribution.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\nfit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nmin_categories : int or array-like of shape (n_features,), default=None\n Minimum number of categories per feature.\n\n - integer: Sets the minimum number of categories per feature to\n `n_categories` for each feature.\n - array-like: shape (n_features,) where `n_categories[i]` holds the\n minimum number of categories for the ith column of the input.\n - None (default): Determines the number of categories automatically\n from the training data.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncategory_count_ : list of arrays of shape (n_features,)\n Holds arrays of shape (n_classes, n_categories of respective feature)\n for each feature. Each array provides the number of samples\n encountered for each class and category of the specific feature.\n\nclass_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n Smoothed empirical log probability for each class.\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\nfeature_log_prob_ : list of arrays of shape (n_features,)\n Holds arrays of shape (n_classes, n_categories of respective feature)\n for each feature. Each array provides the empirical log probability\n of categories given the respective feature and class, ``P(x_i|y)``.\n\nn_features_ : int\n Number of features of each sample.\n\nn_categories_ : ndarray of shape (n_features,), dtype=np.int64\n Number of categories for each feature. This value is\n inferred from the data or set by the minimum number of categories.\n\n .. versionadded:: 0.24\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import CategoricalNB\n>>> clf = CategoricalNB()\n>>> clf.fit(X, y)\nCategoricalNB()\n>>> print(clf.predict(X[2:3]))\n[3]", + "code": "class CategoricalNB(_BaseDiscreteNB):\n \"\"\"Naive Bayes classifier for categorical features\n\n The categorical Naive Bayes classifier is suitable for classification with\n discrete features that are categorically distributed. The categories of\n each feature are drawn from a categorical distribution.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n alpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\n fit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\n class_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. 
If specified the priors are not\n adjusted according to the data.\n\n min_categories : int or array-like of shape (n_features,), default=None\n Minimum number of categories per feature.\n\n - integer: Sets the minimum number of categories per feature to\n `n_categories` for each feature.\n - array-like: shape (n_features,) where `n_categories[i]` holds the\n minimum number of categories for the ith column of the input.\n - None (default): Determines the number of categories automatically\n from the training data.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n category_count_ : list of arrays of shape (n_features,)\n Holds arrays of shape (n_classes, n_categories of respective feature)\n for each feature. Each array provides the number of samples\n encountered for each class and category of the specific feature.\n\n class_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\n class_log_prior_ : ndarray of shape (n_classes,)\n Smoothed empirical log probability for each class.\n\n classes_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\n feature_log_prob_ : list of arrays of shape (n_features,)\n Holds arrays of shape (n_classes, n_categories of respective feature)\n for each feature. Each array provides the empirical log probability\n of categories given the respective feature and class, ``P(x_i|y)``.\n\n n_features_ : int\n Number of features of each sample.\n\n n_categories_ : ndarray of shape (n_features,), dtype=np.int64\n Number of categories for each feature. This value is\n inferred from the data or set by the minimum number of categories.\n\n .. versionadded:: 0.24\n\n Examples\n --------\n >>> import numpy as np\n >>> rng = np.random.RandomState(1)\n >>> X = rng.randint(5, size=(6, 100))\n >>> y = np.array([1, 2, 3, 4, 5, 6])\n >>> from sklearn.naive_bayes import CategoricalNB\n >>> clf = CategoricalNB()\n >>> clf.fit(X, y)\n CategoricalNB()\n >>> print(clf.predict(X[2:3]))\n [3]\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None,\n min_categories=None):\n self.alpha = alpha\n self.fit_prior = fit_prior\n self.class_prior = class_prior\n self.min_categories = min_categories\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Naive Bayes classifier according to X, y\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features. Here, each feature of X is\n assumed to be from a different categorical distribution.\n It is further assumed that all categories of each feature are\n represented by the numbers 0, ..., n - 1, where n refers to the\n total number of categories for the given feature. This can, for\n instance, be achieved with the help of OrdinalEncoder.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\n Returns\n -------\n self : object\n \"\"\"\n return super().fit(X, y, sample_weight=sample_weight)\n\n def partial_fit(self, X, y, classes=None, sample_weight=None):\n \"\"\"Incremental fit on a batch of samples.\n\n This method is expected to be called several times consecutively\n on different chunks of a dataset so as to implement out-of-core\n or online learning.\n\n This is especially useful when the whole dataset is too big to fit in\n memory at once.\n\n This method has some performance overhead hence it is better to call\n partial_fit on chunks of data that are as large as possible\n (as long as fitting in the memory budget) to hide the overhead.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features. Here, each feature of X is\n assumed to be from a different categorical distribution.\n It is further assumed that all categories of each feature are\n represented by the numbers 0, ..., n - 1, where n refers to the\n total number of categories for the given feature. This can, for\n instance, be achieved with the help of OrdinalEncoder.\n\n y : array-like of shape (n_samples)\n Target values.\n\n classes : array-like of shape (n_classes), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\n sample_weight : array-like of shape (n_samples), default=None\n Weights applied to individual samples (1. for unweighted).\n\n Returns\n -------\n self : object\n \"\"\"\n return super().partial_fit(X, y, classes,\n sample_weight=sample_weight)\n\n def _more_tags(self):\n return {'requires_positive_X': True}\n\n def _check_X(self, X):\n X = check_array(X, dtype='int', accept_sparse=False,\n force_all_finite=True)\n check_non_negative(X, \"CategoricalNB (input X)\")\n return X\n\n def _check_X_y(self, X, y):\n X, y = self._validate_data(X, y, dtype='int', accept_sparse=False,\n force_all_finite=True)\n check_non_negative(X, \"CategoricalNB (input X)\")\n return X, y\n\n def _init_counters(self, n_effective_classes, n_features):\n self.class_count_ = np.zeros(n_effective_classes, dtype=np.float64)\n self.category_count_ = [np.zeros((n_effective_classes, 0))\n for _ in range(n_features)]\n\n @staticmethod\n def _validate_n_categories(X, min_categories):\n # rely on max for n_categories; categories are encoded between 0...n-1\n n_categories_X = X.max(axis=0) + 1\n min_categories_ = np.array(min_categories)\n if min_categories is not None:\n if not np.issubdtype(min_categories_.dtype, np.signedinteger):\n raise ValueError(\n f\"'min_categories' should have integral type. Got \"\n f\"{min_categories_.dtype} instead.\"\n )\n n_categories_ = np.maximum(n_categories_X,\n min_categories_,\n dtype=np.int64)\n if n_categories_.shape != n_categories_X.shape:\n raise ValueError(\n f\"'min_categories' should have shape ({X.shape[1]},\"\n f\") when an array-like is provided. 
Got\"\n f\" {min_categories_.shape} instead.\"\n )\n return n_categories_\n else:\n return n_categories_X\n\n def _count(self, X, Y):\n def _update_cat_count_dims(cat_count, highest_feature):\n diff = highest_feature + 1 - cat_count.shape[1]\n if diff > 0:\n # we append a column full of zeros for each new category\n return np.pad(cat_count, [(0, 0), (0, diff)], 'constant')\n return cat_count\n\n def _update_cat_count(X_feature, Y, cat_count, n_classes):\n for j in range(n_classes):\n mask = Y[:, j].astype(bool)\n if Y.dtype.type == np.int64:\n weights = None\n else:\n weights = Y[mask, j]\n counts = np.bincount(X_feature[mask], weights=weights)\n indices = np.nonzero(counts)[0]\n cat_count[j, indices] += counts[indices]\n\n self.class_count_ += Y.sum(axis=0)\n self.n_categories_ = self._validate_n_categories(\n X, self.min_categories)\n for i in range(self.n_features_):\n X_feature = X[:, i]\n self.category_count_[i] = _update_cat_count_dims(\n self.category_count_[i], self.n_categories_[i] - 1)\n _update_cat_count(X_feature, Y,\n self.category_count_[i],\n self.class_count_.shape[0])\n\n def _update_feature_log_prob(self, alpha):\n feature_log_prob = []\n for i in range(self.n_features_):\n smoothed_cat_count = self.category_count_[i] + alpha\n smoothed_class_count = smoothed_cat_count.sum(axis=1)\n feature_log_prob.append(\n np.log(smoothed_cat_count) -\n np.log(smoothed_class_count.reshape(-1, 1)))\n self.feature_log_prob_ = feature_log_prob\n\n def _joint_log_likelihood(self, X):\n if not X.shape[1] == self.n_features_:\n raise ValueError(\"Expected input with %d features, got %d instead\"\n % (self.n_features_, X.shape[1]))\n jll = np.zeros((X.shape[0], self.class_count_.shape[0]))\n for i in range(self.n_features_):\n indices = X[:, i]\n jll += self.feature_log_prob_[i][:, indices].T\n total_ll = jll + self.class_log_prior_\n return total_ll", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_prior", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "class_count_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "feature_log_prob_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB", + "name": "ComplementNB", + "qname": "sklearn.naive_bayes.ComplementNB", + "decorators": [], + "superclasses": ["_BaseDiscreteNB"], + "methods": [ + "scikit-learn/sklearn.naive_bayes/ComplementNB/__init__", + "scikit-learn/sklearn.naive_bayes/ComplementNB/_more_tags", + "scikit-learn/sklearn.naive_bayes/ComplementNB/_count", + "scikit-learn/sklearn.naive_bayes/ComplementNB/_update_feature_log_prob", + "scikit-learn/sklearn.naive_bayes/ComplementNB/_joint_log_likelihood" + ], + "is_public": true, + "reexported_by": [], + "description": "The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\nThe Complement Naive Bayes classifier was designed to correct the \"severe\nassumptions\" made by the standard Multinomial Naive Bayes classifier. It is\nparticularly suited for imbalanced data sets.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "docstring": "The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\nThe Complement Naive Bayes classifier was designed to correct the \"severe\nassumptions\" made by the standard Multinomial Naive Bayes classifier. 
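A short sketch of the min_categories behaviour documented in the CategoricalNB entry above; the data and the value 4 are illustrative assumptions:

import numpy as np
from sklearn.naive_bayes import CategoricalNB

rng = np.random.RandomState(1)
X = rng.randint(3, size=(8, 2))           # categories encoded as 0..2
y = np.array([0, 0, 0, 0, 1, 1, 1, 1])

# min_categories reserves room for categories that are absent from this
# batch but may appear later, e.g. across partial_fit calls.
clf = CategoricalNB(min_categories=4).fit(X, y)
print(clf.n_categories_)                  # [4 4]
print(clf.predict(X[:2]))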
It is\nparticularly suited for imbalanced data sets.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).\n\nfit_prior : bool, default=True\n Only used in edge case with a single class in the training set.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. Not used.\n\nnorm : bool, default=False\n Whether or not a second normalization of the weights is performed. The\n default behavior mirrors the implementations found in Mahout and Weka,\n which do not follow the full algorithm described in Table 9 of the\n paper.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n Smoothed empirical log probability for each class. Only used in edge\n case with a single class in the training set.\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\ncoef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `ComplementNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nfeature_all_ : ndarray of shape (n_features,)\n Number of samples encountered for each feature during fitting. This\n value is weighted by the sample weight when provided.\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature) during fitting.\n This value is weighted by the sample weight when provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical weights for class complements.\n\nintercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `ComplementNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nn_features_ : int\n Number of features of each sample.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import ComplementNB\n>>> clf = ComplementNB()\n>>> clf.fit(X, y)\nComplementNB()\n>>> print(clf.predict(X[2:3]))\n[3]\n\nReferences\n----------\nRennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003).\nTackling the poor assumptions of naive bayes text classifiers. In ICML\n(Vol. 3, pp. 616-623).\nhttps://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf", + "code": "class ComplementNB(_BaseDiscreteNB):\n \"\"\"The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\n The Complement Naive Bayes classifier was designed to correct the \"severe\n assumptions\" made by the standard Multinomial Naive Bayes classifier. It is\n particularly suited for imbalanced data sets.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.20\n\n Parameters\n ----------\n alpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).\n\n fit_prior : bool, default=True\n Only used in edge case with a single class in the training set.\n\n class_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. 
Not used.\n\n norm : bool, default=False\n Whether or not a second normalization of the weights is performed. The\n default behavior mirrors the implementations found in Mahout and Weka,\n which do not follow the full algorithm described in Table 9 of the\n paper.\n\n Attributes\n ----------\n class_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\n class_log_prior_ : ndarray of shape (n_classes,)\n Smoothed empirical log probability for each class. Only used in edge\n case with a single class in the training set.\n\n classes_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\n coef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `ComplementNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\n feature_all_ : ndarray of shape (n_features,)\n Number of samples encountered for each feature during fitting. This\n value is weighted by the sample weight when provided.\n\n feature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature) during fitting.\n This value is weighted by the sample weight when provided.\n\n feature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical weights for class complements.\n\n intercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `ComplementNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``intercept_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\n n_features_ : int\n Number of features of each sample.\n\n Examples\n --------\n >>> import numpy as np\n >>> rng = np.random.RandomState(1)\n >>> X = rng.randint(5, size=(6, 100))\n >>> y = np.array([1, 2, 3, 4, 5, 6])\n >>> from sklearn.naive_bayes import ComplementNB\n >>> clf = ComplementNB()\n >>> clf.fit(X, y)\n ComplementNB()\n >>> print(clf.predict(X[2:3]))\n [3]\n\n References\n ----------\n Rennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003).\n Tackling the poor assumptions of naive bayes text classifiers. In ICML\n (Vol. 3, pp. 
616-623).\n https://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None,\n norm=False):\n self.alpha = alpha\n self.fit_prior = fit_prior\n self.class_prior = class_prior\n self.norm = norm\n\n def _more_tags(self):\n return {'requires_positive_X': True}\n\n def _count(self, X, Y):\n \"\"\"Count feature occurrences.\"\"\"\n check_non_negative(X, \"ComplementNB (input X)\")\n self.feature_count_ += safe_sparse_dot(Y.T, X)\n self.class_count_ += Y.sum(axis=0)\n self.feature_all_ = self.feature_count_.sum(axis=0)\n\n def _update_feature_log_prob(self, alpha):\n \"\"\"Apply smoothing to raw counts and compute the weights.\"\"\"\n comp_count = self.feature_all_ + alpha - self.feature_count_\n logged = np.log(comp_count / comp_count.sum(axis=1, keepdims=True))\n # _BaseNB.predict uses argmax, but ComplementNB operates with argmin.\n if self.norm:\n summed = logged.sum(axis=1, keepdims=True)\n feature_log_prob = logged / summed\n else:\n feature_log_prob = -logged\n self.feature_log_prob_ = feature_log_prob\n\n def _joint_log_likelihood(self, X):\n \"\"\"Calculate the class scores for the samples in X.\"\"\"\n jll = safe_sparse_dot(X, self.feature_log_prob_.T)\n if len(self.classes_) == 1:\n jll += self.class_log_prior_\n return jll", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_prior", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "norm", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB", + "name": "GaussianNB", + "qname": "sklearn.naive_bayes.GaussianNB", + "decorators": [], + "superclasses": ["_BaseNB"], + "methods": [ + "scikit-learn/sklearn.naive_bayes/GaussianNB/__init__", + "scikit-learn/sklearn.naive_bayes/GaussianNB/fit", + "scikit-learn/sklearn.naive_bayes/GaussianNB/_check_X", + "scikit-learn/sklearn.naive_bayes/GaussianNB/_update_mean_variance", + "scikit-learn/sklearn.naive_bayes/GaussianNB/partial_fit", + "scikit-learn/sklearn.naive_bayes/GaussianNB/_partial_fit", + "scikit-learn/sklearn.naive_bayes/GaussianNB/_joint_log_likelihood" + ], + "is_public": true, + "reexported_by": [], + "description": "Gaussian Naive Bayes (GaussianNB)\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide <gaussian_naive_bayes>`.", + "docstring": "Gaussian Naive Bayes (GaussianNB)\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide <gaussian_naive_bayes>`.\n\nParameters\n----------\npriors : array-like of shape (n_classes,)\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nvar_smoothing : float, default=1e-9\n Portion of the largest variance of all features that is added to\n variances for calculation stability.\n\n .. 
versionadded:: 0.20\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n number of training samples observed in each class.\n\nclass_prior_ : ndarray of shape (n_classes,)\n probability of each class.\n\nclasses_ : ndarray of shape (n_classes,)\n class labels known to the classifier\n\nepsilon_ : float\n absolute additive value to variances\n\nsigma_ : ndarray of shape (n_classes, n_features)\n variance of each feature per class\n\ntheta_ : ndarray of shape (n_classes, n_features)\n mean of each feature per class\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> Y = np.array([1, 1, 1, 2, 2, 2])\n>>> from sklearn.naive_bayes import GaussianNB\n>>> clf = GaussianNB()\n>>> clf.fit(X, Y)\nGaussianNB()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n>>> clf_pf = GaussianNB()\n>>> clf_pf.partial_fit(X, Y, np.unique(Y))\nGaussianNB()\n>>> print(clf_pf.predict([[-0.8, -1]]))\n[1]", + "code": "class GaussianNB(_BaseNB):\n \"\"\"\n Gaussian Naive Bayes (GaussianNB)\n\n Can perform online updates to model parameters via :meth:`partial_fit`.\n For details on algorithm used to update feature means and variance online,\n see Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\n Read more in the :ref:`User Guide <gaussian_naive_bayes>`.\n\n Parameters\n ----------\n priors : array-like of shape (n_classes,)\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\n var_smoothing : float, default=1e-9\n Portion of the largest variance of all features that is added to\n variances for calculation stability.\n\n .. versionadded:: 0.20\n\n Attributes\n ----------\n class_count_ : ndarray of shape (n_classes,)\n number of training samples observed in each class.\n\n class_prior_ : ndarray of shape (n_classes,)\n probability of each class.\n\n classes_ : ndarray of shape (n_classes,)\n class labels known to the classifier\n\n epsilon_ : float\n absolute additive value to variances\n\n sigma_ : ndarray of shape (n_classes, n_features)\n variance of each feature per class\n\n theta_ : ndarray of shape (n_classes, n_features)\n mean of each feature per class\n\n Examples\n --------\n >>> import numpy as np\n >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n >>> Y = np.array([1, 1, 1, 2, 2, 2])\n >>> from sklearn.naive_bayes import GaussianNB\n >>> clf = GaussianNB()\n >>> clf.fit(X, Y)\n GaussianNB()\n >>> print(clf.predict([[-0.8, -1]]))\n [1]\n >>> clf_pf = GaussianNB()\n >>> clf_pf.partial_fit(X, Y, np.unique(Y))\n GaussianNB()\n >>> print(clf_pf.predict([[-0.8, -1]]))\n [1]\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, priors=None, var_smoothing=1e-9):\n self.priors = priors\n self.var_smoothing = var_smoothing\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Gaussian Naive Bayes according to X, y\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n .. 
versionadded:: 0.17\n Gaussian Naive Bayes supports fitting with *sample_weight*.\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._validate_data(X, y)\n y = column_or_1d(y, warn=True)\n return self._partial_fit(X, y, np.unique(y), _refit=True,\n sample_weight=sample_weight)\n\n def _check_X(self, X):\n return check_array(X)\n\n @staticmethod\n def _update_mean_variance(n_past, mu, var, X, sample_weight=None):\n \"\"\"Compute online update of Gaussian mean and variance.\n\n Given starting sample count, mean, and variance, a new set of\n points X, and optionally sample weights, return the updated mean and\n variance. (NB - each dimension (column) in X is treated as independent\n -- you get variance, not covariance).\n\n Can take scalar mean and variance, or vector mean and variance to\n simultaneously update a number of independent Gaussians.\n\n See Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\n Parameters\n ----------\n n_past : int\n Number of samples represented in old mean and variance. If sample\n weights were given, this should contain the sum of sample\n weights represented in old mean and variance.\n\n mu : array-like of shape (number of Gaussians,)\n Means for Gaussians in original set.\n\n var : array-like of shape (number of Gaussians,)\n Variances for Gaussians in original set.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n Returns\n -------\n total_mu : array-like of shape (number of Gaussians,)\n Updated mean for each Gaussian over the combined set.\n\n total_var : array-like of shape (number of Gaussians,)\n Updated variance for each Gaussian over the combined set.\n \"\"\"\n if X.shape[0] == 0:\n return mu, var\n\n # Compute (potentially weighted) mean and variance of new datapoints\n if sample_weight is not None:\n n_new = float(sample_weight.sum())\n new_mu = np.average(X, axis=0, weights=sample_weight)\n new_var = np.average((X - new_mu) ** 2, axis=0,\n weights=sample_weight)\n else:\n n_new = X.shape[0]\n new_var = np.var(X, axis=0)\n new_mu = np.mean(X, axis=0)\n\n if n_past == 0:\n return new_mu, new_var\n\n n_total = float(n_past + n_new)\n\n # Combine mean of old and new data, taking into consideration\n # (weighted) number of observations\n total_mu = (n_new * new_mu + n_past * mu) / n_total\n\n # Combine variance of old and new data, taking into consideration\n # (weighted) number of observations. 
This is achieved by combining\n # the sum-of-squared-differences (ssd)\n old_ssd = n_past * var\n new_ssd = n_new * new_var\n total_ssd = (old_ssd + new_ssd +\n (n_new * n_past / n_total) * (mu - new_mu) ** 2)\n total_var = total_ssd / n_total\n\n return total_mu, total_var\n\n def partial_fit(self, X, y, classes=None, sample_weight=None):\n \"\"\"Incremental fit on a batch of samples.\n\n This method is expected to be called several times consecutively\n on different chunks of a dataset so as to implement out-of-core\n or online learning.\n\n This is especially useful when the whole dataset is too big to fit in\n memory at once.\n\n This method has some performance and numerical stability overhead,\n hence it is better to call partial_fit on chunks of data that are\n as large as possible (as long as fitting in the memory budget) to\n hide the overhead.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n classes : array-like of shape (n_classes,), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n .. versionadded:: 0.17\n\n Returns\n -------\n self : object\n \"\"\"\n return self._partial_fit(X, y, classes, _refit=False,\n sample_weight=sample_weight)\n\n def _partial_fit(self, X, y, classes=None, _refit=False,\n sample_weight=None):\n \"\"\"Actual implementation of Gaussian NB fitting.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n classes : array-like of shape (n_classes,), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\n _refit : bool, default=False\n If true, act as though this were the first time we called\n _partial_fit (ie, throw away any past fitting and start over).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = check_X_y(X, y)\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n\n # If the ratio of data variance between dimensions is too small, it\n # will cause numerical errors. 
To address this, we artificially\n # boost the variance by epsilon, a small fraction of the standard\n # deviation of the largest dimension.\n self.epsilon_ = self.var_smoothing * np.var(X, axis=0).max()\n\n if _refit:\n self.classes_ = None\n\n if _check_partial_fit_first_call(self, classes):\n # This is the first call to partial_fit:\n # initialize various cumulative counters\n n_features = X.shape[1]\n n_classes = len(self.classes_)\n self.theta_ = np.zeros((n_classes, n_features))\n self.sigma_ = np.zeros((n_classes, n_features))\n\n self.class_count_ = np.zeros(n_classes, dtype=np.float64)\n\n # Initialise the class prior\n # Take into account the priors\n if self.priors is not None:\n priors = np.asarray(self.priors)\n # Check that the provided priors match the number of classes\n if len(priors) != n_classes:\n raise ValueError('Number of priors must match number of'\n ' classes.')\n # Check that the sum is 1\n if not np.isclose(priors.sum(), 1.0):\n raise ValueError('The sum of the priors should be 1.')\n # Check that the priors are non-negative\n if (priors < 0).any():\n raise ValueError('Priors must be non-negative.')\n self.class_prior_ = priors\n else:\n # Initialize the priors to zeros for each class\n self.class_prior_ = np.zeros(len(self.classes_),\n dtype=np.float64)\n else:\n if X.shape[1] != self.theta_.shape[1]:\n msg = \"Number of features %d does not match previous data %d.\"\n raise ValueError(msg % (X.shape[1], self.theta_.shape[1]))\n # Remove the smoothing term before updating; it is added back below\n self.sigma_[:, :] -= self.epsilon_\n\n classes = self.classes_\n\n unique_y = np.unique(y)\n unique_y_in_classes = np.in1d(unique_y, classes)\n\n if not np.all(unique_y_in_classes):\n raise ValueError(\"The target label(s) %s in y do not exist in the \"\n \"initial classes %s\" %\n (unique_y[~unique_y_in_classes], classes))\n\n for y_i in unique_y:\n i = classes.searchsorted(y_i)\n X_i = X[y == y_i, :]\n\n if sample_weight is not None:\n sw_i = sample_weight[y == y_i]\n N_i = sw_i.sum()\n else:\n sw_i = None\n N_i = X_i.shape[0]\n\n new_theta, new_sigma = self._update_mean_variance(\n self.class_count_[i], self.theta_[i, :], self.sigma_[i, :],\n X_i, sw_i)\n\n self.theta_[i, :] = new_theta\n self.sigma_[i, :] = new_sigma\n self.class_count_[i] += N_i\n\n self.sigma_[:, :] += self.epsilon_\n\n # Update only if no priors were provided\n if self.priors is None:\n # Empirical prior, with sample_weight taken into account\n self.class_prior_ = self.class_count_ / self.class_count_.sum()\n\n return self\n\n def _joint_log_likelihood(self, X):\n joint_log_likelihood = []\n for i in range(np.size(self.classes_)):\n jointi = np.log(self.class_prior_[i])\n n_ij = - 0.5 * np.sum(np.log(2. 
* np.pi * self.sigma_[i, :]))\n n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) /\n (self.sigma_[i, :]), 1)\n joint_log_likelihood.append(jointi + n_ij)\n\n joint_log_likelihood = np.array(joint_log_likelihood).T\n return joint_log_likelihood", + "instance_attributes": [ + { + "name": "var_smoothing", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "theta_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "sigma_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "class_count_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB", + "name": "MultinomialNB", + "qname": "sklearn.naive_bayes.MultinomialNB", + "decorators": [], + "superclasses": ["_BaseDiscreteNB"], + "methods": [ + "scikit-learn/sklearn.naive_bayes/MultinomialNB/__init__", + "scikit-learn/sklearn.naive_bayes/MultinomialNB/_more_tags", + "scikit-learn/sklearn.naive_bayes/MultinomialNB/_count", + "scikit-learn/sklearn.naive_bayes/MultinomialNB/_update_feature_log_prob", + "scikit-learn/sklearn.naive_bayes/MultinomialNB/_joint_log_likelihood" + ], + "is_public": true, + "reexported_by": [], + "description": "Naive Bayes classifier for multinomial models\n\nThe multinomial Naive Bayes classifier is suitable for classification with\ndiscrete features (e.g., word counts for text classification). The\nmultinomial distribution normally requires integer feature counts. However,\nin practice, fractional counts such as tf-idf may also work.\n\nRead more in the :ref:`User Guide <multinomial_naive_bayes>`.", + "docstring": "Naive Bayes classifier for multinomial models\n\nThe multinomial Naive Bayes classifier is suitable for classification with\ndiscrete features (e.g., word counts for text classification). The\nmultinomial distribution normally requires integer feature counts. However,\nin practice, fractional counts such as tf-idf may also work.\n\nRead more in the :ref:`User Guide <multinomial_naive_bayes>`.\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\nfit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes, )\n Smoothed empirical log probability for each class.\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\ncoef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `MultinomialNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature)\n during fitting. This value is weighted by the sample weight when\n provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical log probability of features\n given a class, ``P(x_i|y)``.\n\nintercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `MultinomialNB`\n as a linear model.\n\n .. 
deprecated:: 0.24\n ``intercept_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nn_features_ : int\n Number of features of each sample.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import MultinomialNB\n>>> clf = MultinomialNB()\n>>> clf.fit(X, y)\nMultinomialNB()\n>>> print(clf.predict(X[2:3]))\n[3]\n\nNotes\n-----\nFor the rationale behind the names `coef_` and `intercept_`, i.e.\nnaive Bayes as a linear classifier, see J. Rennie et al. (2003),\nTackling the poor assumptions of naive Bayes text classifiers, ICML.\n\nReferences\n----------\nC.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\nInformation Retrieval. Cambridge University Press, pp. 234-265.\nhttps://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html", + "code": "class MultinomialNB(_BaseDiscreteNB):\n \"\"\"\n Naive Bayes classifier for multinomial models\n\n The multinomial Naive Bayes classifier is suitable for classification with\n discrete features (e.g., word counts for text classification). The\n multinomial distribution normally requires integer feature counts. However,\n in practice, fractional counts such as tf-idf may also work.\n\n Read more in the :ref:`User Guide <multinomial_naive_bayes>`.\n\n Parameters\n ----------\n alpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\n fit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\n class_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\n Attributes\n ----------\n class_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\n class_log_prior_ : ndarray of shape (n_classes, )\n Smoothed empirical log probability for each class.\n\n classes_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\n coef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `MultinomialNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\n feature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature)\n during fitting. This value is weighted by the sample weight when\n provided.\n\n feature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical log probability of features\n given a class, ``P(x_i|y)``.\n\n intercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `MultinomialNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``intercept_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\n n_features_ : int\n Number of features of each sample.\n\n Examples\n --------\n >>> import numpy as np\n >>> rng = np.random.RandomState(1)\n >>> X = rng.randint(5, size=(6, 100))\n >>> y = np.array([1, 2, 3, 4, 5, 6])\n >>> from sklearn.naive_bayes import MultinomialNB\n >>> clf = MultinomialNB()\n >>> clf.fit(X, y)\n MultinomialNB()\n >>> print(clf.predict(X[2:3]))\n [3]\n\n Notes\n -----\n For the rationale behind the names `coef_` and `intercept_`, i.e.\n naive Bayes as a linear classifier, see J. Rennie et al. 
(2003),\n Tackling the poor assumptions of naive Bayes text classifiers, ICML.\n\n References\n ----------\n C.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\n Information Retrieval. Cambridge University Press, pp. 234-265.\n https://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None):\n self.alpha = alpha\n self.fit_prior = fit_prior\n self.class_prior = class_prior\n\n def _more_tags(self):\n return {'requires_positive_X': True}\n\n def _count(self, X, Y):\n \"\"\"Count and smooth feature occurrences.\"\"\"\n check_non_negative(X, \"MultinomialNB (input X)\")\n self.feature_count_ += safe_sparse_dot(Y.T, X)\n self.class_count_ += Y.sum(axis=0)\n\n def _update_feature_log_prob(self, alpha):\n \"\"\"Apply smoothing to raw counts and recompute log probabilities\"\"\"\n smoothed_fc = self.feature_count_ + alpha\n smoothed_cc = smoothed_fc.sum(axis=1)\n\n self.feature_log_prob_ = (np.log(smoothed_fc) -\n np.log(smoothed_cc.reshape(-1, 1)))\n\n def _joint_log_likelihood(self, X):\n \"\"\"Calculate the posterior log probability of the samples X\"\"\"\n return (safe_sparse_dot(X, self.feature_log_prob_.T) +\n self.class_log_prior_)", + "instance_attributes": [ + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_prior", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB", + "name": "_BaseDiscreteNB", + "qname": "sklearn.naive_bayes._BaseDiscreteNB", + "decorators": [], + "superclasses": ["_BaseNB"], + "methods": [ + "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_check_X", + "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_check_X_y", + "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_update_class_log_prior", + "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_check_alpha", + "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/partial_fit", + "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/fit", + "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_init_counters", + "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/coef_@getter", + "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/intercept_@getter", + "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Abstract base class for naive Bayes on discrete/categorical data\n\nAny estimator based on this class should provide:\n\n__init__\n_joint_log_likelihood(X) as per _BaseNB", + "docstring": "Abstract base class for naive Bayes on discrete/categorical data\n\nAny estimator based on this class should provide:\n\n__init__\n_joint_log_likelihood(X) as per _BaseNB", + "code": "class _BaseDiscreteNB(_BaseNB):\n \"\"\"Abstract base class for naive Bayes on discrete/categorical data\n\n Any estimator based on this class should provide:\n\n __init__\n _joint_log_likelihood(X) as per _BaseNB\n \"\"\"\n\n def _check_X(self, X):\n return check_array(X, accept_sparse='csr')\n\n def _check_X_y(self, X, y):\n return self._validate_data(X, y, accept_sparse='csr')\n\n def _update_class_log_prior(self, class_prior=None):\n n_classes = len(self.classes_)\n if class_prior is not None:\n if len(class_prior) != n_classes:\n raise ValueError(\"Number of priors must match number of\"\n \" classes.\")\n self.class_log_prior_ = np.log(class_prior)\n elif self.fit_prior:\n with warnings.catch_warnings():\n # 
silence the warning when count is 0 because class was not yet\n # observed\n warnings.simplefilter(\"ignore\", RuntimeWarning)\n log_class_count = np.log(self.class_count_)\n\n # empirical prior, with sample_weight taken into account\n self.class_log_prior_ = (log_class_count -\n np.log(self.class_count_.sum()))\n else:\n self.class_log_prior_ = np.full(n_classes, -np.log(n_classes))\n\n def _check_alpha(self):\n if np.min(self.alpha) < 0:\n raise ValueError('Smoothing parameter alpha = %.1e. '\n 'alpha should be > 0.' % np.min(self.alpha))\n if isinstance(self.alpha, np.ndarray):\n if not self.alpha.shape[0] == self.n_features_:\n raise ValueError(\"alpha should be a scalar or a numpy array \"\n \"with shape [n_features]\")\n if np.min(self.alpha) < _ALPHA_MIN:\n warnings.warn('alpha too small will result in numeric errors, '\n 'setting alpha = %.1e' % _ALPHA_MIN)\n return np.maximum(self.alpha, _ALPHA_MIN)\n return self.alpha\n\n def partial_fit(self, X, y, classes=None, sample_weight=None):\n \"\"\"Incremental fit on a batch of samples.\n\n This method is expected to be called several times consecutively\n on different chunks of a dataset so as to implement out-of-core\n or online learning.\n\n This is especially useful when the whole dataset is too big to fit in\n memory at once.\n\n This method has some performance overhead hence it is better to call\n partial_fit on chunks of data that are as large as possible\n (as long as fitting in the memory budget) to hide the overhead.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n classes : array-like of shape (n_classes), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._check_X_y(X, y)\n _, n_features = X.shape\n\n if _check_partial_fit_first_call(self, classes):\n # This is the first call to partial_fit:\n # initialize various cumulative counters\n n_effective_classes = len(classes) if len(classes) > 1 else 2\n self._init_counters(n_effective_classes, n_features)\n self.n_features_ = n_features\n elif n_features != self.n_features_:\n msg = \"Number of features %d does not match previous data %d.\"\n raise ValueError(msg % (n_features, self.n_features_))\n\n Y = label_binarize(y, classes=self.classes_)\n if Y.shape[1] == 1:\n Y = np.concatenate((1 - Y, Y), axis=1)\n\n if X.shape[0] != Y.shape[0]:\n msg = \"X.shape[0]=%d and y.shape[0]=%d are incompatible.\"\n raise ValueError(msg % (X.shape[0], y.shape[0]))\n\n # label_binarize() returns arrays with dtype=np.int64.\n # We convert it to np.float64 to support sample_weight consistently\n Y = Y.astype(np.float64, copy=False)\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n sample_weight = np.atleast_2d(sample_weight)\n Y *= sample_weight.T\n\n class_prior = self.class_prior\n\n # Count raw events from data before updating the class log prior\n # and feature log probas\n self._count(X, Y)\n\n # XXX: OPTIM: we could introduce a public finalization method to\n # be called by the user explicitly just once after several consecutive\n # calls to partial_fit and prior to any call to predict[_[log_]proba]\n # to avoid computing the smooth log probas at each call to partial_fit\n alpha = self._check_alpha()\n self._update_feature_log_prob(alpha)\n self._update_class_log_prior(class_prior=class_prior)\n return self\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Naive Bayes classifier according to X, y\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._check_X_y(X, y)\n _, n_features = X.shape\n self.n_features_ = n_features\n\n labelbin = LabelBinarizer()\n Y = labelbin.fit_transform(y)\n self.classes_ = labelbin.classes_\n if Y.shape[1] == 1:\n Y = np.concatenate((1 - Y, Y), axis=1)\n\n # LabelBinarizer().fit_transform() returns arrays with dtype=np.int64.\n # We convert it to np.float64 to support sample_weight consistently;\n # this means we also don't have to cast X to floating point\n if sample_weight is not None:\n Y = Y.astype(np.float64, copy=False)\n sample_weight = _check_sample_weight(sample_weight, X)\n sample_weight = np.atleast_2d(sample_weight)\n Y *= sample_weight.T\n\n class_prior = self.class_prior\n\n # Count raw events from data before updating the class log prior\n # and feature log probas\n n_effective_classes = Y.shape[1]\n\n self._init_counters(n_effective_classes, n_features)\n self._count(X, Y)\n alpha = self._check_alpha()\n self._update_feature_log_prob(alpha)\n self._update_class_log_prior(class_prior=class_prior)\n return self\n\n def _init_counters(self, n_effective_classes, n_features):\n self.class_count_ = np.zeros(n_effective_classes, dtype=np.float64)\n self.feature_count_ = np.zeros((n_effective_classes, n_features),\n dtype=np.float64)\n\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute coef_ was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def coef_(self):\n return (self.feature_log_prob_[1:]\n if len(self.classes_) == 2 else self.feature_log_prob_)\n\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute intercept_ was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def intercept_(self):\n return (self.class_log_prior_[1:]\n if len(self.classes_) == 2 else self.class_log_prior_)\n\n def _more_tags(self):\n return {'poor_score': True}", + "instance_attributes": [ + { + "name": "class_count_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "feature_count_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB", + "name": "_BaseNB", + "qname": "sklearn.naive_bayes._BaseNB", + "decorators": [], + "superclasses": ["ClassifierMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.naive_bayes/_BaseNB/_joint_log_likelihood", + "scikit-learn/sklearn.naive_bayes/_BaseNB/_check_X", + "scikit-learn/sklearn.naive_bayes/_BaseNB/predict", + "scikit-learn/sklearn.naive_bayes/_BaseNB/predict_log_proba", + "scikit-learn/sklearn.naive_bayes/_BaseNB/predict_proba" + ], + "is_public": false, + "reexported_by": [], + "description": "Abstract base class for naive Bayes estimators", + "docstring": "Abstract base class for naive Bayes estimators", + "code": "class _BaseNB(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):\n \"\"\"Abstract base class for naive Bayes estimators\"\"\"\n\n @abstractmethod\n def _joint_log_likelihood(self, X):\n \"\"\"Compute the unnormalized posterior log probability of X\n\n I.e. 
``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\n shape (n_samples, n_classes).\n\n Input is passed to _joint_log_likelihood as-is by predict,\n predict_proba and predict_log_proba.\n \"\"\"\n\n @abstractmethod\n def _check_X(self, X):\n \"\"\"To be overridden in subclasses with the actual checks.\"\"\"\n\n def predict(self, X):\n \"\"\"\n Perform classification on an array of test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n C : ndarray of shape (n_samples,)\n Predicted target values for X\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n jll = self._joint_log_likelihood(X)\n return self.classes_[np.argmax(jll, axis=1)]\n\n def predict_log_proba(self, X):\n \"\"\"\n Return log-probability estimates for the test vector X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n C : array-like of shape (n_samples, n_classes)\n Returns the log-probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n jll = self._joint_log_likelihood(X)\n # normalize by P(x) = P(f_1, ..., f_n)\n log_prob_x = logsumexp(jll, axis=1)\n return jll - np.atleast_2d(log_prob_x).T\n\n def predict_proba(self, X):\n \"\"\"\n Return probability estimates for the test vector X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n C : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n \"\"\"\n return np.exp(self.predict_log_proba(X))", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin", + "name": "KNeighborsMixin", + "qname": "sklearn.neighbors._base.KNeighborsMixin", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/_kneighbors_reduce_func", + "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/kneighbors", + "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/kneighbors_graph" + ], + "is_public": false, + "reexported_by": [], + "description": "Mixin for k-neighbors searches", + "docstring": "Mixin for k-neighbors searches", + "code": "class KNeighborsMixin:\n \"\"\"Mixin for k-neighbors searches\"\"\"\n\n def _kneighbors_reduce_func(self, dist, start,\n n_neighbors, return_distance):\n \"\"\"Reduce a chunk of distances to the nearest neighbors\n\n Callback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\n Parameters\n ----------\n dist : ndarray of shape (n_samples_chunk, n_samples)\n The distance matrix.\n\n start : int\n The index in X which the first row of dist corresponds to.\n\n n_neighbors : int\n Number of neighbors required for each sample.\n\n return_distance : bool\n Whether or not to return the distances.\n\n Returns\n -------\n dist : array of shape (n_samples_chunk, n_neighbors)\n Returned only if `return_distance=True`.\n\n neigh : array of shape (n_samples_chunk, n_neighbors)\n The neighbors indices.\n \"\"\"\n sample_range = np.arange(dist.shape[0])[:, None]\n neigh_ind = np.argpartition(dist, n_neighbors - 1, axis=1)\n neigh_ind = neigh_ind[:, :n_neighbors]\n # argpartition doesn't guarantee sorted order, so we sort again\n neigh_ind = neigh_ind[\n 
sample_range, np.argsort(dist[sample_range, neigh_ind])]\n if return_distance:\n if self.effective_metric_ == 'euclidean':\n result = np.sqrt(dist[sample_range, neigh_ind]), neigh_ind\n else:\n result = dist[sample_range, neigh_ind], neigh_ind\n else:\n result = neigh_ind\n return result\n\n def kneighbors(self, X=None, n_neighbors=None, return_distance=True):\n \"\"\"Finds the K-neighbors of a point.\n\n Returns indices of and distances to the neighbors of each point.\n\n Parameters\n ----------\n X : array-like, shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed', \\\n default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\n n_neighbors : int, default=None\n Number of neighbors required for each sample. The default is the\n value passed to the constructor.\n\n return_distance : bool, default=True\n Whether or not to return the distances.\n\n Returns\n -------\n neigh_dist : ndarray of shape (n_queries, n_neighbors)\n Array representing the lengths to points, only present if\n return_distance=True\n\n neigh_ind : ndarray of shape (n_queries, n_neighbors)\n Indices of the nearest points in the population matrix.\n\n Examples\n --------\n In the following example, we construct a NearestNeighbors\n class from an array representing our data set and ask who's\n the closest point to [1,1,1]\n\n >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n >>> from sklearn.neighbors import NearestNeighbors\n >>> neigh = NearestNeighbors(n_neighbors=1)\n >>> neigh.fit(samples)\n NearestNeighbors(n_neighbors=1)\n >>> print(neigh.kneighbors([[1., 1., 1.]]))\n (array([[0.5]]), array([[2]]))\n\n As you can see, it returns [[0.5]], and [[2]], which means that the\n element is at distance 0.5 and is the third element of samples\n (indexes start at 0). You can also query for multiple points:\n\n >>> X = [[0., 1., 0.], [1., 0., 1.]]\n >>> neigh.kneighbors(X, return_distance=False)\n array([[1],\n [2]]...)\n \"\"\"\n check_is_fitted(self)\n\n if n_neighbors is None:\n n_neighbors = self.n_neighbors\n elif n_neighbors <= 0:\n raise ValueError(\n \"Expected n_neighbors > 0. 
Got %d\" %\n n_neighbors\n )\n else:\n if not isinstance(n_neighbors, numbers.Integral):\n raise TypeError(\n \"n_neighbors does not take %s value, \"\n \"enter integer value\" %\n type(n_neighbors))\n\n if X is not None:\n query_is_train = False\n if self.effective_metric_ == 'precomputed':\n X = _check_precomputed(X)\n else:\n X = check_array(X, accept_sparse='csr')\n else:\n query_is_train = True\n X = self._fit_X\n # Include an extra neighbor to account for the sample itself being\n # returned, which is removed later\n n_neighbors += 1\n\n n_samples_fit = self.n_samples_fit_\n if n_neighbors > n_samples_fit:\n raise ValueError(\n \"Expected n_neighbors <= n_samples, \"\n \" but n_samples = %d, n_neighbors = %d\" %\n (n_samples_fit, n_neighbors)\n )\n\n n_jobs = effective_n_jobs(self.n_jobs)\n chunked_results = None\n if (self._fit_method == 'brute' and\n self.effective_metric_ == 'precomputed' and issparse(X)):\n results = _kneighbors_from_graph(\n X, n_neighbors=n_neighbors,\n return_distance=return_distance)\n\n elif self._fit_method == 'brute':\n reduce_func = partial(self._kneighbors_reduce_func,\n n_neighbors=n_neighbors,\n return_distance=return_distance)\n\n # for efficiency, use squared euclidean distances\n if self.effective_metric_ == 'euclidean':\n kwds = {'squared': True}\n else:\n kwds = self.effective_metric_params_\n\n chunked_results = list(pairwise_distances_chunked(\n X, self._fit_X, reduce_func=reduce_func,\n metric=self.effective_metric_, n_jobs=n_jobs,\n **kwds))\n\n elif self._fit_method in ['ball_tree', 'kd_tree']:\n if issparse(X):\n raise ValueError(\n \"%s does not work with sparse matrices. Densify the data, \"\n \"or set algorithm='brute'\" % self._fit_method)\n old_joblib = (\n parse_version(joblib.__version__) < parse_version('0.12'))\n if old_joblib:\n # Deal with change of API in joblib\n parallel_kwargs = {\"backend\": \"threading\"}\n else:\n parallel_kwargs = {\"prefer\": \"threads\"}\n chunked_results = Parallel(n_jobs, **parallel_kwargs)(\n delayed(_tree_query_parallel_helper)(\n self._tree, X[s], n_neighbors, return_distance)\n for s in gen_even_slices(X.shape[0], n_jobs)\n )\n else:\n raise ValueError(\"internal: _fit_method not recognized\")\n\n if chunked_results is not None:\n if return_distance:\n neigh_dist, neigh_ind = zip(*chunked_results)\n results = np.vstack(neigh_dist), np.vstack(neigh_ind)\n else:\n results = np.vstack(chunked_results)\n\n if not query_is_train:\n return results\n else:\n # If the query data is the same as the indexed data, we would like\n # to ignore the first nearest neighbor of every sample, i.e\n # the sample itself.\n if return_distance:\n neigh_dist, neigh_ind = results\n else:\n neigh_ind = results\n\n n_queries, _ = X.shape\n sample_range = np.arange(n_queries)[:, None]\n sample_mask = neigh_ind != sample_range\n\n # Corner case: When the number of duplicates are more\n # than the number of neighbors, the first NN will not\n # be the sample, but a duplicate.\n # In that case mask the first duplicate.\n dup_gr_nbrs = np.all(sample_mask, axis=1)\n sample_mask[:, 0][dup_gr_nbrs] = False\n neigh_ind = np.reshape(\n neigh_ind[sample_mask], (n_queries, n_neighbors - 1))\n\n if return_distance:\n neigh_dist = np.reshape(\n neigh_dist[sample_mask], (n_queries, n_neighbors - 1))\n return neigh_dist, neigh_ind\n return neigh_ind\n\n def kneighbors_graph(self, X=None, n_neighbors=None,\n mode='connectivity'):\n \"\"\"Computes the (weighted) graph of k-Neighbors for points in X\n\n Parameters\n ----------\n X : array-like 
of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed', \\\n default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n For ``metric='precomputed'`` the shape should be\n (n_queries, n_indexed). Otherwise the shape should be\n (n_queries, n_features).\n\n n_neighbors : int, default=None\n Number of neighbors for each sample. The default is the value\n passed to the constructor.\n\n mode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the\n connectivity matrix with ones and zeros, in 'distance' the\n edges are Euclidean distance between points.\n\n Returns\n -------\n A : sparse-matrix of shape (n_queries, n_samples_fit)\n `n_samples_fit` is the number of samples in the fitted data\n `A[i, j]` is assigned the weight of edge that connects `i` to `j`.\n The matrix is of CSR format.\n\n Examples\n --------\n >>> X = [[0], [3], [1]]\n >>> from sklearn.neighbors import NearestNeighbors\n >>> neigh = NearestNeighbors(n_neighbors=2)\n >>> neigh.fit(X)\n NearestNeighbors(n_neighbors=2)\n >>> A = neigh.kneighbors_graph(X)\n >>> A.toarray()\n array([[1., 0., 1.],\n [0., 1., 1.],\n [1., 0., 1.]])\n\n See Also\n --------\n NearestNeighbors.radius_neighbors_graph\n \"\"\"\n check_is_fitted(self)\n if n_neighbors is None:\n n_neighbors = self.n_neighbors\n\n # check the input only in self.kneighbors\n\n # construct CSR matrix representation of the k-NN graph\n if mode == 'connectivity':\n A_ind = self.kneighbors(X, n_neighbors, return_distance=False)\n n_queries = A_ind.shape[0]\n A_data = np.ones(n_queries * n_neighbors)\n\n elif mode == 'distance':\n A_data, A_ind = self.kneighbors(\n X, n_neighbors, return_distance=True)\n A_data = np.ravel(A_data)\n\n else:\n raise ValueError(\n 'Unsupported mode, must be one of \"connectivity\" '\n 'or \"distance\" but got \"%s\" instead' % mode)\n\n n_queries = A_ind.shape[0]\n n_samples_fit = self.n_samples_fit_\n n_nonzero = n_queries * n_neighbors\n A_indptr = np.arange(0, n_nonzero + 1, n_neighbors)\n\n kneighbors_graph = csr_matrix((A_data, A_ind.ravel(), A_indptr),\n shape=(n_queries, n_samples_fit))\n\n return kneighbors_graph", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase", + "name": "NeighborsBase", + "qname": "sklearn.neighbors._base.NeighborsBase", + "decorators": [], + "superclasses": ["MultiOutputMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.neighbors._base/NeighborsBase/__init__", + "scikit-learn/sklearn.neighbors._base/NeighborsBase/_check_algorithm_metric", + "scikit-learn/sklearn.neighbors._base/NeighborsBase/_fit", + "scikit-learn/sklearn.neighbors._base/NeighborsBase/_more_tags", + "scikit-learn/sklearn.neighbors._base/NeighborsBase/_pairwise@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for nearest neighbors estimators.", + "docstring": "Base class for nearest neighbors estimators.", + "code": "class NeighborsBase(MultiOutputMixin, BaseEstimator, metaclass=ABCMeta):\n \"\"\"Base class for nearest neighbors estimators.\"\"\"\n\n @abstractmethod\n def __init__(self, n_neighbors=None, radius=None,\n algorithm='auto', leaf_size=30, metric='minkowski',\n p=2, metric_params=None, n_jobs=None):\n\n self.n_neighbors = n_neighbors\n self.radius = radius\n self.algorithm = algorithm\n self.leaf_size = leaf_size\n self.metric = 
metric\n self.metric_params = metric_params\n self.p = p\n self.n_jobs = n_jobs\n self._check_algorithm_metric()\n\n def _check_algorithm_metric(self):\n if self.algorithm not in ['auto', 'brute',\n 'kd_tree', 'ball_tree']:\n raise ValueError(\"unrecognized algorithm: '%s'\" % self.algorithm)\n\n if self.algorithm == 'auto':\n if self.metric == 'precomputed':\n alg_check = 'brute'\n elif (callable(self.metric) or\n self.metric in VALID_METRICS['ball_tree']):\n alg_check = 'ball_tree'\n else:\n alg_check = 'brute'\n else:\n alg_check = self.algorithm\n\n if callable(self.metric):\n if self.algorithm == 'kd_tree':\n # callable metric is only valid for brute force and ball_tree\n raise ValueError(\n \"kd_tree does not support callable metric '%s'. \"\n \"Function call overhead will result \"\n \"in very poor performance.\"\n % self.metric)\n elif self.metric not in VALID_METRICS[alg_check]:\n raise ValueError(\"Metric '%s' not valid. Use \"\n \"sorted(sklearn.neighbors.VALID_METRICS['%s']) \"\n \"to get valid options. \"\n \"Metric can also be a callable function.\"\n % (self.metric, alg_check))\n\n if self.metric_params is not None and 'p' in self.metric_params:\n if self.p is not None:\n warnings.warn(\"Parameter p is found in metric_params. \"\n \"The corresponding parameter from __init__ \"\n \"is ignored.\", SyntaxWarning, stacklevel=3)\n effective_p = self.metric_params['p']\n else:\n effective_p = self.p\n\n if self.metric in ['wminkowski', 'minkowski'] and effective_p < 1:\n raise ValueError(\"p must be greater than one for minkowski metric\")\n\n def _fit(self, X, y=None):\n if self._get_tags()[\"requires_y\"]:\n if not isinstance(X, (KDTree, BallTree, NeighborsBase)):\n X, y = self._validate_data(X, y, accept_sparse=\"csr\",\n multi_output=True)\n\n if is_classifier(self):\n # Classification targets require a specific format\n if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:\n if y.ndim != 1:\n warnings.warn(\"A column-vector y was passed when a \"\n \"1d array was expected. 
Please change \"\n \"the shape of y to (n_samples,), for \"\n \"example using ravel().\",\n DataConversionWarning, stacklevel=2)\n\n self.outputs_2d_ = False\n y = y.reshape((-1, 1))\n else:\n self.outputs_2d_ = True\n\n check_classification_targets(y)\n self.classes_ = []\n self._y = np.empty(y.shape, dtype=int)\n for k in range(self._y.shape[1]):\n classes, self._y[:, k] = np.unique(\n y[:, k], return_inverse=True)\n self.classes_.append(classes)\n\n if not self.outputs_2d_:\n self.classes_ = self.classes_[0]\n self._y = self._y.ravel()\n else:\n self._y = y\n\n else:\n if not isinstance(X, (KDTree, BallTree, NeighborsBase)):\n X = self._validate_data(X, accept_sparse='csr')\n\n self._check_algorithm_metric()\n if self.metric_params is None:\n self.effective_metric_params_ = {}\n else:\n self.effective_metric_params_ = self.metric_params.copy()\n\n effective_p = self.effective_metric_params_.get('p', self.p)\n if self.metric in ['wminkowski', 'minkowski']:\n self.effective_metric_params_['p'] = effective_p\n\n self.effective_metric_ = self.metric\n # For minkowski distance, use more efficient methods where available\n if self.metric == 'minkowski':\n p = self.effective_metric_params_.pop('p', 2)\n if p < 1:\n raise ValueError(\"p must be greater than one \"\n \"for minkowski metric\")\n elif p == 1:\n self.effective_metric_ = 'manhattan'\n elif p == 2:\n self.effective_metric_ = 'euclidean'\n elif p == np.inf:\n self.effective_metric_ = 'chebyshev'\n else:\n self.effective_metric_params_['p'] = p\n\n if isinstance(X, NeighborsBase):\n self._fit_X = X._fit_X\n self._tree = X._tree\n self._fit_method = X._fit_method\n self.n_samples_fit_ = X.n_samples_fit_\n return self\n\n elif isinstance(X, BallTree):\n self._fit_X = X.data\n self._tree = X\n self._fit_method = 'ball_tree'\n self.n_samples_fit_ = X.data.shape[0]\n return self\n\n elif isinstance(X, KDTree):\n self._fit_X = X.data\n self._tree = X\n self._fit_method = 'kd_tree'\n self.n_samples_fit_ = X.data.shape[0]\n return self\n\n if self.effective_metric_ == 'precomputed':\n X = _check_precomputed(X)\n self.n_features_in_ = X.shape[1]\n\n n_samples = X.shape[0]\n if n_samples == 0:\n raise ValueError(\"n_samples must be greater than 0\")\n\n # Precomputed matrix X must be squared\n if self.metric == 'precomputed' and X.shape[0] != X.shape[1]:\n raise ValueError(\"Precomputed matrix must be a square matrix.\"\n \" Input is a {}x{} matrix.\"\n .format(X.shape[0], X.shape[1]))\n\n if issparse(X):\n if self.algorithm not in ('auto', 'brute'):\n warnings.warn(\"cannot use tree with sparse input: \"\n \"using brute force\")\n if self.effective_metric_ not in VALID_METRICS_SPARSE['brute'] \\\n and not callable(self.effective_metric_):\n raise ValueError(\"Metric '%s' not valid for sparse input. \"\n \"Use sorted(sklearn.neighbors.\"\n \"VALID_METRICS_SPARSE['brute']) \"\n \"to get valid options. 
\"\n \"Metric can also be a callable function.\"\n % (self.effective_metric_))\n self._fit_X = X.copy()\n self._tree = None\n self._fit_method = 'brute'\n self.n_samples_fit_ = X.shape[0]\n return self\n\n self._fit_method = self.algorithm\n self._fit_X = X\n self.n_samples_fit_ = X.shape[0]\n\n if self._fit_method == 'auto':\n # A tree approach is better for small number of neighbors or small\n # number of features, with KDTree generally faster when available\n if (self.metric == 'precomputed' or self._fit_X.shape[1] > 15 or\n (self.n_neighbors is not None and\n self.n_neighbors >= self._fit_X.shape[0] // 2)):\n self._fit_method = 'brute'\n else:\n if self.effective_metric_ in VALID_METRICS['kd_tree']:\n self._fit_method = 'kd_tree'\n elif (callable(self.effective_metric_) or\n self.effective_metric_ in VALID_METRICS['ball_tree']):\n self._fit_method = 'ball_tree'\n else:\n self._fit_method = 'brute'\n\n if self._fit_method == 'ball_tree':\n self._tree = BallTree(X, self.leaf_size,\n metric=self.effective_metric_,\n **self.effective_metric_params_)\n elif self._fit_method == 'kd_tree':\n self._tree = KDTree(X, self.leaf_size,\n metric=self.effective_metric_,\n **self.effective_metric_params_)\n elif self._fit_method == 'brute':\n self._tree = None\n else:\n raise ValueError(\"algorithm = '%s' not recognized\"\n % self.algorithm)\n\n if self.n_neighbors is not None:\n if self.n_neighbors <= 0:\n raise ValueError(\n \"Expected n_neighbors > 0. Got %d\" %\n self.n_neighbors\n )\n else:\n if not isinstance(self.n_neighbors, numbers.Integral):\n raise TypeError(\n \"n_neighbors does not take %s value, \"\n \"enter integer value\" %\n type(self.n_neighbors))\n\n return self\n\n def _more_tags(self):\n # For cross-validation routines to split data correctly\n return {'pairwise': self.metric == 'precomputed'}\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n # For cross-validation routines to split data correctly\n return self.metric == 'precomputed'", + "instance_attributes": [ + { + "name": "algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "leaf_size", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "metric", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "p", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "outputs_2d_", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "classes_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "_y", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "effective_metric_params_", + "types": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "name": "effective_metric_", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin", + "name": "RadiusNeighborsMixin", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/_radius_neighbors_reduce_func", + "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors", + "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors_graph" + ], + "is_public": false, + "reexported_by": [], + 
"description": "Mixin for radius-based neighbors searches", + "docstring": "Mixin for radius-based neighbors searches", + "code": "class RadiusNeighborsMixin:\n \"\"\"Mixin for radius-based neighbors searches\"\"\"\n\n def _radius_neighbors_reduce_func(self, dist, start,\n radius, return_distance):\n \"\"\"Reduce a chunk of distances to the nearest neighbors\n\n Callback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\n Parameters\n ----------\n dist : ndarray of shape (n_samples_chunk, n_samples)\n The distance matrix.\n\n start : int\n The index in X which the first row of dist corresponds to.\n\n radius : float\n The radius considered when making the nearest neighbors search.\n\n return_distance : bool\n Whether or not to return the distances.\n\n Returns\n -------\n dist : list of ndarray of shape (n_samples_chunk,)\n Returned only if `return_distance=True`.\n\n neigh : list of ndarray of shape (n_samples_chunk,)\n The neighbors indices.\n \"\"\"\n neigh_ind = [np.where(d <= radius)[0] for d in dist]\n\n if return_distance:\n if self.effective_metric_ == 'euclidean':\n dist = [np.sqrt(d[neigh_ind[i]])\n for i, d in enumerate(dist)]\n else:\n dist = [d[neigh_ind[i]]\n for i, d in enumerate(dist)]\n results = dist, neigh_ind\n else:\n results = neigh_ind\n return results\n\n def radius_neighbors(self, X=None, radius=None, return_distance=True,\n sort_results=False):\n \"\"\"Finds the neighbors within a given radius of a point or points.\n\n Return the indices and distances of each point from the dataset\n lying in a ball with size ``radius`` around the points of the query\n array. Points lying on the boundary are included in the results.\n\n The result points are *not* necessarily sorted by distance to their\n query point.\n\n Parameters\n ----------\n X : array-like of (n_samples, n_features), default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\n radius : float, default=None\n Limiting distance of neighbors to return. The default is the value\n passed to the constructor.\n\n return_distance : bool, default=True\n Whether or not to return the distances.\n\n sort_results : bool, default=False\n If True, the distances and indices will be sorted by increasing\n distances before being returned. If False, the results may not\n be sorted. If `return_distance=False`, setting `sort_results=True`\n will result in an error.\n\n .. versionadded:: 0.22\n\n Returns\n -------\n neigh_dist : ndarray of shape (n_samples,) of arrays\n Array representing the distances to each point, only present if\n `return_distance=True`. 
The distance values are computed according\n to the ``metric`` constructor parameter.\n\n neigh_ind : ndarray of shape (n_samples,) of arrays\n An array of arrays of indices of the approximate nearest points\n from the population matrix that lie within a ball of size\n ``radius`` around the query points.\n\n Examples\n --------\n In the following example, we construct a NeighborsClassifier\n class from an array representing our data set and ask who's\n the closest point to [1, 1, 1]:\n\n >>> import numpy as np\n >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n >>> from sklearn.neighbors import NearestNeighbors\n >>> neigh = NearestNeighbors(radius=1.6)\n >>> neigh.fit(samples)\n NearestNeighbors(radius=1.6)\n >>> rng = neigh.radius_neighbors([[1., 1., 1.]])\n >>> print(np.asarray(rng[0][0]))\n [1.5 0.5]\n >>> print(np.asarray(rng[1][0]))\n [1 2]\n\n The first array returned contains the distances to all points which\n are closer than 1.6, while the second array returned contains their\n indices. In general, multiple points can be queried at the same time.\n\n Notes\n -----\n Because the number of neighbors of each point is not necessarily\n equal, the results for multiple query points cannot be fit in a\n standard data array.\n For efficiency, `radius_neighbors` returns arrays of objects, where\n each object is a 1D array of indices or distances.\n \"\"\"\n check_is_fitted(self)\n\n if X is not None:\n query_is_train = False\n if self.effective_metric_ == 'precomputed':\n X = _check_precomputed(X)\n else:\n X = check_array(X, accept_sparse='csr')\n else:\n query_is_train = True\n X = self._fit_X\n\n if radius is None:\n radius = self.radius\n\n if (self._fit_method == 'brute' and\n self.effective_metric_ == 'precomputed' and issparse(X)):\n results = _radius_neighbors_from_graph(\n X, radius=radius, return_distance=return_distance)\n\n elif self._fit_method == 'brute':\n # for efficiency, use squared euclidean distances\n if self.effective_metric_ == 'euclidean':\n radius *= radius\n kwds = {'squared': True}\n else:\n kwds = self.effective_metric_params_\n\n reduce_func = partial(self._radius_neighbors_reduce_func,\n radius=radius,\n return_distance=return_distance)\n\n chunked_results = pairwise_distances_chunked(\n X, self._fit_X, reduce_func=reduce_func,\n metric=self.effective_metric_, n_jobs=self.n_jobs,\n **kwds)\n if return_distance:\n neigh_dist_chunks, neigh_ind_chunks = zip(*chunked_results)\n neigh_dist_list = sum(neigh_dist_chunks, [])\n neigh_ind_list = sum(neigh_ind_chunks, [])\n neigh_dist = _to_object_array(neigh_dist_list)\n neigh_ind = _to_object_array(neigh_ind_list)\n results = neigh_dist, neigh_ind\n else:\n neigh_ind_list = sum(chunked_results, [])\n results = _to_object_array(neigh_ind_list)\n\n if sort_results:\n if not return_distance:\n raise ValueError(\"return_distance must be True \"\n \"if sort_results is True.\")\n for ii in range(len(neigh_dist)):\n order = np.argsort(neigh_dist[ii], kind='mergesort')\n neigh_ind[ii] = neigh_ind[ii][order]\n neigh_dist[ii] = neigh_dist[ii][order]\n results = neigh_dist, neigh_ind\n\n elif self._fit_method in ['ball_tree', 'kd_tree']:\n if issparse(X):\n raise ValueError(\n \"%s does not work with sparse matrices. 
Densify the data, \"\n \"or set algorithm='brute'\" % self._fit_method)\n\n n_jobs = effective_n_jobs(self.n_jobs)\n delayed_query = delayed(_tree_query_radius_parallel_helper)\n if parse_version(joblib.__version__) < parse_version('0.12'):\n # Deal with change of API in joblib\n parallel_kwargs = {\"backend\": \"threading\"}\n else:\n parallel_kwargs = {\"prefer\": \"threads\"}\n\n chunked_results = Parallel(n_jobs, **parallel_kwargs)(\n delayed_query(self._tree, X[s], radius, return_distance,\n sort_results=sort_results)\n\n for s in gen_even_slices(X.shape[0], n_jobs)\n )\n if return_distance:\n neigh_ind, neigh_dist = tuple(zip(*chunked_results))\n results = np.hstack(neigh_dist), np.hstack(neigh_ind)\n else:\n results = np.hstack(chunked_results)\n else:\n raise ValueError(\"internal: _fit_method not recognized\")\n\n if not query_is_train:\n return results\n else:\n # If the query data is the same as the indexed data, we would like\n # to ignore the first nearest neighbor of every sample, i.e\n # the sample itself.\n if return_distance:\n neigh_dist, neigh_ind = results\n else:\n neigh_ind = results\n\n for ind, ind_neighbor in enumerate(neigh_ind):\n mask = ind_neighbor != ind\n\n neigh_ind[ind] = ind_neighbor[mask]\n if return_distance:\n neigh_dist[ind] = neigh_dist[ind][mask]\n\n if return_distance:\n return neigh_dist, neigh_ind\n return neigh_ind\n\n def radius_neighbors_graph(self, X=None, radius=None, mode='connectivity',\n sort_results=False):\n \"\"\"Computes the (weighted) graph of Neighbors for points in X\n\n Neighborhoods are restricted the points at a distance lower than\n radius.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features), default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\n radius : float, default=None\n Radius of neighborhoods. The default is the value passed to the\n constructor.\n\n mode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the\n connectivity matrix with ones and zeros, in 'distance' the\n edges are Euclidean distance between points.\n\n sort_results : bool, default=False\n If True, in each row of the result, the non-zero entries will be\n sorted by increasing distances. If False, the non-zero entries may\n not be sorted. Only used with mode='distance'.\n\n .. 
versionadded:: 0.22\n\n Returns\n -------\n A : sparse-matrix of shape (n_queries, n_samples_fit)\n `n_samples_fit` is the number of samples in the fitted data\n `A[i, j]` is assigned the weight of edge that connects `i` to `j`.\n The matrix if of format CSR.\n\n Examples\n --------\n >>> X = [[0], [3], [1]]\n >>> from sklearn.neighbors import NearestNeighbors\n >>> neigh = NearestNeighbors(radius=1.5)\n >>> neigh.fit(X)\n NearestNeighbors(radius=1.5)\n >>> A = neigh.radius_neighbors_graph(X)\n >>> A.toarray()\n array([[1., 0., 1.],\n [0., 1., 0.],\n [1., 0., 1.]])\n\n See Also\n --------\n kneighbors_graph\n \"\"\"\n check_is_fitted(self)\n\n # check the input only in self.radius_neighbors\n\n if radius is None:\n radius = self.radius\n\n # construct CSR matrix representation of the NN graph\n if mode == 'connectivity':\n A_ind = self.radius_neighbors(X, radius,\n return_distance=False)\n A_data = None\n elif mode == 'distance':\n dist, A_ind = self.radius_neighbors(X, radius,\n return_distance=True,\n sort_results=sort_results)\n A_data = np.concatenate(list(dist))\n else:\n raise ValueError(\n 'Unsupported mode, must be one of \"connectivity\", '\n 'or \"distance\" but got %s instead' % mode)\n\n n_queries = A_ind.shape[0]\n n_samples_fit = self.n_samples_fit_\n n_neighbors = np.array([len(a) for a in A_ind])\n A_ind = np.concatenate(list(A_ind))\n if A_data is None:\n A_data = np.ones(len(A_ind))\n A_indptr = np.concatenate((np.zeros(1, dtype=int),\n np.cumsum(n_neighbors)))\n\n return csr_matrix((A_data, A_ind, A_indptr),\n shape=(n_queries, n_samples_fit))", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier", + "name": "KNeighborsClassifier", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier", + "decorators": [], + "superclasses": ["KNeighborsMixin", "ClassifierMixin", "NeighborsBase"], + "methods": [ + "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/__init__", + "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/fit", + "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/predict", + "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/predict_proba" + ], + "is_public": false, + "reexported_by": [], + "description": "Classifier implementing the k-nearest neighbors vote.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Classifier implementing the k-nearest neighbors vote.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_neighbors : int, default=5\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. 
All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n Doesn't affect :meth:`fit` method.\n\nAttributes\n----------\nclasses_ : array of shape (n_classes,)\n Class labels known to the classifier\n\neffective_metric_ : str or callble\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\noutputs_2d_ : bool\n False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n otherwise True.\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> neigh = KNeighborsClassifier(n_neighbors=3)\n>>> neigh.fit(X, y)\nKNeighborsClassifier(...)\n>>> print(neigh.predict([[1.1]]))\n[0]\n>>> print(neigh.predict_proba([[0.9]]))\n[[0.66666667 0.33333333]]\n\nSee Also\n--------\nRadiusNeighborsClassifier\nKNeighborsRegressor\nRadiusNeighborsRegressor\nNearestNeighbors\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n.. 
warning::\n\n Regarding the Nearest Neighbors algorithms, if it is found that two\n neighbors, neighbor `k+1` and `k`, have identical distances\n but different labels, the results will depend on the ordering of the\n training data.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm", + "code": "class KNeighborsClassifier(KNeighborsMixin,\n ClassifierMixin,\n NeighborsBase):\n \"\"\"Classifier implementing the k-nearest neighbors vote.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_neighbors : int, default=5\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n\n weights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\n leaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\n p : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n metric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\n metric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n n_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n Doesn't affect :meth:`fit` method.\n\n Attributes\n ----------\n classes_ : array of shape (n_classes,)\n Class labels known to the classifier\n\n effective_metric_ : str or callble\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\n effective_metric_params_ : dict\n Additional keyword arguments for the metric function. 
For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\n n_samples_fit_ : int\n Number of samples in the fitted data.\n\n outputs_2d_ : bool\n False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n otherwise True.\n\n Examples\n --------\n >>> X = [[0], [1], [2], [3]]\n >>> y = [0, 0, 1, 1]\n >>> from sklearn.neighbors import KNeighborsClassifier\n >>> neigh = KNeighborsClassifier(n_neighbors=3)\n >>> neigh.fit(X, y)\n KNeighborsClassifier(...)\n >>> print(neigh.predict([[1.1]]))\n [0]\n >>> print(neigh.predict_proba([[0.9]]))\n [[0.66666667 0.33333333]]\n\n See Also\n --------\n RadiusNeighborsClassifier\n KNeighborsRegressor\n RadiusNeighborsRegressor\n NearestNeighbors\n\n Notes\n -----\n See :ref:`Nearest Neighbors ` in the online documentation\n for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n .. warning::\n\n Regarding the Nearest Neighbors algorithms, if it is found that two\n neighbors, neighbor `k+1` and `k`, have identical distances\n but different labels, the results will depend on the ordering of the\n training data.\n\n https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, n_neighbors=5, *,\n weights='uniform', algorithm='auto', leaf_size=30,\n p=2, metric='minkowski', metric_params=None, n_jobs=None,\n **kwargs):\n super().__init__(\n n_neighbors=n_neighbors,\n algorithm=algorithm,\n leaf_size=leaf_size, metric=metric, p=p,\n metric_params=metric_params,\n n_jobs=n_jobs, **kwargs)\n self.weights = _check_weights(weights)\n\n def fit(self, X, y):\n \"\"\"Fit the k-nearest neighbors classifier from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n y : {array-like, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_outputs)\n Target values.\n\n Returns\n -------\n self : KNeighborsClassifier\n The fitted k-nearest neighbors classifier.\n \"\"\"\n return self._fit(X, y)\n\n def predict(self, X):\n \"\"\"Predict the class labels for the provided data.\n\n Parameters\n ----------\n X : array-like of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\n Returns\n -------\n y : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n Class labels for each data sample.\n \"\"\"\n X = check_array(X, accept_sparse='csr')\n\n neigh_dist, neigh_ind = self.kneighbors(X)\n classes_ = self.classes_\n _y = self._y\n if not self.outputs_2d_:\n _y = self._y.reshape((-1, 1))\n classes_ = [self.classes_]\n\n n_outputs = len(classes_)\n n_queries = _num_samples(X)\n weights = _get_weights(neigh_dist, self.weights)\n\n y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)\n for k, classes_k in enumerate(classes_):\n if weights is None:\n mode, _ = stats.mode(_y[neigh_ind, k], axis=1)\n else:\n mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)\n\n mode = np.asarray(mode.ravel(), dtype=np.intp)\n y_pred[:, k] = classes_k.take(mode)\n\n if not self.outputs_2d_:\n y_pred = y_pred.ravel()\n\n return y_pred\n\n def predict_proba(self, X):\n \"\"\"Return probability estimates for the test data X.\n\n Parameters\n ----------\n X : array-like of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\n Returns\n 
-------\n p : ndarray of shape (n_queries, n_classes), or a list of n_outputs\n of such arrays if n_outputs > 1.\n The class probabilities of the input samples. Classes are ordered\n by lexicographic order.\n \"\"\"\n X = check_array(X, accept_sparse='csr')\n\n neigh_dist, neigh_ind = self.kneighbors(X)\n\n classes_ = self.classes_\n _y = self._y\n if not self.outputs_2d_:\n _y = self._y.reshape((-1, 1))\n classes_ = [self.classes_]\n\n n_queries = _num_samples(X)\n\n weights = _get_weights(neigh_dist, self.weights)\n if weights is None:\n weights = np.ones_like(neigh_ind)\n\n all_rows = np.arange(X.shape[0])\n probabilities = []\n for k, classes_k in enumerate(classes_):\n pred_labels = _y[:, k][neigh_ind]\n proba_k = np.zeros((n_queries, classes_k.size))\n\n # a simple ':' index doesn't work right\n for i, idx in enumerate(pred_labels.T): # loop is O(n_neighbors)\n proba_k[all_rows, idx] += weights[:, i]\n\n # normalize 'votes' into real [0,1] probabilities\n normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n normalizer[normalizer == 0.0] = 1.0\n proba_k /= normalizer\n\n probabilities.append(proba_k)\n\n if not self.outputs_2d_:\n probabilities = probabilities[0]\n\n return probabilities", + "instance_attributes": [ + { + "name": "weights", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier", + "name": "RadiusNeighborsClassifier", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier", + "decorators": [], + "superclasses": ["RadiusNeighborsMixin", "ClassifierMixin", "NeighborsBase"], + "methods": [ + "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__", + "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/fit", + "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/predict", + "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/predict_proba" + ], + "is_public": false, + "reexported_by": [], + "description": "Classifier implementing a vote among neighbors within a given radius\n\nRead more in the :ref:`User Guide `.", + "docstring": "Classifier implementing a vote among neighbors within a given radius\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nradius : float, default=1.0\n Range of parameter space to use by default for :meth:`radius_neighbors`\n queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. 
All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\noutlier_label : {manual label, 'most_frequent'}, default=None\n label for outlier samples (samples with no neighbors in given radius).\n\n - manual label: str or int label (should be the same type as y)\n or list of manual labels if multi-output is used.\n - 'most_frequent' : assign the most frequent label of y to outliers.\n - None : when any outlier is detected, ValueError will be raised.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier.\n\neffective_metric_ : str or callable\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. 
For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\noutlier_label_ : int or array-like of shape (n_class,)\n Label which is given for outlier samples (samples with no neighbors\n on given radius).\n\noutputs_2d_ : bool\n False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n otherwise True.\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import RadiusNeighborsClassifier\n>>> neigh = RadiusNeighborsClassifier(radius=1.0)\n>>> neigh.fit(X, y)\nRadiusNeighborsClassifier(...)\n>>> print(neigh.predict([[1.5]]))\n[0]\n>>> print(neigh.predict_proba([[1.0]]))\n[[0.66666667 0.33333333]]\n\nSee Also\n--------\nKNeighborsClassifier\nRadiusNeighborsRegressor\nKNeighborsRegressor\nNearestNeighbors\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm", + "code": "class RadiusNeighborsClassifier(RadiusNeighborsMixin,\n ClassifierMixin,\n NeighborsBase):\n \"\"\"Classifier implementing a vote among neighbors within a given radius\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n radius : float, default=1.0\n Range of parameter space to use by default for :meth:`radius_neighbors`\n queries.\n\n weights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n Uniform weights are used by default.\n\n algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\n leaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\n p : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n metric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. 
X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\n outlier_label : {manual label, 'most_frequent'}, default=None\n label for outlier samples (samples with no neighbors in given radius).\n\n - manual label: str or int label (should be the same type as y)\n or list of manual labels if multi-output is used.\n - 'most_frequent' : assign the most frequent label of y to outliers.\n - None : when any outlier is detected, ValueError will be raised.\n\n metric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n n_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n classes_ : ndarray of shape (n_classes,)\n Class labels known to the classifier.\n\n effective_metric_ : str or callable\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\n effective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\n n_samples_fit_ : int\n Number of samples in the fitted data.\n\n outlier_label_ : int or array-like of shape (n_class,)\n Label which is given for outlier samples (samples with no neighbors\n on given radius).\n\n outputs_2d_ : bool\n False when `y`'s shape is (n_samples, ) or (n_samples, 1) during fit\n otherwise True.\n\n Examples\n --------\n >>> X = [[0], [1], [2], [3]]\n >>> y = [0, 0, 1, 1]\n >>> from sklearn.neighbors import RadiusNeighborsClassifier\n >>> neigh = RadiusNeighborsClassifier(radius=1.0)\n >>> neigh.fit(X, y)\n RadiusNeighborsClassifier(...)\n >>> print(neigh.predict([[1.5]]))\n [0]\n >>> print(neigh.predict_proba([[1.0]]))\n [[0.66666667 0.33333333]]\n\n See Also\n --------\n KNeighborsClassifier\n RadiusNeighborsRegressor\n KNeighborsRegressor\n NearestNeighbors\n\n Notes\n -----\n See :ref:`Nearest Neighbors ` in the online documentation\n for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, radius=1.0, *, weights='uniform',\n algorithm='auto', leaf_size=30, p=2, metric='minkowski',\n outlier_label=None, metric_params=None, n_jobs=None,\n **kwargs):\n super().__init__(\n radius=radius,\n algorithm=algorithm,\n leaf_size=leaf_size,\n metric=metric, p=p, metric_params=metric_params,\n n_jobs=n_jobs, **kwargs)\n self.weights = _check_weights(weights)\n self.outlier_label = outlier_label\n\n def fit(self, X, y):\n \"\"\"Fit the radius neighbors classifier from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n y : {array-like, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_outputs)\n Target values.\n\n Returns\n -------\n self : RadiusNeighborsClassifier\n The fitted radius neighbors classifier.\n \"\"\"\n self._fit(X, y)\n\n classes_ = self.classes_\n _y = self._y\n if not self.outputs_2d_:\n _y = self._y.reshape((-1, 1))\n classes_ = [self.classes_]\n\n if 
self.outlier_label is None:\n outlier_label_ = None\n\n elif self.outlier_label == 'most_frequent':\n outlier_label_ = []\n # iterate over multi-output, get the most frequent label for each\n # output.\n for k, classes_k in enumerate(classes_):\n label_count = np.bincount(_y[:, k])\n outlier_label_.append(classes_k[label_count.argmax()])\n\n else:\n if (_is_arraylike(self.outlier_label) and\n not isinstance(self.outlier_label, str)):\n if len(self.outlier_label) != len(classes_):\n raise ValueError(\"The length of outlier_label: {} is \"\n \"inconsistent with the output \"\n \"length: {}\".format(self.outlier_label,\n len(classes_)))\n outlier_label_ = self.outlier_label\n else:\n outlier_label_ = [self.outlier_label] * len(classes_)\n\n for classes, label in zip(classes_, outlier_label_):\n if (_is_arraylike(label) and\n not isinstance(label, str)):\n # ensure the outlier lable for each output is a scalar.\n raise TypeError(\"The outlier_label of classes {} is \"\n \"supposed to be a scalar, got \"\n \"{}.\".format(classes, label))\n if np.append(classes, label).dtype != classes.dtype:\n # ensure the dtype of outlier label is consistent with y.\n raise TypeError(\"The dtype of outlier_label {} is \"\n \"inconsistent with classes {} in \"\n \"y.\".format(label, classes))\n\n self.outlier_label_ = outlier_label_\n\n return self\n\n def predict(self, X):\n \"\"\"Predict the class labels for the provided data.\n\n Parameters\n ----------\n X : array-like of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\n Returns\n -------\n y : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n Class labels for each data sample.\n \"\"\"\n\n probs = self.predict_proba(X)\n classes_ = self.classes_\n\n if not self.outputs_2d_:\n probs = [probs]\n classes_ = [self.classes_]\n\n n_outputs = len(classes_)\n n_queries = probs[0].shape[0]\n y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)\n\n for k, prob in enumerate(probs):\n # iterate over multi-output, assign labels based on probabilities\n # of each output.\n max_prob_index = prob.argmax(axis=1)\n y_pred[:, k] = classes_[k].take(max_prob_index)\n\n outlier_zero_probs = (prob == 0).all(axis=1)\n if outlier_zero_probs.any():\n zero_prob_index = np.flatnonzero(outlier_zero_probs)\n y_pred[zero_prob_index, k] = self.outlier_label_[k]\n\n if not self.outputs_2d_:\n y_pred = y_pred.ravel()\n\n return y_pred\n\n def predict_proba(self, X):\n \"\"\"Return probability estimates for the test data X.\n\n Parameters\n ----------\n X : array-like of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\n Returns\n -------\n p : ndarray of shape (n_queries, n_classes), or a list of n_outputs\n of such arrays if n_outputs > 1.\n The class probabilities of the input samples. 
Classes are ordered\n by lexicographic order.\n \"\"\"\n\n X = check_array(X, accept_sparse='csr')\n n_queries = _num_samples(X)\n\n neigh_dist, neigh_ind = self.radius_neighbors(X)\n outlier_mask = np.zeros(n_queries, dtype=bool)\n outlier_mask[:] = [len(nind) == 0 for nind in neigh_ind]\n outliers = np.flatnonzero(outlier_mask)\n inliers = np.flatnonzero(~outlier_mask)\n\n classes_ = self.classes_\n _y = self._y\n if not self.outputs_2d_:\n _y = self._y.reshape((-1, 1))\n classes_ = [self.classes_]\n\n if self.outlier_label_ is None and outliers.size > 0:\n raise ValueError('No neighbors found for test samples %r, '\n 'you can try using larger radius, '\n 'giving a label for outliers, '\n 'or considering removing them from your dataset.'\n % outliers)\n\n weights = _get_weights(neigh_dist, self.weights)\n if weights is not None:\n weights = weights[inliers]\n\n probabilities = []\n # iterate over multi-output, measure probabilities of the k-th output.\n for k, classes_k in enumerate(classes_):\n pred_labels = np.zeros(len(neigh_ind), dtype=object)\n pred_labels[:] = [_y[ind, k] for ind in neigh_ind]\n\n proba_k = np.zeros((n_queries, classes_k.size))\n proba_inl = np.zeros((len(inliers), classes_k.size))\n\n # samples have different size of neighbors within the same radius\n if weights is None:\n for i, idx in enumerate(pred_labels[inliers]):\n proba_inl[i, :] = np.bincount(idx,\n minlength=classes_k.size)\n else:\n for i, idx in enumerate(pred_labels[inliers]):\n proba_inl[i, :] = np.bincount(idx,\n weights[i],\n minlength=classes_k.size)\n proba_k[inliers, :] = proba_inl\n\n if outliers.size > 0:\n _outlier_label = self.outlier_label_[k]\n label_index = np.flatnonzero(classes_k == _outlier_label)\n if label_index.size == 1:\n proba_k[outliers, label_index[0]] = 1.0\n else:\n warnings.warn('Outlier label {} is not in training '\n 'classes. All class probabilities of '\n 'outliers will be assigned with 0.'\n ''.format(self.outlier_label_[k]))\n\n # normalize 'votes' into real [0,1] probabilities\n normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n normalizer[normalizer == 0.0] = 1.0\n proba_k /= normalizer\n\n probabilities.append(proba_k)\n\n if not self.outputs_2d_:\n probabilities = probabilities[0]\n\n return probabilities", + "instance_attributes": [ + { + "name": "weights", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer", + "name": "KNeighborsTransformer", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer", + "decorators": [], + "superclasses": ["KNeighborsMixin", "TransformerMixin", "NeighborsBase"], + "methods": [ + "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/__init__", + "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/fit", + "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/transform", + "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/fit_transform", + "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Transform X into a (weighted) graph of k nearest neighbors\n\nThe transformed data is a sparse graph as returned by kneighbors_graph.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22", + "docstring": "Transform X into a (weighted) graph of k nearest neighbors\n\nThe transformed data is a sparse graph as returned by kneighbors_graph.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.22\n\nParameters\n----------\nmode : {'distance', 'connectivity'}, default='distance'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\nn_neighbors : int, default=5\n Number of neighbors for each sample in the transformed sparse graph.\n For compatibility reasons, as each sample is considered as its own\n neighbor, one extra neighbor will be computed when mode == 'distance'.\n In this case, the sparse graph contains (n_neighbors + 1) neighbors.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nmetric : str or callable, default='minkowski'\n metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=1\n The number of parallel jobs to run for neighbors search.\n If ``-1``, then the number of jobs is set to the number of CPU cores.\n\nAttributes\n----------\neffective_metric_ : str or callable\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. 
For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> from sklearn.manifold import Isomap\n>>> from sklearn.neighbors import KNeighborsTransformer\n>>> from sklearn.pipeline import make_pipeline\n>>> estimator = make_pipeline(\n... KNeighborsTransformer(n_neighbors=5, mode='distance'),\n... Isomap(neighbors_algorithm='precomputed'))", + "code": "class KNeighborsTransformer(KNeighborsMixin,\n TransformerMixin,\n NeighborsBase):\n \"\"\"Transform X into a (weighted) graph of k nearest neighbors\n\n The transformed data is a sparse graph as returned by kneighbors_graph.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.22\n\n Parameters\n ----------\n mode : {'distance', 'connectivity'}, default='distance'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\n n_neighbors : int, default=5\n Number of neighbors for each sample in the transformed sparse graph.\n For compatibility reasons, as each sample is considered as its own\n neighbor, one extra neighbor will be computed when mode == 'distance'.\n In this case, the sparse graph contains (n_neighbors + 1) neighbors.\n\n algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\n leaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\n metric : str or callable, default='minkowski'\n metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\n p : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. 
For arbitrary p, minkowski_distance (l_p) is used.\n\n metric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n n_jobs : int, default=1\n The number of parallel jobs to run for neighbors search.\n If ``-1``, then the number of jobs is set to the number of CPU cores.\n\n Attributes\n ----------\n effective_metric_ : str or callable\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\n effective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\n n_samples_fit_ : int\n Number of samples in the fitted data.\n\n Examples\n --------\n >>> from sklearn.manifold import Isomap\n >>> from sklearn.neighbors import KNeighborsTransformer\n >>> from sklearn.pipeline import make_pipeline\n >>> estimator = make_pipeline(\n ... KNeighborsTransformer(n_neighbors=5, mode='distance'),\n ... Isomap(neighbors_algorithm='precomputed'))\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, mode='distance', n_neighbors=5, algorithm='auto',\n leaf_size=30, metric='minkowski', p=2, metric_params=None,\n n_jobs=1):\n super(KNeighborsTransformer, self).__init__(\n n_neighbors=n_neighbors, radius=None, algorithm=algorithm,\n leaf_size=leaf_size, metric=metric, p=p,\n metric_params=metric_params, n_jobs=n_jobs)\n self.mode = mode\n\n def fit(self, X, y=None):\n \"\"\"Fit the k-nearest neighbors transformer from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n Returns\n -------\n self : KNeighborsTransformer\n The fitted k-nearest neighbors transformer.\n \"\"\"\n return self._fit(X)\n\n def transform(self, X):\n \"\"\"Computes the (weighted) graph of Neighbors for points in X\n\n Parameters\n ----------\n X : array-like of shape (n_samples_transform, n_features)\n Sample data.\n\n Returns\n -------\n Xt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format.\n \"\"\"\n check_is_fitted(self)\n add_one = self.mode == 'distance'\n return self.kneighbors_graph(X, mode=self.mode,\n n_neighbors=self.n_neighbors + add_one)\n\n def fit_transform(self, X, y=None):\n \"\"\"Fit to data, then transform it.\n\n Fits transformer to X and y with optional parameters fit_params\n and returns a transformed version of X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training set.\n\n y : ignored\n\n Returns\n -------\n Xt : sparse matrix of shape (n_samples, n_samples)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format.\n \"\"\"\n return self.fit(X).transform(X)\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_methods_sample_order_invariance':\n 'check is not applicable.'\n }\n }", + "instance_attributes": [ + { + "name": "mode", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": 
"scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer", + "name": "RadiusNeighborsTransformer", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer", + "decorators": [], + "superclasses": ["RadiusNeighborsMixin", "TransformerMixin", "NeighborsBase"], + "methods": [ + "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/__init__", + "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/fit", + "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/transform", + "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/fit_transform", + "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Transform X into a (weighted) graph of neighbors nearer than a radius\n\nThe transformed data is a sparse graph as returned by\nradius_neighbors_graph.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22", + "docstring": "Transform X into a (weighted) graph of neighbors nearer than a radius\n\nThe transformed data is a sparse graph as returned by\nradius_neighbors_graph.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nmode : {'distance', 'connectivity'}, default='distance'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\nradius : float, default=1.\n Radius of neighborhood in the transformed sparse graph.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nmetric : str or callable, default='minkowski'\n metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. 
For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=1\n The number of parallel jobs to run for neighbors search.\n If ``-1``, then the number of jobs is set to the number of CPU cores.\n\nAttributes\n----------\neffective_metric_ : str or callable\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> from sklearn.cluster import DBSCAN\n>>> from sklearn.neighbors import RadiusNeighborsTransformer\n>>> from sklearn.pipeline import make_pipeline\n>>> estimator = make_pipeline(\n... RadiusNeighborsTransformer(radius=42.0, mode='distance'),\n... DBSCAN(min_samples=30, metric='precomputed'))", + "code": "class RadiusNeighborsTransformer(RadiusNeighborsMixin,\n TransformerMixin,\n NeighborsBase):\n \"\"\"Transform X into a (weighted) graph of neighbors nearer than a radius\n\n The transformed data is a sparse graph as returned by\n radius_neighbors_graph.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.22\n\n Parameters\n ----------\n mode : {'distance', 'connectivity'}, default='distance'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\n radius : float, default=1.\n Radius of neighborhood in the transformed sparse graph.\n\n algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\n leaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\n metric : str or callable, default='minkowski'\n metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. 
This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\n p : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n metric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n n_jobs : int, default=1\n The number of parallel jobs to run for neighbors search.\n If ``-1``, then the number of jobs is set to the number of CPU cores.\n\n Attributes\n ----------\n effective_metric_ : str or callable\n The distance metric used. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\n effective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\n n_samples_fit_ : int\n Number of samples in the fitted data.\n\n Examples\n --------\n >>> from sklearn.cluster import DBSCAN\n >>> from sklearn.neighbors import RadiusNeighborsTransformer\n >>> from sklearn.pipeline import make_pipeline\n >>> estimator = make_pipeline(\n ... RadiusNeighborsTransformer(radius=42.0, mode='distance'),\n ... 
DBSCAN(min_samples=30, metric='precomputed'))\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, mode='distance', radius=1., algorithm='auto',\n leaf_size=30, metric='minkowski', p=2, metric_params=None,\n n_jobs=1):\n super(RadiusNeighborsTransformer, self).__init__(\n n_neighbors=None, radius=radius, algorithm=algorithm,\n leaf_size=leaf_size, metric=metric, p=p,\n metric_params=metric_params, n_jobs=n_jobs)\n self.mode = mode\n\n def fit(self, X, y=None):\n \"\"\"Fit the radius neighbors transformer from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n Returns\n -------\n self : RadiusNeighborsTransformer\n The fitted radius neighbors transformer.\n \"\"\"\n return self._fit(X)\n\n def transform(self, X):\n \"\"\"Computes the (weighted) graph of Neighbors for points in X\n\n Parameters\n ----------\n X : array-like of shape (n_samples_transform, n_features)\n Sample data\n\n Returns\n -------\n Xt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format.\n \"\"\"\n check_is_fitted(self)\n return self.radius_neighbors_graph(X, mode=self.mode,\n sort_results=True)\n\n def fit_transform(self, X, y=None):\n \"\"\"Fit to data, then transform it.\n\n Fits transformer to X and y with optional parameters fit_params\n and returns a transformed version of X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training set.\n\n y : ignored\n\n Returns\n -------\n Xt : sparse matrix of shape (n_samples, n_samples)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format.\n \"\"\"\n return self.fit(X).transform(X)\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_methods_sample_order_invariance':\n 'check is not applicable.'\n }\n }", + "instance_attributes": [ + { + "name": "mode", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity", + "name": "KernelDensity", + "qname": "sklearn.neighbors._kde.KernelDensity", + "decorators": [], + "superclasses": ["BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.neighbors._kde/KernelDensity/__init__", + "scikit-learn/sklearn.neighbors._kde/KernelDensity/_choose_algorithm", + "scikit-learn/sklearn.neighbors._kde/KernelDensity/fit", + "scikit-learn/sklearn.neighbors._kde/KernelDensity/score_samples", + "scikit-learn/sklearn.neighbors._kde/KernelDensity/score", + "scikit-learn/sklearn.neighbors._kde/KernelDensity/sample", + "scikit-learn/sklearn.neighbors._kde/KernelDensity/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Kernel Density Estimation.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Kernel Density Estimation.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbandwidth : float, default=1.0\n The bandwidth of the kernel.\n\nalgorithm : {'kd_tree', 'ball_tree', 'auto'}, default='auto'\n The tree algorithm to use.\n\nkernel : {'gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear', 'cosine'}, default='gaussian'\n The kernel to use.\n\nmetric : str, default='euclidean'\n The distance metric to use. 
Note that not all metrics are\n valid with all algorithms. Refer to the documentation of\n :class:`BallTree` and :class:`KDTree` for a description of\n available algorithms. Note that the normalization of the density\n output is correct only for the Euclidean distance metric. Default\n is 'euclidean'.\n\natol : float, default=0\n The desired absolute tolerance of the result. A larger tolerance will\n generally lead to faster execution.\n\nrtol : float, default=0\n The desired relative tolerance of the result. A larger tolerance will\n generally lead to faster execution.\n\nbreadth_first : bool, default=True\n If true (default), use a breadth-first approach to the problem.\n Otherwise use a depth-first approach.\n\nleaf_size : int, default=40\n Specify the leaf size of the underlying tree. See :class:`BallTree`\n or :class:`KDTree` for details.\n\nmetric_params : dict, default=None\n Additional parameters to be passed to the tree for use with the\n metric. For more information, see the documentation of\n :class:`BallTree` or :class:`KDTree`.\n\nAttributes\n----------\ntree_ : ``BinaryTree`` instance\n The tree algorithm for fast generalized N-point problems.\n\nSee Also\n--------\nsklearn.neighbors.KDTree : K-dimensional tree for fast generalized N-point\n problems.\nsklearn.neighbors.BallTree : Ball tree for fast generalized N-point\n problems.\n\nExamples\n--------\nCompute a gaussian kernel density estimate with a fixed bandwidth.\n\n>>> import numpy as np\n>>> rng = np.random.RandomState(42)\n>>> X = rng.random_sample((100, 3))\n>>> kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)\n>>> log_density = kde.score_samples(X[:3])\n>>> log_density\narray([-1.52955942, -1.51462041, -1.60244657])", + "code": "class KernelDensity(BaseEstimator):\n \"\"\"Kernel Density Estimation.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n bandwidth : float, default=1.0\n The bandwidth of the kernel.\n\n algorithm : {'kd_tree', 'ball_tree', 'auto'}, default='auto'\n The tree algorithm to use.\n\n kernel : {'gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear', \\\n 'cosine'}, default='gaussian'\n The kernel to use.\n\n metric : str, default='euclidean'\n The distance metric to use. Note that not all metrics are\n valid with all algorithms. Refer to the documentation of\n :class:`BallTree` and :class:`KDTree` for a description of\n available algorithms. Note that the normalization of the density\n output is correct only for the Euclidean distance metric. Default\n is 'euclidean'.\n\n atol : float, default=0\n The desired absolute tolerance of the result. A larger tolerance will\n generally lead to faster execution.\n\n rtol : float, default=0\n The desired relative tolerance of the result. A larger tolerance will\n generally lead to faster execution.\n\n breadth_first : bool, default=True\n If true (default), use a breadth-first approach to the problem.\n Otherwise use a depth-first approach.\n\n leaf_size : int, default=40\n Specify the leaf size of the underlying tree. See :class:`BallTree`\n or :class:`KDTree` for details.\n\n metric_params : dict, default=None\n Additional parameters to be passed to the tree for use with the\n metric. 
For more information, see the documentation of\n :class:`BallTree` or :class:`KDTree`.\n\n Attributes\n ----------\n tree_ : ``BinaryTree`` instance\n The tree algorithm for fast generalized N-point problems.\n\n See Also\n --------\n sklearn.neighbors.KDTree : K-dimensional tree for fast generalized N-point\n problems.\n sklearn.neighbors.BallTree : Ball tree for fast generalized N-point\n problems.\n\n Examples\n --------\n Compute a gaussian kernel density estimate with a fixed bandwidth.\n\n >>> import numpy as np\n >>> rng = np.random.RandomState(42)\n >>> X = rng.random_sample((100, 3))\n >>> kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)\n >>> log_density = kde.score_samples(X[:3])\n >>> log_density\n array([-1.52955942, -1.51462041, -1.60244657])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, bandwidth=1.0, algorithm='auto',\n kernel='gaussian', metric=\"euclidean\", atol=0, rtol=0,\n breadth_first=True, leaf_size=40, metric_params=None):\n self.algorithm = algorithm\n self.bandwidth = bandwidth\n self.kernel = kernel\n self.metric = metric\n self.atol = atol\n self.rtol = rtol\n self.breadth_first = breadth_first\n self.leaf_size = leaf_size\n self.metric_params = metric_params\n\n # run the choose algorithm code so that exceptions will happen here\n # we're using clone() in the GenerativeBayes classifier,\n # so we can't do this kind of logic in __init__\n self._choose_algorithm(self.algorithm, self.metric)\n\n if bandwidth <= 0:\n raise ValueError(\"bandwidth must be positive\")\n if kernel not in VALID_KERNELS:\n raise ValueError(\"invalid kernel: '{0}'\".format(kernel))\n\n def _choose_algorithm(self, algorithm, metric):\n # given the algorithm string + metric string, choose the optimal\n # algorithm to compute the result.\n if algorithm == 'auto':\n # use KD Tree if possible\n if metric in KDTree.valid_metrics:\n return 'kd_tree'\n elif metric in BallTree.valid_metrics:\n return 'ball_tree'\n else:\n raise ValueError(\"invalid metric: '{0}'\".format(metric))\n elif algorithm in TREE_DICT:\n if metric not in TREE_DICT[algorithm].valid_metrics:\n raise ValueError(\"invalid metric for {0}: \"\n \"'{1}'\".format(TREE_DICT[algorithm],\n metric))\n return algorithm\n else:\n raise ValueError(\"invalid algorithm: '{0}'\".format(algorithm))\n\n def fit(self, X, y=None, sample_weight=None):\n \"\"\"Fit the Kernel Density model on the data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\n y : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\n sample_weight : array-like of shape (n_samples,), default=None\n List of sample weights attached to the data X.\n\n .. 
versionadded:: 0.20\n\n Returns\n -------\n self : object\n Returns instance of object.\n \"\"\"\n algorithm = self._choose_algorithm(self.algorithm, self.metric)\n X = self._validate_data(X, order='C', dtype=DTYPE)\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X, DTYPE)\n if sample_weight.min() <= 0:\n raise ValueError(\"sample_weight must have positive values\")\n\n kwargs = self.metric_params\n if kwargs is None:\n kwargs = {}\n self.tree_ = TREE_DICT[algorithm](X, metric=self.metric,\n leaf_size=self.leaf_size,\n sample_weight=sample_weight,\n **kwargs)\n return self\n\n def score_samples(self, X):\n \"\"\"Evaluate the log density model on the data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n An array of points to query. Last dimension should match dimension\n of training data (n_features).\n\n Returns\n -------\n density : ndarray of shape (n_samples,)\n The array of log(density) evaluations. These are normalized to be\n probability densities, so values will be low for high-dimensional\n data.\n \"\"\"\n check_is_fitted(self)\n # The returned density is normalized to the number of points.\n # For it to be a probability, we must scale it. For this reason\n # we'll also scale atol.\n X = check_array(X, order='C', dtype=DTYPE)\n if self.tree_.sample_weight is None:\n N = self.tree_.data.shape[0]\n else:\n N = self.tree_.sum_weight\n atol_N = self.atol * N\n log_density = self.tree_.kernel_density(\n X, h=self.bandwidth, kernel=self.kernel, atol=atol_N,\n rtol=self.rtol, breadth_first=self.breadth_first, return_log=True)\n log_density -= np.log(N)\n return log_density\n\n def score(self, X, y=None):\n \"\"\"Compute the total log probability density under the model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\n y : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\n Returns\n -------\n logprob : float\n Total log-likelihood of the data in X. This is normalized to be a\n probability density, so the value will be low for high-dimensional\n data.\n \"\"\"\n return np.sum(self.score_samples(X))\n\n def sample(self, n_samples=1, random_state=None):\n \"\"\"Generate random samples from the model.\n\n Currently, this is implemented only for gaussian and tophat kernels.\n\n Parameters\n ----------\n n_samples : int, default=1\n Number of samples to generate.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation used to generate\n random samples. 
Pass an int for reproducible results\n across multiple function calls.\n See :term: `Glossary `.\n\n Returns\n -------\n X : array-like of shape (n_samples, n_features)\n List of samples.\n \"\"\"\n check_is_fitted(self)\n # TODO: implement sampling for other valid kernel shapes\n if self.kernel not in ['gaussian', 'tophat']:\n raise NotImplementedError()\n\n data = np.asarray(self.tree_.data)\n\n rng = check_random_state(random_state)\n u = rng.uniform(0, 1, size=n_samples)\n if self.tree_.sample_weight is None:\n i = (u * data.shape[0]).astype(np.int64)\n else:\n cumsum_weight = np.cumsum(np.asarray(self.tree_.sample_weight))\n sum_weight = cumsum_weight[-1]\n i = np.searchsorted(cumsum_weight, u * sum_weight)\n if self.kernel == 'gaussian':\n return np.atleast_2d(rng.normal(data[i], self.bandwidth))\n\n elif self.kernel == 'tophat':\n # we first draw points from a d-dimensional normal distribution,\n # then use an incomplete gamma function to map them to a uniform\n # d-dimensional tophat distribution.\n dim = data.shape[1]\n X = rng.normal(size=(n_samples, dim))\n s_sq = row_norms(X, squared=True)\n correction = (gammainc(0.5 * dim, 0.5 * s_sq) ** (1. / dim)\n * self.bandwidth / np.sqrt(s_sq))\n return data[i] + X * correction[:, np.newaxis]\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'sample_weight must have positive values',\n }\n }", + "instance_attributes": [ + { + "name": "algorithm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "bandwidth", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "kernel", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "metric", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "atol", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "rtol", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "breadth_first", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "leaf_size", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor", + "name": "LocalOutlierFactor", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor", + "decorators": [], + "superclasses": ["KNeighborsMixin", "OutlierMixin", "NeighborsBase"], + "methods": [ + "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/__init__", + "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/fit_predict@getter", + "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_fit_predict", + "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/fit", + "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/predict@getter", + "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_predict", + "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/decision_function@getter", + "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_decision_function", + "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/score_samples@getter", + "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_score_samples", + "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_local_reachability_density" + ], + "is_public": false, + "reexported_by": [], + "description": "Unsupervised Outlier Detection using Local Outlier Factor (LOF)\n\nThe anomaly score of each sample is called Local Outlier Factor.\nIt measures the local deviation of density of a given sample with\nrespect to its neighbors.\nIt is local 
in that the anomaly score depends on how isolated the object\nis with respect to the surrounding neighborhood.\nMore precisely, locality is given by k-nearest neighbors, whose distance\nis used to estimate the local density.\nBy comparing the local density of a sample to the local densities of\nits neighbors, one can identify samples that have a substantially lower\ndensity than their neighbors. These are considered outliers.\n\n.. versionadded:: 0.19", + "docstring": "Unsupervised Outlier Detection using Local Outlier Factor (LOF)\n\nThe anomaly score of each sample is called Local Outlier Factor.\nIt measures the local deviation of density of a given sample with\nrespect to its neighbors.\nIt is local in that the anomaly score depends on how isolated the object\nis with respect to the surrounding neighborhood.\nMore precisely, locality is given by k-nearest neighbors, whose distance\nis used to estimate the local density.\nBy comparing the local density of a sample to the local densities of\nits neighbors, one can identify samples that have a substantially lower\ndensity than their neighbors. These are considered outliers.\n\n.. versionadded:: 0.19\n\nParameters\n----------\nn_neighbors : int, default=20\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n If n_neighbors is larger than the number of samples provided,\n all samples will be used.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nmetric : str or callable, default='minkowski'\n metric used for the distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square. X may be a sparse matrix, in which case only \"nonzero\"\n elements may be considered neighbors.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics:\n https://docs.scipy.org/doc/scipy/reference/spatial.distance.html\n\np : int, default=2\n Parameter for the Minkowski metric from\n :func:`sklearn.metrics.pairwise.pairwise_distances`. 
When p = 1, this\n is equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\ncontamination : 'auto' or float, default='auto'\n The amount of contamination of the data set, i.e. the proportion\n of outliers in the data set. When fitting this is used to define the\n threshold on the scores of the samples.\n\n - if 'auto', the threshold is determined as in the\n original paper,\n - if a float, the contamination should be in the range [0, 0.5].\n\n .. versionchanged:: 0.22\n The default value of ``contamination`` changed from 0.1\n to ``'auto'``.\n\nnovelty : bool, default=False\n By default, LocalOutlierFactor is only meant to be used for outlier\n detection (novelty=False). Set novelty to True if you want to use\n LocalOutlierFactor for novelty detection. In this case be aware that\n you should only use predict, decision_function and score_samples\n on new unseen data and not on the training set.\n\n .. versionadded:: 0.20\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nnegative_outlier_factor_ : ndarray of shape (n_samples,)\n The opposite LOF of the training samples. The higher, the more normal.\n Inliers tend to have a LOF score close to 1\n (``negative_outlier_factor_`` close to -1), while outliers tend to have\n a larger LOF score.\n\n The local outlier factor (LOF) of a sample captures its\n supposed 'degree of abnormality'.\n It is the average of the ratio of the local reachability density of\n a sample and those of its k-nearest neighbors.\n\nn_neighbors_ : int\n The actual number of neighbors used for :meth:`kneighbors` queries.\n\noffset_ : float\n Offset used to obtain binary labels from the raw scores.\n Observations having a negative_outlier_factor smaller than `offset_`\n are detected as abnormal.\n The offset is set to -1.5 (inliers score around -1), except when a\n contamination parameter different than \"auto\" is provided. In that\n case, the offset is defined in such a way we obtain the expected\n number of outliers in training.\n\n .. versionadded:: 0.20\n\neffective_metric_ : str\n The effective metric used for the distance computation.\n\neffective_metric_params_ : dict\n The effective additional keyword arguments for the metric function.\n\nn_samples_fit_ : int\n It is the number of samples in the fitted data.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.neighbors import LocalOutlierFactor\n>>> X = [[-1.1], [0.2], [101.1], [0.3]]\n>>> clf = LocalOutlierFactor(n_neighbors=2)\n>>> clf.fit_predict(X)\narray([ 1, 1, -1, 1])\n>>> clf.negative_outlier_factor_\narray([ -0.9821..., -1.0370..., -73.3697..., -0.9821...])\n\nReferences\n----------\n.. [1] Breunig, M. M., Kriegel, H. P., Ng, R. T., & Sander, J. (2000, May).\n LOF: identifying density-based local outliers. 
In ACM sigmod record.", + "code": "class LocalOutlierFactor(KNeighborsMixin,\n OutlierMixin,\n NeighborsBase):\n \"\"\"Unsupervised Outlier Detection using Local Outlier Factor (LOF)\n\n The anomaly score of each sample is called Local Outlier Factor.\n It measures the local deviation of density of a given sample with\n respect to its neighbors.\n It is local in that the anomaly score depends on how isolated the object\n is with respect to the surrounding neighborhood.\n More precisely, locality is given by k-nearest neighbors, whose distance\n is used to estimate the local density.\n By comparing the local density of a sample to the local densities of\n its neighbors, one can identify samples that have a substantially lower\n density than their neighbors. These are considered outliers.\n\n .. versionadded:: 0.19\n\n Parameters\n ----------\n n_neighbors : int, default=20\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n If n_neighbors is larger than the number of samples provided,\n all samples will be used.\n\n algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\n leaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\n metric : str or callable, default='minkowski'\n metric used for the distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square. X may be a sparse matrix, in which case only \"nonzero\"\n elements may be considered neighbors.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics:\n https://docs.scipy.org/doc/scipy/reference/spatial.distance.html\n\n p : int, default=2\n Parameter for the Minkowski metric from\n :func:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this\n is equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n metric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n contamination : 'auto' or float, default='auto'\n The amount of contamination of the data set, i.e. the proportion\n of outliers in the data set. 
When fitting this is used to define the\n threshold on the scores of the samples.\n\n - if 'auto', the threshold is determined as in the\n original paper,\n - if a float, the contamination should be in the range [0, 0.5].\n\n .. versionchanged:: 0.22\n The default value of ``contamination`` changed from 0.1\n to ``'auto'``.\n\n novelty : bool, default=False\n By default, LocalOutlierFactor is only meant to be used for outlier\n detection (novelty=False). Set novelty to True if you want to use\n LocalOutlierFactor for novelty detection. In this case be aware that\n you should only use predict, decision_function and score_samples\n on new unseen data and not on the training set.\n\n .. versionadded:: 0.20\n\n n_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n negative_outlier_factor_ : ndarray of shape (n_samples,)\n The opposite LOF of the training samples. The higher, the more normal.\n Inliers tend to have a LOF score close to 1\n (``negative_outlier_factor_`` close to -1), while outliers tend to have\n a larger LOF score.\n\n The local outlier factor (LOF) of a sample captures its\n supposed 'degree of abnormality'.\n It is the average of the ratio of the local reachability density of\n a sample and those of its k-nearest neighbors.\n\n n_neighbors_ : int\n The actual number of neighbors used for :meth:`kneighbors` queries.\n\n offset_ : float\n Offset used to obtain binary labels from the raw scores.\n Observations having a negative_outlier_factor smaller than `offset_`\n are detected as abnormal.\n The offset is set to -1.5 (inliers score around -1), except when a\n contamination parameter different than \"auto\" is provided. In that\n case, the offset is defined in such a way we obtain the expected\n number of outliers in training.\n\n .. versionadded:: 0.20\n\n effective_metric_ : str\n The effective metric used for the distance computation.\n\n effective_metric_params_ : dict\n The effective additional keyword arguments for the metric function.\n\n n_samples_fit_ : int\n It is the number of samples in the fitted data.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.neighbors import LocalOutlierFactor\n >>> X = [[-1.1], [0.2], [101.1], [0.3]]\n >>> clf = LocalOutlierFactor(n_neighbors=2)\n >>> clf.fit_predict(X)\n array([ 1, 1, -1, 1])\n >>> clf.negative_outlier_factor_\n array([ -0.9821..., -1.0370..., -73.3697..., -0.9821...])\n\n References\n ----------\n .. [1] Breunig, M. M., Kriegel, H. P., Ng, R. T., & Sander, J. (2000, May).\n LOF: identifying density-based local outliers. 
In ACM sigmod record.\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_neighbors=20, *, algorithm='auto', leaf_size=30,\n metric='minkowski', p=2, metric_params=None,\n contamination=\"auto\", novelty=False, n_jobs=None):\n super().__init__(\n n_neighbors=n_neighbors,\n algorithm=algorithm,\n leaf_size=leaf_size, metric=metric, p=p,\n metric_params=metric_params, n_jobs=n_jobs)\n self.contamination = contamination\n self.novelty = novelty\n\n @property\n def fit_predict(self):\n \"\"\"Fits the model to the training set X and returns the labels.\n\n **Not available for novelty detection (when novelty is set to True).**\n Label is 1 for an inlier and -1 for an outlier according to the LOF\n score and the contamination parameter.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n is_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and 1 for inliers.\n \"\"\"\n\n # As fit_predict would be different from fit.predict, fit_predict is\n # only available for outlier detection (novelty=False)\n\n if self.novelty:\n msg = ('fit_predict is not available when novelty=True. Use '\n 'novelty=False if you want to predict on the training set.')\n raise AttributeError(msg)\n\n return self._fit_predict\n\n def _fit_predict(self, X, y=None):\n \"\"\"Fits the model to the training set X and returns the labels.\n\n Label is 1 for an inlier and -1 for an outlier according to the LOF\n score and the contamination parameter.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\n Returns\n -------\n is_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and 1 for inliers.\n \"\"\"\n\n # As fit_predict would be different from fit.predict, fit_predict is\n # only available for outlier detection (novelty=False)\n\n return self.fit(X)._predict()\n\n def fit(self, X, y=None):\n \"\"\"Fit the local outlier factor detector from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : LocalOutlierFactor\n The fitted local outlier factor detector.\n \"\"\"\n self._fit(X)\n\n if self.contamination != 'auto':\n if not(0. < self.contamination <= .5):\n raise ValueError(\"contamination must be in (0, 0.5], \"\n \"got: %f\" % self.contamination)\n\n n_samples = self.n_samples_fit_\n if self.n_neighbors > n_samples:\n warnings.warn(\"n_neighbors (%s) is greater than the \"\n \"total number of samples (%s). 
n_neighbors \"\n \"will be set to (n_samples - 1) for estimation.\"\n % (self.n_neighbors, n_samples))\n self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))\n\n self._distances_fit_X_, _neighbors_indices_fit_X_ = self.kneighbors(\n n_neighbors=self.n_neighbors_)\n\n self._lrd = self._local_reachability_density(\n self._distances_fit_X_, _neighbors_indices_fit_X_)\n\n # Compute lof score over training samples to define offset_:\n lrd_ratios_array = (self._lrd[_neighbors_indices_fit_X_] /\n self._lrd[:, np.newaxis])\n\n self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)\n\n if self.contamination == \"auto\":\n # inliers score around -1 (the higher, the less abnormal).\n self.offset_ = -1.5\n else:\n self.offset_ = np.percentile(self.negative_outlier_factor_,\n 100. * self.contamination)\n\n return self\n\n @property\n def predict(self):\n \"\"\"Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n **Only available for novelty detection (when novelty is set to True).**\n This method allows prediction to be generalized to *new observations*\n (not in the training set).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\n Returns\n -------\n is_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers.\n \"\"\"\n if not self.novelty:\n msg = ('predict is not available when novelty=False, use '\n 'fit_predict if you want to predict on training data. Use '\n 'novelty=True if you want to use LOF for novelty detection '\n 'and predict on new unseen data.')\n raise AttributeError(msg)\n\n return self._predict\n\n def _predict(self, X=None):\n \"\"\"Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n If X is None, returns the same as fit_predict(X_train).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples. If None, makes prediction on the\n training data without considering them as their own neighbors.\n\n Returns\n -------\n is_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers.\n \"\"\"\n check_is_fitted(self)\n\n if X is not None:\n X = check_array(X, accept_sparse='csr')\n is_inlier = np.ones(X.shape[0], dtype=int)\n is_inlier[self.decision_function(X) < 0] = -1\n else:\n is_inlier = np.ones(self.n_samples_fit_, dtype=int)\n is_inlier[self.negative_outlier_factor_ < self.offset_] = -1\n\n return is_inlier\n\n @property\n def decision_function(self):\n \"\"\"Shifted opposite of the Local Outlier Factor of X.\n\n Bigger is better, i.e. large values correspond to inliers.\n\n **Only available for novelty detection (when novelty is set to True).**\n The shift offset allows a zero threshold for being an outlier.\n The argument X is supposed to contain *new data*: if X contains a\n point from training, it considers the latter in its own neighborhood.\n Also, the samples in X are not considered in the neighborhood of any\n point.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\n Returns\n -------\n shifted_opposite_lof_scores : ndarray of shape (n_samples,)\n The shifted opposite of the Local Outlier Factor of each input\n sample. The lower, the more abnormal. 
Negative scores represent\n outliers, positive scores represent inliers.\n \"\"\"\n if not self.novelty:\n msg = ('decision_function is not available when novelty=False. '\n 'Use novelty=True if you want to use LOF for novelty '\n 'detection and compute decision_function for new unseen '\n 'data. Note that the opposite LOF of the training samples '\n 'is always available by considering the '\n 'negative_outlier_factor_ attribute.')\n raise AttributeError(msg)\n\n return self._decision_function\n\n def _decision_function(self, X):\n \"\"\"Shifted opposite of the Local Outlier Factor of X.\n\n Bigger is better, i.e. large values correspond to inliers.\n\n **Only available for novelty detection (when novelty is set to True).**\n The shift offset allows a zero threshold for being an outlier.\n The argument X is supposed to contain *new data*: if X contains a\n point from training, it considers the latter in its own neighborhood.\n Also, the samples in X are not considered in the neighborhood of any\n point.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\n Returns\n -------\n shifted_opposite_lof_scores : ndarray of shape (n_samples,)\n The shifted opposite of the Local Outlier Factor of each input\n sample. The lower, the more abnormal. Negative scores represent\n outliers, positive scores represent inliers.\n \"\"\"\n\n return self._score_samples(X) - self.offset_\n\n @property\n def score_samples(self):\n \"\"\"Opposite of the Local Outlier Factor of X.\n\n It is the opposite because bigger is better, i.e. large values\n correspond to inliers.\n\n **Only available for novelty detection (when novelty is set to True).**\n The argument X is supposed to contain *new data*: if X contains a\n point from training, it considers the latter in its own neighborhood.\n Also, the samples in X are not considered in the neighborhood of any\n point.\n The score_samples on training data is available by considering the\n ``negative_outlier_factor_`` attribute.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\n Returns\n -------\n opposite_lof_scores : ndarray of shape (n_samples,)\n The opposite of the Local Outlier Factor of each input sample.\n The lower, the more abnormal.\n \"\"\"\n if not self.novelty:\n msg = ('score_samples is not available when novelty=False. The '\n 'scores of the training samples are always available '\n 'through the negative_outlier_factor_ attribute. Use '\n 'novelty=True if you want to use LOF for novelty detection '\n 'and compute score_samples for new unseen data.')\n raise AttributeError(msg)\n\n return self._score_samples\n\n def _score_samples(self, X):\n \"\"\"Opposite of the Local Outlier Factor of X.\n\n It is the opposite because bigger is better, i.e. 
large values correspond\n to inliers.\n\n **Only available for novelty detection (when novelty is set to True).**\n The argument X is supposed to contain *new data*: if X contains a\n point from training, it considers the latter in its own neighborhood.\n Also, the samples in X are not considered in the neighborhood of any\n point.\n The score_samples on training data is available by considering the\n ``negative_outlier_factor_`` attribute.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\n Returns\n -------\n opposite_lof_scores : ndarray of shape (n_samples,)\n The opposite of the Local Outlier Factor of each input sample.\n The lower, the more abnormal.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse='csr')\n\n distances_X, neighbors_indices_X = (\n self.kneighbors(X, n_neighbors=self.n_neighbors_))\n X_lrd = self._local_reachability_density(distances_X,\n neighbors_indices_X)\n\n lrd_ratios_array = (self._lrd[neighbors_indices_X] /\n X_lrd[:, np.newaxis])\n\n # as bigger is better:\n return -np.mean(lrd_ratios_array, axis=1)\n\n def _local_reachability_density(self, distances_X, neighbors_indices):\n \"\"\"The local reachability density (LRD)\n\n The LRD of a sample is the inverse of the average reachability\n distance of its k-nearest neighbors.\n\n Parameters\n ----------\n distances_X : ndarray of shape (n_queries, self.n_neighbors)\n Distances to the neighbors (in the training samples `self._fit_X`)\n of each query point to compute the LRD.\n\n neighbors_indices : ndarray of shape (n_queries, self.n_neighbors)\n Neighbors indices (of each query point) among training samples\n self._fit_X.\n\n Returns\n -------\n local_reachability_density : ndarray of shape (n_queries,)\n The local reachability density of each sample.\n \"\"\"\n dist_k = self._distances_fit_X_[neighbors_indices,\n self.n_neighbors_ - 1]\n reach_dist_array = np.maximum(distances_X, dist_k)\n\n # 1e-10 to avoid `nan` when the number of duplicates > n_neighbors_:\n return 1. 
/ (np.mean(reach_dist_array, axis=1) + 1e-10)", + "instance_attributes": [ + { + "name": "contamination", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "novelty", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "offset_", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis", + "name": "NeighborhoodComponentsAnalysis", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/__init__", + "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/fit", + "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/transform", + "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_validate_params", + "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_initialize", + "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_callback", + "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_loss_grad_lbfgs", + "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Neighborhood Components Analysis\n\nNeighborhood Component Analysis (NCA) is a machine learning algorithm for\nmetric learning. It learns a linear transformation in a supervised fashion\nto improve the classification accuracy of a stochastic nearest neighbors\nrule in the transformed space.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Neighborhood Components Analysis\n\nNeighborhood Component Analysis (NCA) is a machine learning algorithm for\nmetric learning. It learns a linear transformation in a supervised fashion\nto improve the classification accuracy of a stochastic nearest neighbors\nrule in the transformed space.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Preferred dimensionality of the projected space.\n If None it will be set to ``n_features``.\n\ninit : {'auto', 'pca', 'lda', 'identity', 'random'} or ndarray of shape (n_features_a, n_features_b), default='auto'\n Initialization of the linear transformation. Possible options are\n 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape\n (n_features_a, n_features_b).\n\n 'auto'\n Depending on ``n_components``, the most reasonable initialization\n will be chosen. If ``n_components <= n_classes`` we use 'lda', as\n it uses labels information. If not, but\n ``n_components < min(n_features, n_samples)``, we use 'pca', as\n it projects data in meaningful directions (those of higher\n variance). Otherwise, we just use 'identity'.\n\n 'pca'\n ``n_components`` principal components of the inputs passed\n to :meth:`fit` will be used to initialize the transformation.\n (See :class:`~sklearn.decomposition.PCA`)\n\n 'lda'\n ``min(n_components, n_classes)`` most discriminative\n components of the inputs passed to :meth:`fit` will be used to\n initialize the transformation. (If ``n_components > n_classes``,\n the rest of the components will be zero.) 
(See\n :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)\n\n 'identity'\n If ``n_components`` is strictly smaller than the\n dimensionality of the inputs passed to :meth:`fit`, the identity\n matrix will be truncated to the first ``n_components`` rows.\n\n 'random'\n The initial transformation will be a random array of shape\n `(n_components, n_features)`. Each value is sampled from the\n standard normal distribution.\n\n numpy array\n n_features_b must match the dimensionality of the inputs passed to\n :meth:`fit` and n_features_a must be less than or equal to that.\n If ``n_components`` is not None, n_features_a must match it.\n\nwarm_start : bool, default=False\n If True and :meth:`fit` has been called before, the solution of the\n previous call to :meth:`fit` is used as the initial linear\n transformation (``n_components`` and ``init`` will be ignored).\n\nmax_iter : int, default=50\n Maximum number of iterations in the optimization.\n\ntol : float, default=1e-5\n Convergence tolerance for the optimization.\n\ncallback : callable, default=None\n If not None, this function is called after every iteration of the\n optimizer, taking as arguments the current solution (flattened\n transformation matrix) and the number of iterations. This might be\n useful in case one wants to examine or store the transformation\n found after each iteration.\n\nverbose : int, default=0\n If 0, no progress messages will be printed.\n If 1, progress messages will be printed to stdout.\n If > 1, progress messages will be printed and the ``disp``\n parameter of :func:`scipy.optimize.minimize` will be set to\n ``verbose - 2``.\n\nrandom_state : int or numpy.RandomState, default=None\n A pseudo random number generator object or a seed for it if int. If\n ``init='random'``, ``random_state`` is used to initialize the random\n transformation. If ``init='pca'``, ``random_state`` is passed as an\n argument to PCA when initializing the transformation. Pass an int\n for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n The linear transformation learned during fitting.\n\nn_iter_ : int\n Counts the number of iterations performed by the optimizer.\n\nrandom_state_ : numpy.RandomState\n Pseudo random number generator object used during initialization.\n\nExamples\n--------\n>>> from sklearn.neighbors import NeighborhoodComponentsAnalysis\n>>> from sklearn.neighbors import KNeighborsClassifier\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... stratify=y, test_size=0.7, random_state=42)\n>>> nca = NeighborhoodComponentsAnalysis(random_state=42)\n>>> nca.fit(X_train, y_train)\nNeighborhoodComponentsAnalysis(...)\n>>> knn = KNeighborsClassifier(n_neighbors=3)\n>>> knn.fit(X_train, y_train)\nKNeighborsClassifier(...)\n>>> print(knn.score(X_test, y_test))\n0.933333...\n>>> knn.fit(nca.transform(X_train), y_train)\nKNeighborsClassifier(...)\n>>> print(knn.score(nca.transform(X_test), y_test))\n0.961904...\n\nReferences\n----------\n.. [1] J. Goldberger, G. Hinton, S. Roweis, R. Salakhutdinov.\n \"Neighbourhood Components Analysis\". Advances in Neural Information\n Processing Systems. 17, 513-520, 2005.\n http://www.cs.nyu.edu/~roweis/papers/ncanips.pdf\n\n.. 
[2] Wikipedia entry on Neighborhood Components Analysis\n https://en.wikipedia.org/wiki/Neighbourhood_components_analysis", + "code": "class NeighborhoodComponentsAnalysis(TransformerMixin, BaseEstimator):\n \"\"\"Neighborhood Components Analysis\n\n Neighborhood Component Analysis (NCA) is a machine learning algorithm for\n metric learning. It learns a linear transformation in a supervised fashion\n to improve the classification accuracy of a stochastic nearest neighbors\n rule in the transformed space.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=None\n Preferred dimensionality of the projected space.\n If None it will be set to ``n_features``.\n\n init : {'auto', 'pca', 'lda', 'identity', 'random'} or ndarray of shape \\\n (n_features_a, n_features_b), default='auto'\n Initialization of the linear transformation. Possible options are\n 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape\n (n_features_a, n_features_b).\n\n 'auto'\n Depending on ``n_components``, the most reasonable initialization\n will be chosen. If ``n_components <= n_classes`` we use 'lda', as\n it uses labels information. If not, but\n ``n_components < min(n_features, n_samples)``, we use 'pca', as\n it projects data in meaningful directions (those of higher\n variance). Otherwise, we just use 'identity'.\n\n 'pca'\n ``n_components`` principal components of the inputs passed\n to :meth:`fit` will be used to initialize the transformation.\n (See :class:`~sklearn.decomposition.PCA`)\n\n 'lda'\n ``min(n_components, n_classes)`` most discriminative\n components of the inputs passed to :meth:`fit` will be used to\n initialize the transformation. (If ``n_components > n_classes``,\n the rest of the components will be zero.) (See\n :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)\n\n 'identity'\n If ``n_components`` is strictly smaller than the\n dimensionality of the inputs passed to :meth:`fit`, the identity\n matrix will be truncated to the first ``n_components`` rows.\n\n 'random'\n The initial transformation will be a random array of shape\n `(n_components, n_features)`. Each value is sampled from the\n standard normal distribution.\n\n numpy array\n n_features_b must match the dimensionality of the inputs passed to\n :meth:`fit` and n_features_a must be less than or equal to that.\n If ``n_components`` is not None, n_features_a must match it.\n\n warm_start : bool, default=False\n If True and :meth:`fit` has been called before, the solution of the\n previous call to :meth:`fit` is used as the initial linear\n transformation (``n_components`` and ``init`` will be ignored).\n\n max_iter : int, default=50\n Maximum number of iterations in the optimization.\n\n tol : float, default=1e-5\n Convergence tolerance for the optimization.\n\n callback : callable, default=None\n If not None, this function is called after every iteration of the\n optimizer, taking as arguments the current solution (flattened\n transformation matrix) and the number of iterations. 
This might be\n useful in case one wants to examine or store the transformation\n found after each iteration.\n\n verbose : int, default=0\n If 0, no progress messages will be printed.\n If 1, progress messages will be printed to stdout.\n If > 1, progress messages will be printed and the ``disp``\n parameter of :func:`scipy.optimize.minimize` will be set to\n ``verbose - 2``.\n\n random_state : int or numpy.RandomState, default=None\n A pseudo random number generator object or a seed for it if int. If\n ``init='random'``, ``random_state`` is used to initialize the random\n transformation. If ``init='pca'``, ``random_state`` is passed as an\n argument to PCA when initializing the transformation. Pass an int\n for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\n Attributes\n ----------\n components_ : ndarray of shape (n_components, n_features)\n The linear transformation learned during fitting.\n\n n_iter_ : int\n Counts the number of iterations performed by the optimizer.\n\n random_state_ : numpy.RandomState\n Pseudo random number generator object used during initialization.\n\n Examples\n --------\n >>> from sklearn.neighbors import NeighborhoodComponentsAnalysis\n >>> from sklearn.neighbors import KNeighborsClassifier\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.model_selection import train_test_split\n >>> X, y = load_iris(return_X_y=True)\n >>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n ... stratify=y, test_size=0.7, random_state=42)\n >>> nca = NeighborhoodComponentsAnalysis(random_state=42)\n >>> nca.fit(X_train, y_train)\n NeighborhoodComponentsAnalysis(...)\n >>> knn = KNeighborsClassifier(n_neighbors=3)\n >>> knn.fit(X_train, y_train)\n KNeighborsClassifier(...)\n >>> print(knn.score(X_test, y_test))\n 0.933333...\n >>> knn.fit(nca.transform(X_train), y_train)\n KNeighborsClassifier(...)\n >>> print(knn.score(nca.transform(X_test), y_test))\n 0.961904...\n\n References\n ----------\n .. [1] J. Goldberger, G. Hinton, S. Roweis, R. Salakhutdinov.\n \"Neighbourhood Components Analysis\". Advances in Neural Information\n Processing Systems. 17, 513-520, 2005.\n http://www.cs.nyu.edu/~roweis/papers/ncanips.pdf\n\n .. 
[2] Wikipedia entry on Neighborhood Components Analysis\n https://en.wikipedia.org/wiki/Neighbourhood_components_analysis\n\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, n_components=None, *, init='auto', warm_start=False,\n max_iter=50, tol=1e-5, callback=None, verbose=0,\n random_state=None):\n self.n_components = n_components\n self.init = init\n self.warm_start = warm_start\n self.max_iter = max_iter\n self.tol = tol\n self.callback = callback\n self.verbose = verbose\n self.random_state = random_state\n\n def fit(self, X, y):\n \"\"\"Fit the model according to the given training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The training samples.\n\n y : array-like of shape (n_samples,)\n The corresponding training labels.\n\n Returns\n -------\n self : object\n returns a trained NeighborhoodComponentsAnalysis model.\n \"\"\"\n\n # Verify inputs X and y and NCA parameters, and transform a copy if\n # needed\n X, y, init = self._validate_params(X, y)\n\n # Initialize the random generator\n self.random_state_ = check_random_state(self.random_state)\n\n # Measure the total training time\n t_train = time.time()\n\n # Compute a mask that stays fixed during optimization:\n same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]\n # (n_samples, n_samples)\n\n # Initialize the transformation\n transformation = self._initialize(X, y, init)\n\n # Create a dictionary of parameters to be passed to the optimizer\n disp = self.verbose - 2 if self.verbose > 1 else -1\n optimizer_params = {'method': 'L-BFGS-B',\n 'fun': self._loss_grad_lbfgs,\n 'args': (X, same_class_mask, -1.0),\n 'jac': True,\n 'x0': transformation,\n 'tol': self.tol,\n 'options': dict(maxiter=self.max_iter, disp=disp),\n 'callback': self._callback\n }\n\n # Call the optimizer\n self.n_iter_ = 0\n opt_result = minimize(**optimizer_params)\n\n # Reshape the solution found by the optimizer\n self.components_ = opt_result.x.reshape(-1, X.shape[1])\n\n # Stop timer\n t_train = time.time() - t_train\n if self.verbose:\n cls_name = self.__class__.__name__\n\n # Warn the user if the algorithm did not converge\n if not opt_result.success:\n warn('[{}] NCA did not converge: {}'.format(\n cls_name, opt_result.message),\n ConvergenceWarning)\n\n print('[{}] Training took {:8.2f}s.'.format(cls_name, t_train))\n\n return self\n\n def transform(self, X):\n \"\"\"Applies the learned transformation to the given data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data samples.\n\n Returns\n -------\n X_embedded: ndarray of shape (n_samples, n_components)\n The data samples transformed.\n\n Raises\n ------\n NotFittedError\n If :meth:`fit` has not been called before.\n \"\"\"\n\n check_is_fitted(self)\n X = check_array(X)\n\n return np.dot(X, self.components_.T)\n\n def _validate_params(self, X, y):\n \"\"\"Validate parameters as soon as :meth:`fit` is called.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The training samples.\n\n y : array-like of shape (n_samples,)\n The corresponding training labels.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n The validated training samples.\n\n y : ndarray of shape (n_samples,)\n The validated training labels, encoded to be integers in\n the range(0, n_classes).\n\n init : str or ndarray of shape (n_features_a, n_features_b)\n The validated initialization of the linear transformation.\n\n Raises\n -------\n TypeError\n If a parameter is not an instance of the 
desired type.\n\n ValueError\n If a parameter's value violates its legal value range or if the\n combination of two or more given parameters is incompatible.\n \"\"\"\n\n # Validate the inputs X and y, and converts y to numerical classes.\n X, y = self._validate_data(X, y, ensure_min_samples=2)\n check_classification_targets(y)\n y = LabelEncoder().fit_transform(y)\n\n # Check the preferred dimensionality of the projected space\n if self.n_components is not None:\n check_scalar(\n self.n_components, 'n_components', numbers.Integral, min_val=1)\n\n if self.n_components > X.shape[1]:\n raise ValueError('The preferred dimensionality of the '\n 'projected space `n_components` ({}) cannot '\n 'be greater than the given data '\n 'dimensionality ({})!'\n .format(self.n_components, X.shape[1]))\n\n # If warm_start is enabled, check that the inputs are consistent\n check_scalar(self.warm_start, 'warm_start', bool)\n if self.warm_start and hasattr(self, 'components_'):\n if self.components_.shape[1] != X.shape[1]:\n raise ValueError('The new inputs dimensionality ({}) does not '\n 'match the input dimensionality of the '\n 'previously learned transformation ({}).'\n .format(X.shape[1],\n self.components_.shape[1]))\n\n check_scalar(self.max_iter, 'max_iter', numbers.Integral, min_val=1)\n check_scalar(self.tol, 'tol', numbers.Real, min_val=0.)\n check_scalar(self.verbose, 'verbose', numbers.Integral, min_val=0)\n\n if self.callback is not None:\n if not callable(self.callback):\n raise ValueError('`callback` is not callable.')\n\n # Check how the linear transformation should be initialized\n init = self.init\n\n if isinstance(init, np.ndarray):\n init = check_array(init)\n\n # Assert that init.shape[1] = X.shape[1]\n if init.shape[1] != X.shape[1]:\n raise ValueError(\n 'The input dimensionality ({}) of the given '\n 'linear transformation `init` must match the '\n 'dimensionality of the given inputs `X` ({}).'\n .format(init.shape[1], X.shape[1]))\n\n # Assert that init.shape[0] <= init.shape[1]\n if init.shape[0] > init.shape[1]:\n raise ValueError(\n 'The output dimensionality ({}) of the given '\n 'linear transformation `init` cannot be '\n 'greater than its input dimensionality ({}).'\n .format(init.shape[0], init.shape[1]))\n\n if self.n_components is not None:\n # Assert that self.n_components = init.shape[0]\n if self.n_components != init.shape[0]:\n raise ValueError('The preferred dimensionality of the '\n 'projected space `n_components` ({}) does'\n ' not match the output dimensionality of '\n 'the given linear transformation '\n '`init` ({})!'\n .format(self.n_components,\n init.shape[0]))\n elif init in ['auto', 'pca', 'lda', 'identity', 'random']:\n pass\n else:\n raise ValueError(\n \"`init` must be 'auto', 'pca', 'lda', 'identity', 'random' \"\n \"or a numpy array of shape (n_components, n_features).\")\n\n return X, y, init\n\n def _initialize(self, X, y, init):\n \"\"\"Initialize the transformation.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The training samples.\n\n y : array-like of shape (n_samples,)\n The training labels.\n\n init : str or ndarray of shape (n_features_a, n_features_b)\n The validated initialization of the linear transformation.\n\n Returns\n -------\n transformation : ndarray of shape (n_components, n_features)\n The initialized linear transformation.\n\n \"\"\"\n\n transformation = init\n if self.warm_start and hasattr(self, 'components_'):\n transformation = self.components_\n elif isinstance(init, np.ndarray):\n pass\n 
else:\n n_samples, n_features = X.shape\n n_components = self.n_components or n_features\n if init == 'auto':\n n_classes = len(np.unique(y))\n if n_components <= min(n_features, n_classes - 1):\n init = 'lda'\n elif n_components < min(n_features, n_samples):\n init = 'pca'\n else:\n init = 'identity'\n if init == 'identity':\n transformation = np.eye(n_components, X.shape[1])\n elif init == 'random':\n transformation = self.random_state_.randn(n_components,\n X.shape[1])\n elif init in {'pca', 'lda'}:\n init_time = time.time()\n if init == 'pca':\n pca = PCA(n_components=n_components,\n random_state=self.random_state_)\n if self.verbose:\n print('Finding principal components... ', end='')\n sys.stdout.flush()\n pca.fit(X)\n transformation = pca.components_\n elif init == 'lda':\n from ..discriminant_analysis import (\n LinearDiscriminantAnalysis)\n lda = LinearDiscriminantAnalysis(n_components=n_components)\n if self.verbose:\n print('Finding most discriminative components... ',\n end='')\n sys.stdout.flush()\n lda.fit(X, y)\n transformation = lda.scalings_.T[:n_components]\n if self.verbose:\n print('done in {:5.2f}s'.format(time.time() - init_time))\n return transformation\n\n def _callback(self, transformation):\n \"\"\"Called after each iteration of the optimizer.\n\n Parameters\n ----------\n transformation : ndarray of shape (n_components * n_features,)\n The solution computed by the optimizer in this iteration.\n \"\"\"\n if self.callback is not None:\n self.callback(transformation, self.n_iter_)\n\n self.n_iter_ += 1\n\n def _loss_grad_lbfgs(self, transformation, X, same_class_mask, sign=1.0):\n \"\"\"Compute the loss and the loss gradient w.r.t. ``transformation``.\n\n Parameters\n ----------\n transformation : ndarray of shape (n_components * n_features,)\n The raveled linear transformation on which to compute loss and\n evaluate gradient.\n\n X : ndarray of shape (n_samples, n_features)\n The training samples.\n\n same_class_mask : ndarray of shape (n_samples, n_samples)\n A mask where ``mask[i, j] == 1`` if ``X[i]`` and ``X[j]`` belong\n to the same class, and ``0`` otherwise.\n\n Returns\n -------\n loss : float\n The loss computed for the given transformation.\n\n gradient : ndarray of shape (n_components * n_features,)\n The new (flattened) gradient of the loss.\n \"\"\"\n\n if self.n_iter_ == 0:\n self.n_iter_ += 1\n if self.verbose:\n header_fields = ['Iteration', 'Objective Value', 'Time(s)']\n header_fmt = '{:>10} {:>20} {:>10}'\n header = header_fmt.format(*header_fields)\n cls_name = self.__class__.__name__\n print('[{}]'.format(cls_name))\n print('[{}] {}\\n[{}] {}'.format(cls_name, header,\n cls_name, '-' * len(header)))\n\n t_funcall = time.time()\n\n transformation = transformation.reshape(-1, X.shape[1])\n X_embedded = np.dot(X, transformation.T) # (n_samples, n_components)\n\n # Compute softmax distances\n p_ij = pairwise_distances(X_embedded, squared=True)\n np.fill_diagonal(p_ij, np.inf)\n p_ij = softmax(-p_ij) # (n_samples, n_samples)\n\n # Compute loss\n masked_p_ij = p_ij * same_class_mask\n p = np.sum(masked_p_ij, axis=1, keepdims=True) # (n_samples, 1)\n loss = np.sum(p)\n\n # Compute gradient of loss w.r.t. 
`transform`\n weighted_p_ij = masked_p_ij - p_ij * p\n weighted_p_ij_sym = weighted_p_ij + weighted_p_ij.T\n np.fill_diagonal(weighted_p_ij_sym, -weighted_p_ij.sum(axis=0))\n gradient = 2 * X_embedded.T.dot(weighted_p_ij_sym).dot(X)\n # time complexity of the gradient: O(n_components x n_samples x (\n # n_samples + n_features))\n\n if self.verbose:\n t_funcall = time.time() - t_funcall\n values_fmt = '[{}] {:>10} {:>20.6e} {:>10.2f}'\n print(values_fmt.format(self.__class__.__name__, self.n_iter_,\n loss, t_funcall))\n sys.stdout.flush()\n\n return sign * loss, sign * gradient.ravel()\n\n def _more_tags(self):\n return {'requires_y': True}", + "instance_attributes": [ + { + "name": "init", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "warm_start", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_iter_", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid", + "name": "NearestCentroid", + "qname": "sklearn.neighbors._nearest_centroid.NearestCentroid", + "decorators": [], + "superclasses": ["ClassifierMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/__init__", + "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/fit", + "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/predict" + ], + "is_public": false, + "reexported_by": [], + "description": "Nearest centroid classifier.\n\nEach class is represented by its centroid, with test samples classified to\nthe class with the nearest centroid.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Nearest centroid classifier.\n\nEach class is represented by its centroid, with test samples classified to\nthe class with the nearest centroid.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nmetric : str or callable\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by metrics.pairwise.pairwise_distances for its\n metric parameter.\n The centroids for the samples corresponding to each class is the point\n from which the sum of the distances (according to the metric) of all\n samples that belong to that particular class are minimized.\n If the \"manhattan\" metric is provided, this centroid is the median and\n for all other metrics, the centroid is now set to be the mean.\n\n .. 
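The `_loss_grad_lbfgs` above sums, for each sample, the softmax-normalized similarities to its same-class points. A minimal NumPy sketch of that loss computation, using toy values and hypothetical variable names (not part of the dumped source):

import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn.utils.extmath import softmax

X_embedded = np.array([[0.0], [0.1], [2.0]])   # already-transformed samples
y = np.array([0, 0, 1])
p_ij = pairwise_distances(X_embedded, squared=True)
np.fill_diagonal(p_ij, np.inf)                 # a point is not its own neighbor
p_ij = softmax(-p_ij)                          # each row now sums to 1
same_class_mask = (y[:, None] == y[None, :]).astype(float)
loss = (p_ij * same_class_mask).sum()          # the objective scaled by `sign` above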
versionchanged:: 0.19\n ``metric='precomputed'`` was deprecated and now raises an error\n\nshrink_threshold : float, default=None\n Threshold for shrinking centroids to remove features.\n\nAttributes\n----------\ncentroids_ : array-like of shape (n_classes, n_features)\n Centroid of each class.\n\nclasses_ : array of shape (n_classes,)\n The unique classes labels.\n\nExamples\n--------\n>>> from sklearn.neighbors import NearestCentroid\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = NearestCentroid()\n>>> clf.fit(X, y)\nNearestCentroid()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n\nSee Also\n--------\nKNeighborsClassifier : Nearest neighbors classifier.\n\nNotes\n-----\nWhen used for text classification with tf-idf vectors, this classifier is\nalso known as the Rocchio classifier.\n\nReferences\n----------\nTibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). Diagnosis of\nmultiple cancer types by shrunken centroids of gene expression. Proceedings\nof the National Academy of Sciences of the United States of America,\n99(10), 6567-6572. The National Academy of Sciences.", + "code": "class NearestCentroid(ClassifierMixin, BaseEstimator):\n \"\"\"Nearest centroid classifier.\n\n Each class is represented by its centroid, with test samples classified to\n the class with the nearest centroid.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n metric : str or callable\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by metrics.pairwise.pairwise_distances for its\n metric parameter.\n The centroids for the samples corresponding to each class is the point\n from which the sum of the distances (according to the metric) of all\n samples that belong to that particular class are minimized.\n If the \"manhattan\" metric is provided, this centroid is the median and\n for all other metrics, the centroid is now set to be the mean.\n\n .. versionchanged:: 0.19\n ``metric='precomputed'`` was deprecated and now raises an error\n\n shrink_threshold : float, default=None\n Threshold for shrinking centroids to remove features.\n\n Attributes\n ----------\n centroids_ : array-like of shape (n_classes, n_features)\n Centroid of each class.\n\n classes_ : array of shape (n_classes,)\n The unique classes labels.\n\n Examples\n --------\n >>> from sklearn.neighbors import NearestCentroid\n >>> import numpy as np\n >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n >>> y = np.array([1, 1, 1, 2, 2, 2])\n >>> clf = NearestCentroid()\n >>> clf.fit(X, y)\n NearestCentroid()\n >>> print(clf.predict([[-0.8, -1]]))\n [1]\n\n See Also\n --------\n KNeighborsClassifier : Nearest neighbors classifier.\n\n Notes\n -----\n When used for text classification with tf-idf vectors, this classifier is\n also known as the Rocchio classifier.\n\n References\n ----------\n Tibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). Diagnosis of\n multiple cancer types by shrunken centroids of gene expression. Proceedings\n of the National Academy of Sciences of the United States of America,\n 99(10), 6567-6572. 
The National Academy of Sciences.\n\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, metric='euclidean', *, shrink_threshold=None):\n self.metric = metric\n self.shrink_threshold = shrink_threshold\n\n def fit(self, X, y):\n \"\"\"\n Fit the NearestCentroid model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n Note that centroid shrinking cannot be used with sparse matrices.\n y : array-like of shape (n_samples,)\n Target values (integers)\n \"\"\"\n if self.metric == 'precomputed':\n raise ValueError(\"Precomputed is not supported.\")\n # If X is sparse and the metric is \"manhattan\", store it in csc\n # format: this makes it easier to calculate the median.\n if self.metric == 'manhattan':\n X, y = self._validate_data(X, y, accept_sparse=['csc'])\n else:\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'])\n is_X_sparse = sp.issparse(X)\n if is_X_sparse and self.shrink_threshold:\n raise ValueError(\"threshold shrinking not supported\"\n \" for sparse input\")\n check_classification_targets(y)\n\n n_samples, n_features = X.shape\n le = LabelEncoder()\n y_ind = le.fit_transform(y)\n self.classes_ = classes = le.classes_\n n_classes = classes.size\n if n_classes < 2:\n raise ValueError('The number of classes has to be greater than'\n ' one; got %d class' % (n_classes))\n\n # Mask mapping each class to its members.\n self.centroids_ = np.empty((n_classes, n_features), dtype=np.float64)\n # Number of samples in each class.\n nk = np.zeros(n_classes)\n\n for cur_class in range(n_classes):\n center_mask = y_ind == cur_class\n nk[cur_class] = np.sum(center_mask)\n if is_X_sparse:\n center_mask = np.where(center_mask)[0]\n\n # XXX: Update other averaging methods according to the metrics.\n if self.metric == \"manhattan\":\n # NumPy does not calculate median of sparse matrices.\n if not is_X_sparse:\n self.centroids_[cur_class] = np.median(X[center_mask], axis=0)\n else:\n self.centroids_[cur_class] = csc_median_axis_0(X[center_mask])\n else:\n if self.metric != 'euclidean':\n warnings.warn(\"Averaging for metrics other than \"\n \"euclidean and manhattan not supported. \"\n \"The average is set to be the mean.\"\n )\n self.centroids_[cur_class] = X[center_mask].mean(axis=0)\n\n if self.shrink_threshold:\n if np.all(np.ptp(X, axis=0) == 0):\n raise ValueError(\"All features have zero variance. \"\n \"Division by zero.\")\n dataset_centroid_ = np.mean(X, axis=0)\n\n # m parameter for determining deviation\n m = np.sqrt((1. / nk) - (1. 
/ n_samples))\n # Calculate deviation using the standard deviation of centroids.\n variance = (X - self.centroids_[y_ind]) ** 2\n variance = variance.sum(axis=0)\n s = np.sqrt(variance / (n_samples - n_classes))\n s += np.median(s) # To deter outliers from affecting the results.\n mm = m.reshape(len(m), 1) # Reshape to allow broadcasting.\n ms = mm * s\n deviation = ((self.centroids_ - dataset_centroid_) / ms)\n # Soft thresholding: if the deviation crosses 0 during shrinking,\n # it becomes zero.\n signs = np.sign(deviation)\n deviation = (np.abs(deviation) - self.shrink_threshold)\n np.clip(deviation, 0, None, out=deviation)\n deviation *= signs\n # Now adjust the centroids using the deviation\n msd = ms * deviation\n self.centroids_ = dataset_centroid_[np.newaxis, :] + msd\n return self\n\n def predict(self, X):\n \"\"\"Perform classification on an array of test vectors X.\n\n The predicted class C for each sample in X is returned.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n C : ndarray of shape (n_samples,)\n\n Notes\n -----\n If the metric constructor parameter is \"precomputed\", X is assumed to\n be the distance matrix between the data to be predicted and\n ``self.centroids_``.\n \"\"\"\n check_is_fitted(self)\n\n X = check_array(X, accept_sparse='csr')\n return self.classes_[pairwise_distances(\n X, self.centroids_, metric=self.metric).argmin(axis=1)]", + "instance_attributes": [ + { + "name": "metric", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "centroids_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor", + "name": "KNeighborsRegressor", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor", + "decorators": [], + "superclasses": ["KNeighborsMixin", "RegressorMixin", "NeighborsBase"], + "methods": [ + "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/__init__", + "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/_more_tags", + "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/_pairwise@getter", + "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/fit", + "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/predict" + ], + "is_public": false, + "reexported_by": [], + "description": "Regression based on k-nearest neighbors.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9", + "docstring": "Regression based on k-nearest neighbors.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9\n\nParameters\n----------\nn_neighbors : int, default=5\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. 
All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n Doesn't affect :meth:`fit` method.\n\nAttributes\n----------\neffective_metric_ : str or callable\n The distance metric to use. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import KNeighborsRegressor\n>>> neigh = KNeighborsRegressor(n_neighbors=2)\n>>> neigh.fit(X, y)\nKNeighborsRegressor(...)\n>>> print(neigh.predict([[1.5]]))\n[0.5]\n\nSee Also\n--------\nNearestNeighbors\nRadiusNeighborsRegressor\nKNeighborsClassifier\nRadiusNeighborsClassifier\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n.. 
warning::\n\n Regarding the Nearest Neighbors algorithms, if it is found that two\n neighbors, neighbor `k+1` and `k`, have identical distances but\n different labels, the results will depend on the ordering of the\n training data.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm", + "code": "class KNeighborsRegressor(KNeighborsMixin,\n RegressorMixin,\n NeighborsBase):\n \"\"\"Regression based on k-nearest neighbors.\n\n The target is predicted by local interpolation of the targets\n associated of the nearest neighbors in the training set.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.9\n\n Parameters\n ----------\n n_neighbors : int, default=5\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n\n weights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n Uniform weights are used by default.\n\n algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\n leaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\n p : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n metric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\n metric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n n_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n Doesn't affect :meth:`fit` method.\n\n Attributes\n ----------\n effective_metric_ : str or callable\n The distance metric to use. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\n effective_metric_params_ : dict\n Additional keyword arguments for the metric function. 
For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\n n_samples_fit_ : int\n Number of samples in the fitted data.\n\n Examples\n --------\n >>> X = [[0], [1], [2], [3]]\n >>> y = [0, 0, 1, 1]\n >>> from sklearn.neighbors import KNeighborsRegressor\n >>> neigh = KNeighborsRegressor(n_neighbors=2)\n >>> neigh.fit(X, y)\n KNeighborsRegressor(...)\n >>> print(neigh.predict([[1.5]]))\n [0.5]\n\n See Also\n --------\n NearestNeighbors\n RadiusNeighborsRegressor\n KNeighborsClassifier\n RadiusNeighborsClassifier\n\n Notes\n -----\n See :ref:`Nearest Neighbors ` in the online documentation\n for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n .. warning::\n\n Regarding the Nearest Neighbors algorithms, if it is found that two\n neighbors, neighbor `k+1` and `k`, have identical distances but\n different labels, the results will depend on the ordering of the\n training data.\n\n https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, n_neighbors=5, *, weights='uniform',\n algorithm='auto', leaf_size=30,\n p=2, metric='minkowski', metric_params=None, n_jobs=None,\n **kwargs):\n super().__init__(\n n_neighbors=n_neighbors,\n algorithm=algorithm,\n leaf_size=leaf_size, metric=metric, p=p,\n metric_params=metric_params, n_jobs=n_jobs, **kwargs)\n self.weights = _check_weights(weights)\n\n def _more_tags(self):\n # For cross-validation routines to split data correctly\n return {'pairwise': self.metric == 'precomputed'}\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n # For cross-validation routines to split data correctly\n return self.metric == 'precomputed'\n\n def fit(self, X, y):\n \"\"\"Fit the k-nearest neighbors regressor from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n y : {array-like, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_outputs)\n Target values.\n\n Returns\n -------\n self : KNeighborsRegressor\n The fitted k-nearest neighbors regressor.\n \"\"\"\n return self._fit(X, y)\n\n def predict(self, X):\n \"\"\"Predict the target for the provided data\n\n Parameters\n ----------\n X : array-like of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\n Returns\n -------\n y : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int\n Target values.\n \"\"\"\n X = check_array(X, accept_sparse='csr')\n\n neigh_dist, neigh_ind = self.kneighbors(X)\n\n weights = _get_weights(neigh_dist, self.weights)\n\n _y = self._y\n if _y.ndim == 1:\n _y = _y.reshape((-1, 1))\n\n if weights is None:\n y_pred = np.mean(_y[neigh_ind], axis=1)\n else:\n y_pred = np.empty((X.shape[0], _y.shape[1]), dtype=np.float64)\n denom = np.sum(weights, axis=1)\n\n for j in range(_y.shape[1]):\n num = np.sum(_y[neigh_ind, j] * weights, axis=1)\n y_pred[:, j] = num / denom\n\n if self._y.ndim == 1:\n y_pred = y_pred.ravel()\n\n return y_pred", + "instance_attributes": [ + { + "name": "weights", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": 
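With uniform weights, the KNeighborsRegressor `predict` shown above reduces to the mean of the k nearest training targets (`np.mean(_y[neigh_ind], axis=1)`). A small sanity check on toy data (illustrative only):

import numpy as np
from sklearn.neighbors import KNeighborsRegressor

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0.0, 0.0, 1.0, 1.0])
reg = KNeighborsRegressor(n_neighbors=2).fit(X, y)
# the two nearest neighbors of x=1.5 are x=1.0 and x=2.0,
# so the uniform-weight prediction is (0.0 + 1.0) / 2 = 0.5
assert reg.predict([[1.5]])[0] == 0.5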
"scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor", + "name": "RadiusNeighborsRegressor", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor", + "decorators": [], + "superclasses": ["RadiusNeighborsMixin", "RegressorMixin", "NeighborsBase"], + "methods": [ + "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/__init__", + "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/fit", + "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/predict" + ], + "is_public": false, + "reexported_by": [], + "description": "Regression based on neighbors within a fixed radius.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9", + "docstring": "Regression based on neighbors within a fixed radius.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9\n\nParameters\n----------\nradius : float, default=1.0\n Range of parameter space to use by default for :meth:`radius_neighbors`\n queries.\n\nweights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n Uniform weights are used by default.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nAttributes\n----------\neffective_metric_ : str or callable\n The distance metric to use. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\neffective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> X = [[0], [1], [2], [3]]\n>>> y = [0, 0, 1, 1]\n>>> from sklearn.neighbors import RadiusNeighborsRegressor\n>>> neigh = RadiusNeighborsRegressor(radius=1.0)\n>>> neigh.fit(X, y)\nRadiusNeighborsRegressor(...)\n>>> print(neigh.predict([[1.5]]))\n[0.5]\n\nSee Also\n--------\nNearestNeighbors\nKNeighborsRegressor\nKNeighborsClassifier\nRadiusNeighborsClassifier\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm", + "code": "class RadiusNeighborsRegressor(RadiusNeighborsMixin,\n RegressorMixin,\n NeighborsBase):\n \"\"\"Regression based on neighbors within a fixed radius.\n\n The target is predicted by local interpolation of the targets\n associated of the nearest neighbors in the training set.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.9\n\n Parameters\n ----------\n radius : float, default=1.0\n Range of parameter space to use by default for :meth:`radius_neighbors`\n queries.\n\n weights : {'uniform', 'distance'} or callable, default='uniform'\n weight function used in prediction. Possible values:\n\n - 'uniform' : uniform weights. All points in each neighborhood\n are weighted equally.\n - 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n - [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\n Uniform weights are used by default.\n\n algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\n leaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\n p : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n metric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. 
See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\n metric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n n_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n effective_metric_ : str or callable\n The distance metric to use. It will be same as the `metric` parameter\n or a synonym of it, e.g. 'euclidean' if the `metric` parameter set to\n 'minkowski' and `p` parameter set to 2.\n\n effective_metric_params_ : dict\n Additional keyword arguments for the metric function. For most metrics\n will be same with `metric_params` parameter, but may also contain the\n `p` parameter value if the `effective_metric_` attribute is set to\n 'minkowski'.\n\n n_samples_fit_ : int\n Number of samples in the fitted data.\n\n Examples\n --------\n >>> X = [[0], [1], [2], [3]]\n >>> y = [0, 0, 1, 1]\n >>> from sklearn.neighbors import RadiusNeighborsRegressor\n >>> neigh = RadiusNeighborsRegressor(radius=1.0)\n >>> neigh.fit(X, y)\n RadiusNeighborsRegressor(...)\n >>> print(neigh.predict([[1.5]]))\n [0.5]\n\n See Also\n --------\n NearestNeighbors\n KNeighborsRegressor\n KNeighborsClassifier\n RadiusNeighborsClassifier\n\n Notes\n -----\n See :ref:`Nearest Neighbors ` in the online documentation\n for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, radius=1.0, *, weights='uniform',\n algorithm='auto', leaf_size=30,\n p=2, metric='minkowski', metric_params=None, n_jobs=None,\n **kwargs):\n super().__init__(\n radius=radius,\n algorithm=algorithm,\n leaf_size=leaf_size,\n p=p, metric=metric, metric_params=metric_params,\n n_jobs=n_jobs, **kwargs)\n self.weights = _check_weights(weights)\n\n def fit(self, X, y):\n \"\"\"Fit the radius neighbors regressor from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n y : {array-like, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_outputs)\n Target values.\n\n Returns\n -------\n self : RadiusNeighborsRegressor\n The fitted radius neighbors regressor.\n \"\"\"\n return self._fit(X, y)\n\n def predict(self, X):\n \"\"\"Predict the target for the provided data\n\n Parameters\n ----------\n X : array-like of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\n Returns\n -------\n y : ndarray of shape (n_queries,) or (n_queries, n_outputs), \\\n dtype=double\n Target values.\n \"\"\"\n X = check_array(X, accept_sparse='csr')\n\n neigh_dist, neigh_ind = self.radius_neighbors(X)\n\n weights = _get_weights(neigh_dist, self.weights)\n\n _y = self._y\n if _y.ndim == 1:\n _y = _y.reshape((-1, 1))\n\n empty_obs = np.full_like(_y[0], np.nan)\n\n if weights is None:\n y_pred = np.array([np.mean(_y[ind, :], axis=0)\n if len(ind) else empty_obs\n for (i, ind) in enumerate(neigh_ind)])\n\n else:\n y_pred = np.array([np.average(_y[ind, :], axis=0,\n 
weights=weights[i])\n if len(ind) else empty_obs\n for (i, ind) in enumerate(neigh_ind)])\n\n if np.any(np.isnan(y_pred)):\n empty_warning_msg = (\"One or more samples have no neighbors \"\n \"within specified radius; predicting NaN.\")\n warnings.warn(empty_warning_msg)\n\n if self._y.ndim == 1:\n y_pred = y_pred.ravel()\n\n return y_pred", + "instance_attributes": [ + { + "name": "weights", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors", + "name": "NearestNeighbors", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors", + "decorators": [], + "superclasses": ["KNeighborsMixin", "RadiusNeighborsMixin", "NeighborsBase"], + "methods": [ + "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/__init__", + "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Unsupervised learner for implementing neighbor searches.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9", + "docstring": "Unsupervised learner for implementing neighbor searches.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9\n\nParameters\n----------\nn_neighbors : int, default=5\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n\nradius : float, default=1.0\n Range of parameter space to use by default for :meth:`radius_neighbors`\n queries.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nmetric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\np : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
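As the RadiusNeighborsRegressor `predict` code above shows, a query with no training point inside `radius` yields NaN plus a warning. A short sketch of that edge case, on toy values (the warning filter is only there to keep the demo quiet):

import warnings
import numpy as np
from sklearn.neighbors import RadiusNeighborsRegressor

X = np.array([[0.0], [1.0], [2.0]])
y = np.array([0.0, 0.0, 1.0])
reg = RadiusNeighborsRegressor(radius=0.5).fit(X, y)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")      # silence the empty-neighborhood warning
    pred = reg.predict([[10.0], [1.1]])
assert np.isnan(pred[0])                 # no neighbor within 0.5 of 10.0
assert pred[1] == 0.0                    # only x=1.0 lies within 0.5 of 1.1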
See :term:`Glossary `\n for more details.\n\nAttributes\n----------\neffective_metric_ : str\n Metric used to compute distances to neighbors.\n\neffective_metric_params_ : dict\n Parameters for the metric used to compute distances to neighbors.\n\nn_samples_fit_ : int\n Number of samples in the fitted data.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.neighbors import NearestNeighbors\n>>> samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]\n\n>>> neigh = NearestNeighbors(n_neighbors=2, radius=0.4)\n>>> neigh.fit(samples)\nNearestNeighbors(...)\n\n>>> neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False)\narray([[2, 0]]...)\n\n>>> nbrs = neigh.radius_neighbors(\n... [[0, 0, 1.3]], 0.4, return_distance=False\n... )\n>>> np.asarray(nbrs[0][0])\narray(2)\n\nSee Also\n--------\nKNeighborsClassifier\nRadiusNeighborsClassifier\nKNeighborsRegressor\nRadiusNeighborsRegressor\nBallTree\n\nNotes\n-----\nSee :ref:`Nearest Neighbors ` in the online documentation\nfor a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\nhttps://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm", + "code": "class NearestNeighbors(KNeighborsMixin,\n RadiusNeighborsMixin,\n NeighborsBase):\n \"\"\"Unsupervised learner for implementing neighbor searches.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.9\n\n Parameters\n ----------\n n_neighbors : int, default=5\n Number of neighbors to use by default for :meth:`kneighbors` queries.\n\n radius : float, default=1.0\n Range of parameter space to use by default for :meth:`radius_neighbors`\n queries.\n\n algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\n leaf_size : int, default=30\n Leaf size passed to BallTree or KDTree. This can affect the\n speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\n metric : str or callable, default='minkowski'\n the distance metric to use for the tree. The default metric is\n minkowski, and with p=2 is equivalent to the standard Euclidean\n metric. See the documentation of :class:`DistanceMetric` for a\n list of available metrics.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit. X may be a :term:`sparse graph`,\n in which case only \"nonzero\" elements may be considered neighbors.\n\n p : int, default=2\n Parameter for the Minkowski metric from\n sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n metric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n n_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n effective_metric_ : str\n Metric used to compute distances to neighbors.\n\n effective_metric_params_ : dict\n Parameters for the metric used to compute distances to neighbors.\n\n n_samples_fit_ : int\n Number of samples in the fitted data.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.neighbors import NearestNeighbors\n >>> samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]\n\n >>> neigh = NearestNeighbors(n_neighbors=2, radius=0.4)\n >>> neigh.fit(samples)\n NearestNeighbors(...)\n\n >>> neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False)\n array([[2, 0]]...)\n\n >>> nbrs = neigh.radius_neighbors(\n ... [[0, 0, 1.3]], 0.4, return_distance=False\n ... )\n >>> np.asarray(nbrs[0][0])\n array(2)\n\n See Also\n --------\n KNeighborsClassifier\n RadiusNeighborsClassifier\n KNeighborsRegressor\n RadiusNeighborsRegressor\n BallTree\n\n Notes\n -----\n See :ref:`Nearest Neighbors ` in the online documentation\n for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n\n https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, n_neighbors=5, radius=1.0,\n algorithm='auto', leaf_size=30, metric='minkowski',\n p=2, metric_params=None, n_jobs=None):\n super().__init__(\n n_neighbors=n_neighbors,\n radius=radius,\n algorithm=algorithm,\n leaf_size=leaf_size, metric=metric, p=p,\n metric_params=metric_params, n_jobs=n_jobs)\n\n def fit(self, X, y=None):\n \"\"\"Fit the nearest neighbors estimator from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : NearestNeighbors\n The fitted nearest neighbors estimator.\n \"\"\"\n return self._fit(X)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron", + "name": "BaseMultilayerPerceptron", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron", + "decorators": [], + "superclasses": ["BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_unpack", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_forward_pass", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_forward_pass_fast", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_compute_loss_grad", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_loss_grad_lbfgs", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_backprop", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_initialize", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_init_coef", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_validate_hyperparameters", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_lbfgs", + 
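The NearestNeighbors docstring above notes that `algorithm='brute'` performs a brute-force search, which on small data is just a sort of the full distance matrix. A minimal illustration on the docstring's own sample points (the explicit brute backend is assumed for an exact comparison):

import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn.neighbors import NearestNeighbors

samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]
nn = NearestNeighbors(n_neighbors=2, algorithm='brute').fit(samples)
ind = nn.kneighbors([[0, 0, 1.3]], 2, return_distance=False)
d = pairwise_distances([[0, 0, 1.3]], samples)
assert list(ind[0]) == list(np.argsort(d[0])[:2])   # both give [2, 0]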
"scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_stochastic", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_update_no_improvement_count", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/fit", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/partial_fit@getter", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_partial_fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for MLP classification and regression.\n\nWarning: This class should not be used directly.\nUse derived classes instead.\n\n.. versionadded:: 0.18", + "docstring": "Base class for MLP classification and regression.\n\nWarning: This class should not be used directly.\nUse derived classes instead.\n\n.. versionadded:: 0.18", + "code": "class BaseMultilayerPerceptron(BaseEstimator, metaclass=ABCMeta):\n \"\"\"Base class for MLP classification and regression.\n\n Warning: This class should not be used directly.\n Use derived classes instead.\n\n .. versionadded:: 0.18\n \"\"\"\n\n @abstractmethod\n def __init__(self, hidden_layer_sizes, activation, solver,\n alpha, batch_size, learning_rate, learning_rate_init, power_t,\n max_iter, loss, shuffle, random_state, tol, verbose,\n warm_start, momentum, nesterovs_momentum, early_stopping,\n validation_fraction, beta_1, beta_2, epsilon,\n n_iter_no_change, max_fun):\n self.activation = activation\n self.solver = solver\n self.alpha = alpha\n self.batch_size = batch_size\n self.learning_rate = learning_rate\n self.learning_rate_init = learning_rate_init\n self.power_t = power_t\n self.max_iter = max_iter\n self.loss = loss\n self.hidden_layer_sizes = hidden_layer_sizes\n self.shuffle = shuffle\n self.random_state = random_state\n self.tol = tol\n self.verbose = verbose\n self.warm_start = warm_start\n self.momentum = momentum\n self.nesterovs_momentum = nesterovs_momentum\n self.early_stopping = early_stopping\n self.validation_fraction = validation_fraction\n self.beta_1 = beta_1\n self.beta_2 = beta_2\n self.epsilon = epsilon\n self.n_iter_no_change = n_iter_no_change\n self.max_fun = max_fun\n\n def _unpack(self, packed_parameters):\n \"\"\"Extract the coefficients and intercepts from packed_parameters.\"\"\"\n for i in range(self.n_layers_ - 1):\n start, end, shape = self._coef_indptr[i]\n self.coefs_[i] = np.reshape(packed_parameters[start:end], shape)\n\n start, end = self._intercept_indptr[i]\n self.intercepts_[i] = packed_parameters[start:end]\n\n def _forward_pass(self, activations):\n \"\"\"Perform a forward pass on the network by computing the values\n of the neurons in the hidden layers and the output layer.\n\n Parameters\n ----------\n activations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer.\n \"\"\"\n hidden_activation = ACTIVATIONS[self.activation]\n # Iterate over the hidden layers\n for i in range(self.n_layers_ - 1):\n activations[i + 1] = safe_sparse_dot(activations[i],\n self.coefs_[i])\n activations[i + 1] += self.intercepts_[i]\n\n # For the hidden layers\n if (i + 1) != (self.n_layers_ - 1):\n hidden_activation(activations[i + 1])\n\n # For the last layer\n output_activation = ACTIVATIONS[self.out_activation_]\n output_activation(activations[i + 1])\n\n return activations\n\n def _forward_pass_fast(self, X):\n \"\"\"Predict using the trained model\n\n This is the same as _forward_pass but 
does not record the activations\n of all layers and only returns the last layer's activation.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The decision function of the samples for each class in the model.\n \"\"\"\n X = self._validate_data(X, accept_sparse=['csr', 'csc'], reset=False)\n\n # Initialize first layer\n activation = X\n\n # Forward propagate\n hidden_activation = ACTIVATIONS[self.activation]\n for i in range(self.n_layers_ - 1):\n activation = safe_sparse_dot(activation, self.coefs_[i])\n activation += self.intercepts_[i]\n if i != self.n_layers_ - 2:\n hidden_activation(activation)\n output_activation = ACTIVATIONS[self.out_activation_]\n output_activation(activation)\n\n return activation\n\n def _compute_loss_grad(self, layer, n_samples, activations, deltas,\n coef_grads, intercept_grads):\n \"\"\"Compute the gradient of loss with respect to coefs and intercept for\n specified layer.\n\n This function does backpropagation for the specified one layer.\n \"\"\"\n coef_grads[layer] = safe_sparse_dot(activations[layer].T,\n deltas[layer])\n coef_grads[layer] += (self.alpha * self.coefs_[layer])\n coef_grads[layer] /= n_samples\n\n intercept_grads[layer] = np.mean(deltas[layer], 0)\n\n def _loss_grad_lbfgs(self, packed_coef_inter, X, y, activations, deltas,\n coef_grads, intercept_grads):\n \"\"\"Compute the MLP loss function and its corresponding derivatives\n with respect to the different parameters given in the initialization.\n\n Returned gradients are packed in a single vector so it can be used\n in lbfgs\n\n Parameters\n ----------\n packed_coef_inter : ndarray\n A vector comprising the flattened coefficients and intercepts.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n y : ndarray of shape (n_samples,)\n The target values.\n\n activations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer.\n\n deltas : list, length = n_layers - 1\n The ith element of the list holds the difference between the\n activations of the i + 1 layer and the backpropagated error.\n More specifically, deltas are gradients of loss with respect to z\n in each layer, where z = wx + b is the value of a particular layer\n before passing through the activation function\n\n coef_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n coefficient parameters of the ith layer in an iteration.\n\n intercept_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n intercept parameters of the ith layer in an iteration.\n\n Returns\n -------\n loss : float\n grad : array-like, shape (number of nodes of all layers,)\n \"\"\"\n self._unpack(packed_coef_inter)\n loss, coef_grads, intercept_grads = self._backprop(\n X, y, activations, deltas, coef_grads, intercept_grads)\n grad = _pack(coef_grads, intercept_grads)\n return loss, grad\n\n def _backprop(self, X, y, activations, deltas, coef_grads,\n intercept_grads):\n \"\"\"Compute the MLP loss function and its corresponding derivatives\n with respect to each parameter: weights and bias vectors.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n y : ndarray of shape (n_samples,)\n The target values.\n\n activations : list, length = n_layers - 1\n The ith element of 
the list holds the values of the ith layer.\n\n deltas : list, length = n_layers - 1\n The ith element of the list holds the difference between the\n activations of the i + 1 layer and the backpropagated error.\n More specifically, deltas are gradients of loss with respect to z\n in each layer, where z = wx + b is the value of a particular layer\n before passing through the activation function\n\n coef_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n coefficient parameters of the ith layer in an iteration.\n\n intercept_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n intercept parameters of the ith layer in an iteration.\n\n Returns\n -------\n loss : float\n coef_grads : list, length = n_layers - 1\n intercept_grads : list, length = n_layers - 1\n \"\"\"\n n_samples = X.shape[0]\n\n # Forward propagate\n activations = self._forward_pass(activations)\n\n # Get loss\n loss_func_name = self.loss\n if loss_func_name == 'log_loss' and self.out_activation_ == 'logistic':\n loss_func_name = 'binary_log_loss'\n loss = LOSS_FUNCTIONS[loss_func_name](y, activations[-1])\n # Add L2 regularization term to loss\n values = 0\n for s in self.coefs_:\n s = s.ravel()\n values += np.dot(s, s)\n loss += (0.5 * self.alpha) * values / n_samples\n\n # Backward propagate\n last = self.n_layers_ - 2\n\n # The calculation of delta[last] here works with following\n # combinations of output activation and loss function:\n # sigmoid and binary cross entropy, softmax and categorical cross\n # entropy, and identity with squared loss\n deltas[last] = activations[-1] - y\n\n # Compute gradient for the last layer\n self._compute_loss_grad(\n last, n_samples, activations, deltas, coef_grads, intercept_grads)\n\n inplace_derivative = DERIVATIVES[self.activation]\n # Iterate over the hidden layers\n for i in range(self.n_layers_ - 2, 0, -1):\n deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T)\n inplace_derivative(activations[i], deltas[i - 1])\n\n self._compute_loss_grad(\n i - 1, n_samples, activations, deltas, coef_grads,\n intercept_grads)\n\n return loss, coef_grads, intercept_grads\n\n def _initialize(self, y, layer_units, dtype):\n # set all attributes, allocate weights etc for first call\n # Initialize parameters\n self.n_iter_ = 0\n self.t_ = 0\n self.n_outputs_ = y.shape[1]\n\n # Compute the number of layers\n self.n_layers_ = len(layer_units)\n\n # Output for regression\n if not is_classifier(self):\n self.out_activation_ = 'identity'\n # Output for multi class\n elif self._label_binarizer.y_type_ == 'multiclass':\n self.out_activation_ = 'softmax'\n # Output for binary class and multi-label\n else:\n self.out_activation_ = 'logistic'\n\n # Initialize coefficient and intercept layers\n self.coefs_ = []\n self.intercepts_ = []\n\n for i in range(self.n_layers_ - 1):\n coef_init, intercept_init = self._init_coef(layer_units[i],\n layer_units[i + 1],\n dtype)\n self.coefs_.append(coef_init)\n self.intercepts_.append(intercept_init)\n\n if self.solver in _STOCHASTIC_SOLVERS:\n self.loss_curve_ = []\n self._no_improvement_count = 0\n if self.early_stopping:\n self.validation_scores_ = []\n self.best_validation_score_ = -np.inf\n else:\n self.best_loss_ = np.inf\n\n def _init_coef(self, fan_in, fan_out, dtype):\n # Use the initialization method recommended by\n # Glorot et al.\n factor = 6.\n if self.activation == 'logistic':\n factor = 2.\n init_bound = np.sqrt(factor / (fan_in + fan_out))\n\n # 
Generate weights and bias:\n coef_init = self._random_state.uniform(-init_bound, init_bound,\n (fan_in, fan_out))\n intercept_init = self._random_state.uniform(-init_bound, init_bound,\n fan_out)\n coef_init = coef_init.astype(dtype, copy=False)\n intercept_init = intercept_init.astype(dtype, copy=False)\n return coef_init, intercept_init\n\n def _fit(self, X, y, incremental=False):\n # Make sure self.hidden_layer_sizes is a list\n hidden_layer_sizes = self.hidden_layer_sizes\n if not hasattr(hidden_layer_sizes, \"__iter__\"):\n hidden_layer_sizes = [hidden_layer_sizes]\n hidden_layer_sizes = list(hidden_layer_sizes)\n\n # Validate input parameters.\n self._validate_hyperparameters()\n if np.any(np.array(hidden_layer_sizes) <= 0):\n raise ValueError(\"hidden_layer_sizes must be > 0, got %s.\" %\n hidden_layer_sizes)\n first_pass = (not hasattr(self, 'coefs_') or\n (not self.warm_start and not incremental))\n\n X, y = self._validate_input(X, y, incremental, reset=first_pass)\n\n n_samples, n_features = X.shape\n\n # Ensure y is 2D\n if y.ndim == 1:\n y = y.reshape((-1, 1))\n\n self.n_outputs_ = y.shape[1]\n\n layer_units = ([n_features] + hidden_layer_sizes +\n [self.n_outputs_])\n\n # check random state\n self._random_state = check_random_state(self.random_state)\n\n if first_pass:\n # First time training the model\n self._initialize(y, layer_units, X.dtype)\n\n # Initialize lists\n activations = [X] + [None] * (len(layer_units) - 1)\n deltas = [None] * (len(activations) - 1)\n\n coef_grads = [np.empty((n_fan_in_, n_fan_out_), dtype=X.dtype)\n for n_fan_in_,\n n_fan_out_ in zip(layer_units[:-1],\n layer_units[1:])]\n\n intercept_grads = [np.empty(n_fan_out_, dtype=X.dtype)\n for n_fan_out_ in\n layer_units[1:]]\n\n # Run the Stochastic optimization solver\n if self.solver in _STOCHASTIC_SOLVERS:\n self._fit_stochastic(X, y, activations, deltas, coef_grads,\n intercept_grads, layer_units, incremental)\n\n # Run the LBFGS solver\n elif self.solver == 'lbfgs':\n self._fit_lbfgs(X, y, activations, deltas, coef_grads,\n intercept_grads, layer_units)\n return self\n\n def _validate_hyperparameters(self):\n if not isinstance(self.shuffle, bool):\n raise ValueError(\"shuffle must be either True or False, got %s.\" %\n self.shuffle)\n if self.max_iter <= 0:\n raise ValueError(\"max_iter must be > 0, got %s.\" % self.max_iter)\n if self.max_fun <= 0:\n raise ValueError(\"max_fun must be > 0, got %s.\" % self.max_fun)\n if self.alpha < 0.0:\n raise ValueError(\"alpha must be >= 0, got %s.\" % self.alpha)\n if (self.learning_rate in [\"constant\", \"invscaling\", \"adaptive\"] and\n self.learning_rate_init <= 0.0):\n raise ValueError(\"learning_rate_init must be > 0, got %s.\" %\n self.learning_rate_init)\n if self.momentum > 1 or self.momentum < 0:\n raise ValueError(\"momentum must be >= 0 and <= 1, got %s\" %\n self.momentum)\n if not isinstance(self.nesterovs_momentum, bool):\n raise ValueError(\"nesterovs_momentum must be either True or False,\"\n \" got %s.\" % self.nesterovs_momentum)\n if not isinstance(self.early_stopping, bool):\n raise ValueError(\"early_stopping must be either True or False,\"\n \" got %s.\" % self.early_stopping)\n if self.validation_fraction < 0 or self.validation_fraction >= 1:\n raise ValueError(\"validation_fraction must be >= 0 and < 1, \"\n \"got %s\" % self.validation_fraction)\n if self.beta_1 < 0 or self.beta_1 >= 1:\n raise ValueError(\"beta_1 must be >= 0 and < 1, got %s\" %\n self.beta_1)\n if self.beta_2 < 0 or self.beta_2 >= 1:\n raise ValueError(\"beta_2 must be 
>= 0 and < 1, got %s\" %\n self.beta_2)\n if self.epsilon <= 0.0:\n raise ValueError(\"epsilon must be > 0, got %s.\" % self.epsilon)\n if self.n_iter_no_change <= 0:\n raise ValueError(\"n_iter_no_change must be > 0, got %s.\"\n % self.n_iter_no_change)\n\n # raise ValueError if not registered\n if self.activation not in ACTIVATIONS:\n raise ValueError(\"The activation '%s' is not supported. Supported \"\n \"activations are %s.\"\n % (self.activation, list(sorted(ACTIVATIONS))))\n if self.learning_rate not in [\"constant\", \"invscaling\", \"adaptive\"]:\n raise ValueError(\"learning rate %s is not supported. \" %\n self.learning_rate)\n supported_solvers = _STOCHASTIC_SOLVERS + [\"lbfgs\"]\n if self.solver not in supported_solvers:\n raise ValueError(\"The solver %s is not supported. \"\n \" Expected one of: %s\" %\n (self.solver, \", \".join(supported_solvers)))\n\n def _fit_lbfgs(self, X, y, activations, deltas, coef_grads,\n intercept_grads, layer_units):\n # Store meta information for the parameters\n self._coef_indptr = []\n self._intercept_indptr = []\n start = 0\n\n # Save sizes and indices of coefficients for faster unpacking\n for i in range(self.n_layers_ - 1):\n n_fan_in, n_fan_out = layer_units[i], layer_units[i + 1]\n\n end = start + (n_fan_in * n_fan_out)\n self._coef_indptr.append((start, end, (n_fan_in, n_fan_out)))\n start = end\n\n # Save sizes and indices of intercepts for faster unpacking\n for i in range(self.n_layers_ - 1):\n end = start + layer_units[i + 1]\n self._intercept_indptr.append((start, end))\n start = end\n\n # Run LBFGS\n packed_coef_inter = _pack(self.coefs_,\n self.intercepts_)\n\n if self.verbose is True or self.verbose >= 1:\n iprint = 1\n else:\n iprint = -1\n\n opt_res = scipy.optimize.minimize(\n self._loss_grad_lbfgs, packed_coef_inter,\n method=\"L-BFGS-B\", jac=True,\n options={\n \"maxfun\": self.max_fun,\n \"maxiter\": self.max_iter,\n \"iprint\": iprint,\n \"gtol\": self.tol\n },\n args=(X, y, activations, deltas, coef_grads, intercept_grads))\n self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res, self.max_iter)\n self.loss_ = opt_res.fun\n self._unpack(opt_res.x)\n\n def _fit_stochastic(self, X, y, activations, deltas, coef_grads,\n intercept_grads, layer_units, incremental):\n\n if not incremental or not hasattr(self, '_optimizer'):\n params = self.coefs_ + self.intercepts_\n\n if self.solver == 'sgd':\n self._optimizer = SGDOptimizer(\n params, self.learning_rate_init, self.learning_rate,\n self.momentum, self.nesterovs_momentum, self.power_t)\n elif self.solver == 'adam':\n self._optimizer = AdamOptimizer(\n params, self.learning_rate_init, self.beta_1, self.beta_2,\n self.epsilon)\n\n # early_stopping in partial_fit doesn't make sense\n early_stopping = self.early_stopping and not incremental\n if early_stopping:\n # don't stratify in multilabel classification\n should_stratify = is_classifier(self) and self.n_outputs_ == 1\n stratify = y if should_stratify else None\n X, X_val, y, y_val = train_test_split(\n X, y, random_state=self._random_state,\n test_size=self.validation_fraction,\n stratify=stratify)\n if is_classifier(self):\n y_val = self._label_binarizer.inverse_transform(y_val)\n else:\n X_val = None\n y_val = None\n\n n_samples = X.shape[0]\n sample_idx = np.arange(n_samples, dtype=int)\n\n if self.batch_size == 'auto':\n batch_size = min(200, n_samples)\n else:\n if self.batch_size < 1 or self.batch_size > n_samples:\n warnings.warn(\"Got `batch_size` less than 1 or larger than \"\n \"sample size. 
It is going to be clipped\")\n batch_size = np.clip(self.batch_size, 1, n_samples)\n\n try:\n for it in range(self.max_iter):\n if self.shuffle:\n # Only shuffle the sample indices instead of X and y to\n # reduce the memory footprint. These indices will be used\n # to slice the X and y.\n sample_idx = shuffle(sample_idx,\n random_state=self._random_state)\n\n accumulated_loss = 0.0\n for batch_slice in gen_batches(n_samples, batch_size):\n if self.shuffle:\n X_batch = _safe_indexing(X, sample_idx[batch_slice])\n y_batch = y[sample_idx[batch_slice]]\n else:\n X_batch = X[batch_slice]\n y_batch = y[batch_slice]\n\n activations[0] = X_batch\n batch_loss, coef_grads, intercept_grads = self._backprop(\n X_batch, y_batch, activations, deltas,\n coef_grads, intercept_grads)\n accumulated_loss += batch_loss * (batch_slice.stop -\n batch_slice.start)\n\n # update weights\n grads = coef_grads + intercept_grads\n self._optimizer.update_params(grads)\n\n self.n_iter_ += 1\n self.loss_ = accumulated_loss / X.shape[0]\n\n self.t_ += n_samples\n self.loss_curve_.append(self.loss_)\n if self.verbose:\n print(\"Iteration %d, loss = %.8f\" % (self.n_iter_,\n self.loss_))\n\n # update no_improvement_count based on training loss or\n # validation score according to early_stopping\n self._update_no_improvement_count(early_stopping, X_val, y_val)\n\n # for learning rate that needs to be updated at iteration end\n self._optimizer.iteration_ends(self.t_)\n\n if self._no_improvement_count > self.n_iter_no_change:\n # not better than last `n_iter_no_change` iterations by tol\n # stop or decrease learning rate\n if early_stopping:\n msg = (\"Validation score did not improve more than \"\n \"tol=%f for %d consecutive epochs.\" % (\n self.tol, self.n_iter_no_change))\n else:\n msg = (\"Training loss did not improve more than tol=%f\"\n \" for %d consecutive epochs.\" % (\n self.tol, self.n_iter_no_change))\n\n is_stopping = self._optimizer.trigger_stopping(\n msg, self.verbose)\n if is_stopping:\n break\n else:\n self._no_improvement_count = 0\n\n if incremental:\n break\n\n if self.n_iter_ == self.max_iter:\n warnings.warn(\n \"Stochastic Optimizer: Maximum iterations (%d) \"\n \"reached and the optimization hasn't converged yet.\"\n % self.max_iter, ConvergenceWarning)\n except KeyboardInterrupt:\n warnings.warn(\"Training interrupted by user.\")\n\n if early_stopping:\n # restore best weights\n self.coefs_ = self._best_coefs\n self.intercepts_ = self._best_intercepts\n\n def _update_no_improvement_count(self, early_stopping, X_val, y_val):\n if early_stopping:\n # compute validation score, use that for stopping\n self.validation_scores_.append(self.score(X_val, y_val))\n\n if self.verbose:\n print(\"Validation score: %f\" % self.validation_scores_[-1])\n # update best parameters\n # use validation_scores_, not loss_curve_\n # let's hope no-one overloads .score with mse\n last_valid_score = self.validation_scores_[-1]\n\n if last_valid_score < (self.best_validation_score_ +\n self.tol):\n self._no_improvement_count += 1\n else:\n self._no_improvement_count = 0\n\n if last_valid_score > self.best_validation_score_:\n self.best_validation_score_ = last_valid_score\n self._best_coefs = [c.copy() for c in self.coefs_]\n self._best_intercepts = [i.copy()\n for i in self.intercepts_]\n else:\n if self.loss_curve_[-1] > self.best_loss_ - self.tol:\n self._no_improvement_count += 1\n else:\n self._no_improvement_count = 0\n if self.loss_curve_[-1] < self.best_loss_:\n self.best_loss_ = self.loss_curve_[-1]\n\n def 
fit(self, X, y):\n \"\"\"Fit the model to data matrix X and target(s) y.\n\n Parameters\n ----------\n X : ndarray or sparse matrix of shape (n_samples, n_features)\n The input data.\n\n y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels in classification, real numbers in\n regression).\n\n Returns\n -------\n self : returns a trained MLP model.\n \"\"\"\n return self._fit(X, y, incremental=False)\n\n @property\n def partial_fit(self):\n \"\"\"Update the model with a single iteration over the given data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n y : ndarray of shape (n_samples,)\n The target values.\n\n Returns\n -------\n self : returns a trained MLP model.\n \"\"\"\n if self.solver not in _STOCHASTIC_SOLVERS:\n raise AttributeError(\"partial_fit is only available for stochastic\"\n \" optimizers. %s is not stochastic.\"\n % self.solver)\n return self._partial_fit\n\n def _partial_fit(self, X, y):\n return self._fit(X, y, incremental=True)", + "instance_attributes": [ + { + "name": "n_iter_", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "t_", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "out_activation_", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "coefs_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "intercepts_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "loss_curve_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "_no_improvement_count", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "validation_scores_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "_coef_indptr", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "_intercept_indptr", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier", + "name": "MLPClassifier", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier", + "decorators": [], + "superclasses": ["ClassifierMixin", "BaseMultilayerPerceptron"], + "methods": [ + "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/_validate_input", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/predict", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/partial_fit@getter", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/_partial_fit", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/predict_log_proba", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/predict_proba" + ], + "is_public": false, + "reexported_by": [], + "description": "Multi-layer Perceptron classifier.\n\nThis model optimizes the log-loss function using LBFGS or stochastic\ngradient descent.\n\n.. versionadded:: 0.18", + "docstring": "Multi-layer Perceptron classifier.\n\nThis model optimizes the log-loss function using LBFGS or stochastic\ngradient descent.\n\n.. 
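The convergence bookkeeping in `_fit_stochastic` and `_update_no_improvement_count` above can be restated compactly. The sketch below is an illustrative standalone helper, not scikit-learn code: it reproduces only the training-loss branch, where an epoch counts as "no improvement" unless its loss beats the best loss seen so far by more than `tol`; training stops (or, under the 'adaptive' schedule, the learning rate is divided by 5) once the counter exceeds `n_iter_no_change`. The function name is hypothetical.

```python
# Minimal sketch of the training-loss early-stopping rule shown above.
# `epochs_without_improvement` is a hypothetical helper, not sklearn API.
def epochs_without_improvement(loss_curve, tol=1e-4):
    best_loss = float("inf")
    count = 0
    for loss in loss_curve:
        # mirrors: if self.loss_curve_[-1] > self.best_loss_ - tol
        if loss > best_loss - tol:
            count += 1
        else:
            count = 0
        best_loss = min(best_loss, loss)
    return count

# With the defaults tol=1e-4 and n_iter_no_change=10, training would stop
# once this counter exceeds 10.
print(epochs_without_improvement([0.5, 0.4, 0.39995, 0.3999]))  # -> 2
```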
versionadded:: 0.18\n\nParameters\n----------\nhidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)\n The ith element represents the number of neurons in the ith\n hidden layer.\n\nactivation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n Activation function for the hidden layer.\n\n - 'identity', no-op activation, useful to implement linear bottleneck,\n returns f(x) = x\n\n - 'logistic', the logistic sigmoid function,\n returns f(x) = 1 / (1 + exp(-x)).\n\n - 'tanh', the hyperbolic tan function,\n returns f(x) = tanh(x).\n\n - 'relu', the rectified linear unit function,\n returns f(x) = max(0, x)\n\nsolver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n The solver for weight optimization.\n\n - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n - 'sgd' refers to stochastic gradient descent.\n\n - 'adam' refers to a stochastic gradient-based optimizer proposed\n by Kingma, Diederik, and Jimmy Ba\n\n Note: The default solver 'adam' works pretty well on relatively\n large datasets (with thousands of training samples or more) in terms of\n both training time and validation score.\n For small datasets, however, 'lbfgs' can converge faster and perform\n better.\n\nalpha : float, default=0.0001\n L2 penalty (regularization term) parameter.\n\nbatch_size : int, default='auto'\n Size of minibatches for stochastic optimizers.\n If the solver is 'lbfgs', the classifier will not use minibatch.\n When set to \"auto\", `batch_size=min(200, n_samples)`\n\nlearning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n Learning rate schedule for weight updates.\n\n - 'constant' is a constant learning rate given by\n 'learning_rate_init'.\n\n - 'invscaling' gradually decreases the learning rate at each\n time step 't' using an inverse scaling exponent of 'power_t'.\n effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n - 'adaptive' keeps the learning rate constant to\n 'learning_rate_init' as long as training loss keeps decreasing.\n Each time two consecutive epochs fail to decrease training loss by at\n least tol, or fail to increase validation score by at least tol if\n 'early_stopping' is on, the current learning rate is divided by 5.\n\n Only used when ``solver='sgd'``.\n\nlearning_rate_init : double, default=0.001\n The initial learning rate used. It controls the step-size\n in updating the weights. Only used when solver='sgd' or 'adam'.\n\npower_t : double, default=0.5\n The exponent for inverse scaling learning rate.\n It is used in updating effective learning rate when the learning_rate\n is set to 'invscaling'. Only used when solver='sgd'.\n\nmax_iter : int, default=200\n Maximum number of iterations. The solver iterates until convergence\n (determined by 'tol') or this number of iterations. For stochastic\n solvers ('sgd', 'adam'), note that this determines the number of epochs\n (how many times each data point will be used), not the number of\n gradient steps.\n\nshuffle : bool, default=True\n Whether to shuffle samples in each iteration. Only used when\n solver='sgd' or 'adam'.\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for weights and bias\n initialization, train-test split if early stopping is used, and batch\n sampling when solver='sgd' or 'adam'.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\ntol : float, default=1e-4\n Tolerance for the optimization. 
When the loss or score is not improving\n by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n unless ``learning_rate`` is set to 'adaptive', convergence is\n considered to be reached and training stops.\n\nverbose : bool, default=False\n Whether to print progress messages to stdout.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous\n call to fit as initialization, otherwise, just erase the\n previous solution. See :term:`the Glossary `.\n\nmomentum : float, default=0.9\n Momentum for gradient descent update. Should be between 0 and 1. Only\n used when solver='sgd'.\n\nnesterovs_momentum : bool, default=True\n Whether to use Nesterov's momentum. Only used when solver='sgd' and\n momentum > 0.\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to true, it will automatically set\n aside 10% of training data as validation and terminate training when\n validation score is not improving by at least tol for\n ``n_iter_no_change`` consecutive epochs. The split is stratified,\n except in a multilabel setting.\n If early stopping is False, then the training stops when the training\n loss does not improve by more than tol for n_iter_no_change consecutive\n passes over the training set.\n Only effective when solver='sgd' or 'adam'\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True\n\nbeta_1 : float, default=0.9\n Exponential decay rate for estimates of first moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\nbeta_2 : float, default=0.999\n Exponential decay rate for estimates of second moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\nepsilon : float, default=1e-8\n Value for numerical stability in adam. Only used when solver='adam'\n\nn_iter_no_change : int, default=10\n Maximum number of epochs to not meet ``tol`` improvement.\n Only effective when solver='sgd' or 'adam'\n\n .. versionadded:: 0.20\n\nmax_fun : int, default=15000\n Only used when solver='lbfgs'. Maximum number of loss function calls.\n The solver iterates until convergence (determined by 'tol'), number\n of iterations reaches max_iter, or this number of loss function calls.\n Note that number of loss function calls will be greater than or equal\n to the number of iterations for the `MLPClassifier`.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray or list of ndarray of shape (n_classes,)\n Class labels for each output.\n\nloss_ : float\n The current loss computed with the loss function.\n\nbest_loss_ : float\n The minimum loss reached by the solver throughout fitting.\n\nloss_curve_ : list of shape (`n_iter_`,)\n The ith element in the list represents the loss at the ith iteration.\n\nt_ : int\n The number of training samples seen by the solver during fitting.\n\ncoefs_ : list of shape (n_layers - 1,)\n The ith element in the list represents the weight matrix corresponding\n to layer i.\n\nintercepts_ : list of shape (n_layers - 1,)\n The ith element in the list represents the bias vector corresponding to\n layer i + 1.\n\nn_iter_ : int\n The number of iterations the solver has run.\n\nn_layers_ : int\n Number of layers.\n\nn_outputs_ : int\n Number of outputs.\n\nout_activation_ : str\n Name of the output activation function.\n\nExamples\n--------\n>>> from sklearn.neural_network import MLPClassifier\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_classification(n_samples=100, random_state=1)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,\n... random_state=1)\n>>> clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)\n>>> clf.predict_proba(X_test[:1])\narray([[0.038..., 0.961...]])\n>>> clf.predict(X_test[:5, :])\narray([1, 0, 1, 0, 1])\n>>> clf.score(X_test, y_test)\n0.8...\n\nNotes\n-----\nMLPClassifier trains iteratively since at each time step\nthe partial derivatives of the loss function with respect to the model\nparameters are computed to update the parameters.\n\nIt can also have a regularization term added to the loss function\nthat shrinks model parameters to prevent overfitting.\n\nThis implementation works with data represented as dense numpy arrays or\nsparse scipy arrays of floating point values.\n\nReferences\n----------\nHinton, Geoffrey E.\n \"Connectionist learning procedures.\" Artificial intelligence 40.1\n (1989): 185-234.\n\nGlorot, Xavier, and Yoshua Bengio. \"Understanding the difficulty of\n training deep feedforward neural networks.\" International Conference\n on Artificial Intelligence and Statistics. 2010.\n\nHe, Kaiming, et al. \"Delving deep into rectifiers: Surpassing human-level\n performance on imagenet classification.\" arXiv preprint\n arXiv:1502.01852 (2015).\n\nKingma, Diederik, and Jimmy Ba. \"Adam: A method for stochastic\n optimization.\" arXiv preprint arXiv:1412.6980 (2014).", + "code": "class MLPClassifier(ClassifierMixin, BaseMultilayerPerceptron):\n \"\"\"Multi-layer Perceptron classifier.\n\n This model optimizes the log-loss function using LBFGS or stochastic\n gradient descent.\n\n .. 
versionadded:: 0.18\n\n Parameters\n ----------\n hidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)\n The ith element represents the number of neurons in the ith\n hidden layer.\n\n activation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n Activation function for the hidden layer.\n\n - 'identity', no-op activation, useful to implement linear bottleneck,\n returns f(x) = x\n\n - 'logistic', the logistic sigmoid function,\n returns f(x) = 1 / (1 + exp(-x)).\n\n - 'tanh', the hyperbolic tan function,\n returns f(x) = tanh(x).\n\n - 'relu', the rectified linear unit function,\n returns f(x) = max(0, x)\n\n solver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n The solver for weight optimization.\n\n - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n - 'sgd' refers to stochastic gradient descent.\n\n - 'adam' refers to a stochastic gradient-based optimizer proposed\n by Kingma, Diederik, and Jimmy Ba\n\n Note: The default solver 'adam' works pretty well on relatively\n large datasets (with thousands of training samples or more) in terms of\n both training time and validation score.\n For small datasets, however, 'lbfgs' can converge faster and perform\n better.\n\n alpha : float, default=0.0001\n L2 penalty (regularization term) parameter.\n\n batch_size : int, default='auto'\n Size of minibatches for stochastic optimizers.\n If the solver is 'lbfgs', the classifier will not use minibatch.\n When set to \"auto\", `batch_size=min(200, n_samples)`\n\n learning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n Learning rate schedule for weight updates.\n\n - 'constant' is a constant learning rate given by\n 'learning_rate_init'.\n\n - 'invscaling' gradually decreases the learning rate at each\n time step 't' using an inverse scaling exponent of 'power_t'.\n effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n - 'adaptive' keeps the learning rate constant to\n 'learning_rate_init' as long as training loss keeps decreasing.\n Each time two consecutive epochs fail to decrease training loss by at\n least tol, or fail to increase validation score by at least tol if\n 'early_stopping' is on, the current learning rate is divided by 5.\n\n Only used when ``solver='sgd'``.\n\n learning_rate_init : double, default=0.001\n The initial learning rate used. It controls the step-size\n in updating the weights. Only used when solver='sgd' or 'adam'.\n\n power_t : double, default=0.5\n The exponent for inverse scaling learning rate.\n It is used in updating effective learning rate when the learning_rate\n is set to 'invscaling'. Only used when solver='sgd'.\n\n max_iter : int, default=200\n Maximum number of iterations. The solver iterates until convergence\n (determined by 'tol') or this number of iterations. For stochastic\n solvers ('sgd', 'adam'), note that this determines the number of epochs\n (how many times each data point will be used), not the number of\n gradient steps.\n\n shuffle : bool, default=True\n Whether to shuffle samples in each iteration. Only used when\n solver='sgd' or 'adam'.\n\n random_state : int, RandomState instance, default=None\n Determines random number generation for weights and bias\n initialization, train-test split if early stopping is used, and batch\n sampling when solver='sgd' or 'adam'.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n tol : float, default=1e-4\n Tolerance for the optimization. 
When the loss or score is not improving\n by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n unless ``learning_rate`` is set to 'adaptive', convergence is\n considered to be reached and training stops.\n\n verbose : bool, default=False\n Whether to print progress messages to stdout.\n\n warm_start : bool, default=False\n When set to True, reuse the solution of the previous\n call to fit as initialization, otherwise, just erase the\n previous solution. See :term:`the Glossary `.\n\n momentum : float, default=0.9\n Momentum for gradient descent update. Should be between 0 and 1. Only\n used when solver='sgd'.\n\n nesterovs_momentum : bool, default=True\n Whether to use Nesterov's momentum. Only used when solver='sgd' and\n momentum > 0.\n\n early_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to true, it will automatically set\n aside 10% of training data as validation and terminate training when\n validation score is not improving by at least tol for\n ``n_iter_no_change`` consecutive epochs. The split is stratified,\n except in a multilabel setting.\n If early stopping is False, then the training stops when the training\n loss does not improve by more than tol for n_iter_no_change consecutive\n passes over the training set.\n Only effective when solver='sgd' or 'adam'\n\n validation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True\n\n beta_1 : float, default=0.9\n Exponential decay rate for estimates of first moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\n beta_2 : float, default=0.999\n Exponential decay rate for estimates of second moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\n epsilon : float, default=1e-8\n Value for numerical stability in adam. Only used when solver='adam'\n\n n_iter_no_change : int, default=10\n Maximum number of epochs to not meet ``tol`` improvement.\n Only effective when solver='sgd' or 'adam'\n\n .. versionadded:: 0.20\n\n max_fun : int, default=15000\n Only used when solver='lbfgs'. Maximum number of loss function calls.\n The solver iterates until convergence (determined by 'tol'), number\n of iterations reaches max_iter, or this number of loss function calls.\n Note that number of loss function calls will be greater than or equal\n to the number of iterations for the `MLPClassifier`.\n\n .. 
versionadded:: 0.22\n\n Attributes\n ----------\n classes_ : ndarray or list of ndarray of shape (n_classes,)\n Class labels for each output.\n\n loss_ : float\n The current loss computed with the loss function.\n\n best_loss_ : float\n The minimum loss reached by the solver throughout fitting.\n\n loss_curve_ : list of shape (`n_iter_`,)\n The ith element in the list represents the loss at the ith iteration.\n\n t_ : int\n The number of training samples seen by the solver during fitting.\n\n coefs_ : list of shape (n_layers - 1,)\n The ith element in the list represents the weight matrix corresponding\n to layer i.\n\n intercepts_ : list of shape (n_layers - 1,)\n The ith element in the list represents the bias vector corresponding to\n layer i + 1.\n\n n_iter_ : int\n The number of iterations the solver has run.\n\n n_layers_ : int\n Number of layers.\n\n n_outputs_ : int\n Number of outputs.\n\n out_activation_ : str\n Name of the output activation function.\n\n Examples\n --------\n >>> from sklearn.neural_network import MLPClassifier\n >>> from sklearn.datasets import make_classification\n >>> from sklearn.model_selection import train_test_split\n >>> X, y = make_classification(n_samples=100, random_state=1)\n >>> X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,\n ... random_state=1)\n >>> clf = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)\n >>> clf.predict_proba(X_test[:1])\n array([[0.038..., 0.961...]])\n >>> clf.predict(X_test[:5, :])\n array([1, 0, 1, 0, 1])\n >>> clf.score(X_test, y_test)\n 0.8...\n\n Notes\n -----\n MLPClassifier trains iteratively since at each time step\n the partial derivatives of the loss function with respect to the model\n parameters are computed to update the parameters.\n\n It can also have a regularization term added to the loss function\n that shrinks model parameters to prevent overfitting.\n\n This implementation works with data represented as dense numpy arrays or\n sparse scipy arrays of floating point values.\n\n References\n ----------\n Hinton, Geoffrey E.\n \"Connectionist learning procedures.\" Artificial intelligence 40.1\n (1989): 185-234.\n\n Glorot, Xavier, and Yoshua Bengio. \"Understanding the difficulty of\n training deep feedforward neural networks.\" International Conference\n on Artificial Intelligence and Statistics. 2010.\n\n He, Kaiming, et al. \"Delving deep into rectifiers: Surpassing human-level\n performance on imagenet classification.\" arXiv preprint\n arXiv:1502.01852 (2015).\n\n Kingma, Diederik, and Jimmy Ba. 
\"Adam: A method for stochastic\n optimization.\" arXiv preprint arXiv:1412.6980 (2014).\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, hidden_layer_sizes=(100,), activation=\"relu\", *,\n solver='adam', alpha=0.0001,\n batch_size='auto', learning_rate=\"constant\",\n learning_rate_init=0.001, power_t=0.5, max_iter=200,\n shuffle=True, random_state=None, tol=1e-4,\n verbose=False, warm_start=False, momentum=0.9,\n nesterovs_momentum=True, early_stopping=False,\n validation_fraction=0.1, beta_1=0.9, beta_2=0.999,\n epsilon=1e-8, n_iter_no_change=10, max_fun=15000):\n super().__init__(\n hidden_layer_sizes=hidden_layer_sizes,\n activation=activation, solver=solver, alpha=alpha,\n batch_size=batch_size, learning_rate=learning_rate,\n learning_rate_init=learning_rate_init, power_t=power_t,\n max_iter=max_iter, loss='log_loss', shuffle=shuffle,\n random_state=random_state, tol=tol, verbose=verbose,\n warm_start=warm_start, momentum=momentum,\n nesterovs_momentum=nesterovs_momentum,\n early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n beta_1=beta_1, beta_2=beta_2, epsilon=epsilon,\n n_iter_no_change=n_iter_no_change, max_fun=max_fun)\n\n def _validate_input(self, X, y, incremental, reset):\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'],\n multi_output=True,\n dtype=(np.float64, np.float32),\n reset=reset)\n if y.ndim == 2 and y.shape[1] == 1:\n y = column_or_1d(y, warn=True)\n\n # Matrix of actions to be taken under the possible combinations:\n # The case that incremental == True and classes_ not defined is\n # already checked by _check_partial_fit_first_call that is called\n # in _partial_fit below.\n # The cases are already grouped into the respective if blocks below.\n #\n # incremental warm_start classes_ def action\n # 0 0 0 define classes_\n # 0 1 0 define classes_\n # 0 0 1 redefine classes_\n #\n # 0 1 1 check compat warm_start\n # 1 1 1 check compat warm_start\n #\n # 1 0 1 check compat last fit\n #\n # Note the reliance on short-circuiting here, so that the second\n # or part implies that classes_ is defined.\n if (\n (not hasattr(self, \"classes_\")) or\n (not self.warm_start and not incremental)\n ):\n self._label_binarizer = LabelBinarizer()\n self._label_binarizer.fit(y)\n self.classes_ = self._label_binarizer.classes_\n else:\n classes = unique_labels(y)\n if self.warm_start:\n if set(classes) != set(self.classes_):\n raise ValueError(\n f\"warm_start can only be used where `y` has the same \"\n f\"classes as in the previous call to fit. Previously \"\n f\"got {self.classes_}, `y` has {classes}\"\n )\n elif len(np.setdiff1d(classes, self.classes_, assume_unique=True)):\n raise ValueError(\n f\"`y` has classes not in `self.classes_`. \"\n f\"`self.classes_` has {self.classes_}. 
'y' has {classes}.\"\n )\n\n # This downcast to bool is to prevent upcasting when working with\n # float32 data\n y = self._label_binarizer.transform(y).astype(bool)\n return X, y\n\n def predict(self, X):\n \"\"\"Predict using the multi-layer perceptron classifier\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n y : ndarray, shape (n_samples,) or (n_samples, n_classes)\n The predicted classes.\n \"\"\"\n check_is_fitted(self)\n y_pred = self._forward_pass_fast(X)\n\n if self.n_outputs_ == 1:\n y_pred = y_pred.ravel()\n\n return self._label_binarizer.inverse_transform(y_pred)\n\n @property\n def partial_fit(self):\n \"\"\"Update the model with a single iteration over the given data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n y : array-like of shape (n_samples,)\n The target values.\n\n classes : array of shape (n_classes,), default=None\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\n Returns\n -------\n self : returns a trained MLP model.\n \"\"\"\n if self.solver not in _STOCHASTIC_SOLVERS:\n raise AttributeError(\"partial_fit is only available for stochastic\"\n \" optimizer. %s is not stochastic\"\n % self.solver)\n return self._partial_fit\n\n def _partial_fit(self, X, y, classes=None):\n if _check_partial_fit_first_call(self, classes):\n self._label_binarizer = LabelBinarizer()\n if type_of_target(y).startswith('multilabel'):\n self._label_binarizer.fit(y)\n else:\n self._label_binarizer.fit(classes)\n\n super()._partial_fit(X, y)\n\n return self\n\n def predict_log_proba(self, X):\n \"\"\"Return the log of probability estimates.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n log_y_prob : ndarray of shape (n_samples, n_classes)\n The predicted log-probability of the sample for each class\n in the model, where classes are ordered as they are in\n `self.classes_`. 
Equivalent to log(predict_proba(X))\n \"\"\"\n y_prob = self.predict_proba(X)\n return np.log(y_prob, out=y_prob)\n\n def predict_proba(self, X):\n \"\"\"Probability estimates.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n y_prob : ndarray of shape (n_samples, n_classes)\n The predicted probability of the sample for each class in the\n model, where classes are ordered as they are in `self.classes_`.\n \"\"\"\n check_is_fitted(self)\n y_pred = self._forward_pass_fast(X)\n\n if self.n_outputs_ == 1:\n y_pred = y_pred.ravel()\n\n if y_pred.ndim == 1:\n return np.vstack([1 - y_pred, y_pred]).T\n else:\n return y_pred", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor", + "name": "MLPRegressor", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor", + "decorators": [], + "superclasses": ["RegressorMixin", "BaseMultilayerPerceptron"], + "methods": [ + "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/predict", + "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/_validate_input" + ], + "is_public": false, + "reexported_by": [], + "description": "Multi-layer Perceptron regressor.\n\nThis model optimizes the squared-loss using LBFGS or stochastic gradient\ndescent.\n\n.. versionadded:: 0.18", + "docstring": "Multi-layer Perceptron regressor.\n\nThis model optimizes the squared-loss using LBFGS or stochastic gradient\ndescent.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nhidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)\n The ith element represents the number of neurons in the ith\n hidden layer.\n\nactivation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n Activation function for the hidden layer.\n\n - 'identity', no-op activation, useful to implement linear bottleneck,\n returns f(x) = x\n\n - 'logistic', the logistic sigmoid function,\n returns f(x) = 1 / (1 + exp(-x)).\n\n - 'tanh', the hyperbolic tan function,\n returns f(x) = tanh(x).\n\n - 'relu', the rectified linear unit function,\n returns f(x) = max(0, x)\n\nsolver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n The solver for weight optimization.\n\n - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n - 'sgd' refers to stochastic gradient descent.\n\n - 'adam' refers to a stochastic gradient-based optimizer proposed by\n Kingma, Diederik, and Jimmy Ba\n\n Note: The default solver 'adam' works pretty well on relatively\n large datasets (with thousands of training samples or more) in terms of\n both training time and validation score.\n For small datasets, however, 'lbfgs' can converge faster and perform\n better.\n\nalpha : float, default=0.0001\n L2 penalty (regularization term) parameter.\n\nbatch_size : int, default='auto'\n Size of minibatches for stochastic optimizers.\n If the solver is 'lbfgs', the classifier will not use minibatch.\n When set to \"auto\", `batch_size=min(200, n_samples)`\n\nlearning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n Learning rate schedule for weight updates.\n\n - 'constant' is a constant learning rate given by\n 'learning_rate_init'.\n\n - 'invscaling' gradually decreases the learning rate ``learning_rate_``\n at each time step 't' using an inverse scaling exponent of 'power_t'.\n effective_learning_rate = learning_rate_init 
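Since the `partial_fit` docstring above specifies the `classes` argument in prose only, here is a hedged usage sketch: `classes` is passed on the first call (so the internal `LabelBinarizer` can be fitted before every class has been observed) and may be omitted afterwards. The random data is invented for illustration.

```python
# Hypothetical data; shows the documented first-call `classes` contract.
import numpy as np
from sklearn.neural_network import MLPClassifier

rng = np.random.RandomState(0)
X1, y1 = rng.rand(20, 4), np.zeros(20, dtype=int)   # only class 0 seen here
X2, y2 = rng.rand(20, 4), np.ones(20, dtype=int)    # class 1 arrives later

clf = MLPClassifier(solver="adam", random_state=0)  # lbfgs would raise AttributeError
clf.partial_fit(X1, y1, classes=np.array([0, 1]))   # first call: classes required
clf.partial_fit(X2, y2)                             # later calls: classes omitted
```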
/ pow(t, power_t)\n\n - 'adaptive' keeps the learning rate constant to\n 'learning_rate_init' as long as training loss keeps decreasing.\n Each time two consecutive epochs fail to decrease training loss by at\n least tol, or fail to increase validation score by at least tol if\n 'early_stopping' is on, the current learning rate is divided by 5.\n\n Only used when solver='sgd'.\n\nlearning_rate_init : double, default=0.001\n The initial learning rate used. It controls the step-size\n in updating the weights. Only used when solver='sgd' or 'adam'.\n\npower_t : double, default=0.5\n The exponent for inverse scaling learning rate.\n It is used in updating effective learning rate when the learning_rate\n is set to 'invscaling'. Only used when solver='sgd'.\n\nmax_iter : int, default=200\n Maximum number of iterations. The solver iterates until convergence\n (determined by 'tol') or this number of iterations. For stochastic\n solvers ('sgd', 'adam'), note that this determines the number of epochs\n (how many times each data point will be used), not the number of\n gradient steps.\n\nshuffle : bool, default=True\n Whether to shuffle samples in each iteration. Only used when\n solver='sgd' or 'adam'.\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for weights and bias\n initialization, train-test split if early stopping is used, and batch\n sampling when solver='sgd' or 'adam'.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\ntol : float, default=1e-4\n Tolerance for the optimization. When the loss or score is not improving\n by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n unless ``learning_rate`` is set to 'adaptive', convergence is\n considered to be reached and training stops.\n\nverbose : bool, default=False\n Whether to print progress messages to stdout.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous\n call to fit as initialization, otherwise, just erase the\n previous solution. See :term:`the Glossary `.\n\nmomentum : float, default=0.9\n Momentum for gradient descent update. Should be between 0 and 1. Only\n used when solver='sgd'.\n\nnesterovs_momentum : bool, default=True\n Whether to use Nesterov's momentum. Only used when solver='sgd' and\n momentum > 0.\n\nearly_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to true, it will automatically set\n aside 10% of training data as validation and terminate training when\n validation score is not improving by at least ``tol`` for\n ``n_iter_no_change`` consecutive epochs.\n Only effective when solver='sgd' or 'adam'\n\nvalidation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True\n\nbeta_1 : float, default=0.9\n Exponential decay rate for estimates of first moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\nbeta_2 : float, default=0.999\n Exponential decay rate for estimates of second moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\nepsilon : float, default=1e-8\n Value for numerical stability in adam. Only used when solver='adam'\n\nn_iter_no_change : int, default=10\n Maximum number of epochs to not meet ``tol`` improvement.\n Only effective when solver='sgd' or 'adam'\n\n .. 
versionadded:: 0.20\n\nmax_fun : int, default=15000\n Only used when solver='lbfgs'. Maximum number of function calls.\n The solver iterates until convergence (determined by 'tol'), number\n of iterations reaches max_iter, or this number of function calls.\n Note that number of function calls will be greater than or equal to\n the number of iterations for the MLPRegressor.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nloss_ : float\n The current loss computed with the loss function.\n\nbest_loss_ : float\n The minimum loss reached by the solver throughout fitting.\n\nloss_curve_ : list of shape (`n_iter_`,)\n Loss value evaluated at the end of each training step.\n The ith element in the list represents the loss at the ith iteration.\n\nt_ : int\n The number of training samples seen by the solver during fitting.\n Mathematically equals `n_iters * X.shape[0]`, it means\n `time_step` and it is used by optimizer's learning rate scheduler.\n\ncoefs_ : list of shape (n_layers - 1,)\n The ith element in the list represents the weight matrix corresponding\n to layer i.\n\nintercepts_ : list of shape (n_layers - 1,)\n The ith element in the list represents the bias vector corresponding to\n layer i + 1.\n\nn_iter_ : int\n The number of iterations the solver has run.\n\nn_layers_ : int\n Number of layers.\n\nn_outputs_ : int\n Number of outputs.\n\nout_activation_ : str\n Name of the output activation function.\n\nExamples\n--------\n>>> from sklearn.neural_network import MLPRegressor\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_regression(n_samples=200, random_state=1)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... random_state=1)\n>>> regr = MLPRegressor(random_state=1, max_iter=500).fit(X_train, y_train)\n>>> regr.predict(X_test[:2])\narray([-0.9..., -7.1...])\n>>> regr.score(X_test, y_test)\n0.4...\n\nNotes\n-----\nMLPRegressor trains iteratively since at each time step\nthe partial derivatives of the loss function with respect to the model\nparameters are computed to update the parameters.\n\nIt can also have a regularization term added to the loss function\nthat shrinks model parameters to prevent overfitting.\n\nThis implementation works with data represented as dense and sparse numpy\narrays of floating point values.\n\nReferences\n----------\nHinton, Geoffrey E.\n \"Connectionist learning procedures.\" Artificial intelligence 40.1\n (1989): 185-234.\n\nGlorot, Xavier, and Yoshua Bengio. \"Understanding the difficulty of\n training deep feedforward neural networks.\" International Conference\n on Artificial Intelligence and Statistics. 2010.\n\nHe, Kaiming, et al. \"Delving deep into rectifiers: Surpassing human-level\n performance on imagenet classification.\" arXiv preprint\n arXiv:1502.01852 (2015).\n\nKingma, Diederik, and Jimmy Ba. \"Adam: A method for stochastic\n optimization.\" arXiv preprint arXiv:1412.6980 (2014).", + "code": "class MLPRegressor(RegressorMixin, BaseMultilayerPerceptron):\n \"\"\"Multi-layer Perceptron regressor.\n\n This model optimizes the squared-loss using LBFGS or stochastic gradient\n descent.\n\n .. 
versionadded:: 0.18\n\n Parameters\n ----------\n hidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)\n The ith element represents the number of neurons in the ith\n hidden layer.\n\n activation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu'\n Activation function for the hidden layer.\n\n - 'identity', no-op activation, useful to implement linear bottleneck,\n returns f(x) = x\n\n - 'logistic', the logistic sigmoid function,\n returns f(x) = 1 / (1 + exp(-x)).\n\n - 'tanh', the hyperbolic tan function,\n returns f(x) = tanh(x).\n\n - 'relu', the rectified linear unit function,\n returns f(x) = max(0, x)\n\n solver : {'lbfgs', 'sgd', 'adam'}, default='adam'\n The solver for weight optimization.\n\n - 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n - 'sgd' refers to stochastic gradient descent.\n\n - 'adam' refers to a stochastic gradient-based optimizer proposed by\n Kingma, Diederik, and Jimmy Ba\n\n Note: The default solver 'adam' works pretty well on relatively\n large datasets (with thousands of training samples or more) in terms of\n both training time and validation score.\n For small datasets, however, 'lbfgs' can converge faster and perform\n better.\n\n alpha : float, default=0.0001\n L2 penalty (regularization term) parameter.\n\n batch_size : int, default='auto'\n Size of minibatches for stochastic optimizers.\n If the solver is 'lbfgs', the classifier will not use minibatch.\n When set to \"auto\", `batch_size=min(200, n_samples)`\n\n learning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant'\n Learning rate schedule for weight updates.\n\n - 'constant' is a constant learning rate given by\n 'learning_rate_init'.\n\n - 'invscaling' gradually decreases the learning rate ``learning_rate_``\n at each time step 't' using an inverse scaling exponent of 'power_t'.\n effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n - 'adaptive' keeps the learning rate constant to\n 'learning_rate_init' as long as training loss keeps decreasing.\n Each time two consecutive epochs fail to decrease training loss by at\n least tol, or fail to increase validation score by at least tol if\n 'early_stopping' is on, the current learning rate is divided by 5.\n\n Only used when solver='sgd'.\n\n learning_rate_init : double, default=0.001\n The initial learning rate used. It controls the step-size\n in updating the weights. Only used when solver='sgd' or 'adam'.\n\n power_t : double, default=0.5\n The exponent for inverse scaling learning rate.\n It is used in updating effective learning rate when the learning_rate\n is set to 'invscaling'. Only used when solver='sgd'.\n\n max_iter : int, default=200\n Maximum number of iterations. The solver iterates until convergence\n (determined by 'tol') or this number of iterations. For stochastic\n solvers ('sgd', 'adam'), note that this determines the number of epochs\n (how many times each data point will be used), not the number of\n gradient steps.\n\n shuffle : bool, default=True\n Whether to shuffle samples in each iteration. Only used when\n solver='sgd' or 'adam'.\n\n random_state : int, RandomState instance, default=None\n Determines random number generation for weights and bias\n initialization, train-test split if early stopping is used, and batch\n sampling when solver='sgd' or 'adam'.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n tol : float, default=1e-4\n Tolerance for the optimization. 
When the loss or score is not improving\n by at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\n unless ``learning_rate`` is set to 'adaptive', convergence is\n considered to be reached and training stops.\n\n verbose : bool, default=False\n Whether to print progress messages to stdout.\n\n warm_start : bool, default=False\n When set to True, reuse the solution of the previous\n call to fit as initialization, otherwise, just erase the\n previous solution. See :term:`the Glossary `.\n\n momentum : float, default=0.9\n Momentum for gradient descent update. Should be between 0 and 1. Only\n used when solver='sgd'.\n\n nesterovs_momentum : bool, default=True\n Whether to use Nesterov's momentum. Only used when solver='sgd' and\n momentum > 0.\n\n early_stopping : bool, default=False\n Whether to use early stopping to terminate training when validation\n score is not improving. If set to true, it will automatically set\n aside 10% of training data as validation and terminate training when\n validation score is not improving by at least ``tol`` for\n ``n_iter_no_change`` consecutive epochs.\n Only effective when solver='sgd' or 'adam'\n\n validation_fraction : float, default=0.1\n The proportion of training data to set aside as validation set for\n early stopping. Must be between 0 and 1.\n Only used if early_stopping is True\n\n beta_1 : float, default=0.9\n Exponential decay rate for estimates of first moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\n beta_2 : float, default=0.999\n Exponential decay rate for estimates of second moment vector in adam,\n should be in [0, 1). Only used when solver='adam'\n\n epsilon : float, default=1e-8\n Value for numerical stability in adam. Only used when solver='adam'\n\n n_iter_no_change : int, default=10\n Maximum number of epochs to not meet ``tol`` improvement.\n Only effective when solver='sgd' or 'adam'\n\n .. versionadded:: 0.20\n\n max_fun : int, default=15000\n Only used when solver='lbfgs'. Maximum number of function calls.\n The solver iterates until convergence (determined by 'tol'), number\n of iterations reaches max_iter, or this number of function calls.\n Note that number of function calls will be greater than or equal to\n the number of iterations for the MLPRegressor.\n\n .. 
versionadded:: 0.22\n\n Attributes\n ----------\n loss_ : float\n The current loss computed with the loss function.\n\n best_loss_ : float\n The minimum loss reached by the solver throughout fitting.\n\n loss_curve_ : list of shape (`n_iter_`,)\n Loss value evaluated at the end of each training step.\n The ith element in the list represents the loss at the ith iteration.\n\n t_ : int\n The number of training samples seen by the solver during fitting.\n Mathematically equals `n_iters * X.shape[0]`, it means\n `time_step` and it is used by optimizer's learning rate scheduler.\n\n coefs_ : list of shape (n_layers - 1,)\n The ith element in the list represents the weight matrix corresponding\n to layer i.\n\n intercepts_ : list of shape (n_layers - 1,)\n The ith element in the list represents the bias vector corresponding to\n layer i + 1.\n\n n_iter_ : int\n The number of iterations the solver has run.\n\n n_layers_ : int\n Number of layers.\n\n n_outputs_ : int\n Number of outputs.\n\n out_activation_ : str\n Name of the output activation function.\n\n Examples\n --------\n >>> from sklearn.neural_network import MLPRegressor\n >>> from sklearn.datasets import make_regression\n >>> from sklearn.model_selection import train_test_split\n >>> X, y = make_regression(n_samples=200, random_state=1)\n >>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n ... random_state=1)\n >>> regr = MLPRegressor(random_state=1, max_iter=500).fit(X_train, y_train)\n >>> regr.predict(X_test[:2])\n array([-0.9..., -7.1...])\n >>> regr.score(X_test, y_test)\n 0.4...\n\n Notes\n -----\n MLPRegressor trains iteratively since at each time step\n the partial derivatives of the loss function with respect to the model\n parameters are computed to update the parameters.\n\n It can also have a regularization term added to the loss function\n that shrinks model parameters to prevent overfitting.\n\n This implementation works with data represented as dense and sparse numpy\n arrays of floating point values.\n\n References\n ----------\n Hinton, Geoffrey E.\n \"Connectionist learning procedures.\" Artificial intelligence 40.1\n (1989): 185-234.\n\n Glorot, Xavier, and Yoshua Bengio. \"Understanding the difficulty of\n training deep feedforward neural networks.\" International Conference\n on Artificial Intelligence and Statistics. 2010.\n\n He, Kaiming, et al. \"Delving deep into rectifiers: Surpassing human-level\n performance on imagenet classification.\" arXiv preprint\n arXiv:1502.01852 (2015).\n\n Kingma, Diederik, and Jimmy Ba. 
\"Adam: A method for stochastic\n optimization.\" arXiv preprint arXiv:1412.6980 (2014).\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, hidden_layer_sizes=(100,), activation=\"relu\", *,\n solver='adam', alpha=0.0001,\n batch_size='auto', learning_rate=\"constant\",\n learning_rate_init=0.001,\n power_t=0.5, max_iter=200, shuffle=True,\n random_state=None, tol=1e-4,\n verbose=False, warm_start=False, momentum=0.9,\n nesterovs_momentum=True, early_stopping=False,\n validation_fraction=0.1, beta_1=0.9, beta_2=0.999,\n epsilon=1e-8, n_iter_no_change=10, max_fun=15000):\n super().__init__(\n hidden_layer_sizes=hidden_layer_sizes,\n activation=activation, solver=solver, alpha=alpha,\n batch_size=batch_size, learning_rate=learning_rate,\n learning_rate_init=learning_rate_init, power_t=power_t,\n max_iter=max_iter, loss='squared_loss', shuffle=shuffle,\n random_state=random_state, tol=tol, verbose=verbose,\n warm_start=warm_start, momentum=momentum,\n nesterovs_momentum=nesterovs_momentum,\n early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n beta_1=beta_1, beta_2=beta_2, epsilon=epsilon,\n n_iter_no_change=n_iter_no_change, max_fun=max_fun)\n\n def predict(self, X):\n \"\"\"Predict using the multi-layer perceptron model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n y : ndarray of shape (n_samples, n_outputs)\n The predicted values.\n \"\"\"\n check_is_fitted(self)\n y_pred = self._forward_pass_fast(X)\n if y_pred.shape[1] == 1:\n return y_pred.ravel()\n return y_pred\n\n def _validate_input(self, X, y, incremental, reset):\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'],\n multi_output=True, y_numeric=True,\n dtype=(np.float64, np.float32),\n reset=reset)\n if y.ndim == 2 and y.shape[1] == 1:\n y = column_or_1d(y, warn=True)\n return X, y", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM", + "name": "BernoulliRBM", + "qname": "sklearn.neural_network._rbm.BernoulliRBM", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/__init__", + "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/transform", + "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_mean_hiddens", + "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_sample_hiddens", + "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_sample_visibles", + "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_free_energy", + "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/gibbs", + "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/partial_fit", + "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_fit", + "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/score_samples", + "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/fit", + "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Bernoulli Restricted Boltzmann Machine (RBM).\n\nA Restricted Boltzmann Machine with binary visible units and\nbinary hidden units. 
Parameters are estimated using Stochastic Maximum\nLikelihood (SML), also known as Persistent Contrastive Divergence (PCD)\n[2].\n\nThe time complexity of this implementation is ``O(d ** 2)`` assuming\nd ~ n_features ~ n_components.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Bernoulli Restricted Boltzmann Machine (RBM).\n\nA Restricted Boltzmann Machine with binary visible units and\nbinary hidden units. Parameters are estimated using Stochastic Maximum\nLikelihood (SML), also known as Persistent Contrastive Divergence (PCD)\n[2].\n\nThe time complexity of this implementation is ``O(d ** 2)`` assuming\nd ~ n_features ~ n_components.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=256\n Number of binary hidden units.\n\nlearning_rate : float, default=0.1\n The learning rate for weight updates. It is *highly* recommended\n to tune this hyper-parameter. Reasonable values are in the\n 10**[0., -3.] range.\n\nbatch_size : int, default=10\n Number of examples per minibatch.\n\nn_iter : int, default=10\n Number of iterations/sweeps over the training dataset to perform\n during training.\n\nverbose : int, default=0\n The verbosity level. The default, zero, means silent mode.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for:\n\n - Gibbs sampling from visible and hidden layers.\n\n - Initializing components, sampling from layers during fit.\n\n - Corrupting the data when scoring samples.\n\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nintercept_hidden_ : array-like of shape (n_components,)\n Biases of the hidden units.\n\nintercept_visible_ : array-like of shape (n_features,)\n Biases of the visible units.\n\ncomponents_ : array-like of shape (n_components, n_features)\n Weight matrix, where n_features in the number of\n visible units and n_components is the number of hidden units.\n\nh_samples_ : array-like of shape (batch_size, n_components)\n Hidden Activation sampled from the model distribution,\n where batch_size in the number of examples per minibatch and\n n_components is the number of hidden units.\n\nExamples\n--------\n\n>>> import numpy as np\n>>> from sklearn.neural_network import BernoulliRBM\n>>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n>>> model = BernoulliRBM(n_components=2)\n>>> model.fit(X)\nBernoulliRBM(n_components=2)\n\nReferences\n----------\n\n[1] Hinton, G. E., Osindero, S. and Teh, Y. A fast learning algorithm for\n deep belief nets. Neural Computation 18, pp 1527-1554.\n https://www.cs.toronto.edu/~hinton/absps/fastnc.pdf\n\n[2] Tieleman, T. Training Restricted Boltzmann Machines using\n Approximations to the Likelihood Gradient. International Conference\n on Machine Learning (ICML) 2008", + "code": "class BernoulliRBM(TransformerMixin, BaseEstimator):\n \"\"\"Bernoulli Restricted Boltzmann Machine (RBM).\n\n A Restricted Boltzmann Machine with binary visible units and\n binary hidden units. Parameters are estimated using Stochastic Maximum\n Likelihood (SML), also known as Persistent Contrastive Divergence (PCD)\n [2].\n\n The time complexity of this implementation is ``O(d ** 2)`` assuming\n d ~ n_features ~ n_components.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int, default=256\n Number of binary hidden units.\n\n learning_rate : float, default=0.1\n The learning rate for weight updates. 
It is *highly* recommended\n to tune this hyper-parameter. Reasonable values are in the\n 10**[0., -3.] range.\n\n batch_size : int, default=10\n Number of examples per minibatch.\n\n n_iter : int, default=10\n Number of iterations/sweeps over the training dataset to perform\n during training.\n\n verbose : int, default=0\n The verbosity level. The default, zero, means silent mode.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for:\n\n - Gibbs sampling from visible and hidden layers.\n\n - Initializing components, sampling from layers during fit.\n\n - Corrupting the data when scoring samples.\n\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n intercept_hidden_ : array-like of shape (n_components,)\n Biases of the hidden units.\n\n intercept_visible_ : array-like of shape (n_features,)\n Biases of the visible units.\n\n components_ : array-like of shape (n_components, n_features)\n Weight matrix, where n_features is the number of\n visible units and n_components is the number of hidden units.\n\n h_samples_ : array-like of shape (batch_size, n_components)\n Hidden activation sampled from the model distribution,\n where batch_size is the number of examples per minibatch and\n n_components is the number of hidden units.\n\n Examples\n --------\n\n >>> import numpy as np\n >>> from sklearn.neural_network import BernoulliRBM\n >>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])\n >>> model = BernoulliRBM(n_components=2)\n >>> model.fit(X)\n BernoulliRBM(n_components=2)\n\n References\n ----------\n\n [1] Hinton, G. E., Osindero, S. and Teh, Y. A fast learning algorithm for\n deep belief nets. Neural Computation 18, pp 1527-1554.\n https://www.cs.toronto.edu/~hinton/absps/fastnc.pdf\n\n [2] Tieleman, T. Training Restricted Boltzmann Machines using\n Approximations to the Likelihood Gradient. 
International Conference\n on Machine Learning (ICML) 2008\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components=256, *, learning_rate=0.1, batch_size=10,\n n_iter=10, verbose=0, random_state=None):\n self.n_components = n_components\n self.learning_rate = learning_rate\n self.batch_size = batch_size\n self.n_iter = n_iter\n self.verbose = verbose\n self.random_state = random_state\n\n def transform(self, X):\n \"\"\"Compute the hidden layer activation probabilities, P(h=1|v=X).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to be transformed.\n\n Returns\n -------\n h : ndarray of shape (n_samples, n_components)\n Latent representations of the data.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, accept_sparse='csr', reset=False,\n dtype=(np.float64, np.float32))\n return self._mean_hiddens(X)\n\n def _mean_hiddens(self, v):\n \"\"\"Computes the probabilities P(h=1|v).\n\n Parameters\n ----------\n v : ndarray of shape (n_samples, n_features)\n Values of the visible layer.\n\n Returns\n -------\n h : ndarray of shape (n_samples, n_components)\n Corresponding mean field values for the hidden layer.\n \"\"\"\n p = safe_sparse_dot(v, self.components_.T)\n p += self.intercept_hidden_\n return expit(p, out=p)\n\n def _sample_hiddens(self, v, rng):\n \"\"\"Sample from the distribution P(h|v).\n\n Parameters\n ----------\n v : ndarray of shape (n_samples, n_features)\n Values of the visible layer to sample from.\n\n rng : RandomState instance\n Random number generator to use.\n\n Returns\n -------\n h : ndarray of shape (n_samples, n_components)\n Values of the hidden layer.\n \"\"\"\n p = self._mean_hiddens(v)\n return (rng.random_sample(size=p.shape) < p)\n\n def _sample_visibles(self, h, rng):\n \"\"\"Sample from the distribution P(v|h).\n\n Parameters\n ----------\n h : ndarray of shape (n_samples, n_components)\n Values of the hidden layer to sample from.\n\n rng : RandomState instance\n Random number generator to use.\n\n Returns\n -------\n v : ndarray of shape (n_samples, n_features)\n Values of the visible layer.\n \"\"\"\n p = np.dot(h, self.components_)\n p += self.intercept_visible_\n expit(p, out=p)\n return (rng.random_sample(size=p.shape) < p)\n\n def _free_energy(self, v):\n \"\"\"Computes the free energy F(v) = - log sum_h exp(-E(v,h)).\n\n Parameters\n ----------\n v : ndarray of shape (n_samples, n_features)\n Values of the visible layer.\n\n Returns\n -------\n free_energy : ndarray of shape (n_samples,)\n The value of the free energy.\n \"\"\"\n return (- safe_sparse_dot(v, self.intercept_visible_)\n - np.logaddexp(0, safe_sparse_dot(v, self.components_.T)\n + self.intercept_hidden_).sum(axis=1))\n\n def gibbs(self, v):\n \"\"\"Perform one Gibbs sampling step.\n\n Parameters\n ----------\n v : ndarray of shape (n_samples, n_features)\n Values of the visible layer to start from.\n\n Returns\n -------\n v_new : ndarray of shape (n_samples, n_features)\n Values of the visible layer after one Gibbs step.\n \"\"\"\n check_is_fitted(self)\n if not hasattr(self, \"random_state_\"):\n self.random_state_ = check_random_state(self.random_state)\n h_ = self._sample_hiddens(v, self.random_state_)\n v_ = self._sample_visibles(h_, self.random_state_)\n\n return v_\n\n def partial_fit(self, X, y=None):\n \"\"\"Fit the model to the data X which should contain a partial\n segment of the data.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Training data.\n\n Returns\n 
-------\n self : BernoulliRBM\n The fitted model.\n \"\"\"\n first_pass = not hasattr(self, 'components_')\n X = self._validate_data(X, accept_sparse='csr', dtype=np.float64,\n reset=first_pass)\n if not hasattr(self, 'random_state_'):\n self.random_state_ = check_random_state(self.random_state)\n if not hasattr(self, 'components_'):\n self.components_ = np.asarray(\n self.random_state_.normal(\n 0,\n 0.01,\n (self.n_components, X.shape[1])\n ),\n order='F')\n if not hasattr(self, 'intercept_hidden_'):\n self.intercept_hidden_ = np.zeros(self.n_components, )\n if not hasattr(self, 'intercept_visible_'):\n self.intercept_visible_ = np.zeros(X.shape[1], )\n if not hasattr(self, 'h_samples_'):\n self.h_samples_ = np.zeros((self.batch_size, self.n_components))\n\n self._fit(X, self.random_state_)\n\n def _fit(self, v_pos, rng):\n \"\"\"Inner fit for one mini-batch.\n\n Adjust the parameters to maximize the likelihood of v using\n Stochastic Maximum Likelihood (SML).\n\n Parameters\n ----------\n v_pos : ndarray of shape (n_samples, n_features)\n The data to use for training.\n\n rng : RandomState instance\n Random number generator to use for sampling.\n \"\"\"\n h_pos = self._mean_hiddens(v_pos)\n v_neg = self._sample_visibles(self.h_samples_, rng)\n h_neg = self._mean_hiddens(v_neg)\n\n lr = float(self.learning_rate) / v_pos.shape[0]\n update = safe_sparse_dot(v_pos.T, h_pos, dense_output=True).T\n update -= np.dot(h_neg.T, v_neg)\n self.components_ += lr * update\n self.intercept_hidden_ += lr * (h_pos.sum(axis=0) - h_neg.sum(axis=0))\n self.intercept_visible_ += lr * (np.asarray(\n v_pos.sum(axis=0)).squeeze() -\n v_neg.sum(axis=0))\n\n h_neg[rng.uniform(size=h_neg.shape) < h_neg] = 1.0 # sample binomial\n self.h_samples_ = np.floor(h_neg, h_neg)\n\n def score_samples(self, X):\n \"\"\"Compute the pseudo-likelihood of X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Values of the visible layer. 
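The PCD update in `_fit` above pairs positive statistics from the mini-batch with negative statistics from a persistent Gibbs chain. As a rough standalone sketch of one such step (illustrative names and shapes only, not part of this API dump; assumes NumPy and SciPy, and omits the binomial resampling of the chain):

>>> import numpy as np
>>> from scipy.special import expit  # logistic sigmoid, as in _mean_hiddens
>>> rng = np.random.RandomState(0)
>>> v_pos = rng.randint(2, size=(10, 6)).astype(float)       # mini-batch
>>> W = rng.normal(0, 0.01, size=(4, 6))                     # components_
>>> b_h, b_v = np.zeros(4), np.zeros(6)                      # intercepts
>>> h_samples = np.zeros((10, 4))                            # persistent chain
>>> h_pos = expit(v_pos @ W.T + b_h)                         # P(h=1|v) on data
>>> v_neg = (rng.uniform(size=(10, 6))
...          < expit(h_samples @ W + b_v)).astype(float)     # fantasy visibles
>>> h_neg = expit(v_neg @ W.T + b_h)
>>> W += (0.1 / 10) * (h_pos.T @ v_pos - h_neg.T @ v_neg)    # lr / batch_size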
Must be all-boolean (not checked).\n\n Returns\n -------\n pseudo_likelihood : ndarray of shape (n_samples,)\n Value of the pseudo-likelihood (proxy for likelihood).\n\n Notes\n -----\n This method is not deterministic: it computes a quantity called the\n free energy on X, then on a randomly corrupted version of X, and\n returns the log of the logistic function of the difference.\n \"\"\"\n check_is_fitted(self)\n\n v = check_array(X, accept_sparse='csr')\n rng = check_random_state(self.random_state)\n\n # Randomly corrupt one feature in each sample in v.\n ind = (np.arange(v.shape[0]),\n rng.randint(0, v.shape[1], v.shape[0]))\n if sp.issparse(v):\n data = -2 * v[ind] + 1\n v_ = v + sp.csr_matrix((data.A.ravel(), ind), shape=v.shape)\n else:\n v_ = v.copy()\n v_[ind] = 1 - v_[ind]\n\n fe = self._free_energy(v)\n fe_ = self._free_energy(v_)\n return v.shape[1] * log_logistic(fe_ - fe)\n\n def fit(self, X, y=None):\n \"\"\"Fit the model to the data X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n Returns\n -------\n self : BernoulliRBM\n The fitted model.\n \"\"\"\n X = self._validate_data(\n X, accept_sparse='csr', dtype=(np.float64, np.float32)\n )\n n_samples = X.shape[0]\n rng = check_random_state(self.random_state)\n\n self.components_ = np.asarray(\n rng.normal(0, 0.01, (self.n_components, X.shape[1])),\n order='F',\n dtype=X.dtype)\n self.intercept_hidden_ = np.zeros(self.n_components, dtype=X.dtype)\n self.intercept_visible_ = np.zeros(X.shape[1], dtype=X.dtype)\n self.h_samples_ = np.zeros((self.batch_size, self.n_components),\n dtype=X.dtype)\n\n n_batches = int(np.ceil(float(n_samples) / self.batch_size))\n batch_slices = list(gen_even_slices(n_batches * self.batch_size,\n n_batches, n_samples=n_samples))\n verbose = self.verbose\n begin = time.time()\n for iteration in range(1, self.n_iter + 1):\n for batch_slice in batch_slices:\n self._fit(X[batch_slice], rng)\n\n if verbose:\n end = time.time()\n print(\"[%s] Iteration %d, pseudo-likelihood = %.2f,\"\n \" time = %.2fs\"\n % (type(self).__name__, iteration,\n self.score_samples(X).mean(), end - begin))\n begin = end\n\n return self\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_methods_subset_invariance':\n 'fails for the decision_function method',\n 'check_methods_sample_order_invariance':\n 'fails for the score_samples method',\n }\n }", + "instance_attributes": [ + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "learning_rate", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "batch_size", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "intercept_hidden_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "intercept_visible_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "h_samples_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer", + "name": "AdamOptimizer", + "qname": "sklearn.neural_network._stochastic_optimizers.AdamOptimizer", + "decorators": [], + "superclasses": ["BaseOptimizer"], + "methods": [ + 
"scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer/__init__", + "scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer/_get_updates" + ], + "is_public": false, + "reexported_by": [], + "description": "Stochastic gradient descent optimizer with Adam\n\nNote: All default values are from the original Adam paper", + "docstring": "Stochastic gradient descent optimizer with Adam\n\nNote: All default values are from the original Adam paper\n\nParameters\n----------\nparams : list, length = len(coefs_) + len(intercepts_)\n The concatenated list containing coefs_ and intercepts_ in MLP model.\n Used for initializing velocities and updating params\n\nlearning_rate_init : float, default=0.001\n The initial learning rate used. It controls the step-size in updating\n the weights\n\nbeta_1 : float, default=0.9\n Exponential decay rate for estimates of first moment vector, should be\n in [0, 1)\n\nbeta_2 : float, default=0.999\n Exponential decay rate for estimates of second moment vector, should be\n in [0, 1)\n\nepsilon : float, default=1e-8\n Value for numerical stability\n\nAttributes\n----------\nlearning_rate : float\n The current learning rate\n\nt : int\n Timestep\n\nms : list, length = len(params)\n First moment vectors\n\nvs : list, length = len(params)\n Second moment vectors\n\nReferences\n----------\nKingma, Diederik, and Jimmy Ba.\n\"Adam: A method for stochastic optimization.\"\narXiv preprint arXiv:1412.6980 (2014).", + "code": "class AdamOptimizer(BaseOptimizer):\n \"\"\"Stochastic gradient descent optimizer with Adam\n\n Note: All default values are from the original Adam paper\n\n Parameters\n ----------\n params : list, length = len(coefs_) + len(intercepts_)\n The concatenated list containing coefs_ and intercepts_ in MLP model.\n Used for initializing velocities and updating params\n\n learning_rate_init : float, default=0.001\n The initial learning rate used. It controls the step-size in updating\n the weights\n\n beta_1 : float, default=0.9\n Exponential decay rate for estimates of first moment vector, should be\n in [0, 1)\n\n beta_2 : float, default=0.999\n Exponential decay rate for estimates of second moment vector, should be\n in [0, 1)\n\n epsilon : float, default=1e-8\n Value for numerical stability\n\n Attributes\n ----------\n learning_rate : float\n The current learning rate\n\n t : int\n Timestep\n\n ms : list, length = len(params)\n First moment vectors\n\n vs : list, length = len(params)\n Second moment vectors\n\n References\n ----------\n Kingma, Diederik, and Jimmy Ba.\n \"Adam: A method for stochastic optimization.\"\n arXiv preprint arXiv:1412.6980 (2014).\n \"\"\"\n\n def __init__(self, params, learning_rate_init=0.001, beta_1=0.9,\n beta_2=0.999, epsilon=1e-8):\n super().__init__(params, learning_rate_init)\n\n self.beta_1 = beta_1\n self.beta_2 = beta_2\n self.epsilon = epsilon\n self.t = 0\n self.ms = [np.zeros_like(param) for param in params]\n self.vs = [np.zeros_like(param) for param in params]\n\n def _get_updates(self, grads):\n \"\"\"Get the values used to update params with given gradients\n\n Parameters\n ----------\n grads : list, length = len(coefs_) + len(intercepts_)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. 
So length should be aligned with params\n\n Returns\n -------\n updates : list, length = len(grads)\n The values to add to params\n \"\"\"\n self.t += 1\n self.ms = [self.beta_1 * m + (1 - self.beta_1) * grad\n for m, grad in zip(self.ms, grads)]\n self.vs = [self.beta_2 * v + (1 - self.beta_2) * (grad ** 2)\n for v, grad in zip(self.vs, grads)]\n self.learning_rate = (self.learning_rate_init *\n np.sqrt(1 - self.beta_2 ** self.t) /\n (1 - self.beta_1 ** self.t))\n updates = [-self.learning_rate * m / (np.sqrt(v) + self.epsilon)\n for m, v in zip(self.ms, self.vs)]\n return updates", + "instance_attributes": [ + { + "name": "beta_1", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "beta_2", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "epsilon", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "t", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer", + "name": "BaseOptimizer", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/__init__", + "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/update_params", + "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/iteration_ends", + "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/trigger_stopping" + ], + "is_public": false, + "reexported_by": [], + "description": "Base (Stochastic) gradient descent optimizer", + "docstring": "Base (Stochastic) gradient descent optimizer\n\nParameters\n----------\nparams : list, length = len(coefs_) + len(intercepts_)\n The concatenated list containing coefs_ and intercepts_ in MLP model.\n Used for initializing velocities and updating params\n\nlearning_rate_init : float, default=0.1\n The initial learning rate used. It controls the step-size in updating\n the weights\n\nAttributes\n----------\nlearning_rate : float\n the current learning rate", + "code": "class BaseOptimizer:\n \"\"\"Base (Stochastic) gradient descent optimizer\n\n Parameters\n ----------\n params : list, length = len(coefs_) + len(intercepts_)\n The concatenated list containing coefs_ and intercepts_ in MLP model.\n Used for initializing velocities and updating params\n\n learning_rate_init : float, default=0.1\n The initial learning rate used. It controls the step-size in updating\n the weights\n\n Attributes\n ----------\n learning_rate : float\n the current learning rate\n \"\"\"\n\n def __init__(self, params, learning_rate_init=0.1):\n self.params = [param for param in params]\n self.learning_rate_init = learning_rate_init\n self.learning_rate = float(learning_rate_init)\n\n def update_params(self, grads):\n \"\"\"Update parameters with given gradients\n\n Parameters\n ----------\n grads : list, length = len(params)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. 
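For reference, one Adam step as computed by `_get_updates` above, written out for a single parameter (a minimal sketch with illustrative values; the bias correction is folded into the effective learning rate exactly as in the code):

>>> import numpy as np
>>> grad = np.array([0.5, -1.0])
>>> m, v, t = np.zeros(2), np.zeros(2), 1                    # first step
>>> beta_1, beta_2, eps, lr_init = 0.9, 0.999, 1e-8, 0.001   # paper defaults
>>> m = beta_1 * m + (1 - beta_1) * grad                     # first moment
>>> v = beta_2 * v + (1 - beta_2) * grad ** 2                # second moment
>>> lr = lr_init * np.sqrt(1 - beta_2 ** t) / (1 - beta_1 ** t)
>>> update = -lr * m / (np.sqrt(v) + eps)                    # added to param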
So length should be aligned with params\n \"\"\"\n updates = self._get_updates(grads)\n for param, update in zip(self.params, updates):\n param += update\n\n def iteration_ends(self, time_step):\n \"\"\"Perform update to learning rate and potentially other states at the\n end of an iteration\n \"\"\"\n pass\n\n def trigger_stopping(self, msg, verbose):\n \"\"\"Decides whether it is time to stop training\n\n Parameters\n ----------\n msg : str\n Message passed in for verbose output\n\n verbose : bool\n Print message to stdout if True\n\n Returns\n -------\n is_stopping : bool\n True if training needs to stop\n \"\"\"\n if verbose:\n print(msg + \" Stopping.\")\n return True", + "instance_attributes": [ + { + "name": "learning_rate_init", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "learning_rate", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer", + "name": "SGDOptimizer", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer", + "decorators": [], + "superclasses": ["BaseOptimizer"], + "methods": [ + "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/__init__", + "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/iteration_ends", + "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/trigger_stopping", + "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/_get_updates" + ], + "is_public": false, + "reexported_by": [], + "description": "Stochastic gradient descent optimizer with momentum", + "docstring": "Stochastic gradient descent optimizer with momentum\n\nParameters\n----------\nparams : list, length = len(coefs_) + len(intercepts_)\n The concatenated list containing coefs_ and intercepts_ in MLP model.\n Used for initializing velocities and updating params\n\nlearning_rate_init : float, default=0.1\n The initial learning rate used. It controls the step-size in updating\n the weights\n\nlr_schedule : {'constant', 'adaptive', 'invscaling'}, default='constant'\n Learning rate schedule for weight updates.\n\n -'constant', is a constant learning rate given by\n 'learning_rate_init'.\n\n -'invscaling' gradually decreases the learning rate 'learning_rate_' at\n each time step 't' using an inverse scaling exponent of 'power_t'.\n learning_rate_ = learning_rate_init / pow(t, power_t)\n\n -'adaptive', keeps the learning rate constant to\n 'learning_rate_init' as long as the training loss keeps decreasing.\n Each time 2 consecutive epochs fail to decrease the training loss by\n tol, or fail to increase validation score by tol if 'early_stopping'\n is on, the current learning rate is divided by 5.\n\nmomentum : float, default=0.9\n Value of momentum used, must be larger than or equal to 0\n\nnesterov : bool, default=True\n Whether to use nesterov's momentum or not. Use nesterov's if True\n\npower_t : float, default=0.5\n Power of time step 't' in inverse scaling. 
See `lr_schedule` for\n more details.\n\nAttributes\n----------\nlearning_rate : float\n the current learning rate\n\nvelocities : list, length = len(params)\n velocities that are used to update params", + "code": "class SGDOptimizer(BaseOptimizer):\n \"\"\"Stochastic gradient descent optimizer with momentum\n\n Parameters\n ----------\n params : list, length = len(coefs_) + len(intercepts_)\n The concatenated list containing coefs_ and intercepts_ in MLP model.\n Used for initializing velocities and updating params\n\n learning_rate_init : float, default=0.1\n The initial learning rate used. It controls the step-size in updating\n the weights\n\n lr_schedule : {'constant', 'adaptive', 'invscaling'}, default='constant'\n Learning rate schedule for weight updates.\n\n -'constant', is a constant learning rate given by\n 'learning_rate_init'.\n\n -'invscaling' gradually decreases the learning rate 'learning_rate_' at\n each time step 't' using an inverse scaling exponent of 'power_t'.\n learning_rate_ = learning_rate_init / pow(t, power_t)\n\n -'adaptive', keeps the learning rate constant to\n 'learning_rate_init' as long as the training loss keeps decreasing.\n Each time 2 consecutive epochs fail to decrease the training loss by\n tol, or fail to increase validation score by tol if 'early_stopping'\n is on, the current learning rate is divided by 5.\n\n momentum : float, default=0.9\n Value of momentum used, must be larger than or equal to 0\n\n nesterov : bool, default=True\n Whether to use nesterov's momentum or not. Use nesterov's if True\n\n power_t : float, default=0.5\n Power of time step 't' in inverse scaling. See `lr_schedule` for\n more details.\n\n Attributes\n ----------\n learning_rate : float\n the current learning rate\n\n velocities : list, length = len(params)\n velocities that are used to update params\n \"\"\"\n\n def __init__(self, params, learning_rate_init=0.1, lr_schedule='constant',\n momentum=0.9, nesterov=True, power_t=0.5):\n super().__init__(params, learning_rate_init)\n\n self.lr_schedule = lr_schedule\n self.momentum = momentum\n self.nesterov = nesterov\n self.power_t = power_t\n self.velocities = [np.zeros_like(param) for param in params]\n\n def iteration_ends(self, time_step):\n \"\"\"Perform updates to learning rate and potentially other states at the\n end of an iteration\n\n Parameters\n ----------\n time_step : int\n number of training samples trained on so far, used to update\n learning rate for 'invscaling'\n \"\"\"\n if self.lr_schedule == 'invscaling':\n self.learning_rate = (float(self.learning_rate_init) /\n (time_step + 1) ** self.power_t)\n\n def trigger_stopping(self, msg, verbose):\n if self.lr_schedule != 'adaptive':\n if verbose:\n print(msg + \" Stopping.\")\n return True\n\n if self.learning_rate <= 1e-6:\n if verbose:\n print(msg + \" Learning rate too small. Stopping.\")\n return True\n\n self.learning_rate /= 5.\n if verbose:\n print(msg + \" Setting learning rate to %f\" %\n self.learning_rate)\n return False\n\n def _get_updates(self, grads):\n \"\"\"Get the values used to update params with given gradients\n\n Parameters\n ----------\n grads : list, length = len(coefs_) + len(intercepts_)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. 
So length should be aligned with params\n\n Returns\n -------\n updates : list, length = len(grads)\n The values to add to params\n \"\"\"\n updates = [self.momentum * velocity - self.learning_rate * grad\n for velocity, grad in zip(self.velocities, grads)]\n self.velocities = updates\n\n if self.nesterov:\n updates = [self.momentum * velocity - self.learning_rate * grad\n for velocity, grad in zip(self.velocities, grads)]\n\n return updates", + "instance_attributes": [ + { + "name": "lr_schedule", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "momentum", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "nesterov", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "power_t", + "types": { + "kind": "NamedType", + "name": "float" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion", + "name": "FeatureUnion", + "qname": "sklearn.pipeline.FeatureUnion", + "decorators": [], + "superclasses": ["TransformerMixin", "_BaseComposition"], + "methods": [ + "scikit-learn/sklearn.pipeline/FeatureUnion/__init__", + "scikit-learn/sklearn.pipeline/FeatureUnion/get_params", + "scikit-learn/sklearn.pipeline/FeatureUnion/set_params", + "scikit-learn/sklearn.pipeline/FeatureUnion/_validate_transformers", + "scikit-learn/sklearn.pipeline/FeatureUnion/_validate_transformer_weights", + "scikit-learn/sklearn.pipeline/FeatureUnion/_iter", + "scikit-learn/sklearn.pipeline/FeatureUnion/get_feature_names", + "scikit-learn/sklearn.pipeline/FeatureUnion/fit", + "scikit-learn/sklearn.pipeline/FeatureUnion/fit_transform", + "scikit-learn/sklearn.pipeline/FeatureUnion/_log_message", + "scikit-learn/sklearn.pipeline/FeatureUnion/_parallel_func", + "scikit-learn/sklearn.pipeline/FeatureUnion/transform", + "scikit-learn/sklearn.pipeline/FeatureUnion/_hstack", + "scikit-learn/sklearn.pipeline/FeatureUnion/_update_transformer_list", + "scikit-learn/sklearn.pipeline/FeatureUnion/n_features_in_@getter", + "scikit-learn/sklearn.pipeline/FeatureUnion/_sk_visual_block_" + ], + "is_public": true, + "reexported_by": [], + "description": "Concatenates results of multiple transformer objects.\n\nThis estimator applies a list of transformer objects in parallel to the\ninput data, then concatenates the results. This is useful to combine\nseveral feature extraction mechanisms into a single transformer.\n\nParameters of the transformers may be set using its name and the parameter\nname separated by a '__'. A transformer may be replaced entirely by\nsetting the parameter with its name to another transformer,\nor removed by setting to 'drop'.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "docstring": "Concatenates results of multiple transformer objects.\n\nThis estimator applies a list of transformer objects in parallel to the\ninput data, then concatenates the results. This is useful to combine\nseveral feature extraction mechanisms into a single transformer.\n\nParameters of the transformers may be set using its name and the parameter\nname separated by a '__'. A transformer may be replaced entirely by\nsetting the parameter with its name to another transformer,\nor removed by setting to 'drop'.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\ntransformer_list : list of (string, transformer) tuples\n List of transformer objects to be applied to the data. The first\n half of each tuple is the name of the transformer. 
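The momentum update in `SGDOptimizer._get_updates` above reduces to the following sketch (illustrative values; note the Nesterov branch recomputes the step from the freshly updated velocity):

>>> import numpy as np
>>> grad, velocity = np.array([1.0, -2.0]), np.zeros(2)
>>> momentum, lr, nesterov = 0.9, 0.1, True
>>> velocity = momentum * velocity - lr * grad        # stored for next call
>>> update = velocity
>>> if nesterov:
...     update = momentum * velocity - lr * grad      # look-ahead step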
The transformer can\n be 'drop' for it to be ignored.\n\n .. versionchanged:: 0.22\n Deprecated `None` as a transformer in favor of 'drop'.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\ntransformer_weights : dict, default=None\n Multiplicative weights for features per transformer.\n Keys are transformer names, values the weights.\n Raises ValueError if key not present in ``transformer_list``.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nSee Also\n--------\nmake_union : Convenience function for simplified feature union\n construction.\n\nExamples\n--------\n>>> from sklearn.pipeline import FeatureUnion\n>>> from sklearn.decomposition import PCA, TruncatedSVD\n>>> union = FeatureUnion([(\"pca\", PCA(n_components=1)),\n... (\"svd\", TruncatedSVD(n_components=2))])\n>>> X = [[0., 1., 3], [2., 2., 5]]\n>>> union.fit_transform(X)\narray([[ 1.5 , 3.0..., 0.8...],\n [-1.5 , 5.7..., -0.4...]])", + "code": "class FeatureUnion(TransformerMixin, _BaseComposition):\n \"\"\"Concatenates results of multiple transformer objects.\n\n This estimator applies a list of transformer objects in parallel to the\n input data, then concatenates the results. This is useful to combine\n several feature extraction mechanisms into a single transformer.\n\n Parameters of the transformers may be set using its name and the parameter\n name separated by a '__'. A transformer may be replaced entirely by\n setting the parameter with its name to another transformer,\n or removed by setting to 'drop'.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.13\n\n Parameters\n ----------\n transformer_list : list of (string, transformer) tuples\n List of transformer objects to be applied to the data. The first\n half of each tuple is the name of the transformer. The transformer can\n be 'drop' for it to be ignored.\n\n .. versionchanged:: 0.22\n Deprecated `None` as a transformer in favor of 'drop'.\n\n n_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\n transformer_weights : dict, default=None\n Multiplicative weights for features per transformer.\n Keys are transformer names, values the weights.\n Raises ValueError if key not present in ``transformer_list``.\n\n verbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\n See Also\n --------\n make_union : Convenience function for simplified feature union\n construction.\n\n Examples\n --------\n >>> from sklearn.pipeline import FeatureUnion\n >>> from sklearn.decomposition import PCA, TruncatedSVD\n >>> union = FeatureUnion([(\"pca\", PCA(n_components=1)),\n ... 
(\"svd\", TruncatedSVD(n_components=2))])\n >>> X = [[0., 1., 3], [2., 2., 5]]\n >>> union.fit_transform(X)\n array([[ 1.5 , 3.0..., 0.8...],\n [-1.5 , 5.7..., -0.4...]])\n \"\"\"\n _required_parameters = [\"transformer_list\"]\n\n @_deprecate_positional_args\n def __init__(self, transformer_list, *, n_jobs=None,\n transformer_weights=None, verbose=False):\n self.transformer_list = transformer_list\n self.n_jobs = n_jobs\n self.transformer_weights = transformer_weights\n self.verbose = verbose\n self._validate_transformers()\n\n def get_params(self, deep=True):\n \"\"\"Get parameters for this estimator.\n\n Returns the parameters given in the constructor as well as the\n estimators contained within the `transformer_list` of the\n `FeatureUnion`.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : mapping of string to any\n Parameter names mapped to their values.\n \"\"\"\n return self._get_params('transformer_list', deep=deep)\n\n def set_params(self, **kwargs):\n \"\"\"Set the parameters of this estimator.\n\n Valid parameter keys can be listed with ``get_params()``. Note that\n you can directly set the parameters of the estimators contained in\n `tranformer_list`.\n\n Returns\n -------\n self\n \"\"\"\n self._set_params('transformer_list', **kwargs)\n return self\n\n def _validate_transformers(self):\n names, transformers = zip(*self.transformer_list)\n\n # validate names\n self._validate_names(names)\n\n # validate estimators\n for t in transformers:\n if t == 'drop':\n continue\n if (not (hasattr(t, \"fit\") or hasattr(t, \"fit_transform\")) or not\n hasattr(t, \"transform\")):\n raise TypeError(\"All estimators should implement fit and \"\n \"transform. 
'%s' (type %s) doesn't\" %\n (t, type(t)))\n\n def _validate_transformer_weights(self):\n if not self.transformer_weights:\n return\n\n transformer_names = set(name for name, _ in self.transformer_list)\n for name in self.transformer_weights:\n if name not in transformer_names:\n raise ValueError(\n f'Attempting to weight transformer \"{name}\", '\n 'but it is not present in transformer_list.'\n )\n\n def _iter(self):\n \"\"\"\n Generate (name, trans, weight) tuples excluding None and\n 'drop' transformers.\n \"\"\"\n get_weight = (self.transformer_weights or {}).get\n return ((name, trans, get_weight(name))\n for name, trans in self.transformer_list\n if trans != 'drop')\n\n def get_feature_names(self):\n \"\"\"Get feature names from all transformers.\n\n Returns\n -------\n feature_names : list of strings\n Names of the features produced by transform.\n \"\"\"\n feature_names = []\n for name, trans, weight in self._iter():\n if not hasattr(trans, 'get_feature_names'):\n raise AttributeError(\"Transformer %s (type %s) does not \"\n \"provide get_feature_names.\"\n % (str(name), type(trans).__name__))\n feature_names.extend([name + \"__\" + f for f in\n trans.get_feature_names()])\n return feature_names\n\n def fit(self, X, y=None, **fit_params):\n \"\"\"Fit all transformers using X.\n\n Parameters\n ----------\n X : iterable or array-like, depending on transformers\n Input data, used to fit transformers.\n\n y : array-like of shape (n_samples, n_outputs), default=None\n Targets for supervised learning.\n\n Returns\n -------\n self : FeatureUnion\n This estimator\n \"\"\"\n transformers = self._parallel_func(X, y, fit_params, _fit_one)\n if not transformers:\n # All transformers are None\n return self\n\n self._update_transformer_list(transformers)\n return self\n\n def fit_transform(self, X, y=None, **fit_params):\n \"\"\"Fit all transformers, transform the data and concatenate results.\n\n Parameters\n ----------\n X : iterable or array-like, depending on transformers\n Input data to be transformed.\n\n y : array-like of shape (n_samples, n_outputs), default=None\n Targets for supervised learning.\n\n Returns\n -------\n X_t : array-like or sparse matrix of \\\n shape (n_samples, sum_n_components)\n hstack of results of transformers. 
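As a hedged usage sketch of the `name__parameter` convention handled by `get_params`/`set_params` above (transformer names are illustrative):

>>> from sklearn.decomposition import PCA, TruncatedSVD
>>> from sklearn.pipeline import FeatureUnion
>>> union = FeatureUnion([("pca", PCA(n_components=1)),
...                       ("svd", TruncatedSVD(n_components=2))],
...                      transformer_weights={"pca": 2.0, "svd": 1.0})
>>> union = union.set_params(svd__n_components=1)  # reach into one member
>>> union = union.set_params(pca="drop")           # remove a member entirely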
sum_n_components is the\n sum of n_components (output dimension) over transformers.\n \"\"\"\n results = self._parallel_func(X, y, fit_params, _fit_transform_one)\n if not results:\n # All transformers are None\n return np.zeros((X.shape[0], 0))\n\n Xs, transformers = zip(*results)\n self._update_transformer_list(transformers)\n\n return self._hstack(Xs)\n\n def _log_message(self, name, idx, total):\n if not self.verbose:\n return None\n return '(step %d of %d) Processing %s' % (idx, total, name)\n\n def _parallel_func(self, X, y, fit_params, func):\n \"\"\"Runs func in parallel on X and y\"\"\"\n self.transformer_list = list(self.transformer_list)\n self._validate_transformers()\n self._validate_transformer_weights()\n transformers = list(self._iter())\n\n return Parallel(n_jobs=self.n_jobs)(delayed(func)(\n transformer, X, y, weight,\n message_clsname='FeatureUnion',\n message=self._log_message(name, idx, len(transformers)),\n **fit_params) for idx, (name, transformer,\n weight) in enumerate(transformers, 1))\n\n def transform(self, X):\n \"\"\"Transform X separately by each transformer, concatenate results.\n\n Parameters\n ----------\n X : iterable or array-like, depending on transformers\n Input data to be transformed.\n\n Returns\n -------\n X_t : array-like or sparse matrix of \\\n shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers.\n \"\"\"\n Xs = Parallel(n_jobs=self.n_jobs)(\n delayed(_transform_one)(trans, X, None, weight)\n for name, trans, weight in self._iter())\n if not Xs:\n # All transformers are None\n return np.zeros((X.shape[0], 0))\n\n return self._hstack(Xs)\n\n def _hstack(self, Xs):\n if any(sparse.issparse(f) for f in Xs):\n Xs = sparse.hstack(Xs).tocsr()\n else:\n Xs = np.hstack(Xs)\n return Xs\n\n def _update_transformer_list(self, transformers):\n transformers = iter(transformers)\n self.transformer_list[:] = [(name, old if old == 'drop'\n else next(transformers))\n for name, old in self.transformer_list]\n\n @property\n def n_features_in_(self):\n # X is passed to all transformers so we just delegate to the first one\n return self.transformer_list[0][1].n_features_in_\n\n def _sk_visual_block_(self):\n names, transformers = zip(*self.transformer_list)\n return _VisualBlock('parallel', transformers, names=names)", + "instance_attributes": [ + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline", + "name": "Pipeline", + "qname": "sklearn.pipeline.Pipeline", + "decorators": [], + "superclasses": ["_BaseComposition"], + "methods": [ + "scikit-learn/sklearn.pipeline/Pipeline/__init__", + "scikit-learn/sklearn.pipeline/Pipeline/get_params", + "scikit-learn/sklearn.pipeline/Pipeline/set_params", + "scikit-learn/sklearn.pipeline/Pipeline/_validate_steps", + "scikit-learn/sklearn.pipeline/Pipeline/_iter", + "scikit-learn/sklearn.pipeline/Pipeline/__len__", + "scikit-learn/sklearn.pipeline/Pipeline/__getitem__", + "scikit-learn/sklearn.pipeline/Pipeline/_estimator_type@getter", + "scikit-learn/sklearn.pipeline/Pipeline/named_steps@getter", + "scikit-learn/sklearn.pipeline/Pipeline/_final_estimator@getter", + "scikit-learn/sklearn.pipeline/Pipeline/_log_message", + "scikit-learn/sklearn.pipeline/Pipeline/_check_fit_params", + "scikit-learn/sklearn.pipeline/Pipeline/_fit", + "scikit-learn/sklearn.pipeline/Pipeline/fit", + "scikit-learn/sklearn.pipeline/Pipeline/fit_transform", + 
"scikit-learn/sklearn.pipeline/Pipeline/predict", + "scikit-learn/sklearn.pipeline/Pipeline/fit_predict", + "scikit-learn/sklearn.pipeline/Pipeline/predict_proba", + "scikit-learn/sklearn.pipeline/Pipeline/decision_function", + "scikit-learn/sklearn.pipeline/Pipeline/score_samples", + "scikit-learn/sklearn.pipeline/Pipeline/predict_log_proba", + "scikit-learn/sklearn.pipeline/Pipeline/transform@getter", + "scikit-learn/sklearn.pipeline/Pipeline/_transform", + "scikit-learn/sklearn.pipeline/Pipeline/inverse_transform@getter", + "scikit-learn/sklearn.pipeline/Pipeline/_inverse_transform", + "scikit-learn/sklearn.pipeline/Pipeline/score", + "scikit-learn/sklearn.pipeline/Pipeline/classes_@getter", + "scikit-learn/sklearn.pipeline/Pipeline/_more_tags", + "scikit-learn/sklearn.pipeline/Pipeline/_pairwise@getter", + "scikit-learn/sklearn.pipeline/Pipeline/n_features_in_@getter", + "scikit-learn/sklearn.pipeline/Pipeline/_sk_visual_block_" + ], + "is_public": true, + "reexported_by": [], + "description": "Pipeline of transforms with a final estimator.\n\nSequentially apply a list of transforms and a final estimator.\nIntermediate steps of the pipeline must be 'transforms', that is, they\nmust implement fit and transform methods.\nThe final estimator only needs to implement fit.\nThe transformers in the pipeline can be cached using ``memory`` argument.\n\nThe purpose of the pipeline is to assemble several steps that can be\ncross-validated together while setting different parameters.\nFor this, it enables setting parameters of the various steps using their\nnames and the parameter name separated by a '__', as in the example below.\nA step's estimator may be replaced entirely by setting the parameter\nwith its name to another estimator, or a transformer removed by setting\nit to 'passthrough' or ``None``.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.5", + "docstring": "Pipeline of transforms with a final estimator.\n\nSequentially apply a list of transforms and a final estimator.\nIntermediate steps of the pipeline must be 'transforms', that is, they\nmust implement fit and transform methods.\nThe final estimator only needs to implement fit.\nThe transformers in the pipeline can be cached using ``memory`` argument.\n\nThe purpose of the pipeline is to assemble several steps that can be\ncross-validated together while setting different parameters.\nFor this, it enables setting parameters of the various steps using their\nnames and the parameter name separated by a '__', as in the example below.\nA step's estimator may be replaced entirely by setting the parameter\nwith its name to another estimator, or a transformer removed by setting\nit to 'passthrough' or ``None``.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.5\n\nParameters\n----------\nsteps : list\n List of (name, transform) tuples (implementing fit/transform) that are\n chained, in the order in which they are chained, with the last object\n an estimator.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the fitted transformers of the pipeline. By default,\n no caching is performed. If a string is given, it is the path to\n the caching directory. Enabling caching triggers a clone of\n the transformers before fitting. Therefore, the transformer\n instance given to the pipeline cannot be inspected\n directly. Use the attribute ``named_steps`` or ``steps`` to\n inspect estimators within the pipeline. 
Caching the\n transformers is advantageous when fitting is time consuming.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each step will be printed as it\n is completed.\n\nAttributes\n----------\nnamed_steps : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n Read-only attribute to access any step parameter by user given name.\n Keys are step names and values are steps parameters.\n\nSee Also\n--------\nmake_pipeline : Convenience function for simplified pipeline construction.\n\nExamples\n--------\n>>> from sklearn.svm import SVC\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.pipeline import Pipeline\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... random_state=0)\n>>> pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])\n>>> # The pipeline can be used as any other estimator\n>>> # and avoids leaking the test set into the train set\n>>> pipe.fit(X_train, y_train)\nPipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())])\n>>> pipe.score(X_test, y_test)\n0.88", + "code": "class Pipeline(_BaseComposition):\n \"\"\"\n Pipeline of transforms with a final estimator.\n\n Sequentially apply a list of transforms and a final estimator.\n Intermediate steps of the pipeline must be 'transforms', that is, they\n must implement fit and transform methods.\n The final estimator only needs to implement fit.\n The transformers in the pipeline can be cached using ``memory`` argument.\n\n The purpose of the pipeline is to assemble several steps that can be\n cross-validated together while setting different parameters.\n For this, it enables setting parameters of the various steps using their\n names and the parameter name separated by a '__', as in the example below.\n A step's estimator may be replaced entirely by setting the parameter\n with its name to another estimator, or a transformer removed by setting\n it to 'passthrough' or ``None``.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.5\n\n Parameters\n ----------\n steps : list\n List of (name, transform) tuples (implementing fit/transform) that are\n chained, in the order in which they are chained, with the last object\n an estimator.\n\n memory : str or object with the joblib.Memory interface, default=None\n Used to cache the fitted transformers of the pipeline. By default,\n no caching is performed. If a string is given, it is the path to\n the caching directory. Enabling caching triggers a clone of\n the transformers before fitting. Therefore, the transformer\n instance given to the pipeline cannot be inspected\n directly. Use the attribute ``named_steps`` or ``steps`` to\n inspect estimators within the pipeline. 
Caching the\n transformers is advantageous when fitting is time consuming.\n\n verbose : bool, default=False\n If True, the time elapsed while fitting each step will be printed as it\n is completed.\n\n Attributes\n ----------\n named_steps : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n Read-only attribute to access any step parameter by user given name.\n Keys are step names and values are steps parameters.\n\n See Also\n --------\n make_pipeline : Convenience function for simplified pipeline construction.\n\n Examples\n --------\n >>> from sklearn.svm import SVC\n >>> from sklearn.preprocessing import StandardScaler\n >>> from sklearn.datasets import make_classification\n >>> from sklearn.model_selection import train_test_split\n >>> from sklearn.pipeline import Pipeline\n >>> X, y = make_classification(random_state=0)\n >>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n ... random_state=0)\n >>> pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])\n >>> # The pipeline can be used as any other estimator\n >>> # and avoids leaking the test set into the train set\n >>> pipe.fit(X_train, y_train)\n Pipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())])\n >>> pipe.score(X_test, y_test)\n 0.88\n \"\"\"\n\n # BaseEstimator interface\n _required_parameters = ['steps']\n\n @_deprecate_positional_args\n def __init__(self, steps, *, memory=None, verbose=False):\n self.steps = steps\n self.memory = memory\n self.verbose = verbose\n self._validate_steps()\n\n def get_params(self, deep=True):\n \"\"\"Get parameters for this estimator.\n\n Returns the parameters given in the constructor as well as the\n estimators contained within the `steps` of the `Pipeline`.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : mapping of string to any\n Parameter names mapped to their values.\n \"\"\"\n return self._get_params('steps', deep=deep)\n\n def set_params(self, **kwargs):\n \"\"\"Set the parameters of this estimator.\n\n Valid parameter keys can be listed with ``get_params()``. Note that\n you can directly set the parameters of the estimators contained in\n `steps`.\n\n Returns\n -------\n self\n \"\"\"\n self._set_params('steps', **kwargs)\n return self\n\n def _validate_steps(self):\n names, estimators = zip(*self.steps)\n\n # validate names\n self._validate_names(names)\n\n # validate estimators\n transformers = estimators[:-1]\n estimator = estimators[-1]\n\n for t in transformers:\n if t is None or t == 'passthrough':\n continue\n if (not (hasattr(t, \"fit\") or hasattr(t, \"fit_transform\")) or not\n hasattr(t, \"transform\")):\n raise TypeError(\"All intermediate steps should be \"\n \"transformers and implement fit and transform \"\n \"or be the string 'passthrough' \"\n \"'%s' (type %s) doesn't\" % (t, type(t)))\n\n # We allow last estimator to be None as an identity transformation\n if (estimator is not None and estimator != 'passthrough'\n and not hasattr(estimator, \"fit\")):\n raise TypeError(\n \"Last step of Pipeline should implement fit \"\n \"or be the string 'passthrough'. 
\"\n \"'%s' (type %s) doesn't\" % (estimator, type(estimator)))\n\n def _iter(self, with_final=True, filter_passthrough=True):\n \"\"\"\n Generate (idx, (name, trans)) tuples from self.steps\n\n When filter_passthrough is True, 'passthrough' and None transformers\n are filtered out.\n \"\"\"\n stop = len(self.steps)\n if not with_final:\n stop -= 1\n\n for idx, (name, trans) in enumerate(islice(self.steps, 0, stop)):\n if not filter_passthrough:\n yield idx, name, trans\n elif trans is not None and trans != 'passthrough':\n yield idx, name, trans\n\n def __len__(self):\n \"\"\"\n Returns the length of the Pipeline\n \"\"\"\n return len(self.steps)\n\n def __getitem__(self, ind):\n \"\"\"Returns a sub-pipeline or a single esimtator in the pipeline\n\n Indexing with an integer will return an estimator; using a slice\n returns another Pipeline instance which copies a slice of this\n Pipeline. This copy is shallow: modifying (or fitting) estimators in\n the sub-pipeline will affect the larger pipeline and vice-versa.\n However, replacing a value in `step` will not affect a copy.\n \"\"\"\n if isinstance(ind, slice):\n if ind.step not in (1, None):\n raise ValueError(\"Pipeline slicing only supports a step of 1\")\n return self.__class__(\n self.steps[ind], memory=self.memory, verbose=self.verbose\n )\n try:\n name, est = self.steps[ind]\n except TypeError:\n # Not an int, try get step by name\n return self.named_steps[ind]\n return est\n\n @property\n def _estimator_type(self):\n return self.steps[-1][1]._estimator_type\n\n @property\n def named_steps(self):\n # Use Bunch object to improve autocomplete\n return Bunch(**dict(self.steps))\n\n @property\n def _final_estimator(self):\n estimator = self.steps[-1][1]\n return 'passthrough' if estimator is None else estimator\n\n def _log_message(self, step_idx):\n if not self.verbose:\n return None\n name, step = self.steps[step_idx]\n\n return '(step %d of %d) Processing %s' % (step_idx + 1,\n len(self.steps),\n name)\n\n def _check_fit_params(self, **fit_params):\n fit_params_steps = {name: {} for name, step in self.steps\n if step is not None}\n for pname, pval in fit_params.items():\n if '__' not in pname:\n raise ValueError(\n \"Pipeline.fit does not accept the {} parameter. \"\n \"You can pass parameters to specific steps of your \"\n \"pipeline using the stepname__parameter format, e.g. 
\"\n \"`Pipeline.fit(X, y, logisticregression__sample_weight\"\n \"=sample_weight)`.\".format(pname))\n step, param = pname.split('__', 1)\n fit_params_steps[step][param] = pval\n return fit_params_steps\n\n # Estimator interface\n\n def _fit(self, X, y=None, **fit_params_steps):\n # shallow copy of steps - this should really be steps_\n self.steps = list(self.steps)\n self._validate_steps()\n # Setup the memory\n memory = check_memory(self.memory)\n\n fit_transform_one_cached = memory.cache(_fit_transform_one)\n\n for (step_idx,\n name,\n transformer) in self._iter(with_final=False,\n filter_passthrough=False):\n if (transformer is None or transformer == 'passthrough'):\n with _print_elapsed_time('Pipeline',\n self._log_message(step_idx)):\n continue\n\n if hasattr(memory, 'location'):\n # joblib >= 0.12\n if memory.location is None:\n # we do not clone when caching is disabled to\n # preserve backward compatibility\n cloned_transformer = transformer\n else:\n cloned_transformer = clone(transformer)\n elif hasattr(memory, 'cachedir'):\n # joblib < 0.11\n if memory.cachedir is None:\n # we do not clone when caching is disabled to\n # preserve backward compatibility\n cloned_transformer = transformer\n else:\n cloned_transformer = clone(transformer)\n else:\n cloned_transformer = clone(transformer)\n # Fit or load from cache the current transformer\n X, fitted_transformer = fit_transform_one_cached(\n cloned_transformer, X, y, None,\n message_clsname='Pipeline',\n message=self._log_message(step_idx),\n **fit_params_steps[name])\n # Replace the transformer of the step with the fitted\n # transformer. This is necessary when loading the transformer\n # from the cache.\n self.steps[step_idx] = (name, fitted_transformer)\n return X\n\n def fit(self, X, y=None, **fit_params):\n \"\"\"Fit the model\n\n Fit all the transforms one after the other and transform the\n data, then fit the transformed data using the final estimator.\n\n Parameters\n ----------\n X : iterable\n Training data. Must fulfill input requirements of first step of the\n pipeline.\n\n y : iterable, default=None\n Training targets. Must fulfill label requirements for all steps of\n the pipeline.\n\n **fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\n Returns\n -------\n self : Pipeline\n This estimator\n \"\"\"\n fit_params_steps = self._check_fit_params(**fit_params)\n Xt = self._fit(X, y, **fit_params_steps)\n with _print_elapsed_time('Pipeline',\n self._log_message(len(self.steps) - 1)):\n if self._final_estimator != 'passthrough':\n fit_params_last_step = fit_params_steps[self.steps[-1][0]]\n self._final_estimator.fit(Xt, y, **fit_params_last_step)\n\n return self\n\n def fit_transform(self, X, y=None, **fit_params):\n \"\"\"Fit the model and transform with the final estimator\n\n Fits all the transforms one after the other and transforms the\n data, then uses fit_transform on transformed data with the final\n estimator.\n\n Parameters\n ----------\n X : iterable\n Training data. Must fulfill input requirements of first step of the\n pipeline.\n\n y : iterable, default=None\n Training targets. 
Must fulfill label requirements for all steps of\n the pipeline.\n\n **fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\n Returns\n -------\n Xt : array-like of shape (n_samples, n_transformed_features)\n Transformed samples\n \"\"\"\n fit_params_steps = self._check_fit_params(**fit_params)\n Xt = self._fit(X, y, **fit_params_steps)\n\n last_step = self._final_estimator\n with _print_elapsed_time('Pipeline',\n self._log_message(len(self.steps) - 1)):\n if last_step == 'passthrough':\n return Xt\n fit_params_last_step = fit_params_steps[self.steps[-1][0]]\n if hasattr(last_step, 'fit_transform'):\n return last_step.fit_transform(Xt, y, **fit_params_last_step)\n else:\n return last_step.fit(Xt, y,\n **fit_params_last_step).transform(Xt)\n\n @if_delegate_has_method(delegate='_final_estimator')\n def predict(self, X, **predict_params):\n \"\"\"Apply transforms to the data, and predict with the final estimator\n\n Parameters\n ----------\n X : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n **predict_params : dict of string -> object\n Parameters to the ``predict`` called at the end of all\n transformations in the pipeline. Note that while this may be\n used to return uncertainties from some models with return_std\n or return_cov, uncertainties that are generated by the\n transformations in the pipeline are not propagated to the\n final estimator.\n\n .. versionadded:: 0.20\n\n Returns\n -------\n y_pred : array-like\n \"\"\"\n Xt = X\n for _, name, transform in self._iter(with_final=False):\n Xt = transform.transform(Xt)\n return self.steps[-1][-1].predict(Xt, **predict_params)\n\n @if_delegate_has_method(delegate='_final_estimator')\n def fit_predict(self, X, y=None, **fit_params):\n \"\"\"Applies fit_predict of last step in pipeline after transforms.\n\n Applies fit_transforms of a pipeline to the data, followed by the\n fit_predict method of the final estimator in the pipeline. Valid\n only if the final estimator implements fit_predict.\n\n Parameters\n ----------\n X : iterable\n Training data. Must fulfill input requirements of first step of\n the pipeline.\n\n y : iterable, default=None\n Training targets. Must fulfill label requirements for all steps\n of the pipeline.\n\n **fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\n Returns\n -------\n y_pred : array-like\n \"\"\"\n fit_params_steps = self._check_fit_params(**fit_params)\n Xt = self._fit(X, y, **fit_params_steps)\n\n fit_params_last_step = fit_params_steps[self.steps[-1][0]]\n with _print_elapsed_time('Pipeline',\n self._log_message(len(self.steps) - 1)):\n y_pred = self.steps[-1][-1].fit_predict(Xt, y,\n **fit_params_last_step)\n return y_pred\n\n @if_delegate_has_method(delegate='_final_estimator')\n def predict_proba(self, X):\n \"\"\"Apply transforms, and predict_proba of the final estimator\n\n Parameters\n ----------\n X : iterable\n Data to predict on. 
Must fulfill input requirements of first step\n of the pipeline.\n\n Returns\n -------\n y_proba : array-like of shape (n_samples, n_classes)\n \"\"\"\n Xt = X\n for _, name, transform in self._iter(with_final=False):\n Xt = transform.transform(Xt)\n return self.steps[-1][-1].predict_proba(Xt)\n\n @if_delegate_has_method(delegate='_final_estimator')\n def decision_function(self, X):\n \"\"\"Apply transforms, and decision_function of the final estimator\n\n Parameters\n ----------\n X : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n Returns\n -------\n y_score : array-like of shape (n_samples, n_classes)\n \"\"\"\n Xt = X\n for _, name, transform in self._iter(with_final=False):\n Xt = transform.transform(Xt)\n return self.steps[-1][-1].decision_function(Xt)\n\n @if_delegate_has_method(delegate='_final_estimator')\n def score_samples(self, X):\n \"\"\"Apply transforms, and score_samples of the final estimator.\n\n Parameters\n ----------\n X : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n Returns\n -------\n y_score : ndarray of shape (n_samples,)\n \"\"\"\n Xt = X\n for _, _, transformer in self._iter(with_final=False):\n Xt = transformer.transform(Xt)\n return self.steps[-1][-1].score_samples(Xt)\n\n @if_delegate_has_method(delegate='_final_estimator')\n def predict_log_proba(self, X):\n \"\"\"Apply transforms, and predict_log_proba of the final estimator\n\n Parameters\n ----------\n X : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n Returns\n -------\n y_score : array-like of shape (n_samples, n_classes)\n \"\"\"\n Xt = X\n for _, name, transform in self._iter(with_final=False):\n Xt = transform.transform(Xt)\n return self.steps[-1][-1].predict_log_proba(Xt)\n\n @property\n def transform(self):\n \"\"\"Apply transforms, and transform with the final estimator\n\n This also works where final estimator is ``None``: all prior\n transformations are applied.\n\n Parameters\n ----------\n X : iterable\n Data to transform. Must fulfill input requirements of first step\n of the pipeline.\n\n Returns\n -------\n Xt : array-like of shape (n_samples, n_transformed_features)\n \"\"\"\n # _final_estimator is None or has transform, otherwise attribute error\n # XXX: Handling the None case means we can't use if_delegate_has_method\n if self._final_estimator != 'passthrough':\n self._final_estimator.transform\n return self._transform\n\n def _transform(self, X):\n Xt = X\n for _, _, transform in self._iter():\n Xt = transform.transform(Xt)\n return Xt\n\n @property\n def inverse_transform(self):\n \"\"\"Apply inverse transformations in reverse order\n\n All estimators in the pipeline must support ``inverse_transform``.\n\n Parameters\n ----------\n Xt : array-like of shape (n_samples, n_transformed_features)\n Data samples, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features. 
Must fulfill\n input requirements of last step of pipeline's\n ``inverse_transform`` method.\n\n Returns\n -------\n Xt : array-like of shape (n_samples, n_features)\n \"\"\"\n # raise AttributeError if necessary for hasattr behaviour\n # XXX: Handling the None case means we can't use if_delegate_has_method\n for _, _, transform in self._iter():\n transform.inverse_transform\n return self._inverse_transform\n\n def _inverse_transform(self, X):\n Xt = X\n reverse_iter = reversed(list(self._iter()))\n for _, _, transform in reverse_iter:\n Xt = transform.inverse_transform(Xt)\n return Xt\n\n @if_delegate_has_method(delegate='_final_estimator')\n def score(self, X, y=None, sample_weight=None):\n \"\"\"Apply transforms, and score with the final estimator\n\n Parameters\n ----------\n X : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n y : iterable, default=None\n Targets used for scoring. Must fulfill label requirements for all\n steps of the pipeline.\n\n sample_weight : array-like, default=None\n If not None, this argument is passed as ``sample_weight`` keyword\n argument to the ``score`` method of the final estimator.\n\n Returns\n -------\n score : float\n \"\"\"\n Xt = X\n for _, name, transform in self._iter(with_final=False):\n Xt = transform.transform(Xt)\n score_params = {}\n if sample_weight is not None:\n score_params['sample_weight'] = sample_weight\n return self.steps[-1][-1].score(Xt, y, **score_params)\n\n @property\n def classes_(self):\n return self.steps[-1][-1].classes_\n\n def _more_tags(self):\n # check if first estimator expects pairwise input\n return {'pairwise': _safe_tags(self.steps[0][1], \"pairwise\")}\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n # check if first estimator expects pairwise input\n return getattr(self.steps[0][1], '_pairwise', False)\n\n @property\n def n_features_in_(self):\n # delegate to first step (which will call _check_is_fitted)\n return self.steps[0][1].n_features_in_\n\n def _sk_visual_block_(self):\n _, estimators = zip(*self.steps)\n\n def _get_name(name, est):\n if est is None or est == 'passthrough':\n return f'{name}: passthrough'\n # Is an estimator\n return f'{name}: {est.__class__.__name__}'\n names = [_get_name(name, est) for name, est in self.steps]\n name_details = [str(est) for est in estimators]\n return _VisualBlock('serial', estimators,\n names=names,\n name_details=name_details,\n dash_wrapped=False)", + "instance_attributes": [ + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer", + "name": "Binarizer", + "qname": "sklearn.preprocessing._data.Binarizer", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._data/Binarizer/__init__", + "scikit-learn/sklearn.preprocessing._data/Binarizer/fit", + "scikit-learn/sklearn.preprocessing._data/Binarizer/transform", + "scikit-learn/sklearn.preprocessing._data/Binarizer/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Binarize data (set feature values to 0 or 1) according to a threshold.\n\nValues greater than the threshold map to 1, while values less than\nor equal to the threshold map to 0. 
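
``inverse_transform`` requires every step to support it and walks the steps in reverse, so a pipeline made only of invertible transformers round-trips exactly. A minimal sketch with assumed toy data:

import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler

X = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])

pipe = Pipeline([("std", StandardScaler()),
                 ("minmax", MinMaxScaler())]).fit(X)

# transform applies std then minmax; inverse_transform undoes them
# in reverse order, recovering the original data.
X_back = pipe.inverse_transform(pipe.transform(X))
assert np.allclose(X, X_back)
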
With the default threshold of 0,\nonly positive values map to 1.\n\nBinarization is a common operation on text count data where the\nanalyst can decide to only consider the presence or absence of a\nfeature rather than a quantified number of occurrences for instance.\n\nIt can also be used as a pre-processing step for estimators that\nconsider boolean random variables (e.g. modelled using the Bernoulli\ndistribution in a Bayesian setting).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Binarize data (set feature values to 0 or 1) according to a threshold.\n\nValues greater than the threshold map to 1, while values less than\nor equal to the threshold map to 0. With the default threshold of 0,\nonly positive values map to 1.\n\nBinarization is a common operation on text count data where the\nanalyst can decide to only consider the presence or absence of a\nfeature rather than a quantified number of occurrences for instance.\n\nIt can also be used as a pre-processing step for estimators that\nconsider boolean random variables (e.g. modelled using the Bernoulli\ndistribution in a Bayesian setting).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nthreshold : float, default=0.0\n Feature values below or equal to this are replaced by 0, above it by 1.\n Threshold may not be less than 0 for operations on sparse matrices.\n\ncopy : bool, default=True\n set to False to perform inplace binarization and avoid a copy (if\n the input is already a numpy array or a scipy.sparse CSR matrix).\n\nExamples\n--------\n>>> from sklearn.preprocessing import Binarizer\n>>> X = [[ 1., -1., 2.],\n... [ 2., 0., 0.],\n... [ 0., 1., -1.]]\n>>> transformer = Binarizer().fit(X) # fit does nothing.\n>>> transformer\nBinarizer()\n>>> transformer.transform(X)\narray([[1., 0., 1.],\n [1., 0., 0.],\n [0., 1., 0.]])\n\nNotes\n-----\nIf the input is a sparse matrix, only the non-zero values are subject\nto update by the Binarizer class.\n\nThis estimator is stateless (besides constructor parameters), the\nfit method does nothing but is useful when used in a pipeline.\n\nSee Also\n--------\nbinarize : Equivalent function without the estimator API.", + "code": "class Binarizer(TransformerMixin, BaseEstimator):\n \"\"\"Binarize data (set feature values to 0 or 1) according to a threshold.\n\n Values greater than the threshold map to 1, while values less than\n or equal to the threshold map to 0. With the default threshold of 0,\n only positive values map to 1.\n\n Binarization is a common operation on text count data where the\n analyst can decide to only consider the presence or absence of a\n feature rather than a quantified number of occurrences for instance.\n\n It can also be used as a pre-processing step for estimators that\n consider boolean random variables (e.g. modelled using the Bernoulli\n distribution in a Bayesian setting).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n threshold : float, default=0.0\n Feature values below or equal to this are replaced by 0, above it by 1.\n Threshold may not be less than 0 for operations on sparse matrices.\n\n copy : bool, default=True\n set to False to perform inplace binarization and avoid a copy (if\n the input is already a numpy array or a scipy.sparse CSR matrix).\n\n Examples\n --------\n >>> from sklearn.preprocessing import Binarizer\n >>> X = [[ 1., -1., 2.],\n ... [ 2., 0., 0.],\n ... 
[ 0., 1., -1.]]\n >>> transformer = Binarizer().fit(X) # fit does nothing.\n >>> transformer\n Binarizer()\n >>> transformer.transform(X)\n array([[1., 0., 1.],\n [1., 0., 0.],\n [0., 1., 0.]])\n\n Notes\n -----\n If the input is a sparse matrix, only the non-zero values are subject\n to update by the Binarizer class.\n\n This estimator is stateless (besides constructor parameters), the\n fit method does nothing but is useful when used in a pipeline.\n\n See Also\n --------\n binarize : Equivalent function without the estimator API.\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, threshold=0.0, copy=True):\n self.threshold = threshold\n self.copy = copy\n\n def fit(self, X, y=None):\n \"\"\"Do nothing and return the estimator unchanged.\n\n This method is just there to implement the usual API and hence\n work in pipelines.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted transformer.\n \"\"\"\n self._validate_data(X, accept_sparse='csr')\n return self\n\n def transform(self, X, copy=None):\n \"\"\"Binarize each element of X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to binarize, element by element.\n scipy.sparse matrices should be in CSR format to avoid an\n un-necessary copy.\n\n copy : bool\n Copy the input X or not.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n copy = copy if copy is not None else self.copy\n # TODO: This should be refactored because binarize also calls\n # check_array\n X = self._validate_data(X, accept_sparse=['csr', 'csc'], copy=copy,\n reset=False)\n return binarize(X, threshold=self.threshold, copy=False)\n\n def _more_tags(self):\n return {'stateless': True}", + "instance_attributes": [ + { + "name": "threshold", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer", + "name": "KernelCenterer", + "qname": "sklearn.preprocessing._data.KernelCenterer", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._data/KernelCenterer/__init__", + "scikit-learn/sklearn.preprocessing._data/KernelCenterer/fit", + "scikit-learn/sklearn.preprocessing._data/KernelCenterer/transform", + "scikit-learn/sklearn.preprocessing._data/KernelCenterer/_more_tags", + "scikit-learn/sklearn.preprocessing._data/KernelCenterer/_pairwise@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Center a kernel matrix.\n\nLet K(x, z) be a kernel defined by phi(x)^T phi(z), where phi is a\nfunction mapping x to a Hilbert space. KernelCenterer centers (i.e.,\nnormalize to have zero mean) the data without explicitly computing phi(x).\nIt is equivalent to centering phi(x) with\nsklearn.preprocessing.StandardScaler(with_std=False).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Center a kernel matrix.\n\nLet K(x, z) be a kernel defined by phi(x)^T phi(z), where phi is a\nfunction mapping x to a Hilbert space. 
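
One behaviour of ``Binarizer`` documented above is worth seeing concretely: the threshold is strict, so values equal to it map to 0. A short sketch with an assumed nonzero threshold and toy data:

import numpy as np
from sklearn.preprocessing import Binarizer

X = np.array([[0.4, 0.5, 0.6], [1.5, 0.0, -2.0]])

# Values strictly greater than 0.5 map to 1; values less than or
# equal to 0.5 (including 0.5 itself) map to 0.
print(Binarizer(threshold=0.5).fit_transform(X))
# [[0. 0. 1.]
#  [1. 0. 0.]]
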
KernelCenterer centers (i.e.,\nnormalize to have zero mean) the data without explicitly computing phi(x).\nIt is equivalent to centering phi(x) with\nsklearn.preprocessing.StandardScaler(with_std=False).\n\nRead more in the :ref:`User Guide `.\n\nAttributes\n----------\nK_fit_rows_ : array of shape (n_samples,)\n Average of each column of kernel matrix.\n\nK_fit_all_ : float\n Average of kernel matrix.\n\nExamples\n--------\n>>> from sklearn.preprocessing import KernelCenterer\n>>> from sklearn.metrics.pairwise import pairwise_kernels\n>>> X = [[ 1., -2., 2.],\n... [ -2., 1., 3.],\n... [ 4., 1., -2.]]\n>>> K = pairwise_kernels(X, metric='linear')\n>>> K\narray([[ 9., 2., -2.],\n [ 2., 14., -13.],\n [ -2., -13., 21.]])\n>>> transformer = KernelCenterer().fit(K)\n>>> transformer\nKernelCenterer()\n>>> transformer.transform(K)\narray([[ 5., 0., -5.],\n [ 0., 14., -14.],\n [ -5., -14., 19.]])", + "code": "class KernelCenterer(TransformerMixin, BaseEstimator):\n \"\"\"Center a kernel matrix.\n\n Let K(x, z) be a kernel defined by phi(x)^T phi(z), where phi is a\n function mapping x to a Hilbert space. KernelCenterer centers (i.e.,\n normalize to have zero mean) the data without explicitly computing phi(x).\n It is equivalent to centering phi(x) with\n sklearn.preprocessing.StandardScaler(with_std=False).\n\n Read more in the :ref:`User Guide `.\n\n Attributes\n ----------\n K_fit_rows_ : array of shape (n_samples,)\n Average of each column of kernel matrix.\n\n K_fit_all_ : float\n Average of kernel matrix.\n\n Examples\n --------\n >>> from sklearn.preprocessing import KernelCenterer\n >>> from sklearn.metrics.pairwise import pairwise_kernels\n >>> X = [[ 1., -2., 2.],\n ... [ -2., 1., 3.],\n ... [ 4., 1., -2.]]\n >>> K = pairwise_kernels(X, metric='linear')\n >>> K\n array([[ 9., 2., -2.],\n [ 2., 14., -13.],\n [ -2., -13., 21.]])\n >>> transformer = KernelCenterer().fit(K)\n >>> transformer\n KernelCenterer()\n >>> transformer.transform(K)\n array([[ 5., 0., -5.],\n [ 0., 14., -14.],\n [ -5., -14., 19.]])\n \"\"\"\n\n def __init__(self):\n # Needed for backported inspect.signature compatibility with PyPy\n pass\n\n def fit(self, K, y=None):\n \"\"\"Fit KernelCenterer\n\n Parameters\n ----------\n K : ndarray of shape (n_samples, n_samples)\n Kernel matrix.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted transformer.\n \"\"\"\n\n K = self._validate_data(K, dtype=FLOAT_DTYPES)\n\n if K.shape[0] != K.shape[1]:\n raise ValueError(\"Kernel matrix must be a square matrix.\"\n \" Input is a {}x{} matrix.\"\n .format(K.shape[0], K.shape[1]))\n\n n_samples = K.shape[0]\n self.K_fit_rows_ = np.sum(K, axis=0) / n_samples\n self.K_fit_all_ = self.K_fit_rows_.sum() / n_samples\n return self\n\n def transform(self, K, copy=True):\n \"\"\"Center kernel matrix.\n\n Parameters\n ----------\n K : ndarray of shape (n_samples1, n_samples2)\n Kernel matrix.\n\n copy : bool, default=True\n Set to False to perform inplace computation.\n\n Returns\n -------\n K_new : ndarray of shape (n_samples1, n_samples2)\n \"\"\"\n check_is_fitted(self)\n\n K = self._validate_data(K, copy=copy, dtype=FLOAT_DTYPES, reset=False)\n\n K_pred_cols = (np.sum(K, axis=1) /\n self.K_fit_rows_.shape[0])[:, np.newaxis]\n\n K -= self.K_fit_rows_\n K -= K_pred_cols\n K += self.K_fit_all_\n\n return K\n\n def _more_tags(self):\n return {'pairwise': True}\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and 
will be removed in 1.1.\")\n @property\n def _pairwise(self):\n return True", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler", + "name": "MaxAbsScaler", + "qname": "sklearn.preprocessing._data.MaxAbsScaler", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/__init__", + "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/_reset", + "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/fit", + "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/partial_fit", + "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/transform", + "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/inverse_transform", + "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Scale each feature by its maximum absolute value.\n\nThis estimator scales and translates each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0. It does not shift/center the data, and\nthus does not destroy any sparsity.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.\n\n.. versionadded:: 0.17", + "docstring": "Scale each feature by its maximum absolute value.\n\nThis estimator scales and translates each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0. It does not shift/center the data, and\nthus does not destroy any sparsity.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.\n\n.. versionadded:: 0.17\n\nParameters\n----------\ncopy : bool, default=True\n Set to False to perform inplace scaling and avoid a copy (if the input\n is already a numpy array).\n\nAttributes\n----------\nscale_ : ndarray of shape (n_features,)\n Per feature relative scaling of the data.\n\n .. versionadded:: 0.17\n *scale_* attribute.\n\nmax_abs_ : ndarray of shape (n_features,)\n Per feature maximum absolute value.\n\nn_samples_seen_ : int\n The number of samples processed by the estimator. Will be reset on\n new calls to fit, but increments across ``partial_fit`` calls.\n\nExamples\n--------\n>>> from sklearn.preprocessing import MaxAbsScaler\n>>> X = [[ 1., -1., 2.],\n... [ 2., 0., 0.],\n... [ 0., 1., -1.]]\n>>> transformer = MaxAbsScaler().fit(X)\n>>> transformer\nMaxAbsScaler()\n>>> transformer.transform(X)\narray([[ 0.5, -1. , 1. ],\n [ 1. , 0. , 0. ],\n [ 0. , 1. , -0.5]])\n\nSee Also\n--------\nmaxabs_scale : Equivalent function without the estimator API.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.", + "code": "class MaxAbsScaler(TransformerMixin, BaseEstimator):\n \"\"\"Scale each feature by its maximum absolute value.\n\n This estimator scales and translates each feature individually such\n that the maximal absolute value of each feature in the\n training set will be 1.0. It does not shift/center the data, and\n thus does not destroy any sparsity.\n\n This scaler can also be applied to sparse CSR or CSC matrices.\n\n .. 
versionadded:: 0.17\n\n Parameters\n ----------\n copy : bool, default=True\n Set to False to perform inplace scaling and avoid a copy (if the input\n is already a numpy array).\n\n Attributes\n ----------\n scale_ : ndarray of shape (n_features,)\n Per feature relative scaling of the data.\n\n .. versionadded:: 0.17\n *scale_* attribute.\n\n max_abs_ : ndarray of shape (n_features,)\n Per feature maximum absolute value.\n\n n_samples_seen_ : int\n The number of samples processed by the estimator. Will be reset on\n new calls to fit, but increments across ``partial_fit`` calls.\n\n Examples\n --------\n >>> from sklearn.preprocessing import MaxAbsScaler\n >>> X = [[ 1., -1., 2.],\n ... [ 2., 0., 0.],\n ... [ 0., 1., -1.]]\n >>> transformer = MaxAbsScaler().fit(X)\n >>> transformer\n MaxAbsScaler()\n >>> transformer.transform(X)\n array([[ 0.5, -1. , 1. ],\n [ 1. , 0. , 0. ],\n [ 0. , 1. , -0.5]])\n\n See Also\n --------\n maxabs_scale : Equivalent function without the estimator API.\n\n Notes\n -----\n NaNs are treated as missing values: disregarded in fit, and maintained in\n transform.\n\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, copy=True):\n self.copy = copy\n\n def _reset(self):\n \"\"\"Reset internal data-dependent state of the scaler, if necessary.\n\n __init__ parameters are not touched.\n \"\"\"\n\n # Checking one attribute is enough, becase they are all set together\n # in partial_fit\n if hasattr(self, 'scale_'):\n del self.scale_\n del self.n_samples_seen_\n del self.max_abs_\n\n def fit(self, X, y=None):\n \"\"\"Compute the maximum absolute value to be used for later scaling.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the per-feature minimum and maximum\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n # Reset internal state before fitting\n self._reset()\n return self.partial_fit(X, y)\n\n def partial_fit(self, X, y=None):\n \"\"\"\n Online computation of max absolute value of X for later scaling.\n\n All of X is processed as a single batch. 
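
The batched fitting of ``MaxAbsScaler`` described here accumulates the per-feature maximum absolute value across ``partial_fit`` calls. A minimal sketch, with two assumed toy batches:

import numpy as np
from sklearn.preprocessing import MaxAbsScaler

scaler = MaxAbsScaler()
scaler.partial_fit(np.array([[1.0, -2.0], [0.5, 1.0]]))  # max_abs_ == [1., 2.]
scaler.partial_fit(np.array([[-4.0, 0.5]]))              # max_abs_ == [4., 2.]

print(scaler.n_samples_seen_)                    # 3
print(scaler.transform(np.array([[2.0, 1.0]])))  # [[0.5 0.5]]
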
This is intended for cases\n when :meth:`fit` is not feasible due to very large number of\n `n_samples` or because X is read from a continuous stream.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n first_pass = not hasattr(self, 'n_samples_seen_')\n X = self._validate_data(X, reset=first_pass,\n accept_sparse=('csr', 'csc'), estimator=self,\n dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n\n if sparse.issparse(X):\n mins, maxs = min_max_axis(X, axis=0, ignore_nan=True)\n max_abs = np.maximum(np.abs(mins), np.abs(maxs))\n else:\n max_abs = np.nanmax(np.abs(X), axis=0)\n\n if first_pass:\n self.n_samples_seen_ = X.shape[0]\n else:\n max_abs = np.maximum(self.max_abs_, max_abs)\n self.n_samples_seen_ += X.shape[0]\n\n self.max_abs_ = max_abs\n self.scale_ = _handle_zeros_in_scale(max_abs)\n return self\n\n def transform(self, X):\n \"\"\"Scale the data\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data that should be scaled.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse=('csr', 'csc'),\n copy=self.copy, reset=False,\n estimator=self, dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n\n if sparse.issparse(X):\n inplace_column_scale(X, 1.0 / self.scale_)\n else:\n X /= self.scale_\n return X\n\n def inverse_transform(self, X):\n \"\"\"Scale back the data to the original representation\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data that should be transformed back.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,\n estimator=self, dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n\n if sparse.issparse(X):\n inplace_column_scale(X, self.scale_)\n else:\n X *= self.scale_\n return X\n\n def _more_tags(self):\n return {'allow_nan': True}", + "instance_attributes": [ + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler", + "name": "MinMaxScaler", + "qname": "sklearn.preprocessing._data.MinMaxScaler", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/__init__", + "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/_reset", + "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/fit", + "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/partial_fit", + "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/transform", + "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/inverse_transform", + "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, e.g. 
between\nzero and one.\n\nThe transformation is given by::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, e.g. between\nzero and one.\n\nThe transformation is given by::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfeature_range : tuple (min, max), default=(0, 1)\n Desired range of transformed data.\n\ncopy : bool, default=True\n Set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array).\n\nclip : bool, default=False\n Set to True to clip transformed values of held-out data to\n provided `feature range`.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nmin_ : ndarray of shape (n_features,)\n Per feature adjustment for minimum. Equivalent to\n ``min - X.min(axis=0) * self.scale_``\n\nscale_ : ndarray of shape (n_features,)\n Per feature relative scaling of the data. Equivalent to\n ``(max - min) / (X.max(axis=0) - X.min(axis=0))``\n\n .. versionadded:: 0.17\n *scale_* attribute.\n\ndata_min_ : ndarray of shape (n_features,)\n Per feature minimum seen in the data\n\n .. versionadded:: 0.17\n *data_min_*\n\ndata_max_ : ndarray of shape (n_features,)\n Per feature maximum seen in the data\n\n .. versionadded:: 0.17\n *data_max_*\n\ndata_range_ : ndarray of shape (n_features,)\n Per feature range ``(data_max_ - data_min_)`` seen in the data\n\n .. versionadded:: 0.17\n *data_range_*\n\nn_samples_seen_ : int\n The number of samples processed by the estimator.\n It will be reset on new calls to fit, but increments across\n ``partial_fit`` calls.\n\nExamples\n--------\n>>> from sklearn.preprocessing import MinMaxScaler\n>>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]\n>>> scaler = MinMaxScaler()\n>>> print(scaler.fit(data))\nMinMaxScaler()\n>>> print(scaler.data_max_)\n[ 1. 18.]\n>>> print(scaler.transform(data))\n[[0. 0. ]\n [0.25 0.25]\n [0.5 0.5 ]\n [1. 1. ]]\n>>> print(scaler.transform([[2, 2]]))\n[[1.5 0. ]]\n\nSee Also\n--------\nminmax_scale : Equivalent function without the estimator API.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.", + "code": "class MinMaxScaler(TransformerMixin, BaseEstimator):\n \"\"\"Transform features by scaling each feature to a given range.\n\n This estimator scales and translates each feature individually such\n that it is in the given range on the training set, e.g. 
between\n zero and one.\n\n The transformation is given by::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\n where min, max = feature_range.\n\n This transformation is often used as an alternative to zero mean,\n unit variance scaling.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n feature_range : tuple (min, max), default=(0, 1)\n Desired range of transformed data.\n\n copy : bool, default=True\n Set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array).\n\n clip : bool, default=False\n Set to True to clip transformed values of held-out data to\n provided `feature range`.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n min_ : ndarray of shape (n_features,)\n Per feature adjustment for minimum. Equivalent to\n ``min - X.min(axis=0) * self.scale_``\n\n scale_ : ndarray of shape (n_features,)\n Per feature relative scaling of the data. Equivalent to\n ``(max - min) / (X.max(axis=0) - X.min(axis=0))``\n\n .. versionadded:: 0.17\n *scale_* attribute.\n\n data_min_ : ndarray of shape (n_features,)\n Per feature minimum seen in the data\n\n .. versionadded:: 0.17\n *data_min_*\n\n data_max_ : ndarray of shape (n_features,)\n Per feature maximum seen in the data\n\n .. versionadded:: 0.17\n *data_max_*\n\n data_range_ : ndarray of shape (n_features,)\n Per feature range ``(data_max_ - data_min_)`` seen in the data\n\n .. versionadded:: 0.17\n *data_range_*\n\n n_samples_seen_ : int\n The number of samples processed by the estimator.\n It will be reset on new calls to fit, but increments across\n ``partial_fit`` calls.\n\n Examples\n --------\n >>> from sklearn.preprocessing import MinMaxScaler\n >>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]\n >>> scaler = MinMaxScaler()\n >>> print(scaler.fit(data))\n MinMaxScaler()\n >>> print(scaler.data_max_)\n [ 1. 18.]\n >>> print(scaler.transform(data))\n [[0. 0. ]\n [0.25 0.25]\n [0.5 0.5 ]\n [1. 1. ]]\n >>> print(scaler.transform([[2, 2]]))\n [[1.5 0. 
]]\n\n See Also\n --------\n minmax_scale : Equivalent function without the estimator API.\n\n Notes\n -----\n NaNs are treated as missing values: disregarded in fit, and maintained in\n transform.\n\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, feature_range=(0, 1), *, copy=True, clip=False):\n self.feature_range = feature_range\n self.copy = copy\n self.clip = clip\n\n def _reset(self):\n \"\"\"Reset internal data-dependent state of the scaler, if necessary.\n\n __init__ parameters are not touched.\n \"\"\"\n\n # Checking one attribute is enough, becase they are all set together\n # in partial_fit\n if hasattr(self, 'scale_'):\n del self.scale_\n del self.min_\n del self.n_samples_seen_\n del self.data_min_\n del self.data_max_\n del self.data_range_\n\n def fit(self, X, y=None):\n \"\"\"Compute the minimum and maximum to be used for later scaling.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data used to compute the per-feature minimum and maximum\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n\n # Reset internal state before fitting\n self._reset()\n return self.partial_fit(X, y)\n\n def partial_fit(self, X, y=None):\n \"\"\"Online computation of min and max on X for later scaling.\n\n All of X is processed as a single batch. This is intended for cases\n when :meth:`fit` is not feasible due to very large number of\n `n_samples` or because X is read from a continuous stream.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n feature_range = self.feature_range\n if feature_range[0] >= feature_range[1]:\n raise ValueError(\"Minimum of desired feature range must be smaller\"\n \" than maximum. Got %s.\" % str(feature_range))\n\n if sparse.issparse(X):\n raise TypeError(\"MinMaxScaler does not support sparse input. 
\"\n \"Consider using MaxAbsScaler instead.\")\n\n first_pass = not hasattr(self, 'n_samples_seen_')\n X = self._validate_data(X, reset=first_pass,\n estimator=self, dtype=FLOAT_DTYPES,\n force_all_finite=\"allow-nan\")\n\n data_min = np.nanmin(X, axis=0)\n data_max = np.nanmax(X, axis=0)\n\n if first_pass:\n self.n_samples_seen_ = X.shape[0]\n else:\n data_min = np.minimum(self.data_min_, data_min)\n data_max = np.maximum(self.data_max_, data_max)\n self.n_samples_seen_ += X.shape[0]\n\n data_range = data_max - data_min\n self.scale_ = ((feature_range[1] - feature_range[0]) /\n _handle_zeros_in_scale(data_range))\n self.min_ = feature_range[0] - data_min * self.scale_\n self.data_min_ = data_min\n self.data_max_ = data_max\n self.data_range_ = data_range\n return self\n\n def transform(self, X):\n \"\"\"Scale features of X according to feature_range.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data that will be transformed.\n\n Returns\n -------\n Xt : ndarray of shape (n_samples, n_features)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, copy=self.copy, dtype=FLOAT_DTYPES,\n force_all_finite=\"allow-nan\", reset=False)\n\n X *= self.scale_\n X += self.min_\n if self.clip:\n np.clip(X, self.feature_range[0], self.feature_range[1], out=X)\n return X\n\n def inverse_transform(self, X):\n \"\"\"Undo the scaling of X according to feature_range.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data that will be transformed. It cannot be sparse.\n\n Returns\n -------\n Xt : ndarray of shape (n_samples, n_features)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n\n X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES,\n force_all_finite=\"allow-nan\")\n\n X -= self.min_\n X /= self.scale_\n return X\n\n def _more_tags(self):\n return {'allow_nan': True}", + "instance_attributes": [ + { + "name": "feature_range", + "types": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "clip", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer", + "name": "Normalizer", + "qname": "sklearn.preprocessing._data.Normalizer", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._data/Normalizer/__init__", + "scikit-learn/sklearn.preprocessing._data/Normalizer/fit", + "scikit-learn/sklearn.preprocessing._data/Normalizer/transform", + "scikit-learn/sklearn.preprocessing._data/Normalizer/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Normalize samples individually to unit norm.\n\nEach sample (i.e. each row of the data matrix) with at least one\nnon zero component is rescaled independently of other samples so\nthat its norm (l1, l2 or inf) equals one.\n\nThis transformer is able to work both with dense numpy arrays and\nscipy.sparse matrix (use CSR format if you want to avoid the burden of\na copy / conversion).\n\nScaling inputs to unit norms is a common operation for text\nclassification or clustering for instance. 
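
For ``MinMaxScaler`` above, the effect of ``clip`` (added in 0.24) only shows up on held-out data that falls outside the training range. A minimal sketch with assumed toy values:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

X_train = np.array([[0.0], [10.0]])

# A held-out value above the training maximum exceeds feature_range
# unless clip=True, which clips it back to the range boundary.
print(MinMaxScaler().fit(X_train).transform([[15.0]]))            # [[1.5]]
print(MinMaxScaler(clip=True).fit(X_train).transform([[15.0]]))   # [[1.]]
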
For instance the dot\nproduct of two l2-normalized TF-IDF vectors is the cosine similarity\nof the vectors and is the base similarity metric for the Vector\nSpace Model commonly used by the Information Retrieval community.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Normalize samples individually to unit norm.\n\nEach sample (i.e. each row of the data matrix) with at least one\nnon zero component is rescaled independently of other samples so\nthat its norm (l1, l2 or inf) equals one.\n\nThis transformer is able to work both with dense numpy arrays and\nscipy.sparse matrix (use CSR format if you want to avoid the burden of\na copy / conversion).\n\nScaling inputs to unit norms is a common operation for text\nclassification or clustering for instance. For instance the dot\nproduct of two l2-normalized TF-IDF vectors is the cosine similarity\nof the vectors and is the base similarity metric for the Vector\nSpace Model commonly used by the Information Retrieval community.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nnorm : {'l1', 'l2', 'max'}, default='l2'\n The norm to use to normalize each non zero sample. If norm='max'\n is used, values will be rescaled by the maximum of the absolute\n values.\n\ncopy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSR matrix).\n\nExamples\n--------\n>>> from sklearn.preprocessing import Normalizer\n>>> X = [[4, 1, 2, 2],\n... [1, 3, 9, 3],\n... [5, 7, 5, 1]]\n>>> transformer = Normalizer().fit(X) # fit does nothing.\n>>> transformer\nNormalizer()\n>>> transformer.transform(X)\narray([[0.8, 0.2, 0.4, 0.4],\n [0.1, 0.3, 0.9, 0.3],\n [0.5, 0.7, 0.5, 0.1]])\n\nNotes\n-----\nThis estimator is stateless (besides constructor parameters), the\nfit method does nothing but is useful when used in a pipeline.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nSee Also\n--------\nnormalize : Equivalent function without the estimator API.", + "code": "class Normalizer(TransformerMixin, BaseEstimator):\n \"\"\"Normalize samples individually to unit norm.\n\n Each sample (i.e. each row of the data matrix) with at least one\n non zero component is rescaled independently of other samples so\n that its norm (l1, l2 or inf) equals one.\n\n This transformer is able to work both with dense numpy arrays and\n scipy.sparse matrix (use CSR format if you want to avoid the burden of\n a copy / conversion).\n\n Scaling inputs to unit norms is a common operation for text\n classification or clustering for instance. For instance the dot\n product of two l2-normalized TF-IDF vectors is the cosine similarity\n of the vectors and is the base similarity metric for the Vector\n Space Model commonly used by the Information Retrieval community.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n norm : {'l1', 'l2', 'max'}, default='l2'\n The norm to use to normalize each non zero sample. If norm='max'\n is used, values will be rescaled by the maximum of the absolute\n values.\n\n copy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSR matrix).\n\n Examples\n --------\n >>> from sklearn.preprocessing import Normalizer\n >>> X = [[4, 1, 2, 2],\n ... [1, 3, 9, 3],\n ... 
[5, 7, 5, 1]]\n >>> transformer = Normalizer().fit(X) # fit does nothing.\n >>> transformer\n Normalizer()\n >>> transformer.transform(X)\n array([[0.8, 0.2, 0.4, 0.4],\n [0.1, 0.3, 0.9, 0.3],\n [0.5, 0.7, 0.5, 0.1]])\n\n Notes\n -----\n This estimator is stateless (besides constructor parameters), the\n fit method does nothing but is useful when used in a pipeline.\n\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n\n See Also\n --------\n normalize : Equivalent function without the estimator API.\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, norm='l2', *, copy=True):\n self.norm = norm\n self.copy = copy\n\n def fit(self, X, y=None):\n \"\"\"Do nothing and return the estimator unchanged\n\n This method is just there to implement the usual API and hence\n work in pipelines.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to estimate the normalization parameters.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted transformer.\n \"\"\"\n self._validate_data(X, accept_sparse='csr')\n return self\n\n def transform(self, X, copy=None):\n \"\"\"Scale each non zero row of X to unit norm\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to normalize, row by row. scipy.sparse matrices should be\n in CSR format to avoid an un-necessary copy.\n\n copy : bool, default=None\n Copy the input X or not.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n copy = copy if copy is not None else self.copy\n X = self._validate_data(X, accept_sparse='csr', reset=False)\n return normalize(X, norm=self.norm, axis=1, copy=copy)\n\n def _more_tags(self):\n return {'stateless': True}", + "instance_attributes": [ + { + "name": "norm", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures", + "name": "PolynomialFeatures", + "qname": "sklearn.preprocessing._data.PolynomialFeatures", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/__init__", + "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/_combinations", + "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/powers_@getter", + "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/get_feature_names", + "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/fit", + "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Generate polynomial and interaction features.\n\nGenerate a new feature matrix consisting of all polynomial combinations\nof the features with degree less than or equal to the specified degree.\nFor example, if an input sample is two dimensional and of the form\n[a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].", + "docstring": "Generate polynomial and interaction features.\n\nGenerate a new feature matrix consisting of all polynomial combinations\nof the features with degree less than or equal to the specified degree.\nFor example, if an input sample is two dimensional and of the form\n[a, b], the degree-2 polynomial features are [1, a, b, 
a^2, ab, b^2].\n\nParameters\n----------\ndegree : int, default=2\n The degree of the polynomial features.\n\ninteraction_only : bool, default=False\n If true, only interaction features are produced: features that are\n products of at most ``degree`` *distinct* input features (so not\n ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).\n\ninclude_bias : bool, default=True\n If True (default), then include a bias column, the feature in which\n all polynomial powers are zero (i.e. a column of ones - acts as an\n intercept term in a linear model).\n\norder : {'C', 'F'}, default='C'\n Order of output array in the dense case. 'F' order is faster to\n compute, but may slow down subsequent estimators.\n\n .. versionadded:: 0.21\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import PolynomialFeatures\n>>> X = np.arange(6).reshape(3, 2)\n>>> X\narray([[0, 1],\n [2, 3],\n [4, 5]])\n>>> poly = PolynomialFeatures(2)\n>>> poly.fit_transform(X)\narray([[ 1., 0., 1., 0., 0., 1.],\n [ 1., 2., 3., 4., 6., 9.],\n [ 1., 4., 5., 16., 20., 25.]])\n>>> poly = PolynomialFeatures(interaction_only=True)\n>>> poly.fit_transform(X)\narray([[ 1., 0., 1., 0.],\n [ 1., 2., 3., 6.],\n [ 1., 4., 5., 20.]])\n\nAttributes\n----------\npowers_ : ndarray of shape (n_output_features, n_input_features)\n powers_[i, j] is the exponent of the jth input in the ith output.\n\nn_input_features_ : int\n The total number of input features.\n\nn_output_features_ : int\n The total number of polynomial output features. The number of output\n features is computed by iterating over all suitably sized combinations\n of input features.\n\nNotes\n-----\nBe aware that the number of features in the output array scales\npolynomially in the number of features of the input array, and\nexponentially in the degree. High degrees can cause overfitting.\n\nSee :ref:`examples/linear_model/plot_polynomial_interpolation.py\n`", + "code": "class PolynomialFeatures(TransformerMixin, BaseEstimator):\n \"\"\"Generate polynomial and interaction features.\n\n Generate a new feature matrix consisting of all polynomial combinations\n of the features with degree less than or equal to the specified degree.\n For example, if an input sample is two dimensional and of the form\n [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].\n\n Parameters\n ----------\n degree : int, default=2\n The degree of the polynomial features.\n\n interaction_only : bool, default=False\n If true, only interaction features are produced: features that are\n products of at most ``degree`` *distinct* input features (so not\n ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).\n\n include_bias : bool, default=True\n If True (default), then include a bias column, the feature in which\n all polynomial powers are zero (i.e. a column of ones - acts as an\n intercept term in a linear model).\n\n order : {'C', 'F'}, default='C'\n Order of output array in the dense case. 'F' order is faster to\n compute, but may slow down subsequent estimators.\n\n .. 
versionadded:: 0.21\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.preprocessing import PolynomialFeatures\n >>> X = np.arange(6).reshape(3, 2)\n >>> X\n array([[0, 1],\n [2, 3],\n [4, 5]])\n >>> poly = PolynomialFeatures(2)\n >>> poly.fit_transform(X)\n array([[ 1., 0., 1., 0., 0., 1.],\n [ 1., 2., 3., 4., 6., 9.],\n [ 1., 4., 5., 16., 20., 25.]])\n >>> poly = PolynomialFeatures(interaction_only=True)\n >>> poly.fit_transform(X)\n array([[ 1., 0., 1., 0.],\n [ 1., 2., 3., 6.],\n [ 1., 4., 5., 20.]])\n\n Attributes\n ----------\n powers_ : ndarray of shape (n_output_features, n_input_features)\n powers_[i, j] is the exponent of the jth input in the ith output.\n\n n_input_features_ : int\n The total number of input features.\n\n n_output_features_ : int\n The total number of polynomial output features. The number of output\n features is computed by iterating over all suitably sized combinations\n of input features.\n\n Notes\n -----\n Be aware that the number of features in the output array scales\n polynomially in the number of features of the input array, and\n exponentially in the degree. High degrees can cause overfitting.\n\n See :ref:`examples/linear_model/plot_polynomial_interpolation.py\n `\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, degree=2, *, interaction_only=False, include_bias=True,\n order='C'):\n self.degree = degree\n self.interaction_only = interaction_only\n self.include_bias = include_bias\n self.order = order\n\n @staticmethod\n def _combinations(n_features, degree, interaction_only, include_bias):\n comb = (combinations if interaction_only else combinations_w_r)\n start = int(not include_bias)\n return chain.from_iterable(comb(range(n_features), i)\n for i in range(start, degree + 1))\n\n @property\n def powers_(self):\n check_is_fitted(self)\n\n combinations = self._combinations(self.n_input_features_, self.degree,\n self.interaction_only,\n self.include_bias)\n return np.vstack([np.bincount(c, minlength=self.n_input_features_)\n for c in combinations])\n\n def get_feature_names(self, input_features=None):\n \"\"\"\n Return feature names for output features\n\n Parameters\n ----------\n input_features : list of str of shape (n_features,), default=None\n String names for input features if available. By default,\n \"x0\", \"x1\", ... 
\"xn_features\" is used.\n\n Returns\n -------\n output_feature_names : list of str of shape (n_output_features,)\n \"\"\"\n powers = self.powers_\n if input_features is None:\n input_features = ['x%d' % i for i in range(powers.shape[1])]\n feature_names = []\n for row in powers:\n inds = np.where(row)[0]\n if len(inds):\n name = \" \".join(\"%s^%d\" % (input_features[ind], exp)\n if exp != 1 else input_features[ind]\n for ind, exp in zip(inds, row[inds]))\n else:\n name = \"1\"\n feature_names.append(name)\n return feature_names\n\n def fit(self, X, y=None):\n \"\"\"\n Compute number of output features.\n\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted transformer.\n \"\"\"\n n_samples, n_features = self._validate_data(\n X, accept_sparse=True).shape\n combinations = self._combinations(n_features, self.degree,\n self.interaction_only,\n self.include_bias)\n self.n_input_features_ = n_features\n self.n_output_features_ = sum(1 for _ in combinations)\n return self\n\n def transform(self, X):\n \"\"\"Transform data to polynomial features\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to transform, row by row.\n\n Prefer CSR over CSC for sparse input (for speed), but CSC is\n required if the degree is 4 or higher. If the degree is less than\n 4 and the input format is CSC, it will be converted to CSR, have\n its polynomial features generated, then converted back to CSC.\n\n If the degree is 2 or 3, the method described in \"Leveraging\n Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices\n Using K-Simplex Numbers\" by Andrew Nystrom and John Hughes is\n used, which is much faster than the method used on CSC input. For\n this reason, a CSC input will be converted to CSR, and the output\n will be converted back to CSC prior to being returned, hence the\n preference of CSR.\n\n Returns\n -------\n XP : {ndarray, sparse matrix} of shape (n_samples, NP)\n The matrix of features, where NP is the number of polynomial\n features generated from the combination of inputs. 
If a sparse\n matrix is provided, it will be converted into a sparse\n ``csr_matrix``.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, order='F', dtype=FLOAT_DTYPES, reset=False,\n accept_sparse=('csr', 'csc'))\n\n n_samples, n_features = X.shape\n\n if n_features != self.n_input_features_:\n raise ValueError(\"X shape does not match training shape\")\n\n if sparse.isspmatrix_csr(X):\n if self.degree > 3:\n return self.transform(X.tocsc()).tocsr()\n to_stack = []\n if self.include_bias:\n to_stack.append(np.ones(shape=(n_samples, 1), dtype=X.dtype))\n to_stack.append(X)\n for deg in range(2, self.degree+1):\n Xp_next = _csr_polynomial_expansion(X.data, X.indices,\n X.indptr, X.shape[1],\n self.interaction_only,\n deg)\n if Xp_next is None:\n break\n to_stack.append(Xp_next)\n XP = sparse.hstack(to_stack, format='csr')\n elif sparse.isspmatrix_csc(X) and self.degree < 4:\n return self.transform(X.tocsr()).tocsc()\n else:\n if sparse.isspmatrix(X):\n combinations = self._combinations(n_features, self.degree,\n self.interaction_only,\n self.include_bias)\n columns = []\n for comb in combinations:\n if comb:\n out_col = 1\n for col_idx in comb:\n out_col = X[:, col_idx].multiply(out_col)\n columns.append(out_col)\n else:\n bias = sparse.csc_matrix(np.ones((X.shape[0], 1)))\n columns.append(bias)\n XP = sparse.hstack(columns, dtype=X.dtype).tocsc()\n else:\n XP = np.empty((n_samples, self.n_output_features_),\n dtype=X.dtype, order=self.order)\n\n # What follows is a faster implementation of:\n # for i, comb in enumerate(combinations):\n # XP[:, i] = X[:, comb].prod(1)\n # This implementation uses two optimisations.\n # First one is broadcasting,\n # multiply ([X1, ..., Xn], X1) -> [X1 X1, ..., Xn X1]\n # multiply ([X2, ..., Xn], X2) -> [X2 X2, ..., Xn X2]\n # ...\n # multiply ([X[:, start:end], X[:, start]) -> ...\n # Second optimisation happens for degrees >= 3.\n # Xi^3 is computed reusing previous computation:\n # Xi^3 = Xi^2 * Xi.\n\n if self.include_bias:\n XP[:, 0] = 1\n current_col = 1\n else:\n current_col = 0\n\n # d = 0\n XP[:, current_col:current_col + n_features] = X\n index = list(range(current_col,\n current_col + n_features))\n current_col += n_features\n index.append(current_col)\n\n # d >= 1\n for _ in range(1, self.degree):\n new_index = []\n end = index[-1]\n for feature_idx in range(n_features):\n start = index[feature_idx]\n new_index.append(current_col)\n if self.interaction_only:\n start += (index[feature_idx + 1] -\n index[feature_idx])\n next_col = current_col + end - start\n if next_col <= current_col:\n break\n # XP[:, start:end] are terms of degree d - 1\n # that exclude feature #feature_idx.\n np.multiply(XP[:, start:end],\n X[:, feature_idx:feature_idx + 1],\n out=XP[:, current_col:next_col],\n casting='no')\n current_col = next_col\n\n new_index.append(current_col)\n index = new_index\n\n return XP", + "instance_attributes": [ + { + "name": "degree", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "interaction_only", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "include_bias", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "order", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer", + "name": "PowerTransformer", + "qname": "sklearn.preprocessing._data.PowerTransformer", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + 
"scikit-learn/sklearn.preprocessing._data/PowerTransformer/__init__", + "scikit-learn/sklearn.preprocessing._data/PowerTransformer/fit", + "scikit-learn/sklearn.preprocessing._data/PowerTransformer/fit_transform", + "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_fit", + "scikit-learn/sklearn.preprocessing._data/PowerTransformer/transform", + "scikit-learn/sklearn.preprocessing._data/PowerTransformer/inverse_transform", + "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_box_cox_inverse_tranform", + "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_inverse_transform", + "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_transform", + "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_box_cox_optimize", + "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_optimize", + "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_check_input", + "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Apply a power transform featurewise to make data more Gaussian-like.\n\nPower transforms are a family of parametric, monotonic transformations\nthat are applied to make data more Gaussian-like. This is useful for\nmodeling issues related to heteroscedasticity (non-constant variance),\nor other situations where normality is desired.\n\nCurrently, PowerTransformer supports the Box-Cox transform and the\nYeo-Johnson transform. The optimal parameter for stabilizing variance and\nminimizing skewness is estimated through maximum likelihood.\n\nBox-Cox requires input data to be strictly positive, while Yeo-Johnson\nsupports both positive or negative data.\n\nBy default, zero-mean, unit-variance normalization is applied to the\ntransformed data.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "docstring": "Apply a power transform featurewise to make data more Gaussian-like.\n\nPower transforms are a family of parametric, monotonic transformations\nthat are applied to make data more Gaussian-like. This is useful for\nmodeling issues related to heteroscedasticity (non-constant variance),\nor other situations where normality is desired.\n\nCurrently, PowerTransformer supports the Box-Cox transform and the\nYeo-Johnson transform. The optimal parameter for stabilizing variance and\nminimizing skewness is estimated through maximum likelihood.\n\nBox-Cox requires input data to be strictly positive, while Yeo-Johnson\nsupports both positive or negative data.\n\nBy default, zero-mean, unit-variance normalization is applied to the\ntransformed data.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nmethod : {'yeo-johnson', 'box-cox'}, default='yeo-johnson'\n The power transform method. 
Available methods are:\n\n - 'yeo-johnson' [1]_, works with positive and negative values\n - 'box-cox' [2]_, only works with strictly positive values\n\nstandardize : bool, default=True\n Set to True to apply zero-mean, unit-variance normalization to the\n transformed output.\n\ncopy : bool, default=True\n Set to False to perform inplace computation during transformation.\n\nAttributes\n----------\nlambdas_ : ndarray of float of shape (n_features,)\n The parameters of the power transformation for the selected features.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import PowerTransformer\n>>> pt = PowerTransformer()\n>>> data = [[1, 2], [3, 2], [4, 5]]\n>>> print(pt.fit(data))\nPowerTransformer()\n>>> print(pt.lambdas_)\n[ 1.386... -3.100...]\n>>> print(pt.transform(data))\n[[-1.316... -0.707...]\n [ 0.209... -0.707...]\n [ 1.106... 1.414...]]\n\nSee Also\n--------\npower_transform : Equivalent function without the estimator API.\n\nQuantileTransformer : Maps data to a standard normal distribution with\n the parameter `output_distribution='normal'`.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in ``fit``, and maintained\nin ``transform``.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nReferences\n----------\n\n.. [1] I.K. Yeo and R.A. Johnson, \"A new family of power transformations to\n improve normality or symmetry.\" Biometrika, 87(4), pp.954-959,\n (2000).\n\n.. [2] G.E.P. Box and D.R. Cox, \"An Analysis of Transformations\", Journal\n of the Royal Statistical Society B, 26, 211-252 (1964).", + "code": "class PowerTransformer(TransformerMixin, BaseEstimator):\n \"\"\"Apply a power transform featurewise to make data more Gaussian-like.\n\n Power transforms are a family of parametric, monotonic transformations\n that are applied to make data more Gaussian-like. This is useful for\n modeling issues related to heteroscedasticity (non-constant variance),\n or other situations where normality is desired.\n\n Currently, PowerTransformer supports the Box-Cox transform and the\n Yeo-Johnson transform. The optimal parameter for stabilizing variance and\n minimizing skewness is estimated through maximum likelihood.\n\n Box-Cox requires input data to be strictly positive, while Yeo-Johnson\n supports both positive or negative data.\n\n By default, zero-mean, unit-variance normalization is applied to the\n transformed data.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.20\n\n Parameters\n ----------\n method : {'yeo-johnson', 'box-cox'}, default='yeo-johnson'\n The power transform method. Available methods are:\n\n - 'yeo-johnson' [1]_, works with positive and negative values\n - 'box-cox' [2]_, only works with strictly positive values\n\n standardize : bool, default=True\n Set to True to apply zero-mean, unit-variance normalization to the\n transformed output.\n\n copy : bool, default=True\n Set to False to perform inplace computation during transformation.\n\n Attributes\n ----------\n lambdas_ : ndarray of float of shape (n_features,)\n The parameters of the power transformation for the selected features.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.preprocessing import PowerTransformer\n >>> pt = PowerTransformer()\n >>> data = [[1, 2], [3, 2], [4, 5]]\n >>> print(pt.fit(data))\n PowerTransformer()\n >>> print(pt.lambdas_)\n [ 1.386... -3.100...]\n >>> print(pt.transform(data))\n [[-1.316... 
-0.707...]\n [ 0.209... -0.707...]\n [ 1.106... 1.414...]]\n\n See Also\n --------\n power_transform : Equivalent function without the estimator API.\n\n QuantileTransformer : Maps data to a standard normal distribution with\n the parameter `output_distribution='normal'`.\n\n Notes\n -----\n NaNs are treated as missing values: disregarded in ``fit``, and maintained\n in ``transform``.\n\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n\n References\n ----------\n\n .. [1] I.K. Yeo and R.A. Johnson, \"A new family of power transformations to\n improve normality or symmetry.\" Biometrika, 87(4), pp.954-959,\n (2000).\n\n .. [2] G.E.P. Box and D.R. Cox, \"An Analysis of Transformations\", Journal\n of the Royal Statistical Society B, 26, 211-252 (1964).\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, method='yeo-johnson', *, standardize=True, copy=True):\n self.method = method\n self.standardize = standardize\n self.copy = copy\n\n def fit(self, X, y=None):\n \"\"\"Estimate the optimal parameter lambda for each feature.\n\n The optimal lambda parameter for minimizing skewness is estimated on\n each feature independently using maximum likelihood.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data used to estimate the optimal transformation parameters.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted transformer.\n \"\"\"\n self._fit(X, y=y, force_transform=False)\n return self\n\n def fit_transform(self, X, y=None):\n return self._fit(X, y, force_transform=True)\n\n def _fit(self, X, y=None, force_transform=False):\n X = self._check_input(X, in_fit=True, check_positive=True,\n check_method=True)\n\n if not self.copy and not force_transform: # if call from fit()\n X = X.copy() # force copy so that fit does not change X inplace\n\n optim_function = {'box-cox': self._box_cox_optimize,\n 'yeo-johnson': self._yeo_johnson_optimize\n }[self.method]\n with np.errstate(invalid='ignore'): # hide NaN warnings\n self.lambdas_ = np.array([optim_function(col) for col in X.T])\n\n if self.standardize or force_transform:\n transform_function = {'box-cox': boxcox,\n 'yeo-johnson': self._yeo_johnson_transform\n }[self.method]\n for i, lmbda in enumerate(self.lambdas_):\n with np.errstate(invalid='ignore'): # hide NaN warnings\n X[:, i] = transform_function(X[:, i], lmbda)\n\n if self.standardize:\n self._scaler = StandardScaler(copy=False)\n if force_transform:\n X = self._scaler.fit_transform(X)\n else:\n self._scaler.fit(X)\n\n return X\n\n def transform(self, X):\n \"\"\"Apply the power transform to each feature using the fitted lambdas.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to be transformed using a power transformation.\n\n Returns\n -------\n X_trans : ndarray of shape (n_samples, n_features)\n The transformed data.\n \"\"\"\n check_is_fitted(self)\n X = self._check_input(X, in_fit=False, check_positive=True,\n check_shape=True)\n\n transform_function = {'box-cox': boxcox,\n 'yeo-johnson': self._yeo_johnson_transform\n }[self.method]\n for i, lmbda in enumerate(self.lambdas_):\n with np.errstate(invalid='ignore'): # hide NaN warnings\n X[:, i] = transform_function(X[:, i], lmbda)\n\n if self.standardize:\n X = self._scaler.transform(X)\n\n return X\n\n def inverse_transform(self, X):\n \"\"\"Apply the inverse power transformation using the fitted lambdas.\n\n The inverse of the Box-Cox 
transformation is given by::\n\n if lambda_ == 0:\n X = exp(X_trans)\n else:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_)\n\n The inverse of the Yeo-Johnson transformation is given by::\n\n if X >= 0 and lambda_ == 0:\n X = exp(X_trans) - 1\n elif X >= 0 and lambda_ != 0:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_) - 1\n elif X < 0 and lambda_ != 2:\n X = 1 - (-(2 - lambda_) * X_trans + 1) ** (1 / (2 - lambda_))\n elif X < 0 and lambda_ == 2:\n X = 1 - exp(-X_trans)\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The transformed data.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n The original data.\n \"\"\"\n check_is_fitted(self)\n X = self._check_input(X, in_fit=False, check_shape=True)\n\n if self.standardize:\n X = self._scaler.inverse_transform(X)\n\n inv_fun = {'box-cox': self._box_cox_inverse_tranform,\n 'yeo-johnson': self._yeo_johnson_inverse_transform\n }[self.method]\n for i, lmbda in enumerate(self.lambdas_):\n with np.errstate(invalid='ignore'): # hide NaN warnings\n X[:, i] = inv_fun(X[:, i], lmbda)\n\n return X\n\n def _box_cox_inverse_tranform(self, x, lmbda):\n \"\"\"Return inverse-transformed input x following Box-Cox inverse\n transform with parameter lambda.\n \"\"\"\n if lmbda == 0:\n x_inv = np.exp(x)\n else:\n x_inv = (x * lmbda + 1) ** (1 / lmbda)\n\n return x_inv\n\n def _yeo_johnson_inverse_transform(self, x, lmbda):\n \"\"\"Return inverse-transformed input x following Yeo-Johnson inverse\n transform with parameter lambda.\n \"\"\"\n x_inv = np.zeros_like(x)\n pos = x >= 0\n\n # when x >= 0\n if abs(lmbda) < np.spacing(1.):\n x_inv[pos] = np.exp(x[pos]) - 1\n else: # lmbda != 0\n x_inv[pos] = np.power(x[pos] * lmbda + 1, 1 / lmbda) - 1\n\n # when x < 0\n if abs(lmbda - 2) > np.spacing(1.):\n x_inv[~pos] = 1 - np.power(-(2 - lmbda) * x[~pos] + 1,\n 1 / (2 - lmbda))\n else: # lmbda == 2\n x_inv[~pos] = 1 - np.exp(-x[~pos])\n\n return x_inv\n\n def _yeo_johnson_transform(self, x, lmbda):\n \"\"\"Return transformed input x following Yeo-Johnson transform with\n parameter lambda.\n \"\"\"\n\n out = np.zeros_like(x)\n pos = x >= 0 # binary mask\n\n # when x >= 0\n if abs(lmbda) < np.spacing(1.):\n out[pos] = np.log1p(x[pos])\n else: # lmbda != 0\n out[pos] = (np.power(x[pos] + 1, lmbda) - 1) / lmbda\n\n # when x < 0\n if abs(lmbda - 2) > np.spacing(1.):\n out[~pos] = -(np.power(-x[~pos] + 1, 2 - lmbda) - 1) / (2 - lmbda)\n else: # lmbda == 2\n out[~pos] = -np.log1p(-x[~pos])\n\n return out\n\n def _box_cox_optimize(self, x):\n \"\"\"Find and return optimal lambda parameter of the Box-Cox transform by\n MLE, for observed data x.\n\n We here use scipy builtins which uses the brent optimizer.\n \"\"\"\n # the computation of lambda is influenced by NaNs so we need to\n # get rid of them\n _, lmbda = stats.boxcox(x[~np.isnan(x)], lmbda=None)\n\n return lmbda\n\n def _yeo_johnson_optimize(self, x):\n \"\"\"Find and return optimal lambda parameter of the Yeo-Johnson\n transform by MLE, for observed data x.\n\n Like for Box-Cox, MLE is done via the brent optimizer.\n \"\"\"\n\n def _neg_log_likelihood(lmbda):\n \"\"\"Return the negative log likelihood of the observed data x as a\n function of lambda.\"\"\"\n x_trans = self._yeo_johnson_transform(x, lmbda)\n n_samples = x.shape[0]\n\n loglike = -n_samples / 2 * np.log(x_trans.var())\n loglike += (lmbda - 1) * (np.sign(x) * np.log1p(np.abs(x))).sum()\n\n return -loglike\n\n # the computation of lambda is influenced by NaNs so we need to\n # get rid of them\n x = 
x[~np.isnan(x)]\n # choosing bracket -2, 2 like for boxcox\n return optimize.brent(_neg_log_likelihood, brack=(-2, 2))\n\n def _check_input(self, X, in_fit, check_positive=False, check_shape=False,\n check_method=False):\n \"\"\"Validate the input before fit and transform.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n in_fit : bool\n Whether or not `_check_input` is called from `fit` or other\n methods, e.g. `predict`, `transform`, etc.\n\n check_positive : bool, default=False\n If True, check that all data is positive and non-zero (only if\n ``self.method=='box-cox'``).\n\n check_shape : bool, default=False\n If True, check that n_features matches the length of self.lambdas_\n\n check_method : bool, default=False\n If True, check that the transformation method is valid.\n \"\"\"\n X = self._validate_data(X, ensure_2d=True, dtype=FLOAT_DTYPES,\n copy=self.copy, force_all_finite='allow-nan',\n reset=in_fit)\n\n with np.warnings.catch_warnings():\n np.warnings.filterwarnings(\n 'ignore', r'All-NaN (slice|axis) encountered')\n if (check_positive and self.method == 'box-cox' and\n np.nanmin(X) <= 0):\n raise ValueError(\"The Box-Cox transformation can only be \"\n \"applied to strictly positive data\")\n\n if check_shape and not X.shape[1] == len(self.lambdas_):\n raise ValueError(\"Input data has a different number of features \"\n \"than fitting data. Should have {n}, data has {m}\"\n .format(n=len(self.lambdas_), m=X.shape[1]))\n\n valid_methods = ('box-cox', 'yeo-johnson')\n if check_method and self.method not in valid_methods:\n raise ValueError(\"'method' must be one of {}, \"\n \"got {} instead.\"\n .format(valid_methods, self.method))\n\n return X\n\n def _more_tags(self):\n return {'allow_nan': True}", + "instance_attributes": [ + { + "name": "method", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "standardize", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "lambdas_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "_scaler", + "types": { + "kind": "NamedType", + "name": "StandardScaler" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer", + "name": "QuantileTransformer", + "qname": "sklearn.preprocessing._data.QuantileTransformer", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/__init__", + "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_dense_fit", + "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_sparse_fit", + "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/fit", + "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_transform_col", + "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_check_inputs", + "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_transform", + "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/transform", + "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/inverse_transform", + "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. 
Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.19", + "docstring": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.19\n\nParameters\n----------\nn_quantiles : int, default=1000 or n_samples\n Number of quantiles to be computed. It corresponds to the number\n of landmarks used to discretize the cumulative distribution function.\n If n_quantiles is larger than the number of samples, n_quantiles is set\n to the number of samples as a larger number of quantiles does not give\n a better approximation of the cumulative distribution function\n estimator.\n\noutput_distribution : {'uniform', 'normal'}, default='uniform'\n Marginal distribution for the transformed data. The choices are\n 'uniform' (default) or 'normal'.\n\nignore_implicit_zeros : bool, default=False\n Only applies to sparse matrices. If True, the sparse entries of the\n matrix are discarded to compute the quantile statistics. If False,\n these entries are treated as zeros.\n\nsubsample : int, default=1e5\n Maximum number of samples used to estimate the quantiles for\n computational efficiency. 
Note that the subsampling procedure may\n differ for value-identical sparse and dense matrices.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for subsampling and smoothing\n noise.\n Please see ``subsample`` for more details.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `\n\ncopy : bool, default=True\n Set to False to perform inplace transformation and avoid a copy (if the\n input is already a numpy array).\n\nAttributes\n----------\nn_quantiles_ : int\n The actual number of quantiles used to discretize the cumulative\n distribution function.\n\nquantiles_ : ndarray of shape (n_quantiles, n_features)\n The values corresponding the quantiles of reference.\n\nreferences_ : ndarray of shape (n_quantiles, )\n Quantiles of references.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import QuantileTransformer\n>>> rng = np.random.RandomState(0)\n>>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n>>> qt = QuantileTransformer(n_quantiles=10, random_state=0)\n>>> qt.fit_transform(X)\narray([...])\n\nSee Also\n--------\nquantile_transform : Equivalent function without the estimator API.\nPowerTransformer : Perform mapping to a normal distribution using a power\n transform.\nStandardScaler : Perform standardization that is faster, but less robust\n to outliers.\nRobustScaler : Perform robust standardization that removes the influence\n of outliers but does not put outliers and inliers on the same scale.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.", + "code": "class QuantileTransformer(TransformerMixin, BaseEstimator):\n \"\"\"Transform features using quantiles information.\n\n This method transforms the features to follow a uniform or a normal\n distribution. Therefore, for a given feature, this transformation tends\n to spread out the most frequent values. It also reduces the impact of\n (marginal) outliers: this is therefore a robust preprocessing scheme.\n\n The transformation is applied on each feature independently. First an\n estimate of the cumulative distribution function of a feature is\n used to map the original values to a uniform distribution. The obtained\n values are then mapped to the desired output distribution using the\n associated quantile function. Features values of new/unseen data that fall\n below or above the fitted range will be mapped to the bounds of the output\n distribution. Note that this transform is non-linear. It may distort linear\n correlations between variables measured at the same scale but renders\n variables measured at different scales more directly comparable.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.19\n\n Parameters\n ----------\n n_quantiles : int, default=1000 or n_samples\n Number of quantiles to be computed. It corresponds to the number\n of landmarks used to discretize the cumulative distribution function.\n If n_quantiles is larger than the number of samples, n_quantiles is set\n to the number of samples as a larger number of quantiles does not give\n a better approximation of the cumulative distribution function\n estimator.\n\n output_distribution : {'uniform', 'normal'}, default='uniform'\n Marginal distribution for the transformed data. 
The choices are\n 'uniform' (default) or 'normal'.\n\n ignore_implicit_zeros : bool, default=False\n Only applies to sparse matrices. If True, the sparse entries of the\n matrix are discarded to compute the quantile statistics. If False,\n these entries are treated as zeros.\n\n subsample : int, default=1e5\n Maximum number of samples used to estimate the quantiles for\n computational efficiency. Note that the subsampling procedure may\n differ for value-identical sparse and dense matrices.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for subsampling and smoothing\n noise.\n Please see ``subsample`` for more details.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `\n\n copy : bool, default=True\n Set to False to perform inplace transformation and avoid a copy (if the\n input is already a numpy array).\n\n Attributes\n ----------\n n_quantiles_ : int\n The actual number of quantiles used to discretize the cumulative\n distribution function.\n\n quantiles_ : ndarray of shape (n_quantiles, n_features)\n The values corresponding the quantiles of reference.\n\n references_ : ndarray of shape (n_quantiles, )\n Quantiles of references.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.preprocessing import QuantileTransformer\n >>> rng = np.random.RandomState(0)\n >>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n >>> qt = QuantileTransformer(n_quantiles=10, random_state=0)\n >>> qt.fit_transform(X)\n array([...])\n\n See Also\n --------\n quantile_transform : Equivalent function without the estimator API.\n PowerTransformer : Perform mapping to a normal distribution using a power\n transform.\n StandardScaler : Perform standardization that is faster, but less robust\n to outliers.\n RobustScaler : Perform robust standardization that removes the influence\n of outliers but does not put outliers and inliers on the same scale.\n\n Notes\n -----\n NaNs are treated as missing values: disregarded in fit, and maintained in\n transform.\n\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, n_quantiles=1000, output_distribution='uniform',\n ignore_implicit_zeros=False, subsample=int(1e5),\n random_state=None, copy=True):\n self.n_quantiles = n_quantiles\n self.output_distribution = output_distribution\n self.ignore_implicit_zeros = ignore_implicit_zeros\n self.subsample = subsample\n self.random_state = random_state\n self.copy = copy\n\n def _dense_fit(self, X, random_state):\n \"\"\"Compute percentiles for dense matrices.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The data used to scale along the features axis.\n \"\"\"\n if self.ignore_implicit_zeros:\n warnings.warn(\"'ignore_implicit_zeros' takes effect only with\"\n \" sparse matrix. 
This parameter has no effect.\")\n\n n_samples, n_features = X.shape\n references = self.references_ * 100\n\n self.quantiles_ = []\n for col in X.T:\n if self.subsample < n_samples:\n subsample_idx = random_state.choice(n_samples,\n size=self.subsample,\n replace=False)\n col = col.take(subsample_idx, mode='clip')\n self.quantiles_.append(np.nanpercentile(col, references))\n self.quantiles_ = np.transpose(self.quantiles_)\n # Due to floating-point precision error in `np.nanpercentile`,\n # make sure that quantiles are monotonically increasing.\n # Upstream issue in numpy:\n # https://github.com/numpy/numpy/issues/14685\n self.quantiles_ = np.maximum.accumulate(self.quantiles_)\n\n def _sparse_fit(self, X, random_state):\n \"\"\"Compute percentiles for sparse matrices.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n The data used to scale along the features axis. The sparse matrix\n needs to be nonnegative. If a sparse matrix is provided,\n it will be converted into a sparse ``csc_matrix``.\n \"\"\"\n n_samples, n_features = X.shape\n references = self.references_ * 100\n\n self.quantiles_ = []\n for feature_idx in range(n_features):\n column_nnz_data = X.data[X.indptr[feature_idx]:\n X.indptr[feature_idx + 1]]\n if len(column_nnz_data) > self.subsample:\n column_subsample = (self.subsample * len(column_nnz_data) //\n n_samples)\n if self.ignore_implicit_zeros:\n column_data = np.zeros(shape=column_subsample,\n dtype=X.dtype)\n else:\n column_data = np.zeros(shape=self.subsample, dtype=X.dtype)\n column_data[:column_subsample] = random_state.choice(\n column_nnz_data, size=column_subsample, replace=False)\n else:\n if self.ignore_implicit_zeros:\n column_data = np.zeros(shape=len(column_nnz_data),\n dtype=X.dtype)\n else:\n column_data = np.zeros(shape=n_samples, dtype=X.dtype)\n column_data[:len(column_nnz_data)] = column_nnz_data\n\n if not column_data.size:\n # if no nnz, an error will be raised for computing the\n # quantiles. Force the quantiles to be zeros.\n self.quantiles_.append([0] * len(references))\n else:\n self.quantiles_.append(\n np.nanpercentile(column_data, references))\n self.quantiles_ = np.transpose(self.quantiles_)\n # due to floating-point precision error in `np.nanpercentile`,\n # make sure the quantiles are monotonically increasing\n # Upstream issue in numpy:\n # https://github.com/numpy/numpy/issues/14685\n self.quantiles_ = np.maximum.accumulate(self.quantiles_)\n\n def fit(self, X, y=None):\n \"\"\"Compute the quantiles used for transforming.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted transformer.\n \"\"\"\n if self.n_quantiles <= 0:\n raise ValueError(\"Invalid value for 'n_quantiles': %d. \"\n \"The number of quantiles must be at least one.\"\n % self.n_quantiles)\n\n if self.subsample <= 0:\n raise ValueError(\"Invalid value for 'subsample': %d. \"\n \"The number of subsamples must be at least one.\"\n % self.subsample)\n\n if self.n_quantiles > self.subsample:\n raise ValueError(\"The number of quantiles cannot be greater than\"\n \" the number of samples used. 
Got {} quantiles\"\n \" and {} samples.\".format(self.n_quantiles,\n self.subsample))\n\n X = self._check_inputs(X, in_fit=True, copy=False)\n n_samples = X.shape[0]\n\n if self.n_quantiles > n_samples:\n warnings.warn(\"n_quantiles (%s) is greater than the total number \"\n \"of samples (%s). n_quantiles is set to \"\n \"n_samples.\"\n % (self.n_quantiles, n_samples))\n self.n_quantiles_ = max(1, min(self.n_quantiles, n_samples))\n\n rng = check_random_state(self.random_state)\n\n # Create the quantiles of reference\n self.references_ = np.linspace(0, 1, self.n_quantiles_,\n endpoint=True)\n if sparse.issparse(X):\n self._sparse_fit(X, rng)\n else:\n self._dense_fit(X, rng)\n\n return self\n\n def _transform_col(self, X_col, quantiles, inverse):\n \"\"\"Private function to transform a single feature.\"\"\"\n\n output_distribution = self.output_distribution\n\n if not inverse:\n lower_bound_x = quantiles[0]\n upper_bound_x = quantiles[-1]\n lower_bound_y = 0\n upper_bound_y = 1\n else:\n lower_bound_x = 0\n upper_bound_x = 1\n lower_bound_y = quantiles[0]\n upper_bound_y = quantiles[-1]\n # for inverse transform, match a uniform distribution\n with np.errstate(invalid='ignore'): # hide NaN comparison warnings\n if output_distribution == 'normal':\n X_col = stats.norm.cdf(X_col)\n # else output distribution is already a uniform distribution\n\n # find index for lower and higher bounds\n with np.errstate(invalid='ignore'): # hide NaN comparison warnings\n if output_distribution == 'normal':\n lower_bounds_idx = (X_col - BOUNDS_THRESHOLD <\n lower_bound_x)\n upper_bounds_idx = (X_col + BOUNDS_THRESHOLD >\n upper_bound_x)\n if output_distribution == 'uniform':\n lower_bounds_idx = (X_col == lower_bound_x)\n upper_bounds_idx = (X_col == upper_bound_x)\n\n isfinite_mask = ~np.isnan(X_col)\n X_col_finite = X_col[isfinite_mask]\n if not inverse:\n # Interpolate in one direction and in the other and take the\n # mean. This is in case of repeated values in the features\n # and hence repeated quantiles\n #\n # If we don't do this, only one extreme of the duplicated is\n # used (the upper when we do ascending, and the\n # lower for descending). We take the mean of these two\n X_col[isfinite_mask] = .5 * (\n np.interp(X_col_finite, quantiles, self.references_)\n - np.interp(-X_col_finite, -quantiles[::-1],\n -self.references_[::-1]))\n else:\n X_col[isfinite_mask] = np.interp(X_col_finite,\n self.references_, quantiles)\n\n X_col[upper_bounds_idx] = upper_bound_y\n X_col[lower_bounds_idx] = lower_bound_y\n # for forward transform, match the output distribution\n if not inverse:\n with np.errstate(invalid='ignore'): # hide NaN comparison warnings\n if output_distribution == 'normal':\n X_col = stats.norm.ppf(X_col)\n # find the value to clip the data to avoid mapping to\n # infinity. 
Clip such that the inverse transform will be\n # consistent\n clip_min = stats.norm.ppf(BOUNDS_THRESHOLD - np.spacing(1))\n clip_max = stats.norm.ppf(1 - (BOUNDS_THRESHOLD -\n np.spacing(1)))\n X_col = np.clip(X_col, clip_min, clip_max)\n # else output distribution is uniform and the ppf is the\n # identity function so we let X_col unchanged\n\n return X_col\n\n def _check_inputs(self, X, in_fit, accept_sparse_negative=False,\n copy=False):\n \"\"\"Check inputs before fit and transform.\"\"\"\n X = self._validate_data(X, reset=in_fit,\n accept_sparse='csc', copy=copy,\n dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n # we only accept positive sparse matrix when ignore_implicit_zeros is\n # false and that we call fit or transform.\n with np.errstate(invalid='ignore'): # hide NaN comparison warnings\n if (not accept_sparse_negative and not self.ignore_implicit_zeros\n and (sparse.issparse(X) and np.any(X.data < 0))):\n raise ValueError('QuantileTransformer only accepts'\n ' non-negative sparse matrices.')\n\n # check the output distribution\n if self.output_distribution not in ('normal', 'uniform'):\n raise ValueError(\"'output_distribution' has to be either 'normal'\"\n \" or 'uniform'. Got '{}' instead.\".format(\n self.output_distribution))\n\n return X\n\n def _transform(self, X, inverse=False):\n \"\"\"Forward and inverse transform.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The data used to scale along the features axis.\n\n inverse : bool, default=False\n If False, apply forward transform. If True, apply\n inverse transform.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n Projected data.\n \"\"\"\n\n if sparse.issparse(X):\n for feature_idx in range(X.shape[1]):\n column_slice = slice(X.indptr[feature_idx],\n X.indptr[feature_idx + 1])\n X.data[column_slice] = self._transform_col(\n X.data[column_slice], self.quantiles_[:, feature_idx],\n inverse)\n else:\n for feature_idx in range(X.shape[1]):\n X[:, feature_idx] = self._transform_col(\n X[:, feature_idx], self.quantiles_[:, feature_idx],\n inverse)\n\n return X\n\n def transform(self, X):\n \"\"\"Feature-wise transformation of the data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\n Returns\n -------\n Xt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The projected data.\n \"\"\"\n check_is_fitted(self)\n X = self._check_inputs(X, in_fit=False, copy=self.copy)\n\n return self._transform(X, inverse=False)\n\n def inverse_transform(self, X):\n \"\"\"Back-projection to the original space.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. 
Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\n Returns\n -------\n Xt : {ndarray, sparse matrix} of (n_samples, n_features)\n The projected data.\n \"\"\"\n check_is_fitted(self)\n X = self._check_inputs(X, in_fit=False, accept_sparse_negative=True,\n copy=self.copy)\n\n return self._transform(X, inverse=True)\n\n def _more_tags(self):\n return {'allow_nan': True}", + "instance_attributes": [ + { + "name": "n_quantiles", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "output_distribution", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "ignore_implicit_zeros", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "subsample", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "quantiles_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "references_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler", + "name": "RobustScaler", + "qname": "sklearn.preprocessing._data.RobustScaler", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._data/RobustScaler/__init__", + "scikit-learn/sklearn.preprocessing._data/RobustScaler/fit", + "scikit-learn/sklearn.preprocessing._data/RobustScaler/transform", + "scikit-learn/sklearn.preprocessing._data/RobustScaler/inverse_transform", + "scikit-learn/sklearn.preprocessing._data/RobustScaler/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Scale features using statistics that are robust to outliers.\n\nThis Scaler removes the median and scales the data according to\nthe quantile range (defaults to IQR: Interquartile Range).\nThe IQR is the range between the 1st quartile (25th quantile)\nand the 3rd quartile (75th quantile).\n\nCentering and scaling happen independently on each feature by\ncomputing the relevant statistics on the samples in the training\nset. Median and interquartile range are then stored to be used on\nlater data using the ``transform`` method.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators. Typically this is done by removing the mean\nand scaling to unit variance. However, outliers can often influence the\nsample mean / variance in a negative way. In such cases, the median and\nthe interquartile range often give better results.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.", + "docstring": "Scale features using statistics that are robust to outliers.\n\nThis Scaler removes the median and scales the data according to\nthe quantile range (defaults to IQR: Interquartile Range).\nThe IQR is the range between the 1st quartile (25th quantile)\nand the 3rd quartile (75th quantile).\n\nCentering and scaling happen independently on each feature by\ncomputing the relevant statistics on the samples in the training\nset. Median and interquartile range are then stored to be used on\nlater data using the ``transform`` method.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators. Typically this is done by removing the mean\nand scaling to unit variance. However, outliers can often influence the\nsample mean / variance in a negative way. 
In such cases, the median and\nthe interquartile range often give better results.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nwith_centering : bool, default=True\n If True, center the data before scaling.\n This will cause ``transform`` to raise an exception when attempted on\n sparse matrices, because centering them entails building a dense\n matrix which in common use cases is likely to be too large to fit in\n memory.\n\nwith_scaling : bool, default=True\n If True, scale the data to interquartile range.\n\nquantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0, default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR\n Quantile range used to calculate ``scale_``.\n\n .. versionadded:: 0.18\n\ncopy : bool, default=True\n If False, try to avoid a copy and do inplace scaling instead.\n This is not guaranteed to always work inplace; e.g. if the data is\n not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n returned.\n\nunit_variance : bool, default=False\n If True, scale data so that normally distributed features have a\n variance of 1. In general, if the difference between the x-values of\n ``q_max`` and ``q_min`` for a standard normal distribution is greater\n than 1, the dataset will be scaled down. If less than 1, the dataset\n will be scaled up.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncenter_ : array of floats\n The median value for each feature in the training set.\n\nscale_ : array of floats\n The (scaled) interquartile range for each feature in the training set.\n\n .. versionadded:: 0.17\n *scale_* attribute.\n\nExamples\n--------\n>>> from sklearn.preprocessing import RobustScaler\n>>> X = [[ 1., -2., 2.],\n... [ -2., 1., 3.],\n... [ 4., 1., -2.]]\n>>> transformer = RobustScaler().fit(X)\n>>> transformer\nRobustScaler()\n>>> transformer.transform(X)\narray([[ 0. , -2. , 0. ],\n [-1. , 0. , 0.4],\n [ 1. , 0. , -1.6]])\n\nSee Also\n--------\nrobust_scale : Equivalent function without the estimator API.\n\n:class:`~sklearn.decomposition.PCA`\n Further removes the linear correlation across features with\n 'whiten=True'.\n\nNotes\n-----\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nhttps://en.wikipedia.org/wiki/Median\nhttps://en.wikipedia.org/wiki/Interquartile_range", + "code": "class RobustScaler(TransformerMixin, BaseEstimator):\n \"\"\"Scale features using statistics that are robust to outliers.\n\n This Scaler removes the median and scales the data according to\n the quantile range (defaults to IQR: Interquartile Range).\n The IQR is the range between the 1st quartile (25th quantile)\n and the 3rd quartile (75th quantile).\n\n Centering and scaling happen independently on each feature by\n computing the relevant statistics on the samples in the training\n set. Median and interquartile range are then stored to be used on\n later data using the ``transform`` method.\n\n Standardization of a dataset is a common requirement for many\n machine learning estimators. Typically this is done by removing the mean\n and scaling to unit variance. However, outliers can often influence the\n sample mean / variance in a negative way. In such cases, the median and\n the interquartile range often give better results.\n\n .. 
versionadded:: 0.17\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n with_centering : bool, default=True\n If True, center the data before scaling.\n This will cause ``transform`` to raise an exception when attempted on\n sparse matrices, because centering them entails building a dense\n matrix which in common use cases is likely to be too large to fit in\n memory.\n\n with_scaling : bool, default=True\n If True, scale the data to interquartile range.\n\n quantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0, \\\n default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR\n Quantile range used to calculate ``scale_``.\n\n .. versionadded:: 0.18\n\n copy : bool, default=True\n If False, try to avoid a copy and do inplace scaling instead.\n This is not guaranteed to always work inplace; e.g. if the data is\n not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n returned.\n\n unit_variance : bool, default=False\n If True, scale data so that normally distributed features have a\n variance of 1. In general, if the difference between the x-values of\n ``q_max`` and ``q_min`` for a standard normal distribution is greater\n than 1, the dataset will be scaled down. If less than 1, the dataset\n will be scaled up.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n center_ : array of floats\n The median value for each feature in the training set.\n\n scale_ : array of floats\n The (scaled) interquartile range for each feature in the training set.\n\n .. versionadded:: 0.17\n *scale_* attribute.\n\n Examples\n --------\n >>> from sklearn.preprocessing import RobustScaler\n >>> X = [[ 1., -2., 2.],\n ... [ -2., 1., 3.],\n ... [ 4., 1., -2.]]\n >>> transformer = RobustScaler().fit(X)\n >>> transformer\n RobustScaler()\n >>> transformer.transform(X)\n array([[ 0. , -2. , 0. ],\n [-1. , 0. , 0.4],\n [ 1. , 0. 
, -1.6]])\n\n See Also\n --------\n robust_scale : Equivalent function without the estimator API.\n\n :class:`~sklearn.decomposition.PCA`\n Further removes the linear correlation across features with\n 'whiten=True'.\n\n Notes\n -----\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n\n https://en.wikipedia.org/wiki/Median\n https://en.wikipedia.org/wiki/Interquartile_range\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *, with_centering=True, with_scaling=True,\n quantile_range=(25.0, 75.0), copy=True, unit_variance=False):\n self.with_centering = with_centering\n self.with_scaling = with_scaling\n self.quantile_range = quantile_range\n self.unit_variance = unit_variance\n self.copy = copy\n\n def fit(self, X, y=None):\n \"\"\"Compute the median and quantiles to be used for scaling.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the median and quantiles\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n # at fit, convert sparse matrices to csc for optimized computation of\n # the quantiles\n X = self._validate_data(X, accept_sparse='csc', estimator=self,\n dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n\n q_min, q_max = self.quantile_range\n if not 0 <= q_min <= q_max <= 100:\n raise ValueError(\"Invalid quantile range: %s\" %\n str(self.quantile_range))\n\n if self.with_centering:\n if sparse.issparse(X):\n raise ValueError(\n \"Cannot center sparse matrices: use `with_centering=False`\"\n \" instead. See docstring for motivation and alternatives.\")\n self.center_ = np.nanmedian(X, axis=0)\n else:\n self.center_ = None\n\n if self.with_scaling:\n quantiles = []\n for feature_idx in range(X.shape[1]):\n if sparse.issparse(X):\n column_nnz_data = X.data[X.indptr[feature_idx]:\n X.indptr[feature_idx + 1]]\n column_data = np.zeros(shape=X.shape[0], dtype=X.dtype)\n column_data[:len(column_nnz_data)] = column_nnz_data\n else:\n column_data = X[:, feature_idx]\n\n quantiles.append(np.nanpercentile(column_data,\n self.quantile_range))\n\n quantiles = np.transpose(quantiles)\n\n self.scale_ = quantiles[1] - quantiles[0]\n self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False)\n if self.unit_variance:\n adjust = (stats.norm.ppf(q_max / 100.0) -\n stats.norm.ppf(q_min / 100.0))\n self.scale_ = self.scale_ / adjust\n else:\n self.scale_ = None\n\n return self\n\n def transform(self, X):\n \"\"\"Center and scale the data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the specified axis.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse=('csr', 'csc'),\n copy=self.copy, estimator=self,\n dtype=FLOAT_DTYPES, reset=False,\n force_all_finite='allow-nan')\n\n if sparse.issparse(X):\n if self.with_scaling:\n inplace_column_scale(X, 1.0 / self.scale_)\n else:\n if self.with_centering:\n X -= self.center_\n if self.with_scaling:\n X /= self.scale_\n return X\n\n def inverse_transform(self, X):\n \"\"\"Scale back the data to the original representation\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The rescaled data to be transformed back.\n\n Returns\n -------\n X_tr : 
{ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,\n estimator=self, dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n\n if sparse.issparse(X):\n if self.with_scaling:\n inplace_column_scale(X, self.scale_)\n else:\n if self.with_scaling:\n X *= self.scale_\n if self.with_centering:\n X += self.center_\n return X\n\n def _more_tags(self):\n return {'allow_nan': True}", + "instance_attributes": [ + { + "name": "with_centering", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "with_scaling", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "quantile_range", + "types": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "name": "unit_variance", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler", + "name": "StandardScaler", + "qname": "sklearn.preprocessing._data.StandardScaler", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._data/StandardScaler/__init__", + "scikit-learn/sklearn.preprocessing._data/StandardScaler/_reset", + "scikit-learn/sklearn.preprocessing._data/StandardScaler/fit", + "scikit-learn/sklearn.preprocessing._data/StandardScaler/partial_fit", + "scikit-learn/sklearn.preprocessing._data/StandardScaler/transform", + "scikit-learn/sklearn.preprocessing._data/StandardScaler/inverse_transform", + "scikit-learn/sklearn.preprocessing._data/StandardScaler/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Standardize features by removing the mean and scaling to unit variance\n\nThe standard score of a sample `x` is calculated as:\n\n z = (x - u) / s\n\nwhere `u` is the mean of the training samples or zero if `with_mean=False`,\nand `s` is the standard deviation of the training samples or one if\n`with_std=False`.\n\nCentering and scaling happen independently on each feature by computing\nthe relevant statistics on the samples in the training set. Mean and\nstandard deviation are then stored to be used on later data using\n:meth:`transform`.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators: they might behave badly if the\nindividual features do not more or less look like standard normally\ndistributed data (e.g. Gaussian with 0 mean and unit variance).\n\nFor instance many elements used in the objective function of\na learning algorithm (such as the RBF kernel of Support Vector\nMachines or the L1 and L2 regularizers of linear models) assume that\nall features are centered around 0 and have variance in the same\norder. 
If a feature has a variance that is orders of magnitude larger\nthat others, it might dominate the objective function and make the\nestimator unable to learn from other features correctly as expected.\n\nThis scaler can also be applied to sparse CSR or CSC matrices by passing\n`with_mean=False` to avoid breaking the sparsity structure of the data.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Standardize features by removing the mean and scaling to unit variance\n\nThe standard score of a sample `x` is calculated as:\n\n z = (x - u) / s\n\nwhere `u` is the mean of the training samples or zero if `with_mean=False`,\nand `s` is the standard deviation of the training samples or one if\n`with_std=False`.\n\nCentering and scaling happen independently on each feature by computing\nthe relevant statistics on the samples in the training set. Mean and\nstandard deviation are then stored to be used on later data using\n:meth:`transform`.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators: they might behave badly if the\nindividual features do not more or less look like standard normally\ndistributed data (e.g. Gaussian with 0 mean and unit variance).\n\nFor instance many elements used in the objective function of\na learning algorithm (such as the RBF kernel of Support Vector\nMachines or the L1 and L2 regularizers of linear models) assume that\nall features are centered around 0 and have variance in the same\norder. If a feature has a variance that is orders of magnitude larger\nthat others, it might dominate the objective function and make the\nestimator unable to learn from other features correctly as expected.\n\nThis scaler can also be applied to sparse CSR or CSC matrices by passing\n`with_mean=False` to avoid breaking the sparsity structure of the data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncopy : bool, default=True\n If False, try to avoid a copy and do inplace scaling instead.\n This is not guaranteed to always work inplace; e.g. if the data is\n not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n returned.\n\nwith_mean : bool, default=True\n If True, center the data before scaling.\n This does not work (and will raise an exception) when attempted on\n sparse matrices, because centering them entails building a dense\n matrix which in common use cases is likely to be too large to fit in\n memory.\n\nwith_std : bool, default=True\n If True, scale the data to unit variance (or equivalently,\n unit standard deviation).\n\nAttributes\n----------\nscale_ : ndarray of shape (n_features,) or None\n Per feature relative scaling of the data to achieve zero mean and unit\n variance. Generally this is calculated using `np.sqrt(var_)`. If a\n variance is zero, we can't achieve unit variance, and the data is left\n as-is, giving a scaling factor of 1. `scale_` is equal to `None`\n when `with_std=False`.\n\n .. versionadded:: 0.17\n *scale_*\n\nmean_ : ndarray of shape (n_features,) or None\n The mean value for each feature in the training set.\n Equal to ``None`` when ``with_mean=False``.\n\nvar_ : ndarray of shape (n_features,) or None\n The variance for each feature in the training set. Used to compute\n `scale_`. Equal to ``None`` when ``with_std=False``.\n\nn_samples_seen_ : int or ndarray of shape (n_features,)\n The number of samples processed by the estimator for each feature.\n If there are no missing samples, the ``n_samples_seen`` will be an\n integer, otherwise it will be an array of dtype int. 
If\n `sample_weights` are used it will be a float (if no missing data)\n or an array of dtype float that sums the weights seen so far.\n Will be reset on new calls to fit, but increments across\n ``partial_fit`` calls.\n\nExamples\n--------\n>>> from sklearn.preprocessing import StandardScaler\n>>> data = [[0, 0], [0, 0], [1, 1], [1, 1]]\n>>> scaler = StandardScaler()\n>>> print(scaler.fit(data))\nStandardScaler()\n>>> print(scaler.mean_)\n[0.5 0.5]\n>>> print(scaler.transform(data))\n[[-1. -1.]\n [-1. -1.]\n [ 1. 1.]\n [ 1. 1.]]\n>>> print(scaler.transform([[2, 2]]))\n[[3. 3.]]\n\nSee Also\n--------\nscale : Equivalent function without the estimator API.\n\n:class:`~sklearn.decomposition.PCA` : Further removes the linear\n correlation across features with 'whiten=True'.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nWe use a biased estimator for the standard deviation, equivalent to\n`numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to\naffect model performance.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.", + "code": "class StandardScaler(TransformerMixin, BaseEstimator):\n \"\"\"Standardize features by removing the mean and scaling to unit variance\n\n The standard score of a sample `x` is calculated as:\n\n z = (x - u) / s\n\n where `u` is the mean of the training samples or zero if `with_mean=False`,\n and `s` is the standard deviation of the training samples or one if\n `with_std=False`.\n\n Centering and scaling happen independently on each feature by computing\n the relevant statistics on the samples in the training set. Mean and\n standard deviation are then stored to be used on later data using\n :meth:`transform`.\n\n Standardization of a dataset is a common requirement for many\n machine learning estimators: they might behave badly if the\n individual features do not more or less look like standard normally\n distributed data (e.g. Gaussian with 0 mean and unit variance).\n\n For instance many elements used in the objective function of\n a learning algorithm (such as the RBF kernel of Support Vector\n Machines or the L1 and L2 regularizers of linear models) assume that\n all features are centered around 0 and have variance in the same\n order. If a feature has a variance that is orders of magnitude larger\n that others, it might dominate the objective function and make the\n estimator unable to learn from other features correctly as expected.\n\n This scaler can also be applied to sparse CSR or CSC matrices by passing\n `with_mean=False` to avoid breaking the sparsity structure of the data.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n copy : bool, default=True\n If False, try to avoid a copy and do inplace scaling instead.\n This is not guaranteed to always work inplace; e.g. 
if the data is\n not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n returned.\n\n with_mean : bool, default=True\n If True, center the data before scaling.\n This does not work (and will raise an exception) when attempted on\n sparse matrices, because centering them entails building a dense\n matrix which in common use cases is likely to be too large to fit in\n memory.\n\n with_std : bool, default=True\n If True, scale the data to unit variance (or equivalently,\n unit standard deviation).\n\n Attributes\n ----------\n scale_ : ndarray of shape (n_features,) or None\n Per feature relative scaling of the data to achieve zero mean and unit\n variance. Generally this is calculated using `np.sqrt(var_)`. If a\n variance is zero, we can't achieve unit variance, and the data is left\n as-is, giving a scaling factor of 1. `scale_` is equal to `None`\n when `with_std=False`.\n\n .. versionadded:: 0.17\n *scale_*\n\n mean_ : ndarray of shape (n_features,) or None\n The mean value for each feature in the training set.\n Equal to ``None`` when ``with_mean=False``.\n\n var_ : ndarray of shape (n_features,) or None\n The variance for each feature in the training set. Used to compute\n `scale_`. Equal to ``None`` when ``with_std=False``.\n\n n_samples_seen_ : int or ndarray of shape (n_features,)\n The number of samples processed by the estimator for each feature.\n If there are no missing samples, the ``n_samples_seen`` will be an\n integer, otherwise it will be an array of dtype int. If\n `sample_weights` are used it will be a float (if no missing data)\n or an array of dtype float that sums the weights seen so far.\n Will be reset on new calls to fit, but increments across\n ``partial_fit`` calls.\n\n Examples\n --------\n >>> from sklearn.preprocessing import StandardScaler\n >>> data = [[0, 0], [0, 0], [1, 1], [1, 1]]\n >>> scaler = StandardScaler()\n >>> print(scaler.fit(data))\n StandardScaler()\n >>> print(scaler.mean_)\n [0.5 0.5]\n >>> print(scaler.transform(data))\n [[-1. -1.]\n [-1. -1.]\n [ 1. 1.]\n [ 1. 1.]]\n >>> print(scaler.transform([[2, 2]]))\n [[3. 3.]]\n\n See Also\n --------\n scale : Equivalent function without the estimator API.\n\n :class:`~sklearn.decomposition.PCA` : Further removes the linear\n correlation across features with 'whiten=True'.\n\n Notes\n -----\n NaNs are treated as missing values: disregarded in fit, and maintained in\n transform.\n\n We use a biased estimator for the standard deviation, equivalent to\n `numpy.std(x, ddof=0)`. 
Note that the choice of `ddof` is unlikely to\n affect model performance.\n\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n \"\"\" # noqa\n\n @_deprecate_positional_args\n def __init__(self, *, copy=True, with_mean=True, with_std=True):\n self.with_mean = with_mean\n self.with_std = with_std\n self.copy = copy\n\n def _reset(self):\n \"\"\"Reset internal data-dependent state of the scaler, if necessary.\n\n __init__ parameters are not touched.\n \"\"\"\n\n # Checking one attribute is enough, becase they are all set together\n # in partial_fit\n if hasattr(self, 'scale_'):\n del self.scale_\n del self.n_samples_seen_\n del self.mean_\n del self.var_\n\n def fit(self, X, y=None, sample_weight=None):\n \"\"\"Compute the mean and std to be used for later scaling.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample.\n\n .. versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n\n # Reset internal state before fitting\n self._reset()\n return self.partial_fit(X, y, sample_weight)\n\n def partial_fit(self, X, y=None, sample_weight=None):\n \"\"\"\n Online computation of mean and std on X for later scaling.\n\n All of X is processed as a single batch. This is intended for cases\n when :meth:`fit` is not feasible due to very large number of\n `n_samples` or because X is read from a continuous stream.\n\n The algorithm for incremental mean and std is given in Equation 1.5a,b\n in Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. \"Algorithms\n for computing the sample variance: Analysis and recommendations.\"\n The American Statistician 37.3 (1983): 242-247:\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample.\n\n .. versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n first_call = not hasattr(self, \"n_samples_seen_\")\n X = self._validate_data(X, accept_sparse=('csr', 'csc'),\n estimator=self, dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan', reset=first_call)\n n_features = X.shape[1]\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=X.dtype)\n\n # Even in the case of `with_mean=False`, we update the mean anyway\n # This is needed for the incremental computation of the var\n # See incr_mean_variance_axis and _incremental_mean_variance_axis\n\n # if n_samples_seen_ is an integer (i.e. 
no missing values), we need to\n # transform it to a NumPy array of shape (n_features,) required by\n # incr_mean_variance_axis and _incremental_variance_axis\n dtype = np.int64 if sample_weight is None else X.dtype\n if not hasattr(self, 'n_samples_seen_'):\n self.n_samples_seen_ = np.zeros(n_features, dtype=dtype)\n elif np.size(self.n_samples_seen_) == 1:\n self.n_samples_seen_ = np.repeat(\n self.n_samples_seen_, X.shape[1])\n self.n_samples_seen_ = \\\n self.n_samples_seen_.astype(dtype, copy=False)\n\n if sparse.issparse(X):\n if self.with_mean:\n raise ValueError(\n \"Cannot center sparse matrices: pass `with_mean=False` \"\n \"instead. See docstring for motivation and alternatives.\")\n sparse_constructor = (sparse.csr_matrix\n if X.format == 'csr' else sparse.csc_matrix)\n\n if self.with_std:\n # First pass\n if not hasattr(self, 'scale_'):\n self.mean_, self.var_, self.n_samples_seen_ = \\\n mean_variance_axis(X, axis=0, weights=sample_weight,\n return_sum_weights=True)\n # Next passes\n else:\n self.mean_, self.var_, self.n_samples_seen_ = \\\n incr_mean_variance_axis(X, axis=0,\n last_mean=self.mean_,\n last_var=self.var_,\n last_n=self.n_samples_seen_,\n weights=sample_weight)\n # We force the mean and variance to float64 for large arrays\n # See https://github.com/scikit-learn/scikit-learn/pull/12338\n self.mean_ = self.mean_.astype(np.float64, copy=False)\n self.var_ = self.var_.astype(np.float64, copy=False)\n else:\n self.mean_ = None # as with_mean must be False for sparse\n self.var_ = None\n weights = _check_sample_weight(sample_weight, X)\n sum_weights_nan = weights @ sparse_constructor(\n (np.isnan(X.data), X.indices, X.indptr),\n shape=X.shape)\n self.n_samples_seen_ += (\n (np.sum(weights) - sum_weights_nan).astype(dtype)\n )\n else:\n # First pass\n if not hasattr(self, 'scale_'):\n self.mean_ = .0\n if self.with_std:\n self.var_ = .0\n else:\n self.var_ = None\n\n if not self.with_mean and not self.with_std:\n self.mean_ = None\n self.var_ = None\n self.n_samples_seen_ += X.shape[0] - np.isnan(X).sum(axis=0)\n\n elif sample_weight is not None:\n self.mean_, self.var_, self.n_samples_seen_ = \\\n _incremental_weighted_mean_and_var(X, sample_weight,\n self.mean_,\n self.var_,\n self.n_samples_seen_)\n else:\n self.mean_, self.var_, self.n_samples_seen_ = \\\n _incremental_mean_and_var(X, self.mean_, self.var_,\n self.n_samples_seen_)\n\n # for backward-compatibility, reduce n_samples_seen_ to an integer\n # if the number of samples is the same for each feature (i.e. no\n # missing values)\n if np.ptp(self.n_samples_seen_) == 0:\n self.n_samples_seen_ = self.n_samples_seen_[0]\n\n if self.with_std:\n self.scale_ = _handle_zeros_in_scale(np.sqrt(self.var_))\n else:\n self.scale_ = None\n\n return self\n\n def transform(self, X, copy=None):\n \"\"\"Perform standardization by centering and scaling\n\n Parameters\n ----------\n X : {array-like, sparse matrix of shape (n_samples, n_features)\n The data used to scale along the features axis.\n copy : bool, default=None\n Copy the input X or not.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n check_is_fitted(self)\n\n copy = copy if copy is not None else self.copy\n X = self._validate_data(X, reset=False,\n accept_sparse='csr', copy=copy,\n estimator=self, dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n\n if sparse.issparse(X):\n if self.with_mean:\n raise ValueError(\n \"Cannot center sparse matrices: pass `with_mean=False` \"\n \"instead. 
See docstring for motivation and alternatives.\")\n if self.scale_ is not None:\n inplace_column_scale(X, 1 / self.scale_)\n else:\n if self.with_mean:\n X -= self.mean_\n if self.with_std:\n X /= self.scale_\n return X\n\n def inverse_transform(self, X, copy=None):\n \"\"\"Scale back the data to the original representation.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis.\n copy : bool, default=None\n Copy the input X or not.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n check_is_fitted(self)\n\n copy = copy if copy is not None else self.copy\n if sparse.issparse(X):\n if self.with_mean:\n raise ValueError(\n \"Cannot uncenter sparse matrices: pass `with_mean=False` \"\n \"instead. See docstring for motivation and alternatives.\")\n if not sparse.isspmatrix_csr(X):\n X = X.tocsr()\n copy = False\n if copy:\n X = X.copy()\n if self.scale_ is not None:\n inplace_column_scale(X, self.scale_)\n else:\n X = np.asarray(X)\n if copy:\n X = X.copy()\n if self.with_std:\n X *= self.scale_\n if self.with_mean:\n X += self.mean_\n return X\n\n def _more_tags(self):\n return {'allow_nan': True,\n 'preserves_dtype': [np.float64, np.float32]}", + "instance_attributes": [ + { + "name": "with_mean", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "with_std", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "copy", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "n_samples_seen_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer", + "name": "KBinsDiscretizer", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/__init__", + "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/fit", + "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/_validate_n_bins", + "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/transform", + "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/inverse_transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Bin continuous data into intervals.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "docstring": "Bin continuous data into intervals.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nn_bins : int or array-like of shape (n_features,), default=5\n The number of bins to produce. Raises ValueError if ``n_bins < 2``.\n\nencode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'\n Method used to encode the transformed result.\n\n onehot\n Encode the transformed result with one-hot encoding\n and return a sparse matrix. Ignored features are always\n stacked to the right.\n onehot-dense\n Encode the transformed result with one-hot encoding\n and return a dense array. 
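For illustration (not part of the API data; assumes scikit-learn 0.24 and SciPy, with `X_sp` an arbitrary test matrix), a sketch of the sparse-input behavior enforced by the `with_mean` checks above:

import numpy as np
from scipy import sparse
from sklearn.preprocessing import StandardScaler

X_sp = sparse.random(10, 4, density=0.3, format='csr', random_state=0)

# Centering would densify the matrix, so the default with_mean=True is
# rejected with the ValueError raised above...
try:
    StandardScaler().fit(X_sp)
except ValueError as exc:
    print(exc)

# ...while scaling alone preserves sparsity.
X_scaled = StandardScaler(with_mean=False).fit_transform(X_sp)
print(sparse.issparse(X_scaled))  # should print: True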
Ignored features are always\n stacked to the right.\n ordinal\n Return the bin identifier encoded as an integer value.\n\nstrategy : {'uniform', 'quantile', 'kmeans'}, default='quantile'\n Strategy used to define the widths of the bins.\n\n uniform\n All bins in each feature have identical widths.\n quantile\n All bins in each feature have the same number of points.\n kmeans\n Values in each bin have the same nearest center of a 1D k-means\n cluster.\n\ndtype : {np.float32, np.float64}, default=None\n The desired data-type for the output. If None, output dtype is\n consistent with input dtype. Only np.float32 and np.float64 are\n supported.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nn_bins_ : ndarray of shape (n_features,), dtype=np.int_\n Number of bins per feature. Bins whose width are too small\n (i.e., <= 1e-8) are removed with a warning.\n\nbin_edges_ : ndarray of ndarray of shape (n_features,)\n The edges of each bin. Contain arrays of varying shapes ``(n_bins_, )``\n Ignored features will have empty arrays.\n\nSee Also\n--------\nBinarizer : Class used to bin values as ``0`` or\n ``1`` based on a parameter ``threshold``.\n\nNotes\n-----\nIn bin edges for feature ``i``, the first and last values are used only for\n``inverse_transform``. During transform, bin edges are extended to::\n\n np.concatenate([-np.inf, bin_edges_[i][1:-1], np.inf])\n\nYou can combine ``KBinsDiscretizer`` with\n:class:`~sklearn.compose.ColumnTransformer` if you only want to preprocess\npart of the features.\n\n``KBinsDiscretizer`` might produce constant features (e.g., when\n``encode = 'onehot'`` and certain bins do not contain any data).\nThese features can be removed with feature selection algorithms\n(e.g., :class:`~sklearn.feature_selection.VarianceThreshold`).\n\nExamples\n--------\n>>> X = [[-2, 1, -4, -1],\n... [-1, 2, -3, -0.5],\n... [ 0, 3, -2, 0.5],\n... [ 1, 4, -1, 2]]\n>>> est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')\n>>> est.fit(X)\nKBinsDiscretizer(...)\n>>> Xt = est.transform(X)\n>>> Xt # doctest: +SKIP\narray([[ 0., 0., 0., 0.],\n [ 1., 1., 1., 0.],\n [ 2., 2., 2., 1.],\n [ 2., 2., 2., 2.]])\n\nSometimes it may be useful to convert the data back into the original\nfeature space. The ``inverse_transform`` function converts the binned\ndata into the original feature space. Each value will be equal to the mean\nof the two bin edges.\n\n>>> est.bin_edges_[0]\narray([-2., -1., 0., 1.])\n>>> est.inverse_transform(Xt)\narray([[-1.5, 1.5, -3.5, -0.5],\n [-0.5, 2.5, -2.5, -0.5],\n [ 0.5, 3.5, -1.5, 0.5],\n [ 0.5, 3.5, -1.5, 1.5]])", + "code": "class KBinsDiscretizer(TransformerMixin, BaseEstimator):\n \"\"\"\n Bin continuous data into intervals.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.20\n\n Parameters\n ----------\n n_bins : int or array-like of shape (n_features,), default=5\n The number of bins to produce. Raises ValueError if ``n_bins < 2``.\n\n encode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'\n Method used to encode the transformed result.\n\n onehot\n Encode the transformed result with one-hot encoding\n and return a sparse matrix. Ignored features are always\n stacked to the right.\n onehot-dense\n Encode the transformed result with one-hot encoding\n and return a dense array. 
Ignored features are always\n stacked to the right.\n ordinal\n Return the bin identifier encoded as an integer value.\n\n strategy : {'uniform', 'quantile', 'kmeans'}, default='quantile'\n Strategy used to define the widths of the bins.\n\n uniform\n All bins in each feature have identical widths.\n quantile\n All bins in each feature have the same number of points.\n kmeans\n Values in each bin have the same nearest center of a 1D k-means\n cluster.\n\n dtype : {np.float32, np.float64}, default=None\n The desired data-type for the output. If None, output dtype is\n consistent with input dtype. Only np.float32 and np.float64 are\n supported.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n n_bins_ : ndarray of shape (n_features,), dtype=np.int_\n Number of bins per feature. Bins whose width are too small\n (i.e., <= 1e-8) are removed with a warning.\n\n bin_edges_ : ndarray of ndarray of shape (n_features,)\n The edges of each bin. Contain arrays of varying shapes ``(n_bins_, )``\n Ignored features will have empty arrays.\n\n See Also\n --------\n Binarizer : Class used to bin values as ``0`` or\n ``1`` based on a parameter ``threshold``.\n\n Notes\n -----\n In bin edges for feature ``i``, the first and last values are used only for\n ``inverse_transform``. During transform, bin edges are extended to::\n\n np.concatenate([-np.inf, bin_edges_[i][1:-1], np.inf])\n\n You can combine ``KBinsDiscretizer`` with\n :class:`~sklearn.compose.ColumnTransformer` if you only want to preprocess\n part of the features.\n\n ``KBinsDiscretizer`` might produce constant features (e.g., when\n ``encode = 'onehot'`` and certain bins do not contain any data).\n These features can be removed with feature selection algorithms\n (e.g., :class:`~sklearn.feature_selection.VarianceThreshold`).\n\n Examples\n --------\n >>> X = [[-2, 1, -4, -1],\n ... [-1, 2, -3, -0.5],\n ... [ 0, 3, -2, 0.5],\n ... [ 1, 4, -1, 2]]\n >>> est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')\n >>> est.fit(X)\n KBinsDiscretizer(...)\n >>> Xt = est.transform(X)\n >>> Xt # doctest: +SKIP\n array([[ 0., 0., 0., 0.],\n [ 1., 1., 1., 0.],\n [ 2., 2., 2., 1.],\n [ 2., 2., 2., 2.]])\n\n Sometimes it may be useful to convert the data back into the original\n feature space. The ``inverse_transform`` function converts the binned\n data into the original feature space. Each value will be equal to the mean\n of the two bin edges.\n\n >>> est.bin_edges_[0]\n array([-2., -1., 0., 1.])\n >>> est.inverse_transform(Xt)\n array([[-1.5, 1.5, -3.5, -0.5],\n [-0.5, 2.5, -2.5, -0.5],\n [ 0.5, 3.5, -1.5, 0.5],\n [ 0.5, 3.5, -1.5, 1.5]])\n\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, n_bins=5, *, encode='onehot', strategy='quantile',\n dtype=None):\n self.n_bins = n_bins\n self.encode = encode\n self.strategy = strategy\n self.dtype = dtype\n\n def fit(self, X, y=None):\n \"\"\"\n Fit the estimator.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data to be discretized.\n\n y : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\n Returns\n -------\n self\n \"\"\"\n X = self._validate_data(X, dtype='numeric')\n\n supported_dtype = (np.float64, np.float32)\n if self.dtype in supported_dtype:\n output_dtype = self.dtype\n elif self.dtype is None:\n output_dtype = X.dtype\n else:\n raise ValueError(\n f\"Valid options for 'dtype' are \"\n f\"{supported_dtype + (None,)}. 
Got dtype={self.dtype} \"\n f\" instead.\"\n )\n\n valid_encode = ('onehot', 'onehot-dense', 'ordinal')\n if self.encode not in valid_encode:\n raise ValueError(\"Valid options for 'encode' are {}. \"\n \"Got encode={!r} instead.\"\n .format(valid_encode, self.encode))\n valid_strategy = ('uniform', 'quantile', 'kmeans')\n if self.strategy not in valid_strategy:\n raise ValueError(\"Valid options for 'strategy' are {}. \"\n \"Got strategy={!r} instead.\"\n .format(valid_strategy, self.strategy))\n\n n_features = X.shape[1]\n n_bins = self._validate_n_bins(n_features)\n\n bin_edges = np.zeros(n_features, dtype=object)\n for jj in range(n_features):\n column = X[:, jj]\n col_min, col_max = column.min(), column.max()\n\n if col_min == col_max:\n warnings.warn(\"Feature %d is constant and will be \"\n \"replaced with 0.\" % jj)\n n_bins[jj] = 1\n bin_edges[jj] = np.array([-np.inf, np.inf])\n continue\n\n if self.strategy == 'uniform':\n bin_edges[jj] = np.linspace(col_min, col_max, n_bins[jj] + 1)\n\n elif self.strategy == 'quantile':\n quantiles = np.linspace(0, 100, n_bins[jj] + 1)\n bin_edges[jj] = np.asarray(np.percentile(column, quantiles))\n\n elif self.strategy == 'kmeans':\n from ..cluster import KMeans # fixes import loops\n\n # Deterministic initialization with uniform spacing\n uniform_edges = np.linspace(col_min, col_max, n_bins[jj] + 1)\n init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5\n\n # 1D k-means procedure\n km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1)\n centers = km.fit(column[:, None]).cluster_centers_[:, 0]\n # Must sort, centers may be unsorted even with sorted init\n centers.sort()\n bin_edges[jj] = (centers[1:] + centers[:-1]) * 0.5\n bin_edges[jj] = np.r_[col_min, bin_edges[jj], col_max]\n\n # Remove bins whose width are too small (i.e., <= 1e-8)\n if self.strategy in ('quantile', 'kmeans'):\n mask = np.ediff1d(bin_edges[jj], to_begin=np.inf) > 1e-8\n bin_edges[jj] = bin_edges[jj][mask]\n if len(bin_edges[jj]) - 1 != n_bins[jj]:\n warnings.warn('Bins whose width are too small (i.e., <= '\n '1e-8) in feature %d are removed. Consider '\n 'decreasing the number of bins.' % jj)\n n_bins[jj] = len(bin_edges[jj]) - 1\n\n self.bin_edges_ = bin_edges\n self.n_bins_ = n_bins\n\n if 'onehot' in self.encode:\n self._encoder = OneHotEncoder(\n categories=[np.arange(i) for i in self.n_bins_],\n sparse=self.encode == 'onehot',\n dtype=output_dtype)\n # Fit the OneHotEncoder with toy datasets\n # so that it's ready for use after the KBinsDiscretizer is fitted\n self._encoder.fit(np.zeros((1, len(self.n_bins_))))\n\n return self\n\n def _validate_n_bins(self, n_features):\n \"\"\"Returns n_bins_, the number of bins per feature.\n \"\"\"\n orig_bins = self.n_bins\n if isinstance(orig_bins, numbers.Number):\n if not isinstance(orig_bins, numbers.Integral):\n raise ValueError(\"{} received an invalid n_bins type. \"\n \"Received {}, expected int.\"\n .format(KBinsDiscretizer.__name__,\n type(orig_bins).__name__))\n if orig_bins < 2:\n raise ValueError(\"{} received an invalid number \"\n \"of bins. 
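A short sketch (not part of the API data; assumes scikit-learn 0.24, with a deliberately skewed toy feature of our choosing) contrasting the three binning strategies implemented in `fit` above:

import numpy as np
from sklearn.preprocessing import KBinsDiscretizer

# A skewed feature makes the three strategies easy to tell apart.
X = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 5.0]).reshape(-1, 1)

for strategy in ('uniform', 'quantile', 'kmeans'):
    est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy=strategy)
    est.fit(X)
    print(strategy, est.bin_edges_[0])
# 'uniform' spreads the edges evenly over [0, 5], 'quantile' aims for the
# same number of samples per bin, and 'kmeans' places edges midway
# between neighboring 1D cluster centers.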
Received {}, expected at least 2.\"\n .format(KBinsDiscretizer.__name__, orig_bins))\n return np.full(n_features, orig_bins, dtype=int)\n\n n_bins = check_array(orig_bins, dtype=int, copy=True,\n ensure_2d=False)\n\n if n_bins.ndim > 1 or n_bins.shape[0] != n_features:\n raise ValueError(\"n_bins must be a scalar or array \"\n \"of shape (n_features,).\")\n\n bad_nbins_value = (n_bins < 2) | (n_bins != orig_bins)\n\n violating_indices = np.where(bad_nbins_value)[0]\n if violating_indices.shape[0] > 0:\n indices = \", \".join(str(i) for i in violating_indices)\n raise ValueError(\"{} received an invalid number \"\n \"of bins at indices {}. Number of bins \"\n \"must be at least 2, and must be an int.\"\n .format(KBinsDiscretizer.__name__, indices))\n return n_bins\n\n def transform(self, X):\n \"\"\"\n Discretize the data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data to be discretized.\n\n Returns\n -------\n Xt : {ndarray, sparse matrix}, dtype={np.float32, np.float64}\n Data in the binned space. Will be a sparse matrix if\n `self.encode='onehot'` and ndarray otherwise.\n \"\"\"\n check_is_fitted(self)\n\n # check input and attribute dtypes\n dtype = (np.float64, np.float32) if self.dtype is None else self.dtype\n Xt = self._validate_data(X, copy=True, dtype=dtype, reset=False)\n\n bin_edges = self.bin_edges_\n for jj in range(Xt.shape[1]):\n # Values which are close to a bin edge are susceptible to numeric\n # instability. Add eps to X so these values are binned correctly\n # with respect to their decimal truncation. See documentation of\n # numpy.isclose for an explanation of ``rtol`` and ``atol``.\n rtol = 1.e-5\n atol = 1.e-8\n eps = atol + rtol * np.abs(Xt[:, jj])\n Xt[:, jj] = np.digitize(Xt[:, jj] + eps, bin_edges[jj][1:])\n np.clip(Xt, 0, self.n_bins_ - 1, out=Xt)\n\n if self.encode == 'ordinal':\n return Xt\n\n dtype_init = None\n if 'onehot' in self.encode:\n dtype_init = self._encoder.dtype\n self._encoder.dtype = Xt.dtype\n try:\n Xt_enc = self._encoder.transform(Xt)\n finally:\n # revert the initial dtype to avoid modifying self.\n self._encoder.dtype = dtype_init\n return Xt_enc\n\n def inverse_transform(self, Xt):\n \"\"\"\n Transform discretized data back to original feature space.\n\n Note that this function does not regenerate the original data\n due to discretization rounding.\n\n Parameters\n ----------\n Xt : array-like of shape (n_samples, n_features)\n Transformed data in the binned space.\n\n Returns\n -------\n Xinv : ndarray, dtype={np.float32, np.float64}\n Data in the original feature space.\n \"\"\"\n check_is_fitted(self)\n\n if 'onehot' in self.encode:\n Xt = self._encoder.inverse_transform(Xt)\n\n Xinv = check_array(Xt, copy=True, dtype=(np.float64, np.float32))\n n_features = self.n_bins_.shape[0]\n if Xinv.shape[1] != n_features:\n raise ValueError(\"Incorrect number of features. 
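To illustrate the per-feature `n_bins` handling in `_validate_n_bins` above (a sketch, not part of the API data; assumes scikit-learn 0.24 and a random toy `X`):

import numpy as np
from sklearn.preprocessing import KBinsDiscretizer

X = np.random.RandomState(0).uniform(size=(20, 2))

# One bin count per feature; every entry must be an integer >= 2.
est = KBinsDiscretizer(n_bins=[3, 5], encode='ordinal').fit(X)
print(est.n_bins_)  # should print: [3 5]

try:
    KBinsDiscretizer(n_bins=[3, 1], encode='ordinal').fit(X)
except ValueError as exc:
    print(exc)  # reports an invalid number of bins at indices 1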
Expecting {}, \"\n \"received {}.\".format(n_features, Xinv.shape[1]))\n\n for jj in range(n_features):\n bin_edges = self.bin_edges_[jj]\n bin_centers = (bin_edges[1:] + bin_edges[:-1]) * 0.5\n Xinv[:, jj] = bin_centers[np.int_(Xinv[:, jj])]\n\n return Xinv", + "instance_attributes": [ + { + "name": "n_bins", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "encode", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "strategy", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "bin_edges_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder", + "name": "OneHotEncoder", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder", + "decorators": [], + "superclasses": ["_BaseEncoder"], + "methods": [ + "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/__init__", + "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/_validate_keywords", + "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/_compute_drop_idx", + "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/fit", + "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/fit_transform", + "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/transform", + "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/inverse_transform", + "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/get_feature_names" + ], + "is_public": false, + "reexported_by": [], + "description": "Encode categorical features as a one-hot numeric array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\nencoding scheme. This creates a binary column for each category and\nreturns a sparse matrix or dense array (depending on the ``sparse``\nparameter)\n\nBy default, the encoder derives the categories based on the unique values\nin each feature. Alternatively, you can also specify the `categories`\nmanually.\n\nThis encoding is needed for feeding categorical data to many scikit-learn\nestimators, notably linear models and SVMs with the standard kernels.\n\nNote: a one-hot encoding of y labels should use a LabelBinarizer\ninstead.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Encode categorical features as a one-hot numeric array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\nencoding scheme. This creates a binary column for each category and\nreturns a sparse matrix or dense array (depending on the ``sparse``\nparameter)\n\nBy default, the encoder derives the categories based on the unique values\nin each feature. 
Alternatively, you can also specify the `categories`\nmanually.\n\nThis encoding is needed for feeding categorical data to many scikit-learn\nestimators, notably linear models and SVMs with the standard kernels.\n\nNote: a one-hot encoding of y labels should use a LabelBinarizer\ninstead.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncategories : 'auto' or a list of array-like, default='auto'\n Categories (unique values) per feature:\n\n - 'auto' : Determine categories automatically from the training data.\n - list : ``categories[i]`` holds the categories expected in the ith\n column. The passed categories should not mix strings and numeric\n values within a single feature, and should be sorted in case of\n numeric values.\n\n The used categories can be found in the ``categories_`` attribute.\n\n .. versionadded:: 0.20\n\ndrop : {'first', 'if_binary'} or a array-like of shape (n_features,), default=None\n Specifies a methodology to use to drop one of the categories per\n feature. This is useful in situations where perfectly collinear\n features cause problems, such as when feeding the resulting data\n into a neural network or an unregularized regression.\n\n However, dropping one category breaks the symmetry of the original\n representation and can therefore induce a bias in downstream models,\n for instance for penalized linear classification or regression models.\n\n - None : retain all features (the default).\n - 'first' : drop the first category in each feature. If only one\n category is present, the feature will be dropped entirely.\n - 'if_binary' : drop the first category in each feature with two\n categories. Features with 1 or more than 2 categories are\n left intact.\n - array : ``drop[i]`` is the category in feature ``X[:, i]`` that\n should be dropped.\n\n .. versionadded:: 0.21\n The parameter `drop` was added in 0.21.\n\n .. versionchanged:: 0.23\n The option `drop='if_binary'` was added in 0.23.\n\nsparse : bool, default=True\n Will return sparse matrix if set True else will return an array.\n\ndtype : number type, default=float\n Desired dtype of output.\n\nhandle_unknown : {'error', 'ignore'}, default='error'\n Whether to raise an error or ignore if an unknown categorical feature\n is present during transform (default is to raise). When this parameter\n is set to 'ignore' and an unknown category is encountered during\n transform, the resulting one-hot encoded columns for this feature\n will be all zeros. In the inverse transform, an unknown category\n will be denoted as None.\n\nAttributes\n----------\ncategories_ : list of arrays\n The categories of each feature determined during fitting\n (in order of the features in X and corresponding with the output\n of ``transform``). This includes the category specified in ``drop``\n (if any).\n\ndrop_idx_ : array of shape (n_features,)\n - ``drop_idx_[i]`` is\u00a0the index in ``categories_[i]`` of the category\n to be dropped for each feature.\n - ``drop_idx_[i] = None`` if no category is to be dropped from the\n feature with index ``i``, e.g. when `drop='if_binary'` and the\n feature isn't binary.\n - ``drop_idx_ = None`` if all the transformed features will be\n retained.\n\n .. 
versionchanged:: 0.23\n Added the possibility to contain `None` values.\n\nSee Also\n--------\nOrdinalEncoder : Performs an ordinal (integer)\n encoding of the categorical features.\nsklearn.feature_extraction.DictVectorizer : Performs a one-hot encoding of\n dictionary items (also handles string-valued features).\nsklearn.feature_extraction.FeatureHasher : Performs an approximate one-hot\n encoding of dictionary items or strings.\nLabelBinarizer : Binarizes labels in a one-vs-all\n fashion.\nMultiLabelBinarizer : Transforms between iterable of\n iterables and a multilabel format, e.g. a (samples x classes) binary\n matrix indicating the presence of a class label.\n\nExamples\n--------\nGiven a dataset with two features, we let the encoder find the unique\nvalues per feature and transform the data to a binary one-hot encoding.\n\n>>> from sklearn.preprocessing import OneHotEncoder\n\nOne can discard categories not seen during `fit`:\n\n>>> enc = OneHotEncoder(handle_unknown='ignore')\n>>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n>>> enc.fit(X)\nOneHotEncoder(handle_unknown='ignore')\n>>> enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> enc.transform([['Female', 1], ['Male', 4]]).toarray()\narray([[1., 0., 1., 0., 0.],\n [0., 1., 0., 0., 0.]])\n>>> enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])\narray([['Male', 1],\n [None, 2]], dtype=object)\n>>> enc.get_feature_names(['gender', 'group'])\narray(['gender_Female', 'gender_Male', 'group_1', 'group_2', 'group_3'],\n dtype=object)\n\nOne can always drop the first column for each feature:\n\n>>> drop_enc = OneHotEncoder(drop='first').fit(X)\n>>> drop_enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> drop_enc.transform([['Female', 1], ['Male', 2]]).toarray()\narray([[0., 0., 0.],\n [1., 1., 0.]])\n\nOr drop a column for feature only having 2 categories:\n\n>>> drop_binary_enc = OneHotEncoder(drop='if_binary').fit(X)\n>>> drop_binary_enc.transform([['Female', 1], ['Male', 2]]).toarray()\narray([[0., 1., 0., 0.],\n [1., 0., 1., 0.]])", + "code": "class OneHotEncoder(_BaseEncoder):\n \"\"\"\n Encode categorical features as a one-hot numeric array.\n\n The input to this transformer should be an array-like of integers or\n strings, denoting the values taken on by categorical (discrete) features.\n The features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\n encoding scheme. This creates a binary column for each category and\n returns a sparse matrix or dense array (depending on the ``sparse``\n parameter)\n\n By default, the encoder derives the categories based on the unique values\n in each feature. Alternatively, you can also specify the `categories`\n manually.\n\n This encoding is needed for feeding categorical data to many scikit-learn\n estimators, notably linear models and SVMs with the standard kernels.\n\n Note: a one-hot encoding of y labels should use a LabelBinarizer\n instead.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n categories : 'auto' or a list of array-like, default='auto'\n Categories (unique values) per feature:\n\n - 'auto' : Determine categories automatically from the training data.\n - list : ``categories[i]`` holds the categories expected in the ith\n column. The passed categories should not mix strings and numeric\n values within a single feature, and should be sorted in case of\n numeric values.\n\n The used categories can be found in the ``categories_`` attribute.\n\n .. 
versionadded:: 0.20\n\n drop : {'first', 'if_binary'} or a array-like of shape (n_features,), \\\n default=None\n Specifies a methodology to use to drop one of the categories per\n feature. This is useful in situations where perfectly collinear\n features cause problems, such as when feeding the resulting data\n into a neural network or an unregularized regression.\n\n However, dropping one category breaks the symmetry of the original\n representation and can therefore induce a bias in downstream models,\n for instance for penalized linear classification or regression models.\n\n - None : retain all features (the default).\n - 'first' : drop the first category in each feature. If only one\n category is present, the feature will be dropped entirely.\n - 'if_binary' : drop the first category in each feature with two\n categories. Features with 1 or more than 2 categories are\n left intact.\n - array : ``drop[i]`` is the category in feature ``X[:, i]`` that\n should be dropped.\n\n .. versionadded:: 0.21\n The parameter `drop` was added in 0.21.\n\n .. versionchanged:: 0.23\n The option `drop='if_binary'` was added in 0.23.\n\n sparse : bool, default=True\n Will return sparse matrix if set True else will return an array.\n\n dtype : number type, default=float\n Desired dtype of output.\n\n handle_unknown : {'error', 'ignore'}, default='error'\n Whether to raise an error or ignore if an unknown categorical feature\n is present during transform (default is to raise). When this parameter\n is set to 'ignore' and an unknown category is encountered during\n transform, the resulting one-hot encoded columns for this feature\n will be all zeros. In the inverse transform, an unknown category\n will be denoted as None.\n\n Attributes\n ----------\n categories_ : list of arrays\n The categories of each feature determined during fitting\n (in order of the features in X and corresponding with the output\n of ``transform``). This includes the category specified in ``drop``\n (if any).\n\n drop_idx_ : array of shape (n_features,)\n - ``drop_idx_[i]`` is\u00a0the index in ``categories_[i]`` of the category\n to be dropped for each feature.\n - ``drop_idx_[i] = None`` if no category is to be dropped from the\n feature with index ``i``, e.g. when `drop='if_binary'` and the\n feature isn't binary.\n - ``drop_idx_ = None`` if all the transformed features will be\n retained.\n\n .. versionchanged:: 0.23\n Added the possibility to contain `None` values.\n\n See Also\n --------\n OrdinalEncoder : Performs an ordinal (integer)\n encoding of the categorical features.\n sklearn.feature_extraction.DictVectorizer : Performs a one-hot encoding of\n dictionary items (also handles string-valued features).\n sklearn.feature_extraction.FeatureHasher : Performs an approximate one-hot\n encoding of dictionary items or strings.\n LabelBinarizer : Binarizes labels in a one-vs-all\n fashion.\n MultiLabelBinarizer : Transforms between iterable of\n iterables and a multilabel format, e.g. 
a (samples x classes) binary\n matrix indicating the presence of a class label.\n\n Examples\n --------\n Given a dataset with two features, we let the encoder find the unique\n values per feature and transform the data to a binary one-hot encoding.\n\n >>> from sklearn.preprocessing import OneHotEncoder\n\n One can discard categories not seen during `fit`:\n\n >>> enc = OneHotEncoder(handle_unknown='ignore')\n >>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n >>> enc.fit(X)\n OneHotEncoder(handle_unknown='ignore')\n >>> enc.categories_\n [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n >>> enc.transform([['Female', 1], ['Male', 4]]).toarray()\n array([[1., 0., 1., 0., 0.],\n [0., 1., 0., 0., 0.]])\n >>> enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])\n array([['Male', 1],\n [None, 2]], dtype=object)\n >>> enc.get_feature_names(['gender', 'group'])\n array(['gender_Female', 'gender_Male', 'group_1', 'group_2', 'group_3'],\n dtype=object)\n\n One can always drop the first column for each feature:\n\n >>> drop_enc = OneHotEncoder(drop='first').fit(X)\n >>> drop_enc.categories_\n [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n >>> drop_enc.transform([['Female', 1], ['Male', 2]]).toarray()\n array([[0., 0., 0.],\n [1., 1., 0.]])\n\n Or drop a column for feature only having 2 categories:\n\n >>> drop_binary_enc = OneHotEncoder(drop='if_binary').fit(X)\n >>> drop_binary_enc.transform([['Female', 1], ['Male', 2]]).toarray()\n array([[0., 1., 0., 0.],\n [1., 0., 1., 0.]])\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, categories='auto', drop=None, sparse=True,\n dtype=np.float64, handle_unknown='error'):\n self.categories = categories\n self.sparse = sparse\n self.dtype = dtype\n self.handle_unknown = handle_unknown\n self.drop = drop\n\n def _validate_keywords(self):\n if self.handle_unknown not in ('error', 'ignore'):\n msg = (\"handle_unknown should be either 'error' or 'ignore', \"\n \"got {0}.\".format(self.handle_unknown))\n raise ValueError(msg)\n # If we have both dropped columns and ignored unknown\n # values, there will be ambiguous cells. This creates difficulties\n # in interpreting the model.\n if self.drop is not None and self.handle_unknown != 'error':\n raise ValueError(\n \"`handle_unknown` must be 'error' when the drop parameter is \"\n \"specified, as both would create categories that are all \"\n \"zero.\")\n\n def _compute_drop_idx(self):\n if self.drop is None:\n return None\n elif isinstance(self.drop, str):\n if self.drop == 'first':\n return np.zeros(len(self.categories_), dtype=object)\n elif self.drop == 'if_binary':\n return np.array([0 if len(cats) == 2 else None\n for cats in self.categories_], dtype=object)\n else:\n msg = (\n \"Wrong input for parameter `drop`. Expected \"\n \"'first', 'if_binary', None or array of objects, got {}\"\n )\n raise ValueError(msg.format(type(self.drop)))\n\n else:\n try:\n drop_array = np.asarray(self.drop, dtype=object)\n droplen = len(drop_array)\n except (ValueError, TypeError):\n msg = (\n \"Wrong input for parameter `drop`. 
Expected \"\n \"'first', 'if_binary', None or array of objects, got {}\"\n )\n raise ValueError(msg.format(type(drop_array)))\n if droplen != len(self.categories_):\n msg = (\"`drop` should have length equal to the number \"\n \"of features ({}), got {}\")\n raise ValueError(msg.format(len(self.categories_), droplen))\n missing_drops = []\n drop_indices = []\n for col_idx, (val, cat_list) in enumerate(zip(drop_array,\n self.categories_)):\n if not is_scalar_nan(val):\n drop_idx = np.where(cat_list == val)[0]\n if drop_idx.size: # found drop idx\n drop_indices.append(drop_idx[0])\n else:\n missing_drops.append((col_idx, val))\n continue\n\n # val is nan, find nan in categories manually\n for cat_idx, cat in enumerate(cat_list):\n if is_scalar_nan(cat):\n drop_indices.append(cat_idx)\n break\n else: # loop did not break thus drop is missing\n missing_drops.append((col_idx, val))\n\n if any(missing_drops):\n msg = (\"The following categories were supposed to be \"\n \"dropped, but were not found in the training \"\n \"data.\\n{}\".format(\n \"\\n\".join(\n [\"Category: {}, Feature: {}\".format(c, v)\n for c, v in missing_drops])))\n raise ValueError(msg)\n return np.array(drop_indices, dtype=object)\n\n def fit(self, X, y=None):\n \"\"\"\n Fit OneHotEncoder to X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to determine the categories of each feature.\n\n y : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\n Returns\n -------\n self\n \"\"\"\n self._validate_keywords()\n self._fit(X, handle_unknown=self.handle_unknown,\n force_all_finite='allow-nan')\n self.drop_idx_ = self._compute_drop_idx()\n return self\n\n def fit_transform(self, X, y=None):\n \"\"\"\n Fit OneHotEncoder to X, then transform X.\n\n Equivalent to fit(X).transform(X) but more convenient.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to encode.\n\n y : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\n Returns\n -------\n X_out : {ndarray, sparse matrix} of shape \\\n (n_samples, n_encoded_features)\n Transformed input. If `sparse=True`, a sparse matrix will be\n returned.\n \"\"\"\n self._validate_keywords()\n return super().fit_transform(X, y)\n\n def transform(self, X):\n \"\"\"\n Transform X using one-hot encoding.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to encode.\n\n Returns\n -------\n X_out : {ndarray, sparse matrix} of shape \\\n (n_samples, n_encoded_features)\n Transformed input. 
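A sketch (not part of the API data; assumes scikit-learn 0.24, reusing the docstring's toy `X`) of how an explicit `drop` array is resolved by `_compute_drop_idx` above:

import numpy as np
from sklearn.preprocessing import OneHotEncoder

X = [['Male', 1], ['Female', 3], ['Female', 2]]

# `drop` may name one category per feature; `_compute_drop_idx` resolves
# each entry to its index in `categories_`.
enc = OneHotEncoder(drop=np.array(['Female', 2], dtype=object)).fit(X)
print(enc.drop_idx_)                           # should print: [0 1]
print(enc.transform([['Male', 2]]).toarray())  # should print: [[1. 0. 0.]]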
If `sparse=True`, a sparse matrix will be\n returned.\n \"\"\"\n check_is_fitted(self)\n # validation of X happens in _check_X called by _transform\n X_int, X_mask = self._transform(X, handle_unknown=self.handle_unknown,\n force_all_finite='allow-nan')\n\n n_samples, n_features = X_int.shape\n\n if self.drop_idx_ is not None:\n to_drop = self.drop_idx_.copy()\n # We remove all the dropped categories from mask, and decrement all\n # categories that occur after them to avoid an empty column.\n keep_cells = X_int != to_drop\n n_values = []\n for i, cats in enumerate(self.categories_):\n n_cats = len(cats)\n\n # drop='if_binary' but feature isn't binary\n if to_drop[i] is None:\n # set to cardinality to not drop from X_int\n to_drop[i] = n_cats\n n_values.append(n_cats)\n else: # dropped\n n_values.append(n_cats - 1)\n\n to_drop = to_drop.reshape(1, -1)\n X_int[X_int > to_drop] -= 1\n X_mask &= keep_cells\n else:\n n_values = [len(cats) for cats in self.categories_]\n\n mask = X_mask.ravel()\n feature_indices = np.cumsum([0] + n_values)\n indices = (X_int + feature_indices[:-1]).ravel()[mask]\n\n indptr = np.empty(n_samples + 1, dtype=int)\n indptr[0] = 0\n np.sum(X_mask, axis=1, out=indptr[1:])\n np.cumsum(indptr[1:], out=indptr[1:])\n data = np.ones(indptr[-1])\n\n out = sparse.csr_matrix((data, indices, indptr),\n shape=(n_samples, feature_indices[-1]),\n dtype=self.dtype)\n if not self.sparse:\n return out.toarray()\n else:\n return out\n\n def inverse_transform(self, X):\n \"\"\"\n Convert the data back to the original representation.\n\n In case unknown categories are encountered (all zeros in the\n one-hot encoding), ``None`` is used to represent this category.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape \\\n (n_samples, n_encoded_features)\n The transformed data.\n\n Returns\n -------\n X_tr : ndarray of shape (n_samples, n_features)\n Inverse transformed array.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse='csr')\n\n n_samples, _ = X.shape\n n_features = len(self.categories_)\n if self.drop_idx_ is None:\n n_transformed_features = sum(len(cats)\n for cats in self.categories_)\n else:\n n_transformed_features = sum(\n len(cats) - 1 if to_drop is not None else len(cats)\n for cats, to_drop in zip(self.categories_, self.drop_idx_)\n )\n\n # validate shape of passed X\n msg = (\"Shape of the passed X data is not correct. Expected {0} \"\n \"columns, got {1}.\")\n if X.shape[1] != n_transformed_features:\n raise ValueError(msg.format(n_transformed_features, X.shape[1]))\n\n # create resulting array of appropriate dtype\n dt = np.find_common_type([cat.dtype for cat in self.categories_], [])\n X_tr = np.empty((n_samples, n_features), dtype=dt)\n\n j = 0\n found_unknown = {}\n\n for i in range(n_features):\n if self.drop_idx_ is None or self.drop_idx_[i] is None:\n cats = self.categories_[i]\n else:\n cats = np.delete(self.categories_[i], self.drop_idx_[i])\n n_categories = len(cats)\n\n # Only happens if there was a column with a unique\n # category. 
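For illustration (not part of the API data; assumes scikit-learn 0.24 and the same toy `X`), `drop='if_binary'` only removes a column from two-category features, which the name handling in `get_feature_names` below reflects:

from sklearn.preprocessing import OneHotEncoder

X = [['Male', 1], ['Female', 3], ['Female', 2]]

enc = OneHotEncoder(drop='if_binary').fit(X)
# The binary 'gender' feature loses its first category; 'group' keeps all.
print(enc.get_feature_names(['gender', 'group']))
# should print: ['gender_Male' 'group_1' 'group_2' 'group_3']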
In this case we just fill the column with this\n # unique category value.\n if n_categories == 0:\n X_tr[:, i] = self.categories_[i][self.drop_idx_[i]]\n j += n_categories\n continue\n sub = X[:, j:j + n_categories]\n # for sparse X argmax returns 2D matrix, ensure 1D array\n labels = np.asarray(sub.argmax(axis=1)).flatten()\n X_tr[:, i] = cats[labels]\n if self.handle_unknown == 'ignore':\n unknown = np.asarray(sub.sum(axis=1) == 0).flatten()\n # ignored unknown categories: we have a row of all zero\n if unknown.any():\n found_unknown[i] = unknown\n else:\n dropped = np.asarray(sub.sum(axis=1) == 0).flatten()\n if dropped.any():\n if self.drop_idx_ is None:\n all_zero_samples = np.flatnonzero(dropped)\n raise ValueError(\n f\"Samples {all_zero_samples} can not be inverted \"\n \"when drop=None and handle_unknown='error' \"\n \"because they contain all zeros\")\n # we can safely assume that all of the nulls in each column\n # are the dropped value\n X_tr[dropped, i] = self.categories_[i][\n self.drop_idx_[i]\n ]\n\n j += n_categories\n\n # if ignored are found: potentially need to upcast result to\n # insert None values\n if found_unknown:\n if X_tr.dtype != object:\n X_tr = X_tr.astype(object)\n\n for idx, mask in found_unknown.items():\n X_tr[mask, idx] = None\n\n return X_tr\n\n def get_feature_names(self, input_features=None):\n \"\"\"\n Return feature names for output features.\n\n Parameters\n ----------\n input_features : list of str of shape (n_features,)\n String names for input features if available. By default,\n \"x0\", \"x1\", ... \"xn_features\" is used.\n\n Returns\n -------\n output_feature_names : ndarray of shape (n_output_features,)\n Array of feature names.\n \"\"\"\n check_is_fitted(self)\n cats = self.categories_\n if input_features is None:\n input_features = ['x%d' % i for i in range(len(cats))]\n elif len(input_features) != len(self.categories_):\n raise ValueError(\n \"input_features should have length equal to number of \"\n \"features ({}), got {}\".format(len(self.categories_),\n len(input_features)))\n\n feature_names = []\n for i in range(len(cats)):\n names = [\n input_features[i] + '_' + str(t) for t in cats[i]]\n if self.drop_idx_ is not None and self.drop_idx_[i] is not None:\n names.pop(self.drop_idx_[i])\n feature_names.extend(names)\n\n return np.array(feature_names, dtype=object)", + "instance_attributes": [ + { + "name": "categories", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "sparse", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "dtype", + "types": { + "kind": "NamedType", + "name": "type" + } + }, + { + "name": "handle_unknown", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder", + "name": "OrdinalEncoder", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder", + "decorators": [], + "superclasses": ["_BaseEncoder"], + "methods": [ + "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/__init__", + "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/fit", + "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/transform", + "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/inverse_transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Encode categorical features as an integer array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe 
features are converted to ordinal integers. This results in\na single column of integers (0 to n_categories - 1) per feature.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "docstring": "Encode categorical features as an integer array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are converted to ordinal integers. This results in\na single column of integers (0 to n_categories - 1) per feature.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\ncategories : 'auto' or a list of array-like, default='auto'\n Categories (unique values) per feature:\n\n - 'auto' : Determine categories automatically from the training data.\n - list : ``categories[i]`` holds the categories expected in the ith\n column. The passed categories should not mix strings and numeric\n values, and should be sorted in case of numeric values.\n\n The used categories can be found in the ``categories_`` attribute.\n\ndtype : number type, default np.float64\n Desired dtype of output.\n\nhandle_unknown : {'error', 'use_encoded_value'}, default='error'\n When set to 'error' an error will be raised in case an unknown\n categorical feature is present during transform. When set to\n 'use_encoded_value', the encoded value of unknown categories will be\n set to the value given for the parameter `unknown_value`. In\n :meth:`inverse_transform`, an unknown category will be denoted as None.\n\n .. versionadded:: 0.24\n\nunknown_value : int or np.nan, default=None\n When the parameter handle_unknown is set to 'use_encoded_value', this\n parameter is required and will set the encoded value of unknown\n categories. It has to be distinct from the values used to encode any of\n the categories in `fit`. If set to np.nan, the `dtype` parameter must\n be a float dtype.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncategories_ : list of arrays\n The categories of each feature determined during ``fit`` (in order of\n the features in X and corresponding with the output of ``transform``).\n This does not include categories that weren't seen during ``fit``.\n\nSee Also\n--------\nOneHotEncoder : Performs a one-hot encoding of categorical features.\nLabelEncoder : Encodes target labels with values between 0 and\n ``n_classes-1``.\n\nExamples\n--------\nGiven a dataset with two features, we let the encoder find the unique\nvalues per feature and transform the data to an ordinal encoding.\n\n>>> from sklearn.preprocessing import OrdinalEncoder\n>>> enc = OrdinalEncoder()\n>>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n>>> enc.fit(X)\nOrdinalEncoder()\n>>> enc.categories_\n[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n>>> enc.transform([['Female', 3], ['Male', 1]])\narray([[0., 2.],\n [1., 0.]])\n\n>>> enc.inverse_transform([[1, 0], [0, 1]])\narray([['Male', 1],\n ['Female', 2]], dtype=object)", + "code": "class OrdinalEncoder(_BaseEncoder):\n \"\"\"\n Encode categorical features as an integer array.\n\n The input to this transformer should be an array-like of integers or\n strings, denoting the values taken on by categorical (discrete) features.\n The features are converted to ordinal integers. This results in\n a single column of integers (0 to n_categories - 1) per feature.\n\n Read more in the :ref:`User Guide `.\n\n .. 
versionadded:: 0.20\n\n Parameters\n ----------\n categories : 'auto' or a list of array-like, default='auto'\n Categories (unique values) per feature:\n\n - 'auto' : Determine categories automatically from the training data.\n - list : ``categories[i]`` holds the categories expected in the ith\n column. The passed categories should not mix strings and numeric\n values, and should be sorted in case of numeric values.\n\n The used categories can be found in the ``categories_`` attribute.\n\n dtype : number type, default np.float64\n Desired dtype of output.\n\n handle_unknown : {'error', 'use_encoded_value'}, default='error'\n When set to 'error' an error will be raised in case an unknown\n categorical feature is present during transform. When set to\n 'use_encoded_value', the encoded value of unknown categories will be\n set to the value given for the parameter `unknown_value`. In\n :meth:`inverse_transform`, an unknown category will be denoted as None.\n\n .. versionadded:: 0.24\n\n unknown_value : int or np.nan, default=None\n When the parameter handle_unknown is set to 'use_encoded_value', this\n parameter is required and will set the encoded value of unknown\n categories. It has to be distinct from the values used to encode any of\n the categories in `fit`. If set to np.nan, the `dtype` parameter must\n be a float dtype.\n\n .. versionadded:: 0.24\n\n Attributes\n ----------\n categories_ : list of arrays\n The categories of each feature determined during ``fit`` (in order of\n the features in X and corresponding with the output of ``transform``).\n This does not include categories that weren't seen during ``fit``.\n\n See Also\n --------\n OneHotEncoder : Performs a one-hot encoding of categorical features.\n LabelEncoder : Encodes target labels with values between 0 and\n ``n_classes-1``.\n\n Examples\n --------\n Given a dataset with two features, we let the encoder find the unique\n values per feature and transform the data to an ordinal encoding.\n\n >>> from sklearn.preprocessing import OrdinalEncoder\n >>> enc = OrdinalEncoder()\n >>> X = [['Male', 1], ['Female', 3], ['Female', 2]]\n >>> enc.fit(X)\n OrdinalEncoder()\n >>> enc.categories_\n [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n >>> enc.transform([['Female', 3], ['Male', 1]])\n array([[0., 2.],\n [1., 0.]])\n\n >>> enc.inverse_transform([[1, 0], [0, 1]])\n array([['Male', 1],\n ['Female', 2]], dtype=object)\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, categories='auto', dtype=np.float64,\n handle_unknown='error', unknown_value=None):\n self.categories = categories\n self.dtype = dtype\n self.handle_unknown = handle_unknown\n self.unknown_value = unknown_value\n\n def fit(self, X, y=None):\n \"\"\"\n Fit the OrdinalEncoder to X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to determine the categories of each feature.\n\n y : None\n Ignored. 
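A minimal sketch (not part of the API data; assumes scikit-learn 0.24, with an illustrative sentinel of -1) of the `handle_unknown='use_encoded_value'` behavior validated in `fit` below:

from sklearn.preprocessing import OrdinalEncoder

enc = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
X = [['Male', 1], ['Female', 3], ['Female', 2]]
enc.fit(X)

# Unseen categories are mapped to unknown_value instead of raising.
print(enc.transform([['Female', 1], ['Other', 4]]))
# should print:
# [[ 0.  0.]
#  [-1. -1.]]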
This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\n Returns\n -------\n self\n \"\"\"\n handle_unknown_strategies = (\"error\", \"use_encoded_value\")\n if self.handle_unknown not in handle_unknown_strategies:\n raise ValueError(\n f\"handle_unknown should be either 'error' or \"\n f\"'use_encoded_value', got {self.handle_unknown}.\"\n )\n\n if self.handle_unknown == 'use_encoded_value':\n if is_scalar_nan(self.unknown_value):\n if np.dtype(self.dtype).kind != 'f':\n raise ValueError(\n f\"When unknown_value is np.nan, the dtype \"\n \"parameter should be \"\n f\"a float dtype. Got {self.dtype}.\"\n )\n elif not isinstance(self.unknown_value, numbers.Integral):\n raise TypeError(f\"unknown_value should be an integer or \"\n f\"np.nan when \"\n f\"handle_unknown is 'use_encoded_value', \"\n f\"got {self.unknown_value}.\")\n elif self.unknown_value is not None:\n raise TypeError(f\"unknown_value should only be set when \"\n f\"handle_unknown is 'use_encoded_value', \"\n f\"got {self.unknown_value}.\")\n\n self._fit(X)\n\n if self.handle_unknown == 'use_encoded_value':\n for feature_cats in self.categories_:\n if 0 <= self.unknown_value < len(feature_cats):\n raise ValueError(f\"The used value for unknown_value \"\n f\"{self.unknown_value} is one of the \"\n f\"values already used for encoding the \"\n f\"seen categories.\")\n\n return self\n\n def transform(self, X):\n \"\"\"\n Transform X to ordinal codes.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to encode.\n\n Returns\n -------\n X_out : ndarray of shape (n_samples, n_features)\n Transformed input.\n \"\"\"\n X_int, X_mask = self._transform(X, handle_unknown=self.handle_unknown)\n X_trans = X_int.astype(self.dtype, copy=False)\n\n # create separate category for unknown values\n if self.handle_unknown == 'use_encoded_value':\n X_trans[~X_mask] = self.unknown_value\n return X_trans\n\n def inverse_transform(self, X):\n \"\"\"\n Convert the data back to the original representation.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\n Returns\n -------\n X_tr : ndarray of shape (n_samples, n_features)\n Inverse transformed array.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse='csr')\n\n n_samples, _ = X.shape\n n_features = len(self.categories_)\n\n # validate shape of passed X\n msg = (\"Shape of the passed X data is not correct. 
Expected {0} \"\n \"columns, got {1}.\")\n if X.shape[1] != n_features:\n raise ValueError(msg.format(n_features, X.shape[1]))\n\n # create resulting array of appropriate dtype\n dt = np.find_common_type([cat.dtype for cat in self.categories_], [])\n X_tr = np.empty((n_samples, n_features), dtype=dt)\n\n found_unknown = {}\n\n for i in range(n_features):\n labels = X[:, i].astype('int64', copy=False)\n if self.handle_unknown == 'use_encoded_value':\n unknown_labels = labels == self.unknown_value\n X_tr[:, i] = self.categories_[i][np.where(\n unknown_labels, 0, labels)]\n found_unknown[i] = unknown_labels\n else:\n X_tr[:, i] = self.categories_[i][labels]\n\n # insert None values for unknown values\n if found_unknown:\n X_tr = X_tr.astype(object, copy=False)\n\n for idx, mask in found_unknown.items():\n X_tr[mask, idx] = None\n\n return X_tr", + "instance_attributes": [ + { + "name": "categories", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "dtype", + "types": { + "kind": "NamedType", + "name": "type" + } + }, + { + "name": "handle_unknown", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder", + "name": "_BaseEncoder", + "qname": "sklearn.preprocessing._encoders._BaseEncoder", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_check_X", + "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_get_feature", + "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_fit", + "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_transform", + "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for encoders that includes the code to categorize and\ntransform the input features.", + "docstring": "Base class for encoders that includes the code to categorize and\ntransform the input features.", + "code": "class _BaseEncoder(TransformerMixin, BaseEstimator):\n \"\"\"\n Base class for encoders that includes the code to categorize and\n transform the input features.\n\n \"\"\"\n\n def _check_X(self, X, force_all_finite=True):\n \"\"\"\n Perform custom check_array:\n - convert list of strings to object dtype\n - check for missing values for object dtype data (check_array does\n not do that)\n - return list of features (arrays): this list of features is\n constructed feature by feature to preserve the data types\n of pandas DataFrame columns, as otherwise information is lost\n and cannot be used, eg for the `categories_` attribute.\n\n \"\"\"\n if not (hasattr(X, 'iloc') and getattr(X, 'ndim', 0) == 2):\n # if not a dataframe, do normal check_array validation\n X_temp = check_array(X, dtype=None,\n force_all_finite=force_all_finite)\n if (not hasattr(X, 'dtype')\n and np.issubdtype(X_temp.dtype, np.str_)):\n X = check_array(X, dtype=object,\n force_all_finite=force_all_finite)\n else:\n X = X_temp\n needs_validation = False\n else:\n # pandas dataframe, do validation later column by column, in order\n # to keep the dtype information to be used in the encoder.\n needs_validation = force_all_finite\n\n n_samples, n_features = X.shape\n X_columns = []\n\n for i in range(n_features):\n Xi = self._get_feature(X, feature_idx=i)\n Xi = check_array(Xi, ensure_2d=False, dtype=None,\n force_all_finite=needs_validation)\n X_columns.append(Xi)\n\n return X_columns, n_samples, 
n_features\n\n def _get_feature(self, X, feature_idx):\n if hasattr(X, 'iloc'):\n # pandas dataframes\n return X.iloc[:, feature_idx]\n # numpy arrays, sparse arrays\n return X[:, feature_idx]\n\n def _fit(self, X, handle_unknown='error', force_all_finite=True):\n X_list, n_samples, n_features = self._check_X(\n X, force_all_finite=force_all_finite)\n\n if self.categories != 'auto':\n if len(self.categories) != n_features:\n raise ValueError(\"Shape mismatch: if categories is an array,\"\n \" it has to be of shape (n_features,).\")\n\n self.categories_ = []\n\n for i in range(n_features):\n Xi = X_list[i]\n if self.categories == 'auto':\n cats = _unique(Xi)\n else:\n cats = np.array(self.categories[i], dtype=Xi.dtype)\n if Xi.dtype.kind not in 'OUS':\n sorted_cats = np.sort(cats)\n error_msg = (\"Unsorted categories are not \"\n \"supported for numerical categories\")\n # if there are nans, nan should be the last element\n stop_idx = -1 if np.isnan(sorted_cats[-1]) else None\n if (np.any(sorted_cats[:stop_idx] != cats[:stop_idx]) or\n (np.isnan(sorted_cats[-1]) and\n not np.isnan(cats[-1]))):\n raise ValueError(error_msg)\n\n if handle_unknown == 'error':\n diff = _check_unknown(Xi, cats)\n if diff:\n msg = (\"Found unknown categories {0} in column {1}\"\n \" during fit\".format(diff, i))\n raise ValueError(msg)\n self.categories_.append(cats)\n\n def _transform(self, X, handle_unknown='error', force_all_finite=True):\n X_list, n_samples, n_features = self._check_X(\n X, force_all_finite=force_all_finite)\n\n X_int = np.zeros((n_samples, n_features), dtype=int)\n X_mask = np.ones((n_samples, n_features), dtype=bool)\n\n if n_features != len(self.categories_):\n raise ValueError(\n \"The number of features in X is different to the number of \"\n \"features of the fitted data. 
The fitted data had {} features \"\n \"and the X has {} features.\"\n .format(len(self.categories_,), n_features)\n )\n\n for i in range(n_features):\n Xi = X_list[i]\n diff, valid_mask = _check_unknown(Xi, self.categories_[i],\n return_mask=True)\n\n if not np.all(valid_mask):\n if handle_unknown == 'error':\n msg = (\"Found unknown categories {0} in column {1}\"\n \" during transform\".format(diff, i))\n raise ValueError(msg)\n else:\n # Set the problematic rows to an acceptable value and\n # continue. The rows are marked `X_mask` and will be\n # removed later.\n X_mask[:, i] = valid_mask\n # cast Xi into the largest string type necessary\n # to handle different lengths of numpy strings\n if (self.categories_[i].dtype.kind in ('U', 'S')\n and self.categories_[i].itemsize > Xi.itemsize):\n Xi = Xi.astype(self.categories_[i].dtype)\n elif (self.categories_[i].dtype.kind == 'O' and\n Xi.dtype.kind == 'U'):\n # categories are objects and Xi are numpy strings.\n # Cast Xi to an object dtype to prevent truncation\n # when setting invalid values.\n Xi = Xi.astype('O')\n else:\n Xi = Xi.copy()\n\n Xi[~valid_mask] = self.categories_[i][0]\n # We use check_unknown=False, since _check_unknown was\n # already called above.\n X_int[:, i] = _encode(Xi, uniques=self.categories_[i],\n check_unknown=False)\n\n return X_int, X_mask\n\n def _more_tags(self):\n return {'X_types': ['categorical']}", + "instance_attributes": [ + { + "name": "categories_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer", + "name": "FunctionTransformer", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/__init__", + "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_check_input", + "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_check_inverse_transform", + "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/fit", + "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/transform", + "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/inverse_transform", + "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_transform", + "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Constructs a transformer from an arbitrary callable.\n\nA FunctionTransformer forwards its X (and optionally y) arguments to a\nuser-defined function or function object and returns the result of this\nfunction. This is useful for stateless transformations such as taking the\nlog of frequencies, doing custom scaling, etc.\n\nNote: If a lambda is used as the function, then the resulting\ntransformer will not be pickleable.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.", + "docstring": "Constructs a transformer from an arbitrary callable.\n\nA FunctionTransformer forwards its X (and optionally y) arguments to a\nuser-defined function or function object and returns the result of this\nfunction. 
This is useful for stateless transformations such as taking the\nlog of frequencies, doing custom scaling, etc.\n\nNote: If a lambda is used as the function, then the resulting\ntransformer will not be pickleable.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfunc : callable, default=None\n The callable to use for the transformation. This will be passed\n the same arguments as transform, with args and kwargs forwarded.\n If func is None, then func will be the identity function.\n\ninverse_func : callable, default=None\n The callable to use for the inverse transformation. This will be\n passed the same arguments as inverse transform, with args and\n kwargs forwarded. If inverse_func is None, then inverse_func\n will be the identity function.\n\nvalidate : bool, default=False\n Indicate that the input X array should be checked before calling\n ``func``. The possibilities are:\n\n - If False, there is no input validation.\n - If True, then X will be converted to a 2-dimensional NumPy array or\n sparse matrix. If the conversion is not possible an exception is\n raised.\n\n .. versionchanged:: 0.22\n The default of ``validate`` changed from True to False.\n\naccept_sparse : bool, default=False\n Indicate that func accepts a sparse matrix as input. If validate is\n False, this has no effect. Otherwise, if accept_sparse is false,\n sparse matrix inputs will cause an exception to be raised.\n\ncheck_inverse : bool, default=True\n Whether to check that or ``func`` followed by ``inverse_func`` leads to\n the original inputs. It can be used for a sanity check, raising a\n warning when the condition is not fulfilled.\n\n .. versionadded:: 0.20\n\nkw_args : dict, default=None\n Dictionary of additional keyword arguments to pass to func.\n\n .. versionadded:: 0.18\n\ninv_kw_args : dict, default=None\n Dictionary of additional keyword arguments to pass to inverse_func.\n\n .. versionadded:: 0.18\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import FunctionTransformer\n>>> transformer = FunctionTransformer(np.log1p)\n>>> X = np.array([[0, 1], [2, 3]])\n>>> transformer.transform(X)\narray([[0. , 0.6931...],\n [1.0986..., 1.3862...]])", + "code": "class FunctionTransformer(TransformerMixin, BaseEstimator):\n \"\"\"Constructs a transformer from an arbitrary callable.\n\n A FunctionTransformer forwards its X (and optionally y) arguments to a\n user-defined function or function object and returns the result of this\n function. This is useful for stateless transformations such as taking the\n log of frequencies, doing custom scaling, etc.\n\n Note: If a lambda is used as the function, then the resulting\n transformer will not be pickleable.\n\n .. versionadded:: 0.17\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n func : callable, default=None\n The callable to use for the transformation. This will be passed\n the same arguments as transform, with args and kwargs forwarded.\n If func is None, then func will be the identity function.\n\n inverse_func : callable, default=None\n The callable to use for the inverse transformation. This will be\n passed the same arguments as inverse transform, with args and\n kwargs forwarded. If inverse_func is None, then inverse_func\n will be the identity function.\n\n validate : bool, default=False\n Indicate that the input X array should be checked before calling\n ``func``. 
The possibilities are:\n\n - If False, there is no input validation.\n - If True, then X will be converted to a 2-dimensional NumPy array or\n sparse matrix. If the conversion is not possible an exception is\n raised.\n\n .. versionchanged:: 0.22\n The default of ``validate`` changed from True to False.\n\n accept_sparse : bool, default=False\n Indicate that func accepts a sparse matrix as input. If validate is\n False, this has no effect. Otherwise, if accept_sparse is false,\n sparse matrix inputs will cause an exception to be raised.\n\n check_inverse : bool, default=True\n Whether to check that or ``func`` followed by ``inverse_func`` leads to\n the original inputs. It can be used for a sanity check, raising a\n warning when the condition is not fulfilled.\n\n .. versionadded:: 0.20\n\n kw_args : dict, default=None\n Dictionary of additional keyword arguments to pass to func.\n\n .. versionadded:: 0.18\n\n inv_kw_args : dict, default=None\n Dictionary of additional keyword arguments to pass to inverse_func.\n\n .. versionadded:: 0.18\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.preprocessing import FunctionTransformer\n >>> transformer = FunctionTransformer(np.log1p)\n >>> X = np.array([[0, 1], [2, 3]])\n >>> transformer.transform(X)\n array([[0. , 0.6931...],\n [1.0986..., 1.3862...]])\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, func=None, inverse_func=None, *, validate=False,\n accept_sparse=False, check_inverse=True, kw_args=None,\n inv_kw_args=None):\n self.func = func\n self.inverse_func = inverse_func\n self.validate = validate\n self.accept_sparse = accept_sparse\n self.check_inverse = check_inverse\n self.kw_args = kw_args\n self.inv_kw_args = inv_kw_args\n\n def _check_input(self, X):\n if self.validate:\n return self._validate_data(X, accept_sparse=self.accept_sparse)\n return X\n\n def _check_inverse_transform(self, X):\n \"\"\"Check that func and inverse_func are the inverse.\"\"\"\n idx_selected = slice(None, None, max(1, X.shape[0] // 100))\n X_round_trip = self.inverse_transform(self.transform(X[idx_selected]))\n if not _allclose_dense_sparse(X[idx_selected], X_round_trip):\n warnings.warn(\"The provided functions are not strictly\"\n \" inverse of each other. 
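The `check_inverse` behaviour documented above can be exercised with an exact inverse pair, in which case the subsampled round-trip check passes silently; a small sketch:

```python
import numpy as np
from sklearn.preprocessing import FunctionTransformer

# log1p and expm1 are exact inverses, so no 'not strictly inverse' warning.
transformer = FunctionTransformer(np.log1p, inverse_func=np.expm1,
                                  validate=True, check_inverse=True)
X = np.array([[0.0, 1.0], [2.0, 3.0]])
X_trans = transformer.fit_transform(X)
print(np.allclose(transformer.inverse_transform(X_trans), X))  # True
```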
If you are sure you\"\n \" want to proceed regardless, set\"\n \" 'check_inverse=False'.\", UserWarning)\n\n def fit(self, X, y=None):\n \"\"\"Fit transformer by checking X.\n\n If ``validate`` is ``True``, ``X`` will be checked.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Input array.\n\n Returns\n -------\n self\n \"\"\"\n X = self._check_input(X)\n if (self.check_inverse and not (self.func is None or\n self.inverse_func is None)):\n self._check_inverse_transform(X)\n return self\n\n def transform(self, X):\n \"\"\"Transform X using the forward function.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Input array.\n\n Returns\n -------\n X_out : array-like, shape (n_samples, n_features)\n Transformed input.\n \"\"\"\n return self._transform(X, func=self.func, kw_args=self.kw_args)\n\n def inverse_transform(self, X):\n \"\"\"Transform X using the inverse function.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Input array.\n\n Returns\n -------\n X_out : array-like, shape (n_samples, n_features)\n Transformed input.\n \"\"\"\n return self._transform(X, func=self.inverse_func,\n kw_args=self.inv_kw_args)\n\n def _transform(self, X, func=None, kw_args=None):\n X = self._check_input(X)\n\n if func is None:\n func = _identity\n\n return func(X, **(kw_args if kw_args else {}))\n\n def _more_tags(self):\n return {'no_validation': not self.validate,\n 'stateless': True}", + "instance_attributes": [ + { + "name": "validate", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "accept_sparse", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "check_inverse", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer", + "name": "LabelBinarizer", + "qname": "sklearn.preprocessing._label.LabelBinarizer", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/__init__", + "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/fit", + "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/fit_transform", + "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/transform", + "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/inverse_transform", + "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Binarize labels in a one-vs-all fashion.\n\nSeveral regression and binary classification algorithms are\navailable in scikit-learn. A simple way to extend these algorithms\nto the multi-class classification case is to use the so-called\none-vs-all scheme.\n\nAt learning time, this simply consists in learning one regressor\nor binary classifier per class. In doing so, one needs to convert\nmulti-class labels to binary labels (belong or does not belong\nto the class). LabelBinarizer makes this process easy with the\ntransform method.\n\nAt prediction time, one assigns the class for which the corresponding\nmodel gave the greatest confidence. LabelBinarizer makes this easy\nwith the inverse_transform method.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Binarize labels in a one-vs-all fashion.\n\nSeveral regression and binary classification algorithms are\navailable in scikit-learn. 
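`_check_inverse_transform` keeps that round trip cheap by striding through the rows with `max(1, X.shape[0] // 100)`, i.e. it inspects on the order of 100 rows no matter how large `X` is. The stride arithmetic, in plain Python:

```python
# Roughly 100 evenly spaced rows are checked regardless of dataset size.
for n_samples in (5, 100, 10_000):
    stride = max(1, n_samples // 100)
    n_checked = len(range(0, n_samples, stride))
    print(n_samples, stride, n_checked)  # (5, 1, 5), (100, 1, 100), (10000, 100, 100)
```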
A simple way to extend these algorithms\nto the multi-class classification case is to use the so-called\none-vs-all scheme.\n\nAt learning time, this simply consists in learning one regressor\nor binary classifier per class. In doing so, one needs to convert\nmulti-class labels to binary labels (belong or does not belong\nto the class). LabelBinarizer makes this process easy with the\ntransform method.\n\nAt prediction time, one assigns the class for which the corresponding\nmodel gave the greatest confidence. LabelBinarizer makes this easy\nwith the inverse_transform method.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nneg_label : int, default=0\n Value with which negative labels must be encoded.\n\npos_label : int, default=1\n Value with which positive labels must be encoded.\n\nsparse_output : bool, default=False\n True if the returned array from transform is desired to be in sparse\n CSR format.\n\nAttributes\n----------\n\nclasses_ : ndarray of shape (n_classes,)\n Holds the label for each class.\n\ny_type_ : str\n Represents the type of the target data as evaluated by\n utils.multiclass.type_of_target. Possible type are 'continuous',\n 'continuous-multioutput', 'binary', 'multiclass',\n 'multiclass-multioutput', 'multilabel-indicator', and 'unknown'.\n\nsparse_input_ : bool\n True if the input data to transform is given as a sparse matrix, False\n otherwise.\n\nExamples\n--------\n>>> from sklearn import preprocessing\n>>> lb = preprocessing.LabelBinarizer()\n>>> lb.fit([1, 2, 6, 4, 2])\nLabelBinarizer()\n>>> lb.classes_\narray([1, 2, 4, 6])\n>>> lb.transform([1, 6])\narray([[1, 0, 0, 0],\n [0, 0, 0, 1]])\n\nBinary targets transform to a column vector\n\n>>> lb = preprocessing.LabelBinarizer()\n>>> lb.fit_transform(['yes', 'no', 'no', 'yes'])\narray([[1],\n [0],\n [0],\n [1]])\n\nPassing a 2D matrix for multilabel classification\n\n>>> import numpy as np\n>>> lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))\nLabelBinarizer()\n>>> lb.classes_\narray([0, 1, 2])\n>>> lb.transform([0, 1, 2, 1])\narray([[1, 0, 0],\n [0, 1, 0],\n [0, 0, 1],\n [0, 1, 0]])\n\nSee Also\n--------\nlabel_binarize : Function to perform the transform operation of\n LabelBinarizer with fixed classes.\nOneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n scheme.", + "code": "class LabelBinarizer(TransformerMixin, BaseEstimator):\n \"\"\"Binarize labels in a one-vs-all fashion.\n\n Several regression and binary classification algorithms are\n available in scikit-learn. A simple way to extend these algorithms\n to the multi-class classification case is to use the so-called\n one-vs-all scheme.\n\n At learning time, this simply consists in learning one regressor\n or binary classifier per class. In doing so, one needs to convert\n multi-class labels to binary labels (belong or does not belong\n to the class). LabelBinarizer makes this process easy with the\n transform method.\n\n At prediction time, one assigns the class for which the corresponding\n model gave the greatest confidence. 
LabelBinarizer makes this easy\n with the inverse_transform method.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n\n neg_label : int, default=0\n Value with which negative labels must be encoded.\n\n pos_label : int, default=1\n Value with which positive labels must be encoded.\n\n sparse_output : bool, default=False\n True if the returned array from transform is desired to be in sparse\n CSR format.\n\n Attributes\n ----------\n\n classes_ : ndarray of shape (n_classes,)\n Holds the label for each class.\n\n y_type_ : str\n Represents the type of the target data as evaluated by\n utils.multiclass.type_of_target. Possible type are 'continuous',\n 'continuous-multioutput', 'binary', 'multiclass',\n 'multiclass-multioutput', 'multilabel-indicator', and 'unknown'.\n\n sparse_input_ : bool\n True if the input data to transform is given as a sparse matrix, False\n otherwise.\n\n Examples\n --------\n >>> from sklearn import preprocessing\n >>> lb = preprocessing.LabelBinarizer()\n >>> lb.fit([1, 2, 6, 4, 2])\n LabelBinarizer()\n >>> lb.classes_\n array([1, 2, 4, 6])\n >>> lb.transform([1, 6])\n array([[1, 0, 0, 0],\n [0, 0, 0, 1]])\n\n Binary targets transform to a column vector\n\n >>> lb = preprocessing.LabelBinarizer()\n >>> lb.fit_transform(['yes', 'no', 'no', 'yes'])\n array([[1],\n [0],\n [0],\n [1]])\n\n Passing a 2D matrix for multilabel classification\n\n >>> import numpy as np\n >>> lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))\n LabelBinarizer()\n >>> lb.classes_\n array([0, 1, 2])\n >>> lb.transform([0, 1, 2, 1])\n array([[1, 0, 0],\n [0, 1, 0],\n [0, 0, 1],\n [0, 1, 0]])\n\n See Also\n --------\n label_binarize : Function to perform the transform operation of\n LabelBinarizer with fixed classes.\n OneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n scheme.\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, neg_label=0, pos_label=1, sparse_output=False):\n if neg_label >= pos_label:\n raise ValueError(\"neg_label={0} must be strictly less than \"\n \"pos_label={1}.\".format(neg_label, pos_label))\n\n if sparse_output and (pos_label == 0 or neg_label != 0):\n raise ValueError(\"Sparse binarization is only supported with non \"\n \"zero pos_label and zero neg_label, got \"\n \"pos_label={0} and neg_label={1}\"\n \"\".format(pos_label, neg_label))\n\n self.neg_label = neg_label\n self.pos_label = pos_label\n self.sparse_output = sparse_output\n\n def fit(self, y):\n \"\"\"Fit label binarizer.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self.y_type_ = type_of_target(y)\n if 'multioutput' in self.y_type_:\n raise ValueError(\"Multioutput target data is not supported with \"\n \"label binarization\")\n if _num_samples(y) == 0:\n raise ValueError('y has 0 samples: %r' % y)\n\n self.sparse_input_ = sp.issparse(y)\n self.classes_ = unique_labels(y)\n return self\n\n def fit_transform(self, y):\n \"\"\"Fit label binarizer and transform multi-class labels to binary\n labels.\n\n The output of transform is sometimes referred to as\n the 1-of-K coding scheme.\n\n Parameters\n ----------\n y : {ndarray, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification. 
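Unusually for scikit-learn estimators, `LabelBinarizer.__init__` above validates its parameters eagerly, so inconsistent `neg_label`/`pos_label`/`sparse_output` combinations fail at construction rather than at `fit`; a quick sketch:

```python
from sklearn.preprocessing import LabelBinarizer

# neg_label must be strictly less than pos_label ...
try:
    LabelBinarizer(neg_label=1, pos_label=1)
except ValueError as exc:
    print(exc)

# ... and sparse output requires neg_label == 0 and pos_label != 0.
try:
    LabelBinarizer(neg_label=-1, pos_label=1, sparse_output=True)
except ValueError as exc:
    print(exc)
```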
Sparse matrix can be\n CSR, CSC, COO, DOK, or LIL.\n\n Returns\n -------\n Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. Sparse matrix\n will be of CSR format.\n \"\"\"\n return self.fit(y).transform(y)\n\n def transform(self, y):\n \"\"\"Transform multi-class labels to binary labels.\n\n The output of transform is sometimes referred to by some authors as\n the 1-of-K coding scheme.\n\n Parameters\n ----------\n y : {array, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification. Sparse matrix can be\n CSR, CSC, COO, DOK, or LIL.\n\n Returns\n -------\n Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. Sparse matrix\n will be of CSR format.\n \"\"\"\n check_is_fitted(self)\n\n y_is_multilabel = type_of_target(y).startswith('multilabel')\n if y_is_multilabel and not self.y_type_.startswith('multilabel'):\n raise ValueError(\"The object was not fitted with multilabel\"\n \" input.\")\n\n return label_binarize(y, classes=self.classes_,\n pos_label=self.pos_label,\n neg_label=self.neg_label,\n sparse_output=self.sparse_output)\n\n def inverse_transform(self, Y, threshold=None):\n \"\"\"Transform binary labels back to multi-class labels.\n\n Parameters\n ----------\n Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Target values. All sparse matrices are converted to CSR before\n inverse transformation.\n\n threshold : float, default=None\n Threshold used in the binary and multi-label cases.\n\n Use 0 when ``Y`` contains the output of decision_function\n (classifier).\n Use 0.5 when ``Y`` contains the output of predict_proba.\n\n If None, the threshold is assumed to be half way between\n neg_label and pos_label.\n\n Returns\n -------\n y : {ndarray, sparse matrix} of shape (n_samples,)\n Target values. Sparse matrix will be of CSR format.\n\n Notes\n -----\n In the case when the binary labels are fractional\n (probabilistic), inverse_transform chooses the class with the\n greatest value. 
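A sketch of the thresholding path of `inverse_transform` for binary targets: with `threshold=0` the input is read as `decision_function` scores, while the default threshold is the midpoint of `neg_label` and `pos_label` (0.5 here):

```python
import numpy as np
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer().fit(['no', 'yes'])
scores = np.array([[-0.4], [0.7], [0.1]])  # decision_function-style scores

print(lb.inverse_transform(scores, threshold=0))  # ['no' 'yes' 'yes']
print(lb.inverse_transform(scores))               # default 0.5 -> ['no' 'yes' 'no']
```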
Typically, this allows to use the output of a\n linear model's decision_function method directly as the input\n of inverse_transform.\n \"\"\"\n check_is_fitted(self)\n\n if threshold is None:\n threshold = (self.pos_label + self.neg_label) / 2.\n\n if self.y_type_ == \"multiclass\":\n y_inv = _inverse_binarize_multiclass(Y, self.classes_)\n else:\n y_inv = _inverse_binarize_thresholding(Y, self.y_type_,\n self.classes_, threshold)\n\n if self.sparse_input_:\n y_inv = sp.csr_matrix(y_inv)\n elif sp.issparse(y_inv):\n y_inv = y_inv.toarray()\n\n return y_inv\n\n def _more_tags(self):\n return {'X_types': ['1dlabels']}", + "instance_attributes": [ + { + "name": "neg_label", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "pos_label", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "sparse_output", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder", + "name": "LabelEncoder", + "qname": "sklearn.preprocessing._label.LabelEncoder", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._label/LabelEncoder/fit", + "scikit-learn/sklearn.preprocessing._label/LabelEncoder/fit_transform", + "scikit-learn/sklearn.preprocessing._label/LabelEncoder/transform", + "scikit-learn/sklearn.preprocessing._label/LabelEncoder/inverse_transform", + "scikit-learn/sklearn.preprocessing._label/LabelEncoder/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Encode target labels with value between 0 and n_classes-1.\n\nThis transformer should be used to encode target values, *i.e.* `y`, and\nnot the input `X`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.12", + "docstring": "Encode target labels with value between 0 and n_classes-1.\n\nThis transformer should be used to encode target values, *i.e.* `y`, and\nnot the input `X`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.12\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n Holds the label for each class.\n\nExamples\n--------\n`LabelEncoder` can be used to normalize labels.\n\n>>> from sklearn import preprocessing\n>>> le = preprocessing.LabelEncoder()\n>>> le.fit([1, 2, 2, 6])\nLabelEncoder()\n>>> le.classes_\narray([1, 2, 6])\n>>> le.transform([1, 1, 2, 6])\narray([0, 0, 1, 2]...)\n>>> le.inverse_transform([0, 0, 1, 2])\narray([1, 1, 2, 6])\n\nIt can also be used to transform non-numerical labels (as long as they are\nhashable and comparable) to numerical labels.\n\n>>> le = preprocessing.LabelEncoder()\n>>> le.fit([\"paris\", \"paris\", \"tokyo\", \"amsterdam\"])\nLabelEncoder()\n>>> list(le.classes_)\n['amsterdam', 'paris', 'tokyo']\n>>> le.transform([\"tokyo\", \"tokyo\", \"paris\"])\narray([2, 2, 1]...)\n>>> list(le.inverse_transform([2, 2, 1]))\n['tokyo', 'tokyo', 'paris']\n\nSee Also\n--------\nOrdinalEncoder : Encode categorical features using an ordinal encoding\n scheme.\nOneHotEncoder : Encode categorical features as a one-hot numeric array.", + "code": "class LabelEncoder(TransformerMixin, BaseEstimator):\n \"\"\"Encode target labels with value between 0 and n_classes-1.\n\n This transformer should be used to encode target values, *i.e.* `y`, and\n not the input `X`.\n\n Read more in the :ref:`User Guide `.\n\n .. 
versionadded:: 0.12\n\n Attributes\n ----------\n classes_ : ndarray of shape (n_classes,)\n Holds the label for each class.\n\n Examples\n --------\n `LabelEncoder` can be used to normalize labels.\n\n >>> from sklearn import preprocessing\n >>> le = preprocessing.LabelEncoder()\n >>> le.fit([1, 2, 2, 6])\n LabelEncoder()\n >>> le.classes_\n array([1, 2, 6])\n >>> le.transform([1, 1, 2, 6])\n array([0, 0, 1, 2]...)\n >>> le.inverse_transform([0, 0, 1, 2])\n array([1, 1, 2, 6])\n\n It can also be used to transform non-numerical labels (as long as they are\n hashable and comparable) to numerical labels.\n\n >>> le = preprocessing.LabelEncoder()\n >>> le.fit([\"paris\", \"paris\", \"tokyo\", \"amsterdam\"])\n LabelEncoder()\n >>> list(le.classes_)\n ['amsterdam', 'paris', 'tokyo']\n >>> le.transform([\"tokyo\", \"tokyo\", \"paris\"])\n array([2, 2, 1]...)\n >>> list(le.inverse_transform([2, 2, 1]))\n ['tokyo', 'tokyo', 'paris']\n\n See Also\n --------\n OrdinalEncoder : Encode categorical features using an ordinal encoding\n scheme.\n OneHotEncoder : Encode categorical features as a one-hot numeric array.\n \"\"\"\n\n def fit(self, y):\n \"\"\"Fit label encoder.\n\n Parameters\n ----------\n y : array-like of shape (n_samples,)\n Target values.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n y = column_or_1d(y, warn=True)\n self.classes_ = _unique(y)\n return self\n\n def fit_transform(self, y):\n \"\"\"Fit label encoder and return encoded labels.\n\n Parameters\n ----------\n y : array-like of shape (n_samples,)\n Target values.\n\n Returns\n -------\n y : array-like of shape (n_samples,)\n \"\"\"\n y = column_or_1d(y, warn=True)\n self.classes_, y = _unique(y, return_inverse=True)\n return y\n\n def transform(self, y):\n \"\"\"Transform labels to normalized encoding.\n\n Parameters\n ----------\n y : array-like of shape (n_samples,)\n Target values.\n\n Returns\n -------\n y : array-like of shape (n_samples,)\n \"\"\"\n check_is_fitted(self)\n y = column_or_1d(y, warn=True)\n # transform of empty array is empty array\n if _num_samples(y) == 0:\n return np.array([])\n\n return _encode(y, uniques=self.classes_)\n\n def inverse_transform(self, y):\n \"\"\"Transform labels back to original encoding.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n Target values.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n \"\"\"\n check_is_fitted(self)\n y = column_or_1d(y, warn=True)\n # inverse transform of empty array is empty array\n if _num_samples(y) == 0:\n return np.array([])\n\n diff = np.setdiff1d(y, np.arange(len(self.classes_)))\n if len(diff):\n raise ValueError(\n \"y contains previously unseen labels: %s\" % str(diff))\n y = np.asarray(y)\n return self.classes_[y]\n\n def _more_tags(self):\n return {'X_types': ['1dlabels']}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer", + "name": "MultiLabelBinarizer", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/__init__", + "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/fit", + "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/fit_transform", + "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/transform", + "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/_build_cache", + 
"scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/_transform", + "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/inverse_transform", + "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Transform between iterable of iterables and a multilabel format.\n\nAlthough a list of sets or tuples is a very intuitive format for multilabel\ndata, it is unwieldy to process. This transformer converts between this\nintuitive format and the supported multilabel format: a (samples x classes)\nbinary matrix indicating the presence of a class label.", + "docstring": "Transform between iterable of iterables and a multilabel format.\n\nAlthough a list of sets or tuples is a very intuitive format for multilabel\ndata, it is unwieldy to process. This transformer converts between this\nintuitive format and the supported multilabel format: a (samples x classes)\nbinary matrix indicating the presence of a class label.\n\nParameters\n----------\nclasses : array-like of shape (n_classes,), default=None\n Indicates an ordering for the class labels.\n All entries should be unique (cannot contain duplicate classes).\n\nsparse_output : bool, default=False\n Set to True if output binary array is desired in CSR sparse format.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n A copy of the `classes` parameter when provided.\n Otherwise it corresponds to the sorted set of classes found\n when fitting.\n\nExamples\n--------\n>>> from sklearn.preprocessing import MultiLabelBinarizer\n>>> mlb = MultiLabelBinarizer()\n>>> mlb.fit_transform([(1, 2), (3,)])\narray([[1, 1, 0],\n [0, 0, 1]])\n>>> mlb.classes_\narray([1, 2, 3])\n\n>>> mlb.fit_transform([{'sci-fi', 'thriller'}, {'comedy'}])\narray([[0, 1, 1],\n [1, 0, 0]])\n>>> list(mlb.classes_)\n['comedy', 'sci-fi', 'thriller']\n\nA common mistake is to pass in a list, which leads to the following issue:\n\n>>> mlb = MultiLabelBinarizer()\n>>> mlb.fit(['sci-fi', 'thriller', 'comedy'])\nMultiLabelBinarizer()\n>>> mlb.classes_\narray(['-', 'c', 'd', 'e', 'f', 'h', 'i', 'l', 'm', 'o', 'r', 's', 't',\n 'y'], dtype=object)\n\nTo correct this, the list of labels should be passed in as:\n\n>>> mlb = MultiLabelBinarizer()\n>>> mlb.fit([['sci-fi', 'thriller', 'comedy']])\nMultiLabelBinarizer()\n>>> mlb.classes_\narray(['comedy', 'sci-fi', 'thriller'], dtype=object)\n\nSee Also\n--------\nOneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n scheme.", + "code": "class MultiLabelBinarizer(TransformerMixin, BaseEstimator):\n \"\"\"Transform between iterable of iterables and a multilabel format.\n\n Although a list of sets or tuples is a very intuitive format for multilabel\n data, it is unwieldy to process. 
This transformer converts between this\n intuitive format and the supported multilabel format: a (samples x classes)\n binary matrix indicating the presence of a class label.\n\n Parameters\n ----------\n classes : array-like of shape (n_classes,), default=None\n Indicates an ordering for the class labels.\n All entries should be unique (cannot contain duplicate classes).\n\n sparse_output : bool, default=False\n Set to True if output binary array is desired in CSR sparse format.\n\n Attributes\n ----------\n classes_ : ndarray of shape (n_classes,)\n A copy of the `classes` parameter when provided.\n Otherwise it corresponds to the sorted set of classes found\n when fitting.\n\n Examples\n --------\n >>> from sklearn.preprocessing import MultiLabelBinarizer\n >>> mlb = MultiLabelBinarizer()\n >>> mlb.fit_transform([(1, 2), (3,)])\n array([[1, 1, 0],\n [0, 0, 1]])\n >>> mlb.classes_\n array([1, 2, 3])\n\n >>> mlb.fit_transform([{'sci-fi', 'thriller'}, {'comedy'}])\n array([[0, 1, 1],\n [1, 0, 0]])\n >>> list(mlb.classes_)\n ['comedy', 'sci-fi', 'thriller']\n\n A common mistake is to pass in a list, which leads to the following issue:\n\n >>> mlb = MultiLabelBinarizer()\n >>> mlb.fit(['sci-fi', 'thriller', 'comedy'])\n MultiLabelBinarizer()\n >>> mlb.classes_\n array(['-', 'c', 'd', 'e', 'f', 'h', 'i', 'l', 'm', 'o', 'r', 's', 't',\n 'y'], dtype=object)\n\n To correct this, the list of labels should be passed in as:\n\n >>> mlb = MultiLabelBinarizer()\n >>> mlb.fit([['sci-fi', 'thriller', 'comedy']])\n MultiLabelBinarizer()\n >>> mlb.classes_\n array(['comedy', 'sci-fi', 'thriller'], dtype=object)\n\n See Also\n --------\n OneHotEncoder : Encode categorical features using a one-hot aka one-of-K\n scheme.\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, classes=None, sparse_output=False):\n self.classes = classes\n self.sparse_output = sparse_output\n\n def fit(self, y):\n \"\"\"Fit the label sets binarizer, storing :term:`classes_`.\n\n Parameters\n ----------\n y : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. If the `classes` parameter is set, `y` will not be\n iterated.\n\n Returns\n -------\n self : returns this MultiLabelBinarizer instance\n \"\"\"\n self._cached_dict = None\n if self.classes is None:\n classes = sorted(set(itertools.chain.from_iterable(y)))\n elif len(set(self.classes)) < len(self.classes):\n raise ValueError(\"The classes argument contains duplicate \"\n \"classes. Remove these duplicates before passing \"\n \"them to MultiLabelBinarizer.\")\n else:\n classes = self.classes\n dtype = int if all(isinstance(c, int) for c in classes) else object\n self.classes_ = np.empty(len(classes), dtype=dtype)\n self.classes_[:] = classes\n return self\n\n def fit_transform(self, y):\n \"\"\"Fit the label sets binarizer and transform the given label sets.\n\n Parameters\n ----------\n y : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. If the `classes` parameter is set, `y` will not be\n iterated.\n\n Returns\n -------\n y_indicator : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n A matrix such that `y_indicator[i, j] = 1` i.f.f. `classes_[j]`\n is in `y[i]`, and 0 otherwise. 
Sparse matrix will be of CSR\n format.\n \"\"\"\n self._cached_dict = None\n\n if self.classes is not None:\n return self.fit(y).transform(y)\n\n # Automatically increment on new class\n class_mapping = defaultdict(int)\n class_mapping.default_factory = class_mapping.__len__\n yt = self._transform(y, class_mapping)\n\n # sort classes and reorder columns\n tmp = sorted(class_mapping, key=class_mapping.get)\n\n # (make safe for tuples)\n dtype = int if all(isinstance(c, int) for c in tmp) else object\n class_mapping = np.empty(len(tmp), dtype=dtype)\n class_mapping[:] = tmp\n self.classes_, inverse = np.unique(class_mapping, return_inverse=True)\n # ensure yt.indices keeps its current dtype\n yt.indices = np.array(inverse[yt.indices], dtype=yt.indices.dtype,\n copy=False)\n\n if not self.sparse_output:\n yt = yt.toarray()\n\n return yt\n\n def transform(self, y):\n \"\"\"Transform the given label sets.\n\n Parameters\n ----------\n y : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. If the `classes` parameter is set, `y` will not be\n iterated.\n\n Returns\n -------\n y_indicator : array or CSR matrix, shape (n_samples, n_classes)\n A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]` is in\n `y[i]`, and 0 otherwise.\n \"\"\"\n check_is_fitted(self)\n\n class_to_index = self._build_cache()\n yt = self._transform(y, class_to_index)\n\n if not self.sparse_output:\n yt = yt.toarray()\n\n return yt\n\n def _build_cache(self):\n if self._cached_dict is None:\n self._cached_dict = dict(zip(self.classes_,\n range(len(self.classes_))))\n\n return self._cached_dict\n\n def _transform(self, y, class_mapping):\n \"\"\"Transforms the label sets with a given mapping\n\n Parameters\n ----------\n y : iterable of iterables\n class_mapping : Mapping\n Maps from label to column index in label indicator matrix.\n\n Returns\n -------\n y_indicator : sparse matrix of shape (n_samples, n_classes)\n Label indicator matrix. Will be of CSR format.\n \"\"\"\n indices = array.array('i')\n indptr = array.array('i', [0])\n unknown = set()\n for labels in y:\n index = set()\n for label in labels:\n try:\n index.add(class_mapping[label])\n except KeyError:\n unknown.add(label)\n indices.extend(index)\n indptr.append(len(indices))\n if unknown:\n warnings.warn('unknown class(es) {0} will be ignored'\n .format(sorted(unknown, key=str)))\n data = np.ones(len(indices), dtype=int)\n\n return sp.csr_matrix((data, indices, indptr),\n shape=(len(indptr) - 1, len(class_mapping)))\n\n def inverse_transform(self, yt):\n \"\"\"Transform the given indicator matrix into label sets.\n\n Parameters\n ----------\n yt : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n A matrix containing only 1s ands 0s.\n\n Returns\n -------\n y : list of tuples\n The set of labels for each sample such that `y[i]` consists of\n `classes_[j]` for each `yt[i, j] == 1`.\n \"\"\"\n check_is_fitted(self)\n\n if yt.shape[1] != len(self.classes_):\n raise ValueError('Expected indicator for {0} classes, but got {1}'\n .format(len(self.classes_), yt.shape[1]))\n\n if sp.issparse(yt):\n yt = yt.tocsr()\n if len(yt.data) != 0 and len(np.setdiff1d(yt.data, [0, 1])) > 0:\n raise ValueError('Expected only 0s and 1s in label indicator.')\n return [tuple(self.classes_.take(yt.indices[start:end]))\n for start, end in zip(yt.indptr[:-1], yt.indptr[1:])]\n else:\n unexpected = np.setdiff1d(yt, [0, 1])\n if len(unexpected) > 0:\n raise ValueError('Expected only 0s and 1s in label indicator. 
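A short sketch of `MultiLabelBinarizer` covering the fixed-`classes` path and the unknown-class warning emitted by `_transform` above:

```python
import warnings
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer(classes=['comedy', 'sci-fi', 'thriller'])
Y = mlb.fit_transform([{'sci-fi', 'thriller'}, {'comedy'}])
print(Y)                         # [[0 1 1], [1 0 0]]
print(mlb.inverse_transform(Y))  # [('sci-fi', 'thriller'), ('comedy',)]

# Labels outside `classes` are dropped with a warning at transform time.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    mlb.transform([{'comedy', 'western'}])
print(caught[0].message)  # unknown class(es) ['western'] will be ignored
```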
'\n 'Also got {0}'.format(unexpected))\n return [tuple(self.classes_.compress(indicators)) for indicators\n in yt]\n\n def _more_tags(self):\n return {'X_types': ['2dlabels']}", + "instance_attributes": [ + { + "name": "sparse_output", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "classes_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection", + "name": "BaseRandomProjection", + "qname": "sklearn.random_projection.BaseRandomProjection", + "decorators": [], + "superclasses": ["TransformerMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.random_projection/BaseRandomProjection/__init__", + "scikit-learn/sklearn.random_projection/BaseRandomProjection/_make_random_matrix", + "scikit-learn/sklearn.random_projection/BaseRandomProjection/fit", + "scikit-learn/sklearn.random_projection/BaseRandomProjection/transform" + ], + "is_public": true, + "reexported_by": [], + "description": "Base class for random projections.\n\nWarning: This class should not be used directly.\nUse derived classes instead.", + "docstring": "Base class for random projections.\n\nWarning: This class should not be used directly.\nUse derived classes instead.", + "code": "class BaseRandomProjection(TransformerMixin, BaseEstimator, metaclass=ABCMeta):\n \"\"\"Base class for random projections.\n\n Warning: This class should not be used directly.\n Use derived classes instead.\n \"\"\"\n\n @abstractmethod\n def __init__(self, n_components='auto', *, eps=0.1, dense_output=False,\n random_state=None):\n self.n_components = n_components\n self.eps = eps\n self.dense_output = dense_output\n self.random_state = random_state\n\n @abstractmethod\n def _make_random_matrix(self, n_components, n_features):\n \"\"\"Generate the random projection matrix.\n\n Parameters\n ----------\n n_components : int,\n Dimensionality of the target projection space.\n\n n_features : int,\n Dimensionality of the original source space.\n\n Returns\n -------\n components : {ndarray, sparse matrix} of shape \\\n (n_components, n_features)\n The generated random matrix. 
Sparse matrix will be of CSR format.\n\n \"\"\"\n\n def fit(self, X, y=None):\n \"\"\"Generate a sparse random projection matrix.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training set: only the shape is used to find optimal random\n matrix dimensions based on the theory referenced in the\n afore mentioned papers.\n\n y\n Ignored\n\n Returns\n -------\n self\n\n \"\"\"\n X = self._validate_data(X, accept_sparse=['csr', 'csc'])\n\n n_samples, n_features = X.shape\n\n if self.n_components == 'auto':\n self.n_components_ = johnson_lindenstrauss_min_dim(\n n_samples=n_samples, eps=self.eps)\n\n if self.n_components_ <= 0:\n raise ValueError(\n 'eps=%f and n_samples=%d lead to a target dimension of '\n '%d which is invalid' % (\n self.eps, n_samples, self.n_components_))\n\n elif self.n_components_ > n_features:\n raise ValueError(\n 'eps=%f and n_samples=%d lead to a target dimension of '\n '%d which is larger than the original space with '\n 'n_features=%d' % (self.eps, n_samples, self.n_components_,\n n_features))\n else:\n if self.n_components <= 0:\n raise ValueError(\"n_components must be greater than 0, got %s\"\n % self.n_components)\n\n elif self.n_components > n_features:\n warnings.warn(\n \"The number of components is higher than the number of\"\n \" features: n_features < n_components (%s < %s).\"\n \"The dimensionality of the problem will not be reduced.\"\n % (n_features, self.n_components),\n DataDimensionalityWarning)\n\n self.n_components_ = self.n_components\n\n # Generate a projection matrix of size [n_components, n_features]\n self.components_ = self._make_random_matrix(self.n_components_,\n n_features)\n\n # Check contract\n assert self.components_.shape == (self.n_components_, n_features), (\n 'An error has occurred the self.components_ matrix has '\n ' not the proper shape.')\n\n return self\n\n def transform(self, X):\n \"\"\"Project the data by using matrix product with the random matrix\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input data to project into a smaller dimensional space.\n\n Returns\n -------\n X_new : {ndarray, sparse matrix} of shape (n_samples, n_components)\n Projected array.\n \"\"\"\n X = check_array(X, accept_sparse=['csr', 'csc'])\n\n check_is_fitted(self)\n\n if X.shape[1] != self.components_.shape[1]:\n raise ValueError(\n 'Impossible to perform projection:'\n 'X at fit stage had a different number of features. 
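When `n_components='auto'`, `fit` above resolves the target dimensionality through the public `johnson_lindenstrauss_min_dim` helper; a quick look at how `eps` drives the bound (the `eps=0.1`, 100-sample case matches the `(100, 3947)` shape in the docstring examples below):

```python
from sklearn.random_projection import johnson_lindenstrauss_min_dim

# Looser tolerances need far fewer target dimensions.
for eps in (0.5, 0.1, 0.01):
    print(eps, johnson_lindenstrauss_min_dim(n_samples=100, eps=eps))
# eps=0.1 -> 3947, as in the GaussianRandomProjection example that follows
```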
'\n '(%s != %s)' % (X.shape[1], self.components_.shape[1]))\n\n X_new = safe_sparse_dot(X, self.components_.T,\n dense_output=self.dense_output)\n return X_new", + "instance_attributes": [ + { + "name": "n_components", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "eps", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "dense_output", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.random_projection/GaussianRandomProjection", + "name": "GaussianRandomProjection", + "qname": "sklearn.random_projection.GaussianRandomProjection", + "decorators": [], + "superclasses": ["BaseRandomProjection"], + "methods": [ + "scikit-learn/sklearn.random_projection/GaussianRandomProjection/__init__", + "scikit-learn/sklearn.random_projection/GaussianRandomProjection/_make_random_matrix" + ], + "is_public": true, + "reexported_by": [], + "description": "Reduce dimensionality through Gaussian random projection.\n\nThe components of the random matrix are drawn from N(0, 1 / n_components).\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "docstring": "Reduce dimensionality through Gaussian random projection.\n\nThe components of the random matrix are drawn from N(0, 1 / n_components).\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int or 'auto', default='auto'\n Dimensionality of the target projection space.\n\n n_components can be automatically adjusted according to the\n number of samples in the dataset and the bound given by the\n Johnson-Lindenstrauss lemma. In that case the quality of the\n embedding is controlled by the ``eps`` parameter.\n\n It should be noted that Johnson-Lindenstrauss lemma can yield\n very conservative estimated of the required number of components\n as it makes no assumption on the structure of the dataset.\n\neps : float, default=0.1\n Parameter to control the quality of the embedding according to\n the Johnson-Lindenstrauss lemma when `n_components` is set to\n 'auto'. The value should be strictly positive.\n\n Smaller values lead to better embedding and higher number of\n dimensions (n_components) in the target projection space.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the\n projection matrix at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nn_components_ : int\n Concrete number of components computed when n_components=\"auto\".\n\ncomponents_ : ndarray of shape (n_components, n_features)\n Random matrix used for the projection.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.random_projection import GaussianRandomProjection\n>>> rng = np.random.RandomState(42)\n>>> X = rng.rand(100, 10000)\n>>> transformer = GaussianRandomProjection(random_state=rng)\n>>> X_new = transformer.fit_transform(X)\n>>> X_new.shape\n(100, 3947)\n\nSee Also\n--------\nSparseRandomProjection", + "code": "class GaussianRandomProjection(BaseRandomProjection):\n \"\"\"Reduce dimensionality through Gaussian random projection.\n\n The components of the random matrix are drawn from N(0, 1 / n_components).\n\n Read more in the :ref:`User Guide `.\n\n .. 
versionadded:: 0.13\n\n Parameters\n ----------\n n_components : int or 'auto', default='auto'\n Dimensionality of the target projection space.\n\n n_components can be automatically adjusted according to the\n number of samples in the dataset and the bound given by the\n Johnson-Lindenstrauss lemma. In that case the quality of the\n embedding is controlled by the ``eps`` parameter.\n\n It should be noted that Johnson-Lindenstrauss lemma can yield\n very conservative estimated of the required number of components\n as it makes no assumption on the structure of the dataset.\n\n eps : float, default=0.1\n Parameter to control the quality of the embedding according to\n the Johnson-Lindenstrauss lemma when `n_components` is set to\n 'auto'. The value should be strictly positive.\n\n Smaller values lead to better embedding and higher number of\n dimensions (n_components) in the target projection space.\n\n random_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the\n projection matrix at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n n_components_ : int\n Concrete number of components computed when n_components=\"auto\".\n\n components_ : ndarray of shape (n_components, n_features)\n Random matrix used for the projection.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.random_projection import GaussianRandomProjection\n >>> rng = np.random.RandomState(42)\n >>> X = rng.rand(100, 10000)\n >>> transformer = GaussianRandomProjection(random_state=rng)\n >>> X_new = transformer.fit_transform(X)\n >>> X_new.shape\n (100, 3947)\n\n See Also\n --------\n SparseRandomProjection\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components='auto', *, eps=0.1, random_state=None):\n super().__init__(\n n_components=n_components,\n eps=eps,\n dense_output=True,\n random_state=random_state)\n\n def _make_random_matrix(self, n_components, n_features):\n \"\"\" Generate the random projection matrix.\n\n Parameters\n ----------\n n_components : int,\n Dimensionality of the target projection space.\n\n n_features : int,\n Dimensionality of the original source space.\n\n Returns\n -------\n components : {ndarray, sparse matrix} of shape \\\n (n_components, n_features)\n The generated random matrix. 
Sparse matrix will be of CSR format.\n\n \"\"\"\n random_state = check_random_state(self.random_state)\n return _gaussian_random_matrix(n_components,\n n_features,\n random_state=random_state)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.random_projection/SparseRandomProjection", + "name": "SparseRandomProjection", + "qname": "sklearn.random_projection.SparseRandomProjection", + "decorators": [], + "superclasses": ["BaseRandomProjection"], + "methods": [ + "scikit-learn/sklearn.random_projection/SparseRandomProjection/__init__", + "scikit-learn/sklearn.random_projection/SparseRandomProjection/_make_random_matrix" + ], + "is_public": true, + "reexported_by": [], + "description": "Reduce dimensionality through sparse random projection.\n\nSparse random matrix is an alternative to dense random\nprojection matrix that guarantees similar embedding quality while being\nmuch more memory efficient and allowing faster computation of the\nprojected data.\n\nIf we note `s = 1 / density` the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "docstring": "Reduce dimensionality through sparse random projection.\n\nSparse random matrix is an alternative to dense random\nprojection matrix that guarantees similar embedding quality while being\nmuch more memory efficient and allowing faster computation of the\nprojected data.\n\nIf we note `s = 1 / density` the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int or 'auto', default='auto'\n Dimensionality of the target projection space.\n\n n_components can be automatically adjusted according to the\n number of samples in the dataset and the bound given by the\n Johnson-Lindenstrauss lemma. In that case the quality of the\n embedding is controlled by the ``eps`` parameter.\n\n It should be noted that Johnson-Lindenstrauss lemma can yield\n very conservative estimated of the required number of components\n as it makes no assumption on the structure of the dataset.\n\ndensity : float or 'auto', default='auto'\n Ratio in the range (0, 1] of non-zero component in the random\n projection matrix.\n\n If density = 'auto', the value is set to the minimum density\n as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n Use density = 1 / 3.0 if you want to reproduce the results from\n Achlioptas, 2001.\n\neps : float, default=0.1\n Parameter to control the quality of the embedding according to\n the Johnson-Lindenstrauss lemma when n_components is set to\n 'auto'. This value should be strictly positive.\n\n Smaller values lead to better embedding and higher number of\n dimensions (n_components) in the target projection space.\n\ndense_output : bool, default=False\n If True, ensure that the output of the random projection is a\n dense numpy array even if the input and random projection matrix\n are both sparse. 
In practice, if the number of components is\n small the number of zero components in the projected data will\n be very small and it will be more CPU and memory efficient to\n use a dense representation.\n\n If False, the projected data uses a sparse representation if\n the input is sparse.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the\n projection matrix at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nn_components_ : int\n Concrete number of components computed when n_components=\"auto\".\n\ncomponents_ : sparse matrix of shape (n_components, n_features)\n Random matrix used for the projection. Sparse matrix will be of CSR\n format.\n\ndensity_ : float in range 0.0 - 1.0\n Concrete density computed from when density = \"auto\".\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.random_projection import SparseRandomProjection\n>>> rng = np.random.RandomState(42)\n>>> X = rng.rand(100, 10000)\n>>> transformer = SparseRandomProjection(random_state=rng)\n>>> X_new = transformer.fit_transform(X)\n>>> X_new.shape\n(100, 3947)\n>>> # very few components are non-zero\n>>> np.mean(transformer.components_ != 0)\n0.0100...\n\nSee Also\n--------\nGaussianRandomProjection\n\nReferences\n----------\n\n.. [1] Ping Li, T. Hastie and K. W. Church, 2006,\n \"Very Sparse Random Projections\".\n https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n.. [2] D. Achlioptas, 2001, \"Database-friendly random projections\",\n https://users.soe.ucsc.edu/~optas/papers/jl.pdf", + "code": "class SparseRandomProjection(BaseRandomProjection):\n \"\"\"Reduce dimensionality through sparse random projection.\n\n Sparse random matrix is an alternative to dense random\n projection matrix that guarantees similar embedding quality while being\n much more memory efficient and allowing faster computation of the\n projected data.\n\n If we note `s = 1 / density` the components of the random matrix are\n drawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.13\n\n Parameters\n ----------\n n_components : int or 'auto', default='auto'\n Dimensionality of the target projection space.\n\n n_components can be automatically adjusted according to the\n number of samples in the dataset and the bound given by the\n Johnson-Lindenstrauss lemma. In that case the quality of the\n embedding is controlled by the ``eps`` parameter.\n\n It should be noted that Johnson-Lindenstrauss lemma can yield\n very conservative estimated of the required number of components\n as it makes no assumption on the structure of the dataset.\n\n density : float or 'auto', default='auto'\n Ratio in the range (0, 1] of non-zero component in the random\n projection matrix.\n\n If density = 'auto', the value is set to the minimum density\n as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n Use density = 1 / 3.0 if you want to reproduce the results from\n Achlioptas, 2001.\n\n eps : float, default=0.1\n Parameter to control the quality of the embedding according to\n the Johnson-Lindenstrauss lemma when n_components is set to\n 'auto'. 
This value should be strictly positive.\n\n Smaller values lead to better embedding and higher number of\n dimensions (n_components) in the target projection space.\n\n dense_output : bool, default=False\n If True, ensure that the output of the random projection is a\n dense numpy array even if the input and random projection matrix\n are both sparse. In practice, if the number of components is\n small the number of zero components in the projected data will\n be very small and it will be more CPU and memory efficient to\n use a dense representation.\n\n If False, the projected data uses a sparse representation if\n the input is sparse.\n\n random_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the\n projection matrix at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n n_components_ : int\n Concrete number of components computed when n_components=\"auto\".\n\n components_ : sparse matrix of shape (n_components, n_features)\n Random matrix used for the projection. Sparse matrix will be of CSR\n format.\n\n density_ : float in range 0.0 - 1.0\n Concrete density computed from when density = \"auto\".\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.random_projection import SparseRandomProjection\n >>> rng = np.random.RandomState(42)\n >>> X = rng.rand(100, 10000)\n >>> transformer = SparseRandomProjection(random_state=rng)\n >>> X_new = transformer.fit_transform(X)\n >>> X_new.shape\n (100, 3947)\n >>> # very few components are non-zero\n >>> np.mean(transformer.components_ != 0)\n 0.0100...\n\n See Also\n --------\n GaussianRandomProjection\n\n References\n ----------\n\n .. [1] Ping Li, T. Hastie and K. W. Church, 2006,\n \"Very Sparse Random Projections\".\n https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n .. [2] D. Achlioptas, 2001, \"Database-friendly random projections\",\n https://users.soe.ucsc.edu/~optas/papers/jl.pdf\n\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, n_components='auto', *, density='auto', eps=0.1,\n dense_output=False, random_state=None):\n super().__init__(\n n_components=n_components,\n eps=eps,\n dense_output=dense_output,\n random_state=random_state)\n\n self.density = density\n\n def _make_random_matrix(self, n_components, n_features):\n \"\"\" Generate the random projection matrix\n\n Parameters\n ----------\n n_components : int\n Dimensionality of the target projection space.\n\n n_features : int\n Dimensionality of the original source space.\n\n Returns\n -------\n components : {ndarray, sparse matrix} of shape \\\n (n_components, n_features)\n The generated random matrix. 
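A sketch confirming two properties stated above: `density='auto'` resolves to `1 / sqrt(n_features)`, and the nonzero entries of the fitted `components_` take only the two values `±sqrt(s) / sqrt(n_components)` with `s = 1 / density`:

```python
import numpy as np
from sklearn.random_projection import SparseRandomProjection

rng = np.random.RandomState(42)
X = rng.rand(50, 10000)

srp = SparseRandomProjection(random_state=42).fit(X)
print(srp.density_, 1 / np.sqrt(10000))  # 0.01 0.01

# Only +/- sqrt(s)/sqrt(n_components) appear among the stored nonzeros.
expected = np.sqrt(1 / srp.density_) / np.sqrt(srp.n_components_)
print(np.unique(srp.components_.data), expected)
```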
Sparse matrix will be of CSR format.\n\n \"\"\"\n random_state = check_random_state(self.random_state)\n self.density_ = _check_density(self.density, n_features)\n return _sparse_random_matrix(n_components,\n n_features,\n density=self.density_,\n random_state=random_state)", + "instance_attributes": [ + { + "name": "density", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "density_", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation", + "name": "BaseLabelPropagation", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation", + "decorators": [], + "superclasses": ["ClassifierMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/__init__", + "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/_get_kernel", + "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/_build_graph", + "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/predict", + "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/predict_proba", + "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for label propagation module.", + "docstring": "Base class for label propagation module.\n\n Parameters\n ----------\n kernel : {'knn', 'rbf'} or callable, default='rbf'\n String identifier for kernel function to use or the kernel function\n itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n passed should take two inputs, each of shape (n_samples, n_features),\n and return a (n_samples, n_samples) shaped weight matrix.\n\n gamma : float, default=20\n Parameter for rbf kernel.\n\n n_neighbors : int, default=7\n Parameter for knn kernel. Need to be strictly positive.\n\n alpha : float, default=1.0\n Clamping factor.\n\n max_iter : int, default=30\n Change maximum number of iterations allowed.\n\n tol : float, default=1e-3\n Convergence tolerance: threshold to consider the system at steady\n state.\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n ", + "code": "class BaseLabelPropagation(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):\n \"\"\"Base class for label propagation module.\n\n Parameters\n ----------\n kernel : {'knn', 'rbf'} or callable, default='rbf'\n String identifier for kernel function to use or the kernel function\n itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n passed should take two inputs, each of shape (n_samples, n_features),\n and return a (n_samples, n_samples) shaped weight matrix.\n\n gamma : float, default=20\n Parameter for rbf kernel.\n\n n_neighbors : int, default=7\n Parameter for knn kernel. Need to be strictly positive.\n\n alpha : float, default=1.0\n Clamping factor.\n\n max_iter : int, default=30\n Change maximum number of iterations allowed.\n\n tol : float, default=1e-3\n Convergence tolerance: threshold to consider the system at steady\n state.\n\n n_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, kernel='rbf', *, gamma=20, n_neighbors=7,\n alpha=1, max_iter=30, tol=1e-3, n_jobs=None):\n\n self.max_iter = max_iter\n self.tol = tol\n\n # kernel parameters\n self.kernel = kernel\n self.gamma = gamma\n self.n_neighbors = n_neighbors\n\n # clamping factor\n self.alpha = alpha\n\n self.n_jobs = n_jobs\n\n def _get_kernel(self, X, y=None):\n if self.kernel == \"rbf\":\n if y is None:\n return rbf_kernel(X, X, gamma=self.gamma)\n else:\n return rbf_kernel(X, y, gamma=self.gamma)\n elif self.kernel == \"knn\":\n if self.nn_fit is None:\n self.nn_fit = NearestNeighbors(n_neighbors=self.n_neighbors,\n n_jobs=self.n_jobs).fit(X)\n if y is None:\n return self.nn_fit.kneighbors_graph(self.nn_fit._fit_X,\n self.n_neighbors,\n mode='connectivity')\n else:\n return self.nn_fit.kneighbors(y, return_distance=False)\n elif callable(self.kernel):\n if y is None:\n return self.kernel(X, X)\n else:\n return self.kernel(X, y)\n else:\n raise ValueError(\"%s is not a valid kernel. Only rbf and knn\"\n \" or an explicit function \"\n \" are supported at this time.\" % self.kernel)\n\n @abstractmethod\n def _build_graph(self):\n raise NotImplementedError(\"Graph construction must be implemented\"\n \" to fit a label propagation model.\")\n\n def predict(self, X):\n \"\"\"Performs inductive inference across the model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n Predictions for input data.\n \"\"\"\n probas = self.predict_proba(X)\n return self.classes_[np.argmax(probas, axis=1)].ravel()\n\n def predict_proba(self, X):\n \"\"\"Predict probability for each possible outcome.\n\n Compute the probability estimates for each single sample in X\n and each possible outcome seen during training (categorical\n distribution).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n probabilities : ndarray of shape (n_samples, n_classes)\n Normalized probability distributions across\n class labels.\n \"\"\"\n check_is_fitted(self)\n\n X_2d = check_array(X, accept_sparse=['csc', 'csr', 'coo', 'dok',\n 'bsr', 'lil', 'dia'])\n weight_matrices = self._get_kernel(self.X_, X_2d)\n if self.kernel == 'knn':\n probabilities = np.array([\n np.sum(self.label_distributions_[weight_matrix], axis=0)\n for weight_matrix in weight_matrices])\n else:\n weight_matrices = weight_matrices.T\n probabilities = safe_sparse_dot(\n weight_matrices, self.label_distributions_)\n normalizer = np.atleast_2d(np.sum(probabilities, axis=1)).T\n probabilities /= normalizer\n return probabilities\n\n def fit(self, X, y):\n \"\"\"Fit a semi-supervised label propagation model based\n\n All the input data is provided matrix X (labeled and unlabeled)\n and corresponding label matrix y with a dedicated marker value for\n unlabeled samples.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n A matrix of shape (n_samples, n_samples) will be created from this.\n\n y : array-like of shape (n_samples,)\n `n_labeled_samples` (unlabeled points are marked as -1)\n All unlabeled samples will be transductively assigned labels.\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._validate_data(X, y)\n self.X_ = X\n check_classification_targets(y)\n\n # actual graph construction (implementations should override this)\n graph_matrix = self._build_graph()\n\n # 
label construction\n # construct a categorical distribution for classification only\n classes = np.unique(y)\n classes = (classes[classes != -1])\n self.classes_ = classes\n\n n_samples, n_classes = len(y), len(classes)\n\n alpha = self.alpha\n if self._variant == 'spreading' and \\\n (alpha is None or alpha <= 0.0 or alpha >= 1.0):\n raise ValueError('alpha=%s is invalid: it must be inside '\n 'the open interval (0, 1)' % alpha)\n y = np.asarray(y)\n unlabeled = y == -1\n\n # initialize distributions\n self.label_distributions_ = np.zeros((n_samples, n_classes))\n for label in classes:\n self.label_distributions_[y == label, classes == label] = 1\n\n y_static = np.copy(self.label_distributions_)\n if self._variant == 'propagation':\n # LabelPropagation\n y_static[unlabeled] = 0\n else:\n # LabelSpreading\n y_static *= 1 - alpha\n\n l_previous = np.zeros((self.X_.shape[0], n_classes))\n\n unlabeled = unlabeled[:, np.newaxis]\n if sparse.isspmatrix(graph_matrix):\n graph_matrix = graph_matrix.tocsr()\n\n for self.n_iter_ in range(self.max_iter):\n if np.abs(self.label_distributions_ - l_previous).sum() < self.tol:\n break\n\n l_previous = self.label_distributions_\n self.label_distributions_ = safe_sparse_dot(\n graph_matrix, self.label_distributions_)\n\n if self._variant == 'propagation':\n normalizer = np.sum(\n self.label_distributions_, axis=1)[:, np.newaxis]\n self.label_distributions_ /= normalizer\n self.label_distributions_ = np.where(unlabeled,\n self.label_distributions_,\n y_static)\n else:\n # clamp\n self.label_distributions_ = np.multiply(\n alpha, self.label_distributions_) + y_static\n else:\n warnings.warn(\n 'max_iter=%d was reached without convergence.' % self.max_iter,\n category=ConvergenceWarning\n )\n self.n_iter_ += 1\n\n normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]\n normalizer[normalizer == 0] = 1\n self.label_distributions_ /= normalizer\n\n # set the transduction item\n transduction = self.classes_[np.argmax(self.label_distributions_,\n axis=1)]\n self.transduction_ = transduction.ravel()\n return self", + "instance_attributes": [ + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "kernel", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "gamma", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "n_neighbors", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "alpha", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "nn_fit", + "types": { + "kind": "NamedType", + "name": "NearestNeighbors" + } + }, + { + "name": "label_distributions_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation", + "name": "LabelPropagation", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation", + "decorators": [], + "superclasses": ["BaseLabelPropagation"], + "methods": [ + "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/__init__", + "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/_build_graph", + "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/fit" + ], + "is_public": false, + "reexported_by": [], + "description": "Label Propagation classifier\n\nRead more in the :ref:`User Guide `.", + "docstring": "Label Propagation 
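The fit loop above repeatedly multiplies the label distributions by the graph matrix, renormalizes, and clamps the labeled rows back to their one-hot values (the 'propagation' variant). A minimal NumPy sketch of that update, with a toy row-stochastic affinity matrix as the only assumption:

import numpy as np

y = np.array([0, 1, -1])              # sample 2 unlabeled
classes = np.array([0, 1])
unlabeled = (y == -1)[:, np.newaxis]

graph = np.array([[0.6, 0.2, 0.2],    # toy stand-in for graph_matrix
                  [0.2, 0.6, 0.2],
                  [0.4, 0.2, 0.4]])

dists = np.zeros((3, 2))
for label in classes:
    dists[y == label, classes == label] = 1
y_static = dists.copy()

for _ in range(30):                   # max_iter
    prev = dists
    dists = graph @ dists
    dists /= dists.sum(axis=1, keepdims=True)
    dists = np.where(unlabeled, dists, y_static)  # clamp labeled rows
    if np.abs(dists - prev).sum() < 1e-3:         # tol
        break

print(classes[dists.argmax(axis=1)])  # the transduction_, here [0 1 0]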
classifier\n\nRead more in the :ref:`User Guide <label_propagation>`.\n\nParameters\n----------\nkernel : {'knn', 'rbf'} or callable, default='rbf'\n String identifier for kernel function to use or the kernel function\n itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n passed should take two inputs, each of shape (n_samples, n_features),\n and return a (n_samples, n_samples) shaped weight matrix.\n\ngamma : float, default=20\n Parameter for rbf kernel.\n\nn_neighbors : int, default=7\n Parameter for knn kernel which needs to be strictly positive.\n\nmax_iter : int, default=1000\n Maximum number of iterations allowed.\n\ntol : float, default=1e-3\n Convergence tolerance: threshold to consider the system at steady\n state.\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n for more details.\n\nAttributes\n----------\nX_ : ndarray of shape (n_samples, n_features)\n Input array.\n\nclasses_ : ndarray of shape (n_classes,)\n The distinct labels used in classifying instances.\n\nlabel_distributions_ : ndarray of shape (n_samples, n_classes)\n Categorical distribution for each item.\n\ntransduction_ : ndarray of shape (n_samples,)\n Label assigned to each item via the transduction.\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets\n>>> from sklearn.semi_supervised import LabelPropagation\n>>> label_prop_model = LabelPropagation()\n>>> iris = datasets.load_iris()\n>>> rng = np.random.RandomState(42)\n>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n>>> labels = np.copy(iris.target)\n>>> labels[random_unlabeled_points] = -1\n>>> label_prop_model.fit(iris.data, labels)\nLabelPropagation(...)\n\nReferences\n----------\nXiaojin Zhu and Zoubin Ghahramani. Learning from labeled and unlabeled data\nwith label propagation. Technical Report CMU-CALD-02-107, Carnegie Mellon\nUniversity, 2002 http://pages.cs.wisc.edu/~jerryzhu/pub/CMU-CALD-02-107.pdf\n\nSee Also\n--------\nLabelSpreading : Alternate label propagation strategy more robust to noise.", + "code": "class LabelPropagation(BaseLabelPropagation):\n \"\"\"Label Propagation classifier\n\n Read more in the :ref:`User Guide <label_propagation>`.\n\n Parameters\n ----------\n kernel : {'knn', 'rbf'} or callable, default='rbf'\n String identifier for kernel function to use or the kernel function\n itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n passed should take two inputs, each of shape (n_samples, n_features),\n and return a (n_samples, n_samples) shaped weight matrix.\n\n gamma : float, default=20\n Parameter for rbf kernel.\n\n n_neighbors : int, default=7\n Parameter for knn kernel which needs to be strictly positive.\n\n max_iter : int, default=1000\n Maximum number of iterations allowed.\n\n tol : float, default=1e-3\n Convergence tolerance: threshold to consider the system at steady\n state.\n\n n_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors.
See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n X_ : ndarray of shape (n_samples, n_features)\n Input array.\n\n classes_ : ndarray of shape (n_classes,)\n The distinct labels used in classifying instances.\n\n label_distributions_ : ndarray of shape (n_samples, n_classes)\n Categorical distribution for each item.\n\n transduction_ : ndarray of shape (n_samples)\n Label assigned to each item via the transduction.\n\n n_iter_ : int\n Number of iterations run.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn import datasets\n >>> from sklearn.semi_supervised import LabelPropagation\n >>> label_prop_model = LabelPropagation()\n >>> iris = datasets.load_iris()\n >>> rng = np.random.RandomState(42)\n >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n >>> labels = np.copy(iris.target)\n >>> labels[random_unlabeled_points] = -1\n >>> label_prop_model.fit(iris.data, labels)\n LabelPropagation(...)\n\n References\n ----------\n Xiaojin Zhu and Zoubin Ghahramani. Learning from labeled and unlabeled data\n with label propagation. Technical Report CMU-CALD-02-107, Carnegie Mellon\n University, 2002 http://pages.cs.wisc.edu/~jerryzhu/pub/CMU-CALD-02-107.pdf\n\n See Also\n --------\n LabelSpreading : Alternate label propagation strategy more robust to noise.\n \"\"\"\n\n _variant = 'propagation'\n\n @_deprecate_positional_args\n def __init__(self, kernel='rbf', *, gamma=20, n_neighbors=7,\n max_iter=1000, tol=1e-3, n_jobs=None):\n super().__init__(kernel=kernel, gamma=gamma,\n n_neighbors=n_neighbors, max_iter=max_iter,\n tol=tol, n_jobs=n_jobs, alpha=None)\n\n def _build_graph(self):\n \"\"\"Matrix representing a fully connected graph between each sample\n\n This basic implementation creates a non-stochastic affinity matrix, so\n class distributions will exceed 1 (normalization may be desired).\n \"\"\"\n if self.kernel == 'knn':\n self.nn_fit = None\n affinity_matrix = self._get_kernel(self.X_)\n normalizer = affinity_matrix.sum(axis=0)\n if sparse.isspmatrix(affinity_matrix):\n affinity_matrix.data /= np.diag(np.array(normalizer))\n else:\n affinity_matrix /= normalizer[:, np.newaxis]\n return affinity_matrix\n\n def fit(self, X, y):\n return super().fit(X, y)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading", + "name": "LabelSpreading", + "qname": "sklearn.semi_supervised._label_propagation.LabelSpreading", + "decorators": [], + "superclasses": ["BaseLabelPropagation"], + "methods": [ + "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading/__init__", + "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading/_build_graph" + ], + "is_public": false, + "reexported_by": [], + "description": "LabelSpreading model for semi-supervised learning\n\nThis model is similar to the basic Label Propagation algorithm,\nbut uses affinity matrix based on the normalized graph Laplacian\nand soft clamping across the labels.\n\nRead more in the :ref:`User Guide `.", + "docstring": "LabelSpreading model for semi-supervised learning\n\nThis model is similar to the basic Label Propagation algorithm,\nbut uses affinity matrix based on the normalized graph Laplacian\nand soft clamping across the labels.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nkernel : {'knn', 'rbf'} or callable, default='rbf'\n String identifier for kernel function to use or the kernel function\n itself. Only 'rbf' and 'knn' strings are valid inputs. 
The function\n passed should take two inputs, each of shape (n_samples, n_features),\n and return a (n_samples, n_samples) shaped weight matrix.\n\ngamma : float, default=20\n Parameter for rbf kernel.\n\nn_neighbors : int, default=7\n Parameter for knn kernel which is a strictly positive integer.\n\nalpha : float, default=0.2\n Clamping factor. A value in (0, 1) that specifies the relative amount\n that an instance should adopt the information from its neighbors as\n opposed to its initial label.\n alpha=0 means keeping the initial label information; alpha=1 means\n replacing all initial information.\n\nmax_iter : int, default=30\n Maximum number of iterations allowed.\n\ntol : float, default=1e-3\n Convergence tolerance: threshold to consider the system at steady\n state.\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nX_ : ndarray of shape (n_samples, n_features)\n Input array.\n\nclasses_ : ndarray of shape (n_classes,)\n The distinct labels used in classifying instances.\n\nlabel_distributions_ : ndarray of shape (n_samples, n_classes)\n Categorical distribution for each item.\n\ntransduction_ : ndarray of shape (n_samples,)\n Label assigned to each item via the transduction.\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets\n>>> from sklearn.semi_supervised import LabelSpreading\n>>> label_prop_model = LabelSpreading()\n>>> iris = datasets.load_iris()\n>>> rng = np.random.RandomState(42)\n>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n>>> labels = np.copy(iris.target)\n>>> labels[random_unlabeled_points] = -1\n>>> label_prop_model.fit(iris.data, labels)\nLabelSpreading(...)\n\nReferences\n----------\nDengyong Zhou, Olivier Bousquet, Thomas Navin Lal, Jason Weston,\nBernhard Schoelkopf. Learning with local and global consistency (2004)\nhttp://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.115.3219\n\nSee Also\n--------\nLabelPropagation : Unregularized graph based semi-supervised learning.", + "code": "class LabelSpreading(BaseLabelPropagation):\n \"\"\"LabelSpreading model for semi-supervised learning\n\n This model is similar to the basic Label Propagation algorithm,\n but uses affinity matrix based on the normalized graph Laplacian\n and soft clamping across the labels.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n kernel : {'knn', 'rbf'} or callable, default='rbf'\n String identifier for kernel function to use or the kernel function\n itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n passed should take two inputs, each of shape (n_samples, n_features),\n and return a (n_samples, n_samples) shaped weight matrix.\n\n gamma : float, default=20\n Parameter for rbf kernel.\n\n n_neighbors : int, default=7\n Parameter for knn kernel which is a strictly positive integer.\n\n alpha : float, default=0.2\n Clamping factor. 
A value in (0, 1) that specifies the relative amount\n that an instance should adopt the information from its neighbors as\n opposed to its initial label.\n alpha=0 means keeping the initial label information; alpha=1 means\n replacing all initial information.\n\n max_iter : int, default=30\n Maximum number of iterations allowed.\n\n tol : float, default=1e-3\n Convergence tolerance: threshold to consider the system at steady\n state.\n\n n_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Attributes\n ----------\n X_ : ndarray of shape (n_samples, n_features)\n Input array.\n\n classes_ : ndarray of shape (n_classes,)\n The distinct labels used in classifying instances.\n\n label_distributions_ : ndarray of shape (n_samples, n_classes)\n Categorical distribution for each item.\n\n transduction_ : ndarray of shape (n_samples,)\n Label assigned to each item via the transduction.\n\n n_iter_ : int\n Number of iterations run.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn import datasets\n >>> from sklearn.semi_supervised import LabelSpreading\n >>> label_prop_model = LabelSpreading()\n >>> iris = datasets.load_iris()\n >>> rng = np.random.RandomState(42)\n >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3\n >>> labels = np.copy(iris.target)\n >>> labels[random_unlabeled_points] = -1\n >>> label_prop_model.fit(iris.data, labels)\n LabelSpreading(...)\n\n References\n ----------\n Dengyong Zhou, Olivier Bousquet, Thomas Navin Lal, Jason Weston,\n Bernhard Schoelkopf. Learning with local and global consistency (2004)\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.115.3219\n\n See Also\n --------\n LabelPropagation : Unregularized graph based semi-supervised learning.\n \"\"\"\n\n _variant = 'spreading'\n\n @_deprecate_positional_args\n def __init__(self, kernel='rbf', *, gamma=20, n_neighbors=7, alpha=0.2,\n max_iter=30, tol=1e-3, n_jobs=None):\n\n # this one has different base parameters\n super().__init__(kernel=kernel, gamma=gamma,\n n_neighbors=n_neighbors, alpha=alpha,\n max_iter=max_iter, tol=tol, n_jobs=n_jobs)\n\n def _build_graph(self):\n \"\"\"Graph matrix for Label Spreading computes the graph laplacian\"\"\"\n # compute affinity matrix (or gram matrix)\n if self.kernel == 'knn':\n self.nn_fit = None\n n_samples = self.X_.shape[0]\n affinity_matrix = self._get_kernel(self.X_)\n laplacian = csgraph.laplacian(affinity_matrix, normed=True)\n laplacian = -laplacian\n if sparse.isspmatrix(laplacian):\n diag_mask = (laplacian.row == laplacian.col)\n laplacian.data[diag_mask] = 0.0\n else:\n laplacian.flat[::n_samples + 1] = 0.0 # set diag to 0.0\n return laplacian", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier", + "name": "SelfTrainingClassifier", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier", + "decorators": [], + "superclasses": ["MetaEstimatorMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/__init__", + "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/fit", + "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/predict", + "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/predict_proba", + 
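_build_graph above is the S = D^{-1/2} W D^{-1/2} construction of Zhou et al.: negate the normalized graph Laplacian, then zero its diagonal. A small dense check, with the affinity values invented for illustration (note scipy ignores the diagonal of W when computing degrees):

import numpy as np
from scipy.sparse import csgraph

W = np.array([[1.0, 0.8, 0.1],   # toy symmetric affinity matrix
              [0.8, 1.0, 0.1],
              [0.1, 0.1, 1.0]])

S = -csgraph.laplacian(W, normed=True)
S.flat[::W.shape[0] + 1] = 0.0   # zero the diagonal, as in _build_graph
print(S)                         # off-diagonals are W_ij / sqrt(d_i * d_j)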
"scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/decision_function", + "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/predict_log_proba", + "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/score" + ], + "is_public": false, + "reexported_by": [], + "description": "Self-training classifier.\n\nThis class allows a given supervised classifier to function as a\nsemi-supervised classifier, allowing it to learn from unlabeled data. It\ndoes this by iteratively predicting pseudo-labels for the unlabeled data\nand adding them to the training set.\n\nThe classifier will continue iterating until either max_iter is reached, or\nno pseudo-labels were added to the training set in the previous iteration.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Self-training classifier.\n\nThis class allows a given supervised classifier to function as a\nsemi-supervised classifier, allowing it to learn from unlabeled data. It\ndoes this by iteratively predicting pseudo-labels for the unlabeled data\nand adding them to the training set.\n\nThe classifier will continue iterating until either max_iter is reached, or\nno pseudo-labels were added to the training set in the previous iteration.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbase_estimator : estimator object\n An estimator object implementing ``fit`` and ``predict_proba``.\n Invoking the ``fit`` method will fit a clone of the passed estimator,\n which will be stored in the ``base_estimator_`` attribute.\n\nthreshold : float, default=0.75\n The decision threshold for use with `criterion='threshold'`.\n Should be in [0, 1). When using the 'threshold' criterion, a\n :ref:`well calibrated classifier ` should be used.\n\ncriterion : {'threshold', 'k_best'}, default='threshold'\n The selection criterion used to select which labels to add to the\n training set. If 'threshold', pseudo-labels with prediction\n probabilities above `threshold` are added to the dataset. If 'k_best',\n the `k_best` pseudo-labels with highest prediction probabilities are\n added to the dataset. When using the 'threshold' criterion, a\n :ref:`well calibrated classifier ` should be used.\n\nk_best : int, default=10\n The amount of samples to add in each iteration. Only used when\n `criterion` is k_best'.\n\nmax_iter : int or None, default=10\n Maximum number of iterations allowed. Should be greater than or equal\n to 0. If it is ``None``, the classifier will continue to predict labels\n until no new pseudo-labels are added, or all unlabeled samples have\n been labeled.\n\nverbose : bool, default=False\n Enable verbose output.\n\nAttributes\n----------\nbase_estimator_ : estimator object\n The fitted estimator.\n\nclasses_ : ndarray or list of ndarray of shape (n_classes,)\n Class labels for each output. (Taken from the trained\n ``base_estimator_``).\n\ntransduction_ : ndarray of shape (n_samples,)\n The labels used for the final fit of the classifier, including\n pseudo-labels added during fit.\n\nlabeled_iter_ : ndarray of shape (n_samples,)\n The iteration in which each sample was labeled. 
When a sample has\n iteration 0, the sample was already labeled in the original dataset.\n When a sample has iteration -1, the sample was not labeled in any\n iteration.\n\nn_iter_ : int\n The number of rounds of self-training, that is the number of times the\n base estimator is fitted on relabeled variants of the training set.\n\ntermination_condition_ : {'max_iter', 'no_change', 'all_labeled'}\n The reason that fitting was stopped.\n\n - 'max_iter': `n_iter_` reached `max_iter`.\n - 'no_change': no new labels were predicted.\n - 'all_labeled': all unlabeled samples were labeled before `max_iter`\n was reached.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets\n>>> from sklearn.semi_supervised import SelfTrainingClassifier\n>>> from sklearn.svm import SVC\n>>> rng = np.random.RandomState(42)\n>>> iris = datasets.load_iris()\n>>> random_unlabeled_points = rng.rand(iris.target.shape[0]) < 0.3\n>>> iris.target[random_unlabeled_points] = -1\n>>> svc = SVC(probability=True, gamma=\"auto\")\n>>> self_training_model = SelfTrainingClassifier(svc)\n>>> self_training_model.fit(iris.data, iris.target)\nSelfTrainingClassifier(...)\n\nReferences\n----------\nDavid Yarowsky. 1995. Unsupervised word sense disambiguation rivaling\nsupervised methods. In Proceedings of the 33rd annual meeting on\nAssociation for Computational Linguistics (ACL '95). Association for\nComputational Linguistics, Stroudsburg, PA, USA, 189-196. DOI:\nhttps://doi.org/10.3115/981658.981684", + "code": "class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator):\n \"\"\"Self-training classifier.\n\n This class allows a given supervised classifier to function as a\n semi-supervised classifier, allowing it to learn from unlabeled data. It\n does this by iteratively predicting pseudo-labels for the unlabeled data\n and adding them to the training set.\n\n The classifier will continue iterating until either max_iter is reached, or\n no pseudo-labels were added to the training set in the previous iteration.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n base_estimator : estimator object\n An estimator object implementing ``fit`` and ``predict_proba``.\n Invoking the ``fit`` method will fit a clone of the passed estimator,\n which will be stored in the ``base_estimator_`` attribute.\n\n threshold : float, default=0.75\n The decision threshold for use with `criterion='threshold'`.\n Should be in [0, 1). When using the 'threshold' criterion, a\n :ref:`well calibrated classifier ` should be used.\n\n criterion : {'threshold', 'k_best'}, default='threshold'\n The selection criterion used to select which labels to add to the\n training set. If 'threshold', pseudo-labels with prediction\n probabilities above `threshold` are added to the dataset. If 'k_best',\n the `k_best` pseudo-labels with highest prediction probabilities are\n added to the dataset. When using the 'threshold' criterion, a\n :ref:`well calibrated classifier ` should be used.\n\n k_best : int, default=10\n The amount of samples to add in each iteration. Only used when\n `criterion` is k_best'.\n\n max_iter : int or None, default=10\n Maximum number of iterations allowed. Should be greater than or equal\n to 0. 
If it is ``None``, the classifier will continue to predict labels\n until no new pseudo-labels are added, or all unlabeled samples have\n been labeled.\n\n verbose : bool, default=False\n Enable verbose output.\n\n Attributes\n ----------\n base_estimator_ : estimator object\n The fitted estimator.\n\n classes_ : ndarray or list of ndarray of shape (n_classes,)\n Class labels for each output. (Taken from the trained\n ``base_estimator_``).\n\n transduction_ : ndarray of shape (n_samples,)\n The labels used for the final fit of the classifier, including\n pseudo-labels added during fit.\n\n labeled_iter_ : ndarray of shape (n_samples,)\n The iteration in which each sample was labeled. When a sample has\n iteration 0, the sample was already labeled in the original dataset.\n When a sample has iteration -1, the sample was not labeled in any\n iteration.\n\n n_iter_ : int\n The number of rounds of self-training, that is the number of times the\n base estimator is fitted on relabeled variants of the training set.\n\n termination_condition_ : {'max_iter', 'no_change', 'all_labeled'}\n The reason that fitting was stopped.\n\n - 'max_iter': `n_iter_` reached `max_iter`.\n - 'no_change': no new labels were predicted.\n - 'all_labeled': all unlabeled samples were labeled before `max_iter`\n was reached.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn import datasets\n >>> from sklearn.semi_supervised import SelfTrainingClassifier\n >>> from sklearn.svm import SVC\n >>> rng = np.random.RandomState(42)\n >>> iris = datasets.load_iris()\n >>> random_unlabeled_points = rng.rand(iris.target.shape[0]) < 0.3\n >>> iris.target[random_unlabeled_points] = -1\n >>> svc = SVC(probability=True, gamma=\"auto\")\n >>> self_training_model = SelfTrainingClassifier(svc)\n >>> self_training_model.fit(iris.data, iris.target)\n SelfTrainingClassifier(...)\n\n References\n ----------\n David Yarowsky. 1995. Unsupervised word sense disambiguation rivaling\n supervised methods. In Proceedings of the 33rd annual meeting on\n Association for Computational Linguistics (ACL '95). Association for\n Computational Linguistics, Stroudsburg, PA, USA, 189-196. DOI:\n https://doi.org/10.3115/981658.981684\n \"\"\"\n _estimator_type = \"classifier\"\n\n def __init__(self,\n base_estimator,\n threshold=0.75,\n criterion='threshold',\n k_best=10,\n max_iter=10,\n verbose=False):\n self.base_estimator = base_estimator\n self.threshold = threshold\n self.criterion = criterion\n self.k_best = k_best\n self.max_iter = max_iter\n self.verbose = verbose\n\n def fit(self, X, y):\n \"\"\"\n Fits this ``SelfTrainingClassifier`` to a dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\n y : {array-like, sparse matrix} of shape (n_samples,)\n Array representing the labels. 
Unlabeled samples should have the\n label -1.\n\n Returns\n -------\n self : object\n Returns an instance of self.\n \"\"\"\n # we need row slicing support for sparce matrices\n X, y = self._validate_data(X, y, accept_sparse=[\n 'csr', 'csc', 'lil', 'dok'])\n\n if self.base_estimator is None:\n raise ValueError(\"base_estimator cannot be None!\")\n\n self.base_estimator_ = clone(self.base_estimator)\n\n if self.max_iter is not None and self.max_iter < 0:\n raise ValueError(\"max_iter must be >= 0 or None,\"\n f\" got {self.max_iter}\")\n\n if not (0 <= self.threshold < 1):\n raise ValueError(\"threshold must be in [0,1),\"\n f\" got {self.threshold}\")\n\n if self.criterion not in ['threshold', 'k_best']:\n raise ValueError(f\"criterion must be either 'threshold' \"\n f\"or 'k_best', got {self.criterion}.\")\n\n if y.dtype.kind in ['U', 'S']:\n raise ValueError(\"y has dtype string. If you wish to predict on \"\n \"string targets, use dtype object, and use -1\"\n \" as the label for unlabeled samples.\")\n\n has_label = y != -1\n\n if np.all(has_label):\n warnings.warn(\"y contains no unlabeled samples\", UserWarning)\n\n if self.criterion == 'k_best' and (self.k_best > X.shape[0] -\n np.sum(has_label)):\n warnings.warn(\"k_best is larger than the amount of unlabeled \"\n \"samples. All unlabeled samples will be labeled in \"\n \"the first iteration\", UserWarning)\n\n self.transduction_ = np.copy(y)\n self.labeled_iter_ = np.full_like(y, -1)\n self.labeled_iter_[has_label] = 0\n\n self.n_iter_ = 0\n\n while not np.all(has_label) and (self.max_iter is None or\n self.n_iter_ < self.max_iter):\n self.n_iter_ += 1\n self.base_estimator_.fit(\n X[safe_mask(X, has_label)],\n self.transduction_[has_label])\n\n # Validate the fitted estimator since `predict_proba` can be\n # delegated to an underlying \"final\" fitted estimator as\n # generally done in meta-estimator or pipeline.\n _validate_estimator(self.base_estimator_)\n\n # Predict on the unlabeled samples\n prob = self.base_estimator_.predict_proba(\n X[safe_mask(X, ~has_label)])\n pred = self.base_estimator_.classes_[np.argmax(prob, axis=1)]\n max_proba = np.max(prob, axis=1)\n\n # Select new labeled samples\n if self.criterion == 'threshold':\n selected = max_proba > self.threshold\n else:\n n_to_select = min(self.k_best, max_proba.shape[0])\n if n_to_select == max_proba.shape[0]:\n selected = np.ones_like(max_proba, dtype=bool)\n else:\n # NB these are indicies, not a mask\n selected = \\\n np.argpartition(-max_proba, n_to_select)[:n_to_select]\n\n # Map selected indices into original array\n selected_full = np.nonzero(~has_label)[0][selected]\n\n # Add newly labeled confident predictions to the dataset\n self.transduction_[selected_full] = pred[selected]\n has_label[selected_full] = True\n self.labeled_iter_[selected_full] = self.n_iter_\n\n if selected_full.shape[0] == 0:\n # no changed labels\n self.termination_condition_ = \"no_change\"\n break\n\n if self.verbose:\n print(f\"End of iteration {self.n_iter_},\"\n f\" added {selected_full.shape[0]} new labels.\")\n\n if self.n_iter_ == self.max_iter:\n self.termination_condition_ = \"max_iter\"\n if np.all(has_label):\n self.termination_condition_ = \"all_labeled\"\n\n self.base_estimator_.fit(\n X[safe_mask(X, has_label)],\n self.transduction_[has_label])\n self.classes_ = self.base_estimator_.classes_\n return self\n\n @if_delegate_has_method(delegate='base_estimator')\n def predict(self, X):\n \"\"\"Predict the classes of X.\n\n Parameters\n ----------\n X : {array-like, sparse 
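The loop above alternates fitting on the currently labeled rows, scoring the unlabeled ones with predict_proba, and promoting either every prediction above threshold or the k_best most confident ones. A usage sketch (dataset and estimator choices are arbitrary):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.semi_supervised import SelfTrainingClassifier

X, y = make_classification(n_samples=200, random_state=0)
y_partial = np.copy(y)
y_partial[np.random.RandomState(0).rand(200) < 0.7] = -1  # hide ~70% of labels

clf = SelfTrainingClassifier(LogisticRegression(max_iter=1000),
                             criterion='k_best', k_best=20, max_iter=None)
clf.fit(X, y_partial)
print(clf.termination_condition_)  # 'all_labeled': k_best exhausts the pool
print(clf.labeled_iter_[:10])      # iteration at which each sample got a label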
matrix} of shape (n_samples, n_features)\n Array representing the data.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n Array with predicted labels.\n \"\"\"\n check_is_fitted(self)\n return self.base_estimator_.predict(X)\n\n def predict_proba(self, X):\n \"\"\"Predict probability for each possible outcome.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\n Returns\n -------\n y : ndarray of shape (n_samples, n_features)\n Array with prediction probabilities.\n \"\"\"\n check_is_fitted(self)\n return self.base_estimator_.predict_proba(X)\n\n @if_delegate_has_method(delegate='base_estimator')\n def decision_function(self, X):\n \"\"\"Calls decision function of the `base_estimator`.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\n Returns\n -------\n y : ndarray of shape (n_samples, n_features)\n Result of the decision function of the `base_estimator`.\n \"\"\"\n check_is_fitted(self)\n return self.base_estimator_.decision_function(X)\n\n @if_delegate_has_method(delegate='base_estimator')\n def predict_log_proba(self, X):\n \"\"\"Predict log probability for each possible outcome.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\n Returns\n -------\n y : ndarray of shape (n_samples, n_features)\n Array with log prediction probabilities.\n \"\"\"\n check_is_fitted(self)\n return self.base_estimator_.predict_log_proba(X)\n\n @if_delegate_has_method(delegate='base_estimator')\n def score(self, X, y):\n \"\"\"Calls score on the `base_estimator`.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\n y : array-like of shape (n_samples,)\n Array representing the labels.\n\n Returns\n -------\n score : float\n Result of calling score on the `base_estimator`.\n \"\"\"\n check_is_fitted(self)\n return self.base_estimator_.score(X, y)", + "instance_attributes": [ + { + "name": "threshold", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "criterion", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "k_best", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "labeled_iter_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "name": "n_iter_", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "termination_condition_", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM", + "name": "BaseLibSVM", + "qname": "sklearn.svm._base.BaseLibSVM", + "decorators": [], + "superclasses": ["BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_more_tags", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_pairwise@getter", + "scikit-learn/sklearn.svm._base/BaseLibSVM/fit", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_validate_targets", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_warn_from_fit_status", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_fit", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_fit", + 
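Because predict, decision_function, predict_log_proba, and score above are wrapped in @if_delegate_has_method, they only surface when the wrapped base_estimator actually provides them; a quick hedged check:

from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.semi_supervised import SelfTrainingClassifier

st_nb = SelfTrainingClassifier(GaussianNB())
st_svc = SelfTrainingClassifier(SVC(probability=True))

# decision_function is delegated, so it only "exists" when the wrapped
# estimator has it: GaussianNB does not, SVC does.
print(hasattr(st_nb, 'decision_function'))   # False
print(hasattr(st_svc, 'decision_function'))  # True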
"scikit-learn/sklearn.svm._base/BaseLibSVM/predict", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_predict", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_predict", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_compute_kernel", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_decision_function", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_decision_function", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_decision_function", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_validate_for_predict", + "scikit-learn/sklearn.svm._base/BaseLibSVM/coef_@getter", + "scikit-learn/sklearn.svm._base/BaseLibSVM/_get_coef", + "scikit-learn/sklearn.svm._base/BaseLibSVM/n_support_@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for estimators that use libsvm as backing library.\n\nThis implements support vector machine classification and regression.\n\nParameter documentation is in the derived `SVC` class.", + "docstring": "Base class for estimators that use libsvm as backing library.\n\nThis implements support vector machine classification and regression.\n\nParameter documentation is in the derived `SVC` class.", + "code": "class BaseLibSVM(BaseEstimator, metaclass=ABCMeta):\n \"\"\"Base class for estimators that use libsvm as backing library.\n\n This implements support vector machine classification and regression.\n\n Parameter documentation is in the derived `SVC` class.\n \"\"\"\n\n # The order of these must match the integer values in LibSVM.\n # XXX These are actually the same in the dense case. Need to factor\n # this out.\n _sparse_kernels = [\"linear\", \"poly\", \"rbf\", \"sigmoid\", \"precomputed\"]\n\n @abstractmethod\n def __init__(self, kernel, degree, gamma, coef0,\n tol, C, nu, epsilon, shrinking, probability, cache_size,\n class_weight, verbose, max_iter, random_state):\n\n if self._impl not in LIBSVM_IMPL:\n raise ValueError(\"impl should be one of %s, %s was given\" % (\n LIBSVM_IMPL, self._impl))\n\n if gamma == 0:\n msg = (\"The gamma value of 0.0 is invalid. 
Use 'auto' to set\"\n \" gamma to a value of 1 / n_features.\")\n raise ValueError(msg)\n\n self.kernel = kernel\n self.degree = degree\n self.gamma = gamma\n self.coef0 = coef0\n self.tol = tol\n self.C = C\n self.nu = nu\n self.epsilon = epsilon\n self.shrinking = shrinking\n self.probability = probability\n self.cache_size = cache_size\n self.class_weight = class_weight\n self.verbose = verbose\n self.max_iter = max_iter\n self.random_state = random_state\n\n def _more_tags(self):\n # Used by cross_val_score.\n return {'pairwise': self.kernel == 'precomputed'}\n\n # TODO: Remove in 1.1\n # mypy error: Decorated property not supported\n @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n # Used by cross_val_score.\n return self.kernel == \"precomputed\"\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the SVM model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) \\\n or (n_samples, n_samples)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features.\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples, n_samples).\n\n y : array-like of shape (n_samples,)\n Target values (class labels in classification, real numbers in\n regression).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Per-sample weights. Rescale C per sample. Higher weights\n force the classifier to put more emphasis on these points.\n\n Returns\n -------\n self : object\n\n Notes\n -----\n If X and y are not C-ordered and contiguous arrays of np.float64 and\n X is not a scipy.sparse.csr_matrix, X and/or y may be copied.\n\n If X is a dense array, then the other methods will not support sparse\n matrices as input.\n \"\"\"\n\n rnd = check_random_state(self.random_state)\n\n sparse = sp.isspmatrix(X)\n if sparse and self.kernel == \"precomputed\":\n raise TypeError(\"Sparse precomputed kernels are not supported.\")\n self._sparse = sparse and not callable(self.kernel)\n\n if hasattr(self, 'decision_function_shape'):\n if self.decision_function_shape not in ('ovr', 'ovo'):\n raise ValueError(\n f\"decision_function_shape must be either 'ovr' or 'ovo', \"\n f\"got {self.decision_function_shape}.\"\n )\n\n if callable(self.kernel):\n check_consistent_length(X, y)\n else:\n X, y = self._validate_data(X, y, dtype=np.float64,\n order='C', accept_sparse='csr',\n accept_large_sparse=False)\n\n y = self._validate_targets(y)\n\n sample_weight = np.asarray([]\n if sample_weight is None\n else sample_weight, dtype=np.float64)\n solver_type = LIBSVM_IMPL.index(self._impl)\n\n # input validation\n n_samples = _num_samples(X)\n if solver_type != 2 and n_samples != y.shape[0]:\n raise ValueError(\"X and y have incompatible shapes.\\n\" +\n \"X has %s samples, but y has %s.\" %\n (n_samples, y.shape[0]))\n\n if self.kernel == \"precomputed\" and n_samples != X.shape[1]:\n raise ValueError(\"Precomputed matrix must be a square matrix.\"\n \" Input is a {}x{} matrix.\"\n .format(X.shape[0], X.shape[1]))\n\n if sample_weight.shape[0] > 0 and sample_weight.shape[0] != n_samples:\n raise ValueError(\"sample_weight and X have incompatible shapes: \"\n \"%r vs %r\\n\"\n \"Note: Sparse matrices cannot be indexed w/\"\n \"boolean masks (use `indices=True` in CV).\"\n % (sample_weight.shape, X.shape))\n\n kernel = 'precomputed' if callable(self.kernel) else 
self.kernel\n\n if kernel == 'precomputed':\n # unused but needs to be a float for cython code that ignores\n # it anyway\n self._gamma = 0.\n elif isinstance(self.gamma, str):\n if self.gamma == 'scale':\n # var = E[X^2] - E[X]^2 if sparse\n X_var = ((X.multiply(X)).mean() - (X.mean()) ** 2\n if sparse else X.var())\n self._gamma = 1.0 / (X.shape[1] * X_var) if X_var != 0 else 1.0\n elif self.gamma == 'auto':\n self._gamma = 1.0 / X.shape[1]\n else:\n raise ValueError(\n \"When 'gamma' is a string, it should be either 'scale' or \"\n \"'auto'. Got '{}' instead.\".format(self.gamma)\n )\n else:\n self._gamma = self.gamma\n\n fit = self._sparse_fit if self._sparse else self._dense_fit\n if self.verbose:\n print('[LibSVM]', end='')\n\n seed = rnd.randint(np.iinfo('i').max)\n fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)\n # see comment on the other call to np.iinfo in this file\n\n self.shape_fit_ = X.shape if hasattr(X, \"shape\") else (n_samples, )\n\n # In binary case, we need to flip the sign of coef, intercept and\n # decision function. Use self._intercept_ and self._dual_coef_\n # internally.\n self._intercept_ = self.intercept_.copy()\n self._dual_coef_ = self.dual_coef_\n if self._impl in ['c_svc', 'nu_svc'] and len(self.classes_) == 2:\n self.intercept_ *= -1\n self.dual_coef_ = -self.dual_coef_\n\n return self\n\n def _validate_targets(self, y):\n \"\"\"Validation of y and class_weight.\n\n Default implementation for SVR and one-class; overridden in BaseSVC.\n \"\"\"\n # XXX this is ugly.\n # Regression models should not have a class_weight_ attribute.\n self.class_weight_ = np.empty(0)\n return column_or_1d(y, warn=True).astype(np.float64, copy=False)\n\n def _warn_from_fit_status(self):\n assert self.fit_status_ in (0, 1)\n if self.fit_status_ == 1:\n warnings.warn('Solver terminated early (max_iter=%i).'\n ' Consider pre-processing your data with'\n ' StandardScaler or MinMaxScaler.'\n % self.max_iter, ConvergenceWarning)\n\n def _dense_fit(self, X, y, sample_weight, solver_type, kernel,\n random_seed):\n if callable(self.kernel):\n # you must store a reference to X to compute the kernel in predict\n # TODO: add keyword copy to copy on demand\n self.__Xfit = X\n X = self._compute_kernel(X)\n\n if X.shape[0] != X.shape[1]:\n raise ValueError(\"X.shape[0] should be equal to X.shape[1]\")\n\n libsvm.set_verbosity_wrap(self.verbose)\n\n # we don't pass **self.get_params() to allow subclasses to\n # add other parameters to __init__\n self.support_, self.support_vectors_, self._n_support, \\\n self.dual_coef_, self.intercept_, self._probA, \\\n self._probB, self.fit_status_ = libsvm.fit(\n X, y,\n svm_type=solver_type, sample_weight=sample_weight,\n class_weight=self.class_weight_, kernel=kernel, C=self.C,\n nu=self.nu, probability=self.probability, degree=self.degree,\n shrinking=self.shrinking, tol=self.tol,\n cache_size=self.cache_size, coef0=self.coef0,\n gamma=self._gamma, epsilon=self.epsilon,\n max_iter=self.max_iter, random_seed=random_seed)\n\n self._warn_from_fit_status()\n\n def _sparse_fit(self, X, y, sample_weight, solver_type, kernel,\n random_seed):\n X.data = np.asarray(X.data, dtype=np.float64, order='C')\n X.sort_indices()\n\n kernel_type = self._sparse_kernels.index(kernel)\n\n libsvm_sparse.set_verbosity_wrap(self.verbose)\n\n self.support_, self.support_vectors_, dual_coef_data, \\\n self.intercept_, self._n_support, \\\n self._probA, self._probB, self.fit_status_ = \\\n libsvm_sparse.libsvm_sparse_train(\n X.shape[1], X.data, X.indices, 
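The gamma='scale' branch above reduces, for dense input, to 1 / (n_features * X.var()); a quick check on toy data (note _gamma is a private attribute, read here purely for illustration):

import numpy as np
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.rand(40, 5)
y = rng.randint(0, 2, 40)

clf = SVC(kernel='rbf', gamma='scale').fit(X, y)
print(np.isclose(clf._gamma, 1.0 / (X.shape[1] * X.var())))  # True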
X.indptr, y, solver_type,\n kernel_type, self.degree, self._gamma, self.coef0, self.tol,\n self.C, self.class_weight_,\n sample_weight, self.nu, self.cache_size, self.epsilon,\n int(self.shrinking), int(self.probability), self.max_iter,\n random_seed)\n\n self._warn_from_fit_status()\n\n if hasattr(self, \"classes_\"):\n n_class = len(self.classes_) - 1\n else: # regression\n n_class = 1\n n_SV = self.support_vectors_.shape[0]\n\n dual_coef_indices = np.tile(np.arange(n_SV), n_class)\n if not n_SV:\n self.dual_coef_ = sp.csr_matrix([])\n else:\n dual_coef_indptr = np.arange(0, dual_coef_indices.size + 1,\n dual_coef_indices.size / n_class)\n self.dual_coef_ = sp.csr_matrix(\n (dual_coef_data, dual_coef_indices, dual_coef_indptr),\n (n_class, n_SV))\n\n def predict(self, X):\n \"\"\"Perform regression on samples in X.\n\n For an one-class model, +1 (inlier) or -1 (outlier) is returned.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,)\n \"\"\"\n X = self._validate_for_predict(X)\n predict = self._sparse_predict if self._sparse else self._dense_predict\n return predict(X)\n\n def _dense_predict(self, X):\n X = self._compute_kernel(X)\n if X.ndim == 1:\n X = check_array(X, order='C', accept_large_sparse=False)\n\n kernel = self.kernel\n if callable(self.kernel):\n kernel = 'precomputed'\n if X.shape[1] != self.shape_fit_[0]:\n raise ValueError(\"X.shape[1] = %d should be equal to %d, \"\n \"the number of samples at training time\" %\n (X.shape[1], self.shape_fit_[0]))\n\n svm_type = LIBSVM_IMPL.index(self._impl)\n\n return libsvm.predict(\n X, self.support_, self.support_vectors_, self._n_support,\n self._dual_coef_, self._intercept_,\n self._probA, self._probB, svm_type=svm_type, kernel=kernel,\n degree=self.degree, coef0=self.coef0, gamma=self._gamma,\n cache_size=self.cache_size)\n\n def _sparse_predict(self, X):\n # Precondition: X is a csr_matrix of dtype np.float64.\n kernel = self.kernel\n if callable(kernel):\n kernel = 'precomputed'\n\n kernel_type = self._sparse_kernels.index(kernel)\n\n C = 0.0 # C is not useful here\n\n return libsvm_sparse.libsvm_sparse_predict(\n X.data, X.indices, X.indptr,\n self.support_vectors_.data,\n self.support_vectors_.indices,\n self.support_vectors_.indptr,\n self._dual_coef_.data, self._intercept_,\n LIBSVM_IMPL.index(self._impl), kernel_type,\n self.degree, self._gamma, self.coef0, self.tol,\n C, self.class_weight_,\n self.nu, self.epsilon, self.shrinking,\n self.probability, self._n_support,\n self._probA, self._probB)\n\n def _compute_kernel(self, X):\n \"\"\"Return the data transformed by a callable kernel\"\"\"\n if callable(self.kernel):\n # in the case of precomputed kernel given as a function, we\n # have to compute explicitly the kernel matrix\n kernel = self.kernel(X, self.__Xfit)\n if sp.issparse(kernel):\n kernel = kernel.toarray()\n X = np.asarray(kernel, dtype=np.float64, order='C')\n return X\n\n def _decision_function(self, X):\n \"\"\"Evaluates the decision function for the samples in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n X : array-like of shape (n_samples, n_class * (n_class-1) / 2)\n Returns the decision function of the sample for each class\n in the model.\n \"\"\"\n # NOTE: _validate_for_predict contains check for is_fitted\n # hence must be placed before any other 
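The square-matrix check in fit and the (n_samples_test, n_samples_train) expectation in predict above are the whole contract for kernel='precomputed'; a hedged sketch using an RBF Gram matrix (the iris split is chosen arbitrarily):

from sklearn.datasets import load_iris
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train = X[:120], X[120:], y[:120]

clf = SVC(kernel='precomputed')
clf.fit(rbf_kernel(X_train, X_train), y_train)   # square (120, 120) Gram matrix
pred = clf.predict(rbf_kernel(X_test, X_train))  # (n_test, n_train) at predict
print(pred.shape)                                # (30,)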
attributes are used.\n X = self._validate_for_predict(X)\n X = self._compute_kernel(X)\n\n if self._sparse:\n dec_func = self._sparse_decision_function(X)\n else:\n dec_func = self._dense_decision_function(X)\n\n # In binary case, we need to flip the sign of coef, intercept and\n # decision function.\n if self._impl in ['c_svc', 'nu_svc'] and len(self.classes_) == 2:\n return -dec_func.ravel()\n\n return dec_func\n\n def _dense_decision_function(self, X):\n X = check_array(X, dtype=np.float64, order=\"C\",\n accept_large_sparse=False)\n\n kernel = self.kernel\n if callable(kernel):\n kernel = 'precomputed'\n\n return libsvm.decision_function(\n X, self.support_, self.support_vectors_, self._n_support,\n self._dual_coef_, self._intercept_,\n self._probA, self._probB,\n svm_type=LIBSVM_IMPL.index(self._impl),\n kernel=kernel, degree=self.degree, cache_size=self.cache_size,\n coef0=self.coef0, gamma=self._gamma)\n\n def _sparse_decision_function(self, X):\n X.data = np.asarray(X.data, dtype=np.float64, order='C')\n\n kernel = self.kernel\n if hasattr(kernel, '__call__'):\n kernel = 'precomputed'\n\n kernel_type = self._sparse_kernels.index(kernel)\n\n return libsvm_sparse.libsvm_sparse_decision_function(\n X.data, X.indices, X.indptr,\n self.support_vectors_.data,\n self.support_vectors_.indices,\n self.support_vectors_.indptr,\n self._dual_coef_.data, self._intercept_,\n LIBSVM_IMPL.index(self._impl), kernel_type,\n self.degree, self._gamma, self.coef0, self.tol,\n self.C, self.class_weight_,\n self.nu, self.epsilon, self.shrinking,\n self.probability, self._n_support,\n self._probA, self._probB)\n\n def _validate_for_predict(self, X):\n check_is_fitted(self)\n\n if not callable(self.kernel):\n X = check_array(X, accept_sparse='csr', dtype=np.float64,\n order=\"C\", accept_large_sparse=False)\n\n if self._sparse and not sp.isspmatrix(X):\n X = sp.csr_matrix(X)\n if self._sparse:\n X.sort_indices()\n\n if sp.issparse(X) and not self._sparse and not callable(self.kernel):\n raise ValueError(\n \"cannot use sparse input in %r trained on dense data\"\n % type(self).__name__)\n\n if self.kernel == \"precomputed\":\n if X.shape[1] != self.shape_fit_[0]:\n raise ValueError(\"X.shape[1] = %d should be equal to %d, \"\n \"the number of samples at training time\" %\n (X.shape[1], self.shape_fit_[0]))\n elif not callable(self.kernel) and X.shape[1] != self.shape_fit_[1]:\n raise ValueError(\"X.shape[1] = %d should be equal to %d, \"\n \"the number of features at training time\" %\n (X.shape[1], self.shape_fit_[1]))\n return X\n\n @property\n def coef_(self):\n if self.kernel != 'linear':\n raise AttributeError('coef_ is only available when using a '\n 'linear kernel')\n\n coef = self._get_coef()\n\n # coef_ being a read-only property, it's better to mark the value as\n # immutable to avoid hiding potential bugs for the unsuspecting user.\n if sp.issparse(coef):\n # sparse matrix do not have global flags\n coef.data.flags.writeable = False\n else:\n # regular dense array\n coef.flags.writeable = False\n return coef\n\n def _get_coef(self):\n return safe_sparse_dot(self._dual_coef_, self.support_vectors_)\n\n @property\n def n_support_(self):\n try:\n check_is_fitted(self)\n except NotFittedError:\n raise AttributeError\n\n svm_type = LIBSVM_IMPL.index(self._impl)\n if svm_type in (0, 1):\n return self._n_support\n else:\n # SVR and OneClass\n # _n_support has size 2, we make it size 1\n return np.array([self._n_support[0]])", + "instance_attributes": [ + { + "name": "_gamma", + "types": { + "kind": 
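As the coef_ property above shows, the attribute exists only for kernel='linear' and is returned with the writeable flag cleared; a short check on iris (chosen arbitrarily):

from sklearn.datasets import load_iris
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
clf = SVC(kernel='linear').fit(X, y)

print(clf.coef_.shape)            # (3, 4): one row per one-vs-one class pair
print(clf.coef_.flags.writeable)  # False, marked immutable as noted above
print(clf.n_support_)             # support vector counts per class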
"NamedType", + "name": "float" + } + }, + { + "name": "class_weight_", + "types": { + "kind": "NamedType", + "name": "ndarray" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC", + "name": "BaseSVC", + "qname": "sklearn.svm._base.BaseSVC", + "decorators": [], + "superclasses": ["ClassifierMixin", "BaseLibSVM"], + "methods": [ + "scikit-learn/sklearn.svm._base/BaseSVC/__init__", + "scikit-learn/sklearn.svm._base/BaseSVC/_validate_targets", + "scikit-learn/sklearn.svm._base/BaseSVC/decision_function", + "scikit-learn/sklearn.svm._base/BaseSVC/predict", + "scikit-learn/sklearn.svm._base/BaseSVC/_check_proba", + "scikit-learn/sklearn.svm._base/BaseSVC/predict_proba@getter", + "scikit-learn/sklearn.svm._base/BaseSVC/_predict_proba", + "scikit-learn/sklearn.svm._base/BaseSVC/predict_log_proba@getter", + "scikit-learn/sklearn.svm._base/BaseSVC/_predict_log_proba", + "scikit-learn/sklearn.svm._base/BaseSVC/_dense_predict_proba", + "scikit-learn/sklearn.svm._base/BaseSVC/_sparse_predict_proba", + "scikit-learn/sklearn.svm._base/BaseSVC/_get_coef", + "scikit-learn/sklearn.svm._base/BaseSVC/probA_@getter", + "scikit-learn/sklearn.svm._base/BaseSVC/probB_@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "ABC for LibSVM-based classifiers.", + "docstring": "ABC for LibSVM-based classifiers.", + "code": "class BaseSVC(ClassifierMixin, BaseLibSVM, metaclass=ABCMeta):\n \"\"\"ABC for LibSVM-based classifiers.\"\"\"\n @abstractmethod\n def __init__(self, kernel, degree, gamma, coef0, tol, C, nu,\n shrinking, probability, cache_size, class_weight, verbose,\n max_iter, decision_function_shape, random_state,\n break_ties):\n self.decision_function_shape = decision_function_shape\n self.break_ties = break_ties\n super().__init__(\n kernel=kernel, degree=degree, gamma=gamma,\n coef0=coef0, tol=tol, C=C, nu=nu, epsilon=0., shrinking=shrinking,\n probability=probability, cache_size=cache_size,\n class_weight=class_weight, verbose=verbose, max_iter=max_iter,\n random_state=random_state)\n\n def _validate_targets(self, y):\n y_ = column_or_1d(y, warn=True)\n check_classification_targets(y)\n cls, y = np.unique(y_, return_inverse=True)\n self.class_weight_ = compute_class_weight(self.class_weight,\n classes=cls, y=y_)\n if len(cls) < 2:\n raise ValueError(\n \"The number of classes has to be greater than one; got %d\"\n \" class\" % len(cls))\n\n self.classes_ = cls\n\n return np.asarray(y, dtype=np.float64, order='C')\n\n def decision_function(self, X):\n \"\"\"Evaluates the decision function for the samples in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_classes * (n_classes-1) / 2)\n Returns the decision function of the sample for each class\n in the model.\n If decision_function_shape='ovr', the shape is (n_samples,\n n_classes).\n\n Notes\n -----\n If decision_function_shape='ovo', the function values are proportional\n to the distance of the samples X to the separating hyperplane. If the\n exact distances are required, divide the function values by the norm of\n the weight vector (``coef_``). 
See also `this question\n `_ for further details.\n If decision_function_shape='ovr', the decision function is a monotonic\n transformation of ovo decision function.\n \"\"\"\n dec = self._decision_function(X)\n if self.decision_function_shape == 'ovr' and len(self.classes_) > 2:\n return _ovr_decision_function(dec < 0, -dec, len(self.classes_))\n return dec\n\n def predict(self, X):\n \"\"\"Perform classification on samples in X.\n\n For an one-class model, +1 or -1 is returned.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,)\n Class labels for samples in X.\n \"\"\"\n check_is_fitted(self)\n if self.break_ties and self.decision_function_shape == 'ovo':\n raise ValueError(\"break_ties must be False when \"\n \"decision_function_shape is 'ovo'\")\n\n if (self.break_ties\n and self.decision_function_shape == 'ovr'\n and len(self.classes_) > 2):\n y = np.argmax(self.decision_function(X), axis=1)\n else:\n y = super().predict(X)\n return self.classes_.take(np.asarray(y, dtype=np.intp))\n\n # Hacky way of getting predict_proba to raise an AttributeError when\n # probability=False using properties. Do not use this in new code; when\n # probabilities are not available depending on a setting, introduce two\n # estimators.\n def _check_proba(self):\n if not self.probability:\n raise AttributeError(\"predict_proba is not available when \"\n \" probability=False\")\n if self._impl not in ('c_svc', 'nu_svc'):\n raise AttributeError(\"predict_proba only implemented for SVC\"\n \" and NuSVC\")\n\n @property\n def predict_proba(self):\n \"\"\"Compute probabilities of possible outcomes for samples in X.\n\n The model need to have probability information computed at training\n time: fit with attribute `probability` set to True.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\n Returns\n -------\n T : ndarray of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n\n Notes\n -----\n The probability model is created using cross validation, so\n the results can be slightly different than those obtained by\n predict. 
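The shapes described in decision_function above only start to differ once there are more than three classes; a toy four-class comparison (make_blobs parameters are arbitrary):

from sklearn.datasets import make_blobs
from sklearn.svm import SVC

X, y = make_blobs(n_samples=100, centers=4, random_state=0)

ovo = SVC(decision_function_shape='ovo').fit(X, y)
ovr = SVC(decision_function_shape='ovr').fit(X, y)

print(ovo.decision_function(X[:2]).shape)  # (2, 6): 4*3/2 pairwise columns
print(ovr.decision_function(X[:2]).shape)  # (2, 4): one column per class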
Also, it will produce meaningless results on very small\n datasets.\n \"\"\"\n self._check_proba()\n return self._predict_proba\n\n def _predict_proba(self, X):\n X = self._validate_for_predict(X)\n if self.probA_.size == 0 or self.probB_.size == 0:\n raise NotFittedError(\"predict_proba is not available when fitted \"\n \"with probability=False\")\n pred_proba = (self._sparse_predict_proba\n if self._sparse else self._dense_predict_proba)\n return pred_proba(X)\n\n @property\n def predict_log_proba(self):\n \"\"\"Compute log probabilities of possible outcomes for samples in X.\n\n The model need to have probability information computed at training\n time: fit with attribute `probability` set to True.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or \\\n (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\n Returns\n -------\n T : ndarray of shape (n_samples, n_classes)\n Returns the log-probabilities of the sample for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n\n Notes\n -----\n The probability model is created using cross validation, so\n the results can be slightly different than those obtained by\n predict. Also, it will produce meaningless results on very small\n datasets.\n \"\"\"\n self._check_proba()\n return self._predict_log_proba\n\n def _predict_log_proba(self, X):\n return np.log(self.predict_proba(X))\n\n def _dense_predict_proba(self, X):\n X = self._compute_kernel(X)\n\n kernel = self.kernel\n if callable(kernel):\n kernel = 'precomputed'\n\n svm_type = LIBSVM_IMPL.index(self._impl)\n pprob = libsvm.predict_proba(\n X, self.support_, self.support_vectors_, self._n_support,\n self._dual_coef_, self._intercept_,\n self._probA, self._probB,\n svm_type=svm_type, kernel=kernel, degree=self.degree,\n cache_size=self.cache_size, coef0=self.coef0, gamma=self._gamma)\n\n return pprob\n\n def _sparse_predict_proba(self, X):\n X.data = np.asarray(X.data, dtype=np.float64, order='C')\n\n kernel = self.kernel\n if callable(kernel):\n kernel = 'precomputed'\n\n kernel_type = self._sparse_kernels.index(kernel)\n\n return libsvm_sparse.libsvm_sparse_predict_proba(\n X.data, X.indices, X.indptr,\n self.support_vectors_.data,\n self.support_vectors_.indices,\n self.support_vectors_.indptr,\n self._dual_coef_.data, self._intercept_,\n LIBSVM_IMPL.index(self._impl), kernel_type,\n self.degree, self._gamma, self.coef0, self.tol,\n self.C, self.class_weight_,\n self.nu, self.epsilon, self.shrinking,\n self.probability, self._n_support,\n self._probA, self._probB)\n\n def _get_coef(self):\n if self.dual_coef_.shape[0] == 1:\n # binary classifier\n coef = safe_sparse_dot(self.dual_coef_, self.support_vectors_)\n else:\n # 1vs1 classifier\n coef = _one_vs_one_coef(self.dual_coef_, self._n_support,\n self.support_vectors_)\n if sp.issparse(coef[0]):\n coef = sp.vstack(coef).tocsr()\n else:\n coef = np.vstack(coef)\n\n return coef\n\n @property\n def probA_(self):\n return self._probA\n\n @property\n def probB_(self):\n return self._probB", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC", + "name": "LinearSVC", + "qname": "sklearn.svm._classes.LinearSVC", + "decorators": [], + "superclasses": ["LinearClassifierMixin", "SparseCoefMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.svm._classes/LinearSVC/__init__", + 
"scikit-learn/sklearn.svm._classes/LinearSVC/fit", + "scikit-learn/sklearn.svm._classes/LinearSVC/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Linear Support Vector Classification.\n\nSimilar to SVC with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input and the multiclass support\nis handled according to a one-vs-the-rest scheme.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Linear Support Vector Classification.\n\nSimilar to SVC with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input and the multiclass support\nis handled according to a one-vs-the-rest scheme.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npenalty : {'l1', 'l2'}, default='l2'\n Specifies the norm used in the penalization. The 'l2'\n penalty is the standard used in SVC. The 'l1' leads to ``coef_``\n vectors that are sparse.\n\nloss : {'hinge', 'squared_hinge'}, default='squared_hinge'\n Specifies the loss function. 'hinge' is the standard SVM loss\n (used e.g. by the SVC class) while 'squared_hinge' is the\n square of the hinge loss. The combination of ``penalty='l1'``\n and ``loss='hinge'`` is not supported.\n\ndual : bool, default=True\n Select the algorithm to either solve the dual or primal\n optimization problem. Prefer dual=False when n_samples > n_features.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nC : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive.\n\nmulti_class : {'ovr', 'crammer_singer'}, default='ovr'\n Determines the multi-class strategy if `y` contains more than\n two classes.\n ``\"ovr\"`` trains n_classes one-vs-rest classifiers, while\n ``\"crammer_singer\"`` optimizes a joint objective over all classes.\n While `crammer_singer` is interesting from a theoretical perspective\n as it is consistent, it is seldom used in practice as it rarely leads\n to better accuracy and is more expensive to compute.\n If ``\"crammer_singer\"`` is chosen, the options loss, penalty and dual\n will be ignored.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be already centered).\n\nintercept_scaling : float, default=1\n When self.fit_intercept is True, instance vector x becomes\n ``[x, self.intercept_scaling]``,\n i.e. a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n The intercept becomes intercept_scaling * synthetic feature weight\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nclass_weight : dict or 'balanced', default=None\n Set the parameter C of class i to ``class_weight[i]*C`` for\n SVC. 
If not given, all classes are supposed to have\n weight one.\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\nverbose : int, default=0\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in liblinear that, if enabled, may not work\n properly in a multithreaded context.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data for\n the dual coordinate descent (if ``dual=True``). When ``dual=False`` the\n underlying implementation of :class:`LinearSVC` is not random and\n ``random_state`` has no effect on the results.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmax_iter : int, default=1000\n The maximum number of iterations to be run.\n\nAttributes\n----------\ncoef_ : ndarray of shape (1, n_features) if n_classes == 2 else (n_classes, n_features)\n Weights assigned to the features (coefficients in the primal\n problem).\n\n ``coef_`` is a readonly property derived from ``raw_coef_`` that\n follows the internal memory layout of liblinear.\n\nintercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n Constants in decision function.\n\nclasses_ : ndarray of shape (n_classes,)\n The unique classes labels.\n\nn_iter_ : int\n Maximum number of iterations run across all classes.\n\nSee Also\n--------\nSVC : Implementation of Support Vector Machine classifier using libsvm:\n the kernel can be non-linear but its SMO algorithm does not\n scale to large number of samples as LinearSVC does.\n\n Furthermore SVC multi-class mode is implemented using one\n vs one scheme while LinearSVC uses one vs the rest. It is\n possible to implement one vs the rest with SVC by using the\n :class:`~sklearn.multiclass.OneVsRestClassifier` wrapper.\n\n Finally SVC can fit dense data without memory copy if the input\n is C-contiguous. Sparse data will still incur memory copy though.\n\nsklearn.linear_model.SGDClassifier : SGDClassifier can optimize the same\n cost function as LinearSVC\n by adjusting the penalty and loss parameters. In addition it requires\n less memory, allows incremental (online) learning, and implements\n various loss functions and regularization regimes.\n\nNotes\n-----\nThe underlying C implementation uses a random number generator to\nselect features when fitting the model. It is thus not uncommon\nto have slightly different results for the same input data. If\nthat happens, try with a smaller ``tol`` parameter.\n\nThe underlying implementation, liblinear, uses a sparse internal\nrepresentation for the data that will incur a memory copy.\n\nPredict output may not match that of standalone liblinear in certain\ncases. See :ref:`differences from liblinear `\nin the narrative documentation.\n\nReferences\n----------\n`LIBLINEAR: A Library for Large Linear Classification\n`__\n\nExamples\n--------\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_features=4, random_state=0)\n>>> clf = make_pipeline(StandardScaler(),\n... 
LinearSVC(random_state=0, tol=1e-5))\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])\n\n>>> print(clf.named_steps['linearsvc'].coef_)\n[[0.141... 0.526... 0.679... 0.493...]]\n\n>>> print(clf.named_steps['linearsvc'].intercept_)\n[0.1693...]\n>>> print(clf.predict([[0, 0, 0, 0]]))\n[1]", + "code": "class LinearSVC(LinearClassifierMixin,\n SparseCoefMixin,\n BaseEstimator):\n \"\"\"Linear Support Vector Classification.\n\n Similar to SVC with parameter kernel='linear', but implemented in terms of\n liblinear rather than libsvm, so it has more flexibility in the choice of\n penalties and loss functions and should scale better to large numbers of\n samples.\n\n This class supports both dense and sparse input and the multiclass support\n is handled according to a one-vs-the-rest scheme.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n penalty : {'l1', 'l2'}, default='l2'\n Specifies the norm used in the penalization. The 'l2'\n penalty is the standard used in SVC. The 'l1' leads to ``coef_``\n vectors that are sparse.\n\n loss : {'hinge', 'squared_hinge'}, default='squared_hinge'\n Specifies the loss function. 'hinge' is the standard SVM loss\n (used e.g. by the SVC class) while 'squared_hinge' is the\n square of the hinge loss. The combination of ``penalty='l1'``\n and ``loss='hinge'`` is not supported.\n\n dual : bool, default=True\n Select the algorithm to either solve the dual or primal\n optimization problem. Prefer dual=False when n_samples > n_features.\n\n tol : float, default=1e-4\n Tolerance for stopping criteria.\n\n C : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive.\n\n multi_class : {'ovr', 'crammer_singer'}, default='ovr'\n Determines the multi-class strategy if `y` contains more than\n two classes.\n ``\"ovr\"`` trains n_classes one-vs-rest classifiers, while\n ``\"crammer_singer\"`` optimizes a joint objective over all classes.\n While `crammer_singer` is interesting from a theoretical perspective\n as it is consistent, it is seldom used in practice as it rarely leads\n to better accuracy and is more expensive to compute.\n If ``\"crammer_singer\"`` is chosen, the options loss, penalty and dual\n will be ignored.\n\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be already centered).\n\n intercept_scaling : float, default=1\n When self.fit_intercept is True, instance vector x becomes\n ``[x, self.intercept_scaling]``,\n i.e. a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n The intercept becomes intercept_scaling * synthetic feature weight\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\n class_weight : dict or 'balanced', default=None\n Set the parameter C of class i to ``class_weight[i]*C`` for\n SVC. 
If not given, all classes are supposed to have\n weight one.\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n verbose : int, default=0\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in liblinear that, if enabled, may not work\n properly in a multithreaded context.\n\n random_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data for\n the dual coordinate descent (if ``dual=True``). When ``dual=False`` the\n underlying implementation of :class:`LinearSVC` is not random and\n ``random_state`` has no effect on the results.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n max_iter : int, default=1000\n The maximum number of iterations to be run.\n\n Attributes\n ----------\n coef_ : ndarray of shape (1, n_features) if n_classes == 2 \\\n else (n_classes, n_features)\n Weights assigned to the features (coefficients in the primal\n problem).\n\n ``coef_`` is a readonly property derived from ``raw_coef_`` that\n follows the internal memory layout of liblinear.\n\n intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)\n Constants in decision function.\n\n classes_ : ndarray of shape (n_classes,)\n The unique classes labels.\n\n n_iter_ : int\n Maximum number of iterations run across all classes.\n\n See Also\n --------\n SVC : Implementation of Support Vector Machine classifier using libsvm:\n the kernel can be non-linear but its SMO algorithm does not\n scale to large number of samples as LinearSVC does.\n\n Furthermore SVC multi-class mode is implemented using one\n vs one scheme while LinearSVC uses one vs the rest. It is\n possible to implement one vs the rest with SVC by using the\n :class:`~sklearn.multiclass.OneVsRestClassifier` wrapper.\n\n Finally SVC can fit dense data without memory copy if the input\n is C-contiguous. Sparse data will still incur memory copy though.\n\n sklearn.linear_model.SGDClassifier : SGDClassifier can optimize the same\n cost function as LinearSVC\n by adjusting the penalty and loss parameters. In addition it requires\n less memory, allows incremental (online) learning, and implements\n various loss functions and regularization regimes.\n\n Notes\n -----\n The underlying C implementation uses a random number generator to\n select features when fitting the model. It is thus not uncommon\n to have slightly different results for the same input data. If\n that happens, try with a smaller ``tol`` parameter.\n\n The underlying implementation, liblinear, uses a sparse internal\n representation for the data that will incur a memory copy.\n\n Predict output may not match that of standalone liblinear in certain\n cases. See :ref:`differences from liblinear `\n in the narrative documentation.\n\n References\n ----------\n `LIBLINEAR: A Library for Large Linear Classification\n `__\n\n Examples\n --------\n >>> from sklearn.svm import LinearSVC\n >>> from sklearn.pipeline import make_pipeline\n >>> from sklearn.preprocessing import StandardScaler\n >>> from sklearn.datasets import make_classification\n >>> X, y = make_classification(n_features=4, random_state=0)\n >>> clf = make_pipeline(StandardScaler(),\n ... 
LinearSVC(random_state=0, tol=1e-5))\n >>> clf.fit(X, y)\n Pipeline(steps=[('standardscaler', StandardScaler()),\n ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])\n\n >>> print(clf.named_steps['linearsvc'].coef_)\n [[0.141... 0.526... 0.679... 0.493...]]\n\n >>> print(clf.named_steps['linearsvc'].intercept_)\n [0.1693...]\n >>> print(clf.predict([[0, 0, 0, 0]]))\n [1]\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, penalty='l2', loss='squared_hinge', *, dual=True,\n tol=1e-4, C=1.0, multi_class='ovr', fit_intercept=True,\n intercept_scaling=1, class_weight=None, verbose=0,\n random_state=None, max_iter=1000):\n self.dual = dual\n self.tol = tol\n self.C = C\n self.multi_class = multi_class\n self.fit_intercept = fit_intercept\n self.intercept_scaling = intercept_scaling\n self.class_weight = class_weight\n self.verbose = verbose\n self.random_state = random_state\n self.max_iter = max_iter\n self.penalty = penalty\n self.loss = loss\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target vector relative to X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual\n samples. If not provided,\n then each sample is given unit weight.\n\n .. versionadded:: 0.18\n\n Returns\n -------\n self : object\n An instance of the estimator.\n \"\"\"\n if self.C < 0:\n raise ValueError(\"Penalty term must be positive; got (C=%r)\"\n % self.C)\n\n X, y = self._validate_data(X, y, accept_sparse='csr',\n dtype=np.float64, order=\"C\",\n accept_large_sparse=False)\n check_classification_targets(y)\n self.classes_ = np.unique(y)\n\n self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(\n X, y, self.C, self.fit_intercept, self.intercept_scaling,\n self.class_weight, self.penalty, self.dual, self.verbose,\n self.max_iter, self.tol, self.random_state, self.multi_class,\n self.loss, sample_weight=sample_weight)\n\n if self.multi_class == \"crammer_singer\" and len(self.classes_) == 2:\n self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1)\n if self.fit_intercept:\n intercept = self.intercept_[1] - self.intercept_[0]\n self.intercept_ = np.array([intercept])\n\n return self\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [ + { + "name": "dual", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "C", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "multi_class", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "intercept_scaling", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "penalty", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "loss", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { 
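Editor's note (not part of the JSON payload): the LinearSVC record ends above. As a reading aid, a minimal usage sketch of the API this entry catalogues, assuming scikit-learn 0.24.2 is installed; the dataset and its sizes (n_samples=200, n_features=20) are arbitrary illustration values, not taken from the dump.

# Illustrative sketch only; mirrors the parameters and attributes listed
# in the LinearSVC record above (scikit-learn 0.24.2).
import numpy as np
from sklearn.datasets import make_classification
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Synthetic binary problem; sizes are assumptions for the example.
X, y = make_classification(n_samples=200, n_features=20, random_state=0)

# Per the `dual` parameter description, prefer dual=False when
# n_samples > n_features, as here.
clf = make_pipeline(
    StandardScaler(),
    LinearSVC(penalty="l2", loss="squared_hinge", dual=False,
              C=1.0, random_state=0, max_iter=1000),
)
clf.fit(X, y)

# Per the Attributes section, coef_ has shape (1, n_features) for a
# binary problem; make_pipeline lowercases the step name to "linearsvc".
print(clf.named_steps["linearsvc"].coef_.shape)  # (1, 20)
print(clf.predict(X[:3]))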
+ "id": "scikit-learn/sklearn.svm._classes/LinearSVR", + "name": "LinearSVR", + "qname": "sklearn.svm._classes.LinearSVR", + "decorators": [], + "superclasses": ["RegressorMixin", "LinearModel"], + "methods": [ + "scikit-learn/sklearn.svm._classes/LinearSVR/__init__", + "scikit-learn/sklearn.svm._classes/LinearSVR/fit", + "scikit-learn/sklearn.svm._classes/LinearSVR/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Linear Support Vector Regression.\n\nSimilar to SVR with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16", + "docstring": "Linear Support Vector Regression.\n\nSimilar to SVR with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nepsilon : float, default=0.0\n Epsilon parameter in the epsilon-insensitive loss function. Note\n that the value of this parameter depends on the scale of the target\n variable y. If unsure, set ``epsilon=0``.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nC : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive.\n\nloss : {'epsilon_insensitive', 'squared_epsilon_insensitive'}, default='epsilon_insensitive'\n Specifies the loss function. The epsilon-insensitive loss\n (standard SVR) is the L1 loss, while the squared epsilon-insensitive\n loss ('squared_epsilon_insensitive') is the L2 loss.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be already centered).\n\nintercept_scaling : float, default=1.\n When self.fit_intercept is True, instance vector x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n The intercept becomes intercept_scaling * synthetic feature weight\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\ndual : bool, default=True\n Select the algorithm to either solve the dual or primal\n optimization problem. Prefer dual=False when n_samples > n_features.\n\nverbose : int, default=0\n Enable verbose output. 
Note that this setting takes advantage of a\n per-process runtime setting in liblinear that, if enabled, may not work\n properly in a multithreaded context.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmax_iter : int, default=1000\n The maximum number of iterations to be run.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features) if n_classes == 2 else (n_classes, n_features)\n Weights assigned to the features (coefficients in the primal\n problem).\n\n `coef_` is a readonly property derived from `raw_coef_` that\n follows the internal memory layout of liblinear.\n\nintercept_ : ndarray of shape (1) if n_classes == 2 else (n_classes)\n Constants in decision function.\n\nn_iter_ : int\n Maximum number of iterations run across all classes.\n\nExamples\n--------\n>>> from sklearn.svm import LinearSVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(n_features=4, random_state=0)\n>>> regr = make_pipeline(StandardScaler(),\n... LinearSVR(random_state=0, tol=1e-5))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('linearsvr', LinearSVR(random_state=0, tol=1e-05))])\n\n>>> print(regr.named_steps['linearsvr'].coef_)\n[18.582... 27.023... 44.357... 64.522...]\n>>> print(regr.named_steps['linearsvr'].intercept_)\n[-4...]\n>>> print(regr.predict([[0, 0, 0, 0]]))\n[-2.384...]\n\n\nSee Also\n--------\nLinearSVC : Implementation of Support Vector Machine classifier using the\n same library as this class (liblinear).\n\nSVR : Implementation of Support Vector Machine regression using libsvm:\n the kernel can be non-linear but its SMO algorithm does not\n scale to large number of samples as LinearSVC does.\n\nsklearn.linear_model.SGDRegressor : SGDRegressor can optimize the same cost\n function as LinearSVR\n by adjusting the penalty and loss parameters. In addition it requires\n less memory, allows incremental (online) learning, and implements\n various loss functions and regularization regimes.", + "code": "class LinearSVR(RegressorMixin, LinearModel):\n \"\"\"Linear Support Vector Regression.\n\n Similar to SVR with parameter kernel='linear', but implemented in terms of\n liblinear rather than libsvm, so it has more flexibility in the choice of\n penalties and loss functions and should scale better to large numbers of\n samples.\n\n This class supports both dense and sparse input.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.16\n\n Parameters\n ----------\n epsilon : float, default=0.0\n Epsilon parameter in the epsilon-insensitive loss function. Note\n that the value of this parameter depends on the scale of the target\n variable y. If unsure, set ``epsilon=0``.\n\n tol : float, default=1e-4\n Tolerance for stopping criteria.\n\n C : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive.\n\n loss : {'epsilon_insensitive', 'squared_epsilon_insensitive'}, \\\n default='epsilon_insensitive'\n Specifies the loss function. 
The epsilon-insensitive loss\n (standard SVR) is the L1 loss, while the squared epsilon-insensitive\n loss ('squared_epsilon_insensitive') is the L2 loss.\n\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be already centered).\n\n intercept_scaling : float, default=1.\n When self.fit_intercept is True, instance vector x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n The intercept becomes intercept_scaling * synthetic feature weight\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\n dual : bool, default=True\n Select the algorithm to either solve the dual or primal\n optimization problem. Prefer dual=False when n_samples > n_features.\n\n verbose : int, default=0\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in liblinear that, if enabled, may not work\n properly in a multithreaded context.\n\n random_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n max_iter : int, default=1000\n The maximum number of iterations to be run.\n\n Attributes\n ----------\n coef_ : ndarray of shape (n_features) if n_classes == 2 \\\n else (n_classes, n_features)\n Weights assigned to the features (coefficients in the primal\n problem).\n\n `coef_` is a readonly property derived from `raw_coef_` that\n follows the internal memory layout of liblinear.\n\n intercept_ : ndarray of shape (1) if n_classes == 2 else (n_classes)\n Constants in decision function.\n\n n_iter_ : int\n Maximum number of iterations run across all classes.\n\n Examples\n --------\n >>> from sklearn.svm import LinearSVR\n >>> from sklearn.pipeline import make_pipeline\n >>> from sklearn.preprocessing import StandardScaler\n >>> from sklearn.datasets import make_regression\n >>> X, y = make_regression(n_features=4, random_state=0)\n >>> regr = make_pipeline(StandardScaler(),\n ... LinearSVR(random_state=0, tol=1e-5))\n >>> regr.fit(X, y)\n Pipeline(steps=[('standardscaler', StandardScaler()),\n ('linearsvr', LinearSVR(random_state=0, tol=1e-05))])\n\n >>> print(regr.named_steps['linearsvr'].coef_)\n [18.582... 27.023... 44.357... 64.522...]\n >>> print(regr.named_steps['linearsvr'].intercept_)\n [-4...]\n >>> print(regr.predict([[0, 0, 0, 0]]))\n [-2.384...]\n\n\n See Also\n --------\n LinearSVC : Implementation of Support Vector Machine classifier using the\n same library as this class (liblinear).\n\n SVR : Implementation of Support Vector Machine regression using libsvm:\n the kernel can be non-linear but its SMO algorithm does not\n scale to large number of samples as LinearSVC does.\n\n sklearn.linear_model.SGDRegressor : SGDRegressor can optimize the same cost\n function as LinearSVR\n by adjusting the penalty and loss parameters. 
In addition it requires\n less memory, allows incremental (online) learning, and implements\n various loss functions and regularization regimes.\n \"\"\"\n\n @_deprecate_positional_args\n def __init__(self, *, epsilon=0.0, tol=1e-4, C=1.0,\n loss='epsilon_insensitive', fit_intercept=True,\n intercept_scaling=1., dual=True, verbose=0,\n random_state=None, max_iter=1000):\n self.tol = tol\n self.C = C\n self.epsilon = epsilon\n self.fit_intercept = fit_intercept\n self.intercept_scaling = intercept_scaling\n self.verbose = verbose\n self.random_state = random_state\n self.max_iter = max_iter\n self.dual = dual\n self.loss = loss\n\n def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target vector relative to X\n\n sample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual\n samples. If not provided,\n then each sample is given unit weight.\n\n .. versionadded:: 0.18\n\n Returns\n -------\n self : object\n An instance of the estimator.\n \"\"\"\n if self.C < 0:\n raise ValueError(\"Penalty term must be positive; got (C=%r)\"\n % self.C)\n\n X, y = self._validate_data(X, y, accept_sparse='csr',\n dtype=np.float64, order=\"C\",\n accept_large_sparse=False)\n penalty = 'l2' # SVR only accepts l2 penalty\n self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(\n X, y, self.C, self.fit_intercept, self.intercept_scaling,\n None, penalty, self.dual, self.verbose,\n self.max_iter, self.tol, self.random_state, loss=self.loss,\n epsilon=self.epsilon, sample_weight=sample_weight)\n self.coef_ = self.coef_.ravel()\n\n return self\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [ + { + "name": "tol", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "C", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "epsilon", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "fit_intercept", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "intercept_scaling", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "verbose", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "max_iter", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "dual", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "loss", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC", + "name": "NuSVC", + "qname": "sklearn.svm._classes.NuSVC", + "decorators": [], + "superclasses": ["BaseSVC"], + "methods": [ + "scikit-learn/sklearn.svm._classes/NuSVC/__init__", + "scikit-learn/sklearn.svm._classes/NuSVC/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Nu-Support Vector Classification.\n\nSimilar to SVC but uses a parameter to control the number of support\nvectors.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Nu-Support Vector Classification.\n\nSimilar to SVC but uses a parameter to control the number of 
support\nvectors.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nnu : float, default=0.5\n An upper bound on the fraction of margin errors (see :ref:`User Guide\n `) and a lower bound of the fraction of support vectors.\n Should be in the interval (0, 1].\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\nprobability : bool, default=False\n Whether to enable probability estimates. This must be enabled prior\n to calling `fit`, will slow down that method as it internally uses\n 5-fold cross-validation, and `predict_proba` may be inconsistent with\n `predict`. Read more in the :ref:`User Guide `.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nclass_weight : {dict, 'balanced'}, default=None\n Set the parameter C of class i to class_weight[i]*C for\n SVC. If not given, all classes are supposed to have\n weight one. The \"balanced\" mode uses the values of y to automatically\n adjust weights inversely proportional to class frequencies as\n ``n_samples / (n_classes * np.bincount(y))``\n\nverbose : bool, default=False\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\ndecision_function_shape : {'ovo', 'ovr'}, default='ovr'\n Whether to return a one-vs-rest ('ovr') decision function of shape\n (n_samples, n_classes) as all other classifiers, or the original\n one-vs-one ('ovo') decision function of libsvm which has shape\n (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n ('ovo') is always used as multi-class strategy. The parameter is\n ignored for binary classification.\n\n .. versionchanged:: 0.19\n decision_function_shape is 'ovr' by default.\n\n .. versionadded:: 0.17\n *decision_function_shape='ovr'* is recommended.\n\n .. versionchanged:: 0.17\n Deprecated *decision_function_shape='ovo' and None*.\n\nbreak_ties : bool, default=False\n If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n :term:`predict` will break ties according to the confidence values of\n :term:`decision_function`; otherwise the first class among the tied\n classes is returned. Please note that breaking ties comes at a\n relatively high computational cost compared to a simple predict.\n\n .. 
versionadded:: 0.22\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data for\n probability estimates. Ignored when `probability` is False.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C of each class.\n Computed based on the ``class_weight`` parameter.\n\nclasses_ : ndarray of shape (n_classes,)\n The unique classes labels.\n\ncoef_ : ndarray of shape (n_classes * (n_classes -1) / 2, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (n_classes - 1, n_SV)\n Dual coefficients of the support vector in the decision\n function (see :ref:`sgd_mathematical_formulation`), multiplied by\n their targets.\n For multiclass, coefficient for all 1-vs-1 classifiers.\n The layout of the coefficients in the multiclass case is somewhat\n non-trivial. See the :ref:`multi-class section of the User Guide\n ` for details.\n\nfit_status_ : int\n 0 if correctly fitted, 1 if the algorithm did not converge.\n\nintercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n Constants in decision function.\n\nsupport_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\nfit_status_ : int\n 0 if correctly fitted, 1 if the algorithm did not converge.\n\nprobA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\nprobB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n If `probability=True`, it corresponds to the parameters learned in\n Platt scaling to produce probability estimates from decision values.\n If `probability=False`, it's an empty array. Platt scaling uses the\n logistic function\n ``1 / (1 + exp(decision_value * probA_ + probB_))``\n where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For\n more information on the multiclass case and training procedure see\n section 8 of [1]_.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> y = np.array([1, 1, 2, 2])\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.svm import NuSVC\n>>> clf = make_pipeline(StandardScaler(), NuSVC())\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()), ('nusvc', NuSVC())])\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n\nSee Also\n--------\nSVC : Support Vector Machine for classification using libsvm.\n\nLinearSVC : Scalable linear Support Vector Machine for classification using\n liblinear.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n.. [2] `Platt, John (1999). 
\"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_", + "code": "class NuSVC(BaseSVC):\n \"\"\"Nu-Support Vector Classification.\n\n Similar to SVC but uses a parameter to control the number of support\n vectors.\n\n The implementation is based on libsvm.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n nu : float, default=0.5\n An upper bound on the fraction of margin errors (see :ref:`User Guide\n `) and a lower bound of the fraction of support vectors.\n Should be in the interval (0, 1].\n\n kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\n degree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\n gamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n coef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\n shrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\n probability : bool, default=False\n Whether to enable probability estimates. This must be enabled prior\n to calling `fit`, will slow down that method as it internally uses\n 5-fold cross-validation, and `predict_proba` may be inconsistent with\n `predict`. Read more in the :ref:`User Guide `.\n\n tol : float, default=1e-3\n Tolerance for stopping criterion.\n\n cache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\n class_weight : {dict, 'balanced'}, default=None\n Set the parameter C of class i to class_weight[i]*C for\n SVC. If not given, all classes are supposed to have\n weight one. The \"balanced\" mode uses the values of y to automatically\n adjust weights inversely proportional to class frequencies as\n ``n_samples / (n_classes * np.bincount(y))``\n\n verbose : bool, default=False\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\n max_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\n decision_function_shape : {'ovo', 'ovr'}, default='ovr'\n Whether to return a one-vs-rest ('ovr') decision function of shape\n (n_samples, n_classes) as all other classifiers, or the original\n one-vs-one ('ovo') decision function of libsvm which has shape\n (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n ('ovo') is always used as multi-class strategy. The parameter is\n ignored for binary classification.\n\n .. versionchanged:: 0.19\n decision_function_shape is 'ovr' by default.\n\n .. versionadded:: 0.17\n *decision_function_shape='ovr'* is recommended.\n\n .. 
versionchanged:: 0.17\n Deprecated *decision_function_shape='ovo' and None*.\n\n break_ties : bool, default=False\n If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n :term:`predict` will break ties according to the confidence values of\n :term:`decision_function`; otherwise the first class among the tied\n classes is returned. Please note that breaking ties comes at a\n relatively high computational cost compared to a simple predict.\n\n .. versionadded:: 0.22\n\n random_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data for\n probability estimates. Ignored when `probability` is False.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n class_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C of each class.\n Computed based on the ``class_weight`` parameter.\n\n classes_ : ndarray of shape (n_classes,)\n The unique classes labels.\n\n coef_ : ndarray of shape (n_classes * (n_classes -1) / 2, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\n dual_coef_ : ndarray of shape (n_classes - 1, n_SV)\n Dual coefficients of the support vector in the decision\n function (see :ref:`sgd_mathematical_formulation`), multiplied by\n their targets.\n For multiclass, coefficient for all 1-vs-1 classifiers.\n The layout of the coefficients in the multiclass case is somewhat\n non-trivial. See the :ref:`multi-class section of the User Guide\n ` for details.\n\n fit_status_ : int\n 0 if correctly fitted, 1 if the algorithm did not converge.\n\n intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n Constants in decision function.\n\n support_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\n support_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\n n_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\n fit_status_ : int\n 0 if correctly fitted, 1 if the algorithm did not converge.\n\n probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n If `probability=True`, it corresponds to the parameters learned in\n Platt scaling to produce probability estimates from decision values.\n If `probability=False`, it's an empty array. Platt scaling uses the\n logistic function\n ``1 / (1 + exp(decision_value * probA_ + probB_))``\n where ``probA_`` and ``probB_`` are learned from the dataset [2]_. 
For\n more information on the multiclass case and training procedure see\n section 8 of [1]_.\n\n shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\n Examples\n --------\n >>> import numpy as np\n >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n >>> y = np.array([1, 1, 2, 2])\n >>> from sklearn.pipeline import make_pipeline\n >>> from sklearn.preprocessing import StandardScaler\n >>> from sklearn.svm import NuSVC\n >>> clf = make_pipeline(StandardScaler(), NuSVC())\n >>> clf.fit(X, y)\n Pipeline(steps=[('standardscaler', StandardScaler()), ('nusvc', NuSVC())])\n >>> print(clf.predict([[-0.8, -1]]))\n [1]\n\n See Also\n --------\n SVC : Support Vector Machine for classification using libsvm.\n\n LinearSVC : Scalable linear Support Vector Machine for classification using\n liblinear.\n\n References\n ----------\n .. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n .. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_\n \"\"\"\n\n _impl = 'nu_svc'\n\n @_deprecate_positional_args\n def __init__(self, *, nu=0.5, kernel='rbf', degree=3, gamma='scale',\n coef0=0.0, shrinking=True, probability=False, tol=1e-3,\n cache_size=200, class_weight=None, verbose=False, max_iter=-1,\n decision_function_shape='ovr', break_ties=False,\n random_state=None):\n\n super().__init__(\n kernel=kernel, degree=degree, gamma=gamma,\n coef0=coef0, tol=tol, C=0., nu=nu, shrinking=shrinking,\n probability=probability, cache_size=cache_size,\n class_weight=class_weight, verbose=verbose, max_iter=max_iter,\n decision_function_shape=decision_function_shape,\n break_ties=break_ties,\n random_state=random_state)\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_methods_subset_invariance':\n 'fails for the decision_function method',\n 'check_class_weight_classifiers': 'class_weight is ignored.',\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR", + "name": "NuSVR", + "qname": "sklearn.svm._classes.NuSVR", + "decorators": [], + "superclasses": ["RegressorMixin", "BaseLibSVM"], + "methods": [ + "scikit-learn/sklearn.svm._classes/NuSVR/__init__", + "scikit-learn/sklearn.svm._classes/NuSVR/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Nu Support Vector Regression.\n\nSimilar to NuSVC, for regression, uses a parameter nu to control\nthe number of support vectors. However, unlike NuSVC, where nu\nreplaces C, here nu replaces the parameter epsilon of epsilon-SVR.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Nu Support Vector Regression.\n\nSimilar to NuSVC, for regression, uses a parameter nu to control\nthe number of support vectors. However, unlike NuSVC, where nu\nreplaces C, here nu replaces the parameter epsilon of epsilon-SVR.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nnu : float, default=0.5\n An upper bound on the fraction of training errors and a lower bound of\n the fraction of support vectors. Should be in the interval (0, 1]. 
By\n default 0.5 will be taken.\n\nC : float, default=1.0\n Penalty parameter C of the error term.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\ncoef_ : ndarray of shape (1, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n Coefficients of the support vector in the decision function.\n\nfit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n Constants in decision function.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nExamples\n--------\n>>> from sklearn.svm import NuSVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> np.random.seed(0)\n>>> y = np.random.randn(n_samples)\n>>> X = np.random.randn(n_samples, n_features)\n>>> regr = make_pipeline(StandardScaler(), NuSVR(C=1.0, nu=0.1))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('nusvr', NuSVR(nu=0.1))])\n\nSee Also\n--------\nNuSVC : Support Vector Machine for classification implemented with libsvm\n with a parameter to control the number of support vectors.\n\nSVR : Epsilon Support Vector Machine for regression implemented with\n libsvm.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n.. [2] `Platt, John (1999). 
\"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_", + "code": "class NuSVR(RegressorMixin, BaseLibSVM):\n \"\"\"Nu Support Vector Regression.\n\n Similar to NuSVC, for regression, uses a parameter nu to control\n the number of support vectors. However, unlike NuSVC, where nu\n replaces C, here nu replaces the parameter epsilon of epsilon-SVR.\n\n The implementation is based on libsvm.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n nu : float, default=0.5\n An upper bound on the fraction of training errors and a lower bound of\n the fraction of support vectors. Should be in the interval (0, 1]. By\n default 0.5 will be taken.\n\n C : float, default=1.0\n Penalty parameter C of the error term.\n\n kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\n degree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\n gamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n coef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\n shrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\n tol : float, default=1e-3\n Tolerance for stopping criterion.\n\n cache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\n verbose : bool, default=False\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\n max_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\n Attributes\n ----------\n class_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\n coef_ : ndarray of shape (1, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). 
This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\n dual_coef_ : ndarray of shape (1, n_SV)\n Coefficients of the support vector in the decision function.\n\n fit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\n intercept_ : ndarray of shape (1,)\n Constants in decision function.\n\n n_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\n shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\n support_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\n support_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\n Examples\n --------\n >>> from sklearn.svm import NuSVR\n >>> from sklearn.pipeline import make_pipeline\n >>> from sklearn.preprocessing import StandardScaler\n >>> import numpy as np\n >>> n_samples, n_features = 10, 5\n >>> np.random.seed(0)\n >>> y = np.random.randn(n_samples)\n >>> X = np.random.randn(n_samples, n_features)\n >>> regr = make_pipeline(StandardScaler(), NuSVR(C=1.0, nu=0.1))\n >>> regr.fit(X, y)\n Pipeline(steps=[('standardscaler', StandardScaler()),\n ('nusvr', NuSVR(nu=0.1))])\n\n See Also\n --------\n NuSVC : Support Vector Machine for classification implemented with libsvm\n with a parameter to control the number of support vectors.\n\n SVR : Epsilon Support Vector Machine for regression implemented with\n libsvm.\n\n References\n ----------\n .. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n .. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_\n \"\"\"\n\n _impl = 'nu_svr'\n\n @_deprecate_positional_args\n def __init__(self, *, nu=0.5, C=1.0, kernel='rbf', degree=3,\n gamma='scale', coef0=0.0, shrinking=True,\n tol=1e-3, cache_size=200, verbose=False, max_iter=-1):\n\n super().__init__(\n kernel=kernel, degree=degree, gamma=gamma, coef0=coef0,\n tol=tol, C=C, nu=nu, epsilon=0., shrinking=shrinking,\n probability=False, cache_size=cache_size, class_weight=None,\n verbose=verbose, max_iter=max_iter, random_state=None)\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM", + "name": "OneClassSVM", + "qname": "sklearn.svm._classes.OneClassSVM", + "decorators": [], + "superclasses": ["OutlierMixin", "BaseLibSVM"], + "methods": [ + "scikit-learn/sklearn.svm._classes/OneClassSVM/__init__", + "scikit-learn/sklearn.svm._classes/OneClassSVM/fit", + "scikit-learn/sklearn.svm._classes/OneClassSVM/decision_function", + "scikit-learn/sklearn.svm._classes/OneClassSVM/score_samples", + "scikit-learn/sklearn.svm._classes/OneClassSVM/predict", + "scikit-learn/sklearn.svm._classes/OneClassSVM/probA_@getter", + "scikit-learn/sklearn.svm._classes/OneClassSVM/probB_@getter", + "scikit-learn/sklearn.svm._classes/OneClassSVM/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Unsupervised Outlier Detection.\n\nEstimate the support of a high-dimensional distribution.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Unsupervised Outlier Detection.\n\nEstimate the support of a high-dimensional distribution.\n\nThe implementation is based on libsvm.\n\nRead more in the 
:ref:`User Guide `.\n\nParameters\n----------\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\nnu : float, default=0.5\n An upper bound on the fraction of training\n errors and a lower bound of the fraction of support\n vectors. Should be in the interval (0, 1]. By default 0.5\n will be taken.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\ncoef_ : ndarray of shape (1, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n Coefficients of the support vectors in the decision function.\n\nfit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n Constant in the decision function.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\noffset_ : float\n Offset used to define the decision function from the raw scores.\n We have the relation: decision_function = score_samples - `offset_`.\n The offset is the opposite of `intercept_` and is provided for\n consistency with other outlier detection algorithms.\n\n .. 
versionadded:: 0.20\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nExamples\n--------\n>>> from sklearn.svm import OneClassSVM\n>>> X = [[0], [0.44], [0.45], [0.46], [1]]\n>>> clf = OneClassSVM(gamma='auto').fit(X)\n>>> clf.predict(X)\narray([-1, 1, 1, 1, -1])\n>>> clf.score_samples(X)\narray([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...])", + "code": "class OneClassSVM(OutlierMixin, BaseLibSVM):\n \"\"\"Unsupervised Outlier Detection.\n\n Estimate the support of a high-dimensional distribution.\n\n The implementation is based on libsvm.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\n degree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\n gamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n coef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\n tol : float, default=1e-3\n Tolerance for stopping criterion.\n\n nu : float, default=0.5\n An upper bound on the fraction of training\n errors and a lower bound of the fraction of support\n vectors. Should be in the interval (0, 1]. By default 0.5\n will be taken.\n\n shrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\n cache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\n verbose : bool, default=False\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\n max_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\n Attributes\n ----------\n class_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\n coef_ : ndarray of shape (1, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). 
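The `offset_` attribute documented above states the identity `decision_function = score_samples - offset_`. A minimal sketch checking it, reusing the toy 1-D data from the OneClassSVM docstring example:

```python
# Sketch: verify the documented relation between decision_function,
# score_samples and offset_ for a fitted OneClassSVM.
import numpy as np
from sklearn.svm import OneClassSVM

X = [[0], [0.44], [0.45], [0.46], [1]]
clf = OneClassSVM(gamma='auto').fit(X)

print(clf.predict(X))  # +1 for inliers, -1 for outliers
assert np.allclose(clf.decision_function(X),
                   clf.score_samples(X) - clf.offset_)
```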
This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\n dual_coef_ : ndarray of shape (1, n_SV)\n Coefficients of the support vectors in the decision function.\n\n fit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\n intercept_ : ndarray of shape (1,)\n Constant in the decision function.\n\n n_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\n offset_ : float\n Offset used to define the decision function from the raw scores.\n We have the relation: decision_function = score_samples - `offset_`.\n The offset is the opposite of `intercept_` and is provided for\n consistency with other outlier detection algorithms.\n\n .. versionadded:: 0.20\n\n shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\n support_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\n support_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\n Examples\n --------\n >>> from sklearn.svm import OneClassSVM\n >>> X = [[0], [0.44], [0.45], [0.46], [1]]\n >>> clf = OneClassSVM(gamma='auto').fit(X)\n >>> clf.predict(X)\n array([-1, 1, 1, 1, -1])\n >>> clf.score_samples(X)\n array([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...])\n \"\"\"\n\n _impl = 'one_class'\n\n @_deprecate_positional_args\n def __init__(self, *, kernel='rbf', degree=3, gamma='scale',\n coef0=0.0, tol=1e-3, nu=0.5, shrinking=True, cache_size=200,\n verbose=False, max_iter=-1):\n\n super().__init__(\n kernel, degree, gamma, coef0, tol, 0., nu, 0.,\n shrinking, False, cache_size, None, verbose, max_iter,\n random_state=None)\n\n def fit(self, X, y=None, sample_weight=None, **params):\n \"\"\"Detects the soft boundary of the set of samples X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Set of samples, where n_samples is the number of samples and\n n_features is the number of features.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Per-sample weights. Rescale C per sample. 
Higher weights\n force the classifier to put more emphasis on these points.\n\n y : Ignored\n not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n\n Notes\n -----\n If X is not a C-ordered contiguous array it is copied.\n\n \"\"\"\n super().fit(X, np.ones(_num_samples(X)),\n sample_weight=sample_weight, **params)\n self.offset_ = -self._intercept_\n return self\n\n def decision_function(self, X):\n \"\"\"Signed distance to the separating hyperplane.\n\n Signed distance is positive for an inlier and negative for an outlier.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n dec : ndarray of shape (n_samples,)\n Returns the decision function of the samples.\n \"\"\"\n dec = self._decision_function(X).ravel()\n return dec\n\n def score_samples(self, X):\n \"\"\"Raw scoring function of the samples.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n score_samples : ndarray of shape (n_samples,)\n Returns the (unshifted) scoring function of the samples.\n \"\"\"\n return self.decision_function(X) + self.offset_\n\n def predict(self, X):\n \"\"\"Perform classification on samples in X.\n\n For a one-class model, +1 or -1 is returned.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,)\n Class labels for samples in X.\n \"\"\"\n y = super().predict(X)\n return np.asarray(y, dtype=np.intp)\n\n # mypy error: Decorated property not supported\n @deprecated( # type: ignore\n \"The probA_ attribute is deprecated in version 0.23 and will be \"\n \"removed in version 1.0.\")\n @property\n def probA_(self):\n return self._probA\n\n # mypy error: Decorated property not supported\n @deprecated( # type: ignore\n \"The probB_ attribute is deprecated in version 0.23 and will be \"\n \"removed in version 1.0.\")\n @property\n def probB_(self):\n return self._probB\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC", + "name": "SVC", + "qname": "sklearn.svm._classes.SVC", + "decorators": [], + "superclasses": ["BaseSVC"], + "methods": [ + "scikit-learn/sklearn.svm._classes/SVC/__init__", + "scikit-learn/sklearn.svm._classes/SVC/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "C-Support Vector Classification.\n\nThe implementation is based on libsvm. The fit time scales at least\nquadratically with the number of samples and may be impractical\nbeyond tens of thousands of samples. 
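The SVC description that follows recommends `LinearSVC` or `SGDClassifier`, possibly behind a `Nystroem` transformer, once kernel SVC becomes impractical. A minimal sketch of that suggested pipeline; the dataset size and the `gamma`/`n_components` values are assumptions for illustration:

```python
# Sketch: approximate-RBF feature map (Nystroem) feeding a linear SGD
# classifier, the large-dataset alternative suggested in the SVC docstring.
from sklearn.datasets import make_classification
from sklearn.kernel_approximation import Nystroem
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline

X, y = make_classification(n_samples=5000, n_features=20, random_state=0)
clf = make_pipeline(Nystroem(gamma=0.1, n_components=100, random_state=0),
                    SGDClassifier(random_state=0))
print(clf.fit(X, y).score(X, y))  # training cost grows linearly in n_samples
```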
For large datasets\nconsider using :class:`~sklearn.svm.LinearSVC` or\n:class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nThe multiclass support is handled according to a one-vs-one scheme.\n\nFor details on the precise mathematical formulation of the provided\nkernel functions and how `gamma`, `coef0` and `degree` affect each\nother, see the corresponding section in the narrative documentation:\n:ref:`svm_kernels`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "C-Support Vector Classification.\n\nThe implementation is based on libsvm. The fit time scales at least\nquadratically with the number of samples and may be impractical\nbeyond tens of thousands of samples. For large datasets\nconsider using :class:`~sklearn.svm.LinearSVC` or\n:class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nThe multiclass support is handled according to a one-vs-one scheme.\n\nFor details on the precise mathematical formulation of the provided\nkernel functions and how `gamma`, `coef0` and `degree` affect each\nother, see the corresponding section in the narrative documentation:\n:ref:`svm_kernels`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nC : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive. The penalty\n is a squared l2 penalty.\n\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to pre-compute the kernel matrix from data matrices; that matrix\n should be an array of shape ``(n_samples, n_samples)``.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\nprobability : bool, default=False\n Whether to enable probability estimates. This must be enabled prior\n to calling `fit`, will slow down that method as it internally uses\n 5-fold cross-validation, and `predict_proba` may be inconsistent with\n `predict`. Read more in the :ref:`User Guide `.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nclass_weight : dict or 'balanced', default=None\n Set the parameter C of class i to class_weight[i]*C for\n SVC. If not given, all classes are supposed to have\n weight one.\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\nverbose : bool, default=False\n Enable verbose output. 
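The `gamma` parameter above defines `'scale'` as `1 / (n_features * X.var())`. A minimal sketch confirming that passing the explicit value reproduces the `'scale'` fit (the dataset is an illustrative assumption):

```python
# Sketch: gamma='scale' resolves to 1 / (n_features * X.var()), so fitting
# with that explicit float should yield the same decision function.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.svm import SVC

X, y = make_classification(n_samples=100, n_features=4, random_state=0)
g = 1.0 / (X.shape[1] * X.var())

a = SVC(gamma='scale').fit(X, y)
b = SVC(gamma=g).fit(X, y)
assert np.allclose(a.decision_function(X), b.decision_function(X))
print(f"gamma='scale' resolved to {g:.6f}")
```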
Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\ndecision_function_shape : {'ovo', 'ovr'}, default='ovr'\n Whether to return a one-vs-rest ('ovr') decision function of shape\n (n_samples, n_classes) as all other classifiers, or the original\n one-vs-one ('ovo') decision function of libsvm which has shape\n (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n ('ovo') is always used as multi-class strategy. The parameter is\n ignored for binary classification.\n\n .. versionchanged:: 0.19\n decision_function_shape is 'ovr' by default.\n\n .. versionadded:: 0.17\n *decision_function_shape='ovr'* is recommended.\n\n .. versionchanged:: 0.17\n Deprecated *decision_function_shape='ovo' and None*.\n\nbreak_ties : bool, default=False\n If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n :term:`predict` will break ties according to the confidence values of\n :term:`decision_function`; otherwise the first class among the tied\n classes is returned. Please note that breaking ties comes at a\n relatively high computational cost compared to a simple predict.\n\n .. versionadded:: 0.22\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data for\n probability estimates. Ignored when `probability` is False.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels.\n\ncoef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is a readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (n_classes -1, n_SV)\n Dual coefficients of the support vector in the decision\n function (see :ref:`sgd_mathematical_formulation`), multiplied by\n their targets.\n For multiclass, coefficient for all 1-vs-1 classifiers.\n The layout of the coefficients in the multiclass case is somewhat\n non-trivial. See the :ref:`multi-class section of the User Guide\n ` for details.\n\nfit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n Constants in decision function.\n\nsupport_ : ndarray of shape (n_SV)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\nprobA_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\nprobB_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\n If `probability=True`, it corresponds to the parameters learned in\n Platt scaling to produce probability estimates from decision values.\n If `probability=False`, it's an empty array. Platt scaling uses the\n logistic function\n ``1 / (1 + exp(decision_value * probA_ + probB_))``\n where ``probA_`` and ``probB_`` are learned from the dataset [2]_. 
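The `decision_function_shape` documentation above gives the two output shapes: `(n_samples, n_classes)` for `'ovr'` and `(n_samples, n_classes * (n_classes - 1) / 2)` for `'ovo'`. A minimal sketch on an assumed 4-class dataset, where the shapes differ (4 vs. 6 columns):

```python
# Sketch: decision_function shape under 'ovr' vs 'ovo'; with 4 classes,
# 'ovr' yields 4 columns and 'ovo' yields 4*3/2 = 6 columns.
from sklearn.datasets import make_classification
from sklearn.svm import SVC

X, y = make_classification(n_samples=200, n_features=10, n_informative=6,
                           n_classes=4, random_state=0)

for shape in ('ovr', 'ovo'):
    dec = SVC(decision_function_shape=shape).fit(X, y).decision_function(X)
    print(shape, dec.shape)  # (200, 4) for 'ovr', (200, 6) for 'ovo'
```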
For\n more information on the multiclass case and training procedure see\n section 8 of [1]_.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> y = np.array([1, 1, 2, 2])\n>>> from sklearn.svm import SVC\n>>> clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))\n>>> clf.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('svc', SVC(gamma='auto'))])\n\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n\nSee Also\n--------\nSVR : Support Vector Machine for Regression implemented using libsvm.\n\nLinearSVC : Scalable Linear Support Vector Machine for classification\n implemented using liblinear. Check the See Also section of\n LinearSVC for more comparison element.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n.. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_", + "code": "class SVC(BaseSVC):\n \"\"\"C-Support Vector Classification.\n\n The implementation is based on libsvm. The fit time scales at least\n quadratically with the number of samples and may be impractical\n beyond tens of thousands of samples. For large datasets\n consider using :class:`~sklearn.svm.LinearSVC` or\n :class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a\n :class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\n The multiclass support is handled according to a one-vs-one scheme.\n\n For details on the precise mathematical formulation of the provided\n kernel functions and how `gamma`, `coef0` and `degree` affect each\n other, see the corresponding section in the narrative documentation:\n :ref:`svm_kernels`.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n C : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive. The penalty\n is a squared l2 penalty.\n\n kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to pre-compute the kernel matrix from data matrices; that matrix\n should be an array of shape ``(n_samples, n_samples)``.\n\n degree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\n gamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n coef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\n shrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\n probability : bool, default=False\n Whether to enable probability estimates. 
This must be enabled prior\n to calling `fit`, will slow down that method as it internally uses\n 5-fold cross-validation, and `predict_proba` may be inconsistent with\n `predict`. Read more in the :ref:`User Guide `.\n\n tol : float, default=1e-3\n Tolerance for stopping criterion.\n\n cache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\n class_weight : dict or 'balanced', default=None\n Set the parameter C of class i to class_weight[i]*C for\n SVC. If not given, all classes are supposed to have\n weight one.\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n verbose : bool, default=False\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\n max_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\n decision_function_shape : {'ovo', 'ovr'}, default='ovr'\n Whether to return a one-vs-rest ('ovr') decision function of shape\n (n_samples, n_classes) as all other classifiers, or the original\n one-vs-one ('ovo') decision function of libsvm which has shape\n (n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n ('ovo') is always used as multi-class strategy. The parameter is\n ignored for binary classification.\n\n .. versionchanged:: 0.19\n decision_function_shape is 'ovr' by default.\n\n .. versionadded:: 0.17\n *decision_function_shape='ovr'* is recommended.\n\n .. versionchanged:: 0.17\n Deprecated *decision_function_shape='ovo' and None*.\n\n break_ties : bool, default=False\n If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n :term:`predict` will break ties according to the confidence values of\n :term:`decision_function`; otherwise the first class among the tied\n classes is returned. Please note that breaking ties comes at a\n relatively high computational cost compared to a simple predict.\n\n .. versionadded:: 0.22\n\n random_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data for\n probability estimates. Ignored when `probability` is False.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Attributes\n ----------\n class_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\n classes_ : ndarray of shape (n_classes,)\n The classes labels.\n\n coef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is a readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\n dual_coef_ : ndarray of shape (n_classes -1, n_SV)\n Dual coefficients of the support vector in the decision\n function (see :ref:`sgd_mathematical_formulation`), multiplied by\n their targets.\n For multiclass, coefficient for all 1-vs-1 classifiers.\n The layout of the coefficients in the multiclass case is somewhat\n non-trivial. 
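The `break_ties` parameter described above resolves tied one-vs-rest votes via decision-function confidences instead of defaulting to the lowest class index. A minimal sketch comparing predictions with and without it; on easy, assumed synthetic data ties may be rare, so the reported count can be zero:

```python
# Sketch: count predictions changed by break_ties=True on a 3-class problem
# (requires decision_function_shape='ovr' and more than two classes).
from sklearn.datasets import make_classification
from sklearn.svm import SVC

X, y = make_classification(n_samples=300, n_features=10, n_informative=6,
                           n_classes=3, random_state=0)

plain = SVC(decision_function_shape='ovr').fit(X, y).predict(X)
tiebreak = SVC(decision_function_shape='ovr',
               break_ties=True).fit(X, y).predict(X)
print("predictions changed by tie-breaking:", (plain != tiebreak).sum())
```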
See the :ref:`multi-class section of the User Guide\n ` for details.\n\n fit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\n intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)\n Constants in decision function.\n\n support_ : ndarray of shape (n_SV)\n Indices of support vectors.\n\n support_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\n n_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\n probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\n probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2)\n If `probability=True`, it corresponds to the parameters learned in\n Platt scaling to produce probability estimates from decision values.\n If `probability=False`, it's an empty array. Platt scaling uses the\n logistic function\n ``1 / (1 + exp(decision_value * probA_ + probB_))``\n where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For\n more information on the multiclass case and training procedure see\n section 8 of [1]_.\n\n shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.pipeline import make_pipeline\n >>> from sklearn.preprocessing import StandardScaler\n >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n >>> y = np.array([1, 1, 2, 2])\n >>> from sklearn.svm import SVC\n >>> clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))\n >>> clf.fit(X, y)\n Pipeline(steps=[('standardscaler', StandardScaler()),\n ('svc', SVC(gamma='auto'))])\n\n >>> print(clf.predict([[-0.8, -1]]))\n [1]\n\n See Also\n --------\n SVR : Support Vector Machine for Regression implemented using libsvm.\n\n LinearSVC : Scalable Linear Support Vector Machine for classification\n implemented using liblinear. Check the See Also section of\n LinearSVC for more comparison element.\n\n References\n ----------\n .. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n .. [2] `Platt, John (1999). 
\"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_\n \"\"\"\n\n _impl = 'c_svc'\n\n @_deprecate_positional_args\n def __init__(self, *, C=1.0, kernel='rbf', degree=3, gamma='scale',\n coef0=0.0, shrinking=True, probability=False,\n tol=1e-3, cache_size=200, class_weight=None,\n verbose=False, max_iter=-1, decision_function_shape='ovr',\n break_ties=False,\n random_state=None):\n\n super().__init__(\n kernel=kernel, degree=degree, gamma=gamma,\n coef0=coef0, tol=tol, C=C, nu=0., shrinking=shrinking,\n probability=probability, cache_size=cache_size,\n class_weight=class_weight, verbose=verbose, max_iter=max_iter,\n decision_function_shape=decision_function_shape,\n break_ties=break_ties,\n random_state=random_state)\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR", + "name": "SVR", + "qname": "sklearn.svm._classes.SVR", + "decorators": [], + "superclasses": ["RegressorMixin", "BaseLibSVM"], + "methods": [ + "scikit-learn/sklearn.svm._classes/SVR/__init__", + "scikit-learn/sklearn.svm._classes/SVR/probA_@getter", + "scikit-learn/sklearn.svm._classes/SVR/probB_@getter", + "scikit-learn/sklearn.svm._classes/SVR/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Epsilon-Support Vector Regression.\n\nThe free parameters in the model are C and epsilon.\n\nThe implementation is based on libsvm. The fit time complexity\nis more than quadratic with the number of samples which makes it hard\nto scale to datasets with more than a couple of 10000 samples. For large\ndatasets consider using :class:`~sklearn.svm.LinearSVR` or\n:class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Epsilon-Support Vector Regression.\n\nThe free parameters in the model are C and epsilon.\n\nThe implementation is based on libsvm. The fit time complexity\nis more than quadratic with the number of samples which makes it hard\nto scale to datasets with more than a couple of 10000 samples. For large\ndatasets consider using :class:`~sklearn.svm.LinearSVR` or\n:class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nkernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\ndegree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\ngamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. 
versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\ncoef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\ntol : float, default=1e-3\n Tolerance for stopping criterion.\n\nC : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive.\n The penalty is a squared l2 penalty.\n\nepsilon : float, default=0.1\n Epsilon in the epsilon-SVR model. It specifies the epsilon-tube\n within which no penalty is associated in the training loss function\n with points predicted within a distance epsilon from the actual\n value.\n\nshrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\ncache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\nverbose : bool, default=False\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\nmax_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\nAttributes\n----------\nclass_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\ncoef_ : ndarray of shape (1, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\ndual_coef_ : ndarray of shape (1, n_SV)\n Coefficients of the support vector in the decision function.\n\nfit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\nintercept_ : ndarray of shape (1,)\n Constants in decision function.\n\nn_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\nshape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\nsupport_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\nsupport_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\nExamples\n--------\n>>> from sklearn.svm import SVR\n>>> from sklearn.pipeline import make_pipeline\n>>> from sklearn.preprocessing import StandardScaler\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> regr = make_pipeline(StandardScaler(), SVR(C=1.0, epsilon=0.2))\n>>> regr.fit(X, y)\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('svr', SVR(epsilon=0.2))])\n\nSee Also\n--------\nNuSVR : Support Vector Machine for regression implemented using libsvm\n using a parameter to control the number of support vectors.\n\nLinearSVR : Scalable Linear Support Vector Machine for regression\n implemented using liblinear.\n\nReferences\n----------\n.. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n.. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_", + "code": "class SVR(RegressorMixin, BaseLibSVM):\n \"\"\"Epsilon-Support Vector Regression.\n\n The free parameters in the model are C and epsilon.\n\n The implementation is based on libsvm. 
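The `epsilon` parameter above defines a tube within which training points incur no penalty; points inside the tube do not become support vectors. A minimal sketch of the consequence, on assumed noisy sine data: widening the tube typically shrinks the support set.

```python
# Sketch: larger epsilon leaves more points penalty-free inside the tube,
# so SVR typically retains fewer support vectors. Data is illustrative.
import numpy as np
from sklearn.svm import SVR

rng = np.random.RandomState(0)
X = np.sort(rng.uniform(0, 5, 100))[:, None]
y = np.sin(X).ravel() + 0.1 * rng.randn(100)

for eps in (0.01, 0.1, 0.5):
    n_sv = len(SVR(epsilon=eps).fit(X, y).support_)
    print(f"epsilon={eps}: {n_sv} support vectors")
```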
The fit time complexity\n is more than quadratic with the number of samples which makes it hard\n to scale to datasets with more than a couple of 10000 samples. For large\n datasets consider using :class:`~sklearn.svm.LinearSVR` or\n :class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a\n :class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'\n Specifies the kernel type to be used in the algorithm.\n It must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\n a callable.\n If none is given, 'rbf' will be used. If a callable is given it is\n used to precompute the kernel matrix.\n\n degree : int, default=3\n Degree of the polynomial kernel function ('poly').\n Ignored by all other kernels.\n\n gamma : {'scale', 'auto'} or float, default='scale'\n Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n - if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n - if 'auto', uses 1 / n_features.\n\n .. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'.\n\n coef0 : float, default=0.0\n Independent term in kernel function.\n It is only significant in 'poly' and 'sigmoid'.\n\n tol : float, default=1e-3\n Tolerance for stopping criterion.\n\n C : float, default=1.0\n Regularization parameter. The strength of the regularization is\n inversely proportional to C. Must be strictly positive.\n The penalty is a squared l2 penalty.\n\n epsilon : float, default=0.1\n Epsilon in the epsilon-SVR model. It specifies the epsilon-tube\n within which no penalty is associated in the training loss function\n with points predicted within a distance epsilon from the actual\n value.\n\n shrinking : bool, default=True\n Whether to use the shrinking heuristic.\n See the :ref:`User Guide `.\n\n cache_size : float, default=200\n Specify the size of the kernel cache (in MB).\n\n verbose : bool, default=False\n Enable verbose output. Note that this setting takes advantage of a\n per-process runtime setting in libsvm that, if enabled, may not work\n properly in a multithreaded context.\n\n max_iter : int, default=-1\n Hard limit on iterations within solver, or -1 for no limit.\n\n Attributes\n ----------\n class_weight_ : ndarray of shape (n_classes,)\n Multipliers of parameter C for each class.\n Computed based on the ``class_weight`` parameter.\n\n coef_ : ndarray of shape (1, n_features)\n Weights assigned to the features (coefficients in the primal\n problem). 
This is only available in the case of a linear kernel.\n\n `coef_` is readonly property derived from `dual_coef_` and\n `support_vectors_`.\n\n dual_coef_ : ndarray of shape (1, n_SV)\n Coefficients of the support vector in the decision function.\n\n fit_status_ : int\n 0 if correctly fitted, 1 otherwise (will raise warning)\n\n intercept_ : ndarray of shape (1,)\n Constants in decision function.\n\n n_support_ : ndarray of shape (n_classes,), dtype=int32\n Number of support vectors for each class.\n\n shape_fit_ : tuple of int of shape (n_dimensions_of_X,)\n Array dimensions of training vector ``X``.\n\n support_ : ndarray of shape (n_SV,)\n Indices of support vectors.\n\n support_vectors_ : ndarray of shape (n_SV, n_features)\n Support vectors.\n\n Examples\n --------\n >>> from sklearn.svm import SVR\n >>> from sklearn.pipeline import make_pipeline\n >>> from sklearn.preprocessing import StandardScaler\n >>> import numpy as np\n >>> n_samples, n_features = 10, 5\n >>> rng = np.random.RandomState(0)\n >>> y = rng.randn(n_samples)\n >>> X = rng.randn(n_samples, n_features)\n >>> regr = make_pipeline(StandardScaler(), SVR(C=1.0, epsilon=0.2))\n >>> regr.fit(X, y)\n Pipeline(steps=[('standardscaler', StandardScaler()),\n ('svr', SVR(epsilon=0.2))])\n\n See Also\n --------\n NuSVR : Support Vector Machine for regression implemented using libsvm\n using a parameter to control the number of support vectors.\n\n LinearSVR : Scalable Linear Support Vector Machine for regression\n implemented using liblinear.\n\n References\n ----------\n .. [1] `LIBSVM: A Library for Support Vector Machines\n `_\n\n .. [2] `Platt, John (1999). \"Probabilistic outputs for support vector\n machines and comparison to regularizedlikelihood methods.\"\n `_\n \"\"\"\n\n _impl = 'epsilon_svr'\n\n @_deprecate_positional_args\n def __init__(self, *, kernel='rbf', degree=3, gamma='scale',\n coef0=0.0, tol=1e-3, C=1.0, epsilon=0.1, shrinking=True,\n cache_size=200, verbose=False, max_iter=-1):\n\n super().__init__(\n kernel=kernel, degree=degree, gamma=gamma,\n coef0=coef0, tol=tol, C=C, nu=0., epsilon=epsilon, verbose=verbose,\n shrinking=shrinking, probability=False, cache_size=cache_size,\n class_weight=None, max_iter=max_iter, random_state=None)\n\n # mypy error: Decorated property not supported\n @deprecated( # type: ignore\n \"The probA_ attribute is deprecated in version 0.23 and will be \"\n \"removed in version 1.0 (renaming of 0.25).\")\n @property\n def probA_(self):\n return self._probA\n\n # mypy error: Decorated property not supported\n @deprecated( # type: ignore\n \"The probB_ attribute is deprecated in version 0.23 and will be \"\n \"removed in version 1.0 (renaming of 0.25).\")\n @property\n def probB_(self):\n return self._probB\n\n def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree", + "name": "BaseDecisionTree", + "qname": "sklearn.tree._classes.BaseDecisionTree", + "decorators": [], + "superclasses": ["MultiOutputMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__", + "scikit-learn/sklearn.tree._classes/BaseDecisionTree/get_depth", + "scikit-learn/sklearn.tree._classes/BaseDecisionTree/get_n_leaves", + "scikit-learn/sklearn.tree._classes/BaseDecisionTree/fit", + "scikit-learn/sklearn.tree._classes/BaseDecisionTree/_validate_X_predict", + 
"scikit-learn/sklearn.tree._classes/BaseDecisionTree/predict", + "scikit-learn/sklearn.tree._classes/BaseDecisionTree/apply", + "scikit-learn/sklearn.tree._classes/BaseDecisionTree/decision_path", + "scikit-learn/sklearn.tree._classes/BaseDecisionTree/_prune_tree", + "scikit-learn/sklearn.tree._classes/BaseDecisionTree/cost_complexity_pruning_path", + "scikit-learn/sklearn.tree._classes/BaseDecisionTree/feature_importances_@getter" + ], + "is_public": false, + "reexported_by": [], + "description": "Base class for decision trees.\n\nWarning: This class should not be used directly.\nUse derived classes instead.", + "docstring": "Base class for decision trees.\n\nWarning: This class should not be used directly.\nUse derived classes instead.", + "code": "class BaseDecisionTree(MultiOutputMixin, BaseEstimator, metaclass=ABCMeta):\n \"\"\"Base class for decision trees.\n\n Warning: This class should not be used directly.\n Use derived classes instead.\n \"\"\"\n\n @abstractmethod\n @_deprecate_positional_args\n def __init__(self, *,\n criterion,\n splitter,\n max_depth,\n min_samples_split,\n min_samples_leaf,\n min_weight_fraction_leaf,\n max_features,\n max_leaf_nodes,\n random_state,\n min_impurity_decrease,\n min_impurity_split,\n class_weight=None,\n ccp_alpha=0.0):\n self.criterion = criterion\n self.splitter = splitter\n self.max_depth = max_depth\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.max_features = max_features\n self.max_leaf_nodes = max_leaf_nodes\n self.random_state = random_state\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.class_weight = class_weight\n self.ccp_alpha = ccp_alpha\n\n def get_depth(self):\n \"\"\"Return the depth of the decision tree.\n\n The depth of a tree is the maximum distance between the root\n and any leaf.\n\n Returns\n -------\n self.tree_.max_depth : int\n The maximum depth of the tree.\n \"\"\"\n check_is_fitted(self)\n return self.tree_.max_depth\n\n def get_n_leaves(self):\n \"\"\"Return the number of leaves of the decision tree.\n\n Returns\n -------\n self.tree_.n_leaves : int\n Number of leaves.\n \"\"\"\n check_is_fitted(self)\n return self.tree_.n_leaves\n\n def fit(self, X, y, sample_weight=None, check_input=True,\n X_idx_sorted=\"deprecated\"):\n\n random_state = check_random_state(self.random_state)\n\n if self.ccp_alpha < 0.0:\n raise ValueError(\"ccp_alpha must be greater than or equal to 0\")\n\n if check_input:\n # Need to validate separately here.\n # We can't pass multi_ouput=True because that would allow y to be\n # csr.\n check_X_params = dict(dtype=DTYPE, accept_sparse=\"csc\")\n check_y_params = dict(ensure_2d=False, dtype=None)\n X, y = self._validate_data(X, y,\n validate_separately=(check_X_params,\n check_y_params))\n if issparse(X):\n X.sort_indices()\n\n if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:\n raise ValueError(\"No support for np.int64 index based \"\n \"sparse matrices\")\n\n if self.criterion == \"poisson\":\n if np.any(y < 0):\n raise ValueError(\"Some value(s) of y are negative which is\"\n \" not allowed for Poisson regression.\")\n if np.sum(y) <= 0:\n raise ValueError(\"Sum of y is not positive which is \"\n \"necessary for Poisson regression.\")\n\n # Determine output settings\n n_samples, self.n_features_ = X.shape\n self.n_features_in_ = self.n_features_\n is_classification = is_classifier(self)\n\n y = np.atleast_1d(y)\n 
expanded_class_weight = None\n\n if y.ndim == 1:\n # reshape is necessary to preserve the data contiguity against vs\n # [:, np.newaxis] that does not.\n y = np.reshape(y, (-1, 1))\n\n self.n_outputs_ = y.shape[1]\n\n if is_classification:\n check_classification_targets(y)\n y = np.copy(y)\n\n self.classes_ = []\n self.n_classes_ = []\n\n if self.class_weight is not None:\n y_original = np.copy(y)\n\n y_encoded = np.zeros(y.shape, dtype=int)\n for k in range(self.n_outputs_):\n classes_k, y_encoded[:, k] = np.unique(y[:, k],\n return_inverse=True)\n self.classes_.append(classes_k)\n self.n_classes_.append(classes_k.shape[0])\n y = y_encoded\n\n if self.class_weight is not None:\n expanded_class_weight = compute_sample_weight(\n self.class_weight, y_original)\n\n self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)\n\n if getattr(y, \"dtype\", None) != DOUBLE or not y.flags.contiguous:\n y = np.ascontiguousarray(y, dtype=DOUBLE)\n\n # Check parameters\n max_depth = (np.iinfo(np.int32).max if self.max_depth is None\n else self.max_depth)\n max_leaf_nodes = (-1 if self.max_leaf_nodes is None\n else self.max_leaf_nodes)\n\n if isinstance(self.min_samples_leaf, numbers.Integral):\n if not 1 <= self.min_samples_leaf:\n raise ValueError(\"min_samples_leaf must be at least 1 \"\n \"or in (0, 0.5], got %s\"\n % self.min_samples_leaf)\n min_samples_leaf = self.min_samples_leaf\n else: # float\n if not 0. < self.min_samples_leaf <= 0.5:\n raise ValueError(\"min_samples_leaf must be at least 1 \"\n \"or in (0, 0.5], got %s\"\n % self.min_samples_leaf)\n min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples))\n\n if isinstance(self.min_samples_split, numbers.Integral):\n if not 2 <= self.min_samples_split:\n raise ValueError(\"min_samples_split must be an integer \"\n \"greater than 1 or a float in (0.0, 1.0]; \"\n \"got the integer %s\"\n % self.min_samples_split)\n min_samples_split = self.min_samples_split\n else: # float\n if not 0. < self.min_samples_split <= 1.:\n raise ValueError(\"min_samples_split must be an integer \"\n \"greater than 1 or a float in (0.0, 1.0]; \"\n \"got the float %s\"\n % self.min_samples_split)\n min_samples_split = int(ceil(self.min_samples_split * n_samples))\n min_samples_split = max(2, min_samples_split)\n\n min_samples_split = max(min_samples_split, 2 * min_samples_leaf)\n\n if isinstance(self.max_features, str):\n if self.max_features == \"auto\":\n if is_classification:\n max_features = max(1, int(np.sqrt(self.n_features_)))\n else:\n max_features = self.n_features_\n elif self.max_features == \"sqrt\":\n max_features = max(1, int(np.sqrt(self.n_features_)))\n elif self.max_features == \"log2\":\n max_features = max(1, int(np.log2(self.n_features_)))\n else:\n raise ValueError(\"Invalid value for max_features. \"\n \"Allowed string values are 'auto', \"\n \"'sqrt' or 'log2'.\")\n elif self.max_features is None:\n max_features = self.n_features_\n elif isinstance(self.max_features, numbers.Integral):\n max_features = self.max_features\n else: # float\n if self.max_features > 0.0:\n max_features = max(1,\n int(self.max_features * self.n_features_))\n else:\n max_features = 0\n\n self.max_features_ = max_features\n\n if len(y) != n_samples:\n raise ValueError(\"Number of labels=%d does not match \"\n \"number of samples=%d\" % (len(y), n_samples))\n if not 0 <= self.min_weight_fraction_leaf <= 0.5:\n raise ValueError(\"min_weight_fraction_leaf must in [0, 0.5]\")\n if max_depth <= 0:\n raise ValueError(\"max_depth must be greater than zero. 
\")\n if not (0 < max_features <= self.n_features_):\n raise ValueError(\"max_features must be in (0, n_features]\")\n if not isinstance(max_leaf_nodes, numbers.Integral):\n raise ValueError(\"max_leaf_nodes must be integral number but was \"\n \"%r\" % max_leaf_nodes)\n if -1 < max_leaf_nodes < 2:\n raise ValueError((\"max_leaf_nodes {0} must be either None \"\n \"or larger than 1\").format(max_leaf_nodes))\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X, DOUBLE)\n\n if expanded_class_weight is not None:\n if sample_weight is not None:\n sample_weight = sample_weight * expanded_class_weight\n else:\n sample_weight = expanded_class_weight\n\n # Set min_weight_leaf from min_weight_fraction_leaf\n if sample_weight is None:\n min_weight_leaf = (self.min_weight_fraction_leaf *\n n_samples)\n else:\n min_weight_leaf = (self.min_weight_fraction_leaf *\n np.sum(sample_weight))\n\n min_impurity_split = self.min_impurity_split\n if min_impurity_split is not None:\n warnings.warn(\n \"The min_impurity_split parameter is deprecated. Its default \"\n \"value has changed from 1e-7 to 0 in version 0.23, and it \"\n \"will be removed in 1.0 (renaming of 0.25). Use the \"\n \"min_impurity_decrease parameter instead.\",\n FutureWarning\n )\n\n if min_impurity_split < 0.:\n raise ValueError(\"min_impurity_split must be greater than \"\n \"or equal to 0\")\n else:\n min_impurity_split = 0\n\n if self.min_impurity_decrease < 0.:\n raise ValueError(\"min_impurity_decrease must be greater than \"\n \"or equal to 0\")\n\n # TODO: Remove in 1.1\n if X_idx_sorted != \"deprecated\":\n warnings.warn(\n \"The parameter 'X_idx_sorted' is deprecated and has no \"\n \"effect. It will be removed in 1.1 (renaming of 0.26). You \"\n \"can suppress this warning by not passing any value to the \"\n \"'X_idx_sorted' parameter.\",\n FutureWarning\n )\n\n # Build tree\n criterion = self.criterion\n if not isinstance(criterion, Criterion):\n if is_classification:\n criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,\n self.n_classes_)\n else:\n criterion = CRITERIA_REG[self.criterion](self.n_outputs_,\n n_samples)\n else:\n # Make a deepcopy in case the criterion has mutable attributes that\n # might be shared and modified concurrently during parallel fitting\n criterion = copy.deepcopy(criterion)\n\n SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS\n\n splitter = self.splitter\n if not isinstance(self.splitter, Splitter):\n splitter = SPLITTERS[self.splitter](criterion,\n self.max_features_,\n min_samples_leaf,\n min_weight_leaf,\n random_state)\n\n if is_classifier(self):\n self.tree_ = Tree(self.n_features_,\n self.n_classes_, self.n_outputs_)\n else:\n self.tree_ = Tree(self.n_features_,\n # TODO: tree should't need this in this case\n np.array([1] * self.n_outputs_, dtype=np.intp),\n self.n_outputs_)\n\n # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise\n if max_leaf_nodes < 0:\n builder = DepthFirstTreeBuilder(splitter, min_samples_split,\n min_samples_leaf,\n min_weight_leaf,\n max_depth,\n self.min_impurity_decrease,\n min_impurity_split)\n else:\n builder = BestFirstTreeBuilder(splitter, min_samples_split,\n min_samples_leaf,\n min_weight_leaf,\n max_depth,\n max_leaf_nodes,\n self.min_impurity_decrease,\n min_impurity_split)\n\n builder.build(self.tree_, X, y, sample_weight)\n\n if self.n_outputs_ == 1 and is_classifier(self):\n self.n_classes_ = self.n_classes_[0]\n self.classes_ = self.classes_[0]\n\n self._prune_tree()\n\n return 
self\n\n def _validate_X_predict(self, X, check_input):\n \"\"\"Validate the training data on predict (probabilities).\"\"\"\n if check_input:\n X = self._validate_data(X, dtype=DTYPE, accept_sparse=\"csr\",\n reset=False)\n if issparse(X) and (X.indices.dtype != np.intc or\n X.indptr.dtype != np.intc):\n raise ValueError(\"No support for np.int64 index based \"\n \"sparse matrices\")\n else:\n # The number of features is checked regardless of `check_input`\n self._check_n_features(X, reset=False)\n return X\n\n def predict(self, X, check_input=True):\n \"\"\"Predict class or regression value for X.\n\n For a classification model, the predicted class for each sample in X is\n returned. For a regression model, the predicted value based on X is\n returned.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n Returns\n -------\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The predicted classes, or the predict values.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_X_predict(X, check_input)\n proba = self.tree_.predict(X)\n n_samples = X.shape[0]\n\n # Classification\n if is_classifier(self):\n if self.n_outputs_ == 1:\n return self.classes_.take(np.argmax(proba, axis=1), axis=0)\n\n else:\n class_type = self.classes_[0].dtype\n predictions = np.zeros((n_samples, self.n_outputs_),\n dtype=class_type)\n for k in range(self.n_outputs_):\n predictions[:, k] = self.classes_[k].take(\n np.argmax(proba[:, k], axis=1),\n axis=0)\n\n return predictions\n\n # Regression\n else:\n if self.n_outputs_ == 1:\n return proba[:, 0]\n\n else:\n return proba[:, :, 0]\n\n def apply(self, X, check_input=True):\n \"\"\"Return the index of the leaf that each sample is predicted as.\n\n .. versionadded:: 0.17\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n Returns\n -------\n X_leaves : array-like of shape (n_samples,)\n For each datapoint x in X, return the index of the leaf x\n ends up in. Leaves are numbered within\n ``[0; self.tree_.node_count)``, possibly with gaps in the\n numbering.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_X_predict(X, check_input)\n return self.tree_.apply(X)\n\n def decision_path(self, X, check_input=True):\n \"\"\"Return the decision path in the tree.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. 
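The `apply` and `decision_path` methods documented above return, respectively, the leaf index per sample and a CSR node-indicator matrix. A minimal sketch of both on a small fitted tree (the iris data and `max_depth` are illustrative assumptions):

```python
# Sketch: apply() gives the leaf id each sample lands in; decision_path()
# gives a sparse (n_samples, n_nodes) indicator of the visited nodes.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
tree = DecisionTreeClassifier(max_depth=2, random_state=0).fit(X, y)

print(tree.apply(X[:3]))               # leaf index for each sample
indicator = tree.decision_path(X[:3])  # CSR matrix, shape (3, n_nodes)
print(indicator.toarray())             # rows mark the root-to-leaf path
```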
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n Returns\n -------\n indicator : sparse matrix of shape (n_samples, n_nodes)\n Return a node indicator CSR matrix where non zero elements\n indicates that the samples goes through the nodes.\n \"\"\"\n X = self._validate_X_predict(X, check_input)\n return self.tree_.decision_path(X)\n\n def _prune_tree(self):\n \"\"\"Prune tree using Minimal Cost-Complexity Pruning.\"\"\"\n check_is_fitted(self)\n\n if self.ccp_alpha < 0.0:\n raise ValueError(\"ccp_alpha must be greater than or equal to 0\")\n\n if self.ccp_alpha == 0.0:\n return\n\n # build pruned tree\n if is_classifier(self):\n n_classes = np.atleast_1d(self.n_classes_)\n pruned_tree = Tree(self.n_features_, n_classes, self.n_outputs_)\n else:\n pruned_tree = Tree(self.n_features_,\n # TODO: the tree shouldn't need this param\n np.array([1] * self.n_outputs_, dtype=np.intp),\n self.n_outputs_)\n _build_pruned_tree_ccp(pruned_tree, self.tree_, self.ccp_alpha)\n\n self.tree_ = pruned_tree\n\n def cost_complexity_pruning_path(self, X, y, sample_weight=None):\n \"\"\"Compute the pruning path during Minimal Cost-Complexity Pruning.\n\n See :ref:`minimal_cost_complexity_pruning` for details on the pruning\n process.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels) as integers or strings.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. Splits are also\n ignored if they would result in any single class carrying a\n negative weight in either child node.\n\n Returns\n -------\n ccp_path : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n ccp_alphas : ndarray\n Effective alphas of subtree during pruning.\n\n impurities : ndarray\n Sum of the impurities of the subtree leaves for the\n corresponding alpha value in ``ccp_alphas``.\n \"\"\"\n est = clone(self).set_params(ccp_alpha=0.0)\n est.fit(X, y, sample_weight=sample_weight)\n return Bunch(**ccp_pruning_path(est.tree_))\n\n @property\n def feature_importances_(self):\n \"\"\"Return the feature importances.\n\n The importance of a feature is computed as the (normalized) total\n reduction of the criterion brought by that feature.\n It is also known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). 
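`cost_complexity_pruning_path`, documented above, returns a Bunch of effective alphas and leaf impurities; each alpha can then be fed back as `ccp_alpha` to obtain the corresponding pruned tree. A minimal sketch (iris data and the alpha subsampling are illustrative):

```python
# Sketch: compute the pruning path, then refit one tree per alpha;
# larger ccp_alpha prunes harder and yields fewer leaves.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
path = DecisionTreeClassifier(random_state=0).cost_complexity_pruning_path(X, y)

for alpha in path.ccp_alphas[::3]:
    n_leaves = DecisionTreeClassifier(random_state=0,
                                      ccp_alpha=alpha).fit(X, y).get_n_leaves()
    print(f"ccp_alpha={alpha:.4f}: {n_leaves} leaves")
```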
See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n Returns\n -------\n feature_importances_ : ndarray of shape (n_features,)\n Normalized total reduction of criteria by feature\n (Gini importance).\n \"\"\"\n check_is_fitted(self)\n\n return self.tree_.compute_feature_importances()", + "instance_attributes": [ + { + "name": "ccp_alpha", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "classes_", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "n_classes_", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier", + "name": "DecisionTreeClassifier", + "qname": "sklearn.tree._classes.DecisionTreeClassifier", + "decorators": [], + "superclasses": ["ClassifierMixin", "BaseDecisionTree"], + "methods": [ + "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__", + "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/fit", + "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/predict_proba", + "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/predict_log_proba" + ], + "is_public": false, + "reexported_by": [], + "description": "A decision tree classifier.\n\nRead more in the :ref:`User Guide `.", + "docstring": "A decision tree classifier.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n\nsplitter : {\"best\", \"random\"}, default=\"best\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. 
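The DecisionTreeClassifier parameters above accept either an int (an absolute count) or a float (a fraction resolved via `ceil(fraction * n_samples)`). A minimal sketch checking the two forms coincide on iris, where `ceil(0.1 * 150) = 15`:

```python
# Sketch: min_samples_leaf=0.1 on 150 samples resolves to ceil(0.1*150)=15,
# so it constrains the tree exactly like min_samples_leaf=15.
from math import ceil
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)          # n_samples = 150
print(ceil(0.1 * X.shape[0]))              # -> 15

a = DecisionTreeClassifier(min_samples_leaf=0.1, random_state=0).fit(X, y)
b = DecisionTreeClassifier(min_samples_leaf=15, random_state=0).fit(X, y)
assert a.get_n_leaves() == b.get_n_leaves()  # identical effective constraint
```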
Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the estimator. The features are always\n randomly permuted at each split, even if ``splitter`` is set to\n ``\"best\"``. When ``max_features < n_features``, the algorithm will\n select ``max_features`` at random at each split before finding the best\n split among them. But the best found split may vary across different\n runs, even if ``max_features=n_features``. That is the case, if the\n improvement of the criterion is identical for several splits and one\n split has to be selected at random. To obtain a deterministic behaviour\n during fitting, ``random_state`` has to be fixed to an integer.\n See :term:`Glossary ` for details.\n\nmax_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=0\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nclass_weight : dict, list of dict or \"balanced\", default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If None, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. 
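The weighted impurity decrease formula quoted in the docstring can be checked with plain arithmetic; a worked instance with made-up counts (all numbers hypothetical; unweighted samples, so counts equal weights):

# Hypothetical node: N=100 samples total, N_t=40 at the node,
# N_t_L=30 go to the left child, N_t_R=10 to the right.
N, N_t, N_t_L, N_t_R = 100, 40, 30, 10
impurity, left_impurity, right_impurity = 0.5, 0.2, 0.1

decrease = N_t / N * (impurity
                      - N_t_R / N_t * right_impurity
                      - N_t_L / N_t * left_impurity)
# 0.13: the split is kept iff decrease >= min_impurity_decrease.
print(decrease)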
For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of ndarray\n The classes labels (single output problem),\n or a list of arrays of class labels (multi-output problem).\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance [4]_.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nmax_features_ : int\n The inferred value of max_features.\n\nn_classes_ : int or list of int\n The number of classes (for single output problems),\n or a list containing the number of classes for each\n output (for multi-output problems).\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\nSee Also\n--------\nDecisionTreeRegressor : A decision tree regressor.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nThe :meth:`predict` method operates using the :func:`numpy.argmax`\nfunction on the outputs of :meth:`predict_proba`. This means that in\ncase the highest predicted probabilities are tied, the classifier will\npredict the tied class with the lowest index in :term:`classes_`.\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n.. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n.. [3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n Learning\", Springer, 2009.\n\n.. [4] L. Breiman, and A. 
Cutler, \"Random Forests\",\n https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import cross_val_score\n>>> from sklearn.tree import DecisionTreeClassifier\n>>> clf = DecisionTreeClassifier(random_state=0)\n>>> iris = load_iris()\n>>> cross_val_score(clf, iris.data, iris.target, cv=10)\n... # doctest: +SKIP\n...\narray([ 1. , 0.93..., 0.86..., 0.93..., 0.93...,\n 0.93..., 0.93..., 1. , 0.93..., 1. ])", + "code": "class DecisionTreeClassifier(ClassifierMixin, BaseDecisionTree):\n \"\"\"A decision tree classifier.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n criterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n\n splitter : {\"best\", \"random\"}, default=\"best\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\n max_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\n min_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\n max_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\n random_state : int, RandomState instance or None, default=None\n Controls the randomness of the estimator. The features are always\n randomly permuted at each split, even if ``splitter`` is set to\n ``\"best\"``. 
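The ``class_weight="balanced"`` heuristic quoted above, ``n_samples / (n_classes * np.bincount(y))``, reproduced directly on a made-up imbalanced target:

import numpy as np

# Imbalanced binary target: 8 samples of class 0, 2 of class 1.
y = np.array([0] * 8 + [1] * 2)

n_samples = y.shape[0]
n_classes = np.unique(y).shape[0]

# The "balanced" rule from the docstring.
weights = n_samples / (n_classes * np.bincount(y))
print(weights)  # [0.625 2.5]: the rare class gets a 4x larger weight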
When ``max_features < n_features``, the algorithm will\n select ``max_features`` at random at each split before finding the best\n split among them. But the best found split may vary across different\n runs, even if ``max_features=n_features``. That is the case, if the\n improvement of the criterion is identical for several splits and one\n split has to be selected at random. To obtain a deterministic behaviour\n during fitting, ``random_state`` has to be fixed to an integer.\n See :term:`Glossary ` for details.\n\n max_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\n min_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\n min_impurity_split : float, default=0\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\n class_weight : dict, list of dict or \"balanced\", default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If None, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n ccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. 
versionadded:: 0.22\n\n Attributes\n ----------\n classes_ : ndarray of shape (n_classes,) or list of ndarray\n The classes labels (single output problem),\n or a list of arrays of class labels (multi-output problem).\n\n feature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance [4]_.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n max_features_ : int\n The inferred value of max_features.\n\n n_classes_ : int or list of int\n The number of classes (for single output problems),\n or a list containing the number of classes for each\n output (for multi-output problems).\n\n n_features_ : int\n The number of features when ``fit`` is performed.\n\n n_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\n tree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\n See Also\n --------\n DecisionTreeRegressor : A decision tree regressor.\n\n Notes\n -----\n The default values for the parameters controlling the size of the trees\n (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n unpruned trees which can potentially be very large on some data sets. To\n reduce memory consumption, the complexity and size of the trees should be\n controlled by setting those parameter values.\n\n The :meth:`predict` method operates using the :func:`numpy.argmax`\n function on the outputs of :meth:`predict_proba`. This means that in\n case the highest predicted probabilities are tied, the classifier will\n predict the tied class with the lowest index in :term:`classes_`.\n\n References\n ----------\n\n .. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n .. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n .. [3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n Learning\", Springer, 2009.\n\n .. [4] L. Breiman, and A. Cutler, \"Random Forests\",\n https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.model_selection import cross_val_score\n >>> from sklearn.tree import DecisionTreeClassifier\n >>> clf = DecisionTreeClassifier(random_state=0)\n >>> iris = load_iris()\n >>> cross_val_score(clf, iris.data, iris.target, cv=10)\n ... # doctest: +SKIP\n ...\n array([ 1. , 0.93..., 0.86..., 0.93..., 0.93...,\n 0.93..., 0.93..., 1. , 0.93..., 1. 
])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *,\n criterion=\"gini\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=None,\n random_state=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n class_weight=None,\n ccp_alpha=0.0):\n super().__init__(\n criterion=criterion,\n splitter=splitter,\n max_depth=max_depth,\n min_samples_split=min_samples_split,\n min_samples_leaf=min_samples_leaf,\n min_weight_fraction_leaf=min_weight_fraction_leaf,\n max_features=max_features,\n max_leaf_nodes=max_leaf_nodes,\n class_weight=class_weight,\n random_state=random_state,\n min_impurity_decrease=min_impurity_decrease,\n min_impurity_split=min_impurity_split,\n ccp_alpha=ccp_alpha)\n\n def fit(self, X, y, sample_weight=None, check_input=True,\n X_idx_sorted=\"deprecated\"):\n \"\"\"Build a decision tree classifier from the training set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels) as integers or strings.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. Splits are also\n ignored if they would result in any single class carrying a\n negative weight in either child node.\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n X_idx_sorted : deprecated, default=\"deprecated\"\n This parameter is deprecated and has no effect.\n It will be removed in 1.1 (renaming of 0.26).\n\n .. deprecated :: 0.24\n\n Returns\n -------\n self : DecisionTreeClassifier\n Fitted estimator.\n \"\"\"\n\n super().fit(\n X, y,\n sample_weight=sample_weight,\n check_input=check_input,\n X_idx_sorted=X_idx_sorted)\n return self\n\n def predict_proba(self, X, check_input=True):\n \"\"\"Predict class probabilities of the input samples X.\n\n The predicted class probability is the fraction of samples of the same\n class in a leaf.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n Returns\n -------\n proba : ndarray of shape (n_samples, n_classes) or list of n_outputs \\\n such arrays if n_outputs > 1\n The class probabilities of the input samples. 
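Per the ``predict_proba`` docstring above, probabilities are the class fractions in the reached leaf; a minimal fit-and-predict sketch (``max_depth=2`` is arbitrary, chosen to keep some leaves impure):

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
clf = DecisionTreeClassifier(max_depth=2, random_state=0).fit(X, y)

proba = clf.predict_proba(X[:3])
print(proba.shape)        # (3, n_classes)
print(proba.sum(axis=1))  # each row sums to 1: leaf class fractions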
The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_X_predict(X, check_input)\n proba = self.tree_.predict(X)\n\n if self.n_outputs_ == 1:\n proba = proba[:, :self.n_classes_]\n normalizer = proba.sum(axis=1)[:, np.newaxis]\n normalizer[normalizer == 0.0] = 1.0\n proba /= normalizer\n\n return proba\n\n else:\n all_proba = []\n\n for k in range(self.n_outputs_):\n proba_k = proba[:, k, :self.n_classes_[k]]\n normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n normalizer[normalizer == 0.0] = 1.0\n proba_k /= normalizer\n all_proba.append(proba_k)\n\n return all_proba\n\n def predict_log_proba(self, X):\n \"\"\"Predict class log-probabilities of the input samples X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n proba : ndarray of shape (n_samples, n_classes) or list of n_outputs \\\n such arrays if n_outputs > 1\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n proba = self.predict_proba(X)\n\n if self.n_outputs_ == 1:\n return np.log(proba)\n\n else:\n for k in range(self.n_outputs_):\n proba[k] = np.log(proba[k])\n\n return proba", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor", + "name": "DecisionTreeRegressor", + "qname": "sklearn.tree._classes.DecisionTreeRegressor", + "decorators": [], + "superclasses": ["RegressorMixin", "BaseDecisionTree"], + "methods": [ + "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__", + "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/fit", + "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/_compute_partial_dependence_recursion" + ], + "is_public": false, + "reexported_by": [], + "description": "A decision tree regressor.\n\nRead more in the :ref:`User Guide `.", + "docstring": "A decision tree regressor.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {\"mse\", \"friedman_mse\", \"mae\", \"poisson\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion and minimizes the L2 loss\n using the mean of each terminal node, \"friedman_mse\", which uses mean\n squared error with Friedman's improvement score for potential splits,\n \"mae\" for the mean absolute error, which minimizes the L1 loss using\n the median of each terminal node, and \"poisson\" which uses reduction in\n Poisson deviance to find splits.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n .. versionadded:: 0.24\n Poisson deviance criterion.\n\nsplitter : {\"best\", \"random\"}, default=\"best\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\nmax_depth : int, default=None\n The maximum depth of the tree. 
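As the ``predict_log_proba`` code above shows, for single-output trees it is simply ``np.log`` over ``predict_proba``; a quick check (``np.errstate`` silences the expected ``log(0) = -inf`` coming from pure leaves):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
clf = DecisionTreeClassifier(max_depth=2, random_state=0).fit(X, y)

# Single-output case: predict_log_proba(X) == np.log(predict_proba(X));
# pure leaves carry probability 0 for the other classes, hence -inf logs.
with np.errstate(divide="ignore"):
    assert np.allclose(clf.predict_log_proba(X), np.log(clf.predict_proba(X)))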
If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the estimator. The features are always\n randomly permuted at each split, even if ``splitter`` is set to\n ``\"best\"``. When ``max_features < n_features``, the algorithm will\n select ``max_features`` at random at each split before finding the best\n split among them. But the best found split may vary across different\n runs, even if ``max_features=n_features``. That is the case, if the\n improvement of the criterion is identical for several splits and one\n split has to be selected at random. 
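The regressor's ``criterion`` options described above include the 0.24 ``"poisson"`` deviance; a sketch comparing it with ``"mse"`` on synthetic non-negative counts (Poisson splitting requires ``y >= 0``; the data here is made up):

import numpy as np
from sklearn.tree import DecisionTreeRegressor

rng = np.random.RandomState(0)
X = rng.uniform(size=(100, 3))
y = rng.poisson(lam=3.0, size=100)  # non-negative counts suit "poisson"

# "mse" minimizes L2 via leaf means; "poisson" splits on deviance reduction.
for criterion in ("mse", "poisson"):
    reg = DecisionTreeRegressor(criterion=criterion, max_depth=3,
                                random_state=0).fit(X, y)
    print(criterion, reg.score(X, y))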
To obtain a deterministic behaviour\n during fitting, ``random_state`` has to be fixed to an integer.\n See :term:`Glossary ` for details.\n\nmax_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=0\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nfeature_importances_ : ndarray of shape (n_features,)\n The feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the\n (normalized) total reduction of the criterion brought\n by that feature. It is also known as the Gini importance [4]_.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nmax_features_ : int\n The inferred value of max_features.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\nSee Also\n--------\nDecisionTreeClassifier : A decision tree classifier.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n.. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n.. [3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n Learning\", Springer, 2009.\n\n.. 
[4] L. Breiman, and A. Cutler, \"Random Forests\",\n https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import cross_val_score\n>>> from sklearn.tree import DecisionTreeRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> regressor = DecisionTreeRegressor(random_state=0)\n>>> cross_val_score(regressor, X, y, cv=10)\n... # doctest: +SKIP\n...\narray([-0.39..., -0.46..., 0.02..., 0.06..., -0.50...,\n 0.16..., 0.11..., -0.73..., -0.30..., -0.00...])", + "code": "class DecisionTreeRegressor(RegressorMixin, BaseDecisionTree):\n \"\"\"A decision tree regressor.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n criterion : {\"mse\", \"friedman_mse\", \"mae\", \"poisson\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion and minimizes the L2 loss\n using the mean of each terminal node, \"friedman_mse\", which uses mean\n squared error with Friedman's improvement score for potential splits,\n \"mae\" for the mean absolute error, which minimizes the L1 loss using\n the median of each terminal node, and \"poisson\" which uses reduction in\n Poisson deviance to find splits.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n .. versionadded:: 0.24\n Poisson deviance criterion.\n\n splitter : {\"best\", \"random\"}, default=\"best\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\n max_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\n min_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. 
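Both the int and float forms of ``min_samples_leaf`` appear in the docstrings above, the float form meaning ``ceil(min_samples_leaf * n_samples)``; a tiny check of that conversion (the sample count is taken from the diabetes data used in the Examples):

from math import ceil

n_samples = 442  # size of load_diabetes, as in the Examples above

# int form: a literal minimum count per leaf.
min_samples_leaf_int = 5

# float form: a fraction of the training set, rounded up.
min_samples_leaf_frac = 0.01
print(ceil(min_samples_leaf_frac * n_samples))  # 5: both forms coincide here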
Samples have\n equal weight when sample_weight is not provided.\n\n max_features : int, float or {\"auto\", \"sqrt\", \"log2\"}, default=None\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\n random_state : int, RandomState instance or None, default=None\n Controls the randomness of the estimator. The features are always\n randomly permuted at each split, even if ``splitter`` is set to\n ``\"best\"``. When ``max_features < n_features``, the algorithm will\n select ``max_features`` at random at each split before finding the best\n split among them. But the best found split may vary across different\n runs, even if ``max_features=n_features``. That is the case, if the\n improvement of the criterion is identical for several splits and one\n split has to be selected at random. To obtain a deterministic behaviour\n during fitting, ``random_state`` has to be fixed to an integer.\n See :term:`Glossary ` for details.\n\n max_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\n min_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\n min_impurity_split : float, default=0\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\n ccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\n Attributes\n ----------\n feature_importances_ : ndarray of shape (n_features,)\n The feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the\n (normalized) total reduction of the criterion brought\n by that feature. 
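The ``ccp_alpha`` parameter documented above prunes the fitted tree; a sketch that picks a mid-path alpha from ``cost_complexity_pruning_path`` so the effect is visible at any data scale (the alpha choice is arbitrary):

from sklearn.datasets import load_diabetes
from sklearn.tree import DecisionTreeRegressor

X, y = load_diabetes(return_X_y=True)

unpruned = DecisionTreeRegressor(random_state=0).fit(X, y)

# Take an effective alpha from the middle of the pruning path.
path = unpruned.cost_complexity_pruning_path(X, y)
alpha = path.ccp_alphas[len(path.ccp_alphas) // 2]

# Subtrees whose effective alpha falls below ccp_alpha are removed.
pruned = DecisionTreeRegressor(random_state=0, ccp_alpha=alpha).fit(X, y)
print(unpruned.tree_.node_count, ">", pruned.tree_.node_count)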
It is also known as the Gini importance [4]_.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n max_features_ : int\n The inferred value of max_features.\n\n n_features_ : int\n The number of features when ``fit`` is performed.\n\n n_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\n tree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\n See Also\n --------\n DecisionTreeClassifier : A decision tree classifier.\n\n Notes\n -----\n The default values for the parameters controlling the size of the trees\n (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n unpruned trees which can potentially be very large on some data sets. To\n reduce memory consumption, the complexity and size of the trees should be\n controlled by setting those parameter values.\n\n References\n ----------\n\n .. [1] https://en.wikipedia.org/wiki/Decision_tree_learning\n\n .. [2] L. Breiman, J. Friedman, R. Olshen, and C. Stone, \"Classification\n and Regression Trees\", Wadsworth, Belmont, CA, 1984.\n\n .. [3] T. Hastie, R. Tibshirani and J. Friedman. \"Elements of Statistical\n Learning\", Springer, 2009.\n\n .. [4] L. Breiman, and A. Cutler, \"Random Forests\",\n https://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm\n\n Examples\n --------\n >>> from sklearn.datasets import load_diabetes\n >>> from sklearn.model_selection import cross_val_score\n >>> from sklearn.tree import DecisionTreeRegressor\n >>> X, y = load_diabetes(return_X_y=True)\n >>> regressor = DecisionTreeRegressor(random_state=0)\n >>> cross_val_score(regressor, X, y, cv=10)\n ... # doctest: +SKIP\n ...\n array([-0.39..., -0.46..., 0.02..., 0.06..., -0.50...,\n 0.16..., 0.11..., -0.73..., -0.30..., -0.00...])\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *,\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=None,\n random_state=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n ccp_alpha=0.0):\n super().__init__(\n criterion=criterion,\n splitter=splitter,\n max_depth=max_depth,\n min_samples_split=min_samples_split,\n min_samples_leaf=min_samples_leaf,\n min_weight_fraction_leaf=min_weight_fraction_leaf,\n max_features=max_features,\n max_leaf_nodes=max_leaf_nodes,\n random_state=random_state,\n min_impurity_decrease=min_impurity_decrease,\n min_impurity_split=min_impurity_split,\n ccp_alpha=ccp_alpha)\n\n def fit(self, X, y, sample_weight=None, check_input=True,\n X_idx_sorted=\"deprecated\"):\n \"\"\"Build a decision tree regressor from the training set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (real numbers). Use ``dtype=np.float64`` and\n ``order='C'`` for maximum efficiency.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. 
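The ``fit`` docstring above notes that every weighted quantity honors ``sample_weight``; a minimal sketch passing explicit weights (the 2x upweighting of the first half is arbitrary):

import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.tree import DecisionTreeRegressor

X, y = load_diabetes(return_X_y=True)

# Upweight the first half of the samples; these weights feed the weighted
# N, N_t, ... quantities and min_weight_fraction_leaf in the docstrings.
sample_weight = np.ones(len(y))
sample_weight[: len(y) // 2] = 2.0

reg = DecisionTreeRegressor(max_depth=3, random_state=0)
reg.fit(X, y, sample_weight=sample_weight)
print(reg.tree_.weighted_n_node_samples[0])  # root weight = sum of weights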
Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node.\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n X_idx_sorted : deprecated, default=\"deprecated\"\n This parameter is deprecated and has no effect.\n It will be removed in 1.1 (renaming of 0.26).\n\n .. deprecated :: 0.24\n\n Returns\n -------\n self : DecisionTreeRegressor\n Fitted estimator.\n \"\"\"\n\n super().fit(\n X, y,\n sample_weight=sample_weight,\n check_input=check_input,\n X_idx_sorted=X_idx_sorted)\n return self\n\n def _compute_partial_dependence_recursion(self, grid, target_features):\n \"\"\"Fast partial dependence computation.\n\n Parameters\n ----------\n grid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\n target_features : ndarray of shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\n Returns\n -------\n averaged_predictions : ndarray of shape (n_samples,)\n The value of the partial dependence function on each grid point.\n \"\"\"\n grid = np.asarray(grid, dtype=DTYPE, order='C')\n averaged_predictions = np.zeros(shape=grid.shape[0],\n dtype=np.float64, order='C')\n\n self.tree_.compute_partial_dependence(\n grid, target_features, averaged_predictions)\n return averaged_predictions", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier", + "name": "ExtraTreeClassifier", + "qname": "sklearn.tree._classes.ExtraTreeClassifier", + "decorators": [], + "superclasses": ["DecisionTreeClassifier"], + "methods": ["scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__"], + "is_public": false, + "reexported_by": [], + "description": "An extremely randomized tree classifier.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide `.", + "docstring": "An extremely randomized tree classifier.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n\nsplitter : {\"random\", \"best\"}, default=\"random\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\nmax_depth : int, default=None\n The maximum depth of the tree. 
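``_compute_partial_dependence_recursion`` above is a private fast path; assuming one goes through the public wrapper ``sklearn.inspection.partial_dependence`` with ``method="recursion"`` (the ``kind="average"`` Bunch return shown is 0.24 behavior), a sketch:

from sklearn.datasets import load_diabetes
from sklearn.inspection import partial_dependence
from sklearn.tree import DecisionTreeRegressor

X, y = load_diabetes(return_X_y=True)
reg = DecisionTreeRegressor(max_depth=3, random_state=0).fit(X, y)

# method="recursion" routes through the tree's private
# _compute_partial_dependence_recursion shown above.
pd_result = partial_dependence(reg, X, features=[0],
                               method="recursion", kind="average")
print(pd_result["average"].shape)  # (1, n_grid_points)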
If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to pick randomly the `max_features` used at each split.\n See :term:`Glossary ` for details.\n\nmax_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. 
The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nclass_weight : dict, list of dict or \"balanced\", default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If None, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of ndarray\n The classes labels (single output problem),\n or a list of arrays of class labels (multi-output problem).\n\nmax_features_ : int\n The inferred value of max_features.\n\nn_classes_ : int or list of int\n The number of classes (for single output problems),\n or a list containing the number of classes for each\n output (for multi-output problems).\n\nfeature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\nSee Also\n--------\nExtraTreeRegressor : An extremely randomized tree regressor.\nsklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\nsklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] P. Geurts, D. Ernst., and L. 
Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import BaggingClassifier\n>>> from sklearn.tree import ExtraTreeClassifier\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=0)\n>>> extra_tree = ExtraTreeClassifier(random_state=0)\n>>> cls = BaggingClassifier(extra_tree, random_state=0).fit(\n... X_train, y_train)\n>>> cls.score(X_test, y_test)\n0.8947...", + "code": "class ExtraTreeClassifier(DecisionTreeClassifier):\n \"\"\"An extremely randomized tree classifier.\n\n Extra-trees differ from classic decision trees in the way they are built.\n When looking for the best split to separate the samples of a node into two\n groups, random splits are drawn for each of the `max_features` randomly\n selected features and the best split among those is chosen. When\n `max_features` is set 1, this amounts to building a totally random\n decision tree.\n\n Warning: Extra-trees should only be used within ensemble methods.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n criterion : {\"gini\", \"entropy\"}, default=\"gini\"\n The function to measure the quality of a split. Supported criteria are\n \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n\n splitter : {\"random\", \"best\"}, default=\"random\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\n max_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\n min_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. 
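The class description above says ``max_features=1`` builds a totally random tree, and the warning advises using extra-trees only inside an ensemble; a sketch combining both points (``n_estimators=50`` is arbitrary):

from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import ExtraTreeClassifier

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# max_features=1: one random feature and a random threshold per split,
# i.e. a totally random tree; wrapped in Bagging per the docstring warning.
totally_random = ExtraTreeClassifier(max_features=1, random_state=0)
ensemble = BaggingClassifier(totally_random, n_estimators=50,
                             random_state=0).fit(X_train, y_train)
print(ensemble.score(X_test, y_test))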
Samples have\n equal weight when sample_weight is not provided.\n\n max_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\n random_state : int, RandomState instance or None, default=None\n Used to pick randomly the `max_features` used at each split.\n See :term:`Glossary ` for details.\n\n max_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\n min_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\n min_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\n class_weight : dict, list of dict or \"balanced\", default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If None, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n For multi-output, the weights of each column of y will be multiplied.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n ccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. 
By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\n Attributes\n ----------\n classes_ : ndarray of shape (n_classes,) or list of ndarray\n The classes labels (single output problem),\n or a list of arrays of class labels (multi-output problem).\n\n max_features_ : int\n The inferred value of max_features.\n\n n_classes_ : int or list of int\n The number of classes (for single output problems),\n or a list containing the number of classes for each\n output (for multi-output problems).\n\n feature_importances_ : ndarray of shape (n_features,)\n The impurity-based feature importances.\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n n_features_ : int\n The number of features when ``fit`` is performed.\n\n n_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\n tree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\n See Also\n --------\n ExtraTreeRegressor : An extremely randomized tree regressor.\n sklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\n sklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n\n Notes\n -----\n The default values for the parameters controlling the size of the trees\n (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n unpruned trees which can potentially be very large on some data sets. To\n reduce memory consumption, the complexity and size of the trees should be\n controlled by setting those parameter values.\n\n References\n ----------\n\n .. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.model_selection import train_test_split\n >>> from sklearn.ensemble import BaggingClassifier\n >>> from sklearn.tree import ExtraTreeClassifier\n >>> X, y = load_iris(return_X_y=True)\n >>> X_train, X_test, y_train, y_test = train_test_split(\n ... X, y, random_state=0)\n >>> extra_tree = ExtraTreeClassifier(random_state=0)\n >>> cls = BaggingClassifier(extra_tree, random_state=0).fit(\n ... 
X_train, y_train)\n >>> cls.score(X_test, y_test)\n 0.8947...\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *,\n criterion=\"gini\",\n splitter=\"random\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=\"auto\",\n random_state=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n class_weight=None,\n ccp_alpha=0.0):\n super().__init__(\n criterion=criterion,\n splitter=splitter,\n max_depth=max_depth,\n min_samples_split=min_samples_split,\n min_samples_leaf=min_samples_leaf,\n min_weight_fraction_leaf=min_weight_fraction_leaf,\n max_features=max_features,\n max_leaf_nodes=max_leaf_nodes,\n class_weight=class_weight,\n min_impurity_decrease=min_impurity_decrease,\n min_impurity_split=min_impurity_split,\n random_state=random_state,\n ccp_alpha=ccp_alpha)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor", + "name": "ExtraTreeRegressor", + "qname": "sklearn.tree._classes.ExtraTreeRegressor", + "decorators": [], + "superclasses": ["DecisionTreeRegressor"], + "methods": ["scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__"], + "is_public": false, + "reexported_by": [], + "description": "An extremely randomized tree regressor.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide `.", + "docstring": "An extremely randomized tree regressor.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncriterion : {\"mse\", \"friedman_mse\", \"mae\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion and \"mae\" for the mean\n absolute error.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n .. versionadded:: 0.24\n Poisson deviance criterion.\n\nsplitter : {\"random\", \"best\"}, default=\"random\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\nmax_depth : int, default=None\n The maximum depth of the tree. If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\nmin_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. 
versionchanged:: 0.18\n Added float values for fractions.\n\nmin_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\nmin_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\nmax_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to pick randomly the `max_features` used at each split.\n See :term:`Glossary ` for details.\n\nmin_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\nmin_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\nmax_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\nccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. 
versionadded:: 0.22\n\nAttributes\n----------\nmax_features_ : int\n The inferred value of max_features.\n\nn_features_ : int\n The number of features when ``fit`` is performed.\n\nfeature_importances_ : ndarray of shape (n_features,)\n Return impurity-based feature importances (the higher, the more\n important the feature).\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\nn_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\ntree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\nSee Also\n--------\nExtraTreeClassifier : An extremely randomized tree classifier.\nsklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\nsklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n\nNotes\n-----\nThe default values for the parameters controlling the size of the trees\n(e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\nunpruned trees which can potentially be very large on some data sets. To\nreduce memory consumption, the complexity and size of the trees should be\ncontrolled by setting those parameter values.\n\nReferences\n----------\n\n.. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n\nExamples\n--------\n>>> from sklearn.datasets import load_diabetes\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.ensemble import BaggingRegressor\n>>> from sklearn.tree import ExtraTreeRegressor\n>>> X, y = load_diabetes(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, random_state=0)\n>>> extra_tree = ExtraTreeRegressor(random_state=0)\n>>> reg = BaggingRegressor(extra_tree, random_state=0).fit(\n... X_train, y_train)\n>>> reg.score(X_test, y_test)\n0.33...", + "code": "class ExtraTreeRegressor(DecisionTreeRegressor):\n \"\"\"An extremely randomized tree regressor.\n\n Extra-trees differ from classic decision trees in the way they are built.\n When looking for the best split to separate the samples of a node into two\n groups, random splits are drawn for each of the `max_features` randomly\n selected features and the best split among those is chosen. When\n `max_features` is set 1, this amounts to building a totally random\n decision tree.\n\n Warning: Extra-trees should only be used within ensemble methods.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n criterion : {\"mse\", \"friedman_mse\", \"mae\"}, default=\"mse\"\n The function to measure the quality of a split. Supported criteria\n are \"mse\" for the mean squared error, which is equal to variance\n reduction as feature selection criterion and \"mae\" for the mean\n absolute error.\n\n .. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n .. versionadded:: 0.24\n Poisson deviance criterion.\n\n splitter : {\"random\", \"best\"}, default=\"random\"\n The strategy used to choose the split at each node. Supported\n strategies are \"best\" to choose the best split and \"random\" to choose\n the best random split.\n\n max_depth : int, default=None\n The maximum depth of the tree. 
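Editor's note: the `max_features` rules listed above are easy to misread, so here is a hypothetical helper that mirrors them. This is not a public sklearn function; the resolution happens inside `fit`.

```python
from math import log2, sqrt

def resolve_max_features(max_features, n_features):
    """Map the documented max_features spec to a feature count (sketch only)."""
    if max_features is None:
        return n_features
    if max_features == "auto":
        return n_features          # NB: ExtraTreeClassifier maps "auto" to sqrt instead
    if max_features == "sqrt":
        return max(1, int(sqrt(n_features)))
    if max_features == "log2":
        return max(1, int(log2(n_features)))
    if isinstance(max_features, float):
        return int(max_features * n_features)   # fraction of n_features
    return max_features                          # plain int, used as-is

print(resolve_max_features(0.5, 10))     # 5
print(resolve_max_features("sqrt", 10))  # 3
```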
If None, then nodes are expanded until\n all leaves are pure or until all leaves contain less than\n min_samples_split samples.\n\n min_samples_split : int or float, default=2\n The minimum number of samples required to split an internal node:\n\n - If int, then consider `min_samples_split` as the minimum number.\n - If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_samples_leaf : int or float, default=1\n The minimum number of samples required to be at a leaf node.\n A split point at any depth will only be considered if it leaves at\n least ``min_samples_leaf`` training samples in each of the left and\n right branches. This may have the effect of smoothing the model,\n especially in regression.\n\n - If int, then consider `min_samples_leaf` as the minimum number.\n - If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n .. versionchanged:: 0.18\n Added float values for fractions.\n\n min_weight_fraction_leaf : float, default=0.0\n The minimum weighted fraction of the sum total of weights (of all\n the input samples) required to be at a leaf node. Samples have\n equal weight when sample_weight is not provided.\n\n max_features : int, float, {\"auto\", \"sqrt\", \"log2\"} or None, default=\"auto\"\n The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=n_features`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n Note: the search for a split does not stop until at least one\n valid partition of the node samples is found, even if it requires to\n effectively inspect more than ``max_features`` features.\n\n random_state : int, RandomState instance or None, default=None\n Used to pick randomly the `max_features` used at each split.\n See :term:`Glossary ` for details.\n\n min_impurity_decrease : float, default=0.0\n A node will be split if this split induces a decrease of the impurity\n greater than or equal to this value.\n\n The weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\n where ``N`` is the total number of samples, ``N_t`` is the number of\n samples at the current node, ``N_t_L`` is the number of samples in the\n left child, and ``N_t_R`` is the number of samples in the right child.\n\n ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n if ``sample_weight`` is passed.\n\n .. versionadded:: 0.19\n\n min_impurity_split : float, default=None\n Threshold for early stopping in tree growth. A node will split\n if its impurity is above the threshold, otherwise it is a leaf.\n\n .. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. 
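Editor's note: `min_samples_split` and `min_samples_leaf` share the int-or-fraction convention shown above. A tiny sketch of that conversion; the helper name is ours.

```python
from math import ceil

def resolve_min_samples(value, n_samples):
    """Turn the documented int-or-float spec into a sample count (sketch only)."""
    if isinstance(value, float):
        return ceil(value * n_samples)  # fractions become ceil(value * n_samples)
    return value                        # ints are used as-is

print(resolve_min_samples(2, 150))     # 2
print(resolve_min_samples(0.05, 150))  # 8  (ceil(7.5))
```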
The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead.\n\n max_leaf_nodes : int, default=None\n Grow a tree with ``max_leaf_nodes`` in best-first fashion.\n Best nodes are defined as relative reduction in impurity.\n If None then unlimited number of leaf nodes.\n\n ccp_alpha : non-negative float, default=0.0\n Complexity parameter used for Minimal Cost-Complexity Pruning. The\n subtree with the largest cost complexity that is smaller than\n ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n :ref:`minimal_cost_complexity_pruning` for details.\n\n .. versionadded:: 0.22\n\n Attributes\n ----------\n max_features_ : int\n The inferred value of max_features.\n\n n_features_ : int\n The number of features when ``fit`` is performed.\n\n feature_importances_ : ndarray of shape (n_features,)\n Return impurity-based feature importances (the higher, the more\n important the feature).\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n n_outputs_ : int\n The number of outputs when ``fit`` is performed.\n\n tree_ : Tree instance\n The underlying Tree object. Please refer to\n ``help(sklearn.tree._tree.Tree)`` for attributes of Tree object and\n :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py`\n for basic usage of these attributes.\n\n See Also\n --------\n ExtraTreeClassifier : An extremely randomized tree classifier.\n sklearn.ensemble.ExtraTreesClassifier : An extra-trees classifier.\n sklearn.ensemble.ExtraTreesRegressor : An extra-trees regressor.\n\n Notes\n -----\n The default values for the parameters controlling the size of the trees\n (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n unpruned trees which can potentially be very large on some data sets. To\n reduce memory consumption, the complexity and size of the trees should be\n controlled by setting those parameter values.\n\n References\n ----------\n\n .. [1] P. Geurts, D. Ernst., and L. Wehenkel, \"Extremely randomized trees\",\n Machine Learning, 63(1), 3-42, 2006.\n\n Examples\n --------\n >>> from sklearn.datasets import load_diabetes\n >>> from sklearn.model_selection import train_test_split\n >>> from sklearn.ensemble import BaggingRegressor\n >>> from sklearn.tree import ExtraTreeRegressor\n >>> X, y = load_diabetes(return_X_y=True)\n >>> X_train, X_test, y_train, y_test = train_test_split(\n ... X, y, random_state=0)\n >>> extra_tree = ExtraTreeRegressor(random_state=0)\n >>> reg = BaggingRegressor(extra_tree, random_state=0).fit(\n ... 
X_train, y_train)\n >>> reg.score(X_test, y_test)\n 0.33...\n \"\"\"\n @_deprecate_positional_args\n def __init__(self, *,\n criterion=\"mse\",\n splitter=\"random\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=\"auto\",\n random_state=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n max_leaf_nodes=None,\n ccp_alpha=0.0):\n super().__init__(\n criterion=criterion,\n splitter=splitter,\n max_depth=max_depth,\n min_samples_split=min_samples_split,\n min_samples_leaf=min_samples_leaf,\n min_weight_fraction_leaf=min_weight_fraction_leaf,\n max_features=max_features,\n max_leaf_nodes=max_leaf_nodes,\n min_impurity_decrease=min_impurity_decrease,\n min_impurity_split=min_impurity_split,\n random_state=random_state,\n ccp_alpha=ccp_alpha)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.tree._export/Sentinel", + "name": "Sentinel", + "qname": "sklearn.tree._export.Sentinel", + "decorators": [], + "superclasses": [], + "methods": ["scikit-learn/sklearn.tree._export/Sentinel/__repr__"], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class Sentinel:\n def __repr__(self):\n return '\"tree.dot\"'", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter", + "name": "_BaseTreeExporter", + "qname": "sklearn.tree._export._BaseTreeExporter", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__", + "scikit-learn/sklearn.tree._export/_BaseTreeExporter/get_color", + "scikit-learn/sklearn.tree._export/_BaseTreeExporter/get_fill_color", + "scikit-learn/sklearn.tree._export/_BaseTreeExporter/node_to_str" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _BaseTreeExporter:\n def __init__(self, max_depth=None, feature_names=None,\n class_names=None, label='all', filled=False,\n impurity=True, node_ids=False,\n proportion=False, rotate=False, rounded=False,\n precision=3, fontsize=None):\n self.max_depth = max_depth\n self.feature_names = feature_names\n self.class_names = class_names\n self.label = label\n self.filled = filled\n self.impurity = impurity\n self.node_ids = node_ids\n self.proportion = proportion\n self.rotate = rotate\n self.rounded = rounded\n self.precision = precision\n self.fontsize = fontsize\n\n def get_color(self, value):\n # Find the appropriate color & intensity for a node\n if self.colors['bounds'] is None:\n # Classification tree\n color = list(self.colors['rgb'][np.argmax(value)])\n sorted_values = sorted(value, reverse=True)\n if len(sorted_values) == 1:\n alpha = 0\n else:\n alpha = ((sorted_values[0] - sorted_values[1])\n / (1 - sorted_values[1]))\n else:\n # Regression tree or multi-output\n color = list(self.colors['rgb'][0])\n alpha = ((value - self.colors['bounds'][0]) /\n (self.colors['bounds'][1] - self.colors['bounds'][0]))\n # unpack numpy scalars\n alpha = float(alpha)\n # compute the color as alpha against white\n color = [int(round(alpha * c + (1 - alpha) * 255, 0)) for c in color]\n # Return html color code in #RRGGBB format\n return '#%2x%2x%2x' % tuple(color)\n\n def get_fill_color(self, tree, node_id):\n # Fetch appropriate color for node\n if 'rgb' not in self.colors:\n # Initialize colors and bounds if required\n self.colors['rgb'] = _color_brew(tree.n_classes[0])\n if tree.n_outputs != 1:\n # Find max and min impurities for multi-output\n 
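Editor's note: `get_color` above blends the majority-class colour toward white in proportion to `1 - alpha`. Below is a standalone sketch of that blend with our own helper name; we format with `%02x` for unambiguous hex, whereas the method itself uses the `'#%2x%2x%2x'` format string shown above.

```python
def blend_with_white(rgb, alpha):
    """Blend an (r, g, b) tuple toward white; alpha=1 keeps the colour."""
    return tuple(int(round(alpha * c + (1 - alpha) * 255)) for c in rgb)

for alpha in (0.9, 0.1):                # strong majority vs. near-tie at a node
    r, g, b = blend_with_white((52, 101, 164), alpha)
    print('#%02x%02x%02x' % (r, g, b))  # fades toward white as alpha drops
```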
self.colors['bounds'] = (np.min(-tree.impurity),\n np.max(-tree.impurity))\n elif (tree.n_classes[0] == 1 and\n len(np.unique(tree.value)) != 1):\n # Find max and min values in leaf nodes for regression\n self.colors['bounds'] = (np.min(tree.value),\n np.max(tree.value))\n if tree.n_outputs == 1:\n node_val = (tree.value[node_id][0, :] /\n tree.weighted_n_node_samples[node_id])\n if tree.n_classes[0] == 1:\n # Regression\n node_val = tree.value[node_id][0, :]\n else:\n # If multi-output color node by impurity\n node_val = -tree.impurity[node_id]\n return self.get_color(node_val)\n\n def node_to_str(self, tree, node_id, criterion):\n # Generate the node content string\n if tree.n_outputs == 1:\n value = tree.value[node_id][0, :]\n else:\n value = tree.value[node_id]\n\n # Should labels be shown?\n labels = (self.label == 'root' and node_id == 0) or self.label == 'all'\n\n characters = self.characters\n node_string = characters[-1]\n\n # Write node ID\n if self.node_ids:\n if labels:\n node_string += 'node '\n node_string += characters[0] + str(node_id) + characters[4]\n\n # Write decision criteria\n if tree.children_left[node_id] != _tree.TREE_LEAF:\n # Always write node decision criteria, except for leaves\n if self.feature_names is not None:\n feature = self.feature_names[tree.feature[node_id]]\n else:\n feature = \"X%s%s%s\" % (characters[1],\n tree.feature[node_id],\n characters[2])\n node_string += '%s %s %s%s' % (feature,\n characters[3],\n round(tree.threshold[node_id],\n self.precision),\n characters[4])\n\n # Write impurity\n if self.impurity:\n if isinstance(criterion, _criterion.FriedmanMSE):\n criterion = \"friedman_mse\"\n elif not isinstance(criterion, str):\n criterion = \"impurity\"\n if labels:\n node_string += '%s = ' % criterion\n node_string += (str(round(tree.impurity[node_id], self.precision))\n + characters[4])\n\n # Write node sample count\n if labels:\n node_string += 'samples = '\n if self.proportion:\n percent = (100. 
* tree.n_node_samples[node_id] /\n float(tree.n_node_samples[0]))\n node_string += (str(round(percent, 1)) + '%' +\n characters[4])\n else:\n node_string += (str(tree.n_node_samples[node_id]) +\n characters[4])\n\n # Write node class distribution / regression value\n if self.proportion and tree.n_classes[0] != 1:\n # For classification this will show the proportion of samples\n value = value / tree.weighted_n_node_samples[node_id]\n if labels:\n node_string += 'value = '\n if tree.n_classes[0] == 1:\n # Regression\n value_text = np.around(value, self.precision)\n elif self.proportion:\n # Classification\n value_text = np.around(value, self.precision)\n elif np.all(np.equal(np.mod(value, 1), 0)):\n # Classification without floating-point weights\n value_text = value.astype(int)\n else:\n # Classification with floating-point weights\n value_text = np.around(value, self.precision)\n # Strip whitespace\n value_text = str(value_text.astype('S32')).replace(\"b'\", \"'\")\n value_text = value_text.replace(\"' '\", \", \").replace(\"'\", \"\")\n if tree.n_classes[0] == 1 and tree.n_outputs == 1:\n value_text = value_text.replace(\"[\", \"\").replace(\"]\", \"\")\n value_text = value_text.replace(\"\\n \", characters[4])\n node_string += value_text + characters[4]\n\n # Write node majority class\n if (self.class_names is not None and\n tree.n_classes[0] != 1 and\n tree.n_outputs == 1):\n # Only done for single-output classification trees\n if labels:\n node_string += 'class = '\n if self.class_names is not True:\n class_name = self.class_names[np.argmax(value)]\n else:\n class_name = \"y%s%s%s\" % (characters[1],\n np.argmax(value),\n characters[2])\n node_string += class_name\n\n # Clean up any trailing newlines\n if node_string.endswith(characters[4]):\n node_string = node_string[:-len(characters[4])]\n\n return node_string + characters[5]", + "instance_attributes": [ + { + "name": "label", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "filled", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "impurity", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "node_ids", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "proportion", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "rotate", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "rounded", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "precision", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter", + "name": "_DOTTreeExporter", + "qname": "sklearn.tree._export._DOTTreeExporter", + "decorators": [], + "superclasses": ["_BaseTreeExporter"], + "methods": [ + "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__", + "scikit-learn/sklearn.tree._export/_DOTTreeExporter/export", + "scikit-learn/sklearn.tree._export/_DOTTreeExporter/tail", + "scikit-learn/sklearn.tree._export/_DOTTreeExporter/head", + "scikit-learn/sklearn.tree._export/_DOTTreeExporter/recurse" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _DOTTreeExporter(_BaseTreeExporter):\n def __init__(self, out_file=SENTINEL, max_depth=None,\n feature_names=None, class_names=None, label='all',\n filled=False, leaves_parallel=False, impurity=True,\n node_ids=False, proportion=False, rotate=False, rounded=False,\n special_characters=False, precision=3):\n\n 
super().__init__(\n max_depth=max_depth, feature_names=feature_names,\n class_names=class_names, label=label, filled=filled,\n impurity=impurity,\n node_ids=node_ids, proportion=proportion, rotate=rotate,\n rounded=rounded,\n precision=precision)\n self.leaves_parallel = leaves_parallel\n self.out_file = out_file\n self.special_characters = special_characters\n\n # PostScript compatibility for special characters\n if special_characters:\n self.characters = ['&#35;', '<SUB>', '</SUB>', '&le;', '<br/>
',\n '>', '<']\n else:\n self.characters = ['#', '[', ']', '<=', '\\\\n', '\"', '\"']\n\n # validate\n if isinstance(precision, Integral):\n if precision < 0:\n raise ValueError(\"'precision' should be greater or equal to 0.\"\n \" Got {} instead.\".format(precision))\n else:\n raise ValueError(\"'precision' should be an integer. Got {}\"\n \" instead.\".format(type(precision)))\n\n # The depth of each node for plotting with 'leaf' option\n self.ranks = {'leaves': []}\n # The colors to render each node with\n self.colors = {'bounds': None}\n\n def export(self, decision_tree):\n # Check length of feature_names before getting into the tree node\n # Raise error if length of feature_names does not match\n # n_features_ in the decision_tree\n if self.feature_names is not None:\n if len(self.feature_names) != decision_tree.n_features_:\n raise ValueError(\"Length of feature_names, %d \"\n \"does not match number of features, %d\"\n % (len(self.feature_names),\n decision_tree.n_features_))\n # each part writes to out_file\n self.head()\n # Now recurse the tree and add node & edge attributes\n if isinstance(decision_tree, _tree.Tree):\n self.recurse(decision_tree, 0, criterion=\"impurity\")\n else:\n self.recurse(decision_tree.tree_, 0,\n criterion=decision_tree.criterion)\n\n self.tail()\n\n def tail(self):\n # If required, draw leaf nodes at same depth as each other\n if self.leaves_parallel:\n for rank in sorted(self.ranks):\n self.out_file.write(\n \"{rank=same ; \" +\n \"; \".join(r for r in self.ranks[rank]) + \"} ;\\n\")\n self.out_file.write(\"}\")\n\n def head(self):\n self.out_file.write('digraph Tree {\\n')\n\n # Specify node aesthetics\n self.out_file.write('node [shape=box')\n rounded_filled = []\n if self.filled:\n rounded_filled.append('filled')\n if self.rounded:\n rounded_filled.append('rounded')\n if len(rounded_filled) > 0:\n self.out_file.write(\n ', style=\"%s\", color=\"black\"'\n % \", \".join(rounded_filled))\n if self.rounded:\n self.out_file.write(', fontname=helvetica')\n self.out_file.write('] ;\\n')\n\n # Specify graph & edge aesthetics\n if self.leaves_parallel:\n self.out_file.write(\n 'graph [ranksep=equally, splines=polyline] ;\\n')\n if self.rounded:\n self.out_file.write('edge [fontname=helvetica] ;\\n')\n if self.rotate:\n self.out_file.write('rankdir=LR ;\\n')\n\n def recurse(self, tree, node_id, criterion, parent=None, depth=0):\n if node_id == _tree.TREE_LEAF:\n raise ValueError(\"Invalid node_id %s\" % _tree.TREE_LEAF)\n\n left_child = tree.children_left[node_id]\n right_child = tree.children_right[node_id]\n\n # Add node with description\n if self.max_depth is None or depth <= self.max_depth:\n\n # Collect ranks for 'leaf' option in plot_options\n if left_child == _tree.TREE_LEAF:\n self.ranks['leaves'].append(str(node_id))\n elif str(depth) not in self.ranks:\n self.ranks[str(depth)] = [str(node_id)]\n else:\n self.ranks[str(depth)].append(str(node_id))\n\n self.out_file.write(\n '%d [label=%s' % (node_id, self.node_to_str(tree, node_id,\n criterion)))\n\n if self.filled:\n self.out_file.write(', fillcolor=\"%s\"'\n % self.get_fill_color(tree, node_id))\n self.out_file.write('] ;\\n')\n\n if parent is not None:\n # Add edge to parent\n self.out_file.write('%d -> %d' % (parent, node_id))\n if parent == 0:\n # Draw True/False labels if parent is root node\n angles = np.array([45, -45]) * ((self.rotate - .5) * -2)\n self.out_file.write(' [labeldistance=2.5, labelangle=')\n if node_id == 1:\n self.out_file.write('%d, headlabel=\"True\"]' %\n angles[0])\n 
else:\n self.out_file.write('%d, headlabel=\"False\"]' %\n angles[1])\n self.out_file.write(' ;\\n')\n\n if left_child != _tree.TREE_LEAF:\n self.recurse(tree, left_child, criterion=criterion,\n parent=node_id, depth=depth + 1)\n self.recurse(tree, right_child, criterion=criterion,\n parent=node_id, depth=depth + 1)\n\n else:\n self.ranks['leaves'].append(str(node_id))\n\n self.out_file.write('%d [label=\"(...)\"' % node_id)\n if self.filled:\n # color cropped nodes grey\n self.out_file.write(', fillcolor=\"#C0C0C0\"')\n self.out_file.write('] ;\\n' % node_id)\n\n if parent is not None:\n # Add edge to parent\n self.out_file.write('%d -> %d ;\\n' % (parent, node_id))", + "instance_attributes": [ + { + "name": "leaves_parallel", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "out_file", + "types": { + "kind": "NamedType", + "name": "Sentinel" + } + }, + { + "name": "special_characters", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "characters", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "ranks", + "types": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "name": "colors", + "types": { + "kind": "NamedType", + "name": "dict" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter", + "name": "_MPLTreeExporter", + "qname": "sklearn.tree._export._MPLTreeExporter", + "decorators": [], + "superclasses": ["_BaseTreeExporter"], + "methods": [ + "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__", + "scikit-learn/sklearn.tree._export/_MPLTreeExporter/_make_tree", + "scikit-learn/sklearn.tree._export/_MPLTreeExporter/export", + "scikit-learn/sklearn.tree._export/_MPLTreeExporter/recurse" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _MPLTreeExporter(_BaseTreeExporter):\n def __init__(self, max_depth=None, feature_names=None,\n class_names=None, label='all', filled=False,\n impurity=True, node_ids=False,\n proportion=False, rotate=False, rounded=False,\n precision=3, fontsize=None):\n\n super().__init__(\n max_depth=max_depth, feature_names=feature_names,\n class_names=class_names, label=label, filled=filled,\n impurity=impurity, node_ids=node_ids, proportion=proportion,\n rotate=rotate, rounded=rounded, precision=precision)\n self.fontsize = fontsize\n\n # validate\n if isinstance(precision, Integral):\n if precision < 0:\n raise ValueError(\"'precision' should be greater or equal to 0.\"\n \" Got {} instead.\".format(precision))\n else:\n raise ValueError(\"'precision' should be an integer. 
Got {}\"\n \" instead.\".format(type(precision)))\n\n # The depth of each node for plotting with 'leaf' option\n self.ranks = {'leaves': []}\n # The colors to render each node with\n self.colors = {'bounds': None}\n\n self.characters = ['#', '[', ']', '<=', '\\n', '', '']\n self.bbox_args = dict()\n if self.rounded:\n self.bbox_args['boxstyle'] = \"round\"\n\n self.arrow_args = dict(arrowstyle=\"<-\")\n\n def _make_tree(self, node_id, et, criterion, depth=0):\n # traverses _tree.Tree recursively, builds intermediate\n # \"_reingold_tilford.Tree\" object\n name = self.node_to_str(et, node_id, criterion=criterion)\n if (et.children_left[node_id] != _tree.TREE_LEAF\n and (self.max_depth is None or depth <= self.max_depth)):\n children = [self._make_tree(et.children_left[node_id], et,\n criterion, depth=depth + 1),\n self._make_tree(et.children_right[node_id], et,\n criterion, depth=depth + 1)]\n else:\n return Tree(name, node_id)\n return Tree(name, node_id, *children)\n\n def export(self, decision_tree, ax=None):\n import matplotlib.pyplot as plt\n from matplotlib.text import Annotation\n\n if ax is None:\n ax = plt.gca()\n ax.clear()\n ax.set_axis_off()\n my_tree = self._make_tree(0, decision_tree.tree_,\n decision_tree.criterion)\n draw_tree = buchheim(my_tree)\n\n # important to make sure we're still\n # inside the axis after drawing the box\n # this makes sense because the width of a box\n # is about the same as the distance between boxes\n max_x, max_y = draw_tree.max_extents() + 1\n ax_width = ax.get_window_extent().width\n ax_height = ax.get_window_extent().height\n\n scale_x = ax_width / max_x\n scale_y = ax_height / max_y\n\n self.recurse(draw_tree, decision_tree.tree_, ax,\n scale_x, scale_y, ax_height)\n\n anns = [ann for ann in ax.get_children()\n if isinstance(ann, Annotation)]\n\n # update sizes of all bboxes\n renderer = ax.figure.canvas.get_renderer()\n\n for ann in anns:\n ann.update_bbox_position_size(renderer)\n\n if self.fontsize is None:\n # get figure to data transform\n # adjust fontsize to avoid overlap\n # get max box width and height\n extents = [ann.get_bbox_patch().get_window_extent()\n for ann in anns]\n max_width = max([extent.width for extent in extents])\n max_height = max([extent.height for extent in extents])\n # width should be around scale_x in axis coordinates\n size = anns[0].get_fontsize() * min(scale_x / max_width,\n scale_y / max_height)\n for ann in anns:\n ann.set_fontsize(size)\n\n return anns\n\n def recurse(self, node, tree, ax, scale_x, scale_y, height, depth=0):\n import matplotlib.pyplot as plt\n kwargs = dict(bbox=self.bbox_args.copy(), ha='center', va='center',\n zorder=100 - 10 * depth, xycoords='axes pixels',\n arrowprops=self.arrow_args.copy())\n kwargs['arrowprops']['edgecolor'] = plt.rcParams['text.color']\n\n if self.fontsize is not None:\n kwargs['fontsize'] = self.fontsize\n\n # offset things by .5 to center them in plot\n xy = ((node.x + .5) * scale_x, height - (node.y + .5) * scale_y)\n\n if self.max_depth is None or depth <= self.max_depth:\n if self.filled:\n kwargs['bbox']['fc'] = self.get_fill_color(tree,\n node.tree.node_id)\n else:\n kwargs['bbox']['fc'] = ax.get_facecolor()\n\n if node.parent is None:\n # root\n ax.annotate(node.tree.label, xy, **kwargs)\n else:\n xy_parent = ((node.parent.x + .5) * scale_x,\n height - (node.parent.y + .5) * scale_y)\n ax.annotate(node.tree.label, xy_parent, xy, **kwargs)\n for child in node.children:\n self.recurse(child, tree, ax, scale_x, scale_y, height,\n depth=depth + 1)\n\n else:\n 
xy_parent = ((node.parent.x + .5) * scale_x,\n height - (node.parent.y + .5) * scale_y)\n kwargs['bbox']['fc'] = 'grey'\n ax.annotate(\"\\n (...) \\n\", xy_parent, xy, **kwargs)", + "instance_attributes": [ + { + "name": "ranks", + "types": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "name": "colors", + "types": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "name": "characters", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "bbox_args", + "types": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "name": "arrow_args", + "types": { + "kind": "NamedType", + "name": "dict" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree", + "name": "DrawTree", + "qname": "sklearn.tree._reingold_tilford.DrawTree", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/__init__", + "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/left", + "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/right", + "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/lbrother", + "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/get_lmost_sibling", + "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/__str__", + "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/__repr__", + "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/max_extents" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class DrawTree:\n def __init__(self, tree, parent=None, depth=0, number=1):\n self.x = -1.\n self.y = depth\n self.tree = tree\n self.children = [DrawTree(c, self, depth + 1, i + 1)\n for i, c\n in enumerate(tree.children)]\n self.parent = parent\n self.thread = None\n self.mod = 0\n self.ancestor = self\n self.change = self.shift = 0\n self._lmost_sibling = None\n # this is the number of the node in its group of siblings 1..n\n self.number = number\n\n def left(self):\n return self.thread or len(self.children) and self.children[0]\n\n def right(self):\n return self.thread or len(self.children) and self.children[-1]\n\n def lbrother(self):\n n = None\n if self.parent:\n for node in self.parent.children:\n if node == self:\n return n\n else:\n n = node\n return n\n\n def get_lmost_sibling(self):\n if not self._lmost_sibling and self.parent and self != \\\n self.parent.children[0]:\n self._lmost_sibling = self.parent.children[0]\n return self._lmost_sibling\n lmost_sibling = property(get_lmost_sibling)\n\n def __str__(self):\n return \"%s: x=%s mod=%s\" % (self.tree, self.x, self.mod)\n\n def __repr__(self):\n return self.__str__()\n\n def max_extents(self):\n extents = [c.max_extents() for c in self. 
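Editor's note: both exporter classes above are private (`is_public: false`); the supported entry points that drive them are `sklearn.tree.export_graphviz` (for `_DOTTreeExporter`) and `sklearn.tree.plot_tree` (for `_MPLTreeExporter`). A minimal usage sketch of both; the options shown map onto the `filled`, `rounded` and `special_characters` attributes handled in `__init__` and `head()` above.

```python
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_graphviz, plot_tree

X, y = load_iris(return_X_y=True)
clf = DecisionTreeClassifier(max_depth=2, random_state=0).fit(X, y)

# _DOTTreeExporter via export_graphviz: out_file=None returns the DOT source.
dot_source = export_graphviz(clf, out_file=None,
                             filled=True, rounded=True,
                             special_characters=True)
print(dot_source.splitlines()[0])  # 'digraph Tree {'

# _MPLTreeExporter via plot_tree: returns the matplotlib Annotations it drew.
fig, ax = plt.subplots(figsize=(8, 5))
annotations = plot_tree(clf, ax=ax, filled=True, rounded=True)
print(len(annotations))
plt.show()
```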
children]\n extents.append((self.x, self.y))\n return np.max(extents, axis=0)", + "instance_attributes": [ + { + "name": "x", + "types": { + "kind": "NamedType", + "name": "float" + } + }, + { + "name": "y", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "mod", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "ancestor", + "types": { + "kind": "NamedType", + "name": "DrawTree" + } + }, + { + "name": "change", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "shift", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "number", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/Tree", + "name": "Tree", + "qname": "sklearn.tree._reingold_tilford.Tree", + "decorators": [], + "superclasses": [], + "methods": ["scikit-learn/sklearn.tree._reingold_tilford/Tree/__init__"], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class Tree:\n def __init__(self, label=\"\", node_id=-1, *children):\n self.label = label\n self.node_id = node_id\n if children:\n self.children = children\n else:\n self.children = []", + "instance_attributes": [ + { + "name": "label", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "node_id", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "children", + "types": { + "kind": "NamedType", + "name": "tuple" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.utils._encode/MissingValues", + "name": "MissingValues", + "qname": "sklearn.utils._encode.MissingValues", + "decorators": [], + "superclasses": ["NamedTuple"], + "methods": ["scikit-learn/sklearn.utils._encode/MissingValues/to_list"], + "is_public": false, + "reexported_by": [], + "description": "Data class for missing data information", + "docstring": "Data class for missing data information", + "code": "class MissingValues(NamedTuple):\n \"\"\"Data class for missing data information\"\"\"\n nan: bool\n none: bool\n\n def to_list(self):\n \"\"\"Convert tuple to a list where None is always first.\"\"\"\n output = []\n if self.none:\n output.append(None)\n if self.nan:\n output.append(np.nan)\n return output", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.utils._encode/_nandict", + "name": "_nandict", + "qname": "sklearn.utils._encode._nandict", + "decorators": [], + "superclasses": ["dict"], + "methods": [ + "scikit-learn/sklearn.utils._encode/_nandict/__init__", + "scikit-learn/sklearn.utils._encode/_nandict/__missing__" + ], + "is_public": false, + "reexported_by": [], + "description": "Dictionary with support for nans.", + "docstring": "Dictionary with support for nans.", + "code": "class _nandict(dict):\n \"\"\"Dictionary with support for nans.\"\"\"\n def __init__(self, mapping):\n super().__init__(mapping)\n for key, value in mapping.items():\n if is_scalar_nan(key):\n self.nan_value = value\n break\n\n def __missing__(self, key):\n if hasattr(self, 'nan_value') and is_scalar_nan(key):\n return self.nan_value\n raise KeyError(key)", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_VisualBlock", + "name": "_VisualBlock", + "qname": "sklearn.utils._estimator_html_repr._VisualBlock", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.utils._estimator_html_repr/_VisualBlock/__init__", + 
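Editor's note: `_nandict` exists because NaN breaks ordinary dict lookups: `float('nan') != float('nan')`, so a plain dict can only find a NaN key by object identity. A short demonstration of the failure mode that `__missing__` above works around (private module, shown for illustration only).

```python
import numpy as np

table = {np.nan: 0, 'a': 1}
print(table[np.nan])         # 0 -- found by object identity, not equality

try:
    table[float('nan')]      # a *different* NaN object
except KeyError:
    print("plain dict raises KeyError for a 'different' NaN")

# _nandict.__missing__ (see the code above) instead routes every NaN key
# to the stored nan_value via is_scalar_nan, whichever NaN object is used.
```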
"scikit-learn/sklearn.utils._estimator_html_repr/_VisualBlock/_sk_visual_block_" + ], + "is_public": false, + "reexported_by": [], + "description": "HTML Representation of Estimator", + "docstring": "HTML Representation of Estimator\n\nParameters\n----------\nkind : {'serial', 'parallel', 'single'}\n kind of HTML block\n\nestimators : list of estimators or `_VisualBlock`s or a single estimator\n If kind != 'single', then `estimators` is a list of\n estimators.\n If kind == 'single', then `estimators` is a single estimator.\n\nnames : list of str, default=None\n If kind != 'single', then `names` corresponds to estimators.\n If kind == 'single', then `names` is a single string corresponding to\n the single estimator.\n\nname_details : list of str, str, or None, default=None\n If kind != 'single', then `name_details` corresponds to `names`.\n If kind == 'single', then `name_details` is a single string\n corresponding to the single estimator.\n\ndash_wrapped : bool, default=True\n If true, wrapped HTML element will be wrapped with a dashed border.\n Only active when kind != 'single'.", + "code": "class _VisualBlock:\n \"\"\"HTML Representation of Estimator\n\n Parameters\n ----------\n kind : {'serial', 'parallel', 'single'}\n kind of HTML block\n\n estimators : list of estimators or `_VisualBlock`s or a single estimator\n If kind != 'single', then `estimators` is a list of\n estimators.\n If kind == 'single', then `estimators` is a single estimator.\n\n names : list of str, default=None\n If kind != 'single', then `names` corresponds to estimators.\n If kind == 'single', then `names` is a single string corresponding to\n the single estimator.\n\n name_details : list of str, str, or None, default=None\n If kind != 'single', then `name_details` corresponds to `names`.\n If kind == 'single', then `name_details` is a single string\n corresponding to the single estimator.\n\n dash_wrapped : bool, default=True\n If true, wrapped HTML element will be wrapped with a dashed border.\n Only active when kind != 'single'.\n \"\"\"\n def __init__(self, kind, estimators, *, names=None, name_details=None,\n dash_wrapped=True):\n self.kind = kind\n self.estimators = estimators\n self.dash_wrapped = dash_wrapped\n\n if self.kind in ('parallel', 'serial'):\n if names is None:\n names = (None, ) * len(estimators)\n if name_details is None:\n name_details = (None, ) * len(estimators)\n\n self.names = names\n self.name_details = name_details\n\n def _sk_visual_block_(self):\n return self", + "instance_attributes": [ + { + "name": "dash_wrapped", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.utils._mocking/ArraySlicingWrapper", + "name": "ArraySlicingWrapper", + "qname": "sklearn.utils._mocking.ArraySlicingWrapper", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.utils._mocking/ArraySlicingWrapper/__init__", + "scikit-learn/sklearn.utils._mocking/ArraySlicingWrapper/__getitem__" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "Parameters\n----------\narray", + "code": "class ArraySlicingWrapper:\n \"\"\"\n Parameters\n ----------\n array\n \"\"\"\n\n def __init__(self, array):\n self.array = array\n\n def __getitem__(self, aslice):\n return MockDataFrame(self.array[aslice])", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier", + "name": "CheckingClassifier", + "qname": "sklearn.utils._mocking.CheckingClassifier", + "decorators": [], + 
"superclasses": ["ClassifierMixin", "BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.utils._mocking/CheckingClassifier/__init__", + "scikit-learn/sklearn.utils._mocking/CheckingClassifier/_check_X_y", + "scikit-learn/sklearn.utils._mocking/CheckingClassifier/fit", + "scikit-learn/sklearn.utils._mocking/CheckingClassifier/predict", + "scikit-learn/sklearn.utils._mocking/CheckingClassifier/predict_proba", + "scikit-learn/sklearn.utils._mocking/CheckingClassifier/decision_function", + "scikit-learn/sklearn.utils._mocking/CheckingClassifier/score", + "scikit-learn/sklearn.utils._mocking/CheckingClassifier/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Dummy classifier to test pipelining and meta-estimators.\n\nChecks some property of `X` and `y`in fit / predict.\nThis allows testing whether pipelines / cross-validation or metaestimators\nchanged the input.\n\nCan also be used to check if `fit_params` are passed correctly, and\nto force a certain score to be returned.", + "docstring": "Dummy classifier to test pipelining and meta-estimators.\n\nChecks some property of `X` and `y`in fit / predict.\nThis allows testing whether pipelines / cross-validation or metaestimators\nchanged the input.\n\nCan also be used to check if `fit_params` are passed correctly, and\nto force a certain score to be returned.\n\nParameters\n----------\ncheck_y, check_X : callable, default=None\n The callable used to validate `X` and `y`. These callable should return\n a bool where `False` will trigger an `AssertionError`.\n\ncheck_y_params, check_X_params : dict, default=None\n The optional parameters to pass to `check_X` and `check_y`.\n\nmethods_to_check : \"all\" or list of str, default=\"all\"\n The methods in which the checks should be applied. By default,\n all checks will be done on all methods (`fit`, `predict`,\n `predict_proba`, `decision_function` and `score`).\n\nfoo_param : int, default=0\n A `foo` param. When `foo > 1`, the output of :meth:`score` will be 1\n otherwise it is 0.\n\nexpected_fit_params : list of str, default=None\n A list of the expected parameters given when calling `fit`.\n\nAttributes\n----------\nclasses_ : int\n The classes seen during `fit`.\n\nn_features_in_ : int\n The number of features seen during `fit`.\n\nExamples\n--------\n>>> from sklearn.utils._mocking import CheckingClassifier\n\nThis helper allow to assert to specificities regarding `X` or `y`. In this\ncase we expect `check_X` or `check_y` to return a boolean.\n\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = CheckingClassifier(check_X=lambda x: x.shape == (150, 4))\n>>> clf.fit(X, y)\nCheckingClassifier(...)\n\nWe can also provide a check which might raise an error. In this case, we\nexpect `check_X` to return `X` and `check_y` to return `y`.\n\n>>> from sklearn.utils import check_array\n>>> clf = CheckingClassifier(check_X=check_array)\n>>> clf.fit(X, y)\nCheckingClassifier(...)", + "code": "class CheckingClassifier(ClassifierMixin, BaseEstimator):\n \"\"\"Dummy classifier to test pipelining and meta-estimators.\n\n Checks some property of `X` and `y`in fit / predict.\n This allows testing whether pipelines / cross-validation or metaestimators\n changed the input.\n\n Can also be used to check if `fit_params` are passed correctly, and\n to force a certain score to be returned.\n\n Parameters\n ----------\n check_y, check_X : callable, default=None\n The callable used to validate `X` and `y`. 
These callable should return\n a bool where `False` will trigger an `AssertionError`.\n\n check_y_params, check_X_params : dict, default=None\n The optional parameters to pass to `check_X` and `check_y`.\n\n methods_to_check : \"all\" or list of str, default=\"all\"\n The methods in which the checks should be applied. By default,\n all checks will be done on all methods (`fit`, `predict`,\n `predict_proba`, `decision_function` and `score`).\n\n foo_param : int, default=0\n A `foo` param. When `foo > 1`, the output of :meth:`score` will be 1\n otherwise it is 0.\n\n expected_fit_params : list of str, default=None\n A list of the expected parameters given when calling `fit`.\n\n Attributes\n ----------\n classes_ : int\n The classes seen during `fit`.\n\n n_features_in_ : int\n The number of features seen during `fit`.\n\n Examples\n --------\n >>> from sklearn.utils._mocking import CheckingClassifier\n\n This helper allow to assert to specificities regarding `X` or `y`. In this\n case we expect `check_X` or `check_y` to return a boolean.\n\n >>> from sklearn.datasets import load_iris\n >>> X, y = load_iris(return_X_y=True)\n >>> clf = CheckingClassifier(check_X=lambda x: x.shape == (150, 4))\n >>> clf.fit(X, y)\n CheckingClassifier(...)\n\n We can also provide a check which might raise an error. In this case, we\n expect `check_X` to return `X` and `check_y` to return `y`.\n\n >>> from sklearn.utils import check_array\n >>> clf = CheckingClassifier(check_X=check_array)\n >>> clf.fit(X, y)\n CheckingClassifier(...)\n \"\"\"\n\n def __init__(self, *, check_y=None, check_y_params=None,\n check_X=None, check_X_params=None, methods_to_check=\"all\",\n foo_param=0, expected_fit_params=None):\n self.check_y = check_y\n self.check_y_params = check_y_params\n self.check_X = check_X\n self.check_X_params = check_X_params\n self.methods_to_check = methods_to_check\n self.foo_param = foo_param\n self.expected_fit_params = expected_fit_params\n\n def _check_X_y(self, X, y=None, should_be_fitted=True):\n \"\"\"Validate X and y and make extra check.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data set.\n y : array-like of shape (n_samples), default=None\n The corresponding target, by default None.\n should_be_fitted : bool, default=True\n Whether or not the classifier should be already fitted.\n By default True.\n\n Returns\n -------\n X, y\n \"\"\"\n if should_be_fitted:\n check_is_fitted(self)\n if self.check_X is not None:\n params = {} if self.check_X_params is None else self.check_X_params\n checked_X = self.check_X(X, **params)\n if isinstance(checked_X, (bool, np.bool_)):\n assert checked_X\n else:\n X = checked_X\n if y is not None and self.check_y is not None:\n params = {} if self.check_y_params is None else self.check_y_params\n checked_y = self.check_y(y, **params)\n if isinstance(checked_y, (bool, np.bool_)):\n assert checked_y\n else:\n y = checked_y\n return X, y\n\n def fit(self, X, y, **fit_params):\n \"\"\"Fit classifier.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples, n_outputs) or (n_samples,), \\\n default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n **fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of the estimator\n\n Returns\n -------\n self\n \"\"\"\n assert _num_samples(X) == 
_num_samples(y)\n if self.methods_to_check == \"all\" or \"fit\" in self.methods_to_check:\n X, y = self._check_X_y(X, y, should_be_fitted=False)\n self.n_features_in_ = np.shape(X)[1]\n self.classes_ = np.unique(\n check_array(y, ensure_2d=False, allow_nd=True)\n )\n if self.expected_fit_params:\n missing = set(self.expected_fit_params) - set(fit_params)\n if missing:\n raise AssertionError(\n f'Expected fit parameter(s) {list(missing)} not seen.'\n )\n for key, value in fit_params.items():\n if _num_samples(value) != _num_samples(X):\n raise AssertionError(\n f'Fit parameter {key} has length {_num_samples(value)}'\n f'; expected {_num_samples(X)}.'\n )\n\n return self\n\n def predict(self, X):\n \"\"\"Predict the first class seen in `classes_`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n preds : ndarray of shape (n_samples,)\n Predictions of the first class seens in `classes_`.\n \"\"\"\n if (self.methods_to_check == \"all\" or\n \"predict\" in self.methods_to_check):\n X, y = self._check_X_y(X)\n return self.classes_[np.zeros(_num_samples(X), dtype=int)]\n\n def predict_proba(self, X):\n \"\"\"Predict probabilities for each class.\n\n Here, the dummy classifier will provide a probability of 1 for the\n first class of `classes_` and 0 otherwise.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n proba : ndarray of shape (n_samples, n_classes)\n The probabilities for each sample and class.\n \"\"\"\n if (self.methods_to_check == \"all\" or\n \"predict_proba\" in self.methods_to_check):\n X, y = self._check_X_y(X)\n proba = np.zeros((_num_samples(X), len(self.classes_)))\n proba[:, 0] = 1\n return proba\n\n def decision_function(self, X):\n \"\"\"Confidence score.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n decision : ndarray of shape (n_samples,) if n_classes == 2\\\n else (n_samples, n_classes)\n Confidence score.\n \"\"\"\n if (self.methods_to_check == \"all\" or\n \"decision_function\" in self.methods_to_check):\n X, y = self._check_X_y(X)\n if len(self.classes_) == 2:\n # for binary classifier, the confidence score is related to\n # classes_[1] and therefore should be null.\n return np.zeros(_num_samples(X))\n else:\n decision = np.zeros((_num_samples(X), len(self.classes_)))\n decision[:, 0] = 1\n return decision\n\n def score(self, X=None, Y=None):\n \"\"\"Fake score.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data, where n_samples is the number of samples and\n n_features is the number of features.\n\n Y : array-like of shape (n_samples, n_output) or (n_samples,)\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n Returns\n -------\n score : float\n Either 0 or 1 depending of `foo_param` (i.e. 
`foo_param > 1 =>\n score=1` otherwise `score=0`).\n \"\"\"\n if self.methods_to_check == \"all\" or \"score\" in self.methods_to_check:\n self._check_X_y(X, Y)\n if self.foo_param > 1:\n score = 1.\n else:\n score = 0.\n return score\n\n def _more_tags(self):\n return {'_skip_test': True, 'X_types': ['1dlabel']}", + "instance_attributes": [ + { + "name": "methods_to_check", + "types": { + "kind": "NamedType", + "name": "str" + } + }, + { + "name": "foo_param", + "types": { + "kind": "NamedType", + "name": "int" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame", + "name": "MockDataFrame", + "qname": "sklearn.utils._mocking.MockDataFrame", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.utils._mocking/MockDataFrame/__init__", + "scikit-learn/sklearn.utils._mocking/MockDataFrame/__len__", + "scikit-learn/sklearn.utils._mocking/MockDataFrame/__array__", + "scikit-learn/sklearn.utils._mocking/MockDataFrame/__eq__", + "scikit-learn/sklearn.utils._mocking/MockDataFrame/__ne__" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "Parameters\n----------\narray", + "code": "class MockDataFrame:\n \"\"\"\n Parameters\n ----------\n array\n \"\"\"\n # have shape and length but don't support indexing.\n\n def __init__(self, array):\n self.array = array\n self.values = array\n self.shape = array.shape\n self.ndim = array.ndim\n # ugly hack to make iloc work.\n self.iloc = ArraySlicingWrapper(array)\n\n def __len__(self):\n return len(self.array)\n\n def __array__(self, dtype=None):\n # Pandas data frames also are array-like: we want to make sure that\n # input validation in cross-validation does not try to call that\n # method.\n return self.array\n\n def __eq__(self, other):\n return MockDataFrame(self.array == other.array)\n\n def __ne__(self, other):\n return not self == other", + "instance_attributes": [ + { + "name": "iloc", + "types": { + "kind": "NamedType", + "name": "ArraySlicingWrapper" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper", + "name": "NoSampleWeightWrapper", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper", + "decorators": [], + "superclasses": ["BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/__init__", + "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/fit", + "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/predict", + "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/predict_proba", + "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/_more_tags" + ], + "is_public": false, + "reexported_by": [], + "description": "Wrap estimator which will not expose `sample_weight`.", + "docstring": "Wrap estimator which will not expose `sample_weight`.\n\nParameters\n----------\nest : estimator, default=None\n The estimator to wrap.", + "code": "class NoSampleWeightWrapper(BaseEstimator):\n \"\"\"Wrap estimator which will not expose `sample_weight`.\n\n Parameters\n ----------\n est : estimator, default=None\n The estimator to wrap.\n \"\"\"\n\n def __init__(self, est=None):\n self.est = est\n\n def fit(self, X, y):\n return self.est.fit(X, y)\n\n def predict(self, X):\n return self.est.predict(X)\n\n def predict_proba(self, X):\n return self.est.predict_proba(X)\n\n def _more_tags(self):\n return {'_skip_test': True}", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.utils._pprint/KeyValTuple", + "name": "KeyValTuple", + "qname": 
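Editor's note: the `expected_fit_params` checks implemented in `fit` above can be exercised directly. `sklearn.utils._mocking` is private test tooling, so this mirrors the module's own doctest style rather than a public API.

```python
import numpy as np
from sklearn.utils._mocking import CheckingClassifier

X, y = np.zeros((10, 2)), np.zeros(10)
clf = CheckingClassifier(expected_fit_params=['sample_weight'])

try:
    clf.fit(X, y)                      # expected fit param never passed
except AssertionError as exc:
    print(exc)                         # Expected fit parameter(s) ... not seen.

clf.fit(X, y, sample_weight=np.ones(10))  # length 10 matches X -> accepted
```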
"sklearn.utils._pprint.KeyValTuple", + "decorators": [], + "superclasses": ["tuple"], + "methods": ["scikit-learn/sklearn.utils._pprint/KeyValTuple/__repr__"], + "is_public": false, + "reexported_by": [], + "description": "Dummy class for correctly rendering key-value tuples from dicts.", + "docstring": "Dummy class for correctly rendering key-value tuples from dicts.", + "code": "class KeyValTuple(tuple):\n \"\"\"Dummy class for correctly rendering key-value tuples from dicts.\"\"\"\n def __repr__(self):\n # needed for _dispatch[tuple.__repr__] not to be overridden\n return super().__repr__()", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.utils._pprint/KeyValTupleParam", + "name": "KeyValTupleParam", + "qname": "sklearn.utils._pprint.KeyValTupleParam", + "decorators": [], + "superclasses": ["KeyValTuple"], + "methods": [], + "is_public": false, + "reexported_by": [], + "description": "Dummy class for correctly rendering key-value tuples from parameters.", + "docstring": "Dummy class for correctly rendering key-value tuples from parameters.", + "code": "class KeyValTupleParam(KeyValTuple):\n \"\"\"Dummy class for correctly rendering key-value tuples from parameters.\"\"\"\n pass", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter", + "name": "_EstimatorPrettyPrinter", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter", + "decorators": [], + "superclasses": ["pprint.PrettyPrinter"], + "methods": [ + "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/__init__", + "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/format", + "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_estimator", + "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_dict_items", + "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params", + "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params_or_dict_items", + "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_items", + "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_key_val_tuple" + ], + "is_public": false, + "reexported_by": [], + "description": "Pretty Printer class for estimator objects.\n\nThis extends the pprint.PrettyPrinter class, because:\n- we need estimators to be printed with their parameters, e.g.\n Estimator(param1=value1, ...) which is not supported by default.\n- the 'compact' parameter of PrettyPrinter is ignored for dicts, which\n may lead to very long representations that we want to avoid.\n\nQuick overview of pprint.PrettyPrinter (see also\nhttps://stackoverflow.com/questions/49565047/pprint-with-hex-numbers):\n\n- the entry point is the _format() method which calls format() (overridden\n here)\n- format() directly calls _safe_repr() for a first try at rendering the\n object\n- _safe_repr formats the whole object reccursively, only calling itself,\n not caring about line length or anything\n- back to _format(), if the output string is too long, _format() then calls\n the appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on\n the type of the object. This where the line length and the compact\n parameters are taken into account.\n- those _pprint_TYPE() methods will internally use the format() method for\n rendering the nested objects of an object (e.g. the elements of a list)\n\nIn the end, everything has to be implemented twice: in _safe_repr and in\nthe custom _pprint_TYPE methods. 
Unfortunately PrettyPrinter is really not\nstraightforward to extend (especially when we want a compact output), so\nthe code is a bit convoluted.\n\nThis class overrides:\n- format() to support the changed_only parameter\n- _safe_repr to support printing of estimators (for when they fit on a\n single line)\n- _format_dict_items so that dict are correctly 'compacted'\n- _format_items so that ellipsis is used on long lists and tuples\n\nWhen estimators cannot be printed on a single line, the builtin _format()\nwill call _pprint_estimator() because it was registered to do so (see\n_dispatch[BaseEstimator.__repr__] = _pprint_estimator).\n\nboth _format_dict_items() and _pprint_estimator() use the\n_format_params_or_dict_items() method that will format parameters and\nkey-value pairs respecting the compact parameter. This method needs another\nsubroutine _pprint_key_val_tuple() used when a parameter or a key-value\npair is too long to fit on a single line. This subroutine is called in\n_format() and is registered as well in the _dispatch dict (just like\n_pprint_estimator). We had to create the two classes KeyValTuple and\nKeyValTupleParam for this.", + "docstring": "Pretty Printer class for estimator objects.\n\nThis extends the pprint.PrettyPrinter class, because:\n- we need estimators to be printed with their parameters, e.g.\n Estimator(param1=value1, ...) which is not supported by default.\n- the 'compact' parameter of PrettyPrinter is ignored for dicts, which\n may lead to very long representations that we want to avoid.\n\nQuick overview of pprint.PrettyPrinter (see also\nhttps://stackoverflow.com/questions/49565047/pprint-with-hex-numbers):\n\n- the entry point is the _format() method which calls format() (overridden\n here)\n- format() directly calls _safe_repr() for a first try at rendering the\n object\n- _safe_repr formats the whole object reccursively, only calling itself,\n not caring about line length or anything\n- back to _format(), if the output string is too long, _format() then calls\n the appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on\n the type of the object. This where the line length and the compact\n parameters are taken into account.\n- those _pprint_TYPE() methods will internally use the format() method for\n rendering the nested objects of an object (e.g. the elements of a list)\n\nIn the end, everything has to be implemented twice: in _safe_repr and in\nthe custom _pprint_TYPE methods. Unfortunately PrettyPrinter is really not\nstraightforward to extend (especially when we want a compact output), so\nthe code is a bit convoluted.\n\nThis class overrides:\n- format() to support the changed_only parameter\n- _safe_repr to support printing of estimators (for when they fit on a\n single line)\n- _format_dict_items so that dict are correctly 'compacted'\n- _format_items so that ellipsis is used on long lists and tuples\n\nWhen estimators cannot be printed on a single line, the builtin _format()\nwill call _pprint_estimator() because it was registered to do so (see\n_dispatch[BaseEstimator.__repr__] = _pprint_estimator).\n\nboth _format_dict_items() and _pprint_estimator() use the\n_format_params_or_dict_items() method that will format parameters and\nkey-value pairs respecting the compact parameter. This method needs another\nsubroutine _pprint_key_val_tuple() used when a parameter or a key-value\npair is too long to fit on a single line. 
This subroutine is called in\n_format() and is registered as well in the _dispatch dict (just like\n_pprint_estimator). We had to create the two classes KeyValTuple and\nKeyValTupleParam for this.", + "code": "class _EstimatorPrettyPrinter(pprint.PrettyPrinter):\n \"\"\"Pretty Printer class for estimator objects.\n\n This extends the pprint.PrettyPrinter class, because:\n - we need estimators to be printed with their parameters, e.g.\n Estimator(param1=value1, ...) which is not supported by default.\n - the 'compact' parameter of PrettyPrinter is ignored for dicts, which\n may lead to very long representations that we want to avoid.\n\n Quick overview of pprint.PrettyPrinter (see also\n https://stackoverflow.com/questions/49565047/pprint-with-hex-numbers):\n\n - the entry point is the _format() method which calls format() (overridden\n here)\n - format() directly calls _safe_repr() for a first try at rendering the\n object\n - _safe_repr formats the whole object reccursively, only calling itself,\n not caring about line length or anything\n - back to _format(), if the output string is too long, _format() then calls\n the appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on\n the type of the object. This where the line length and the compact\n parameters are taken into account.\n - those _pprint_TYPE() methods will internally use the format() method for\n rendering the nested objects of an object (e.g. the elements of a list)\n\n In the end, everything has to be implemented twice: in _safe_repr and in\n the custom _pprint_TYPE methods. Unfortunately PrettyPrinter is really not\n straightforward to extend (especially when we want a compact output), so\n the code is a bit convoluted.\n\n This class overrides:\n - format() to support the changed_only parameter\n - _safe_repr to support printing of estimators (for when they fit on a\n single line)\n - _format_dict_items so that dict are correctly 'compacted'\n - _format_items so that ellipsis is used on long lists and tuples\n\n When estimators cannot be printed on a single line, the builtin _format()\n will call _pprint_estimator() because it was registered to do so (see\n _dispatch[BaseEstimator.__repr__] = _pprint_estimator).\n\n both _format_dict_items() and _pprint_estimator() use the\n _format_params_or_dict_items() method that will format parameters and\n key-value pairs respecting the compact parameter. This method needs another\n subroutine _pprint_key_val_tuple() used when a parameter or a key-value\n pair is too long to fit on a single line. This subroutine is called in\n _format() and is registered as well in the _dispatch dict (just like\n _pprint_estimator). We had to create the two classes KeyValTuple and\n KeyValTupleParam for this.\n \"\"\"\n\n def __init__(self, indent=1, width=80, depth=None, stream=None, *,\n compact=False, indent_at_name=True,\n n_max_elements_to_show=None):\n super().__init__(indent, width, depth, stream, compact=compact)\n self._indent_at_name = indent_at_name\n if self._indent_at_name:\n self._indent_per_level = 1 # ignore indent param\n self._changed_only = get_config()['print_changed_only']\n # Max number of elements in a list, dict, tuple until we start using\n # ellipsis. 
This also affects the number of arguments of an estimators\n # (they are treated as dicts)\n self.n_max_elements_to_show = n_max_elements_to_show\n\n def format(self, object, context, maxlevels, level):\n return _safe_repr(object, context, maxlevels, level,\n changed_only=self._changed_only)\n\n def _pprint_estimator(self, object, stream, indent, allowance, context,\n level):\n stream.write(object.__class__.__name__ + '(')\n if self._indent_at_name:\n indent += len(object.__class__.__name__)\n\n if self._changed_only:\n params = _changed_params(object)\n else:\n params = object.get_params(deep=False)\n\n params = OrderedDict((name, val)\n for (name, val) in sorted(params.items()))\n\n self._format_params(params.items(), stream, indent, allowance + 1,\n context, level)\n stream.write(')')\n\n def _format_dict_items(self, items, stream, indent, allowance, context,\n level):\n return self._format_params_or_dict_items(\n items, stream, indent, allowance, context, level, is_dict=True)\n\n def _format_params(self, items, stream, indent, allowance, context, level):\n return self._format_params_or_dict_items(\n items, stream, indent, allowance, context, level, is_dict=False)\n\n def _format_params_or_dict_items(self, object, stream, indent, allowance,\n context, level, is_dict):\n \"\"\"Format dict items or parameters respecting the compact=True\n parameter. For some reason, the builtin rendering of dict items doesn't\n respect compact=True and will use one line per key-value if all cannot\n fit in a single line.\n Dict items will be rendered as <'key': value> while params will be\n rendered as . The implementation is mostly copy/pasting from\n the builtin _format_items().\n This also adds ellipsis if the number of items is greater than\n self.n_max_elements_to_show.\n \"\"\"\n write = stream.write\n indent += self._indent_per_level\n delimnl = ',\\n' + ' ' * indent\n delim = ''\n width = max_width = self._width - indent + 1\n it = iter(object)\n try:\n next_ent = next(it)\n except StopIteration:\n return\n last = False\n n_items = 0\n while not last:\n if n_items == self.n_max_elements_to_show:\n write(', ...')\n break\n n_items += 1\n ent = next_ent\n try:\n next_ent = next(it)\n except StopIteration:\n last = True\n max_width -= allowance\n width -= allowance\n if self._compact:\n k, v = ent\n krepr = self._repr(k, context, level)\n vrepr = self._repr(v, context, level)\n if not is_dict:\n krepr = krepr.strip(\"'\")\n middle = ': ' if is_dict else '='\n rep = krepr + middle + vrepr\n w = len(rep) + 2\n if width < w:\n width = max_width\n if delim:\n delim = delimnl\n if width >= w:\n width -= w\n write(delim)\n delim = ', '\n write(rep)\n continue\n write(delim)\n delim = delimnl\n class_ = KeyValTuple if is_dict else KeyValTupleParam\n self._format(class_(ent), stream, indent,\n allowance if last else 1, context, level)\n\n def _format_items(self, items, stream, indent, allowance, context, level):\n \"\"\"Format the items of an iterable (list, tuple...). 
Same as the\n built-in _format_items, with support for ellipsis if the number of\n elements is greater than self.n_max_elements_to_show.\n \"\"\"\n write = stream.write\n indent += self._indent_per_level\n if self._indent_per_level > 1:\n write((self._indent_per_level - 1) * ' ')\n delimnl = ',\\n' + ' ' * indent\n delim = ''\n width = max_width = self._width - indent + 1\n it = iter(items)\n try:\n next_ent = next(it)\n except StopIteration:\n return\n last = False\n n_items = 0\n while not last:\n if n_items == self.n_max_elements_to_show:\n write(', ...')\n break\n n_items += 1\n ent = next_ent\n try:\n next_ent = next(it)\n except StopIteration:\n last = True\n max_width -= allowance\n width -= allowance\n if self._compact:\n rep = self._repr(ent, context, level)\n w = len(rep) + 2\n if width < w:\n width = max_width\n if delim:\n delim = delimnl\n if width >= w:\n width -= w\n write(delim)\n delim = ', '\n write(rep)\n continue\n write(delim)\n delim = delimnl\n self._format(ent, stream, indent,\n allowance if last else 1, context, level)\n\n def _pprint_key_val_tuple(self, object, stream, indent, allowance, context,\n level):\n \"\"\"Pretty printing for key-value tuples from dict or parameters.\"\"\"\n k, v = object\n rep = self._repr(k, context, level)\n if isinstance(object, KeyValTupleParam):\n rep = rep.strip(\"'\")\n middle = '='\n else:\n middle = ': '\n stream.write(rep)\n stream.write(middle)\n self._format(v, stream, indent + len(rep) + len(middle), allowance,\n context, level)\n\n # Note: need to copy _dispatch to prevent instances of the builtin\n # PrettyPrinter class to call methods of _EstimatorPrettyPrinter (see issue\n # 12906)\n # mypy error: \"Type[PrettyPrinter]\" has no attribute \"_dispatch\"\n _dispatch = pprint.PrettyPrinter._dispatch.copy() # type: ignore\n _dispatch[BaseEstimator.__repr__] = _pprint_estimator\n _dispatch[KeyValTuple.__repr__] = _pprint_key_val_tuple", + "instance_attributes": [ + { + "name": "_indent_at_name", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "_indent_per_level", + "types": { + "kind": "NamedType", + "name": "int" + } + }, + { + "name": "_changed_only", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier", + "name": "MinimalClassifier", + "qname": "sklearn.utils._testing.MinimalClassifier", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.utils._testing/MinimalClassifier/__init__", + "scikit-learn/sklearn.utils._testing/MinimalClassifier/get_params", + "scikit-learn/sklearn.utils._testing/MinimalClassifier/set_params", + "scikit-learn/sklearn.utils._testing/MinimalClassifier/fit", + "scikit-learn/sklearn.utils._testing/MinimalClassifier/predict_proba", + "scikit-learn/sklearn.utils._testing/MinimalClassifier/predict", + "scikit-learn/sklearn.utils._testing/MinimalClassifier/score" + ], + "is_public": false, + "reexported_by": [], + "description": "Minimal classifier implementation without inheriting from BaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`.", + "docstring": "Minimal classifier implementation without inheriting from BaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`.", + "code": "class MinimalClassifier:\n \"\"\"Minimal classifier implementation without inheriting from BaseEstimator.\n\n This estimator should be tested with:\n\n * `check_estimator` in `test_estimator_checks.py`;\n * within a `Pipeline` in `test_pipeline.py`;\n * within a `SearchCV` in `test_search.py`.\n \"\"\"\n _estimator_type = \"classifier\"\n\n def __init__(self, param=None):\n self.param = param\n\n def get_params(self, deep=True):\n return {\"param\": self.param}\n\n def set_params(self, **params):\n for key, value in params.items():\n setattr(self, key, value)\n return self\n\n def fit(self, X, y):\n X, y = check_X_y(X, y)\n check_classification_targets(y)\n self.classes_, counts = np.unique(y, return_counts=True)\n self._most_frequent_class_idx = counts.argmax()\n return self\n\n def predict_proba(self, X):\n check_is_fitted(self)\n X = check_array(X)\n proba_shape = (X.shape[0], self.classes_.size)\n y_proba = np.zeros(shape=proba_shape, dtype=np.float64)\n y_proba[:, self._most_frequent_class_idx] = 1.0\n return y_proba\n\n def predict(self, X):\n y_proba = self.predict_proba(X)\n y_pred = y_proba.argmax(axis=1)\n return self.classes_[y_pred]\n\n def score(self, X, y):\n from sklearn.metrics import accuracy_score\n return accuracy_score(y, self.predict(X))", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor", + "name": "MinimalRegressor", + "qname": "sklearn.utils._testing.MinimalRegressor", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.utils._testing/MinimalRegressor/__init__", + "scikit-learn/sklearn.utils._testing/MinimalRegressor/get_params", + "scikit-learn/sklearn.utils._testing/MinimalRegressor/set_params", + "scikit-learn/sklearn.utils._testing/MinimalRegressor/fit", + "scikit-learn/sklearn.utils._testing/MinimalRegressor/predict", + "scikit-learn/sklearn.utils._testing/MinimalRegressor/score" + ], + "is_public": false, + "reexported_by": [], + "description": "Minimal regressor implementation without inheriting from BaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`.", + "docstring": "Minimal regressor implementation without inheriting from BaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`.", + "code": "class MinimalRegressor:\n \"\"\"Minimal regressor implementation without inheriting from BaseEstimator.\n\n This estimator should be tested with:\n\n * `check_estimator` in `test_estimator_checks.py`;\n * within a `Pipeline` in `test_pipeline.py`;\n * within a `SearchCV` in `test_search.py`.\n \"\"\"\n _estimator_type = \"regressor\"\n\n def __init__(self, param=None):\n self.param = param\n\n def get_params(self, deep=True):\n return {\"param\": self.param}\n\n def set_params(self, **params):\n for key, value in params.items():\n setattr(self, key, value)\n return self\n\n def fit(self, X, y):\n X, y = check_X_y(X, y)\n self.is_fitted_ = True\n self._mean = np.mean(y)\n return self\n\n def predict(self, X):\n check_is_fitted(self)\n X = check_array(X)\n return np.ones(shape=(X.shape[0],)) * self._mean\n\n def score(self, X, y):\n from sklearn.metrics import r2_score\n return r2_score(y, self.predict(X))", + "instance_attributes": [ + { + "name": "is_fitted_", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer", + "name": "MinimalTransformer", + "qname": "sklearn.utils._testing.MinimalTransformer", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.utils._testing/MinimalTransformer/__init__", + "scikit-learn/sklearn.utils._testing/MinimalTransformer/get_params", + "scikit-learn/sklearn.utils._testing/MinimalTransformer/set_params", + "scikit-learn/sklearn.utils._testing/MinimalTransformer/fit", + "scikit-learn/sklearn.utils._testing/MinimalTransformer/transform", + "scikit-learn/sklearn.utils._testing/MinimalTransformer/fit_transform" + ], + "is_public": false, + "reexported_by": [], + "description": "Minimal transformer implementation without inheriting from\nBaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`.", + "docstring": "Minimal transformer implementation without inheriting from\nBaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`.", + "code": "class MinimalTransformer:\n \"\"\"Minimal transformer implementation without inheriting from\n BaseEstimator.\n\n This estimator should be tested with:\n\n * `check_estimator` in `test_estimator_checks.py`;\n * within a `Pipeline` in `test_pipeline.py`;\n * within a `SearchCV` in `test_search.py`.\n \"\"\"\n\n def __init__(self, param=None):\n self.param = param\n\n def get_params(self, deep=True):\n return {\"param\": self.param}\n\n def set_params(self, **params):\n for key, value in params.items():\n setattr(self, key, value)\n return self\n\n def fit(self, X, y=None):\n X = check_array(X)\n self.is_fitted_ = True\n return self\n\n def transform(self, X, y=None):\n check_is_fitted(self)\n X = check_array(X)\n return X\n\n def fit_transform(self, X, y=None):\n return self.fit(X, y).transform(X, y)", + "instance_attributes": [ + { + "name": "is_fitted_", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.utils._testing/TempMemmap", + "name": "TempMemmap", + "qname": "sklearn.utils._testing.TempMemmap", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.utils._testing/TempMemmap/__init__", + "scikit-learn/sklearn.utils._testing/TempMemmap/__enter__", + "scikit-learn/sklearn.utils._testing/TempMemmap/__exit__" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "Parameters\n----------\ndata\nmmap_mode : str, default='r'", + "code": "class TempMemmap:\n \"\"\"\n Parameters\n ----------\n data\n mmap_mode : str, default='r'\n \"\"\"\n def __init__(self, data, mmap_mode='r'):\n self.mmap_mode = mmap_mode\n self.data = data\n\n def __enter__(self):\n data_read_only, self.temp_folder = create_memmap_backed_data(\n self.data, mmap_mode=self.mmap_mode, return_folder=True)\n return data_read_only\n\n def __exit__(self, exc_type, exc_val, exc_tb):\n _delete_folder(self.temp_folder)", + "instance_attributes": [ + { + "name": "mmap_mode", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings", + "name": "_IgnoreWarnings", + "qname": "sklearn.utils._testing._IgnoreWarnings", + "decorators": [], + "superclasses": [], + "methods": [ + 
"scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__init__", + "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__call__", + "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__repr__", + "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__enter__", + "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__exit__" + ], + "is_public": false, + "reexported_by": [], + "description": "Improved and simplified Python warnings context manager and decorator.\n\nThis class allows the user to ignore the warnings raised by a function.\nCopied from Python 2.7.5 and modified as required.", + "docstring": "Improved and simplified Python warnings context manager and decorator.\n\nThis class allows the user to ignore the warnings raised by a function.\nCopied from Python 2.7.5 and modified as required.\n\nParameters\n----------\ncategory : tuple of warning class, default=Warning\n The category to filter. By default, all the categories will be muted.", + "code": "class _IgnoreWarnings:\n \"\"\"Improved and simplified Python warnings context manager and decorator.\n\n This class allows the user to ignore the warnings raised by a function.\n Copied from Python 2.7.5 and modified as required.\n\n Parameters\n ----------\n category : tuple of warning class, default=Warning\n The category to filter. By default, all the categories will be muted.\n\n \"\"\"\n\n def __init__(self, category):\n self._record = True\n self._module = sys.modules['warnings']\n self._entered = False\n self.log = []\n self.category = category\n\n def __call__(self, fn):\n \"\"\"Decorator to catch and hide warnings without visual nesting.\"\"\"\n @wraps(fn)\n def wrapper(*args, **kwargs):\n with warnings.catch_warnings():\n warnings.simplefilter(\"ignore\", self.category)\n return fn(*args, **kwargs)\n\n return wrapper\n\n def __repr__(self):\n args = []\n if self._record:\n args.append(\"record=True\")\n if self._module is not sys.modules['warnings']:\n args.append(\"module=%r\" % self._module)\n name = type(self).__name__\n return \"%s(%s)\" % (name, \", \".join(args))\n\n def __enter__(self):\n if self._entered:\n raise RuntimeError(\"Cannot enter %r twice\" % self)\n self._entered = True\n self._filters = self._module.filters\n self._module.filters = self._filters[:]\n self._showwarning = self._module.showwarning\n warnings.simplefilter(\"ignore\", self.category)\n\n def __exit__(self, *exc_info):\n if not self._entered:\n raise RuntimeError(\"Cannot exit %r without entering first\" % self)\n self._module.filters = self._filters\n self._module.showwarning = self._showwarning\n self.log[:] = []", + "instance_attributes": [ + { + "name": "_record", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "_module", + "types": { + "kind": "NamedType", + "name": "module" + } + }, + { + "name": "_entered", + "types": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "name": "log", + "types": { + "kind": "NamedType", + "name": "list" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.utils._testing/_Raises", + "name": "_Raises", + "qname": "sklearn.utils._testing._Raises", + "decorators": [], + "superclasses": ["contextlib.AbstractContextManager"], + "methods": [ + "scikit-learn/sklearn.utils._testing/_Raises/__init__", + "scikit-learn/sklearn.utils._testing/_Raises/__exit__" + ], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _Raises(contextlib.AbstractContextManager):\n # see raises() for parameters\n def __init__(self, expected_exc_type, match, 
may_pass, err_msg):\n self.expected_exc_types = (\n expected_exc_type\n if isinstance(expected_exc_type, Iterable)\n else [expected_exc_type]\n )\n self.matches = [match] if isinstance(match, str) else match\n self.may_pass = may_pass\n self.err_msg = err_msg\n self.raised_and_matched = False\n\n def __exit__(self, exc_type, exc_value, _):\n # see\n # https://docs.python.org/2.5/whatsnew/pep-343.html#SECTION000910000000000000000\n\n if exc_type is None: # No exception was raised in the block\n if self.may_pass:\n return True # CM is happy\n else:\n err_msg = (\n self.err_msg or f\"Did not raise: {self.expected_exc_types}\"\n )\n raise AssertionError(err_msg)\n\n if not any(\n issubclass(exc_type, expected_type)\n for expected_type in self.expected_exc_types\n ):\n if self.err_msg is not None:\n raise AssertionError(self.err_msg) from exc_value\n else:\n return False # will re-raise the original exception\n\n if self.matches is not None:\n err_msg = self.err_msg or (\n \"The error message should contain one of the following \"\n \"patterns:\\n{}\\nGot {}\".format(\n \"\\n\".join(self.matches), str(exc_value)\n )\n )\n if not any(re.search(match, str(exc_value))\n for match in self.matches):\n raise AssertionError(err_msg) from exc_value\n self.raised_and_matched = True\n\n return True", + "instance_attributes": [ + { + "name": "matches", + "types": { + "kind": "NamedType", + "name": "list" + } + }, + { + "name": "raised_and_matched", + "types": { + "kind": "NamedType", + "name": "bool" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated", + "name": "deprecated", + "qname": "sklearn.utils.deprecation.deprecated", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.utils.deprecation/deprecated/__init__", + "scikit-learn/sklearn.utils.deprecation/deprecated/__call__", + "scikit-learn/sklearn.utils.deprecation/deprecated/_decorate_class", + "scikit-learn/sklearn.utils.deprecation/deprecated/_decorate_fun", + "scikit-learn/sklearn.utils.deprecation/deprecated/_decorate_property", + "scikit-learn/sklearn.utils.deprecation/deprecated/_update_doc" + ], + "is_public": true, + "reexported_by": [], + "description": "Decorator to mark a function or class as deprecated.\n\nIssue a warning when the function is called/the class is instantiated and\nadds a warning to the docstring.\n\nThe optional extra argument will be appended to the deprecation message\nand the docstring. Note: to use this with the default value for extra, put\nin an empty pair of parentheses:\n\n>>> from sklearn.utils import deprecated\n>>> deprecated()\n<sklearn.utils.deprecation.deprecated object at ...>\n\n>>> @deprecated()\n... def some_function(): pass", + "docstring": "Decorator to mark a function or class as deprecated.\n\nIssue a warning when the function is called/the class is instantiated and\nadds a warning to the docstring.\n\nThe optional extra argument will be appended to the deprecation message\nand the docstring. Note: to use this with the default value for extra, put\nin an empty pair of parentheses:\n\n>>> from sklearn.utils import deprecated\n>>> deprecated()\n<sklearn.utils.deprecation.deprecated object at ...>\n\n>>> @deprecated()\n... def some_function(): pass\n\nParameters\n----------\nextra : str, default=''\n To be added to the deprecation messages.", + "code": "class deprecated:\n \"\"\"Decorator to mark a function or class as deprecated.\n\n Issue a warning when the function is called/the class is instantiated and\n adds a warning to the docstring.\n\n The optional extra argument will be appended to the deprecation message\n and the docstring. Note: to use this with the default value for extra, put\n in an empty pair of parentheses:\n\n >>> from sklearn.utils import deprecated\n >>> deprecated()\n <sklearn.utils.deprecation.deprecated object at ...>\n\n >>> @deprecated()\n ... def some_function(): pass\n\n Parameters\n ----------\n extra : str, default=''\n To be added to the deprecation messages.\n \"\"\"\n\n # Adapted from https://wiki.python.org/moin/PythonDecoratorLibrary,\n # but with many changes.\n\n def __init__(self, extra=''):\n self.extra = extra\n\n def __call__(self, obj):\n \"\"\"Call method\n\n Parameters\n ----------\n obj : object\n \"\"\"\n if isinstance(obj, type):\n return self._decorate_class(obj)\n elif isinstance(obj, property):\n # Note that this is only triggered properly if the `property`\n # decorator comes before the `deprecated` decorator, like so:\n #\n # @deprecated(msg)\n # @property\n # def deprecated_attribute_(self):\n # ...\n return self._decorate_property(obj)\n else:\n return self._decorate_fun(obj)\n\n def _decorate_class(self, cls):\n msg = \"Class %s is deprecated\" % cls.__name__\n if self.extra:\n msg += \"; %s\" % self.extra\n\n # FIXME: we should probably reset __new__ for full generality\n init = cls.__init__\n\n def wrapped(*args, **kwargs):\n warnings.warn(msg, category=FutureWarning)\n return init(*args, **kwargs)\n cls.__init__ = wrapped\n\n wrapped.__name__ = '__init__'\n wrapped.__doc__ = self._update_doc(init.__doc__)\n wrapped.deprecated_original = init\n\n return cls\n\n def _decorate_fun(self, fun):\n \"\"\"Decorate function fun\"\"\"\n\n msg = \"Function %s is deprecated\" % fun.__name__\n if self.extra:\n msg += \"; %s\" % self.extra\n\n @functools.wraps(fun)\n def wrapped(*args, **kwargs):\n warnings.warn(msg, category=FutureWarning)\n return fun(*args, **kwargs)\n\n wrapped.__doc__ = self._update_doc(wrapped.__doc__)\n # Add a reference to the wrapped function so that we can introspect\n # on function arguments in Python 2 (already works in Python 3)\n wrapped.__wrapped__ = fun\n\n return wrapped\n\n def _decorate_property(self, prop):\n msg = self.extra\n\n @property\n def wrapped(*args, **kwargs):\n warnings.warn(msg, category=FutureWarning)\n return prop.fget(*args, **kwargs)\n\n return wrapped\n\n def _update_doc(self, olddoc):\n newdoc = \"DEPRECATED\"\n if self.extra:\n newdoc = \"%s: %s\" % (newdoc, self.extra)\n if olddoc:\n newdoc = \"%s\\n\\n %s\" % (newdoc, olddoc)\n return newdoc", + "instance_attributes": [ + { + "name": "extra", + "types": { + "kind": "NamedType", + "name": "str" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray", + "name": "_NotAnArray", + "qname": "sklearn.utils.estimator_checks._NotAnArray", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__init__", + "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__array__", + "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__array_function__" + ], + "is_public": false, + "reexported_by": [], + "description": "An object that is convertible to an array.", + "docstring": "An object that is convertible to an array.\n\nParameters\n----------\ndata : array-like\n The data.", + "code": "class _NotAnArray:\n \"\"\"An object that is convertible to an array.\n\n Parameters\n ----------\n data : array-like\n The data.\n \"\"\"\n\n def __init__(self, data):\n self.data = np.asarray(data)\n\n def __array__(self, dtype=None):\n return self.data\n\n def __array_function__(self, func, types, args, kwargs):\n if func.__name__ == \"may_share_memory\":\n 
return True\n raise TypeError(\"Don't want to call array_function {}!\".format(\n func.__name__))", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.utils.fixes/MaskedArray", + "name": "MaskedArray", + "qname": "sklearn.utils.fixes.MaskedArray", + "decorators": [ + "deprecated('MaskedArray is deprecated in version 0.23 and will be removed in version 1.0 (renaming of 0.25). Use numpy.ma.MaskedArray instead.')" + ], + "superclasses": ["_MaskedArray"], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class MaskedArray(_MaskedArray):\n pass # TODO: remove in 1.0", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.utils.fixes/_FuncWrapper", + "name": "_FuncWrapper", + "qname": "sklearn.utils.fixes._FuncWrapper", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.utils.fixes/_FuncWrapper/__init__", + "scikit-learn/sklearn.utils.fixes/_FuncWrapper/__call__" + ], + "is_public": false, + "reexported_by": [], + "description": "Load the global configuration before calling the function.", + "docstring": "Load the global configuration before calling the function.", + "code": "class _FuncWrapper:\n \"\"\"Load the global configuration before calling the function.\"\"\"\n def __init__(self, function):\n self.function = function\n self.config = get_config()\n update_wrapper(self, self.function)\n\n def __call__(self, *args, **kwargs):\n with config_context(**self.config):\n return self.function(*args, **kwargs)", + "instance_attributes": [ + { + "name": "config", + "types": { + "kind": "NamedType", + "name": "dict" + } + } + ] + }, + { + "id": "scikit-learn/sklearn.utils.fixes/loguniform", + "name": "loguniform", + "qname": "sklearn.utils.fixes.loguniform", + "decorators": [], + "superclasses": ["scipy.stats.reciprocal"], + "methods": [], + "is_public": true, + "reexported_by": [], + "description": "A class supporting log-uniform random variables.", + "docstring": "A class supporting log-uniform random variables.\n\nParameters\n----------\nlow : float\n The minimum value\nhigh : float\n The maximum value\n\nMethods\n-------\nrvs(self, size=None, random_state=None)\n Generate log-uniform random variables\n\nThe most useful method for Scikit-learn usage is highlighted here.\nFor a full list, see\n`scipy.stats.reciprocal\n<https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.reciprocal.html>`_.\nThis list includes all functions of ``scipy.stats`` continuous\ndistributions such as ``pdf``.\n\nNotes\n-----\nThis class generates values between ``low`` and ``high`` or\n\n low <= loguniform(low, high).rvs() <= high\n\nThe logarithmic probability density function (PDF) is uniform. When\n``x`` is a uniformly distributed random variable between 0 and 1, ``10**x``\nare random variables that are equally likely to be returned.\n\nThis class is an alias to ``scipy.stats.reciprocal``, which uses the\nreciprocal distribution:\nhttps://en.wikipedia.org/wiki/Reciprocal_distribution\n\nExamples\n--------\n\n>>> from sklearn.utils.fixes import loguniform\n>>> rv = loguniform(1e-3, 1e1)\n>>> rvs = rv.rvs(random_state=42, size=1000)\n>>> rvs.min() # doctest: +SKIP\n0.0010435856341129003\n>>> rvs.max() # doctest: +SKIP\n9.97403052786026", + "code": "class loguniform(scipy.stats.reciprocal):\n \"\"\"A class supporting log-uniform random variables.\n\n Parameters\n ----------\n low : float\n The minimum value\n high : float\n The maximum value\n\n Methods\n -------\n rvs(self, size=None, random_state=None)\n Generate log-uniform random variables\n\n The most useful method for Scikit-learn usage is highlighted here.\n For a full list, see\n `scipy.stats.reciprocal\n <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.reciprocal.html>`_.\n This list includes all functions of ``scipy.stats`` continuous\n distributions such as ``pdf``.\n\n Notes\n -----\n This class generates values between ``low`` and ``high`` or\n\n low <= loguniform(low, high).rvs() <= high\n\n The logarithmic probability density function (PDF) is uniform. When\n ``x`` is a uniformly distributed random variable between 0 and 1, ``10**x``\n are random variables that are equally likely to be returned.\n\n This class is an alias to ``scipy.stats.reciprocal``, which uses the\n reciprocal distribution:\n https://en.wikipedia.org/wiki/Reciprocal_distribution\n\n Examples\n --------\n\n >>> from sklearn.utils.fixes import loguniform\n >>> rv = loguniform(1e-3, 1e1)\n >>> rvs = rv.rvs(random_state=42, size=1000)\n >>> rvs.min() # doctest: +SKIP\n 0.0010435856341129003\n >>> rvs.max() # doctest: +SKIP\n 9.97403052786026\n \"\"\"", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition", + "name": "_BaseComposition", + "qname": "sklearn.utils.metaestimators._BaseComposition", + "decorators": [], + "superclasses": ["BaseEstimator"], + "methods": [ + "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/__init__", + "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_get_params", + "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_set_params", + "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_replace_estimator", + "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_validate_names" + ], + "is_public": false, + "reexported_by": [], + "description": "Handles parameter management for classifiers composed of named estimators.", + "docstring": "Handles parameter management for classifiers composed of named estimators.\n ", + "code": "class _BaseComposition(BaseEstimator, metaclass=ABCMeta):\n \"\"\"Handles parameter management for classifiers composed of named estimators.\n \"\"\"\n steps: List[Any]\n\n @abstractmethod\n def __init__(self):\n pass\n\n def _get_params(self, attr, deep=True):\n out = super().get_params(deep=deep)\n if not deep:\n return out\n estimators = getattr(self, attr)\n out.update(estimators)\n for name, estimator in estimators:\n if hasattr(estimator, 'get_params'):\n for key, value in estimator.get_params(deep=True).items():\n out['%s__%s' % (name, key)] = value\n return out\n\n def _set_params(self, attr, **params):\n # Ensure strict ordering of parameter setting:\n # 1. All steps\n if attr in params:\n setattr(self, attr, params.pop(attr))\n # 2. 
Step replacement\n items = getattr(self, attr)\n names = []\n if items:\n names, _ = zip(*items)\n for name in list(params.keys()):\n if '__' not in name and name in names:\n self._replace_estimator(attr, name, params.pop(name))\n # 3. Step parameters and other initialisation arguments\n super().set_params(**params)\n return self\n\n def _replace_estimator(self, attr, name, new_val):\n # assumes `name` is a valid estimator name\n new_estimators = list(getattr(self, attr))\n for i, (estimator_name, _) in enumerate(new_estimators):\n if estimator_name == name:\n new_estimators[i] = (name, new_val)\n break\n setattr(self, attr, new_estimators)\n\n def _validate_names(self, names):\n if len(set(names)) != len(names):\n raise ValueError('Names provided are not unique: '\n '{0!r}'.format(list(names)))\n invalid_names = set(names).intersection(self.get_params(deep=False))\n if invalid_names:\n raise ValueError('Estimator names conflict with constructor '\n 'arguments: {0!r}'.format(sorted(invalid_names)))\n invalid_names = [name for name in names if '__' in name]\n if invalid_names:\n raise ValueError('Estimator names must not contain __: got '\n '{0!r}'.format(invalid_names))", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_IffHasAttrDescriptor", + "name": "_IffHasAttrDescriptor", + "qname": "sklearn.utils.metaestimators._IffHasAttrDescriptor", + "decorators": [], + "superclasses": [], + "methods": [ + "scikit-learn/sklearn.utils.metaestimators/_IffHasAttrDescriptor/__init__", + "scikit-learn/sklearn.utils.metaestimators/_IffHasAttrDescriptor/__get__" + ], + "is_public": false, + "reexported_by": [], + "description": "Implements a conditional property using the descriptor protocol.\n\nUsing this class to create a decorator will raise an ``AttributeError``\nif none of the delegates (specified in ``delegate_names``) is an attribute\nof the base object or the first found delegate does not have an attribute\n``attribute_name``.\n\nThis allows ducktyping of the decorated method based on\n``delegate.attribute_name``. Here ``delegate`` is the first item in\n``delegate_names`` for which ``hasattr(object, delegate) is True``.\n\nSee https://docs.python.org/3/howto/descriptor.html for an explanation of\ndescriptors.", + "docstring": "Implements a conditional property using the descriptor protocol.\n\nUsing this class to create a decorator will raise an ``AttributeError``\nif none of the delegates (specified in ``delegate_names``) is an attribute\nof the base object or the first found delegate does not have an attribute\n``attribute_name``.\n\nThis allows ducktyping of the decorated method based on\n``delegate.attribute_name``. Here ``delegate`` is the first item in\n``delegate_names`` for which ``hasattr(object, delegate) is True``.\n\nSee https://docs.python.org/3/howto/descriptor.html for an explanation of\ndescriptors.", + "code": "class _IffHasAttrDescriptor:\n \"\"\"Implements a conditional property using the descriptor protocol.\n\n Using this class to create a decorator will raise an ``AttributeError``\n if none of the delegates (specified in ``delegate_names``) is an attribute\n of the base object or the first found delegate does not have an attribute\n ``attribute_name``.\n\n This allows ducktyping of the decorated method based on\n ``delegate.attribute_name``. 
Here ``delegate`` is the first item in\n ``delegate_names`` for which ``hasattr(object, delegate) is True``.\n\n See https://docs.python.org/3/howto/descriptor.html for an explanation of\n descriptors.\n \"\"\"\n def __init__(self, fn, delegate_names, attribute_name):\n self.fn = fn\n self.delegate_names = delegate_names\n self.attribute_name = attribute_name\n\n # update the docstring of the descriptor\n update_wrapper(self, fn)\n\n def __get__(self, obj, type=None):\n # raise an AttributeError if the attribute is not present on the object\n if obj is not None:\n # delegate only on instances, not the classes.\n # this is to allow access to the docstrings.\n for delegate_name in self.delegate_names:\n try:\n delegate = attrgetter(delegate_name)(obj)\n except AttributeError:\n continue\n else:\n getattr(delegate, self.attribute_name)\n break\n else:\n attrgetter(self.delegate_names[-1])(obj)\n\n # lambda, but not partial, allows help() to work with update_wrapper\n out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)\n # update the docstring of the returned function\n update_wrapper(out, self.fn)\n return out", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_LineSearchError", + "name": "_LineSearchError", + "qname": "sklearn.utils.optimize._LineSearchError", + "decorators": [], + "superclasses": ["RuntimeError"], + "methods": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "class _LineSearchError(RuntimeError):\n pass", + "instance_attributes": [] + }, + { + "id": "scikit-learn/sklearn.utils/Bunch", + "name": "Bunch", + "qname": "sklearn.utils.Bunch", + "decorators": [], + "superclasses": ["dict"], + "methods": [ + "scikit-learn/sklearn.utils/Bunch/__init__", + "scikit-learn/sklearn.utils/Bunch/__setattr__", + "scikit-learn/sklearn.utils/Bunch/__dir__", + "scikit-learn/sklearn.utils/Bunch/__getattr__", + "scikit-learn/sklearn.utils/Bunch/__setstate__" + ], + "is_public": true, + "reexported_by": [], + "description": "Container object exposing keys as attributes.\n\nBunch objects are sometimes used as an output for functions and methods.\nThey extend dictionaries by enabling values to be accessed by key,\n`bunch[\"value_key\"]`, or by an attribute, `bunch.value_key`.", + "docstring": "Container object exposing keys as attributes.\n\nBunch objects are sometimes used as an output for functions and methods.\nThey extend dictionaries by enabling values to be accessed by key,\n`bunch[\"value_key\"]`, or by an attribute, `bunch.value_key`.\n\nExamples\n--------\n>>> b = Bunch(a=1, b=2)\n>>> b['b']\n2\n>>> b.b\n2\n>>> b.a = 3\n>>> b['a']\n3\n>>> b.c = 6\n>>> b['c']\n6", + "code": "class Bunch(dict):\n \"\"\"Container object exposing keys as attributes.\n\n Bunch objects are sometimes used as an output for functions and methods.\n They extend dictionaries by enabling values to be accessed by key,\n `bunch[\"value_key\"]`, or by an attribute, `bunch.value_key`.\n\n Examples\n --------\n >>> b = Bunch(a=1, b=2)\n >>> b['b']\n 2\n >>> b.b\n 2\n >>> b.a = 3\n >>> b['a']\n 3\n >>> b.c = 6\n >>> b['c']\n 6\n \"\"\"\n\n def __init__(self, **kwargs):\n super().__init__(kwargs)\n\n def __setattr__(self, key, value):\n self[key] = value\n\n def __dir__(self):\n return self.keys()\n\n def __getattr__(self, key):\n try:\n return self[key]\n except KeyError:\n raise AttributeError(key)\n\n def __setstate__(self, state):\n # Bunch pickles generated with scikit-learn 0.16.* have an non\n # empty __dict__. 
This causes a surprising behaviour when\n # loading these pickles scikit-learn 0.17: reading bunch.key\n # uses __dict__ but assigning to bunch.key use __setattr__ and\n # only changes bunch['key']. More details can be found at:\n # https://github.com/scikit-learn/scikit-learn/issues/6196.\n # Overriding __setstate__ to be a noop has the effect of\n # ignoring the pickled __dict__\n pass", + "instance_attributes": [] + } + ], + "functions": [ + { + "id": "scikit-learn/sklearn.__check_build.setup/configuration", + "name": "configuration", + "qname": "sklearn.__check_build.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.__check_build.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.__check_build.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.__check_build.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.__check_build.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package='', top_path=None):\n from numpy.distutils.misc_util import Configuration\n config = Configuration('__check_build', parent_package, top_path)\n config.add_extension('_check_build',\n sources=['_check_build.pyx'],\n include_dirs=[numpy.get_include()])\n\n return config" + }, + { + "id": "scikit-learn/sklearn.__check_build/raise_build_error", + "name": "raise_build_error", + "qname": "sklearn.__check_build.raise_build_error", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.__check_build/raise_build_error/e", + "name": "e", + "qname": "sklearn.__check_build.raise_build_error.e", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def raise_build_error(e):\n # Raise a comprehensible error and list the contents of the\n # directory to help debugging on the mailing list.\n local_dir = os.path.split(__file__)[0]\n msg = STANDARD_MSG\n if local_dir == \"sklearn/__check_build\":\n # Picking up the local install: this will work only if the\n # install is an 'inplace build'\n msg = INPLACE_MSG\n dir_content = list()\n for i, filename in enumerate(os.listdir(local_dir)):\n if ((i + 1) % 3):\n dir_content.append(filename.ljust(26))\n else:\n dir_content.append(filename + '\\n')\n raise ImportError(\"\"\"%s\n___________________________________________________________________________\nContents of %s:\n%s\n___________________________________________________________________________\nIt seems that scikit-learn has not been built correctly.\n\nIf you have installed scikit-learn from source, please do not forget\nto build the package before using it: run `python setup.py install` or\n`make` in the source directory.\n%s\"\"\" % (e, local_dir, ''.join(dir_content).strip(), msg))" + }, + { + "id": "scikit-learn/sklearn._build_utils.openmp_helpers/check_openmp_support", + 
"name": "check_openmp_support", + "qname": "sklearn._build_utils.openmp_helpers.check_openmp_support", + "decorators": [], + "parameters": [], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check whether OpenMP test code can be compiled and run", + "docstring": "Check whether OpenMP test code can be compiled and run", + "code": "def check_openmp_support():\n \"\"\"Check whether OpenMP test code can be compiled and run\"\"\"\n if \"PYODIDE_PACKAGE_ABI\" in os.environ:\n # Pyodide doesn't support OpenMP\n return False\n code = textwrap.dedent(\n \"\"\"\\\n #include \n #include \n int main(void) {\n #pragma omp parallel\n printf(\"nthreads=%d\\\\n\", omp_get_num_threads());\n return 0;\n }\n \"\"\")\n\n extra_preargs = os.getenv('LDFLAGS', None)\n if extra_preargs is not None:\n extra_preargs = extra_preargs.strip().split(\" \")\n extra_preargs = [\n flag for flag in extra_preargs\n if flag.startswith(('-L', '-Wl,-rpath', '-l'))]\n\n extra_postargs = get_openmp_flag\n\n try:\n output = compile_test_program(code,\n extra_preargs=extra_preargs,\n extra_postargs=extra_postargs)\n\n if output and 'nthreads=' in output[0]:\n nthreads = int(output[0].strip().split('=')[1])\n openmp_supported = len(output) == nthreads\n elif \"PYTHON_CROSSENV\" in os.environ:\n # Since we can't run the test program when cross-compiling\n # assume that openmp is supported if the program can be\n # compiled.\n openmp_supported = True\n else:\n openmp_supported = False\n\n except (CompileError, LinkError, subprocess.CalledProcessError):\n openmp_supported = False\n\n if not openmp_supported:\n if os.getenv(\"SKLEARN_FAIL_NO_OPENMP\"):\n raise CompileError(\"Failed to build with OpenMP\")\n else:\n message = textwrap.dedent(\n \"\"\"\n\n ***********\n * WARNING *\n ***********\n\n It seems that scikit-learn cannot be built with OpenMP.\n\n - Make sure you have followed the installation instructions:\n\n https://scikit-learn.org/dev/developers/advanced_installation.html\n\n - If your compiler supports OpenMP but you still see this\n message, please submit a bug report at:\n\n https://github.com/scikit-learn/scikit-learn/issues\n\n - The build will continue with OpenMP-based parallelism\n disabled. 
Note however that some estimators will run in\n sequential mode instead of leveraging thread-based\n parallelism.\n\n ***\n \"\"\")\n warnings.warn(message)\n\n return openmp_supported" + }, + { + "id": "scikit-learn/sklearn._build_utils.openmp_helpers/get_openmp_flag", + "name": "get_openmp_flag", + "qname": "sklearn._build_utils.openmp_helpers.get_openmp_flag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._build_utils.openmp_helpers/get_openmp_flag/compiler", + "name": "compiler", + "qname": "sklearn._build_utils.openmp_helpers.get_openmp_flag.compiler", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def get_openmp_flag(compiler):\n if hasattr(compiler, 'compiler'):\n compiler = compiler.compiler[0]\n else:\n compiler = compiler.__class__.__name__\n\n if sys.platform == \"win32\" and ('icc' in compiler or 'icl' in compiler):\n return ['/Qopenmp']\n elif sys.platform == \"win32\":\n return ['/openmp']\n elif sys.platform in (\"darwin\", \"linux\") and \"icc\" in compiler:\n return ['-qopenmp']\n elif sys.platform == \"darwin\" and 'openmp' in os.getenv('CPPFLAGS', ''):\n # -fopenmp can't be passed as compile flag when using Apple-clang.\n # OpenMP support has to be enabled during preprocessing.\n #\n # For example, our macOS wheel build jobs use the following environment\n # variables to build with Apple-clang and the brew installed \"libomp\":\n #\n # export CPPFLAGS=\"$CPPFLAGS -Xpreprocessor -fopenmp\"\n # export CFLAGS=\"$CFLAGS -I/usr/local/opt/libomp/include\"\n # export CXXFLAGS=\"$CXXFLAGS -I/usr/local/opt/libomp/include\"\n # export LDFLAGS=\"$LDFLAGS -Wl,-rpath,/usr/local/opt/libomp/lib\n # -L/usr/local/opt/libomp/lib -lomp\"\n return []\n # Default flag for GCC and clang:\n return ['-fopenmp']" + }, + { + "id": "scikit-learn/sklearn._build_utils.pre_build_helpers/_get_compiler", + "name": "_get_compiler", + "qname": "sklearn._build_utils.pre_build_helpers._get_compiler", + "decorators": [], + "parameters": [], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get a compiler equivalent to the one that will be used to build sklearn\n\nHandles compiler specified as follows:\n - python setup.py build_ext --compiler=\n - CC= python setup.py build_ext", + "docstring": "Get a compiler equivalent to the one that will be used to build sklearn\n\nHandles compiler specified as follows:\n - python setup.py build_ext --compiler=\n - CC= python setup.py build_ext", + "code": "def _get_compiler():\n \"\"\"Get a compiler equivalent to the one that will be used to build sklearn\n\n Handles compiler specified as follows:\n - python setup.py build_ext --compiler=\n - CC= python setup.py build_ext\n \"\"\"\n dist = Distribution({'script_name': os.path.basename(sys.argv[0]),\n 'script_args': sys.argv[1:],\n 'cmdclass': {'config_cc': config_cc}})\n dist.parse_config_files()\n dist.parse_command_line()\n\n cmd_opts = dist.command_options.get('build_ext')\n if cmd_opts is not None and 'compiler' in cmd_opts:\n compiler = cmd_opts['compiler'][1]\n else:\n compiler = None\n\n ccompiler = new_compiler(compiler=compiler)\n customize_compiler(ccompiler)\n\n return ccompiler" + }, + { + "id": "scikit-learn/sklearn._build_utils.pre_build_helpers/basic_check_build", + "name": "basic_check_build", + "qname": 
"sklearn._build_utils.pre_build_helpers.basic_check_build", + "decorators": [], + "parameters": [], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check basic compilation and linking of C code", + "docstring": "Check basic compilation and linking of C code", + "code": "def basic_check_build():\n \"\"\"Check basic compilation and linking of C code\"\"\"\n if \"PYODIDE_PACKAGE_ABI\" in os.environ:\n # The following check won't work in pyodide\n return\n code = textwrap.dedent(\n \"\"\"\\\n #include \n int main(void) {\n return 0;\n }\n \"\"\")\n compile_test_program(code)" + }, + { + "id": "scikit-learn/sklearn._build_utils.pre_build_helpers/compile_test_program", + "name": "compile_test_program", + "qname": "sklearn._build_utils.pre_build_helpers.compile_test_program", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._build_utils.pre_build_helpers/compile_test_program/code", + "name": "code", + "qname": "sklearn._build_utils.pre_build_helpers.compile_test_program.code", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn._build_utils.pre_build_helpers/compile_test_program/extra_preargs", + "name": "extra_preargs", + "qname": "sklearn._build_utils.pre_build_helpers.compile_test_program.extra_preargs", + "default_value": "[]", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn._build_utils.pre_build_helpers/compile_test_program/extra_postargs", + "name": "extra_postargs", + "qname": "sklearn._build_utils.pre_build_helpers.compile_test_program.extra_postargs", + "default_value": "[]", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check that some C code can be compiled and run", + "docstring": "Check that some C code can be compiled and run", + "code": "def compile_test_program(code, extra_preargs=[], extra_postargs=[]):\n \"\"\"Check that some C code can be compiled and run\"\"\"\n ccompiler = _get_compiler()\n\n # extra_(pre/post)args can be a callable to make it possible to get its\n # value from the compiler\n if callable(extra_preargs):\n extra_preargs = extra_preargs(ccompiler)\n if callable(extra_postargs):\n extra_postargs = extra_postargs(ccompiler)\n\n start_dir = os.path.abspath('.')\n\n with tempfile.TemporaryDirectory() as tmp_dir:\n try:\n os.chdir(tmp_dir)\n\n # Write test program\n with open('test_program.c', 'w') as f:\n f.write(code)\n\n os.mkdir('objects')\n\n # Compile, test program\n ccompiler.compile(['test_program.c'], output_dir='objects',\n extra_postargs=extra_postargs)\n\n # Link test program\n objects = glob.glob(\n os.path.join('objects', '*' + ccompiler.obj_extension))\n ccompiler.link_executable(objects, 'test_program',\n extra_preargs=extra_preargs,\n extra_postargs=extra_postargs)\n\n if \"PYTHON_CROSSENV\" not in os.environ:\n # Run test program if not cross compiling\n # will raise a CalledProcessError if return code was non-zero\n output = subprocess.check_output('./test_program')\n output = output.decode(\n sys.stdout.encoding or 'utf-8').splitlines()\n else:\n # Return an empty output if we are cross compiling\n # 
as we cannot run the test_program\n output = []\n except Exception:\n raise\n finally:\n os.chdir(start_dir)\n\n return output" + }, + { + "id": "scikit-learn/sklearn._build_utils/_check_cython_version", + "name": "_check_cython_version", + "qname": "sklearn._build_utils._check_cython_version", + "decorators": [], + "parameters": [], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_cython_version():\n message = ('Please install Cython with a version >= {0} in order '\n 'to build a scikit-learn from source.').format(\n CYTHON_MIN_VERSION)\n try:\n import Cython\n except ModuleNotFoundError as e:\n # Re-raise with more informative error message instead:\n raise ModuleNotFoundError(message) from e\n\n if LooseVersion(Cython.__version__) < CYTHON_MIN_VERSION:\n message += (' The current version of Cython is {} installed in {}.'\n .format(Cython.__version__, Cython.__path__))\n raise ValueError(message)" + }, + { + "id": "scikit-learn/sklearn._build_utils/cythonize_extensions", + "name": "cythonize_extensions", + "qname": "sklearn._build_utils.cythonize_extensions", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._build_utils/cythonize_extensions/top_path", + "name": "top_path", + "qname": "sklearn._build_utils.cythonize_extensions.top_path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn._build_utils/cythonize_extensions/config", + "name": "config", + "qname": "sklearn._build_utils.cythonize_extensions.config", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check that a recent Cython is available and cythonize extensions", + "docstring": "Check that a recent Cython is available and cythonize extensions", + "code": "def cythonize_extensions(top_path, config):\n \"\"\"Check that a recent Cython is available and cythonize extensions\"\"\"\n _check_cython_version()\n from Cython.Build import cythonize\n\n # Fast fail before cythonization if compiler fails compiling basic test\n # code even without OpenMP\n basic_check_build()\n\n # check simple compilation with OpenMP. If it fails scikit-learn will be\n # built without OpenMP and the test test_openmp_supported in the test suite\n # will fail.\n # `check_openmp_support` compiles a small test program to see if the\n # compilers are properly configured to build with OpenMP. 
This is expensive\n # and we only want to call this function once.\n # The result of this check is cached as a private attribute on the sklearn\n # module (only at build-time) to be used twice:\n # - First to set the value of SKLEARN_OPENMP_PARALLELISM_ENABLED, the\n # cython build-time variable passed to the cythonize() call.\n # - Then in the build_ext subclass defined in the top-level setup.py file\n # to actually build the compiled extensions with OpenMP flags if needed.\n sklearn._OPENMP_SUPPORTED = check_openmp_support()\n\n n_jobs = 1\n with contextlib.suppress(ImportError):\n import joblib\n if LooseVersion(joblib.__version__) > LooseVersion(\"0.13.0\"):\n # earlier joblib versions don't account for CPU affinity\n # constraints, and may over-estimate the number of available\n # CPU particularly in CI (cf loky#114)\n n_jobs = joblib.cpu_count()\n\n config.ext_modules = cythonize(\n config.ext_modules,\n nthreads=n_jobs,\n compile_time_env={\n 'SKLEARN_OPENMP_PARALLELISM_ENABLED': sklearn._OPENMP_SUPPORTED},\n compiler_directives={'language_level': 3})" + }, + { + "id": "scikit-learn/sklearn._build_utils/gen_from_templates", + "name": "gen_from_templates", + "qname": "sklearn._build_utils.gen_from_templates", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._build_utils/gen_from_templates/templates", + "name": "templates", + "qname": "sklearn._build_utils.gen_from_templates.templates", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn._build_utils/gen_from_templates/top_path", + "name": "top_path", + "qname": "sklearn._build_utils.gen_from_templates.top_path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate cython files from a list of templates", + "docstring": "Generate cython files from a list of templates", + "code": "def gen_from_templates(templates, top_path):\n \"\"\"Generate cython files from a list of templates\"\"\"\n # Lazy import because cython is not a runtime dependency.\n from Cython import Tempita\n\n for template in templates:\n outfile = template.replace('.tp', '')\n\n # if the template is not updated, no need to output the cython file\n if not (os.path.exists(outfile) and\n os.stat(template).st_mtime < os.stat(outfile).st_mtime):\n\n with open(template, \"r\") as f:\n tmpl = f.read()\n\n tmpl_ = Tempita.sub(tmpl)\n\n with open(outfile, \"w\") as f:\n f.write(tmpl_)" + }, + { + "id": "scikit-learn/sklearn._config/config_context", + "name": "config_context", + "qname": "sklearn._config.config_context", + "decorators": ["contextmanager"], + "parameters": [ + { + "id": "scikit-learn/sklearn._config/config_context/new_config", + "name": "new_config", + "qname": "sklearn._config.config_context.new_config", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Context manager for global scikit-learn configuration", + "docstring": "Context manager for global scikit-learn configuration\n\nParameters\n----------\nassume_finite : bool, default=False\n If True, 
validation for finiteness will be skipped,\n saving time, but leading to potential crashes. If\n False, validation for finiteness will be performed,\n avoiding error. Global default: False.\n\nworking_memory : int, default=1024\n If set, scikit-learn will attempt to limit the size of temporary arrays\n to this number of MiB (per job when parallelised), often saving both\n computation time and memory on expensive operations that can be\n performed in chunks. Global default: 1024.\n\nprint_changed_only : bool, default=True\n If True, only the parameters that were set to non-default\n values will be printed when printing an estimator. For example,\n ``print(SVC())`` while True will only print 'SVC()', but would print\n 'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters\n when False. Default is True.\n\n .. versionchanged:: 0.23\n Default changed from False to True.\n\ndisplay : {'text', 'diagram'}, default='text'\n If 'diagram', estimators will be displayed as a diagram in a Jupyter\n lab or notebook context. If 'text', estimators will be displayed as\n text. Default is 'text'.\n\n .. versionadded:: 0.23\n\nNotes\n-----\nAll settings, not just those presently modified, will be returned to\ntheir previous values when the context manager is exited. This is not\nthread-safe.\n\nExamples\n--------\n>>> import sklearn\n>>> from sklearn.utils.validation import assert_all_finite\n>>> with sklearn.config_context(assume_finite=True):\n... assert_all_finite([float('nan')])\n>>> with sklearn.config_context(assume_finite=True):\n... with sklearn.config_context(assume_finite=False):\n... assert_all_finite([float('nan')])\nTraceback (most recent call last):\n...\nValueError: Input contains NaN, ...\n\nSee Also\n--------\nset_config : Set global scikit-learn configuration.\nget_config : Retrieve current values of the global configuration.", + "code": "@contextmanager\ndef config_context(**new_config):\n \"\"\"Context manager for global scikit-learn configuration\n\n Parameters\n ----------\n assume_finite : bool, default=False\n If True, validation for finiteness will be skipped,\n saving time, but leading to potential crashes. If\n False, validation for finiteness will be performed,\n avoiding error. Global default: False.\n\n working_memory : int, default=1024\n If set, scikit-learn will attempt to limit the size of temporary arrays\n to this number of MiB (per job when parallelised), often saving both\n computation time and memory on expensive operations that can be\n performed in chunks. Global default: 1024.\n\n print_changed_only : bool, default=True\n If True, only the parameters that were set to non-default\n values will be printed when printing an estimator. For example,\n ``print(SVC())`` while True will only print 'SVC()', but would print\n 'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters\n when False. Default is True.\n\n .. versionchanged:: 0.23\n Default changed from False to True.\n\n display : {'text', 'diagram'}, default='text'\n If 'diagram', estimators will be displayed as a diagram in a Jupyter\n lab or notebook context. If 'text', estimators will be displayed as\n text. Default is 'text'.\n\n .. versionadded:: 0.23\n\n Notes\n -----\n All settings, not just those presently modified, will be returned to\n their previous values when the context manager is exited. 
This is not\n thread-safe.\n\n Examples\n --------\n >>> import sklearn\n >>> from sklearn.utils.validation import assert_all_finite\n >>> with sklearn.config_context(assume_finite=True):\n ... assert_all_finite([float('nan')])\n >>> with sklearn.config_context(assume_finite=True):\n ... with sklearn.config_context(assume_finite=False):\n ... assert_all_finite([float('nan')])\n Traceback (most recent call last):\n ...\n ValueError: Input contains NaN, ...\n\n See Also\n --------\n set_config : Set global scikit-learn configuration.\n get_config : Retrieve current values of the global configuration.\n \"\"\"\n old_config = get_config().copy()\n set_config(**new_config)\n\n try:\n yield\n finally:\n set_config(**old_config)" + }, + { + "id": "scikit-learn/sklearn._config/get_config", + "name": "get_config", + "qname": "sklearn._config.get_config", + "decorators": [], + "parameters": [], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Retrieve current values for configuration set by :func:`set_config`", + "docstring": "Retrieve current values for configuration set by :func:`set_config`\n\nReturns\n-------\nconfig : dict\n Keys are parameter names that can be passed to :func:`set_config`.\n\nSee Also\n--------\nconfig_context : Context manager for global scikit-learn configuration.\nset_config : Set global scikit-learn configuration.", + "code": "def get_config():\n \"\"\"Retrieve current values for configuration set by :func:`set_config`\n\n Returns\n -------\n config : dict\n Keys are parameter names that can be passed to :func:`set_config`.\n\n See Also\n --------\n config_context : Context manager for global scikit-learn configuration.\n set_config : Set global scikit-learn configuration.\n \"\"\"\n return _global_config.copy()" + }, + { + "id": "scikit-learn/sklearn._config/set_config", + "name": "set_config", + "qname": "sklearn._config.set_config", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._config/set_config/assume_finite", + "name": "assume_finite", + "qname": "sklearn._config.set_config.assume_finite", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "None", + "description": "If True, validation for finiteness will be skipped,\nsaving time, but leading to potential crashes. If\nFalse, validation for finiteness will be performed,\navoiding error. Global default: False.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn._config/set_config/working_memory", + "name": "working_memory", + "qname": "sklearn._config.set_config.working_memory", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "If set, scikit-learn will attempt to limit the size of temporary arrays\nto this number of MiB (per job when parallelised), often saving both\ncomputation time and memory on expensive operations that can be\nperformed in chunks. Global default: 1024.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn._config/set_config/print_changed_only", + "name": "print_changed_only", + "qname": "sklearn._config.set_config.print_changed_only", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "None", + "description": "If True, only the parameters that were set to non-default\nvalues will be printed when printing an estimator. For example,\n``print(SVC())`` while True will only print 'SVC()' while the default\nbehaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with\nall the non-changed parameters.\n\n.. versionadded:: 0.21" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn._config/set_config/display", + "name": "display", + "qname": "sklearn._config.set_config.display", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'text', 'diagram'}", + "default_value": "None", + "description": "If 'diagram', estimators will be displayed as a diagram in a Jupyter\nlab or notebook context. If 'text', estimators will be displayed as\ntext. Default is 'text'.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "EnumType", + "values": ["diagram", "text"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Set global scikit-learn configuration\n\n.. versionadded:: 0.19", + "docstring": "Set global scikit-learn configuration\n\n.. versionadded:: 0.19\n\nParameters\n----------\nassume_finite : bool, default=None\n If True, validation for finiteness will be skipped,\n saving time, but leading to potential crashes. If\n False, validation for finiteness will be performed,\n avoiding error. Global default: False.\n\n .. versionadded:: 0.19\n\nworking_memory : int, default=None\n If set, scikit-learn will attempt to limit the size of temporary arrays\n to this number of MiB (per job when parallelised), often saving both\n computation time and memory on expensive operations that can be\n performed in chunks. Global default: 1024.\n\n .. versionadded:: 0.20\n\nprint_changed_only : bool, default=None\n If True, only the parameters that were set to non-default\n values will be printed when printing an estimator. For example,\n ``print(SVC())`` while True will only print 'SVC()' while the default\n behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with\n all the non-changed parameters.\n\n .. versionadded:: 0.21\n\ndisplay : {'text', 'diagram'}, default=None\n If 'diagram', estimators will be displayed as a diagram in a Jupyter\n lab or notebook context. If 'text', estimators will be displayed as\n text. Default is 'text'.\n\n .. versionadded:: 0.23\n\nSee Also\n--------\nconfig_context : Context manager for global scikit-learn configuration.\nget_config : Retrieve current values of the global configuration.", + "code": "def set_config(assume_finite=None, working_memory=None,\n print_changed_only=None, display=None):\n \"\"\"Set global scikit-learn configuration\n\n .. versionadded:: 0.19\n\n Parameters\n ----------\n assume_finite : bool, default=None\n If True, validation for finiteness will be skipped,\n saving time, but leading to potential crashes. If\n False, validation for finiteness will be performed,\n avoiding error. Global default: False.\n\n .. 
versionadded:: 0.19\n\n working_memory : int, default=None\n If set, scikit-learn will attempt to limit the size of temporary arrays\n to this number of MiB (per job when parallelised), often saving both\n computation time and memory on expensive operations that can be\n performed in chunks. Global default: 1024.\n\n .. versionadded:: 0.20\n\n print_changed_only : bool, default=None\n If True, only the parameters that were set to non-default\n values will be printed when printing an estimator. For example,\n ``print(SVC())`` while True will only print 'SVC()' while the default\n behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with\n all the non-changed parameters.\n\n .. versionadded:: 0.21\n\n display : {'text', 'diagram'}, default=None\n If 'diagram', estimators will be displayed as a diagram in a Jupyter\n lab or notebook context. If 'text', estimators will be displayed as\n text. Default is 'text'.\n\n .. versionadded:: 0.23\n\n See Also\n --------\n config_context : Context manager for global scikit-learn configuration.\n get_config : Retrieve current values of the global configuration.\n \"\"\"\n if assume_finite is not None:\n _global_config['assume_finite'] = assume_finite\n if working_memory is not None:\n _global_config['working_memory'] = working_memory\n if print_changed_only is not None:\n _global_config['print_changed_only'] = print_changed_only\n if display is not None:\n _global_config['display'] = display" + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/deviance", + "name": "deviance", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.deviance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/deviance/self", + "name": "self", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.deviance.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/deviance/y", + "name": "y", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.deviance.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/deviance/y_pred", + "name": "y_pred", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.deviance.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Predicted mean." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/deviance/weights", + "name": "weights", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.deviance.weights", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{int, array of shape (n_samples,)}", + "default_value": "1", + "description": "Weights or exposure to which variance is inverse proportional." 
+ }, + "type": { + "kind": "EnumType", + "values": [] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the deviance.\n\nThe deviance is a weighted sum of the per sample unit deviances,\n:math:`D = \\sum_i s_i \\cdot d(y_i, y_\\textrm{pred}_i)`\nwith weights :math:`s_i` and unit deviance\n:math:`d(y,y_\\textrm{pred})`.\nIn terms of the log-likelihood it is :math:`D = -2\\phi\\cdot\n\\left(loglike(y,y_\\textrm{pred},\\frac{phi}{s})\n- loglike(y,y,\\frac{phi}{s})\\right)`.", + "docstring": "Compute the deviance.\n\nThe deviance is a weighted sum of the per sample unit deviances,\n:math:`D = \\sum_i s_i \\cdot d(y_i, y_\\textrm{pred}_i)`\nwith weights :math:`s_i` and unit deviance\n:math:`d(y,y_\\textrm{pred})`.\nIn terms of the log-likelihood it is :math:`D = -2\\phi\\cdot\n\\left(loglike(y,y_\\textrm{pred},\\frac{phi}{s})\n- loglike(y,y,\\frac{phi}{s})\\right)`.\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values.\n\ny_pred : array of shape (n_samples,)\n Predicted mean.\n\nweights : {int, array of shape (n_samples,)}, default=1\n Weights or exposure to which variance is inverse proportional.", + "code": " def deviance(self, y, y_pred, weights=1):\n r\"\"\"Compute the deviance.\n\n The deviance is a weighted sum of the per sample unit deviances,\n :math:`D = \\sum_i s_i \\cdot d(y_i, y_\\textrm{pred}_i)`\n with weights :math:`s_i` and unit deviance\n :math:`d(y,y_\\textrm{pred})`.\n In terms of the log-likelihood it is :math:`D = -2\\phi\\cdot\n \\left(loglike(y,y_\\textrm{pred},\\frac{phi}{s})\n - loglike(y,y,\\frac{phi}{s})\\right)`.\n\n Parameters\n ----------\n y : array of shape (n_samples,)\n Target values.\n\n y_pred : array of shape (n_samples,)\n Predicted mean.\n\n weights : {int, array of shape (n_samples,)}, default=1\n Weights or exposure to which variance is inverse proportional.\n \"\"\"\n return np.sum(weights * self.unit_deviance(y, y_pred))" + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/deviance_derivative", + "name": "deviance_derivative", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.deviance_derivative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/deviance_derivative/self", + "name": "self", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.deviance_derivative.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/deviance_derivative/y", + "name": "y", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.deviance_derivative.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape (n_samples,)", + "default_value": "", + "description": "Target values." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/deviance_derivative/y_pred", + "name": "y_pred", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.deviance_derivative.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape (n_samples,)", + "default_value": "", + "description": "Predicted mean." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/deviance_derivative/weights", + "name": "weights", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.deviance_derivative.weights", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{int, array of shape (n_samples,)}", + "default_value": "1", + "description": "Weights or exposure to which variance is inverse proportional." + }, + "type": { + "kind": "EnumType", + "values": [] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the derivative of the deviance w.r.t. y_pred.\n\nIt gives :math:`\\frac{\\partial}{\\partial y_\\textrm{pred}}\nD(y, \\y_\\textrm{pred}; weights)`.", + "docstring": "Compute the derivative of the deviance w.r.t. y_pred.\n\nIt gives :math:`\\frac{\\partial}{\\partial y_\\textrm{pred}}\nD(y, \\y_\\textrm{pred}; weights)`.\n\nParameters\n----------\ny : array, shape (n_samples,)\n Target values.\n\ny_pred : array, shape (n_samples,)\n Predicted mean.\n\nweights : {int, array of shape (n_samples,)}, default=1\n Weights or exposure to which variance is inverse proportional.", + "code": " def deviance_derivative(self, y, y_pred, weights=1):\n r\"\"\"Compute the derivative of the deviance w.r.t. 
y_pred.\n\n It gives :math:`\\frac{\\partial}{\\partial y_\\textrm{pred}}\n D(y, y_\\textrm{pred}; weights)`.\n\n Parameters\n ----------\n y : array, shape (n_samples,)\n Target values.\n\n y_pred : array, shape (n_samples,)\n Predicted mean.\n\n weights : {int, array of shape (n_samples,)}, default=1\n Weights or exposure to which variance is inverse proportional.\n \"\"\"\n return weights * self.unit_deviance_derivative(y, y_pred)" + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/in_y_range", + "name": "in_y_range", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.in_y_range", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/in_y_range/self", + "name": "self", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.in_y_range.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/in_y_range/y", + "name": "y", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.in_y_range.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns ``True`` if y is in the valid range of Y~EDM.", + "docstring": "Returns ``True`` if y is in the valid range of Y~EDM.\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values.", + "code": " def in_y_range(self, y):\n \"\"\"Returns ``True`` if y is in the valid range of Y~EDM.\n\n Parameters\n ----------\n y : array of shape (n_samples,)\n Target values.\n \"\"\"\n # Note that currently supported distributions have +inf upper bound\n\n if not isinstance(self._lower_bound, DistributionBoundary):\n raise TypeError('_lower_bound attribute must be of type '\n 'DistributionBoundary')\n\n if self._lower_bound.inclusive:\n return np.greater_equal(y, self._lower_bound.value)\n else:\n return np.greater(y, self._lower_bound.value)" + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_deviance", + "name": "unit_deviance", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.unit_deviance", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_deviance/self", + "name": "self", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.unit_deviance.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_deviance/y", + "name": "y", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.unit_deviance.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Target values."
+ }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_deviance/y_pred", + "name": "y_pred", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.unit_deviance.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Predicted mean." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_deviance/check_input", + "name": "check_input", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.unit_deviance.check_input", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True raise an exception on invalid y or y_pred values, otherwise\nthey will be propagated as NaN." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the unit deviance.\n\nThe unit_deviance :math:`d(y,y_\\textrm{pred})` can be defined by the\nlog-likelihood as\n:math:`d(y,y_\\textrm{pred}) = -2\\phi\\cdot\n\\left(loglike(y,y_\\textrm{pred},\\phi) - loglike(y,y,\\phi)\\right).`", + "docstring": "Compute the unit deviance.\n\nThe unit_deviance :math:`d(y,y_\\textrm{pred})` can be defined by the\nlog-likelihood as\n:math:`d(y,y_\\textrm{pred}) = -2\\phi\\cdot\n\\left(loglike(y,y_\\textrm{pred},\\phi) - loglike(y,y,\\phi)\\right).`\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values.\n\ny_pred : array of shape (n_samples,)\n Predicted mean.\n\ncheck_input : bool, default=False\n If True raise an exception on invalid y or y_pred values, otherwise\n they will be propagated as NaN.\nReturns\n-------\ndeviance: array of shape (n_samples,)\n Computed deviance", + "code": " @abstractmethod\n def unit_deviance(self, y, y_pred, check_input=False):\n r\"\"\"Compute the unit deviance.\n\n The unit_deviance :math:`d(y,y_\\textrm{pred})` can be defined by the\n log-likelihood as\n :math:`d(y,y_\\textrm{pred}) = -2\\phi\\cdot\n \\left(loglike(y,y_\\textrm{pred},\\phi) - loglike(y,y,\\phi)\\right).`\n\n Parameters\n ----------\n y : array of shape (n_samples,)\n Target values.\n\n y_pred : array of shape (n_samples,)\n Predicted mean.\n\n check_input : bool, default=False\n If True raise an exception on invalid y or y_pred values, otherwise\n they will be propagated as NaN.\n Returns\n -------\n deviance: array of shape (n_samples,)\n Computed deviance\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_deviance_derivative", + "name": "unit_deviance_derivative", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.unit_deviance_derivative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_deviance_derivative/self", + "name": "self", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.unit_deviance_derivative.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_deviance_derivative/y", + "name": "y", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.unit_deviance_derivative.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_deviance_derivative/y_pred", + "name": "y_pred", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.unit_deviance_derivative.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Predicted mean." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the derivative of the unit deviance w.r.t. y_pred.\n\nThe derivative of the unit deviance is given by\n:math:`\\frac{\\partial}{\\partialy_\\textrm{pred}}d(y,y_\\textrm{pred})\n = -2\\frac{y-y_\\textrm{pred}}{v(y_\\textrm{pred})}`\nwith unit variance :math:`v(y_\\textrm{pred})`.", + "docstring": "Compute the derivative of the unit deviance w.r.t. y_pred.\n\nThe derivative of the unit deviance is given by\n:math:`\\frac{\\partial}{\\partialy_\\textrm{pred}}d(y,y_\\textrm{pred})\n = -2\\frac{y-y_\\textrm{pred}}{v(y_\\textrm{pred})}`\nwith unit variance :math:`v(y_\\textrm{pred})`.\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values.\n\ny_pred : array of shape (n_samples,)\n Predicted mean.", + "code": " def unit_deviance_derivative(self, y, y_pred):\n r\"\"\"Compute the derivative of the unit deviance w.r.t. y_pred.\n\n The derivative of the unit deviance is given by\n :math:`\\frac{\\partial}{\\partialy_\\textrm{pred}}d(y,y_\\textrm{pred})\n = -2\\frac{y-y_\\textrm{pred}}{v(y_\\textrm{pred})}`\n with unit variance :math:`v(y_\\textrm{pred})`.\n\n Parameters\n ----------\n y : array of shape (n_samples,)\n Target values.\n\n y_pred : array of shape (n_samples,)\n Predicted mean.\n \"\"\"\n return -2 * (y - y_pred) / self.unit_variance(y_pred)" + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_variance", + "name": "unit_variance", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.unit_variance", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_variance/self", + "name": "self", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.unit_variance.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/ExponentialDispersionModel/unit_variance/y_pred", + "name": "y_pred", + "qname": "sklearn._loss.glm_distribution.ExponentialDispersionModel.unit_variance.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Predicted mean." 
+ }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the unit variance function.\n\nThe unit variance :math:`v(y_\\textrm{pred})` determines the variance as\na function of the mean :math:`y_\\textrm{pred}` by\n:math:`\\mathrm{Var}[Y_i] = \\phi/s_i*v(y_\\textrm{pred}_i)`.\nIt can also be derived from the unit deviance\n:math:`d(y,y_\\textrm{pred})` as\n\n.. math:: v(y_\\textrm{pred}) = \\frac{2}{\n \\frac{\\partial^2 d(y,y_\\textrm{pred})}{\n \\partialy_\\textrm{pred}^2}}\\big|_{y=y_\\textrm{pred}}\n\nSee also :func:`variance`.", + "docstring": "Compute the unit variance function.\n\nThe unit variance :math:`v(y_\\textrm{pred})` determines the variance as\na function of the mean :math:`y_\\textrm{pred}` by\n:math:`\\mathrm{Var}[Y_i] = \\phi/s_i*v(y_\\textrm{pred}_i)`.\nIt can also be derived from the unit deviance\n:math:`d(y,y_\\textrm{pred})` as\n\n.. math:: v(y_\\textrm{pred}) = \\frac{2}{\n \\frac{\\partial^2 d(y,y_\\textrm{pred})}{\n \\partialy_\\textrm{pred}^2}}\\big|_{y=y_\\textrm{pred}}\n\nSee also :func:`variance`.\n\nParameters\n----------\ny_pred : array of shape (n_samples,)\n Predicted mean.", + "code": " @abstractmethod\n def unit_variance(self, y_pred):\n r\"\"\"Compute the unit variance function.\n\n The unit variance :math:`v(y_\\textrm{pred})` determines the variance as\n a function of the mean :math:`y_\\textrm{pred}` by\n :math:`\\mathrm{Var}[Y_i] = \\phi/s_i*v(y_\\textrm{pred}_i)`.\n It can also be derived from the unit deviance\n :math:`d(y,y_\\textrm{pred})` as\n\n .. math:: v(y_\\textrm{pred}) = \\frac{2}{\n \\frac{\\partial^2 d(y,y_\\textrm{pred})}{\n \\partialy_\\textrm{pred}^2}}\\big|_{y=y_\\textrm{pred}}\n\n See also :func:`variance`.\n\n Parameters\n ----------\n y_pred : array of shape (n_samples,)\n Predicted mean.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/GammaDistribution/__init__", + "name": "__init__", + "qname": "sklearn._loss.glm_distribution.GammaDistribution.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/GammaDistribution/__init__/self", + "name": "self", + "qname": "sklearn._loss.glm_distribution.GammaDistribution.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Class for the Gamma distribution.", + "docstring": "", + "code": " def __init__(self):\n super().__init__(power=2)" + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/InverseGaussianDistribution/__init__", + "name": "__init__", + "qname": "sklearn._loss.glm_distribution.InverseGaussianDistribution.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/InverseGaussianDistribution/__init__/self", + "name": "self", + "qname": "sklearn._loss.glm_distribution.InverseGaussianDistribution.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Class for the scaled InverseGaussianDistribution distribution.", + "docstring": "", + "code": " def __init__(self):\n super().__init__(power=3)" + }, + { 
+ "id": "scikit-learn/sklearn._loss.glm_distribution/NormalDistribution/__init__", + "name": "__init__", + "qname": "sklearn._loss.glm_distribution.NormalDistribution.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/NormalDistribution/__init__/self", + "name": "self", + "qname": "sklearn._loss.glm_distribution.NormalDistribution.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Class for the Normal (aka Gaussian) distribution.", + "docstring": "", + "code": " def __init__(self):\n super().__init__(power=0)" + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/PoissonDistribution/__init__", + "name": "__init__", + "qname": "sklearn._loss.glm_distribution.PoissonDistribution.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/PoissonDistribution/__init__/self", + "name": "self", + "qname": "sklearn._loss.glm_distribution.PoissonDistribution.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Class for the scaled Poisson distribution.", + "docstring": "", + "code": " def __init__(self):\n super().__init__(power=1)" + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/__init__", + "name": "__init__", + "qname": "sklearn._loss.glm_distribution.TweedieDistribution.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/__init__/self", + "name": "self", + "qname": "sklearn._loss.glm_distribution.TweedieDistribution.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/__init__/power", + "name": "power", + "qname": "sklearn._loss.glm_distribution.TweedieDistribution.__init__.power", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "The variance power of the `unit_variance`\n:math:`v(y_\\textrm{pred}) = y_\\textrm{pred}^{power}`.\nFor ``0=1.')\n elif 1 <= power < 2:\n # Poisson or Compound Poisson distribution\n self._lower_bound = DistributionBoundary(0, inclusive=True)\n elif power >= 2:\n # Gamma, Positive Stable, Inverse Gaussian distributions\n self._lower_bound = DistributionBoundary(0, inclusive=False)\n else: # pragma: no cover\n # this branch should be unreachable.\n raise ValueError\n\n self._power = power" + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/unit_deviance", + "name": "unit_deviance", + "qname": "sklearn._loss.glm_distribution.TweedieDistribution.unit_deviance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/unit_deviance/self", + "name": "self", + "qname": "sklearn._loss.glm_distribution.TweedieDistribution.unit_deviance.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + 
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/unit_deviance/y", + "name": "y", + "qname": "sklearn._loss.glm_distribution.TweedieDistribution.unit_deviance.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/unit_deviance/y_pred", + "name": "y_pred", + "qname": "sklearn._loss.glm_distribution.TweedieDistribution.unit_deviance.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Predicted mean." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/unit_deviance/check_input", + "name": "check_input", + "qname": "sklearn._loss.glm_distribution.TweedieDistribution.unit_deviance.check_input", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True raise an exception on invalid y or y_pred values, otherwise\nthey will be propagated as NaN." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the unit deviance.\n\nThe unit_deviance :math:`d(y,y_\\textrm{pred})` can be defined by the\nlog-likelihood as\n:math:`d(y,y_\\textrm{pred}) = -2\\phi\\cdot\n\\left(loglike(y,y_\\textrm{pred},\\phi) - loglike(y,y,\\phi)\\right).`", + "docstring": "Compute the unit deviance.\n\nThe unit_deviance :math:`d(y,y_\\textrm{pred})` can be defined by the\nlog-likelihood as\n:math:`d(y,y_\\textrm{pred}) = -2\\phi\\cdot\n\\left(loglike(y,y_\\textrm{pred},\\phi) - loglike(y,y,\\phi)\\right).`\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values.\n\ny_pred : array of shape (n_samples,)\n Predicted mean.\n\ncheck_input : bool, default=False\n If True raise an exception on invalid y or y_pred values, otherwise\n they will be propagated as NaN.\nReturns\n-------\ndeviance: array of shape (n_samples,)\n Computed deviance", + "code": " def unit_deviance(self, y, y_pred, check_input=False):\n r\"\"\"Compute the unit deviance.\n\n The unit_deviance :math:`d(y,y_\\textrm{pred})` can be defined by the\n log-likelihood as\n :math:`d(y,y_\\textrm{pred}) = -2\\phi\\cdot\n \\left(loglike(y,y_\\textrm{pred},\\phi) - loglike(y,y,\\phi)\\right).`\n\n Parameters\n ----------\n y : array of shape (n_samples,)\n Target values.\n\n y_pred : array of shape (n_samples,)\n Predicted mean.\n\n check_input : bool, default=False\n If True raise an exception on invalid y or y_pred values, otherwise\n they will be propagated as NaN.\n Returns\n -------\n deviance: array of shape (n_samples,)\n Computed deviance\n \"\"\"\n p = self.power\n\n if check_input:\n message = (\"Mean Tweedie deviance error with power={} can only be \"\n \"used on \".format(p))\n if p < 0:\n # 'Extreme stable', y any realy number, y_pred > 0\n if (y_pred <= 0).any():\n raise ValueError(message + \"strictly positive y_pred.\")\n elif p == 0:\n # 
Normal, y and y_pred can be any real number\n pass\n elif 0 < p < 1:\n raise ValueError(\"Tweedie deviance is only defined for \"\n \"power<=0 and power>=1.\")\n elif 1 <= p < 2:\n # Poisson and Compount poisson distribution, y >= 0, y_pred > 0\n if (y < 0).any() or (y_pred <= 0).any():\n raise ValueError(message + \"non-negative y and strictly \"\n \"positive y_pred.\")\n elif p >= 2:\n # Gamma and Extreme stable distribution, y and y_pred > 0\n if (y <= 0).any() or (y_pred <= 0).any():\n raise ValueError(message\n + \"strictly positive y and y_pred.\")\n else: # pragma: nocover\n # Unreachable statement\n raise ValueError\n\n if p < 0:\n # 'Extreme stable', y any realy number, y_pred > 0\n dev = 2 * (np.power(np.maximum(y, 0), 2-p) / ((1-p) * (2-p))\n - y * np.power(y_pred, 1-p) / (1-p)\n + np.power(y_pred, 2-p) / (2-p))\n\n elif p == 0:\n # Normal distribution, y and y_pred any real number\n dev = (y - y_pred)**2\n elif p < 1:\n raise ValueError(\"Tweedie deviance is only defined for power<=0 \"\n \"and power>=1.\")\n elif p == 1:\n # Poisson distribution\n dev = 2 * (xlogy(y, y/y_pred) - y + y_pred)\n elif p == 2:\n # Gamma distribution\n dev = 2 * (np.log(y_pred/y) + y/y_pred - 1)\n else:\n dev = 2 * (np.power(y, 2-p) / ((1-p) * (2-p))\n - y * np.power(y_pred, 1-p) / (1-p)\n + np.power(y_pred, 2-p) / (2-p))\n return dev" + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/unit_variance", + "name": "unit_variance", + "qname": "sklearn._loss.glm_distribution.TweedieDistribution.unit_variance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/unit_variance/self", + "name": "self", + "qname": "sklearn._loss.glm_distribution.TweedieDistribution.unit_variance.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn._loss.glm_distribution/TweedieDistribution/unit_variance/y_pred", + "name": "y_pred", + "qname": "sklearn._loss.glm_distribution.TweedieDistribution.unit_variance.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Predicted mean." 
+ }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the unit variance of a Tweedie distribution\nv(y_ extrm{pred})=y_ extrm{pred}**power.", + "docstring": "Compute the unit variance of a Tweedie distribution\nv(y_ extrm{pred})=y_ extrm{pred}**power.\n\nParameters\n----------\ny_pred : array of shape (n_samples,)\n Predicted mean.", + "code": " def unit_variance(self, y_pred):\n \"\"\"Compute the unit variance of a Tweedie distribution\n v(y_\\textrm{pred})=y_\\textrm{pred}**power.\n\n Parameters\n ----------\n y_pred : array of shape (n_samples,)\n Predicted mean.\n \"\"\"\n return np.power(y_pred, self.power)" + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/__getstate__", + "name": "__getstate__", + "qname": "sklearn.base.BaseEstimator.__getstate__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BaseEstimator/__getstate__/self", + "name": "self", + "qname": "sklearn.base.BaseEstimator.__getstate__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __getstate__(self):\n try:\n state = super().__getstate__()\n except AttributeError:\n state = self.__dict__.copy()\n\n if type(self).__module__.startswith('sklearn.'):\n return dict(state.items(), _sklearn_version=__version__)\n else:\n return state" + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/__repr__", + "name": "__repr__", + "qname": "sklearn.base.BaseEstimator.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BaseEstimator/__repr__/self", + "name": "self", + "qname": "sklearn.base.BaseEstimator.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/__repr__/N_CHAR_MAX", + "name": "N_CHAR_MAX", + "qname": "sklearn.base.BaseEstimator.__repr__.N_CHAR_MAX", + "default_value": "700", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self, N_CHAR_MAX=700):\n # N_CHAR_MAX is the (approximate) maximum number of non-blank\n # characters to render. 
We pass it as an optional parameter to ease\n # the tests.\n\n from .utils._pprint import _EstimatorPrettyPrinter\n\n N_MAX_ELEMENTS_TO_SHOW = 30 # number of elements to show in sequences\n\n # use ellipsis for sequences with a lot of elements\n pp = _EstimatorPrettyPrinter(\n compact=True, indent=1, indent_at_name=True,\n n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)\n\n repr_ = pp.pformat(self)\n\n # Use bruteforce ellipsis when there are a lot of non-blank characters\n n_nonblank = len(''.join(repr_.split()))\n if n_nonblank > N_CHAR_MAX:\n lim = N_CHAR_MAX // 2 # apprx number of chars to keep on both ends\n regex = r'^(\\s*\\S){%d}' % lim\n # The regex '^(\\s*\\S){%d}' % n\n # matches from the start of the string until the nth non-blank\n # character:\n # - ^ matches the start of string\n # - (pattern){n} matches n repetitions of pattern\n # - \\s*\\S matches a non-blank char following zero or more blanks\n left_lim = re.match(regex, repr_).end()\n right_lim = re.match(regex, repr_[::-1]).end()\n\n if '\\n' in repr_[left_lim:-right_lim]:\n # The left side and right side aren't on the same line.\n # To avoid weird cuts, e.g.:\n # categoric...ore',\n # we need to start the right side with an appropriate newline\n # character so that it renders properly as:\n # categoric...\n # handle_unknown='ignore',\n # so we add [^\\n]*\\n which matches until the next \\n\n regex += r'[^\\n]*\\n'\n right_lim = re.match(regex, repr_[::-1]).end()\n\n ellipsis = '...'\n if left_lim + len(ellipsis) < len(repr_) - right_lim:\n # Only add ellipsis if it results in a shorter repr\n repr_ = repr_[:left_lim] + '...' + repr_[-right_lim:]\n\n return repr_" + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/__setstate__", + "name": "__setstate__", + "qname": "sklearn.base.BaseEstimator.__setstate__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BaseEstimator/__setstate__/self", + "name": "self", + "qname": "sklearn.base.BaseEstimator.__setstate__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/__setstate__/state", + "name": "state", + "qname": "sklearn.base.BaseEstimator.__setstate__.state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __setstate__(self, state):\n if type(self).__module__.startswith('sklearn.'):\n pickle_version = state.pop(\"_sklearn_version\", \"pre-0.18\")\n if pickle_version != __version__:\n warnings.warn(\n \"Trying to unpickle estimator {0} from version {1} when \"\n \"using version {2}. This might lead to breaking code or \"\n \"invalid results. 
Use at your own risk.\".format(\n self.__class__.__name__, pickle_version, __version__),\n UserWarning)\n try:\n super().__setstate__(state)\n except AttributeError:\n self.__dict__.update(state)" + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_check_n_features", + "name": "_check_n_features", + "qname": "sklearn.base.BaseEstimator._check_n_features", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_check_n_features/self", + "name": "self", + "qname": "sklearn.base.BaseEstimator._check_n_features.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_check_n_features/X", + "name": "X", + "qname": "sklearn.base.BaseEstimator._check_n_features.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_check_n_features/reset", + "name": "reset", + "qname": "sklearn.base.BaseEstimator._check_n_features.reset", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "If True, the `n_features_in_` attribute is set to `X.shape[1]`.\nIf False and the attribute exists, then check that it is equal to\n`X.shape[1]`. If False and the attribute does *not* exist, then\nthe check is skipped.\n.. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Set the `n_features_in_` attribute, or check against it.", + "docstring": "Set the `n_features_in_` attribute, or check against it.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples.\nreset : bool\n If True, the `n_features_in_` attribute is set to `X.shape[1]`.\n If False and the attribute exists, then check that it is equal to\n `X.shape[1]`. If False and the attribute does *not* exist, then\n the check is skipped.\n .. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`.", + "code": " def _check_n_features(self, X, reset):\n \"\"\"Set the `n_features_in_` attribute, or check against it.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n reset : bool\n If True, the `n_features_in_` attribute is set to `X.shape[1]`.\n If False and the attribute exists, then check that it is equal to\n `X.shape[1]`. If False and the attribute does *not* exist, then\n the check is skipped.\n .. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. 
All other methods that validate `X`\n should set `reset=False`.\n \"\"\"\n n_features = X.shape[1]\n\n if reset:\n self.n_features_in_ = n_features\n return\n\n if not hasattr(self, \"n_features_in_\"):\n # Skip this check if the expected number of input features\n # was not recorded by calling fit first. This is typically the case\n # for stateless transformers.\n return\n\n if n_features != self.n_features_in_:\n raise ValueError(\n f\"X has {n_features} features, but {self.__class__.__name__} \"\n f\"is expecting {self.n_features_in_} features as input.\")" + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_get_param_names", + "name": "_get_param_names", + "qname": "sklearn.base.BaseEstimator._get_param_names", + "decorators": ["classmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_get_param_names/cls", + "name": "cls", + "qname": "sklearn.base.BaseEstimator._get_param_names.cls", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get parameter names for the estimator", + "docstring": "Get parameter names for the estimator", + "code": " @classmethod\n def _get_param_names(cls):\n \"\"\"Get parameter names for the estimator\"\"\"\n # fetch the constructor or the original constructor before\n # deprecation wrapping if any\n init = getattr(cls.__init__, 'deprecated_original', cls.__init__)\n if init is object.__init__:\n # No explicit constructor to introspect\n return []\n\n # introspect the constructor arguments to find the model parameters\n # to represent\n init_signature = inspect.signature(init)\n # Consider the constructor parameters excluding 'self'\n parameters = [p for p in init_signature.parameters.values()\n if p.name != 'self' and p.kind != p.VAR_KEYWORD]\n for p in parameters:\n if p.kind == p.VAR_POSITIONAL:\n raise RuntimeError(\"scikit-learn estimators should always \"\n \"specify their parameters in the signature\"\n \" of their __init__ (no varargs).\"\n \" %s with constructor %s doesn't \"\n \"follow this convention.\"\n % (cls, init_signature))\n # Extract and sort argument names excluding 'self'\n return sorted([p.name for p in parameters])" + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_get_tags", + "name": "_get_tags", + "qname": "sklearn.base.BaseEstimator._get_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_get_tags/self", + "name": "self", + "qname": "sklearn.base.BaseEstimator._get_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_tags(self):\n collected_tags = {}\n for base_class in reversed(inspect.getmro(self.__class__)):\n if hasattr(base_class, '_more_tags'):\n # need the if because mixins might not have _more_tags\n # but might do redundant work in estimators\n # (i.e. 
calling more tags on BaseEstimator multiple times)\n more_tags = base_class._more_tags(self)\n collected_tags.update(more_tags)\n return collected_tags" + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_more_tags", + "name": "_more_tags", + "qname": "sklearn.base.BaseEstimator._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_more_tags/self", + "name": "self", + "qname": "sklearn.base.BaseEstimator._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return _DEFAULT_TAGS" + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_repr_html_@getter", + "name": "_repr_html_", + "qname": "sklearn.base.BaseEstimator._repr_html_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_repr_html_/self", + "name": "self", + "qname": "sklearn.base.BaseEstimator._repr_html_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "HTML representation of estimator.\n\nThis is redundant with the logic of `_repr_mimebundle_`. The latter\nshould be favored in the long term, `_repr_html_` is only\nimplemented for consumers who do not interpret `_repr_mimebundle_`.", + "docstring": "HTML representation of estimator.\n\nThis is redundant with the logic of `_repr_mimebundle_`. The latter\nshould be favored in the long term, `_repr_html_` is only\nimplemented for consumers who do not interpret `_repr_mimebundle_`.", + "code": " @property\n def _repr_html_(self):\n \"\"\"HTML representation of estimator.\n\n This is redundant with the logic of `_repr_mimebundle_`. 
The latter\n should be favored in the long term, `_repr_html_` is only\n implemented for consumers who do not interpret `_repr_mimebundle_`.\n \"\"\"\n if get_config()[\"display\"] != 'diagram':\n raise AttributeError(\"_repr_html_ is only defined when the \"\n \"'display' configuration option is set to \"\n \"'diagram'\")\n return self._repr_html_inner" + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_repr_html_inner", + "name": "_repr_html_inner", + "qname": "sklearn.base.BaseEstimator._repr_html_inner", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_repr_html_inner/self", + "name": "self", + "qname": "sklearn.base.BaseEstimator._repr_html_inner.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "This function is returned by the @property `_repr_html_` to make\n`hasattr(estimator, \"_repr_html_\")` return `True` or `False` depending\non `get_config()[\"display\"]`.", + "docstring": "This function is returned by the @property `_repr_html_` to make\n`hasattr(estimator, \"_repr_html_\")` return `True` or `False` depending\non `get_config()[\"display\"]`.", + "code": " def _repr_html_inner(self):\n \"\"\"This function is returned by the @property `_repr_html_` to make\n `hasattr(estimator, \"_repr_html_\")` return `True` or `False` depending\n on `get_config()[\"display\"]`.\n \"\"\"\n return estimator_html_repr(self)" + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_repr_mimebundle_", + "name": "_repr_mimebundle_", + "qname": "sklearn.base.BaseEstimator._repr_mimebundle_", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_repr_mimebundle_/self", + "name": "self", + "qname": "sklearn.base.BaseEstimator._repr_mimebundle_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_repr_mimebundle_/kwargs", + "name": "kwargs", + "qname": "sklearn.base.BaseEstimator._repr_mimebundle_.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mime bundle used by jupyter kernels to display estimator", + "docstring": "Mime bundle used by jupyter kernels to display estimator", + "code": " def _repr_mimebundle_(self, **kwargs):\n \"\"\"Mime bundle used by jupyter kernels to display estimator\"\"\"\n output = {\"text/plain\": repr(self)}\n if get_config()[\"display\"] == 'diagram':\n output[\"text/html\"] = estimator_html_repr(self)\n return output" + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_validate_data", + "name": "_validate_data", + "qname": "sklearn.base.BaseEstimator._validate_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_validate_data/self", + "name": "self", + "qname": "sklearn.base.BaseEstimator._validate_data.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.base/BaseEstimator/_validate_data/X", + "name": "X", + "qname": "sklearn.base.BaseEstimator._validate_data.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix, dataframe} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_validate_data/y", + "name": "y", + "qname": "sklearn.base.BaseEstimator._validate_data.y", + "default_value": "'no_validation'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "'no_validation'", + "description": "The targets.\n\n- If `None`, `check_array` is called on `X`. If the estimator's\n requires_y tag is True, then an error will be raised.\n- If `'no_validation'`, `check_array` is called on `X` and the\n estimator's requires_y tag is ignored. This is a default\n placeholder and is never meant to be explicitly set.\n- Otherwise, both `X` and `y` are checked with either `check_array`\n or `check_X_y` depending on `validate_separately`." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_validate_data/reset", + "name": "reset", + "qname": "sklearn.base.BaseEstimator._validate_data.reset", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to reset the `n_features_in_` attribute.\nIf False, the input will be checked for consistency with data\nprovided when reset was last True.\n.. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_validate_data/validate_separately", + "name": "validate_separately", + "qname": "sklearn.base.BaseEstimator._validate_data.validate_separately", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "False or tuple of dicts", + "default_value": "False", + "description": "Only used if y is not None.\nIf False, call validate_X_y(). Else, it must be a tuple of kwargs\nto be used for calling check_array() on X and y respectively." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "False" + }, + { + "kind": "NamedType", + "name": "tuple of dicts" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/_validate_data/check_params", + "name": "check_params", + "qname": "sklearn.base.BaseEstimator._validate_data.check_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "kwargs", + "default_value": "", + "description": "Parameters passed to :func:`sklearn.utils.check_array` or\n:func:`sklearn.utils.check_X_y`. Ignored if validate_separately\nis not False." 
+ }, + "type": { + "kind": "NamedType", + "name": "kwargs" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate input data and set or check the `n_features_in_` attribute.", + "docstring": "Validate input data and set or check the `n_features_in_` attribute.\n\nParameters\n----------\nX : {array-like, sparse matrix, dataframe} of shape (n_samples, n_features)\n The input samples.\ny : array-like of shape (n_samples,), default='no_validation'\n The targets.\n\n - If `None`, `check_array` is called on `X`. If the estimator's\n requires_y tag is True, then an error will be raised.\n - If `'no_validation'`, `check_array` is called on `X` and the\n estimator's requires_y tag is ignored. This is a default\n placeholder and is never meant to be explicitly set.\n - Otherwise, both `X` and `y` are checked with either `check_array`\n or `check_X_y` depending on `validate_separately`.\n\nreset : bool, default=True\n Whether to reset the `n_features_in_` attribute.\n If False, the input will be checked for consistency with data\n provided when reset was last True.\n .. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`.\nvalidate_separately : False or tuple of dicts, default=False\n Only used if y is not None.\n If False, call validate_X_y(). Else, it must be a tuple of kwargs\n to be used for calling check_array() on X and y respectively.\n**check_params : kwargs\n Parameters passed to :func:`sklearn.utils.check_array` or\n :func:`sklearn.utils.check_X_y`. Ignored if validate_separately\n is not False.\n\nReturns\n-------\nout : {ndarray, sparse matrix} or tuple of these\n The validated input. A tuple is returned if `y` is not None.", + "code": " def _validate_data(self, X, y='no_validation', reset=True,\n validate_separately=False, **check_params):\n \"\"\"Validate input data and set or check the `n_features_in_` attribute.\n\n Parameters\n ----------\n X : {array-like, sparse matrix, dataframe} of shape \\\n (n_samples, n_features)\n The input samples.\n y : array-like of shape (n_samples,), default='no_validation'\n The targets.\n\n - If `None`, `check_array` is called on `X`. If the estimator's\n requires_y tag is True, then an error will be raised.\n - If `'no_validation'`, `check_array` is called on `X` and the\n estimator's requires_y tag is ignored. This is a default\n placeholder and is never meant to be explicitly set.\n - Otherwise, both `X` and `y` are checked with either `check_array`\n or `check_X_y` depending on `validate_separately`.\n\n reset : bool, default=True\n Whether to reset the `n_features_in_` attribute.\n If False, the input will be checked for consistency with data\n provided when reset was last True.\n .. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`.\n validate_separately : False or tuple of dicts, default=False\n Only used if y is not None.\n If False, call validate_X_y(). Else, it must be a tuple of kwargs\n to be used for calling check_array() on X and y respectively.\n **check_params : kwargs\n Parameters passed to :func:`sklearn.utils.check_array` or\n :func:`sklearn.utils.check_X_y`. Ignored if validate_separately\n is not False.\n\n Returns\n -------\n out : {ndarray, sparse matrix} or tuple of these\n The validated input. 
A tuple is returned if `y` is not None.\n \"\"\"\n\n if y is None:\n if self._get_tags()['requires_y']:\n raise ValueError(\n f\"This {self.__class__.__name__} estimator \"\n f\"requires y to be passed, but the target y is None.\"\n )\n X = check_array(X, **check_params)\n out = X\n elif isinstance(y, str) and y == 'no_validation':\n X = check_array(X, **check_params)\n out = X\n else:\n if validate_separately:\n # We need this because some estimators validate X and y\n # separately, and in general, separately calling check_array()\n # on X and y isn't equivalent to just calling check_X_y()\n # :(\n check_X_params, check_y_params = validate_separately\n X = check_array(X, **check_X_params)\n y = check_array(y, **check_y_params)\n else:\n X, y = check_X_y(X, y, **check_params)\n out = X, y\n\n if check_params.get('ensure_2d', True):\n self._check_n_features(X, reset=reset)\n\n return out" + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/get_params", + "name": "get_params", + "qname": "sklearn.base.BaseEstimator.get_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BaseEstimator/get_params/self", + "name": "self", + "qname": "sklearn.base.BaseEstimator.get_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/get_params/deep", + "name": "deep", + "qname": "sklearn.base.BaseEstimator.get_params.deep", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, will return the parameters for this estimator and\ncontained subobjects that are estimators." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Get parameters for this estimator.", + "docstring": "Get parameters for this estimator.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values.", + "code": " def get_params(self, deep=True):\n \"\"\"\n Get parameters for this estimator.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : dict\n Parameter names mapped to their values.\n \"\"\"\n out = dict()\n for key in self._get_param_names():\n value = getattr(self, key)\n if deep and hasattr(value, 'get_params'):\n deep_items = value.get_params().items()\n out.update((key + '__' + k, val) for k, val in deep_items)\n out[key] = value\n return out" + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/set_params", + "name": "set_params", + "qname": "sklearn.base.BaseEstimator.set_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BaseEstimator/set_params/self", + "name": "self", + "qname": "sklearn.base.BaseEstimator.set_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/BaseEstimator/set_params/params", + "name": "params", + "qname": "sklearn.base.BaseEstimator.set_params.params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Estimator parameters." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Set the parameters of this estimator.\n\nThe method works on simple estimators as well as on nested objects\n(such as :class:`~sklearn.pipeline.Pipeline`). The latter have\nparameters of the form ``__`` so that it's\npossible to update each component of a nested object.", + "docstring": "Set the parameters of this estimator.\n\nThe method works on simple estimators as well as on nested objects\n(such as :class:`~sklearn.pipeline.Pipeline`). The latter have\nparameters of the form ``__`` so that it's\npossible to update each component of a nested object.\n\nParameters\n----------\n**params : dict\n Estimator parameters.\n\nReturns\n-------\nself : estimator instance\n Estimator instance.", + "code": " def set_params(self, **params):\n \"\"\"\n Set the parameters of this estimator.\n\n The method works on simple estimators as well as on nested objects\n (such as :class:`~sklearn.pipeline.Pipeline`). 
The latter have\n parameters of the form ``<component>__<parameter>`` so that it's\n possible to update each component of a nested object.\n\n Parameters\n ----------\n **params : dict\n Estimator parameters.\n\n Returns\n -------\n self : estimator instance\n Estimator instance.\n \"\"\"\n if not params:\n # Simple optimization to gain speed (inspect is slow)\n return self\n valid_params = self.get_params(deep=True)\n\n nested_params = defaultdict(dict) # grouped by prefix\n for key, value in params.items():\n key, delim, sub_key = key.partition('__')\n if key not in valid_params:\n raise ValueError('Invalid parameter %s for estimator %s. '\n 'Check the list of available parameters '\n 'with `estimator.get_params().keys()`.' %\n (key, self))\n\n if delim:\n nested_params[key][sub_key] = value\n else:\n setattr(self, key, value)\n valid_params[key] = value\n\n for key, sub_params in nested_params.items():\n valid_params[key].set_params(**sub_params)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.base/BiclusterMixin/biclusters_@getter", + "name": "biclusters_", + "qname": "sklearn.base.BiclusterMixin.biclusters_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BiclusterMixin/biclusters_/self", + "name": "self", + "qname": "sklearn.base.BiclusterMixin.biclusters_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Convenient way to get row and column indicators together.\n\nReturns the ``rows_`` and ``columns_`` members.", + "docstring": "Convenient way to get row and column indicators together.\n\nReturns the ``rows_`` and ``columns_`` members.", + "code": " @property\n def biclusters_(self):\n \"\"\"Convenient way to get row and column indicators together.\n\n Returns the ``rows_`` and ``columns_`` members.\n \"\"\"\n return self.rows_, self.columns_" + }, + { + "id": "scikit-learn/sklearn.base/BiclusterMixin/get_indices", + "name": "get_indices", + "qname": "sklearn.base.BiclusterMixin.get_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BiclusterMixin/get_indices/self", + "name": "self", + "qname": "sklearn.base.BiclusterMixin.get_indices.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/BiclusterMixin/get_indices/i", + "name": "i", + "qname": "sklearn.base.BiclusterMixin.get_indices.i", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "The index of the cluster." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Row and column indices of the `i`'th bicluster.\n\nOnly works if ``rows_`` and ``columns_`` attributes exist.", + "docstring": "Row and column indices of the `i`'th bicluster.\n\nOnly works if ``rows_`` and ``columns_`` attributes exist.\n\nParameters\n----------\ni : int\n The index of the cluster.\n\nReturns\n-------\nrow_ind : ndarray, dtype=np.intp\n Indices of rows in the dataset that belong to the bicluster.\ncol_ind : ndarray, dtype=np.intp\n Indices of columns in the dataset that belong to the bicluster.", + "code": " def get_indices(self, i):\n \"\"\"Row and column indices of the `i`'th bicluster.\n\n Only works if ``rows_`` and ``columns_`` attributes exist.\n\n Parameters\n ----------\n i : int\n The index of the cluster.\n\n Returns\n -------\n row_ind : ndarray, dtype=np.intp\n Indices of rows in the dataset that belong to the bicluster.\n col_ind : ndarray, dtype=np.intp\n Indices of columns in the dataset that belong to the bicluster.\n\n \"\"\"\n rows = self.rows_[i]\n columns = self.columns_[i]\n return np.nonzero(rows)[0], np.nonzero(columns)[0]" + }, + { + "id": "scikit-learn/sklearn.base/BiclusterMixin/get_shape", + "name": "get_shape", + "qname": "sklearn.base.BiclusterMixin.get_shape", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BiclusterMixin/get_shape/self", + "name": "self", + "qname": "sklearn.base.BiclusterMixin.get_shape.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/BiclusterMixin/get_shape/i", + "name": "i", + "qname": "sklearn.base.BiclusterMixin.get_shape.i", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "The index of the cluster." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Shape of the `i`'th bicluster.", + "docstring": "Shape of the `i`'th bicluster.\n\nParameters\n----------\ni : int\n The index of the cluster.\n\nReturns\n-------\nn_rows : int\n Number of rows in the bicluster.\n\nn_cols : int\n Number of columns in the bicluster.", + "code": " def get_shape(self, i):\n \"\"\"Shape of the `i`'th bicluster.\n\n Parameters\n ----------\n i : int\n The index of the cluster.\n\n Returns\n -------\n n_rows : int\n Number of rows in the bicluster.\n\n n_cols : int\n Number of columns in the bicluster.\n \"\"\"\n indices = self.get_indices(i)\n return tuple(len(i) for i in indices)" + }, + { + "id": "scikit-learn/sklearn.base/BiclusterMixin/get_submatrix", + "name": "get_submatrix", + "qname": "sklearn.base.BiclusterMixin.get_submatrix", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/BiclusterMixin/get_submatrix/self", + "name": "self", + "qname": "sklearn.base.BiclusterMixin.get_submatrix.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/BiclusterMixin/get_submatrix/i", + "name": "i", + "qname": "sklearn.base.BiclusterMixin.get_submatrix.i", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "The index of the cluster." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.base/BiclusterMixin/get_submatrix/data", + "name": "data", + "qname": "sklearn.base.BiclusterMixin.get_submatrix.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the submatrix corresponding to bicluster `i`.", + "docstring": "Return the submatrix corresponding to bicluster `i`.\n\nParameters\n----------\ni : int\n The index of the cluster.\ndata : array-like of shape (n_samples, n_features)\n The data.\n\nReturns\n-------\nsubmatrix : ndarray of shape (n_rows, n_cols)\n The submatrix corresponding to bicluster `i`.\n\nNotes\n-----\nWorks with sparse matrices. Only works if ``rows_`` and\n``columns_`` attributes exist.", + "code": " def get_submatrix(self, i, data):\n \"\"\"Return the submatrix corresponding to bicluster `i`.\n\n Parameters\n ----------\n i : int\n The index of the cluster.\n data : array-like of shape (n_samples, n_features)\n The data.\n\n Returns\n -------\n submatrix : ndarray of shape (n_rows, n_cols)\n The submatrix corresponding to bicluster `i`.\n\n Notes\n -----\n Works with sparse matrices. 
Only works if ``rows_`` and\n ``columns_`` attributes exist.\n \"\"\"\n from .utils.validation import check_array\n data = check_array(data, accept_sparse='csr')\n row_ind, col_ind = self.get_indices(i)\n return data[row_ind[:, np.newaxis], col_ind]" + }, + { + "id": "scikit-learn/sklearn.base/ClassifierMixin/_more_tags", + "name": "_more_tags", + "qname": "sklearn.base.ClassifierMixin._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/ClassifierMixin/_more_tags/self", + "name": "self", + "qname": "sklearn.base.ClassifierMixin._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'requires_y': True}" + }, + { + "id": "scikit-learn/sklearn.base/ClassifierMixin/score", + "name": "score", + "qname": "sklearn.base.ClassifierMixin.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/ClassifierMixin/score/self", + "name": "self", + "qname": "sklearn.base.ClassifierMixin.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/ClassifierMixin/score/X", + "name": "X", + "qname": "sklearn.base.ClassifierMixin.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Test samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.base/ClassifierMixin/score/y", + "name": "y", + "qname": "sklearn.base.ClassifierMixin.score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "True labels for `X`." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.base/ClassifierMixin/score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.base.ClassifierMixin.score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.", + "docstring": "Return the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for `X`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Mean accuracy of ``self.predict(X)`` wrt. `y`.", + "code": " def score(self, X, y, sample_weight=None):\n \"\"\"\n Return the mean accuracy on the given test data and labels.\n\n In multi-label classification, this is the subset accuracy\n which is a harsh metric since you require for each sample that\n each label set be correctly predicted.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test samples.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for `X`.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Mean accuracy of ``self.predict(X)`` wrt. `y`.\n \"\"\"\n from .metrics import accuracy_score\n return accuracy_score(y, self.predict(X), sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.base/ClusterMixin/_more_tags", + "name": "_more_tags", + "qname": "sklearn.base.ClusterMixin._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/ClusterMixin/_more_tags/self", + "name": "self", + "qname": "sklearn.base.ClusterMixin._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\"preserves_dtype\": []}" + }, + { + "id": "scikit-learn/sklearn.base/ClusterMixin/fit_predict", + "name": "fit_predict", + "qname": "sklearn.base.ClusterMixin.fit_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/ClusterMixin/fit_predict/self", + "name": "self", + "qname": "sklearn.base.ClusterMixin.fit_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/ClusterMixin/fit_predict/X", + "name": "X", + "qname": "sklearn.base.ClusterMixin.fit_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.base/ClusterMixin/fit_predict/y", + "name": "y", + "qname": "sklearn.base.ClusterMixin.fit_predict.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Perform clustering on `X` and returns cluster labels.", + "docstring": "Perform clustering on `X` and returns cluster labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,), dtype=np.int64\n Cluster labels.", + "code": " def fit_predict(self, X, y=None):\n \"\"\"\n Perform clustering on `X` and returns cluster labels.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,), dtype=np.int64\n Cluster labels.\n \"\"\"\n # non-optimized default implementation; override when a better\n # method is possible for a given clustering algorithm\n self.fit(X)\n return self.labels_" + }, + { + "id": "scikit-learn/sklearn.base/DensityMixin/score", + "name": "score", + "qname": "sklearn.base.DensityMixin.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/DensityMixin/score/self", + "name": "self", + "qname": "sklearn.base.DensityMixin.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/DensityMixin/score/X", + "name": "X", + "qname": "sklearn.base.DensityMixin.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Test samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.base/DensityMixin/score/y", + "name": "y", + "qname": "sklearn.base.DensityMixin.score.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." 
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the score of the model on the data `X`.", + "docstring": "Return the score of the model on the data `X`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nscore : float", + "code": " def score(self, X, y=None):\n \"\"\"Return the score of the model on the data `X`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test samples.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n score : float\n \"\"\"\n pass" + }, + { + "id": "scikit-learn/sklearn.base/MultiOutputMixin/_more_tags", + "name": "_more_tags", + "qname": "sklearn.base.MultiOutputMixin._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/MultiOutputMixin/_more_tags/self", + "name": "self", + "qname": "sklearn.base.MultiOutputMixin._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'multioutput': True}" + }, + { + "id": "scikit-learn/sklearn.base/OutlierMixin/fit_predict", + "name": "fit_predict", + "qname": "sklearn.base.OutlierMixin.fit_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/OutlierMixin/fit_predict/self", + "name": "self", + "qname": "sklearn.base.OutlierMixin.fit_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/OutlierMixin/fit_predict/X", + "name": "X", + "qname": "sklearn.base.OutlierMixin.fit_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix, dataframe} of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.base/OutlierMixin/fit_predict/y", + "name": "y", + "qname": "sklearn.base.OutlierMixin.fit_predict.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." 
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Perform fit on X and returns labels for X.\n\nReturns -1 for outliers and 1 for inliers.", + "docstring": "Perform fit on X and returns labels for X.\n\nReturns -1 for outliers and 1 for inliers.\n\nParameters\n----------\nX : {array-like, sparse matrix, dataframe} of shape (n_samples, n_features)\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n 1 for inliers, -1 for outliers.", + "code": " def fit_predict(self, X, y=None):\n \"\"\"Perform fit on X and returns labels for X.\n\n Returns -1 for outliers and 1 for inliers.\n\n Parameters\n ----------\n X : {array-like, sparse matrix, dataframe} of shape \\\n (n_samples, n_features)\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n 1 for inliers, -1 for outliers.\n \"\"\"\n # override for transductive outlier detectors like LocalOulierFactor\n return self.fit(X).predict(X)" + }, + { + "id": "scikit-learn/sklearn.base/RegressorMixin/_more_tags", + "name": "_more_tags", + "qname": "sklearn.base.RegressorMixin._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/RegressorMixin/_more_tags/self", + "name": "self", + "qname": "sklearn.base.RegressorMixin._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'requires_y': True}" + }, + { + "id": "scikit-learn/sklearn.base/RegressorMixin/score", + "name": "score", + "qname": "sklearn.base.RegressorMixin.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/RegressorMixin/score/self", + "name": "self", + "qname": "sklearn.base.RegressorMixin.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/RegressorMixin/score/X", + "name": "X", + "qname": "sklearn.base.RegressorMixin.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Test samples. For some estimators this may be a precomputed\nkernel matrix or a list of generic objects instead with shape\n``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``\nis the number of samples used in the fitting for the estimator." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.base/RegressorMixin/score/y", + "name": "y", + "qname": "sklearn.base.RegressorMixin.score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "True values for `X`." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.base/RegressorMixin/score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.base.RegressorMixin.score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the coefficient of determination :math:`R^2` of the\nprediction.\n\nThe coefficient :math:`R^2` is defined as :math:`(1 - \\frac{u}{v})`,\nwhere :math:`u` is the residual sum of squares ``((y_true - y_pred)\n** 2).sum()`` and :math:`v` is the total sum of squares ``((y_true -\ny_true.mean()) ** 2).sum()``. The best possible score is 1.0 and it\ncan be negative (because the model can be arbitrarily worse). A\nconstant model that always predicts the expected value of `y`,\ndisregarding the input features, would get a :math:`R^2` score of\n0.0.", + "docstring": "Return the coefficient of determination :math:`R^2` of the\nprediction.\n\nThe coefficient :math:`R^2` is defined as :math:`(1 - \\frac{u}{v})`,\nwhere :math:`u` is the residual sum of squares ``((y_true - y_pred)\n** 2).sum()`` and :math:`v` is the total sum of squares ``((y_true -\ny_true.mean()) ** 2).sum()``. The best possible score is 1.0 and it\ncan be negative (because the model can be arbitrarily worse). A\nconstant model that always predicts the expected value of `y`,\ndisregarding the input features, would get a :math:`R^2` score of\n0.0.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples. For some estimators this may be a precomputed\n kernel matrix or a list of generic objects instead with shape\n ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``\n is the number of samples used in the fitting for the estimator.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True values for `X`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n :math:`R^2` of ``self.predict(X)`` wrt. `y`.\n\nNotes\n-----\nThe :math:`R^2` score used when calling ``score`` on a regressor uses\n``multioutput='uniform_average'`` from version 0.23 to keep consistent\nwith default value of :func:`~sklearn.metrics.r2_score`.\nThis influences the ``score`` method of all the multioutput\nregressors (except for\n:class:`~sklearn.multioutput.MultiOutputRegressor`).", + "code": " def score(self, X, y, sample_weight=None):\n \"\"\"Return the coefficient of determination :math:`R^2` of the\n prediction.\n\n The coefficient :math:`R^2` is defined as :math:`(1 - \\\\frac{u}{v})`,\n where :math:`u` is the residual sum of squares ``((y_true - y_pred)\n ** 2).sum()`` and :math:`v` is the total sum of squares ``((y_true -\n y_true.mean()) ** 2).sum()``. The best possible score is 1.0 and it\n can be negative (because the model can be arbitrarily worse). A\n constant model that always predicts the expected value of `y`,\n disregarding the input features, would get a :math:`R^2` score of\n 0.0.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test samples. 
For some estimators this may be a precomputed\n kernel matrix or a list of generic objects instead with shape\n ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``\n is the number of samples used in the fitting for the estimator.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True values for `X`.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n :math:`R^2` of ``self.predict(X)`` wrt. `y`.\n\n Notes\n -----\n The :math:`R^2` score used when calling ``score`` on a regressor uses\n ``multioutput='uniform_average'`` from version 0.23 to keep consistent\n with default value of :func:`~sklearn.metrics.r2_score`.\n This influences the ``score`` method of all the multioutput\n regressors (except for\n :class:`~sklearn.multioutput.MultiOutputRegressor`).\n \"\"\"\n\n from .metrics import r2_score\n y_pred = self.predict(X)\n return r2_score(y, y_pred, sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.base/TransformerMixin/fit_transform", + "name": "fit_transform", + "qname": "sklearn.base.TransformerMixin.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/TransformerMixin/fit_transform/self", + "name": "self", + "qname": "sklearn.base.TransformerMixin.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.base/TransformerMixin/fit_transform/X", + "name": "X", + "qname": "sklearn.base.TransformerMixin.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.base/TransformerMixin/fit_transform/y", + "name": "y", + "qname": "sklearn.base.TransformerMixin.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "None", + "description": "Target values (None for unsupervised transformations)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.base/TransformerMixin/fit_transform/fit_params", + "name": "fit_params", + "qname": "sklearn.base.TransformerMixin.fit_transform.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Additional fit parameters." 
+ }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit to data, then transform it.\n\nFits transformer to `X` and `y` with optional parameters `fit_params`\nand returns a transformed version of `X`.", + "docstring": "Fit to data, then transform it.\n\nFits transformer to `X` and `y` with optional parameters `fit_params`\nand returns a transformed version of `X`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n Target values (None for unsupervised transformations).\n\n**fit_params : dict\n Additional fit parameters.\n\nReturns\n-------\nX_new : ndarray array of shape (n_samples, n_features_new)\n Transformed array.", + "code": " def fit_transform(self, X, y=None, **fit_params):\n \"\"\"\n Fit to data, then transform it.\n\n Fits transformer to `X` and `y` with optional parameters `fit_params`\n and returns a transformed version of `X`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input samples.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs), \\\n default=None\n Target values (None for unsupervised transformations).\n\n **fit_params : dict\n Additional fit parameters.\n\n Returns\n -------\n X_new : ndarray array of shape (n_samples, n_features_new)\n Transformed array.\n \"\"\"\n # non-optimized default implementation; override when a better\n # method is possible for a given clustering algorithm\n if y is None:\n # fit method of arity 1 (unsupervised transformation)\n return self.fit(X, **fit_params).transform(X)\n else:\n # fit method of arity 2 (supervised transformation)\n return self.fit(X, y, **fit_params).transform(X)" + }, + { + "id": "scikit-learn/sklearn.base/_UnstableArchMixin/_more_tags", + "name": "_more_tags", + "qname": "sklearn.base._UnstableArchMixin._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/_UnstableArchMixin/_more_tags/self", + "name": "self", + "qname": "sklearn.base._UnstableArchMixin._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'non_deterministic': (\n _IS_32BIT or platform.machine().startswith(('ppc', 'powerpc')))}" + }, + { + "id": "scikit-learn/sklearn.base/_is_pairwise", + "name": "_is_pairwise", + "qname": "sklearn.base._is_pairwise", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/_is_pairwise/estimator", + "name": "estimator", + "qname": "sklearn.base._is_pairwise.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Estimator object to test." 
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns True if estimator is pairwise.\n\n- If the `_pairwise` attribute and the tag are present and consistent,\n then use the value and not issue a warning.\n- If the `_pairwise` attribute and the tag are present and not\n consistent, use the `_pairwise` value and issue a deprecation\n warning.\n- If only the `_pairwise` attribute is present and it is not False,\n issue a deprecation warning and use the `_pairwise` value.", + "docstring": "Returns True if estimator is pairwise.\n\n- If the `_pairwise` attribute and the tag are present and consistent,\n then use the value and not issue a warning.\n- If the `_pairwise` attribute and the tag are present and not\n consistent, use the `_pairwise` value and issue a deprecation\n warning.\n- If only the `_pairwise` attribute is present and it is not False,\n issue a deprecation warning and use the `_pairwise` value.\n\nParameters\n----------\nestimator : object\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if the estimator is pairwise and False otherwise.", + "code": "def _is_pairwise(estimator):\n \"\"\"Returns True if estimator is pairwise.\n\n - If the `_pairwise` attribute and the tag are present and consistent,\n then use the value and not issue a warning.\n - If the `_pairwise` attribute and the tag are present and not\n consistent, use the `_pairwise` value and issue a deprecation\n warning.\n - If only the `_pairwise` attribute is present and it is not False,\n issue a deprecation warning and use the `_pairwise` value.\n\n Parameters\n ----------\n estimator : object\n Estimator object to test.\n\n Returns\n -------\n out : bool\n True if the estimator is pairwise and False otherwise.\n \"\"\"\n with warnings.catch_warnings():\n warnings.filterwarnings('ignore', category=FutureWarning)\n has_pairwise_attribute = hasattr(estimator, '_pairwise')\n pairwise_attribute = getattr(estimator, '_pairwise', False)\n pairwise_tag = _safe_tags(estimator, key=\"pairwise\")\n\n if has_pairwise_attribute:\n if pairwise_attribute != pairwise_tag:\n warnings.warn(\n \"_pairwise was deprecated in 0.24 and will be removed in 1.1 \"\n \"(renaming of 0.26). Set the estimator tags of your estimator \"\n \"instead\",\n FutureWarning\n )\n return pairwise_attribute\n\n # use pairwise tag when the attribute is not present\n return pairwise_tag" + }, + { + "id": "scikit-learn/sklearn.base/_pprint", + "name": "_pprint", + "qname": "sklearn.base._pprint", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/_pprint/params", + "name": "params", + "qname": "sklearn.base._pprint.params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "The dictionary to pretty print" + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.base/_pprint/offset", + "name": "offset", + "qname": "sklearn.base._pprint.offset", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The offset in characters to add at the begin of each line." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.base/_pprint/printer", + "name": "printer", + "qname": "sklearn.base._pprint.printer", + "default_value": "repr", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "repr", + "description": "The function to convert entries to strings, typically\nthe builtin str or repr" + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Pretty print the dictionary 'params'", + "docstring": "Pretty print the dictionary 'params'\n\nParameters\n----------\nparams : dict\n The dictionary to pretty print\n\noffset : int, default=0\n The offset in characters to add at the begin of each line.\n\nprinter : callable, default=repr\n The function to convert entries to strings, typically\n the builtin str or repr", + "code": "def _pprint(params, offset=0, printer=repr):\n \"\"\"Pretty print the dictionary 'params'\n\n Parameters\n ----------\n params : dict\n The dictionary to pretty print\n\n offset : int, default=0\n The offset in characters to add at the begin of each line.\n\n printer : callable, default=repr\n The function to convert entries to strings, typically\n the builtin str or repr\n\n \"\"\"\n # Do a multi-line justified repr:\n options = np.get_printoptions()\n np.set_printoptions(precision=5, threshold=64, edgeitems=2)\n params_list = list()\n this_line_length = offset\n line_sep = ',\\n' + (1 + offset // 2) * ' '\n for i, (k, v) in enumerate(sorted(params.items())):\n if type(v) is float:\n # use str for representing floating point numbers\n # this way we get consistent representation across\n # architectures and versions.\n this_repr = '%s=%s' % (k, str(v))\n else:\n # use repr of the rest\n this_repr = '%s=%s' % (k, printer(v))\n if len(this_repr) > 500:\n this_repr = this_repr[:300] + '...' + this_repr[-100:]\n if i > 0:\n if (this_line_length + len(this_repr) >= 75 or '\\n' in this_repr):\n params_list.append(line_sep)\n this_line_length = len(line_sep)\n else:\n params_list.append(', ')\n this_line_length += 2\n params_list.append(this_repr)\n this_line_length += len(this_repr)\n\n np.set_printoptions(**options)\n lines = ''.join(params_list)\n # Strip trailing space to avoid nightmare in doctests\n lines = '\\n'.join(l.rstrip(' ') for l in lines.split('\\n'))\n return lines" + }, + { + "id": "scikit-learn/sklearn.base/clone", + "name": "clone", + "qname": "sklearn.base.clone", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/clone/estimator", + "name": "estimator", + "qname": "sklearn.base.clone.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{list, tuple, set} of estimator instance or a single estimator instance", + "default_value": "", + "description": "The estimator or group of estimators to be cloned." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of estimator instance" + }, + { + "kind": "NamedType", + "name": "a single estimator instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.base/clone/safe", + "name": "safe", + "qname": "sklearn.base.clone.safe", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If safe is False, clone will fall back to a deep copy on objects\nthat are not estimators." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Constructs a new unfitted estimator with the same parameters.\n\nClone does a deep copy of the model in an estimator\nwithout actually copying attached data. It yields a new estimator\nwith the same parameters that has not been fitted on any data.\n\nIf the estimator's `random_state` parameter is an integer (or if the\nestimator doesn't have a `random_state` parameter), an *exact clone* is\nreturned: the clone and the original estimator will give the exact same\nresults. Otherwise, *statistical clone* is returned: the clone might\nyield different results from the original estimator. More details can be\nfound in :ref:`randomness`.", + "docstring": "Constructs a new unfitted estimator with the same parameters.\n\nClone does a deep copy of the model in an estimator\nwithout actually copying attached data. It yields a new estimator\nwith the same parameters that has not been fitted on any data.\n\nIf the estimator's `random_state` parameter is an integer (or if the\nestimator doesn't have a `random_state` parameter), an *exact clone* is\nreturned: the clone and the original estimator will give the exact same\nresults. Otherwise, *statistical clone* is returned: the clone might\nyield different results from the original estimator. More details can be\nfound in :ref:`randomness`.\n\nParameters\n----------\nestimator : {list, tuple, set} of estimator instance or a single estimator instance\n The estimator or group of estimators to be cloned.\n\nsafe : bool, default=True\n If safe is False, clone will fall back to a deep copy on objects\n that are not estimators.", + "code": "@_deprecate_positional_args\ndef clone(estimator, *, safe=True):\n \"\"\"Constructs a new unfitted estimator with the same parameters.\n\n Clone does a deep copy of the model in an estimator\n without actually copying attached data. It yields a new estimator\n with the same parameters that has not been fitted on any data.\n\n If the estimator's `random_state` parameter is an integer (or if the\n estimator doesn't have a `random_state` parameter), an *exact clone* is\n returned: the clone and the original estimator will give the exact same\n results. Otherwise, *statistical clone* is returned: the clone might\n yield different results from the original estimator. 
More details can be\n found in :ref:`randomness`.\n\n Parameters\n ----------\n estimator : {list, tuple, set} of estimator instance or a single \\\n estimator instance\n The estimator or group of estimators to be cloned.\n\n safe : bool, default=True\n If safe is False, clone will fall back to a deep copy on objects\n that are not estimators.\n\n \"\"\"\n estimator_type = type(estimator)\n # XXX: not handling dictionaries\n if estimator_type in (list, tuple, set, frozenset):\n return estimator_type([clone(e, safe=safe) for e in estimator])\n elif not hasattr(estimator, 'get_params') or isinstance(estimator, type):\n if not safe:\n return copy.deepcopy(estimator)\n else:\n if isinstance(estimator, type):\n raise TypeError(\"Cannot clone object. \" +\n \"You should provide an instance of \" +\n \"scikit-learn estimator instead of a class.\")\n else:\n raise TypeError(\"Cannot clone object '%s' (type %s): \"\n \"it does not seem to be a scikit-learn \"\n \"estimator as it does not implement a \"\n \"'get_params' method.\"\n % (repr(estimator), type(estimator)))\n\n klass = estimator.__class__\n new_object_params = estimator.get_params(deep=False)\n for name, param in new_object_params.items():\n new_object_params[name] = clone(param, safe=False)\n new_object = klass(**new_object_params)\n params_set = new_object.get_params(deep=False)\n\n # quick sanity check of the parameters of the clone\n for name in new_object_params:\n param1 = new_object_params[name]\n param2 = params_set[name]\n if param1 is not param2:\n raise RuntimeError('Cannot clone object %s, as the constructor '\n 'either does not set or modifies parameter %s' %\n (estimator, name))\n return new_object" + }, + { + "id": "scikit-learn/sklearn.base/is_classifier", + "name": "is_classifier", + "qname": "sklearn.base.is_classifier", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/is_classifier/estimator", + "name": "estimator", + "qname": "sklearn.base.is_classifier.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "object", + "default_value": "", + "description": "Estimator object to test." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return True if the given estimator is (probably) a classifier.", + "docstring": "Return True if the given estimator is (probably) a classifier.\n\nParameters\n----------\nestimator : object\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if estimator is a classifier and False otherwise.", + "code": "def is_classifier(estimator):\n \"\"\"Return True if the given estimator is (probably) a classifier.\n\n Parameters\n ----------\n estimator : object\n Estimator object to test.\n\n Returns\n -------\n out : bool\n True if estimator is a classifier and False otherwise.\n \"\"\"\n return getattr(estimator, \"_estimator_type\", None) == \"classifier\"" + }, + { + "id": "scikit-learn/sklearn.base/is_outlier_detector", + "name": "is_outlier_detector", + "qname": "sklearn.base.is_outlier_detector", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/is_outlier_detector/estimator", + "name": "estimator", + "qname": "sklearn.base.is_outlier_detector.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "estimator instance", + "default_value": "", + "description": "Estimator object to test." 
+ }, + "type": { + "kind": "NamedType", + "name": "estimator instance" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return True if the given estimator is (probably) an outlier detector.", + "docstring": "Return True if the given estimator is (probably) an outlier detector.\n\nParameters\n----------\nestimator : estimator instance\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if estimator is an outlier detector and False otherwise.", + "code": "def is_outlier_detector(estimator):\n \"\"\"Return True if the given estimator is (probably) an outlier detector.\n\n Parameters\n ----------\n estimator : estimator instance\n Estimator object to test.\n\n Returns\n -------\n out : bool\n True if estimator is an outlier detector and False otherwise.\n \"\"\"\n return getattr(estimator, \"_estimator_type\", None) == \"outlier_detector\"" + }, + { + "id": "scikit-learn/sklearn.base/is_regressor", + "name": "is_regressor", + "qname": "sklearn.base.is_regressor", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.base/is_regressor/estimator", + "name": "estimator", + "qname": "sklearn.base.is_regressor.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "estimator instance", + "default_value": "", + "description": "Estimator object to test." + }, + "type": { + "kind": "NamedType", + "name": "estimator instance" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return True if the given estimator is (probably) a regressor.", + "docstring": "Return True if the given estimator is (probably) a regressor.\n\nParameters\n----------\nestimator : estimator instance\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if estimator is a regressor and False otherwise.", + "code": "def is_regressor(estimator):\n \"\"\"Return True if the given estimator is (probably) a regressor.\n\n Parameters\n ----------\n estimator : estimator instance\n Estimator object to test.\n\n Returns\n -------\n out : bool\n True if estimator is a regressor and False otherwise.\n \"\"\"\n return getattr(estimator, \"_estimator_type\", None) == \"regressor\"" + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/__init__", + "name": "__init__", + "qname": "sklearn.calibration.CalibratedClassifierCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/__init__/self", + "name": "self", + "qname": "sklearn.calibration.CalibratedClassifierCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.calibration.CalibratedClassifierCV.__init__.base_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "estimator instance", + "default_value": "None", + "description": "The classifier whose output need to be calibrated to provide more\naccurate `predict_proba` outputs. The default classifier is\na :class:`~sklearn.svm.LinearSVC`." 
+ }, + "type": { + "kind": "NamedType", + "name": "estimator instance" + } + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/__init__/method", + "name": "method", + "qname": "sklearn.calibration.CalibratedClassifierCV.__init__.method", + "default_value": "'sigmoid'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'sigmoid', 'isotonic'}", + "default_value": "'sigmoid'", + "description": "The method to use for calibration. Can be 'sigmoid' which\ncorresponds to Platt's method (i.e. a logistic regression model) or\n'isotonic' which is a non-parametric approach. It is not advised to\nuse isotonic calibration with too few calibration samples\n``(<<1000)`` since it tends to overfit." + }, + "type": { + "kind": "EnumType", + "values": ["isotonic", "sigmoid"] + } + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/__init__/cv", + "name": "cv", + "qname": "sklearn.calibration.CalibratedClassifierCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, cross-validation generator, iterable or \"prefit\"", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if ``y`` is binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is\nneither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\nis used.\n\nRefer to the :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\nIf \"prefit\" is passed, it is assumed that `base_estimator` has been\nfitted already and all data is used for calibration.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "iterable" + }, + { + "kind": "NamedType", + "name": "\"prefit\"" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.calibration.CalibratedClassifierCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors.\n\nBase estimator clones are fitted in parallel across cross-validation\niterations. Therefore parallelism happens only when `cv != \"prefit\"`.\n\nSee :term:`Glossary ` for more details.\n\n.. 
versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/__init__/ensemble", + "name": "ensemble", + "qname": "sklearn.calibration.CalibratedClassifierCV.__init__.ensemble", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Determines how the calibrator is fitted when `cv` is not `'prefit'`.\nIgnored if `cv='prefit'`.\n\nIf `True`, the `base_estimator` is fitted using training data and\ncalibrated using testing data, for each `cv` fold. The final estimator\nis an ensemble of `n_cv` fitted classifer and calibrator pairs, where\n`n_cv` is the number of cross-validation folds. The output is the\naverage predicted probabilities of all pairs.\n\nIf `False`, `cv` is used to compute unbiased predictions, via\n:func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. At prediction time, the classifier used is the\n`base_estimator` trained on all the data.\nNote that this method is also internally implemented in\n:mod:`sklearn.svm` estimators with the `probabilities=True` parameter.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Probability calibration with isotonic regression or logistic regression.\n\nThis class uses cross-validation to both estimate the parameters of a\nclassifier and subsequently calibrate a classifier. With default\n`ensemble=True`, for each cv split it\nfits a copy of the base estimator to the training subset, and calibrates it\nusing the testing subset. For prediction, predicted probabilities are\naveraged across these individual calibrated classifiers. When\n`ensemble=False`, cross-validation is used to obtain unbiased predictions,\nvia :func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. For prediction, the base estimator, trained using all\nthe data, is used. This is the method implemented when `probabilities=True`\nfor :mod:`sklearn.svm` estimators.\n\nAlready fitted classifiers can be calibrated via the parameter\n`cv=\"prefit\"`. In this case, no cross-validation is used and all provided\ndata is used for calibration. 
The user has to take care manually that data\nfor model fitting and calibration are disjoint.\n\nThe calibration is based on the :term:`decision_function` method of the\n`base_estimator` if it exists, else on :term:`predict_proba`.\n\nRead more in the :ref:`User Guide <calibration>`.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, base_estimator=None, *, method='sigmoid',\n cv=None, n_jobs=None, ensemble=True):\n self.base_estimator = base_estimator\n self.method = method\n self.cv = cv\n self.n_jobs = n_jobs\n self.ensemble = ensemble" + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/_more_tags", + "name": "_more_tags", + "qname": "sklearn.calibration.CalibratedClassifierCV._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/_more_tags/self", + "name": "self", + "qname": "sklearn.calibration.CalibratedClassifierCV._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/fit", + "name": "fit", + "qname": "sklearn.calibration.CalibratedClassifierCV.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/fit/self", + "name": "self", + "qname": "sklearn.calibration.CalibratedClassifierCV.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/fit/X", + "name": "X", + "qname": "sklearn.calibration.CalibratedClassifierCV.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/fit/y", + "name": "y", + "qname": "sklearn.calibration.CalibratedClassifierCV.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.calibration.CalibratedClassifierCV.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit the calibrated model.", + "docstring": "Fit the calibrated model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\nself : object\n Returns an instance of self.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the calibrated model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\n Returns\n -------\n self : object\n Returns an instance of self.\n \"\"\"\n check_classification_targets(y)\n X, y = indexable(X, y)\n\n if self.base_estimator is None:\n # we want all classifiers that don't expose a random_state\n # to be deterministic (and we don't want to expose this one).\n base_estimator = LinearSVC(random_state=0)\n else:\n base_estimator = self.base_estimator\n\n self.calibrated_classifiers_ = []\n if self.cv == \"prefit\":\n # `classes_` and `n_features_in_` should be consistent with that\n # of base_estimator\n if isinstance(self.base_estimator, Pipeline):\n check_is_fitted(self.base_estimator[-1])\n else:\n check_is_fitted(self.base_estimator)\n with suppress(AttributeError):\n self.n_features_in_ = base_estimator.n_features_in_\n self.classes_ = self.base_estimator.classes_\n\n pred_method = _get_prediction_method(base_estimator)\n n_classes = len(self.classes_)\n predictions = _compute_predictions(pred_method, X, n_classes)\n\n calibrated_classifier = _fit_calibrator(\n base_estimator, predictions, y, self.classes_, self.method,\n sample_weight\n )\n self.calibrated_classifiers_.append(calibrated_classifier)\n else:\n X, y = self._validate_data(\n X, y, accept_sparse=['csc', 'csr', 'coo'],\n force_all_finite=False, allow_nd=True\n )\n # Set `classes_` using all `y`\n label_encoder_ = LabelEncoder().fit(y)\n self.classes_ = label_encoder_.classes_\n n_classes = len(self.classes_)\n\n # sample_weight checks\n fit_parameters = signature(base_estimator.fit).parameters\n supports_sw = \"sample_weight\" in fit_parameters\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n if not supports_sw:\n estimator_name = type(base_estimator).__name__\n warnings.warn(f\"Since {estimator_name} does not support \"\n \"sample_weights, sample weights will only be\"\n \" used for the calibration itself.\")\n\n # Check that each cross-validation fold can have at least one\n # example per class\n if isinstance(self.cv, int):\n n_folds = self.cv\n elif hasattr(self.cv, \"n_splits\"):\n n_folds = self.cv.n_splits\n else:\n n_folds = None\n if n_folds and np.any([np.sum(y == class_) < n_folds\n for class_ in self.classes_]):\n raise ValueError(f\"Requesting {n_folds}-fold \"\n \"cross-validation but provided less than \"\n f\"{n_folds} examples for at least one class.\")\n cv = check_cv(self.cv, y, classifier=True)\n\n if self.ensemble:\n parallel = Parallel(n_jobs=self.n_jobs)\n\n self.calibrated_classifiers_ = parallel(\n delayed(_fit_classifier_calibrator_pair)(\n clone(base_estimator), X, y, 
train=train, test=test,\n method=self.method, classes=self.classes_,\n supports_sw=supports_sw, sample_weight=sample_weight)\n for train, test in cv.split(X, y)\n )\n else:\n this_estimator = clone(base_estimator)\n method_name = _get_prediction_method(this_estimator).__name__\n pred_method = partial(\n cross_val_predict, estimator=this_estimator, X=X, y=y,\n cv=cv, method=method_name, n_jobs=self.n_jobs\n )\n predictions = _compute_predictions(pred_method, X, n_classes)\n\n if sample_weight is not None and supports_sw:\n this_estimator.fit(X, y, sample_weight)\n else:\n this_estimator.fit(X, y)\n calibrated_classifier = _fit_calibrator(\n this_estimator, predictions, y, self.classes_, self.method,\n sample_weight\n )\n self.calibrated_classifiers_.append(calibrated_classifier)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/predict", + "name": "predict", + "qname": "sklearn.calibration.CalibratedClassifierCV.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/predict/self", + "name": "self", + "qname": "sklearn.calibration.CalibratedClassifierCV.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/predict/X", + "name": "X", + "qname": "sklearn.calibration.CalibratedClassifierCV.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Predict the target of new samples. The predicted class is the\nclass that has the highest probability, and can thus be different\nfrom the prediction of the uncalibrated classifier.", + "docstring": "Predict the target of new samples. The predicted class is the\nclass that has the highest probability, and can thus be different\nfrom the prediction of the uncalibrated classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The samples.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n The predicted class.", + "code": " def predict(self, X):\n \"\"\"Predict the target of new samples. 
The predicted class is the\n class that has the highest probability, and can thus be different\n from the prediction of the uncalibrated classifier.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The samples.\n\n Returns\n -------\n C : ndarray of shape (n_samples,)\n The predicted class.\n \"\"\"\n check_is_fitted(self)\n return self.classes_[np.argmax(self.predict_proba(X), axis=1)]" + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/predict_proba", + "name": "predict_proba", + "qname": "sklearn.calibration.CalibratedClassifierCV.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/predict_proba/self", + "name": "self", + "qname": "sklearn.calibration.CalibratedClassifierCV.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.calibration/CalibratedClassifierCV/predict_proba/X", + "name": "X", + "qname": "sklearn.calibration.CalibratedClassifierCV.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Calibrated probabilities of classification.\n\nThis function returns calibrated probabilities of classification\naccording to each class on an array of test vectors X.", + "docstring": "Calibrated probabilities of classification.\n\nThis function returns calibrated probabilities of classification\naccording to each class on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The samples.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n The predicted probas.", + "code": " def predict_proba(self, X):\n \"\"\"Calibrated probabilities of classification.\n\n This function returns calibrated probabilities of classification\n according to each class on an array of test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The samples.\n\n Returns\n -------\n C : ndarray of shape (n_samples, n_classes)\n The predicted probas.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse=['csc', 'csr', 'coo'],\n force_all_finite=False)\n # Compute the arithmetic mean of the predictions of the calibrated\n # classifiers\n mean_proba = np.zeros((X.shape[0], len(self.classes_)))\n for calibrated_classifier in self.calibrated_classifiers_:\n proba = calibrated_classifier.predict_proba(X)\n mean_proba += proba\n\n mean_proba /= len(self.calibrated_classifiers_)\n\n return mean_proba" + }, + { + "id": "scikit-learn/sklearn.calibration/_CalibratedClassifier/__init__", + "name": "__init__", + "qname": "sklearn.calibration._CalibratedClassifier.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/_CalibratedClassifier/__init__/self", + "name": "self", + "qname": "sklearn.calibration._CalibratedClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.calibration/_CalibratedClassifier/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.calibration._CalibratedClassifier.__init__.base_estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator instance", + "default_value": "", + "description": "Fitted classifier." + }, + "type": { + "kind": "NamedType", + "name": "estimator instance" + } + }, + { + "id": "scikit-learn/sklearn.calibration/_CalibratedClassifier/__init__/calibrators", + "name": "calibrators", + "qname": "sklearn.calibration._CalibratedClassifier.__init__.calibrators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of fitted estimator instances", + "default_value": "", + "description": "List of fitted calibrators (either 'IsotonicRegression' or\n'_SigmoidCalibration'). The number of calibrators equals the number of\nclasses. However, if there are 2 classes, the list contains only one\nfitted calibrator." + }, + "type": { + "kind": "NamedType", + "name": "list of fitted estimator instances" + } + }, + { + "id": "scikit-learn/sklearn.calibration/_CalibratedClassifier/__init__/classes", + "name": "classes", + "qname": "sklearn.calibration._CalibratedClassifier.__init__.classes", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "", + "description": "All the prediction classes." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.calibration/_CalibratedClassifier/__init__/method", + "name": "method", + "qname": "sklearn.calibration._CalibratedClassifier.__init__.method", + "default_value": "'sigmoid'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'sigmoid', 'isotonic'}", + "default_value": "'sigmoid'", + "description": "The method to use for calibration. Can be 'sigmoid' which\ncorresponds to Platt's method or 'isotonic' which is a\nnon-parametric approach based on isotonic regression." + }, + "type": { + "kind": "EnumType", + "values": ["isotonic", "sigmoid"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Pipeline-like chaining a fitted classifier and its fitted calibrators.", + "docstring": "", + "code": " def __init__(self, base_estimator, calibrators, *, classes,\n method='sigmoid'):\n self.base_estimator = base_estimator\n self.calibrators = calibrators\n self.classes = classes\n self.method = method" + }, + { + "id": "scikit-learn/sklearn.calibration/_CalibratedClassifier/calibrators_@getter", + "name": "calibrators_", + "qname": "sklearn.calibration._CalibratedClassifier.calibrators_", + "decorators": [ + "deprecated('calibrators_ is deprecated in 0.24 and will be removed in 1.1(renaming of 0.26). 
Use calibrators instead.')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/_CalibratedClassifier/calibrators_/self", + "name": "self", + "qname": "sklearn.calibration._CalibratedClassifier.calibrators_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"calibrators_ is deprecated in 0.24 and will be removed in 1.1\"\n \"(renaming of 0.26). Use calibrators instead.\"\n )\n @property\n def calibrators_(self):\n return self.calibrators" + }, + { + "id": "scikit-learn/sklearn.calibration/_CalibratedClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.calibration._CalibratedClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/_CalibratedClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.calibration._CalibratedClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.calibration/_CalibratedClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.calibration._CalibratedClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The sample data." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate calibrated probabilities.\n\nCalculates classification calibrated probabilities\nfor each class, in a one-vs-all manner, for `X`.", + "docstring": "Calculate calibrated probabilities.\n\nCalculates classification calibrated probabilities\nfor each class, in a one-vs-all manner, for `X`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The sample data.\n\nReturns\n-------\nproba : array, shape (n_samples, n_classes)\n The predicted probabilities. Can be exact zeros.", + "code": " def predict_proba(self, X):\n \"\"\"Calculate calibrated probabilities.\n\n Calculates classification calibrated probabilities\n for each class, in a one-vs-all manner, for `X`.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The sample data.\n\n Returns\n -------\n proba : array, shape (n_samples, n_classes)\n The predicted probabilities. Can be exact zeros.\n \"\"\"\n n_classes = len(self.classes)\n pred_method = _get_prediction_method(self.base_estimator)\n predictions = _compute_predictions(pred_method, X, n_classes)\n\n label_encoder = LabelEncoder().fit(self.classes)\n pos_class_indices = label_encoder.transform(\n self.base_estimator.classes_\n )\n\n proba = np.zeros((X.shape[0], n_classes))\n for class_idx, this_pred, calibrator in \\\n zip(pos_class_indices, predictions.T, self.calibrators):\n if n_classes == 2:\n # When binary, `predictions` consists only of predictions for\n # clf.classes_[1] but `pos_class_indices` = 0\n class_idx += 1\n proba[:, class_idx] = calibrator.predict(this_pred)\n\n # Normalize the probabilities\n if n_classes == 2:\n proba[:, 0] = 1. 
- proba[:, 1]\n else:\n proba /= np.sum(proba, axis=1)[:, np.newaxis]\n\n # XXX : for some reason all probas can be 0\n proba[np.isnan(proba)] = 1. / n_classes\n\n # Deal with cases where the predicted probability minimally exceeds 1.0\n proba[(1.0 < proba) & (proba <= 1.0 + 1e-5)] = 1.0\n\n return proba" + }, + { + "id": "scikit-learn/sklearn.calibration/_SigmoidCalibration/fit", + "name": "fit", + "qname": "sklearn.calibration._SigmoidCalibration.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/_SigmoidCalibration/fit/self", + "name": "self", + "qname": "sklearn.calibration._SigmoidCalibration.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.calibration/_SigmoidCalibration/fit/X", + "name": "X", + "qname": "sklearn.calibration._SigmoidCalibration.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.calibration/_SigmoidCalibration/fit/y", + "name": "y", + "qname": "sklearn.calibration._SigmoidCalibration.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Training target." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.calibration/_SigmoidCalibration/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.calibration._SigmoidCalibration.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model using X, y as training data.", + "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples,)\n Training data.\n\ny : array-like of shape (n_samples,)\n Training target.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\nself : object\n Returns an instance of self.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples,)\n Training data.\n\n y : array-like of shape (n_samples,)\n Training target.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n\n Returns\n -------\n self : object\n Returns an instance of self.\n \"\"\"\n X = column_or_1d(X)\n y = column_or_1d(y)\n X, y = indexable(X, y)\n\n self.a_, self.b_ = _sigmoid_calibration(X, y, sample_weight)\n return self" + }, + { + "id": "scikit-learn/sklearn.calibration/_SigmoidCalibration/predict", + "name": "predict", + "qname": "sklearn.calibration._SigmoidCalibration.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/_SigmoidCalibration/predict/self", + "name": "self", + "qname": "sklearn.calibration._SigmoidCalibration.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.calibration/_SigmoidCalibration/predict/T", + "name": "T", + "qname": "sklearn.calibration._SigmoidCalibration.predict.T", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Data to predict from." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict new data by linear interpolation.", + "docstring": "Predict new data by linear interpolation.\n\nParameters\n----------\nT : array-like of shape (n_samples,)\n Data to predict from.\n\nReturns\n-------\nT_ : ndarray of shape (n_samples,)\n The predicted data.", + "code": " def predict(self, T):\n \"\"\"Predict new data by linear interpolation.\n\n Parameters\n ----------\n T : array-like of shape (n_samples,)\n Data to predict from.\n\n Returns\n -------\n T_ : ndarray of shape (n_samples,)\n The predicted data.\n \"\"\"\n T = column_or_1d(T)\n return expit(-(self.a_ * T + self.b_))" + }, + { + "id": "scikit-learn/sklearn.calibration/_compute_predictions", + "name": "_compute_predictions", + "qname": "sklearn.calibration._compute_predictions", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/_compute_predictions/pred_method", + "name": "pred_method", + "qname": "sklearn.calibration._compute_predictions.pred_method", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Prediction method." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.calibration/_compute_predictions/X", + "name": "X", + "qname": "sklearn.calibration._compute_predictions.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or None", + "default_value": "", + "description": "Data used to obtain predictions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.calibration/_compute_predictions/n_classes", + "name": "n_classes", + "qname": "sklearn.calibration._compute_predictions.n_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of classes present." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return predictions for `X` and reshape binary outputs to shape\n(n_samples, 1).", + "docstring": "Return predictions for `X` and reshape binary outputs to shape\n(n_samples, 1).\n\nParameters\n----------\npred_method : callable\n Prediction method.\n\nX : array-like or None\n Data used to obtain predictions.\n\nn_classes : int\n Number of classes present.\n\nReturns\n-------\npredictions : array-like, shape (X.shape[0], len(clf.classes_))\n The predictions. Note if there are 2 classes, array is of shape\n (X.shape[0], 1).", + "code": "def _compute_predictions(pred_method, X, n_classes):\n \"\"\"Return predictions for `X` and reshape binary outputs to shape\n (n_samples, 1).\n\n Parameters\n ----------\n pred_method : callable\n Prediction method.\n\n X : array-like or None\n Data used to obtain predictions.\n\n n_classes : int\n Number of classes present.\n\n Returns\n -------\n predictions : array-like, shape (X.shape[0], len(clf.classes_))\n The predictions. Note if there are 2 classes, array is of shape\n (X.shape[0], 1).\n \"\"\"\n predictions = pred_method(X=X)\n if hasattr(pred_method, '__name__'):\n method_name = pred_method.__name__\n else:\n method_name = signature(pred_method).parameters['method'].default\n\n if method_name == 'decision_function':\n if predictions.ndim == 1:\n predictions = predictions[:, np.newaxis]\n elif method_name == 'predict_proba':\n if n_classes == 2:\n predictions = predictions[:, 1:]\n else: # pragma: no cover\n # this branch should be unreachable.\n raise ValueError(f\"Invalid prediction method: {method_name}\")\n return predictions" + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_calibrator", + "name": "_fit_calibrator", + "qname": "sklearn.calibration._fit_calibrator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/_fit_calibrator/clf", + "name": "clf", + "qname": "sklearn.calibration._fit_calibrator.clf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator instance", + "default_value": "", + "description": "Fitted classifier." + }, + "type": { + "kind": "NamedType", + "name": "estimator instance" + } + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_calibrator/predictions", + "name": "predictions", + "qname": "sklearn.calibration._fit_calibrator.predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_classes) or (n_samples, 1) when binary.", + "default_value": "", + "description": "Raw predictions returned by the un-calibrated base classifier." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_classes) or (n_samples, 1) when binary." + } + ] + } + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_calibrator/y", + "name": "y", + "qname": "sklearn.calibration._fit_calibrator.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples,)", + "default_value": "", + "description": "The targets." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_calibrator/classes", + "name": "classes", + "qname": "sklearn.calibration._fit_calibrator.classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_classes,)", + "default_value": "", + "description": "All the prediction classes." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_classes,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_calibrator/method", + "name": "method", + "qname": "sklearn.calibration._fit_calibrator.method", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'sigmoid', 'isotonic'}", + "default_value": "", + "description": "The method to use for calibration." + }, + "type": { + "kind": "EnumType", + "values": ["isotonic", "sigmoid"] + } + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_calibrator/sample_weight", + "name": "sample_weight", + "qname": "sklearn.calibration._fit_calibrator.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit calibrator(s) and return a `_CalibratedClassifier`\ninstance.\n\n`n_classes` (i.e. `len(clf.classes_)`) calibrators are fitted.\nHowever, if `n_classes` equals 2, one calibrator is fitted.", + "docstring": "Fit calibrator(s) and return a `_CalibratedClassifier`\ninstance.\n\n`n_classes` (i.e. `len(clf.classes_)`) calibrators are fitted.\nHowever, if `n_classes` equals 2, one calibrator is fitted.\n\nParameters\n----------\nclf : estimator instance\n Fitted classifier.\n\npredictions : array-like, shape (n_samples, n_classes) or (n_samples, 1) when binary.\n Raw predictions returned by the un-calibrated base classifier.\n\ny : array-like, shape (n_samples,)\n The targets.\n\nclasses : ndarray, shape (n_classes,)\n All the prediction classes.\n\nmethod : {'sigmoid', 'isotonic'}\n The method to use for calibration.\n\nsample_weight : ndarray, shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\npipeline : _CalibratedClassifier instance", + "code": "def _fit_calibrator(clf, predictions, y, classes, method, sample_weight=None):\n \"\"\"Fit calibrator(s) and return a `_CalibratedClassifier`\n instance.\n\n `n_classes` (i.e. 
`len(clf.classes_)`) calibrators are fitted.\n However, if `n_classes` equals 2, one calibrator is fitted.\n\n Parameters\n ----------\n clf : estimator instance\n Fitted classifier.\n\n predictions : array-like, shape (n_samples, n_classes) or (n_samples, 1) \\\n when binary.\n Raw predictions returned by the un-calibrated base classifier.\n\n y : array-like, shape (n_samples,)\n The targets.\n\n classes : ndarray, shape (n_classes,)\n All the prediction classes.\n\n method : {'sigmoid', 'isotonic'}\n The method to use for calibration.\n\n sample_weight : ndarray, shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\n Returns\n -------\n pipeline : _CalibratedClassifier instance\n \"\"\"\n Y = label_binarize(y, classes=classes)\n label_encoder = LabelEncoder().fit(classes)\n pos_class_indices = label_encoder.transform(clf.classes_)\n calibrators = []\n for class_idx, this_pred in zip(pos_class_indices, predictions.T):\n if method == 'isotonic':\n calibrator = IsotonicRegression(out_of_bounds='clip')\n elif method == 'sigmoid':\n calibrator = _SigmoidCalibration()\n else:\n raise ValueError(\"'method' should be one of: 'sigmoid' or \"\n f\"'isotonic'. Got {method}.\")\n calibrator.fit(this_pred, Y[:, class_idx], sample_weight)\n calibrators.append(calibrator)\n\n pipeline = _CalibratedClassifier(\n clf, calibrators, method=method, classes=classes\n )\n return pipeline" + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_classifier_calibrator_pair", + "name": "_fit_classifier_calibrator_pair", + "qname": "sklearn.calibration._fit_classifier_calibrator_pair", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/_fit_classifier_calibrator_pair/estimator", + "name": "estimator", + "qname": "sklearn.calibration._fit_classifier_calibrator_pair.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator instance", + "default_value": "", + "description": "Cloned base estimator." + }, + "type": { + "kind": "NamedType", + "name": "estimator instance" + } + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_classifier_calibrator_pair/X", + "name": "X", + "qname": "sklearn.calibration._fit_classifier_calibrator_pair.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "Sample data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_classifier_calibrator_pair/y", + "name": "y", + "qname": "sklearn.calibration._fit_classifier_calibrator_pair.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples,)", + "default_value": "", + "description": "Targets." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_classifier_calibrator_pair/train", + "name": "train", + "qname": "sklearn.calibration._fit_classifier_calibrator_pair.train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_train_indicies,)", + "default_value": "", + "description": "Indices of the training subset." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_train_indicies,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_classifier_calibrator_pair/test", + "name": "test", + "qname": "sklearn.calibration._fit_classifier_calibrator_pair.test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_test_indicies,)", + "default_value": "", + "description": "Indices of the testing subset." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_test_indicies,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_classifier_calibrator_pair/supports_sw", + "name": "supports_sw", + "qname": "sklearn.calibration._fit_classifier_calibrator_pair.supports_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Whether or not the `estimator` supports sample weights." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_classifier_calibrator_pair/method", + "name": "method", + "qname": "sklearn.calibration._fit_classifier_calibrator_pair.method", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'sigmoid', 'isotonic'}", + "default_value": "", + "description": "Method to use for calibration." + }, + "type": { + "kind": "EnumType", + "values": ["isotonic", "sigmoid"] + } + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_classifier_calibrator_pair/classes", + "name": "classes", + "qname": "sklearn.calibration._fit_classifier_calibrator_pair.classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_classes,)", + "default_value": "", + "description": "The target classes." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_classes,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.calibration/_fit_classifier_calibrator_pair/sample_weight", + "name": "sample_weight", + "qname": "sklearn.calibration._fit_classifier_calibrator_pair.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "Sample weights for `X`." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit a classifier/calibration pair on a given train/test split.\n\nFit the classifier on the train set, compute its predictions on the test\nset and use the predictions as input to fit the calibrator along with the\ntest labels.", + "docstring": "Fit a classifier/calibration pair on a given train/test split.\n\nFit the classifier on the train set, compute its predictions on the test\nset and use the predictions as input to fit the calibrator along with the\ntest labels.\n\nParameters\n----------\nestimator : estimator instance\n Cloned base estimator.\n\nX : array-like, shape (n_samples, n_features)\n Sample data.\n\ny : array-like, shape (n_samples,)\n Targets.\n\ntrain : ndarray, shape (n_train_indicies,)\n Indices of the training subset.\n\ntest : ndarray, shape (n_test_indicies,)\n Indices of the testing subset.\n\nsupports_sw : bool\n Whether or not the `estimator` supports sample weights.\n\nmethod : {'sigmoid', 'isotonic'}\n Method to use for calibration.\n\nclasses : ndarray, shape (n_classes,)\n The target classes.\n\nsample_weight : array-like, default=None\n Sample weights for `X`.\n\nReturns\n-------\ncalibrated_classifier : _CalibratedClassifier instance", + "code": "def _fit_classifier_calibrator_pair(estimator, X, y, train, test, supports_sw,\n method, classes, sample_weight=None):\n \"\"\"Fit a classifier/calibration pair on a given train/test split.\n\n Fit the classifier on the train set, compute its predictions on the test\n set and use the predictions as input to fit the calibrator along with the\n test labels.\n\n Parameters\n ----------\n estimator : estimator instance\n Cloned base estimator.\n\n X : array-like, shape (n_samples, n_features)\n Sample data.\n\n y : array-like, shape (n_samples,)\n Targets.\n\n train : ndarray, shape (n_train_indicies,)\n Indices of the training subset.\n\n test : ndarray, shape (n_test_indicies,)\n Indices of the testing subset.\n\n supports_sw : bool\n Whether or not the `estimator` supports sample weights.\n\n method : {'sigmoid', 'isotonic'}\n Method to use for calibration.\n\n classes : ndarray, shape (n_classes,)\n The target classes.\n\n sample_weight : array-like, default=None\n Sample weights for `X`.\n\n Returns\n -------\n calibrated_classifier : _CalibratedClassifier instance\n \"\"\"\n if sample_weight is not None and supports_sw:\n estimator.fit(X[train], y[train],\n sample_weight=sample_weight[train])\n else:\n estimator.fit(X[train], y[train])\n\n n_classes = len(classes)\n pred_method = _get_prediction_method(estimator)\n predictions = _compute_predictions(pred_method, X[test], n_classes)\n\n sw = None if sample_weight is None else sample_weight[test]\n calibrated_classifier = _fit_calibrator(\n estimator, predictions, y[test], classes, method, sample_weight=sw\n )\n return calibrated_classifier" + }, + { + "id": "scikit-learn/sklearn.calibration/_get_prediction_method", + "name": "_get_prediction_method", + "qname": "sklearn.calibration._get_prediction_method", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/_get_prediction_method/clf", + "name": "clf", + "qname": "sklearn.calibration._get_prediction_method.clf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Estimator instance", + "default_value": "", + "description": "Fitted classifier to obtain the prediction 
method from." + }, + "type": { + "kind": "NamedType", + "name": "Estimator instance" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return prediction method.\n\n`decision_function` method of `clf` returned, if it\nexists, otherwise `predict_proba` method returned.", + "docstring": "Return prediction method.\n\n`decision_function` method of `clf` returned, if it\nexists, otherwise `predict_proba` method returned.\n\nParameters\n----------\nclf : Estimator instance\n Fitted classifier to obtain the prediction method from.\n\nReturns\n-------\nprediction_method : callable\n The prediction method.", + "code": "def _get_prediction_method(clf):\n \"\"\"Return prediction method.\n\n `decision_function` method of `clf` returned, if it\n exists, otherwise `predict_proba` method returned.\n\n Parameters\n ----------\n clf : Estimator instance\n Fitted classifier to obtain the prediction method from.\n\n Returns\n -------\n prediction_method : callable\n The prediction method.\n \"\"\"\n if hasattr(clf, 'decision_function'):\n method = getattr(clf, 'decision_function')\n elif hasattr(clf, 'predict_proba'):\n method = getattr(clf, 'predict_proba')\n else:\n raise RuntimeError(\"'base_estimator' has no 'decision_function' or \"\n \"'predict_proba' method.\")\n return method" + }, + { + "id": "scikit-learn/sklearn.calibration/_sigmoid_calibration", + "name": "_sigmoid_calibration", + "qname": "sklearn.calibration._sigmoid_calibration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/_sigmoid_calibration/predictions", + "name": "predictions", + "qname": "sklearn.calibration._sigmoid_calibration.predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The decision function or predict proba for the samples." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.calibration/_sigmoid_calibration/y", + "name": "y", + "qname": "sklearn.calibration._sigmoid_calibration.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The targets." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.calibration/_sigmoid_calibration/sample_weight", + "name": "sample_weight", + "qname": "sklearn.calibration._sigmoid_calibration.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Probability Calibration with sigmoid method (Platt 2000)", + "docstring": "Probability Calibration with sigmoid method (Platt 2000)\n\nParameters\n----------\npredictions : ndarray of shape (n_samples,)\n The decision function or predict proba for the samples.\n\ny : ndarray of shape (n_samples,)\n The targets.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n\nReturns\n-------\na : float\n The slope.\n\nb : float\n The intercept.\n\nReferences\n----------\nPlatt, \"Probabilistic Outputs for Support Vector Machines\"", + "code": "def _sigmoid_calibration(predictions, y, sample_weight=None):\n \"\"\"Probability Calibration with sigmoid method (Platt 2000)\n\n Parameters\n ----------\n predictions : ndarray of shape (n_samples,)\n The decision function or predict proba for the samples.\n\n y : ndarray of shape (n_samples,)\n The targets.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\n Returns\n -------\n a : float\n The slope.\n\n b : float\n The intercept.\n\n References\n ----------\n Platt, \"Probabilistic Outputs for Support Vector Machines\"\n \"\"\"\n predictions = column_or_1d(predictions)\n y = column_or_1d(y)\n\n F = predictions # F follows Platt's notations\n\n # Bayesian priors (see Platt end of section 2.2)\n prior0 = float(np.sum(y <= 0))\n prior1 = y.shape[0] - prior0\n T = np.zeros(y.shape)\n T[y > 0] = (prior1 + 1.) / (prior1 + 2.)\n T[y <= 0] = 1. / (prior0 + 2.)\n T1 = 1. - T\n\n def objective(AB):\n # From Platt (beginning of Section 2.2)\n P = expit(-(AB[0] * F + AB[1]))\n loss = -(xlogy(T, P) + xlogy(T1, 1. - P))\n if sample_weight is not None:\n return (sample_weight * loss).sum()\n else:\n return loss.sum()\n\n def grad(AB):\n # gradient of the objective function\n P = expit(-(AB[0] * F + AB[1]))\n TEP_minus_T1P = T - P\n if sample_weight is not None:\n TEP_minus_T1P *= sample_weight\n dA = np.dot(TEP_minus_T1P, F)\n dB = np.sum(TEP_minus_T1P)\n return np.array([dA, dB])\n\n AB0 = np.array([0., log((prior0 + 1.) / (prior1 + 1.))])\n AB_ = fmin_bfgs(objective, AB0, fprime=grad, disp=False)\n return AB_[0], AB_[1]" + }, + { + "id": "scikit-learn/sklearn.calibration/calibration_curve", + "name": "calibration_curve", + "qname": "sklearn.calibration.calibration_curve", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.calibration/calibration_curve/y_true", + "name": "y_true", + "qname": "sklearn.calibration.calibration_curve.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "True targets." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.calibration/calibration_curve/y_prob", + "name": "y_prob", + "qname": "sklearn.calibration.calibration_curve.y_prob", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Probabilities of the positive class." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.calibration/calibration_curve/normalize", + "name": "normalize", + "qname": "sklearn.calibration.calibration_curve.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether y_prob needs to be normalized into the [0, 1] interval, i.e.\nis not a proper probability. If True, the smallest value in y_prob\nis linearly mapped onto 0 and the largest one onto 1." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.calibration/calibration_curve/n_bins", + "name": "n_bins", + "qname": "sklearn.calibration.calibration_curve.n_bins", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of bins to discretize the [0, 1] interval. A bigger number\nrequires more data. Bins with no samples (i.e. without\ncorresponding values in `y_prob`) will not be returned, thus the\nreturned arrays may have less than `n_bins` values." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.calibration/calibration_curve/strategy", + "name": "strategy", + "qname": "sklearn.calibration.calibration_curve.strategy", + "default_value": "'uniform'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'uniform', 'quantile'}", + "default_value": "'uniform'", + "description": "Strategy used to define the widths of the bins.\n\nuniform\n The bins have identical widths.\nquantile\n The bins have the same number of samples and depend on `y_prob`." + }, + "type": { + "kind": "EnumType", + "values": ["quantile", "uniform"] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute true and predicted probabilities for a calibration curve.\n\nThe method assumes the inputs come from a binary classifier, and\ndiscretize the [0, 1] interval into bins.\n\nCalibration curves may also be referred to as reliability diagrams.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute true and predicted probabilities for a calibration curve.\n\nThe method assumes the inputs come from a binary classifier, and\ndiscretize the [0, 1] interval into bins.\n\nCalibration curves may also be referred to as reliability diagrams.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n True targets.\n\ny_prob : array-like of shape (n_samples,)\n Probabilities of the positive class.\n\nnormalize : bool, default=False\n Whether y_prob needs to be normalized into the [0, 1] interval, i.e.\n is not a proper probability. If True, the smallest value in y_prob\n is linearly mapped onto 0 and the largest one onto 1.\n\nn_bins : int, default=5\n Number of bins to discretize the [0, 1] interval. A bigger number\n requires more data. Bins with no samples (i.e. 
without\n corresponding values in `y_prob`) will not be returned, thus the\n returned arrays may have less than `n_bins` values.\n\nstrategy : {'uniform', 'quantile'}, default='uniform'\n Strategy used to define the widths of the bins.\n\n uniform\n The bins have identical widths.\n quantile\n The bins have the same number of samples and depend on `y_prob`.\n\nReturns\n-------\nprob_true : ndarray of shape (n_bins,) or smaller\n The proportion of samples whose class is the positive class, in each\n bin (fraction of positives).\n\nprob_pred : ndarray of shape (n_bins,) or smaller\n The mean predicted probability in each bin.\n\nReferences\n----------\nAlexandru Niculescu-Mizil and Rich Caruana (2005) Predicting Good\nProbabilities With Supervised Learning, in Proceedings of the 22nd\nInternational Conference on Machine Learning (ICML).\nSee section 4 (Qualitative Analysis of Predictions).\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.calibration import calibration_curve\n>>> y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1])\n>>> y_pred = np.array([0.1, 0.2, 0.3, 0.4, 0.65, 0.7, 0.8, 0.9, 1.])\n>>> prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=3)\n>>> prob_true\narray([0. , 0.5, 1. ])\n>>> prob_pred\narray([0.2 , 0.525, 0.85 ])", + "code": "@_deprecate_positional_args\ndef calibration_curve(y_true, y_prob, *, normalize=False, n_bins=5,\n strategy='uniform'):\n \"\"\"Compute true and predicted probabilities for a calibration curve.\n\n The method assumes the inputs come from a binary classifier, and\n discretize the [0, 1] interval into bins.\n\n Calibration curves may also be referred to as reliability diagrams.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,)\n True targets.\n\n y_prob : array-like of shape (n_samples,)\n Probabilities of the positive class.\n\n normalize : bool, default=False\n Whether y_prob needs to be normalized into the [0, 1] interval, i.e.\n is not a proper probability. If True, the smallest value in y_prob\n is linearly mapped onto 0 and the largest one onto 1.\n\n n_bins : int, default=5\n Number of bins to discretize the [0, 1] interval. A bigger number\n requires more data. Bins with no samples (i.e. without\n corresponding values in `y_prob`) will not be returned, thus the\n returned arrays may have less than `n_bins` values.\n\n strategy : {'uniform', 'quantile'}, default='uniform'\n Strategy used to define the widths of the bins.\n\n uniform\n The bins have identical widths.\n quantile\n The bins have the same number of samples and depend on `y_prob`.\n\n Returns\n -------\n prob_true : ndarray of shape (n_bins,) or smaller\n The proportion of samples whose class is the positive class, in each\n bin (fraction of positives).\n\n prob_pred : ndarray of shape (n_bins,) or smaller\n The mean predicted probability in each bin.\n\n References\n ----------\n Alexandru Niculescu-Mizil and Rich Caruana (2005) Predicting Good\n Probabilities With Supervised Learning, in Proceedings of the 22nd\n International Conference on Machine Learning (ICML).\n See section 4 (Qualitative Analysis of Predictions).\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.calibration import calibration_curve\n >>> y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1])\n >>> y_pred = np.array([0.1, 0.2, 0.3, 0.4, 0.65, 0.7, 0.8, 0.9, 1.])\n >>> prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=3)\n >>> prob_true\n array([0. , 0.5, 1. 
])\n >>> prob_pred\n array([0.2 , 0.525, 0.85 ])\n \"\"\"\n y_true = column_or_1d(y_true)\n y_prob = column_or_1d(y_prob)\n check_consistent_length(y_true, y_prob)\n\n if normalize: # Normalize predicted values into interval [0, 1]\n y_prob = (y_prob - y_prob.min()) / (y_prob.max() - y_prob.min())\n elif y_prob.min() < 0 or y_prob.max() > 1:\n raise ValueError(\"y_prob has values outside [0, 1] and normalize is \"\n \"set to False.\")\n\n labels = np.unique(y_true)\n if len(labels) > 2:\n raise ValueError(\"Only binary classification is supported. \"\n \"Provided labels %s.\" % labels)\n y_true = label_binarize(y_true, classes=labels)[:, 0]\n\n if strategy == 'quantile': # Determine bin edges by distribution of data\n quantiles = np.linspace(0, 1, n_bins + 1)\n bins = np.percentile(y_prob, quantiles * 100)\n bins[-1] = bins[-1] + 1e-8\n elif strategy == 'uniform':\n bins = np.linspace(0., 1. + 1e-8, n_bins + 1)\n else:\n raise ValueError(\"Invalid entry to 'strategy' input. Strategy \"\n \"must be either 'quantile' or 'uniform'.\")\n\n binids = np.digitize(y_prob, bins) - 1\n\n bin_sums = np.bincount(binids, weights=y_prob, minlength=len(bins))\n bin_true = np.bincount(binids, weights=y_true, minlength=len(bins))\n bin_total = np.bincount(binids, minlength=len(bins))\n\n nonzero = bin_total != 0\n prob_true = bin_true[nonzero] / bin_total[nonzero]\n prob_pred = bin_sums[nonzero] / bin_total[nonzero]\n\n return prob_true, prob_pred" + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/__init__", + "name": "__init__", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/__init__/self", + "name": "self", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/__init__/damping", + "name": "damping", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.__init__.damping", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "Damping factor (between 0.5 and 1) is the extent to\nwhich the current value is maintained relative to\nincoming values (weighted 1 - damping). This in order\nto avoid numerical oscillations when updating these\nvalues (messages)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.__init__.max_iter", + "default_value": "200", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "200", + "description": "Maximum number of iterations." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/__init__/convergence_iter", + "name": "convergence_iter", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.__init__.convergence_iter", + "default_value": "15", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "15", + "description": "Number of iterations with no change in the number\nof estimated clusters that stops the convergence." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/__init__/copy", + "name": "copy", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Make a copy of input data." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/__init__/preference", + "name": "preference", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.__init__.preference", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or float", + "default_value": "None", + "description": "Preferences for each point - points with larger values of\npreferences are more likely to be chosen as exemplars. The number\nof exemplars, ie of clusters, is influenced by the input\npreferences value. If the preferences are not passed as arguments,\nthey will be set to the median of the input similarities." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/__init__/affinity", + "name": "affinity", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.__init__.affinity", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'euclidean', 'precomputed'}", + "default_value": "'euclidean'", + "description": "Which affinity to use. At the moment 'precomputed' and\n``euclidean`` are supported. 'euclidean' uses the\nnegative squared euclidean distance between points." + }, + "type": { + "kind": "EnumType", + "values": ["precomputed", "euclidean"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/__init__/verbose", + "name": "verbose", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to be verbose." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/__init__/random_state", + "name": "random_state", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.__init__.random_state", + "default_value": "'warn'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "0", + "description": "Pseudo-random number generator to control the starting state.\nUse an int for reproducible results across function calls.\nSee the :term:`Glossary `.\n\n.. versionadded:: 0.23\n this parameter was previously hardcoded as 0." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, damping=.5, max_iter=200, convergence_iter=15,\n copy=True, preference=None, affinity='euclidean',\n verbose=False, random_state='warn'):\n\n self.damping = damping\n self.max_iter = max_iter\n self.convergence_iter = convergence_iter\n self.copy = copy\n self.verbose = verbose\n self.preference = preference\n self.affinity = affinity\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/_more_tags", + "name": "_more_tags", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/_more_tags/self", + "name": "self", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'pairwise': self.affinity == 'precomputed'}" + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation._pairwise", + "decorators": [ + "deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/_pairwise/self", + "name": "self", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n return self.affinity == \"precomputed\"" + }, + { + "id": 
"scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/fit", + "name": "fit", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/fit/self", + "name": "self", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/fit/X", + "name": "X", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)", + "default_value": "", + "description": "Training instances to cluster, or similarities / affinities between\ninstances if ``affinity='precomputed'``. If a sparse feature matrix\nis provided, it will be converted into a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_samples)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/fit/y", + "name": "y", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the clustering from features, or affinity matrix.", + "docstring": "Fit the clustering from features, or affinity matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse feature matrix\n is provided, it will be converted into a sparse ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the clustering from features, or affinity matrix.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. 
If a sparse feature matrix\n is provided, it will be converted into a sparse ``csr_matrix``.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n self\n\n \"\"\"\n if self.affinity == \"precomputed\":\n accept_sparse = False\n else:\n accept_sparse = 'csr'\n X = self._validate_data(X, accept_sparse=accept_sparse)\n if self.affinity == \"precomputed\":\n self.affinity_matrix_ = X\n elif self.affinity == \"euclidean\":\n self.affinity_matrix_ = -euclidean_distances(X, squared=True)\n else:\n raise ValueError(\"Affinity must be 'precomputed' or \"\n \"'euclidean'. Got %s instead\"\n % str(self.affinity))\n\n self.cluster_centers_indices_, self.labels_, self.n_iter_ = \\\n affinity_propagation(\n self.affinity_matrix_, preference=self.preference,\n max_iter=self.max_iter,\n convergence_iter=self.convergence_iter, damping=self.damping,\n copy=self.copy, verbose=self.verbose, return_n_iter=True,\n random_state=self.random_state)\n\n if self.affinity != \"precomputed\":\n self.cluster_centers_ = X[self.cluster_centers_indices_].copy()\n\n return self" + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/fit_predict", + "name": "fit_predict", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.fit_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/fit_predict/self", + "name": "self", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.fit_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/fit_predict/X", + "name": "X", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.fit_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)", + "default_value": "", + "description": "Training instances to cluster, or similarities / affinities between\ninstances if ``affinity='precomputed'``. If a sparse feature matrix\nis provided, it will be converted into a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_samples)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/fit_predict/y", + "name": "y", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.fit_predict.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." 
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the clustering from features or affinity matrix, and return\ncluster labels.", + "docstring": "Fit the clustering from features or affinity matrix, and return\ncluster labels.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse feature matrix\n is provided, it will be converted into a sparse ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels.", + "code": " def fit_predict(self, X, y=None):\n \"\"\"Fit the clustering from features or affinity matrix, and return\n cluster labels.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse feature matrix\n is provided, it will be converted into a sparse ``csr_matrix``.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Cluster labels.\n \"\"\"\n return super().fit_predict(X, y)" + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/predict", + "name": "predict", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/predict/self", + "name": "self", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/AffinityPropagation/predict/X", + "name": "X", + "qname": "sklearn.cluster._affinity_propagation.AffinityPropagation.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "New data to predict. If a sparse matrix is provided, it will be\nconverted into a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the closest cluster each sample in X belongs to.", + "docstring": "Predict the closest cluster each sample in X belongs to.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels.", + "code": " def predict(self, X):\n \"\"\"Predict the closest cluster each sample in X belongs to.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict. 
If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Cluster labels.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, reset=False)\n if not hasattr(self, \"cluster_centers_\"):\n raise ValueError(\"Predict method is not supported when \"\n \"affinity='precomputed'.\")\n\n if self.cluster_centers_.shape[0] > 0:\n with config_context(assume_finite=True):\n return pairwise_distances_argmin(X, self.cluster_centers_)\n else:\n warnings.warn(\"This model does not have any cluster centers \"\n \"because affinity propagation did not converge. \"\n \"Labeling every sample as '-1'.\", ConvergenceWarning)\n return np.array([-1] * X.shape[0])" + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/_equal_similarities_and_preferences", + "name": "_equal_similarities_and_preferences", + "qname": "sklearn.cluster._affinity_propagation._equal_similarities_and_preferences", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/_equal_similarities_and_preferences/S", + "name": "S", + "qname": "sklearn.cluster._affinity_propagation._equal_similarities_and_preferences.S", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/_equal_similarities_and_preferences/preference", + "name": "preference", + "qname": "sklearn.cluster._affinity_propagation._equal_similarities_and_preferences.preference", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _equal_similarities_and_preferences(S, preference):\n def all_equal_preferences():\n return np.all(preference == preference.flat[0])\n\n def all_equal_similarities():\n # Create mask to ignore diagonal of S\n mask = np.ones(S.shape, dtype=bool)\n np.fill_diagonal(mask, 0)\n\n return np.all(S[mask].flat == S[mask].flat[0])\n\n return all_equal_preferences() and all_equal_similarities()" + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/affinity_propagation", + "name": "affinity_propagation", + "qname": "sklearn.cluster._affinity_propagation.affinity_propagation", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/affinity_propagation/S", + "name": "S", + "qname": "sklearn.cluster._affinity_propagation.affinity_propagation.S", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_samples)", + "default_value": "", + "description": "Matrix of similarities between points." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/affinity_propagation/preference", + "name": "preference", + "qname": "sklearn.cluster._affinity_propagation.affinity_propagation.preference", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or float", + "default_value": "None", + "description": "Preferences for each point - points with larger values of\npreferences are more likely to be chosen as exemplars. The number of\nexemplars, i.e. of clusters, is influenced by the input preferences\nvalue. If the preferences are not passed as arguments, they will be\nset to the median of the input similarities (resulting in a moderate\nnumber of clusters). For a smaller amount of clusters, this can be set\nto the minimum value of the similarities." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/affinity_propagation/convergence_iter", + "name": "convergence_iter", + "qname": "sklearn.cluster._affinity_propagation.affinity_propagation.convergence_iter", + "default_value": "15", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "15", + "description": "Number of iterations with no change in the number\nof estimated clusters that stops the convergence." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/affinity_propagation/max_iter", + "name": "max_iter", + "qname": "sklearn.cluster._affinity_propagation.affinity_propagation.max_iter", + "default_value": "200", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "200", + "description": "Maximum number of iterations" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/affinity_propagation/damping", + "name": "damping", + "qname": "sklearn.cluster._affinity_propagation.affinity_propagation.damping", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "Damping factor between 0.5 and 1." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/affinity_propagation/copy", + "name": "copy", + "qname": "sklearn.cluster._affinity_propagation.affinity_propagation.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If copy is False, the affinity matrix is modified inplace by the\nalgorithm, for memory efficiency." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/affinity_propagation/verbose", + "name": "verbose", + "qname": "sklearn.cluster._affinity_propagation.affinity_propagation.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "The verbosity level." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/affinity_propagation/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.cluster._affinity_propagation.affinity_propagation.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether or not to return the number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._affinity_propagation/affinity_propagation/random_state", + "name": "random_state", + "qname": "sklearn.cluster._affinity_propagation.affinity_propagation.random_state", + "default_value": "'warn'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "0", + "description": "Pseudo-random number generator to control the starting state.\nUse an int for reproducible results across function calls.\nSee the :term:`Glossary `.\n\n.. versionadded:: 0.23\n this parameter was previously hardcoded as 0." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nS : array-like of shape (n_samples, n_samples)\n Matrix of similarities between points.\n\npreference : array-like of shape (n_samples,) or float, default=None\n Preferences for each point - points with larger values of\n preferences are more likely to be chosen as exemplars. The number of\n exemplars, i.e. of clusters, is influenced by the input preferences\n value. If the preferences are not passed as arguments, they will be\n set to the median of the input similarities (resulting in a moderate\n number of clusters). For a smaller amount of clusters, this can be set\n to the minimum value of the similarities.\n\nconvergence_iter : int, default=15\n Number of iterations with no change in the number\n of estimated clusters that stops the convergence.\n\nmax_iter : int, default=200\n Maximum number of iterations\n\ndamping : float, default=0.5\n Damping factor between 0.5 and 1.\n\ncopy : bool, default=True\n If copy is False, the affinity matrix is modified inplace by the\n algorithm, for memory efficiency.\n\nverbose : bool, default=False\n The verbosity level.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nrandom_state : int, RandomState instance or None, default=0\n Pseudo-random number generator to control the starting state.\n Use an int for reproducible results across function calls.\n See the :term:`Glossary `.\n\n .. versionadded:: 0.23\n this parameter was previously hardcoded as 0.\n\nReturns\n-------\n\ncluster_centers_indices : ndarray of shape (n_clusters,)\n Index of clusters centers.\n\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\nn_iter : int\n Number of iterations run. 
Returned only if `return_n_iter` is\n set to True.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n`.\n\nWhen the algorithm does not converge, it returns an empty array as\n``cluster_center_indices`` and ``-1`` as label for each training sample.\n\nWhen all training samples have equal similarities and equal preferences,\nthe assignment of cluster centers and labels depends on the preference.\nIf the preference is smaller than the similarities, a single cluster center\nand label ``0`` for every sample will be returned. Otherwise, every\ntraining sample becomes its own cluster center and is assigned a unique\nlabel.\n\nReferences\n----------\nBrendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\nBetween Data Points\", Science Feb. 2007", + "code": "@_deprecate_positional_args\ndef affinity_propagation(S, *, preference=None, convergence_iter=15,\n max_iter=200, damping=0.5, copy=True, verbose=False,\n return_n_iter=False, random_state='warn'):\n \"\"\"Perform Affinity Propagation Clustering of data.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n\n S : array-like of shape (n_samples, n_samples)\n Matrix of similarities between points.\n\n preference : array-like of shape (n_samples,) or float, default=None\n Preferences for each point - points with larger values of\n preferences are more likely to be chosen as exemplars. The number of\n exemplars, i.e. of clusters, is influenced by the input preferences\n value. If the preferences are not passed as arguments, they will be\n set to the median of the input similarities (resulting in a moderate\n number of clusters). For a smaller amount of clusters, this can be set\n to the minimum value of the similarities.\n\n convergence_iter : int, default=15\n Number of iterations with no change in the number\n of estimated clusters that stops the convergence.\n\n max_iter : int, default=200\n Maximum number of iterations\n\n damping : float, default=0.5\n Damping factor between 0.5 and 1.\n\n copy : bool, default=True\n If copy is False, the affinity matrix is modified inplace by the\n algorithm, for memory efficiency.\n\n verbose : bool, default=False\n The verbosity level.\n\n return_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\n random_state : int, RandomState instance or None, default=0\n Pseudo-random number generator to control the starting state.\n Use an int for reproducible results across function calls.\n See the :term:`Glossary `.\n\n .. versionadded:: 0.23\n this parameter was previously hardcoded as 0.\n\n Returns\n -------\n\n cluster_centers_indices : ndarray of shape (n_clusters,)\n Index of clusters centers.\n\n labels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\n n_iter : int\n Number of iterations run. Returned only if `return_n_iter` is\n set to True.\n\n Notes\n -----\n For an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n `.\n\n When the algorithm does not converge, it returns an empty array as\n ``cluster_center_indices`` and ``-1`` as label for each training sample.\n\n When all training samples have equal similarities and equal preferences,\n the assignment of cluster centers and labels depends on the preference.\n If the preference is smaller than the similarities, a single cluster center\n and label ``0`` for every sample will be returned. 
Otherwise, every\n training sample becomes its own cluster center and is assigned a unique\n label.\n\n References\n ----------\n Brendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\n Between Data Points\", Science Feb. 2007\n \"\"\"\n S = as_float_array(S, copy=copy)\n n_samples = S.shape[0]\n\n if S.shape[0] != S.shape[1]:\n raise ValueError(\"S must be a square array (shape=%s)\" % repr(S.shape))\n\n if preference is None:\n preference = np.median(S)\n if damping < 0.5 or damping >= 1:\n raise ValueError('damping must be >= 0.5 and < 1')\n\n preference = np.array(preference)\n\n if (n_samples == 1 or\n _equal_similarities_and_preferences(S, preference)):\n # It makes no sense to run the algorithm in this case, so return 1 or\n # n_samples clusters, depending on preferences\n warnings.warn(\"All samples have mutually equal similarities. \"\n \"Returning arbitrary cluster center(s).\")\n if preference.flat[0] >= S.flat[n_samples - 1]:\n return ((np.arange(n_samples), np.arange(n_samples), 0)\n if return_n_iter\n else (np.arange(n_samples), np.arange(n_samples)))\n else:\n return ((np.array([0]), np.array([0] * n_samples), 0)\n if return_n_iter\n else (np.array([0]), np.array([0] * n_samples)))\n\n if random_state == 'warn':\n warnings.warn(\n \"'random_state' has been introduced in 0.23. It will be set to \"\n \"None starting from 1.0 (renaming of 0.25) which means that \"\n \"results will differ at every function call. Set 'random_state' \"\n \"to None to silence this warning, or to 0 to keep the behavior of \"\n \"versions <0.23.\",\n FutureWarning\n )\n random_state = 0\n random_state = check_random_state(random_state)\n\n # Place preference on the diagonal of S\n S.flat[::(n_samples + 1)] = preference\n\n A = np.zeros((n_samples, n_samples))\n R = np.zeros((n_samples, n_samples)) # Initialize messages\n # Intermediate results\n tmp = np.zeros((n_samples, n_samples))\n\n # Remove degeneracies\n S += ((np.finfo(S.dtype).eps * S + np.finfo(S.dtype).tiny * 100) *\n random_state.randn(n_samples, n_samples))\n\n # Execute parallel affinity propagation updates\n e = np.zeros((n_samples, convergence_iter))\n\n ind = np.arange(n_samples)\n\n for it in range(max_iter):\n # tmp = A + S; compute responsibilities\n np.add(A, S, tmp)\n I = np.argmax(tmp, axis=1)\n Y = tmp[ind, I] # np.max(A + S, axis=1)\n tmp[ind, I] = -np.inf\n Y2 = np.max(tmp, axis=1)\n\n # tmp = Rnew\n np.subtract(S, Y[:, None], tmp)\n tmp[ind, I] = S[ind, I] - Y2\n\n # Damping\n tmp *= 1 - damping\n R *= damping\n R += tmp\n\n # tmp = Rp; compute availabilities\n np.maximum(R, 0, tmp)\n tmp.flat[::n_samples + 1] = R.flat[::n_samples + 1]\n\n # tmp = -Anew\n tmp -= np.sum(tmp, axis=0)\n dA = np.diag(tmp).copy()\n tmp.clip(0, np.inf, tmp)\n tmp.flat[::n_samples + 1] = dA\n\n # Damping\n tmp *= 1 - damping\n A *= damping\n A -= tmp\n\n # Check for convergence\n E = (np.diag(A) + np.diag(R)) > 0\n e[:, it % convergence_iter] = E\n K = np.sum(E, axis=0)\n\n if it >= convergence_iter:\n se = np.sum(e, axis=1)\n unconverged = (np.sum((se == convergence_iter) + (se == 0))\n != n_samples)\n if (not unconverged and (K > 0)) or (it == max_iter):\n never_converged = False\n if verbose:\n print(\"Converged after %d iterations.\" % it)\n break\n else:\n never_converged = True\n if verbose:\n print(\"Did not converge\")\n\n I = np.flatnonzero(E)\n K = I.size # Identify exemplars\n\n if K > 0 and not never_converged:\n c = np.argmax(S[:, I], axis=1)\n c[I] = np.arange(K) # Identify clusters\n # Refine the final set of exemplars 
and clusters and return results\n for k in range(K):\n ii = np.where(c == k)[0]\n j = np.argmax(np.sum(S[ii[:, np.newaxis], ii], axis=0))\n I[k] = ii[j]\n\n c = np.argmax(S[:, I], axis=1)\n c[I] = np.arange(K)\n labels = I[c]\n # Reduce labels to a sorted, gapless, list\n cluster_centers_indices = np.unique(labels)\n labels = np.searchsorted(cluster_centers_indices, labels)\n else:\n warnings.warn(\"Affinity propagation did not converge, this model \"\n \"will not have any cluster centers.\", ConvergenceWarning)\n labels = np.array([-1] * n_samples)\n cluster_centers_indices = []\n\n if return_n_iter:\n return cluster_centers_indices, labels, it + 1\n else:\n return cluster_centers_indices, labels" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__", + "name": "__init__", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__/self", + "name": "self", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.__init__.n_clusters", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "2", + "description": "The number of clusters to find. It must be ``None`` if\n``distance_threshold`` is not ``None``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__/affinity", + "name": "affinity", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.__init__.affinity", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'euclidean'", + "description": "Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n\"manhattan\", \"cosine\", or \"precomputed\".\nIf linkage is \"ward\", only \"euclidean\" is accepted.\nIf \"precomputed\", a distance matrix (instead of a similarity matrix)\nis needed as input for the fit method." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__/memory", + "name": "memory", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.__init__.memory", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or object with the joblib.Memory interface", + "default_value": "None", + "description": "Used to cache the output of the computation of the tree.\nBy default, no caching is done. If a string is given, it is the\npath to the caching directory." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "object with the joblib.Memory interface" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__/connectivity", + "name": "connectivity", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.__init__.connectivity", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like or callable", + "default_value": "None", + "description": "Connectivity matrix. Defines for each sample the neighboring\nsamples following a given structure of the data.\nThis can be a connectivity matrix itself or a callable that transforms\nthe data into a connectivity matrix, such as derived from\nkneighbors_graph. Default is ``None``, i.e, the\nhierarchical clustering algorithm is unstructured." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__/compute_full_tree", + "name": "compute_full_tree", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.__init__.compute_full_tree", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto' or bool", + "default_value": "'auto'", + "description": "Stop early the construction of the tree at ``n_clusters``. This is\nuseful to decrease computation time if the number of clusters is not\nsmall compared to the number of samples. This option is useful only\nwhen specifying a connectivity matrix. Note also that when varying the\nnumber of clusters and using caching, it may be advantageous to compute\nthe full tree. It must be ``True`` if ``distance_threshold`` is not\n``None``. By default `compute_full_tree` is \"auto\", which is equivalent\nto `True` when `distance_threshold` is not `None` or that `n_clusters`\nis inferior to the maximum between 100 or `0.02 * n_samples`.\nOtherwise, \"auto\" is equivalent to `False`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__/linkage", + "name": "linkage", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.__init__.linkage", + "default_value": "'ward'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'ward', 'complete', 'average', 'single'}", + "default_value": "'ward'", + "description": "Which linkage criterion to use. The linkage criterion determines which\ndistance to use between sets of observation. The algorithm will merge\nthe pairs of cluster that minimize this criterion.\n\n- 'ward' minimizes the variance of the clusters being merged.\n- 'average' uses the average of the distances of each observation of\n the two sets.\n- 'complete' or 'maximum' linkage uses the maximum distances between\n all observations of the two sets.\n- 'single' uses the minimum of the distances between all observations\n of the two sets.\n\n.. 
versionadded:: 0.20\n Added the 'single' option" + }, + "type": { + "kind": "EnumType", + "values": ["single", "ward", "average", "complete"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__/distance_threshold", + "name": "distance_threshold", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.__init__.distance_threshold", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "The linkage distance threshold above which, clusters will not be\nmerged. If not ``None``, ``n_clusters`` must be ``None`` and\n``compute_full_tree`` must be ``True``.\n\n.. versionadded:: 0.21" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/__init__/compute_distances", + "name": "compute_distances", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.__init__.compute_distances", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Computes distances between clusters even if `distance_threshold` is not\nused. This can be used to make dendrogram visualization, but introduces\na computational and memory overhead.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Agglomerative Clustering\n\nRecursively merges the pair of clusters that minimally increases\na given linkage distance.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_clusters=2, *, affinity=\"euclidean\",\n memory=None,\n connectivity=None, compute_full_tree='auto',\n linkage='ward', distance_threshold=None,\n compute_distances=False):\n self.n_clusters = n_clusters\n self.distance_threshold = distance_threshold\n self.memory = memory\n self.connectivity = connectivity\n self.compute_full_tree = compute_full_tree\n self.linkage = linkage\n self.affinity = affinity\n self.compute_distances = compute_distances" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/fit", + "name": "fit", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/fit/self", + "name": "self", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/fit/X", + "name": "X", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features) or (n_samples, n_samples)", + "default_value": "", + "description": "Training instances to cluster, or distances between instances if\n``affinity='precomputed'``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features) or (n_samples, n_samples)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/fit/y", + "name": "y", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the hierarchical clustering from features, or distance matrix.", + "docstring": "Fit the hierarchical clustering from features, or distance matrix.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features) or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``affinity='precomputed'``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the hierarchical clustering from features, or distance matrix.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features) or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``affinity='precomputed'``.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n self\n \"\"\"\n X = self._validate_data(X, ensure_min_samples=2, estimator=self)\n memory = check_memory(self.memory)\n\n if self.n_clusters is not None and self.n_clusters <= 0:\n raise ValueError(\"n_clusters should be an integer greater than 0.\"\n \" %s was provided.\" % str(self.n_clusters))\n\n if not ((self.n_clusters is None) ^ (self.distance_threshold is None)):\n raise ValueError(\"Exactly one of n_clusters and \"\n \"distance_threshold has to be set, and the other \"\n \"needs to be None.\")\n\n if (self.distance_threshold is not None\n and not self.compute_full_tree):\n raise ValueError(\"compute_full_tree must be True if \"\n \"distance_threshold is set.\")\n\n if self.linkage == \"ward\" and self.affinity != \"euclidean\":\n raise ValueError(\"%s was provided as affinity. Ward can only \"\n \"work with euclidean distances.\" %\n (self.affinity, ))\n\n if self.linkage not in _TREE_BUILDERS:\n raise ValueError(\"Unknown linkage type %s. \"\n \"Valid options are %s\" % (self.linkage,\n _TREE_BUILDERS.keys()))\n tree_builder = _TREE_BUILDERS[self.linkage]\n\n connectivity = self.connectivity\n if self.connectivity is not None:\n if callable(self.connectivity):\n connectivity = self.connectivity(X)\n connectivity = check_array(\n connectivity, accept_sparse=['csr', 'coo', 'lil'])\n\n n_samples = len(X)\n compute_full_tree = self.compute_full_tree\n if self.connectivity is None:\n compute_full_tree = True\n if compute_full_tree == 'auto':\n if self.distance_threshold is not None:\n compute_full_tree = True\n else:\n # Early stopping is likely to give a speed up only for\n # a large number of clusters. 
The actual threshold\n # implemented here is heuristic\n compute_full_tree = self.n_clusters < max(100, .02 * n_samples)\n n_clusters = self.n_clusters\n if compute_full_tree:\n n_clusters = None\n\n # Construct the tree\n kwargs = {}\n if self.linkage != 'ward':\n kwargs['linkage'] = self.linkage\n kwargs['affinity'] = self.affinity\n\n distance_threshold = self.distance_threshold\n\n return_distance = (\n (distance_threshold is not None) or self.compute_distances\n )\n\n out = memory.cache(tree_builder)(X, connectivity=connectivity,\n n_clusters=n_clusters,\n return_distance=return_distance,\n **kwargs)\n (self.children_,\n self.n_connected_components_,\n self.n_leaves_,\n parents) = out[:4]\n\n if return_distance:\n self.distances_ = out[-1]\n\n if self.distance_threshold is not None: # distance_threshold is used\n self.n_clusters_ = np.count_nonzero(\n self.distances_ >= distance_threshold) + 1\n else: # n_clusters is used\n self.n_clusters_ = self.n_clusters\n\n # Cut the tree\n if compute_full_tree:\n self.labels_ = _hc_cut(self.n_clusters_, self.children_,\n self.n_leaves_)\n else:\n labels = _hierarchical.hc_get_heads(parents, copy=False)\n # copy to avoid holding a reference on the original array\n labels = np.copy(labels[:n_samples])\n # Reassign cluster numbers\n self.labels_ = np.searchsorted(np.unique(labels), labels)\n return self" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/fit_predict", + "name": "fit_predict", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.fit_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/fit_predict/self", + "name": "self", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.fit_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/fit_predict/X", + "name": "X", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.fit_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or (n_samples, n_samples)", + "default_value": "", + "description": "Training instances to cluster, or distances between instances if\n``affinity='precomputed'``." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features) or (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/AgglomerativeClustering/fit_predict/y", + "name": "y", + "qname": "sklearn.cluster._agglomerative.AgglomerativeClustering.fit_predict.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." 
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the hierarchical clustering from features or distance matrix,\nand return cluster labels.", + "docstring": "Fit the hierarchical clustering from features or distance matrix,\nand return cluster labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``affinity='precomputed'``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels.", + "code": " def fit_predict(self, X, y=None):\n \"\"\"Fit the hierarchical clustering from features or distance matrix,\n and return cluster labels.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or \\\n (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``affinity='precomputed'``.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Cluster labels.\n \"\"\"\n return super().fit_predict(X, y)" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__", + "name": "__init__", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__/self", + "name": "self", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.__init__.n_clusters", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "The number of clusters to find. It must be ``None`` if\n``distance_threshold`` is not ``None``." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__/affinity", + "name": "affinity", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.__init__.affinity", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'euclidean'", + "description": "Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n\"manhattan\", \"cosine\", or 'precomputed'.\nIf linkage is \"ward\", only \"euclidean\" is accepted." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__/memory", + "name": "memory", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.__init__.memory", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or object with the joblib.Memory interface", + "default_value": "None", + "description": "Used to cache the output of the computation of the tree.\nBy default, no caching is done. If a string is given, it is the\npath to the caching directory." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "object with the joblib.Memory interface" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__/connectivity", + "name": "connectivity", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.__init__.connectivity", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like or callable", + "default_value": "None", + "description": "Connectivity matrix. Defines for each feature the neighboring\nfeatures following a given structure of the data.\nThis can be a connectivity matrix itself or a callable that transforms\nthe data into a connectivity matrix, such as derived from\nkneighbors_graph. Default is None, i.e, the\nhierarchical clustering algorithm is unstructured." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__/compute_full_tree", + "name": "compute_full_tree", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.__init__.compute_full_tree", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto' or bool", + "default_value": "'auto'", + "description": "Stop early the construction of the tree at n_clusters. This is useful\nto decrease computation time if the number of clusters is not small\ncompared to the number of features. This option is useful only when\nspecifying a connectivity matrix. Note also that when varying the\nnumber of clusters and using caching, it may be advantageous to compute\nthe full tree. It must be ``True`` if ``distance_threshold`` is not\n``None``. By default `compute_full_tree` is \"auto\", which is equivalent\nto `True` when `distance_threshold` is not `None` or that `n_clusters`\nis inferior to the maximum between 100 or `0.02 * n_samples`.\nOtherwise, \"auto\" is equivalent to `False`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__/linkage", + "name": "linkage", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.__init__.linkage", + "default_value": "'ward'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'ward', 'complete', 'average', 'single'}", + "default_value": "'ward'", + "description": "Which linkage criterion to use. 
The linkage criterion determines which\ndistance to use between sets of features. The algorithm will merge\nthe pairs of cluster that minimize this criterion.\n\n- ward minimizes the variance of the clusters being merged.\n- average uses the average of the distances of each feature of\n the two sets.\n- complete or maximum linkage uses the maximum distances between\n all features of the two sets.\n- single uses the minimum of the distances between all features\n of the two sets." + }, + "type": { + "kind": "EnumType", + "values": ["single", "ward", "average", "complete"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__/pooling_func", + "name": "pooling_func", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.__init__.pooling_func", + "default_value": "np.mean", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "np.mean", + "description": "This combines the values of agglomerated features into a single\nvalue, and should accept an array of shape [M, N] and the keyword\nargument `axis=1`, and reduce it to an array of size [M]." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__/distance_threshold", + "name": "distance_threshold", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.__init__.distance_threshold", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "The linkage distance threshold above which, clusters will not be\nmerged. If not ``None``, ``n_clusters`` must be ``None`` and\n``compute_full_tree`` must be ``True``.\n\n.. versionadded:: 0.21" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/__init__/compute_distances", + "name": "compute_distances", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.__init__.compute_distances", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Computes distances between clusters even if `distance_threshold` is not\nused. This can be used to make dendrogram visualization, but introduces\na computational and memory overhead.\n\n.. 
versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Agglomerate features.\n\nSimilar to AgglomerativeClustering, but recursively merges features\ninstead of samples.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_clusters=2, *, affinity=\"euclidean\",\n memory=None,\n connectivity=None, compute_full_tree='auto',\n linkage='ward', pooling_func=np.mean,\n distance_threshold=None, compute_distances=False):\n super().__init__(\n n_clusters=n_clusters, memory=memory, connectivity=connectivity,\n compute_full_tree=compute_full_tree, linkage=linkage,\n affinity=affinity, distance_threshold=distance_threshold,\n compute_distances=compute_distances)\n self.pooling_func = pooling_func" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit", + "name": "fit", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit/self", + "name": "self", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit/X", + "name": "X", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit/y", + "name": "y", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit/params", + "name": "params", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.fit.params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the hierarchical clustering on the data", + "docstring": "Fit the hierarchical clustering on the data\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data\n\ny : Ignored\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None, **params):\n \"\"\"Fit the hierarchical clustering on the data\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data\n\n y : Ignored\n\n Returns\n -------\n self\n \"\"\"\n X = self._validate_data(X, accept_sparse=['csr', 'csc', 'coo'],\n ensure_min_features=2, estimator=self)\n # save n_features_in_ attribute here to reset it after, because it will\n # be overridden in AgglomerativeClustering since we passed it X.T.\n n_features_in_ = 
self.n_features_in_\n AgglomerativeClustering.fit(self, X.T, **params)\n self.n_features_in_ = n_features_in_\n return self" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit_predict@getter", + "name": "fit_predict", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.fit_predict", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/FeatureAgglomeration/fit_predict/self", + "name": "self", + "qname": "sklearn.cluster._agglomerative.FeatureAgglomeration.fit_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def fit_predict(self):\n raise AttributeError" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_average_linkage", + "name": "_average_linkage", + "qname": "sklearn.cluster._agglomerative._average_linkage", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_average_linkage/args", + "name": "args", + "qname": "sklearn.cluster._agglomerative._average_linkage.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_average_linkage/kwargs", + "name": "kwargs", + "qname": "sklearn.cluster._agglomerative._average_linkage.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _average_linkage(*args, **kwargs):\n kwargs['linkage'] = 'average'\n return linkage_tree(*args, **kwargs)" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_complete_linkage", + "name": "_complete_linkage", + "qname": "sklearn.cluster._agglomerative._complete_linkage", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_complete_linkage/args", + "name": "args", + "qname": "sklearn.cluster._agglomerative._complete_linkage.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_complete_linkage/kwargs", + "name": "kwargs", + "qname": "sklearn.cluster._agglomerative._complete_linkage.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _complete_linkage(*args, **kwargs):\n kwargs['linkage'] = 'complete'\n return linkage_tree(*args, **kwargs)" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_fix_connectivity", + "name": "_fix_connectivity", + "qname": "sklearn.cluster._agglomerative._fix_connectivity", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_fix_connectivity/X", + "name": "X", + "qname": 
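As the `FeatureAgglomeration.fit` code above shows, the class simply reuses `AgglomerativeClustering.fit` on `X.T`, so clusters group columns rather than rows; the `fit_predict` property that follows deliberately raises `AttributeError` to mask the inherited sample-wise method. A small transformer-usage sketch (data invented):

import numpy as np
from sklearn.cluster import FeatureAgglomeration

rng = np.random.RandomState(0)
X = rng.rand(10, 6)                 # 10 samples, 6 features

agglo = FeatureAgglomeration(n_clusters=2)
X_reduced = agglo.fit_transform(X)  # pooling_func (np.mean) merges features
print(X_reduced.shape)              # (10, 2)
print(agglo.labels_)                # one cluster id per *feature*, length 6

X_restored = agglo.inverse_transform(X_reduced)
print(X_restored.shape)             # (10, 6), pooled values broadcast back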
"sklearn.cluster._agglomerative._fix_connectivity.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_fix_connectivity/connectivity", + "name": "connectivity", + "qname": "sklearn.cluster._agglomerative._fix_connectivity.connectivity", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_fix_connectivity/affinity", + "name": "affinity", + "qname": "sklearn.cluster._agglomerative._fix_connectivity.affinity", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fixes the connectivity matrix\n\n - copies it\n - makes it symmetric\n - converts it to LIL if necessary\n - completes it if necessary", + "docstring": "Fixes the connectivity matrix\n\n - copies it\n - makes it symmetric\n - converts it to LIL if necessary\n - completes it if necessary", + "code": "def _fix_connectivity(X, connectivity, affinity):\n \"\"\"\n Fixes the connectivity matrix\n\n - copies it\n - makes it symmetric\n - converts it to LIL if necessary\n - completes it if necessary\n \"\"\"\n n_samples = X.shape[0]\n if (connectivity.shape[0] != n_samples or\n connectivity.shape[1] != n_samples):\n raise ValueError('Wrong shape for connectivity matrix: %s '\n 'when X is %s' % (connectivity.shape, X.shape))\n\n # Make the connectivity matrix symmetric:\n connectivity = connectivity + connectivity.T\n\n # Convert connectivity matrix to LIL\n if not sparse.isspmatrix_lil(connectivity):\n if not sparse.isspmatrix(connectivity):\n connectivity = sparse.lil_matrix(connectivity)\n else:\n connectivity = connectivity.tolil()\n\n # Compute the number of nodes\n n_connected_components, labels = connected_components(connectivity)\n\n if n_connected_components > 1:\n warnings.warn(\"the number of connected components of the \"\n \"connectivity matrix is %d > 1. Completing it to avoid \"\n \"stopping the tree early.\" % n_connected_components,\n stacklevel=2)\n # XXX: Can we do without completing the matrix?\n for i in range(n_connected_components):\n idx_i = np.where(labels == i)[0]\n Xi = X[idx_i]\n for j in range(i):\n idx_j = np.where(labels == j)[0]\n Xj = X[idx_j]\n D = pairwise_distances(Xi, Xj, metric=affinity)\n ii, jj = np.where(D == np.min(D))\n ii = ii[0]\n jj = jj[0]\n connectivity[idx_i[ii], idx_j[jj]] = True\n connectivity[idx_j[jj], idx_i[ii]] = True\n\n return connectivity, n_connected_components" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_hc_cut", + "name": "_hc_cut", + "qname": "sklearn.cluster._agglomerative._hc_cut", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_hc_cut/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._agglomerative._hc_cut.n_clusters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or ndarray", + "default_value": "", + "description": "The number of clusters to form." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "ndarray" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_hc_cut/children", + "name": "children", + "qname": "sklearn.cluster._agglomerative._hc_cut.children", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_nodes-1, 2)", + "default_value": "", + "description": "The children of each non-leaf node. Values less than `n_samples`\ncorrespond to leaves of the tree which are the original samples.\nA node `i` greater than or equal to `n_samples` is a non-leaf\nnode and has children `children_[i - n_samples]`. Alternatively\nat the i-th iteration, children[i][0] and children[i][1]\nare merged to form node `n_samples + i`" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_nodes-1, 2)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_hc_cut/n_leaves", + "name": "n_leaves", + "qname": "sklearn.cluster._agglomerative._hc_cut.n_leaves", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of leaves of the tree." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Function cutting the ward tree for a given number of clusters.", + "docstring": "Function cutting the ward tree for a given number of clusters.\n\nParameters\n----------\nn_clusters : int or ndarray\n The number of clusters to form.\n\nchildren : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\nn_leaves : int\n Number of leaves of the tree.\n\nReturns\n-------\nlabels : array [n_samples]\n cluster labels for each point", + "code": "def _hc_cut(n_clusters, children, n_leaves):\n \"\"\"Function cutting the ward tree for a given number of clusters.\n\n Parameters\n ----------\n n_clusters : int or ndarray\n The number of clusters to form.\n\n children : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. 
Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\n n_leaves : int\n Number of leaves of the tree.\n\n Returns\n -------\n labels : array [n_samples]\n cluster labels for each point\n\n \"\"\"\n if n_clusters > n_leaves:\n raise ValueError('Cannot extract more clusters than samples: '\n '%s clusters where given for a tree with %s leaves.'\n % (n_clusters, n_leaves))\n # In this function, we store nodes as a heap to avoid recomputing\n # the max of the nodes: the first element is always the smallest\n # We use negated indices as heaps work on smallest elements, and we\n # are interested in largest elements\n # children[-1] is the root of the tree\n nodes = [-(max(children[-1]) + 1)]\n for _ in range(n_clusters - 1):\n # As we have a heap, nodes[0] is the smallest element\n these_children = children[-nodes[0] - n_leaves]\n # Insert the 2 children and remove the largest node\n heappush(nodes, -these_children[0])\n heappushpop(nodes, -these_children[1])\n label = np.zeros(n_leaves, dtype=np.intp)\n for i, node in enumerate(nodes):\n label[_hierarchical._hc_get_descendent(-node, children, n_leaves)] = i\n return label" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_single_linkage", + "name": "_single_linkage", + "qname": "sklearn.cluster._agglomerative._single_linkage", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_single_linkage/args", + "name": "args", + "qname": "sklearn.cluster._agglomerative._single_linkage.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_single_linkage/kwargs", + "name": "kwargs", + "qname": "sklearn.cluster._agglomerative._single_linkage.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _single_linkage(*args, **kwargs):\n kwargs['linkage'] = 'single'\n return linkage_tree(*args, **kwargs)" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_single_linkage_tree", + "name": "_single_linkage_tree", + "qname": "sklearn.cluster._agglomerative._single_linkage_tree", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_single_linkage_tree/connectivity", + "name": "connectivity", + "qname": "sklearn.cluster._agglomerative._single_linkage_tree.connectivity", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_single_linkage_tree/n_samples", + "name": "n_samples", + "qname": "sklearn.cluster._agglomerative._single_linkage_tree.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_single_linkage_tree/n_nodes", + "name": "n_nodes", + "qname": "sklearn.cluster._agglomerative._single_linkage_tree.n_nodes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + 
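The `_hc_cut` code above relies on negating node indices so Python's min-heap behaves as a max-heap: popping always returns the largest (most recently formed) node, which is then replaced by its two children until `n_clusters` roots remain. The trick in isolation:

import heapq

# Negating values turns heapq's min-heap into a max-heap.
h = []
for v in [3, 1, 4, 1, 5]:
    heapq.heappush(h, -v)
print(-heapq.heappop(h))  # 5, the largest element comes out first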
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_single_linkage_tree/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._agglomerative._single_linkage_tree.n_clusters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_single_linkage_tree/n_connected_components", + "name": "n_connected_components", + "qname": "sklearn.cluster._agglomerative._single_linkage_tree.n_connected_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/_single_linkage_tree/return_distance", + "name": "return_distance", + "qname": "sklearn.cluster._agglomerative._single_linkage_tree.return_distance", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform single linkage clustering on sparse data via the minimum\nspanning tree from scipy.sparse.csgraph, then using union-find to label.\nThe parent array is then generated by walking through the tree.", + "docstring": "Perform single linkage clustering on sparse data via the minimum\nspanning tree from scipy.sparse.csgraph, then using union-find to label.\nThe parent array is then generated by walking through the tree.", + "code": "def _single_linkage_tree(connectivity, n_samples, n_nodes, n_clusters,\n n_connected_components, return_distance):\n \"\"\"\n Perform single linkage clustering on sparse data via the minimum\n spanning tree from scipy.sparse.csgraph, then using union-find to label.\n The parent array is then generated by walking through the tree.\n \"\"\"\n from scipy.sparse.csgraph import minimum_spanning_tree\n\n # explicitly cast connectivity to ensure safety\n connectivity = connectivity.astype('float64',\n **_astype_copy_false(connectivity))\n\n # Ensure zero distances aren't ignored by setting them to \"epsilon\"\n epsilon_value = np.finfo(dtype=connectivity.data.dtype).eps\n connectivity.data[connectivity.data == 0] = epsilon_value\n\n # Use scipy.sparse.csgraph to generate a minimum spanning tree\n mst = minimum_spanning_tree(connectivity.tocsr())\n\n # Convert the graph to scipy.cluster.hierarchy array format\n mst = mst.tocoo()\n\n # Undo the epsilon values\n mst.data[mst.data == epsilon_value] = 0\n\n mst_array = np.vstack([mst.row, mst.col, mst.data]).T\n\n # Sort edges of the min_spanning_tree by weight\n mst_array = mst_array[np.argsort(mst_array.T[2], kind='mergesort'), :]\n\n # Convert edge list into standard hierarchical clustering format\n single_linkage_tree = _hierarchical._single_linkage_label(mst_array)\n children_ = single_linkage_tree[:, :2].astype(int)\n\n # Compute parents\n parent = np.arange(n_nodes, dtype=np.intp)\n for i, (left, right) in enumerate(children_, n_samples):\n if n_clusters is not None and i >= n_nodes:\n break\n if left < n_nodes:\n parent[left] = i\n if right < n_nodes:\n parent[right] = i\n\n if return_distance:\n distances = single_linkage_tree[:, 2]\n return children_, 
n_connected_components, n_samples, parent, distances\n return children_, n_connected_components, n_samples, parent" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/linkage_tree", + "name": "linkage_tree", + "qname": "sklearn.cluster._agglomerative.linkage_tree", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/linkage_tree/X", + "name": "X", + "qname": "sklearn.cluster._agglomerative.linkage_tree.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "feature matrix representing n_samples samples to be clustered" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/linkage_tree/connectivity", + "name": "connectivity", + "qname": "sklearn.cluster._agglomerative.linkage_tree.connectivity", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "sparse matrix", + "default_value": "None", + "description": "connectivity matrix. Defines for each sample the neighboring samples\nfollowing a given structure of the data. The matrix is assumed to\nbe symmetric and only the upper triangular half is used.\nDefault is None, i.e, the Ward algorithm is unstructured." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/linkage_tree/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._agglomerative.linkage_tree.n_clusters", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Stop early the construction of the tree at n_clusters. This is\nuseful to decrease computation time if the number of clusters is\nnot small compared to the number of samples. In this case, the\ncomplete tree is not computed, thus the 'children' output is of\nlimited use, and the 'parents' output should rather be used.\nThis option is valid only when specifying a connectivity matrix." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/linkage_tree/linkage", + "name": "linkage", + "qname": "sklearn.cluster._agglomerative.linkage_tree.linkage", + "default_value": "'complete'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{\"average\", \"complete\", \"single\"}", + "default_value": "\"complete\"", + "description": "Which linkage criteria to use. The linkage criterion determines which\ndistance to use between sets of observation.\n - average uses the average of the distances of each observation of\n the two sets\n - complete or maximum linkage uses the maximum distances between\n all observations of the two sets.\n - single uses the minimum of the distances between all observations\n of the two sets." 
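`_single_linkage_tree` above reduces structured single linkage to a minimum-spanning-tree problem (with an epsilon substitution so genuine zero distances are not dropped by the sparse MST). Unstructured single linkage should therefore agree with SciPy's implementation; a quick consistency sketch under the assumption that merge distances are unique (true almost surely for random floats):

import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import adjusted_rand_score

rng = np.random.RandomState(0)
X = rng.rand(15, 2)

skl = AgglomerativeClustering(n_clusters=3, linkage='single').fit(X)
Z = linkage(X, method='single')                    # SciPy reference
scipy_labels = fcluster(Z, t=3, criterion='maxclust')

# Same partition up to label permutation when distances are unique.
print(adjusted_rand_score(skl.labels_, scipy_labels))  # expect 1.0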
+ }, + "type": { + "kind": "EnumType", + "values": ["single", "average", "complete"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/linkage_tree/affinity", + "name": "affinity", + "qname": "sklearn.cluster._agglomerative.linkage_tree.affinity", + "default_value": "'euclidean'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "\"euclidean\".", + "description": "which metric to use. Can be \"euclidean\", \"manhattan\", or any\ndistance know to paired distance (see metric.pairwise)" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/linkage_tree/return_distance", + "name": "return_distance", + "qname": "sklearn.cluster._agglomerative.linkage_tree.return_distance", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "whether or not to return the distances between the clusters." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Linkage agglomerative clustering based on a Feature matrix.\n\nThe inertia matrix uses a Heapq-based representation.\n\nThis is the structured version, that takes into account some topological\nstructure between samples.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Linkage agglomerative clustering based on a Feature matrix.\n\nThe inertia matrix uses a Heapq-based representation.\n\nThis is the structured version, that takes into account some topological\nstructure between samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n feature matrix representing n_samples samples to be clustered\n\nconnectivity : sparse matrix, default=None\n connectivity matrix. Defines for each sample the neighboring samples\n following a given structure of the data. The matrix is assumed to\n be symmetric and only the upper triangular half is used.\n Default is None, i.e, the Ward algorithm is unstructured.\n\nn_clusters : int, default=None\n Stop early the construction of the tree at n_clusters. This is\n useful to decrease computation time if the number of clusters is\n not small compared to the number of samples. In this case, the\n complete tree is not computed, thus the 'children' output is of\n limited use, and the 'parents' output should rather be used.\n This option is valid only when specifying a connectivity matrix.\n\nlinkage : {\"average\", \"complete\", \"single\"}, default=\"complete\"\n Which linkage criteria to use. The linkage criterion determines which\n distance to use between sets of observation.\n - average uses the average of the distances of each observation of\n the two sets\n - complete or maximum linkage uses the maximum distances between\n all observations of the two sets.\n - single uses the minimum of the distances between all observations\n of the two sets.\n\naffinity : str or callable, default=\"euclidean\".\n which metric to use. 
Can be \"euclidean\", \"manhattan\", or any\n distance know to paired distance (see metric.pairwise)\n\nreturn_distance : bool, default=False\n whether or not to return the distances between the clusters.\n\nReturns\n-------\nchildren : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\nn_connected_components : int\n The number of connected components in the graph.\n\nn_leaves : int\n The number of leaves in the tree.\n\nparents : ndarray of shape (n_nodes, ) or None\n The parent of each node. Only returned when a connectivity matrix\n is specified, elsewhere 'None' is returned.\n\ndistances : ndarray of shape (n_nodes-1,)\n Returned when return_distance is set to True.\n\n distances[i] refers to the distance between children[i][0] and\n children[i][1] when they are merged.\n\nSee Also\n--------\nward_tree : Hierarchical clustering with ward linkage.", + "code": "def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',\n affinity=\"euclidean\", return_distance=False):\n \"\"\"Linkage agglomerative clustering based on a Feature matrix.\n\n The inertia matrix uses a Heapq-based representation.\n\n This is the structured version, that takes into account some topological\n structure between samples.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n feature matrix representing n_samples samples to be clustered\n\n connectivity : sparse matrix, default=None\n connectivity matrix. Defines for each sample the neighboring samples\n following a given structure of the data. The matrix is assumed to\n be symmetric and only the upper triangular half is used.\n Default is None, i.e, the Ward algorithm is unstructured.\n\n n_clusters : int, default=None\n Stop early the construction of the tree at n_clusters. This is\n useful to decrease computation time if the number of clusters is\n not small compared to the number of samples. In this case, the\n complete tree is not computed, thus the 'children' output is of\n limited use, and the 'parents' output should rather be used.\n This option is valid only when specifying a connectivity matrix.\n\n linkage : {\"average\", \"complete\", \"single\"}, default=\"complete\"\n Which linkage criteria to use. The linkage criterion determines which\n distance to use between sets of observation.\n - average uses the average of the distances of each observation of\n the two sets\n - complete or maximum linkage uses the maximum distances between\n all observations of the two sets.\n - single uses the minimum of the distances between all observations\n of the two sets.\n\n affinity : str or callable, default=\"euclidean\".\n which metric to use. Can be \"euclidean\", \"manhattan\", or any\n distance know to paired distance (see metric.pairwise)\n\n return_distance : bool, default=False\n whether or not to return the distances between the clusters.\n\n Returns\n -------\n children : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. 
Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\n n_connected_components : int\n The number of connected components in the graph.\n\n n_leaves : int\n The number of leaves in the tree.\n\n parents : ndarray of shape (n_nodes, ) or None\n The parent of each node. Only returned when a connectivity matrix\n is specified, elsewhere 'None' is returned.\n\n distances : ndarray of shape (n_nodes-1,)\n Returned when return_distance is set to True.\n\n distances[i] refers to the distance between children[i][0] and\n children[i][1] when they are merged.\n\n See Also\n --------\n ward_tree : Hierarchical clustering with ward linkage.\n \"\"\"\n X = np.asarray(X)\n if X.ndim == 1:\n X = np.reshape(X, (-1, 1))\n n_samples, n_features = X.shape\n\n linkage_choices = {'complete': _hierarchical.max_merge,\n 'average': _hierarchical.average_merge,\n 'single': None} # Single linkage is handled differently\n try:\n join_func = linkage_choices[linkage]\n except KeyError as e:\n raise ValueError(\n 'Unknown linkage option, linkage should be one '\n 'of %s, but %s was given' % (linkage_choices.keys(), linkage)\n ) from e\n\n if affinity == 'cosine' and np.any(~np.any(X, axis=1)):\n raise ValueError(\n 'Cosine affinity cannot be used when X contains zero vectors')\n\n if connectivity is None:\n from scipy.cluster import hierarchy # imports PIL\n\n if n_clusters is not None:\n warnings.warn('Partial build of the tree is implemented '\n 'only for structured clustering (i.e. with '\n 'explicit connectivity). 
The algorithm '\n 'will build the full tree and only '\n 'retain the lower branches required '\n 'for the specified number of clusters',\n stacklevel=2)\n\n if affinity == 'precomputed':\n # for the linkage function of hierarchy to work on precomputed\n # data, provide as first argument an ndarray of the shape returned\n # by sklearn.metrics.pairwise_distances.\n if X.shape[0] != X.shape[1]:\n raise ValueError(\n 'Distance matrix should be square, '\n 'Got matrix of shape {X.shape}'\n )\n i, j = np.triu_indices(X.shape[0], k=1)\n X = X[i, j]\n elif affinity == 'l2':\n # Translate to something understood by scipy\n affinity = 'euclidean'\n elif affinity in ('l1', 'manhattan'):\n affinity = 'cityblock'\n elif callable(affinity):\n X = affinity(X)\n i, j = np.triu_indices(X.shape[0], k=1)\n X = X[i, j]\n if (linkage == 'single'\n and affinity != 'precomputed'\n and not callable(affinity)\n and affinity in METRIC_MAPPING):\n\n # We need the fast cythonized metric from neighbors\n dist_metric = DistanceMetric.get_metric(affinity)\n\n # The Cython routines used require contiguous arrays\n X = np.ascontiguousarray(X, dtype=np.double)\n\n mst = _hierarchical.mst_linkage_core(X, dist_metric)\n # Sort edges of the min_spanning_tree by weight\n mst = mst[np.argsort(mst.T[2], kind='mergesort'), :]\n\n # Convert edge list into standard hierarchical clustering format\n out = _hierarchical.single_linkage_label(mst)\n else:\n out = hierarchy.linkage(X, method=linkage, metric=affinity)\n children_ = out[:, :2].astype(int, copy=False)\n\n if return_distance:\n distances = out[:, 2]\n return children_, 1, n_samples, None, distances\n return children_, 1, n_samples, None\n\n connectivity, n_connected_components = _fix_connectivity(\n X, connectivity,\n affinity=affinity)\n connectivity = connectivity.tocoo()\n # Put the diagonal to zero\n diag_mask = (connectivity.row != connectivity.col)\n connectivity.row = connectivity.row[diag_mask]\n connectivity.col = connectivity.col[diag_mask]\n connectivity.data = connectivity.data[diag_mask]\n del diag_mask\n\n if affinity == 'precomputed':\n distances = X[connectivity.row, connectivity.col].astype(\n 'float64', **_astype_copy_false(X))\n else:\n # FIXME We compute all the distances, while we could have only computed\n # the \"interesting\" distances\n distances = paired_distances(X[connectivity.row],\n X[connectivity.col],\n metric=affinity)\n connectivity.data = distances\n\n if n_clusters is None:\n n_nodes = 2 * n_samples - 1\n else:\n assert n_clusters <= n_samples\n n_nodes = 2 * n_samples - n_clusters\n\n if linkage == 'single':\n return _single_linkage_tree(connectivity, n_samples, n_nodes,\n n_clusters, n_connected_components,\n return_distance)\n\n if return_distance:\n distances = np.empty(n_nodes - n_samples)\n # create inertia heap and connection matrix\n A = np.empty(n_nodes, dtype=object)\n inertia = list()\n\n # LIL seems to the best format to access the rows quickly,\n # without the numpy overhead of slicing CSR indices and data.\n connectivity = connectivity.tolil()\n # We are storing the graph in a list of IntFloatDict\n for ind, (data, row) in enumerate(zip(connectivity.data,\n connectivity.rows)):\n A[ind] = IntFloatDict(np.asarray(row, dtype=np.intp),\n np.asarray(data, dtype=np.float64))\n # We keep only the upper triangular for the heap\n # Generator expressions are faster than arrays on the following\n inertia.extend(_hierarchical.WeightedEdge(d, ind, r)\n for r, d in zip(row, data) if r < ind)\n del connectivity\n\n heapify(inertia)\n\n # 
prepare the main fields\n parent = np.arange(n_nodes, dtype=np.intp)\n used_node = np.ones(n_nodes, dtype=np.intp)\n children = []\n\n # recursive merge loop\n for k in range(n_samples, n_nodes):\n # identify the merge\n while True:\n edge = heappop(inertia)\n if used_node[edge.a] and used_node[edge.b]:\n break\n i = edge.a\n j = edge.b\n\n if return_distance:\n # store distances\n distances[k - n_samples] = edge.weight\n\n parent[i] = parent[j] = k\n children.append((i, j))\n # Keep track of the number of elements per cluster\n n_i = used_node[i]\n n_j = used_node[j]\n used_node[k] = n_i + n_j\n used_node[i] = used_node[j] = False\n\n # update the structure matrix A and the inertia matrix\n # a clever 'min', or 'max' operation between A[i] and A[j]\n coord_col = join_func(A[i], A[j], used_node, n_i, n_j)\n for col, d in coord_col:\n A[col].append(k, d)\n # Here we use the information from coord_col (containing the\n # distances) to update the heap\n heappush(inertia, _hierarchical.WeightedEdge(d, k, col))\n A[k] = coord_col\n # Clear A[i] and A[j] to save memory\n A[i] = A[j] = 0\n\n # Separate leaves in children (empty lists up to now)\n n_leaves = n_samples\n\n # # return numpy array for efficient caching\n children = np.array(children)[:, ::-1]\n\n if return_distance:\n return children, n_connected_components, n_leaves, parent, distances\n return children, n_connected_components, n_leaves, parent" + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/ward_tree", + "name": "ward_tree", + "qname": "sklearn.cluster._agglomerative.ward_tree", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._agglomerative/ward_tree/X", + "name": "X", + "qname": "sklearn.cluster._agglomerative.ward_tree.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "feature matrix representing n_samples samples to be clustered" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/ward_tree/connectivity", + "name": "connectivity", + "qname": "sklearn.cluster._agglomerative.ward_tree.connectivity", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "sparse matrix", + "default_value": "None", + "description": "connectivity matrix. Defines for each sample the neighboring samples\nfollowing a given structure of the data. The matrix is assumed to\nbe symmetric and only the upper triangular half is used.\nDefault is None, i.e, the Ward algorithm is unstructured." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/ward_tree/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._agglomerative.ward_tree.n_clusters", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Stop early the construction of the tree at n_clusters. This is\nuseful to decrease computation time if the number of clusters is\nnot small compared to the number of samples. In this case, the\ncomplete tree is not computed, thus the 'children' output is of\nlimited use, and the 'parents' output should rather be used.\nThis option is valid only when specifying a connectivity matrix." 
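`linkage_tree` above and `ward_tree`, whose entry follows, share one return convention: `(children, n_connected_components, n_leaves, parents)`, plus `distances` when `return_distance=True`, with `parents=None` in the unstructured case. A sketch of calling them directly; this assumes both names are re-exported by `sklearn.cluster` in this version (the dump's empty `reexported_by` fields leave that open; otherwise import from `sklearn.cluster._agglomerative` per the qnames):

import numpy as np
from sklearn.cluster import linkage_tree, ward_tree

rng = np.random.RandomState(0)
X = rng.rand(8, 3)

# Unstructured ward tree: n_connected_components is 1, parents is None.
children, n_cc, n_leaves, parents = ward_tree(X)
print(children.shape)  # (7, 2): n_samples - 1 merges

# linkage_tree with merge distances returned.
children, n_cc, n_leaves, parents, dist = linkage_tree(
    X, linkage='average', return_distance=True)
print(dist.shape)      # (7,)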
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._agglomerative/ward_tree/return_distance", + "name": "return_distance", + "qname": "sklearn.cluster._agglomerative.ward_tree.return_distance", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "None", + "description": "If True, return the distance between the clusters." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Ward clustering based on a Feature matrix.\n\nRecursively merges the pair of clusters that minimally increases\nwithin-cluster variance.\n\nThe inertia matrix uses a Heapq-based representation.\n\nThis is the structured version, that takes into account some topological\nstructure between samples.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Ward clustering based on a Feature matrix.\n\nRecursively merges the pair of clusters that minimally increases\nwithin-cluster variance.\n\nThe inertia matrix uses a Heapq-based representation.\n\nThis is the structured version, that takes into account some topological\nstructure between samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n feature matrix representing n_samples samples to be clustered\n\nconnectivity : sparse matrix, default=None\n connectivity matrix. Defines for each sample the neighboring samples\n following a given structure of the data. The matrix is assumed to\n be symmetric and only the upper triangular half is used.\n Default is None, i.e, the Ward algorithm is unstructured.\n\nn_clusters : int, default=None\n Stop early the construction of the tree at n_clusters. This is\n useful to decrease computation time if the number of clusters is\n not small compared to the number of samples. In this case, the\n complete tree is not computed, thus the 'children' output is of\n limited use, and the 'parents' output should rather be used.\n This option is valid only when specifying a connectivity matrix.\n\nreturn_distance : bool, default=None\n If True, return the distance between the clusters.\n\nReturns\n-------\nchildren : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\nn_connected_components : int\n The number of connected components in the graph.\n\nn_leaves : int\n The number of leaves in the tree\n\nparents : ndarray of shape (n_nodes,) or None\n The parent of each node. Only returned when a connectivity matrix\n is specified, elsewhere 'None' is returned.\n\ndistances : ndarray of shape (n_nodes-1,)\n Only returned if return_distance is set to True (for compatibility).\n The distances between the centers of the nodes. `distances[i]`\n corresponds to a weighted euclidean distance between\n the nodes `children[i, 1]` and `children[i, 2]`. If the nodes refer to\n leaves of the tree, then `distances[i]` is their unweighted euclidean\n distance. Distances are updated in the following way\n (from scipy.hierarchy.linkage):\n\n The new entry :math:`d(u,v)` is computed as follows,\n\n .. 
math::\n\n       d(u,v) = \\sqrt{\\frac{|v|+|s|}\n                            {T}d(v,s)^2\n                     + \\frac{|v|+|t|}\n                            {T}d(v,t)^2\n                     - \\frac{|v|}\n                            {T}d(s,t)^2}\n\n    where :math:`u` is the newly joined cluster consisting of\n    clusters :math:`s` and :math:`t`, :math:`v` is an unused\n    cluster in the forest, :math:`T=|v|+|s|+|t|`, and\n    :math:`|*|` is the cardinality of its argument. This is also\n    known as the incremental algorithm.", + "code": "@_deprecate_positional_args\ndef ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False):\n    \"\"\"Ward clustering based on a Feature matrix.\n\n    Recursively merges the pair of clusters that minimally increases\n    within-cluster variance.\n\n    The inertia matrix uses a Heapq-based representation.\n\n    This is the structured version, that takes into account some topological\n    structure between samples.\n\n    Read more in the :ref:`User Guide `.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        feature matrix representing n_samples samples to be clustered\n\n    connectivity : sparse matrix, default=None\n        connectivity matrix. Defines for each sample the neighboring samples\n        following a given structure of the data. The matrix is assumed to\n        be symmetric and only the upper triangular half is used.\n        Default is None, i.e., the Ward algorithm is unstructured.\n\n    n_clusters : int, default=None\n        Stop early the construction of the tree at n_clusters. This is\n        useful to decrease computation time if the number of clusters is\n        not small compared to the number of samples. In this case, the\n        complete tree is not computed, thus the 'children' output is of\n        limited use, and the 'parents' output should rather be used.\n        This option is valid only when specifying a connectivity matrix.\n\n    return_distance : bool, default=False\n        If True, return the distance between the clusters.\n\n    Returns\n    -------\n    children : ndarray of shape (n_nodes-1, 2)\n        The children of each non-leaf node. Values less than `n_samples`\n        correspond to leaves of the tree which are the original samples.\n        A node `i` greater than or equal to `n_samples` is a non-leaf\n        node and has children `children_[i - n_samples]`. Alternatively\n        at the i-th iteration, children[i][0] and children[i][1]\n        are merged to form node `n_samples + i`\n\n    n_connected_components : int\n        The number of connected components in the graph.\n\n    n_leaves : int\n        The number of leaves in the tree\n\n    parents : ndarray of shape (n_nodes,) or None\n        The parent of each node. Only returned when a connectivity matrix\n        is specified, elsewhere 'None' is returned.\n\n    distances : ndarray of shape (n_nodes-1,)\n        Only returned if return_distance is set to True (for compatibility).\n        The distances between the centers of the nodes. `distances[i]`\n        corresponds to a weighted euclidean distance between\n        the nodes `children[i, 0]` and `children[i, 1]`. If the nodes refer to\n        leaves of the tree, then `distances[i]` is their unweighted euclidean\n        distance. Distances are updated in the following way\n        (from scipy.cluster.hierarchy.linkage):\n\n        The new entry :math:`d(u,v)` is computed as follows,\n\n        .. math::\n\n           d(u,v) = \\\\sqrt{\\\\frac{|v|+|s|}\n                                {T}d(v,s)^2\n                         + \\\\frac{|v|+|t|}\n                                {T}d(v,t)^2\n                         - \\\\frac{|v|}\n                                {T}d(s,t)^2}\n\n        where :math:`u` is the newly joined cluster consisting of\n        clusters :math:`s` and :math:`t`, :math:`v` is an unused\n        cluster in the forest, :math:`T=|v|+|s|+|t|`, and\n        :math:`|*|` is the cardinality of its argument. 
This is also\n known as the incremental algorithm.\n \"\"\"\n X = np.asarray(X)\n if X.ndim == 1:\n X = np.reshape(X, (-1, 1))\n n_samples, n_features = X.shape\n\n if connectivity is None:\n from scipy.cluster import hierarchy # imports PIL\n\n if n_clusters is not None:\n warnings.warn('Partial build of the tree is implemented '\n 'only for structured clustering (i.e. with '\n 'explicit connectivity). The algorithm '\n 'will build the full tree and only '\n 'retain the lower branches required '\n 'for the specified number of clusters',\n stacklevel=2)\n X = np.require(X, requirements=\"W\")\n out = hierarchy.ward(X)\n children_ = out[:, :2].astype(np.intp)\n\n if return_distance:\n distances = out[:, 2]\n return children_, 1, n_samples, None, distances\n else:\n return children_, 1, n_samples, None\n\n connectivity, n_connected_components = _fix_connectivity(\n X, connectivity,\n affinity='euclidean')\n if n_clusters is None:\n n_nodes = 2 * n_samples - 1\n else:\n if n_clusters > n_samples:\n raise ValueError('Cannot provide more clusters than samples. '\n '%i n_clusters was asked, and there are %i '\n 'samples.' % (n_clusters, n_samples))\n n_nodes = 2 * n_samples - n_clusters\n\n # create inertia matrix\n coord_row = []\n coord_col = []\n A = []\n for ind, row in enumerate(connectivity.rows):\n A.append(row)\n # We keep only the upper triangular for the moments\n # Generator expressions are faster than arrays on the following\n row = [i for i in row if i < ind]\n coord_row.extend(len(row) * [ind, ])\n coord_col.extend(row)\n\n coord_row = np.array(coord_row, dtype=np.intp, order='C')\n coord_col = np.array(coord_col, dtype=np.intp, order='C')\n\n # build moments as a list\n moments_1 = np.zeros(n_nodes, order='C')\n moments_1[:n_samples] = 1\n moments_2 = np.zeros((n_nodes, n_features), order='C')\n moments_2[:n_samples] = X\n inertia = np.empty(len(coord_row), dtype=np.float64, order='C')\n _hierarchical.compute_ward_dist(moments_1, moments_2, coord_row, coord_col,\n inertia)\n inertia = list(zip(inertia, coord_row, coord_col))\n heapify(inertia)\n\n # prepare the main fields\n parent = np.arange(n_nodes, dtype=np.intp)\n used_node = np.ones(n_nodes, dtype=bool)\n children = []\n if return_distance:\n distances = np.empty(n_nodes - n_samples)\n\n not_visited = np.empty(n_nodes, dtype=np.int8, order='C')\n\n # recursive merge loop\n for k in range(n_samples, n_nodes):\n # identify the merge\n while True:\n inert, i, j = heappop(inertia)\n if used_node[i] and used_node[j]:\n break\n parent[i], parent[j] = k, k\n children.append((i, j))\n used_node[i] = used_node[j] = False\n if return_distance: # store inertia value\n distances[k - n_samples] = inert\n\n # update the moments\n moments_1[k] = moments_1[i] + moments_1[j]\n moments_2[k] = moments_2[i] + moments_2[j]\n\n # update the structure matrix A and the inertia matrix\n coord_col = []\n not_visited.fill(1)\n not_visited[k] = 0\n _hierarchical._get_parents(A[i], coord_col, parent, not_visited)\n _hierarchical._get_parents(A[j], coord_col, parent, not_visited)\n # List comprehension is faster than a for loop\n [A[col].append(k) for col in coord_col]\n A.append(coord_col)\n coord_col = np.array(coord_col, dtype=np.intp, order='C')\n coord_row = np.empty(coord_col.shape, dtype=np.intp, order='C')\n coord_row.fill(k)\n n_additions = len(coord_row)\n ini = np.empty(n_additions, dtype=np.float64, order='C')\n\n _hierarchical.compute_ward_dist(moments_1, moments_2,\n coord_row, coord_col, ini)\n\n # List comprehension is faster than a for 
loop\n [heappush(inertia, (ini[idx], k, coord_col[idx]))\n for idx in range(n_additions)]\n\n # Separate leaves in children (empty lists up to now)\n n_leaves = n_samples\n # sort children to get consistent output with unstructured version\n children = [c[::-1] for c in children]\n children = np.array(children) # return numpy array for efficient caching\n\n if return_distance:\n # 2 is scaling factor to compare w/ unstructured version\n distances = np.sqrt(2. * distances)\n return children, n_connected_components, n_leaves, parent, distances\n else:\n return children, n_connected_components, n_leaves, parent" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/__init__", + "name": "__init__", + "qname": "sklearn.cluster._bicluster.BaseSpectral.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/__init__/self", + "name": "self", + "qname": "sklearn.cluster._bicluster.BaseSpectral.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/__init__/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._bicluster.BaseSpectral.__init__.n_clusters", + "default_value": "3", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/__init__/svd_method", + "name": "svd_method", + "qname": "sklearn.cluster._bicluster.BaseSpectral.__init__.svd_method", + "default_value": "'randomized'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/__init__/n_svd_vecs", + "name": "n_svd_vecs", + "qname": "sklearn.cluster._bicluster.BaseSpectral.__init__.n_svd_vecs", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/__init__/mini_batch", + "name": "mini_batch", + "qname": "sklearn.cluster._bicluster.BaseSpectral.__init__.mini_batch", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/__init__/init", + "name": "init", + "qname": "sklearn.cluster._bicluster.BaseSpectral.__init__.init", + "default_value": "'k-means++'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/__init__/n_init", + "name": "n_init", + "qname": "sklearn.cluster._bicluster.BaseSpectral.__init__.n_init", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.cluster._bicluster.BaseSpectral.__init__.n_jobs", + 
"default_value": "'deprecated'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/__init__/random_state", + "name": "random_state", + "qname": "sklearn.cluster._bicluster.BaseSpectral.__init__.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for spectral biclustering.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self, n_clusters=3, svd_method=\"randomized\",\n n_svd_vecs=None, mini_batch=False, init=\"k-means++\",\n n_init=10, n_jobs='deprecated', random_state=None):\n self.n_clusters = n_clusters\n self.svd_method = svd_method\n self.n_svd_vecs = n_svd_vecs\n self.mini_batch = mini_batch\n self.init = init\n self.n_init = n_init\n self.n_jobs = n_jobs\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_check_parameters", + "name": "_check_parameters", + "qname": "sklearn.cluster._bicluster.BaseSpectral._check_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_check_parameters/self", + "name": "self", + "qname": "sklearn.cluster._bicluster.BaseSpectral._check_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_parameters(self):\n legal_svd_methods = ('randomized', 'arpack')\n if self.svd_method not in legal_svd_methods:\n raise ValueError(\"Unknown SVD method: '{0}'. 
svd_method must be\"\n \" one of {1}.\".format(self.svd_method,\n legal_svd_methods))" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_k_means", + "name": "_k_means", + "qname": "sklearn.cluster._bicluster.BaseSpectral._k_means", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_k_means/self", + "name": "self", + "qname": "sklearn.cluster._bicluster.BaseSpectral._k_means.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_k_means/data", + "name": "data", + "qname": "sklearn.cluster._bicluster.BaseSpectral._k_means.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_k_means/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._bicluster.BaseSpectral._k_means.n_clusters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _k_means(self, data, n_clusters):\n if self.mini_batch:\n model = MiniBatchKMeans(n_clusters,\n init=self.init,\n n_init=self.n_init,\n random_state=self.random_state)\n else:\n model = KMeans(n_clusters, init=self.init,\n n_init=self.n_init, n_jobs=self.n_jobs,\n random_state=self.random_state)\n model.fit(data)\n centroid = model.cluster_centers_\n labels = model.labels_\n return centroid, labels" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_svd", + "name": "_svd", + "qname": "sklearn.cluster._bicluster.BaseSpectral._svd", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_svd/self", + "name": "self", + "qname": "sklearn.cluster._bicluster.BaseSpectral._svd.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_svd/array", + "name": "array", + "qname": "sklearn.cluster._bicluster.BaseSpectral._svd.array", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_svd/n_components", + "name": "n_components", + "qname": "sklearn.cluster._bicluster.BaseSpectral._svd.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/_svd/n_discard", + "name": "n_discard", + "qname": "sklearn.cluster._bicluster.BaseSpectral._svd.n_discard", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": 
"Returns first `n_components` left and right singular\nvectors u and v, discarding the first `n_discard`.", + "docstring": "Returns first `n_components` left and right singular\nvectors u and v, discarding the first `n_discard`.", + "code": " def _svd(self, array, n_components, n_discard):\n \"\"\"Returns first `n_components` left and right singular\n vectors u and v, discarding the first `n_discard`.\n\n \"\"\"\n if self.svd_method == 'randomized':\n kwargs = {}\n if self.n_svd_vecs is not None:\n kwargs['n_oversamples'] = self.n_svd_vecs\n u, _, vt = randomized_svd(array, n_components,\n random_state=self.random_state,\n **kwargs)\n\n elif self.svd_method == 'arpack':\n u, _, vt = svds(array, k=n_components, ncv=self.n_svd_vecs)\n if np.any(np.isnan(vt)):\n # some eigenvalues of A * A.T are negative, causing\n # sqrt() to be np.nan. This causes some vectors in vt\n # to be np.nan.\n A = safe_sparse_dot(array.T, array)\n random_state = check_random_state(self.random_state)\n # initialize with [-1,1] as in ARPACK\n v0 = random_state.uniform(-1, 1, A.shape[0])\n _, v = eigsh(A, ncv=self.n_svd_vecs, v0=v0)\n vt = v.T\n if np.any(np.isnan(u)):\n A = safe_sparse_dot(array, array.T)\n random_state = check_random_state(self.random_state)\n # initialize with [-1,1] as in ARPACK\n v0 = random_state.uniform(-1, 1, A.shape[0])\n _, u = eigsh(A, ncv=self.n_svd_vecs, v0=v0)\n\n assert_all_finite(u)\n assert_all_finite(vt)\n u = u[:, n_discard:]\n vt = vt[n_discard:]\n return u, vt.T" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/fit", + "name": "fit", + "qname": "sklearn.cluster._bicluster.BaseSpectral.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/fit/self", + "name": "self", + "qname": "sklearn.cluster._bicluster.BaseSpectral.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/fit/X", + "name": "X", + "qname": "sklearn.cluster._bicluster.BaseSpectral.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/BaseSpectral/fit/y", + "name": "y", + "qname": "sklearn.cluster._bicluster.BaseSpectral.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Creates a biclustering for X.", + "docstring": "Creates a biclustering for X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\ny : Ignored", + "code": " def fit(self, X, y=None):\n \"\"\"Creates a biclustering for X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n y : Ignored\n\n \"\"\"\n if self.n_jobs != 'deprecated':\n warnings.warn(\"'n_jobs' was deprecated in version 0.23 and will be\"\n \" removed in 1.0 (renaming of 0.25).\", FutureWarning)\n\n X = self._validate_data(X, accept_sparse='csr', dtype=np.float64)\n 
self._check_parameters()\n        self._fit(X)\n        return self" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__", + "name": "__init__", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__/self", + "name": "self", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering.__init__.n_clusters", + "default_value": "3", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or tuple (n_row_clusters, n_column_clusters)", + "default_value": "3", + "description": "The number of row and column clusters in the checkerboard\nstructure." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "tuple (n_row_clusters, n_column_clusters)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__/method", + "name": "method", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering.__init__.method", + "default_value": "'bistochastic'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'bistochastic', 'scale', 'log'}", + "default_value": "'bistochastic'", + "description": "Method of normalizing and converting singular vectors into\nbiclusters. May be one of 'scale', 'bistochastic', or 'log'.\nThe authors recommend using 'log'. If the data is sparse,\nhowever, log normalization will not work, which is why the\ndefault is 'bistochastic'.\n\n.. warning::\n   if `method='log'`, the data must not be sparse." + }, + "type": { + "kind": "EnumType", + "values": ["bistochastic", "log", "scale"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__/n_components", + "name": "n_components", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering.__init__.n_components", + "default_value": "6", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "6", + "description": "Number of singular vectors to check." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__/n_best", + "name": "n_best", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering.__init__.n_best", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Number of best singular vectors to which to project the data\nfor clustering."
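A usage sketch for the estimator documented here (illustrative; it assumes scikit-learn 0.24.x, using `make_checkerboard` and `consensus_score`, which exist in `sklearn.datasets` and `sklearn.metrics`):

from sklearn.cluster import SpectralBiclustering
from sklearn.datasets import make_checkerboard
from sklearn.metrics import consensus_score

# Synthetic data with a 2 x 3 checkerboard structure.
data, rows, cols = make_checkerboard(shape=(30, 30), n_clusters=(2, 3),
                                     noise=0.5, random_state=0)
model = SpectralBiclustering(n_clusters=(2, 3), method='bistochastic',
                             random_state=0).fit(data)
# method='log' would also be valid here since the data is dense.
print(consensus_score(model.biclusters_, (rows, cols)))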
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__/svd_method", + "name": "svd_method", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering.__init__.svd_method", + "default_value": "'randomized'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'randomized', 'arpack'}", + "default_value": "'randomized'", + "description": "Selects the algorithm for finding singular vectors. May be\n'randomized' or 'arpack'. If 'randomized', uses\n:func:`~sklearn.utils.extmath.randomized_svd`, which may be faster\nfor large matrices. If 'arpack', uses\n`scipy.sparse.linalg.svds`, which is more accurate, but\npossibly slower in some cases." + }, + "type": { + "kind": "EnumType", + "values": ["randomized", "arpack"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__/n_svd_vecs", + "name": "n_svd_vecs", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering.__init__.n_svd_vecs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of vectors to use in calculating the SVD. Corresponds\nto `ncv` when `svd_method=arpack` and `n_oversamples` when\n`svd_method` is 'randomized`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__/mini_batch", + "name": "mini_batch", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering.__init__.mini_batch", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use mini-batch k-means, which is faster but may get\ndifferent results." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__/init", + "name": "init", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering.__init__.init", + "default_value": "'k-means++'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'k-means++', 'random'} or ndarray of (n_clusters, n_features)", + "default_value": "'k-means++'", + "description": "Method for initialization of k-means algorithm; defaults to\n'k-means++'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["random", "k-means++"] + }, + { + "kind": "NamedType", + "name": "ndarray of (n_clusters, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__/n_init", + "name": "n_init", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering.__init__.n_init", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of random initializations that are tried with the\nk-means algorithm.\n\nIf mini-batch k-means is used, the best initialization is\nchosen and the algorithm runs once. Otherwise, the algorithm\nis run for each initialization and the best solution chosen." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering.__init__.n_jobs", + "default_value": "'deprecated'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation. This works by breaking\ndown the pairwise matrix into n_jobs even slices and computing them in\nparallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25)." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/__init__/random_state", + "name": "random_state", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used for randomizing the singular value decomposition and the k-means\ninitialization. Use an int to make the randomness deterministic.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Spectral biclustering (Kluger, 2003).\n\nPartitions rows and columns under the assumption that the data has\nan underlying checkerboard structure. For instance, if there are\ntwo row partitions and three column partitions, each row will\nbelong to three biclusters, and each column will belong to two\nbiclusters. 
The outer product of the corresponding row and column\nlabel vectors gives this checkerboard structure.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_clusters=3, *, method='bistochastic',\n n_components=6, n_best=3, svd_method='randomized',\n n_svd_vecs=None, mini_batch=False, init='k-means++',\n n_init=10, n_jobs='deprecated', random_state=None):\n super().__init__(n_clusters,\n svd_method,\n n_svd_vecs,\n mini_batch,\n init,\n n_init,\n n_jobs,\n random_state)\n self.method = method\n self.n_components = n_components\n self.n_best = n_best" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_check_parameters", + "name": "_check_parameters", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._check_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_check_parameters/self", + "name": "self", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._check_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_parameters(self):\n super()._check_parameters()\n legal_methods = ('bistochastic', 'scale', 'log')\n if self.method not in legal_methods:\n raise ValueError(\"Unknown method: '{0}'. method must be\"\n \" one of {1}.\".format(self.method, legal_methods))\n try:\n int(self.n_clusters)\n except TypeError:\n try:\n r, c = self.n_clusters\n int(r)\n int(c)\n except (ValueError, TypeError) as e:\n raise ValueError(\"Incorrect parameter n_clusters has value:\"\n \" {}. 
It should either be a single integer\"\n                                 \" or an iterable with two integers:\"\n                                 \" (n_row_clusters, n_column_clusters)\"\n                                 .format(self.n_clusters)) from e\n        if self.n_components < 1:\n            raise ValueError(\"Parameter n_components must be greater than 0,\"\n                             \" but its value is {}\".format(self.n_components))\n        if self.n_best < 1:\n            raise ValueError(\"Parameter n_best must be greater than 0,\"\n                             \" but its value is {}\".format(self.n_best))\n        if self.n_best > self.n_components:\n            raise ValueError(\"n_best cannot be larger than\"\n                             \" n_components, but {} > {}\"\n                             \"\".format(self.n_best, self.n_components))" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_fit", + "name": "_fit", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_fit/self", + "name": "self", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_fit/X", + "name": "X", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "    def _fit(self, X):\n        n_sv = self.n_components\n        if self.method == 'bistochastic':\n            normalized_data = _bistochastic_normalize(X)\n            n_sv += 1\n        elif self.method == 'scale':\n            normalized_data, _, _ = _scale_normalize(X)\n            n_sv += 1\n        elif self.method == 'log':\n            normalized_data = _log_normalize(X)\n        n_discard = 0 if self.method == 'log' else 1\n        u, v = self._svd(normalized_data, n_sv, n_discard)\n        ut = u.T\n        vt = v.T\n\n        try:\n            n_row_clusters, n_col_clusters = self.n_clusters\n        except TypeError:\n            n_row_clusters = n_col_clusters = self.n_clusters\n\n        best_ut = self._fit_best_piecewise(ut, self.n_best,\n                                           n_row_clusters)\n\n        best_vt = self._fit_best_piecewise(vt, self.n_best,\n                                           n_col_clusters)\n\n        self.row_labels_ = self._project_and_cluster(X, best_vt.T,\n                                                     n_row_clusters)\n\n        self.column_labels_ = self._project_and_cluster(X.T, best_ut.T,\n                                                        n_col_clusters)\n\n        self.rows_ = np.vstack([self.row_labels_ == label\n                                for label in range(n_row_clusters)\n                                for _ in range(n_col_clusters)])\n        self.columns_ = np.vstack([self.column_labels_ == label\n                                   for _ in range(n_row_clusters)\n                                   for label in range(n_col_clusters)])" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_fit_best_piecewise", + "name": "_fit_best_piecewise", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._fit_best_piecewise", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_fit_best_piecewise/self", + "name": "self", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._fit_best_piecewise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_fit_best_piecewise/vectors", + "name": "vectors", + "qname": 
"sklearn.cluster._bicluster.SpectralBiclustering._fit_best_piecewise.vectors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_fit_best_piecewise/n_best", + "name": "n_best", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._fit_best_piecewise.n_best", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_fit_best_piecewise/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._fit_best_piecewise.n_clusters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Find the ``n_best`` vectors that are best approximated by piecewise\nconstant vectors.\n\nThe piecewise vectors are found by k-means; the best is chosen\naccording to Euclidean distance.", + "docstring": "Find the ``n_best`` vectors that are best approximated by piecewise\nconstant vectors.\n\nThe piecewise vectors are found by k-means; the best is chosen\naccording to Euclidean distance.", + "code": " def _fit_best_piecewise(self, vectors, n_best, n_clusters):\n \"\"\"Find the ``n_best`` vectors that are best approximated by piecewise\n constant vectors.\n\n The piecewise vectors are found by k-means; the best is chosen\n according to Euclidean distance.\n\n \"\"\"\n def make_piecewise(v):\n centroid, labels = self._k_means(v.reshape(-1, 1), n_clusters)\n return centroid[labels].ravel()\n piecewise_vectors = np.apply_along_axis(make_piecewise,\n axis=1, arr=vectors)\n dists = np.apply_along_axis(norm, axis=1,\n arr=(vectors - piecewise_vectors))\n result = vectors[np.argsort(dists)[:n_best]]\n return result" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_project_and_cluster", + "name": "_project_and_cluster", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._project_and_cluster", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_project_and_cluster/self", + "name": "self", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._project_and_cluster.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_project_and_cluster/data", + "name": "data", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._project_and_cluster.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_project_and_cluster/vectors", + "name": "vectors", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._project_and_cluster.vectors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
"description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralBiclustering/_project_and_cluster/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._bicluster.SpectralBiclustering._project_and_cluster.n_clusters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Project ``data`` to ``vectors`` and cluster the result.", + "docstring": "Project ``data`` to ``vectors`` and cluster the result.", + "code": " def _project_and_cluster(self, data, vectors, n_clusters):\n \"\"\"Project ``data`` to ``vectors`` and cluster the result.\"\"\"\n projected = safe_sparse_dot(data, vectors)\n _, labels = self._k_means(projected, n_clusters)\n return labels" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/__init__", + "name": "__init__", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/__init__/self", + "name": "self", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/__init__/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering.__init__.n_clusters", + "default_value": "3", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "The number of biclusters to find." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/__init__/svd_method", + "name": "svd_method", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering.__init__.svd_method", + "default_value": "'randomized'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'randomized', 'arpack'}", + "default_value": "'randomized'", + "description": "Selects the algorithm for finding singular vectors. May be\n'randomized' or 'arpack'. If 'randomized', use\n:func:`sklearn.utils.extmath.randomized_svd`, which may be faster\nfor large matrices. If 'arpack', use\n:func:`scipy.sparse.linalg.svds`, which is more accurate, but\npossibly slower in some cases." + }, + "type": { + "kind": "EnumType", + "values": ["randomized", "arpack"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/__init__/n_svd_vecs", + "name": "n_svd_vecs", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering.__init__.n_svd_vecs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of vectors to use in calculating the SVD. Corresponds\nto `ncv` when `svd_method=arpack` and `n_oversamples` when\n`svd_method` is 'randomized`." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/__init__/mini_batch", + "name": "mini_batch", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering.__init__.mini_batch", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use mini-batch k-means, which is faster but may get\ndifferent results." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/__init__/init", + "name": "init", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering.__init__.init", + "default_value": "'k-means++'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'k-means++', 'random', or ndarray of shape (n_clusters, n_features)", + "default_value": "'k-means++'", + "description": "Method for initialization of k-means algorithm; defaults to\n'k-means++'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "{'k-means++'" + }, + { + "kind": "NamedType", + "name": "'random'" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_clusters, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/__init__/n_init", + "name": "n_init", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering.__init__.n_init", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of random initializations that are tried with the\nk-means algorithm.\n\nIf mini-batch k-means is used, the best initialization is\nchosen and the algorithm runs once. Otherwise, the algorithm\nis run for each initialization and the best solution chosen." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering.__init__.n_jobs", + "default_value": "'deprecated'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation. This works by breaking\ndown the pairwise matrix into n_jobs even slices and computing them in\nparallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25)." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/__init__/random_state", + "name": "random_state", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used for randomizing the singular value decomposition and the k-means\ninitialization. Use an int to make the randomness deterministic.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Spectral Co-Clustering algorithm (Dhillon, 2001).\n\nClusters rows and columns of an array `X` to solve the relaxed\nnormalized cut of the bipartite graph created from `X` as follows:\nthe edge between row vertex `i` and column vertex `j` has weight\n`X[i, j]`.\n\nThe resulting bicluster structure is block-diagonal, since each\nrow and each column belongs to exactly one bicluster.\n\nSupports sparse matrices, as long as they are nonnegative.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_clusters=3, *, svd_method='randomized',\n n_svd_vecs=None, mini_batch=False, init='k-means++',\n n_init=10, n_jobs='deprecated', random_state=None):\n super().__init__(n_clusters,\n svd_method,\n n_svd_vecs,\n mini_batch,\n init,\n n_init,\n n_jobs,\n random_state)" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/_fit", + "name": "_fit", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/_fit/self", + "name": "self", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/SpectralCoclustering/_fit/X", + "name": "X", + "qname": "sklearn.cluster._bicluster.SpectralCoclustering._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit(self, X):\n normalized_data, row_diag, col_diag = _scale_normalize(X)\n n_sv = 1 + int(np.ceil(np.log2(self.n_clusters)))\n u, v = self._svd(normalized_data, n_sv, n_discard=1)\n z = np.vstack((row_diag[:, np.newaxis] * u,\n col_diag[:, np.newaxis] * v))\n\n _, labels = self._k_means(z, self.n_clusters)\n\n n_rows = X.shape[0]\n self.row_labels_ = labels[:n_rows]\n self.column_labels_ = labels[n_rows:]\n\n self.rows_ = np.vstack([self.row_labels_ == c\n for c in range(self.n_clusters)])\n self.columns_ = np.vstack([self.column_labels_ == c\n for c in range(self.n_clusters)])" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/_bistochastic_normalize", + "name": "_bistochastic_normalize", + "qname": "sklearn.cluster._bicluster._bistochastic_normalize", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/_bistochastic_normalize/X", + "name": "X", + "qname": "sklearn.cluster._bicluster._bistochastic_normalize.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/_bistochastic_normalize/max_iter", + "name": "max_iter", + "qname": "sklearn.cluster._bicluster._bistochastic_normalize.max_iter", + "default_value": "1000", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + 
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/_bistochastic_normalize/tol", + "name": "tol", + "qname": "sklearn.cluster._bicluster._bistochastic_normalize.tol", + "default_value": "1e-05", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Normalize rows and columns of ``X`` simultaneously so that all\nrows sum to one constant and all columns sum to a different\nconstant.", + "docstring": "Normalize rows and columns of ``X`` simultaneously so that all\nrows sum to one constant and all columns sum to a different\nconstant.", + "code": "def _bistochastic_normalize(X, max_iter=1000, tol=1e-5):\n \"\"\"Normalize rows and columns of ``X`` simultaneously so that all\n rows sum to one constant and all columns sum to a different\n constant.\n\n \"\"\"\n # According to paper, this can also be done more efficiently with\n # deviation reduction and balancing algorithms.\n X = make_nonnegative(X)\n X_scaled = X\n for _ in range(max_iter):\n X_new, _, _ = _scale_normalize(X_scaled)\n if issparse(X):\n dist = norm(X_scaled.data - X.data)\n else:\n dist = norm(X_scaled - X_new)\n X_scaled = X_new\n if dist is not None and dist < tol:\n break\n return X_scaled" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/_log_normalize", + "name": "_log_normalize", + "qname": "sklearn.cluster._bicluster._log_normalize", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/_log_normalize/X", + "name": "X", + "qname": "sklearn.cluster._bicluster._log_normalize.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Normalize ``X`` according to Kluger's log-interactions scheme.", + "docstring": "Normalize ``X`` according to Kluger's log-interactions scheme.", + "code": "def _log_normalize(X):\n \"\"\"Normalize ``X`` according to Kluger's log-interactions scheme.\"\"\"\n X = make_nonnegative(X, min_value=1)\n if issparse(X):\n raise ValueError(\"Cannot compute log of a sparse matrix,\"\n \" because log(x) diverges to -infinity as x\"\n \" goes to 0.\")\n L = np.log(X)\n row_avg = L.mean(axis=1)[:, np.newaxis]\n col_avg = L.mean(axis=0)\n avg = L.mean()\n return L - row_avg - col_avg + avg" + }, + { + "id": "scikit-learn/sklearn.cluster._bicluster/_scale_normalize", + "name": "_scale_normalize", + "qname": "sklearn.cluster._bicluster._scale_normalize", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._bicluster/_scale_normalize/X", + "name": "X", + "qname": "sklearn.cluster._bicluster._scale_normalize.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Normalize ``X`` by scaling rows and columns independently.\n\nReturns the normalized matrix and the row and column scaling\nfactors.", + "docstring": "Normalize ``X`` by scaling rows and columns independently.\n\nReturns the normalized matrix and the row and column scaling\nfactors.", + 
"code": "def _scale_normalize(X):\n \"\"\"Normalize ``X`` by scaling rows and columns independently.\n\n Returns the normalized matrix and the row and column scaling\n factors.\n\n \"\"\"\n X = make_nonnegative(X)\n row_diag = np.asarray(1.0 / np.sqrt(X.sum(axis=1))).squeeze()\n col_diag = np.asarray(1.0 / np.sqrt(X.sum(axis=0))).squeeze()\n row_diag = np.where(np.isnan(row_diag), 0, row_diag)\n col_diag = np.where(np.isnan(col_diag), 0, col_diag)\n if issparse(X):\n n_rows, n_cols = X.shape\n r = dia_matrix((row_diag, [0]), shape=(n_rows, n_rows))\n c = dia_matrix((col_diag, [0]), shape=(n_cols, n_cols))\n an = r * X * c\n else:\n an = row_diag[:, np.newaxis] * X * col_diag\n return an, row_diag, col_diag" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/__init__", + "name": "__init__", + "qname": "sklearn.cluster._birch.Birch.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/__init__/self", + "name": "self", + "qname": "sklearn.cluster._birch.Birch.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/__init__/threshold", + "name": "threshold", + "qname": "sklearn.cluster._birch.Birch.__init__.threshold", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "The radius of the subcluster obtained by merging a new sample and the\nclosest subcluster should be lesser than the threshold. Otherwise a new\nsubcluster is started. Setting this value to be very low promotes\nsplitting and vice-versa." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/__init__/branching_factor", + "name": "branching_factor", + "qname": "sklearn.cluster._birch.Birch.__init__.branching_factor", + "default_value": "50", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "50", + "description": "Maximum number of CF subclusters in each node. If a new samples enters\nsuch that the number of subclusters exceed the branching_factor then\nthat node is split into two nodes with the subclusters redistributed\nin each. The parent subcluster of that node is removed and two new\nsubclusters are added as parents of the 2 split nodes." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/__init__/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._birch.Birch.__init__.n_clusters", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, instance of sklearn.cluster model", + "default_value": "3", + "description": "Number of clusters after the final clustering step, which treats the\nsubclusters from the leaves as new samples.\n\n- `None` : the final clustering step is not performed and the\n subclusters are returned as they are.\n\n- :mod:`sklearn.cluster` Estimator : If a model is provided, the model\n is fit treating the subclusters as new samples and the initial data\n is mapped to the label of the closest subcluster.\n\n- `int` : the model fit is :class:`AgglomerativeClustering` with\n `n_clusters` set to be equal to the int." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "instance of sklearn.cluster model" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/__init__/compute_labels", + "name": "compute_labels", + "qname": "sklearn.cluster._birch.Birch.__init__.compute_labels", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not to compute labels for each fit." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/__init__/copy", + "name": "copy", + "qname": "sklearn.cluster._birch.Birch.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not to make a copy of the given data. If set to False,\nthe initial data will be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Implements the BIRCH clustering algorithm.\n\nIt is a memory-efficient, online-learning algorithm provided as an\nalternative to :class:`MiniBatchKMeans`. It constructs a tree\ndata structure with the cluster centroids being read off the leaf.\nThese can be either the final cluster centroids or can be provided as input\nto another clustering algorithm such as :class:`AgglomerativeClustering`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, threshold=0.5, branching_factor=50, n_clusters=3,\n compute_labels=True, copy=True):\n self.threshold = threshold\n self.branching_factor = branching_factor\n self.n_clusters = n_clusters\n self.compute_labels = compute_labels\n self.copy = copy" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/_check_fit", + "name": "_check_fit", + "qname": "sklearn.cluster._birch.Birch._check_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/_check_fit/self", + "name": "self", + "qname": "sklearn.cluster._birch.Birch._check_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/_check_fit/X", + "name": "X", + "qname": "sklearn.cluster._birch.Birch._check_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_fit(self, X):\n check_is_fitted(self)\n\n if (hasattr(self, 'subcluster_centers_') and\n X.shape[1] != self.subcluster_centers_.shape[1]):\n raise ValueError(\n \"Training data and predicted data do \"\n \"not have same number of features.\")" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/_fit", + "name": "_fit", + "qname": "sklearn.cluster._birch.Birch._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/_fit/self", + "name": "self", + "qname": "sklearn.cluster._birch.Birch._fit.self", + "default_value": null, + "assigned_by": 
"IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/_fit/X", + "name": "X", + "qname": "sklearn.cluster._birch.Birch._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit(self, X):\n has_root = getattr(self, 'root_', None)\n first_call = self.fit_ or (self.partial_fit_ and not has_root)\n\n X = self._validate_data(X, accept_sparse='csr', copy=self.copy,\n reset=first_call)\n threshold = self.threshold\n branching_factor = self.branching_factor\n\n if branching_factor <= 1:\n raise ValueError(\"Branching_factor should be greater than one.\")\n n_samples, n_features = X.shape\n\n # If partial_fit is called for the first time or fit is called, we\n # start a new tree.\n if first_call:\n # The first root is the leaf. Manipulate this object throughout.\n self.root_ = _CFNode(threshold=threshold,\n branching_factor=branching_factor,\n is_leaf=True,\n n_features=n_features)\n\n # To enable getting back subclusters.\n self.dummy_leaf_ = _CFNode(threshold=threshold,\n branching_factor=branching_factor,\n is_leaf=True, n_features=n_features)\n self.dummy_leaf_.next_leaf_ = self.root_\n self.root_.prev_leaf_ = self.dummy_leaf_\n\n # Cannot vectorize. Enough to convince to use cython.\n if not sparse.issparse(X):\n iter_func = iter\n else:\n iter_func = _iterate_sparse_X\n\n for sample in iter_func(X):\n subcluster = _CFSubcluster(linear_sum=sample)\n split = self.root_.insert_cf_subcluster(subcluster)\n\n if split:\n new_subcluster1, new_subcluster2 = _split_node(\n self.root_, threshold, branching_factor)\n del self.root_\n self.root_ = _CFNode(threshold=threshold,\n branching_factor=branching_factor,\n is_leaf=False,\n n_features=n_features)\n self.root_.append_subcluster(new_subcluster1)\n self.root_.append_subcluster(new_subcluster2)\n\n centroids = np.concatenate([\n leaf.centroids_ for leaf in self._get_leaves()])\n self.subcluster_centers_ = centroids\n\n self._global_clustering(X)\n return self" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/_get_leaves", + "name": "_get_leaves", + "qname": "sklearn.cluster._birch.Birch._get_leaves", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/_get_leaves/self", + "name": "self", + "qname": "sklearn.cluster._birch.Birch._get_leaves.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Retrieve the leaves of the CF Node.", + "docstring": "Retrieve the leaves of the CF Node.\n\nReturns\n-------\nleaves : list of shape (n_leaves,)\n List of the leaf nodes.", + "code": " def _get_leaves(self):\n \"\"\"\n Retrieve the leaves of the CF Node.\n\n Returns\n -------\n leaves : list of shape (n_leaves,)\n List of the leaf nodes.\n \"\"\"\n leaf_ptr = self.dummy_leaf_.next_leaf_\n leaves = []\n while leaf_ptr is not None:\n leaves.append(leaf_ptr)\n leaf_ptr = leaf_ptr.next_leaf_\n return leaves" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/_global_clustering", + "name": "_global_clustering", + 
"qname": "sklearn.cluster._birch.Birch._global_clustering", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/_global_clustering/self", + "name": "self", + "qname": "sklearn.cluster._birch.Birch._global_clustering.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/_global_clustering/X", + "name": "X", + "qname": "sklearn.cluster._birch.Birch._global_clustering.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Global clustering for the subclusters obtained after fitting", + "docstring": "Global clustering for the subclusters obtained after fitting", + "code": " def _global_clustering(self, X=None):\n \"\"\"\n Global clustering for the subclusters obtained after fitting\n \"\"\"\n clusterer = self.n_clusters\n centroids = self.subcluster_centers_\n compute_labels = (X is not None) and self.compute_labels\n\n # Preprocessing for the global clustering.\n not_enough_centroids = False\n if isinstance(clusterer, numbers.Integral):\n clusterer = AgglomerativeClustering(\n n_clusters=self.n_clusters)\n # There is no need to perform the global clustering step.\n if len(centroids) < self.n_clusters:\n not_enough_centroids = True\n elif (clusterer is not None and not\n hasattr(clusterer, 'fit_predict')):\n raise ValueError(\"n_clusters should be an instance of \"\n \"ClusterMixin or an int\")\n\n # To use in predict to avoid recalculation.\n self._subcluster_norms = row_norms(\n self.subcluster_centers_, squared=True)\n\n if clusterer is None or not_enough_centroids:\n self.subcluster_labels_ = np.arange(len(centroids))\n if not_enough_centroids:\n warnings.warn(\n \"Number of subclusters found (%d) by BIRCH is less \"\n \"than (%d). Decrease the threshold.\"\n % (len(centroids), self.n_clusters), ConvergenceWarning)\n else:\n # The global clustering step that clusters the subclusters of\n # the leaves. It assumes the centroids of the subclusters as\n # samples and finds the final centroids.\n self.subcluster_labels_ = clusterer.fit_predict(\n self.subcluster_centers_)\n\n if compute_labels:\n self.labels_ = self.predict(X)" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/fit", + "name": "fit", + "qname": "sklearn.cluster._birch.Birch.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/fit/self", + "name": "self", + "qname": "sklearn.cluster._birch.Birch.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/fit/X", + "name": "X", + "qname": "sklearn.cluster._birch.Birch.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/fit/y", + "name": "y", + "qname": "sklearn.cluster._birch.Birch.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build a CF Tree for the input data.", + "docstring": "Build a CF Tree for the input data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself\n Fitted estimator.", + "code": " def fit(self, X, y=None):\n \"\"\"\n Build a CF Tree for the input data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n self\n Fitted estimator.\n \"\"\"\n self.fit_, self.partial_fit_ = True, False\n return self._fit(X)" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/partial_fit", + "name": "partial_fit", + "qname": "sklearn.cluster._birch.Birch.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/partial_fit/self", + "name": "self", + "qname": "sklearn.cluster._birch.Birch.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/partial_fit/X", + "name": "X", + "qname": "sklearn.cluster._birch.Birch.partial_fit.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "None", + "description": "Input data. If X is not provided, only the global clustering\nstep is done." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/partial_fit/y", + "name": "y", + "qname": "sklearn.cluster._birch.Birch.partial_fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Online learning. Prevents rebuilding of CFTree from scratch.", + "docstring": "Online learning. Prevents rebuilding of CFTree from scratch.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), default=None\n Input data. 
If X is not provided, only the global clustering\n step is done.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself\n Fitted estimator.", + "code": " def partial_fit(self, X=None, y=None):\n \"\"\"\n Online learning. Prevents rebuilding of CFTree from scratch.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features), \\\n default=None\n Input data. If X is not provided, only the global clustering\n step is done.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n self\n Fitted estimator.\n \"\"\"\n self.partial_fit_, self.fit_ = True, False\n if X is None:\n # Perform just the final global clustering step.\n self._global_clustering()\n return self\n else:\n return self._fit(X)" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/predict", + "name": "predict", + "qname": "sklearn.cluster._birch.Birch.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/predict/self", + "name": "self", + "qname": "sklearn.cluster._birch.Birch.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/predict/X", + "name": "X", + "qname": "sklearn.cluster._birch.Birch.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict data using the ``centroids_`` of subclusters.\n\nAvoid computation of the row norms of X.", + "docstring": "Predict data using the ``centroids_`` of subclusters.\n\nAvoid computation of the row norms of X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nlabels : ndarray of shape(n_samples,)\n Labelled data.", + "code": " def predict(self, X):\n \"\"\"\n Predict data using the ``centroids_`` of subclusters.\n\n Avoid computation of the row norms of X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\n Returns\n -------\n labels : ndarray of shape(n_samples,)\n Labelled data.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse='csr', reset=False)\n kwargs = {'Y_norm_squared': self._subcluster_norms}\n\n with config_context(assume_finite=True):\n argmin = pairwise_distances_argmin(X, self.subcluster_centers_,\n metric_kwargs=kwargs)\n return self.subcluster_labels_[argmin]" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/transform", + "name": "transform", + "qname": "sklearn.cluster._birch.Birch.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/Birch/transform/self", + "name": "self", + "qname": "sklearn.cluster._birch.Birch.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.cluster._birch/Birch/transform/X", + "name": "X", + "qname": "sklearn.cluster._birch.Birch.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform X into subcluster centroids dimension.\n\nEach dimension represents the distance from the sample point to each\ncluster centroid.", + "docstring": "Transform X into subcluster centroids dimension.\n\nEach dimension represents the distance from the sample point to each\ncluster centroid.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nX_trans : {array-like, sparse matrix} of shape (n_samples, n_clusters)\n Transformed data.", + "code": " def transform(self, X):\n \"\"\"\n Transform X into subcluster centroids dimension.\n\n Each dimension represents the distance from the sample point to each\n cluster centroid.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\n Returns\n -------\n X_trans : {array-like, sparse matrix} of shape (n_samples, n_clusters)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n self._validate_data(X, accept_sparse='csr', reset=False)\n with config_context(assume_finite=True):\n return euclidean_distances(X, self.subcluster_centers_)" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/__init__", + "name": "__init__", + "qname": "sklearn.cluster._birch._CFNode.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/__init__/self", + "name": "self", + "qname": "sklearn.cluster._birch._CFNode.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/__init__/threshold", + "name": "threshold", + "qname": "sklearn.cluster._birch._CFNode.__init__.threshold", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Threshold needed for a new subcluster to enter a CFSubcluster." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/__init__/branching_factor", + "name": "branching_factor", + "qname": "sklearn.cluster._birch._CFNode.__init__.branching_factor", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Maximum number of CF subclusters in each node." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/__init__/is_leaf", + "name": "is_leaf", + "qname": "sklearn.cluster._birch._CFNode.__init__.is_leaf", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "We need to know if the CFNode is a leaf or not, in order to\nretrieve the final subclusters." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/__init__/n_features", + "name": "n_features", + "qname": "sklearn.cluster._birch._CFNode.__init__.n_features", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The number of features." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Each node in a CFTree is called a CFNode.\n\nThe CFNode can have a maximum of branching_factor\nnumber of CFSubclusters.", + "docstring": "", + "code": " def __init__(self, *, threshold, branching_factor, is_leaf, n_features):\n self.threshold = threshold\n self.branching_factor = branching_factor\n self.is_leaf = is_leaf\n self.n_features = n_features\n\n # The list of subclusters, centroids and squared norms\n # to manipulate throughout.\n self.subclusters_ = []\n self.init_centroids_ = np.zeros((branching_factor + 1, n_features))\n self.init_sq_norm_ = np.zeros((branching_factor + 1))\n self.squared_norm_ = []\n self.prev_leaf_ = None\n self.next_leaf_ = None" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/append_subcluster", + "name": "append_subcluster", + "qname": "sklearn.cluster._birch._CFNode.append_subcluster", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/append_subcluster/self", + "name": "self", + "qname": "sklearn.cluster._birch._CFNode.append_subcluster.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/append_subcluster/subcluster", + "name": "subcluster", + "qname": "sklearn.cluster._birch._CFNode.append_subcluster.subcluster", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def append_subcluster(self, subcluster):\n n_samples = len(self.subclusters_)\n self.subclusters_.append(subcluster)\n self.init_centroids_[n_samples] = subcluster.centroid_\n self.init_sq_norm_[n_samples] = subcluster.sq_norm_\n\n # Keep centroids and squared norm as views. 
In this way\n # if we change init_centroids and init_sq_norm_, it is\n # sufficient,\n self.centroids_ = self.init_centroids_[:n_samples + 1, :]\n self.squared_norm_ = self.init_sq_norm_[:n_samples + 1]" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/insert_cf_subcluster", + "name": "insert_cf_subcluster", + "qname": "sklearn.cluster._birch._CFNode.insert_cf_subcluster", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/insert_cf_subcluster/self", + "name": "self", + "qname": "sklearn.cluster._birch._CFNode.insert_cf_subcluster.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/insert_cf_subcluster/subcluster", + "name": "subcluster", + "qname": "sklearn.cluster._birch._CFNode.insert_cf_subcluster.subcluster", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Insert a new subcluster into the node.", + "docstring": "Insert a new subcluster into the node.", + "code": " def insert_cf_subcluster(self, subcluster):\n \"\"\"Insert a new subcluster into the node.\"\"\"\n if not self.subclusters_:\n self.append_subcluster(subcluster)\n return False\n\n threshold = self.threshold\n branching_factor = self.branching_factor\n # We need to find the closest subcluster among all the\n # subclusters so that we can insert our new subcluster.\n dist_matrix = np.dot(self.centroids_, subcluster.centroid_)\n dist_matrix *= -2.\n dist_matrix += self.squared_norm_\n closest_index = np.argmin(dist_matrix)\n closest_subcluster = self.subclusters_[closest_index]\n\n # If the subcluster has a child, we need a recursive strategy.\n if closest_subcluster.child_ is not None:\n split_child = closest_subcluster.child_.insert_cf_subcluster(\n subcluster)\n\n if not split_child:\n # If it is determined that the child need not be split, we\n # can just update the closest_subcluster\n closest_subcluster.update(subcluster)\n self.init_centroids_[closest_index] = \\\n self.subclusters_[closest_index].centroid_\n self.init_sq_norm_[closest_index] = \\\n self.subclusters_[closest_index].sq_norm_\n return False\n\n # things not too good. we need to redistribute the subclusters in\n # our child node, and add a new subcluster in the parent\n # subcluster to accommodate the new child.\n else:\n new_subcluster1, new_subcluster2 = _split_node(\n closest_subcluster.child_, threshold, branching_factor)\n self.update_split_subclusters(\n closest_subcluster, new_subcluster1, new_subcluster2)\n\n if len(self.subclusters_) > self.branching_factor:\n return True\n return False\n\n # good to go!\n else:\n merged = closest_subcluster.merge_subcluster(\n subcluster, self.threshold)\n if merged:\n self.init_centroids_[closest_index] = \\\n closest_subcluster.centroid_\n self.init_sq_norm_[closest_index] = \\\n closest_subcluster.sq_norm_\n return False\n\n # not close to any other subclusters, and we still\n # have space, so add.\n elif len(self.subclusters_) < self.branching_factor:\n self.append_subcluster(subcluster)\n return False\n\n # We do not have enough space nor is it closer to an\n # other subcluster. 
We need to split.\n else:\n self.append_subcluster(subcluster)\n return True" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/update_split_subclusters", + "name": "update_split_subclusters", + "qname": "sklearn.cluster._birch._CFNode.update_split_subclusters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/update_split_subclusters/self", + "name": "self", + "qname": "sklearn.cluster._birch._CFNode.update_split_subclusters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/update_split_subclusters/subcluster", + "name": "subcluster", + "qname": "sklearn.cluster._birch._CFNode.update_split_subclusters.subcluster", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/update_split_subclusters/new_subcluster1", + "name": "new_subcluster1", + "qname": "sklearn.cluster._birch._CFNode.update_split_subclusters.new_subcluster1", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFNode/update_split_subclusters/new_subcluster2", + "name": "new_subcluster2", + "qname": "sklearn.cluster._birch._CFNode.update_split_subclusters.new_subcluster2", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Remove a subcluster from a node and update it with the\nsplit subclusters.", + "docstring": "Remove a subcluster from a node and update it with the\nsplit subclusters.", + "code": " def update_split_subclusters(self, subcluster,\n new_subcluster1, new_subcluster2):\n \"\"\"Remove a subcluster from a node and update it with the\n split subclusters.\n \"\"\"\n ind = self.subclusters_.index(subcluster)\n self.subclusters_[ind] = new_subcluster1\n self.init_centroids_[ind] = new_subcluster1.centroid_\n self.init_sq_norm_[ind] = new_subcluster1.sq_norm_\n self.append_subcluster(new_subcluster2)" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFSubcluster/__init__", + "name": "__init__", + "qname": "sklearn.cluster._birch._CFSubcluster.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/_CFSubcluster/__init__/self", + "name": "self", + "qname": "sklearn.cluster._birch._CFSubcluster.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFSubcluster/__init__/linear_sum", + "name": "linear_sum", + "qname": "sklearn.cluster._birch._CFSubcluster.__init__.linear_sum", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,)", + "default_value": "None", + "description": "Sample. This is kept optional to allow initialization of empty\nsubclusters." 
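The `_CFSubcluster` entries that follow all revolve around the clustering-feature (CF) triple `(n_samples_, linear_sum_, squared_sum_)`: merging is purely additive (see `update` below), and the radius has the closed form `radius^2 = squared_sum / n - ||centroid||^2`. A hand-rolled sketch with hypothetical helper names, not the library's code:

```python
# CF-triple bookkeeping: merge by adding triples, recover radius in closed form.
import numpy as np

rng = np.random.RandomState(0)
a, b = rng.rand(40, 3), rng.rand(60, 3)

def cf(points):
    # CF triple (n, linear_sum, squared_sum) for a set of points.
    return len(points), points.sum(axis=0), (points ** 2).sum()

def merge(cf1, cf2):
    # Merging is additive, exactly like _CFSubcluster.update.
    return cf1[0] + cf2[0], cf1[1] + cf2[1], cf1[2] + cf2[2]

n, ls, ss = merge(cf(a), cf(b))
centroid = ls / n
radius = np.sqrt(ss / n - centroid @ centroid)

# Cross-check against the direct definition of the radius.
both = np.vstack([a, b])
direct = np.sqrt(np.mean(np.sum((both - both.mean(axis=0)) ** 2, axis=1)))
print(np.isclose(radius, direct))  # True
```

The same identity is what `merge_subcluster` below evaluates against `threshold ** 2` to decide whether a merge keeps the subcluster tight enough.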
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Each subcluster in a CFNode is called a CFSubcluster.\n\nA CFSubcluster can have a CFNode has its child.", + "docstring": "", + "code": " def __init__(self, *, linear_sum=None):\n if linear_sum is None:\n self.n_samples_ = 0\n self.squared_sum_ = 0.0\n self.centroid_ = self.linear_sum_ = 0\n else:\n self.n_samples_ = 1\n self.centroid_ = self.linear_sum_ = linear_sum\n self.squared_sum_ = self.sq_norm_ = np.dot(\n self.linear_sum_, self.linear_sum_)\n self.child_ = None" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFSubcluster/merge_subcluster", + "name": "merge_subcluster", + "qname": "sklearn.cluster._birch._CFSubcluster.merge_subcluster", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/_CFSubcluster/merge_subcluster/self", + "name": "self", + "qname": "sklearn.cluster._birch._CFSubcluster.merge_subcluster.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFSubcluster/merge_subcluster/nominee_cluster", + "name": "nominee_cluster", + "qname": "sklearn.cluster._birch._CFSubcluster.merge_subcluster.nominee_cluster", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFSubcluster/merge_subcluster/threshold", + "name": "threshold", + "qname": "sklearn.cluster._birch._CFSubcluster.merge_subcluster.threshold", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check if a cluster is worthy enough to be merged. If\nyes then merge.", + "docstring": "Check if a cluster is worthy enough to be merged. If\nyes then merge.", + "code": " def merge_subcluster(self, nominee_cluster, threshold):\n \"\"\"Check if a cluster is worthy enough to be merged. 
If\n yes then merge.\n \"\"\"\n new_ss = self.squared_sum_ + nominee_cluster.squared_sum_\n new_ls = self.linear_sum_ + nominee_cluster.linear_sum_\n new_n = self.n_samples_ + nominee_cluster.n_samples_\n new_centroid = (1 / new_n) * new_ls\n new_norm = np.dot(new_centroid, new_centroid)\n dot_product = (-2 * new_n) * new_norm\n sq_radius = (new_ss + dot_product) / new_n + new_norm\n if sq_radius <= threshold ** 2:\n (self.n_samples_, self.linear_sum_, self.squared_sum_,\n self.centroid_, self.sq_norm_) = \\\n new_n, new_ls, new_ss, new_centroid, new_norm\n return True\n return False" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFSubcluster/radius@getter", + "name": "radius", + "qname": "sklearn.cluster._birch._CFSubcluster.radius", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/_CFSubcluster/radius/self", + "name": "self", + "qname": "sklearn.cluster._birch._CFSubcluster.radius.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return radius of the subcluster", + "docstring": "Return radius of the subcluster", + "code": " @property\n def radius(self):\n \"\"\"Return radius of the subcluster\"\"\"\n dot_product = -2 * np.dot(self.linear_sum_, self.centroid_)\n return sqrt(\n ((self.squared_sum_ + dot_product) / self.n_samples_) +\n self.sq_norm_)" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFSubcluster/update", + "name": "update", + "qname": "sklearn.cluster._birch._CFSubcluster.update", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/_CFSubcluster/update/self", + "name": "self", + "qname": "sklearn.cluster._birch._CFSubcluster.update.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_CFSubcluster/update/subcluster", + "name": "subcluster", + "qname": "sklearn.cluster._birch._CFSubcluster.update.subcluster", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def update(self, subcluster):\n self.n_samples_ += subcluster.n_samples_\n self.linear_sum_ += subcluster.linear_sum_\n self.squared_sum_ += subcluster.squared_sum_\n self.centroid_ = self.linear_sum_ / self.n_samples_\n self.sq_norm_ = np.dot(self.centroid_, self.centroid_)" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_iterate_sparse_X", + "name": "_iterate_sparse_X", + "qname": "sklearn.cluster._birch._iterate_sparse_X", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/_iterate_sparse_X/X", + "name": "X", + "qname": "sklearn.cluster._birch._iterate_sparse_X.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "This little hack returns a densified row when iterating over a sparse\nmatrix, instead of constructing a sparse matrix for every 
row that is\nexpensive.", + "docstring": "This little hack returns a densified row when iterating over a sparse\nmatrix, instead of constructing a sparse matrix for every row that is\nexpensive.", + "code": "def _iterate_sparse_X(X):\n \"\"\"This little hack returns a densified row when iterating over a sparse\n matrix, instead of constructing a sparse matrix for every row that is\n expensive.\n \"\"\"\n n_samples = X.shape[0]\n X_indices = X.indices\n X_data = X.data\n X_indptr = X.indptr\n\n for i in range(n_samples):\n row = np.zeros(X.shape[1])\n startptr, endptr = X_indptr[i], X_indptr[i + 1]\n nonzero_indices = X_indices[startptr:endptr]\n row[nonzero_indices] = X_data[startptr:endptr]\n yield row" + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_split_node", + "name": "_split_node", + "qname": "sklearn.cluster._birch._split_node", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._birch/_split_node/node", + "name": "node", + "qname": "sklearn.cluster._birch._split_node.node", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_split_node/threshold", + "name": "threshold", + "qname": "sklearn.cluster._birch._split_node.threshold", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._birch/_split_node/branching_factor", + "name": "branching_factor", + "qname": "sklearn.cluster._birch._split_node.branching_factor", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "The node has to be split if there is no place for a new subcluster\nin the node.\n1. Two empty nodes and two empty subclusters are initialized.\n2. The pair of distant subclusters are found.\n3. The properties of the empty subclusters and nodes are updated\n according to the nearest distance between the subclusters to the\n pair of distant subclusters.\n4. The two nodes are set as children to the two subclusters.", + "docstring": "The node has to be split if there is no place for a new subcluster\nin the node.\n1. Two empty nodes and two empty subclusters are initialized.\n2. The pair of distant subclusters are found.\n3. The properties of the empty subclusters and nodes are updated\n according to the nearest distance between the subclusters to the\n pair of distant subclusters.\n4. The two nodes are set as children to the two subclusters.", + "code": "def _split_node(node, threshold, branching_factor):\n \"\"\"The node has to be split if there is no place for a new subcluster\n in the node.\n 1. Two empty nodes and two empty subclusters are initialized.\n 2. The pair of distant subclusters are found.\n 3. The properties of the empty subclusters and nodes are updated\n according to the nearest distance between the subclusters to the\n pair of distant subclusters.\n 4. 
The two nodes are set as children to the two subclusters.\n \"\"\"\n new_subcluster1 = _CFSubcluster()\n new_subcluster2 = _CFSubcluster()\n new_node1 = _CFNode(\n threshold=threshold, branching_factor=branching_factor,\n is_leaf=node.is_leaf,\n n_features=node.n_features)\n new_node2 = _CFNode(\n threshold=threshold, branching_factor=branching_factor,\n is_leaf=node.is_leaf,\n n_features=node.n_features)\n new_subcluster1.child_ = new_node1\n new_subcluster2.child_ = new_node2\n\n if node.is_leaf:\n if node.prev_leaf_ is not None:\n node.prev_leaf_.next_leaf_ = new_node1\n new_node1.prev_leaf_ = node.prev_leaf_\n new_node1.next_leaf_ = new_node2\n new_node2.prev_leaf_ = new_node1\n new_node2.next_leaf_ = node.next_leaf_\n if node.next_leaf_ is not None:\n node.next_leaf_.prev_leaf_ = new_node2\n\n dist = euclidean_distances(\n node.centroids_, Y_norm_squared=node.squared_norm_, squared=True)\n n_clusters = dist.shape[0]\n\n farthest_idx = np.unravel_index(\n dist.argmax(), (n_clusters, n_clusters))\n node1_dist, node2_dist = dist[(farthest_idx,)]\n\n node1_closer = node1_dist < node2_dist\n for idx, subcluster in enumerate(node.subclusters_):\n if node1_closer[idx]:\n new_node1.append_subcluster(subcluster)\n new_subcluster1.update(subcluster)\n else:\n new_node2.append_subcluster(subcluster)\n new_subcluster2.update(subcluster)\n return new_subcluster1, new_subcluster2" + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/__init__", + "name": "__init__", + "qname": "sklearn.cluster._dbscan.DBSCAN.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/__init__/self", + "name": "self", + "qname": "sklearn.cluster._dbscan.DBSCAN.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/__init__/eps", + "name": "eps", + "qname": "sklearn.cluster._dbscan.DBSCAN.__init__.eps", + "default_value": "0.5", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "The maximum distance between two samples for one to be considered\nas in the neighborhood of the other. This is not a maximum bound\non the distances of points within a cluster. This is the most\nimportant DBSCAN parameter to choose appropriately for your data set\nand distance function." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/__init__/min_samples", + "name": "min_samples", + "qname": "sklearn.cluster._dbscan.DBSCAN.__init__.min_samples", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "The number of samples (or total weight) in a neighborhood for a point\nto be considered as a core point. This includes the point itself." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/__init__/metric", + "name": "metric", + "qname": "sklearn.cluster._dbscan.DBSCAN.__init__.metric", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string, or callable", + "default_value": "'euclidean'", + "description": "The metric to use when calculating distance between instances in a\nfeature array. 
If metric is a string or callable, it must be one of\nthe options allowed by :func:`sklearn.metrics.pairwise_distances` for\nits metric parameter.\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square. X may be a :term:`Glossary `, in which\ncase only \"nonzero\" elements may be considered neighbors for DBSCAN.\n\n.. versionadded:: 0.17\n metric *precomputed* to accept precomputed sparse matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/__init__/metric_params", + "name": "metric_params", + "qname": "sklearn.cluster._dbscan.DBSCAN.__init__.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional keyword arguments for the metric function.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.cluster._dbscan.DBSCAN.__init__.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", + "default_value": "'auto'", + "description": "The algorithm to be used by the NearestNeighbors module\nto compute pointwise distances and find nearest neighbors.\nSee NearestNeighbors module documentation for details." + }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/__init__/leaf_size", + "name": "leaf_size", + "qname": "sklearn.cluster._dbscan.DBSCAN.__init__.leaf_size", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Leaf size passed to BallTree or cKDTree. This can affect the speed\nof the construction and query, as well as the memory required\nto store the tree. The optimal value depends\non the nature of the problem." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/__init__/p", + "name": "p", + "qname": "sklearn.cluster._dbscan.DBSCAN.__init__.p", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "The power of the Minkowski metric to be used to calculate distance\nbetween points. If None, then ``p=2`` (equivalent to the Euclidean\ndistance)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.cluster._dbscan.DBSCAN.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." 
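The DBSCAN parameters documented above (`eps`, `min_samples`, noise handling) can be exercised with a minimal sketch, not part of this generated file:

```python
# Basic DBSCAN usage: two dense groups plus one far-away noise point.
import numpy as np
from sklearn.cluster import DBSCAN

X = np.array([[1.0, 2.0], [2.0, 2.0], [2.0, 3.0],
              [8.0, 7.0], [8.0, 8.0], [25.0, 80.0]])

db = DBSCAN(eps=3.0, min_samples=2).fit(X)
print(db.labels_)               # e.g. [0 0 0 1 1 -1]; -1 marks noise
print(db.core_sample_indices_)  # indices of the core samples
print(db.components_.shape)     # copies of the core samples
```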
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform DBSCAN clustering from vector array or distance matrix.\n\nDBSCAN - Density-Based Spatial Clustering of Applications with Noise.\nFinds core samples of high density and expands clusters from them.\nGood for data which contains clusters of similar density.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, eps=0.5, *, min_samples=5, metric='euclidean',\n metric_params=None, algorithm='auto', leaf_size=30, p=None,\n n_jobs=None):\n self.eps = eps\n self.min_samples = min_samples\n self.metric = metric\n self.metric_params = metric_params\n self.algorithm = algorithm\n self.leaf_size = leaf_size\n self.p = p\n self.n_jobs = n_jobs" + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/fit", + "name": "fit", + "qname": "sklearn.cluster._dbscan.DBSCAN.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/fit/self", + "name": "self", + "qname": "sklearn.cluster._dbscan.DBSCAN.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/fit/X", + "name": "X", + "qname": "sklearn.cluster._dbscan.DBSCAN.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features), or (n_samples, n_samples)", + "default_value": "", + "description": "Training instances to cluster, or distances between instances if\n``metric='precomputed'``. If a sparse matrix is provided, it will\nbe converted into a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "(n_samples, n_samples)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/fit/y", + "name": "y", + "qname": "sklearn.cluster._dbscan.DBSCAN.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._dbscan.DBSCAN.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weight of each sample, such that a sample with a weight of at least\n``min_samples`` is by itself a core sample; a sample with a\nnegative weight may inhibit its eps-neighbor from being core.\nNote that weights are absolute, and default to 1." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform DBSCAN clustering from features, or distance matrix.", + "docstring": "Perform DBSCAN clustering from features, or distance matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``metric='precomputed'``. If a sparse matrix is provided, it will\n be converted into a sparse ``csr_matrix``.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with a\n negative weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None, sample_weight=None):\n \"\"\"Perform DBSCAN clustering from features, or distance matrix.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``metric='precomputed'``. If a sparse matrix is provided, it will\n be converted into a sparse ``csr_matrix``.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with a\n negative weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n self\n\n \"\"\"\n X = self._validate_data(X, accept_sparse='csr')\n\n if not self.eps > 0.0:\n raise ValueError(\"eps must be positive.\")\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n\n # Calculate neighborhood for all samples. This leaves the original\n # point in, which needs to be considered later (i.e. point i is in the\n # neighborhood of point i. 
While True, its useless information)\n if self.metric == 'precomputed' and sparse.issparse(X):\n # set the diagonal to explicit values, as a point is its own\n # neighbor\n with warnings.catch_warnings():\n warnings.simplefilter('ignore', sparse.SparseEfficiencyWarning)\n X.setdiag(X.diagonal()) # XXX: modifies X's internals in-place\n\n neighbors_model = NearestNeighbors(\n radius=self.eps, algorithm=self.algorithm,\n leaf_size=self.leaf_size, metric=self.metric,\n metric_params=self.metric_params, p=self.p, n_jobs=self.n_jobs)\n neighbors_model.fit(X)\n # This has worst case O(n^2) memory complexity\n neighborhoods = neighbors_model.radius_neighbors(X,\n return_distance=False)\n\n if sample_weight is None:\n n_neighbors = np.array([len(neighbors)\n for neighbors in neighborhoods])\n else:\n n_neighbors = np.array([np.sum(sample_weight[neighbors])\n for neighbors in neighborhoods])\n\n # Initially, all samples are noise.\n labels = np.full(X.shape[0], -1, dtype=np.intp)\n\n # A list of all core samples found.\n core_samples = np.asarray(n_neighbors >= self.min_samples,\n dtype=np.uint8)\n dbscan_inner(core_samples, neighborhoods, labels)\n\n self.core_sample_indices_ = np.where(core_samples)[0]\n self.labels_ = labels\n\n if len(self.core_sample_indices_):\n # fix for scipy sparse indexing issue\n self.components_ = X[self.core_sample_indices_].copy()\n else:\n # no core samples\n self.components_ = np.empty((0, X.shape[1]))\n return self" + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/fit_predict", + "name": "fit_predict", + "qname": "sklearn.cluster._dbscan.DBSCAN.fit_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/fit_predict/self", + "name": "self", + "qname": "sklearn.cluster._dbscan.DBSCAN.fit_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/fit_predict/X", + "name": "X", + "qname": "sklearn.cluster._dbscan.DBSCAN.fit_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features), or (n_samples, n_samples)", + "default_value": "", + "description": "Training instances to cluster, or distances between instances if\n``metric='precomputed'``. If a sparse matrix is provided, it will\nbe converted into a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "(n_samples, n_samples)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/fit_predict/y", + "name": "y", + "qname": "sklearn.cluster._dbscan.DBSCAN.fit_predict.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." 
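The `sample_weight` logic in the `fit` body above sums weights over each eps-neighborhood, so a single point whose weight reaches `min_samples` is a core sample by itself. An illustrative sketch (not from this file):

```python
# sample_weight: one heavy point becomes core on its own.
import numpy as np
from sklearn.cluster import DBSCAN

X = np.array([[0.0, 0.0], [10.0, 10.0]])

unweighted = DBSCAN(eps=0.5, min_samples=5).fit(X)
print(unweighted.labels_)  # [-1 -1]: neither point has 5 neighbors

w = np.array([5.0, 1.0])
weighted = DBSCAN(eps=0.5, min_samples=5).fit(X, sample_weight=w)
print(weighted.labels_)    # [ 0 -1]: the heavy point is core by itself
```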
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/DBSCAN/fit_predict/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._dbscan.DBSCAN.fit_predict.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weight of each sample, such that a sample with a weight of at least\n``min_samples`` is by itself a core sample; a sample with a\nnegative weight may inhibit its eps-neighbor from being core.\nNote that weights are absolute, and default to 1." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform DBSCAN clustering from features or distance matrix,\nand return cluster labels.", + "docstring": "Perform DBSCAN clustering from features or distance matrix,\nand return cluster labels.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``metric='precomputed'``. If a sparse matrix is provided, it will\n be converted into a sparse ``csr_matrix``.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with a\n negative weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels. Noisy samples are given the label -1.", + "code": " def fit_predict(self, X, y=None, sample_weight=None):\n \"\"\"Perform DBSCAN clustering from features or distance matrix,\n and return cluster labels.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features), or \\\n (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``metric='precomputed'``. If a sparse matrix is provided, it will\n be converted into a sparse ``csr_matrix``.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with a\n negative weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Cluster labels. 
Noisy samples are given the label -1.\n \"\"\"\n self.fit(X, sample_weight=sample_weight)\n return self.labels_" + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/dbscan", + "name": "dbscan", + "qname": "sklearn.cluster._dbscan.dbscan", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._dbscan/dbscan/X", + "name": "X", + "qname": "sklearn.cluster._dbscan.dbscan.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse (CSR) matrix} of shape (n_samples, n_features) or (n_samples, n_samples)", + "default_value": "", + "description": "A feature array, or array of distances between samples if\n``metric='precomputed'``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples, n_samples)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/dbscan/eps", + "name": "eps", + "qname": "sklearn.cluster._dbscan.dbscan.eps", + "default_value": "0.5", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "The maximum distance between two samples for one to be considered\nas in the neighborhood of the other. This is not a maximum bound\non the distances of points within a cluster. This is the most\nimportant DBSCAN parameter to choose appropriately for your data set\nand distance function." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/dbscan/min_samples", + "name": "min_samples", + "qname": "sklearn.cluster._dbscan.dbscan.min_samples", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "The number of samples (or total weight) in a neighborhood for a point\nto be considered as a core point. This includes the point itself." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/dbscan/metric", + "name": "metric", + "qname": "sklearn.cluster._dbscan.dbscan.metric", + "default_value": "'minkowski'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'minkowski'", + "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string or callable, it must be one of\nthe options allowed by :func:`sklearn.metrics.pairwise_distances` for\nits metric parameter.\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square during fit.\nX may be a :term:`sparse graph `,\nin which case only \"nonzero\" elements may be considered neighbors." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/dbscan/metric_params", + "name": "metric_params", + "qname": "sklearn.cluster._dbscan.dbscan.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional keyword arguments for the metric function.\n\n.. 
versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/dbscan/algorithm", + "name": "algorithm", + "qname": "sklearn.cluster._dbscan.dbscan.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", + "default_value": "'auto'", + "description": "The algorithm to be used by the NearestNeighbors module\nto compute pointwise distances and find nearest neighbors.\nSee NearestNeighbors module documentation for details." + }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/dbscan/leaf_size", + "name": "leaf_size", + "qname": "sklearn.cluster._dbscan.dbscan.leaf_size", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Leaf size passed to BallTree or cKDTree. This can affect the speed\nof the construction and query, as well as the memory required\nto store the tree. The optimal value depends\non the nature of the problem." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/dbscan/p", + "name": "p", + "qname": "sklearn.cluster._dbscan.dbscan.p", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "2", + "description": "The power of the Minkowski metric to be used to calculate distance\nbetween points." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/dbscan/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._dbscan.dbscan.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weight of each sample, such that a sample with a weight of at least\n``min_samples`` is by itself a core sample; a sample with negative\nweight may inhibit its eps-neighbor from being core.\nNote that weights are absolute, and default to 1." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._dbscan/dbscan/n_jobs", + "name": "n_jobs", + "qname": "sklearn.cluster._dbscan.dbscan.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search. ``None`` means\n1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means\nusing all processors. See :term:`Glossary ` for more details.\nIf precomputed distances are used, parallel execution is not available\nand thus n_jobs will have no effect."
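The entry above fully specifies the functional `dbscan` interface. As an illustrative sketch only (the toy array and the `eps`/`min_samples` values are assumptions, not part of the recorded API data):

```python
# Minimal sketch only: calling the functional DBSCAN interface recorded
# above. The toy array and the eps/min_samples values are assumptions.
import numpy as np
from sklearn.cluster import dbscan

X = np.array([[1.0, 2.0], [2.0, 2.0], [2.0, 3.0],
              [8.0, 7.0], [8.0, 8.0], [25.0, 80.0]])

# Everything after eps is keyword-only, per the recorded
# _deprecate_positional_args decorator.
core_samples, labels = dbscan(X, eps=3.0, min_samples=2)
print(core_samples)  # indices of core samples
print(labels)        # noisy samples get the label -1
```

The estimator form `DBSCAN(eps=3.0, min_samples=2).fit_predict(X)` returns the same labels.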
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform DBSCAN clustering from vector array or distance matrix.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Perform DBSCAN clustering from vector array or distance matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse (CSR) matrix} of shape (n_samples, n_features) or (n_samples, n_samples)\n A feature array, or array of distances between samples if\n ``metric='precomputed'``.\n\neps : float, default=0.5\n The maximum distance between two samples for one to be considered\n as in the neighborhood of the other. This is not a maximum bound\n on the distances of points within a cluster. This is the most\n important DBSCAN parameter to choose appropriately for your data set\n and distance function.\n\nmin_samples : int, default=5\n The number of samples (or total weight) in a neighborhood for a point\n to be considered as a core point. This includes the point itself.\n\nmetric : str or callable, default='minkowski'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit.\n X may be a :term:`sparse graph `,\n in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. versionadded:: 0.19\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n The algorithm to be used by the NearestNeighbors module\n to compute pointwise distances and find nearest neighbors.\n See NearestNeighbors module documentation for details.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or cKDTree. This can affect the speed\n of the construction and query, as well as the memory required\n to store the tree. The optimal value depends\n on the nature of the problem.\n\np : float, default=2\n The power of the Minkowski metric to be used to calculate distance\n between points.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with negative\n weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search. ``None`` means\n 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means\n using all processors. See :term:`Glossary ` for more details.\n If precomputed distance are used, parallel execution is not available\n and thus n_jobs will have no effect.\n\nReturns\n-------\ncore_samples : ndarray of shape (n_core_samples,)\n Indices of core samples.\n\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point. Noisy samples are given the label -1.\n\nSee Also\n--------\nDBSCAN : An estimator interface for this clustering algorithm.\nOPTICS : A similar estimator interface clustering at multiple values of\n eps. 
Our implementation is optimized for memory usage.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_dbscan.py\n`.\n\nThis implementation bulk-computes all neighborhood queries, which increases\nthe memory complexity to O(n.d) where d is the average number of neighbors,\nwhile original DBSCAN had memory complexity O(n). It may attract a higher\nmemory complexity when querying these nearest neighborhoods, depending\non the ``algorithm``.\n\nOne way to avoid the query complexity is to pre-compute sparse\nneighborhoods in chunks using\n:func:`NearestNeighbors.radius_neighbors_graph\n` with\n``mode='distance'``, then using ``metric='precomputed'`` here.\n\nAnother way to reduce memory and computation time is to remove\n(near-)duplicate points and use ``sample_weight`` instead.\n\n:func:`cluster.optics ` provides a similar\nclustering with lower memory usage.\n\nReferences\n----------\nEster, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\nAlgorithm for Discovering Clusters in Large Spatial Databases with Noise\".\nIn: Proceedings of the 2nd International Conference on Knowledge Discovery\nand Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\nSchubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\nDBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\nACM Transactions on Database Systems (TODS), 42(3), 19.", + "code": "@_deprecate_positional_args\ndef dbscan(X, eps=0.5, *, min_samples=5, metric='minkowski',\n metric_params=None, algorithm='auto', leaf_size=30, p=2,\n sample_weight=None, n_jobs=None):\n \"\"\"Perform DBSCAN clustering from vector array or distance matrix.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse (CSR) matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples)\n A feature array, or array of distances between samples if\n ``metric='precomputed'``.\n\n eps : float, default=0.5\n The maximum distance between two samples for one to be considered\n as in the neighborhood of the other. This is not a maximum bound\n on the distances of points within a cluster. This is the most\n important DBSCAN parameter to choose appropriately for your data set\n and distance function.\n\n min_samples : int, default=5\n The number of samples (or total weight) in a neighborhood for a point\n to be considered as a core point. This includes the point itself.\n\n metric : str or callable, default='minkowski'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit.\n X may be a :term:`sparse graph `,\n in which case only \"nonzero\" elements may be considered neighbors.\n\n metric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. versionadded:: 0.19\n\n algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n The algorithm to be used by the NearestNeighbors module\n to compute pointwise distances and find nearest neighbors.\n See NearestNeighbors module documentation for details.\n\n leaf_size : int, default=30\n Leaf size passed to BallTree or cKDTree. This can affect the speed\n of the construction and query, as well as the memory required\n to store the tree. 
The optimal value depends\n        on the nature of the problem.\n\n    p : float, default=2\n        The power of the Minkowski metric to be used to calculate distance\n        between points.\n\n    sample_weight : array-like of shape (n_samples,), default=None\n        Weight of each sample, such that a sample with a weight of at least\n        ``min_samples`` is by itself a core sample; a sample with negative\n        weight may inhibit its eps-neighbor from being core.\n        Note that weights are absolute, and default to 1.\n\n    n_jobs : int, default=None\n        The number of parallel jobs to run for neighbors search. ``None`` means\n        1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means\n        using all processors. See :term:`Glossary ` for more details.\n        If precomputed distances are used, parallel execution is not available\n        and thus n_jobs will have no effect.\n\n    Returns\n    -------\n    core_samples : ndarray of shape (n_core_samples,)\n        Indices of core samples.\n\n    labels : ndarray of shape (n_samples,)\n        Cluster labels for each point. Noisy samples are given the label -1.\n\n    See Also\n    --------\n    DBSCAN : An estimator interface for this clustering algorithm.\n    OPTICS : A similar estimator interface clustering at multiple values of\n        eps. Our implementation is optimized for memory usage.\n\n    Notes\n    -----\n    For an example, see :ref:`examples/cluster/plot_dbscan.py\n    `.\n\n    This implementation bulk-computes all neighborhood queries, which increases\n    the memory complexity to O(n.d) where d is the average number of neighbors,\n    while original DBSCAN had memory complexity O(n). It may attract a higher\n    memory complexity when querying these nearest neighborhoods, depending\n    on the ``algorithm``.\n\n    One way to avoid the query complexity is to pre-compute sparse\n    neighborhoods in chunks using\n    :func:`NearestNeighbors.radius_neighbors_graph\n    ` with\n    ``mode='distance'``, then using ``metric='precomputed'`` here.\n\n    Another way to reduce memory and computation time is to remove\n    (near-)duplicate points and use ``sample_weight`` instead.\n\n    :func:`cluster.optics ` provides a similar\n    clustering with lower memory usage.\n\n    References\n    ----------\n    Ester, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\n    Algorithm for Discovering Clusters in Large Spatial Databases with Noise\".\n    In: Proceedings of the 2nd International Conference on Knowledge Discovery\n    and Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\n    Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. 
(2017).\n DBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\n ACM Transactions on Database Systems (TODS), 42(3), 19.\n \"\"\"\n\n est = DBSCAN(eps=eps, min_samples=min_samples, metric=metric,\n metric_params=metric_params, algorithm=algorithm,\n leaf_size=leaf_size, p=p, n_jobs=n_jobs)\n est.fit(X, sample_weight=sample_weight)\n return est.core_sample_indices_, est.labels_" + }, + { + "id": "scikit-learn/sklearn.cluster._feature_agglomeration/AgglomerationTransform/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.cluster._feature_agglomeration.AgglomerationTransform.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._feature_agglomeration/AgglomerationTransform/inverse_transform/self", + "name": "self", + "qname": "sklearn.cluster._feature_agglomeration.AgglomerationTransform.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._feature_agglomeration/AgglomerationTransform/inverse_transform/Xred", + "name": "Xred", + "qname": "sklearn.cluster._feature_agglomeration.AgglomerationTransform.inverse_transform.Xred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_clusters) or (n_clusters,)", + "default_value": "", + "description": "The values to be assigned to each cluster of samples" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_clusters) or (n_clusters,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Inverse the transformation.\nReturn a vector of size nb_features with the values of Xred assigned\nto each group of features", + "docstring": "Inverse the transformation.\nReturn a vector of size nb_features with the values of Xred assigned\nto each group of features\n\nParameters\n----------\nXred : array-like of shape (n_samples, n_clusters) or (n_clusters,)\n The values to be assigned to each cluster of samples\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features) or (n_features,)\n A vector of size n_samples with the values of Xred assigned to\n each of the cluster of samples.", + "code": " def inverse_transform(self, Xred):\n \"\"\"\n Inverse the transformation.\n Return a vector of size nb_features with the values of Xred assigned\n to each group of features\n\n Parameters\n ----------\n Xred : array-like of shape (n_samples, n_clusters) or (n_clusters,)\n The values to be assigned to each cluster of samples\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features) or (n_features,)\n A vector of size n_samples with the values of Xred assigned to\n each of the cluster of samples.\n \"\"\"\n check_is_fitted(self)\n\n unil, inverse = np.unique(self.labels_, return_inverse=True)\n return Xred[..., inverse]" + }, + { + "id": "scikit-learn/sklearn.cluster._feature_agglomeration/AgglomerationTransform/transform", + "name": "transform", + "qname": "sklearn.cluster._feature_agglomeration.AgglomerationTransform.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._feature_agglomeration/AgglomerationTransform/transform/self", + "name": "self", + "qname": "sklearn.cluster._feature_agglomeration.AgglomerationTransform.transform.self", + "default_value": null, + "assigned_by": 
"IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._feature_agglomeration/AgglomerationTransform/transform/X", + "name": "X", + "qname": "sklearn.cluster._feature_agglomeration.AgglomerationTransform.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or (n_samples,)", + "default_value": "", + "description": "A M by N array of M observations in N dimensions or a length\nM array of M one-dimensional observations." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features) or (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform a new matrix using the built clustering", + "docstring": "Transform a new matrix using the built clustering\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples,)\n A M by N array of M observations in N dimensions or a length\n M array of M one-dimensional observations.\n\nReturns\n-------\nY : ndarray of shape (n_samples, n_clusters) or (n_clusters,)\n The pooled values for each feature cluster.", + "code": " def transform(self, X):\n \"\"\"\n Transform a new matrix using the built clustering\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or (n_samples,)\n A M by N array of M observations in N dimensions or a length\n M array of M one-dimensional observations.\n\n Returns\n -------\n Y : ndarray of shape (n_samples, n_clusters) or (n_clusters,)\n The pooled values for each feature cluster.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, reset=False)\n if self.pooling_func == np.mean and not issparse(X):\n size = np.bincount(self.labels_)\n n_samples = X.shape[0]\n # a fast way to compute the mean of grouped features\n nX = np.array([np.bincount(self.labels_, X[i, :]) / size\n for i in range(n_samples)])\n else:\n nX = [self.pooling_func(X[:, self.labels_ == l], axis=1)\n for l in np.unique(self.labels_)]\n nX = np.array(nX).T\n return nX" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__", + "name": "__init__", + "qname": "sklearn.cluster._kmeans.KMeans.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._kmeans.KMeans.__init__.n_clusters", + "default_value": "8", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "8", + "description": "The number of clusters to form as well as the number of\ncentroids to generate." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__/init", + "name": "init", + "qname": "sklearn.cluster._kmeans.KMeans.__init__.init", + "default_value": "'k-means++'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features)", + "default_value": "'k-means++'", + "description": "Method for initialization:\n\n'k-means++' : selects initial cluster centers for k-mean\nclustering in a smart way to speed up convergence. See section\nNotes in k_init for more details.\n\n'random': choose `n_clusters` observations (rows) at random from data\nfor the initial centroids.\n\nIf an array is passed, it should be of shape (n_clusters, n_features)\nand gives the initial centers.\n\nIf a callable is passed, it should take arguments X, n_clusters and a\nrandom state and return an initialization." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["random", "k-means++"] + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_clusters, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__/n_init", + "name": "n_init", + "qname": "sklearn.cluster._kmeans.KMeans.__init__.n_init", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of time the k-means algorithm will be run with different\ncentroid seeds. The final results will be the best output of\nn_init consecutive runs in terms of inertia." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.cluster._kmeans.KMeans.__init__.max_iter", + "default_value": "300", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations of the k-means algorithm for a\nsingle run." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__/tol", + "name": "tol", + "qname": "sklearn.cluster._kmeans.KMeans.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Relative tolerance with regards to Frobenius norm of the difference\nin the cluster centers of two consecutive iterations to declare\nconvergence." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__/precompute_distances", + "name": "precompute_distances", + "qname": "sklearn.cluster._kmeans.KMeans.__init__.precompute_distances", + "default_value": "'deprecated'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', True, False}", + "default_value": "'auto'", + "description": "Precompute distances (faster but takes more memory).\n\n'auto' : do not precompute distances if n_samples * n_clusters > 12\nmillion. This corresponds to about 100MB overhead per job using\ndouble precision.\n\nTrue : always precompute distances.\n\nFalse : never precompute distances.\n\n.. 
deprecated:: 0.23\n 'precompute_distances' was deprecated in version 0.22 and will be\n removed in 1.0 (renaming of 0.25). It has no effect." + }, + "type": { + "kind": "EnumType", + "values": ["auto"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__/verbose", + "name": "verbose", + "qname": "sklearn.cluster._kmeans.KMeans.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Verbosity mode." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__/random_state", + "name": "random_state", + "qname": "sklearn.cluster._kmeans.KMeans.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for centroid initialization. Use\nan int to make the randomness deterministic.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__/copy_x", + "name": "copy_x", + "qname": "sklearn.cluster._kmeans.KMeans.__init__.copy_x", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "When pre-computing distances it is more numerically accurate to center\nthe data first. If copy_x is True (default), then the original data is\nnot modified. If False, the original data is modified, and put back\nbefore the function returns, but small numerical differences may be\nintroduced by subtracting and then adding the data mean. Note that if\nthe original data is not C-contiguous, a copy will be made even if\ncopy_x is False. If the original data is sparse, but not in CSR format,\na copy will be made even if copy_x is False." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.cluster._kmeans.KMeans.__init__.n_jobs", + "default_value": "'deprecated'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of OpenMP threads to use for the computation. Parallelism is\nsample-wise on the main cython loop which assigns each sample to its\nclosest center.\n\n``None`` or ``-1`` means using all processors.\n\n.. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25)." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.cluster._kmeans.KMeans.__init__.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"auto\", \"full\", \"elkan\"}", + "default_value": "\"auto\"", + "description": "K-means algorithm to use. The classical EM-style algorithm is \"full\".\nThe \"elkan\" variation is more efficient on data with well-defined\nclusters, by using the triangle inequality. 
However it's more memory\nintensive due to the allocation of an extra array of shape\n(n_samples, n_clusters).\n\nFor now \"auto\" (kept for backward compatibility) chooses \"elkan\" but it\nmight change in the future for a better heuristic.\n\n.. versionchanged:: 0.18\n    Added Elkan algorithm" + }, + "type": { + "kind": "EnumType", + "values": ["full", "auto", "elkan"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "K-Means clustering.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_clusters=8, *, init='k-means++', n_init=10,\n max_iter=300, tol=1e-4, precompute_distances='deprecated',\n verbose=0, random_state=None, copy_x=True,\n n_jobs='deprecated', algorithm='auto'):\n\n self.n_clusters = n_clusters\n self.init = init\n self.max_iter = max_iter\n self.tol = tol\n self.precompute_distances = precompute_distances\n self.n_init = n_init\n self.verbose = verbose\n self.random_state = random_state\n self.copy_x = copy_x\n self.n_jobs = n_jobs\n self.algorithm = algorithm" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_check_mkl_vcomp", + "name": "_check_mkl_vcomp", + "qname": "sklearn.cluster._kmeans.KMeans._check_mkl_vcomp", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_check_mkl_vcomp/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans._check_mkl_vcomp.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_check_mkl_vcomp/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.KMeans._check_mkl_vcomp.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_check_mkl_vcomp/n_samples", + "name": "n_samples", + "qname": "sklearn.cluster._kmeans.KMeans._check_mkl_vcomp.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Warns when vcomp and mkl are both present", + "docstring": "Warns when vcomp and mkl are both present", + "code": " def _check_mkl_vcomp(self, X, n_samples):\n \"\"\"Warns when vcomp and mkl are both present\"\"\"\n # The BLAS call inside a prange in lloyd_iter_chunked_dense is known to\n # cause a small memory leak when there are less chunks than the number\n # of available threads. It only happens when the OpenMP library is\n # vcomp (microsoft OpenMP) and the BLAS library is MKL. see #18653\n if sp.issparse(X):\n return\n\n active_threads = int(np.ceil(n_samples / CHUNK_SIZE))\n if active_threads < self._n_threads:\n modules = threadpool_info()\n has_vcomp = \"vcomp\" in [module[\"prefix\"] for module in modules]\n has_mkl = (\"mkl\", \"intel\") in [\n (module[\"internal_api\"], module.get(\"threading_layer\", None))\n for module in modules]\n if has_vcomp and has_mkl:\n if not hasattr(self, \"batch_size\"): # KMeans\n warnings.warn(\n f\"KMeans is known to have a memory leak on Windows \"\n f\"with MKL, when there are less chunks than available \"\n f\"threads. 
You can avoid it by setting the environment\"\n f\" variable OMP_NUM_THREADS={active_threads}.\")\n else: # MiniBatchKMeans\n warnings.warn(\n f\"MiniBatchKMeans is known to have a memory leak on \"\n f\"Windows with MKL, when there are less chunks than \"\n f\"available threads. You can prevent it by setting \"\n f\"batch_size >= {self._n_threads * CHUNK_SIZE} or by \"\n f\"setting the environment variable \"\n f\"OMP_NUM_THREADS={active_threads}\")" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_check_params", + "name": "_check_params", + "qname": "sklearn.cluster._kmeans.KMeans._check_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_check_params/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans._check_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_check_params/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.KMeans._check_params.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_params(self, X):\n # precompute_distances\n if self.precompute_distances != 'deprecated':\n warnings.warn(\"'precompute_distances' was deprecated in version \"\n \"0.23 and will be removed in 1.0 (renaming of 0.25)\"\n \". It has no effect\", FutureWarning)\n\n # n_jobs\n if self.n_jobs != 'deprecated':\n warnings.warn(\"'n_jobs' was deprecated in version 0.23 and will be\"\n \" removed in 1.0 (renaming of 0.25).\", FutureWarning)\n self._n_threads = self.n_jobs\n else:\n self._n_threads = None\n self._n_threads = _openmp_effective_n_threads(self._n_threads)\n\n # n_init\n if self.n_init <= 0:\n raise ValueError(\n f\"n_init should be > 0, got {self.n_init} instead.\")\n self._n_init = self.n_init\n\n # max_iter\n if self.max_iter <= 0:\n raise ValueError(\n f\"max_iter should be > 0, got {self.max_iter} instead.\")\n\n # n_clusters\n if X.shape[0] < self.n_clusters:\n raise ValueError(f\"n_samples={X.shape[0]} should be >= \"\n f\"n_clusters={self.n_clusters}.\")\n\n # tol\n self._tol = _tolerance(X, self.tol)\n\n # algorithm\n if self.algorithm not in (\"auto\", \"full\", \"elkan\"):\n raise ValueError(f\"Algorithm must be 'auto', 'full' or 'elkan', \"\n f\"got {self.algorithm} instead.\")\n\n self._algorithm = self.algorithm\n if self._algorithm == \"auto\":\n self._algorithm = \"full\" if self.n_clusters == 1 else \"elkan\"\n if self._algorithm == \"elkan\" and self.n_clusters == 1:\n warnings.warn(\"algorithm='elkan' doesn't make sense for a single \"\n \"cluster. 
Using 'full' instead.\", RuntimeWarning)\n self._algorithm = \"full\"\n\n # init\n if not (hasattr(self.init, '__array__') or callable(self.init)\n or (isinstance(self.init, str)\n and self.init in [\"k-means++\", \"random\"])):\n raise ValueError(\n f\"init should be either 'k-means++', 'random', a ndarray or a \"\n f\"callable, got '{self.init}' instead.\")\n\n if hasattr(self.init, '__array__') and self._n_init != 1:\n warnings.warn(\n f\"Explicit initial center position passed: performing only\"\n f\" one init in {self.__class__.__name__} instead of \"\n f\"n_init={self._n_init}.\", RuntimeWarning, stacklevel=2)\n self._n_init = 1" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_check_test_data", + "name": "_check_test_data", + "qname": "sklearn.cluster._kmeans.KMeans._check_test_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_check_test_data/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans._check_test_data.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_check_test_data/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.KMeans._check_test_data.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_test_data(self, X):\n X = self._validate_data(X, accept_sparse='csr', reset=False,\n dtype=[np.float64, np.float32],\n order='C', accept_large_sparse=False)\n return X" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_init_centroids", + "name": "_init_centroids", + "qname": "sklearn.cluster._kmeans.KMeans._init_centroids", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_init_centroids/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans._init_centroids.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_init_centroids/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.KMeans._init_centroids.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_init_centroids/x_squared_norms", + "name": "x_squared_norms", + "qname": "sklearn.cluster._kmeans.KMeans._init_centroids.x_squared_norms", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Squared euclidean norm of each data point. Pass it if you have it\nat hands already to avoid it being recomputed here." 
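The `_check_params` code recorded above is what produces the validation errors users see from the public API. A sketch of two of those checks (the toy data is an assumption):

```python
# Minimal sketch only: two of the validations recorded in _check_params,
# triggered through the public constructor. Toy data is an assumption.
import numpy as np
from sklearn.cluster import KMeans

X = np.random.RandomState(0).rand(10, 2)

try:
    KMeans(n_clusters=2, n_init=0).fit(X)
except ValueError as e:
    print(e)  # n_init should be > 0, got 0 instead.

try:
    KMeans(n_clusters=20).fit(X)  # more clusters than samples
except ValueError as e:
    print(e)  # n_samples=10 should be >= n_clusters=20.
```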
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_init_centroids/init", + "name": "init", + "qname": "sklearn.cluster._kmeans.KMeans._init_centroids.init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'k-means++', 'random'}, callable or ndarray of shape (n_clusters, n_features)", + "default_value": "", + "description": "Method for initialization." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["random", "k-means++"] + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_clusters, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_init_centroids/random_state", + "name": "random_state", + "qname": "sklearn.cluster._kmeans.KMeans._init_centroids.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "RandomState instance", + "default_value": "", + "description": "Determines random number generation for centroid initialization.\nSee :term:`Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "RandomState instance" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_init_centroids/init_size", + "name": "init_size", + "qname": "sklearn.cluster._kmeans.KMeans._init_centroids.init_size", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of samples to randomly sample for speeding up the\ninitialization (sometimes at the expense of accuracy)." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the initial centroids.", + "docstring": "Compute the initial centroids.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Squared euclidean norm of each data point. Pass it if you have it\n at hands already to avoid it being recomputed here.\n\ninit : {'k-means++', 'random'}, callable or ndarray of shape (n_clusters, n_features)\n Method for initialization.\n\nrandom_state : RandomState instance\n Determines random number generation for centroid initialization.\n See :term:`Glossary `.\n\ninit_size : int, default=None\n Number of samples to randomly sample for speeding up the\n initialization (sometimes at the expense of accuracy).\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)", + "code": " def _init_centroids(self, X, x_squared_norms, init, random_state,\n init_size=None):\n \"\"\"Compute the initial centroids.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\n x_squared_norms : ndarray of shape (n_samples,)\n Squared euclidean norm of each data point. 
Pass it if you have it\n at hands already to avoid it being recomputed here.\n\n init : {'k-means++', 'random'}, callable or ndarray of shape \\\n (n_clusters, n_features)\n Method for initialization.\n\n random_state : RandomState instance\n Determines random number generation for centroid initialization.\n See :term:`Glossary `.\n\n init_size : int, default=None\n Number of samples to randomly sample for speeding up the\n initialization (sometimes at the expense of accuracy).\n\n Returns\n -------\n centers : ndarray of shape (n_clusters, n_features)\n \"\"\"\n n_samples = X.shape[0]\n n_clusters = self.n_clusters\n\n if init_size is not None and init_size < n_samples:\n init_indices = random_state.randint(0, n_samples, init_size)\n X = X[init_indices]\n x_squared_norms = x_squared_norms[init_indices]\n n_samples = X.shape[0]\n\n if isinstance(init, str) and init == 'k-means++':\n centers, _ = _kmeans_plusplus(X, n_clusters,\n random_state=random_state,\n x_squared_norms=x_squared_norms)\n elif isinstance(init, str) and init == 'random':\n seeds = random_state.permutation(n_samples)[:n_clusters]\n centers = X[seeds]\n elif hasattr(init, '__array__'):\n centers = init\n elif callable(init):\n centers = init(X, n_clusters, random_state=random_state)\n centers = check_array(\n centers, dtype=X.dtype, copy=False, order='C')\n self._validate_center_shape(X, centers)\n\n if sp.issparse(centers):\n centers = centers.toarray()\n\n return centers" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_more_tags", + "name": "_more_tags", + "qname": "sklearn.cluster._kmeans.KMeans._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_more_tags/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n },\n }" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_transform", + "name": "_transform", + "qname": "sklearn.cluster._kmeans.KMeans._transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_transform/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans._transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_transform/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.KMeans._transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Guts of transform method; no input validation.", + "docstring": "Guts of transform method; no input validation.", + "code": " def _transform(self, X):\n \"\"\"Guts of transform method; no input validation.\"\"\"\n return euclidean_distances(X, self.cluster_centers_)" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_validate_center_shape", + 
"name": "_validate_center_shape", + "qname": "sklearn.cluster._kmeans.KMeans._validate_center_shape", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_validate_center_shape/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans._validate_center_shape.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_validate_center_shape/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.KMeans._validate_center_shape.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/_validate_center_shape/centers", + "name": "centers", + "qname": "sklearn.cluster._kmeans.KMeans._validate_center_shape.centers", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check if centers is compatible with X and n_clusters.", + "docstring": "Check if centers is compatible with X and n_clusters.", + "code": " def _validate_center_shape(self, X, centers):\n \"\"\"Check if centers is compatible with X and n_clusters.\"\"\"\n if centers.shape[0] != self.n_clusters:\n raise ValueError(\n f\"The shape of the initial centers {centers.shape} does not \"\n f\"match the number of clusters {self.n_clusters}.\")\n if centers.shape[1] != X.shape[1]:\n raise ValueError(\n f\"The shape of the initial centers {centers.shape} does not \"\n f\"match the number of features of the data {X.shape[1]}.\")" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit", + "name": "fit", + "qname": "sklearn.cluster._kmeans.KMeans.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.KMeans.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training instances to cluster. It must be noted that the data\nwill be converted to C ordering, which will cause a memory\ncopy if the given data is not C-contiguous.\nIf a sparse matrix is passed, a copy will be made if it's not in\nCSR format." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit/y", + "name": "y", + "qname": "sklearn.cluster._kmeans.KMeans.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." 
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans.KMeans.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The weights for each observation in X. If None, all observations\nare assigned equal weight.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute k-means clustering.", + "docstring": "Compute k-means clustering.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training instances to cluster. It must be noted that the data\n will be converted to C ordering, which will cause a memory\n copy if the given data is not C-contiguous.\n If a sparse matrix is passed, a copy will be made if it's not in\n CSR format.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\n .. versionadded:: 0.20\n\nReturns\n-------\nself\n Fitted estimator.", + "code": " def fit(self, X, y=None, sample_weight=None):\n \"\"\"Compute k-means clustering.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training instances to cluster. It must be noted that the data\n will be converted to C ordering, which will cause a memory\n copy if the given data is not C-contiguous.\n If a sparse matrix is passed, a copy will be made if it's not in\n CSR format.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\n .. 
versionadded:: 0.20\n\n Returns\n -------\n self\n Fitted estimator.\n \"\"\"\n X = self._validate_data(X, accept_sparse='csr',\n dtype=[np.float64, np.float32],\n order='C', copy=self.copy_x,\n accept_large_sparse=False)\n\n self._check_params(X)\n random_state = check_random_state(self.random_state)\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n # Validate init array\n init = self.init\n if hasattr(init, '__array__'):\n init = check_array(init, dtype=X.dtype, copy=True, order='C')\n self._validate_center_shape(X, init)\n\n # subtract of mean of x for more accurate distance computations\n if not sp.issparse(X):\n X_mean = X.mean(axis=0)\n # The copy was already done above\n X -= X_mean\n\n if hasattr(init, '__array__'):\n init -= X_mean\n\n # precompute squared norms of data points\n x_squared_norms = row_norms(X, squared=True)\n\n if self._algorithm == \"full\":\n kmeans_single = _kmeans_single_lloyd\n self._check_mkl_vcomp(X, X.shape[0])\n else:\n kmeans_single = _kmeans_single_elkan\n\n best_inertia = None\n\n for i in range(self._n_init):\n # Initialize centers\n centers_init = self._init_centroids(\n X, x_squared_norms=x_squared_norms, init=init,\n random_state=random_state)\n if self.verbose:\n print(\"Initialization complete\")\n\n # run a k-means once\n labels, inertia, centers, n_iter_ = kmeans_single(\n X, sample_weight, centers_init, max_iter=self.max_iter,\n verbose=self.verbose, tol=self._tol,\n x_squared_norms=x_squared_norms, n_threads=self._n_threads)\n\n # determine if these results are the best so far\n if best_inertia is None or inertia < best_inertia:\n best_labels = labels\n best_centers = centers\n best_inertia = inertia\n best_n_iter = n_iter_\n\n if not sp.issparse(X):\n if not self.copy_x:\n X += X_mean\n best_centers += X_mean\n\n distinct_clusters = len(set(best_labels))\n if distinct_clusters < self.n_clusters:\n warnings.warn(\n \"Number of distinct clusters ({}) found smaller than \"\n \"n_clusters ({}). Possibly due to duplicate points \"\n \"in X.\".format(distinct_clusters, self.n_clusters),\n ConvergenceWarning, stacklevel=2)\n\n self.cluster_centers_ = best_centers\n self.labels_ = best_labels\n self.inertia_ = best_inertia\n self.n_iter_ = best_n_iter\n return self" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit_predict", + "name": "fit_predict", + "qname": "sklearn.cluster._kmeans.KMeans.fit_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit_predict/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans.fit_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit_predict/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.KMeans.fit_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "New data to transform." 
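Putting the `fit` entry above into practice, a minimal sketch (the two-blob toy data is an assumption):

```python
# Minimal sketch only: fitting KMeans as documented above and reading the
# fitted attributes set at the end of fit(). Toy data assumed.
import numpy as np
from sklearn.cluster import KMeans

X = np.array([[1, 2], [1, 4], [1, 0],
              [10, 2], [10, 4], [10, 0]], dtype=float)

km = KMeans(n_clusters=2, random_state=0).fit(X)
print(km.cluster_centers_)       # best centers over the n_init runs
print(km.labels_)                # cluster index per training sample
print(km.inertia_, km.n_iter_)   # best inertia and iterations used
```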
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit_predict/y", + "name": "y", + "qname": "sklearn.cluster._kmeans.KMeans.fit_predict.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit_predict/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans.KMeans.fit_predict.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The weights for each observation in X. If None, all observations\nare assigned equal weight." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute cluster centers and predict cluster index for each sample.\n\nConvenience method; equivalent to calling fit(X) followed by\npredict(X).", + "docstring": "Compute cluster centers and predict cluster index for each sample.\n\nConvenience method; equivalent to calling fit(X) followed by\npredict(X).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to.", + "code": " def fit_predict(self, X, y=None, sample_weight=None):\n \"\"\"Compute cluster centers and predict cluster index for each sample.\n\n Convenience method; equivalent to calling fit(X) followed by\n predict(X).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. 
If None, all observations\n are assigned equal weight.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to.\n \"\"\"\n return self.fit(X, sample_weight=sample_weight).labels_" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit_transform", + "name": "fit_transform", + "qname": "sklearn.cluster._kmeans.KMeans.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit_transform/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit_transform/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.KMeans.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "New data to transform." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit_transform/y", + "name": "y", + "qname": "sklearn.cluster._kmeans.KMeans.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/fit_transform/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans.KMeans.fit_transform.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The weights for each observation in X. If None, all observations\nare assigned equal weight." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute clustering and transform X to cluster-distance space.\n\nEquivalent to fit(X).transform(X), but more efficiently implemented.", + "docstring": "Compute clustering and transform X to cluster-distance space.\n\nEquivalent to fit(X).transform(X), but more efficiently implemented.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. 
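The `fit_transform` docstring above states equivalence with `fit(X).transform(X)`; a quick sketch checking that claim under a fixed `random_state` (the data is an assumed toy array):

```python
# Minimal sketch only: checking the documented equivalence of
# fit_transform with fit(X).transform(X). A fixed random_state makes
# the two runs identical.
import numpy as np
from sklearn.cluster import KMeans

X = np.random.RandomState(0).rand(20, 3)

d1 = KMeans(n_clusters=4, random_state=0).fit_transform(X)
d2 = KMeans(n_clusters=4, random_state=0).fit(X).transform(X)
print(d1.shape)             # (20, 4): distance to each cluster center
print(np.allclose(d1, d2))  # True
```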
If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_clusters)\n X transformed in the new space.", + "code": " def fit_transform(self, X, y=None, sample_weight=None):\n \"\"\"Compute clustering and transform X to cluster-distance space.\n\n Equivalent to fit(X).transform(X), but more efficiently implemented.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_clusters)\n X transformed in the new space.\n \"\"\"\n # Currently, this just skips a copy of the data if it is not in\n # np.array or CSR format already.\n # XXX This skips _check_test_data, which may change the dtype;\n # we should refactor the input validation.\n return self.fit(X, sample_weight=sample_weight)._transform(X)" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/predict", + "name": "predict", + "qname": "sklearn.cluster._kmeans.KMeans.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/predict/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/predict/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.KMeans.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "New data to predict." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/predict/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans.KMeans.predict.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The weights for each observation in X. If None, all observations\nare assigned equal weight." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the closest cluster each sample in X belongs to.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.", + "docstring": "Predict the closest cluster each sample in X belongs to.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. 
If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to.", + "code": " def predict(self, X, sample_weight=None):\n \"\"\"Predict the closest cluster each sample in X belongs to.\n\n In the vector quantization literature, `cluster_centers_` is called\n the code book and each value returned by `predict` is the index of\n the closest code in the code book.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to.\n \"\"\"\n check_is_fitted(self)\n\n X = self._check_test_data(X)\n x_squared_norms = row_norms(X, squared=True)\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n return _labels_inertia(X, sample_weight, x_squared_norms,\n self.cluster_centers_, self._n_threads)[0]" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/score", + "name": "score", + "qname": "sklearn.cluster._kmeans.KMeans.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/score/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/score/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.KMeans.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "New data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/score/y", + "name": "y", + "qname": "sklearn.cluster._kmeans.KMeans.score.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans.KMeans.score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The weights for each observation in X. If None, all observations\nare assigned equal weight." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Opposite of the value of X on the K-means objective.", + "docstring": "Opposite of the value of X on the K-means objective.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nscore : float\n Opposite of the value of X on the K-means objective.", + "code": " def score(self, X, y=None, sample_weight=None):\n \"\"\"Opposite of the value of X on the K-means objective.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\n Returns\n -------\n score : float\n Opposite of the value of X on the K-means objective.\n \"\"\"\n check_is_fitted(self)\n\n X = self._check_test_data(X)\n x_squared_norms = row_norms(X, squared=True)\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n return -_labels_inertia(X, sample_weight, x_squared_norms,\n self.cluster_centers_)[1]" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/transform", + "name": "transform", + "qname": "sklearn.cluster._kmeans.KMeans.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/transform/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.KMeans.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/KMeans/transform/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.KMeans.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "New data to transform." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform X to a cluster-distance space.\n\nIn the new space, each dimension is the distance to the cluster\ncenters. Note that even if X is sparse, the array returned by\n`transform` will typically be dense.", + "docstring": "Transform X to a cluster-distance space.\n\nIn the new space, each dimension is the distance to the cluster\ncenters. 
Note that even if X is sparse, the array returned by\n`transform` will typically be dense.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_clusters)\n X transformed in the new space.", + "code": " def transform(self, X):\n \"\"\"Transform X to a cluster-distance space.\n\n In the new space, each dimension is the distance to the cluster\n centers. Note that even if X is sparse, the array returned by\n `transform` will typically be dense.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_clusters)\n X transformed in the new space.\n \"\"\"\n check_is_fitted(self)\n\n X = self._check_test_data(X)\n return self._transform(X)" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__", + "name": "__init__", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.n_clusters", + "default_value": "8", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "8", + "description": "The number of clusters to form as well as the number of\ncentroids to generate." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/init", + "name": "init", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.init", + "default_value": "'k-means++'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features)", + "default_value": "'k-means++'", + "description": "Method for initialization:\n\n'k-means++' : selects initial cluster centers for k-means\nclustering in a smart way to speed up convergence. See section\nNotes in k_init for more details.\n\n'random': choose `n_clusters` observations (rows) at random from data\nfor the initial centroids.\n\nIf an array is passed, it should be of shape (n_clusters, n_features)\nand gives the initial centers.\n\nIf a callable is passed, it should take arguments X, n_clusters and a\nrandom state and return an initialization."
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["random", "k-means++"] + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_clusters, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Maximum number of iterations over the complete dataset before\nstopping independently of any early stopping criterion heuristics." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/batch_size", + "name": "batch_size", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.batch_size", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Size of the mini batches." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/verbose", + "name": "verbose", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Verbosity mode." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/compute_labels", + "name": "compute_labels", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.compute_labels", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Compute label assignment and inertia for the complete dataset\nonce the minibatch optimization has converged in fit." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/random_state", + "name": "random_state", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for centroid initialization and\nrandom reassignment. Use an int to make the randomness deterministic.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/tol", + "name": "tol", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.tol", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Control early stopping based on the relative center changes as\nmeasured by a smoothed, variance-normalized of the mean center\nsquared position changes. 
This early stopping heuristics is\ncloser to the one used for the batch variant of the algorithms\nbut induces a slight computational and memory overhead over the\ninertia heuristic.\n\nTo disable convergence detection based on normalized center\nchange, set tol to 0.0 (default)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/max_no_improvement", + "name": "max_no_improvement", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.max_no_improvement", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Control early stopping based on the consecutive number of mini\nbatches that does not yield an improvement on the smoothed inertia.\n\nTo disable convergence detection based on inertia, set\nmax_no_improvement to None." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/init_size", + "name": "init_size", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.init_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of samples to randomly sample for speeding up the\ninitialization (sometimes at the expense of accuracy): the\nonly algorithm is initialized by running a batch KMeans on a\nrandom subset of the data. This needs to be larger than n_clusters.\n\nIf `None`, `init_size= 3 * batch_size`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/n_init", + "name": "n_init", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.n_init", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Number of random initializations that are tried.\nIn contrast to KMeans, the algorithm is only run once, using the\nbest of the ``n_init`` initializations as measured by inertia." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/__init__/reassignment_ratio", + "name": "reassignment_ratio", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.__init__.reassignment_ratio", + "default_value": "0.01", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.01", + "description": "Control the fraction of the maximum number of counts for a\ncenter to be reassigned. A higher value means that low count\ncenters are more easily reassigned, which means that the\nmodel will take longer to converge, but should converge in a\nbetter clustering." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mini-Batch K-Means clustering.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_clusters=8, *, init='k-means++', max_iter=100,\n batch_size=100, verbose=0, compute_labels=True,\n random_state=None, tol=0.0, max_no_improvement=10,\n init_size=None, n_init=3, reassignment_ratio=0.01):\n\n super().__init__(\n n_clusters=n_clusters, init=init, max_iter=max_iter,\n verbose=verbose, random_state=random_state, tol=tol, n_init=n_init)\n\n self.max_no_improvement = max_no_improvement\n self.batch_size = batch_size\n self.compute_labels = compute_labels\n self.init_size = init_size\n self.reassignment_ratio = reassignment_ratio" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/_check_params", + "name": "_check_params", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._check_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/_check_params/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._check_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/_check_params/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._check_params.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_params(self, X):\n super()._check_params(X)\n\n # max_no_improvement\n if self.max_no_improvement is not None and self.max_no_improvement < 0:\n raise ValueError(\n f\"max_no_improvement should be >= 0, got \"\n f\"{self.max_no_improvement} instead.\")\n\n # batch_size\n if self.batch_size <= 0:\n raise ValueError(\n f\"batch_size should be > 0, got {self.batch_size} instead.\")\n\n # init_size\n if self.init_size is not None and self.init_size <= 0:\n raise ValueError(\n f\"init_size should be > 0, got {self.init_size} instead.\")\n self._init_size = self.init_size\n if self._init_size is None:\n self._init_size = 3 * self.batch_size\n if self._init_size < self.n_clusters:\n self._init_size = 3 * self.n_clusters\n elif self._init_size < self.n_clusters:\n warnings.warn(\n f\"init_size={self._init_size} should be larger than \"\n f\"n_clusters={self.n_clusters}. 
Setting it to \"\n f\"min(3*n_clusters, n_samples)\",\n RuntimeWarning, stacklevel=2)\n self._init_size = 3 * self.n_clusters\n self._init_size = min(self._init_size, X.shape[0])\n\n # reassignment_ratio\n if self.reassignment_ratio < 0:\n raise ValueError(\n f\"reassignment_ratio should be >= 0, got \"\n f\"{self.reassignment_ratio} instead.\")" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/_labels_inertia_minibatch", + "name": "_labels_inertia_minibatch", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._labels_inertia_minibatch", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/_labels_inertia_minibatch/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._labels_inertia_minibatch.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/_labels_inertia_minibatch/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._labels_inertia_minibatch.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/_labels_inertia_minibatch/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._labels_inertia_minibatch.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The weights for each observation in X." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute labels and inertia using mini batches.\n\nThis is slightly slower than doing everything at once but prevents\nmemory errors / segfaults.", + "docstring": "Compute labels and inertia using mini batches.\n\nThis is slightly slower than doing everything at once but prevents\nmemory errors / segfaults.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nsample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\ninertia : float\n Sum of squared distances of points to nearest cluster.", + "code": " def _labels_inertia_minibatch(self, X, sample_weight):\n \"\"\"Compute labels and inertia using mini batches.\n\n This is slightly slower than doing everything at once but prevents\n memory errors / segfaults.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n sample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\n inertia : float\n Sum of squared distances of points to nearest cluster.\n \"\"\"\n if self.verbose:\n print('Computing label assignment and total inertia')\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n x_squared_norms = row_norms(X, squared=True)\n slices = gen_batches(X.shape[0], self.batch_size)\n results = [_labels_inertia(X[s], sample_weight[s], x_squared_norms[s],\n self.cluster_centers_) for s in slices]\n labels, inertia = zip(*results)\n return np.hstack(labels), np.sum(inertia)" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/_more_tags", + "name": "_more_tags", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/_more_tags/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/counts_@getter", + "name": "counts_", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.counts_", + "decorators": [ + "deprecated(\"The attribute 'counts_' is deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26).\")", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/counts_/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.counts_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"The attribute 'counts_' is deprecated 
in 0.24\" # type: ignore\n \" and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def counts_(self):\n return self._counts" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/fit", + "name": "fit", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/fit/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/fit/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training instances to cluster. It must be noted that the data\nwill be converted to C ordering, which will cause a memory copy\nif the given data is not C-contiguous." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/fit/y", + "name": "y", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The weights for each observation in X. If None, all observations\nare assigned equal weight (default: None).\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the centroids on X by chunking it into mini-batches.", + "docstring": "Compute the centroids on X by chunking it into mini-batches.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training instances to cluster. It must be noted that the data\n will be converted to C ordering, which will cause a memory copy\n if the given data is not C-contiguous.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight (default: None).\n\n .. versionadded:: 0.20\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None, sample_weight=None):\n \"\"\"Compute the centroids on X by chunking it into mini-batches.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training instances to cluster. 
It must be noted that the data\n will be converted to C ordering, which will cause a memory copy\n if the given data is not C-contiguous.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight (default: None).\n\n .. versionadded:: 0.20\n\n Returns\n -------\n self\n \"\"\"\n X = self._validate_data(X, accept_sparse='csr',\n dtype=[np.float64, np.float32],\n order='C', accept_large_sparse=False)\n\n self._check_params(X)\n random_state = check_random_state(self.random_state)\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n # Validate init array\n init = self.init\n if hasattr(init, '__array__'):\n init = check_array(init, dtype=X.dtype, copy=True, order='C')\n self._validate_center_shape(X, init)\n\n n_samples, n_features = X.shape\n x_squared_norms = row_norms(X, squared=True)\n\n if self.tol > 0.0:\n tol = _tolerance(X, self.tol)\n\n # using tol-based early stopping needs the allocation of a\n # dedicated before which can be expensive for high dim data:\n # hence we allocate it outside of the main loop\n old_center_buffer = np.zeros(n_features, dtype=X.dtype)\n else:\n tol = 0.0\n # no need for the center buffer if tol-based early stopping is\n # disabled\n old_center_buffer = np.zeros(0, dtype=X.dtype)\n\n distances = np.zeros(self.batch_size, dtype=X.dtype)\n n_batches = int(np.ceil(float(n_samples) / self.batch_size))\n n_iter = int(self.max_iter * n_batches)\n\n self._check_mkl_vcomp(X, self.batch_size)\n\n validation_indices = random_state.randint(0, n_samples,\n self._init_size)\n X_valid = X[validation_indices]\n sample_weight_valid = sample_weight[validation_indices]\n x_squared_norms_valid = x_squared_norms[validation_indices]\n\n # perform several inits with random sub-sets\n best_inertia = None\n for init_idx in range(self._n_init):\n if self.verbose:\n print(\"Init %d/%d with method: %s\"\n % (init_idx + 1, self._n_init, init))\n weight_sums = np.zeros(self.n_clusters, dtype=sample_weight.dtype)\n\n # TODO: once the `k_means` function works with sparse input we\n # should refactor the following init to use it instead.\n\n # Initialize the centers using only a fraction of the data as we\n # expect n_samples to be very large when using MiniBatchKMeans\n cluster_centers = self._init_centroids(\n X, x_squared_norms=x_squared_norms,\n init=init,\n random_state=random_state,\n init_size=self._init_size)\n\n # Compute the label assignment on the init dataset\n _mini_batch_step(\n X_valid, sample_weight_valid,\n x_squared_norms[validation_indices], cluster_centers,\n weight_sums, old_center_buffer, False, distances=None,\n verbose=self.verbose)\n\n # Keep only the best cluster centers across independent inits on\n # the common validation set\n _, inertia = _labels_inertia(X_valid, sample_weight_valid,\n x_squared_norms_valid,\n cluster_centers)\n if self.verbose:\n print(\"Inertia for init %d/%d: %f\"\n % (init_idx + 1, self._n_init, inertia))\n if best_inertia is None or inertia < best_inertia:\n self.cluster_centers_ = cluster_centers\n self._counts = weight_sums\n best_inertia = inertia\n\n # Empty context to be used inplace by the convergence check routine\n convergence_context = {}\n\n # Perform the iterative optimization until the final convergence\n # criterion\n for iteration_idx in range(n_iter):\n # Sample a minibatch from the full dataset\n minibatch_indices = 
random_state.randint(\n 0, n_samples, self.batch_size)\n\n # Perform the actual update step on the minibatch data\n batch_inertia, centers_squared_diff = _mini_batch_step(\n X[minibatch_indices], sample_weight[minibatch_indices],\n x_squared_norms[minibatch_indices],\n self.cluster_centers_, self._counts,\n old_center_buffer, tol > 0.0, distances=distances,\n # Here we randomly choose whether to perform\n # random reassignment: the choice is done as a function\n # of the iteration index, and the minimum number of\n # counts, in order to force this reassignment to happen\n # every once in a while\n random_reassign=((iteration_idx + 1)\n % (10 + int(self._counts.min())) == 0),\n random_state=random_state,\n reassignment_ratio=self.reassignment_ratio,\n verbose=self.verbose)\n\n # Monitor convergence and do early stopping if necessary\n if _mini_batch_convergence(\n self, iteration_idx, n_iter, tol, n_samples,\n centers_squared_diff, batch_inertia, convergence_context,\n verbose=self.verbose):\n break\n\n self.n_iter_ = iteration_idx + 1\n\n if self.compute_labels:\n self.labels_, self.inertia_ = \\\n self._labels_inertia_minibatch(X, sample_weight)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/init_size_@getter", + "name": "init_size_", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.init_size_", + "decorators": [ + "deprecated(\"The attribute 'init_size_' is deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26).\")", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/init_size_/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.init_size_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"The attribute 'init_size_' is deprecated in \" # type: ignore\n \"0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def init_size_(self):\n return self._init_size" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/partial_fit", + "name": "partial_fit", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/partial_fit/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/partial_fit/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Coordinates of the data points to cluster. It must be noted that\nX will be copied if it is not C-contiguous." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/partial_fit/y", + "name": "y", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.partial_fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/partial_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.partial_fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The weights for each observation in X. If None, all observations\nare assigned equal weight (default: None)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Update k means estimate on a single mini-batch X.", + "docstring": "Update k means estimate on a single mini-batch X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Coordinates of the data points to cluster. It must be noted that\n X will be copied if it is not C-contiguous.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight (default: None).\n\nReturns\n-------\nself", + "code": " def partial_fit(self, X, y=None, sample_weight=None):\n \"\"\"Update k means estimate on a single mini-batch X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Coordinates of the data points to cluster. It must be noted that\n X will be copied if it is not C-contiguous.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. 
If None, all observations\n are assigned equal weight (default: None).\n\n Returns\n -------\n self\n \"\"\"\n is_first_call_to_partial_fit = not hasattr(self, 'cluster_centers_')\n\n X = self._validate_data(X, accept_sparse='csr',\n dtype=[np.float64, np.float32],\n order='C', accept_large_sparse=False,\n reset=is_first_call_to_partial_fit)\n\n self._random_state = getattr(self, \"_random_state\",\n check_random_state(self.random_state))\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n x_squared_norms = row_norms(X, squared=True)\n\n if is_first_call_to_partial_fit:\n # this is the first call to partial_fit on this object\n self._check_params(X)\n\n # Validate init array\n init = self.init\n if hasattr(init, '__array__'):\n init = check_array(init, dtype=X.dtype, copy=True, order='C')\n self._validate_center_shape(X, init)\n\n self._check_mkl_vcomp(X, X.shape[0])\n\n # initialize the cluster centers\n self.cluster_centers_ = self._init_centroids(\n X, x_squared_norms=x_squared_norms,\n init=init,\n random_state=self._random_state,\n init_size=self._init_size)\n\n self._counts = np.zeros(self.n_clusters,\n dtype=sample_weight.dtype)\n random_reassign = False\n distances = None\n else:\n # The lower the minimum count is, the more we do random\n # reassignment, however, we don't want to do random\n # reassignment too often, to allow for building up counts\n random_reassign = self._random_state.randint(\n 10 * (1 + self._counts.min())) == 0\n distances = np.zeros(X.shape[0], dtype=X.dtype)\n\n _mini_batch_step(X, sample_weight, x_squared_norms,\n self.cluster_centers_, self._counts,\n np.zeros(0, dtype=X.dtype), 0,\n random_reassign=random_reassign, distances=distances,\n random_state=self._random_state,\n reassignment_ratio=self.reassignment_ratio,\n verbose=self.verbose)\n\n if self.compute_labels:\n self.labels_, self.inertia_ = _labels_inertia(\n X, sample_weight, x_squared_norms, self.cluster_centers_)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/predict", + "name": "predict", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/predict/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/predict/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "New data to predict." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/predict/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.predict.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The weights for each observation in X. 
If None, all observations\nare assigned equal weight (default: None)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the closest cluster each sample in X belongs to.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.", + "docstring": "Predict the closest cluster each sample in X belongs to.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight (default: None).\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to.", + "code": " def predict(self, X, sample_weight=None):\n \"\"\"Predict the closest cluster each sample in X belongs to.\n\n In the vector quantization literature, `cluster_centers_` is called\n the code book and each value returned by `predict` is the index of\n the closest code in the code book.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight (default: None).\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to.\n \"\"\"\n check_is_fitted(self)\n\n X = self._check_test_data(X)\n return self._labels_inertia_minibatch(X, sample_weight)[0]" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/random_state_@getter", + "name": "random_state_", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.random_state_", + "decorators": [ + "deprecated(\"The attribute 'random_state_' is deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26).\")", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/MiniBatchKMeans/random_state_/self", + "name": "self", + "qname": "sklearn.cluster._kmeans.MiniBatchKMeans.random_state_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"The attribute 'random_state_' is deprecated \" # type: ignore\n \"in 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def random_state_(self):\n return getattr(self, \"_random_state\", None)" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_plusplus", + "name": "_kmeans_plusplus", + "qname": "sklearn.cluster._kmeans._kmeans_plusplus", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_plusplus/X", + "name": "X", + "qname": "sklearn.cluster._kmeans._kmeans_plusplus.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_features)", + "default_value": 
"", + "description": "The data to pick seeds for." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_plusplus/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._kmeans._kmeans_plusplus.n_clusters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The number of seeds to choose." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_plusplus/x_squared_norms", + "name": "x_squared_norms", + "qname": "sklearn.cluster._kmeans._kmeans_plusplus.x_squared_norms", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Squared Euclidean norm of each data point." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_plusplus/random_state", + "name": "random_state", + "qname": "sklearn.cluster._kmeans._kmeans_plusplus.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "RandomState instance", + "default_value": "", + "description": "The generator used to initialize the centers.\nSee :term:`Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "RandomState instance" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_plusplus/n_local_trials", + "name": "n_local_trials", + "qname": "sklearn.cluster._kmeans._kmeans_plusplus.n_local_trials", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of seeding trials for each center (except the first),\nof which the one reducing inertia the most is greedily chosen.\nSet to None to make the number of trials depend logarithmically\non the number of seeds (2+log(k)); this is the default." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computational component for initialization of n_clusters by\nk-means++. Prior validation of data is assumed.", + "docstring": "Computational component for initialization of n_clusters by\nk-means++. Prior validation of data is assumed.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The data to pick seeds for.\n\nn_clusters : int\n The number of seeds to choose.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Squared Euclidean norm of each data point.\n\nrandom_state : RandomState instance\n The generator used to initialize the centers.\n See :term:`Glossary `.\n\nn_local_trials : int, default=None\n The number of seeding trials for each center (except the first),\n of which the one reducing inertia the most is greedily chosen.\n Set to None to make the number of trials depend logarithmically\n on the number of seeds (2+log(k)); this is the default.\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)\n The inital centers for k-means.\n\nindices : ndarray of shape (n_clusters,)\n The index location of the chosen centers in the data array X. 
For a\n given index and center, X[index] = center.", + "code": "def _kmeans_plusplus(X, n_clusters, x_squared_norms,\n random_state, n_local_trials=None):\n \"\"\"Computational component for initialization of n_clusters by\n k-means++. Prior validation of data is assumed.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The data to pick seeds for.\n\n n_clusters : int\n The number of seeds to choose.\n\n x_squared_norms : ndarray of shape (n_samples,)\n Squared Euclidean norm of each data point.\n\n random_state : RandomState instance\n The generator used to initialize the centers.\n See :term:`Glossary `.\n\n n_local_trials : int, default=None\n The number of seeding trials for each center (except the first),\n of which the one reducing inertia the most is greedily chosen.\n Set to None to make the number of trials depend logarithmically\n on the number of seeds (2+log(k)); this is the default.\n\n Returns\n -------\n centers : ndarray of shape (n_clusters, n_features)\n The inital centers for k-means.\n\n indices : ndarray of shape (n_clusters,)\n The index location of the chosen centers in the data array X. For a\n given index and center, X[index] = center.\n \"\"\"\n n_samples, n_features = X.shape\n\n centers = np.empty((n_clusters, n_features), dtype=X.dtype)\n\n # Set the number of local seeding trials if none is given\n if n_local_trials is None:\n # This is what Arthur/Vassilvitskii tried, but did not report\n # specific results for other than mentioning in the conclusion\n # that it helped.\n n_local_trials = 2 + int(np.log(n_clusters))\n\n # Pick first center randomly and track index of point\n center_id = random_state.randint(n_samples)\n indices = np.full(n_clusters, -1, dtype=int)\n if sp.issparse(X):\n centers[0] = X[center_id].toarray()\n else:\n centers[0] = X[center_id]\n indices[0] = center_id\n\n # Initialize list of closest distances and calculate current potential\n closest_dist_sq = euclidean_distances(\n centers[0, np.newaxis], X, Y_norm_squared=x_squared_norms,\n squared=True)\n current_pot = closest_dist_sq.sum()\n\n # Pick the remaining n_clusters-1 points\n for c in range(1, n_clusters):\n # Choose center candidates by sampling with probability proportional\n # to the squared distance to the closest existing center\n rand_vals = random_state.random_sample(n_local_trials) * current_pot\n candidate_ids = np.searchsorted(stable_cumsum(closest_dist_sq),\n rand_vals)\n # XXX: numerical imprecision can result in a candidate_id out of range\n np.clip(candidate_ids, None, closest_dist_sq.size - 1,\n out=candidate_ids)\n\n # Compute distances to center candidates\n distance_to_candidates = euclidean_distances(\n X[candidate_ids], X, Y_norm_squared=x_squared_norms, squared=True)\n\n # update closest distances squared and potential for each candidate\n np.minimum(closest_dist_sq, distance_to_candidates,\n out=distance_to_candidates)\n candidates_pot = distance_to_candidates.sum(axis=1)\n\n # Decide which candidate is the best\n best_candidate = np.argmin(candidates_pot)\n current_pot = candidates_pot[best_candidate]\n closest_dist_sq = distance_to_candidates[best_candidate]\n best_candidate = candidate_ids[best_candidate]\n\n # Permanently add best center candidate found in local tries\n if sp.issparse(X):\n centers[c] = X[best_candidate].toarray()\n else:\n centers[c] = X[best_candidate]\n indices[c] = best_candidate\n\n return centers, indices" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_elkan", + 
"name": "_kmeans_single_elkan", + "qname": "sklearn.cluster._kmeans._kmeans_single_elkan", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_elkan/X", + "name": "X", + "qname": "sklearn.cluster._kmeans._kmeans_single_elkan.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The observations to cluster. If sparse matrix, must be in CSR format." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_elkan/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans._kmeans_single_elkan.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The weights for each observation in X." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_elkan/centers_init", + "name": "centers_init", + "qname": "sklearn.cluster._kmeans._kmeans_single_elkan.centers_init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_clusters, n_features)", + "default_value": "", + "description": "The initial centers." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_clusters, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_elkan/max_iter", + "name": "max_iter", + "qname": "sklearn.cluster._kmeans._kmeans_single_elkan.max_iter", + "default_value": "300", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations of the k-means algorithm to run." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_elkan/verbose", + "name": "verbose", + "qname": "sklearn.cluster._kmeans._kmeans_single_elkan.verbose", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Verbosity mode." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_elkan/x_squared_norms", + "name": "x_squared_norms", + "qname": "sklearn.cluster._kmeans._kmeans_single_elkan.x_squared_norms", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "Precomputed x_squared_norms." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_elkan/tol", + "name": "tol", + "qname": "sklearn.cluster._kmeans._kmeans_single_elkan.tol", + "default_value": "0.0001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Relative tolerance with regards to Frobenius norm of the difference\nin the cluster centers of two consecutive iterations to declare\nconvergence.\nIt's not advised to set `tol=0` since convergence might never be\ndeclared due to rounding errors. Use a very small number instead." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_elkan/n_threads", + "name": "n_threads", + "qname": "sklearn.cluster._kmeans._kmeans_single_elkan.n_threads", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "The number of OpenMP threads to use for the computation. Parallelism is\nsample-wise on the main cython loop which assigns each sample to its\nclosest center." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "A single run of k-means elkan, assumes preparation completed prior.", + "docstring": "A single run of k-means elkan, assumes preparation completed prior.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. If sparse matrix, must be in CSR format.\n\nsample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\ncenters_init : ndarray of shape (n_clusters, n_features)\n The initial centers.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n Verbosity mode.\n\nx_squared_norms : array-like, default=None\n Precomputed x_squared_norms.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n It's not advised to set `tol=0` since convergence might never be\n declared due to rounding errors. Use a very small number instead.\n\nn_threads : int, default=1\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\ninertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\nn_iter : int\n Number of iterations run.", + "code": "def _kmeans_single_elkan(X, sample_weight, centers_init, max_iter=300,\n verbose=False, x_squared_norms=None, tol=1e-4,\n n_threads=1):\n \"\"\"A single run of k-means elkan, assumes preparation completed prior.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. 
If sparse matrix, must be in CSR format.\n\n sample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\n centers_init : ndarray of shape (n_clusters, n_features)\n The initial centers.\n\n max_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\n verbose : bool, default=False\n Verbosity mode.\n\n x_squared_norms : array-like, default=None\n Precomputed x_squared_norms.\n\n tol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n It's not advised to set `tol=0` since convergence might never be\n declared due to rounding errors. Use a very small number instead.\n\n n_threads : int, default=1\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\n Returns\n -------\n centroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\n label : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\n inertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\n n_iter : int\n Number of iterations run.\n \"\"\"\n n_samples = X.shape[0]\n n_clusters = centers_init.shape[0]\n\n # Buffers to avoid new allocations at each iteration.\n centers = centers_init\n centers_new = np.zeros_like(centers)\n weight_in_clusters = np.zeros(n_clusters, dtype=X.dtype)\n labels = np.full(n_samples, -1, dtype=np.int32)\n labels_old = labels.copy()\n center_half_distances = euclidean_distances(centers) / 2\n distance_next_center = np.partition(np.asarray(center_half_distances),\n kth=1, axis=0)[1]\n upper_bounds = np.zeros(n_samples, dtype=X.dtype)\n lower_bounds = np.zeros((n_samples, n_clusters), dtype=X.dtype)\n center_shift = np.zeros(n_clusters, dtype=X.dtype)\n\n if sp.issparse(X):\n init_bounds = init_bounds_sparse\n elkan_iter = elkan_iter_chunked_sparse\n _inertia = _inertia_sparse\n else:\n init_bounds = init_bounds_dense\n elkan_iter = elkan_iter_chunked_dense\n _inertia = _inertia_dense\n\n init_bounds(X, centers, center_half_distances,\n labels, upper_bounds, lower_bounds)\n\n strict_convergence = False\n\n for i in range(max_iter):\n elkan_iter(X, sample_weight, centers, centers_new,\n weight_in_clusters, center_half_distances,\n distance_next_center, upper_bounds, lower_bounds,\n labels, center_shift, n_threads)\n\n # compute new pairwise distances between centers and closest other\n # center of each center for next iterations\n center_half_distances = euclidean_distances(centers_new) / 2\n distance_next_center = np.partition(\n np.asarray(center_half_distances), kth=1, axis=0)[1]\n\n if verbose:\n inertia = _inertia(X, sample_weight, centers, labels)\n print(f\"Iteration {i}, inertia {inertia}\")\n\n centers, centers_new = centers_new, centers\n\n if np.array_equal(labels, labels_old):\n # First check the labels for strict convergence.\n if verbose:\n print(f\"Converged at iteration {i}: strict convergence.\")\n strict_convergence = True\n break\n else:\n # No strict convergence, check for tol based convergence.\n center_shift_tot = (center_shift**2).sum()\n if center_shift_tot <= tol:\n if verbose:\n print(f\"Converged at iteration {i}: center shift \"\n f\"{center_shift_tot} within tolerance {tol}.\")\n 
break\n\n labels_old[:] = labels\n\n if not strict_convergence:\n # rerun E-step so that predicted labels match cluster centers\n elkan_iter(X, sample_weight, centers, centers, weight_in_clusters,\n center_half_distances, distance_next_center,\n upper_bounds, lower_bounds, labels, center_shift,\n n_threads, update_centers=False)\n\n inertia = _inertia(X, sample_weight, centers, labels)\n\n return labels, inertia, centers, i + 1" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_lloyd", + "name": "_kmeans_single_lloyd", + "qname": "sklearn.cluster._kmeans._kmeans_single_lloyd", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_lloyd/X", + "name": "X", + "qname": "sklearn.cluster._kmeans._kmeans_single_lloyd.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The observations to cluster. If sparse matrix, must be in CSR format." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_lloyd/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans._kmeans_single_lloyd.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The weights for each observation in X." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_lloyd/centers_init", + "name": "centers_init", + "qname": "sklearn.cluster._kmeans._kmeans_single_lloyd.centers_init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_clusters, n_features)", + "default_value": "", + "description": "The initial centers." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_clusters, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_lloyd/max_iter", + "name": "max_iter", + "qname": "sklearn.cluster._kmeans._kmeans_single_lloyd.max_iter", + "default_value": "300", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations of the k-means algorithm to run." 
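`_kmeans_single_elkan` above relies on the triangle inequality: if a sample's distance to its current center is at most half the distance from that center to the next-closest center (the `distance_next_center` array in the code), the assignment cannot change, so the remaining distance computations for that sample are skipped. A small self-contained check of that bound (toy data, illustrative only):

    import numpy as np

    rng = np.random.default_rng(0)
    c1, c2 = rng.normal(size=2), rng.normal(size=2)
    X = rng.normal(size=(10000, 2))

    half = np.linalg.norm(c1 - c2) / 2
    d1 = np.linalg.norm(X - c1, axis=1)
    d2 = np.linalg.norm(X - c2, axis=1)

    # If d(x, c1) <= d(c1, c2) / 2, the triangle inequality gives
    # d(x, c2) >= d(c1, c2) - d(x, c1) >= d(x, c1), so c1 stays closest.
    mask = d1 <= half
    assert np.all(d1[mask] <= d2[mask])
    print(f"bound holds for all {mask.sum()} samples inside the half-distance ball")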
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_lloyd/verbose", + "name": "verbose", + "qname": "sklearn.cluster._kmeans._kmeans_single_lloyd.verbose", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Verbosity mode" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_lloyd/x_squared_norms", + "name": "x_squared_norms", + "qname": "sklearn.cluster._kmeans._kmeans_single_lloyd.x_squared_norms", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Precomputed x_squared_norms." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_lloyd/tol", + "name": "tol", + "qname": "sklearn.cluster._kmeans._kmeans_single_lloyd.tol", + "default_value": "0.0001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Relative tolerance with regards to Frobenius norm of the difference\nin the cluster centers of two consecutive iterations to declare\nconvergence.\nIt's not advised to set `tol=0` since convergence might never be\ndeclared due to rounding errors. Use a very small number instead." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_kmeans_single_lloyd/n_threads", + "name": "n_threads", + "qname": "sklearn.cluster._kmeans._kmeans_single_lloyd.n_threads", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "The number of OpenMP threads to use for the computation. Parallelism is\nsample-wise on the main cython loop which assigns each sample to its\nclosest center." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "A single run of k-means lloyd, assumes preparation completed prior.", + "docstring": "A single run of k-means lloyd, assumes preparation completed prior.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. If sparse matrix, must be in CSR format.\n\nsample_weight : ndarray of shape (n_samples,)\n The weights for each observation in X.\n\ncenters_init : ndarray of shape (n_clusters, n_features)\n The initial centers.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n Verbosity mode\n\nx_squared_norms : ndarray of shape (n_samples,), default=None\n Precomputed x_squared_norms.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n It's not advised to set `tol=0` since convergence might never be\n declared due to rounding errors. Use a very small number instead.\n\nn_threads : int, default=1\n The number of OpenMP threads to use for the computation. 
Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\ninertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\nn_iter : int\n Number of iterations run.", + "code": "def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300,\n verbose=False, x_squared_norms=None, tol=1e-4,\n n_threads=1):\n \"\"\"A single run of k-means lloyd, assumes preparation completed prior.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. If sparse matrix, must be in CSR format.\n\n sample_weight : ndarray of shape (n_samples,)\n The weights for each observation in X.\n\n centers_init : ndarray of shape (n_clusters, n_features)\n The initial centers.\n\n max_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\n verbose : bool, default=False\n Verbosity mode\n\n x_squared_norms : ndarray of shape (n_samples,), default=None\n Precomputed x_squared_norms.\n\n tol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n It's not advised to set `tol=0` since convergence might never be\n declared due to rounding errors. Use a very small number instead.\n\n n_threads : int, default=1\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\n Returns\n -------\n centroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\n label : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\n inertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\n n_iter : int\n Number of iterations run.\n \"\"\"\n n_clusters = centers_init.shape[0]\n\n # Buffers to avoid new allocations at each iteration.\n centers = centers_init\n centers_new = np.zeros_like(centers)\n labels = np.full(X.shape[0], -1, dtype=np.int32)\n labels_old = labels.copy()\n weight_in_clusters = np.zeros(n_clusters, dtype=X.dtype)\n center_shift = np.zeros(n_clusters, dtype=X.dtype)\n\n if sp.issparse(X):\n lloyd_iter = lloyd_iter_chunked_sparse\n _inertia = _inertia_sparse\n else:\n lloyd_iter = lloyd_iter_chunked_dense\n _inertia = _inertia_dense\n\n strict_convergence = False\n\n # Threadpoolctl context to limit the number of threads in second level of\n # nested parallelism (i.e. 
BLAS) to avoid oversubscription.\n with threadpool_limits(limits=1, user_api=\"blas\"):\n for i in range(max_iter):\n lloyd_iter(X, sample_weight, x_squared_norms, centers, centers_new,\n weight_in_clusters, labels, center_shift, n_threads)\n\n if verbose:\n inertia = _inertia(X, sample_weight, centers, labels)\n print(f\"Iteration {i}, inertia {inertia}.\")\n\n centers, centers_new = centers_new, centers\n\n if np.array_equal(labels, labels_old):\n # First check the labels for strict convergence.\n if verbose:\n print(f\"Converged at iteration {i}: strict convergence.\")\n strict_convergence = True\n break\n else:\n # No strict convergence, check for tol based convergence.\n center_shift_tot = (center_shift**2).sum()\n if center_shift_tot <= tol:\n if verbose:\n print(f\"Converged at iteration {i}: center shift \"\n f\"{center_shift_tot} within tolerance {tol}.\")\n break\n\n labels_old[:] = labels\n\n if not strict_convergence:\n # rerun E-step so that predicted labels match cluster centers\n lloyd_iter(X, sample_weight, x_squared_norms, centers, centers,\n weight_in_clusters, labels, center_shift, n_threads,\n update_centers=False)\n\n inertia = _inertia(X, sample_weight, centers, labels)\n\n return labels, inertia, centers, i + 1" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_labels_inertia", + "name": "_labels_inertia", + "qname": "sklearn.cluster._kmeans._labels_inertia", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/_labels_inertia/X", + "name": "X", + "qname": "sklearn.cluster._kmeans._labels_inertia.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples to assign to the labels. If sparse matrix, must\nbe in CSR format." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_labels_inertia/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans._labels_inertia.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The weights for each observation in X." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_labels_inertia/x_squared_norms", + "name": "x_squared_norms", + "qname": "sklearn.cluster._kmeans._labels_inertia.x_squared_norms", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Precomputed squared euclidean norm of each data point, to speed up\ncomputations." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_labels_inertia/centers", + "name": "centers", + "qname": "sklearn.cluster._kmeans._labels_inertia.centers", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_clusters, n_features)", + "default_value": "", + "description": "The cluster centers."
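`_kmeans_single_lloyd` above is the classical alternation this entry documents: an E-step assigning each sample to its nearest center, an M-step recomputing centers as the means of their members, stopping either on unchanged labels or on a small total center shift. A compact dense-only sketch under the assumption that no cluster empties; the function name and data are illustrative, not sklearn's implementation:

    import numpy as np

    def lloyd_sketch(X, centers, max_iter=300, tol=1e-4):
        labels_old = None
        for i in range(max_iter):
            # E-step: nearest center for every sample.
            d2 = ((X[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2)
            labels = d2.argmin(axis=1)
            # M-step: each center moves to the mean of its members
            # (assumes every cluster keeps at least one member).
            centers_new = np.stack([X[labels == k].mean(axis=0)
                                    for k in range(len(centers))])
            shift = ((centers_new - centers) ** 2).sum()
            centers = centers_new
            if labels_old is not None and np.array_equal(labels, labels_old):
                break  # strict convergence: assignments stopped changing
            if shift <= tol:
                break  # tol-based convergence on the total center shift
            labels_old = labels
        return labels, centers, i + 1

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 2))
    labels, centers, n_iter = lloyd_sketch(X, X[:3].copy())
    print(n_iter, centers.shape)  # iteration count and (3, 2)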
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_clusters, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_labels_inertia/n_threads", + "name": "n_threads", + "qname": "sklearn.cluster._kmeans._labels_inertia.n_threads", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of OpenMP threads to use for the computation. Parallelism is\nsample-wise on the main cython loop which assigns each sample to its\nclosest center." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "E step of the K-means EM algorithm.\n\nCompute the labels and the inertia of the given samples and centers.", + "docstring": "E step of the K-means EM algorithm.\n\nCompute the labels and the inertia of the given samples and centers.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples to assign to the labels. If sparse matrix, must\n be in CSR format.\n\nsample_weight : ndarray of shape (n_samples,)\n The weights for each observation in X.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Precomputed squared euclidean norm of each data point, to speed up\n computations.\n\ncenters : ndarray of shape (n_clusters, n_features)\n The cluster centers.\n\nn_threads : int, default=None\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n The resulting assignment.\n\ninertia : float\n Sum of squared distances of samples to their closest cluster center.", + "code": "def _labels_inertia(X, sample_weight, x_squared_norms, centers,\n n_threads=None):\n \"\"\"E step of the K-means EM algorithm.\n\n Compute the labels and the inertia of the given samples and centers.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples to assign to the labels. If sparse matrix, must\n be in CSR format.\n\n sample_weight : ndarray of shape (n_samples,)\n The weights for each observation in X.\n\n x_squared_norms : ndarray of shape (n_samples,)\n Precomputed squared euclidean norm of each data point, to speed up\n computations.\n\n centers : ndarray of shape (n_clusters, n_features)\n The cluster centers.\n\n n_threads : int, default=None\n The number of OpenMP threads to use for the computation. 
Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n The resulting assignment.\n\n inertia : float\n Sum of squared distances of samples to their closest cluster center.\n \"\"\"\n n_samples = X.shape[0]\n n_clusters = centers.shape[0]\n\n n_threads = _openmp_effective_n_threads(n_threads)\n\n labels = np.full(n_samples, -1, dtype=np.int32)\n weight_in_clusters = np.zeros(n_clusters, dtype=centers.dtype)\n center_shift = np.zeros_like(weight_in_clusters)\n\n if sp.issparse(X):\n _labels = lloyd_iter_chunked_sparse\n _inertia = _inertia_sparse\n else:\n _labels = lloyd_iter_chunked_dense\n _inertia = _inertia_dense\n\n _labels(X, sample_weight, x_squared_norms, centers, centers,\n weight_in_clusters, labels, center_shift, n_threads,\n update_centers=False)\n\n inertia = _inertia(X, sample_weight, centers, labels)\n\n return labels, inertia" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_convergence", + "name": "_mini_batch_convergence", + "qname": "sklearn.cluster._kmeans._mini_batch_convergence", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_convergence/model", + "name": "model", + "qname": "sklearn.cluster._kmeans._mini_batch_convergence.model", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_convergence/iteration_idx", + "name": "iteration_idx", + "qname": "sklearn.cluster._kmeans._mini_batch_convergence.iteration_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_convergence/n_iter", + "name": "n_iter", + "qname": "sklearn.cluster._kmeans._mini_batch_convergence.n_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_convergence/tol", + "name": "tol", + "qname": "sklearn.cluster._kmeans._mini_batch_convergence.tol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_convergence/n_samples", + "name": "n_samples", + "qname": "sklearn.cluster._kmeans._mini_batch_convergence.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_convergence/centers_squared_diff", + "name": "centers_squared_diff", + "qname": "sklearn.cluster._kmeans._mini_batch_convergence.centers_squared_diff", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_convergence/batch_inertia", + "name": "batch_inertia", + "qname": 
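The `x_squared_norms` argument threaded through these helpers exists because ||x - c||^2 = ||x||^2 - 2<x, c> + ||c||^2, so the per-sample norms can be computed once and reused in every E-step. A quick numerical check of that expansion and of the label/inertia computation that `_labels_inertia` documents (toy arrays, illustrative only):

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(6, 3))
    centers = rng.normal(size=(2, 3))

    x_squared_norms = (X ** 2).sum(axis=1)  # precomputed once
    d2_fast = (x_squared_norms[:, None]
               - 2 * X @ centers.T
               + (centers ** 2).sum(axis=1)[None, :])
    d2_direct = ((X[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2)
    assert np.allclose(d2_fast, d2_direct)

    labels = d2_fast.argmin(axis=1)
    inertia = d2_fast[np.arange(len(X)), labels].sum()
    print(labels, inertia)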
"sklearn.cluster._kmeans._mini_batch_convergence.batch_inertia", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_convergence/context", + "name": "context", + "qname": "sklearn.cluster._kmeans._mini_batch_convergence.context", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_convergence/verbose", + "name": "verbose", + "qname": "sklearn.cluster._kmeans._mini_batch_convergence.verbose", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper function to encapsulate the early stopping logic.", + "docstring": "Helper function to encapsulate the early stopping logic.", + "code": "def _mini_batch_convergence(model, iteration_idx, n_iter, tol,\n n_samples, centers_squared_diff, batch_inertia,\n context, verbose=0):\n \"\"\"Helper function to encapsulate the early stopping logic.\"\"\"\n # Normalize inertia to be able to compare values when\n # batch_size changes\n batch_inertia /= model.batch_size\n centers_squared_diff /= model.batch_size\n\n # Compute an Exponentially Weighted Average of the squared\n # diff to monitor the convergence while discarding\n # minibatch-local stochastic variability:\n # https://en.wikipedia.org/wiki/Moving_average\n ewa_diff = context.get('ewa_diff')\n ewa_inertia = context.get('ewa_inertia')\n if ewa_diff is None:\n ewa_diff = centers_squared_diff\n ewa_inertia = batch_inertia\n else:\n alpha = float(model.batch_size) * 2.0 / (n_samples + 1)\n alpha = 1.0 if alpha > 1.0 else alpha\n ewa_diff = ewa_diff * (1 - alpha) + centers_squared_diff * alpha\n ewa_inertia = ewa_inertia * (1 - alpha) + batch_inertia * alpha\n\n # Log progress to be able to monitor convergence\n if verbose:\n progress_msg = (\n 'Minibatch iteration %d/%d:'\n ' mean batch inertia: %f, ewa inertia: %f ' % (\n iteration_idx + 1, n_iter, batch_inertia,\n ewa_inertia))\n print(progress_msg)\n\n # Early stopping based on absolute tolerance on squared change of\n # centers position (using EWA smoothing)\n if tol > 0.0 and ewa_diff <= tol:\n if verbose:\n print('Converged (small centers change) at iteration %d/%d'\n % (iteration_idx + 1, n_iter))\n return True\n\n # Early stopping heuristic due to lack of improvement on smoothed inertia\n ewa_inertia_min = context.get('ewa_inertia_min')\n no_improvement = context.get('no_improvement', 0)\n if ewa_inertia_min is None or ewa_inertia < ewa_inertia_min:\n no_improvement = 0\n ewa_inertia_min = ewa_inertia\n else:\n no_improvement += 1\n\n if (model.max_no_improvement is not None\n and no_improvement >= model.max_no_improvement):\n if verbose:\n print('Converged (lack of improvement in inertia)'\n ' at iteration %d/%d'\n % (iteration_idx + 1, n_iter))\n return True\n\n # update the convergence context to maintain state across successive calls:\n context['ewa_diff'] = ewa_diff\n context['ewa_inertia'] = ewa_inertia\n context['ewa_inertia_min'] = ewa_inertia_min\n context['no_improvement'] = no_improvement\n return False" + }, + { + "id": 
"scikit-learn/sklearn.cluster._kmeans/_mini_batch_step", + "name": "_mini_batch_step", + "qname": "sklearn.cluster._kmeans._mini_batch_step", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_step/X", + "name": "X", + "qname": "sklearn.cluster._kmeans._mini_batch_step.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The original data array." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_step/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans._mini_batch_step.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The weights for each observation in X." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_step/x_squared_norms", + "name": "x_squared_norms", + "qname": "sklearn.cluster._kmeans._mini_batch_step.x_squared_norms", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Squared euclidean norm of each data point." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_step/centers", + "name": "centers", + "qname": "sklearn.cluster._kmeans._mini_batch_step.centers", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (k, n_features)", + "default_value": "", + "description": "The cluster centers. This array is MODIFIED IN PLACE" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (k, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_step/weight_sums", + "name": "weight_sums", + "qname": "sklearn.cluster._kmeans._mini_batch_step.weight_sums", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_step/old_center_buffer", + "name": "old_center_buffer", + "qname": "sklearn.cluster._kmeans._mini_batch_step.old_center_buffer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Copy of old centers for monitoring convergence." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_step/compute_squared_diff", + "name": "compute_squared_diff", + "qname": "sklearn.cluster._kmeans._mini_batch_step.compute_squared_diff", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "If set to False, the squared diff computation is skipped." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_step/distances", + "name": "distances", + "qname": "sklearn.cluster._kmeans._mini_batch_step.distances", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,), dtype=float", + "default_value": "None", + "description": "If not None, should be a pre-allocated array that will be used to store\nthe distances of each sample to its closest center.\nMay not be None when random_reassign is True." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + }, + { + "kind": "NamedType", + "name": "dtype=float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_step/random_reassign", + "name": "random_reassign", + "qname": "sklearn.cluster._kmeans._mini_batch_step.random_reassign", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, centers with very low counts are randomly reassigned\nto observations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_step/random_state", + "name": "random_state", + "qname": "sklearn.cluster._kmeans._mini_batch_step.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for centroid initialization and to\npick new clusters amongst observations with uniform probability. Use\nan int to make the randomness deterministic.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_step/reassignment_ratio", + "name": "reassignment_ratio", + "qname": "sklearn.cluster._kmeans._mini_batch_step.reassignment_ratio", + "default_value": "0.01", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": ".01", + "description": "Control the fraction of the maximum number of counts for a\ncenter to be reassigned. A higher value means that low count\ncenters are more likely to be reassigned, which means that the\nmodel will take longer to converge, but should converge in a\nbetter clustering." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_mini_batch_step/verbose", + "name": "verbose", + "qname": "sklearn.cluster._kmeans._mini_batch_step.verbose", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Controls the verbosity." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Incremental update of the centers for the Minibatch K-Means algorithm.", + "docstring": "Incremental update of the centers for the Minibatch K-Means algorithm.\n\nParameters\n----------\n\nX : ndarray of shape (n_samples, n_features)\n The original data array.\n\nsample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Squared euclidean norm of each data point.\n\ncenters : ndarray of shape (k, n_features)\n The cluster centers. This array is MODIFIED IN PLACE\n\nold_center_buffer : int\n Copy of old centers for monitoring convergence.\n\ncompute_squared_diff : bool\n If set to False, the squared diff computation is skipped.\n\ndistances : ndarray of shape (n_samples,), dtype=float, default=None\n If not None, should be a pre-allocated array that will be used to store\n the distances of each sample to its closest center.\n May not be None when random_reassign is True.\n\nrandom_reassign : bool, default=False\n If True, centers with very low counts are randomly reassigned\n to observations.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization and to\n pick new clusters amongst observations with uniform probability. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nreassignment_ratio : float, default=.01\n Control the fraction of the maximum number of counts for a\n center to be reassigned. A higher value means that low count\n centers are more likely to be reassigned, which means that the\n model will take longer to converge, but should converge in a\n better clustering.\n\nverbose : bool, default=False\n Controls the verbosity.\n\nReturns\n-------\ninertia : float\n Sum of squared distances of samples to their closest cluster center.\n\nsquared_diff : ndarray of shape (n_clusters,)\n Squared distances between previous and updated cluster centers.", + "code": "def _mini_batch_step(X, sample_weight, x_squared_norms, centers, weight_sums,\n old_center_buffer, compute_squared_diff,\n distances, random_reassign=False,\n random_state=None, reassignment_ratio=.01,\n verbose=False):\n \"\"\"Incremental update of the centers for the Minibatch K-Means algorithm.\n\n Parameters\n ----------\n\n X : ndarray of shape (n_samples, n_features)\n The original data array.\n\n sample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\n x_squared_norms : ndarray of shape (n_samples,)\n Squared euclidean norm of each data point.\n\n centers : ndarray of shape (k, n_features)\n The cluster centers. 
This array is MODIFIED IN PLACE\n\n old_center_buffer : int\n Copy of old centers for monitoring convergence.\n\n compute_squared_diff : bool\n If set to False, the squared diff computation is skipped.\n\n distances : ndarray of shape (n_samples,), dtype=float, default=None\n If not None, should be a pre-allocated array that will be used to store\n the distances of each sample to its closest center.\n May not be None when random_reassign is True.\n\n random_reassign : bool, default=False\n If True, centers with very low counts are randomly reassigned\n to observations.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization and to\n pick new clusters amongst observations with uniform probability. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\n reassignment_ratio : float, default=.01\n Control the fraction of the maximum number of counts for a\n center to be reassigned. A higher value means that low count\n centers are more likely to be reassigned, which means that the\n model will take longer to converge, but should converge in a\n better clustering.\n\n verbose : bool, default=False\n Controls the verbosity.\n\n Returns\n -------\n inertia : float\n Sum of squared distances of samples to their closest cluster center.\n\n squared_diff : ndarray of shape (n_clusters,)\n Squared distances between previous and updated cluster centers.\n\n \"\"\"\n # Perform label assignment to nearest centers\n nearest_center, inertia = _labels_inertia(X, sample_weight,\n x_squared_norms, centers)\n\n if random_reassign and reassignment_ratio > 0:\n random_state = check_random_state(random_state)\n # Reassign clusters that have very low weight\n to_reassign = weight_sums < reassignment_ratio * weight_sums.max()\n # pick at most .5 * batch_size samples as new centers\n if to_reassign.sum() > .5 * X.shape[0]:\n indices_dont_reassign = \\\n np.argsort(weight_sums)[int(.5 * X.shape[0]):]\n to_reassign[indices_dont_reassign] = False\n n_reassigns = to_reassign.sum()\n if n_reassigns:\n # Pick new clusters amongst observations with uniform probability\n new_centers = random_state.choice(X.shape[0], replace=False,\n size=n_reassigns)\n if verbose:\n print(\"[MiniBatchKMeans] Reassigning %i cluster centers.\"\n % n_reassigns)\n\n if sp.issparse(X) and not sp.issparse(centers):\n assign_rows_csr(\n X, new_centers.astype(np.intp, copy=False),\n np.where(to_reassign)[0].astype(np.intp, copy=False),\n centers)\n else:\n centers[to_reassign] = X[new_centers]\n # reset counts of reassigned centers, but don't reset them too small\n # to avoid instant reassignment. 
This is a pretty dirty hack as it\n # also modifies the learning rates.\n weight_sums[to_reassign] = np.min(weight_sums[~to_reassign])\n\n # implementation for the sparse CSR representation completely written in\n # cython\n if sp.issparse(X):\n return inertia, _mini_batch_update_csr(\n X, sample_weight, x_squared_norms, centers, weight_sums,\n nearest_center, old_center_buffer, compute_squared_diff)\n\n # dense variant in mostly numpy (not as memory efficient though)\n k = centers.shape[0]\n squared_diff = 0.0\n for center_idx in range(k):\n # find points from minibatch that are assigned to this center\n center_mask = nearest_center == center_idx\n wsum = sample_weight[center_mask].sum()\n\n if wsum > 0:\n if compute_squared_diff:\n old_center_buffer[:] = centers[center_idx]\n\n # inplace remove previous count scaling\n centers[center_idx] *= weight_sums[center_idx]\n\n # inplace sum with new points members of this cluster\n centers[center_idx] += \\\n np.sum(X[center_mask] *\n sample_weight[center_mask, np.newaxis], axis=0)\n\n # update the count statistics for this center\n weight_sums[center_idx] += wsum\n\n # inplace rescale to compute mean of all points (old and new)\n # Note: numpy >= 1.10 does not support '/=' for the following\n # expression for a mixture of int and float (see numpy issue #6464)\n centers[center_idx] = centers[center_idx] / weight_sums[center_idx]\n\n # update the squared diff if necessary\n if compute_squared_diff:\n diff = centers[center_idx].ravel() - old_center_buffer.ravel()\n squared_diff += np.dot(diff, diff)\n\n return inertia, squared_diff" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_tolerance", + "name": "_tolerance", + "qname": "sklearn.cluster._kmeans._tolerance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/_tolerance/X", + "name": "X", + "qname": "sklearn.cluster._kmeans._tolerance.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/_tolerance/tol", + "name": "tol", + "qname": "sklearn.cluster._kmeans._tolerance.tol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return a tolerance which is independent of the dataset.", + "docstring": "Return a tolerance which is independent of the dataset.", + "code": "def _tolerance(X, tol):\n \"\"\"Return a tolerance which is independent of the dataset.\"\"\"\n if tol == 0:\n return 0\n if sp.issparse(X):\n variances = mean_variance_axis(X, axis=0)[1]\n else:\n variances = np.var(X, axis=0)\n return np.mean(variances) * tol" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means", + "name": "k_means", + "qname": "sklearn.cluster._kmeans.k_means", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.k_means.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The observations to cluster. 
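`_tolerance` above converts the user's relative `tol` into an absolute threshold by scaling it with the mean per-feature variance of the data, so the convergence test behaves consistently across differently scaled datasets. A worked dense-case example (toy matrix, illustrative only):

    import numpy as np

    X = np.array([[0.0, 0.0],
                  [0.0, 2.0],
                  [2.0, 0.0],
                  [2.0, 2.0]])
    tol = 1e-4

    variances = np.var(X, axis=0)            # [1., 1.]
    absolute_tol = np.mean(variances) * tol  # 1.0 * 1e-4 = 1e-4
    print(absolute_tol)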
It must be noted that the data\nwill be converted to C ordering, which will cause a memory copy\nif the given data is not C-contiguous." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._kmeans.k_means.n_clusters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The number of clusters to form as well as the number of\ncentroids to generate." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/sample_weight", + "name": "sample_weight", + "qname": "sklearn.cluster._kmeans.k_means.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The weights for each observation in X. If None, all observations\nare assigned equal weight." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/init", + "name": "init", + "qname": "sklearn.cluster._kmeans.k_means.init", + "default_value": "'k-means++'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features)", + "default_value": "'k-means++'", + "description": "Method for initialization:\n\n'k-means++' : selects initial cluster centers for k-means\nclustering in a smart way to speed up convergence. See section\nNotes in k_init for more details.\n\n'random': choose `n_clusters` observations (rows) at random from data\nfor the initial centroids.\n\nIf an array is passed, it should be of shape (n_clusters, n_features)\nand gives the initial centers.\n\nIf a callable is passed, it should take arguments X, n_clusters and a\nrandom state and return an initialization." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["random", "k-means++"] + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_clusters, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/precompute_distances", + "name": "precompute_distances", + "qname": "sklearn.cluster._kmeans.k_means.precompute_distances", + "default_value": "'deprecated'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', True, False}", + "default_value": "", + "description": "Precompute distances (faster but takes more memory).\n\n'auto' : do not precompute distances if n_samples * n_clusters > 12\nmillion. This corresponds to about 100MB overhead per job using\ndouble precision.\n\nTrue : always precompute distances\n\nFalse : never precompute distances\n\n.. deprecated:: 0.23\n 'precompute_distances' was deprecated in version 0.23 and will be\n removed in 1.0 (renaming of 0.25). It has no effect."
+ }, + "type": { + "kind": "EnumType", + "values": ["auto"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/n_init", + "name": "n_init", + "qname": "sklearn.cluster._kmeans.k_means.n_init", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of time the k-means algorithm will be run with different\ncentroid seeds. The final results will be the best output of\nn_init consecutive runs in terms of inertia." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/max_iter", + "name": "max_iter", + "qname": "sklearn.cluster._kmeans.k_means.max_iter", + "default_value": "300", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations of the k-means algorithm to run." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/verbose", + "name": "verbose", + "qname": "sklearn.cluster._kmeans.k_means.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Verbosity mode." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/tol", + "name": "tol", + "qname": "sklearn.cluster._kmeans.k_means.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Relative tolerance with regards to Frobenius norm of the difference\nin the cluster centers of two consecutive iterations to declare\nconvergence." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/random_state", + "name": "random_state", + "qname": "sklearn.cluster._kmeans.k_means.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for centroid initialization. Use\nan int to make the randomness deterministic.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/copy_x", + "name": "copy_x", + "qname": "sklearn.cluster._kmeans.k_means.copy_x", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "When pre-computing distances it is more numerically accurate to center\nthe data first. If copy_x is True (default), then the original data is\nnot modified. If False, the original data is modified, and put back\nbefore the function returns, but small numerical differences may be\nintroduced by subtracting and then adding the data mean. Note that if\nthe original data is not C-contiguous, a copy will be made even if\ncopy_x is False. If the original data is sparse, but not in CSR format,\na copy will be made even if copy_x is False." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/n_jobs", + "name": "n_jobs", + "qname": "sklearn.cluster._kmeans.k_means.n_jobs", + "default_value": "'deprecated'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of OpenMP threads to use for the computation. Parallelism is\nsample-wise on the main cython loop which assigns each sample to its\nclosest center.\n\n``None`` or ``-1`` means using all processors.\n\n.. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25)." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/algorithm", + "name": "algorithm", + "qname": "sklearn.cluster._kmeans.k_means.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"auto\", \"full\", \"elkan\"}", + "default_value": "\"auto\"", + "description": "K-means algorithm to use. The classical EM-style algorithm is \"full\".\nThe \"elkan\" variation is more efficient on data with well-defined\nclusters, by using the triangle inequality. However it's more memory\nintensive due to the allocation of an extra array of shape\n(n_samples, n_clusters).\n\nFor now \"auto\" (kept for backward compatibility) chooses \"elkan\" but it\nmight change in the future for a better heuristic." + }, + "type": { + "kind": "EnumType", + "values": ["full", "auto", "elkan"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/k_means/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.cluster._kmeans.k_means.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether or not to return the number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "K-means clustering algorithm.\n\nRead more in the :ref:`User Guide `.", + "docstring": "K-means clustering algorithm.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. It must be noted that the data\n will be converted to C ordering, which will cause a memory copy\n if the given data is not C-contiguous.\n\nn_clusters : int\n The number of clusters to form as well as the number of\n centroids to generate.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. 
See section\n Notes in k_init for more details.\n\n 'random': choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n\n If an array is passed, it should be of shape (n_clusters, n_features)\n and gives the initial centers.\n\n If a callable is passed, it should take arguments X, n_clusters and a\n random state and return an initialization.\n\nprecompute_distances : {'auto', True, False}\n Precompute distances (faster but takes more memory).\n\n 'auto' : do not precompute distances if n_samples * n_clusters > 12\n million. This corresponds to about 100MB overhead per job using\n double precision.\n\n True : always precompute distances\n\n False : never precompute distances\n\n .. deprecated:: 0.23\n 'precompute_distances' was deprecated in version 0.23 and will be\n removed in 1.0 (renaming of 0.25). It has no effect.\n\nn_init : int, default=10\n Number of times the k-means algorithm will be run with different\n centroid seeds. The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n Verbosity mode.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\ncopy_x : bool, default=True\n When pre-computing distances it is more numerically accurate to center\n the data first. If copy_x is True (default), then the original data is\n not modified. If False, the original data is modified, and put back\n before the function returns, but small numerical differences may be\n introduced by subtracting and then adding the data mean. Note that if\n the original data is not C-contiguous, a copy will be made even if\n copy_x is False. If the original data is sparse, but not in CSR format,\n a copy will be made even if copy_x is False.\n\nn_jobs : int, default=None\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\n ``None`` or ``-1`` means using all processors.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nalgorithm : {\"auto\", \"full\", \"elkan\"}, default=\"auto\"\n K-means algorithm to use. The classical EM-style algorithm is \"full\".\n The \"elkan\" variation is more efficient on data with well-defined\n clusters, by using the triangle inequality.
However it's more memory\n intensive due to the allocation of an extra array of shape\n (n_samples, n_clusters).\n\n For now \"auto\" (kept for backward compatibility) chooses \"elkan\" but it\n might change in the future for a better heuristic.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\ninertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\nbest_n_iter : int\n Number of iterations corresponding to the best results.\n Returned only if `return_n_iter` is set to True.", + "code": "@_deprecate_positional_args\ndef k_means(X, n_clusters, *, sample_weight=None, init='k-means++',\n precompute_distances='deprecated', n_init=10, max_iter=300,\n verbose=False, tol=1e-4, random_state=None, copy_x=True,\n n_jobs='deprecated', algorithm=\"auto\", return_n_iter=False):\n \"\"\"K-means clustering algorithm.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. It must be noted that the data\n will be converted to C ordering, which will cause a memory copy\n if the given data is not C-contiguous.\n\n n_clusters : int\n The number of clusters to form as well as the number of\n centroids to generate.\n\n sample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\n init : {'k-means++', 'random'}, callable or array-like of shape \\\n (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. See section\n Notes in k_init for more details.\n\n 'random': choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n\n If an array is passed, it should be of shape (n_clusters, n_features)\n and gives the initial centers.\n\n If a callable is passed, it should take arguments X, n_clusters and a\n random state and return an initialization.\n\n precompute_distances : {'auto', True, False}\n Precompute distances (faster but takes more memory).\n\n 'auto' : do not precompute distances if n_samples * n_clusters > 12\n million. This corresponds to about 100MB overhead per job using\n double precision.\n\n True : always precompute distances\n\n False : never precompute distances\n\n .. deprecated:: 0.23\n 'precompute_distances' was deprecated in version 0.23 and will be\n removed in 1.0 (renaming of 0.25). It has no effect.\n\n n_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. 
The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\n max_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\n verbose : bool, default=False\n Verbosity mode.\n\n tol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\n copy_x : bool, default=True\n When pre-computing distances it is more numerically accurate to center\n the data first. If copy_x is True (default), then the original data is\n not modified. If False, the original data is modified, and put back\n before the function returns, but small numerical differences may be\n introduced by subtracting and then adding the data mean. Note that if\n the original data is not C-contiguous, a copy will be made even if\n copy_x is False. If the original data is sparse, but not in CSR format,\n a copy will be made even if copy_x is False.\n\n n_jobs : int, default=None\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\n ``None`` or ``-1`` means using all processors.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\n algorithm : {\"auto\", \"full\", \"elkan\"}, default=\"auto\"\n K-means algorithm to use. The classical EM-style algorithm is \"full\".\n The \"elkan\" variation is more efficient on data with well-defined\n clusters, by using the triangle inequality. 
However it's more memory\n intensive due to the allocation of an extra array of shape\n (n_samples, n_clusters).\n\n For now \"auto\" (kept for backward compatibility) chooses \"elkan\" but it\n might change in the future for a better heuristic.\n\n return_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\n Returns\n -------\n centroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\n label : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\n inertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\n best_n_iter : int\n Number of iterations corresponding to the best results.\n Returned only if `return_n_iter` is set to True.\n \"\"\"\n est = KMeans(\n n_clusters=n_clusters, init=init, n_init=n_init, max_iter=max_iter,\n verbose=verbose, precompute_distances=precompute_distances, tol=tol,\n random_state=random_state, copy_x=copy_x, n_jobs=n_jobs,\n algorithm=algorithm\n ).fit(X, sample_weight=sample_weight)\n if return_n_iter:\n return est.cluster_centers_, est.labels_, est.inertia_, est.n_iter_\n else:\n return est.cluster_centers_, est.labels_, est.inertia_" + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/kmeans_plusplus", + "name": "kmeans_plusplus", + "qname": "sklearn.cluster._kmeans.kmeans_plusplus", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._kmeans/kmeans_plusplus/X", + "name": "X", + "qname": "sklearn.cluster._kmeans.kmeans_plusplus.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to pick seeds from." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/kmeans_plusplus/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._kmeans.kmeans_plusplus.n_clusters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The number of centroids to initialize" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/kmeans_plusplus/x_squared_norms", + "name": "x_squared_norms", + "qname": "sklearn.cluster._kmeans.kmeans_plusplus.x_squared_norms", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Squared Euclidean norm of each data point." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/kmeans_plusplus/random_state", + "name": "random_state", + "qname": "sklearn.cluster._kmeans.kmeans_plusplus.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or RandomState instance", + "default_value": "None", + "description": "Determines random number generation for centroid initialization. Pass\nan int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._kmeans/kmeans_plusplus/n_local_trials", + "name": "n_local_trials", + "qname": "sklearn.cluster._kmeans.kmeans_plusplus.n_local_trials", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of seeding trials for each center (except the first),\nof which the one reducing inertia the most is greedily chosen.\nSet to None to make the number of trials depend logarithmically\non the number of seeds (2+log(k))." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Init n_clusters seeds according to k-means++\n\n.. versionadded:: 0.24", + "docstring": "Init n_clusters seeds according to k-means++\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to pick seeds from.\n\nn_clusters : int\n The number of centroids to initialize\n\nx_squared_norms : array-like of shape (n_samples,), default=None\n Squared Euclidean norm of each data point.\n\nrandom_state : int or RandomState instance, default=None\n Determines random number generation for centroid initialization. Pass\n an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_local_trials : int, default=None\n The number of seeding trials for each center (except the first),\n of which the one reducing inertia the most is greedily chosen.\n Set to None to make the number of trials depend logarithmically\n on the number of seeds (2+log(k)).\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)\n The inital centers for k-means.\n\nindices : ndarray of shape (n_clusters,)\n The index location of the chosen centers in the data array X. For a\n given index and center, X[index] = center.\n\nNotes\n-----\nSelects initial cluster centers for k-mean clustering in a smart way\nto speed up convergence. see: Arthur, D. and Vassilvitskii, S.\n\"k-means++: the advantages of careful seeding\". ACM-SIAM symposium\non Discrete algorithms. 2007\n\nExamples\n--------\n\n>>> from sklearn.cluster import kmeans_plusplus\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... [10, 2], [10, 4], [10, 0]])\n>>> centers, indices = kmeans_plusplus(X, n_clusters=2, random_state=0)\n>>> centers\narray([[10, 4],\n [ 1, 0]])\n>>> indices\narray([4, 2])", + "code": "def kmeans_plusplus(X, n_clusters, *, x_squared_norms=None,\n random_state=None, n_local_trials=None):\n \"\"\"Init n_clusters seeds according to k-means++\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to pick seeds from.\n\n n_clusters : int\n The number of centroids to initialize\n\n x_squared_norms : array-like of shape (n_samples,), default=None\n Squared Euclidean norm of each data point.\n\n random_state : int or RandomState instance, default=None\n Determines random number generation for centroid initialization. 
Pass\n an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n n_local_trials : int, default=None\n The number of seeding trials for each center (except the first),\n of which the one reducing inertia the most is greedily chosen.\n Set to None to make the number of trials depend logarithmically\n on the number of seeds (2+log(k)).\n\n Returns\n -------\n centers : ndarray of shape (n_clusters, n_features)\n The inital centers for k-means.\n\n indices : ndarray of shape (n_clusters,)\n The index location of the chosen centers in the data array X. For a\n given index and center, X[index] = center.\n\n Notes\n -----\n Selects initial cluster centers for k-mean clustering in a smart way\n to speed up convergence. see: Arthur, D. and Vassilvitskii, S.\n \"k-means++: the advantages of careful seeding\". ACM-SIAM symposium\n on Discrete algorithms. 2007\n\n Examples\n --------\n\n >>> from sklearn.cluster import kmeans_plusplus\n >>> import numpy as np\n >>> X = np.array([[1, 2], [1, 4], [1, 0],\n ... [10, 2], [10, 4], [10, 0]])\n >>> centers, indices = kmeans_plusplus(X, n_clusters=2, random_state=0)\n >>> centers\n array([[10, 4],\n [ 1, 0]])\n >>> indices\n array([4, 2])\n \"\"\"\n\n # Check data\n check_array(X, accept_sparse='csr',\n dtype=[np.float64, np.float32])\n\n if X.shape[0] < n_clusters:\n raise ValueError(f\"n_samples={X.shape[0]} should be >= \"\n f\"n_clusters={n_clusters}.\")\n\n # Check parameters\n if x_squared_norms is None:\n x_squared_norms = row_norms(X, squared=True)\n else:\n x_squared_norms = check_array(x_squared_norms,\n dtype=X.dtype,\n ensure_2d=False)\n\n if x_squared_norms.shape[0] != X.shape[0]:\n raise ValueError(\n f\"The length of x_squared_norms {x_squared_norms.shape[0]} should \"\n f\"be equal to the length of n_samples {X.shape[0]}.\")\n\n if n_local_trials is not None and n_local_trials < 1:\n raise ValueError(\n f\"n_local_trials is set to {n_local_trials} but should be an \"\n f\"integer value greater than zero.\")\n\n random_state = check_random_state(random_state)\n\n # Call private k-means++\n centers, indices = _kmeans_plusplus(X, n_clusters, x_squared_norms,\n random_state, n_local_trials)\n\n return centers, indices" + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/__init__", + "name": "__init__", + "qname": "sklearn.cluster._mean_shift.MeanShift.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/__init__/self", + "name": "self", + "qname": "sklearn.cluster._mean_shift.MeanShift.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/__init__/bandwidth", + "name": "bandwidth", + "qname": "sklearn.cluster._mean_shift.MeanShift.__init__.bandwidth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Bandwidth used in the RBF kernel.\n\nIf not given, the bandwidth is estimated using\nsklearn.cluster.estimate_bandwidth; see the documentation for that\nfunction for hints on scalability (see also the Notes, below)." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/__init__/seeds", + "name": "seeds", + "qname": "sklearn.cluster._mean_shift.MeanShift.__init__.seeds", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "None", + "description": "Seeds used to initialize kernels. If not set,\nthe seeds are calculated by clustering.get_bin_seeds\nwith bandwidth as the grid size and default values for\nother parameters." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/__init__/bin_seeding", + "name": "bin_seeding", + "qname": "sklearn.cluster._mean_shift.MeanShift.__init__.bin_seeding", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If true, initial kernel locations are not locations of all\npoints, but rather the location of the discretized version of\npoints, where points are binned onto a grid whose coarseness\ncorresponds to the bandwidth. Setting this option to True will speed\nup the algorithm because fewer seeds will be initialized.\nThe default value is False.\nIgnored if seeds argument is not None." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/__init__/min_bin_freq", + "name": "min_bin_freq", + "qname": "sklearn.cluster._mean_shift.MeanShift.__init__.min_bin_freq", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "To speed up the algorithm, accept only those bins with at least\nmin_bin_freq points as seeds." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/__init__/cluster_all", + "name": "cluster_all", + "qname": "sklearn.cluster._mean_shift.MeanShift.__init__.cluster_all", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If true, then all points are clustered, even those orphans that are\nnot within any kernel. Orphans are assigned to the nearest kernel.\nIf false, then orphans are given cluster label -1." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.cluster._mean_shift.MeanShift.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation. This works by computing\neach of the n_init runs in parallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.cluster._mean_shift.MeanShift.__init__.max_iter", + "default_value": "300", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations, per seed point before the clustering\noperation terminates (for that seed point), if has not converged yet.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mean shift clustering using a flat kernel.\n\nMean shift clustering aims to discover \"blobs\" in a smooth density of\nsamples. It is a centroid-based algorithm, which works by updating\ncandidates for centroids to be the mean of the points within a given\nregion. These candidates are then filtered in a post-processing stage to\neliminate near-duplicates to form the final set of centroids.\n\nSeeding is performed using a binning technique for scalability.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, bandwidth=None, seeds=None, bin_seeding=False,\n min_bin_freq=1, cluster_all=True, n_jobs=None, max_iter=300):\n self.bandwidth = bandwidth\n self.seeds = seeds\n self.bin_seeding = bin_seeding\n self.cluster_all = cluster_all\n self.min_bin_freq = min_bin_freq\n self.n_jobs = n_jobs\n self.max_iter = max_iter" + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/fit", + "name": "fit", + "qname": "sklearn.cluster._mean_shift.MeanShift.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/fit/self", + "name": "self", + "qname": "sklearn.cluster._mean_shift.MeanShift.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/fit/X", + "name": "X", + "qname": "sklearn.cluster._mean_shift.MeanShift.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Samples to cluster." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/fit/y", + "name": "y", + "qname": "sklearn.cluster._mean_shift.MeanShift.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform clustering.", + "docstring": "Perform clustering.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples to cluster.\n\ny : Ignored", + "code": " def fit(self, X, y=None):\n \"\"\"Perform clustering.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Samples to cluster.\n\n y : Ignored\n\n \"\"\"\n X = self._validate_data(X)\n bandwidth = self.bandwidth\n if bandwidth is None:\n bandwidth = estimate_bandwidth(X, n_jobs=self.n_jobs)\n elif bandwidth <= 0:\n raise ValueError(\"bandwidth needs to be greater than zero or None,\"\n \" got %f\" % bandwidth)\n\n seeds = self.seeds\n if seeds is None:\n if self.bin_seeding:\n seeds = get_bin_seeds(X, bandwidth, self.min_bin_freq)\n else:\n seeds = X\n n_samples, n_features = X.shape\n center_intensity_dict = {}\n\n # We use n_jobs=1 because this will be used in nested calls under\n # parallel calls to _mean_shift_single_seed so there is no need for\n # for further parallelism.\n nbrs = NearestNeighbors(radius=bandwidth, n_jobs=1).fit(X)\n\n # execute iterations on all seeds in parallel\n all_res = Parallel(n_jobs=self.n_jobs)(\n delayed(_mean_shift_single_seed)\n (seed, X, nbrs, self.max_iter) for seed in seeds)\n # copy results in a dictionary\n for i in range(len(seeds)):\n if all_res[i][1]: # i.e. len(points_within) > 0\n center_intensity_dict[all_res[i][0]] = all_res[i][1]\n\n self.n_iter_ = max([x[2] for x in all_res])\n\n if not center_intensity_dict:\n # nothing near seeds\n raise ValueError(\"No point was within bandwidth=%f of any seed.\"\n \" Try a different seeding strategy \\\n or increase the bandwidth.\"\n % bandwidth)\n\n # POST PROCESSING: remove near duplicate points\n # If the distance between two kernels is less than the bandwidth,\n # then we have to remove one because it is a duplicate. 
Remove the\n # one with fewer points.\n\n sorted_by_intensity = sorted(center_intensity_dict.items(),\n key=lambda tup: (tup[1], tup[0]),\n reverse=True)\n sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])\n unique = np.ones(len(sorted_centers), dtype=bool)\n nbrs = NearestNeighbors(radius=bandwidth,\n n_jobs=self.n_jobs).fit(sorted_centers)\n for i, center in enumerate(sorted_centers):\n if unique[i]:\n neighbor_idxs = nbrs.radius_neighbors([center],\n return_distance=False)[0]\n unique[neighbor_idxs] = 0\n unique[i] = 1 # leave the current point as unique\n cluster_centers = sorted_centers[unique]\n\n # ASSIGN LABELS: a point belongs to the cluster that it is closest to\n nbrs = NearestNeighbors(n_neighbors=1,\n n_jobs=self.n_jobs).fit(cluster_centers)\n labels = np.zeros(n_samples, dtype=int)\n distances, idxs = nbrs.kneighbors(X)\n if self.cluster_all:\n labels = idxs.flatten()\n else:\n labels.fill(-1)\n bool_selector = distances.flatten() <= bandwidth\n labels[bool_selector] = idxs.flatten()[bool_selector]\n\n self.cluster_centers_, self.labels_ = cluster_centers, labels\n return self" + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/predict", + "name": "predict", + "qname": "sklearn.cluster._mean_shift.MeanShift.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/predict/self", + "name": "self", + "qname": "sklearn.cluster._mean_shift.MeanShift.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/MeanShift/predict/X", + "name": "X", + "qname": "sklearn.cluster._mean_shift.MeanShift.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "New data to predict." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the closest cluster each sample in X belongs to.", + "docstring": "Predict the closest cluster each sample in X belongs to.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to.", + "code": " def predict(self, X):\n \"\"\"Predict the closest cluster each sample in X belongs to.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, reset=False)\n with config_context(assume_finite=True):\n return pairwise_distances_argmin(X, self.cluster_centers_)" + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/_mean_shift_single_seed", + "name": "_mean_shift_single_seed", + "qname": "sklearn.cluster._mean_shift._mean_shift_single_seed", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._mean_shift/_mean_shift_single_seed/my_mean", + "name": "my_mean", + "qname": "sklearn.cluster._mean_shift._mean_shift_single_seed.my_mean", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/_mean_shift_single_seed/X", + "name": "X", + "qname": "sklearn.cluster._mean_shift._mean_shift_single_seed.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/_mean_shift_single_seed/nbrs", + "name": "nbrs", + "qname": "sklearn.cluster._mean_shift._mean_shift_single_seed.nbrs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/_mean_shift_single_seed/max_iter", + "name": "max_iter", + "qname": "sklearn.cluster._mean_shift._mean_shift_single_seed.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _mean_shift_single_seed(my_mean, X, nbrs, max_iter):\n # For each seed, climb gradient until convergence or max_iter\n bandwidth = nbrs.get_params()['radius']\n stop_thresh = 1e-3 * bandwidth # when mean has converged\n completed_iterations = 0\n while True:\n # Find mean of points within bandwidth\n i_nbrs = nbrs.radius_neighbors([my_mean], bandwidth,\n return_distance=False)[0]\n points_within = X[i_nbrs]\n if len(points_within) == 0:\n break # Depending on seeding strategy this condition may occur\n my_old_mean = my_mean # save the old mean\n my_mean = np.mean(points_within, axis=0)\n # If converged or at max_iter, adds the 
cluster\n if (np.linalg.norm(my_mean - my_old_mean) < stop_thresh or\n completed_iterations == max_iter):\n break\n completed_iterations += 1\n return tuple(my_mean), len(points_within), completed_iterations" + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/estimate_bandwidth", + "name": "estimate_bandwidth", + "qname": "sklearn.cluster._mean_shift.estimate_bandwidth", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._mean_shift/estimate_bandwidth/X", + "name": "X", + "qname": "sklearn.cluster._mean_shift.estimate_bandwidth.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input points." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/estimate_bandwidth/quantile", + "name": "quantile", + "qname": "sklearn.cluster._mean_shift.estimate_bandwidth.quantile", + "default_value": "0.3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.3", + "description": "should be between [0, 1]\n0.5 means that the median of all pairwise distances is used." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/estimate_bandwidth/n_samples", + "name": "n_samples", + "qname": "sklearn.cluster._mean_shift.estimate_bandwidth.n_samples", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of samples to use. If not given, all samples are used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/estimate_bandwidth/random_state", + "name": "random_state", + "qname": "sklearn.cluster._mean_shift.estimate_bandwidth.random_state", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "The generator used to randomly select the samples from input points\nfor bandwidth estimation. Use an int to make the randomness\ndeterministic.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/estimate_bandwidth/n_jobs", + "name": "n_jobs", + "qname": "sklearn.cluster._mean_shift.estimate_bandwidth.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the bandwidth to use with the mean-shift algorithm.\n\nNote that this function takes time at least quadratic in n_samples. 
For large\ndatasets, it's wise to set that parameter to a small value.", + "docstring": "Estimate the bandwidth to use with the mean-shift algorithm.\n\nNote that this function takes time at least quadratic in n_samples. For large\ndatasets, it's wise to set that parameter to a small value.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input points.\n\nquantile : float, default=0.3\n should be between [0, 1]\n 0.5 means that the median of all pairwise distances is used.\n\nn_samples : int, default=None\n The number of samples to use. If not given, all samples are used.\n\nrandom_state : int, RandomState instance, default=None\n The generator used to randomly select the samples from input points\n for bandwidth estimation. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nbandwidth : float\n The bandwidth parameter.", + "code": "@_deprecate_positional_args\ndef estimate_bandwidth(X, *, quantile=0.3, n_samples=None, random_state=0,\n n_jobs=None):\n \"\"\"Estimate the bandwidth to use with the mean-shift algorithm.\n\n Note that this function takes time at least quadratic in n_samples. For large\n datasets, it's wise to set that parameter to a small value.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input points.\n\n quantile : float, default=0.3\n should be between [0, 1]\n 0.5 means that the median of all pairwise distances is used.\n\n n_samples : int, default=None\n The number of samples to use. If not given, all samples are used.\n\n random_state : int, RandomState instance, default=None\n The generator used to randomly select the samples from input points\n for bandwidth estimation. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\n n_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Returns\n -------\n bandwidth : float\n The bandwidth parameter.\n \"\"\"\n X = check_array(X)\n\n random_state = check_random_state(random_state)\n if n_samples is not None:\n idx = random_state.permutation(X.shape[0])[:n_samples]\n X = X[idx]\n n_neighbors = int(X.shape[0] * quantile)\n if n_neighbors < 1: # cannot fit NearestNeighbors with n_neighbors = 0\n n_neighbors = 1\n nbrs = NearestNeighbors(n_neighbors=n_neighbors,\n n_jobs=n_jobs)\n nbrs.fit(X)\n\n bandwidth = 0.\n for batch in gen_batches(len(X), 500):\n d, _ = nbrs.kneighbors(X[batch, :], return_distance=True)\n bandwidth += np.max(d, axis=1).sum()\n\n return bandwidth / X.shape[0]" + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/get_bin_seeds", + "name": "get_bin_seeds", + "qname": "sklearn.cluster._mean_shift.get_bin_seeds", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._mean_shift/get_bin_seeds/X", + "name": "X", + "qname": "sklearn.cluster._mean_shift.get_bin_seeds.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input points, the same points that will be used in mean_shift." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/get_bin_seeds/bin_size", + "name": "bin_size", + "qname": "sklearn.cluster._mean_shift.get_bin_seeds.bin_size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Controls the coarseness of the binning. Smaller values lead\nto more seeding (which is computationally more expensive). If you're\nnot sure how to set this, set it to the value of the bandwidth used\nin clustering.mean_shift." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/get_bin_seeds/min_bin_freq", + "name": "min_bin_freq", + "qname": "sklearn.cluster._mean_shift.get_bin_seeds.min_bin_freq", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Only bins with at least min_bin_freq will be selected as seeds.\nRaising this value decreases the number of seeds found, which\nmakes mean_shift computationally cheaper." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Finds seeds for mean_shift.\n\nFinds seeds by first binning data onto a grid whose lines are\nspaced bin_size apart, and then choosing those bins with at least\nmin_bin_freq points.", + "docstring": "Finds seeds for mean_shift.\n\nFinds seeds by first binning data onto a grid whose lines are\nspaced bin_size apart, and then choosing those bins with at least\nmin_bin_freq points.\n\nParameters\n----------\n\nX : array-like of shape (n_samples, n_features)\n Input points, the same points that will be used in mean_shift.\n\nbin_size : float\n Controls the coarseness of the binning. Smaller values lead\n to more seeding (which is computationally more expensive). If you're\n not sure how to set this, set it to the value of the bandwidth used\n in clustering.mean_shift.\n\nmin_bin_freq : int, default=1\n Only bins with at least min_bin_freq will be selected as seeds.\n Raising this value decreases the number of seeds found, which\n makes mean_shift computationally cheaper.\n\nReturns\n-------\nbin_seeds : array-like of shape (n_samples, n_features)\n Points used as initial kernel positions in clustering.mean_shift.", + "code": "def get_bin_seeds(X, bin_size, min_bin_freq=1):\n \"\"\"Finds seeds for mean_shift.\n\n Finds seeds by first binning data onto a grid whose lines are\n spaced bin_size apart, and then choosing those bins with at least\n min_bin_freq points.\n\n Parameters\n ----------\n\n X : array-like of shape (n_samples, n_features)\n Input points, the same points that will be used in mean_shift.\n\n bin_size : float\n Controls the coarseness of the binning. Smaller values lead\n to more seeding (which is computationally more expensive). 
If you're\n not sure how to set this, set it to the value of the bandwidth used\n in clustering.mean_shift.\n\n min_bin_freq : int, default=1\n Only bins with at least min_bin_freq will be selected as seeds.\n Raising this value decreases the number of seeds found, which\n makes mean_shift computationally cheaper.\n\n Returns\n -------\n bin_seeds : array-like of shape (n_samples, n_features)\n Points used as initial kernel positions in clustering.mean_shift.\n \"\"\"\n if bin_size == 0:\n return X\n\n # Bin points\n bin_sizes = defaultdict(int)\n for point in X:\n binned_point = np.round(point / bin_size)\n bin_sizes[tuple(binned_point)] += 1\n\n # Select only those bins as seeds which have enough members\n bin_seeds = np.array([point for point, freq in bin_sizes.items() if\n freq >= min_bin_freq], dtype=np.float32)\n if len(bin_seeds) == len(X):\n warnings.warn(\"Binning data failed with provided bin_size=%f,\"\n \" using data points as seeds.\" % bin_size)\n return X\n bin_seeds = bin_seeds * bin_size\n return bin_seeds" + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/mean_shift", + "name": "mean_shift", + "qname": "sklearn.cluster._mean_shift.mean_shift", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._mean_shift/mean_shift/X", + "name": "X", + "qname": "sklearn.cluster._mean_shift.mean_shift.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/mean_shift/bandwidth", + "name": "bandwidth", + "qname": "sklearn.cluster._mean_shift.mean_shift.bandwidth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Kernel bandwidth.\n\nIf bandwidth is not given, it is determined using a heuristic based on\nthe median of all pairwise distances. This will take quadratic time in\nthe number of samples. The sklearn.cluster.estimate_bandwidth function\ncan be used to do this more efficiently." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/mean_shift/seeds", + "name": "seeds", + "qname": "sklearn.cluster._mean_shift.mean_shift.seeds", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_seeds, n_features) or None", + "default_value": "", + "description": "Point used as initial kernel locations. If None and bin_seeding=False,\neach data point is used as a seed. If None and bin_seeding=True,\nsee bin_seeding." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_seeds, n_features)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/mean_shift/bin_seeding", + "name": "bin_seeding", + "qname": "sklearn.cluster._mean_shift.mean_shift.bin_seeding", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If true, initial kernel locations are not locations of all\npoints, but rather the location of the discretized version of\npoints, where points are binned onto a grid whose coarseness\ncorresponds to the bandwidth. Setting this option to True will speed\nup the algorithm because fewer seeds will be initialized.\nIgnored if seeds argument is not None." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/mean_shift/min_bin_freq", + "name": "min_bin_freq", + "qname": "sklearn.cluster._mean_shift.mean_shift.min_bin_freq", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "To speed up the algorithm, accept only those bins with at least\nmin_bin_freq points as seeds." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/mean_shift/cluster_all", + "name": "cluster_all", + "qname": "sklearn.cluster._mean_shift.mean_shift.cluster_all", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If true, then all points are clustered, even those orphans that are\nnot within any kernel. Orphans are assigned to the nearest kernel.\nIf false, then orphans are given cluster label -1." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/mean_shift/max_iter", + "name": "max_iter", + "qname": "sklearn.cluster._mean_shift.mean_shift.max_iter", + "default_value": "300", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations, per seed point before the clustering\noperation terminates (for that seed point), if has not converged yet." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._mean_shift/mean_shift/n_jobs", + "name": "n_jobs", + "qname": "sklearn.cluster._mean_shift.mean_shift.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation. This works by computing\neach of the n_init runs in parallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionadded:: 0.17\n Parallel Execution using *n_jobs*." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform mean shift clustering of data using a flat kernel.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Perform mean shift clustering of data using a flat kernel.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nbandwidth : float, default=None\n Kernel bandwidth.\n\n If bandwidth is not given, it is determined using a heuristic based on\n the median of all pairwise distances. This will take quadratic time in\n the number of samples. The sklearn.cluster.estimate_bandwidth function\n can be used to do this more efficiently.\n\nseeds : array-like of shape (n_seeds, n_features) or None\n Point used as initial kernel locations. If None and bin_seeding=False,\n each data point is used as a seed. If None and bin_seeding=True,\n see bin_seeding.\n\nbin_seeding : bool, default=False\n If true, initial kernel locations are not locations of all\n points, but rather the location of the discretized version of\n points, where points are binned onto a grid whose coarseness\n corresponds to the bandwidth. Setting this option to True will speed\n up the algorithm because fewer seeds will be initialized.\n Ignored if seeds argument is not None.\n\nmin_bin_freq : int, default=1\n To speed up the algorithm, accept only those bins with at least\n min_bin_freq points as seeds.\n\ncluster_all : bool, default=True\n If true, then all points are clustered, even those orphans that are\n not within any kernel. Orphans are assigned to the nearest kernel.\n If false, then orphans are given cluster label -1.\n\nmax_iter : int, default=300\n Maximum number of iterations, per seed point before the clustering\n operation terminates (for that seed point), if has not converged yet.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by computing\n each of the n_init runs in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.17\n Parallel Execution using *n_jobs*.\n\nReturns\n-------\n\ncluster_centers : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers.\n\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_mean_shift.py\n`.", + "code": "@_deprecate_positional_args\ndef mean_shift(X, *, bandwidth=None, seeds=None, bin_seeding=False,\n min_bin_freq=1, cluster_all=True, max_iter=300,\n n_jobs=None):\n \"\"\"Perform mean shift clustering of data using a flat kernel.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n bandwidth : float, default=None\n Kernel bandwidth.\n\n If bandwidth is not given, it is determined using a heuristic based on\n the median of all pairwise distances. This will take quadratic time in\n the number of samples. The sklearn.cluster.estimate_bandwidth function\n can be used to do this more efficiently.\n\n seeds : array-like of shape (n_seeds, n_features) or None\n Point used as initial kernel locations. If None and bin_seeding=False,\n each data point is used as a seed. 
If None and bin_seeding=True,\n see bin_seeding.\n\n bin_seeding : bool, default=False\n If true, initial kernel locations are not locations of all\n points, but rather the location of the discretized version of\n points, where points are binned onto a grid whose coarseness\n corresponds to the bandwidth. Setting this option to True will speed\n up the algorithm because fewer seeds will be initialized.\n Ignored if seeds argument is not None.\n\n min_bin_freq : int, default=1\n To speed up the algorithm, accept only those bins with at least\n min_bin_freq points as seeds.\n\n cluster_all : bool, default=True\n If true, then all points are clustered, even those orphans that are\n not within any kernel. Orphans are assigned to the nearest kernel.\n If false, then orphans are given cluster label -1.\n\n max_iter : int, default=300\n Maximum number of iterations, per seed point before the clustering\n operation terminates (for that seed point), if has not converged yet.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation. This works by computing\n each of the n_init runs in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionadded:: 0.17\n Parallel Execution using *n_jobs*.\n\n Returns\n -------\n\n cluster_centers : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers.\n\n labels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\n Notes\n -----\n For an example, see :ref:`examples/cluster/plot_mean_shift.py\n `.\n\n \"\"\"\n model = MeanShift(bandwidth=bandwidth, seeds=seeds,\n min_bin_freq=min_bin_freq,\n bin_seeding=bin_seeding,\n cluster_all=cluster_all, n_jobs=n_jobs,\n max_iter=max_iter).fit(X)\n return model.cluster_centers_, model.labels_" + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__", + "name": "__init__", + "qname": "sklearn.cluster._optics.OPTICS.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/self", + "name": "self", + "qname": "sklearn.cluster._optics.OPTICS.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/min_samples", + "name": "min_samples", + "qname": "sklearn.cluster._optics.OPTICS.__init__.min_samples", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int > 1 or float between 0 and 1", + "default_value": "5", + "description": "The number of samples in a neighborhood for a point to be considered as\na core point. Also, up and down steep regions can't have more than\n``min_samples`` consecutive non-steep points. Expressed as an absolute\nnumber or a fraction of the number of samples (rounded to be at least\n2)." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int > 1" + }, + { + "kind": "NamedType", + "name": "float between 0 and 1" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/max_eps", + "name": "max_eps", + "qname": "sklearn.cluster._optics.OPTICS.__init__.max_eps", + "default_value": "np.inf", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "np.inf", + "description": "The maximum distance between two samples for one to be considered as\nin the neighborhood of the other. Default value of ``np.inf`` will\nidentify clusters across all scales; reducing ``max_eps`` will result\nin shorter run times." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/metric", + "name": "metric", + "qname": "sklearn.cluster._optics.OPTICS.__init__.metric", + "default_value": "'minkowski'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'minkowski'", + "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string. If metric is\n\"precomputed\", X is assumed to be a distance matrix and must be square.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/p", + "name": "p", + "qname": "sklearn.cluster._optics.OPTICS.__init__.p", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Parameter for the Minkowski metric from\n:class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/metric_params", + "name": "metric_params", + "qname": "sklearn.cluster._optics.OPTICS.__init__.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional keyword arguments for the metric function." 
+ }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/cluster_method", + "name": "cluster_method", + "qname": "sklearn.cluster._optics.OPTICS.__init__.cluster_method", + "default_value": "'xi'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'xi'", + "description": "The extraction method used to extract clusters using the calculated\nreachability and ordering. Possible values are \"xi\" and \"dbscan\"." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/eps", + "name": "eps", + "qname": "sklearn.cluster._optics.OPTICS.__init__.eps", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "The maximum distance between two samples for one to be considered as\nin the neighborhood of the other. By default it assumes the same value\nas ``max_eps``.\nUsed only when ``cluster_method='dbscan'``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/xi", + "name": "xi", + "qname": "sklearn.cluster._optics.OPTICS.__init__.xi", + "default_value": "0.05", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float between 0 and 1", + "default_value": "0.05", + "description": "Determines the minimum steepness on the reachability plot that\nconstitutes a cluster boundary. For example, an upwards point in the\nreachability plot is defined by the ratio from one point to its\nsuccessor being at most 1-xi.\nUsed only when ``cluster_method='xi'``." + }, + "type": { + "kind": "NamedType", + "name": "float between 0 and 1" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/predecessor_correction", + "name": "predecessor_correction", + "qname": "sklearn.cluster._optics.OPTICS.__init__.predecessor_correction", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Correct clusters according to the predecessors calculated by OPTICS\n[2]_. This parameter has minimal effect on most datasets.\nUsed only when ``cluster_method='xi'``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/min_cluster_size", + "name": "min_cluster_size", + "qname": "sklearn.cluster._optics.OPTICS.__init__.min_cluster_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int > 1 or float between 0 and 1", + "default_value": "None", + "description": "Minimum number of samples in an OPTICS cluster, expressed as an\nabsolute number or a fraction of the number of samples (rounded to be\nat least 2). If ``None``, the value of ``min_samples`` is used instead.\nUsed only when ``cluster_method='xi'``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int > 1" + }, + { + "kind": "NamedType", + "name": "float between 0 and 1" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.cluster._optics.OPTICS.__init__.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", + "default_value": "'auto'", + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method. (default)\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/leaf_size", + "name": "leaf_size", + "qname": "sklearn.cluster._optics.OPTICS.__init__.leaf_size", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\naffect the speed of the construction and query, as well as the memory\nrequired to store the tree. The optimal value depends on the\nnature of the problem." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.cluster._optics.OPTICS.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate clustering structure from vector array.\n\nOPTICS (Ordering Points To Identify the Clustering Structure), closely\nrelated to DBSCAN, finds core sample of high density and expands clusters\nfrom them [1]_. Unlike DBSCAN, keeps cluster hierarchy for a variable\nneighborhood radius. Better suited for usage on large datasets than the\ncurrent sklearn implementation of DBSCAN.\n\nClusters are then extracted using a DBSCAN-like method\n(cluster_method = 'dbscan') or an automatic\ntechnique proposed in [1]_ (cluster_method = 'xi').\n\nThis implementation deviates from the original OPTICS by first performing\nk-nearest-neighborhood searches on all points to identify core sizes, then\ncomputing only the distances to unprocessed points when constructing the\ncluster order. 
Note that we do not employ a heap to manage the expansion\ncandidates, so the time complexity will be O(n^2).\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, min_samples=5, max_eps=np.inf, metric='minkowski',\n p=2, metric_params=None, cluster_method='xi', eps=None,\n xi=0.05, predecessor_correction=True, min_cluster_size=None,\n algorithm='auto', leaf_size=30, n_jobs=None):\n self.max_eps = max_eps\n self.min_samples = min_samples\n self.min_cluster_size = min_cluster_size\n self.algorithm = algorithm\n self.metric = metric\n self.metric_params = metric_params\n self.p = p\n self.leaf_size = leaf_size\n self.cluster_method = cluster_method\n self.eps = eps\n self.xi = xi\n self.predecessor_correction = predecessor_correction\n self.n_jobs = n_jobs" + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/fit", + "name": "fit", + "qname": "sklearn.cluster._optics.OPTICS.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/fit/self", + "name": "self", + "qname": "sklearn.cluster._optics.OPTICS.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/fit/X", + "name": "X", + "qname": "sklearn.cluster._optics.OPTICS.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features), or (n_samples, n_samples) if metric=\u2019precomputed\u2019", + "default_value": "", + "description": "A feature array, or array of distances between samples if\nmetric='precomputed'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "(n_samples, n_samples) if metric=\u2019precomputed\u2019" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/OPTICS/fit/y", + "name": "y", + "qname": "sklearn.cluster._optics.OPTICS.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ignored", + "default_value": "", + "description": "Ignored." 
+ }, + "type": { + "kind": "NamedType", + "name": "ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform OPTICS clustering.\n\nExtracts an ordered list of points and reachability distances, and\nperforms initial clustering using ``max_eps`` distance specified at\nOPTICS object instantiation.", + "docstring": "Perform OPTICS clustering.\n\nExtracts an ordered list of points and reachability distances, and\nperforms initial clustering using ``max_eps`` distance specified at\nOPTICS object instantiation.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features), or (n_samples, n_samples) if metric=\u2019precomputed\u2019\n A feature array, or array of distances between samples if\n metric='precomputed'.\n\ny : ignored\n Ignored.\n\nReturns\n-------\nself : instance of OPTICS\n The instance.", + "code": " def fit(self, X, y=None):\n \"\"\"Perform OPTICS clustering.\n\n Extracts an ordered list of points and reachability distances, and\n performs initial clustering using ``max_eps`` distance specified at\n OPTICS object instantiation.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features), or \\\n (n_samples, n_samples) if metric=\u2019precomputed\u2019\n A feature array, or array of distances between samples if\n metric='precomputed'.\n\n y : ignored\n Ignored.\n\n Returns\n -------\n self : instance of OPTICS\n The instance.\n \"\"\"\n X = self._validate_data(X, dtype=float)\n\n if self.cluster_method not in ['dbscan', 'xi']:\n raise ValueError(\"cluster_method should be one of\"\n \" 'dbscan' or 'xi' but is %s\" %\n self.cluster_method)\n\n (self.ordering_, self.core_distances_, self.reachability_,\n self.predecessor_) = compute_optics_graph(\n X=X, min_samples=self.min_samples, algorithm=self.algorithm,\n leaf_size=self.leaf_size, metric=self.metric,\n metric_params=self.metric_params, p=self.p, n_jobs=self.n_jobs,\n max_eps=self.max_eps)\n\n # Extract clusters from the calculated orders and reachability\n if self.cluster_method == 'xi':\n labels_, clusters_ = cluster_optics_xi(\n reachability=self.reachability_,\n predecessor=self.predecessor_,\n ordering=self.ordering_,\n min_samples=self.min_samples,\n min_cluster_size=self.min_cluster_size,\n xi=self.xi,\n predecessor_correction=self.predecessor_correction)\n self.cluster_hierarchy_ = clusters_\n elif self.cluster_method == 'dbscan':\n if self.eps is None:\n eps = self.max_eps\n else:\n eps = self.eps\n\n if eps > self.max_eps:\n raise ValueError('Specify an epsilon smaller than %s. Got %s.'\n % (self.max_eps, eps))\n\n labels_ = cluster_optics_dbscan(\n reachability=self.reachability_,\n core_distances=self.core_distances_,\n ordering=self.ordering_, eps=eps)\n\n self.labels_ = labels_\n return self" + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_compute_core_distances_", + "name": "_compute_core_distances_", + "qname": "sklearn.cluster._optics._compute_core_distances_", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._optics/_compute_core_distances_/X", + "name": "X", + "qname": "sklearn.cluster._optics._compute_core_distances_.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_compute_core_distances_/neighbors", + "name": "neighbors", + "qname": "sklearn.cluster._optics._compute_core_distances_.neighbors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "NearestNeighbors instance", + "default_value": "", + "description": "The fitted nearest neighbors estimator." + }, + "type": { + "kind": "NamedType", + "name": "NearestNeighbors instance" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_compute_core_distances_/min_samples", + "name": "min_samples", + "qname": "sklearn.cluster._optics._compute_core_distances_.min_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_compute_core_distances_/working_memory", + "name": "working_memory", + "qname": "sklearn.cluster._optics._compute_core_distances_.working_memory", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The sought maximum memory for temporary distance matrix chunks.\nWhen None (default), the value of\n``sklearn.get_config()['working_memory']`` is used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the k-th nearest neighbor of each sample\n\nEquivalent to neighbors.kneighbors(X, self.min_samples)[0][:, -1]\nbut with more memory efficiency.", + "docstring": "Compute the k-th nearest neighbor of each sample\n\nEquivalent to neighbors.kneighbors(X, self.min_samples)[0][:, -1]\nbut with more memory efficiency.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\nneighbors : NearestNeighbors instance\n The fitted nearest neighbors estimator.\nworking_memory : int, default=None\n The sought maximum memory for temporary distance matrix chunks.\n When None (default), the value of\n ``sklearn.get_config()['working_memory']`` is used.\n\nReturns\n-------\ncore_distances : ndarray of shape (n_samples,)\n Distance at which each sample becomes a core point.\n Points which will never be core have a distance of inf.", + "code": "def _compute_core_distances_(X, neighbors, min_samples, working_memory):\n \"\"\"Compute the k-th nearest neighbor of each sample\n\n Equivalent to neighbors.kneighbors(X, self.min_samples)[0][:, -1]\n but with more memory efficiency.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data.\n neighbors : NearestNeighbors instance\n The fitted nearest neighbors estimator.\n working_memory : int, default=None\n The sought maximum memory for temporary distance matrix chunks.\n When None (default), the value of\n ``sklearn.get_config()['working_memory']`` is used.\n\n Returns\n -------\n core_distances : ndarray of shape (n_samples,)\n Distance at which each sample becomes a core point.\n Points which will never be core have a distance of inf.\n \"\"\"\n n_samples = X.shape[0]\n core_distances = np.empty(n_samples)\n core_distances.fill(np.nan)\n\n chunk_n_rows = get_chunk_n_rows(row_bytes=16 * min_samples,\n max_n_rows=n_samples,\n working_memory=working_memory)\n slices = gen_batches(n_samples, 
chunk_n_rows)\n for sl in slices:\n core_distances[sl] = neighbors.kneighbors(\n X[sl], min_samples)[0][:, -1]\n return core_distances" + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_correct_predecessor", + "name": "_correct_predecessor", + "qname": "sklearn.cluster._optics._correct_predecessor", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._optics/_correct_predecessor/reachability_plot", + "name": "reachability_plot", + "qname": "sklearn.cluster._optics._correct_predecessor.reachability_plot", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_correct_predecessor/predecessor_plot", + "name": "predecessor_plot", + "qname": "sklearn.cluster._optics._correct_predecessor.predecessor_plot", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_correct_predecessor/ordering", + "name": "ordering", + "qname": "sklearn.cluster._optics._correct_predecessor.ordering", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_correct_predecessor/s", + "name": "s", + "qname": "sklearn.cluster._optics._correct_predecessor.s", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_correct_predecessor/e", + "name": "e", + "qname": "sklearn.cluster._optics._correct_predecessor.e", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Correct for predecessors.\n\nApplies Algorithm 2 of [1]_.\n\nInput parameters are ordered by the computed OPTICS ordering.\n\n.. [1] Schubert, Erich, Michael Gertz.\n \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.", + "docstring": "Correct for predecessors.\n\nApplies Algorithm 2 of [1]_.\n\nInput parameters are ordered by the computed OPTICS ordering.\n\n.. [1] Schubert, Erich, Michael Gertz.\n \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.", + "code": "def _correct_predecessor(reachability_plot, predecessor_plot, ordering, s, e):\n \"\"\"Correct for predecessors.\n\n Applies Algorithm 2 of [1]_.\n\n Input parameters are ordered by the computed OPTICS ordering.\n\n .. [1] Schubert, Erich, Michael Gertz.\n \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. 
of\n the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.\n \"\"\"\n while s < e:\n if reachability_plot[s] > reachability_plot[e]:\n return s, e\n p_e = ordering[predecessor_plot[e]]\n for i in range(s, e):\n if p_e == ordering[i]:\n return s, e\n e -= 1\n return None, None" + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_extend_region", + "name": "_extend_region", + "qname": "sklearn.cluster._optics._extend_region", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._optics/_extend_region/steep_point", + "name": "steep_point", + "qname": "sklearn.cluster._optics._extend_region.steep_point", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,), dtype=bool", + "default_value": "", + "description": "True if the point is steep downward (upward)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + }, + { + "kind": "NamedType", + "name": "dtype=bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_extend_region/xward_point", + "name": "xward_point", + "qname": "sklearn.cluster._optics._extend_region.xward_point", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,), dtype=bool", + "default_value": "", + "description": "True if the point is an upward (respectively downward) point." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + }, + { + "kind": "NamedType", + "name": "dtype=bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_extend_region/start", + "name": "start", + "qname": "sklearn.cluster._optics._extend_region.start", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The start of the xward region." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_extend_region/min_samples", + "name": "min_samples", + "qname": "sklearn.cluster._optics._extend_region.min_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The same as the min_samples given to OPTICS. Up and down steep\nregions can't have more than ``min_samples`` consecutive non-steep\npoints." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Extend the area until it's maximal.\n\nIt's the same function for both upward and downward regions, depending on\nthe given input parameters. Assuming:\n\n - steep_{upward/downward}: bool array indicating whether a point is a\n steep {upward/downward};\n - upward/downward: bool array indicating whether a point is\n upward/downward;\n\nTo extend an upward region, ``steep_point=steep_upward`` and\n``xward_point=downward`` are expected, and to extend a downward region,\n``steep_point=steep_downward`` and ``xward_point=upward``.", + "docstring": "Extend the area until it's maximal.\n\nIt's the same function for both upward and downward regions, depending on\nthe given input parameters. 
Assuming:\n\n - steep_{upward/downward}: bool array indicating whether a point is a\n steep {upward/downward};\n - upward/downward: bool array indicating whether a point is\n upward/downward;\n\nTo extend an upward region, ``steep_point=steep_upward`` and\n``xward_point=downward`` are expected, and to extend a downward region,\n``steep_point=steep_downward`` and ``xward_point=upward``.\n\nParameters\n----------\nsteep_point : ndarray of shape (n_samples,), dtype=bool\n True if the point is steep downward (upward).\n\nxward_point : ndarray of shape (n_samples,), dtype=bool\n True if the point is an upward (respectively downward) point.\n\nstart : int\n The start of the xward region.\n\nmin_samples : int\n The same as the min_samples given to OPTICS. Up and down steep\n regions can't have more than ``min_samples`` consecutive non-steep\n points.\n\nReturns\n-------\nindex : int\n The current index iterating over all the samples, i.e. where we are up\n to in our search.\n\nend : int\n The end of the region, which can be behind the index. The region\n includes the ``end`` index.", + "code": "def _extend_region(steep_point, xward_point, start, min_samples):\n \"\"\"Extend the area until it's maximal.\n\n It's the same function for both upward and downward regions, depending on\n the given input parameters. Assuming:\n\n - steep_{upward/downward}: bool array indicating whether a point is a\n steep {upward/downward};\n - upward/downward: bool array indicating whether a point is\n upward/downward;\n\n To extend an upward region, ``steep_point=steep_upward`` and\n ``xward_point=downward`` are expected, and to extend a downward region,\n ``steep_point=steep_downward`` and ``xward_point=upward``.\n\n Parameters\n ----------\n steep_point : ndarray of shape (n_samples,), dtype=bool\n True if the point is steep downward (upward).\n\n xward_point : ndarray of shape (n_samples,), dtype=bool\n True if the point is an upward (respectively downward) point.\n\n start : int\n The start of the xward region.\n\n min_samples : int\n The same as the min_samples given to OPTICS. Up and down steep\n regions can't have more than ``min_samples`` consecutive non-steep\n points.\n\n Returns\n -------\n index : int\n The current index iterating over all the samples, i.e. where we are up\n to in our search.\n\n end : int\n The end of the region, which can be behind the index. 
The region\n includes the ``end`` index.\n \"\"\"\n n_samples = len(steep_point)\n non_xward_points = 0\n index = start\n end = start\n # find a maximal area\n while index < n_samples:\n if steep_point[index]:\n non_xward_points = 0\n end = index\n elif not xward_point[index]:\n # it's not a steep point, but still goes up.\n non_xward_points += 1\n # region should include no more than min_samples consecutive\n # non steep xward points.\n if non_xward_points > min_samples:\n break\n else:\n return end\n index += 1\n return end" + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_extract_xi_labels", + "name": "_extract_xi_labels", + "qname": "sklearn.cluster._optics._extract_xi_labels", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._optics/_extract_xi_labels/ordering", + "name": "ordering", + "qname": "sklearn.cluster._optics._extract_xi_labels.ordering", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The ordering of points calculated by OPTICS" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_extract_xi_labels/clusters", + "name": "clusters", + "qname": "sklearn.cluster._optics._extract_xi_labels.clusters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_clusters, 2)", + "default_value": "", + "description": "List of clusters i.e. (start, end) tuples,\nas returned by `_xi_cluster`." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_clusters, 2)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Extracts the labels from the clusters returned by `_xi_cluster`.\nWe rely on the fact that clusters are stored\nwith the smaller clusters coming before the larger ones.", + "docstring": "Extracts the labels from the clusters returned by `_xi_cluster`.\nWe rely on the fact that clusters are stored\nwith the smaller clusters coming before the larger ones.\n\nParameters\n----------\nordering : array-like of shape (n_samples,)\n The ordering of points calculated by OPTICS\n\nclusters : array-like of shape (n_clusters, 2)\n List of clusters i.e. (start, end) tuples,\n as returned by `_xi_cluster`.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)", + "code": "def _extract_xi_labels(ordering, clusters):\n \"\"\"Extracts the labels from the clusters returned by `_xi_cluster`.\n We rely on the fact that clusters are stored\n with the smaller clusters coming before the larger ones.\n\n Parameters\n ----------\n ordering : array-like of shape (n_samples,)\n The ordering of points calculated by OPTICS\n\n clusters : array-like of shape (n_clusters, 2)\n List of clusters i.e. 
(start, end) tuples,\n as returned by `_xi_cluster`.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n \"\"\"\n\n labels = np.full(len(ordering), -1, dtype=int)\n label = 0\n for c in clusters:\n if not np.any(labels[c[0]:(c[1] + 1)] != -1):\n labels[c[0]:(c[1] + 1)] = label\n label += 1\n labels[ordering] = labels.copy()\n return labels" + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_set_reach_dist", + "name": "_set_reach_dist", + "qname": "sklearn.cluster._optics._set_reach_dist", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._optics/_set_reach_dist/core_distances_", + "name": "core_distances_", + "qname": "sklearn.cluster._optics._set_reach_dist.core_distances_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_set_reach_dist/reachability_", + "name": "reachability_", + "qname": "sklearn.cluster._optics._set_reach_dist.reachability_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_set_reach_dist/predecessor_", + "name": "predecessor_", + "qname": "sklearn.cluster._optics._set_reach_dist.predecessor_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_set_reach_dist/point_index", + "name": "point_index", + "qname": "sklearn.cluster._optics._set_reach_dist.point_index", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_set_reach_dist/processed", + "name": "processed", + "qname": "sklearn.cluster._optics._set_reach_dist.processed", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_set_reach_dist/X", + "name": "X", + "qname": "sklearn.cluster._optics._set_reach_dist.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_set_reach_dist/nbrs", + "name": "nbrs", + "qname": "sklearn.cluster._optics._set_reach_dist.nbrs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_set_reach_dist/metric", + "name": "metric", + "qname": "sklearn.cluster._optics._set_reach_dist.metric", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_set_reach_dist/metric_params", + "name": "metric_params", + "qname": "sklearn.cluster._optics._set_reach_dist.metric_params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", 
+ "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_set_reach_dist/p", + "name": "p", + "qname": "sklearn.cluster._optics._set_reach_dist.p", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_set_reach_dist/max_eps", + "name": "max_eps", + "qname": "sklearn.cluster._optics._set_reach_dist.max_eps", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _set_reach_dist(core_distances_, reachability_, predecessor_,\n point_index, processed, X, nbrs, metric, metric_params,\n p, max_eps):\n P = X[point_index:point_index + 1]\n # Assume that radius_neighbors is faster without distances\n # and we don't need all distances, nevertheless, this means\n # we may be doing some work twice.\n indices = nbrs.radius_neighbors(P, radius=max_eps,\n return_distance=False)[0]\n\n # Getting indices of neighbors that have not been processed\n unproc = np.compress(~np.take(processed, indices), indices)\n # Neighbors of current point are already processed.\n if not unproc.size:\n return\n\n # Only compute distances to unprocessed neighbors:\n if metric == 'precomputed':\n dists = X[point_index, unproc]\n else:\n _params = dict() if metric_params is None else metric_params.copy()\n if metric == 'minkowski' and 'p' not in _params:\n # the same logic as neighbors, p is ignored if explicitly set\n # in the dict params\n _params['p'] = p\n dists = pairwise_distances(P, np.take(X, unproc, axis=0),\n metric=metric, n_jobs=None,\n **_params).ravel()\n\n rdists = np.maximum(dists, core_distances_[point_index])\n np.around(rdists, decimals=np.finfo(rdists.dtype).precision, out=rdists)\n improved = np.where(rdists < np.take(reachability_, unproc))\n reachability_[unproc[improved]] = rdists[improved]\n predecessor_[unproc[improved]] = point_index" + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_update_filter_sdas", + "name": "_update_filter_sdas", + "qname": "sklearn.cluster._optics._update_filter_sdas", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._optics/_update_filter_sdas/sdas", + "name": "sdas", + "qname": "sklearn.cluster._optics._update_filter_sdas.sdas", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_update_filter_sdas/mib", + "name": "mib", + "qname": "sklearn.cluster._optics._update_filter_sdas.mib", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_update_filter_sdas/xi_complement", + "name": "xi_complement", + "qname": "sklearn.cluster._optics._update_filter_sdas.xi_complement", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.cluster._optics/_update_filter_sdas/reachability_plot", + "name": "reachability_plot", + "qname": "sklearn.cluster._optics._update_filter_sdas.reachability_plot", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Update steep down areas (SDAs) using the new maximum in between (mib)\nvalue, and the given complement of xi, i.e. ``1 - xi``.", + "docstring": "Update steep down areas (SDAs) using the new maximum in between (mib)\nvalue, and the given complement of xi, i.e. ``1 - xi``.", + "code": "def _update_filter_sdas(sdas, mib, xi_complement, reachability_plot):\n \"\"\"Update steep down areas (SDAs) using the new maximum in between (mib)\n value, and the given complement of xi, i.e. ``1 - xi``.\n \"\"\"\n if np.isinf(mib):\n return []\n res = [sda for sda in sdas\n if mib <= reachability_plot[sda['start']] * xi_complement]\n for sda in res:\n sda['mib'] = max(sda['mib'], mib)\n return res" + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_validate_size", + "name": "_validate_size", + "qname": "sklearn.cluster._optics._validate_size", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._optics/_validate_size/size", + "name": "size", + "qname": "sklearn.cluster._optics._validate_size.size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_validate_size/n_samples", + "name": "n_samples", + "qname": "sklearn.cluster._optics._validate_size.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_validate_size/param_name", + "name": "param_name", + "qname": "sklearn.cluster._optics._validate_size.param_name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _validate_size(size, n_samples, param_name):\n if size <= 0 or (size !=\n int(size)\n and size > 1):\n raise ValueError('%s must be a positive integer '\n 'or a float between 0 and 1. Got %r' %\n (param_name, size))\n elif size > n_samples:\n raise ValueError('%s must be no greater than the'\n ' number of samples (%d). Got %d' %\n (param_name, n_samples, size))" + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_xi_cluster", + "name": "_xi_cluster", + "qname": "sklearn.cluster._optics._xi_cluster", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._optics/_xi_cluster/reachability_plot", + "name": "reachability_plot", + "qname": "sklearn.cluster._optics._xi_cluster.reachability_plot", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The reachability plot, i.e. reachability ordered according to\nthe calculated ordering, all computed by OPTICS." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_xi_cluster/predecessor_plot", + "name": "predecessor_plot", + "qname": "sklearn.cluster._optics._xi_cluster.predecessor_plot", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Predecessors ordered according to the calculated ordering." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_xi_cluster/ordering", + "name": "ordering", + "qname": "sklearn.cluster._optics._xi_cluster.ordering", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_xi_cluster/xi", + "name": "xi", + "qname": "sklearn.cluster._optics._xi_cluster.xi", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float, between 0 and 1", + "default_value": "", + "description": "Determines the minimum steepness on the reachability plot that\nconstitutes a cluster boundary. For example, an upwards point in the\nreachability plot is defined by the ratio from one point to its\nsuccessor being at most 1-xi." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "between 0 and 1" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_xi_cluster/min_samples", + "name": "min_samples", + "qname": "sklearn.cluster._optics._xi_cluster.min_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int > 1", + "default_value": "", + "description": "The same as the min_samples given to OPTICS. Up and down steep regions\ncan't have more then ``min_samples`` consecutive non-steep points." + }, + "type": { + "kind": "NamedType", + "name": "int > 1" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_xi_cluster/min_cluster_size", + "name": "min_cluster_size", + "qname": "sklearn.cluster._optics._xi_cluster.min_cluster_size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int > 1", + "default_value": "", + "description": "Minimum number of samples in an OPTICS cluster." + }, + "type": { + "kind": "NamedType", + "name": "int > 1" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/_xi_cluster/predecessor_correction", + "name": "predecessor_correction", + "qname": "sklearn.cluster._optics._xi_cluster.predecessor_correction", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Correct clusters based on the calculated predecessors." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Automatically extract clusters according to the Xi-steep method.\n\nThis is rouphly an implementation of Figure 19 of the OPTICS paper.", + "docstring": "Automatically extract clusters according to the Xi-steep method.\n\nThis is rouphly an implementation of Figure 19 of the OPTICS paper.\n\nParameters\n----------\nreachability_plot : array-like of shape (n_samples,)\n The reachability plot, i.e. reachability ordered according to\n the calculated ordering, all computed by OPTICS.\n\npredecessor_plot : array-like of shape (n_samples,)\n Predecessors ordered according to the calculated ordering.\n\nxi : float, between 0 and 1\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n\nmin_samples : int > 1\n The same as the min_samples given to OPTICS. Up and down steep regions\n can't have more then ``min_samples`` consecutive non-steep points.\n\nmin_cluster_size : int > 1\n Minimum number of samples in an OPTICS cluster.\n\npredecessor_correction : bool\n Correct clusters based on the calculated predecessors.\n\nReturns\n-------\nclusters : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of [start, end] in each row, with all\n indices inclusive. The clusters are ordered in a way that larger\n clusters encompassing smaller clusters come after those smaller\n clusters.", + "code": "def _xi_cluster(reachability_plot, predecessor_plot, ordering, xi, min_samples,\n min_cluster_size, predecessor_correction):\n \"\"\"Automatically extract clusters according to the Xi-steep method.\n\n This is rouphly an implementation of Figure 19 of the OPTICS paper.\n\n Parameters\n ----------\n reachability_plot : array-like of shape (n_samples,)\n The reachability plot, i.e. reachability ordered according to\n the calculated ordering, all computed by OPTICS.\n\n predecessor_plot : array-like of shape (n_samples,)\n Predecessors ordered according to the calculated ordering.\n\n xi : float, between 0 and 1\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n\n min_samples : int > 1\n The same as the min_samples given to OPTICS. Up and down steep regions\n can't have more then ``min_samples`` consecutive non-steep points.\n\n min_cluster_size : int > 1\n Minimum number of samples in an OPTICS cluster.\n\n predecessor_correction : bool\n Correct clusters based on the calculated predecessors.\n\n Returns\n -------\n clusters : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of [start, end] in each row, with all\n indices inclusive. The clusters are ordered in a way that larger\n clusters encompassing smaller clusters come after those smaller\n clusters.\n \"\"\"\n\n # Our implementation adds an inf to the end of reachability plot\n # this helps to find potential clusters at the end of the\n # reachability plot even if there's no upward region at the end of it.\n reachability_plot = np.hstack((reachability_plot, np.inf))\n\n xi_complement = 1 - xi\n sdas = [] # steep down areas, introduced in section 4.3.2 of the paper\n clusters = []\n index = 0\n mib = 0. 
# maximum in between, section 4.3.2\n\n # Our implementation corrects a mistake in the original\n # paper, i.e., in Definition 9 steep downward point,\n # r(p) * (1 - xi) <= r(p + 1) should be\n # r(p) * (1 - xi) >= r(p + 1)\n with np.errstate(invalid='ignore'):\n ratio = reachability_plot[:-1] / reachability_plot[1:]\n steep_upward = ratio <= xi_complement\n steep_downward = ratio >= 1 / xi_complement\n downward = ratio > 1\n upward = ratio < 1\n\n # the following loop is almost exactly as Figure 19 of the paper.\n # it jumps over the areas which are not either steep down or up areas\n for steep_index in iter(np.flatnonzero(steep_upward | steep_downward)):\n # just continue if steep_index has been a part of a discovered xward\n # area.\n if steep_index < index:\n continue\n\n mib = max(mib, np.max(reachability_plot[index:steep_index + 1]))\n\n # steep downward areas\n if steep_downward[steep_index]:\n sdas = _update_filter_sdas(sdas, mib, xi_complement,\n reachability_plot)\n D_start = steep_index\n D_end = _extend_region(steep_downward, upward,\n D_start, min_samples)\n D = {'start': D_start, 'end': D_end, 'mib': 0.}\n sdas.append(D)\n index = D_end + 1\n mib = reachability_plot[index]\n\n # steep upward areas\n else:\n sdas = _update_filter_sdas(sdas, mib, xi_complement,\n reachability_plot)\n U_start = steep_index\n U_end = _extend_region(steep_upward, downward, U_start,\n min_samples)\n index = U_end + 1\n mib = reachability_plot[index]\n\n U_clusters = []\n for D in sdas:\n c_start = D['start']\n c_end = U_end\n\n # line (**), sc2*\n if reachability_plot[c_end + 1] * xi_complement < D['mib']:\n continue\n\n # Definition 11: criterion 4\n D_max = reachability_plot[D['start']]\n if D_max * xi_complement >= reachability_plot[c_end + 1]:\n # Find the first index from the left side which is almost\n # at the same level as the end of the detected cluster.\n while (reachability_plot[c_start + 1] >\n reachability_plot[c_end + 1]\n and c_start < D['end']):\n c_start += 1\n elif reachability_plot[c_end + 1] * xi_complement >= D_max:\n # Find the first index from the right side which is almost\n # at the same level as the beginning of the detected\n # cluster.\n # Our implementation corrects a mistake in the original\n # paper, i.e., in Definition 11 4c, r(x) < r(sD) should be\n # r(x) > r(sD).\n while (reachability_plot[c_end - 1] > D_max\n and c_end > U_start):\n c_end -= 1\n\n # predecessor correction\n if predecessor_correction:\n c_start, c_end = _correct_predecessor(reachability_plot,\n predecessor_plot,\n ordering,\n c_start,\n c_end)\n if c_start is None:\n continue\n\n # Definition 11: criterion 3.a\n if c_end - c_start + 1 < min_cluster_size:\n continue\n\n # Definition 11: criterion 1\n if c_start > D['end']:\n continue\n\n # Definition 11: criterion 2\n if c_end < U_start:\n continue\n\n U_clusters.append((c_start, c_end))\n\n # add smaller clusters first.\n U_clusters.reverse()\n clusters.extend(U_clusters)\n\n return np.array(clusters)" + }, + { + "id": "scikit-learn/sklearn.cluster._optics/cluster_optics_dbscan", + "name": "cluster_optics_dbscan", + "qname": "sklearn.cluster._optics.cluster_optics_dbscan", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._optics/cluster_optics_dbscan/reachability", + "name": "reachability", + "qname": "sklearn.cluster._optics.cluster_optics_dbscan.reachability", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array of shape 
(n_samples,)", + "default_value": "", + "description": "Reachability distances calculated by OPTICS (``reachability_``)" + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/cluster_optics_dbscan/core_distances", + "name": "core_distances", + "qname": "sklearn.cluster._optics.cluster_optics_dbscan.core_distances", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Distances at which points become core (``core_distances_``)" + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/cluster_optics_dbscan/ordering", + "name": "ordering", + "qname": "sklearn.cluster._optics.cluster_optics_dbscan.ordering", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "OPTICS ordered point indices (``ordering_``)" + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/cluster_optics_dbscan/eps", + "name": "eps", + "qname": "sklearn.cluster._optics.cluster_optics_dbscan.eps", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "DBSCAN ``eps`` parameter. Must be set to < ``max_eps``. Results\nwill be close to DBSCAN algorithm if ``eps`` and ``max_eps`` are close\nto one another." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Performs DBSCAN extraction for an arbitrary epsilon.\n\nExtracting the clusters runs in linear time. Note that this results in\n``labels_`` which are close to a :class:`~sklearn.cluster.DBSCAN` with\nsimilar settings and ``eps``, only if ``eps`` is close to ``max_eps``.", + "docstring": "Performs DBSCAN extraction for an arbitrary epsilon.\n\nExtracting the clusters runs in linear time. Note that this results in\n``labels_`` which are close to a :class:`~sklearn.cluster.DBSCAN` with\nsimilar settings and ``eps``, only if ``eps`` is close to ``max_eps``.\n\nParameters\n----------\nreachability : array of shape (n_samples,)\n Reachability distances calculated by OPTICS (``reachability_``)\n\ncore_distances : array of shape (n_samples,)\n Distances at which points become core (``core_distances_``)\n\nordering : array of shape (n_samples,)\n OPTICS ordered point indices (``ordering_``)\n\neps : float\n DBSCAN ``eps`` parameter. Must be set to < ``max_eps``. Results\n will be close to DBSCAN algorithm if ``eps`` and ``max_eps`` are close\n to one another.\n\nReturns\n-------\nlabels_ : array of shape (n_samples,)\n The estimated labels.", + "code": "@_deprecate_positional_args\ndef cluster_optics_dbscan(*, reachability, core_distances, ordering, eps):\n \"\"\"Performs DBSCAN extraction for an arbitrary epsilon.\n\n Extracting the clusters runs in linear time. 
Note that this results in\n ``labels_`` which are close to a :class:`~sklearn.cluster.DBSCAN` with\n similar settings and ``eps``, only if ``eps`` is close to ``max_eps``.\n\n Parameters\n ----------\n reachability : array of shape (n_samples,)\n Reachability distances calculated by OPTICS (``reachability_``)\n\n core_distances : array of shape (n_samples,)\n Distances at which points become core (``core_distances_``)\n\n ordering : array of shape (n_samples,)\n OPTICS ordered point indices (``ordering_``)\n\n eps : float\n DBSCAN ``eps`` parameter. Must be set to < ``max_eps``. Results\n will be close to DBSCAN algorithm if ``eps`` and ``max_eps`` are close\n to one another.\n\n Returns\n -------\n labels_ : array of shape (n_samples,)\n The estimated labels.\n\n \"\"\"\n n_samples = len(core_distances)\n labels = np.zeros(n_samples, dtype=int)\n\n far_reach = reachability > eps\n near_core = core_distances <= eps\n labels[ordering] = np.cumsum(far_reach[ordering] & near_core[ordering]) - 1\n labels[far_reach & ~near_core] = -1\n return labels" + }, + { + "id": "scikit-learn/sklearn.cluster._optics/cluster_optics_xi", + "name": "cluster_optics_xi", + "qname": "sklearn.cluster._optics.cluster_optics_xi", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._optics/cluster_optics_xi/reachability", + "name": "reachability", + "qname": "sklearn.cluster._optics.cluster_optics_xi.reachability", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Reachability distances calculated by OPTICS (`reachability_`)" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/cluster_optics_xi/predecessor", + "name": "predecessor", + "qname": "sklearn.cluster._optics.cluster_optics_xi.predecessor", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Predecessors calculated by OPTICS." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/cluster_optics_xi/ordering", + "name": "ordering", + "qname": "sklearn.cluster._optics.cluster_optics_xi.ordering", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "OPTICS ordered point indices (`ordering_`)" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/cluster_optics_xi/min_samples", + "name": "min_samples", + "qname": "sklearn.cluster._optics.cluster_optics_xi.min_samples", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int > 1 or float between 0 and 1", + "default_value": "", + "description": "The same as the min_samples given to OPTICS. Up and down steep regions\ncan't have more than ``min_samples`` consecutive non-steep points.\nExpressed as an absolute number or a fraction of the number of samples\n(rounded to be at least 2)." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int > 1" + }, + { + "kind": "NamedType", + "name": "float between 0 and 1" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/cluster_optics_xi/min_cluster_size", + "name": "min_cluster_size", + "qname": "sklearn.cluster._optics.cluster_optics_xi.min_cluster_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int > 1 or float between 0 and 1", + "default_value": "None", + "description": "Minimum number of samples in an OPTICS cluster, expressed as an\nabsolute number or a fraction of the number of samples (rounded to be\nat least 2). If ``None``, the value of ``min_samples`` is used instead." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int > 1" + }, + { + "kind": "NamedType", + "name": "float between 0 and 1" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/cluster_optics_xi/xi", + "name": "xi", + "qname": "sklearn.cluster._optics.cluster_optics_xi.xi", + "default_value": "0.05", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float between 0 and 1", + "default_value": "0.05", + "description": "Determines the minimum steepness on the reachability plot that\nconstitutes a cluster boundary. For example, an upwards point in the\nreachability plot is defined by the ratio from one point to its\nsuccessor being at most 1-xi." + }, + "type": { + "kind": "NamedType", + "name": "float between 0 and 1" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/cluster_optics_xi/predecessor_correction", + "name": "predecessor_correction", + "qname": "sklearn.cluster._optics.cluster_optics_xi.predecessor_correction", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Correct clusters based on the calculated predecessors." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Automatically extract clusters according to the Xi-steep method.", + "docstring": "Automatically extract clusters according to the Xi-steep method.\n\nParameters\n----------\nreachability : ndarray of shape (n_samples,)\n Reachability distances calculated by OPTICS (`reachability_`)\n\npredecessor : ndarray of shape (n_samples,)\n Predecessors calculated by OPTICS.\n\nordering : ndarray of shape (n_samples,)\n OPTICS ordered point indices (`ordering_`)\n\nmin_samples : int > 1 or float between 0 and 1\n The same as the min_samples given to OPTICS. Up and down steep regions\n can't have more then ``min_samples`` consecutive non-steep points.\n Expressed as an absolute number or a fraction of the number of samples\n (rounded to be at least 2).\n\nmin_cluster_size : int > 1 or float between 0 and 1, default=None\n Minimum number of samples in an OPTICS cluster, expressed as an\n absolute number or a fraction of the number of samples (rounded to be\n at least 2). If ``None``, the value of ``min_samples`` is used instead.\n\nxi : float between 0 and 1, default=0.05\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. 
For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n\npredecessor_correction : bool, default=True\n Correct clusters based on the calculated predecessors.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n The labels assigned to samples. Points which are not included\n in any cluster are labeled as -1.\n\nclusters : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of ``[start, end]`` in each row, with\n all indices inclusive. The clusters are ordered according to ``(end,\n -start)`` (ascending) so that larger clusters encompassing smaller\n clusters come after such nested smaller clusters. Since ``labels`` does\n not reflect the hierarchy, usually ``len(clusters) >\n np.unique(labels)``.", + "code": "def cluster_optics_xi(*, reachability, predecessor, ordering, min_samples,\n min_cluster_size=None, xi=0.05,\n predecessor_correction=True):\n \"\"\"Automatically extract clusters according to the Xi-steep method.\n\n Parameters\n ----------\n reachability : ndarray of shape (n_samples,)\n Reachability distances calculated by OPTICS (`reachability_`)\n\n predecessor : ndarray of shape (n_samples,)\n Predecessors calculated by OPTICS.\n\n ordering : ndarray of shape (n_samples,)\n OPTICS ordered point indices (`ordering_`)\n\n min_samples : int > 1 or float between 0 and 1\n The same as the min_samples given to OPTICS. Up and down steep regions\n can't have more than ``min_samples`` consecutive non-steep points.\n Expressed as an absolute number or a fraction of the number of samples\n (rounded to be at least 2).\n\n min_cluster_size : int > 1 or float between 0 and 1, default=None\n Minimum number of samples in an OPTICS cluster, expressed as an\n absolute number or a fraction of the number of samples (rounded to be\n at least 2). If ``None``, the value of ``min_samples`` is used instead.\n\n xi : float between 0 and 1, default=0.05\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n\n predecessor_correction : bool, default=True\n Correct clusters based on the calculated predecessors.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n The labels assigned to samples. Points which are not included\n in any cluster are labeled as -1.\n\n clusters : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of ``[start, end]`` in each row, with\n all indices inclusive. The clusters are ordered according to ``(end,\n -start)`` (ascending) so that larger clusters encompassing smaller\n clusters come after such nested smaller clusters. 
Since ``labels`` does\n not reflect the hierarchy, usually ``len(clusters) >\n np.unique(labels)``.\n \"\"\"\n n_samples = len(reachability)\n _validate_size(min_samples, n_samples, 'min_samples')\n if min_samples <= 1:\n min_samples = max(2, int(min_samples * n_samples))\n if min_cluster_size is None:\n min_cluster_size = min_samples\n _validate_size(min_cluster_size, n_samples, 'min_cluster_size')\n if min_cluster_size <= 1:\n min_cluster_size = max(2, int(min_cluster_size * n_samples))\n\n clusters = _xi_cluster(reachability[ordering], predecessor[ordering],\n ordering, xi,\n min_samples, min_cluster_size,\n predecessor_correction)\n labels = _extract_xi_labels(ordering, clusters)\n return labels, clusters" + }, + { + "id": "scikit-learn/sklearn.cluster._optics/compute_optics_graph", + "name": "compute_optics_graph", + "qname": "sklearn.cluster._optics.compute_optics_graph", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._optics/compute_optics_graph/X", + "name": "X", + "qname": "sklearn.cluster._optics.compute_optics_graph.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features), or (n_samples, n_samples) if metric=\u2019precomputed\u2019.", + "default_value": "", + "description": "A feature array, or array of distances between samples if\nmetric='precomputed'" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "(n_samples, n_samples) if metric=\u2019precomputed\u2019." + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/compute_optics_graph/min_samples", + "name": "min_samples", + "qname": "sklearn.cluster._optics.compute_optics_graph.min_samples", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int > 1 or float between 0 and 1", + "default_value": "", + "description": "The number of samples in a neighborhood for a point to be considered\nas a core point. Expressed as an absolute number or a fraction of the\nnumber of samples (rounded to be at least 2)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int > 1" + }, + { + "kind": "NamedType", + "name": "float between 0 and 1" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/compute_optics_graph/max_eps", + "name": "max_eps", + "qname": "sklearn.cluster._optics.compute_optics_graph.max_eps", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "np.inf", + "description": "The maximum distance between two samples for one to be considered as\nin the neighborhood of the other. Default value of ``np.inf`` will\nidentify clusters across all scales; reducing ``max_eps`` will result\nin shorter run times." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/compute_optics_graph/metric", + "name": "metric", + "qname": "sklearn.cluster._optics.compute_optics_graph.metric", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'minkowski'", + "description": "Metric to use for distance computation. 
Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string. If metric is\n\"precomputed\", X is assumed to be a distance matrix and must be square.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/compute_optics_graph/p", + "name": "p", + "qname": "sklearn.cluster._optics.compute_optics_graph.p", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Parameter for the Minkowski metric from\n:class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/compute_optics_graph/metric_params", + "name": "metric_params", + "qname": "sklearn.cluster._optics.compute_optics_graph.metric_params", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional keyword arguments for the metric function." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/compute_optics_graph/algorithm", + "name": "algorithm", + "qname": "sklearn.cluster._optics.compute_optics_graph.algorithm", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", + "default_value": "'auto'", + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method. (default)\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/compute_optics_graph/leaf_size", + "name": "leaf_size", + "qname": "sklearn.cluster._optics.compute_optics_graph.leaf_size", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Leaf size passed to :class:`BallTree` or :class:`KDTree`. 
This can\naffect the speed of the construction and query, as well as the memory\nrequired to store the tree. The optimal value depends on the\nnature of the problem." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._optics/compute_optics_graph/n_jobs", + "name": "n_jobs", + "qname": "sklearn.cluster._optics.compute_optics_graph.n_jobs", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the OPTICS reachability graph.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Computes the OPTICS reachability graph.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features), or (n_samples, n_samples) if metric=\u2019precomputed\u2019.\n A feature array, or array of distances between samples if\n metric='precomputed'\n\nmin_samples : int > 1 or float between 0 and 1\n The number of samples in a neighborhood for a point to be considered\n as a core point. Expressed as an absolute number or a fraction of the\n number of samples (rounded to be at least 2).\n\nmax_eps : float, default=np.inf\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. Default value of ``np.inf`` will\n identify clusters across all scales; reducing ``max_eps`` will result\n in shorter run times.\n\nmetric : str or callable, default='minkowski'\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string. If metric is\n \"precomputed\", X is assumed to be a distance matrix and must be square.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. 
For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method. (default)\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nordering_ : array of shape (n_samples,)\n The cluster ordered list of sample indices.\n\ncore_distances_ : array of shape (n_samples,)\n Distance at which each sample becomes a core point, indexed by object\n order. Points which will never be core have a distance of inf. Use\n ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\nreachability_ : array of shape (n_samples,)\n Reachability distances per sample, indexed by object order. Use\n ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\npredecessor_ : array of shape (n_samples,)\n Point that a sample was reached from, indexed by object order.\n Seed points have a predecessor of -1.\n\nReferences\n----------\n.. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,\n and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n structure.\" ACM SIGMOD Record 28, no. 2 (1999): 49-60.", + "code": "@_deprecate_positional_args\ndef compute_optics_graph(X, *, min_samples, max_eps, metric, p, metric_params,\n algorithm, leaf_size, n_jobs):\n \"\"\"Computes the OPTICS reachability graph.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features), or \\\n (n_samples, n_samples) if metric=\u2019precomputed\u2019.\n A feature array, or array of distances between samples if\n metric='precomputed'\n\n min_samples : int > 1 or float between 0 and 1\n The number of samples in a neighborhood for a point to be considered\n as a core point. Expressed as an absolute number or a fraction of the\n number of samples (rounded to be at least 2).\n\n max_eps : float, default=np.inf\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. Default value of ``np.inf`` will\n identify clusters across all scales; reducing ``max_eps`` will result\n in shorter run times.\n\n metric : str or callable, default='minkowski'\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string. 
If metric is\n \"precomputed\", X is assumed to be a distance matrix and must be square.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\n p : int, default=2\n Parameter for the Minkowski metric from\n :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n metric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method. (default)\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\n leaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\n n_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Returns\n -------\n ordering_ : array of shape (n_samples,)\n The cluster ordered list of sample indices.\n\n core_distances_ : array of shape (n_samples,)\n Distance at which each sample becomes a core point, indexed by object\n order. Points which will never be core have a distance of inf. Use\n ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\n reachability_ : array of shape (n_samples,)\n Reachability distances per sample, indexed by object order. Use\n ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\n predecessor_ : array of shape (n_samples,)\n Point that a sample was reached from, indexed by object order.\n Seed points have a predecessor of -1.\n\n References\n ----------\n .. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,\n and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n structure.\" ACM SIGMOD Record 28, no. 
2 (1999): 49-60.\n \"\"\"\n n_samples = X.shape[0]\n _validate_size(min_samples, n_samples, 'min_samples')\n if min_samples <= 1:\n min_samples = max(2, int(min_samples * n_samples))\n\n # Start all points as 'unprocessed' ##\n reachability_ = np.empty(n_samples)\n reachability_.fill(np.inf)\n predecessor_ = np.empty(n_samples, dtype=int)\n predecessor_.fill(-1)\n\n nbrs = NearestNeighbors(n_neighbors=min_samples,\n algorithm=algorithm,\n leaf_size=leaf_size,\n metric=metric,\n metric_params=metric_params,\n p=p,\n n_jobs=n_jobs)\n\n nbrs.fit(X)\n # Here we first do a kNN query for each point, this differs from\n # the original OPTICS that only used epsilon range queries.\n # TODO: handle working_memory somehow?\n core_distances_ = _compute_core_distances_(X=X, neighbors=nbrs,\n min_samples=min_samples,\n working_memory=None)\n # OPTICS puts an upper limit on these, use inf for undefined.\n core_distances_[core_distances_ > max_eps] = np.inf\n np.around(core_distances_,\n decimals=np.finfo(core_distances_.dtype).precision,\n out=core_distances_)\n\n # Main OPTICS loop. Not parallelizable. The order that entries are\n # written to the 'ordering_' list is important!\n # Note that this implementation is O(n^2) theoretically, but\n # supposedly with very low constant factors.\n processed = np.zeros(X.shape[0], dtype=bool)\n ordering = np.zeros(X.shape[0], dtype=int)\n for ordering_idx in range(X.shape[0]):\n # Choose next based on smallest reachability distance\n # (And prefer smaller ids on ties, possibly np.inf!)\n index = np.where(processed == 0)[0]\n point = index[np.argmin(reachability_[index])]\n\n processed[point] = True\n ordering[ordering_idx] = point\n if core_distances_[point] != np.inf:\n _set_reach_dist(core_distances_=core_distances_,\n reachability_=reachability_,\n predecessor_=predecessor_,\n point_index=point,\n processed=processed, X=X, nbrs=nbrs,\n metric=metric, metric_params=metric_params,\n p=p, max_eps=max_eps)\n if np.all(np.isinf(reachability_)):\n warnings.warn(\"All reachability values are inf. Set a larger\"\n \" max_eps or all data will be considered outliers.\",\n UserWarning)\n return ordering, core_distances_, reachability_, predecessor_" + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__", + "name": "__init__", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/self", + "name": "self", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.n_clusters", + "default_value": "8", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "8", + "description": "The dimension of the projection subspace." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/eigen_solver", + "name": "eigen_solver", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.eigen_solver", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'arpack', 'lobpcg', 'amg'}", + "default_value": "None", + "description": "The eigenvalue decomposition strategy to use. AMG requires pyamg\nto be installed. It can be faster on very large, sparse problems,\nbut may also lead to instabilities. If None, then ``'arpack'`` is\nused." + }, + "type": { + "kind": "EnumType", + "values": ["lobpcg", "arpack", "amg"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/n_components", + "name": "n_components", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.n_components", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "n_clusters", + "description": "Number of eigenvectors to use for the spectral embedding" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/random_state", + "name": "random_state", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "A pseudo random number generator used for the initialization of the\nlobpcg eigenvectors decomposition when ``eigen_solver='amg'`` and by\nthe K-Means initialization. Use an int to make the randomness\ndeterministic.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/n_init", + "name": "n_init", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.n_init", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of times the k-means algorithm will be run with different\ncentroid seeds. The final results will be the best output of n_init\nconsecutive runs in terms of inertia. Only used if\n``assign_labels='kmeans'``." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/gamma", + "name": "gamma", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.gamma", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.\nIgnored for ``affinity='nearest_neighbors'``." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/affinity", + "name": "affinity", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.affinity", + "default_value": "'rbf'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'rbf'", + "description": "How to construct the affinity matrix.\n - 'nearest_neighbors': construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf': construct the affinity matrix using a radial basis function\n (RBF) kernel.\n - 'precomputed': interpret ``X`` as a precomputed affinity matrix,\n where larger values indicate greater similarity between instances.\n - 'precomputed_nearest_neighbors': interpret ``X`` as a sparse graph\n of precomputed distances, and construct a binary affinity matrix\n from the ``n_neighbors`` nearest neighbors of each instance.\n - one of the kernels supported by\n :func:`~sklearn.metrics.pairwise_kernels`.\n\nOnly kernels that produce similarity scores (non-negative values that\nincrease with similarity) should be used. This property is not checked\nby the clustering algorithm." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.n_neighbors", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of neighbors to use when constructing the affinity matrix using\nthe nearest neighbors method. Ignored for ``affinity='rbf'``." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/eigen_tol", + "name": "eigen_tol", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.eigen_tol", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Stopping criterion for eigendecomposition of the Laplacian matrix\nwhen ``eigen_solver='arpack'``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/assign_labels", + "name": "assign_labels", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.assign_labels", + "default_value": "'kmeans'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'kmeans', 'discretize'}", + "default_value": "'kmeans'", + "description": "The strategy for assigning labels in the embedding space. There are two\nways to assign labels after the Laplacian embedding. k-means is a\npopular choice, but it can be sensitive to initialization.\nDiscretization is another approach which is less sensitive to random\ninitialization." 
+ }, + "type": { + "kind": "EnumType", + "values": ["kmeans", "discretize"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/degree", + "name": "degree", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.degree", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "3", + "description": "Degree of the polynomial kernel. Ignored by other kernels." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/coef0", + "name": "coef0", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.coef0", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1", + "description": "Zero coefficient for polynomial and sigmoid kernels.\nIgnored by other kernels." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/kernel_params", + "name": "kernel_params", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.kernel_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict of str to any", + "default_value": "None", + "description": "Parameters (keyword arguments) and values for kernel passed as\ncallable object. Ignored by other kernels." + }, + "type": { + "kind": "NamedType", + "name": "dict of str to any" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run when `affinity='nearest_neighbors'`\nor `affinity='precomputed_nearest_neighbors'`. The neighbors search\nwill be done in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/__init__/verbose", + "name": "verbose", + "qname": "sklearn.cluster._spectral.SpectralClustering.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Verbosity mode.\n\n.. 
versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex, or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster, such as when clusters are\nnested circles on the 2D plane.\n\nIf the affinity matrix is the adjacency matrix of a graph, this method\ncan be used to find normalized graph cuts.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\na kernel function such as the Gaussian (aka RBF) kernel with Euclidean\ndistance ``d(X, X)``::\n\n np.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, a user-provided affinity matrix can be specified by\nsetting ``affinity='precomputed'``.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_clusters=8, *, eigen_solver=None, n_components=None,\n random_state=None, n_init=10, gamma=1., affinity='rbf',\n n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans',\n degree=3, coef0=1, kernel_params=None, n_jobs=None,\n verbose=False):\n self.n_clusters = n_clusters\n self.eigen_solver = eigen_solver\n self.n_components = n_components\n self.random_state = random_state\n self.n_init = n_init\n self.gamma = gamma\n self.affinity = affinity\n self.n_neighbors = n_neighbors\n self.eigen_tol = eigen_tol\n self.assign_labels = assign_labels\n self.degree = degree\n self.coef0 = coef0\n self.kernel_params = kernel_params\n self.n_jobs = n_jobs\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/_more_tags", + "name": "_more_tags", + "qname": "sklearn.cluster._spectral.SpectralClustering._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/_more_tags/self", + "name": "self", + "qname": "sklearn.cluster._spectral.SpectralClustering._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'pairwise': self.affinity in [\"precomputed\",\n \"precomputed_nearest_neighbors\"]}" + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.cluster._spectral.SpectralClustering._pairwise", + "decorators": [ + "deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/_pairwise/self", + "name": "self", + "qname": "sklearn.cluster._spectral.SpectralClustering._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be 
removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n return self.affinity in [\"precomputed\",\n \"precomputed_nearest_neighbors\"]" + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/fit", + "name": "fit", + "qname": "sklearn.cluster._spectral.SpectralClustering.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/fit/self", + "name": "self", + "qname": "sklearn.cluster._spectral.SpectralClustering.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/fit/X", + "name": "X", + "qname": "sklearn.cluster._spectral.SpectralClustering.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples)", + "default_value": "", + "description": "Training instances to cluster, similarities / affinities between\ninstances if ``affinity='precomputed'``, or distances between\ninstances if ``affinity='precomputed_nearest_neighbors'``. If a\nsparse matrix is provided in a format other than ``csr_matrix``,\n``csc_matrix``, or ``coo_matrix``, it will be converted into a\nsparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples, n_samples)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/fit/y", + "name": "y", + "qname": "sklearn.cluster._spectral.SpectralClustering.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform spectral clustering from features, or affinity matrix.", + "docstring": "Perform spectral clustering from features, or affinity matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples)\n Training instances to cluster, similarities / affinities between\n instances if ``affinity='precomputed'``, or distances between\n instances if ``affinity='precomputed_nearest_neighbors'``. If a\n sparse matrix is provided in a format other than ``csr_matrix``,\n ``csc_matrix``, or ``coo_matrix``, it will be converted into a\n sparse ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"Perform spectral clustering from features, or affinity matrix.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples)\n Training instances to cluster, similarities / affinities between\n instances if ``affinity='precomputed'``, or distances between\n instances if ``affinity='precomputed_nearest_neighbors'``. 
If a\n sparse matrix is provided in a format other than ``csr_matrix``,\n ``csc_matrix``, or ``coo_matrix``, it will be converted into a\n sparse ``csr_matrix``.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n self\n\n \"\"\"\n X = self._validate_data(X, accept_sparse=['csr', 'csc', 'coo'],\n dtype=np.float64, ensure_min_samples=2)\n allow_squared = self.affinity in [\"precomputed\",\n \"precomputed_nearest_neighbors\"]\n if X.shape[0] == X.shape[1] and not allow_squared:\n warnings.warn(\"The spectral clustering API has changed. ``fit`` \"\n \"now constructs an affinity matrix from data. To use\"\n \" a custom affinity matrix, \"\n \"set ``affinity='precomputed'``.\")\n\n if self.affinity == 'nearest_neighbors':\n connectivity = kneighbors_graph(X, n_neighbors=self.n_neighbors,\n include_self=True,\n n_jobs=self.n_jobs)\n self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n elif self.affinity == 'precomputed_nearest_neighbors':\n estimator = NearestNeighbors(n_neighbors=self.n_neighbors,\n n_jobs=self.n_jobs,\n metric=\"precomputed\").fit(X)\n connectivity = estimator.kneighbors_graph(X=X, mode='connectivity')\n self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n elif self.affinity == 'precomputed':\n self.affinity_matrix_ = X\n else:\n params = self.kernel_params\n if params is None:\n params = {}\n if not callable(self.affinity):\n params['gamma'] = self.gamma\n params['degree'] = self.degree\n params['coef0'] = self.coef0\n self.affinity_matrix_ = pairwise_kernels(X, metric=self.affinity,\n filter_params=True,\n **params)\n\n random_state = check_random_state(self.random_state)\n self.labels_ = spectral_clustering(self.affinity_matrix_,\n n_clusters=self.n_clusters,\n n_components=self.n_components,\n eigen_solver=self.eigen_solver,\n random_state=random_state,\n n_init=self.n_init,\n eigen_tol=self.eigen_tol,\n assign_labels=self.assign_labels,\n verbose=self.verbose)\n return self" + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/fit_predict", + "name": "fit_predict", + "qname": "sklearn.cluster._spectral.SpectralClustering.fit_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/fit_predict/self", + "name": "self", + "qname": "sklearn.cluster._spectral.SpectralClustering.fit_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/fit_predict/X", + "name": "X", + "qname": "sklearn.cluster._spectral.SpectralClustering.fit_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples)", + "default_value": "", + "description": "Training instances to cluster, similarities / affinities between\ninstances if ``affinity='precomputed'``, or distances between\ninstances if ``affinity='precomputed_nearest_neighbors'``. If a\nsparse matrix is provided in a format other than ``csr_matrix``,\n``csc_matrix``, or ``coo_matrix``, it will be converted into a\nsparse ``csr_matrix``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples, n_samples)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/SpectralClustering/fit_predict/y", + "name": "y", + "qname": "sklearn.cluster._spectral.SpectralClustering.fit_predict.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present here for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform spectral clustering from features, or affinity matrix,\nand return cluster labels.", + "docstring": "Perform spectral clustering from features, or affinity matrix,\nand return cluster labels.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples)\n Training instances to cluster, similarities / affinities between\n instances if ``affinity='precomputed'``, or distances between\n instances if ``affinity='precomputed_nearest_neighbors'``. If a\n sparse matrix is provided in a format other than ``csr_matrix``,\n ``csc_matrix``, or ``coo_matrix``, it will be converted into a\n sparse ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels.", + "code": " def fit_predict(self, X, y=None):\n \"\"\"Perform spectral clustering from features, or affinity matrix,\n and return cluster labels.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples)\n Training instances to cluster, similarities / affinities between\n instances if ``affinity='precomputed'``, or distances between\n instances if ``affinity='precomputed_nearest_neighbors'``. If a\n sparse matrix is provided in a format other than ``csr_matrix``,\n ``csc_matrix``, or ``coo_matrix``, it will be converted into a\n sparse ``csr_matrix``.\n\n y : Ignored\n Not used, present here for API consistency by convention.\n\n Returns\n -------\n labels : ndarray of shape (n_samples,)\n Cluster labels.\n \"\"\"\n return super().fit_predict(X, y)" + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/discretize", + "name": "discretize", + "qname": "sklearn.cluster._spectral.discretize", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._spectral/discretize/vectors", + "name": "vectors", + "qname": "sklearn.cluster._spectral.discretize.vectors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_clusters)", + "default_value": "", + "description": "The embedding space of the samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_clusters)" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/discretize/copy", + "name": "copy", + "qname": "sklearn.cluster._spectral.discretize.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to copy vectors, or perform in-place normalization." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/discretize/max_svd_restarts", + "name": "max_svd_restarts", + "qname": "sklearn.cluster._spectral.discretize.max_svd_restarts", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Maximum number of attempts to restart SVD if convergence fails" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/discretize/n_iter_max", + "name": "n_iter_max", + "qname": "sklearn.cluster._spectral.discretize.n_iter_max", + "default_value": "20", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "20", + "description": "Maximum number of iterations to attempt in rotation and partition\nmatrix search if machine precision convergence is not reached" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/discretize/random_state", + "name": "random_state", + "qname": "sklearn.cluster._spectral.discretize.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Determines random number generation for rotation matrix initialization.\nUse an int to make the randomness deterministic.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Search for a partition matrix (clustering) which is closest to the\neigenvector embedding.", + "docstring": "Search for a partition matrix (clustering) which is closest to the\neigenvector embedding.\n\nParameters\n----------\nvectors : array-like of shape (n_samples, n_clusters)\n The embedding space of the samples.\n\ncopy : bool, default=True\n Whether to copy vectors, or perform in-place normalization.\n\nmax_svd_restarts : int, default=30\n Maximum number of attempts to restart SVD if convergence fails\n\nn_iter_max : int, default=20\n Maximum number of iterations to attempt in rotation and partition\n matrix search if machine precision convergence is not reached\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for rotation matrix initialization.\n Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nReturns\n-------\nlabels : array of integers, shape: n_samples\n The labels of the clusters.\n\nReferences\n----------\n\n- Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf\n\nNotes\n-----\n\nThe eigenvector embedding is used to iteratively search for the\nclosest discrete partition. First, the eigenvector embedding is\nnormalized to the space of partition matrices. An optimal discrete\npartition matrix closest to this normalized embedding multiplied by\nan initial rotation is calculated. Fixing this discrete partition\nmatrix, an optimal rotation matrix is calculated. These two\ncalculations are performed until convergence. The discrete partition\nmatrix is returned as the clustering solution. 
Used in spectral\nclustering, this method tends to be faster and more robust to random\ninitialization than k-means.", + "code": "@_deprecate_positional_args\ndef discretize(vectors, *, copy=True, max_svd_restarts=30, n_iter_max=20,\n random_state=None):\n \"\"\"Search for a partition matrix (clustering) which is closest to the\n eigenvector embedding.\n\n Parameters\n ----------\n vectors : array-like of shape (n_samples, n_clusters)\n The embedding space of the samples.\n\n copy : bool, default=True\n Whether to copy vectors, or perform in-place normalization.\n\n max_svd_restarts : int, default=30\n Maximum number of attempts to restart SVD if convergence fails\n\n n_iter_max : int, default=20\n Maximum number of iterations to attempt in rotation and partition\n matrix search if machine precision convergence is not reached\n\n random_state : int, RandomState instance, default=None\n Determines random number generation for rotation matrix initialization.\n Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\n Returns\n -------\n labels : array of integers, shape: n_samples\n The labels of the clusters.\n\n References\n ----------\n\n - Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf\n\n Notes\n -----\n\n The eigenvector embedding is used to iteratively search for the\n closest discrete partition. First, the eigenvector embedding is\n normalized to the space of partition matrices. An optimal discrete\n partition matrix closest to this normalized embedding multiplied by\n an initial rotation is calculated. Fixing this discrete partition\n matrix, an optimal rotation matrix is calculated. These two\n calculations are performed until convergence. The discrete partition\n matrix is returned as the clustering solution. Used in spectral\n clustering, this method tends to be faster and more robust to random\n initialization than k-means.\n\n \"\"\"\n\n from scipy.sparse import csc_matrix\n from scipy.linalg import LinAlgError\n\n random_state = check_random_state(random_state)\n\n vectors = as_float_array(vectors, copy=copy)\n\n eps = np.finfo(float).eps\n n_samples, n_components = vectors.shape\n\n # Normalize the eigenvectors to an equal length of a vector of ones.\n # Reorient the eigenvectors to point in the negative direction with respect\n # to the first element. This may have to do with constraining the\n # eigenvectors to lie in a specific quadrant to make the discretization\n # search easier.\n norm_ones = np.sqrt(n_samples)\n for i in range(vectors.shape[1]):\n vectors[:, i] = (vectors[:, i] / np.linalg.norm(vectors[:, i])) \\\n * norm_ones\n if vectors[0, i] != 0:\n vectors[:, i] = -1 * vectors[:, i] * np.sign(vectors[0, i])\n\n # Normalize the rows of the eigenvectors. Samples should lie on the unit\n # hypersphere centered at the origin. 
This transforms the samples in the\n # embedding space to the space of partition matrices.\n vectors = vectors / np.sqrt((vectors ** 2).sum(axis=1))[:, np.newaxis]\n\n svd_restarts = 0\n has_converged = False\n\n # If there is an exception we try to randomize and rerun SVD again\n # do this max_svd_restarts times.\n while (svd_restarts < max_svd_restarts) and not has_converged:\n\n # Initialize first column of rotation matrix with a row of the\n # eigenvectors\n rotation = np.zeros((n_components, n_components))\n rotation[:, 0] = vectors[random_state.randint(n_samples), :].T\n\n # To initialize the rest of the rotation matrix, find the rows\n # of the eigenvectors that are as orthogonal to each other as\n # possible\n c = np.zeros(n_samples)\n for j in range(1, n_components):\n # Accumulate c to ensure row is as orthogonal as possible to\n # previous picks as well as current one\n c += np.abs(np.dot(vectors, rotation[:, j - 1]))\n rotation[:, j] = vectors[c.argmin(), :].T\n\n last_objective_value = 0.0\n n_iter = 0\n\n while not has_converged:\n n_iter += 1\n\n t_discrete = np.dot(vectors, rotation)\n\n labels = t_discrete.argmax(axis=1)\n vectors_discrete = csc_matrix(\n (np.ones(len(labels)), (np.arange(0, n_samples), labels)),\n shape=(n_samples, n_components))\n\n t_svd = vectors_discrete.T * vectors\n\n try:\n U, S, Vh = np.linalg.svd(t_svd)\n except LinAlgError:\n svd_restarts += 1\n print(\"SVD did not converge, randomizing and trying again\")\n break\n\n ncut_value = 2.0 * (n_samples - S.sum())\n if ((abs(ncut_value - last_objective_value) < eps) or\n (n_iter > n_iter_max)):\n has_converged = True\n else:\n # otherwise calculate rotation and continue\n last_objective_value = ncut_value\n rotation = np.dot(Vh.T, U.T)\n\n if not has_converged:\n raise LinAlgError('SVD did not converge')\n return labels" + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/spectral_clustering", + "name": "spectral_clustering", + "qname": "sklearn.cluster._spectral.spectral_clustering", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster._spectral/spectral_clustering/affinity", + "name": "affinity", + "qname": "sklearn.cluster._spectral.spectral_clustering.affinity", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_samples)", + "default_value": "", + "description": "The affinity matrix describing the relationship of the samples to\nembed. **Must be symmetric**.\n\nPossible examples:\n - adjacency matrix of a graph,\n - heat kernel of the pairwise distance matrix of the samples,\n - symmetric k-nearest neighbours connectivity matrix of the samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_samples)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/spectral_clustering/n_clusters", + "name": "n_clusters", + "qname": "sklearn.cluster._spectral.spectral_clustering.n_clusters", + "default_value": "8", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "8", + "description": "Number of clusters to extract."
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/spectral_clustering/n_components", + "name": "n_components", + "qname": "sklearn.cluster._spectral.spectral_clustering.n_components", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "n_clusters", + "description": "Number of eigenvectors to use for the spectral embedding" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/spectral_clustering/eigen_solver", + "name": "eigen_solver", + "qname": "sklearn.cluster._spectral.spectral_clustering.eigen_solver", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{None, 'arpack', 'lobpcg', or 'amg'}", + "default_value": "", + "description": "The eigenvalue decomposition strategy to use. AMG requires pyamg\nto be installed. It can be faster on very large, sparse problems,\nbut may also lead to instabilities. If None, then ``'arpack'`` is\nused." + }, + "type": { + "kind": "EnumType", + "values": ["lobpcg", "arpack", "amg"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/spectral_clustering/random_state", + "name": "random_state", + "qname": "sklearn.cluster._spectral.spectral_clustering.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "A pseudo random number generator used for the initialization of the\nlobpcg eigenvectors decomposition when eigen_solver == 'amg' and by\nthe K-Means initialization. Use an int to make the randomness\ndeterministic.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/spectral_clustering/n_init", + "name": "n_init", + "qname": "sklearn.cluster._spectral.spectral_clustering.n_init", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of times the k-means algorithm will be run with different\ncentroid seeds. The final results will be the best output of n_init\nconsecutive runs in terms of inertia. Only used if\n``assign_labels='kmeans'``." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/spectral_clustering/eigen_tol", + "name": "eigen_tol", + "qname": "sklearn.cluster._spectral.spectral_clustering.eigen_tol", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Stopping criterion for eigendecomposition of the Laplacian matrix\nwhen using arpack eigen_solver." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/spectral_clustering/assign_labels", + "name": "assign_labels", + "qname": "sklearn.cluster._spectral.spectral_clustering.assign_labels", + "default_value": "'kmeans'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'kmeans', 'discretize'}", + "default_value": "'kmeans'", + "description": "The strategy to use to assign labels in the embedding\nspace. 
There are two ways to assign labels after the Laplacian\nembedding. k-means can be applied and is a popular choice. But it can\nalso be sensitive to initialization. Discretization is another\napproach which is less sensitive to random initialization. See\nthe 'Multiclass spectral clustering' paper referenced below for\nmore details on the discretization approach." + }, + "type": { + "kind": "EnumType", + "values": ["kmeans", "discretize"] + } + }, + { + "id": "scikit-learn/sklearn.cluster._spectral/spectral_clustering/verbose", + "name": "verbose", + "qname": "sklearn.cluster._spectral.spectral_clustering.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Verbosity mode.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster. For instance, when clusters are\nnested circles on the 2D plane.\n\nIf affinity is the adjacency matrix of a graph, this method can be\nused to find normalized graph cuts.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster. For instance, when clusters are\nnested circles on the 2D plane.\n\nIf affinity is the adjacency matrix of a graph, this method can be\nused to find normalized graph cuts.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\naffinity : {array-like, sparse matrix} of shape (n_samples, n_samples)\n The affinity matrix describing the relationship of the samples to\n embed. **Must be symmetric**.\n\n Possible examples:\n - adjacency matrix of a graph,\n - heat kernel of the pairwise distance matrix of the samples,\n - symmetric k-nearest neighbours connectivity matrix of the samples.\n\nn_clusters : int, default=8\n Number of clusters to extract.\n\nn_components : int, default=n_clusters\n Number of eigenvectors to use for the spectral embedding\n\neigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. If None, then ``'arpack'`` is\n used.\n\nrandom_state : int, RandomState instance, default=None\n A pseudo random number generator used for the initialization of the\n lobpcg eigenvectors decomposition when eigen_solver == 'amg' and by\n the K-Means initialization. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nn_init : int, default=10\n Number of times the k-means algorithm will be run with different\n centroid seeds. The final results will be the best output of n_init\n consecutive runs in terms of inertia. 
Only used if\n ``assign_labels='kmeans'``.\n\neigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when using arpack eigen_solver.\n\nassign_labels : {'kmeans', 'discretize'}, default='kmeans'\n The strategy to use to assign labels in the embedding\n space. There are two ways to assign labels after the Laplacian\n embedding. k-means can be applied and is a popular choice. But it can\n also be sensitive to initialization. Discretization is another\n approach which is less sensitive to random initialization. See\n the 'Multiclass spectral clustering' paper referenced below for\n more details on the discretization approach.\n\nverbose : bool, default=False\n Verbosity mode.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nlabels : array of integers, shape: n_samples\n The labels of the clusters.\n\nReferences\n----------\n\n- Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324\n\n- A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n- Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf\n\nNotes\n-----\nThe graph should contain only one connected component; otherwise\nthe results make little sense.\n\nThis algorithm solves the normalized cut for k=2: it is a\nnormalized spectral clustering.", + "code": "@_deprecate_positional_args\ndef spectral_clustering(affinity, *, n_clusters=8, n_components=None,\n eigen_solver=None, random_state=None, n_init=10,\n eigen_tol=0.0, assign_labels='kmeans',\n verbose=False):\n \"\"\"Apply clustering to a projection of the normalized Laplacian.\n\n In practice Spectral Clustering is very useful when the structure of\n the individual clusters is highly non-convex or more generally when\n a measure of the center and spread of the cluster is not a suitable\n description of the complete cluster. For instance, when clusters are\n nested circles on the 2D plane.\n\n If affinity is the adjacency matrix of a graph, this method can be\n used to find normalized graph cuts.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n affinity : {array-like, sparse matrix} of shape (n_samples, n_samples)\n The affinity matrix describing the relationship of the samples to\n embed. **Must be symmetric**.\n\n Possible examples:\n - adjacency matrix of a graph,\n - heat kernel of the pairwise distance matrix of the samples,\n - symmetric k-nearest neighbours connectivity matrix of the samples.\n\n n_clusters : int, default=8\n Number of clusters to extract.\n\n n_components : int, default=n_clusters\n Number of eigenvectors to use for the spectral embedding\n\n eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. If None, then ``'arpack'`` is\n used.\n\n random_state : int, RandomState instance, default=None\n A pseudo random number generator used for the initialization of the\n lobpcg eigenvectors decomposition when eigen_solver == 'amg' and by\n the K-Means initialization. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\n n_init : int, default=10\n Number of times the k-means algorithm will be run with different\n centroid seeds. 
The final results will be the best output of n_init\n consecutive runs in terms of inertia. Only used if\n ``assign_labels='kmeans'``.\n\n eigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when using arpack eigen_solver.\n\n assign_labels : {'kmeans', 'discretize'}, default='kmeans'\n The strategy to use to assign labels in the embedding\n space. There are two ways to assign labels after the Laplacian\n embedding. k-means can be applied and is a popular choice. But it can\n also be sensitive to initialization. Discretization is another\n approach which is less sensitive to random initialization. See\n the 'Multiclass spectral clustering' paper referenced below for\n more details on the discretization approach.\n\n verbose : bool, default=False\n Verbosity mode.\n\n .. versionadded:: 0.24\n\n Returns\n -------\n labels : array of integers, shape: n_samples\n The labels of the clusters.\n\n References\n ----------\n\n - Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324\n\n - A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n - Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf\n\n Notes\n -----\n The graph should contain only one connected component; otherwise\n the results make little sense.\n\n This algorithm solves the normalized cut for k=2: it is a\n normalized spectral clustering.\n \"\"\"\n if assign_labels not in ('kmeans', 'discretize'):\n raise ValueError(\"The 'assign_labels' parameter should be \"\n \"'kmeans' or 'discretize', but '%s' was given\"\n % assign_labels)\n\n random_state = check_random_state(random_state)\n n_components = n_clusters if n_components is None else n_components\n\n # The first eigenvector is constant only for fully connected graphs\n # and should be kept for spectral clustering (drop_first = False)\n # See spectral_embedding documentation.\n maps = spectral_embedding(affinity, n_components=n_components,\n eigen_solver=eigen_solver,\n random_state=random_state,\n eigen_tol=eigen_tol, drop_first=False)\n if verbose:\n print(f'Computing label assignment using {assign_labels}')\n\n if assign_labels == 'kmeans':\n _, labels, _ = k_means(maps, n_clusters, random_state=random_state,\n n_init=n_init, verbose=verbose)\n else:\n labels = discretize(maps, random_state=random_state)\n\n return labels" + }, + { + "id": "scikit-learn/sklearn.cluster.setup/configuration", + "name": "configuration", + "qname": "sklearn.cluster.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cluster.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.cluster.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cluster.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.cluster.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + 
"code": "def configuration(parent_package='', top_path=None):\n from numpy.distutils.misc_util import Configuration\n\n libraries = []\n if os.name == 'posix':\n libraries.append('m')\n\n config = Configuration('cluster', parent_package, top_path)\n\n config.add_extension('_dbscan_inner',\n sources=['_dbscan_inner.pyx'],\n include_dirs=[numpy.get_include()],\n language=\"c++\")\n\n config.add_extension('_hierarchical_fast',\n sources=['_hierarchical_fast.pyx'],\n language=\"c++\",\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_extension('_k_means_fast',\n sources=['_k_means_fast.pyx'],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_extension('_k_means_lloyd',\n sources=['_k_means_lloyd.pyx'],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_extension('_k_means_elkan',\n sources=['_k_means_elkan.pyx'],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_subpackage('tests')\n\n return config" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/__init__", + "name": "__init__", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/__init__/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/__init__/transformers", + "name": "transformers", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.__init__.transformers", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of tuples", + "default_value": "", + "description": "List of (name, transformer, columns) tuples specifying the\ntransformer objects to be applied to subsets of the data.\n\nname : str\n Like in Pipeline and FeatureUnion, this allows the transformer and\n its parameters to be set using ``set_params`` and searched in grid\n search.\ntransformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\ncolumns : str, array-like of str, int, array-like of int, array-like of bool, slice or callable\n Indexes the data on its second axis. Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`." 
+ }, + "type": { + "kind": "NamedType", + "name": "list of tuples" + } + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/__init__/remainder", + "name": "remainder", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.__init__.remainder", + "default_value": "'drop'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'drop', 'passthrough'} or estimator", + "default_value": "'drop'", + "description": "By default, only the specified columns in `transformers` are\ntransformed and combined in the output, and the non-specified\ncolumns are dropped. (default of ``'drop'``).\nBy specifying ``remainder='passthrough'``, all remaining columns that\nwere not specified in `transformers` will be automatically passed\nthrough. This subset of columns is concatenated with the output of\nthe transformers.\nBy setting ``remainder`` to be an estimator, the remaining\nnon-specified columns will use the ``remainder`` estimator. The\nestimator must support :term:`fit` and :term:`transform`.\nNote that using this feature requires that the DataFrame columns\ninput at :term:`fit` and :term:`transform` have identical order." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["passthrough", "drop"] + }, + { + "kind": "NamedType", + "name": "estimator" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/__init__/sparse_threshold", + "name": "sparse_threshold", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.__init__.sparse_threshold", + "default_value": "0.3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.3", + "description": "If the output of the different transformers contains sparse matrices,\nthese will be stacked as a sparse matrix if the overall density is\nlower than this value. Use ``sparse_threshold=0`` to always return\ndense. When the transformed output consists of all dense data, the\nstacked result will be dense, and this keyword will be ignored." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/__init__/transformer_weights", + "name": "transformer_weights", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.__init__.transformer_weights", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Multiplicative weights for features per transformer. The output of the\ntransformer is multiplied by these weights. Keys are transformer names,\nvalues the weights." 
+ }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/__init__/verbose", + "name": "verbose", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the time elapsed while fitting each transformer will be\nprinted as it is completed." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Applies transformers to columns of an array or pandas DataFrame.\n\nThis estimator allows different columns or column subsets of the input\nto be transformed separately and the features generated by each transformer\nwill be concatenated to form a single feature space.\nThis is useful for heterogeneous or columnar data, to combine several\nfeature extraction mechanisms or transformations into a single transformer.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self,\n transformers, *,\n remainder='drop',\n sparse_threshold=0.3,\n n_jobs=None,\n transformer_weights=None,\n verbose=False):\n self.transformers = transformers\n self.remainder = remainder\n self.sparse_threshold = sparse_threshold\n self.n_jobs = n_jobs\n self.transformer_weights = transformer_weights\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_fit_transform", + "name": "_fit_transform", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_fit_transform/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_fit_transform/X", + "name": "X", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_fit_transform/y", + "name": "y", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._fit_transform.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_fit_transform/func", + "name": "func", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._fit_transform.func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_fit_transform/fitted", + "name": "fitted", + "qname": 
"sklearn.compose._column_transformer.ColumnTransformer._fit_transform.fitted", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private function to fit and/or transform on demand.\n\nReturn value (transformers and/or transformed X data) depends\non the passed function.\n``fitted=True`` ensures the fitted transformers are used.", + "docstring": "Private function to fit and/or transform on demand.\n\nReturn value (transformers and/or transformed X data) depends\non the passed function.\n``fitted=True`` ensures the fitted transformers are used.", + "code": " def _fit_transform(self, X, y, func, fitted=False):\n \"\"\"\n Private function to fit and/or transform on demand.\n\n Return value (transformers and/or transformed X data) depends\n on the passed function.\n ``fitted=True`` ensures the fitted transformers are used.\n \"\"\"\n transformers = list(\n self._iter(fitted=fitted, replace_strings=True))\n try:\n return Parallel(n_jobs=self.n_jobs)(\n delayed(func)(\n transformer=clone(trans) if not fitted else trans,\n X=_safe_indexing(X, column, axis=1),\n y=y,\n weight=weight,\n message_clsname='ColumnTransformer',\n message=self._log_message(name, idx, len(transformers)))\n for idx, (name, trans, column, weight) in enumerate(\n self._iter(fitted=fitted, replace_strings=True), 1))\n except ValueError as e:\n if \"Expected 2D array, got 1D array instead\" in str(e):\n raise ValueError(_ERR_MSG_1DCOLUMN) from e\n else:\n raise" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_hstack", + "name": "_hstack", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._hstack", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_hstack/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._hstack.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_hstack/Xs", + "name": "Xs", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._hstack.Xs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of {array-like, sparse matrix, dataframe}", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "list of" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Stacks Xs horizontally.\n\nThis allows subclasses to control the stacking behavior, while reusing\neverything else from ColumnTransformer.", + "docstring": "Stacks Xs horizontally.\n\nThis allows subclasses to control the stacking behavior, while reusing\neverything else from ColumnTransformer.\n\nParameters\n----------\nXs : list of {array-like, sparse matrix, dataframe}", + "code": " def _hstack(self, Xs):\n \"\"\"Stacks Xs horizontally.\n\n This allows subclasses to control the stacking behavior, while reusing\n everything else from ColumnTransformer.\n\n Parameters\n ----------\n Xs : list of {array-like, sparse matrix, dataframe}\n 
\"\"\"\n if self.sparse_output_:\n try:\n # since all columns should be numeric before stacking them\n # in a sparse matrix, `check_array` is used for the\n # dtype conversion if necessary.\n converted_Xs = [check_array(X,\n accept_sparse=True,\n force_all_finite=False)\n for X in Xs]\n except ValueError as e:\n raise ValueError(\n \"For a sparse output, all columns should \"\n \"be a numeric or convertible to a numeric.\"\n ) from e\n\n return sparse.hstack(converted_Xs).tocsr()\n else:\n Xs = [f.toarray() if sparse.issparse(f) else f for f in Xs]\n return np.hstack(Xs)" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_iter", + "name": "_iter", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._iter", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_iter/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._iter.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_iter/fitted", + "name": "fitted", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._iter.fitted", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_iter/replace_strings", + "name": "replace_strings", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._iter.replace_strings", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate (name, trans, column, weight) tuples.\n\nIf fitted=True, use the fitted transformers, else use the\nuser specified transformers updated with converted column names\nand potentially appended with transformer for remainder.", + "docstring": "Generate (name, trans, column, weight) tuples.\n\nIf fitted=True, use the fitted transformers, else use the\nuser specified transformers updated with converted column names\nand potentially appended with transformer for remainder.", + "code": " def _iter(self, fitted=False, replace_strings=False):\n \"\"\"\n Generate (name, trans, column, weight) tuples.\n\n If fitted=True, use the fitted transformers, else use the\n user specified transformers updated with converted column names\n and potentially appended with transformer for remainder.\n\n \"\"\"\n if fitted:\n transformers = self.transformers_\n else:\n # interleave the validated column specifiers\n transformers = [\n (name, trans, column) for (name, trans, _), column\n in zip(self.transformers, self._columns)\n ]\n # add transformer tuple for remainder\n if self._remainder[2] is not None:\n transformers = chain(transformers, [self._remainder])\n get_weight = (self.transformer_weights or {}).get\n\n for name, trans, column in transformers:\n if replace_strings:\n # replace 'passthrough' with identity transformer and\n # skip in case of 'drop'\n if trans == 'passthrough':\n trans = FunctionTransformer(\n accept_sparse=True, check_inverse=False\n )\n elif trans == 'drop':\n continue\n elif 
_is_empty_column_selection(column):\n continue\n\n yield (name, trans, column, get_weight(name))" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_log_message", + "name": "_log_message", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._log_message", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_log_message/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._log_message.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_log_message/name", + "name": "name", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._log_message.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_log_message/idx", + "name": "idx", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._log_message.idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_log_message/total", + "name": "total", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._log_message.total", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _log_message(self, name, idx, total):\n if not self.verbose:\n return None\n return '(%d of %d) Processing %s' % (idx, total, name)" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_sk_visual_block_", + "name": "_sk_visual_block_", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._sk_visual_block_", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_sk_visual_block_/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._sk_visual_block_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _sk_visual_block_(self):\n if isinstance(self.remainder, str) and self.remainder == 'drop':\n transformers = self.transformers\n elif hasattr(self, \"_remainder\"):\n remainder_columns = self._remainder[2]\n if hasattr(self, '_df_columns'):\n remainder_columns = (\n self._df_columns[remainder_columns].tolist()\n )\n transformers = chain(self.transformers,\n [('remainder', self.remainder,\n remainder_columns)])\n else:\n transformers = chain(self.transformers,\n [('remainder', self.remainder, '')])\n\n names, transformers, name_details = zip(*transformers)\n return _VisualBlock('parallel', transformers,\n names=names, name_details=name_details)" + 
}, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_transformers@getter", + "name": "_transformers", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._transformers", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_transformers/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._transformers.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Internal list of transformer only containing the name and\ntransformers, dropping the columns. This is for the implementation\nof get_params via BaseComposition._get_params which expects lists\nof tuples of len 2.", + "docstring": "Internal list of transformer only containing the name and\ntransformers, dropping the columns. This is for the implementation\nof get_params via BaseComposition._get_params which expects lists\nof tuples of len 2.", + "code": " @property\n def _transformers(self):\n \"\"\"\n Internal list of transformer only containing the name and\n transformers, dropping the columns. This is for the implementation\n of get_params via BaseComposition._get_params which expects lists\n of tuples of len 2.\n \"\"\"\n return [(name, trans) for name, trans, _ in self.transformers]" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_transformers@setter", + "name": "_transformers", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._transformers", + "decorators": ["_transformers.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_transformers/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._transformers.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_transformers/value", + "name": "value", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._transformers.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @_transformers.setter\n def _transformers(self, value):\n self.transformers = [\n (name, trans, col) for ((name, trans), (_, _, col))\n in zip(value, self.transformers)]" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_update_fitted_transformers", + "name": "_update_fitted_transformers", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._update_fitted_transformers", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_update_fitted_transformers/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._update_fitted_transformers.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + 
}, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_update_fitted_transformers/transformers", + "name": "transformers", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._update_fitted_transformers.transformers", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _update_fitted_transformers(self, transformers):\n # transformers are fitted; excludes 'drop' cases\n fitted_transformers = iter(transformers)\n transformers_ = []\n\n for name, old, column, _ in self._iter():\n if old == 'drop':\n trans = 'drop'\n elif old == 'passthrough':\n # FunctionTransformer is present in list of transformers,\n # so get next transformer, but save original string\n next(fitted_transformers)\n trans = 'passthrough'\n elif _is_empty_column_selection(column):\n trans = old\n else:\n trans = next(fitted_transformers)\n transformers_.append((name, trans, column))\n\n # sanity check that transformers is exhausted\n assert not list(fitted_transformers)\n self.transformers_ = transformers_" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_column_callables", + "name": "_validate_column_callables", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._validate_column_callables", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_column_callables/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._validate_column_callables.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_column_callables/X", + "name": "X", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._validate_column_callables.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Converts callable column specifications.", + "docstring": "Converts callable column specifications.", + "code": " def _validate_column_callables(self, X):\n \"\"\"\n Converts callable column specifications.\n \"\"\"\n columns = []\n for _, _, column in self.transformers:\n if callable(column):\n column = column(X)\n columns.append(column)\n self._columns = columns" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_output", + "name": "_validate_output", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._validate_output", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_output/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._validate_output.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_output/result", + "name": "result", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._validate_output.result", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Ensure that the output of each transformer is 2D. Otherwise\nhstack can raise an error or produce incorrect results.", + "docstring": "Ensure that the output of each transformer is 2D. Otherwise\nhstack can raise an error or produce incorrect results.", + "code": " def _validate_output(self, result):\n \"\"\"\n Ensure that the output of each transformer is 2D. Otherwise\n hstack can raise an error or produce incorrect results.\n \"\"\"\n names = [name for name, _, _, _ in self._iter(fitted=True,\n replace_strings=True)]\n for Xs, name in zip(result, names):\n if not getattr(Xs, 'ndim', 0) == 2:\n raise ValueError(\n \"The output of the '{0}' transformer should be 2D (scipy \"\n \"matrix, array, or pandas DataFrame).\".format(name))" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_remainder", + "name": "_validate_remainder", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._validate_remainder", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_remainder/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._validate_remainder.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_remainder/X", + "name": "X", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._validate_remainder.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validates ``remainder`` and defines ``_remainder`` targeting\nthe remaining columns.", + "docstring": "Validates ``remainder`` and defines ``_remainder`` targeting\nthe remaining columns.", + "code": " def _validate_remainder(self, X):\n \"\"\"\n Validates ``remainder`` and defines ``_remainder`` targeting\n the remaining columns.\n \"\"\"\n is_transformer = ((hasattr(self.remainder, \"fit\")\n or hasattr(self.remainder, \"fit_transform\"))\n and hasattr(self.remainder, \"transform\"))\n if (self.remainder not in ('drop', 'passthrough')\n and not is_transformer):\n raise ValueError(\n \"The remainder keyword needs to be one of 'drop', \"\n \"'passthrough', or estimator. 
'%s' was passed instead\" %\n self.remainder)\n\n # Make it possible to check for reordered named columns on transform\n self._has_str_cols = any(_determine_key_type(cols) == 'str'\n for cols in self._columns)\n if hasattr(X, 'columns'):\n self._df_columns = X.columns\n\n self._n_features = X.shape[1]\n cols = []\n for columns in self._columns:\n cols.extend(_get_column_indices(X, columns))\n\n remaining_idx = sorted(set(range(self._n_features)) - set(cols))\n self._remainder = ('remainder', self.remainder, remaining_idx or None)" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_transformers", + "name": "_validate_transformers", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._validate_transformers", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/_validate_transformers/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer._validate_transformers.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_transformers(self):\n if not self.transformers:\n return\n\n names, transformers, _ = zip(*self.transformers)\n\n # validate names\n self._validate_names(names)\n\n # validate estimators\n for t in transformers:\n if t in ('drop', 'passthrough'):\n continue\n if (not (hasattr(t, \"fit\") or hasattr(t, \"fit_transform\")) or not\n hasattr(t, \"transform\")):\n raise TypeError(\"All estimators should implement fit and \"\n \"transform, or can be 'drop' or 'passthrough' \"\n \"specifiers. '%s' (type %s) doesn't.\" %\n (t, type(t)))" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/fit", + "name": "fit", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/fit/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/fit/X", + "name": "X", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, dataframe} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data, of which specified subsets are used to fit the\ntransformers." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/fit/y", + "name": "y", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,...)", + "default_value": "None", + "description": "Targets for supervised learning." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,...)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit all transformers using X.", + "docstring": "Fit all transformers using X.\n\nParameters\n----------\nX : {array-like, dataframe} of shape (n_samples, n_features)\n Input data, of which specified subsets are used to fit the\n transformers.\n\ny : array-like of shape (n_samples,...), default=None\n Targets for supervised learning.\n\nReturns\n-------\nself : ColumnTransformer\n This estimator", + "code": " def fit(self, X, y=None):\n \"\"\"Fit all transformers using X.\n\n Parameters\n ----------\n X : {array-like, dataframe} of shape (n_samples, n_features)\n Input data, of which specified subsets are used to fit the\n transformers.\n\n y : array-like of shape (n_samples,...), default=None\n Targets for supervised learning.\n\n Returns\n -------\n self : ColumnTransformer\n This estimator\n\n \"\"\"\n # we use fit_transform to make sure to set sparse_output_ (for which we\n # need the transformed data) to have consistent output type in predict\n self.fit_transform(X, y=y)\n return self" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/fit_transform", + "name": "fit_transform", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/fit_transform/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/fit_transform/X", + "name": "X", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, dataframe} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data, of which specified subsets are used to fit the\ntransformers." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/fit_transform/y", + "name": "y", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Targets for supervised learning." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit all transformers, transform the data and concatenate results.", + "docstring": "Fit all transformers, transform the data and concatenate results.\n\nParameters\n----------\nX : {array-like, dataframe} of shape (n_samples, n_features)\n Input data, of which specified subsets are used to fit the\n transformers.\n\ny : array-like of shape (n_samples,), default=None\n Targets for supervised learning.\n\nReturns\n-------\nX_t : {array-like, sparse matrix} of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers. If\n any result is a sparse matrix, everything will be converted to\n sparse matrices.", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Fit all transformers, transform the data and concatenate results.\n\n Parameters\n ----------\n X : {array-like, dataframe} of shape (n_samples, n_features)\n Input data, of which specified subsets are used to fit the\n transformers.\n\n y : array-like of shape (n_samples,), default=None\n Targets for supervised learning.\n\n Returns\n -------\n X_t : {array-like, sparse matrix} of \\\n shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers. If\n any result is a sparse matrix, everything will be converted to\n sparse matrices.\n\n \"\"\"\n # TODO: this should be `feature_names_in_` when we start having it\n if hasattr(X, \"columns\"):\n self._feature_names_in = np.asarray(X.columns)\n else:\n self._feature_names_in = None\n X = _check_X(X)\n # set n_features_in_ attribute\n self._check_n_features(X, reset=True)\n self._validate_transformers()\n self._validate_column_callables(X)\n self._validate_remainder(X)\n\n result = self._fit_transform(X, y, _fit_transform_one)\n\n if not result:\n self._update_fitted_transformers([])\n # All transformers are None\n return np.zeros((X.shape[0], 0))\n\n Xs, transformers = zip(*result)\n\n # determine if concatenated output will be sparse or not\n if any(sparse.issparse(X) for X in Xs):\n nnz = sum(X.nnz if sparse.issparse(X) else X.size for X in Xs)\n total = sum(X.shape[0] * X.shape[1] if sparse.issparse(X)\n else X.size for X in Xs)\n density = nnz / total\n self.sparse_output_ = density < self.sparse_threshold\n else:\n self.sparse_output_ = False\n\n self._update_fitted_transformers(transformers)\n self._validate_output(Xs)\n\n return self._hstack(list(Xs))" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/get_feature_names", + "name": "get_feature_names", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.get_feature_names", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/get_feature_names/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.get_feature_names.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get feature names from all transformers.", + "docstring": "Get feature names from all transformers.\n\nReturns\n-------\nfeature_names : list of 
strings\n Names of the features produced by transform.", + "code": " def get_feature_names(self):\n \"\"\"Get feature names from all transformers.\n\n Returns\n -------\n feature_names : list of strings\n Names of the features produced by transform.\n \"\"\"\n check_is_fitted(self)\n feature_names = []\n for name, trans, column, _ in self._iter(fitted=True):\n if trans == 'drop' or _is_empty_column_selection(column):\n continue\n if trans == 'passthrough':\n if hasattr(self, '_df_columns'):\n if ((not isinstance(column, slice))\n and all(isinstance(col, str) for col in column)):\n feature_names.extend(column)\n else:\n feature_names.extend(self._df_columns[column])\n else:\n indices = np.arange(self._n_features)\n feature_names.extend(['x%d' % i for i in indices[column]])\n continue\n if not hasattr(trans, 'get_feature_names'):\n raise AttributeError(\"Transformer %s (type %s) does not \"\n \"provide get_feature_names.\"\n % (str(name), type(trans).__name__))\n feature_names.extend([name + \"__\" + f for f in\n trans.get_feature_names()])\n return feature_names" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/get_params", + "name": "get_params", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.get_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/get_params/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.get_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/get_params/deep", + "name": "deep", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.get_params.deep", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, will return the parameters for this estimator and\ncontained subobjects that are estimators." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `transformers` of the\n`ColumnTransformer`.", + "docstring": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `transformers` of the\n`ColumnTransformer`.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values.", + "code": " def get_params(self, deep=True):\n \"\"\"Get parameters for this estimator.\n\n Returns the parameters given in the constructor as well as the\n estimators contained within the `transformers` of the\n `ColumnTransformer`.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : dict\n Parameter names mapped to their values.\n \"\"\"\n return self._get_params('_transformers', deep=deep)" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/named_transformers_@getter", + "name": "named_transformers_", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.named_transformers_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/named_transformers_/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.named_transformers_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Access the fitted transformer by name.\n\nRead-only attribute to access any transformer by given name.\nKeys are transformer names and values are the fitted transformer\nobjects.", + "docstring": "Access the fitted transformer by name.\n\nRead-only attribute to access any transformer by given name.\nKeys are transformer names and values are the fitted transformer\nobjects.", + "code": " @property\n def named_transformers_(self):\n \"\"\"Access the fitted transformer by name.\n\n Read-only attribute to access any transformer by given name.\n Keys are transformer names and values are the fitted transformer\n objects.\n\n \"\"\"\n # Use Bunch object to improve autocomplete\n return Bunch(**{name: trans for name, trans, _\n in self.transformers_})" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/set_params", + "name": "set_params", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.set_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/set_params/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.set_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/set_params/kwargs", + "name": "kwargs", + 
"qname": "sklearn.compose._column_transformer.ColumnTransformer.set_params.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that you\ncan directly set the parameters of the estimators contained in\n`transformers` of `ColumnTransformer`.", + "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that you\ncan directly set the parameters of the estimators contained in\n`transformers` of `ColumnTransformer`.\n\nReturns\n-------\nself", + "code": " def set_params(self, **kwargs):\n \"\"\"Set the parameters of this estimator.\n\n Valid parameter keys can be listed with ``get_params()``. Note that you\n can directly set the parameters of the estimators contained in\n `transformers` of `ColumnTransformer`.\n\n Returns\n -------\n self\n \"\"\"\n self._set_params('_transformers', **kwargs)\n return self" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/transform", + "name": "transform", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/transform/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/ColumnTransformer/transform/X", + "name": "X", + "qname": "sklearn.compose._column_transformer.ColumnTransformer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, dataframe} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to be transformed by subset." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform X separately by each transformer, concatenate results.", + "docstring": "Transform X separately by each transformer, concatenate results.\n\nParameters\n----------\nX : {array-like, dataframe} of shape (n_samples, n_features)\n The data to be transformed by subset.\n\nReturns\n-------\nX_t : {array-like, sparse matrix} of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers. If\n any result is a sparse matrix, everything will be converted to\n sparse matrices.", + "code": " def transform(self, X):\n \"\"\"Transform X separately by each transformer, concatenate results.\n\n Parameters\n ----------\n X : {array-like, dataframe} of shape (n_samples, n_features)\n The data to be transformed by subset.\n\n Returns\n -------\n X_t : {array-like, sparse matrix} of \\\n shape (n_samples, sum_n_components)\n hstack of results of transformers. 
sum_n_components is the\n sum of n_components (output dimension) over transformers. If\n any result is a sparse matrix, everything will be converted to\n sparse matrices.\n\n \"\"\"\n check_is_fitted(self)\n X = _check_X(X)\n if hasattr(X, \"columns\"):\n X_feature_names = np.asarray(X.columns)\n else:\n X_feature_names = None\n\n self._check_n_features(X, reset=False)\n if (self._feature_names_in is not None and\n X_feature_names is not None and\n np.any(self._feature_names_in != X_feature_names)):\n raise RuntimeError(\n \"Given feature/column names do not match the ones for the \"\n \"data given during fit.\"\n )\n Xs = self._fit_transform(X, None, _transform_one, fitted=True)\n self._validate_output(Xs)\n\n if not Xs:\n # All transformers are None\n return np.zeros((X.shape[0], 0))\n\n return self._hstack(list(Xs))" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/_check_X", + "name": "_check_X", + "qname": "sklearn.compose._column_transformer._check_X", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/_check_X/X", + "name": "X", + "qname": "sklearn.compose._column_transformer._check_X.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Use check_array only on lists and other non-array-likes / sparse", + "docstring": "Use check_array only on lists and other non-array-likes / sparse", + "code": "def _check_X(X):\n \"\"\"Use check_array only on lists and other non-array-likes / sparse\"\"\"\n if hasattr(X, '__array__') or sparse.issparse(X):\n return X\n return check_array(X, force_all_finite='allow-nan', dtype=object)" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/_get_transformer_list", + "name": "_get_transformer_list", + "qname": "sklearn.compose._column_transformer._get_transformer_list", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/_get_transformer_list/estimators", + "name": "estimators", + "qname": "sklearn.compose._column_transformer._get_transformer_list.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Construct (name, trans, column) tuples from list", + "docstring": "Construct (name, trans, column) tuples from list", + "code": "def _get_transformer_list(estimators):\n \"\"\"\n Construct (name, trans, column) tuples from list\n\n \"\"\"\n transformers, columns = zip(*estimators)\n names, _ = zip(*_name_estimators(transformers))\n\n transformer_list = list(zip(names, transformers, columns))\n return transformer_list" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/_is_empty_column_selection", + "name": "_is_empty_column_selection", + "qname": "sklearn.compose._column_transformer._is_empty_column_selection", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/_is_empty_column_selection/column", + "name": "column", + "qname": "sklearn.compose._column_transformer._is_empty_column_selection.column", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
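A short sketch of the `transform` behaviour captured above, including the feature-name consistency check that raises `RuntimeError`; the toy dataframe is assumed for illustration:

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

X = pd.DataFrame({'a': [1.0, 2.0, 3.0], 'b': [10.0, 20.0, 30.0]})
ct = ColumnTransformer([('scale', StandardScaler(), ['a'])]).fit(X)

# Each fitted transformer is applied to its column subset and the
# results are hstacked: one scaled column here, so shape (3, 1).
print(ct.transform(X).shape)  # (3, 1)

# Column names that differ from those seen during fit trigger the check.
try:
    ct.transform(X.rename(columns={'a': 'c'}))
except RuntimeError as exc:
    print(exc)  # "Given feature/column names do not match ..."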
"description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return True if the column selection is empty (empty list or all-False\nboolean array).", + "docstring": "Return True if the column selection is empty (empty list or all-False\nboolean array).", + "code": "def _is_empty_column_selection(column):\n \"\"\"\n Return True if the column selection is empty (empty list or all-False\n boolean array).\n\n \"\"\"\n if hasattr(column, 'dtype') and np.issubdtype(column.dtype, np.bool_):\n return not column.any()\n elif hasattr(column, '__len__'):\n return (len(column) == 0 or\n all(isinstance(col, bool) for col in column)\n and not any(column))\n else:\n return False" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_selector/__call__", + "name": "__call__", + "qname": "sklearn.compose._column_transformer.make_column_selector.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_selector/__call__/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.make_column_selector.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_selector/__call__/df", + "name": "df", + "qname": "sklearn.compose._column_transformer.make_column_selector.__call__.df", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dataframe of shape (n_features, n_samples)", + "default_value": "", + "description": "DataFrame to select columns from." 
+ }, + "type": { + "kind": "NamedType", + "name": "dataframe of shape (n_features, n_samples)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Callable for column selection to be used by a\n:class:`ColumnTransformer`.", + "docstring": "Callable for column selection to be used by a\n:class:`ColumnTransformer`.\n\nParameters\n----------\ndf : dataframe of shape (n_features, n_samples)\n DataFrame to select columns from.", + "code": " def __call__(self, df):\n \"\"\"Callable for column selection to be used by a\n :class:`ColumnTransformer`.\n\n Parameters\n ----------\n df : dataframe of shape (n_features, n_samples)\n DataFrame to select columns from.\n \"\"\"\n if not hasattr(df, 'iloc'):\n raise ValueError(\"make_column_selector can only be applied to \"\n \"pandas dataframes\")\n df_row = df.iloc[:1]\n if self.dtype_include is not None or self.dtype_exclude is not None:\n df_row = df_row.select_dtypes(include=self.dtype_include,\n exclude=self.dtype_exclude)\n cols = df_row.columns\n if self.pattern is not None:\n cols = cols[cols.str.contains(self.pattern, regex=True)]\n return cols.tolist()" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_selector/__init__", + "name": "__init__", + "qname": "sklearn.compose._column_transformer.make_column_selector.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_selector/__init__/self", + "name": "self", + "qname": "sklearn.compose._column_transformer.make_column_selector.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_selector/__init__/pattern", + "name": "pattern", + "qname": "sklearn.compose._column_transformer.make_column_selector.__init__.pattern", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Name of columns containing this regex pattern will be included. If\nNone, column selection will not be selected based on pattern." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_selector/__init__/dtype_include", + "name": "dtype_include", + "qname": "sklearn.compose._column_transformer.make_column_selector.__init__.dtype_include", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "column dtype or list of column dtypes", + "default_value": "None", + "description": "A selection of dtypes to include. For more details, see\n:meth:`pandas.DataFrame.select_dtypes`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "column dtype" + }, + { + "kind": "NamedType", + "name": "list of column dtypes" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_selector/__init__/dtype_exclude", + "name": "dtype_exclude", + "qname": "sklearn.compose._column_transformer.make_column_selector.__init__.dtype_exclude", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "column dtype or list of column dtypes", + "default_value": "None", + "description": "A selection of dtypes to exclude. 
For more details, see\n:meth:`pandas.DataFrame.select_dtypes`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "column dtype" + }, + { + "kind": "NamedType", + "name": "list of column dtypes" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Create a callable to select columns to be used with\n:class:`ColumnTransformer`.\n\n:func:`make_column_selector` can select columns based on datatype or the\ncolumns name with a regex. When using multiple selection criteria, **all**\ncriteria must match for a column to be selected.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, pattern=None, *, dtype_include=None,\n dtype_exclude=None):\n self.pattern = pattern\n self.dtype_include = dtype_include\n self.dtype_exclude = dtype_exclude" + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_transformer", + "name": "make_column_transformer", + "qname": "sklearn.compose._column_transformer.make_column_transformer", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_transformer/transformers", + "name": "transformers", + "qname": "sklearn.compose._column_transformer.make_column_transformer.transformers", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "tuples", + "default_value": "", + "description": "Tuples of the form (transformer, columns) specifying the\ntransformer objects to be applied to subsets of the data.\n\ntransformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\ncolumns : str, array-like of str, int, array-like of int, slice, array-like of bool or callable\n Indexes the data on its second axis. Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`." + }, + "type": { + "kind": "NamedType", + "name": "tuples" + } + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_transformer/remainder", + "name": "remainder", + "qname": "sklearn.compose._column_transformer.make_column_transformer.remainder", + "default_value": "'drop'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'drop', 'passthrough'} or estimator", + "default_value": "'drop'", + "description": "By default, only the specified columns in `transformers` are\ntransformed and combined in the output, and the non-specified\ncolumns are dropped. (default of ``'drop'``).\nBy specifying ``remainder='passthrough'``, all remaining columns that\nwere not specified in `transformers` will be automatically passed\nthrough. This subset of columns is concatenated with the output of\nthe transformers.\nBy setting ``remainder`` to be an estimator, the remaining\nnon-specified columns will use the ``remainder`` estimator. The\nestimator must support :term:`fit` and :term:`transform`." 
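A small usage sketch of the selector described above; all given criteria (dtype and regex) must match for a column to be selected. The dataframe is illustrative:

import numpy as np
import pandas as pd
from sklearn.compose import make_column_selector

X = pd.DataFrame({'age': [25, 32],
                  'height_cm': [180.0, 172.5],
                  'city': ['London', 'Paris']})

# A selector is a callable: given the dataframe, it returns the list
# of matching column names.
print(make_column_selector(dtype_include=np.number)(X))  # ['age', 'height_cm']
print(make_column_selector(dtype_exclude=np.number)(X))  # ['city']
# dtype and pattern criteria are combined with AND semantics.
print(make_column_selector(pattern='_cm$', dtype_include=np.number)(X))  # ['height_cm']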
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["passthrough", "drop"] + }, + { + "kind": "NamedType", + "name": "estimator" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_transformer/sparse_threshold", + "name": "sparse_threshold", + "qname": "sklearn.compose._column_transformer.make_column_transformer.sparse_threshold", + "default_value": "0.3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.3", + "description": "If the transformed output consists of a mix of sparse and dense data,\nit will be stacked as a sparse matrix if the density is lower than this\nvalue. Use ``sparse_threshold=0`` to always return dense.\nWhen the transformed output consists of all sparse or all dense data,\nthe stacked result will be sparse or dense, respectively, and this\nkeyword will be ignored." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_transformer/n_jobs", + "name": "n_jobs", + "qname": "sklearn.compose._column_transformer.make_column_transformer.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.compose._column_transformer/make_column_transformer/verbose", + "name": "verbose", + "qname": "sklearn.compose._column_transformer.make_column_transformer.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the time elapsed while fitting each transformer will be\nprinted as it is completed." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Construct a ColumnTransformer from the given transformers.\n\nThis is a shorthand for the ColumnTransformer constructor; it does not\nrequire, and does not permit, naming the transformers. Instead, they will\nbe given names automatically based on their types. It also does not allow\nweighting with ``transformer_weights``.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Construct a ColumnTransformer from the given transformers.\n\nThis is a shorthand for the ColumnTransformer constructor; it does not\nrequire, and does not permit, naming the transformers. Instead, they will\nbe given names automatically based on their types. It also does not allow\nweighting with ``transformer_weights``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n*transformers : tuples\n Tuples of the form (transformer, columns) specifying the\n transformer objects to be applied to subsets of the data.\n\n transformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\n columns : str, array-like of str, int, array-like of int, slice, array-like of bool or callable\n Indexes the data on its second axis. 
Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`.\n\nremainder : {'drop', 'passthrough'} or estimator, default='drop'\n By default, only the specified columns in `transformers` are\n transformed and combined in the output, and the non-specified\n columns are dropped. (default of ``'drop'``).\n By specifying ``remainder='passthrough'``, all remaining columns that\n were not specified in `transformers` will be automatically passed\n through. This subset of columns is concatenated with the output of\n the transformers.\n By setting ``remainder`` to be an estimator, the remaining\n non-specified columns will use the ``remainder`` estimator. The\n estimator must support :term:`fit` and :term:`transform`.\n\nsparse_threshold : float, default=0.3\n If the transformed output consists of a mix of sparse and dense data,\n it will be stacked as a sparse matrix if the density is lower than this\n value. Use ``sparse_threshold=0`` to always return dense.\n When the transformed output consists of all sparse or all dense data,\n the stacked result will be sparse or dense, respectively, and this\n keyword will be ignored.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nReturns\n-------\nct : ColumnTransformer\n\nSee Also\n--------\nColumnTransformer : Class that allows combining the\n outputs of multiple transformer objects used on column subsets\n of the data into a single feature space.\n\nExamples\n--------\n>>> from sklearn.preprocessing import StandardScaler, OneHotEncoder\n>>> from sklearn.compose import make_column_transformer\n>>> make_column_transformer(\n... (StandardScaler(), ['numerical_column']),\n... (OneHotEncoder(), ['categorical_column']))\nColumnTransformer(transformers=[('standardscaler', StandardScaler(...),\n ['numerical_column']),\n ('onehotencoder', OneHotEncoder(...),\n ['categorical_column'])])", + "code": "def make_column_transformer(*transformers,\n remainder='drop',\n sparse_threshold=0.3,\n n_jobs=None,\n verbose=False):\n \"\"\"Construct a ColumnTransformer from the given transformers.\n\n This is a shorthand for the ColumnTransformer constructor; it does not\n require, and does not permit, naming the transformers. Instead, they will\n be given names automatically based on their types. 
It also does not allow\n weighting with ``transformer_weights``.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n *transformers : tuples\n Tuples of the form (transformer, columns) specifying the\n transformer objects to be applied to subsets of the data.\n\n transformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\n columns : str, array-like of str, int, array-like of int, slice, \\\n array-like of bool or callable\n Indexes the data on its second axis. Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`.\n\n remainder : {'drop', 'passthrough'} or estimator, default='drop'\n By default, only the specified columns in `transformers` are\n transformed and combined in the output, and the non-specified\n columns are dropped. (default of ``'drop'``).\n By specifying ``remainder='passthrough'``, all remaining columns that\n were not specified in `transformers` will be automatically passed\n through. This subset of columns is concatenated with the output of\n the transformers.\n By setting ``remainder`` to be an estimator, the remaining\n non-specified columns will use the ``remainder`` estimator. The\n estimator must support :term:`fit` and :term:`transform`.\n\n sparse_threshold : float, default=0.3\n If the transformed output consists of a mix of sparse and dense data,\n it will be stacked as a sparse matrix if the density is lower than this\n value. Use ``sparse_threshold=0`` to always return dense.\n When the transformed output consists of all sparse or all dense data,\n the stacked result will be sparse or dense, respectively, and this\n keyword will be ignored.\n\n n_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n verbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\n Returns\n -------\n ct : ColumnTransformer\n\n See Also\n --------\n ColumnTransformer : Class that allows combining the\n outputs of multiple transformer objects used on column subsets\n of the data into a single feature space.\n\n Examples\n --------\n >>> from sklearn.preprocessing import StandardScaler, OneHotEncoder\n >>> from sklearn.compose import make_column_transformer\n >>> make_column_transformer(\n ... (StandardScaler(), ['numerical_column']),\n ... 
(OneHotEncoder(), ['categorical_column']))\n ColumnTransformer(transformers=[('standardscaler', StandardScaler(...),\n ['numerical_column']),\n ('onehotencoder', OneHotEncoder(...),\n ['categorical_column'])])\n\n \"\"\"\n # transformer_weights keyword is not passed through because the user\n # would need to know the automatically generated names of the transformers\n transformer_list = _get_transformer_list(transformers)\n return ColumnTransformer(transformer_list, n_jobs=n_jobs,\n remainder=remainder,\n sparse_threshold=sparse_threshold,\n verbose=verbose)" + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/__init__", + "name": "__init__", + "qname": "sklearn.compose._target.TransformedTargetRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/__init__/self", + "name": "self", + "qname": "sklearn.compose._target.TransformedTargetRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/__init__/regressor", + "name": "regressor", + "qname": "sklearn.compose._target.TransformedTargetRegressor.__init__.regressor", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "None", + "description": "Regressor object such as derived from ``RegressorMixin``. This\nregressor will automatically be cloned each time prior to fitting.\nIf regressor is ``None``, ``LinearRegression()`` is created and used." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/__init__/transformer", + "name": "transformer", + "qname": "sklearn.compose._target.TransformedTargetRegressor.__init__.transformer", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "None", + "description": "Estimator object such as derived from ``TransformerMixin``. Cannot be\nset at the same time as ``func`` and ``inverse_func``. If\n``transformer`` is ``None`` as well as ``func`` and ``inverse_func``,\nthe transformer will be an identity transformer. Note that the\ntransformer will be cloned during fitting. Also, the transformer is\nrestricting ``y`` to be a numpy array." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/__init__/func", + "name": "func", + "qname": "sklearn.compose._target.TransformedTargetRegressor.__init__.func", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "function", + "default_value": "None", + "description": "Function to apply to ``y`` before passing to ``fit``. Cannot be set at\nthe same time as ``transformer``. The function needs to return a\n2-dimensional array. If ``func`` is ``None``, the function used will be\nthe identity function." 
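Beyond the doctest embedded above, a sketch of the auto-generated transformer names and of `remainder='passthrough'`; the data and estimator settings are assumed for illustration (`sparse` is the 0.24-era OneHotEncoder flag):

import pandas as pd
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

X = pd.DataFrame({'amount': [10.0, 20.0, 15.0],
                  'city': ['London', 'Paris', 'London'],
                  'note': ['a', 'b', 'c']})

# Names are derived from the estimator types; the unlisted 'note'
# column is appended untransformed via remainder='passthrough'.
ct = make_column_transformer(
    (StandardScaler(), ['amount']),
    (OneHotEncoder(sparse=False), ['city']),
    remainder='passthrough',
)
Xt = ct.fit_transform(X)
print(Xt.shape)  # (3, 4): 1 scaled + 2 one-hot + 1 passthrough
print([name for name, _, _ in ct.transformers_])
# ['standardscaler', 'onehotencoder', 'remainder']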
+ }, + "type": { + "kind": "NamedType", + "name": "function" + } + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/__init__/inverse_func", + "name": "inverse_func", + "qname": "sklearn.compose._target.TransformedTargetRegressor.__init__.inverse_func", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "function", + "default_value": "None", + "description": "Function to apply to the prediction of the regressor. Cannot be set at\nthe same time as ``transformer`` as well. The function needs to return\na 2-dimensional array. The inverse function is used to return\npredictions to the same space of the original training labels." + }, + "type": { + "kind": "NamedType", + "name": "function" + } + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/__init__/check_inverse", + "name": "check_inverse", + "qname": "sklearn.compose._target.TransformedTargetRegressor.__init__.check_inverse", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to check that ``transform`` followed by ``inverse_transform``\nor ``func`` followed by ``inverse_func`` leads to the original targets." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Meta-estimator to regress on a transformed target.\n\nUseful for applying a non-linear transformation to the target ``y`` in\nregression problems. This transformation can be given as a Transformer\nsuch as the QuantileTransformer or as a function and its inverse such as\n``log`` and ``exp``.\n\nThe computation during ``fit`` is::\n\n regressor.fit(X, func(y))\n\nor::\n\n regressor.fit(X, transformer.transform(y))\n\nThe computation during ``predict`` is::\n\n inverse_func(regressor.predict(X))\n\nor::\n\n transformer.inverse_transform(regressor.predict(X))\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.20", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, regressor=None, *, transformer=None,\n func=None, inverse_func=None, check_inverse=True):\n self.regressor = regressor\n self.transformer = transformer\n self.func = func\n self.inverse_func = inverse_func\n self.check_inverse = check_inverse" + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/_fit_transformer", + "name": "_fit_transformer", + "qname": "sklearn.compose._target.TransformedTargetRegressor._fit_transformer", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/_fit_transformer/self", + "name": "self", + "qname": "sklearn.compose._target.TransformedTargetRegressor._fit_transformer.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/_fit_transformer/y", + "name": "y", + "qname": "sklearn.compose._target.TransformedTargetRegressor._fit_transformer.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check transformer and fit transformer.\n\nCreate the default transformer, fit it and make additional inverse\ncheck on a subset (optional).", + "docstring": "Check transformer and fit transformer.\n\nCreate the default transformer, fit it and make additional inverse\ncheck on a subset (optional).", + "code": " def _fit_transformer(self, y):\n \"\"\"Check transformer and fit transformer.\n\n Create the default transformer, fit it and make additional inverse\n check on a subset (optional).\n\n \"\"\"\n if (self.transformer is not None and\n (self.func is not None or self.inverse_func is not None)):\n raise ValueError(\"'transformer' and functions 'func'/\"\n \"'inverse_func' cannot both be set.\")\n elif self.transformer is not None:\n self.transformer_ = clone(self.transformer)\n else:\n if self.func is not None and self.inverse_func is None:\n raise ValueError(\"When 'func' is provided, 'inverse_func' must\"\n \" also be provided\")\n self.transformer_ = FunctionTransformer(\n func=self.func, inverse_func=self.inverse_func, validate=True,\n check_inverse=self.check_inverse)\n # XXX: sample_weight is not currently passed to the\n # transformer. However, if transformer starts using sample_weight, the\n # code should be modified accordingly. At the time to consider the\n # sample_prop feature, it is also a good use case to be considered.\n self.transformer_.fit(y)\n if self.check_inverse:\n idx_selected = slice(None, None, max(1, y.shape[0] // 10))\n y_sel = _safe_indexing(y, idx_selected)\n y_sel_t = self.transformer_.transform(y_sel)\n if not np.allclose(y_sel,\n self.transformer_.inverse_transform(y_sel_t)):\n warnings.warn(\"The provided functions or transformer are\"\n \" not strictly inverse of each other. 
If\"\n \" you are sure you want to proceed regardless\"\n \", set 'check_inverse=False'\", UserWarning)" + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/_more_tags", + "name": "_more_tags", + "qname": "sklearn.compose._target.TransformedTargetRegressor._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/_more_tags/self", + "name": "self", + "qname": "sklearn.compose._target.TransformedTargetRegressor._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'poor_score': True, 'no_validation': True}" + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/fit", + "name": "fit", + "qname": "sklearn.compose._target.TransformedTargetRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/fit/self", + "name": "self", + "qname": "sklearn.compose._target.TransformedTargetRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/fit/X", + "name": "X", + "qname": "sklearn.compose._target.TransformedTargetRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/fit/y", + "name": "y", + "qname": "sklearn.compose._target.TransformedTargetRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/fit/fit_params", + "name": "fit_params", + "qname": "sklearn.compose._target.TransformedTargetRegressor.fit.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Parameters passed to the ``fit`` method of the underlying\nregressor." 
+ }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model according to the given training data.", + "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\n**fit_params : dict\n Parameters passed to the ``fit`` method of the underlying\n regressor.\n\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, **fit_params):\n \"\"\"Fit the model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n **fit_params : dict\n Parameters passed to the ``fit`` method of the underlying\n regressor.\n\n\n Returns\n -------\n self : object\n \"\"\"\n y = check_array(y, accept_sparse=False, force_all_finite=True,\n ensure_2d=False, dtype='numeric')\n\n # store the number of dimension of the target to predict an array of\n # similar shape at predict\n self._training_dim = y.ndim\n\n # transformers are designed to modify X which is 2d dimensional, we\n # need to modify y accordingly.\n if y.ndim == 1:\n y_2d = y.reshape(-1, 1)\n else:\n y_2d = y\n self._fit_transformer(y_2d)\n\n # transform y and convert back to 1d array if needed\n y_trans = self.transformer_.transform(y_2d)\n # FIXME: a FunctionTransformer can return a 1D array even when validate\n # is set to True. 
Therefore, we need to check the number of dimension\n # first.\n if y_trans.ndim == 2 and y_trans.shape[1] == 1:\n y_trans = y_trans.squeeze(axis=1)\n\n if self.regressor is None:\n from ..linear_model import LinearRegression\n self.regressor_ = LinearRegression()\n else:\n self.regressor_ = clone(self.regressor)\n\n self.regressor_.fit(X, y_trans, **fit_params)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/n_features_in_@getter", + "name": "n_features_in_", + "qname": "sklearn.compose._target.TransformedTargetRegressor.n_features_in_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/n_features_in_/self", + "name": "self", + "qname": "sklearn.compose._target.TransformedTargetRegressor.n_features_in_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def n_features_in_(self):\n # For consistency with other estimators we raise a AttributeError so\n # that hasattr() returns False the estimator isn't fitted.\n try:\n check_is_fitted(self)\n except NotFittedError as nfe:\n raise AttributeError(\n \"{} object has no n_features_in_ attribute.\"\n .format(self.__class__.__name__)\n ) from nfe\n\n return self.regressor_.n_features_in_" + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/predict", + "name": "predict", + "qname": "sklearn.compose._target.TransformedTargetRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/predict/self", + "name": "self", + "qname": "sklearn.compose._target.TransformedTargetRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.compose._target/TransformedTargetRegressor/predict/X", + "name": "X", + "qname": "sklearn.compose._target.TransformedTargetRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict using the base regressor, applying inverse.\n\nThe regressor is used to predict and the ``inverse_func`` or\n``inverse_transform`` is applied before returning the prediction.", + "docstring": "Predict using the base regressor, applying inverse.\n\nThe regressor is used to predict and the ``inverse_func`` or\n``inverse_transform`` is applied before returning the prediction.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\ny_hat : ndarray of shape (n_samples,)\n Predicted values.", + "code": " def predict(self, X):\n \"\"\"Predict using the base regressor, applying inverse.\n\n The regressor is used to predict and the ``inverse_func`` or\n ``inverse_transform`` is applied before returning the prediction.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\n Returns\n -------\n y_hat : ndarray of shape (n_samples,)\n Predicted values.\n\n \"\"\"\n check_is_fitted(self)\n pred = self.regressor_.predict(X)\n if pred.ndim == 1:\n pred_trans = self.transformer_.inverse_transform(\n pred.reshape(-1, 1))\n else:\n pred_trans = self.transformer_.inverse_transform(pred)\n if (self._training_dim == 1 and\n pred_trans.ndim == 2 and pred_trans.shape[1] == 1):\n pred_trans = pred_trans.squeeze(axis=1)\n\n return pred_trans" + }, + { + "id": "scikit-learn/sklearn.conftest/_fetch_fixture", + "name": "_fetch_fixture", + "qname": "sklearn.conftest._fetch_fixture", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.conftest/_fetch_fixture/f", + "name": "f", + "qname": "sklearn.conftest._fetch_fixture.f", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fetch dataset (download if missing and requested by environment).", + "docstring": "Fetch dataset (download if missing and requested by environment).", + "code": "def _fetch_fixture(f):\n \"\"\"Fetch dataset (download if missing and requested by environment).\"\"\"\n download_if_missing = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0'\n\n @wraps(f)\n def wrapped(*args, **kwargs):\n kwargs['download_if_missing'] = download_if_missing\n try:\n return f(*args, **kwargs)\n except IOError:\n pytest.skip(\"test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0\")\n return pytest.fixture(lambda: wrapped)" + }, + { + "id": "scikit-learn/sklearn.conftest/pyplot", + "name": "pyplot", + "qname": "sklearn.conftest.pyplot", + "decorators": ["pytest.fixture(scope='function')"], + "parameters": [], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Setup and teardown fixture for matplotlib.\n\nThis fixture checks if we can import matplotlib. If not, the tests will be\nskipped. Otherwise, we setup matplotlib backend and close the figures\nafter running the functions.", + "docstring": "Setup and teardown fixture for matplotlib.\n\nThis fixture checks if we can import matplotlib. If not, the tests will be\nskipped. 
Otherwise, we setup matplotlib backend and close the figures\nafter running the functions.\n\nReturns\n-------\npyplot : module\n The ``matplotlib.pyplot`` module.", + "code": "@pytest.fixture(scope='function')\ndef pyplot():\n \"\"\"Setup and teardown fixture for matplotlib.\n\n This fixture checks if we can import matplotlib. If not, the tests will be\n skipped. Otherwise, we setup matplotlib backend and close the figures\n after running the functions.\n\n Returns\n -------\n pyplot : module\n The ``matplotlib.pyplot`` module.\n \"\"\"\n matplotlib = pytest.importorskip('matplotlib')\n matplotlib.use('agg')\n pyplot = pytest.importorskip('matplotlib.pyplot')\n yield pyplot\n pyplot.close('all')" + }, + { + "id": "scikit-learn/sklearn.conftest/pytest_collection_modifyitems", + "name": "pytest_collection_modifyitems", + "qname": "sklearn.conftest.pytest_collection_modifyitems", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.conftest/pytest_collection_modifyitems/config", + "name": "config", + "qname": "sklearn.conftest.pytest_collection_modifyitems.config", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "pytest config", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "pytest config" + } + }, + { + "id": "scikit-learn/sklearn.conftest/pytest_collection_modifyitems/items", + "name": "items", + "qname": "sklearn.conftest.pytest_collection_modifyitems.items", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "list of collected items", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "list of collected items" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Called after collect is completed.", + "docstring": "Called after collect is completed.\n\nParameters\n----------\nconfig : pytest config\nitems : list of collected items", + "code": "def pytest_collection_modifyitems(config, items):\n \"\"\"Called after collect is completed.\n\n Parameters\n ----------\n config : pytest config\n items : list of collected items\n \"\"\"\n run_network_tests = environ.get('SKLEARN_SKIP_NETWORK_TESTS', '1') == '0'\n skip_network = pytest.mark.skip(\n reason=\"test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0\")\n\n # download datasets during collection to avoid thread unsafe behavior\n # when running pytest in parallel with pytest-xdist\n dataset_features_set = set(dataset_fetchers)\n datasets_to_download = set()\n\n for item in items:\n if not hasattr(item, \"fixturenames\"):\n continue\n item_fixtures = set(item.fixturenames)\n dataset_to_fetch = item_fixtures & dataset_features_set\n if not dataset_to_fetch:\n continue\n\n if run_network_tests:\n datasets_to_download |= dataset_to_fetch\n else:\n # network tests are skipped\n item.add_marker(skip_network)\n\n # Only download datasets on the first worker spawned by pytest-xdist\n # to avoid thread unsafe behavior. 
If pytest-xdist is not used, we still\n # download before tests run.\n worker_id = environ.get(\"PYTEST_XDIST_WORKER\", \"gw0\")\n if worker_id == \"gw0\" and run_network_tests:\n for name in datasets_to_download:\n dataset_fetchers[name]()" + }, + { + "id": "scikit-learn/sklearn.conftest/pytest_runtest_setup", + "name": "pytest_runtest_setup", + "qname": "sklearn.conftest.pytest_runtest_setup", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.conftest/pytest_runtest_setup/item", + "name": "item", + "qname": "sklearn.conftest.pytest_runtest_setup.item", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "pytest item", + "default_value": "", + "description": "item to be processed" + }, + "type": { + "kind": "NamedType", + "name": "pytest item" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Set the number of openmp threads based on the number of workers\nxdist is using to prevent oversubscription.", + "docstring": "Set the number of openmp threads based on the number of workers\nxdist is using to prevent oversubscription.\n\nParameters\n----------\nitem : pytest item\n item to be processed", + "code": "def pytest_runtest_setup(item):\n \"\"\"Set the number of openmp threads based on the number of workers\n xdist is using to prevent oversubscription.\n\n Parameters\n ----------\n item : pytest item\n item to be processed\n \"\"\"\n try:\n xdist_worker_count = int(os.environ['PYTEST_XDIST_WORKER_COUNT'])\n except KeyError:\n # raises when pytest-xdist is not installed\n return\n\n openmp_threads = _openmp_effective_n_threads()\n threads_per_worker = max(openmp_threads // xdist_worker_count, 1)\n threadpool_limits(threads_per_worker, user_api='openmp')" + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/__init__", + "name": "__init__", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/__init__/self", + "name": "self", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/__init__/store_precision", + "name": "store_precision", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.__init__.store_precision", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Specify if the estimated precision is stored." 
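A hypothetical test showing how the `pyplot` fixture above is consumed: pytest injects it by argument name, skips the test if matplotlib is absent, and closes all figures on teardown. The test body is invented for illustration:

def test_my_plot(pyplot):
    # 'pyplot' is the matplotlib.pyplot module yielded by the fixture,
    # already switched to the non-interactive 'agg' backend.
    fig, ax = pyplot.subplots()
    ax.plot([0, 1], [0, 1])
    assert len(fig.axes) == 1
    # No manual cleanup needed: the fixture runs pyplot.close('all').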
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/__init__/assume_centered", + "name": "assume_centered", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.__init__.assume_centered", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the support of robust location and covariance estimates\nis computed, and a covariance estimate is recomputed from it,\nwithout centering the data.\nUseful to work with data whose mean is significantly equal to\nzero but is not exactly zero.\nIf False, the robust location and covariance are directly computed\nwith the FastMCD algorithm without additional treatment." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/__init__/support_fraction", + "name": "support_fraction", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.__init__.support_fraction", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "The proportion of points to be included in the support of the raw\nMCD estimate. If None, the minimum value of support_fraction will\nbe used within the algorithm: `[n_sample + n_features + 1] / 2`.\nRange is (0, 1)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/__init__/contamination", + "name": "contamination", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.__init__.contamination", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The amount of contamination of the data set, i.e. the proportion\nof outliers in the data set. Range is (0, 0.5)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/__init__/random_state", + "name": "random_state", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines the pseudo random number generator for shuffling\nthe data. Pass an int for reproducible results across multiple function\ncalls. See :term: `Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "An object for detecting outliers in a Gaussian distributed dataset.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, store_precision=True, assume_centered=False,\n support_fraction=None, contamination=0.1,\n random_state=None):\n super().__init__(\n store_precision=store_precision,\n assume_centered=assume_centered,\n support_fraction=support_fraction,\n random_state=random_state)\n self.contamination = contamination" + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/decision_function", + "name": "decision_function", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/decision_function/self", + "name": "self", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/decision_function/X", + "name": "X", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data matrix." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the decision function of the given observations.", + "docstring": "Compute the decision function of the given observations.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\ndecision : ndarray of shape (n_samples,)\n Decision function of the samples.\n It is equal to the shifted Mahalanobis distances.\n The threshold for being an outlier is 0, which ensures a\n compatibility with other outlier detection algorithms.", + "code": " def decision_function(self, X):\n \"\"\"Compute the decision function of the given observations.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n decision : ndarray of shape (n_samples,)\n Decision function of the samples.\n It is equal to the shifted Mahalanobis distances.\n The threshold for being an outlier is 0, which ensures a\n compatibility with other outlier detection algorithms.\n \"\"\"\n check_is_fitted(self)\n negative_mahal_dist = self.score_samples(X)\n return negative_mahal_dist - self.offset_" + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/fit", + "name": "fit", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/fit/self", + "name": "self", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/fit/X", + "name": "X", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/fit/y", + "name": "y", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." 
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the EllipticEnvelope model.", + "docstring": "Fit the EllipticEnvelope model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : Ignored\n Not used, present for API consistency by convention.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the EllipticEnvelope model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : Ignored\n Not used, present for API consistency by convention.\n \"\"\"\n super().fit(X)\n self.offset_ = np.percentile(-self.dist_, 100. * self.contamination)\n return self" + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/predict", + "name": "predict", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/predict/self", + "name": "self", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/predict/X", + "name": "X", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data matrix." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the labels (1 inlier, -1 outlier) of X according to the\nfitted model.", + "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to the\nfitted model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers.", + "code": " def predict(self, X):\n \"\"\"\n Predict the labels (1 inlier, -1 outlier) of X according to the\n fitted model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n is_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers.\n \"\"\"\n X = check_array(X)\n is_inlier = np.full(X.shape[0], -1, dtype=int)\n values = self.decision_function(X)\n is_inlier[values >= 0] = 1\n\n return is_inlier" + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/score", + "name": "score", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/score/self", + "name": "self", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/score/X", + "name": "X", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Test samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/score/y", + "name": "y", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "True labels for X." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.", + "docstring": "Returns the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Mean accuracy of self.predict(X) w.r.t. y.", + "code": " def score(self, X, y, sample_weight=None):\n \"\"\"Returns the mean accuracy on the given test data and labels.\n\n In multi-label classification, this is the subset accuracy\n which is a harsh metric since you require for each sample that\n each label set be correctly predicted.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test samples.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Mean accuracy of self.predict(X) w.r.t. y.\n \"\"\"\n return accuracy_score(y, self.predict(X), sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/score_samples", + "name": "score_samples", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.score_samples", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/score_samples/self", + "name": "self", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._elliptic_envelope/EllipticEnvelope/score_samples/X", + "name": "X", + "qname": "sklearn.covariance._elliptic_envelope.EllipticEnvelope.score_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data matrix." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the negative Mahalanobis distances.", + "docstring": "Compute the negative Mahalanobis distances.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nnegative_mahal_distances : array-like of shape (n_samples,)\n Opposite of the Mahalanobis distances.", + "code": " def score_samples(self, X):\n \"\"\"Compute the negative Mahalanobis distances.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n negative_mahal_distances : array-like of shape (n_samples,)\n Opposite of the Mahalanobis distances.\n \"\"\"\n check_is_fitted(self)\n return -self.mahalanobis(X)" + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/__init__", + "name": "__init__", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/__init__/self", + "name": "self", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/__init__/store_precision", + "name": "store_precision", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.__init__.store_precision", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Specifies if the estimated precision is stored." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/__init__/assume_centered", + "name": "assume_centered", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.__init__.assume_centered", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, data are not centered before computation.\nUseful when working with data whose mean is almost, but not exactly\nzero.\nIf False (default), data are centered before computation." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Maximum likelihood covariance estimator\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, store_precision=True, assume_centered=False):\n self.store_precision = store_precision\n self.assume_centered = assume_centered" + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/_set_covariance", + "name": "_set_covariance", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance._set_covariance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/_set_covariance/self", + "name": "self", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance._set_covariance.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/_set_covariance/covariance", + "name": "covariance", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance._set_covariance.covariance", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features, n_features)", + "default_value": "", + "description": "Estimated covariance matrix to be stored, and from which precision\nis computed." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Saves the covariance and precision estimates\n\nStorage is done accordingly to `self.store_precision`.\nPrecision stored only if invertible.", + "docstring": "Saves the covariance and precision estimates\n\nStorage is done accordingly to `self.store_precision`.\nPrecision stored only if invertible.\n\nParameters\n----------\ncovariance : array-like of shape (n_features, n_features)\n Estimated covariance matrix to be stored, and from which precision\n is computed.", + "code": " def _set_covariance(self, covariance):\n \"\"\"Saves the covariance and precision estimates\n\n Storage is done accordingly to `self.store_precision`.\n Precision stored only if invertible.\n\n Parameters\n ----------\n covariance : array-like of shape (n_features, n_features)\n Estimated covariance matrix to be stored, and from which precision\n is computed.\n \"\"\"\n covariance = check_array(covariance)\n # set covariance\n self.covariance_ = covariance\n # set precision\n if self.store_precision:\n self.precision_ = linalg.pinvh(covariance, check_finite=False)\n else:\n self.precision_ = None" + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/error_norm", + "name": "error_norm", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.error_norm", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/error_norm/self", + "name": "self", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.error_norm.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { 
+ "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/error_norm/comp_cov", + "name": "comp_cov", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.error_norm.comp_cov", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features, n_features)", + "default_value": "", + "description": "The covariance to compare with." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/error_norm/norm", + "name": "norm", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.error_norm.norm", + "default_value": "'frobenius'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{\"frobenius\", \"spectral\"}", + "default_value": "\"frobenius\"", + "description": "The type of norm used to compute the error. Available error types:\n- 'frobenius' (default): sqrt(tr(A^t.A))\n- 'spectral': sqrt(max(eigenvalues(A^t.A))\nwhere A is the error ``(comp_cov - self.covariance_)``." + }, + "type": { + "kind": "EnumType", + "values": ["spectral", "frobenius"] + } + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/error_norm/scaling", + "name": "scaling", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.error_norm.scaling", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True (default), the squared error norm is divided by n_features.\nIf False, the squared error norm is not rescaled." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/error_norm/squared", + "name": "squared", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.error_norm.squared", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to compute the squared error norm or the error norm.\nIf True (default), the squared error norm is returned.\nIf False, the error norm is returned." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the Mean Squared Error between two covariance estimators.\n(In the sense of the Frobenius norm).", + "docstring": "Computes the Mean Squared Error between two covariance estimators.\n(In the sense of the Frobenius norm).\n\nParameters\n----------\ncomp_cov : array-like of shape (n_features, n_features)\n The covariance to compare with.\n\nnorm : {\"frobenius\", \"spectral\"}, default=\"frobenius\"\n The type of norm used to compute the error. 
Available error types:\n - 'frobenius' (default): sqrt(tr(A^t.A))\n - 'spectral': sqrt(max(eigenvalues(A^t.A))\n where A is the error ``(comp_cov - self.covariance_)``.\n\nscaling : bool, default=True\n If True (default), the squared error norm is divided by n_features.\n If False, the squared error norm is not rescaled.\n\nsquared : bool, default=True\n Whether to compute the squared error norm or the error norm.\n If True (default), the squared error norm is returned.\n If False, the error norm is returned.\n\nReturns\n-------\nresult : float\n The Mean Squared Error (in the sense of the Frobenius norm) between\n `self` and `comp_cov` covariance estimators.", + "code": " def error_norm(self, comp_cov, norm='frobenius', scaling=True,\n squared=True):\n \"\"\"Computes the Mean Squared Error between two covariance estimators.\n (In the sense of the Frobenius norm).\n\n Parameters\n ----------\n comp_cov : array-like of shape (n_features, n_features)\n The covariance to compare with.\n\n norm : {\"frobenius\", \"spectral\"}, default=\"frobenius\"\n The type of norm used to compute the error. Available error types:\n - 'frobenius' (default): sqrt(tr(A^t.A))\n - 'spectral': sqrt(max(eigenvalues(A^t.A))\n where A is the error ``(comp_cov - self.covariance_)``.\n\n scaling : bool, default=True\n If True (default), the squared error norm is divided by n_features.\n If False, the squared error norm is not rescaled.\n\n squared : bool, default=True\n Whether to compute the squared error norm or the error norm.\n If True (default), the squared error norm is returned.\n If False, the error norm is returned.\n\n Returns\n -------\n result : float\n The Mean Squared Error (in the sense of the Frobenius norm) between\n `self` and `comp_cov` covariance estimators.\n \"\"\"\n # compute the error\n error = comp_cov - self.covariance_\n # compute the error norm\n if norm == \"frobenius\":\n squared_norm = np.sum(error ** 2)\n elif norm == \"spectral\":\n squared_norm = np.amax(linalg.svdvals(np.dot(error.T, error)))\n else:\n raise NotImplementedError(\n \"Only spectral and frobenius norms are implemented\")\n # optionally scale the error norm\n if scaling:\n squared_norm = squared_norm / error.shape[0]\n # finally get either the squared norm or the norm\n if squared:\n result = squared_norm\n else:\n result = np.sqrt(squared_norm)\n\n return result" + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/fit", + "name": "fit", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/fit/self", + "name": "self", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/fit/X", + "name": "X", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples and\nn_features is the number of features." 
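A worked check of the error_norm defaults documented above (Frobenius norm, scaling by n_features, squared result); the comparison matrix here is arbitrary:

```python
import numpy as np
from sklearn.covariance import EmpiricalCovariance

rng = np.random.RandomState(0)
X = rng.normal(size=(500, 3))
est = EmpiricalCovariance().fit(X)

comp_cov = np.eye(3)                          # arbitrary covariance to compare with
error = comp_cov - est.covariance_
manual = np.sum(error ** 2) / error.shape[0]  # squared Frobenius norm / n_features
assert np.isclose(est.error_norm(comp_cov), manual)
```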
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/fit/y", + "name": "y", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fits the Maximum Likelihood Estimator covariance model\naccording to the given training data and parameters.", + "docstring": "Fits the Maximum Likelihood Estimator covariance model\naccording to the given training data and parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y=None):\n \"\"\"Fits the Maximum Likelihood Estimator covariance model\n according to the given training data and parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n X = self._validate_data(X)\n if self.assume_centered:\n self.location_ = np.zeros(X.shape[1])\n else:\n self.location_ = X.mean(0)\n covariance = empirical_covariance(\n X, assume_centered=self.assume_centered)\n self._set_covariance(covariance)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/get_precision", + "name": "get_precision", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.get_precision", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/get_precision/self", + "name": "self", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.get_precision.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Getter for the precision matrix.", + "docstring": "Getter for the precision matrix.\n\nReturns\n-------\nprecision_ : array-like of shape (n_features, n_features)\n The precision matrix associated to the current covariance object.", + "code": " def get_precision(self):\n \"\"\"Getter for the precision matrix.\n\n Returns\n -------\n precision_ : array-like of shape (n_features, n_features)\n The precision matrix associated to the current covariance object.\n \"\"\"\n if self.store_precision:\n precision = self.precision_\n else:\n precision = linalg.pinvh(self.covariance_, check_finite=False)\n return precision" + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/mahalanobis", + "name": "mahalanobis", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.mahalanobis", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/mahalanobis/self", + "name": "self", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.mahalanobis.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/mahalanobis/X", + "name": "X", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.mahalanobis.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The observations, the Mahalanobis distances of the which we\ncompute. Observations are assumed to be drawn from the same\ndistribution than the data used in fit." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the squared Mahalanobis distances of given observations.", + "docstring": "Computes the squared Mahalanobis distances of given observations.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The observations, the Mahalanobis distances of the which we\n compute. Observations are assumed to be drawn from the same\n distribution than the data used in fit.\n\nReturns\n-------\ndist : ndarray of shape (n_samples,)\n Squared Mahalanobis distances of the observations.", + "code": " def mahalanobis(self, X):\n \"\"\"Computes the squared Mahalanobis distances of given observations.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The observations, the Mahalanobis distances of the which we\n compute. 
Observations are assumed to be drawn from the same\n distribution than the data used in fit.\n\n Returns\n -------\n dist : ndarray of shape (n_samples,)\n Squared Mahalanobis distances of the observations.\n \"\"\"\n precision = self.get_precision()\n # compute mahalanobis distances\n dist = pairwise_distances(X, self.location_[np.newaxis, :],\n metric='mahalanobis', VI=precision)\n\n return np.reshape(dist, (len(X),)) ** 2" + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/score", + "name": "score", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/score/self", + "name": "self", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/score/X_test", + "name": "X_test", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.score.X_test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Test data of which we compute the likelihood, where n_samples is\nthe number of samples and n_features is the number of features.\nX_test is assumed to be drawn from the same distribution than\nthe data used in fit (including centering)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/EmpiricalCovariance/score/y", + "name": "y", + "qname": "sklearn.covariance._empirical_covariance.EmpiricalCovariance.score.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." 
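mahalanobis, as recorded, returns squared distances computed against location_ with the precision matrix as VI; the following recomputes them directly from the quadratic form on synthetic data:

```python
import numpy as np
from sklearn.covariance import EmpiricalCovariance

rng = np.random.RandomState(0)
X = rng.normal(size=(300, 2))
est = EmpiricalCovariance().fit(X)

diff = X - est.location_
manual = np.einsum('ij,jk,ik->i', diff, est.get_precision(), diff)
assert np.allclose(est.mahalanobis(X), manual)       # squared Mahalanobis distances
```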
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the log-likelihood of a Gaussian data set with\n`self.covariance_` as an estimator of its covariance matrix.", + "docstring": "Computes the log-likelihood of a Gaussian data set with\n`self.covariance_` as an estimator of its covariance matrix.\n\nParameters\n----------\nX_test : array-like of shape (n_samples, n_features)\n Test data of which we compute the likelihood, where n_samples is\n the number of samples and n_features is the number of features.\n X_test is assumed to be drawn from the same distribution than\n the data used in fit (including centering).\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nres : float\n The likelihood of the data set with `self.covariance_` as an\n estimator of its covariance matrix.", + "code": " def score(self, X_test, y=None):\n \"\"\"Computes the log-likelihood of a Gaussian data set with\n `self.covariance_` as an estimator of its covariance matrix.\n\n Parameters\n ----------\n X_test : array-like of shape (n_samples, n_features)\n Test data of which we compute the likelihood, where n_samples is\n the number of samples and n_features is the number of features.\n X_test is assumed to be drawn from the same distribution than\n the data used in fit (including centering).\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n res : float\n The likelihood of the data set with `self.covariance_` as an\n estimator of its covariance matrix.\n \"\"\"\n # compute empirical covariance of the test set\n test_cov = empirical_covariance(\n X_test - self.location_, assume_centered=True)\n # compute log likelihood\n res = log_likelihood(test_cov, self.get_precision())\n\n return res" + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/empirical_covariance", + "name": "empirical_covariance", + "qname": "sklearn.covariance._empirical_covariance.empirical_covariance", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/empirical_covariance/X", + "name": "X", + "qname": "sklearn.covariance._empirical_covariance.empirical_covariance.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Data from which to compute the covariance estimate" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/empirical_covariance/assume_centered", + "name": "assume_centered", + "qname": "sklearn.covariance._empirical_covariance.empirical_covariance.assume_centered", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, data will not be centered before computation.\nUseful when working with data whose mean is almost, but not exactly\nzero.\nIf False, data will be centered before computation." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the Maximum likelihood covariance estimator", + "docstring": "Computes the Maximum likelihood covariance estimator\n\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data will be centered before computation.\n\nReturns\n-------\ncovariance : ndarray of shape (n_features, n_features)\n Empirical covariance (Maximum Likelihood Estimator).\n\nExamples\n--------\n>>> from sklearn.covariance import empirical_covariance\n>>> X = [[1,1,1],[1,1,1],[1,1,1],\n... [0,0,0],[0,0,0],[0,0,0]]\n>>> empirical_covariance(X)\narray([[0.25, 0.25, 0.25],\n [0.25, 0.25, 0.25],\n [0.25, 0.25, 0.25]])", + "code": "@_deprecate_positional_args\ndef empirical_covariance(X, *, assume_centered=False):\n \"\"\"Computes the Maximum likelihood covariance estimator\n\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\n assume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data will be centered before computation.\n\n Returns\n -------\n covariance : ndarray of shape (n_features, n_features)\n Empirical covariance (Maximum Likelihood Estimator).\n\n Examples\n --------\n >>> from sklearn.covariance import empirical_covariance\n >>> X = [[1,1,1],[1,1,1],[1,1,1],\n ... [0,0,0],[0,0,0],[0,0,0]]\n >>> empirical_covariance(X)\n array([[0.25, 0.25, 0.25],\n [0.25, 0.25, 0.25],\n [0.25, 0.25, 0.25]])\n \"\"\"\n X = np.asarray(X)\n\n if X.ndim == 1:\n X = np.reshape(X, (1, -1))\n\n if X.shape[0] == 1:\n warnings.warn(\"Only one sample available. \"\n \"You may want to reshape your data array\")\n\n if assume_centered:\n covariance = np.dot(X.T, X) / X.shape[0]\n else:\n covariance = np.cov(X.T, bias=1)\n\n if covariance.ndim == 0:\n covariance = np.array([[covariance]])\n return covariance" + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/log_likelihood", + "name": "log_likelihood", + "qname": "sklearn.covariance._empirical_covariance.log_likelihood", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/log_likelihood/emp_cov", + "name": "emp_cov", + "qname": "sklearn.covariance._empirical_covariance.log_likelihood.emp_cov", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features, n_features)", + "default_value": "", + "description": "Maximum Likelihood Estimator of covariance." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._empirical_covariance/log_likelihood/precision", + "name": "precision", + "qname": "sklearn.covariance._empirical_covariance.log_likelihood.precision", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features, n_features)", + "default_value": "", + "description": "The precision matrix of the covariance model to be tested." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the sample mean of the log_likelihood under a covariance model\n\ncomputes the empirical expected log-likelihood (accounting for the\nnormalization terms and scaling), allowing for universal comparison (beyond\nthis software package)", + "docstring": "Computes the sample mean of the log_likelihood under a covariance model\n\ncomputes the empirical expected log-likelihood (accounting for the\nnormalization terms and scaling), allowing for universal comparison (beyond\nthis software package)\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n Maximum Likelihood Estimator of covariance.\n\nprecision : ndarray of shape (n_features, n_features)\n The precision matrix of the covariance model to be tested.\n\nReturns\n-------\nlog_likelihood_ : float\n Sample mean of the log-likelihood.", + "code": "def log_likelihood(emp_cov, precision):\n \"\"\"Computes the sample mean of the log_likelihood under a covariance model\n\n computes the empirical expected log-likelihood (accounting for the\n normalization terms and scaling), allowing for universal comparison (beyond\n this software package)\n\n Parameters\n ----------\n emp_cov : ndarray of shape (n_features, n_features)\n Maximum Likelihood Estimator of covariance.\n\n precision : ndarray of shape (n_features, n_features)\n The precision matrix of the covariance model to be tested.\n\n Returns\n -------\n log_likelihood_ : float\n Sample mean of the log-likelihood.\n \"\"\"\n p = precision.shape[0]\n log_likelihood_ = - np.sum(emp_cov * precision) + fast_logdet(precision)\n log_likelihood_ -= p * np.log(2 * np.pi)\n log_likelihood_ /= 2.\n return log_likelihood_" + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/__init__", + "name": "__init__", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/__init__/self", + "name": "self", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/__init__/alpha", + "name": "alpha", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso.__init__.alpha", + "default_value": "0.01", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.01", + "description": "The regularization parameter: the higher alpha, the more\nregularization, the sparser the inverse covariance.\nRange is (0, inf]." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/__init__/mode", + "name": "mode", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso.__init__.mode", + "default_value": "'cd'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'cd', 'lars'}", + "default_value": "'cd'", + "description": "The Lasso solver to use: coordinate descent or LARS. Use LARS for\nvery sparse underlying graphs, where p > n. Elsewhere prefer cd\nwhich is more numerically stable." 
+ }, + "type": { + "kind": "EnumType", + "values": ["cd", "lars"] + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/__init__/tol", + "name": "tol", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance to declare convergence: if the dual gap goes below\nthis value, iterations are stopped. Range is (0, inf]." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/__init__/enet_tol", + "name": "enet_tol", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso.__init__.enet_tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance for the elastic net solver used to calculate the descent\ndirection. This parameter controls the accuracy of the search direction\nfor a given column update, not of the overall parameter estimate. Only\nused for mode='cd'. Range is (0, inf]." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/__init__/verbose", + "name": "verbose", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If verbose is True, the objective function and dual gap are\nplotted at each iteration." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/__init__/assume_centered", + "name": "assume_centered", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso.__init__.assume_centered", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, data are not centered before computation.\nUseful when working with data whose mean is almost, but not exactly\nzero.\nIf False, data are centered before computation." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionchanged:: v0.20\n GraphLasso has been renamed to GraphicalLasso", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, alpha=.01, *, mode='cd', tol=1e-4, enet_tol=1e-4,\n max_iter=100, verbose=False, assume_centered=False):\n super().__init__(assume_centered=assume_centered)\n self.alpha = alpha\n self.mode = mode\n self.tol = tol\n self.enet_tol = enet_tol\n self.max_iter = max_iter\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/fit", + "name": "fit", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/fit/self", + "name": "self", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/fit/X", + "name": "X", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data from which to compute the covariance estimate" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLasso/fit/y", + "name": "y", + "qname": "sklearn.covariance._graph_lasso.GraphicalLasso.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." 
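A usage sketch for the GraphicalLasso constructor parameters documented above; the penalty value and the sparse ground truth are illustrative choices, not defaults:

```python
import numpy as np
from sklearn.covariance import GraphicalLasso
from sklearn.datasets import make_sparse_spd_matrix

prec = make_sparse_spd_matrix(dim=5, alpha=0.9, random_state=0)  # sparse true precision
rng = np.random.RandomState(0)
X = rng.multivariate_normal(np.zeros(5), np.linalg.inv(prec), size=500)

model = GraphicalLasso(alpha=0.05).fit(X)       # higher alpha => sparser precision_
nonzeros = np.sum(np.abs(model.precision_) > 1e-8)
print(nonzeros)                                 # shrinks toward the diagonal as alpha grows
```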
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fits the GraphicalLasso model to X.", + "docstring": "Fits the GraphicalLasso model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y=None):\n \"\"\"Fits the GraphicalLasso model to X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n # Covariance does not make sense for a single feature\n X = self._validate_data(X, ensure_min_features=2, ensure_min_samples=2,\n estimator=self)\n\n if self.assume_centered:\n self.location_ = np.zeros(X.shape[1])\n else:\n self.location_ = X.mean(0)\n emp_cov = empirical_covariance(\n X, assume_centered=self.assume_centered)\n self.covariance_, self.precision_, self.n_iter_ = graphical_lasso(\n emp_cov, alpha=self.alpha, mode=self.mode, tol=self.tol,\n enet_tol=self.enet_tol, max_iter=self.max_iter,\n verbose=self.verbose, return_n_iter=True)\n return self" + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__", + "name": "__init__", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__/self", + "name": "self", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__/alphas", + "name": "alphas", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.__init__.alphas", + "default_value": "4", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or array-like of shape (n_alphas,), dtype=float", + "default_value": "4", + "description": "If an integer is given, it fixes the number of points on the\ngrids of alpha to be used. If a list is given, it gives the\ngrid to be used. See the notes in the class docstring for\nmore details. Range is (0, inf] when floats given." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_alphas,)" + }, + { + "kind": "NamedType", + "name": "dtype=float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__/n_refinements", + "name": "n_refinements", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.__init__.n_refinements", + "default_value": "4", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "4", + "description": "The number of times the grid is refined. Not used if explicit\nvalues of alphas are passed. Range is [1, inf)." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__/cv", + "name": "cv", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.20\n ``cv`` default value if None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__/tol", + "name": "tol", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance to declare convergence: if the dual gap goes below\nthis value, iterations are stopped. Range is (0, inf]." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__/enet_tol", + "name": "enet_tol", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.__init__.enet_tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance for the elastic net solver used to calculate the descent\ndirection. This parameter controls the accuracy of the search direction\nfor a given column update, not of the overall parameter estimate. Only\nused for mode='cd'. Range is (0, inf]." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__/mode", + "name": "mode", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.__init__.mode", + "default_value": "'cd'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'cd', 'lars'}", + "default_value": "'cd'", + "description": "The Lasso solver to use: coordinate descent or LARS. Use LARS for\nvery sparse underlying graphs, where number of features is greater\nthan number of samples. Elsewhere prefer cd which is more numerically\nstable." 
+ }, + "type": { + "kind": "EnumType", + "values": ["cd", "lars"] + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If verbose is True, the objective function and duality gap are\nprinted at each iteration." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/__init__/assume_centered", + "name": "assume_centered", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.__init__.assume_centered", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, data are not centered before computation.\nUseful when working with data whose mean is almost, but not exactly\nzero.\nIf False, data are centered before computation." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionchanged:: v0.20\n GraphLassoCV has been renamed to GraphicalLassoCV", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, alphas=4, n_refinements=4, cv=None, tol=1e-4,\n enet_tol=1e-4, max_iter=100, mode='cd', n_jobs=None,\n verbose=False, assume_centered=False):\n super().__init__(\n mode=mode, tol=tol, verbose=verbose, enet_tol=enet_tol,\n max_iter=max_iter, assume_centered=assume_centered)\n self.alphas = alphas\n self.n_refinements = n_refinements\n self.cv = cv\n self.n_jobs = n_jobs" + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/cv_alphas_@getter", + "name": "cv_alphas_", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.cv_alphas_", + "decorators": [ + "deprecated(\"The cv_alphas_ attribute is deprecated in version 0.24 in favor of cv_results_['alpha'] and will be removed in version 1.1 (renaming of 0.26).\")", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/cv_alphas_/self", + "name": "self", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.cv_alphas_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"The cv_alphas_ attribute is deprecated in version 0.24 in favor \"\n \"of cv_results_['alpha'] and will be removed in version 1.1 \"\n \"(renaming of 0.26).\"\n )\n @property\n def cv_alphas_(self):\n return self.cv_results_['alphas'].tolist()" + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/fit", + "name": "fit", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/fit/self", + "name": "self", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/fit/X", + "name": "X", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data from which to compute the covariance estimate" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/fit/y", + "name": "y", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." 
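GraphicalLassoCV, as documented above, exposes its grid search through cv_results_, with cv_alphas_ kept only as the deprecated alias recorded in this entry; a small sketch of the resulting key layout (the data and grid sizes are arbitrary):

```python
import numpy as np
from sklearn.covariance import GraphicalLassoCV

rng = np.random.RandomState(0)
X = rng.normal(size=(60, 4))

cv_model = GraphicalLassoCV(alphas=4, n_refinements=2, cv=3).fit(X)
print(cv_model.alpha_)               # penalty selected by cross-validation
print(sorted(cv_model.cv_results_))  # 'alphas', 'mean_score', 'split{i}_score', 'std_score'
```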
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fits the GraphicalLasso covariance model to X.", + "docstring": "Fits the GraphicalLasso covariance model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y=None):\n \"\"\"Fits the GraphicalLasso covariance model to X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n # Covariance does not make sense for a single feature\n X = self._validate_data(X, ensure_min_features=2, estimator=self)\n if self.assume_centered:\n self.location_ = np.zeros(X.shape[1])\n else:\n self.location_ = X.mean(0)\n emp_cov = empirical_covariance(\n X, assume_centered=self.assume_centered)\n\n cv = check_cv(self.cv, y, classifier=False)\n\n # List of (alpha, scores, covs)\n path = list()\n n_alphas = self.alphas\n inner_verbose = max(0, self.verbose - 1)\n\n if isinstance(n_alphas, Sequence):\n alphas = self.alphas\n n_refinements = 1\n else:\n n_refinements = self.n_refinements\n alpha_1 = alpha_max(emp_cov)\n alpha_0 = 1e-2 * alpha_1\n alphas = np.logspace(np.log10(alpha_0), np.log10(alpha_1),\n n_alphas)[::-1]\n\n t0 = time.time()\n for i in range(n_refinements):\n with warnings.catch_warnings():\n # No need to see the convergence warnings on this grid:\n # they will always be points that will not converge\n # during the cross-validation\n warnings.simplefilter('ignore', ConvergenceWarning)\n # Compute the cross-validated loss on the current grid\n\n # NOTE: Warm-restarting graphical_lasso_path has been tried,\n # and this did not allow to gain anything\n # (same execution time with or without).\n this_path = Parallel(\n n_jobs=self.n_jobs,\n verbose=self.verbose\n )(delayed(graphical_lasso_path)(X[train], alphas=alphas,\n X_test=X[test], mode=self.mode,\n tol=self.tol,\n enet_tol=self.enet_tol,\n max_iter=int(.1 *\n self.max_iter),\n verbose=inner_verbose)\n for train, test in cv.split(X, y))\n\n # Little danse to transform the list in what we need\n covs, _, scores = zip(*this_path)\n covs = zip(*covs)\n scores = zip(*scores)\n path.extend(zip(alphas, scores, covs))\n path = sorted(path, key=operator.itemgetter(0), reverse=True)\n\n # Find the maximum (avoid using built in 'max' function to\n # have a fully-reproducible selection of the smallest alpha\n # in case of equality)\n best_score = -np.inf\n last_finite_idx = 0\n for index, (alpha, scores, _) in enumerate(path):\n this_score = np.mean(scores)\n if this_score >= .1 / np.finfo(np.float64).eps:\n this_score = np.nan\n if np.isfinite(this_score):\n last_finite_idx = index\n if this_score >= best_score:\n best_score = this_score\n best_index = index\n\n # Refine the grid\n if best_index == 0:\n # We do not need to go back: we have chosen\n # the highest value of alpha for which there are\n # non-zero coefficients\n alpha_1 = path[0][0]\n alpha_0 = path[1][0]\n elif (best_index == last_finite_idx\n and not best_index == len(path) - 1):\n # We have non-converged models on the upper bound of the\n # grid, we need to refine the grid there\n alpha_1 = path[best_index][0]\n alpha_0 = 
path[best_index + 1][0]\n elif best_index == len(path) - 1:\n alpha_1 = path[best_index][0]\n alpha_0 = 0.01 * path[best_index][0]\n else:\n alpha_1 = path[best_index - 1][0]\n alpha_0 = path[best_index + 1][0]\n\n if not isinstance(n_alphas, Sequence):\n alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0),\n n_alphas + 2)\n alphas = alphas[1:-1]\n\n if self.verbose and n_refinements > 1:\n print('[GraphicalLassoCV] Done refinement % 2i out of'\n ' %i: % 3is' % (i + 1, n_refinements, time.time() - t0))\n\n path = list(zip(*path))\n grid_scores = list(path[1])\n alphas = list(path[0])\n # Finally, compute the score with alpha = 0\n alphas.append(0)\n grid_scores.append(cross_val_score(EmpiricalCovariance(), X,\n cv=cv, n_jobs=self.n_jobs,\n verbose=inner_verbose))\n grid_scores = np.array(grid_scores)\n self.cv_results_ = {'alphas': np.array(alphas)}\n for i in range(grid_scores.shape[1]):\n key = \"split{}_score\".format(i)\n self.cv_results_[key] = grid_scores[:, i]\n\n self.cv_results_[\"mean_score\"] = np.mean(grid_scores, axis=1)\n self.cv_results_[\"std_score\"] = np.std(grid_scores, axis=1)\n\n best_alpha = alphas[best_index]\n self.alpha_ = best_alpha\n\n # Finally fit the model with the selected alpha\n self.covariance_, self.precision_, self.n_iter_ = graphical_lasso(\n emp_cov, alpha=best_alpha, mode=self.mode, tol=self.tol,\n enet_tol=self.enet_tol, max_iter=self.max_iter,\n verbose=inner_verbose, return_n_iter=True)\n return self" + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/grid_scores_@getter", + "name": "grid_scores_", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.grid_scores_", + "decorators": [ + "deprecated('The grid_scores_ attribute is deprecated in version 0.24 in favor of cv_results_ and will be removed in version 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/GraphicalLassoCV/grid_scores_/self", + "name": "self", + "qname": "sklearn.covariance._graph_lasso.GraphicalLassoCV.grid_scores_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"The grid_scores_ attribute is deprecated in version 0.24 in favor \"\n \"of cv_results_ and will be removed in version 1.1 (renaming of 0.26).\"\n )\n @property\n def grid_scores_(self):\n # remove 3 for mean_score, std_score, and alphas\n n_alphas = len(self.cv_results_) - 3\n return np.asarray(\n [self.cv_results_[\"split{}_score\".format(i)]\n for i in range(n_alphas)]).T" + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/_dual_gap", + "name": "_dual_gap", + "qname": "sklearn.covariance._graph_lasso._dual_gap", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/_dual_gap/emp_cov", + "name": "emp_cov", + "qname": "sklearn.covariance._graph_lasso._dual_gap.emp_cov", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/_dual_gap/precision_", + "name": "precision_", + "qname": "sklearn.covariance._graph_lasso._dual_gap.precision_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + 
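The fit code above builds each refinement pass on a descending log-spaced grid between 1e-2 * alpha_max and alpha_max; a standalone sketch of that grid construction, with alpha_1 fixed to an arbitrary value standing in for alpha_max(emp_cov):

```python
import numpy as np

alpha_1 = 0.8                 # stands in for alpha_max(emp_cov)
alpha_0 = 1e-2 * alpha_1
n_alphas = 4
alphas = np.logspace(np.log10(alpha_0), np.log10(alpha_1), n_alphas)[::-1]
print(alphas)                 # descending grid spanning two decades, as in fit()
```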
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/_dual_gap/alpha", + "name": "alpha", + "qname": "sklearn.covariance._graph_lasso._dual_gap.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Expression of the dual gap convergence criterion\n\nThe specific definition is given in Duchi \"Projected Subgradient Methods\nfor Learning Sparse Gaussians\".", + "docstring": "Expression of the dual gap convergence criterion\n\nThe specific definition is given in Duchi \"Projected Subgradient Methods\nfor Learning Sparse Gaussians\".", + "code": "def _dual_gap(emp_cov, precision_, alpha):\n \"\"\"Expression of the dual gap convergence criterion\n\n The specific definition is given in Duchi \"Projected Subgradient Methods\n for Learning Sparse Gaussians\".\n \"\"\"\n gap = np.sum(emp_cov * precision_)\n gap -= precision_.shape[0]\n gap += alpha * (np.abs(precision_).sum()\n - np.abs(np.diag(precision_)).sum())\n return gap" + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/_objective", + "name": "_objective", + "qname": "sklearn.covariance._graph_lasso._objective", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/_objective/mle", + "name": "mle", + "qname": "sklearn.covariance._graph_lasso._objective.mle", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/_objective/precision_", + "name": "precision_", + "qname": "sklearn.covariance._graph_lasso._objective.precision_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/_objective/alpha", + "name": "alpha", + "qname": "sklearn.covariance._graph_lasso._objective.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Evaluation of the graphical-lasso objective function\n\nthe objective function is made of a shifted scaled version of the\nnormalized log-likelihood (i.e. its empirical mean over the samples) and a\npenalisation term to promote sparsity", + "docstring": "Evaluation of the graphical-lasso objective function\n\nthe objective function is made of a shifted scaled version of the\nnormalized log-likelihood (i.e. its empirical mean over the samples) and a\npenalisation term to promote sparsity", + "code": "def _objective(mle, precision_, alpha):\n \"\"\"Evaluation of the graphical-lasso objective function\n\n the objective function is made of a shifted scaled version of the\n normalized log-likelihood (i.e. its empirical mean over the samples) and a\n penalisation term to promote sparsity\n \"\"\"\n p = precision_.shape[0]\n cost = - 2. 
* log_likelihood(mle, precision_) + p * np.log(2 * np.pi)\n cost += alpha * (np.abs(precision_).sum()\n - np.abs(np.diag(precision_)).sum())\n return cost" + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/alpha_max", + "name": "alpha_max", + "qname": "sklearn.covariance._graph_lasso.alpha_max", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/alpha_max/emp_cov", + "name": "emp_cov", + "qname": "sklearn.covariance._graph_lasso.alpha_max.emp_cov", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features, n_features)", + "default_value": "", + "description": "The sample covariance matrix." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Find the maximum alpha for which there are some non-zero off-diagonal entries.", + "docstring": "Find the maximum alpha for which there are some non-zero off-diagonal entries.\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n The sample covariance matrix.\n\nNotes\n-----\nThis results from the bound for all the Lasso problems that are solved\nin GraphicalLasso: each time, the row of cov corresponds to Xy. As the\nbound for alpha is given by `max(abs(Xy))`, the result follows.", + "code": "def alpha_max(emp_cov):\n \"\"\"Find the maximum alpha for which there are some non-zero off-diagonal entries.\n\n Parameters\n ----------\n emp_cov : ndarray of shape (n_features, n_features)\n The sample covariance matrix.\n\n Notes\n -----\n This results from the bound for all the Lasso problems that are solved\n in GraphicalLasso: each time, the row of cov corresponds to Xy. As the\n bound for alpha is given by `max(abs(Xy))`, the result follows.\n \"\"\"\n A = np.copy(emp_cov)\n A.flat[::A.shape[0] + 1] = 0\n return np.max(np.abs(A))" + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso", + "name": "graphical_lasso", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso/emp_cov", + "name": "emp_cov", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso.emp_cov", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features, n_features)", + "default_value": "", + "description": "Empirical covariance from which to compute the covariance estimate." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso/alpha", + "name": "alpha", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "The regularization parameter: the higher alpha, the more\nregularization, the sparser the inverse covariance.\nRange is (0, inf]."
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso/cov_init", + "name": "cov_init", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso.cov_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array of shape (n_features, n_features)", + "default_value": "None", + "description": "The initial guess for the covariance. If None, then the empirical\ncovariance is used." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_features, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso/mode", + "name": "mode", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso.mode", + "default_value": "'cd'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'cd', 'lars'}", + "default_value": "'cd'", + "description": "The Lasso solver to use: coordinate descent or LARS. Use LARS for\nvery sparse underlying graphs, where p > n. Elsewhere prefer cd\nwhich is more numerically stable." + }, + "type": { + "kind": "EnumType", + "values": ["cd", "lars"] + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso/tol", + "name": "tol", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance to declare convergence: if the dual gap goes below\nthis value, iterations are stopped. Range is (0, inf]." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso/enet_tol", + "name": "enet_tol", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso.enet_tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance for the elastic net solver used to calculate the descent\ndirection. This parameter controls the accuracy of the search direction\nfor a given column update, not of the overall parameter estimate. Only\nused for mode='cd'. Range is (0, inf]." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso/max_iter", + "name": "max_iter", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso/verbose", + "name": "verbose", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If verbose is True, the objective function and dual gap are\nprinted at each iteration." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso/return_costs", + "name": "return_costs", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso.return_costs", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "Flase", + "description": "If return_costs is True, the objective function and dual gap\nat each iteration are returned." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso/eps", + "name": "eps", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso.eps", + "default_value": "np.finfo(np.float64).eps", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "eps", + "description": "The machine-precision regularization in the computation of the\nCholesky diagonal factors. Increase this for very ill-conditioned\nsystems. Default is `np.finfo(np.float64).eps`." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether or not to return the number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "l1-penalized covariance estimator\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n graph_lasso has been renamed to graphical_lasso", + "docstring": "l1-penalized covariance estimator\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n graph_lasso has been renamed to graphical_lasso\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n Empirical covariance from which to compute the covariance estimate.\n\nalpha : float\n The regularization parameter: the higher alpha, the more\n regularization, the sparser the inverse covariance.\n Range is (0, inf].\n\ncov_init : array of shape (n_features, n_features), default=None\n The initial guess for the covariance. If None, then the empirical\n covariance is used.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. 
Range is (0, inf].\n\nmax_iter : int, default=100\n The maximum number of iterations.\n\nverbose : bool, default=False\n If verbose is True, the objective function and dual gap are\n printed at each iteration.\n\nreturn_costs : bool, default=False\n If return_costs is True, the objective function and dual gap\n at each iteration are returned.\n\neps : float, default=eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Default is `np.finfo(np.float64).eps`.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncovariance : ndarray of shape (n_features, n_features)\n The estimated covariance matrix.\n\nprecision : ndarray of shape (n_features, n_features)\n The estimated (sparse) precision matrix.\n\ncosts : list of (objective, dual_gap) pairs\n The list of values of the objective function and the dual gap at\n each iteration. Returned only if return_costs is True.\n\nn_iter : int\n Number of iterations. Returned only if `return_n_iter` is set to True.\n\nSee Also\n--------\nGraphicalLasso, GraphicalLassoCV\n\nNotes\n-----\nThe algorithm employed to solve this problem is the GLasso algorithm,\nfrom the Friedman 2008 Biostatistics paper. It is the same algorithm\nas in the R `glasso` package.\n\nOne possible difference with the `glasso` R package is that the\ndiagonal coefficients are not penalized.", + "code": "@_deprecate_positional_args\ndef graphical_lasso(emp_cov, alpha, *, cov_init=None, mode='cd', tol=1e-4,\n enet_tol=1e-4, max_iter=100, verbose=False,\n return_costs=False, eps=np.finfo(np.float64).eps,\n return_n_iter=False):\n \"\"\"l1-penalized covariance estimator\n\n Read more in the :ref:`User Guide `.\n\n .. versionchanged:: v0.20\n graph_lasso has been renamed to graphical_lasso\n\n Parameters\n ----------\n emp_cov : ndarray of shape (n_features, n_features)\n Empirical covariance from which to compute the covariance estimate.\n\n alpha : float\n The regularization parameter: the higher alpha, the more\n regularization, the sparser the inverse covariance.\n Range is (0, inf].\n\n cov_init : array of shape (n_features, n_features), default=None\n The initial guess for the covariance. If None, then the empirical\n covariance is used.\n\n mode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\n tol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\n enet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\n max_iter : int, default=100\n The maximum number of iterations.\n\n verbose : bool, default=False\n If verbose is True, the objective function and dual gap are\n printed at each iteration.\n\n return_costs : bool, default=False\n If return_costs is True, the objective function and dual gap\n at each iteration are returned.\n\n eps : float, default=eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. 
Default is `np.finfo(np.float64).eps`.\n\n return_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\n Returns\n -------\n covariance : ndarray of shape (n_features, n_features)\n The estimated covariance matrix.\n\n precision : ndarray of shape (n_features, n_features)\n The estimated (sparse) precision matrix.\n\n costs : list of (objective, dual_gap) pairs\n The list of values of the objective function and the dual gap at\n each iteration. Returned only if return_costs is True.\n\n n_iter : int\n Number of iterations. Returned only if `return_n_iter` is set to True.\n\n See Also\n --------\n GraphicalLasso, GraphicalLassoCV\n\n Notes\n -----\n The algorithm employed to solve this problem is the GLasso algorithm,\n from the Friedman 2008 Biostatistics paper. It is the same algorithm\n as in the R `glasso` package.\n\n One possible difference with the `glasso` R package is that the\n diagonal coefficients are not penalized.\n \"\"\"\n _, n_features = emp_cov.shape\n if alpha == 0:\n if return_costs:\n precision_ = linalg.inv(emp_cov)\n cost = - 2. * log_likelihood(emp_cov, precision_)\n cost += n_features * np.log(2 * np.pi)\n d_gap = np.sum(emp_cov * precision_) - n_features\n if return_n_iter:\n return emp_cov, precision_, (cost, d_gap), 0\n else:\n return emp_cov, precision_, (cost, d_gap)\n else:\n if return_n_iter:\n return emp_cov, linalg.inv(emp_cov), 0\n else:\n return emp_cov, linalg.inv(emp_cov)\n if cov_init is None:\n covariance_ = emp_cov.copy()\n else:\n covariance_ = cov_init.copy()\n # As a trivial regularization (Tikhonov like), we scale down the\n # off-diagonal coefficients of our starting point: This is needed, as\n # in the cross-validation the cov_init can easily be\n # ill-conditioned, and the CV loop blows. 
Besides, this takes a\n # conservative standpoint on the initial conditions, and it tends to\n # make the convergence go faster.\n covariance_ *= 0.95\n diagonal = emp_cov.flat[::n_features + 1]\n covariance_.flat[::n_features + 1] = diagonal\n precision_ = linalg.pinvh(covariance_)\n\n indices = np.arange(n_features)\n costs = list()\n # The different l1 regression solvers have different numerical errors\n if mode == 'cd':\n errors = dict(over='raise', invalid='ignore')\n else:\n errors = dict(invalid='raise')\n try:\n # be robust to the max_iter=0 edge case, see:\n # https://github.com/scikit-learn/scikit-learn/issues/4134\n d_gap = np.inf\n # set a sub_covariance buffer\n sub_covariance = np.copy(covariance_[1:, 1:], order='C')\n for i in range(max_iter):\n for idx in range(n_features):\n # To keep the contiguous matrix `sub_covariance` equal to\n # covariance_[indices != idx].T[indices != idx]\n # we only need to update 1 column and 1 line when idx changes\n if idx > 0:\n di = idx - 1\n sub_covariance[di] = covariance_[di][indices != idx]\n sub_covariance[:, di] = covariance_[:, di][indices != idx]\n else:\n sub_covariance[:] = covariance_[1:, 1:]\n row = emp_cov[idx, indices != idx]\n with np.errstate(**errors):\n if mode == 'cd':\n # Use coordinate descent\n coefs = -(precision_[indices != idx, idx]\n / (precision_[idx, idx] + 1000 * eps))\n coefs, _, _, _ = cd_fast.enet_coordinate_descent_gram(\n coefs, alpha, 0, sub_covariance,\n row, row, max_iter, enet_tol,\n check_random_state(None), False)\n else:\n # Use LARS\n _, _, coefs = lars_path_gram(\n Xy=row, Gram=sub_covariance, n_samples=row.size,\n alpha_min=alpha / (n_features - 1), copy_Gram=True,\n eps=eps, method='lars', return_path=False)\n # Update the precision matrix\n precision_[idx, idx] = (\n 1. / (covariance_[idx, idx]\n - np.dot(covariance_[indices != idx, idx], coefs)))\n precision_[indices != idx, idx] = (- precision_[idx, idx]\n * coefs)\n precision_[idx, indices != idx] = (- precision_[idx, idx]\n * coefs)\n coefs = np.dot(sub_covariance, coefs)\n covariance_[idx, indices != idx] = coefs\n covariance_[indices != idx, idx] = coefs\n if not np.isfinite(precision_.sum()):\n raise FloatingPointError('The system is too ill-conditioned '\n 'for this solver')\n d_gap = _dual_gap(emp_cov, precision_, alpha)\n cost = _objective(emp_cov, precision_, alpha)\n if verbose:\n print('[graphical_lasso] Iteration '\n '% 3i, cost % 3.2e, dual gap %.3e'\n % (i, cost, d_gap))\n if return_costs:\n costs.append((cost, d_gap))\n if np.abs(d_gap) < tol:\n break\n if not np.isfinite(cost) and i > 0:\n raise FloatingPointError('Non SPD result: the system is '\n 'too ill-conditioned for this solver')\n else:\n warnings.warn('graphical_lasso: did not converge after '\n '%i iteration: dual gap: %.3e'\n % (max_iter, d_gap), ConvergenceWarning)\n except FloatingPointError as e:\n e.args = (e.args[0]\n + '. 
The system is too ill-conditioned for this solver',)\n raise e\n\n if return_costs:\n if return_n_iter:\n return covariance_, precision_, costs, i + 1\n else:\n return covariance_, precision_, costs\n else:\n if return_n_iter:\n return covariance_, precision_, i + 1\n else:\n return covariance_, precision_" + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso_path", + "name": "graphical_lasso_path", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso_path", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso_path/X", + "name": "X", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso_path.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Data from which to compute the covariance estimate." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso_path/alphas", + "name": "alphas", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso_path.alphas", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_alphas,)", + "default_value": "", + "description": "The list of regularization parameters, decreasing order." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_alphas,)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso_path/cov_init", + "name": "cov_init", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso_path.cov_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_features, n_features)", + "default_value": "None", + "description": "The initial guess for the covariance." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_features, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso_path/X_test", + "name": "X_test", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso_path.X_test", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_test_samples, n_features)", + "default_value": "None", + "description": "Optional test matrix to measure generalisation error." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_test_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso_path/mode", + "name": "mode", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso_path.mode", + "default_value": "'cd'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'cd', 'lars'}", + "default_value": "'cd'", + "description": "The Lasso solver to use: coordinate descent or LARS. Use LARS for\nvery sparse underlying graphs, where p > n. Elsewhere prefer cd\nwhich is more numerically stable." 
+ }, + "type": { + "kind": "EnumType", + "values": ["cd", "lars"] + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso_path/tol", + "name": "tol", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso_path.tol", + "default_value": "0.0001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance to declare convergence: if the dual gap goes below\nthis value, iterations are stopped. The tolerance must be a positive\nnumber." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso_path/enet_tol", + "name": "enet_tol", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso_path.enet_tol", + "default_value": "0.0001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance for the elastic net solver used to calculate the descent\ndirection. This parameter controls the accuracy of the search direction\nfor a given column update, not of the overall parameter estimate. Only\nused for mode='cd'. The tolerance must be a positive number." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso_path/max_iter", + "name": "max_iter", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso_path.max_iter", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The maximum number of iterations. This parameter should be a strictly\npositive integer." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.covariance._graph_lasso/graphical_lasso_path/verbose", + "name": "verbose", + "qname": "sklearn.covariance._graph_lasso.graphical_lasso_path.verbose", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or bool", + "default_value": "False", + "description": "The higher the verbosity flag, the more information is printed\nduring the fitting." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "bool" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "l1-penalized covariance estimator along a path of decreasing alphas\n\nRead more in the :ref:`User Guide `.", + "docstring": "l1-penalized covariance estimator along a path of decreasing alphas\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data from which to compute the covariance estimate.\n\nalphas : array-like of shape (n_alphas,)\n The list of regularization parameters, decreasing order.\n\ncov_init : array of shape (n_features, n_features), default=None\n The initial guess for the covariance.\n\nX_test : array of shape (n_test_samples, n_features), default=None\n Optional test matrix to measure generalisation error.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. 
Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. The tolerance must be a positive\n number.\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. The tolerance must be a positive number.\n\nmax_iter : int, default=100\n The maximum number of iterations. This parameter should be a strictly\n positive integer.\n\nverbose : int or bool, default=False\n The higher the verbosity flag, the more information is printed\n during the fitting.\n\nReturns\n-------\ncovariances_ : list of shape (n_alphas,) of ndarray of shape (n_features, n_features)\n The estimated covariance matrices.\n\nprecisions_ : list of shape (n_alphas,) of ndarray of shape (n_features, n_features)\n The estimated (sparse) precision matrices.\n\nscores_ : list of shape (n_alphas,), dtype=float\n The generalisation error (log-likelihood) on the test data.\n Returned only if test data is passed.", + "code": "def graphical_lasso_path(X, alphas, cov_init=None, X_test=None, mode='cd',\n tol=1e-4, enet_tol=1e-4, max_iter=100, verbose=False):\n \"\"\"l1-penalized covariance estimator along a path of decreasing alphas\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Data from which to compute the covariance estimate.\n\n alphas : array-like of shape (n_alphas,)\n The list of regularization parameters, decreasing order.\n\n cov_init : array of shape (n_features, n_features), default=None\n The initial guess for the covariance.\n\n X_test : array of shape (n_test_samples, n_features), default=None\n Optional test matrix to measure generalisation error.\n\n mode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\n tol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. The tolerance must be a positive\n number.\n\n enet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. The tolerance must be a positive number.\n\n max_iter : int, default=100\n The maximum number of iterations. 
This parameter should be a strictly\n positive integer.\n\n verbose : int or bool, default=False\n The higher the verbosity flag, the more information is printed\n during the fitting.\n\n Returns\n -------\n covariances_ : list of shape (n_alphas,) of ndarray of shape \\\n (n_features, n_features)\n The estimated covariance matrices.\n\n precisions_ : list of shape (n_alphas,) of ndarray of shape \\\n (n_features, n_features)\n The estimated (sparse) precision matrices.\n\n scores_ : list of shape (n_alphas,), dtype=float\n The generalisation error (log-likelihood) on the test data.\n Returned only if test data is passed.\n \"\"\"\n inner_verbose = max(0, verbose - 1)\n emp_cov = empirical_covariance(X)\n if cov_init is None:\n covariance_ = emp_cov.copy()\n else:\n covariance_ = cov_init\n covariances_ = list()\n precisions_ = list()\n scores_ = list()\n if X_test is not None:\n test_emp_cov = empirical_covariance(X_test)\n\n for alpha in alphas:\n try:\n # Capture the errors, and move on\n covariance_, precision_ = graphical_lasso(\n emp_cov, alpha=alpha, cov_init=covariance_, mode=mode, tol=tol,\n enet_tol=enet_tol, max_iter=max_iter, verbose=inner_verbose)\n covariances_.append(covariance_)\n precisions_.append(precision_)\n if X_test is not None:\n this_score = log_likelihood(test_emp_cov, precision_)\n except FloatingPointError:\n this_score = -np.inf\n covariances_.append(np.nan)\n precisions_.append(np.nan)\n if X_test is not None:\n if not np.isfinite(this_score):\n this_score = -np.inf\n scores_.append(this_score)\n if verbose == 1:\n sys.stderr.write('.')\n elif verbose > 1:\n if X_test is not None:\n print('[graphical_lasso_path] alpha: %.2e, score: %.2e'\n % (alpha, this_score))\n else:\n print('[graphical_lasso_path] alpha: %.2e' % alpha)\n if X_test is not None:\n return covariances_, precisions_, scores_\n return covariances_, precisions_" + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/__init__", + "name": "__init__", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/__init__/self", + "name": "self", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/__init__/store_precision", + "name": "store_precision", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.__init__.store_precision", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Specify if the estimated precision is stored." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/__init__/assume_centered", + "name": "assume_centered", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.__init__.assume_centered", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the support of the robust location and the covariance\nestimates is computed, and a covariance estimate is recomputed from\nit, without centering the data.\nUseful to work with data whose mean is significantly equal to\nzero but is not exactly zero.\nIf False, the robust location and covariance are directly computed\nwith the FastMCD algorithm without additional treatment." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/__init__/support_fraction", + "name": "support_fraction", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.__init__.support_fraction", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "The proportion of points to be included in the support of the raw\nMCD estimate. Default is None, which implies that the minimum\nvalue of support_fraction will be used within the algorithm:\n`(n_sample + n_features + 1) / 2`. The parameter must be in the range\n(0, 1)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/__init__/random_state", + "name": "random_state", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines the pseudo random number generator for shuffling the data.\nPass an int for reproducible results across multiple function calls.\nSee :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Minimum Covariance Determinant (MCD): robust estimator of covariance.\n\nThe Minimum Covariance Determinant covariance estimator is to be applied\non Gaussian-distributed data, but could still be relevant on data\ndrawn from a unimodal, symmetric distribution. 
It is not meant to be used\nwith multi-modal data (the algorithm used to fit a MinCovDet object is\nlikely to fail in such a case).\nOne should consider projection pursuit methods to deal with multi-modal\ndatasets.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, store_precision=True, assume_centered=False,\n support_fraction=None, random_state=None):\n self.store_precision = store_precision\n self.assume_centered = assume_centered\n self.support_fraction = support_fraction\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/correct_covariance", + "name": "correct_covariance", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.correct_covariance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/correct_covariance/self", + "name": "self", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.correct_covariance.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/correct_covariance/data", + "name": "data", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.correct_covariance.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data matrix, with p features and n samples.\nThe data set must be the one which was used to compute\nthe raw estimates." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply a correction to raw Minimum Covariance Determinant estimates.\n\nCorrection using the empirical correction factor suggested\nby Rousseeuw and Van Driessen in [RVD]_.", + "docstring": "Apply a correction to raw Minimum Covariance Determinant estimates.\n\nCorrection using the empirical correction factor suggested\nby Rousseeuw and Van Driessen in [RVD]_.\n\nParameters\n----------\ndata : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n The data set must be the one which was used to compute\n the raw estimates.\n\nReturns\n-------\ncovariance_corrected : ndarray of shape (n_features, n_features)\n Corrected robust covariance estimate.\n\nReferences\n----------\n\n.. [RVD] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS", + "code": " def correct_covariance(self, data):\n \"\"\"Apply a correction to raw Minimum Covariance Determinant estimates.\n\n Correction using the empirical correction factor suggested\n by Rousseeuw and Van Driessen in [RVD]_.\n\n Parameters\n ----------\n data : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n The data set must be the one which was used to compute\n the raw estimates.\n\n Returns\n -------\n covariance_corrected : ndarray of shape (n_features, n_features)\n Corrected robust covariance estimate.\n\n References\n ----------\n\n .. 
[RVD] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS\n \"\"\"\n\n # Check that the covariance of the support data is not equal to 0.\n # Otherwise self.dist_ = 0 and thus correction = 0.\n n_samples = len(self.dist_)\n n_support = np.sum(self.support_)\n if n_support < n_samples and np.allclose(self.raw_covariance_, 0):\n raise ValueError('The covariance matrix of the support data '\n 'is equal to 0, try to increase support_fraction')\n correction = np.median(self.dist_) / chi2(data.shape[1]).isf(0.5)\n covariance_corrected = self.raw_covariance_ * correction\n self.dist_ /= correction\n return covariance_corrected" + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/fit", + "name": "fit", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/fit/self", + "name": "self", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/fit/X", + "name": "X", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where `n_samples` is the number of samples\nand `n_features` is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/fit/y", + "name": "y", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." 
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fits a Minimum Covariance Determinant with the FastMCD algorithm.", + "docstring": "Fits a Minimum Covariance Determinant with the FastMCD algorithm.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y=None):\n \"\"\"Fits a Minimum Covariance Determinant with the FastMCD algorithm.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n X = self._validate_data(X, ensure_min_samples=2, estimator='MinCovDet')\n random_state = check_random_state(self.random_state)\n n_samples, n_features = X.shape\n # check that the empirical covariance is full rank\n if (linalg.svdvals(np.dot(X.T, X)) > 1e-8).sum() != n_features:\n warnings.warn(\"The covariance matrix associated to your dataset \"\n \"is not full rank\")\n # compute and store raw estimates\n raw_location, raw_covariance, raw_support, raw_dist = fast_mcd(\n X, support_fraction=self.support_fraction,\n cov_computation_method=self._nonrobust_covariance,\n random_state=random_state)\n if self.assume_centered:\n raw_location = np.zeros(n_features)\n raw_covariance = self._nonrobust_covariance(X[raw_support],\n assume_centered=True)\n # get precision matrix in an optimized way\n precision = linalg.pinvh(raw_covariance)\n raw_dist = np.sum(np.dot(X, precision) * X, 1)\n self.raw_location_ = raw_location\n self.raw_covariance_ = raw_covariance\n self.raw_support_ = raw_support\n self.location_ = raw_location\n self.support_ = raw_support\n self.dist_ = raw_dist\n # obtain consistency at normal models\n self.correct_covariance(X)\n # re-weight estimator\n self.reweight_covariance(X)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/reweight_covariance", + "name": "reweight_covariance", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.reweight_covariance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/reweight_covariance/self", + "name": "self", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.reweight_covariance.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/MinCovDet/reweight_covariance/data", + "name": "data", + "qname": "sklearn.covariance._robust_covariance.MinCovDet.reweight_covariance.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data matrix, with p features and n samples.\nThe data set must be the one which was used to compute\nthe raw estimates." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Re-weight raw Minimum Covariance Determinant estimates.\n\nRe-weight observations using Rousseeuw's method (equivalent to\ndeleting outlying observations from the data set before\ncomputing location and covariance estimates) described\nin [RVDriessen]_.", + "docstring": "Re-weight raw Minimum Covariance Determinant estimates.\n\nRe-weight observations using Rousseeuw's method (equivalent to\ndeleting outlying observations from the data set before\ncomputing location and covariance estimates) described\nin [RVDriessen]_.\n\nParameters\n----------\ndata : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n The data set must be the one which was used to compute\n the raw estimates.\n\nReturns\n-------\nlocation_reweighted : ndarray of shape (n_features,)\n Re-weighted robust location estimate.\n\ncovariance_reweighted : ndarray of shape (n_features, n_features)\n Re-weighted robust covariance estimate.\n\nsupport_reweighted : ndarray of shape (n_samples,), dtype=bool\n A mask of the observations that have been used to compute\n the re-weighted robust location and covariance estimates.\n\nReferences\n----------\n\n.. [RVDriessen] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS", + "code": " def reweight_covariance(self, data):\n \"\"\"Re-weight raw Minimum Covariance Determinant estimates.\n\n Re-weight observations using Rousseeuw's method (equivalent to\n deleting outlying observations from the data set before\n computing location and covariance estimates) described\n in [RVDriessen]_.\n\n Parameters\n ----------\n data : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n The data set must be the one which was used to compute\n the raw estimates.\n\n Returns\n -------\n location_reweighted : ndarray of shape (n_features,)\n Re-weighted robust location estimate.\n\n covariance_reweighted : ndarray of shape (n_features, n_features)\n Re-weighted robust covariance estimate.\n\n support_reweighted : ndarray of shape (n_samples,), dtype=bool\n A mask of the observations that have been used to compute\n the re-weighted robust location and covariance estimates.\n\n References\n ----------\n\n .. 
[RVDriessen] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS\n \"\"\"\n n_samples, n_features = data.shape\n mask = self.dist_ < chi2(n_features).isf(0.025)\n if self.assume_centered:\n location_reweighted = np.zeros(n_features)\n else:\n location_reweighted = data[mask].mean(0)\n covariance_reweighted = self._nonrobust_covariance(\n data[mask], assume_centered=self.assume_centered)\n support_reweighted = np.zeros(n_samples, dtype=bool)\n support_reweighted[mask] = True\n self._set_covariance(covariance_reweighted)\n self.location_ = location_reweighted\n self.support_ = support_reweighted\n X_centered = data - self.location_\n self.dist_ = np.sum(\n np.dot(X_centered, self.get_precision()) * X_centered, 1)\n return location_reweighted, covariance_reweighted, support_reweighted" + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/_c_step", + "name": "_c_step", + "qname": "sklearn.covariance._robust_covariance._c_step", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/_c_step/X", + "name": "X", + "qname": "sklearn.covariance._robust_covariance._c_step.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/_c_step/n_support", + "name": "n_support", + "qname": "sklearn.covariance._robust_covariance._c_step.n_support", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/_c_step/random_state", + "name": "random_state", + "qname": "sklearn.covariance._robust_covariance._c_step.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/_c_step/remaining_iterations", + "name": "remaining_iterations", + "qname": "sklearn.covariance._robust_covariance._c_step.remaining_iterations", + "default_value": "30", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/_c_step/initial_estimates", + "name": "initial_estimates", + "qname": "sklearn.covariance._robust_covariance._c_step.initial_estimates", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/_c_step/verbose", + "name": "verbose", + "qname": "sklearn.covariance._robust_covariance._c_step.verbose", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/_c_step/cov_computation_method", + "name": "cov_computation_method", + "qname": "sklearn.covariance._robust_covariance._c_step.cov_computation_method", + "default_value": "empirical_covariance", + 
"assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _c_step(X, n_support, random_state, remaining_iterations=30,\n initial_estimates=None, verbose=False,\n cov_computation_method=empirical_covariance):\n n_samples, n_features = X.shape\n dist = np.inf\n\n # Initialisation\n support = np.zeros(n_samples, dtype=bool)\n if initial_estimates is None:\n # compute initial robust estimates from a random subset\n support[random_state.permutation(n_samples)[:n_support]] = True\n else:\n # get initial robust estimates from the function parameters\n location = initial_estimates[0]\n covariance = initial_estimates[1]\n # run a special iteration for that case (to get an initial support)\n precision = linalg.pinvh(covariance)\n X_centered = X - location\n dist = (np.dot(X_centered, precision) * X_centered).sum(1)\n # compute new estimates\n support[np.argsort(dist)[:n_support]] = True\n\n X_support = X[support]\n location = X_support.mean(0)\n covariance = cov_computation_method(X_support)\n\n # Iterative procedure for Minimum Covariance Determinant computation\n det = fast_logdet(covariance)\n # If the data already has singular covariance, calculate the precision,\n # as the loop below will not be entered.\n if np.isinf(det):\n precision = linalg.pinvh(covariance)\n\n previous_det = np.inf\n while (det < previous_det and remaining_iterations > 0\n and not np.isinf(det)):\n # save old estimates values\n previous_location = location\n previous_covariance = covariance\n previous_det = det\n previous_support = support\n # compute a new support from the full data set mahalanobis distances\n precision = linalg.pinvh(covariance)\n X_centered = X - location\n dist = (np.dot(X_centered, precision) * X_centered).sum(axis=1)\n # compute new estimates\n support = np.zeros(n_samples, dtype=bool)\n support[np.argsort(dist)[:n_support]] = True\n X_support = X[support]\n location = X_support.mean(axis=0)\n covariance = cov_computation_method(X_support)\n det = fast_logdet(covariance)\n # update remaining iterations for early stopping\n remaining_iterations -= 1\n\n previous_dist = dist\n dist = (np.dot(X - location, precision) * (X - location)).sum(axis=1)\n # Check if best fit already found (det => 0, logdet => -inf)\n if np.isinf(det):\n results = location, covariance, det, support, dist\n # Check convergence\n if np.allclose(det, previous_det):\n # c_step procedure converged\n if verbose:\n print(\"Optimal couple (location, covariance) found before\"\n \" ending iterations (%d left)\" % (remaining_iterations))\n results = location, covariance, det, support, dist\n elif det > previous_det:\n # determinant has increased (should not happen)\n warnings.warn(\"Determinant has increased; this should not happen: \"\n \"log(det) > log(previous_det) (%.15f > %.15f). 
\"\n \"You may want to try with a higher value of \"\n \"support_fraction (current value: %.3f).\"\n % (det, previous_det, n_support / n_samples),\n RuntimeWarning)\n results = previous_location, previous_covariance, \\\n previous_det, previous_support, previous_dist\n\n # Check early stopping\n if remaining_iterations == 0:\n if verbose:\n print('Maximum number of iterations reached')\n results = location, covariance, det, support, dist\n\n return results" + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/c_step", + "name": "c_step", + "qname": "sklearn.covariance._robust_covariance.c_step", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/c_step/X", + "name": "X", + "qname": "sklearn.covariance._robust_covariance.c_step.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data set in which we look for the n_support observations whose\nscatter matrix has minimum determinant." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/c_step/n_support", + "name": "n_support", + "qname": "sklearn.covariance._robust_covariance.c_step.n_support", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of observations to compute the robust estimates of location\nand covariance from. This parameter must be greater than\n`n_samples / 2`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/c_step/remaining_iterations", + "name": "remaining_iterations", + "qname": "sklearn.covariance._robust_covariance.c_step.remaining_iterations", + "default_value": "30", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Number of iterations to perform.\nAccording to [Rouseeuw1999]_, two iterations are sufficient to get\nclose to the minimum, and we never need more than 30 to reach\nconvergence." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/c_step/initial_estimates", + "name": "initial_estimates", + "qname": "sklearn.covariance._robust_covariance.c_step.initial_estimates", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tuple of shape (2,)", + "default_value": "None", + "description": "Initial estimates of location and shape from which to run the c_step\nprocedure:\n- initial_estimates[0]: an initial location estimate\n- initial_estimates[1]: an initial covariance estimate" + }, + "type": { + "kind": "NamedType", + "name": "tuple of shape (2,)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/c_step/verbose", + "name": "verbose", + "qname": "sklearn.covariance._robust_covariance.c_step.verbose", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Verbose mode." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/c_step/cov_computation_method", + "name": "cov_computation_method", + "qname": "sklearn.covariance._robust_covariance.c_step.cov_computation_method", + "default_value": "empirical_covariance", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": ":func:`sklearn.covariance.empirical_covariance`", + "description": "The function which will be used to compute the covariance.\nMust return array of shape (n_features, n_features)." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/c_step/random_state", + "name": "random_state", + "qname": "sklearn.covariance._robust_covariance.c_step.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines the pseudo random number generator for shuffling the data.\nPass an int for reproducible results across multiple function calls.\nSee :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "C_step procedure described in [Rouseeuw1984]_ aiming at computing MCD.", + "docstring": "C_step procedure described in [Rouseeuw1984]_ aiming at computing MCD.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data set in which we look for the n_support observations whose\n scatter matrix has minimum determinant.\n\nn_support : int\n Number of observations to compute the robust estimates of location\n and covariance from. This parameter must be greater than\n `n_samples / 2`.\n\nremaining_iterations : int, default=30\n Number of iterations to perform.\n According to [Rouseeuw1999]_, two iterations are sufficient to get\n close to the minimum, and we never need more than 30 to reach\n convergence.\n\ninitial_estimates : tuple of shape (2,), default=None\n Initial estimates of location and shape from which to run the c_step\n procedure:\n - initial_estimates[0]: an initial location estimate\n - initial_estimates[1]: an initial covariance estimate\n\nverbose : bool, default=False\n Verbose mode.\n\ncov_computation_method : callable, default=:func:`sklearn.covariance.empirical_covariance`\n The function which will be used to compute the covariance.\n Must return array of shape (n_features, n_features).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nReturns\n-------\nlocation : ndarray of shape (n_features,)\n Robust location estimates.\n\ncovariance : ndarray of shape (n_features, n_features)\n Robust covariance estimates.\n\nsupport : ndarray of shape (n_samples,)\n A mask for the `n_support` observations whose scatter matrix has\n minimum determinant.\n\nReferences\n----------\n.. 
[Rouseeuw1999] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS", + "code": "def c_step(X, n_support, remaining_iterations=30, initial_estimates=None,\n verbose=False, cov_computation_method=empirical_covariance,\n random_state=None):\n \"\"\"C_step procedure described in [Rouseeuw1984]_ aiming at computing MCD.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data set in which we look for the n_support observations whose\n scatter matrix has minimum determinant.\n\n n_support : int\n Number of observations to compute the robust estimates of location\n and covariance from. This parameter must be greater than\n `n_samples / 2`.\n\n remaining_iterations : int, default=30\n Number of iterations to perform.\n According to [Rouseeuw1999]_, two iterations are sufficient to get\n close to the minimum, and we never need more than 30 to reach\n convergence.\n\n initial_estimates : tuple of shape (2,), default=None\n Initial estimates of location and shape from which to run the c_step\n procedure:\n - initial_estimates[0]: an initial location estimate\n - initial_estimates[1]: an initial covariance estimate\n\n verbose : bool, default=False\n Verbose mode.\n\n cov_computation_method : callable, \\\n default=:func:`sklearn.covariance.empirical_covariance`\n The function which will be used to compute the covariance.\n Must return array of shape (n_features, n_features).\n\n random_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\n Returns\n -------\n location : ndarray of shape (n_features,)\n Robust location estimates.\n\n covariance : ndarray of shape (n_features, n_features)\n Robust covariance estimates.\n\n support : ndarray of shape (n_samples,)\n A mask for the `n_support` observations whose scatter matrix has\n minimum determinant.\n\n References\n ----------\n .. [Rouseeuw1999] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS\n \"\"\"\n X = np.asarray(X)\n random_state = check_random_state(random_state)\n return _c_step(X, n_support, remaining_iterations=remaining_iterations,\n initial_estimates=initial_estimates, verbose=verbose,\n cov_computation_method=cov_computation_method,\n random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/fast_mcd", + "name": "fast_mcd", + "qname": "sklearn.covariance._robust_covariance.fast_mcd", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/fast_mcd/X", + "name": "X", + "qname": "sklearn.covariance._robust_covariance.fast_mcd.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data matrix, with p features and n samples." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/fast_mcd/support_fraction", + "name": "support_fraction", + "qname": "sklearn.covariance._robust_covariance.fast_mcd.support_fraction", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "The proportion of points to be included in the support of the raw\nMCD estimate. Default is `None`, which implies that the minimum\nvalue of `support_fraction` will be used within the algorithm:\n`(n_sample + n_features + 1) / 2`. This parameter must be in the\nrange (0, 1)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/fast_mcd/cov_computation_method", + "name": "cov_computation_method", + "qname": "sklearn.covariance._robust_covariance.fast_mcd.cov_computation_method", + "default_value": "empirical_covariance", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": ":func:`sklearn.covariance.empirical_covariance`", + "description": "The function which will be used to compute the covariance.\nMust return an array of shape (n_features, n_features)." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/fast_mcd/random_state", + "name": "random_state", + "qname": "sklearn.covariance._robust_covariance.fast_mcd.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines the pseudo random number generator for shuffling the data.\nPass an int for reproducible results across multiple function calls.\nSee :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimates the Minimum Covariance Determinant matrix.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Estimates the Minimum Covariance Determinant matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n\nsupport_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. Default is `None`, which implies that the minimum\n value of `support_fraction` will be used within the algorithm:\n `(n_sample + n_features + 1) / 2`. 
This parameter must be in the\n    range (0, 1).\n\ncov_computation_method : callable, default=:func:`sklearn.covariance.empirical_covariance`\n    The function which will be used to compute the covariance.\n    Must return an array of shape (n_features, n_features).\n\nrandom_state : int, RandomState instance or None, default=None\n    Determines the pseudo random number generator for shuffling the data.\n    Pass an int for reproducible results across multiple function calls.\n    See :term: `Glossary `.\n\nReturns\n-------\nlocation : ndarray of shape (n_features,)\n    Robust location of the data.\n\ncovariance : ndarray of shape (n_features, n_features)\n    Robust covariance of the features.\n\nsupport : ndarray of shape (n_samples,), dtype=bool\n    A mask of the observations that have been used to compute\n    the robust location and covariance estimates of the data set.\n\nNotes\n-----\nThe FastMCD algorithm has been introduced by Rousseeuw and Van Driessen\nin \"A Fast Algorithm for the Minimum Covariance Determinant Estimator,\n1999, American Statistical Association and the American Society\nfor Quality, TECHNOMETRICS\".\nThe principle is to compute robust estimates on random subsets before\npooling them into larger subsets, and finally into the full data set.\nDepending on the size of the initial sample, we have one, two or three\nsuch computation levels.\n\nNote that only raw estimates are returned. If one is interested in\nthe correction and reweighting steps described in [RouseeuwVan]_,\nsee the MinCovDet object.\n\nReferences\n----------\n\n.. [RouseeuwVan] A Fast Algorithm for the Minimum Covariance\n   Determinant Estimator, 1999, American Statistical Association\n   and the American Society for Quality, TECHNOMETRICS\n\n.. [Butler1993] R. W. Butler, P. L. Davies and M. Jhun,\n   Asymptotics For The Minimum Covariance Determinant Estimator,\n   The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400", + "code": "def fast_mcd(X, support_fraction=None,\n             cov_computation_method=empirical_covariance,\n             random_state=None):\n    \"\"\"Estimates the Minimum Covariance Determinant matrix.\n\n    Read more in the :ref:`User Guide `.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        The data matrix, with p features and n samples.\n\n    support_fraction : float, default=None\n        The proportion of points to be included in the support of the raw\n        MCD estimate. Default is `None`, which implies that the minimum\n        value of `support_fraction` will be used within the algorithm:\n        `(n_sample + n_features + 1) / 2`. 
This parameter must be in the\n        range (0, 1).\n\n    cov_computation_method : callable, \\\n            default=:func:`sklearn.covariance.empirical_covariance`\n        The function which will be used to compute the covariance.\n        Must return an array of shape (n_features, n_features).\n\n    random_state : int, RandomState instance or None, default=None\n        Determines the pseudo random number generator for shuffling the data.\n        Pass an int for reproducible results across multiple function calls.\n        See :term: `Glossary `.\n\n    Returns\n    -------\n    location : ndarray of shape (n_features,)\n        Robust location of the data.\n\n    covariance : ndarray of shape (n_features, n_features)\n        Robust covariance of the features.\n\n    support : ndarray of shape (n_samples,), dtype=bool\n        A mask of the observations that have been used to compute\n        the robust location and covariance estimates of the data set.\n\n    Notes\n    -----\n    The FastMCD algorithm has been introduced by Rousseeuw and Van Driessen\n    in \"A Fast Algorithm for the Minimum Covariance Determinant Estimator,\n    1999, American Statistical Association and the American Society\n    for Quality, TECHNOMETRICS\".\n    The principle is to compute robust estimates on random subsets before\n    pooling them into larger subsets, and finally into the full data set.\n    Depending on the size of the initial sample, we have one, two or three\n    such computation levels.\n\n    Note that only raw estimates are returned. If one is interested in\n    the correction and reweighting steps described in [RouseeuwVan]_,\n    see the MinCovDet object.\n\n    References\n    ----------\n\n    .. [RouseeuwVan] A Fast Algorithm for the Minimum Covariance\n       Determinant Estimator, 1999, American Statistical Association\n       and the American Society for Quality, TECHNOMETRICS\n\n    .. [Butler1993] R. W. Butler, P. L. Davies and M. Jhun,\n       Asymptotics For The Minimum Covariance Determinant Estimator,\n       The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400\n    \"\"\"\n    random_state = check_random_state(random_state)\n\n    X = check_array(X, ensure_min_samples=2, estimator='fast_mcd')\n    n_samples, n_features = X.shape\n\n    # minimum breakdown value\n    if support_fraction is None:\n        n_support = int(np.ceil(0.5 * (n_samples + n_features + 1)))\n    else:\n        n_support = int(support_fraction * n_samples)\n\n    # 1-dimensional case quick computation\n    # (Rousseeuw, P. J. and Leroy, A. M. 
(2005) References, in Robust\n    # Regression and Outlier Detection, John Wiley & Sons, chapter 4)\n    if n_features == 1:\n        if n_support < n_samples:\n            # find the sample shortest halves\n            X_sorted = np.sort(np.ravel(X))\n            diff = X_sorted[n_support:] - X_sorted[:(n_samples - n_support)]\n            halves_start = np.where(diff == np.min(diff))[0]\n            # take the middle points' mean to get the robust location estimate\n            location = 0.5 * (X_sorted[n_support + halves_start] +\n                              X_sorted[halves_start]).mean()\n            support = np.zeros(n_samples, dtype=bool)\n            X_centered = X - location\n            support[np.argsort(np.abs(X_centered), 0)[:n_support]] = True\n            covariance = np.asarray([[np.var(X[support])]])\n            location = np.array([location])\n            # get precision matrix in an optimized way\n            precision = linalg.pinvh(covariance)\n            dist = (np.dot(X_centered, precision) * (X_centered)).sum(axis=1)\n        else:\n            support = np.ones(n_samples, dtype=bool)\n            covariance = np.asarray([[np.var(X)]])\n            location = np.asarray([np.mean(X)])\n            X_centered = X - location\n            # get precision matrix in an optimized way\n            precision = linalg.pinvh(covariance)\n            dist = (np.dot(X_centered, precision) * (X_centered)).sum(axis=1)\n    # Starting FastMCD algorithm for p-dimensional case\n    if (n_samples > 500) and (n_features > 1):\n        # 1. Find candidate supports on subsets\n        # a. split the set in subsets of size ~ 300\n        n_subsets = n_samples // 300\n        n_samples_subsets = n_samples // n_subsets\n        samples_shuffle = random_state.permutation(n_samples)\n        h_subset = int(np.ceil(n_samples_subsets *\n                               (n_support / float(n_samples))))\n        # b. perform a total of 500 trials\n        n_trials_tot = 500\n        # c. select 10 best (location, covariance) for each subset\n        n_best_sub = 10\n        n_trials = max(10, n_trials_tot // n_subsets)\n        n_best_tot = n_subsets * n_best_sub\n        all_best_locations = np.zeros((n_best_tot, n_features))\n        try:\n            all_best_covariances = np.zeros((n_best_tot, n_features,\n                                             n_features))\n        except MemoryError:\n            # The above is too big. Let's try with something much smaller\n            # (and less optimal)\n            n_best_tot = 10\n            all_best_covariances = np.zeros((n_best_tot, n_features,\n                                             n_features))\n            n_best_sub = 2\n        for i in range(n_subsets):\n            low_bound = i * n_samples_subsets\n            high_bound = low_bound + n_samples_subsets\n            current_subset = X[samples_shuffle[low_bound:high_bound]]\n            best_locations_sub, best_covariances_sub, _, _ = select_candidates(\n                current_subset, h_subset, n_trials,\n                select=n_best_sub, n_iter=2,\n                cov_computation_method=cov_computation_method,\n                random_state=random_state)\n            subset_slice = np.arange(i * n_best_sub, (i + 1) * n_best_sub)\n            all_best_locations[subset_slice] = best_locations_sub\n            all_best_covariances[subset_slice] = best_covariances_sub\n        # 2. Pool the candidate supports into a merged set\n        # (possibly the full dataset)\n        n_samples_merged = min(1500, n_samples)\n        h_merged = int(np.ceil(n_samples_merged *\n                               (n_support / float(n_samples))))\n        if n_samples > 1500:\n            n_best_merged = 10\n        else:\n            n_best_merged = 1\n        # find the best couples (location, covariance) on the merged set\n        selection = random_state.permutation(n_samples)[:n_samples_merged]\n        locations_merged, covariances_merged, supports_merged, d = \\\n            select_candidates(\n                X[selection], h_merged,\n                n_trials=(all_best_locations, all_best_covariances),\n                select=n_best_merged,\n                cov_computation_method=cov_computation_method,\n                random_state=random_state)\n        # 3. 
Finally get the overall best (locations, covariance) couple\n if n_samples < 1500:\n # directly get the best couple (location, covariance)\n location = locations_merged[0]\n covariance = covariances_merged[0]\n support = np.zeros(n_samples, dtype=bool)\n dist = np.zeros(n_samples)\n support[selection] = supports_merged[0]\n dist[selection] = d[0]\n else:\n # select the best couple on the full dataset\n locations_full, covariances_full, supports_full, d = \\\n select_candidates(\n X, n_support,\n n_trials=(locations_merged, covariances_merged),\n select=1,\n cov_computation_method=cov_computation_method,\n random_state=random_state)\n location = locations_full[0]\n covariance = covariances_full[0]\n support = supports_full[0]\n dist = d[0]\n elif n_features > 1:\n # 1. Find the 10 best couples (location, covariance)\n # considering two iterations\n n_trials = 30\n n_best = 10\n locations_best, covariances_best, _, _ = select_candidates(\n X, n_support, n_trials=n_trials, select=n_best, n_iter=2,\n cov_computation_method=cov_computation_method,\n random_state=random_state)\n # 2. Select the best couple on the full dataset amongst the 10\n locations_full, covariances_full, supports_full, d = select_candidates(\n X, n_support, n_trials=(locations_best, covariances_best),\n select=1, cov_computation_method=cov_computation_method,\n random_state=random_state)\n location = locations_full[0]\n covariance = covariances_full[0]\n support = supports_full[0]\n dist = d[0]\n\n return location, covariance, support, dist" + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/select_candidates", + "name": "select_candidates", + "qname": "sklearn.covariance._robust_covariance.select_candidates", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/select_candidates/X", + "name": "X", + "qname": "sklearn.covariance._robust_covariance.select_candidates.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data (sub)set in which we look for the n_support purest observations." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/select_candidates/n_support", + "name": "n_support", + "qname": "sklearn.covariance._robust_covariance.select_candidates.n_support", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The number of samples the pure data set must contain.\nThis parameter must be in the range `[(n + p + 1)/2] < n_support < n`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/select_candidates/n_trials", + "name": "n_trials", + "qname": "sklearn.covariance._robust_covariance.select_candidates.n_trials", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or tuple of shape (2,)", + "default_value": "", + "description": "Number of different initial sets of observations from which to\nrun the algorithm. This parameter should be a strictly positive\ninteger.\nInstead of giving a number of trials to perform, one can provide a\nlist of initial estimates that will be used to iteratively run\nc_step procedures. 
In this case:\n- n_trials[0]: array-like, shape (n_trials, n_features)\n is the list of `n_trials` initial location estimates\n- n_trials[1]: array-like, shape (n_trials, n_features, n_features)\n is the list of `n_trials` initial covariances estimates" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "tuple of shape (2,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/select_candidates/select", + "name": "select", + "qname": "sklearn.covariance._robust_covariance.select_candidates.select", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Number of best candidates results to return. This parameter must be\na strictly positive integer." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/select_candidates/n_iter", + "name": "n_iter", + "qname": "sklearn.covariance._robust_covariance.select_candidates.n_iter", + "default_value": "30", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Maximum number of iterations for the c_step procedure.\n(2 is enough to be close to the final solution. \"Never\" exceeds 20).\nThis parameter must be a strictly positive integer." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/select_candidates/verbose", + "name": "verbose", + "qname": "sklearn.covariance._robust_covariance.select_candidates.verbose", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Control the output verbosity." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/select_candidates/cov_computation_method", + "name": "cov_computation_method", + "qname": "sklearn.covariance._robust_covariance.select_candidates.cov_computation_method", + "default_value": "empirical_covariance", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": ":func:`sklearn.covariance.empirical_covariance`", + "description": "The function which will be used to compute the covariance.\nMust return an array of shape (n_features, n_features)." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.covariance._robust_covariance/select_candidates/random_state", + "name": "random_state", + "qname": "sklearn.covariance._robust_covariance.select_candidates.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines the pseudo random number generator for shuffling the data.\nPass an int for reproducible results across multiple function calls.\nSee :term: `Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Finds the best pure subset of observations to compute MCD from it.\n\nThe purpose of this function is to find the best sets of n_support\nobservations with respect to a minimization of their covariance\nmatrix determinant. Equivalently, it removes n_samples-n_support\nobservations to construct what we call a pure data set (i.e. not\ncontaining outliers). The list of the observations of the pure\ndata set is referred to as the `support`.\n\nStarting from a random support, the pure data set is found by the\nc_step procedure introduced by Rousseeuw and Van Driessen in\n[RV]_.", + "docstring": "Finds the best pure subset of observations to compute MCD from it.\n\nThe purpose of this function is to find the best sets of n_support\nobservations with respect to a minimization of their covariance\nmatrix determinant. Equivalently, it removes n_samples-n_support\nobservations to construct what we call a pure data set (i.e. not\ncontaining outliers). The list of the observations of the pure\ndata set is referred to as the `support`.\n\nStarting from a random support, the pure data set is found by the\nc_step procedure introduced by Rousseeuw and Van Driessen in\n[RV]_.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data (sub)set in which we look for the n_support purest observations.\n\nn_support : int\n The number of samples the pure data set must contain.\n This parameter must be in the range `[(n + p + 1)/2] < n_support < n`.\n\nn_trials : int or tuple of shape (2,)\n Number of different initial sets of observations from which to\n run the algorithm. This parameter should be a strictly positive\n integer.\n Instead of giving a number of trials to perform, one can provide a\n list of initial estimates that will be used to iteratively run\n c_step procedures. In this case:\n - n_trials[0]: array-like, shape (n_trials, n_features)\n is the list of `n_trials` initial location estimates\n - n_trials[1]: array-like, shape (n_trials, n_features, n_features)\n is the list of `n_trials` initial covariances estimates\n\nselect : int, default=1\n Number of best candidates results to return. This parameter must be\n a strictly positive integer.\n\nn_iter : int, default=30\n Maximum number of iterations for the c_step procedure.\n (2 is enough to be close to the final solution. 
\"Never\" exceeds 20).\n This parameter must be a strictly positive integer.\n\nverbose : bool, default=False\n Control the output verbosity.\n\ncov_computation_method : callable, default=:func:`sklearn.covariance.empirical_covariance`\n The function which will be used to compute the covariance.\n Must return an array of shape (n_features, n_features).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nSee Also\n---------\nc_step\n\nReturns\n-------\nbest_locations : ndarray of shape (select, n_features)\n The `select` location estimates computed from the `select` best\n supports found in the data set (`X`).\n\nbest_covariances : ndarray of shape (select, n_features, n_features)\n The `select` covariance estimates computed from the `select`\n best supports found in the data set (`X`).\n\nbest_supports : ndarray of shape (select, n_samples)\n The `select` best supports found in the data set (`X`).\n\nReferences\n----------\n.. [RV] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS", + "code": "def select_candidates(X, n_support, n_trials, select=1, n_iter=30,\n verbose=False,\n cov_computation_method=empirical_covariance,\n random_state=None):\n \"\"\"Finds the best pure subset of observations to compute MCD from it.\n\n The purpose of this function is to find the best sets of n_support\n observations with respect to a minimization of their covariance\n matrix determinant. Equivalently, it removes n_samples-n_support\n observations to construct what we call a pure data set (i.e. not\n containing outliers). The list of the observations of the pure\n data set is referred to as the `support`.\n\n Starting from a random support, the pure data set is found by the\n c_step procedure introduced by Rousseeuw and Van Driessen in\n [RV]_.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data (sub)set in which we look for the n_support purest observations.\n\n n_support : int\n The number of samples the pure data set must contain.\n This parameter must be in the range `[(n + p + 1)/2] < n_support < n`.\n\n n_trials : int or tuple of shape (2,)\n Number of different initial sets of observations from which to\n run the algorithm. This parameter should be a strictly positive\n integer.\n Instead of giving a number of trials to perform, one can provide a\n list of initial estimates that will be used to iteratively run\n c_step procedures. In this case:\n - n_trials[0]: array-like, shape (n_trials, n_features)\n is the list of `n_trials` initial location estimates\n - n_trials[1]: array-like, shape (n_trials, n_features, n_features)\n is the list of `n_trials` initial covariances estimates\n\n select : int, default=1\n Number of best candidates results to return. This parameter must be\n a strictly positive integer.\n\n n_iter : int, default=30\n Maximum number of iterations for the c_step procedure.\n (2 is enough to be close to the final solution. 
\"Never\" exceeds 20).\n This parameter must be a strictly positive integer.\n\n verbose : bool, default=False\n Control the output verbosity.\n\n cov_computation_method : callable, \\\n default=:func:`sklearn.covariance.empirical_covariance`\n The function which will be used to compute the covariance.\n Must return an array of shape (n_features, n_features).\n\n random_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\n See Also\n ---------\n c_step\n\n Returns\n -------\n best_locations : ndarray of shape (select, n_features)\n The `select` location estimates computed from the `select` best\n supports found in the data set (`X`).\n\n best_covariances : ndarray of shape (select, n_features, n_features)\n The `select` covariance estimates computed from the `select`\n best supports found in the data set (`X`).\n\n best_supports : ndarray of shape (select, n_samples)\n The `select` best supports found in the data set (`X`).\n\n References\n ----------\n .. [RV] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS\n \"\"\"\n random_state = check_random_state(random_state)\n\n if isinstance(n_trials, numbers.Integral):\n run_from_estimates = False\n elif isinstance(n_trials, tuple):\n run_from_estimates = True\n estimates_list = n_trials\n n_trials = estimates_list[0].shape[0]\n else:\n raise TypeError(\"Invalid 'n_trials' parameter, expected tuple or \"\n \" integer, got %s (%s)\" % (n_trials, type(n_trials)))\n\n # compute `n_trials` location and shape estimates candidates in the subset\n all_estimates = []\n if not run_from_estimates:\n # perform `n_trials` computations from random initial supports\n for j in range(n_trials):\n all_estimates.append(\n _c_step(\n X, n_support, remaining_iterations=n_iter, verbose=verbose,\n cov_computation_method=cov_computation_method,\n random_state=random_state))\n else:\n # perform computations from every given initial estimates\n for j in range(n_trials):\n initial_estimates = (estimates_list[0][j], estimates_list[1][j])\n all_estimates.append(_c_step(\n X, n_support, remaining_iterations=n_iter,\n initial_estimates=initial_estimates, verbose=verbose,\n cov_computation_method=cov_computation_method,\n random_state=random_state))\n all_locs_sub, all_covs_sub, all_dets_sub, all_supports_sub, all_ds_sub = \\\n zip(*all_estimates)\n # find the `n_best` best results among the `n_trials` ones\n index_best = np.argsort(all_dets_sub)[:select]\n best_locations = np.asarray(all_locs_sub)[index_best]\n best_covariances = np.asarray(all_covs_sub)[index_best]\n best_supports = np.asarray(all_supports_sub)[index_best]\n best_ds = np.asarray(all_ds_sub)[index_best]\n\n return best_locations, best_covariances, best_supports, best_ds" + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/LedoitWolf/__init__", + "name": "__init__", + "qname": "sklearn.covariance._shrunk_covariance.LedoitWolf.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/LedoitWolf/__init__/self", + "name": "self", + "qname": "sklearn.covariance._shrunk_covariance.LedoitWolf.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": 
"", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/LedoitWolf/__init__/store_precision", + "name": "store_precision", + "qname": "sklearn.covariance._shrunk_covariance.LedoitWolf.__init__.store_precision", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Specify if the estimated precision is stored." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/LedoitWolf/__init__/assume_centered", + "name": "assume_centered", + "qname": "sklearn.covariance._shrunk_covariance.LedoitWolf.__init__.assume_centered", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, data will not be centered before computation.\nUseful when working with data whose mean is almost, but not exactly\nzero.\nIf False (default), data will be centered before computation." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/LedoitWolf/__init__/block_size", + "name": "block_size", + "qname": "sklearn.covariance._shrunk_covariance.LedoitWolf.__init__.block_size", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Size of blocks into which the covariance matrix will be split\nduring its Ledoit-Wolf estimation. This is purely a memory\noptimization and does not affect results." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "LedoitWolf Estimator\n\nLedoit-Wolf is a particular form of shrinkage, where the shrinkage\ncoefficient is computed using O. Ledoit and M. Wolf's formula as\ndescribed in \"A Well-Conditioned Estimator for Large-Dimensional\nCovariance Matrices\", Ledoit and Wolf, Journal of Multivariate\nAnalysis, Volume 88, Issue 2, February 2004, pages 365-411.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, store_precision=True, assume_centered=False,\n block_size=1000):\n super().__init__(store_precision=store_precision,\n assume_centered=assume_centered)\n self.block_size = block_size" + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/LedoitWolf/fit", + "name": "fit", + "qname": "sklearn.covariance._shrunk_covariance.LedoitWolf.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/LedoitWolf/fit/self", + "name": "self", + "qname": "sklearn.covariance._shrunk_covariance.LedoitWolf.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/LedoitWolf/fit/X", + "name": "X", + "qname": "sklearn.covariance._shrunk_covariance.LedoitWolf.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where `n_samples` is the number of samples\nand `n_features` is the number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/LedoitWolf/fit/y", + "name": "y", + "qname": "sklearn.covariance._shrunk_covariance.LedoitWolf.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the Ledoit-Wolf shrunk covariance model according to the given\ntraining data and parameters.", + "docstring": "Fit the Ledoit-Wolf shrunk covariance model according to the given\ntraining data and parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the Ledoit-Wolf shrunk covariance model according to the given\n training data and parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n # Not calling the parent object to fit, to avoid computing the\n # covariance matrix (and potentially the precision)\n X = self._validate_data(X)\n if self.assume_centered:\n self.location_ = np.zeros(X.shape[1])\n else:\n self.location_ = X.mean(0)\n covariance, shrinkage = ledoit_wolf(X - self.location_,\n assume_centered=True,\n block_size=self.block_size)\n self.shrinkage_ = shrinkage\n self._set_covariance(covariance)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/OAS/fit", + "name": "fit", + "qname": "sklearn.covariance._shrunk_covariance.OAS.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/OAS/fit/self", + "name": "self", + "qname": "sklearn.covariance._shrunk_covariance.OAS.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/OAS/fit/X", + "name": "X", + "qname": "sklearn.covariance._shrunk_covariance.OAS.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where `n_samples` is the number of samples\nand `n_features` is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/OAS/fit/y", + "name": "y", + "qname": "sklearn.covariance._shrunk_covariance.OAS.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." 
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the Oracle Approximating Shrinkage covariance model\naccording to the given training data and parameters.", + "docstring": "Fit the Oracle Approximating Shrinkage covariance model\naccording to the given training data and parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the Oracle Approximating Shrinkage covariance model\n according to the given training data and parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n X = self._validate_data(X)\n # Not calling the parent object to fit, to avoid computing the\n # covariance matrix (and potentially the precision)\n if self.assume_centered:\n self.location_ = np.zeros(X.shape[1])\n else:\n self.location_ = X.mean(0)\n\n covariance, shrinkage = oas(X - self.location_, assume_centered=True)\n self.shrinkage_ = shrinkage\n self._set_covariance(covariance)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ShrunkCovariance/__init__", + "name": "__init__", + "qname": "sklearn.covariance._shrunk_covariance.ShrunkCovariance.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ShrunkCovariance/__init__/self", + "name": "self", + "qname": "sklearn.covariance._shrunk_covariance.ShrunkCovariance.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ShrunkCovariance/__init__/store_precision", + "name": "store_precision", + "qname": "sklearn.covariance._shrunk_covariance.ShrunkCovariance.__init__.store_precision", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Specify if the estimated precision is stored" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ShrunkCovariance/__init__/assume_centered", + "name": "assume_centered", + "qname": "sklearn.covariance._shrunk_covariance.ShrunkCovariance.__init__.assume_centered", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, data will not be centered before computation.\nUseful when working with data whose mean is almost, but not exactly\nzero.\nIf False, data will be centered before computation." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ShrunkCovariance/__init__/shrinkage", + "name": "shrinkage", + "qname": "sklearn.covariance._shrunk_covariance.ShrunkCovariance.__init__.shrinkage", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "Coefficient in the convex combination used for the computation\nof the shrunk estimate. Range is [0, 1]." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Covariance estimator with shrinkage\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, store_precision=True, assume_centered=False,\n shrinkage=0.1):\n super().__init__(store_precision=store_precision,\n assume_centered=assume_centered)\n self.shrinkage = shrinkage" + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ShrunkCovariance/fit", + "name": "fit", + "qname": "sklearn.covariance._shrunk_covariance.ShrunkCovariance.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ShrunkCovariance/fit/self", + "name": "self", + "qname": "sklearn.covariance._shrunk_covariance.ShrunkCovariance.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ShrunkCovariance/fit/X", + "name": "X", + "qname": "sklearn.covariance._shrunk_covariance.ShrunkCovariance.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ShrunkCovariance/fit/y", + "name": "y", + "qname": "sklearn.covariance._shrunk_covariance.ShrunkCovariance.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." 
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the shrunk covariance model according to the given training data\nand parameters.", + "docstring": "Fit the shrunk covariance model according to the given training data\nand parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the shrunk covariance model according to the given training data\n and parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n \"\"\"\n X = self._validate_data(X)\n # Not calling the parent object to fit, to avoid a potential\n # matrix inversion when setting the precision\n if self.assume_centered:\n self.location_ = np.zeros(X.shape[1])\n else:\n self.location_ = X.mean(0)\n covariance = empirical_covariance(\n X, assume_centered=self.assume_centered)\n covariance = shrunk_covariance(covariance, self.shrinkage)\n self._set_covariance(covariance)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ledoit_wolf", + "name": "ledoit_wolf", + "qname": "sklearn.covariance._shrunk_covariance.ledoit_wolf", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ledoit_wolf/X", + "name": "X", + "qname": "sklearn.covariance._shrunk_covariance.ledoit_wolf.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data from which to compute the covariance estimate" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ledoit_wolf/assume_centered", + "name": "assume_centered", + "qname": "sklearn.covariance._shrunk_covariance.ledoit_wolf.assume_centered", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, data will not be centered before computation.\nUseful to work with data whose mean is significantly equal to\nzero but is not exactly zero.\nIf False, data will be centered before computation." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ledoit_wolf/block_size", + "name": "block_size", + "qname": "sklearn.covariance._shrunk_covariance.ledoit_wolf.block_size", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Size of blocks into which the covariance matrix will be split.\nThis is purely a memory optimization and does not affect results." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimates the shrunk Ledoit-Wolf covariance matrix.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Estimates the shrunk Ledoit-Wolf covariance matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, data will be centered before computation.\n\nblock_size : int, default=1000\n Size of blocks into which the covariance matrix will be split.\n This is purely a memory optimization and does not affect results.\n\nReturns\n-------\nshrunk_cov : ndarray of shape (n_features, n_features)\n Shrunk covariance.\n\nshrinkage : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate.\n\nNotes\n-----\nThe regularized (shrunk) covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features", + "code": "@_deprecate_positional_args\ndef ledoit_wolf(X, *, assume_centered=False, block_size=1000):\n \"\"\"Estimates the shrunk Ledoit-Wolf covariance matrix.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\n assume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, data will be centered before computation.\n\n block_size : int, default=1000\n Size of blocks into which the covariance matrix will be split.\n This is purely a memory optimization and does not affect results.\n\n Returns\n -------\n shrunk_cov : ndarray of shape (n_features, n_features)\n Shrunk covariance.\n\n shrinkage : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate.\n\n Notes\n -----\n The regularized (shrunk) covariance is:\n\n (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\n where mu = trace(cov) / n_features\n \"\"\"\n X = np.asarray(X)\n # for only one feature, the result is the same whatever the shrinkage\n if len(X.shape) == 2 and X.shape[1] == 1:\n if not assume_centered:\n X = X - X.mean()\n return np.atleast_2d((X ** 2).mean()), 0.\n if X.ndim == 1:\n X = np.reshape(X, (1, -1))\n warnings.warn(\"Only one sample available. \"\n \"You may want to reshape your data array\")\n n_features = X.size\n else:\n _, n_features = X.shape\n\n # get Ledoit-Wolf shrinkage\n shrinkage = ledoit_wolf_shrinkage(\n X, assume_centered=assume_centered, block_size=block_size)\n emp_cov = empirical_covariance(X, assume_centered=assume_centered)\n mu = np.sum(np.trace(emp_cov)) / n_features\n shrunk_cov = (1. 
- shrinkage) * emp_cov\n shrunk_cov.flat[::n_features + 1] += shrinkage * mu\n\n return shrunk_cov, shrinkage" + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ledoit_wolf_shrinkage", + "name": "ledoit_wolf_shrinkage", + "qname": "sklearn.covariance._shrunk_covariance.ledoit_wolf_shrinkage", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ledoit_wolf_shrinkage/X", + "name": "X", + "qname": "sklearn.covariance._shrunk_covariance.ledoit_wolf_shrinkage.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data from which to compute the Ledoit-Wolf shrunk covariance shrinkage." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ledoit_wolf_shrinkage/assume_centered", + "name": "assume_centered", + "qname": "sklearn.covariance._shrunk_covariance.ledoit_wolf_shrinkage.assume_centered", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, data will not be centered before computation.\nUseful to work with data whose mean is significantly equal to\nzero but is not exactly zero.\nIf False, data will be centered before computation." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/ledoit_wolf_shrinkage/block_size", + "name": "block_size", + "qname": "sklearn.covariance._shrunk_covariance.ledoit_wolf_shrinkage.block_size", + "default_value": "1000", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Size of blocks into which the covariance matrix will be split." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimates the shrunk Ledoit-Wolf covariance matrix.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Estimates the shrunk Ledoit-Wolf covariance matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n    Data from which to compute the Ledoit-Wolf shrunk covariance shrinkage.\n\nassume_centered : bool, default=False\n    If True, data will not be centered before computation.\n    Useful to work with data whose mean is significantly equal to\n    zero but is not exactly zero.\n    If False, data will be centered before computation.\n\nblock_size : int, default=1000\n    Size of blocks into which the covariance matrix will be split.\n\nReturns\n-------\nshrinkage : float\n    Coefficient in the convex combination used for the computation\n    of the shrunk estimate.\n\nNotes\n-----\nThe regularized (shrunk) covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features", + "code": "def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000):\n    \"\"\"Estimates the shrunk Ledoit-Wolf covariance matrix.\n\n    Read more in the :ref:`User Guide `.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Data from which to compute the Ledoit-Wolf shrunk covariance shrinkage.\n\n    assume_centered : bool, default=False\n        If True, data will not be centered before computation.\n        Useful to work with data whose mean is significantly equal to\n        zero but is not exactly zero.\n        If False, data will be centered before computation.\n\n    block_size : int, default=1000\n        Size of blocks into which the covariance matrix will be split.\n\n    Returns\n    -------\n    shrinkage : float\n        Coefficient in the convex combination used for the computation\n        of the shrunk estimate.\n\n    Notes\n    -----\n    The regularized (shrunk) covariance is:\n\n    (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\n    where mu = trace(cov) / n_features\n    \"\"\"\n    X = np.asarray(X)\n    # for only one feature, the result is the same whatever the shrinkage\n    if len(X.shape) == 2 and X.shape[1] == 1:\n        return 0.\n    if X.ndim == 1:\n        X = np.reshape(X, (1, -1))\n\n    if X.shape[0] == 1:\n        warnings.warn(\"Only one sample available. \"\n                      \"You may want to reshape your data array\")\n    n_samples, n_features = X.shape\n\n    # optionally center data\n    if not assume_centered:\n        X = X - X.mean(0)\n\n    # A non-blocked version of the computation is present in the tests\n    # in tests/test_covariance.py\n\n    # number of blocks to split the covariance matrix into\n    n_splits = int(n_features / block_size)\n    X2 = X ** 2\n    emp_cov_trace = np.sum(X2, axis=0) / n_samples\n    mu = np.sum(emp_cov_trace) / n_features\n    beta_ = 0.  # sum of the coefficients of <X2.T, X2>\n    delta_ = 0. 
# sum of the *squared* coefficients of \n # starting block computation\n for i in range(n_splits):\n for j in range(n_splits):\n rows = slice(block_size * i, block_size * (i + 1))\n cols = slice(block_size * j, block_size * (j + 1))\n beta_ += np.sum(np.dot(X2.T[rows], X2[:, cols]))\n delta_ += np.sum(np.dot(X.T[rows], X[:, cols]) ** 2)\n rows = slice(block_size * i, block_size * (i + 1))\n beta_ += np.sum(np.dot(X2.T[rows], X2[:, block_size * n_splits:]))\n delta_ += np.sum(\n np.dot(X.T[rows], X[:, block_size * n_splits:]) ** 2)\n for j in range(n_splits):\n cols = slice(block_size * j, block_size * (j + 1))\n beta_ += np.sum(np.dot(X2.T[block_size * n_splits:], X2[:, cols]))\n delta_ += np.sum(\n np.dot(X.T[block_size * n_splits:], X[:, cols]) ** 2)\n delta_ += np.sum(np.dot(X.T[block_size * n_splits:],\n X[:, block_size * n_splits:]) ** 2)\n delta_ /= n_samples ** 2\n beta_ += np.sum(np.dot(X2.T[block_size * n_splits:],\n X2[:, block_size * n_splits:]))\n # use delta_ to compute beta\n beta = 1. / (n_features * n_samples) * (beta_ / n_samples - delta_)\n # delta is the sum of the squared coefficients of ( - mu*Id) / p\n delta = delta_ - 2. * mu * emp_cov_trace.sum() + n_features * mu ** 2\n delta /= n_features\n # get final beta as the min between beta and delta\n # We do this to prevent shrinking more than \"1\", which whould invert\n # the value of covariances\n beta = min(beta, delta)\n # finally get shrinkage\n shrinkage = 0 if beta == 0 else beta / delta\n return shrinkage" + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/oas", + "name": "oas", + "qname": "sklearn.covariance._shrunk_covariance.oas", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/oas/X", + "name": "X", + "qname": "sklearn.covariance._shrunk_covariance.oas.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data from which to compute the covariance estimate." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/oas/assume_centered", + "name": "assume_centered", + "qname": "sklearn.covariance._shrunk_covariance.oas.assume_centered", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, data will not be centered before computation.\nUseful to work with data whose mean is significantly equal to\nzero but is not exactly zero.\nIf False, data will be centered before computation." 
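The ledoit_wolf_shrinkage entry above documents, in its Notes, the convex combination (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features). A minimal sketch checking that formula against the public sklearn.covariance API; the random data and variable names are illustrative, not part of this API record:

    import numpy as np
    from sklearn.covariance import (empirical_covariance, ledoit_wolf,
                                    ledoit_wolf_shrinkage)

    rng = np.random.RandomState(0)
    X = rng.normal(size=(100, 5))

    # Shrinkage coefficient in [0, 1], as documented under Returns.
    shrinkage = ledoit_wolf_shrinkage(X)

    # Rebuild the shrunk estimate from the Notes formula.
    emp_cov = empirical_covariance(X)
    mu = np.trace(emp_cov) / emp_cov.shape[0]
    shrunk = (1 - shrinkage) * emp_cov + shrinkage * mu * np.identity(5)

    cov_lw, shrinkage_lw = ledoit_wolf(X)
    assert np.isclose(shrinkage, shrinkage_lw)
    assert np.allclose(shrunk, cov_lw)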
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate covariance with the Oracle Approximating Shrinkage algorithm.", + "docstring": "Estimate covariance with the Oracle Approximating Shrinkage algorithm.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, data will be centered before computation.\n\nReturns\n-------\nshrunk_cov : array-like of shape (n_features, n_features)\n Shrunk covariance.\n\nshrinkage : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate.\n\nNotes\n-----\nThe regularised (shrunk) covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\n\nThe formula we used to implement the OAS is slightly modified compared\nto the one given in the article. See :class:`OAS` for more details.", + "code": "@_deprecate_positional_args\ndef oas(X, *, assume_centered=False):\n \"\"\"Estimate covariance with the Oracle Approximating Shrinkage algorithm.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate.\n\n assume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, data will be centered before computation.\n\n Returns\n -------\n shrunk_cov : array-like of shape (n_features, n_features)\n Shrunk covariance.\n\n shrinkage : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate.\n\n Notes\n -----\n The regularised (shrunk) covariance is:\n\n (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\n where mu = trace(cov) / n_features\n\n The formula we used to implement the OAS is slightly modified compared\n to the one given in the article. See :class:`OAS` for more details.\n \"\"\"\n X = np.asarray(X)\n # for only one feature, the result is the same whatever the shrinkage\n if len(X.shape) == 2 and X.shape[1] == 1:\n if not assume_centered:\n X = X - X.mean()\n return np.atleast_2d((X ** 2).mean()), 0.\n if X.ndim == 1:\n X = np.reshape(X, (1, -1))\n warnings.warn(\"Only one sample available. \"\n \"You may want to reshape your data array\")\n n_samples = 1\n n_features = X.size\n else:\n n_samples, n_features = X.shape\n\n emp_cov = empirical_covariance(X, assume_centered=assume_centered)\n mu = np.trace(emp_cov) / n_features\n\n # formula from Chen et al.'s **implementation**\n alpha = np.mean(emp_cov ** 2)\n num = alpha + mu ** 2\n den = (n_samples + 1.) * (alpha - (mu ** 2) / n_features)\n\n shrinkage = 1. if den == 0 else min(num / den, 1.)\n shrunk_cov = (1. 
- shrinkage) * emp_cov\n shrunk_cov.flat[::n_features + 1] += shrinkage * mu\n\n return shrunk_cov, shrinkage" + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/shrunk_covariance", + "name": "shrunk_covariance", + "qname": "sklearn.covariance._shrunk_covariance.shrunk_covariance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/shrunk_covariance/emp_cov", + "name": "emp_cov", + "qname": "sklearn.covariance._shrunk_covariance.shrunk_covariance.emp_cov", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features, n_features)", + "default_value": "", + "description": "Covariance matrix to be shrunk" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.covariance._shrunk_covariance/shrunk_covariance/shrinkage", + "name": "shrinkage", + "qname": "sklearn.covariance._shrunk_covariance.shrunk_covariance.shrinkage", + "default_value": "0.1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "Coefficient in the convex combination used for the computation\nof the shrunk estimate. Range is [0, 1]." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculates a covariance matrix shrunk on the diagonal\n\nRead more in the :ref:`User Guide `.", + "docstring": "Calculates a covariance matrix shrunk on the diagonal\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nemp_cov : array-like of shape (n_features, n_features)\n Covariance matrix to be shrunk\n\nshrinkage : float, default=0.1\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nReturns\n-------\nshrunk_cov : ndarray of shape (n_features, n_features)\n Shrunk covariance.\n\nNotes\n-----\nThe regularized (shrunk) covariance is given by:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features", + "code": "def shrunk_covariance(emp_cov, shrinkage=0.1):\n \"\"\"Calculates a covariance matrix shrunk on the diagonal\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n emp_cov : array-like of shape (n_features, n_features)\n Covariance matrix to be shrunk\n\n shrinkage : float, default=0.1\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\n Returns\n -------\n shrunk_cov : ndarray of shape (n_features, n_features)\n Shrunk covariance.\n\n Notes\n -----\n The regularized (shrunk) covariance is given by:\n\n (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\n where mu = trace(cov) / n_features\n \"\"\"\n emp_cov = check_array(emp_cov)\n n_features = emp_cov.shape[0]\n\n mu = np.trace(emp_cov) / n_features\n shrunk_cov = (1. 
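The oas entry above uses the same shrinkage-toward-identity form, with the coefficient computed from Chen et al.'s formula and clipped at 1. A minimal usage sketch, again with illustrative random data:

    import numpy as np
    from sklearn.covariance import oas

    rng = np.random.RandomState(0)
    X = rng.normal(size=(200, 4))

    # Returns the shrunk covariance and the shrinkage coefficient.
    shrunk_cov, shrinkage = oas(X, assume_centered=False)
    assert shrunk_cov.shape == (4, 4)
    assert 0.0 <= shrinkage <= 1.0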
- shrinkage) * emp_cov\n shrunk_cov.flat[::n_features + 1] += shrinkage * mu\n\n return shrunk_cov" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/CCA/__init__", + "name": "__init__", + "qname": "sklearn.cross_decomposition._pls.CCA.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/CCA/__init__/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls.CCA.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/CCA/__init__/n_components", + "name": "n_components", + "qname": "sklearn.cross_decomposition._pls.CCA.__init__.n_components", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Number of components to keep. Should be in `[1, min(n_samples,\nn_features, n_targets)]`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/CCA/__init__/scale", + "name": "scale", + "qname": "sklearn.cross_decomposition._pls.CCA.__init__.scale", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to scale `X` and `Y`." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/CCA/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.cross_decomposition._pls.CCA.__init__.max_iter", + "default_value": "500", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "500", + "description": "the maximum number of iterations of the power method." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/CCA/__init__/tol", + "name": "tol", + "qname": "sklearn.cross_decomposition._pls.CCA.__init__.tol", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-06", + "description": "The tolerance used as convergence criteria in the power method: the\nalgorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\nthan `tol`, where `u` corresponds to the left singular vector." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/CCA/__init__/copy", + "name": "copy", + "qname": "sklearn.cross_decomposition._pls.CCA.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to copy `X` and `Y` in fit before applying centering, and\npotentially scaling. If False, these operations will be done inplace,\nmodifying both arrays." 
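Since shrunk_covariance (above) applies the documented convex combination to an already-estimated covariance matrix, its Notes formula can be verified directly; a minimal sketch with illustrative data:

    import numpy as np
    from sklearn.covariance import empirical_covariance, shrunk_covariance

    rng = np.random.RandomState(0)
    emp_cov = empirical_covariance(rng.normal(size=(50, 3)))

    shrunk = shrunk_covariance(emp_cov, shrinkage=0.1)

    # (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)
    mu = np.trace(emp_cov) / 3
    expected = 0.9 * emp_cov + 0.1 * mu * np.identity(3)
    assert np.allclose(shrunk, expected)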
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Canonical Correlation Analysis, also known as \"Mode B\" PLS.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=2, *, scale=True,\n max_iter=500, tol=1e-06, copy=True):\n super().__init__(n_components=n_components, scale=scale,\n deflation_mode=\"canonical\", mode=\"B\",\n algorithm=\"nipals\", max_iter=max_iter, tol=tol,\n copy=copy)" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSCanonical/__init__", + "name": "__init__", + "qname": "sklearn.cross_decomposition._pls.PLSCanonical.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSCanonical/__init__/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls.PLSCanonical.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSCanonical/__init__/n_components", + "name": "n_components", + "qname": "sklearn.cross_decomposition._pls.PLSCanonical.__init__.n_components", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Number of components to keep. Should be in `[1, min(n_samples,\nn_features, n_targets)]`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSCanonical/__init__/scale", + "name": "scale", + "qname": "sklearn.cross_decomposition._pls.PLSCanonical.__init__.scale", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to scale `X` and `Y`." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSCanonical/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.cross_decomposition._pls.PLSCanonical.__init__.algorithm", + "default_value": "'nipals'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'nipals', 'svd'}", + "default_value": "'nipals'", + "description": "The algorithm used to estimate the first singular vectors of the\ncross-covariance matrix. 'nipals' uses the power method while 'svd'\nwill compute the whole SVD." + }, + "type": { + "kind": "EnumType", + "values": ["nipals", "svd"] + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSCanonical/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.cross_decomposition._pls.PLSCanonical.__init__.max_iter", + "default_value": "500", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "500", + "description": "the maximum number of iterations of the power method when\n`algorithm='nipals'`. Ignored otherwise." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSCanonical/__init__/tol", + "name": "tol", + "qname": "sklearn.cross_decomposition._pls.PLSCanonical.__init__.tol", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-06", + "description": "The tolerance used as convergence criteria in the power method: the\nalgorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\nthan `tol`, where `u` corresponds to the left singular vector." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSCanonical/__init__/copy", + "name": "copy", + "qname": "sklearn.cross_decomposition._pls.PLSCanonical.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to copy `X` and `Y` in fit before applying centering, and\npotentially scaling. If False, these operations will be done inplace,\nmodifying both arrays." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Partial Least Squares transformer and regressor.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=2, *, scale=True, algorithm=\"nipals\",\n max_iter=500, tol=1e-06, copy=True):\n super().__init__(\n n_components=n_components, scale=scale,\n deflation_mode=\"canonical\", mode=\"A\",\n algorithm=algorithm,\n max_iter=max_iter, tol=tol, copy=copy)" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSRegression/__init__", + "name": "__init__", + "qname": "sklearn.cross_decomposition._pls.PLSRegression.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSRegression/__init__/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls.PLSRegression.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSRegression/__init__/n_components", + "name": "n_components", + "qname": "sklearn.cross_decomposition._pls.PLSRegression.__init__.n_components", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Number of components to keep. Should be in `[1, min(n_samples,\nn_features, n_targets)]`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSRegression/__init__/scale", + "name": "scale", + "qname": "sklearn.cross_decomposition._pls.PLSRegression.__init__.scale", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to scale `X` and `Y`." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSRegression/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.cross_decomposition._pls.PLSRegression.__init__.max_iter", + "default_value": "500", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "500", + "description": "The maximum number of iterations of the power method when\n`algorithm='nipals'`. Ignored otherwise." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSRegression/__init__/tol", + "name": "tol", + "qname": "sklearn.cross_decomposition._pls.PLSRegression.__init__.tol", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-06", + "description": "The tolerance used as convergence criteria in the power method: the\nalgorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\nthan `tol`, where `u` corresponds to the left singular vector." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSRegression/__init__/copy", + "name": "copy", + "qname": "sklearn.cross_decomposition._pls.PLSRegression.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to copy `X` and `Y` in fit before applying centering, and\npotentially scaling. If False, these operations will be done inplace,\nmodifying both arrays." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "PLS regression\n\nPLSRegression is also known as PLS2 or PLS1, depending on the number of\ntargets.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=2, *, scale=True,\n max_iter=500, tol=1e-06, copy=True):\n super().__init__(\n n_components=n_components, scale=scale,\n deflation_mode=\"regression\", mode=\"A\",\n algorithm='nipals', max_iter=max_iter,\n tol=tol, copy=copy)" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/__init__", + "name": "__init__", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/__init__/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/__init__/n_components", + "name": "n_components", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.__init__.n_components", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "The number of components to keep. Should be in `[1,\nmin(n_samples, n_features, n_targets)]`." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/__init__/scale", + "name": "scale", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.__init__.scale", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to scale `X` and `Y`." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/__init__/copy", + "name": "copy", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to copy `X` and `Y` in fit before applying centering, and\npotentially scaling. If False, these operations will be done inplace,\nmodifying both arrays." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Partial Least Square SVD.\n\nThis transformer simply performs a SVD on the crosscovariance matrix X'Y.\nIt is able to project both the training data `X` and the targets `Y`. The\ntraining data X is projected on the left singular vectors, while the\ntargets are projected on the right singular vectors.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=2, *, scale=True, copy=True):\n self.n_components = n_components\n self.scale = scale\n self.copy = copy" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/fit", + "name": "fit", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/fit/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/fit/X", + "name": "X", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/fit/Y", + "name": "Y", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.fit.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Targets." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit model to data.", + "docstring": "Fit model to data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training samples.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets)\n Targets.", + "code": " def fit(self, X, Y):\n \"\"\"Fit model to data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training samples.\n\n Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Targets.\n \"\"\"\n check_consistent_length(X, Y)\n X = self._validate_data(X, dtype=np.float64, copy=self.copy,\n ensure_min_samples=2)\n Y = check_array(Y, dtype=np.float64, copy=self.copy, ensure_2d=False)\n if Y.ndim == 1:\n Y = Y.reshape(-1, 1)\n\n # we'll compute the SVD of the cross-covariance matrix = X.T.dot(Y)\n # This matrix rank is at most min(n_samples, n_features, n_targets) so\n # n_components cannot be bigger than that.\n n_components = self.n_components\n rank_upper_bound = min(X.shape[0], X.shape[1], Y.shape[1])\n if not 1 <= n_components <= rank_upper_bound:\n # TODO: raise an error in 1.1\n warnings.warn(\n f\"As of version 0.24, n_components({n_components}) should be \"\n f\"in [1, min(n_features, n_samples, n_targets)] = \"\n f\"[1, {rank_upper_bound}]. \"\n f\"n_components={rank_upper_bound} will be used instead. \"\n f\"In version 1.1 (renaming of 0.26), an error will be raised.\",\n FutureWarning\n )\n n_components = rank_upper_bound\n\n X, Y, self._x_mean, self._y_mean, self._x_std, self._y_std = (\n _center_scale_xy(X, Y, self.scale))\n\n # Compute SVD of cross-covariance matrix\n C = np.dot(X.T, Y)\n U, s, Vt = svd(C, full_matrices=False)\n U = U[:, :n_components]\n Vt = Vt[:n_components]\n U, Vt = svd_flip(U, Vt)\n V = Vt.T\n\n self._x_scores = np.dot(X, U) # TODO: remove in 1.1\n self._y_scores = np.dot(Y, V) # TODO: remove in 1.1\n self.x_weights_ = U\n self.y_weights_ = V\n return self" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/fit_transform", + "name": "fit_transform", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/fit_transform/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/fit_transform/X", + "name": "X", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training samples." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/fit_transform/y", + "name": "y", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "None", + "description": "Targets." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Learn and apply the dimensionality reduction.", + "docstring": "Learn and apply the dimensionality reduction.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Targets.\n\nReturns\n-------\nout : array-like or tuple of array-like\n The transformed data `X_tranformed` if `Y` is not None,\n `(X_transformed, Y_transformed)` otherwise.", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Learn and apply the dimensionality reduction.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training samples.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n default=None\n Targets.\n\n Returns\n -------\n out : array-like or tuple of array-like\n The transformed data `X_tranformed` if `Y` is not None,\n `(X_transformed, Y_transformed)` otherwise.\n \"\"\"\n return self.fit(X, y).transform(X, y)" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/transform", + "name": "transform", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/transform/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/transform/X", + "name": "X", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Samples to be transformed." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/transform/Y", + "name": "Y", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.transform.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "None", + "description": "Targets." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply the dimensionality reduction.", + "docstring": "Apply the dimensionality reduction.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples to be transformed.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Targets.\n\nReturns\n-------\nout : array-like or tuple of array-like\n The transformed data `X_tranformed` if `Y` is not None,\n `(X_transformed, Y_transformed)` otherwise.", + "code": " def transform(self, X, Y=None):\n \"\"\"\n Apply the dimensionality reduction.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Samples to be transformed.\n\n Y : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n default=None\n Targets.\n\n Returns\n -------\n out : array-like or tuple of array-like\n The transformed data `X_tranformed` if `Y` is not None,\n `(X_transformed, Y_transformed)` otherwise.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, dtype=np.float64)\n Xr = (X - self._x_mean) / self._x_std\n x_scores = np.dot(Xr, self.x_weights_)\n if Y is not None:\n Y = check_array(Y, ensure_2d=False, dtype=np.float64)\n if Y.ndim == 1:\n Y = Y.reshape(-1, 1)\n Yr = (Y - self._y_mean) / self._y_std\n y_scores = np.dot(Yr, self.y_weights_)\n return x_scores, y_scores\n return x_scores" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/x_mean_@getter", + "name": "x_mean_", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.x_mean_", + "decorators": [ + "deprecated('Attribute x_mean_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/x_mean_/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.x_mean_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"Attribute x_mean_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def x_mean_(self):\n return self._x_mean" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/x_scores_@getter", + "name": "x_scores_", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.x_scores_", + "decorators": [ + "deprecated('Attribute x_scores_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26). Use est.transform(X) on the training data instead.')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/x_scores_/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.x_scores_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"Attribute x_scores_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26). 
Use est.transform(X) on \"\n \"the training data instead.\"\n )\n @property\n def x_scores_(self):\n return self._x_scores" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/x_std_@getter", + "name": "x_std_", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.x_std_", + "decorators": [ + "deprecated('Attribute x_std_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/x_std_/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.x_std_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"Attribute x_std_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def x_std_(self):\n return self._x_std" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/y_mean_@getter", + "name": "y_mean_", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.y_mean_", + "decorators": [ + "deprecated('Attribute y_mean_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/y_mean_/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.y_mean_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"Attribute y_mean_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def y_mean_(self):\n return self._y_mean" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/y_scores_@getter", + "name": "y_scores_", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.y_scores_", + "decorators": [ + "deprecated('Attribute y_scores_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26). Use est.transform(X, Y) on the training data instead.')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/y_scores_/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.y_scores_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"Attribute y_scores_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26). 
Use est.transform(X, Y) \"\n \"on the training data instead.\"\n )\n @property\n def y_scores_(self):\n return self._y_scores" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/y_std_@getter", + "name": "y_std_", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.y_std_", + "decorators": [ + "deprecated('Attribute y_std_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/PLSSVD/y_std_/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls.PLSSVD.y_std_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"Attribute y_std_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def y_std_(self):\n return self._y_std" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/__init__", + "name": "__init__", + "qname": "sklearn.cross_decomposition._pls._PLS.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/__init__/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/__init__/n_components", + "name": "n_components", + "qname": "sklearn.cross_decomposition._pls._PLS.__init__.n_components", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/__init__/scale", + "name": "scale", + "qname": "sklearn.cross_decomposition._pls._PLS.__init__.scale", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/__init__/deflation_mode", + "name": "deflation_mode", + "qname": "sklearn.cross_decomposition._pls._PLS.__init__.deflation_mode", + "default_value": "'regression'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/__init__/mode", + "name": "mode", + "qname": "sklearn.cross_decomposition._pls._PLS.__init__.mode", + "default_value": "'A'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.cross_decomposition._pls._PLS.__init__.algorithm", + "default_value": "'nipals'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/__init__/max_iter", + "name": 
"max_iter", + "qname": "sklearn.cross_decomposition._pls._PLS.__init__.max_iter", + "default_value": "500", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/__init__/tol", + "name": "tol", + "qname": "sklearn.cross_decomposition._pls._PLS.__init__.tol", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/__init__/copy", + "name": "copy", + "qname": "sklearn.cross_decomposition._pls._PLS.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Partial Least Squares (PLS)\n\nThis class implements the generic PLS algorithm.\n\nMain ref: Wegelin, a survey of Partial Least Squares (PLS) methods,\nwith emphasis on the two-block case\nhttps://www.stat.washington.edu/research/reports/2000/tr371.pdf", + "docstring": "", + "code": " @abstractmethod\n def __init__(self, n_components=2, *, scale=True,\n deflation_mode=\"regression\",\n mode=\"A\", algorithm=\"nipals\", max_iter=500, tol=1e-06,\n copy=True):\n self.n_components = n_components\n self.deflation_mode = deflation_mode\n self.mode = mode\n self.scale = scale\n self.algorithm = algorithm\n self.max_iter = max_iter\n self.tol = tol\n self.copy = copy" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/_more_tags", + "name": "_more_tags", + "qname": "sklearn.cross_decomposition._pls._PLS._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/_more_tags/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'poor_score': True,\n 'requires_y': False}" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/fit", + "name": "fit", + "qname": "sklearn.cross_decomposition._pls._PLS.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/fit/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/fit/X", + "name": "X", + "qname": "sklearn.cross_decomposition._pls._PLS.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where `n_samples` is the number of samples and\n`n_features` is the number of predictors." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/fit/Y", + "name": "Y", + "qname": "sklearn.cross_decomposition._pls._PLS.fit.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target vectors, where `n_samples` is the number of samples and\n`n_targets` is the number of response variables." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit model to data.", + "docstring": "Fit model to data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of predictors.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target vectors, where `n_samples` is the number of samples and\n `n_targets` is the number of response variables.", + "code": " def fit(self, X, Y):\n \"\"\"Fit model to data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of predictors.\n\n Y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target vectors, where `n_samples` is the number of samples and\n `n_targets` is the number of response variables.\n \"\"\"\n\n check_consistent_length(X, Y)\n X = self._validate_data(X, dtype=np.float64, copy=self.copy,\n ensure_min_samples=2)\n Y = check_array(Y, dtype=np.float64, copy=self.copy, ensure_2d=False)\n if Y.ndim == 1:\n Y = Y.reshape(-1, 1)\n\n n = X.shape[0]\n p = X.shape[1]\n q = Y.shape[1]\n\n n_components = self.n_components\n if self.deflation_mode == 'regression':\n # With PLSRegression n_components is bounded by the rank of (X.T X)\n # see Wegelin page 25\n rank_upper_bound = p\n if not 1 <= n_components <= rank_upper_bound:\n # TODO: raise an error in 1.1\n warnings.warn(\n f\"As of version 0.24, n_components({n_components}) should \"\n f\"be in [1, n_features].\"\n f\"n_components={rank_upper_bound} will be used instead. \"\n f\"In version 1.1 (renaming of 0.26), an error will be \"\n f\"raised.\",\n FutureWarning\n )\n n_components = rank_upper_bound\n else:\n # With CCA and PLSCanonical, n_components is bounded by the rank of\n # X and the rank of Y: see Wegelin page 12\n rank_upper_bound = min(n, p, q)\n if not 1 <= self.n_components <= rank_upper_bound:\n # TODO: raise an error in 1.1\n warnings.warn(\n f\"As of version 0.24, n_components({n_components}) should \"\n f\"be in [1, min(n_features, n_samples, n_targets)] = \"\n f\"[1, {rank_upper_bound}]. \"\n f\"n_components={rank_upper_bound} will be used instead. 
\"\n f\"In version 1.1 (renaming of 0.26), an error will be \"\n f\"raised.\",\n FutureWarning\n )\n n_components = rank_upper_bound\n\n if self.algorithm not in (\"svd\", \"nipals\"):\n raise ValueError(\"algorithm should be 'svd' or 'nipals', got \"\n f\"{self.algorithm}.\")\n\n self._norm_y_weights = (self.deflation_mode == 'canonical') # 1.1\n norm_y_weights = self._norm_y_weights\n\n # Scale (in place)\n Xk, Yk, self._x_mean, self._y_mean, self._x_std, self._y_std = (\n _center_scale_xy(X, Y, self.scale))\n\n self.x_weights_ = np.zeros((p, n_components)) # U\n self.y_weights_ = np.zeros((q, n_components)) # V\n self._x_scores = np.zeros((n, n_components)) # Xi\n self._y_scores = np.zeros((n, n_components)) # Omega\n self.x_loadings_ = np.zeros((p, n_components)) # Gamma\n self.y_loadings_ = np.zeros((q, n_components)) # Delta\n self.n_iter_ = []\n\n # This whole thing corresponds to the algorithm in section 4.1 of the\n # review from Wegelin. See above for a notation mapping from code to\n # paper.\n Y_eps = np.finfo(Yk.dtype).eps\n for k in range(n_components):\n # Find first left and right singular vectors of the X.T.dot(Y)\n # cross-covariance matrix.\n if self.algorithm == \"nipals\":\n # Replace columns that are all close to zero with zeros\n Yk_mask = np.all(np.abs(Yk) < 10 * Y_eps, axis=0)\n Yk[:, Yk_mask] = 0.0\n\n try:\n x_weights, y_weights, n_iter_ = \\\n _get_first_singular_vectors_power_method(\n Xk, Yk, mode=self.mode, max_iter=self.max_iter,\n tol=self.tol, norm_y_weights=norm_y_weights)\n except StopIteration as e:\n if str(e) != \"Y residual is constant\":\n raise\n warnings.warn(f\"Y residual is constant at iteration {k}\")\n break\n\n self.n_iter_.append(n_iter_)\n\n elif self.algorithm == \"svd\":\n x_weights, y_weights = _get_first_singular_vectors_svd(Xk, Yk)\n\n # inplace sign flip for consistency across solvers and archs\n _svd_flip_1d(x_weights, y_weights)\n\n # compute scores, i.e. the projections of X and Y\n x_scores = np.dot(Xk, x_weights)\n if norm_y_weights:\n y_ss = 1\n else:\n y_ss = np.dot(y_weights, y_weights)\n y_scores = np.dot(Yk, y_weights) / y_ss\n\n # Deflation: subtract rank-one approx to obtain Xk+1 and Yk+1\n x_loadings = np.dot(x_scores, Xk) / np.dot(x_scores, x_scores)\n Xk -= np.outer(x_scores, x_loadings)\n\n if self.deflation_mode == \"canonical\":\n # regress Yk on y_score\n y_loadings = np.dot(y_scores, Yk) / np.dot(y_scores, y_scores)\n Yk -= np.outer(y_scores, y_loadings)\n if self.deflation_mode == \"regression\":\n # regress Yk on x_score\n y_loadings = np.dot(x_scores, Yk) / np.dot(x_scores, x_scores)\n Yk -= np.outer(x_scores, y_loadings)\n\n self.x_weights_[:, k] = x_weights\n self.y_weights_[:, k] = y_weights\n self._x_scores[:, k] = x_scores\n self._y_scores[:, k] = y_scores\n self.x_loadings_[:, k] = x_loadings\n self.y_loadings_[:, k] = y_loadings\n\n # X was approximated as Xi . Gamma.T + X_(R+1)\n # Xi . Gamma.T is a sum of n_components rank-1 matrices. X_(R+1) is\n # whatever is left to fully reconstruct X, and can be 0 if X is of rank\n # n_components.\n # Similiarly, Y was approximated as Omega . Delta.T + Y_(R+1)\n\n # Compute transformation matrices (rotations_). 
See User Guide.\n self.x_rotations_ = np.dot(\n self.x_weights_,\n pinv2(np.dot(self.x_loadings_.T, self.x_weights_),\n check_finite=False))\n self.y_rotations_ = np.dot(\n self.y_weights_, pinv2(np.dot(self.y_loadings_.T, self.y_weights_),\n check_finite=False))\n\n self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)\n self.coef_ = self.coef_ * self._y_std\n return self" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/fit_transform", + "name": "fit_transform", + "qname": "sklearn.cross_decomposition._pls._PLS.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/fit_transform/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/fit_transform/X", + "name": "X", + "qname": "sklearn.cross_decomposition._pls._PLS.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of predictors." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/fit_transform/y", + "name": "y", + "qname": "sklearn.cross_decomposition._pls._PLS.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_targets)", + "default_value": "None", + "description": "Target vectors, where n_samples is the number of samples and\nn_targets is the number of response variables." 
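The _PLS.fit code above ends by composing x_rotations_ from x_weights_ and the pseudo-inverse of x_loadings_.T @ x_weights_; a minimal sketch checking that identity on a fitted PLSCanonical, with np.linalg.pinv standing in for scipy's pinv2 and random illustrative data:

    import numpy as np
    from sklearn.cross_decomposition import PLSCanonical

    rng = np.random.RandomState(0)
    X = rng.normal(size=(30, 4))
    Y = rng.normal(size=(30, 3))

    pls = PLSCanonical(n_components=2).fit(X, Y)

    # x_rotations_ = x_weights_ @ pinv(x_loadings_.T @ x_weights_)
    R = pls.x_weights_ @ np.linalg.pinv(pls.x_loadings_.T @ pls.x_weights_)
    assert np.allclose(R, pls.x_rotations_)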
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_targets)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Learn and apply the dimension reduction on the train data.", + "docstring": "Learn and apply the dimension reduction on the train data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of predictors.\n\ny : array-like of shape (n_samples, n_targets), default=None\n Target vectors, where n_samples is the number of samples and\n n_targets is the number of response variables.\n\nReturns\n-------\nx_scores if Y is not given, (x_scores, y_scores) otherwise.", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Learn and apply the dimension reduction on the train data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of predictors.\n\n y : array-like of shape (n_samples, n_targets), default=None\n Target vectors, where n_samples is the number of samples and\n n_targets is the number of response variables.\n\n Returns\n -------\n x_scores if Y is not given, (x_scores, y_scores) otherwise.\n \"\"\"\n return self.fit(X, y).transform(X, y)" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.cross_decomposition._pls._PLS.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/inverse_transform/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/inverse_transform/X", + "name": "X", + "qname": "sklearn.cross_decomposition._pls._PLS.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "New data, where `n_samples` is the number of samples\nand `n_components` is the number of pls components." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform data back to its original space.", + "docstring": "Transform data back to its original space.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_components)\n New data, where `n_samples` is the number of samples\n and `n_components` is the number of pls components.\n\nReturns\n-------\nx_reconstructed : array-like of shape (n_samples, n_features)\n\nNotes\n-----\nThis transformation will only be exact if `n_components=n_features`.", + "code": " def inverse_transform(self, X):\n \"\"\"Transform data back to its original space.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_components)\n New data, where `n_samples` is the number of samples\n and `n_components` is the number of pls components.\n\n Returns\n -------\n x_reconstructed : array-like of shape (n_samples, n_features)\n\n Notes\n -----\n This transformation will only be exact if `n_components=n_features`.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, dtype=FLOAT_DTYPES)\n # From pls space to original space\n X_reconstructed = np.matmul(X, self.x_loadings_.T)\n\n # Denormalize\n X_reconstructed *= self._x_std\n X_reconstructed += self._x_mean\n return X_reconstructed" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/norm_y_weights@getter", + "name": "norm_y_weights", + "qname": "sklearn.cross_decomposition._pls._PLS.norm_y_weights", + "decorators": [ + "deprecated('Attribute norm_y_weights was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/norm_y_weights/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS.norm_y_weights.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"Attribute norm_y_weights was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def norm_y_weights(self):\n return self._norm_y_weights" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/predict", + "name": "predict", + "qname": "sklearn.cross_decomposition._pls._PLS.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/predict/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/predict/X", + "name": "X", + "qname": "sklearn.cross_decomposition._pls._PLS.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Samples." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/predict/copy", + "name": "copy", + "qname": "sklearn.cross_decomposition._pls._PLS.predict.copy", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to copy `X` and `Y`, or perform in-place normalization." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict targets of given samples.", + "docstring": "Predict targets of given samples.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y`, or perform in-place normalization.\n\nNotes\n-----\nThis call requires the estimation of a matrix of shape\n`(n_features, n_targets)`, which may be an issue in high dimensional\nspace.", + "code": " def predict(self, X, copy=True):\n \"\"\"Predict targets of given samples.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Samples.\n\n copy : bool, default=True\n Whether to copy `X` and `Y`, or perform in-place normalization.\n\n Notes\n -----\n This call requires the estimation of a matrix of shape\n `(n_features, n_targets)`, which may be an issue in high dimensional\n space.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)\n # Normalize\n X -= self._x_mean\n X /= self._x_std\n Ypred = np.dot(X, self.coef_)\n return Ypred + self._y_mean" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/transform", + "name": "transform", + "qname": "sklearn.cross_decomposition._pls._PLS.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/transform/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/transform/X", + "name": "X", + "qname": "sklearn.cross_decomposition._pls._PLS.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Samples to transform." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/transform/Y", + "name": "Y", + "qname": "sklearn.cross_decomposition._pls._PLS.transform.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_targets)", + "default_value": "None", + "description": "Target vectors." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/transform/copy", + "name": "copy", + "qname": "sklearn.cross_decomposition._pls._PLS.transform.copy", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to copy `X` and `Y`, or perform in-place normalization." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply the dimension reduction.", + "docstring": "Apply the dimension reduction.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples to transform.\n\nY : array-like of shape (n_samples, n_targets), default=None\n Target vectors.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y`, or perform in-place normalization.\n\nReturns\n-------\n`x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise.", + "code": " def transform(self, X, Y=None, copy=True):\n \"\"\"Apply the dimension reduction.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Samples to transform.\n\n Y : array-like of shape (n_samples, n_targets), default=None\n Target vectors.\n\n copy : bool, default=True\n Whether to copy `X` and `Y`, or perform in-place normalization.\n\n Returns\n -------\n `x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)\n # Normalize\n X -= self._x_mean\n X /= self._x_std\n # Apply rotation\n x_scores = np.dot(X, self.x_rotations_)\n if Y is not None:\n Y = check_array(Y, ensure_2d=False, copy=copy, dtype=FLOAT_DTYPES)\n if Y.ndim == 1:\n Y = Y.reshape(-1, 1)\n Y -= self._y_mean\n Y /= self._y_std\n y_scores = np.dot(Y, self.y_rotations_)\n return x_scores, y_scores\n\n return x_scores" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/x_mean_@getter", + "name": "x_mean_", + "qname": "sklearn.cross_decomposition._pls._PLS.x_mean_", + "decorators": [ + "deprecated('Attribute x_mean_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/x_mean_/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS.x_mean_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"Attribute x_mean_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def x_mean_(self):\n return self._x_mean" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/x_scores_@getter", + "name": "x_scores_", + "qname": "sklearn.cross_decomposition._pls._PLS.x_scores_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/x_scores_/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS.x_scores_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": 
{} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def x_scores_(self):\n # TODO: raise error in 1.1 instead\n if not isinstance(self, PLSRegression):\n warnings.warn(\n \"Attribute x_scores_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26). Use \"\n \"est.transform(X) on the training data instead.\",\n FutureWarning\n )\n return self._x_scores" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/x_std_@getter", + "name": "x_std_", + "qname": "sklearn.cross_decomposition._pls._PLS.x_std_", + "decorators": [ + "deprecated('Attribute x_std_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/x_std_/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS.x_std_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"Attribute x_std_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def x_std_(self):\n return self._x_std" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/y_mean_@getter", + "name": "y_mean_", + "qname": "sklearn.cross_decomposition._pls._PLS.y_mean_", + "decorators": [ + "deprecated('Attribute y_mean_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/y_mean_/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS.y_mean_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"Attribute y_mean_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def y_mean_(self):\n return self._y_mean" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/y_scores_@getter", + "name": "y_scores_", + "qname": "sklearn.cross_decomposition._pls._PLS.y_scores_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/y_scores_/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS.y_scores_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def y_scores_(self):\n # TODO: raise error in 1.1 instead\n if not isinstance(self, PLSRegression):\n warnings.warn(\n \"Attribute y_scores_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26). 
Use \"\n \"est.transform(X) on the training data instead.\",\n FutureWarning\n )\n return self._y_scores" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/y_std_@getter", + "name": "y_std_", + "qname": "sklearn.cross_decomposition._pls._PLS.y_std_", + "decorators": [ + "deprecated('Attribute y_std_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_PLS/y_std_/self", + "name": "self", + "qname": "sklearn.cross_decomposition._pls._PLS.y_std_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"Attribute y_std_ was deprecated in version 0.24 and \"\n \"will be removed in 1.1 (renaming of 0.26).\")\n @property\n def y_std_(self):\n return self._y_std" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_center_scale_xy", + "name": "_center_scale_xy", + "qname": "sklearn.cross_decomposition._pls._center_scale_xy", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_center_scale_xy/X", + "name": "X", + "qname": "sklearn.cross_decomposition._pls._center_scale_xy.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_center_scale_xy/Y", + "name": "Y", + "qname": "sklearn.cross_decomposition._pls._center_scale_xy.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_center_scale_xy/scale", + "name": "scale", + "qname": "sklearn.cross_decomposition._pls._center_scale_xy.scale", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Center X, Y and scale if the scale parameter==True", + "docstring": "Center X, Y and scale if the scale parameter==True\n\nReturns\n-------\n X, Y, x_mean, y_mean, x_std, y_std", + "code": "def _center_scale_xy(X, Y, scale=True):\n \"\"\" Center X, Y and scale if the scale parameter==True\n\n Returns\n -------\n X, Y, x_mean, y_mean, x_std, y_std\n \"\"\"\n # center\n x_mean = X.mean(axis=0)\n X -= x_mean\n y_mean = Y.mean(axis=0)\n Y -= y_mean\n # scale\n if scale:\n x_std = X.std(axis=0, ddof=1)\n x_std[x_std == 0.0] = 1.0\n X /= x_std\n y_std = Y.std(axis=0, ddof=1)\n y_std[y_std == 0.0] = 1.0\n Y /= y_std\n else:\n x_std = np.ones(X.shape[1])\n y_std = np.ones(Y.shape[1])\n return X, Y, x_mean, y_mean, x_std, y_std" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_get_first_singular_vectors_power_method", + "name": "_get_first_singular_vectors_power_method", + "qname": "sklearn.cross_decomposition._pls._get_first_singular_vectors_power_method", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_get_first_singular_vectors_power_method/X", + "name": "X", 
+ "qname": "sklearn.cross_decomposition._pls._get_first_singular_vectors_power_method.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_get_first_singular_vectors_power_method/Y", + "name": "Y", + "qname": "sklearn.cross_decomposition._pls._get_first_singular_vectors_power_method.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_get_first_singular_vectors_power_method/mode", + "name": "mode", + "qname": "sklearn.cross_decomposition._pls._get_first_singular_vectors_power_method.mode", + "default_value": "'A'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_get_first_singular_vectors_power_method/max_iter", + "name": "max_iter", + "qname": "sklearn.cross_decomposition._pls._get_first_singular_vectors_power_method.max_iter", + "default_value": "500", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_get_first_singular_vectors_power_method/tol", + "name": "tol", + "qname": "sklearn.cross_decomposition._pls._get_first_singular_vectors_power_method.tol", + "default_value": "1e-06", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_get_first_singular_vectors_power_method/norm_y_weights", + "name": "norm_y_weights", + "qname": "sklearn.cross_decomposition._pls._get_first_singular_vectors_power_method.norm_y_weights", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the first left and right singular vectors of X'Y.\n\nProvides an alternative to the svd(X'Y) and uses the power method instead.\nWith norm_y_weights to True and in mode A, this corresponds to the\nalgorithm section 11.3 of the Wegelin's review, except this starts at the\n\"update saliences\" part.", + "docstring": "Return the first left and right singular vectors of X'Y.\n\nProvides an alternative to the svd(X'Y) and uses the power method instead.\nWith norm_y_weights to True and in mode A, this corresponds to the\nalgorithm section 11.3 of the Wegelin's review, except this starts at the\n\"update saliences\" part.", + "code": "def _get_first_singular_vectors_power_method(X, Y, mode=\"A\", max_iter=500,\n tol=1e-06, norm_y_weights=False):\n \"\"\"Return the first left and right singular vectors of X'Y.\n\n Provides an alternative to the svd(X'Y) and uses the power method instead.\n With norm_y_weights to True and in mode A, this corresponds to the\n algorithm section 11.3 of the Wegelin's review, except this starts at the\n \"update saliences\" part.\n \"\"\"\n\n eps = np.finfo(X.dtype).eps\n try:\n y_score = next(col for col in Y.T if 
np.any(np.abs(col) > eps))\n except StopIteration as e:\n raise StopIteration(\"Y residual is constant\") from e\n\n x_weights_old = 100 # init to big value for first convergence check\n\n if mode == 'B':\n # Precompute pseudo inverse matrices\n # Basically: X_pinv = (X.T X)^-1 X.T\n # Which requires inverting a (n_features, n_features) matrix.\n # As a result, and as detailed in the Wegelin's review, CCA (i.e. mode\n # B) will be unstable if n_features > n_samples or n_targets >\n # n_samples\n X_pinv, Y_pinv = _pinv2_old(X), _pinv2_old(Y)\n\n for i in range(max_iter):\n if mode == \"B\":\n x_weights = np.dot(X_pinv, y_score)\n else:\n x_weights = np.dot(X.T, y_score) / np.dot(y_score, y_score)\n\n x_weights /= np.sqrt(np.dot(x_weights, x_weights)) + eps\n x_score = np.dot(X, x_weights)\n\n if mode == \"B\":\n y_weights = np.dot(Y_pinv, x_score)\n else:\n y_weights = np.dot(Y.T, x_score) / np.dot(x_score.T, x_score)\n\n if norm_y_weights:\n y_weights /= np.sqrt(np.dot(y_weights, y_weights)) + eps\n\n y_score = np.dot(Y, y_weights) / (np.dot(y_weights, y_weights) + eps)\n\n x_weights_diff = x_weights - x_weights_old\n if np.dot(x_weights_diff, x_weights_diff) < tol or Y.shape[1] == 1:\n break\n x_weights_old = x_weights\n\n n_iter = i + 1\n if n_iter == max_iter:\n warnings.warn('Maximum number of iterations reached',\n ConvergenceWarning)\n\n return x_weights, y_weights, n_iter" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_get_first_singular_vectors_svd", + "name": "_get_first_singular_vectors_svd", + "qname": "sklearn.cross_decomposition._pls._get_first_singular_vectors_svd", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_get_first_singular_vectors_svd/X", + "name": "X", + "qname": "sklearn.cross_decomposition._pls._get_first_singular_vectors_svd.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_get_first_singular_vectors_svd/Y", + "name": "Y", + "qname": "sklearn.cross_decomposition._pls._get_first_singular_vectors_svd.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the first left and right singular vectors of X'Y.\n\nHere the whole SVD is computed.", + "docstring": "Return the first left and right singular vectors of X'Y.\n\nHere the whole SVD is computed.", + "code": "def _get_first_singular_vectors_svd(X, Y):\n \"\"\"Return the first left and right singular vectors of X'Y.\n\n Here the whole SVD is computed.\n \"\"\"\n C = np.dot(X.T, Y)\n U, _, Vt = svd(C, full_matrices=False)\n return U[:, 0], Vt[0, :]" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_pinv2_old", + "name": "_pinv2_old", + "qname": "sklearn.cross_decomposition._pls._pinv2_old", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_pinv2_old/a", + "name": "a", + "qname": "sklearn.cross_decomposition._pls._pinv2_old.a", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + 
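The two helpers above compute the same object: the leading left and right singular vectors of `X'Y`. `_get_first_singular_vectors_svd` takes the full SVD; the power-method variant iterates toward the same pair (up to sign) without forming the decomposition. A direct NumPy sketch of the SVD route on synthetic data:

```python
# Sketch of the SVD route: first singular vectors of C = X'Y.
import numpy as np

rng = np.random.RandomState(0)
X = rng.normal(size=(50, 6))
Y = rng.normal(size=(50, 3))

C = X.T @ Y                            # cross-product matrix X'Y
U, s, Vt = np.linalg.svd(C, full_matrices=False)
u, v = U[:, 0], Vt[0, :]               # leading left/right singular vectors

# u and v maximize u' C v over unit vectors; the optimum is s[0].
print(np.isclose(u @ C @ v, s[0]))     # True
```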
"description": "", + "docstring": "", + "code": "def _pinv2_old(a):\n # Used previous scipy pinv2 that was updated in:\n # https://github.com/scipy/scipy/pull/10067\n # We can not set `cond` or `rcond` for pinv2 in scipy >= 1.3 to keep the\n # same behavior of pinv2 for scipy < 1.3, because the condition used to\n # determine the rank is dependent on the output of svd.\n u, s, vh = svd(a, full_matrices=False, check_finite=False)\n\n t = u.dtype.char.lower()\n factor = {'f': 1E3, 'd': 1E6}\n cond = np.max(s) * factor[t] * np.finfo(t).eps\n rank = np.sum(s > cond)\n\n u = u[:, :rank]\n u /= s[:rank]\n return np.transpose(np.conjugate(np.dot(u, vh[:rank])))" + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_svd_flip_1d", + "name": "_svd_flip_1d", + "qname": "sklearn.cross_decomposition._pls._svd_flip_1d", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_svd_flip_1d/u", + "name": "u", + "qname": "sklearn.cross_decomposition._pls._svd_flip_1d.u", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.cross_decomposition._pls/_svd_flip_1d/v", + "name": "v", + "qname": "sklearn.cross_decomposition._pls._svd_flip_1d.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Same as svd_flip but works on 1d arrays, and is inplace", + "docstring": "Same as svd_flip but works on 1d arrays, and is inplace", + "code": "def _svd_flip_1d(u, v):\n \"\"\"Same as svd_flip but works on 1d arrays, and is inplace\"\"\"\n # svd_flip would force us to convert to 2d array and would also return 2d\n # arrays. 
We don't want that.\n biggest_abs_val_idx = np.argmax(np.abs(u))\n sign = np.sign(u[biggest_abs_val_idx])\n u *= sign\n v *= sign" + }, + { + "id": "scikit-learn/sklearn.datasets._base/_convert_data_dataframe", + "name": "_convert_data_dataframe", + "qname": "sklearn.datasets._base._convert_data_dataframe", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/_convert_data_dataframe/caller_name", + "name": "caller_name", + "qname": "sklearn.datasets._base._convert_data_dataframe.caller_name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._base/_convert_data_dataframe/data", + "name": "data", + "qname": "sklearn.datasets._base._convert_data_dataframe.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._base/_convert_data_dataframe/target", + "name": "target", + "qname": "sklearn.datasets._base._convert_data_dataframe.target", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._base/_convert_data_dataframe/feature_names", + "name": "feature_names", + "qname": "sklearn.datasets._base._convert_data_dataframe.feature_names", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._base/_convert_data_dataframe/target_names", + "name": "target_names", + "qname": "sklearn.datasets._base._convert_data_dataframe.target_names", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._base/_convert_data_dataframe/sparse_data", + "name": "sparse_data", + "qname": "sklearn.datasets._base._convert_data_dataframe.sparse_data", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _convert_data_dataframe(caller_name, data, target,\n feature_names, target_names, sparse_data=False):\n pd = check_pandas_support('{} with as_frame=True'.format(caller_name))\n if not sparse_data:\n data_df = pd.DataFrame(data, columns=feature_names)\n else:\n data_df = pd.DataFrame.sparse.from_spmatrix(\n data, columns=feature_names\n )\n\n target_df = pd.DataFrame(target, columns=target_names)\n combined_df = pd.concat([data_df, target_df], axis=1)\n X = combined_df[feature_names]\n y = combined_df[target_names]\n if y.shape[1] == 1:\n y = y.iloc[:, 0]\n return combined_df, X, y" + }, + { + "id": "scikit-learn/sklearn.datasets._base/_fetch_remote", + "name": "_fetch_remote", + "qname": "sklearn.datasets._base._fetch_remote", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/_fetch_remote/remote", + "name": "remote", + "qname": 
"sklearn.datasets._base._fetch_remote.remote", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "RemoteFileMetadata", + "default_value": "", + "description": "Named tuple containing remote dataset meta information: url, filename\nand checksum" + }, + "type": { + "kind": "NamedType", + "name": "RemoteFileMetadata" + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/_fetch_remote/dirname", + "name": "dirname", + "qname": "sklearn.datasets._base._fetch_remote.dirname", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "string", + "default_value": "", + "description": "Directory to save the file to." + }, + "type": { + "kind": "NamedType", + "name": "string" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper function to download a remote dataset into path\n\nFetch a dataset pointed by remote's url, save into path using remote's\nfilename and ensure its integrity based on the SHA256 Checksum of the\ndownloaded file.", + "docstring": "Helper function to download a remote dataset into path\n\nFetch a dataset pointed by remote's url, save into path using remote's\nfilename and ensure its integrity based on the SHA256 Checksum of the\ndownloaded file.\n\nParameters\n----------\nremote : RemoteFileMetadata\n Named tuple containing remote dataset meta information: url, filename\n and checksum\n\ndirname : string\n Directory to save the file to.\n\nReturns\n-------\nfile_path: string\n Full path of the created file.", + "code": "def _fetch_remote(remote, dirname=None):\n \"\"\"Helper function to download a remote dataset into path\n\n Fetch a dataset pointed by remote's url, save into path using remote's\n filename and ensure its integrity based on the SHA256 Checksum of the\n downloaded file.\n\n Parameters\n ----------\n remote : RemoteFileMetadata\n Named tuple containing remote dataset meta information: url, filename\n and checksum\n\n dirname : string\n Directory to save the file to.\n\n Returns\n -------\n file_path: string\n Full path of the created file.\n \"\"\"\n\n file_path = (remote.filename if dirname is None\n else join(dirname, remote.filename))\n urlretrieve(remote.url, file_path)\n checksum = _sha256(file_path)\n if remote.checksum != checksum:\n raise IOError(\"{} has an SHA256 checksum ({}) \"\n \"differing from expected ({}), \"\n \"file may be corrupted.\".format(file_path, checksum,\n remote.checksum))\n return file_path" + }, + { + "id": "scikit-learn/sklearn.datasets._base/_pkl_filepath", + "name": "_pkl_filepath", + "qname": "sklearn.datasets._base._pkl_filepath", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/_pkl_filepath/args", + "name": "args", + "qname": "sklearn.datasets._base._pkl_filepath.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._base/_pkl_filepath/kwargs", + "name": "kwargs", + "qname": "sklearn.datasets._base._pkl_filepath.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return filename for Python 3 pickles\n\nargs[-1] is expected to be the 
\".pkl\" filename. For compatibility with\nolder scikit-learn versions, a suffix is inserted before the extension.\n\n_pkl_filepath('/path/to/folder', 'filename.pkl') returns\n'/path/to/folder/filename_py3.pkl'", + "docstring": "Return filename for Python 3 pickles\n\nargs[-1] is expected to be the \".pkl\" filename. For compatibility with\nolder scikit-learn versions, a suffix is inserted before the extension.\n\n_pkl_filepath('/path/to/folder', 'filename.pkl') returns\n'/path/to/folder/filename_py3.pkl'", + "code": "def _pkl_filepath(*args, **kwargs):\n \"\"\"Return filename for Python 3 pickles\n\n args[-1] is expected to be the \".pkl\" filename. For compatibility with\n older scikit-learn versions, a suffix is inserted before the extension.\n\n _pkl_filepath('/path/to/folder', 'filename.pkl') returns\n '/path/to/folder/filename_py3.pkl'\n\n \"\"\"\n py3_suffix = kwargs.get(\"py3_suffix\", \"_py3\")\n basename, ext = splitext(args[-1])\n basename += py3_suffix\n new_args = args[:-1] + (basename + ext,)\n return join(*new_args)" + }, + { + "id": "scikit-learn/sklearn.datasets._base/_sha256", + "name": "_sha256", + "qname": "sklearn.datasets._base._sha256", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/_sha256/path", + "name": "path", + "qname": "sklearn.datasets._base._sha256.path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate the sha256 hash of the file at path.", + "docstring": "Calculate the sha256 hash of the file at path.", + "code": "def _sha256(path):\n \"\"\"Calculate the sha256 hash of the file at path.\"\"\"\n sha256hash = hashlib.sha256()\n chunk_size = 8192\n with open(path, \"rb\") as f:\n while True:\n buffer = f.read(chunk_size)\n if not buffer:\n break\n sha256hash.update(buffer)\n return sha256hash.hexdigest()" + }, + { + "id": "scikit-learn/sklearn.datasets._base/clear_data_home", + "name": "clear_data_home", + "qname": "sklearn.datasets._base.clear_data_home", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/clear_data_home/data_home", + "name": "data_home", + "qname": "sklearn.datasets._base.clear_data_home.data_home", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "The path to scikit-learn data directory. If `None`, the default path\nis `~/sklearn_learn_data`." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Delete all the content of the data home cache.", + "docstring": "Delete all the content of the data home cache.\n\nParameters\n----------\ndata_home : str, default=None\n The path to scikit-learn data directory. If `None`, the default path\n is `~/sklearn_learn_data`.", + "code": "def clear_data_home(data_home=None):\n \"\"\"Delete all the content of the data home cache.\n\n Parameters\n ----------\n data_home : str, default=None\n The path to scikit-learn data directory. 
If `None`, the default path\n is `~/sklearn_learn_data`.\n \"\"\"\n data_home = get_data_home(data_home)\n shutil.rmtree(data_home)" + }, + { + "id": "scikit-learn/sklearn.datasets._base/get_data_home", + "name": "get_data_home", + "qname": "sklearn.datasets._base.get_data_home", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/get_data_home/data_home", + "name": "data_home", + "qname": "sklearn.datasets._base.get_data_home.data_home", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "The path to scikit-learn data directory. If `None`, the default path\nis `~/sklearn_learn_data`." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the path of the scikit-learn data dir.\n\nThis folder is used by some large dataset loaders to avoid downloading the\ndata several times.\n\nBy default the data dir is set to a folder named 'scikit_learn_data' in the\nuser home folder.\n\nAlternatively, it can be set by the 'SCIKIT_LEARN_DATA' environment\nvariable or programmatically by giving an explicit folder path. The '~'\nsymbol is expanded to the user home folder.\n\nIf the folder does not already exist, it is automatically created.", + "docstring": "Return the path of the scikit-learn data dir.\n\nThis folder is used by some large dataset loaders to avoid downloading the\ndata several times.\n\nBy default the data dir is set to a folder named 'scikit_learn_data' in the\nuser home folder.\n\nAlternatively, it can be set by the 'SCIKIT_LEARN_DATA' environment\nvariable or programmatically by giving an explicit folder path. The '~'\nsymbol is expanded to the user home folder.\n\nIf the folder does not already exist, it is automatically created.\n\nParameters\n----------\ndata_home : str, default=None\n The path to scikit-learn data directory. If `None`, the default path\n is `~/sklearn_learn_data`.", + "code": "def get_data_home(data_home=None) -> str:\n \"\"\"Return the path of the scikit-learn data dir.\n\n This folder is used by some large dataset loaders to avoid downloading the\n data several times.\n\n By default the data dir is set to a folder named 'scikit_learn_data' in the\n user home folder.\n\n Alternatively, it can be set by the 'SCIKIT_LEARN_DATA' environment\n variable or programmatically by giving an explicit folder path. The '~'\n symbol is expanded to the user home folder.\n\n If the folder does not already exist, it is automatically created.\n\n Parameters\n ----------\n data_home : str, default=None\n The path to scikit-learn data directory. 
If `None`, the default path\n is `~/sklearn_learn_data`.\n \"\"\"\n if data_home is None:\n data_home = environ.get('SCIKIT_LEARN_DATA',\n join('~', 'scikit_learn_data'))\n data_home = expanduser(data_home)\n makedirs(data_home, exist_ok=True)\n return data_home" + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_boston", + "name": "load_boston", + "qname": "sklearn.datasets._base.load_boston", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/load_boston/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._base.load_boston.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns ``(data, target)`` instead of a Bunch object.\nSee below for more information about the `data` and `target` object.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load and return the boston house-prices dataset (regression).\n\n============== ==============\nSamples total 506\nDimensionality 13\nFeatures real, positive\nTargets real 5. - 50.\n============== ==============\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load and return the boston house-prices dataset (regression).\n\n============== ==============\nSamples total 506\nDimensionality 13\nFeatures real, positive\nTargets real 5. - 50.\n============== ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (506, 13)\n The data matrix.\n target : ndarray of shape (506, )\n The regression target.\n filename : str\n The physical location of boston csv dataset.\n\n .. versionadded:: 0.20\n\n DESCR : str\n The full description of the dataset.\n feature_names : ndarray\n The names of features\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\nNotes\n-----\n .. versionchanged:: 0.20\n Fixed a wrong data point at [445, 0].\n\nExamples\n--------\n>>> from sklearn.datasets import load_boston\n>>> X, y = load_boston(return_X_y=True)\n>>> print(X.shape)\n(506, 13)", + "code": "@_deprecate_positional_args\ndef load_boston(*, return_X_y=False):\n \"\"\"Load and return the boston house-prices dataset (regression).\n\n ============== ==============\n Samples total 506\n Dimensionality 13\n Features real, positive\n Targets real 5. - 50.\n ============== ==============\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n return_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\n Returns\n -------\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (506, 13)\n The data matrix.\n target : ndarray of shape (506, )\n The regression target.\n filename : str\n The physical location of boston csv dataset.\n\n .. 
versionadded:: 0.20\n\n DESCR : str\n The full description of the dataset.\n feature_names : ndarray\n The names of features\n\n (data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\n Notes\n -----\n .. versionchanged:: 0.20\n Fixed a wrong data point at [445, 0].\n\n Examples\n --------\n >>> from sklearn.datasets import load_boston\n >>> X, y = load_boston(return_X_y=True)\n >>> print(X.shape)\n (506, 13)\n \"\"\"\n module_path = dirname(__file__)\n\n fdescr_name = join(module_path, 'descr', 'boston_house_prices.rst')\n with open(fdescr_name) as f:\n descr_text = f.read()\n\n data_file_name = join(module_path, 'data', 'boston_house_prices.csv')\n with open(data_file_name) as f:\n data_file = csv.reader(f)\n temp = next(data_file)\n n_samples = int(temp[0])\n n_features = int(temp[1])\n data = np.empty((n_samples, n_features))\n target = np.empty((n_samples,))\n temp = next(data_file) # names of features\n feature_names = np.array(temp)\n\n for i, d in enumerate(data_file):\n data[i] = np.asarray(d[:-1], dtype=np.float64)\n target[i] = np.asarray(d[-1], dtype=np.float64)\n\n if return_X_y:\n return data, target\n\n return Bunch(data=data,\n target=target,\n # last column is target value\n feature_names=feature_names[:-1],\n DESCR=descr_text,\n filename=data_file_name)" + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_breast_cancer", + "name": "load_breast_cancer", + "qname": "sklearn.datasets._base.load_breast_cancer", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/load_breast_cancer/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._base.load_breast_cancer.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns ``(data, target)`` instead of a Bunch object.\nSee below for more information about the `data` and `target` object.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_breast_cancer/as_frame", + "name": "as_frame", + "qname": "sklearn.datasets._base.load_breast_cancer.as_frame", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric). The target is\na pandas DataFrame or Series depending on the number of target columns.\nIf `return_X_y` is True, then (`data`, `target`) will be pandas\nDataFrames or Series as described below.\n\n.. 
versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load and return the breast cancer wisconsin dataset (classification).\n\nThe breast cancer dataset is a classic and very easy binary classification\ndataset.\n\n================= ==============\nClasses 2\nSamples per class 212(M),357(B)\nSamples total 569\nDimensionality 30\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load and return the breast cancer wisconsin dataset (classification).\n\nThe breast cancer dataset is a classic and very easy binary classification\ndataset.\n\n================= ==============\nClasses 2\nSamples per class 212(M),357(B)\nSamples total 569\nDimensionality 30\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (569, 30)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (569,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (569, 31)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n filename: str\n The path to the location of the data.\n\n .. versionadded:: 0.20\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\nThe copy of UCI ML Breast Cancer Wisconsin (Diagnostic) dataset is\ndownloaded from:\nhttps://goo.gl/U2Uwz2\n\nExamples\n--------\nLet's say you are interested in the samples 10, 50, and 85, and want to\nknow their class name.\n\n>>> from sklearn.datasets import load_breast_cancer\n>>> data = load_breast_cancer()\n>>> data.target[[10, 50, 85]]\narray([0, 1, 0])\n>>> list(data.target_names)\n['malignant', 'benign']", + "code": "@_deprecate_positional_args\ndef load_breast_cancer(*, return_X_y=False, as_frame=False):\n \"\"\"Load and return the breast cancer wisconsin dataset (classification).\n\n The breast cancer dataset is a classic and very easy binary classification\n dataset.\n\n ================= ==============\n Classes 2\n Samples per class 212(M),357(B)\n Samples total 569\n Dimensionality 30\n Features real, positive\n ================= ==============\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n return_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. 
versionadded:: 0.18\n\n as_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\n Returns\n -------\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (569, 30)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (569,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (569, 31)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n filename: str\n The path to the location of the data.\n\n .. versionadded:: 0.20\n\n (data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\n The copy of UCI ML Breast Cancer Wisconsin (Diagnostic) dataset is\n downloaded from:\n https://goo.gl/U2Uwz2\n\n Examples\n --------\n Let's say you are interested in the samples 10, 50, and 85, and want to\n know their class name.\n\n >>> from sklearn.datasets import load_breast_cancer\n >>> data = load_breast_cancer()\n >>> data.target[[10, 50, 85]]\n array([0, 1, 0])\n >>> list(data.target_names)\n ['malignant', 'benign']\n \"\"\"\n module_path = dirname(__file__)\n data, target, target_names = load_data(module_path, 'breast_cancer.csv')\n csv_filename = join(module_path, 'data', 'breast_cancer.csv')\n\n with open(join(module_path, 'descr', 'breast_cancer.rst')) as rst_file:\n fdescr = rst_file.read()\n\n feature_names = np.array(['mean radius', 'mean texture',\n 'mean perimeter', 'mean area',\n 'mean smoothness', 'mean compactness',\n 'mean concavity', 'mean concave points',\n 'mean symmetry', 'mean fractal dimension',\n 'radius error', 'texture error',\n 'perimeter error', 'area error',\n 'smoothness error', 'compactness error',\n 'concavity error', 'concave points error',\n 'symmetry error', 'fractal dimension error',\n 'worst radius', 'worst texture',\n 'worst perimeter', 'worst area',\n 'worst smoothness', 'worst compactness',\n 'worst concavity', 'worst concave points',\n 'worst symmetry', 'worst fractal dimension'])\n\n frame = None\n target_columns = ['target', ]\n if as_frame:\n frame, data, target = _convert_data_dataframe(\"load_breast_cancer\",\n data,\n target,\n feature_names,\n target_columns)\n\n if return_X_y:\n return data, target\n\n return Bunch(data=data,\n target=target,\n frame=frame,\n target_names=target_names,\n DESCR=fdescr,\n feature_names=feature_names,\n filename=csv_filename)" + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_data", + "name": "load_data", + "qname": "sklearn.datasets._base.load_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/load_data/module_path", + "name": "module_path", + "qname": "sklearn.datasets._base.load_data.module_path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "string", + "default_value": "", + "description": "The module path." 
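The `load_breast_cancer` entry above documents both return styles; a short sketch of the `as_frame=True` path (it requires pandas), matching the shapes and per-class counts given in the docstring:

```python
# Sketch of load_breast_cancer with as_frame=True (needs pandas installed).
from sklearn.datasets import load_breast_cancer

bunch = load_breast_cancer(as_frame=True)
print(bunch.frame.shape)            # (569, 31): 30 features + target column
print(list(bunch.target_names))     # ['malignant', 'benign']
print(bunch.target.value_counts())  # 357 benign (1), 212 malignant (0)
```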
+ }, + "type": { + "kind": "NamedType", + "name": "string" + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_data/data_file_name", + "name": "data_file_name", + "qname": "sklearn.datasets._base.load_data.data_file_name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "string", + "default_value": "", + "description": "Name of csv file to be loaded from\nmodule_path/data/data_file_name. For example 'wine_data.csv'." + }, + "type": { + "kind": "NamedType", + "name": "string" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Loads data from module_path/data/data_file_name.", + "docstring": "Loads data from module_path/data/data_file_name.\n\nParameters\n----------\nmodule_path : string\n The module path.\n\ndata_file_name : string\n Name of csv file to be loaded from\n module_path/data/data_file_name. For example 'wine_data.csv'.\n\nReturns\n-------\ndata : Numpy array\n A 2D array with each row representing one sample and each column\n representing the features of a given sample.\n\ntarget : Numpy array\n A 1D array holding target variables for all the samples in `data.\n For example target[0] is the target varible for data[0].\n\ntarget_names : Numpy array\n A 1D array containing the names of the classifications. For example\n target_names[0] is the name of the target[0] class.", + "code": "def load_data(module_path, data_file_name):\n \"\"\"Loads data from module_path/data/data_file_name.\n\n Parameters\n ----------\n module_path : string\n The module path.\n\n data_file_name : string\n Name of csv file to be loaded from\n module_path/data/data_file_name. For example 'wine_data.csv'.\n\n Returns\n -------\n data : Numpy array\n A 2D array with each row representing one sample and each column\n representing the features of a given sample.\n\n target : Numpy array\n A 1D array holding target variables for all the samples in `data.\n For example target[0] is the target varible for data[0].\n\n target_names : Numpy array\n A 1D array containing the names of the classifications. For example\n target_names[0] is the name of the target[0] class.\n \"\"\"\n with open(join(module_path, 'data', data_file_name)) as csv_file:\n data_file = csv.reader(csv_file)\n temp = next(data_file)\n n_samples = int(temp[0])\n n_features = int(temp[1])\n target_names = np.array(temp[2:])\n data = np.empty((n_samples, n_features))\n target = np.empty((n_samples,), dtype=int)\n\n for i, ir in enumerate(data_file):\n data[i] = np.asarray(ir[:-1], dtype=np.float64)\n target[i] = np.asarray(ir[-1], dtype=int)\n\n return data, target, target_names" + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_diabetes", + "name": "load_diabetes", + "qname": "sklearn.datasets._base.load_diabetes", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/load_diabetes/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._base.load_diabetes.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False.", + "description": "If True, returns ``(data, target)`` instead of a Bunch object.\nSee below for more information about the `data` and `target` object.\n\n.. 
versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_diabetes/as_frame", + "name": "as_frame", + "qname": "sklearn.datasets._base.load_diabetes.as_frame", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric). The target is\na pandas DataFrame or Series depending on the number of target columns.\nIf `return_X_y` is True, then (`data`, `target`) will be pandas\nDataFrames or Series as described below.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load and return the diabetes dataset (regression).\n\n============== ==================\nSamples total 442\nDimensionality 10\nFeatures real, -.2 < x < .2\nTargets integer 25 - 346\n============== ==================\n\n.. note::\n The meaning of each feature (i.e. `feature_names`) might be unclear\n (especially for `ltg`) as the documentation of the original dataset is\n not explicit. We provide information that seems correct in regard with\n the scientific literature in this field of research.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load and return the diabetes dataset (regression).\n\n============== ==================\nSamples total 442\nDimensionality 10\nFeatures real, -.2 < x < .2\nTargets integer 25 - 346\n============== ==================\n\n.. note::\n The meaning of each feature (i.e. `feature_names`) might be unclear\n (especially for `ltg`) as the documentation of the original dataset is\n not explicit. We provide information that seems correct in regard with\n the scientific literature in this field of research.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False.\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (442, 10)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (442,)\n The regression target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n frame: DataFrame of shape (442, 11)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n data_filename: str\n The path to the location of the data.\n target_filename: str\n The path to the location of the target.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. 
versionadded:: 0.18", + "code": "@_deprecate_positional_args\ndef load_diabetes(*, return_X_y=False, as_frame=False):\n \"\"\"Load and return the diabetes dataset (regression).\n\n ============== ==================\n Samples total 442\n Dimensionality 10\n Features real, -.2 < x < .2\n Targets integer 25 - 346\n ============== ==================\n\n .. note::\n The meaning of each feature (i.e. `feature_names`) might be unclear\n (especially for `ltg`) as the documentation of the original dataset is\n not explicit. We provide information that seems correct in regard with\n the scientific literature in this field of research.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n return_X_y : bool, default=False.\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\n as_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\n Returns\n -------\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (442, 10)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (442,)\n The regression target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n frame: DataFrame of shape (442, 11)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n data_filename: str\n The path to the location of the data.\n target_filename: str\n The path to the location of the target.\n\n (data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n \"\"\"\n module_path = dirname(__file__)\n base_dir = join(module_path, 'data')\n data_filename = join(base_dir, 'diabetes_data.csv.gz')\n data = np.loadtxt(data_filename)\n target_filename = join(base_dir, 'diabetes_target.csv.gz')\n target = np.loadtxt(target_filename)\n\n with open(join(module_path, 'descr', 'diabetes.rst')) as rst_file:\n fdescr = rst_file.read()\n\n feature_names = ['age', 'sex', 'bmi', 'bp',\n 's1', 's2', 's3', 's4', 's5', 's6']\n\n frame = None\n target_columns = ['target', ]\n if as_frame:\n frame, data, target = _convert_data_dataframe(\"load_diabetes\",\n data,\n target,\n feature_names,\n target_columns)\n\n if return_X_y:\n return data, target\n\n return Bunch(data=data,\n target=target,\n frame=frame,\n DESCR=fdescr,\n feature_names=feature_names,\n data_filename=data_filename,\n target_filename=target_filename)" + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_digits", + "name": "load_digits", + "qname": "sklearn.datasets._base.load_digits", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/load_digits/n_class", + "name": "n_class", + "qname": "sklearn.datasets._base.load_digits.n_class", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "The number of classes to return. Between 0 and 10." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_digits/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._base.load_digits.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns ``(data, target)`` instead of a Bunch object.\nSee below for more information about the `data` and `target` object.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_digits/as_frame", + "name": "as_frame", + "qname": "sklearn.datasets._base.load_digits.as_frame", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric). The target is\na pandas DataFrame or Series depending on the number of target columns.\nIf `return_X_y` is True, then (`data`, `target`) will be pandas\nDataFrames or Series as described below.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load and return the digits dataset (classification).\n\nEach datapoint is a 8x8 image of a digit.\n\n================= ==============\nClasses 10\nSamples per class ~180\nSamples total 1797\nDimensionality 64\nFeatures integers 0-16\n================= ==============\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load and return the digits dataset (classification).\n\nEach datapoint is a 8x8 image of a digit.\n\n================= ==============\nClasses 10\nSamples per class ~180\nSamples total 1797\nDimensionality 64\nFeatures integers 0-16\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_class : int, default=10\n The number of classes to return. Between 0 and 10.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (1797, 64)\n The flattened data matrix. If `as_frame=True`, `data` will be\n a pandas DataFrame.\n target: {ndarray, Series} of shape (1797,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n\n .. versionadded:: 0.20\n\n frame: DataFrame of shape (1797, 65)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n images: {ndarray} of shape (1797, 8, 8)\n The raw image data.\n DESCR: str\n The full description of the dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. 
versionadded:: 0.18\n\nThis is a copy of the test set of the UCI ML hand-written digits datasets\nhttps://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits\n\nExamples\n--------\nTo load the data and visualize the images::\n\n >>> from sklearn.datasets import load_digits\n >>> digits = load_digits()\n >>> print(digits.data.shape)\n (1797, 64)\n >>> import matplotlib.pyplot as plt #doctest: +SKIP\n >>> plt.gray() #doctest: +SKIP\n >>> plt.matshow(digits.images[0]) #doctest: +SKIP\n >>> plt.show() #doctest: +SKIP", + "code": "@_deprecate_positional_args\ndef load_digits(*, n_class=10, return_X_y=False, as_frame=False):\n \"\"\"Load and return the digits dataset (classification).\n\n Each datapoint is a 8x8 image of a digit.\n\n ================= ==============\n Classes 10\n Samples per class ~180\n Samples total 1797\n Dimensionality 64\n Features integers 0-16\n ================= ==============\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_class : int, default=10\n The number of classes to return. Between 0 and 10.\n\n return_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\n as_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\n Returns\n -------\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (1797, 64)\n The flattened data matrix. If `as_frame=True`, `data` will be\n a pandas DataFrame.\n target: {ndarray, Series} of shape (1797,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n\n .. versionadded:: 0.20\n\n frame: DataFrame of shape (1797, 65)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n images: {ndarray} of shape (1797, 8, 8)\n The raw image data.\n DESCR: str\n The full description of the dataset.\n\n (data, target) : tuple if ``return_X_y`` is True\n\n .. 
versionadded:: 0.18\n\n This is a copy of the test set of the UCI ML hand-written digits datasets\n https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits\n\n Examples\n --------\n To load the data and visualize the images::\n\n >>> from sklearn.datasets import load_digits\n >>> digits = load_digits()\n >>> print(digits.data.shape)\n (1797, 64)\n >>> import matplotlib.pyplot as plt #doctest: +SKIP\n >>> plt.gray() #doctest: +SKIP\n >>> plt.matshow(digits.images[0]) #doctest: +SKIP\n >>> plt.show() #doctest: +SKIP\n \"\"\"\n module_path = dirname(__file__)\n data = np.loadtxt(join(module_path, 'data', 'digits.csv.gz'),\n delimiter=',')\n with open(join(module_path, 'descr', 'digits.rst')) as f:\n descr = f.read()\n target = data[:, -1].astype(int, copy=False)\n flat_data = data[:, :-1]\n images = flat_data.view()\n images.shape = (-1, 8, 8)\n\n if n_class < 10:\n idx = target < n_class\n flat_data, target = flat_data[idx], target[idx]\n images = images[idx]\n\n feature_names = ['pixel_{}_{}'.format(row_idx, col_idx)\n for row_idx in range(8)\n for col_idx in range(8)]\n\n frame = None\n target_columns = ['target', ]\n if as_frame:\n frame, flat_data, target = _convert_data_dataframe(\"load_digits\",\n flat_data,\n target,\n feature_names,\n target_columns)\n\n if return_X_y:\n return flat_data, target\n\n return Bunch(data=flat_data,\n target=target,\n frame=frame,\n feature_names=feature_names,\n target_names=np.arange(10),\n images=images,\n DESCR=descr)" + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_files", + "name": "load_files", + "qname": "sklearn.datasets._base.load_files", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/load_files/container_path", + "name": "container_path", + "qname": "sklearn.datasets._base.load_files.container_path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or unicode", + "default_value": "", + "description": "Path to the main folder holding one subfolder per category" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "unicode" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_files/description", + "name": "description", + "qname": "sklearn.datasets._base.load_files.description", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or unicode", + "default_value": "None", + "description": "A paragraph describing the characteristic of the dataset: its source,\nreference, etc." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "unicode" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_files/categories", + "name": "categories", + "qname": "sklearn.datasets._base.load_files.categories", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of str", + "default_value": "None", + "description": "If None (default), load all the categories. If not None, list of\ncategory names to load (other categories ignored)." 
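load_digits is the one loader here with a class-count filter; per the code above, n_class keeps only the samples whose label is strictly below it. A small sketch under the same scikit-learn 0.24.x assumption:

from sklearn.datasets import load_digits

# Labels >= n_class are dropped by the `target < n_class` mask above
digits = load_digits(n_class=3)
print(sorted(set(digits.target)))   # [0, 1, 2]
print(digits.data.shape[1])         # 64: flattened 8x8 pixel values
print(digits.images.shape[1:])      # (8, 8): the unflattened view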
+ }, + "type": { + "kind": "NamedType", + "name": "list of str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_files/load_content", + "name": "load_content", + "qname": "sklearn.datasets._base.load_files.load_content", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to load or not the content of the different files. If true a\n'data' attribute containing the text information is present in the data\nstructure returned. If not, a filenames attribute gives the path to the\nfiles." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_files/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._base.load_files.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not to shuffle the data: might be important for models that\nmake the assumption that the samples are independent and identically\ndistributed (i.i.d.), such as stochastic gradient descent." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_files/encoding", + "name": "encoding", + "qname": "sklearn.datasets._base.load_files.encoding", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "If None, do not try to decode the content of the files (e.g. for images\nor other non-text content). If not None, encoding to use to decode text\nfiles to Unicode if load_content is True." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_files/decode_error", + "name": "decode_error", + "qname": "sklearn.datasets._base.load_files.decode_error", + "default_value": "'strict'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'strict', 'ignore', 'replace'}", + "default_value": "'strict'", + "description": "Instruction on what to do if a byte sequence is given to analyze that\ncontains characters not of the given `encoding`. Passed as keyword\nargument 'errors' to bytes.decode." + }, + "type": { + "kind": "EnumType", + "values": ["strict", "replace", "ignore"] + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_files/random_state", + "name": "random_state", + "qname": "sklearn.datasets._base.load_files.random_state", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "0", + "description": "Determines random number generation for dataset shuffling. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load text files with categories as subfolder names.\n\nIndividual samples are assumed to be files stored a two levels folder\nstructure such as the following:\n\n container_folder/\n category_1_folder/\n file_1.txt\n file_2.txt\n ...\n file_42.txt\n category_2_folder/\n file_43.txt\n file_44.txt\n ...\n\nThe folder names are used as supervised signal label names. The individual\nfile names are not important.\n\nThis function does not try to extract features into a numpy array or scipy\nsparse matrix. In addition, if load_content is false it does not try to\nload the files in memory.\n\nTo use text files in a scikit-learn classification or clustering algorithm,\nyou will need to use the :mod`~sklearn.feature_extraction.text` module to\nbuild a feature extraction transformer that suits your problem.\n\nIf you set load_content=True, you should also specify the encoding of the\ntext using the 'encoding' parameter. For many modern text files, 'utf-8'\nwill be the correct encoding. If you leave encoding equal to None, then the\ncontent will be made of bytes instead of Unicode, and you will not be able\nto use most functions in :mod:`~sklearn.feature_extraction.text`.\n\nSimilar feature extractors should be built for other kind of unstructured\ndata input such as images, audio, video, ...\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load text files with categories as subfolder names.\n\nIndividual samples are assumed to be files stored a two levels folder\nstructure such as the following:\n\n container_folder/\n category_1_folder/\n file_1.txt\n file_2.txt\n ...\n file_42.txt\n category_2_folder/\n file_43.txt\n file_44.txt\n ...\n\nThe folder names are used as supervised signal label names. The individual\nfile names are not important.\n\nThis function does not try to extract features into a numpy array or scipy\nsparse matrix. In addition, if load_content is false it does not try to\nload the files in memory.\n\nTo use text files in a scikit-learn classification or clustering algorithm,\nyou will need to use the :mod`~sklearn.feature_extraction.text` module to\nbuild a feature extraction transformer that suits your problem.\n\nIf you set load_content=True, you should also specify the encoding of the\ntext using the 'encoding' parameter. For many modern text files, 'utf-8'\nwill be the correct encoding. If you leave encoding equal to None, then the\ncontent will be made of bytes instead of Unicode, and you will not be able\nto use most functions in :mod:`~sklearn.feature_extraction.text`.\n\nSimilar feature extractors should be built for other kind of unstructured\ndata input such as images, audio, video, ...\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncontainer_path : str or unicode\n Path to the main folder holding one subfolder per category\n\ndescription : str or unicode, default=None\n A paragraph describing the characteristic of the dataset: its source,\n reference, etc.\n\ncategories : list of str, default=None\n If None (default), load all the categories. If not None, list of\n category names to load (other categories ignored).\n\nload_content : bool, default=True\n Whether to load or not the content of the different files. 
If true a\n 'data' attribute containing the text information is present in the data\n structure returned. If not, a filenames attribute gives the path to the\n files.\n\nshuffle : bool, default=True\n Whether or not to shuffle the data: might be important for models that\n make the assumption that the samples are independent and identically\n distributed (i.i.d.), such as stochastic gradient descent.\n\nencoding : str, default=None\n If None, do not try to decode the content of the files (e.g. for images\n or other non-text content). If not None, encoding to use to decode text\n files to Unicode if load_content is True.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. Passed as keyword\n argument 'errors' to bytes.decode.\n\nrandom_state : int, RandomState instance or None, default=0\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : list of str\n Only present when `load_content=True`.\n The raw text data to learn.\n target : ndarray\n The target labels (integer index).\n target_names : list\n The names of target classes.\n DESCR : str\n The full description of the dataset.\n filenames: ndarray\n The filenames holding the dataset.", + "code": "@_deprecate_positional_args\ndef load_files(container_path, *, description=None, categories=None,\n load_content=True, shuffle=True, encoding=None,\n decode_error='strict', random_state=0):\n \"\"\"Load text files with categories as subfolder names.\n\n Individual samples are assumed to be files stored a two levels folder\n structure such as the following:\n\n container_folder/\n category_1_folder/\n file_1.txt\n file_2.txt\n ...\n file_42.txt\n category_2_folder/\n file_43.txt\n file_44.txt\n ...\n\n The folder names are used as supervised signal label names. The individual\n file names are not important.\n\n This function does not try to extract features into a numpy array or scipy\n sparse matrix. In addition, if load_content is false it does not try to\n load the files in memory.\n\n To use text files in a scikit-learn classification or clustering algorithm,\n you will need to use the :mod`~sklearn.feature_extraction.text` module to\n build a feature extraction transformer that suits your problem.\n\n If you set load_content=True, you should also specify the encoding of the\n text using the 'encoding' parameter. For many modern text files, 'utf-8'\n will be the correct encoding. If you leave encoding equal to None, then the\n content will be made of bytes instead of Unicode, and you will not be able\n to use most functions in :mod:`~sklearn.feature_extraction.text`.\n\n Similar feature extractors should be built for other kind of unstructured\n data input such as images, audio, video, ...\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n container_path : str or unicode\n Path to the main folder holding one subfolder per category\n\n description : str or unicode, default=None\n A paragraph describing the characteristic of the dataset: its source,\n reference, etc.\n\n categories : list of str, default=None\n If None (default), load all the categories. 
If not None, list of\n category names to load (other categories ignored).\n\n load_content : bool, default=True\n Whether to load or not the content of the different files. If true a\n 'data' attribute containing the text information is present in the data\n structure returned. If not, a filenames attribute gives the path to the\n files.\n\n shuffle : bool, default=True\n Whether or not to shuffle the data: might be important for models that\n make the assumption that the samples are independent and identically\n distributed (i.i.d.), such as stochastic gradient descent.\n\n encoding : str, default=None\n If None, do not try to decode the content of the files (e.g. for images\n or other non-text content). If not None, encoding to use to decode text\n files to Unicode if load_content is True.\n\n decode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. Passed as keyword\n argument 'errors' to bytes.decode.\n\n random_state : int, RandomState instance or None, default=0\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : list of str\n Only present when `load_content=True`.\n The raw text data to learn.\n target : ndarray\n The target labels (integer index).\n target_names : list\n The names of target classes.\n DESCR : str\n The full description of the dataset.\n filenames: ndarray\n The filenames holding the dataset.\n \"\"\"\n target = []\n target_names = []\n filenames = []\n\n folders = [f for f in sorted(listdir(container_path))\n if isdir(join(container_path, f))]\n\n if categories is not None:\n folders = [f for f in folders if f in categories]\n\n for label, folder in enumerate(folders):\n target_names.append(folder)\n folder_path = join(container_path, folder)\n documents = [join(folder_path, d)\n for d in sorted(listdir(folder_path))]\n target.extend(len(documents) * [label])\n filenames.extend(documents)\n\n # convert to array for fancy indexing\n filenames = np.array(filenames)\n target = np.array(target)\n\n if shuffle:\n random_state = check_random_state(random_state)\n indices = np.arange(filenames.shape[0])\n random_state.shuffle(indices)\n filenames = filenames[indices]\n target = target[indices]\n\n if load_content:\n data = []\n for filename in filenames:\n with open(filename, 'rb') as f:\n data.append(f.read())\n if encoding is not None:\n data = [d.decode(encoding, decode_error) for d in data]\n return Bunch(data=data,\n filenames=filenames,\n target_names=target_names,\n target=target,\n DESCR=description)\n\n return Bunch(filenames=filenames,\n target_names=target_names,\n target=target,\n DESCR=description)" + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_iris", + "name": "load_iris", + "qname": "sklearn.datasets._base.load_iris", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/load_iris/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._base.load_iris.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns ``(data, target)`` instead of a Bunch object. 
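As the docstring stresses, load_files returns raw text rather than features, so a vectorizer is still needed downstream. A sketch assuming a hypothetical corpus/ folder laid out as container/category/file.txt:

from sklearn.datasets import load_files
from sklearn.feature_extraction.text import TfidfVectorizer

# 'corpus/' is a placeholder path: one subfolder per category
dataset = load_files('corpus/', encoding='utf-8',
                     decode_error='replace', random_state=0)
print(dataset.target_names)   # subfolder names become class labels

# Raw strings must be vectorized before fitting an estimator
X = TfidfVectorizer().fit_transform(dataset.data)
print(X.shape[0] == len(dataset.target))   # True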
See\nbelow for more information about the `data` and `target` object.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_iris/as_frame", + "name": "as_frame", + "qname": "sklearn.datasets._base.load_iris.as_frame", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric). The target is\na pandas DataFrame or Series depending on the number of target columns.\nIf `return_X_y` is True, then (`data`, `target`) will be pandas\nDataFrames or Series as described below.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load and return the iris dataset (classification).\n\nThe iris dataset is a classic and very easy multi-class classification\ndataset.\n\n================= ==============\nClasses 3\nSamples per class 50\nSamples total 150\nDimensionality 4\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load and return the iris dataset (classification).\n\nThe iris dataset is a classic and very easy multi-class classification\ndataset.\n\n================= ==============\nClasses 3\nSamples per class 50\nSamples total 150\nDimensionality 4\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (150, 4)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (150,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (150, 5)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n filename: str\n The path to the location of the data.\n\n .. versionadded:: 0.20\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\nNotes\n-----\n .. 
versionchanged:: 0.20\n Fixed two wrong data points according to Fisher's paper.\n The new version is the same as in R, but not as in the UCI\n Machine Learning Repository.\n\nExamples\n--------\nLet's say you are interested in the samples 10, 25, and 50, and want to\nknow their class name.\n\n>>> from sklearn.datasets import load_iris\n>>> data = load_iris()\n>>> data.target[[10, 25, 50]]\narray([0, 0, 1])\n>>> list(data.target_names)\n['setosa', 'versicolor', 'virginica']", + "code": "@_deprecate_positional_args\ndef load_iris(*, return_X_y=False, as_frame=False):\n \"\"\"Load and return the iris dataset (classification).\n\n The iris dataset is a classic and very easy multi-class classification\n dataset.\n\n ================= ==============\n Classes 3\n Samples per class 50\n Samples total 150\n Dimensionality 4\n Features real, positive\n ================= ==============\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n return_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\n as_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\n Returns\n -------\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (150, 4)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (150,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (150, 5)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n filename: str\n The path to the location of the data.\n\n .. versionadded:: 0.20\n\n (data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\n Notes\n -----\n .. 
versionchanged:: 0.20\n Fixed two wrong data points according to Fisher's paper.\n The new version is the same as in R, but not as in the UCI\n Machine Learning Repository.\n\n Examples\n --------\n Let's say you are interested in the samples 10, 25, and 50, and want to\n know their class name.\n\n >>> from sklearn.datasets import load_iris\n >>> data = load_iris()\n >>> data.target[[10, 25, 50]]\n array([0, 0, 1])\n >>> list(data.target_names)\n ['setosa', 'versicolor', 'virginica']\n \"\"\"\n module_path = dirname(__file__)\n data, target, target_names = load_data(module_path, 'iris.csv')\n iris_csv_filename = join(module_path, 'data', 'iris.csv')\n\n with open(join(module_path, 'descr', 'iris.rst')) as rst_file:\n fdescr = rst_file.read()\n\n feature_names = ['sepal length (cm)', 'sepal width (cm)',\n 'petal length (cm)', 'petal width (cm)']\n\n frame = None\n target_columns = ['target', ]\n if as_frame:\n frame, data, target = _convert_data_dataframe(\"load_iris\",\n data,\n target,\n feature_names,\n target_columns)\n\n if return_X_y:\n return data, target\n\n return Bunch(data=data,\n target=target,\n frame=frame,\n target_names=target_names,\n DESCR=fdescr,\n feature_names=feature_names,\n filename=iris_csv_filename)" + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_linnerud", + "name": "load_linnerud", + "qname": "sklearn.datasets._base.load_linnerud", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/load_linnerud/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._base.load_linnerud.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns ``(data, target)`` instead of a Bunch object.\nSee below for more information about the `data` and `target` object.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_linnerud/as_frame", + "name": "as_frame", + "qname": "sklearn.datasets._base.load_linnerud.as_frame", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric, string or categorical). The target is\na pandas DataFrame or Series depending on the number of target columns.\nIf `return_X_y` is True, then (`data`, `target`) will be pandas\nDataFrames or Series as described below.\n\n.. 
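The load_iris docstring above already demonstrates target indexing; what it does not show is the as_frame=True path added in 0.23. A sketch, assuming pandas is installed:

from sklearn.datasets import load_iris

iris = load_iris(as_frame=True)
print(iris.frame.shape)        # (150, 5): 4 features plus a 'target' column
# data and target are now pandas objects instead of ndarrays
print(type(iris.data).__name__, type(iris.target).__name__)  # DataFrame Series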
versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load and return the physical excercise linnerud dataset.\n\nThis dataset is suitable for multi-ouput regression tasks.\n\n============== ============================\nSamples total 20\nDimensionality 3 (for both data and target)\nFeatures integer\nTargets integer\n============== ============================\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load and return the physical excercise linnerud dataset.\n\nThis dataset is suitable for multi-ouput regression tasks.\n\n============== ============================\nSamples total 20\nDimensionality 3 (for both data and target)\nFeatures integer\nTargets integer\n============== ============================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (20, 3)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, dataframe} of shape (20, 3)\n The regression targets. If `as_frame=True`, `target` will be\n a pandas DataFrame.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of the target columns.\n frame: DataFrame of shape (20, 6)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n data_filename: str\n The path to the location of the data.\n target_filename: str\n The path to the location of the target.\n\n .. versionadded:: 0.20\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18", + "code": "@_deprecate_positional_args\ndef load_linnerud(*, return_X_y=False, as_frame=False):\n \"\"\"Load and return the physical excercise linnerud dataset.\n\n This dataset is suitable for multi-ouput regression tasks.\n\n ============== ============================\n Samples total 20\n Dimensionality 3 (for both data and target)\n Features integer\n Targets integer\n ============== ============================\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n return_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\n as_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. 
versionadded:: 0.23\n\n Returns\n -------\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (20, 3)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, dataframe} of shape (20, 3)\n The regression targets. If `as_frame=True`, `target` will be\n a pandas DataFrame.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of the target columns.\n frame: DataFrame of shape (20, 6)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n data_filename: str\n The path to the location of the data.\n target_filename: str\n The path to the location of the target.\n\n .. versionadded:: 0.20\n\n (data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n \"\"\"\n base_dir = join(dirname(__file__), 'data/')\n data_filename = join(base_dir, 'linnerud_exercise.csv')\n target_filename = join(base_dir, 'linnerud_physiological.csv')\n\n # Read data\n data_exercise = np.loadtxt(data_filename, skiprows=1)\n data_physiological = np.loadtxt(target_filename, skiprows=1)\n\n # Read header\n with open(data_filename) as f:\n header_exercise = f.readline().split()\n with open(target_filename) as f:\n header_physiological = f.readline().split()\n\n with open(dirname(__file__) + '/descr/linnerud.rst') as f:\n descr = f.read()\n\n frame = None\n if as_frame:\n (frame,\n data_exercise,\n data_physiological) = _convert_data_dataframe(\"load_linnerud\",\n data_exercise,\n data_physiological,\n header_exercise,\n header_physiological)\n if return_X_y:\n return data_exercise, data_physiological\n\n return Bunch(data=data_exercise,\n feature_names=header_exercise,\n target=data_physiological,\n target_names=header_physiological,\n frame=frame,\n DESCR=descr,\n data_filename=data_filename,\n target_filename=target_filename)" + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_sample_image", + "name": "load_sample_image", + "qname": "sklearn.datasets._base.load_sample_image", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/load_sample_image/image_name", + "name": "image_name", + "qname": "sklearn.datasets._base.load_sample_image.image_name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{`china.jpg`, `flower.jpg`}", + "default_value": "", + "description": "The name of the sample image loaded" + }, + "type": { + "kind": "EnumType", + "values": [] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load the numpy array of a single sample image\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load the numpy array of a single sample image\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nimage_name : {`china.jpg`, `flower.jpg`}\n The name of the sample image loaded\n\nReturns\n-------\nimg : 3D array\n The image as a numpy array: height x width x color\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_sample_image\n>>> china = load_sample_image('china.jpg') # doctest: +SKIP\n>>> china.dtype # doctest: +SKIP\ndtype('uint8')\n>>> china.shape # doctest: +SKIP\n(427, 640, 3)\n>>> flower = load_sample_image('flower.jpg') # doctest: +SKIP\n>>> flower.dtype # doctest: +SKIP\ndtype('uint8')\n>>> flower.shape # doctest: +SKIP\n(427, 640, 3)", + "code": "def 
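load_linnerud is the only loader in this module with a multi-column target, which makes it a handy smoke test for multi-output regressors. A sketch (shapes come from the table in the entry above):

from sklearn.datasets import load_linnerud
from sklearn.linear_model import LinearRegression

linnerud = load_linnerud()
print(linnerud.data.shape, linnerud.target.shape)   # (20, 3) (20, 3)

# A 3-column target exercises multi-output regression end to end
model = LinearRegression().fit(linnerud.data, linnerud.target)
print(model.predict(linnerud.data[:1]).shape)       # (1, 3)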
load_sample_image(image_name):\n \"\"\"Load the numpy array of a single sample image\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n image_name : {`china.jpg`, `flower.jpg`}\n The name of the sample image loaded\n\n Returns\n -------\n img : 3D array\n The image as a numpy array: height x width x color\n\n Examples\n --------\n\n >>> from sklearn.datasets import load_sample_image\n >>> china = load_sample_image('china.jpg') # doctest: +SKIP\n >>> china.dtype # doctest: +SKIP\n dtype('uint8')\n >>> china.shape # doctest: +SKIP\n (427, 640, 3)\n >>> flower = load_sample_image('flower.jpg') # doctest: +SKIP\n >>> flower.dtype # doctest: +SKIP\n dtype('uint8')\n >>> flower.shape # doctest: +SKIP\n (427, 640, 3)\n \"\"\"\n images = load_sample_images()\n index = None\n for i, filename in enumerate(images.filenames):\n if filename.endswith(image_name):\n index = i\n break\n if index is None:\n raise AttributeError(\"Cannot find sample image: %s\" % image_name)\n return images.images[index]" + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_sample_images", + "name": "load_sample_images", + "qname": "sklearn.datasets._base.load_sample_images", + "decorators": [], + "parameters": [], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load sample images for image manipulation.\n\nLoads both, ``china`` and ``flower``.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load sample images for image manipulation.\n\nLoads both, ``china`` and ``flower``.\n\nRead more in the :ref:`User Guide `.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n images : list of ndarray of shape (427, 640, 3)\n The two sample image.\n filenames : list\n The filenames for the images.\n DESCR : str\n The full description of the dataset.\n\nExamples\n--------\nTo load the data and visualize the images:\n\n>>> from sklearn.datasets import load_sample_images\n>>> dataset = load_sample_images() #doctest: +SKIP\n>>> len(dataset.images) #doctest: +SKIP\n2\n>>> first_img_data = dataset.images[0] #doctest: +SKIP\n>>> first_img_data.shape #doctest: +SKIP\n(427, 640, 3)\n>>> first_img_data.dtype #doctest: +SKIP\ndtype('uint8')", + "code": "def load_sample_images():\n \"\"\"Load sample images for image manipulation.\n\n Loads both, ``china`` and ``flower``.\n\n Read more in the :ref:`User Guide `.\n\n Returns\n -------\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n images : list of ndarray of shape (427, 640, 3)\n The two sample image.\n filenames : list\n The filenames for the images.\n DESCR : str\n The full description of the dataset.\n\n Examples\n --------\n To load the data and visualize the images:\n\n >>> from sklearn.datasets import load_sample_images\n >>> dataset = load_sample_images() #doctest: +SKIP\n >>> len(dataset.images) #doctest: +SKIP\n 2\n >>> first_img_data = dataset.images[0] #doctest: +SKIP\n >>> first_img_data.shape #doctest: +SKIP\n (427, 640, 3)\n >>> first_img_data.dtype #doctest: +SKIP\n dtype('uint8')\n \"\"\"\n # import PIL only when needed\n from ..externals._pilutil import imread\n\n module_path = join(dirname(__file__), \"images\")\n with open(join(module_path, 'README.txt')) as f:\n descr = f.read()\n filenames = [join(module_path, filename)\n for filename in sorted(os.listdir(module_path))\n if filename.endswith(\".jpg\")]\n # Load image data for each image in the source folder.\n images = [imread(filename) for 
filename in filenames]\n\n return Bunch(images=images,\n filenames=filenames,\n DESCR=descr)" + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_wine", + "name": "load_wine", + "qname": "sklearn.datasets._base.load_wine", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._base/load_wine/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._base.load_wine.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns ``(data, target)`` instead of a Bunch object.\nSee below for more information about the `data` and `target` object." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._base/load_wine/as_frame", + "name": "as_frame", + "qname": "sklearn.datasets._base.load_wine.as_frame", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric). The target is\na pandas DataFrame or Series depending on the number of target columns.\nIf `return_X_y` is True, then (`data`, `target`) will be pandas\nDataFrames or Series as described below.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load and return the wine dataset (classification).\n\n.. versionadded:: 0.18\n\nThe wine dataset is a classic and very easy multi-class classification\ndataset.\n\n================= ==============\nClasses 3\nSamples per class [59,71,48]\nSamples total 178\nDimensionality 13\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load and return the wine dataset (classification).\n\n.. versionadded:: 0.18\n\nThe wine dataset is a classic and very easy multi-class classification\ndataset.\n\n================= ==============\nClasses 3\nSamples per class [59,71,48]\nSamples total 178\nDimensionality 13\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (178, 13)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (178,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (178, 14)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. 
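Both sample-image helpers defer the PIL import until call time, so Pillow is a runtime requirement here even though scikit-learn does not declare it as a dependency. A sketch; note from the lookup loop above that an unrecognized name raises AttributeError rather than ValueError:

from sklearn.datasets import load_sample_image

china = load_sample_image('china.jpg')   # needs Pillow installed
print(china.shape, china.dtype)          # (427, 640, 3) uint8

try:
    load_sample_image('missing.jpg')     # hypothetical bad name
except AttributeError as exc:
    print(exc)                           # "Cannot find sample image: ..."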
versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\nThe copy of UCI ML Wine Data Set dataset is downloaded and modified to fit\nstandard format from:\nhttps://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\n\nExamples\n--------\nLet's say you are interested in the samples 10, 80, and 140, and want to\nknow their class name.\n\n>>> from sklearn.datasets import load_wine\n>>> data = load_wine()\n>>> data.target[[10, 80, 140]]\narray([0, 1, 2])\n>>> list(data.target_names)\n['class_0', 'class_1', 'class_2']", + "code": "@_deprecate_positional_args\ndef load_wine(*, return_X_y=False, as_frame=False):\n \"\"\"Load and return the wine dataset (classification).\n\n .. versionadded:: 0.18\n\n The wine dataset is a classic and very easy multi-class classification\n dataset.\n\n ================= ==============\n Classes 3\n Samples per class [59,71,48]\n Samples total 178\n Dimensionality 13\n Features real, positive\n ================= ==============\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n return_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n as_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\n Returns\n -------\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (178, 13)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (178,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (178, 14)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. 
versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n\n (data, target) : tuple if ``return_X_y`` is True\n\n The copy of UCI ML Wine Data Set dataset is downloaded and modified to fit\n standard format from:\n https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\n\n Examples\n --------\n Let's say you are interested in the samples 10, 80, and 140, and want to\n know their class name.\n\n >>> from sklearn.datasets import load_wine\n >>> data = load_wine()\n >>> data.target[[10, 80, 140]]\n array([0, 1, 2])\n >>> list(data.target_names)\n ['class_0', 'class_1', 'class_2']\n \"\"\"\n module_path = dirname(__file__)\n data, target, target_names = load_data(module_path, 'wine_data.csv')\n\n with open(join(module_path, 'descr', 'wine_data.rst')) as rst_file:\n fdescr = rst_file.read()\n\n feature_names = ['alcohol',\n 'malic_acid',\n 'ash',\n 'alcalinity_of_ash',\n 'magnesium',\n 'total_phenols',\n 'flavanoids',\n 'nonflavanoid_phenols',\n 'proanthocyanins',\n 'color_intensity',\n 'hue',\n 'od280/od315_of_diluted_wines',\n 'proline']\n\n frame = None\n target_columns = ['target', ]\n if as_frame:\n frame, data, target = _convert_data_dataframe(\"load_wine\",\n data,\n target,\n feature_names,\n target_columns)\n\n if return_X_y:\n return data, target\n\n return Bunch(data=data,\n target=target,\n frame=frame,\n target_names=target_names,\n DESCR=fdescr,\n feature_names=feature_names)" + }, + { + "id": "scikit-learn/sklearn.datasets._california_housing/fetch_california_housing", + "name": "fetch_california_housing", + "qname": "sklearn.datasets._california_housing.fetch_california_housing", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._california_housing/fetch_california_housing/data_home", + "name": "data_home", + "qname": "sklearn.datasets._california_housing.fetch_california_housing.data_home", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Specify another download and cache folder for the datasets. By default\nall scikit-learn data is stored in '~/scikit_learn_data' subfolders." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._california_housing/fetch_california_housing/download_if_missing", + "name": "download_if_missing", + "qname": "sklearn.datasets._california_housing.fetch_california_housing.download_if_missing", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, raise a IOError if the data is not locally available\ninstead of trying to download the data from the source site." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._california_housing/fetch_california_housing/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._california_housing.fetch_california_housing.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False.", + "description": "If True, returns ``(data.data, data.target)`` instead of a Bunch\nobject.\n\n.. 
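load_wine follows the same Bunch/tuple contract as the other loaders; a one-line sketch of the return_X_y path, matching the shapes in the entry above:

from sklearn.datasets import load_wine

# return_X_y=True is convenient for piping straight into an estimator
X, y = load_wine(return_X_y=True)
print(X.shape, y.shape)   # (178, 13) (178,)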
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._california_housing/fetch_california_housing/as_frame", + "name": "as_frame", + "qname": "sklearn.datasets._california_housing.fetch_california_housing.as_frame", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric, string or categorical). The target is\na pandas DataFrame or Series depending on the number of target_columns.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load the California housing dataset (regression).\n\n============== ==============\nSamples total 20640\nDimensionality 8\nFeatures real\nTarget real 0.15 - 5.\n============== ==============\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load the California housing dataset (regression).\n\n============== ==============\nSamples total 20640\nDimensionality 8\nFeatures real\nTarget real 0.15 - 5.\n============== ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n\nreturn_X_y : bool, default=False.\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). The target is\n a pandas DataFrame or Series depending on the number of target_columns.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray, shape (20640, 8)\n Each row corresponding to the 8 feature values in order.\n If ``as_frame`` is True, ``data`` is a pandas object.\n target : numpy array of shape (20640,)\n Each value corresponds to the average\n house value in units of 100,000.\n If ``as_frame`` is True, ``target`` is a pandas object.\n feature_names : list of length 8\n Array of ordered feature names used in the dataset.\n DESCR : string\n Description of the California housing dataset.\n frame : pandas DataFrame\n Only present when `as_frame=True`. DataFrame with ``data`` and\n ``target``.\n\n .. versionadded:: 0.23\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20\n\nNotes\n-----\n\nThis dataset consists of 20,640 samples and 9 features.", + "code": "@_deprecate_positional_args\ndef fetch_california_housing(*, data_home=None, download_if_missing=True,\n return_X_y=False, as_frame=False):\n \"\"\"Load the California housing dataset (regression).\n\n ============== ==============\n Samples total 20640\n Dimensionality 8\n Features real\n Target real 0.15 - 5.\n ============== ==============\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n data_home : str, default=None\n Specify another download and cache folder for the datasets. 
By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n download_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n\n return_X_y : bool, default=False.\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\n as_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). The target is\n a pandas DataFrame or Series depending on the number of target_columns.\n\n .. versionadded:: 0.23\n\n Returns\n -------\n dataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray, shape (20640, 8)\n Each row corresponding to the 8 feature values in order.\n If ``as_frame`` is True, ``data`` is a pandas object.\n target : numpy array of shape (20640,)\n Each value corresponds to the average\n house value in units of 100,000.\n If ``as_frame`` is True, ``target`` is a pandas object.\n feature_names : list of length 8\n Array of ordered feature names used in the dataset.\n DESCR : string\n Description of the California housing dataset.\n frame : pandas DataFrame\n Only present when `as_frame=True`. DataFrame with ``data`` and\n ``target``.\n\n .. versionadded:: 0.23\n\n (data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20\n\n Notes\n -----\n\n This dataset consists of 20,640 samples and 9 features.\n \"\"\"\n data_home = get_data_home(data_home=data_home)\n if not exists(data_home):\n makedirs(data_home)\n\n filepath = _pkl_filepath(data_home, 'cal_housing.pkz')\n if not exists(filepath):\n if not download_if_missing:\n raise IOError(\"Data not found and `download_if_missing` is False\")\n\n logger.info('Downloading Cal. 
housing from {} to {}'.format(\n ARCHIVE.url, data_home))\n\n archive_path = _fetch_remote(ARCHIVE, dirname=data_home)\n\n with tarfile.open(mode=\"r:gz\", name=archive_path) as f:\n cal_housing = np.loadtxt(\n f.extractfile('CaliforniaHousing/cal_housing.data'),\n delimiter=',')\n # Columns are not in the same order compared to the previous\n # URL resource on lib.stat.cmu.edu\n columns_index = [8, 7, 2, 3, 4, 5, 6, 1, 0]\n cal_housing = cal_housing[:, columns_index]\n\n joblib.dump(cal_housing, filepath, compress=6)\n remove(archive_path)\n\n else:\n cal_housing = joblib.load(filepath)\n\n feature_names = [\"MedInc\", \"HouseAge\", \"AveRooms\", \"AveBedrms\",\n \"Population\", \"AveOccup\", \"Latitude\", \"Longitude\"]\n\n target, data = cal_housing[:, 0], cal_housing[:, 1:]\n\n # avg rooms = total rooms / households\n data[:, 2] /= data[:, 5]\n\n # avg bed rooms = total bed rooms / households\n data[:, 3] /= data[:, 5]\n\n # avg occupancy = population / households\n data[:, 5] = data[:, 4] / data[:, 5]\n\n # target in units of 100,000\n target = target / 100000.0\n\n module_path = dirname(__file__)\n with open(join(module_path, 'descr', 'california_housing.rst')) as dfile:\n descr = dfile.read()\n\n X = data\n y = target\n\n frame = None\n target_names = [\"MedHouseVal\", ]\n if as_frame:\n frame, X, y = _convert_data_dataframe(\"fetch_california_housing\",\n data,\n target,\n feature_names,\n target_names)\n\n if return_X_y:\n return X, y\n\n return Bunch(data=X,\n target=y,\n frame=frame,\n target_names=target_names,\n feature_names=feature_names,\n DESCR=descr)" + }, + { + "id": "scikit-learn/sklearn.datasets._covtype/fetch_covtype", + "name": "fetch_covtype", + "qname": "sklearn.datasets._covtype.fetch_covtype", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._covtype/fetch_covtype/data_home", + "name": "data_home", + "qname": "sklearn.datasets._covtype.fetch_covtype.data_home", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Specify another download and cache folder for the datasets. By default\nall scikit-learn data is stored in '~/scikit_learn_data' subfolders." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._covtype/fetch_covtype/download_if_missing", + "name": "download_if_missing", + "qname": "sklearn.datasets._covtype.fetch_covtype.download_if_missing", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, raise a IOError if the data is not locally available\ninstead of trying to download the data from the source site." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._covtype/fetch_covtype/random_state", + "name": "random_state", + "qname": "sklearn.datasets._covtype.fetch_covtype.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset shuffling. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." 
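Unlike the load_* functions, fetch_california_housing downloads and caches its archive under data_home and derives the average-rooms/bedrooms/occupancy columns at load time, as the code above shows. A sketch, assuming network access (or a warm cache):

from sklearn.datasets import fetch_california_housing

# First call fetches the archive and caches a pickle under data_home
# (default '~/scikit_learn_data'); later calls read the cache
housing = fetch_california_housing(download_if_missing=True)
print(housing.data.shape)                               # (20640, 8)
print(housing.feature_names[0], housing.target_names)   # MedInc ['MedHouseVal']
# target is the median house value per district, in units of 100,000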
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._covtype/fetch_covtype/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._covtype.fetch_covtype.shuffle", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to shuffle dataset." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._covtype/fetch_covtype/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._covtype.fetch_covtype.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns ``(data.data, data.target)`` instead of a Bunch\nobject.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._covtype/fetch_covtype/as_frame", + "name": "as_frame", + "qname": "sklearn.datasets._covtype.fetch_covtype.as_frame", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric). The target is a pandas DataFrame or\nSeries depending on the number of target columns. If `return_X_y` is\nTrue, then (`data`, `target`) will be pandas DataFrames or Series as\ndescribed below.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load the covertype dataset (classification).\n\nDownload it if necessary.\n\n================= ============\nClasses 7\nSamples total 581012\nDimensionality 54\nFeatures int\n================= ============\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load the covertype dataset (classification).\n\nDownload it if necessary.\n\n================= ============\nClasses 7\nSamples total 581012\nDimensionality 54\nFeatures int\n================= ============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nshuffle : bool, default=False\n Whether to shuffle dataset.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is a pandas DataFrame or\n Series depending on the number of target columns. 
If `return_X_y` is\n True, then (`data`, `target`) will be pandas DataFrames or Series as\n described below.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (581012, 54)\n Each row corresponds to the 54 features in the dataset.\n target : ndarray of shape (581012,)\n Each value corresponds to one of\n the 7 forest covertypes with values\n ranging between 1 to 7.\n frame : dataframe of shape (581012, 55)\n Only present when `as_frame=True`. Contains `data` and `target`.\n DESCR : str\n Description of the forest covertype dataset.\n feature_names : list\n The names of the dataset columns.\n target_names: list\n The names of the target columns.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20", + "code": "@_deprecate_positional_args\ndef fetch_covtype(*, data_home=None, download_if_missing=True,\n random_state=None, shuffle=False, return_X_y=False,\n as_frame=False):\n \"\"\"Load the covertype dataset (classification).\n\n Download it if necessary.\n\n ================= ============\n Classes 7\n Samples total 581012\n Dimensionality 54\n Features int\n ================= ============\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n data_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n download_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n shuffle : bool, default=False\n Whether to shuffle dataset.\n\n return_X_y : bool, default=False\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\n as_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is a pandas DataFrame or\n Series depending on the number of target columns. If `return_X_y` is\n True, then (`data`, `target`) will be pandas DataFrames or Series as\n described below.\n\n .. versionadded:: 0.24\n\n Returns\n -------\n dataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (581012, 54)\n Each row corresponds to the 54 features in the dataset.\n target : ndarray of shape (581012,)\n Each value corresponds to one of\n the 7 forest covertypes with values\n ranging between 1 to 7.\n frame : dataframe of shape (581012, 55)\n Only present when `as_frame=True`. Contains `data` and `target`.\n DESCR : str\n Description of the forest covertype dataset.\n feature_names : list\n The names of the dataset columns.\n target_names: list\n The names of the target columns.\n\n (data, target) : tuple if ``return_X_y`` is True\n\n .. 
versionadded:: 0.20\n\n \"\"\"\n\n data_home = get_data_home(data_home=data_home)\n covtype_dir = join(data_home, \"covertype\")\n samples_path = _pkl_filepath(covtype_dir, \"samples\")\n targets_path = _pkl_filepath(covtype_dir, \"targets\")\n available = exists(samples_path)\n\n if download_if_missing and not available:\n if not exists(covtype_dir):\n makedirs(covtype_dir)\n logger.info(\"Downloading %s\" % ARCHIVE.url)\n\n archive_path = _fetch_remote(ARCHIVE, dirname=covtype_dir)\n Xy = np.genfromtxt(GzipFile(filename=archive_path), delimiter=',')\n # delete archive\n remove(archive_path)\n\n X = Xy[:, :-1]\n y = Xy[:, -1].astype(np.int32, copy=False)\n\n joblib.dump(X, samples_path, compress=9)\n joblib.dump(y, targets_path, compress=9)\n\n elif not available and not download_if_missing:\n raise IOError(\"Data not found and `download_if_missing` is False\")\n try:\n X, y\n except NameError:\n X = joblib.load(samples_path)\n y = joblib.load(targets_path)\n\n if shuffle:\n ind = np.arange(X.shape[0])\n rng = check_random_state(random_state)\n rng.shuffle(ind)\n X = X[ind]\n y = y[ind]\n\n module_path = dirname(__file__)\n with open(join(module_path, 'descr', 'covtype.rst')) as rst_file:\n fdescr = rst_file.read()\n\n frame = None\n if as_frame:\n frame, X, y = _convert_data_dataframe(caller_name=\"fetch_covtype\",\n data=X,\n target=y,\n feature_names=FEATURE_NAMES,\n target_names=TARGET_NAMES)\n if return_X_y:\n return X, y\n\n return Bunch(data=X,\n target=y,\n frame=frame,\n target_names=TARGET_NAMES,\n feature_names=FEATURE_NAMES,\n DESCR=fdescr)" + }, + { + "id": "scikit-learn/sklearn.datasets._kddcup99/_fetch_brute_kddcup99", + "name": "_fetch_brute_kddcup99", + "qname": "sklearn.datasets._kddcup99._fetch_brute_kddcup99", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._kddcup99/_fetch_brute_kddcup99/data_home", + "name": "data_home", + "qname": "sklearn.datasets._kddcup99._fetch_brute_kddcup99.data_home", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Specify another download and cache folder for the datasets. By default\nall scikit-learn data is stored in '~/scikit_learn_data' subfolders." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._kddcup99/_fetch_brute_kddcup99/download_if_missing", + "name": "download_if_missing", + "qname": "sklearn.datasets._kddcup99._fetch_brute_kddcup99.download_if_missing", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, raise a IOError if the data is not locally available\ninstead of trying to download the data from the source site." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._kddcup99/_fetch_brute_kddcup99/percent10", + "name": "percent10", + "qname": "sklearn.datasets._kddcup99._fetch_brute_kddcup99.percent10", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to load only 10 percent of the data." 
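The `fetch_covtype` code recorded above shuffles with `check_random_state`, so passing an int `random_state` yields a reproducible permutation of the 581012 rows. A short sketch of that documented behaviour:

```python
# Illustrative call to fetch_covtype per the entry above: shuffled,
# reproducible, returned as plain (data, target) arrays.
from sklearn.datasets import fetch_covtype

X, y = fetch_covtype(shuffle=True, random_state=0, return_X_y=True)
print(X.shape)           # (581012, 54)
print(y.min(), y.max())  # cover types are labelled 1 through 7
```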
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load the kddcup99 dataset, downloading it if necessary.", + "docstring": "Load the kddcup99 dataset, downloading it if necessary.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\npercent10 : bool, default=True\n Whether to load only 10 percent of the data.\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (494021, 41)\n Each row corresponds to the 41 features in the dataset.\n target : ndarray of shape (494021,)\n Each value corresponds to one of the 21 attack types or to the\n label 'normal.'.\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n DESCR : str\n Description of the kddcup99 dataset.", + "code": "def _fetch_brute_kddcup99(data_home=None,\n download_if_missing=True, percent10=True):\n\n \"\"\"Load the kddcup99 dataset, downloading it if necessary.\n\n Parameters\n ----------\n data_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n download_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n percent10 : bool, default=True\n Whether to load only 10 percent of the data.\n\n Returns\n -------\n dataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (494021, 41)\n Each row corresponds to the 41 features in the dataset.\n target : ndarray of shape (494021,)\n Each value corresponds to one of the 21 attack types or to the\n label 'normal.'.\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n DESCR : str\n Description of the kddcup99 dataset.\n\n \"\"\"\n\n data_home = get_data_home(data_home=data_home)\n dir_suffix = \"-py3\"\n\n if percent10:\n kddcup_dir = join(data_home, \"kddcup99_10\" + dir_suffix)\n archive = ARCHIVE_10_PERCENT\n else:\n kddcup_dir = join(data_home, \"kddcup99\" + dir_suffix)\n archive = ARCHIVE\n\n samples_path = join(kddcup_dir, \"samples\")\n targets_path = join(kddcup_dir, \"targets\")\n available = exists(samples_path)\n\n dt = [('duration', int),\n ('protocol_type', 'S4'),\n ('service', 'S11'),\n ('flag', 'S6'),\n ('src_bytes', int),\n ('dst_bytes', int),\n ('land', int),\n ('wrong_fragment', int),\n ('urgent', int),\n ('hot', int),\n ('num_failed_logins', int),\n ('logged_in', int),\n ('num_compromised', int),\n ('root_shell', int),\n ('su_attempted', int),\n ('num_root', int),\n ('num_file_creations', int),\n ('num_shells', int),\n ('num_access_files', int),\n ('num_outbound_cmds', int),\n ('is_host_login', int),\n ('is_guest_login', int),\n ('count', int),\n ('srv_count', int),\n ('serror_rate', float),\n ('srv_serror_rate', float),\n ('rerror_rate', float),\n ('srv_rerror_rate', float),\n ('same_srv_rate', float),\n ('diff_srv_rate', float),\n ('srv_diff_host_rate', 
float),\n ('dst_host_count', int),\n ('dst_host_srv_count', int),\n ('dst_host_same_srv_rate', float),\n ('dst_host_diff_srv_rate', float),\n ('dst_host_same_src_port_rate', float),\n ('dst_host_srv_diff_host_rate', float),\n ('dst_host_serror_rate', float),\n ('dst_host_srv_serror_rate', float),\n ('dst_host_rerror_rate', float),\n ('dst_host_srv_rerror_rate', float),\n ('labels', 'S16')]\n\n column_names = [c[0] for c in dt]\n target_names = column_names[-1]\n feature_names = column_names[:-1]\n if download_if_missing and not available:\n _mkdirp(kddcup_dir)\n logger.info(\"Downloading %s\" % archive.url)\n _fetch_remote(archive, dirname=kddcup_dir)\n DT = np.dtype(dt)\n logger.debug(\"extracting archive\")\n archive_path = join(kddcup_dir, archive.filename)\n file_ = GzipFile(filename=archive_path, mode='r')\n Xy = []\n for line in file_.readlines():\n line = line.decode()\n Xy.append(line.replace('\\n', '').split(','))\n file_.close()\n logger.debug('extraction done')\n os.remove(archive_path)\n\n Xy = np.asarray(Xy, dtype=object)\n for j in range(42):\n Xy[:, j] = Xy[:, j].astype(DT[j])\n\n X = Xy[:, :-1]\n y = Xy[:, -1]\n # XXX bug when compress!=0:\n # (error: 'Incorrect data length while decompressing[...] the file\n # could be corrupted.')\n\n joblib.dump(X, samples_path, compress=0)\n joblib.dump(y, targets_path, compress=0)\n elif not available:\n if not download_if_missing:\n raise IOError(\"Data not found and `download_if_missing` is False\")\n\n try:\n X, y\n except NameError:\n X = joblib.load(samples_path)\n y = joblib.load(targets_path)\n\n return Bunch(\n data=X,\n target=y,\n feature_names=feature_names,\n target_names=[target_names],\n )" + }, + { + "id": "scikit-learn/sklearn.datasets._kddcup99/_mkdirp", + "name": "_mkdirp", + "qname": "sklearn.datasets._kddcup99._mkdirp", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._kddcup99/_mkdirp/d", + "name": "d", + "qname": "sklearn.datasets._kddcup99._mkdirp.d", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Ensure directory d exists (like mkdir -p on Unix)\nNo guarantee that the directory is writable.", + "docstring": "Ensure directory d exists (like mkdir -p on Unix)\nNo guarantee that the directory is writable.", + "code": "def _mkdirp(d):\n \"\"\"Ensure directory d exists (like mkdir -p on Unix)\n No guarantee that the directory is writable.\n \"\"\"\n try:\n os.makedirs(d)\n except OSError as e:\n if e.errno != errno.EEXIST:\n raise" + }, + { + "id": "scikit-learn/sklearn.datasets._kddcup99/fetch_kddcup99", + "name": "fetch_kddcup99", + "qname": "sklearn.datasets._kddcup99.fetch_kddcup99", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._kddcup99/fetch_kddcup99/subset", + "name": "subset", + "qname": "sklearn.datasets._kddcup99.fetch_kddcup99.subset", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'SA', 'SF', 'http', 'smtp'}", + "default_value": "None", + "description": "To return the corresponding classical subsets of kddcup 99.\nIf None, return the entire kddcup 99 dataset." 
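`_mkdirp`, recorded above, is a Python 2 era helper that swallows `errno.EEXIST`; on Python 3 the same create-if-missing behaviour is a one-liner, sketched here for comparison (the path is a hypothetical example):

```python
import os

# Python 3 equivalent of _mkdirp(d): create the directory and any missing
# parents, tolerating the case where it already exists (exist_ok=True
# suppresses exactly the errno.EEXIST branch handled in the original).
os.makedirs("some/cache/dir", exist_ok=True)
```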
+ }, + "type": { + "kind": "EnumType", + "values": ["http", "smtp", "SA", "SF"] + } + }, + { + "id": "scikit-learn/sklearn.datasets._kddcup99/fetch_kddcup99/data_home", + "name": "data_home", + "qname": "sklearn.datasets._kddcup99.fetch_kddcup99.data_home", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Specify another download and cache folder for the datasets. By default\nall scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._kddcup99/fetch_kddcup99/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._kddcup99.fetch_kddcup99.shuffle", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to shuffle dataset." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._kddcup99/fetch_kddcup99/random_state", + "name": "random_state", + "qname": "sklearn.datasets._kddcup99.fetch_kddcup99.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset shuffling and for\nselection of abnormal samples if `subset='SA'`. Pass an int for\nreproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._kddcup99/fetch_kddcup99/percent10", + "name": "percent10", + "qname": "sklearn.datasets._kddcup99.fetch_kddcup99.percent10", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to load only 10 percent of the data." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._kddcup99/fetch_kddcup99/download_if_missing", + "name": "download_if_missing", + "qname": "sklearn.datasets._kddcup99.fetch_kddcup99.download_if_missing", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, raise a IOError if the data is not locally available\ninstead of trying to download the data from the source site." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._kddcup99/fetch_kddcup99/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._kddcup99.fetch_kddcup99.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns ``(data, target)`` instead of a Bunch object. See\nbelow for more information about the `data` and `target` object.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._kddcup99/fetch_kddcup99/as_frame", + "name": "as_frame", + "qname": "sklearn.datasets._kddcup99.fetch_kddcup99.as_frame", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If `True`, returns a pandas Dataframe for the ``data`` and ``target``\nobjects in the `Bunch` returned object; `Bunch` return object will also\nhave a ``frame`` member.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load the kddcup99 dataset (classification).\n\nDownload it if necessary.\n\n================= ====================================\nClasses 23\nSamples total 4898431\nDimensionality 41\nFeatures discrete (int) or continuous (float)\n================= ====================================\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "Load the kddcup99 dataset (classification).\n\nDownload it if necessary.\n\n================= ====================================\nClasses 23\nSamples total 4898431\nDimensionality 41\nFeatures discrete (int) or continuous (float)\n================= ====================================\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nsubset : {'SA', 'SF', 'http', 'smtp'}, default=None\n To return the corresponding classical subsets of kddcup 99.\n If None, return the entire kddcup 99 dataset.\n\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n .. versionadded:: 0.19\n\nshuffle : bool, default=False\n Whether to shuffle dataset.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and for\n selection of abnormal samples if `subset='SA'`. Pass an int for\n reproducible output across multiple function calls.\n See :term:`Glossary `.\n\npercent10 : bool, default=True\n Whether to load only 10 percent of the data.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.20\n\nas_frame : bool, default=False\n If `True`, returns a pandas Dataframe for the ``data`` and ``target``\n objects in the `Bunch` returned object; `Bunch` return object will also\n have a ``frame`` member.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (494021, 41)\n The data matrix to learn. If `as_frame=True`, `data` will be a\n pandas DataFrame.\n target : {ndarray, series} of shape (494021,)\n The regression target for each sample. If `as_frame=True`, `target`\n will be a pandas Series.\n frame : dataframe of shape (494021, 42)\n Only present when `as_frame=True`. 
Contains `data` and `target`.\n DESCR : str\n The full description of the dataset.\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20", + "code": "@_deprecate_positional_args\ndef fetch_kddcup99(*, subset=None, data_home=None, shuffle=False,\n random_state=None,\n percent10=True, download_if_missing=True, return_X_y=False,\n as_frame=False):\n \"\"\"Load the kddcup99 dataset (classification).\n\n Download it if necessary.\n\n ================= ====================================\n Classes 23\n Samples total 4898431\n Dimensionality 41\n Features discrete (int) or continuous (float)\n ================= ====================================\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n subset : {'SA', 'SF', 'http', 'smtp'}, default=None\n To return the corresponding classical subsets of kddcup 99.\n If None, return the entire kddcup 99 dataset.\n\n data_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n .. versionadded:: 0.19\n\n shuffle : bool, default=False\n Whether to shuffle dataset.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and for\n selection of abnormal samples if `subset='SA'`. Pass an int for\n reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n percent10 : bool, default=True\n Whether to load only 10 percent of the data.\n\n download_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n return_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.20\n\n as_frame : bool, default=False\n If `True`, returns a pandas Dataframe for the ``data`` and ``target``\n objects in the `Bunch` returned object; `Bunch` return object will also\n have a ``frame`` member.\n\n .. versionadded:: 0.24\n\n Returns\n -------\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (494021, 41)\n The data matrix to learn. If `as_frame=True`, `data` will be a\n pandas DataFrame.\n target : {ndarray, series} of shape (494021,)\n The regression target for each sample. If `as_frame=True`, `target`\n will be a pandas Series.\n frame : dataframe of shape (494021, 42)\n Only present when `as_frame=True`. Contains `data` and `target`.\n DESCR : str\n The full description of the dataset.\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n\n (data, target) : tuple if ``return_X_y`` is True\n\n .. 
versionadded:: 0.20\n \"\"\"\n data_home = get_data_home(data_home=data_home)\n kddcup99 = _fetch_brute_kddcup99(\n data_home=data_home,\n percent10=percent10,\n download_if_missing=download_if_missing\n )\n\n data = kddcup99.data\n target = kddcup99.target\n feature_names = kddcup99.feature_names\n target_names = kddcup99.target_names\n\n if subset == 'SA':\n s = target == b'normal.'\n t = np.logical_not(s)\n normal_samples = data[s, :]\n normal_targets = target[s]\n abnormal_samples = data[t, :]\n abnormal_targets = target[t]\n\n n_samples_abnormal = abnormal_samples.shape[0]\n # selected abnormal samples:\n random_state = check_random_state(random_state)\n r = random_state.randint(0, n_samples_abnormal, 3377)\n abnormal_samples = abnormal_samples[r]\n abnormal_targets = abnormal_targets[r]\n\n data = np.r_[normal_samples, abnormal_samples]\n target = np.r_[normal_targets, abnormal_targets]\n\n if subset == 'SF' or subset == 'http' or subset == 'smtp':\n # select all samples with positive logged_in attribute:\n s = data[:, 11] == 1\n data = np.c_[data[s, :11], data[s, 12:]]\n feature_names = feature_names[:11] + feature_names[12:]\n target = target[s]\n\n data[:, 0] = np.log((data[:, 0] + 0.1).astype(float, copy=False))\n data[:, 4] = np.log((data[:, 4] + 0.1).astype(float, copy=False))\n data[:, 5] = np.log((data[:, 5] + 0.1).astype(float, copy=False))\n\n if subset == 'http':\n s = data[:, 2] == b'http'\n data = data[s]\n target = target[s]\n data = np.c_[data[:, 0], data[:, 4], data[:, 5]]\n feature_names = [feature_names[0], feature_names[4],\n feature_names[5]]\n\n if subset == 'smtp':\n s = data[:, 2] == b'smtp'\n data = data[s]\n target = target[s]\n data = np.c_[data[:, 0], data[:, 4], data[:, 5]]\n feature_names = [feature_names[0], feature_names[4],\n feature_names[5]]\n\n if subset == 'SF':\n data = np.c_[data[:, 0], data[:, 2], data[:, 4], data[:, 5]]\n feature_names = [feature_names[0], feature_names[2],\n feature_names[4], feature_names[5]]\n\n if shuffle:\n data, target = shuffle_method(data, target, random_state=random_state)\n\n module_path = dirname(__file__)\n with open(join(module_path, 'descr', 'kddcup99.rst')) as rst_file:\n fdescr = rst_file.read()\n\n frame = None\n if as_frame:\n frame, data, target = _convert_data_dataframe(\n \"fetch_kddcup99\", data, target, feature_names, target_names\n )\n\n if return_X_y:\n return data, target\n\n return Bunch(\n data=data,\n target=target,\n frame=frame,\n target_names=target_names,\n feature_names=feature_names,\n DESCR=fdescr,\n )" + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_check_fetch_lfw", + "name": "_check_fetch_lfw", + "qname": "sklearn.datasets._lfw._check_fetch_lfw", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._lfw/_check_fetch_lfw/data_home", + "name": "data_home", + "qname": "sklearn.datasets._lfw._check_fetch_lfw.data_home", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_check_fetch_lfw/funneled", + "name": "funneled", + "qname": "sklearn.datasets._lfw._check_fetch_lfw.funneled", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_check_fetch_lfw/download_if_missing", + "name": "download_if_missing", + 
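The `fetch_kddcup99` code above shows how each `subset` value filters and transforms the brute dataset: `'SA'` draws 3377 abnormal rows alongside all normal ones, while `'http'` and `'smtp'` keep only logged-in traffic reduced to three log-scaled features (duration, src_bytes, dst_bytes). An illustrative call against that documented behaviour (first invocation downloads the data):

```python
# Sketch of the subset handling documented in the fetch_kddcup99 entry above.
from sklearn.datasets import fetch_kddcup99

# 'SA': all normal samples plus 3377 randomly selected abnormal ones.
sa = fetch_kddcup99(subset="SA", percent10=True, random_state=0)
print(sa.data.shape)  # (n_normal + 3377, 41)

# 'http': logged-in HTTP traffic only, three log-scaled columns remain.
X, y = fetch_kddcup99(subset="http", return_X_y=True)
print(X.shape)  # (n_http_samples, 3)
```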
"qname": "sklearn.datasets._lfw._check_fetch_lfw.download_if_missing", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper function to download any missing LFW data", + "docstring": "Helper function to download any missing LFW data", + "code": "def _check_fetch_lfw(data_home=None, funneled=True, download_if_missing=True):\n \"\"\"Helper function to download any missing LFW data\"\"\"\n\n data_home = get_data_home(data_home=data_home)\n lfw_home = join(data_home, \"lfw_home\")\n\n if not exists(lfw_home):\n makedirs(lfw_home)\n\n for target in TARGETS:\n target_filepath = join(lfw_home, target.filename)\n if not exists(target_filepath):\n if download_if_missing:\n logger.info(\"Downloading LFW metadata: %s\", target.url)\n _fetch_remote(target, dirname=lfw_home)\n else:\n raise IOError(\"%s is missing\" % target_filepath)\n\n if funneled:\n data_folder_path = join(lfw_home, \"lfw_funneled\")\n archive = FUNNELED_ARCHIVE\n else:\n data_folder_path = join(lfw_home, \"lfw\")\n archive = ARCHIVE\n\n if not exists(data_folder_path):\n archive_path = join(lfw_home, archive.filename)\n if not exists(archive_path):\n if download_if_missing:\n logger.info(\"Downloading LFW data (~200MB): %s\",\n archive.url)\n _fetch_remote(archive, dirname=lfw_home)\n else:\n raise IOError(\"%s is missing\" % archive_path)\n\n import tarfile\n logger.debug(\"Decompressing the data archive to %s\", data_folder_path)\n tarfile.open(archive_path, \"r:gz\").extractall(path=lfw_home)\n remove(archive_path)\n\n return lfw_home, data_folder_path" + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_pairs", + "name": "_fetch_lfw_pairs", + "qname": "sklearn.datasets._lfw._fetch_lfw_pairs", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_pairs/index_file_path", + "name": "index_file_path", + "qname": "sklearn.datasets._lfw._fetch_lfw_pairs.index_file_path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_pairs/data_folder_path", + "name": "data_folder_path", + "qname": "sklearn.datasets._lfw._fetch_lfw_pairs.data_folder_path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_pairs/slice_", + "name": "slice_", + "qname": "sklearn.datasets._lfw._fetch_lfw_pairs.slice_", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_pairs/color", + "name": "color", + "qname": "sklearn.datasets._lfw._fetch_lfw_pairs.color", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_pairs/resize", + "name": "resize", + "qname": "sklearn.datasets._lfw._fetch_lfw_pairs.resize", + "default_value": "None", + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform the actual data loading for the LFW pairs dataset\n\nThis operation is meant to be cached by a joblib wrapper.", + "docstring": "Perform the actual data loading for the LFW pairs dataset\n\nThis operation is meant to be cached by a joblib wrapper.", + "code": "def _fetch_lfw_pairs(index_file_path, data_folder_path, slice_=None,\n color=False, resize=None):\n \"\"\"Perform the actual data loading for the LFW pairs dataset\n\n This operation is meant to be cached by a joblib wrapper.\n \"\"\"\n # parse the index file to find the number of pairs to be able to allocate\n # the right amount of memory before starting to decode the jpeg files\n with open(index_file_path, 'rb') as index_file:\n split_lines = [ln.decode().strip().split('\\t') for ln in index_file]\n pair_specs = [sl for sl in split_lines if len(sl) > 2]\n n_pairs = len(pair_specs)\n\n # iterating over the metadata lines for each pair to find the filename to\n # decode and load in memory\n target = np.zeros(n_pairs, dtype=int)\n file_paths = list()\n for i, components in enumerate(pair_specs):\n if len(components) == 3:\n target[i] = 1\n pair = (\n (components[0], int(components[1]) - 1),\n (components[0], int(components[2]) - 1),\n )\n elif len(components) == 4:\n target[i] = 0\n pair = (\n (components[0], int(components[1]) - 1),\n (components[2], int(components[3]) - 1),\n )\n else:\n raise ValueError(\"invalid line %d: %r\" % (i + 1, components))\n for j, (name, idx) in enumerate(pair):\n try:\n person_folder = join(data_folder_path, name)\n except TypeError:\n person_folder = join(data_folder_path, str(name, 'UTF-8'))\n filenames = list(sorted(listdir(person_folder)))\n file_path = join(person_folder, filenames[idx])\n file_paths.append(file_path)\n\n pairs = _load_imgs(file_paths, slice_, color, resize)\n shape = list(pairs.shape)\n n_faces = shape.pop(0)\n shape.insert(0, 2)\n shape.insert(0, n_faces // 2)\n pairs.shape = shape\n\n return pairs, target, np.array(['Different persons', 'Same person'])" + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_people", + "name": "_fetch_lfw_people", + "qname": "sklearn.datasets._lfw._fetch_lfw_people", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_people/data_folder_path", + "name": "data_folder_path", + "qname": "sklearn.datasets._lfw._fetch_lfw_people.data_folder_path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_people/slice_", + "name": "slice_", + "qname": "sklearn.datasets._lfw._fetch_lfw_people.slice_", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_people/color", + "name": "color", + "qname": "sklearn.datasets._lfw._fetch_lfw_people.color", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_people/resize", + "name": 
"resize", + "qname": "sklearn.datasets._lfw._fetch_lfw_people.resize", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_fetch_lfw_people/min_faces_per_person", + "name": "min_faces_per_person", + "qname": "sklearn.datasets._lfw._fetch_lfw_people.min_faces_per_person", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform the actual data loading for the lfw people dataset\n\nThis operation is meant to be cached by a joblib wrapper.", + "docstring": "Perform the actual data loading for the lfw people dataset\n\nThis operation is meant to be cached by a joblib wrapper.", + "code": "def _fetch_lfw_people(data_folder_path, slice_=None, color=False, resize=None,\n min_faces_per_person=0):\n \"\"\"Perform the actual data loading for the lfw people dataset\n\n This operation is meant to be cached by a joblib wrapper.\n \"\"\"\n # scan the data folder content to retain people with more that\n # `min_faces_per_person` face pictures\n person_names, file_paths = [], []\n for person_name in sorted(listdir(data_folder_path)):\n folder_path = join(data_folder_path, person_name)\n if not isdir(folder_path):\n continue\n paths = [join(folder_path, f) for f in sorted(listdir(folder_path))]\n n_pictures = len(paths)\n if n_pictures >= min_faces_per_person:\n person_name = person_name.replace('_', ' ')\n person_names.extend([person_name] * n_pictures)\n file_paths.extend(paths)\n\n n_faces = len(file_paths)\n if n_faces == 0:\n raise ValueError(\"min_faces_per_person=%d is too restrictive\" %\n min_faces_per_person)\n\n target_names = np.unique(person_names)\n target = np.searchsorted(target_names, person_names)\n\n faces = _load_imgs(file_paths, slice_, color, resize)\n\n # shuffle the faces with a deterministic RNG scheme to avoid having\n # all faces of the same person in a row, as it would break some\n # cross validation and learning algorithms such as SGD and online\n # k-means that make an IID assumption\n\n indices = np.arange(n_faces)\n np.random.RandomState(42).shuffle(indices)\n faces, target = faces[indices], target[indices]\n return faces, target, target_names" + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_load_imgs", + "name": "_load_imgs", + "qname": "sklearn.datasets._lfw._load_imgs", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._lfw/_load_imgs/file_paths", + "name": "file_paths", + "qname": "sklearn.datasets._lfw._load_imgs.file_paths", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_load_imgs/slice_", + "name": "slice_", + "qname": "sklearn.datasets._lfw._load_imgs.slice_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_load_imgs/color", + "name": "color", + "qname": "sklearn.datasets._lfw._load_imgs.color", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + 
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/_load_imgs/resize", + "name": "resize", + "qname": "sklearn.datasets._lfw._load_imgs.resize", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Internally used to load images", + "docstring": "Internally used to load images", + "code": "def _load_imgs(file_paths, slice_, color, resize):\n \"\"\"Internally used to load images\"\"\"\n # import PIL only when needed\n from ..externals._pilutil import imread, imresize\n\n # compute the portion of the images to load to respect the slice_ parameter\n # given by the caller\n default_slice = (slice(0, 250), slice(0, 250))\n if slice_ is None:\n slice_ = default_slice\n else:\n slice_ = tuple(s or ds for s, ds in zip(slice_, default_slice))\n\n h_slice, w_slice = slice_\n h = (h_slice.stop - h_slice.start) // (h_slice.step or 1)\n w = (w_slice.stop - w_slice.start) // (w_slice.step or 1)\n\n if resize is not None:\n resize = float(resize)\n h = int(resize * h)\n w = int(resize * w)\n\n # allocate some contiguous memory to host the decoded image slices\n n_faces = len(file_paths)\n if not color:\n faces = np.zeros((n_faces, h, w), dtype=np.float32)\n else:\n faces = np.zeros((n_faces, h, w, 3), dtype=np.float32)\n\n # iterate over the collected file path to load the jpeg files as numpy\n # arrays\n for i, file_path in enumerate(file_paths):\n if i % 1000 == 0:\n logger.debug(\"Loading face #%05d / %05d\", i + 1, n_faces)\n\n # Checks if jpeg reading worked. Refer to issue #3594 for more\n # details.\n img = imread(file_path)\n if img.ndim == 0:\n raise RuntimeError(\"Failed to read the image file %s, \"\n \"Please make sure that libjpeg is installed\"\n % file_path)\n\n face = np.asarray(img[slice_], dtype=np.float32)\n face /= 255.0 # scale uint8 coded colors to the [0.0, 1.0] floats\n if resize is not None:\n face = imresize(face, resize)\n if not color:\n # average the color channels to compute a gray levels\n # representation\n face = face.mean(axis=2)\n\n faces[i, ...] = face\n\n return faces" + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_pairs", + "name": "fetch_lfw_pairs", + "qname": "sklearn.datasets._lfw.fetch_lfw_pairs", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_pairs/subset", + "name": "subset", + "qname": "sklearn.datasets._lfw.fetch_lfw_pairs.subset", + "default_value": "'train'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'train', 'test', '10_folds'}", + "default_value": "'train'", + "description": "Select the dataset to load: 'train' for the development training\nset, 'test' for the development test set, and '10_folds' for the\nofficial evaluation set that is meant to be used with a 10-folds\ncross validation." 
+ }, + "type": { + "kind": "EnumType", + "values": ["10_folds", "test", "train"] + } + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_pairs/data_home", + "name": "data_home", + "qname": "sklearn.datasets._lfw.fetch_lfw_pairs.data_home", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Specify another download and cache folder for the datasets. By\ndefault all scikit-learn data is stored in '~/scikit_learn_data'\nsubfolders." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_pairs/funneled", + "name": "funneled", + "qname": "sklearn.datasets._lfw.fetch_lfw_pairs.funneled", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Download and use the funneled variant of the dataset." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_pairs/resize", + "name": "resize", + "qname": "sklearn.datasets._lfw.fetch_lfw_pairs.resize", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "Ratio used to resize the each face picture." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_pairs/color", + "name": "color", + "qname": "sklearn.datasets._lfw.fetch_lfw_pairs.color", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Keep the 3 RGB channels instead of averaging them to a single\ngray level channel. If color is True the shape of the data has\none more dimension than the shape with color = False." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_pairs/slice_", + "name": "slice_", + "qname": "sklearn.datasets._lfw.fetch_lfw_pairs.slice_", + "default_value": "(slice(70, 195), slice(78, 172))", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "tuple of slice", + "default_value": "(slice(70, 195), slice(78, 172))", + "description": "Provide a custom 2D slice (height, width) to extract the\n'interesting' part of the jpeg files and avoid use statistical\ncorrelation from the background" + }, + "type": { + "kind": "NamedType", + "name": "tuple of slice" + } + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_pairs/download_if_missing", + "name": "download_if_missing", + "qname": "sklearn.datasets._lfw.fetch_lfw_pairs.download_if_missing", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, raise a IOError if the data is not locally available\ninstead of trying to download the data from the source site." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).\n\nDownload it if necessary.\n\n================= =======================\nClasses 2\nSamples total 13233\nDimensionality 5828\nFeatures real, between 0 and 255\n================= =======================\n\nIn the official `README.txt`_ this task is described as the\n\"Restricted\" task. As I am not sure as to implement the\n\"Unrestricted\" variant correctly, I left it as unsupported for now.\n\n .. _`README.txt`: http://vis-www.cs.umass.edu/lfw/README.txt\n\nThe original images are 250 x 250 pixels, but the default slice and resize\narguments reduce them to 62 x 47.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).\n\nDownload it if necessary.\n\n================= =======================\nClasses 2\nSamples total 13233\nDimensionality 5828\nFeatures real, between 0 and 255\n================= =======================\n\nIn the official `README.txt`_ this task is described as the\n\"Restricted\" task. As I am not sure as to implement the\n\"Unrestricted\" variant correctly, I left it as unsupported for now.\n\n .. _`README.txt`: http://vis-www.cs.umass.edu/lfw/README.txt\n\nThe original images are 250 x 250 pixels, but the default slice and resize\narguments reduce them to 62 x 47.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsubset : {'train', 'test', '10_folds'}, default='train'\n Select the dataset to load: 'train' for the development training\n set, 'test' for the development test set, and '10_folds' for the\n official evaluation set that is meant to be used with a 10-folds\n cross validation.\n\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By\n default all scikit-learn data is stored in '~/scikit_learn_data'\n subfolders.\n\nfunneled : bool, default=True\n Download and use the funneled variant of the dataset.\n\nresize : float, default=0.5\n Ratio used to resize the each face picture.\n\ncolor : bool, default=False\n Keep the 3 RGB channels instead of averaging them to a single\n gray level channel. If color is True the shape of the data has\n one more dimension than the shape with color = False.\n\nslice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n Provide a custom 2D slice (height, width) to extract the\n 'interesting' part of the jpeg files and avoid use statistical\n correlation from the background\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (2200, 5828). Shape depends on ``subset``.\n Each row corresponds to 2 ravel'd face images\n of original size 62 x 47 pixels.\n Changing the ``slice_``, ``resize`` or ``subset`` parameters\n will change the shape of the output.\n pairs : ndarray of shape (2200, 2, 62, 47). Shape depends on ``subset``\n Each row has 2 face images corresponding\n to same or different person from the dataset\n containing 5749 people. Changing the ``slice_``,\n ``resize`` or ``subset`` parameters will change the shape of the\n output.\n target : numpy array of shape (2200,). 
Shape depends on ``subset``.\n Labels associated to each pair of images.\n The two label values being different persons or the same person.\n DESCR : string\n Description of the Labeled Faces in the Wild (LFW) dataset.", + "code": "@_deprecate_positional_args\ndef fetch_lfw_pairs(*, subset='train', data_home=None, funneled=True,\n resize=0.5,\n color=False, slice_=(slice(70, 195), slice(78, 172)),\n download_if_missing=True):\n \"\"\"Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).\n\n Download it if necessary.\n\n ================= =======================\n Classes 2\n Samples total 13233\n Dimensionality 5828\n Features real, between 0 and 255\n ================= =======================\n\n In the official `README.txt`_ this task is described as the\n \"Restricted\" task. As I am not sure as to implement the\n \"Unrestricted\" variant correctly, I left it as unsupported for now.\n\n .. _`README.txt`: http://vis-www.cs.umass.edu/lfw/README.txt\n\n The original images are 250 x 250 pixels, but the default slice and resize\n arguments reduce them to 62 x 47.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n subset : {'train', 'test', '10_folds'}, default='train'\n Select the dataset to load: 'train' for the development training\n set, 'test' for the development test set, and '10_folds' for the\n official evaluation set that is meant to be used with a 10-folds\n cross validation.\n\n data_home : str, default=None\n Specify another download and cache folder for the datasets. By\n default all scikit-learn data is stored in '~/scikit_learn_data'\n subfolders.\n\n funneled : bool, default=True\n Download and use the funneled variant of the dataset.\n\n resize : float, default=0.5\n Ratio used to resize the each face picture.\n\n color : bool, default=False\n Keep the 3 RGB channels instead of averaging them to a single\n gray level channel. If color is True the shape of the data has\n one more dimension than the shape with color = False.\n\n slice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n Provide a custom 2D slice (height, width) to extract the\n 'interesting' part of the jpeg files and avoid use statistical\n correlation from the background\n\n download_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n Returns\n -------\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (2200, 5828). Shape depends on ``subset``.\n Each row corresponds to 2 ravel'd face images\n of original size 62 x 47 pixels.\n Changing the ``slice_``, ``resize`` or ``subset`` parameters\n will change the shape of the output.\n pairs : ndarray of shape (2200, 2, 62, 47). Shape depends on ``subset``\n Each row has 2 face images corresponding\n to same or different person from the dataset\n containing 5749 people. Changing the ``slice_``,\n ``resize`` or ``subset`` parameters will change the shape of the\n output.\n target : numpy array of shape (2200,). 
Shape depends on ``subset``.\n Labels associated to each pair of images.\n The two label values being different persons or the same person.\n DESCR : string\n Description of the Labeled Faces in the Wild (LFW) dataset.\n\n \"\"\"\n lfw_home, data_folder_path = _check_fetch_lfw(\n data_home=data_home, funneled=funneled,\n download_if_missing=download_if_missing)\n logger.debug('Loading %s LFW pairs from %s', subset, lfw_home)\n\n # wrap the loader in a memoizing function that will return memmaped data\n # arrays for optimal memory usage\n if parse_version(joblib.__version__) < parse_version('0.12'):\n # Deal with change of API in joblib\n m = Memory(cachedir=lfw_home, compress=6, verbose=0)\n else:\n m = Memory(location=lfw_home, compress=6, verbose=0)\n load_func = m.cache(_fetch_lfw_pairs)\n\n # select the right metadata file according to the requested subset\n label_filenames = {\n 'train': 'pairsDevTrain.txt',\n 'test': 'pairsDevTest.txt',\n '10_folds': 'pairs.txt',\n }\n if subset not in label_filenames:\n raise ValueError(\"subset='%s' is invalid: should be one of %r\" % (\n subset, list(sorted(label_filenames.keys()))))\n index_file_path = join(lfw_home, label_filenames[subset])\n\n # load and memoize the pairs as np arrays\n pairs, target, target_names = load_func(\n index_file_path, data_folder_path, resize=resize, color=color,\n slice_=slice_)\n\n module_path = dirname(__file__)\n with open(join(module_path, 'descr', 'lfw.rst')) as rst_file:\n fdescr = rst_file.read()\n\n # pack the results as a Bunch instance\n return Bunch(data=pairs.reshape(len(pairs), -1), pairs=pairs,\n target=target, target_names=target_names,\n DESCR=fdescr)" + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_people", + "name": "fetch_lfw_people", + "qname": "sklearn.datasets._lfw.fetch_lfw_people", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_people/data_home", + "name": "data_home", + "qname": "sklearn.datasets._lfw.fetch_lfw_people.data_home", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Specify another download and cache folder for the datasets. By default\nall scikit-learn data is stored in '~/scikit_learn_data' subfolders." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_people/funneled", + "name": "funneled", + "qname": "sklearn.datasets._lfw.fetch_lfw_people.funneled", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Download and use the funneled variant of the dataset." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_people/resize", + "name": "resize", + "qname": "sklearn.datasets._lfw.fetch_lfw_people.resize", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "Ratio used to resize the each face picture." 
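The `fetch_lfw_pairs` entry ends above; per its docstring, `data` is simply the per-pair flattening of `pairs` (2 x 62 x 47 = 5828 features with the default slice and resize). A usage sketch of the documented return values (assumes the ~200MB LFW archive can be downloaded or is already cached):

```python
# Usage sketch for fetch_lfw_pairs as documented in the entry above.
from sklearn.datasets import fetch_lfw_pairs

pairs_train = fetch_lfw_pairs(subset="train")
print(pairs_train.pairs.shape)   # (2200, 2, 62, 47) for the 'train' subset
print(pairs_train.data.shape)    # (2200, 5828), the flattened view
print(pairs_train.target_names)  # ['Different persons' 'Same person']
```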
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_people/min_faces_per_person", + "name": "min_faces_per_person", + "qname": "sklearn.datasets._lfw.fetch_lfw_people.min_faces_per_person", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The extracted dataset will only retain pictures of people that have at\nleast `min_faces_per_person` different pictures." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_people/color", + "name": "color", + "qname": "sklearn.datasets._lfw.fetch_lfw_people.color", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Keep the 3 RGB channels instead of averaging them to a single\ngray level channel. If color is True the shape of the data has\none more dimension than the shape with color = False." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_people/slice_", + "name": "slice_", + "qname": "sklearn.datasets._lfw.fetch_lfw_people.slice_", + "default_value": "(slice(70, 195), slice(78, 172))", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "tuple of slice", + "default_value": "(slice(70, 195), slice(78, 172))", + "description": "Provide a custom 2D slice (height, width) to extract the\n'interesting' part of the jpeg files and avoid use statistical\ncorrelation from the background" + }, + "type": { + "kind": "NamedType", + "name": "tuple of slice" + } + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_people/download_if_missing", + "name": "download_if_missing", + "qname": "sklearn.datasets._lfw.fetch_lfw_people.download_if_missing", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, raise a IOError if the data is not locally available\ninstead of trying to download the data from the source site." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._lfw/fetch_lfw_people/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._lfw.fetch_lfw_people.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\nobject. See below for more information about the `dataset.data` and\n`dataset.target` object.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load the Labeled Faces in the Wild (LFW) people dataset (classification).\n\nDownload it if necessary.\n\n================= =======================\nClasses 5749\nSamples total 13233\nDimensionality 5828\nFeatures real, between 0 and 255\n================= =======================\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load the Labeled Faces in the Wild (LFW) people dataset (classification).\n\nDownload it if necessary.\n\n================= =======================\nClasses 5749\nSamples total 13233\nDimensionality 5828\nFeatures real, between 0 and 255\n================= =======================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nfunneled : bool, default=True\n Download and use the funneled variant of the dataset.\n\nresize : float, default=0.5\n Ratio used to resize the each face picture.\n\nmin_faces_per_person : int, default=None\n The extracted dataset will only retain pictures of people that have at\n least `min_faces_per_person` different pictures.\n\ncolor : bool, default=False\n Keep the 3 RGB channels instead of averaging them to a single\n gray level channel. If color is True the shape of the data has\n one more dimension than the shape with color = False.\n\nslice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n Provide a custom 2D slice (height, width) to extract the\n 'interesting' part of the jpeg files and avoid use statistical\n correlation from the background\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n object. See below for more information about the `dataset.data` and\n `dataset.target` object.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : numpy array of shape (13233, 2914)\n Each row corresponds to a ravelled face image\n of original size 62 x 47 pixels.\n Changing the ``slice_`` or resize parameters will change the\n shape of the output.\n images : numpy array of shape (13233, 62, 47)\n Each row is a face image corresponding to one of the 5749 people in\n the dataset. Changing the ``slice_``\n or resize parameters will change the shape of the output.\n target : numpy array of shape (13233,)\n Labels associated to each face image.\n Those labels range from 0-5748 and correspond to the person IDs.\n DESCR : string\n Description of the Labeled Faces in the Wild (LFW) dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. 
versionadded:: 0.20", + "code": "@_deprecate_positional_args\ndef fetch_lfw_people(*, data_home=None, funneled=True, resize=0.5,\n min_faces_per_person=0, color=False,\n slice_=(slice(70, 195), slice(78, 172)),\n download_if_missing=True, return_X_y=False):\n \"\"\"Load the Labeled Faces in the Wild (LFW) people dataset \\\n(classification).\n\n Download it if necessary.\n\n ================= =======================\n Classes 5749\n Samples total 13233\n Dimensionality 5828\n Features real, between 0 and 255\n ================= =======================\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n data_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n funneled : bool, default=True\n Download and use the funneled variant of the dataset.\n\n resize : float, default=0.5\n Ratio used to resize the each face picture.\n\n min_faces_per_person : int, default=None\n The extracted dataset will only retain pictures of people that have at\n least `min_faces_per_person` different pictures.\n\n color : bool, default=False\n Keep the 3 RGB channels instead of averaging them to a single\n gray level channel. If color is True the shape of the data has\n one more dimension than the shape with color = False.\n\n slice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n Provide a custom 2D slice (height, width) to extract the\n 'interesting' part of the jpeg files and avoid use statistical\n correlation from the background\n\n download_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n return_X_y : bool, default=False\n If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n object. See below for more information about the `dataset.data` and\n `dataset.target` object.\n\n .. versionadded:: 0.20\n\n Returns\n -------\n dataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : numpy array of shape (13233, 2914)\n Each row corresponds to a ravelled face image\n of original size 62 x 47 pixels.\n Changing the ``slice_`` or resize parameters will change the\n shape of the output.\n images : numpy array of shape (13233, 62, 47)\n Each row is a face image corresponding to one of the 5749 people in\n the dataset. Changing the ``slice_``\n or resize parameters will change the shape of the output.\n target : numpy array of shape (13233,)\n Labels associated to each face image.\n Those labels range from 0-5748 and correspond to the person IDs.\n DESCR : string\n Description of the Labeled Faces in the Wild (LFW) dataset.\n\n (data, target) : tuple if ``return_X_y`` is True\n\n .. 
versionadded:: 0.20\n\n \"\"\"\n lfw_home, data_folder_path = _check_fetch_lfw(\n data_home=data_home, funneled=funneled,\n download_if_missing=download_if_missing)\n logger.debug('Loading LFW people faces from %s', lfw_home)\n\n # wrap the loader in a memoizing function that will return memmaped data\n # arrays for optimal memory usage\n if parse_version(joblib.__version__) < parse_version('0.12'):\n # Deal with change of API in joblib\n m = Memory(cachedir=lfw_home, compress=6, verbose=0)\n else:\n m = Memory(location=lfw_home, compress=6, verbose=0)\n load_func = m.cache(_fetch_lfw_people)\n\n # load and memoize the pairs as np arrays\n faces, target, target_names = load_func(\n data_folder_path, resize=resize,\n min_faces_per_person=min_faces_per_person, color=color, slice_=slice_)\n\n X = faces.reshape(len(faces), -1)\n\n module_path = dirname(__file__)\n with open(join(module_path, 'descr', 'lfw.rst')) as rst_file:\n fdescr = rst_file.read()\n\n if return_X_y:\n return X, target\n\n # pack the results as a Bunch instance\n return Bunch(data=X, images=faces,\n target=target, target_names=target_names,\n DESCR=fdescr)" + }, + { + "id": "scikit-learn/sklearn.datasets._olivetti_faces/fetch_olivetti_faces", + "name": "fetch_olivetti_faces", + "qname": "sklearn.datasets._olivetti_faces.fetch_olivetti_faces", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._olivetti_faces/fetch_olivetti_faces/data_home", + "name": "data_home", + "qname": "sklearn.datasets._olivetti_faces.fetch_olivetti_faces.data_home", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Specify another download and cache folder for the datasets. By default\nall scikit-learn data is stored in '~/scikit_learn_data' subfolders." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._olivetti_faces/fetch_olivetti_faces/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._olivetti_faces.fetch_olivetti_faces.shuffle", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True the order of the dataset is shuffled to avoid having\nimages of the same person grouped." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._olivetti_faces/fetch_olivetti_faces/random_state", + "name": "random_state", + "qname": "sklearn.datasets._olivetti_faces.fetch_olivetti_faces.random_state", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "0", + "description": "Determines random number generation for dataset shuffling. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._olivetti_faces/fetch_olivetti_faces/download_if_missing", + "name": "download_if_missing", + "qname": "sklearn.datasets._olivetti_faces.fetch_olivetti_faces.download_if_missing", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, raise a IOError if the data is not locally available\ninstead of trying to download the data from the source site." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._olivetti_faces/fetch_olivetti_faces/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._olivetti_faces.fetch_olivetti_faces.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns `(data, target)` instead of a `Bunch` object. See\nbelow for more information about the `data` and `target` object.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load the Olivetti faces data-set from AT&T (classification).\n\nDownload it if necessary.\n\n================= =====================\nClasses 40\nSamples total 400\nDimensionality 4096\nFeatures real, between 0 and 1\n================= =====================\n\nRead more in the :ref:`User Guide `.", + "docstring": "Load the Olivetti faces data-set from AT&T (classification).\n\nDownload it if necessary.\n\n================= =====================\nClasses 40\nSamples total 400\nDimensionality 4096\nFeatures real, between 0 and 1\n================= =====================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nshuffle : bool, default=False\n If True the order of the dataset is shuffled to avoid having\n images of the same person grouped.\n\nrandom_state : int, RandomState instance or None, default=0\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns `(data, target)` instead of a `Bunch` object. See\n below for more information about the `data` and `target` object.\n\n .. 
versionadded:: 0.22\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data: ndarray, shape (400, 4096)\n Each row corresponds to a ravelled\n face image of original size 64 x 64 pixels.\n images : ndarray, shape (400, 64, 64)\n Each row is a face image\n corresponding to one of the 40 subjects of the dataset.\n target : ndarray, shape (400,)\n Labels associated to each face image.\n Those labels range from 0-39 and correspond to the\n Subject IDs.\n DESCR : str\n Description of the modified Olivetti Faces Dataset.\n\n(data, target) : tuple if `return_X_y=True`\n .. versionadded:: 0.22", + "code": "@_deprecate_positional_args\ndef fetch_olivetti_faces(*, data_home=None, shuffle=False, random_state=0,\n download_if_missing=True, return_X_y=False):\n \"\"\"Load the Olivetti faces data-set from AT&T (classification).\n\n Download it if necessary.\n\n ================= =====================\n Classes 40\n Samples total 400\n Dimensionality 4096\n Features real, between 0 and 1\n ================= =====================\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n data_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n shuffle : bool, default=False\n If True the order of the dataset is shuffled to avoid having\n images of the same person grouped.\n\n random_state : int, RandomState instance or None, default=0\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n download_if_missing : bool, default=True\n If False, raise an IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n return_X_y : bool, default=False\n If True, returns `(data, target)` instead of a `Bunch` object. See\n below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.22\n\n Returns\n -------\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data: ndarray, shape (400, 4096)\n Each row corresponds to a ravelled\n face image of original size 64 x 64 pixels.\n images : ndarray, shape (400, 64, 64)\n Each row is a face image\n corresponding to one of the 40 subjects of the dataset.\n target : ndarray, shape (400,)\n Labels associated to each face image.\n Those labels range from 0-39 and correspond to the\n Subject IDs.\n DESCR : str\n Description of the modified Olivetti Faces Dataset.\n\n (data, target) : tuple if `return_X_y=True`\n .. 
versionadded:: 0.22\n \"\"\"\n data_home = get_data_home(data_home=data_home)\n if not exists(data_home):\n makedirs(data_home)\n filepath = _pkl_filepath(data_home, 'olivetti.pkz')\n if not exists(filepath):\n if not download_if_missing:\n raise IOError(\"Data not found and `download_if_missing` is False\")\n\n print('downloading Olivetti faces from %s to %s'\n % (FACES.url, data_home))\n mat_path = _fetch_remote(FACES, dirname=data_home)\n mfile = loadmat(file_name=mat_path)\n # delete raw .mat data\n remove(mat_path)\n\n faces = mfile['faces'].T.copy()\n joblib.dump(faces, filepath, compress=6)\n del mfile\n else:\n faces = joblib.load(filepath)\n\n # We want floating point data, but float32 is enough (there is only\n # one byte of precision in the original uint8s anyway)\n faces = np.float32(faces)\n faces = faces - faces.min()\n faces /= faces.max()\n faces = faces.reshape((400, 64, 64)).transpose(0, 2, 1)\n # 10 images per class, 400 images total, each class is contiguous.\n target = np.array([i // 10 for i in range(400)])\n if shuffle:\n random_state = check_random_state(random_state)\n order = random_state.permutation(len(faces))\n faces = faces[order]\n target = target[order]\n faces_vectorized = faces.reshape(len(faces), -1)\n\n module_path = dirname(__file__)\n with open(join(module_path, 'descr', 'olivetti_faces.rst')) as rst_file:\n fdescr = rst_file.read()\n\n if return_X_y:\n return faces_vectorized, target\n\n return Bunch(data=faces_vectorized,\n images=faces,\n target=target,\n DESCR=fdescr)" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_convert_arff_data", + "name": "_convert_arff_data", + "qname": "sklearn.datasets._openml._convert_arff_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_convert_arff_data/arff", + "name": "arff", + "qname": "sklearn.datasets._openml._convert_arff_data.arff", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "As obtained from liac-arff object." 
+ }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_convert_arff_data/col_slice_x", + "name": "col_slice_x", + "qname": "sklearn.datasets._openml._convert_arff_data.col_slice_x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list", + "default_value": "", + "description": "The column indices that are sliced from the original array to return\nas X data" + }, + "type": { + "kind": "NamedType", + "name": "list" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_convert_arff_data/col_slice_y", + "name": "col_slice_y", + "qname": "sklearn.datasets._openml._convert_arff_data.col_slice_y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list", + "default_value": "", + "description": "The column indices that are sliced from the original array to return\nas y data" + }, + "type": { + "kind": "NamedType", + "name": "list" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_convert_arff_data/shape", + "name": "shape", + "qname": "sklearn.datasets._openml._convert_arff_data.shape", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "converts the arff object into the appropriate matrix type (np.array or\nscipy.sparse.csr_matrix) based on the 'data part' (i.e., in the\nliac-arff dict, the object from the 'data' key)", + "docstring": "converts the arff object into the appropriate matrix type (np.array or\nscipy.sparse.csr_matrix) based on the 'data part' (i.e., in the\nliac-arff dict, the object from the 'data' key)\n\nParameters\n----------\narff : dict\n As obtained from liac-arff object.\n\ncol_slice_x : list\n The column indices that are sliced from the original array to return\n as X data\n\ncol_slice_y : list\n The column indices that are sliced from the original array to return\n as y data\n\nReturns\n-------\nX : np.array or scipy.sparse.csr_matrix\ny : np.array", + "code": "def _convert_arff_data(\n arff: ArffContainerType,\n col_slice_x: List[int],\n col_slice_y: List[int],\n shape: Optional[Tuple] = None\n) -> Tuple:\n \"\"\"\n converts the arff object into the appropriate matrix type (np.array or\n scipy.sparse.csr_matrix) based on the 'data part' (i.e., in the\n liac-arff dict, the object from the 'data' key)\n\n Parameters\n ----------\n arff : dict\n As obtained from liac-arff object.\n\n col_slice_x : list\n The column indices that are sliced from the original array to return\n as X data\n\n col_slice_y : list\n The column indices that are sliced from the original array to return\n as y data\n\n Returns\n -------\n X : np.array or scipy.sparse.csr_matrix\n y : np.array\n \"\"\"\n arff_data = arff['data']\n if isinstance(arff_data, Generator):\n if shape is None:\n raise ValueError(\n \"shape must be provided when arr['data'] is a Generator\"\n )\n if shape[0] == -1:\n count = -1\n else:\n count = shape[0] * shape[1]\n data = np.fromiter(itertools.chain.from_iterable(arff_data),\n dtype='float64', count=count)\n data = data.reshape(*shape)\n X = data[:, col_slice_x]\n y = data[:, col_slice_y]\n return X, y\n elif isinstance(arff_data, tuple):\n arff_data_X = _split_sparse_columns(arff_data, col_slice_x)\n num_obs = max(arff_data[1]) + 1\n X_shape = (num_obs, 
len(col_slice_x))\n X = scipy.sparse.coo_matrix(\n (arff_data_X[0], (arff_data_X[1], arff_data_X[2])),\n shape=X_shape, dtype=np.float64)\n X = X.tocsr()\n y = _sparse_data_to_array(arff_data, col_slice_y)\n return X, y\n else:\n # This should never happen\n raise ValueError('Unexpected Data Type obtained from arff.')" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_convert_arff_data_dataframe", + "name": "_convert_arff_data_dataframe", + "qname": "sklearn.datasets._openml._convert_arff_data_dataframe", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_convert_arff_data_dataframe/arff", + "name": "arff", + "qname": "sklearn.datasets._openml._convert_arff_data_dataframe.arff", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "As obtained from liac-arff object." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_convert_arff_data_dataframe/columns", + "name": "columns", + "qname": "sklearn.datasets._openml._convert_arff_data_dataframe.columns", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list", + "default_value": "", + "description": "Columns from dataframe to return." + }, + "type": { + "kind": "NamedType", + "name": "list" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_convert_arff_data_dataframe/features_dict", + "name": "features_dict", + "qname": "sklearn.datasets._openml._convert_arff_data_dataframe.features_dict", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Maps feature name to feature info from openml." 
+ }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Convert the ARFF object into a pandas DataFrame.", + "docstring": "Convert the ARFF object into a pandas DataFrame.\n\nParameters\n----------\narff : dict\n As obtained from liac-arff object.\n\ncolumns : list\n Columns from dataframe to return.\n\nfeatures_dict : dict\n Maps feature name to feature info from openml.\n\nReturns\n-------\nresult : tuple\n tuple with the resulting dataframe", + "code": "def _convert_arff_data_dataframe(\n arff: ArffContainerType, columns: List, features_dict: Dict[str, Any]\n) -> Tuple:\n \"\"\"Convert the ARFF object into a pandas DataFrame.\n\n Parameters\n ----------\n arff : dict\n As obtained from liac-arff object.\n\n columns : list\n Columns from dataframe to return.\n\n features_dict : dict\n Maps feature name to feature info from openml.\n\n Returns\n -------\n result : tuple\n tuple with the resulting dataframe\n \"\"\"\n pd = check_pandas_support('fetch_openml with as_frame=True')\n\n attributes = OrderedDict(arff['attributes'])\n arff_columns = list(attributes)\n\n if not isinstance(arff['data'], Generator):\n raise ValueError(\n \"arff['data'] must be a generator when converting to pd.DataFrame.\"\n )\n\n # calculate chunksize\n first_row = next(arff['data'])\n first_df = pd.DataFrame([first_row], columns=arff_columns)\n\n row_bytes = first_df.memory_usage(deep=True).sum()\n chunksize = get_chunk_n_rows(row_bytes)\n\n # read arff data with chunks\n columns_to_keep = [col for col in arff_columns if col in columns]\n dfs = []\n dfs.append(first_df[columns_to_keep])\n for data in _chunk_generator(arff['data'], chunksize):\n dfs.append(pd.DataFrame(data, columns=arff_columns)[columns_to_keep])\n df = pd.concat(dfs, ignore_index=True)\n\n for column in columns_to_keep:\n dtype = _feature_to_dtype(features_dict[column])\n if dtype == 'category':\n dtype = pd.api.types.CategoricalDtype(attributes[column])\n df[column] = df[column].astype(dtype, copy=False)\n return (df, )" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_download_data_to_bunch", + "name": "_download_data_to_bunch", + "qname": "sklearn.datasets._openml._download_data_to_bunch", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_download_data_to_bunch/url", + "name": "url", + "qname": "sklearn.datasets._openml._download_data_to_bunch.url", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_download_data_to_bunch/sparse", + "name": "sparse", + "qname": "sklearn.datasets._openml._download_data_to_bunch.sparse", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_download_data_to_bunch/data_home", + "name": "data_home", + "qname": "sklearn.datasets._openml._download_data_to_bunch.data_home", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_download_data_to_bunch/as_frame", + "name": "as_frame", + "qname": 
"sklearn.datasets._openml._download_data_to_bunch.as_frame", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_download_data_to_bunch/features_list", + "name": "features_list", + "qname": "sklearn.datasets._openml._download_data_to_bunch.features_list", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_download_data_to_bunch/data_columns", + "name": "data_columns", + "qname": "sklearn.datasets._openml._download_data_to_bunch.data_columns", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_download_data_to_bunch/target_columns", + "name": "target_columns", + "qname": "sklearn.datasets._openml._download_data_to_bunch.target_columns", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_download_data_to_bunch/shape", + "name": "shape", + "qname": "sklearn.datasets._openml._download_data_to_bunch.shape", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_download_data_to_bunch/md5_checksum", + "name": "md5_checksum", + "qname": "sklearn.datasets._openml._download_data_to_bunch.md5_checksum", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Download OpenML ARFF and convert to Bunch of data", + "docstring": "Download OpenML ARFF and convert to Bunch of data\n ", + "code": "def _download_data_to_bunch(\n url: str,\n sparse: bool,\n data_home: Optional[str],\n *,\n as_frame: bool,\n features_list: List,\n data_columns: List[int],\n target_columns: List,\n shape: Optional[Tuple[int, int]],\n md5_checksum: str\n):\n \"\"\"Download OpenML ARFF and convert to Bunch of data\n \"\"\"\n # NB: this function is long in order to handle retry for any failure\n # during the streaming parse of the ARFF.\n\n # Prepare which columns and data types should be returned for the X and y\n features_dict = {feature['name']: feature for feature in features_list}\n\n # XXX: col_slice_y should be all nominal or all numeric\n _verify_target_data_type(features_dict, target_columns)\n\n col_slice_y = [int(features_dict[col_name]['index'])\n for col_name in target_columns]\n\n col_slice_x = [int(features_dict[col_name]['index'])\n for col_name in data_columns]\n for col_idx in col_slice_y:\n feat = features_list[col_idx]\n nr_missing = int(feat['number_of_missing_values'])\n if nr_missing > 0:\n raise ValueError('Target column {} has {} missing values. '\n 'Missing values are not supported for target '\n 'columns. '.format(feat['name'], nr_missing))\n\n # Access an ARFF file on the OpenML server. 
Documentation:\n # https://www.openml.org/api_data_docs#!/data/get_download_id\n\n if sparse is True:\n return_type = _arff.COO\n else:\n return_type = _arff.DENSE_GEN\n\n frame = nominal_attributes = None\n\n parse_arff: Callable\n postprocess: Callable\n if as_frame:\n columns = data_columns + target_columns\n parse_arff = partial(_convert_arff_data_dataframe, columns=columns,\n features_dict=features_dict)\n\n def postprocess(frame):\n X = frame[data_columns]\n if len(target_columns) >= 2:\n y = frame[target_columns]\n elif len(target_columns) == 1:\n y = frame[target_columns[0]]\n else:\n y = None\n return X, y, frame, nominal_attributes\n else:\n def parse_arff(arff):\n X, y = _convert_arff_data(arff, col_slice_x, col_slice_y, shape)\n # nominal attributes is a dict mapping from the attribute name to\n # the possible values. Includes also the target column (which will\n # be popped off below, before it will be packed in the Bunch\n # object)\n nominal_attributes = {k: v for k, v in arff['attributes']\n if isinstance(v, list) and\n k in data_columns + target_columns}\n return X, y, nominal_attributes\n\n def postprocess(X, y, nominal_attributes):\n is_classification = {col_name in nominal_attributes\n for col_name in target_columns}\n if not is_classification:\n # No target\n pass\n elif all(is_classification):\n y = np.hstack([\n np.take(\n np.asarray(nominal_attributes.pop(col_name),\n dtype='O'),\n y[:, i:i + 1].astype(int, copy=False))\n for i, col_name in enumerate(target_columns)\n ])\n elif any(is_classification):\n raise ValueError('Mix of nominal and non-nominal targets is '\n 'not currently supported')\n\n # reshape y back to 1-D array, if there is only 1 target column;\n # back to None if there are not target columns\n if y.shape[1] == 1:\n y = y.reshape((-1,))\n elif y.shape[1] == 0:\n y = None\n return X, y, frame, nominal_attributes\n\n out = _retry_with_clean_cache(url, data_home)(\n _load_arff_response)(url, data_home,\n return_type=return_type,\n encode_nominal=not as_frame,\n parse_arff=parse_arff,\n md5_checksum=md5_checksum)\n X, y, frame, nominal_attributes = postprocess(*out)\n\n return Bunch(data=X, target=y, frame=frame,\n categories=nominal_attributes,\n feature_names=data_columns,\n target_names=target_columns)" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_feature_to_dtype", + "name": "_feature_to_dtype", + "qname": "sklearn.datasets._openml._feature_to_dtype", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_feature_to_dtype/feature", + "name": "feature", + "qname": "sklearn.datasets._openml._feature_to_dtype.feature", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Map feature to dtype for pandas DataFrame", + "docstring": "Map feature to dtype for pandas DataFrame\n ", + "code": "def _feature_to_dtype(feature: Dict[str, str]):\n \"\"\"Map feature to dtype for pandas DataFrame\n \"\"\"\n if feature['data_type'] == 'string':\n return object\n elif feature['data_type'] == 'nominal':\n return 'category'\n # only numeric, integer, real are left\n elif (feature['number_of_missing_values'] != '0' or\n feature['data_type'] in ['numeric', 'real']):\n # cast to floats when there are any missing values\n return np.float64\n elif feature['data_type'] == 'integer':\n return np.int64\n raise 
ValueError('Unsupported feature: {}'.format(feature))" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_data_description_by_id", + "name": "_get_data_description_by_id", + "qname": "sklearn.datasets._openml._get_data_description_by_id", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_get_data_description_by_id/data_id", + "name": "data_id", + "qname": "sklearn.datasets._openml._get_data_description_by_id.data_id", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_data_description_by_id/data_home", + "name": "data_home", + "qname": "sklearn.datasets._openml._get_data_description_by_id.data_home", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _get_data_description_by_id(\n data_id: int, data_home: Optional[str]\n) -> Dict[str, Any]:\n # OpenML API function: https://www.openml.org/api_docs#!/data/get_data_id\n url = _DATA_INFO.format(data_id)\n error_message = \"Dataset with data_id {} not found.\".format(data_id)\n json_data = _get_json_content_from_openml_api(\n url, error_message, data_home=data_home\n )\n return json_data['data_set_description']" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_data_features", + "name": "_get_data_features", + "qname": "sklearn.datasets._openml._get_data_features", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_get_data_features/data_id", + "name": "data_id", + "qname": "sklearn.datasets._openml._get_data_features.data_id", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_data_features/data_home", + "name": "data_home", + "qname": "sklearn.datasets._openml._get_data_features.data_home", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _get_data_features(\n data_id: int, data_home: Optional[str]\n) -> OpenmlFeaturesType:\n # OpenML function:\n # https://www.openml.org/api_docs#!/data/get_data_features_id\n url = _DATA_FEATURES.format(data_id)\n error_message = \"Dataset with data_id {} not found.\".format(data_id)\n json_data = _get_json_content_from_openml_api(\n url, error_message, data_home=data_home\n )\n return json_data['data_features']['feature']" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_data_info_by_name", + "name": "_get_data_info_by_name", + "qname": "sklearn.datasets._openml._get_data_info_by_name", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_get_data_info_by_name/name", + "name": "name", + "qname": "sklearn.datasets._openml._get_data_info_by_name.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + 
"description": "name of the dataset" + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_data_info_by_name/version", + "name": "version", + "qname": "sklearn.datasets._openml._get_data_info_by_name.version", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or str", + "default_value": "", + "description": "If version is an integer, the exact name/version will be obtained from\nOpenML. If version is a string (value: \"active\") it will take the first\nversion from OpenML that is annotated as active. Any other string\nvalues except \"active\" are treated as integer." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_data_info_by_name/data_home", + "name": "data_home", + "qname": "sklearn.datasets._openml._get_data_info_by_name.data_home", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or None", + "default_value": "", + "description": "Location to cache the response. None if no cache is required." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Utilizes the openml dataset listing api to find a dataset by\nname/version\nOpenML api function:\nhttps://www.openml.org/api_docs#!/data/get_data_list_data_name_data_name", + "docstring": "Utilizes the openml dataset listing api to find a dataset by\nname/version\nOpenML api function:\nhttps://www.openml.org/api_docs#!/data/get_data_list_data_name_data_name\n\nParameters\n----------\nname : str\n name of the dataset\n\nversion : int or str\n If version is an integer, the exact name/version will be obtained from\n OpenML. If version is a string (value: \"active\") it will take the first\n version from OpenML that is annotated as active. Any other string\n values except \"active\" are treated as integer.\n\ndata_home : str or None\n Location to cache the response. None if no cache is required.\n\nReturns\n-------\nfirst_dataset : json\n json representation of the first dataset object that adhired to the\n search criteria", + "code": "def _get_data_info_by_name(\n name: str, version: Union[int, str], data_home: Optional[str]\n):\n \"\"\"\n Utilizes the openml dataset listing api to find a dataset by\n name/version\n OpenML api function:\n https://www.openml.org/api_docs#!/data/get_data_list_data_name_data_name\n\n Parameters\n ----------\n name : str\n name of the dataset\n\n version : int or str\n If version is an integer, the exact name/version will be obtained from\n OpenML. If version is a string (value: \"active\") it will take the first\n version from OpenML that is annotated as active. Any other string\n values except \"active\" are treated as integer.\n\n data_home : str or None\n Location to cache the response. 
None if no cache is required.\n\n Returns\n -------\n first_dataset : json\n json representation of the first dataset object that adhered to the\n search criteria\n\n \"\"\"\n if version == \"active\":\n # situation in which we return the oldest active version\n url = _SEARCH_NAME.format(name) + \"/status/active/\"\n error_msg = \"No active dataset {} found.\".format(name)\n json_data = _get_json_content_from_openml_api(\n url, error_msg, data_home=data_home\n )\n res = json_data['data']['dataset']\n if len(res) > 1:\n warn(\"Multiple active versions of the dataset matching the name\"\n \" {name} exist. Versions may be fundamentally different, \"\n \"returning version\"\n \" {version}.\".format(name=name, version=res[0]['version']))\n return res[0]\n\n # an integer version has been provided\n url = (_SEARCH_NAME + \"/data_version/{}\").format(name, version)\n try:\n json_data = _get_json_content_from_openml_api(\n url, error_message=None, data_home=data_home\n )\n except OpenMLError:\n # we can do this in 1 function call if OpenML does not require the\n # specification of the dataset status (i.e., return datasets with a\n # given name / version regardless of active, deactivated, etc. )\n # TODO: feature request OpenML.\n url += \"/status/deactivated\"\n error_msg = \"Dataset {} with version {} not found.\".format(name,\n version)\n json_data = _get_json_content_from_openml_api(\n url, error_msg, data_home=data_home\n )\n\n return json_data['data']['dataset'][0]" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_data_qualities", + "name": "_get_data_qualities", + "qname": "sklearn.datasets._openml._get_data_qualities", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_get_data_qualities/data_id", + "name": "data_id", + "qname": "sklearn.datasets._openml._get_data_qualities.data_id", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_data_qualities/data_home", + "name": "data_home", + "qname": "sklearn.datasets._openml._get_data_qualities.data_home", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _get_data_qualities(\n data_id: int, data_home: Optional[str]\n) -> OpenmlQualitiesType:\n # OpenML API function:\n # https://www.openml.org/api_docs#!/data/get_data_qualities_id\n url = _DATA_QUALITIES.format(data_id)\n error_message = \"Dataset with data_id {} not found.\".format(data_id)\n json_data = _get_json_content_from_openml_api(\n url, error_message, data_home=data_home\n )\n # the qualities might not be available, but we still try to process\n # the data\n return json_data.get('data_qualities', {}).get('quality', [])" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_json_content_from_openml_api", + "name": "_get_json_content_from_openml_api", + "qname": "sklearn.datasets._openml._get_json_content_from_openml_api", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_get_json_content_from_openml_api/url", + "name": "url", + "qname": "sklearn.datasets._openml._get_json_content_from_openml_api.url", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", 
+ "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "The URL to load from. Should be an official OpenML endpoint" + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_json_content_from_openml_api/error_message", + "name": "error_message", + "qname": "sklearn.datasets._openml._get_json_content_from_openml_api.error_message", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or None", + "default_value": "", + "description": "The error message to raise if an acceptable OpenML error is thrown\n(acceptable error is, e.g., data id not found. Other errors, like 404's\nwill throw the native error message)" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_json_content_from_openml_api/data_home", + "name": "data_home", + "qname": "sklearn.datasets._openml._get_json_content_from_openml_api.data_home", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or None", + "default_value": "", + "description": "Location to cache the response. None if no cache is required." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Loads json data from the openml api", + "docstring": "Loads json data from the openml api\n\nParameters\n----------\nurl : str\n The URL to load from. Should be an official OpenML endpoint\n\nerror_message : str or None\n The error message to raise if an acceptable OpenML error is thrown\n (acceptable error is, e.g., data id not found. Other errors, like 404's\n will throw the native error message)\n\ndata_home : str or None\n Location to cache the response. None if no cache is required.\n\nReturns\n-------\njson_data : json\n the json result from the OpenML server if the call was successful.\n An exception otherwise.", + "code": "def _get_json_content_from_openml_api(\n url: str,\n error_message: Optional[str],\n data_home: Optional[str]\n) -> Dict:\n \"\"\"\n Loads json data from the openml api\n\n Parameters\n ----------\n url : str\n The URL to load from. Should be an official OpenML endpoint\n\n error_message : str or None\n The error message to raise if an acceptable OpenML error is thrown\n (acceptable error is, e.g., data id not found. Other errors, like 404's\n will throw the native error message)\n\n data_home : str or None\n Location to cache the response. 
None if no cache is required.\n\n Returns\n -------\n json_data : json\n the json result from the OpenML server if the call was successful.\n An exception otherwise.\n \"\"\"\n\n @_retry_with_clean_cache(url, data_home)\n def _load_json():\n with closing(_open_openml_url(url, data_home)) as response:\n return json.loads(response.read().decode(\"utf-8\"))\n\n try:\n return _load_json()\n except HTTPError as error:\n # 412 is an OpenML specific error code, indicating a generic error\n # (e.g., data not found)\n if error.code != 412:\n raise error\n\n # 412 error, not in except for nicer traceback\n raise OpenMLError(error_message)" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_local_path", + "name": "_get_local_path", + "qname": "sklearn.datasets._openml._get_local_path", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_get_local_path/openml_path", + "name": "openml_path", + "qname": "sklearn.datasets._openml._get_local_path.openml_path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_local_path/data_home", + "name": "data_home", + "qname": "sklearn.datasets._openml._get_local_path.data_home", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _get_local_path(openml_path: str, data_home: str) -> str:\n return os.path.join(data_home, 'openml.org', openml_path + \".gz\")" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_get_num_samples", + "name": "_get_num_samples", + "qname": "sklearn.datasets._openml._get_num_samples", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_get_num_samples/data_qualities", + "name": "data_qualities", + "qname": "sklearn.datasets._openml._get_num_samples.data_qualities", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of dict", + "default_value": "", + "description": "Used to retrieve the number of instances (samples) in the dataset." 
+ }, + "type": { + "kind": "NamedType", + "name": "list of dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get the number of samples from data qualities.", + "docstring": "Get the number of samples from data qualities.\n\nParameters\n----------\ndata_qualities : list of dict\n Used to retrieve the number of instances (samples) in the dataset.\n\nReturns\n-------\nn_samples : int\n The number of samples in the dataset or -1 if data qualities are\n unavailable.", + "code": "def _get_num_samples(data_qualities: OpenmlQualitiesType) -> int:\n \"\"\"Get the number of samples from data qualities.\n\n Parameters\n ----------\n data_qualities : list of dict\n Used to retrieve the number of instances (samples) in the dataset.\n\n Returns\n -------\n n_samples : int\n The number of samples in the dataset or -1 if data qualities are\n unavailable.\n \"\"\"\n # If the data qualities are unavailable, we return -1\n default_n_samples = -1\n\n qualities = {d['name']: d['value'] for d in data_qualities}\n return int(float(qualities.get('NumberOfInstances', default_n_samples)))" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_load_arff_response", + "name": "_load_arff_response", + "qname": "sklearn.datasets._openml._load_arff_response", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_load_arff_response/url", + "name": "url", + "qname": "sklearn.datasets._openml._load_arff_response.url", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_load_arff_response/data_home", + "name": "data_home", + "qname": "sklearn.datasets._openml._load_arff_response.data_home", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_load_arff_response/return_type", + "name": "return_type", + "qname": "sklearn.datasets._openml._load_arff_response.return_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_load_arff_response/encode_nominal", + "name": "encode_nominal", + "qname": "sklearn.datasets._openml._load_arff_response.encode_nominal", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_load_arff_response/parse_arff", + "name": "parse_arff", + "qname": "sklearn.datasets._openml._load_arff_response.parse_arff", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_load_arff_response/md5_checksum", + "name": "md5_checksum", + "qname": "sklearn.datasets._openml._load_arff_response.md5_checksum", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], 
+ "description": "Load arff data with url and parses arff response with parse_arff", + "docstring": "Load arff data with url and parses arff response with parse_arff", + "code": "def _load_arff_response(\n url: str,\n data_home: Optional[str],\n return_type, encode_nominal: bool,\n parse_arff: Callable[[ArffContainerType], Tuple],\n md5_checksum: str\n) -> Tuple:\n \"\"\"Load arff data with url and parses arff response with parse_arff\"\"\"\n response = _open_openml_url(url, data_home)\n\n with closing(response):\n # Note that if the data is dense, no reading is done until the data\n # generator is iterated.\n actual_md5_checksum = hashlib.md5()\n\n def _stream_checksum_generator(response):\n for line in response:\n actual_md5_checksum.update(line)\n yield line.decode('utf-8')\n\n stream = _stream_checksum_generator(response)\n\n arff = _arff.load(stream,\n return_type=return_type,\n encode_nominal=encode_nominal)\n\n parsed_arff = parse_arff(arff)\n\n # consume remaining stream, if early exited\n for _ in stream:\n pass\n\n if actual_md5_checksum.hexdigest() != md5_checksum:\n raise ValueError(\"md5 checksum of local file for \" + url +\n \" does not match description. \"\n \"Downloaded file could have been modified / \"\n \"corrupted, clean cache and retry...\")\n\n return parsed_arff" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_open_openml_url", + "name": "_open_openml_url", + "qname": "sklearn.datasets._openml._open_openml_url", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_open_openml_url/openml_path", + "name": "openml_path", + "qname": "sklearn.datasets._openml._open_openml_url.openml_path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "OpenML URL that will be accessed. This will be prefixes with\n_OPENML_PREFIX" + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_open_openml_url/data_home", + "name": "data_home", + "qname": "sklearn.datasets._openml._open_openml_url.data_home", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "Directory to which the files will be cached. If None, no caching will\nbe applied." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns a resource from OpenML.org. Caches it to data_home if required.", + "docstring": "Returns a resource from OpenML.org. Caches it to data_home if required.\n\nParameters\n----------\nopenml_path : str\n OpenML URL that will be accessed. This will be prefixes with\n _OPENML_PREFIX\n\ndata_home : str\n Directory to which the files will be cached. If None, no caching will\n be applied.\n\nReturns\n-------\nresult : stream\n A stream to the OpenML resource", + "code": "def _open_openml_url(openml_path: str, data_home: Optional[str]):\n \"\"\"\n Returns a resource from OpenML.org. Caches it to data_home if required.\n\n Parameters\n ----------\n openml_path : str\n OpenML URL that will be accessed. This will be prefixes with\n _OPENML_PREFIX\n\n data_home : str\n Directory to which the files will be cached. 
If None, no caching will\n be applied.\n\n Returns\n -------\n result : stream\n A stream to the OpenML resource\n \"\"\"\n def is_gzip_encoded(_fsrc):\n return _fsrc.info().get('Content-Encoding', '') == 'gzip'\n\n req = Request(_OPENML_PREFIX + openml_path)\n req.add_header('Accept-encoding', 'gzip')\n\n if data_home is None:\n fsrc = urlopen(req)\n if is_gzip_encoded(fsrc):\n return gzip.GzipFile(fileobj=fsrc, mode='rb')\n return fsrc\n\n local_path = _get_local_path(openml_path, data_home)\n if not os.path.exists(local_path):\n try:\n os.makedirs(os.path.dirname(local_path))\n except OSError:\n # potentially, the directory has been created already\n pass\n\n try:\n with closing(urlopen(req)) as fsrc:\n opener: Callable\n if is_gzip_encoded(fsrc):\n opener = open\n else:\n opener = gzip.GzipFile\n with opener(local_path, 'wb') as fdst:\n shutil.copyfileobj(fsrc, fdst)\n except Exception:\n if os.path.exists(local_path):\n os.unlink(local_path)\n raise\n\n # XXX: First time, decompression will not be necessary (by using fsrc), but\n # it will happen nonetheless\n return gzip.GzipFile(local_path, 'rb')" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_retry_with_clean_cache", + "name": "_retry_with_clean_cache", + "qname": "sklearn.datasets._openml._retry_with_clean_cache", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_retry_with_clean_cache/openml_path", + "name": "openml_path", + "qname": "sklearn.datasets._openml._retry_with_clean_cache.openml_path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_retry_with_clean_cache/data_home", + "name": "data_home", + "qname": "sklearn.datasets._openml._retry_with_clean_cache.data_home", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "If the first call to the decorated function fails, the local cached\nfile is removed, and the function is called again. If ``data_home`` is\n``None``, then the function is called once.", + "docstring": "If the first call to the decorated function fails, the local cached\nfile is removed, and the function is called again. If ``data_home`` is\n``None``, then the function is called once.", + "code": "def _retry_with_clean_cache(\n openml_path: str, data_home: Optional[str]\n) -> Callable:\n \"\"\"If the first call to the decorated function fails, the local cached\n file is removed, and the function is called again. 
If ``data_home`` is\n ``None``, then the function is called once.\n \"\"\"\n def decorator(f):\n @wraps(f)\n def wrapper(*args, **kw):\n if data_home is None:\n return f(*args, **kw)\n try:\n return f(*args, **kw)\n except HTTPError:\n raise\n except Exception:\n warn(\"Invalid cache, redownloading file\", RuntimeWarning)\n local_path = _get_local_path(openml_path, data_home)\n if os.path.exists(local_path):\n os.unlink(local_path)\n return f(*args, **kw)\n return wrapper\n return decorator" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_sparse_data_to_array", + "name": "_sparse_data_to_array", + "qname": "sklearn.datasets._openml._sparse_data_to_array", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_sparse_data_to_array/arff_data", + "name": "arff_data", + "qname": "sklearn.datasets._openml._sparse_data_to_array.arff_data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_sparse_data_to_array/include_columns", + "name": "include_columns", + "qname": "sklearn.datasets._openml._sparse_data_to_array.include_columns", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _sparse_data_to_array(\n arff_data: ArffSparseDataType, include_columns: List\n) -> np.ndarray:\n # turns the sparse data back into an array (can't use toarray() function,\n # as this only works on numeric data)\n num_obs = max(arff_data[1]) + 1\n y_shape = (num_obs, len(include_columns))\n reindexed_columns = {column_idx: array_idx for array_idx, column_idx\n in enumerate(include_columns)}\n # TODO: improve for efficiency\n y = np.empty(y_shape, dtype=np.float64)\n for val, row_idx, col_idx in zip(arff_data[0], arff_data[1], arff_data[2]):\n if col_idx in include_columns:\n y[row_idx, reindexed_columns[col_idx]] = val\n return y" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_split_sparse_columns", + "name": "_split_sparse_columns", + "qname": "sklearn.datasets._openml._split_sparse_columns", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_split_sparse_columns/arff_data", + "name": "arff_data", + "qname": "sklearn.datasets._openml._split_sparse_columns.arff_data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tuple", + "default_value": "", + "description": "A tuple of three lists of equal size; first list indicating the value,\nsecond the x coordinate and the third the y coordinate." + }, + "type": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_split_sparse_columns/include_columns", + "name": "include_columns", + "qname": "sklearn.datasets._openml._split_sparse_columns.include_columns", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list", + "default_value": "", + "description": "A list of columns to include."
+ }, + "type": { + "kind": "NamedType", + "name": "list" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "obtains several columns from sparse arff representation. Additionally, the\ncolumn indices are re-labelled, given the columns that are not included.\n(e.g., when including [1, 2, 3], the columns will be relabelled to\n[0, 1, 2])", + "docstring": "obtains several columns from sparse arff representation. Additionally, the\ncolumn indices are re-labelled, given the columns that are not included.\n(e.g., when including [1, 2, 3], the columns will be relabelled to\n[0, 1, 2])\n\nParameters\n----------\narff_data : tuple\n A tuple of three lists of equal size; first list indicating the value,\n second the x coordinate and the third the y coordinate.\n\ninclude_columns : list\n A list of columns to include.\n\nReturns\n-------\narff_data_new : tuple\n Subset of arff data with only the include columns indicated by the\n include_columns argument.", + "code": "def _split_sparse_columns(\n arff_data: ArffSparseDataType, include_columns: List\n) -> ArffSparseDataType:\n \"\"\"\n obtains several columns from sparse arff representation. Additionally, the\n column indices are re-labelled, given the columns that are not included.\n (e.g., when including [1, 2, 3], the columns will be relabelled to\n [0, 1, 2])\n\n Parameters\n ----------\n arff_data : tuple\n A tuple of three lists of equal size; first list indicating the value,\n second the x coordinate and the third the y coordinate.\n\n include_columns : list\n A list of columns to include.\n\n Returns\n -------\n arff_data_new : tuple\n Subset of arff data with only the include columns indicated by the\n include_columns argument.\n \"\"\"\n arff_data_new: ArffSparseDataType = (list(), list(), list())\n reindexed_columns = {column_idx: array_idx for array_idx, column_idx\n in enumerate(include_columns)}\n for val, row_idx, col_idx in zip(arff_data[0], arff_data[1], arff_data[2]):\n if col_idx in include_columns:\n arff_data_new[0].append(val)\n arff_data_new[1].append(row_idx)\n arff_data_new[2].append(reindexed_columns[col_idx])\n return arff_data_new" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_valid_data_column_names", + "name": "_valid_data_column_names", + "qname": "sklearn.datasets._openml._valid_data_column_names", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_valid_data_column_names/features_list", + "name": "features_list", + "qname": "sklearn.datasets._openml._valid_data_column_names.features_list", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_valid_data_column_names/target_columns", + "name": "target_columns", + "qname": "sklearn.datasets._openml._valid_data_column_names.target_columns", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _valid_data_column_names(features_list, target_columns):\n # logic for determining on which columns can be learned. Note that from the\n # OpenML guide follows that columns that have the `is_row_identifier` or\n # `is_ignore` flag, these can not be learned on. 
Also target columns are\n # excluded.\n valid_data_column_names = []\n for feature in features_list:\n if (feature['name'] not in target_columns\n and feature['is_ignore'] != 'true'\n and feature['is_row_identifier'] != 'true'):\n valid_data_column_names.append(feature['name'])\n return valid_data_column_names" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_verify_target_data_type", + "name": "_verify_target_data_type", + "qname": "sklearn.datasets._openml._verify_target_data_type", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/_verify_target_data_type/features_dict", + "name": "features_dict", + "qname": "sklearn.datasets._openml._verify_target_data_type.features_dict", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._openml/_verify_target_data_type/target_columns", + "name": "target_columns", + "qname": "sklearn.datasets._openml._verify_target_data_type.target_columns", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _verify_target_data_type(features_dict, target_columns):\n # verifies the data type of the y array in case there are multiple targets\n # (throws an error if these targets do not comply with sklearn support)\n if not isinstance(target_columns, list):\n raise ValueError('target_column should be list, '\n 'got: %s' % type(target_columns))\n found_types = set()\n for target_column in target_columns:\n if target_column not in features_dict:\n raise KeyError('Could not find target_column={}')\n if features_dict[target_column]['data_type'] == \"numeric\":\n found_types.add(np.float64)\n else:\n found_types.add(object)\n\n # note: we compare to a string, not boolean\n if features_dict[target_column]['is_ignore'] == 'true':\n warn('target_column={} has flag is_ignore.'.format(\n target_column))\n if features_dict[target_column]['is_row_identifier'] == 'true':\n warn('target_column={} has flag is_row_identifier.'.format(\n target_column))\n if len(found_types) > 1:\n raise ValueError('Can only handle homogeneous multi-target datasets, '\n 'i.e., all targets are either numeric or '\n 'categorical.')" + }, + { + "id": "scikit-learn/sklearn.datasets._openml/fetch_openml", + "name": "fetch_openml", + "qname": "sklearn.datasets._openml.fetch_openml", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._openml/fetch_openml/name", + "name": "name", + "qname": "sklearn.datasets._openml.fetch_openml.name", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "String identifier of the dataset. Note that OpenML can have multiple\ndatasets with the same name." 
+ }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/fetch_openml/version", + "name": "version", + "qname": "sklearn.datasets._openml.fetch_openml.version", + "default_value": "'active'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or 'active'", + "default_value": "'active'", + "description": "Version of the dataset. Can only be provided if also ``name`` is given.\nIf 'active' the oldest version that's still active is used. Since\nthere may be more than one active version of a dataset, and those\nversions may fundamentally be different from one another, setting an\nexact version is highly recommended." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "'active'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/fetch_openml/data_id", + "name": "data_id", + "qname": "sklearn.datasets._openml.fetch_openml.data_id", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "OpenML ID of the dataset. The most specific way of retrieving a\ndataset. If data_id is not given, name (and potential version) are\nused to obtain a dataset." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/fetch_openml/data_home", + "name": "data_home", + "qname": "sklearn.datasets._openml.fetch_openml.data_home", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Specify another download and cache folder for the data sets. By default\nall scikit-learn data is stored in '~/scikit_learn_data' subfolders." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/fetch_openml/target_column", + "name": "target_column", + "qname": "sklearn.datasets._openml.fetch_openml.target_column", + "default_value": "'default-target'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str, list or None", + "default_value": "'default-target'", + "description": "Specify the column name in the data to use as target. If\n'default-target', the standard target column a stored on the server\nis used. If ``None``, all columns are returned as data and the\ntarget is ``None``. If list (of strings), all columns with these names\nare returned as multi-target (Note: not all scikit-learn classifiers\ncan handle all types of multi-output combinations)" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/fetch_openml/cache", + "name": "cache", + "qname": "sklearn.datasets._openml.fetch_openml.cache", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to cache downloaded datasets using joblib." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/fetch_openml/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._openml.fetch_openml.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns ``(data, target)`` instead of a Bunch object. See\nbelow for more information about the `data` and `target` objects." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._openml/fetch_openml/as_frame", + "name": "as_frame", + "qname": "sklearn.datasets._openml.fetch_openml.as_frame", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or 'auto'", + "default_value": "'auto'", + "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric, string or categorical). The target is\na pandas DataFrame or Series depending on the number of target_columns.\nThe Bunch will contain a ``frame`` attribute with the target and the\ndata. If ``return_X_y`` is True, then ``(data, target)`` will be pandas\nDataFrames or Series as describe above.\n\nIf as_frame is 'auto', the data and target will be converted to\nDataFrame or Series as if as_frame is set to True, unless the dataset\nis stored in sparse format.\n\n.. versionchanged:: 0.24\n The default value of `as_frame` changed from `False` to `'auto'`\n in 0.24." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'auto'" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fetch dataset from openml by name or dataset id.\n\nDatasets are uniquely identified by either an integer ID or by a\ncombination of name and version (i.e. there might be multiple\nversions of the 'iris' dataset). Please give either name or data_id\n(not both). In case a name is given, a version can also be\nprovided.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\n.. note:: EXPERIMENTAL\n\n The API is experimental (particularly the return value structure),\n and might have small backward-incompatible changes without notice\n or warning in future releases.", + "docstring": "Fetch dataset from openml by name or dataset id.\n\nDatasets are uniquely identified by either an integer ID or by a\ncombination of name and version (i.e. there might be multiple\nversions of the 'iris' dataset). Please give either name or data_id\n(not both). In case a name is given, a version can also be\nprovided.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\n.. note:: EXPERIMENTAL\n\n The API is experimental (particularly the return value structure),\n and might have small backward-incompatible changes without notice\n or warning in future releases.\n\nParameters\n----------\nname : str, default=None\n String identifier of the dataset. Note that OpenML can have multiple\n datasets with the same name.\n\nversion : int or 'active', default='active'\n Version of the dataset. Can only be provided if also ``name`` is given.\n If 'active' the oldest version that's still active is used. 
Since\n there may be more than one active version of a dataset, and those\n versions may fundamentally be different from one another, setting an\n exact version is highly recommended.\n\ndata_id : int, default=None\n OpenML ID of the dataset. The most specific way of retrieving a\n dataset. If data_id is not given, name (and potential version) are\n used to obtain a dataset.\n\ndata_home : str, default=None\n Specify another download and cache folder for the data sets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ntarget_column : str, list or None, default='default-target'\n Specify the column name in the data to use as target. If\n 'default-target', the standard target column a stored on the server\n is used. If ``None``, all columns are returned as data and the\n target is ``None``. If list (of strings), all columns with these names\n are returned as multi-target (Note: not all scikit-learn classifiers\n can handle all types of multi-output combinations)\n\ncache : bool, default=True\n Whether to cache downloaded datasets using joblib.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` objects.\n\nas_frame : bool or 'auto', default='auto'\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). The target is\n a pandas DataFrame or Series depending on the number of target_columns.\n The Bunch will contain a ``frame`` attribute with the target and the\n data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas\n DataFrames or Series as describe above.\n\n If as_frame is 'auto', the data and target will be converted to\n DataFrame or Series as if as_frame is set to True, unless the dataset\n is stored in sparse format.\n\n .. versionchanged:: 0.24\n The default value of `as_frame` changed from `False` to `'auto'`\n in 0.24.\n\nReturns\n-------\n\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : np.array, scipy.sparse.csr_matrix of floats, or pandas DataFrame\n The feature matrix. Categorical features are encoded as ordinals.\n target : np.array, pandas Series or DataFrame\n The regression target or classification labels, if applicable.\n Dtype is float if numeric, and object if categorical. If\n ``as_frame`` is True, ``target`` is a pandas object.\n DESCR : str\n The full description of the dataset\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n\n .. versionadded:: 0.22\n\n categories : dict or None\n Maps each categorical feature name to a list of values, such\n that the value encoded as i is ith in the list. If ``as_frame``\n is True, this is None.\n details : dict\n More metadata from OpenML\n frame : pandas DataFrame\n Only present when `as_frame=True`. DataFrame with ``data`` and\n ``target``.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. note:: EXPERIMENTAL\n\n This interface is **experimental** and subsequent releases may\n change attributes without notice (although there should only be\n minor changes to ``data`` and ``target``).\n\n Missing values in the 'data' are represented as NaN's. 
Missing values\n in 'target' are represented as NaN's (numerical target) or None\n (categorical target)", + "code": "@_deprecate_positional_args\ndef fetch_openml(\n name: Optional[str] = None,\n *,\n version: Union[str, int] = 'active',\n data_id: Optional[int] = None,\n data_home: Optional[str] = None,\n target_column: Optional[Union[str, List]] = 'default-target',\n cache: bool = True,\n return_X_y: bool = False,\n as_frame: Union[str, bool] = 'auto'\n):\n \"\"\"Fetch dataset from openml by name or dataset id.\n\n Datasets are uniquely identified by either an integer ID or by a\n combination of name and version (i.e. there might be multiple\n versions of the 'iris' dataset). Please give either name or data_id\n (not both). In case a name is given, a version can also be\n provided.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.20\n\n .. note:: EXPERIMENTAL\n\n The API is experimental (particularly the return value structure),\n and might have small backward-incompatible changes without notice\n or warning in future releases.\n\n Parameters\n ----------\n name : str, default=None\n String identifier of the dataset. Note that OpenML can have multiple\n datasets with the same name.\n\n version : int or 'active', default='active'\n Version of the dataset. Can only be provided if also ``name`` is given.\n If 'active' the oldest version that's still active is used. Since\n there may be more than one active version of a dataset, and those\n versions may fundamentally be different from one another, setting an\n exact version is highly recommended.\n\n data_id : int, default=None\n OpenML ID of the dataset. The most specific way of retrieving a\n dataset. If data_id is not given, name (and potential version) are\n used to obtain a dataset.\n\n data_home : str, default=None\n Specify another download and cache folder for the data sets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n target_column : str, list or None, default='default-target'\n Specify the column name in the data to use as target. If\n 'default-target', the standard target column a stored on the server\n is used. If ``None``, all columns are returned as data and the\n target is ``None``. If list (of strings), all columns with these names\n are returned as multi-target (Note: not all scikit-learn classifiers\n can handle all types of multi-output combinations)\n\n cache : bool, default=True\n Whether to cache downloaded datasets using joblib.\n\n return_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` objects.\n\n as_frame : bool or 'auto', default='auto'\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). The target is\n a pandas DataFrame or Series depending on the number of target_columns.\n The Bunch will contain a ``frame`` attribute with the target and the\n data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas\n DataFrames or Series as describe above.\n\n If as_frame is 'auto', the data and target will be converted to\n DataFrame or Series as if as_frame is set to True, unless the dataset\n is stored in sparse format.\n\n .. 
versionchanged:: 0.24\n The default value of `as_frame` changed from `False` to `'auto'`\n in 0.24.\n\n Returns\n -------\n\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : np.array, scipy.sparse.csr_matrix of floats, or pandas DataFrame\n The feature matrix. Categorical features are encoded as ordinals.\n target : np.array, pandas Series or DataFrame\n The regression target or classification labels, if applicable.\n Dtype is float if numeric, and object if categorical. If\n ``as_frame`` is True, ``target`` is a pandas object.\n DESCR : str\n The full description of the dataset\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n\n .. versionadded:: 0.22\n\n categories : dict or None\n Maps each categorical feature name to a list of values, such\n that the value encoded as i is ith in the list. If ``as_frame``\n is True, this is None.\n details : dict\n More metadata from OpenML\n frame : pandas DataFrame\n Only present when `as_frame=True`. DataFrame with ``data`` and\n ``target``.\n\n (data, target) : tuple if ``return_X_y`` is True\n\n .. note:: EXPERIMENTAL\n\n This interface is **experimental** and subsequent releases may\n change attributes without notice (although there should only be\n minor changes to ``data`` and ``target``).\n\n Missing values in the 'data' are represented as NaN's. Missing values\n in 'target' are represented as NaN's (numerical target) or None\n (categorical target)\n \"\"\"\n if cache is False:\n # no caching will be applied\n data_home = None\n else:\n data_home = get_data_home(data_home=data_home)\n data_home = join(data_home, 'openml')\n\n # check valid function arguments. data_id XOR (name, version) should be\n # provided\n if name is not None:\n # OpenML is case-insensitive, but the caching mechanism is not\n # convert all data names (str) to lower case\n name = name.lower()\n if data_id is not None:\n raise ValueError(\n \"Dataset data_id={} and name={} passed, but you can only \"\n \"specify a numeric data_id or a name, not \"\n \"both.\".format(data_id, name))\n data_info = _get_data_info_by_name(name, version, data_home)\n data_id = data_info['did']\n elif data_id is not None:\n # from the previous if statement, it is given that name is None\n if version != \"active\":\n raise ValueError(\n \"Dataset data_id={} and version={} passed, but you can only \"\n \"specify a numeric data_id or a version, not \"\n \"both.\".format(data_id, version))\n else:\n raise ValueError(\n \"Neither name nor data_id are provided. Please provide name or \"\n \"data_id.\")\n\n data_description = _get_data_description_by_id(data_id, data_home)\n if data_description['status'] != \"active\":\n warn(\"Version {} of dataset {} is inactive, meaning that issues have \"\n \"been found in the dataset. Try using a newer version from \"\n \"this URL: {}\".format(\n data_description['version'],\n data_description['name'],\n data_description['url']))\n if 'error' in data_description:\n warn(\"OpenML registered a problem with the dataset. It might be \"\n \"unusable. Error: {}\".format(data_description['error']))\n if 'warning' in data_description:\n warn(\"OpenML raised a warning on the dataset. It might be \"\n \"unusable. 
Warning: {}\".format(data_description['warning']))\n\n return_sparse = False\n if data_description['format'].lower() == 'sparse_arff':\n return_sparse = True\n\n if as_frame == 'auto':\n as_frame = not return_sparse\n\n if as_frame and return_sparse:\n raise ValueError('Cannot return dataframe with sparse data')\n\n # download data features, meta-info about column types\n features_list = _get_data_features(data_id, data_home)\n\n if not as_frame:\n for feature in features_list:\n if 'true' in (feature['is_ignore'], feature['is_row_identifier']):\n continue\n if feature['data_type'] == 'string':\n raise ValueError('STRING attributes are not supported for '\n 'array representation. Try as_frame=True')\n\n if target_column == \"default-target\":\n # determines the default target based on the data feature results\n # (which is currently more reliable than the data description;\n # see issue: https://github.com/openml/OpenML/issues/768)\n target_columns = [feature['name'] for feature in features_list\n if feature['is_target'] == 'true']\n elif isinstance(target_column, str):\n # for code-simplicity, make target_column by default a list\n target_columns = [target_column]\n elif target_column is None:\n target_columns = []\n elif isinstance(target_column, list):\n target_columns = target_column\n else:\n raise TypeError(\"Did not recognize type of target_column. \"\n \"Should be str, list or None. Got: \"\n \"{}\".format(type(target_column)))\n data_columns = _valid_data_column_names(features_list,\n target_columns)\n\n shape: Optional[Tuple[int, int]]\n # determine arff encoding to return\n if not return_sparse:\n # The shape must include the ignored features to keep the right indexes\n # during the arff data conversion.\n data_qualities = _get_data_qualities(data_id, data_home)\n shape = _get_num_samples(data_qualities), len(features_list)\n else:\n shape = None\n\n # obtain the data\n url = _DATA_FILE.format(data_description['file_id'])\n bunch = _download_data_to_bunch(url, return_sparse, data_home,\n as_frame=bool(as_frame),\n features_list=features_list, shape=shape,\n target_columns=target_columns,\n data_columns=data_columns,\n md5_checksum=data_description[\n \"md5_checksum\"])\n\n if return_X_y:\n return bunch.data, bunch.target\n\n description = \"{}\\n\\nDownloaded from openml.org.\".format(\n data_description.pop('description'))\n\n bunch.update(\n DESCR=description, details=data_description,\n url=\"https://www.openml.org/d/{}\".format(data_id))\n\n return bunch" + }, + { + "id": "scikit-learn/sklearn.datasets._rcv1/_find_permutation", + "name": "_find_permutation", + "qname": "sklearn.datasets._rcv1._find_permutation", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._rcv1/_find_permutation/a", + "name": "a", + "qname": "sklearn.datasets._rcv1._find_permutation.a", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._rcv1/_find_permutation/b", + "name": "b", + "qname": "sklearn.datasets._rcv1._find_permutation.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Find the permutation from a to b.", + "docstring": "Find the permutation from a to b.", + "code": "def 
_find_permutation(a, b):\n \"\"\"Find the permutation from a to b.\"\"\"\n t = np.argsort(a)\n u = np.argsort(b)\n u_ = _inverse_permutation(u)\n return t[u_]" + }, + { + "id": "scikit-learn/sklearn.datasets._rcv1/_inverse_permutation", + "name": "_inverse_permutation", + "qname": "sklearn.datasets._rcv1._inverse_permutation", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._rcv1/_inverse_permutation/p", + "name": "p", + "qname": "sklearn.datasets._rcv1._inverse_permutation.p", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Inverse permutation p.", + "docstring": "Inverse permutation p.", + "code": "def _inverse_permutation(p):\n \"\"\"Inverse permutation p.\"\"\"\n n = p.size\n s = np.zeros(n, dtype=np.int32)\n i = np.arange(n, dtype=np.int32)\n np.put(s, p, i) # s[p] = i\n return s" + }, + { + "id": "scikit-learn/sklearn.datasets._rcv1/fetch_rcv1", + "name": "fetch_rcv1", + "qname": "sklearn.datasets._rcv1.fetch_rcv1", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._rcv1/fetch_rcv1/data_home", + "name": "data_home", + "qname": "sklearn.datasets._rcv1.fetch_rcv1.data_home", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Specify another download and cache folder for the datasets. By default\nall scikit-learn data is stored in '~/scikit_learn_data' subfolders." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._rcv1/fetch_rcv1/subset", + "name": "subset", + "qname": "sklearn.datasets._rcv1.fetch_rcv1.subset", + "default_value": "'all'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'train', 'test', 'all'}", + "default_value": "'all'", + "description": "Select the dataset to load: 'train' for the training set\n(23149 samples), 'test' for the test set (781265 samples),\n'all' for both, with the training samples first if shuffle is False.\nThis follows the official LYRL2004 chronological split." + }, + "type": { + "kind": "EnumType", + "values": ["all", "test", "train"] + } + }, + { + "id": "scikit-learn/sklearn.datasets._rcv1/fetch_rcv1/download_if_missing", + "name": "download_if_missing", + "qname": "sklearn.datasets._rcv1.fetch_rcv1.download_if_missing", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, raise a IOError if the data is not locally available\ninstead of trying to download the data from the source site." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._rcv1/fetch_rcv1/random_state", + "name": "random_state", + "qname": "sklearn.datasets._rcv1.fetch_rcv1.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset shuffling. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._rcv1/fetch_rcv1/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._rcv1.fetch_rcv1.shuffle", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to shuffle dataset." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._rcv1/fetch_rcv1/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._rcv1.fetch_rcv1.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\nobject. See below for more information about the `dataset.data` and\n`dataset.target` object.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load the RCV1 multilabel dataset (classification).\n\nDownload it if necessary.\n\nVersion: RCV1-v2, vectors, full sets, topics multilabels.\n\n================= =====================\nClasses 103\nSamples total 804414\nDimensionality 47236\nFeatures real, between 0 and 1\n================= =====================\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17", + "docstring": "Load the RCV1 multilabel dataset (classification).\n\nDownload it if necessary.\n\nVersion: RCV1-v2, vectors, full sets, topics multilabels.\n\n================= =====================\nClasses 103\nSamples total 804414\nDimensionality 47236\nFeatures real, between 0 and 1\n================= =====================\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nsubset : {'train', 'test', 'all'}, default='all'\n Select the dataset to load: 'train' for the training set\n (23149 samples), 'test' for the test set (781265 samples),\n 'all' for both, with the training samples first if shuffle is False.\n This follows the official LYRL2004 chronological split.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nshuffle : bool, default=False\n Whether to shuffle dataset.\n\nreturn_X_y : bool, default=False\n If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n object. See below for more information about the `dataset.data` and\n `dataset.target` object.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : sparse matrix of shape (804414, 47236), dtype=np.float64\n The array has 0.16% of non zero values. 
Will be of CSR format.\n target : sparse matrix of shape (804414, 103), dtype=np.uint8\n Each sample has a value of 1 in its categories, and 0 in others.\n The array has 3.15% of non zero values. Will be of CSR format.\n sample_id : ndarray of shape (804414,), dtype=np.uint32,\n Identification number of each sample, as ordered in dataset.data.\n target_names : ndarray of shape (103,), dtype=object\n Names of each target (RCV1 topics), as ordered in dataset.target.\n DESCR : str\n Description of the RCV1 dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20", + "code": "@_deprecate_positional_args\ndef fetch_rcv1(*, data_home=None, subset='all', download_if_missing=True,\n random_state=None, shuffle=False, return_X_y=False):\n \"\"\"Load the RCV1 multilabel dataset (classification).\n\n Download it if necessary.\n\n Version: RCV1-v2, vectors, full sets, topics multilabels.\n\n ================= =====================\n Classes 103\n Samples total 804414\n Dimensionality 47236\n Features real, between 0 and 1\n ================= =====================\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.17\n\n Parameters\n ----------\n data_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n subset : {'train', 'test', 'all'}, default='all'\n Select the dataset to load: 'train' for the training set\n (23149 samples), 'test' for the test set (781265 samples),\n 'all' for both, with the training samples first if shuffle is False.\n This follows the official LYRL2004 chronological split.\n\n download_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n shuffle : bool, default=False\n Whether to shuffle dataset.\n\n return_X_y : bool, default=False\n If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n object. See below for more information about the `dataset.data` and\n `dataset.target` object.\n\n .. versionadded:: 0.20\n\n Returns\n -------\n dataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : sparse matrix of shape (804414, 47236), dtype=np.float64\n The array has 0.16% of non zero values. Will be of CSR format.\n target : sparse matrix of shape (804414, 103), dtype=np.uint8\n Each sample has a value of 1 in its categories, and 0 in others.\n The array has 3.15% of non zero values. Will be of CSR format.\n sample_id : ndarray of shape (804414,), dtype=np.uint32,\n Identification number of each sample, as ordered in dataset.data.\n target_names : ndarray of shape (103,), dtype=object\n Names of each target (RCV1 topics), as ordered in dataset.target.\n DESCR : str\n Description of the RCV1 dataset.\n\n (data, target) : tuple if ``return_X_y`` is True\n\n .. 
versionadded:: 0.20\n \"\"\"\n N_SAMPLES = 804414\n N_FEATURES = 47236\n N_CATEGORIES = 103\n N_TRAIN = 23149\n\n data_home = get_data_home(data_home=data_home)\n rcv1_dir = join(data_home, \"RCV1\")\n if download_if_missing:\n if not exists(rcv1_dir):\n makedirs(rcv1_dir)\n\n samples_path = _pkl_filepath(rcv1_dir, \"samples.pkl\")\n sample_id_path = _pkl_filepath(rcv1_dir, \"sample_id.pkl\")\n sample_topics_path = _pkl_filepath(rcv1_dir, \"sample_topics.pkl\")\n topics_path = _pkl_filepath(rcv1_dir, \"topics_names.pkl\")\n\n # load data (X) and sample_id\n if download_if_missing and (not exists(samples_path) or\n not exists(sample_id_path)):\n files = []\n for each in XY_METADATA:\n logger.info(\"Downloading %s\" % each.url)\n file_path = _fetch_remote(each, dirname=rcv1_dir)\n files.append(GzipFile(filename=file_path))\n\n Xy = load_svmlight_files(files, n_features=N_FEATURES)\n\n # Training data is before testing data\n X = sp.vstack([Xy[8], Xy[0], Xy[2], Xy[4], Xy[6]]).tocsr()\n sample_id = np.hstack((Xy[9], Xy[1], Xy[3], Xy[5], Xy[7]))\n sample_id = sample_id.astype(np.uint32, copy=False)\n\n joblib.dump(X, samples_path, compress=9)\n joblib.dump(sample_id, sample_id_path, compress=9)\n\n # delete archives\n for f in files:\n f.close()\n remove(f.name)\n else:\n X = joblib.load(samples_path)\n sample_id = joblib.load(sample_id_path)\n\n # load target (y), categories, and sample_id_bis\n if download_if_missing and (not exists(sample_topics_path) or\n not exists(topics_path)):\n logger.info(\"Downloading %s\" % TOPICS_METADATA.url)\n topics_archive_path = _fetch_remote(TOPICS_METADATA,\n dirname=rcv1_dir)\n\n # parse the target file\n n_cat = -1\n n_doc = -1\n doc_previous = -1\n y = np.zeros((N_SAMPLES, N_CATEGORIES), dtype=np.uint8)\n sample_id_bis = np.zeros(N_SAMPLES, dtype=np.int32)\n category_names = {}\n with GzipFile(filename=topics_archive_path, mode='rb') as f:\n for line in f:\n line_components = line.decode(\"ascii\").split(\" \")\n if len(line_components) == 3:\n cat, doc, _ = line_components\n if cat not in category_names:\n n_cat += 1\n category_names[cat] = n_cat\n\n doc = int(doc)\n if doc != doc_previous:\n doc_previous = doc\n n_doc += 1\n sample_id_bis[n_doc] = doc\n y[n_doc, category_names[cat]] = 1\n\n # delete archive\n remove(topics_archive_path)\n\n # Samples in X are ordered with sample_id,\n # whereas in y, they are ordered with sample_id_bis.\n permutation = _find_permutation(sample_id_bis, sample_id)\n y = y[permutation, :]\n\n # save category names in a list, with same order than y\n categories = np.empty(N_CATEGORIES, dtype=object)\n for k in category_names.keys():\n categories[category_names[k]] = k\n\n # reorder categories in lexicographic order\n order = np.argsort(categories)\n categories = categories[order]\n y = sp.csr_matrix(y[:, order])\n\n joblib.dump(y, sample_topics_path, compress=9)\n joblib.dump(categories, topics_path, compress=9)\n else:\n y = joblib.load(sample_topics_path)\n categories = joblib.load(topics_path)\n\n if subset == 'all':\n pass\n elif subset == 'train':\n X = X[:N_TRAIN, :]\n y = y[:N_TRAIN, :]\n sample_id = sample_id[:N_TRAIN]\n elif subset == 'test':\n X = X[N_TRAIN:, :]\n y = y[N_TRAIN:, :]\n sample_id = sample_id[N_TRAIN:]\n else:\n raise ValueError(\"Unknown subset parameter. 
Got '%s' instead of one\"\n \" of ('all', 'train', 'test')\" % subset)\n\n if shuffle:\n X, y, sample_id = shuffle_(X, y, sample_id, random_state=random_state)\n\n module_path = dirname(__file__)\n with open(join(module_path, 'descr', 'rcv1.rst')) as rst_file:\n fdescr = rst_file.read()\n\n if return_X_y:\n return X, y\n\n return Bunch(data=X, target=y, sample_id=sample_id,\n target_names=categories, DESCR=fdescr)" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/_generate_hypercube", + "name": "_generate_hypercube", + "qname": "sklearn.datasets._samples_generator._generate_hypercube", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/_generate_hypercube/samples", + "name": "samples", + "qname": "sklearn.datasets._samples_generator._generate_hypercube.samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/_generate_hypercube/dimensions", + "name": "dimensions", + "qname": "sklearn.datasets._samples_generator._generate_hypercube.dimensions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/_generate_hypercube/rng", + "name": "rng", + "qname": "sklearn.datasets._samples_generator._generate_hypercube.rng", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns distinct binary samples of length dimensions.", + "docstring": "Returns distinct binary samples of length dimensions.\n ", + "code": "def _generate_hypercube(samples, dimensions, rng):\n \"\"\"Returns distinct binary samples of length dimensions.\n \"\"\"\n if dimensions > 30:\n return np.hstack([rng.randint(2, size=(samples, dimensions - 30)),\n _generate_hypercube(samples, 30, rng)])\n out = sample_without_replacement(2 ** dimensions, samples,\n random_state=rng).astype(dtype='>u4',\n copy=False)\n out = np.unpackbits(out.view('>u1')).reshape((-1, 32))[:, -dimensions:]\n return out" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/_shuffle", + "name": "_shuffle", + "qname": "sklearn.datasets._samples_generator._shuffle", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/_shuffle/data", + "name": "data", + "qname": "sklearn.datasets._samples_generator._shuffle.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/_shuffle/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator._shuffle.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _shuffle(data, random_state=None):\n generator = check_random_state(random_state)\n 
n_rows, n_cols = data.shape\n row_idx = generator.permutation(n_rows)\n col_idx = generator.permutation(n_cols)\n result = data[row_idx][:, col_idx]\n return result, row_idx, col_idx" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_biclusters", + "name": "make_biclusters", + "qname": "sklearn.datasets._samples_generator.make_biclusters", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_biclusters/shape", + "name": "shape", + "qname": "sklearn.datasets._samples_generator.make_biclusters.shape", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "iterable of shape (n_rows, n_cols)", + "default_value": "", + "description": "The shape of the result." + }, + "type": { + "kind": "NamedType", + "name": "iterable of shape (n_rows, n_cols)" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_biclusters/n_clusters", + "name": "n_clusters", + "qname": "sklearn.datasets._samples_generator.make_biclusters.n_clusters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The number of biclusters." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_biclusters/noise", + "name": "noise", + "qname": "sklearn.datasets._samples_generator.make_biclusters.noise", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The standard deviation of the gaussian noise." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_biclusters/minval", + "name": "minval", + "qname": "sklearn.datasets._samples_generator.make_biclusters.minval", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Minimum value of a bicluster." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_biclusters/maxval", + "name": "maxval", + "qname": "sklearn.datasets._samples_generator.make_biclusters.maxval", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Maximum value of a bicluster." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_biclusters/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._samples_generator.make_biclusters.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Shuffle the samples." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_biclusters/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_biclusters.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. 
Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate an array with constant block diagonal structure for\nbiclustering.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate an array with constant block diagonal structure for\nbiclustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nshape : iterable of shape (n_rows, n_cols)\n The shape of the result.\n\nn_clusters : int\n The number of biclusters.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise.\n\nminval : int, default=10\n Minimum value of a bicluster.\n\nmaxval : int, default=100\n Maximum value of a bicluster.\n\nshuffle : bool, default=True\n Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape `shape`\n The generated array.\n\nrows : ndarray of shape (n_clusters, X.shape[0])\n The indicators for cluster membership of each row.\n\ncols : ndarray of shape (n_clusters, X.shape[1])\n The indicators for cluster membership of each column.\n\nReferences\n----------\n\n.. [1] Dhillon, I. S. (2001, August). Co-clustering documents and\n words using bipartite spectral graph partitioning. In Proceedings\n of the seventh ACM SIGKDD international conference on Knowledge\n discovery and data mining (pp. 269-274). ACM.\n\nSee Also\n--------\nmake_checkerboard", + "code": "@_deprecate_positional_args\ndef make_biclusters(shape, n_clusters, *, noise=0.0, minval=10,\n maxval=100, shuffle=True, random_state=None):\n \"\"\"Generate an array with constant block diagonal structure for\n biclustering.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n shape : iterable of shape (n_rows, n_cols)\n The shape of the result.\n\n n_clusters : int\n The number of biclusters.\n\n noise : float, default=0.0\n The standard deviation of the gaussian noise.\n\n minval : int, default=10\n Minimum value of a bicluster.\n\n maxval : int, default=100\n Maximum value of a bicluster.\n\n shuffle : bool, default=True\n Shuffle the samples.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape `shape`\n The generated array.\n\n rows : ndarray of shape (n_clusters, X.shape[0])\n The indicators for cluster membership of each row.\n\n cols : ndarray of shape (n_clusters, X.shape[1])\n The indicators for cluster membership of each column.\n\n References\n ----------\n\n .. [1] Dhillon, I. S. (2001, August). Co-clustering documents and\n words using bipartite spectral graph partitioning. In Proceedings\n of the seventh ACM SIGKDD international conference on Knowledge\n discovery and data mining (pp. 269-274). 
ACM.\n\n See Also\n --------\n make_checkerboard\n \"\"\"\n generator = check_random_state(random_state)\n n_rows, n_cols = shape\n consts = generator.uniform(minval, maxval, n_clusters)\n\n # row and column clusters of approximately equal sizes\n row_sizes = generator.multinomial(n_rows,\n np.repeat(1.0 / n_clusters,\n n_clusters))\n col_sizes = generator.multinomial(n_cols,\n np.repeat(1.0 / n_clusters,\n n_clusters))\n\n row_labels = np.hstack(list(np.repeat(val, rep) for val, rep in\n zip(range(n_clusters), row_sizes)))\n col_labels = np.hstack(list(np.repeat(val, rep) for val, rep in\n zip(range(n_clusters), col_sizes)))\n\n result = np.zeros(shape, dtype=np.float64)\n for i in range(n_clusters):\n selector = np.outer(row_labels == i, col_labels == i)\n result[selector] += consts[i]\n\n if noise > 0:\n result += generator.normal(scale=noise, size=result.shape)\n\n if shuffle:\n result, row_idx, col_idx = _shuffle(result, random_state)\n row_labels = row_labels[row_idx]\n col_labels = col_labels[col_idx]\n\n rows = np.vstack([row_labels == c for c in range(n_clusters)])\n cols = np.vstack([col_labels == c for c in range(n_clusters)])\n\n return result, rows, cols" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_blobs", + "name": "make_blobs", + "qname": "sklearn.datasets._samples_generator.make_blobs", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_blobs/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_blobs.n_samples", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or array-like", + "default_value": "100", + "description": "If int, it is the total number of points equally divided among\nclusters.\nIf array-like, each element of the sequence indicates\nthe number of samples per cluster.\n\n.. versionchanged:: v0.20\n one can now pass an array-like to the ``n_samples`` parameter" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "array-like" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_blobs/n_features", + "name": "n_features", + "qname": "sklearn.datasets._samples_generator.make_blobs.n_features", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "The number of features for each sample." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_blobs/centers", + "name": "centers", + "qname": "sklearn.datasets._samples_generator.make_blobs.centers", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or ndarray of shape (n_centers, n_features)", + "default_value": "None", + "description": "The number of centers to generate, or the fixed center locations.\nIf n_samples is an int and centers is None, 3 centers are generated.\nIf n_samples is array-like, centers must be\neither None or an array of length equal to the length of n_samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_centers, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_blobs/cluster_std", + "name": "cluster_std", + "qname": "sklearn.datasets._samples_generator.make_blobs.cluster_std", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or array-like of float", + "default_value": "1.0", + "description": "The standard deviation of the clusters." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "array-like of float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_blobs/center_box", + "name": "center_box", + "qname": "sklearn.datasets._samples_generator.make_blobs.center_box", + "default_value": "(-10.0, 10.0)", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "tuple of float (min, max)", + "default_value": "(-10.0, 10.0)", + "description": "The bounding box for each cluster center when centers are\ngenerated at random." + }, + "type": { + "kind": "NamedType", + "name": "tuple of float (min, max)" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_blobs/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._samples_generator.make_blobs.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Shuffle the samples." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_blobs/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_blobs.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_blobs/return_centers", + "name": "return_centers", + "qname": "sklearn.datasets._samples_generator.make_blobs.return_centers", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, then return the centers of each cluster\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate isotropic Gaussian blobs for clustering.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate isotropic Gaussian blobs for clustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or array-like, default=100\n If int, it is the total number of points equally divided among\n clusters.\n If array-like, each element of the sequence indicates\n the number of samples per cluster.\n\n .. 
versionchanged:: v0.20\n one can now pass an array-like to the ``n_samples`` parameter\n\nn_features : int, default=2\n The number of features for each sample.\n\ncenters : int or ndarray of shape (n_centers, n_features), default=None\n The number of centers to generate, or the fixed center locations.\n If n_samples is an int and centers is None, 3 centers are generated.\n If n_samples is array-like, centers must be\n either None or an array of length equal to the length of n_samples.\n\ncluster_std : float or array-like of float, default=1.0\n The standard deviation of the clusters.\n\ncenter_box : tuple of float (min, max), default=(-10.0, 10.0)\n The bounding box for each cluster center when centers are\n generated at random.\n\nshuffle : bool, default=True\n Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nreturn_centers : bool, default=False\n If True, then return the centers of each cluster\n\n .. versionadded:: 0.23\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels for cluster membership of each sample.\n\ncenters : ndarray of shape (n_centers, n_features)\n The centers of each cluster. Only returned if\n ``return_centers=True``.\n\nExamples\n--------\n>>> from sklearn.datasets import make_blobs\n>>> X, y = make_blobs(n_samples=10, centers=3, n_features=2,\n... random_state=0)\n>>> print(X.shape)\n(10, 2)\n>>> y\narray([0, 0, 1, 0, 2, 2, 2, 1, 1, 0])\n>>> X, y = make_blobs(n_samples=[3, 3, 4], centers=None, n_features=2,\n... random_state=0)\n>>> print(X.shape)\n(10, 2)\n>>> y\narray([0, 1, 2, 0, 2, 2, 2, 1, 1, 0])\n\nSee Also\n--------\nmake_classification : A more intricate variant.", + "code": "@_deprecate_positional_args\ndef make_blobs(n_samples=100, n_features=2, *, centers=None, cluster_std=1.0,\n center_box=(-10.0, 10.0), shuffle=True, random_state=None,\n return_centers=False):\n \"\"\"Generate isotropic Gaussian blobs for clustering.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int or array-like, default=100\n If int, it is the total number of points equally divided among\n clusters.\n If array-like, each element of the sequence indicates\n the number of samples per cluster.\n\n .. versionchanged:: v0.20\n one can now pass an array-like to the ``n_samples`` parameter\n\n n_features : int, default=2\n The number of features for each sample.\n\n centers : int or ndarray of shape (n_centers, n_features), default=None\n The number of centers to generate, or the fixed center locations.\n If n_samples is an int and centers is None, 3 centers are generated.\n If n_samples is array-like, centers must be\n either None or an array of length equal to the length of n_samples.\n\n cluster_std : float or array-like of float, default=1.0\n The standard deviation of the clusters.\n\n center_box : tuple of float (min, max), default=(-10.0, 10.0)\n The bounding box for each cluster center when centers are\n generated at random.\n\n shuffle : bool, default=True\n Shuffle the samples.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n return_centers : bool, default=False\n If True, then return the centers of each cluster\n\n .. versionadded:: 0.23\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n The generated samples.\n\n y : ndarray of shape (n_samples,)\n The integer labels for cluster membership of each sample.\n\n centers : ndarray of shape (n_centers, n_features)\n The centers of each cluster. Only returned if\n ``return_centers=True``.\n\n Examples\n --------\n >>> from sklearn.datasets import make_blobs\n >>> X, y = make_blobs(n_samples=10, centers=3, n_features=2,\n ... random_state=0)\n >>> print(X.shape)\n (10, 2)\n >>> y\n array([0, 0, 1, 0, 2, 2, 2, 1, 1, 0])\n >>> X, y = make_blobs(n_samples=[3, 3, 4], centers=None, n_features=2,\n ... random_state=0)\n >>> print(X.shape)\n (10, 2)\n >>> y\n array([0, 1, 2, 0, 2, 2, 2, 1, 1, 0])\n\n See Also\n --------\n make_classification : A more intricate variant.\n \"\"\"\n generator = check_random_state(random_state)\n\n if isinstance(n_samples, numbers.Integral):\n # Set n_centers by looking at centers arg\n if centers is None:\n centers = 3\n\n if isinstance(centers, numbers.Integral):\n n_centers = centers\n centers = generator.uniform(center_box[0], center_box[1],\n size=(n_centers, n_features))\n\n else:\n centers = check_array(centers)\n n_features = centers.shape[1]\n n_centers = centers.shape[0]\n\n else:\n # Set n_centers by looking at [n_samples] arg\n n_centers = len(n_samples)\n if centers is None:\n centers = generator.uniform(center_box[0], center_box[1],\n size=(n_centers, n_features))\n try:\n assert len(centers) == n_centers\n except TypeError as e:\n raise ValueError(\"Parameter `centers` must be array-like. \"\n \"Got {!r} instead\".format(centers)) from e\n except AssertionError as e:\n raise ValueError(\n f\"Length of `n_samples` not consistent with number of \"\n f\"centers. Got n_samples = {n_samples} and centers = {centers}\"\n ) from e\n else:\n centers = check_array(centers)\n n_features = centers.shape[1]\n\n # stds: if cluster_std is given as list, it must be consistent\n # with the n_centers\n if (hasattr(cluster_std, \"__len__\") and len(cluster_std) != n_centers):\n raise ValueError(\"Length of `clusters_std` not consistent with \"\n \"number of centers. 
Got centers = {} \"\n \"and cluster_std = {}\".format(centers, cluster_std))\n\n if isinstance(cluster_std, numbers.Real):\n cluster_std = np.full(len(centers), cluster_std)\n\n X = []\n y = []\n\n if isinstance(n_samples, Iterable):\n n_samples_per_center = n_samples\n else:\n n_samples_per_center = [int(n_samples // n_centers)] * n_centers\n\n for i in range(n_samples % n_centers):\n n_samples_per_center[i] += 1\n\n for i, (n, std) in enumerate(zip(n_samples_per_center, cluster_std)):\n X.append(generator.normal(loc=centers[i], scale=std,\n size=(n, n_features)))\n y += [i] * n\n\n X = np.concatenate(X)\n y = np.array(y)\n\n if shuffle:\n total_n_samples = np.sum(n_samples)\n indices = np.arange(total_n_samples)\n generator.shuffle(indices)\n X = X[indices]\n y = y[indices]\n\n if return_centers:\n return X, y, centers\n else:\n return X, y" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_checkerboard", + "name": "make_checkerboard", + "qname": "sklearn.datasets._samples_generator.make_checkerboard", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_checkerboard/shape", + "name": "shape", + "qname": "sklearn.datasets._samples_generator.make_checkerboard.shape", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tuple of shape (n_rows, n_cols)", + "default_value": "", + "description": "The shape of the result." + }, + "type": { + "kind": "NamedType", + "name": "tuple of shape (n_rows, n_cols)" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_checkerboard/n_clusters", + "name": "n_clusters", + "qname": "sklearn.datasets._samples_generator.make_checkerboard.n_clusters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or array-like or shape (n_row_clusters, n_column_clusters)", + "default_value": "", + "description": "The number of row and column clusters." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_row_clusters, n_column_clusters)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_checkerboard/noise", + "name": "noise", + "qname": "sklearn.datasets._samples_generator.make_checkerboard.noise", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The standard deviation of the gaussian noise." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_checkerboard/minval", + "name": "minval", + "qname": "sklearn.datasets._samples_generator.make_checkerboard.minval", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Minimum value of a bicluster." 
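The `make_blobs` entry above records both calling conventions from its docstring: an int `n_samples` (split equally among clusters) and, since 0.20, an array-like `n_samples` with one count per cluster, plus the `return_centers` flag added in 0.23. A sketch mirroring the docstring's own examples:

```python
# Sketch of make_blobs per the docstring captured above (scikit-learn 0.24).
from sklearn.datasets import make_blobs

# int n_samples: with centers=None and int n_samples, 3 centers are generated.
X, y = make_blobs(n_samples=10, centers=3, n_features=2, random_state=0)
print(X.shape, y.shape)  # (10, 2) (10,)

# array-like n_samples (0.20+): one entry per cluster; return_centers (0.23+).
X, y, centers = make_blobs(
    n_samples=[3, 3, 4], centers=None, n_features=2,
    random_state=0, return_centers=True,
)
print(centers.shape)  # (3, 2): one generated center per requested cluster
```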
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_checkerboard/maxval", + "name": "maxval", + "qname": "sklearn.datasets._samples_generator.make_checkerboard.maxval", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Maximum value of a bicluster." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_checkerboard/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._samples_generator.make_checkerboard.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Shuffle the samples." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_checkerboard/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_checkerboard.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate an array with block checkerboard structure for\nbiclustering.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate an array with block checkerboard structure for\nbiclustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nshape : tuple of shape (n_rows, n_cols)\n The shape of the result.\n\nn_clusters : int or array-like or shape (n_row_clusters, n_column_clusters)\n The number of row and column clusters.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise.\n\nminval : int, default=10\n Minimum value of a bicluster.\n\nmaxval : int, default=100\n Maximum value of a bicluster.\n\nshuffle : bool, default=True\n Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape `shape`\n The generated array.\n\nrows : ndarray of shape (n_clusters, X.shape[0])\n The indicators for cluster membership of each row.\n\ncols : ndarray of shape (n_clusters, X.shape[1])\n The indicators for cluster membership of each column.\n\n\nReferences\n----------\n\n.. [1] Kluger, Y., Basri, R., Chang, J. T., & Gerstein, M. (2003).\n Spectral biclustering of microarray data: coclustering genes\n and conditions. 
Genome research, 13(4), 703-716.\n\nSee Also\n--------\nmake_biclusters", + "code": "@_deprecate_positional_args\ndef make_checkerboard(shape, n_clusters, *, noise=0.0, minval=10,\n maxval=100, shuffle=True, random_state=None):\n \"\"\"Generate an array with block checkerboard structure for\n biclustering.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n shape : tuple of shape (n_rows, n_cols)\n The shape of the result.\n\n n_clusters : int or array-like or shape (n_row_clusters, n_column_clusters)\n The number of row and column clusters.\n\n noise : float, default=0.0\n The standard deviation of the gaussian noise.\n\n minval : int, default=10\n Minimum value of a bicluster.\n\n maxval : int, default=100\n Maximum value of a bicluster.\n\n shuffle : bool, default=True\n Shuffle the samples.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape `shape`\n The generated array.\n\n rows : ndarray of shape (n_clusters, X.shape[0])\n The indicators for cluster membership of each row.\n\n cols : ndarray of shape (n_clusters, X.shape[1])\n The indicators for cluster membership of each column.\n\n\n References\n ----------\n\n .. [1] Kluger, Y., Basri, R., Chang, J. T., & Gerstein, M. (2003).\n Spectral biclustering of microarray data: coclustering genes\n and conditions. Genome research, 13(4), 703-716.\n\n See Also\n --------\n make_biclusters\n \"\"\"\n generator = check_random_state(random_state)\n\n if hasattr(n_clusters, \"__len__\"):\n n_row_clusters, n_col_clusters = n_clusters\n else:\n n_row_clusters = n_col_clusters = n_clusters\n\n # row and column clusters of approximately equal sizes\n n_rows, n_cols = shape\n row_sizes = generator.multinomial(n_rows,\n np.repeat(1.0 / n_row_clusters,\n n_row_clusters))\n col_sizes = generator.multinomial(n_cols,\n np.repeat(1.0 / n_col_clusters,\n n_col_clusters))\n\n row_labels = np.hstack(list(np.repeat(val, rep) for val, rep in\n zip(range(n_row_clusters), row_sizes)))\n col_labels = np.hstack(list(np.repeat(val, rep) for val, rep in\n zip(range(n_col_clusters), col_sizes)))\n\n result = np.zeros(shape, dtype=np.float64)\n for i in range(n_row_clusters):\n for j in range(n_col_clusters):\n selector = np.outer(row_labels == i, col_labels == j)\n result[selector] += generator.uniform(minval, maxval)\n\n if noise > 0:\n result += generator.normal(scale=noise, size=result.shape)\n\n if shuffle:\n result, row_idx, col_idx = _shuffle(result, random_state)\n row_labels = row_labels[row_idx]\n col_labels = col_labels[col_idx]\n\n rows = np.vstack([row_labels == label\n for label in range(n_row_clusters)\n for _ in range(n_col_clusters)])\n cols = np.vstack([col_labels == label\n for _ in range(n_row_clusters)\n for label in range(n_col_clusters)])\n\n return result, rows, cols" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_circles", + "name": "make_circles", + "qname": "sklearn.datasets._samples_generator.make_circles", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_circles/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_circles.n_samples", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or tuple of shape 
(2,), dtype=int", + "default_value": "100", + "description": "If int, it is the total number of points generated.\nFor odd numbers, the inner circle will have one point more than the\nouter circle.\nIf two-element tuple, number of points in outer circle and inner\ncircle.\n\n.. versionchanged:: 0.23\n Added two-element tuple." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "tuple of shape (2,)" + }, + { + "kind": "NamedType", + "name": "dtype=int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_circles/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._samples_generator.make_circles.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to shuffle the samples." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_circles/noise", + "name": "noise", + "qname": "sklearn.datasets._samples_generator.make_circles.noise", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Standard deviation of Gaussian noise added to the data." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_circles/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_circles.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset shuffling and noise.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_circles/factor", + "name": "factor", + "qname": "sklearn.datasets._samples_generator.make_circles.factor", + "default_value": "0.8", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": ".8", + "description": "Scale factor between inner and outer circle in the range `(0, 1)`." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": 1.0, + "min_inclusive": false, + "max_inclusive": false + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Make a large circle containing a smaller circle in 2d.\n\nA simple toy dataset to visualize clustering and classification\nalgorithms.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Make a large circle containing a smaller circle in 2d.\n\nA simple toy dataset to visualize clustering and classification\nalgorithms.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or tuple of shape (2,), dtype=int, default=100\n If int, it is the total number of points generated.\n For odd numbers, the inner circle will have one point more than the\n outer circle.\n If two-element tuple, number of points in outer circle and inner\n circle.\n\n .. versionchanged:: 0.23\n Added two-element tuple.\n\nshuffle : bool, default=True\n Whether to shuffle the samples.\n\nnoise : float, default=None\n Standard deviation of Gaussian noise added to the data.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nfactor : float, default=.8\n Scale factor between inner and outer circle in the range `(0, 1)`.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 2)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels (0 or 1) for class membership of each sample.", + "code": "@_deprecate_positional_args\ndef make_circles(n_samples=100, *, shuffle=True, noise=None, random_state=None,\n factor=.8):\n \"\"\"Make a large circle containing a smaller circle in 2d.\n\n A simple toy dataset to visualize clustering and classification\n algorithms.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int or tuple of shape (2,), dtype=int, default=100\n If int, it is the total number of points generated.\n For odd numbers, the inner circle will have one point more than the\n outer circle.\n If two-element tuple, number of points in outer circle and inner\n circle.\n\n .. 
versionchanged:: 0.23\n Added two-element tuple.\n\n shuffle : bool, default=True\n Whether to shuffle the samples.\n\n noise : float, default=None\n Standard deviation of Gaussian noise added to the data.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n factor : float, default=.8\n Scale factor between inner and outer circle in the range `(0, 1)`.\n\n Returns\n -------\n X : ndarray of shape (n_samples, 2)\n The generated samples.\n\n y : ndarray of shape (n_samples,)\n The integer labels (0 or 1) for class membership of each sample.\n \"\"\"\n\n if factor >= 1 or factor < 0:\n raise ValueError(\"'factor' has to be between 0 and 1.\")\n\n if isinstance(n_samples, numbers.Integral):\n n_samples_out = n_samples // 2\n n_samples_in = n_samples - n_samples_out\n else:\n try:\n n_samples_out, n_samples_in = n_samples\n except ValueError as e:\n raise ValueError('`n_samples` can be either an int or '\n 'a two-element tuple.') from e\n\n generator = check_random_state(random_state)\n # so as not to have the first point = last point, we set endpoint=False\n linspace_out = np.linspace(0, 2 * np.pi, n_samples_out, endpoint=False)\n linspace_in = np.linspace(0, 2 * np.pi, n_samples_in, endpoint=False)\n outer_circ_x = np.cos(linspace_out)\n outer_circ_y = np.sin(linspace_out)\n inner_circ_x = np.cos(linspace_in) * factor\n inner_circ_y = np.sin(linspace_in) * factor\n\n X = np.vstack([np.append(outer_circ_x, inner_circ_x),\n np.append(outer_circ_y, inner_circ_y)]).T\n y = np.hstack([np.zeros(n_samples_out, dtype=np.intp),\n np.ones(n_samples_in, dtype=np.intp)])\n if shuffle:\n X, y = util_shuffle(X, y, random_state=generator)\n\n if noise is not None:\n X += generator.normal(scale=noise, size=X.shape)\n\n return X, y" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification", + "name": "make_classification", + "qname": "sklearn.datasets._samples_generator.make_classification", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_classification.n_samples", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/n_features", + "name": "n_features", + "qname": "sklearn.datasets._samples_generator.make_classification.n_features", + "default_value": "20", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "20", + "description": "The total number of features. These comprise ``n_informative``\ninformative features, ``n_redundant`` redundant features,\n``n_repeated`` duplicated features and\n``n_features-n_informative-n_redundant-n_repeated`` useless features\ndrawn at random." 
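The `make_circles` entry completed above documents that an int `n_samples` is split between the outer circle (label 0) and inner circle (label 1), while a two-element tuple (added in 0.23) sets the two counts directly. A sketch with arbitrary illustration values:

```python
# Sketch of make_circles per its entry above (scikit-learn 0.24).
from sklearn.datasets import make_circles

# int n_samples: points are split between outer (label 0) and inner (label 1).
X, y = make_circles(n_samples=100, noise=0.05, factor=0.5, random_state=0)
print(X.shape)                         # (100, 2)
print((y == 0).sum(), (y == 1).sum())  # 50 50

# two-element tuple (0.23+): (n_outer, n_inner).
X, y = make_circles(n_samples=(30, 70), random_state=0)
print((y == 0).sum(), (y == 1).sum())  # 30 70
```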
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/n_informative", + "name": "n_informative", + "qname": "sklearn.datasets._samples_generator.make_classification.n_informative", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "The number of informative features. Each class is composed of a number\nof gaussian clusters each located around the vertices of a hypercube\nin a subspace of dimension ``n_informative``. For each cluster,\ninformative features are drawn independently from N(0, 1) and then\nrandomly linearly combined within each cluster in order to add\ncovariance. The clusters are then placed on the vertices of the\nhypercube." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/n_redundant", + "name": "n_redundant", + "qname": "sklearn.datasets._samples_generator.make_classification.n_redundant", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "The number of redundant features. These features are generated as\nrandom linear combinations of the informative features." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/n_repeated", + "name": "n_repeated", + "qname": "sklearn.datasets._samples_generator.make_classification.n_repeated", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The number of duplicated features, drawn randomly from the informative\nand the redundant features." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/n_classes", + "name": "n_classes", + "qname": "sklearn.datasets._samples_generator.make_classification.n_classes", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "The number of classes (or labels) of the classification problem." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/n_clusters_per_class", + "name": "n_clusters_per_class", + "qname": "sklearn.datasets._samples_generator.make_classification.n_clusters_per_class", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "The number of clusters per class." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/weights", + "name": "weights", + "qname": "sklearn.datasets._samples_generator.make_classification.weights", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,) or (n_classes - 1,)", + "default_value": "None", + "description": "The proportions of samples assigned to each class. If None, then\nclasses are balanced. 
Note that if ``len(weights) == n_classes - 1``,\nthen the last class weight is automatically inferred.\nMore than ``n_samples`` samples may be returned if the sum of\n``weights`` exceeds 1. Note that the actual class proportions will\nnot exactly match ``weights`` when ``flip_y`` isn't 0." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,) or (n_classes - 1,)" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/flip_y", + "name": "flip_y", + "qname": "sklearn.datasets._samples_generator.make_classification.flip_y", + "default_value": "0.01", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.01", + "description": "The fraction of samples whose class is assigned randomly. Larger\nvalues introduce noise in the labels and make the classification\ntask harder. Note that the default setting flip_y > 0 might lead\nto less than ``n_classes`` in y in some cases." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/class_sep", + "name": "class_sep", + "qname": "sklearn.datasets._samples_generator.make_classification.class_sep", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "The factor multiplying the hypercube size. Larger values spread\nout the clusters/classes and make the classification task easier." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/hypercube", + "name": "hypercube", + "qname": "sklearn.datasets._samples_generator.make_classification.hypercube", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, the clusters are put on the vertices of a hypercube. If\nFalse, the clusters are put on the vertices of a random polytope." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/shift", + "name": "shift", + "qname": "sklearn.datasets._samples_generator.make_classification.shift", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float, ndarray of shape (n_features,) or None", + "default_value": "0.0", + "description": "Shift features by the specified value. If None, then features\nare shifted by a random value drawn in [-class_sep, class_sep]." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/scale", + "name": "scale", + "qname": "sklearn.datasets._samples_generator.make_classification.scale", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float, ndarray of shape (n_features,) or None", + "default_value": "1.0", + "description": "Multiply features by the specified value. If None, then features\nare scaled by a random value drawn in [1, 100]. Note that scaling\nhappens after shifting." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._samples_generator.make_classification.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Shuffle the samples and the features." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_classification/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_classification.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate a random n-class classification problem.\n\nThis initially creates clusters of points normally distributed (std=1)\nabout vertices of an ``n_informative``-dimensional hypercube with sides of\nlength ``2*class_sep`` and assigns an equal number of clusters to each\nclass. It introduces interdependence between these features and adds\nvarious types of further noise to the data.\n\nWithout shuffling, ``X`` horizontally stacks features in the following\norder: the primary ``n_informative`` features, followed by ``n_redundant``\nlinear combinations of the informative features, followed by ``n_repeated``\nduplicates, drawn randomly with replacement from the informative and\nredundant features. The remaining features are filled with random noise.\nThus, without shuffling, all useful features are contained in the columns\n``X[:, :n_informative + n_redundant + n_repeated]``.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate a random n-class classification problem.\n\nThis initially creates clusters of points normally distributed (std=1)\nabout vertices of an ``n_informative``-dimensional hypercube with sides of\nlength ``2*class_sep`` and assigns an equal number of clusters to each\nclass. It introduces interdependence between these features and adds\nvarious types of further noise to the data.\n\nWithout shuffling, ``X`` horizontally stacks features in the following\norder: the primary ``n_informative`` features, followed by ``n_redundant``\nlinear combinations of the informative features, followed by ``n_repeated``\nduplicates, drawn randomly with replacement from the informative and\nredundant features. 
The remaining features are filled with random noise.\nThus, without shuffling, all useful features are contained in the columns\n``X[:, :n_informative + n_redundant + n_repeated]``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=20\n The total number of features. These comprise ``n_informative``\n informative features, ``n_redundant`` redundant features,\n ``n_repeated`` duplicated features and\n ``n_features-n_informative-n_redundant-n_repeated`` useless features\n drawn at random.\n\nn_informative : int, default=2\n The number of informative features. Each class is composed of a number\n of gaussian clusters each located around the vertices of a hypercube\n in a subspace of dimension ``n_informative``. For each cluster,\n informative features are drawn independently from N(0, 1) and then\n randomly linearly combined within each cluster in order to add\n covariance. The clusters are then placed on the vertices of the\n hypercube.\n\nn_redundant : int, default=2\n The number of redundant features. These features are generated as\n random linear combinations of the informative features.\n\nn_repeated : int, default=0\n The number of duplicated features, drawn randomly from the informative\n and the redundant features.\n\nn_classes : int, default=2\n The number of classes (or labels) of the classification problem.\n\nn_clusters_per_class : int, default=2\n The number of clusters per class.\n\nweights : array-like of shape (n_classes,) or (n_classes - 1,), default=None\n The proportions of samples assigned to each class. If None, then\n classes are balanced. Note that if ``len(weights) == n_classes - 1``,\n then the last class weight is automatically inferred.\n More than ``n_samples`` samples may be returned if the sum of\n ``weights`` exceeds 1. Note that the actual class proportions will\n not exactly match ``weights`` when ``flip_y`` isn't 0.\n\nflip_y : float, default=0.01\n The fraction of samples whose class is assigned randomly. Larger\n values introduce noise in the labels and make the classification\n task harder. Note that the default setting flip_y > 0 might lead\n to less than ``n_classes`` in y in some cases.\n\nclass_sep : float, default=1.0\n The factor multiplying the hypercube size. Larger values spread\n out the clusters/classes and make the classification task easier.\n\nhypercube : bool, default=True\n If True, the clusters are put on the vertices of a hypercube. If\n False, the clusters are put on the vertices of a random polytope.\n\nshift : float, ndarray of shape (n_features,) or None, default=0.0\n Shift features by the specified value. If None, then features\n are shifted by a random value drawn in [-class_sep, class_sep].\n\nscale : float, ndarray of shape (n_features,) or None, default=1.0\n Multiply features by the specified value. If None, then features\n are scaled by a random value drawn in [1, 100]. Note that scaling\n happens after shifting.\n\nshuffle : bool, default=True\n Shuffle the samples and the features.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels for class membership of each sample.\n\nNotes\n-----\nThe algorithm is adapted from Guyon [1] and was designed to generate\nthe \"Madelon\" dataset.\n\nReferences\n----------\n.. [1] I. Guyon, \"Design of experiments for the NIPS 2003 variable\n selection benchmark\", 2003.\n\nSee Also\n--------\nmake_blobs : Simplified variant.\nmake_multilabel_classification : Unrelated generator for multilabel tasks.", + "code": "@_deprecate_positional_args\ndef make_classification(n_samples=100, n_features=20, *, n_informative=2,\n n_redundant=2, n_repeated=0, n_classes=2,\n n_clusters_per_class=2, weights=None, flip_y=0.01,\n class_sep=1.0, hypercube=True, shift=0.0, scale=1.0,\n shuffle=True, random_state=None):\n \"\"\"Generate a random n-class classification problem.\n\n This initially creates clusters of points normally distributed (std=1)\n about vertices of an ``n_informative``-dimensional hypercube with sides of\n length ``2*class_sep`` and assigns an equal number of clusters to each\n class. It introduces interdependence between these features and adds\n various types of further noise to the data.\n\n Without shuffling, ``X`` horizontally stacks features in the following\n order: the primary ``n_informative`` features, followed by ``n_redundant``\n linear combinations of the informative features, followed by ``n_repeated``\n duplicates, drawn randomly with replacement from the informative and\n redundant features. The remaining features are filled with random noise.\n Thus, without shuffling, all useful features are contained in the columns\n ``X[:, :n_informative + n_redundant + n_repeated]``.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int, default=100\n The number of samples.\n\n n_features : int, default=20\n The total number of features. These comprise ``n_informative``\n informative features, ``n_redundant`` redundant features,\n ``n_repeated`` duplicated features and\n ``n_features-n_informative-n_redundant-n_repeated`` useless features\n drawn at random.\n\n n_informative : int, default=2\n The number of informative features. Each class is composed of a number\n of gaussian clusters each located around the vertices of a hypercube\n in a subspace of dimension ``n_informative``. For each cluster,\n informative features are drawn independently from N(0, 1) and then\n randomly linearly combined within each cluster in order to add\n covariance. The clusters are then placed on the vertices of the\n hypercube.\n\n n_redundant : int, default=2\n The number of redundant features. These features are generated as\n random linear combinations of the informative features.\n\n n_repeated : int, default=0\n The number of duplicated features, drawn randomly from the informative\n and the redundant features.\n\n n_classes : int, default=2\n The number of classes (or labels) of the classification problem.\n\n n_clusters_per_class : int, default=2\n The number of clusters per class.\n\n weights : array-like of shape (n_classes,) or (n_classes - 1,),\\\n default=None\n The proportions of samples assigned to each class. If None, then\n classes are balanced. 
Note that if ``len(weights) == n_classes - 1``,\n then the last class weight is automatically inferred.\n More than ``n_samples`` samples may be returned if the sum of\n ``weights`` exceeds 1. Note that the actual class proportions will\n not exactly match ``weights`` when ``flip_y`` isn't 0.\n\n flip_y : float, default=0.01\n The fraction of samples whose class is assigned randomly. Larger\n values introduce noise in the labels and make the classification\n task harder. Note that the default setting flip_y > 0 might lead\n to less than ``n_classes`` in y in some cases.\n\n class_sep : float, default=1.0\n The factor multiplying the hypercube size. Larger values spread\n out the clusters/classes and make the classification task easier.\n\n hypercube : bool, default=True\n If True, the clusters are put on the vertices of a hypercube. If\n False, the clusters are put on the vertices of a random polytope.\n\n shift : float, ndarray of shape (n_features,) or None, default=0.0\n Shift features by the specified value. If None, then features\n are shifted by a random value drawn in [-class_sep, class_sep].\n\n scale : float, ndarray of shape (n_features,) or None, default=1.0\n Multiply features by the specified value. If None, then features\n are scaled by a random value drawn in [1, 100]. Note that scaling\n happens after shifting.\n\n shuffle : bool, default=True\n Shuffle the samples and the features.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n The generated samples.\n\n y : ndarray of shape (n_samples,)\n The integer labels for class membership of each sample.\n\n Notes\n -----\n The algorithm is adapted from Guyon [1] and was designed to generate\n the \"Madelon\" dataset.\n\n References\n ----------\n .. [1] I. 
Guyon, \"Design of experiments for the NIPS 2003 variable\n selection benchmark\", 2003.\n\n See Also\n --------\n make_blobs : Simplified variant.\n make_multilabel_classification : Unrelated generator for multilabel tasks.\n \"\"\"\n generator = check_random_state(random_state)\n\n # Count features, clusters and samples\n if n_informative + n_redundant + n_repeated > n_features:\n raise ValueError(\"Number of informative, redundant and repeated \"\n \"features must sum to less than the number of total\"\n \" features\")\n # Use log2 to avoid overflow errors\n if n_informative < np.log2(n_classes * n_clusters_per_class):\n msg = \"n_classes({}) * n_clusters_per_class({}) must be\"\n msg += \" smaller or equal 2**n_informative({})={}\"\n raise ValueError(msg.format(n_classes, n_clusters_per_class,\n n_informative, 2**n_informative))\n\n if weights is not None:\n if len(weights) not in [n_classes, n_classes - 1]:\n raise ValueError(\"Weights specified but incompatible with number \"\n \"of classes.\")\n if len(weights) == n_classes - 1:\n if isinstance(weights, list):\n weights = weights + [1.0 - sum(weights)]\n else:\n weights = np.resize(weights, n_classes)\n weights[-1] = 1.0 - sum(weights[:-1])\n else:\n weights = [1.0 / n_classes] * n_classes\n\n n_useless = n_features - n_informative - n_redundant - n_repeated\n n_clusters = n_classes * n_clusters_per_class\n\n # Distribute samples among clusters by weight\n n_samples_per_cluster = [\n int(n_samples * weights[k % n_classes] / n_clusters_per_class)\n for k in range(n_clusters)]\n\n for i in range(n_samples - sum(n_samples_per_cluster)):\n n_samples_per_cluster[i % n_clusters] += 1\n\n # Initialize X and y\n X = np.zeros((n_samples, n_features))\n y = np.zeros(n_samples, dtype=int)\n\n # Build the polytope whose vertices become cluster centroids\n centroids = _generate_hypercube(n_clusters, n_informative,\n generator).astype(float, copy=False)\n centroids *= 2 * class_sep\n centroids -= class_sep\n if not hypercube:\n centroids *= generator.rand(n_clusters, 1)\n centroids *= generator.rand(1, n_informative)\n\n # Initially draw informative features from the standard normal\n X[:, :n_informative] = generator.randn(n_samples, n_informative)\n\n # Create each cluster; a variant of make_blobs\n stop = 0\n for k, centroid in enumerate(centroids):\n start, stop = stop, stop + n_samples_per_cluster[k]\n y[start:stop] = k % n_classes # assign labels\n X_k = X[start:stop, :n_informative] # slice a view of the cluster\n\n A = 2 * generator.rand(n_informative, n_informative) - 1\n X_k[...] 
= np.dot(X_k, A) # introduce random covariance\n\n X_k += centroid # shift the cluster to a vertex\n\n # Create redundant features\n if n_redundant > 0:\n B = 2 * generator.rand(n_informative, n_redundant) - 1\n X[:, n_informative:n_informative + n_redundant] = \\\n np.dot(X[:, :n_informative], B)\n\n # Repeat some features\n if n_repeated > 0:\n n = n_informative + n_redundant\n indices = ((n - 1) * generator.rand(n_repeated) + 0.5).astype(np.intp)\n X[:, n:n + n_repeated] = X[:, indices]\n\n # Fill useless features\n if n_useless > 0:\n X[:, -n_useless:] = generator.randn(n_samples, n_useless)\n\n # Randomly replace labels\n if flip_y >= 0.0:\n flip_mask = generator.rand(n_samples) < flip_y\n y[flip_mask] = generator.randint(n_classes, size=flip_mask.sum())\n\n # Randomly shift and scale\n if shift is None:\n shift = (2 * generator.rand(n_features) - 1) * class_sep\n X += shift\n\n if scale is None:\n scale = 1 + 100 * generator.rand(n_features)\n X *= scale\n\n if shuffle:\n # Randomly permute samples\n X, y = util_shuffle(X, y, random_state=generator)\n\n # Randomly permute features\n indices = np.arange(n_features)\n generator.shuffle(indices)\n X[:, :] = X[:, indices]\n\n return X, y" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_friedman1", + "name": "make_friedman1", + "qname": "sklearn.datasets._samples_generator.make_friedman1", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_friedman1/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_friedman1.n_samples", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_friedman1/n_features", + "name": "n_features", + "qname": "sklearn.datasets._samples_generator.make_friedman1.n_features", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "The number of features. Should be at least 5." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_friedman1/noise", + "name": "noise", + "qname": "sklearn.datasets._samples_generator.make_friedman1.noise", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The standard deviation of the gaussian noise applied to the output." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_friedman1/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_friedman1.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset noise. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." 
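The `make_classification` entry completed above describes the feature layout when `shuffle=False`: informative columns first, then redundant, then repeated, then noise. A sketch exercising that layout plus the `weights` rule where the last class weight is inferred when `len(weights) == n_classes - 1`; all numeric choices are arbitrary:

```python
# Sketch of make_classification per its entry above (scikit-learn 0.24).
from sklearn.datasets import make_classification

X, y = make_classification(
    n_samples=200, n_features=20, n_informative=5, n_redundant=2,
    n_repeated=1, n_classes=3, n_clusters_per_class=2,
    weights=[0.5, 0.3],      # last class weight inferred as 0.2
    flip_y=0.01, class_sep=1.5, shuffle=False, random_state=0,
)
print(X.shape, y.shape)  # (200, 20) (200,)
# Without shuffling, all useful features occupy the leading columns:
# X[:, :5 + 2 + 1] holds the informative + redundant + repeated features,
# and the remaining 12 columns are random noise.
```

Note the documented constraint: `n_classes * n_clusters_per_class` must not exceed `2**n_informative` (here 6 <= 32), since cluster centroids are placed on hypercube vertices in the informative subspace.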
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate the \"Friedman #1\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are independent features uniformly distributed on the interval\n[0, 1]. The output `y` is created according to the formula::\n\n y(X) = 10 * sin(pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - 0.5) ** 2 + 10 * X[:, 3] + 5 * X[:, 4] + noise * N(0, 1).\n\nOut of the `n_features` features, only 5 are actually used to compute\n`y`. The remaining features are independent of `y`.\n\nThe number of features has to be >= 5.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate the \"Friedman #1\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are independent features uniformly distributed on the interval\n[0, 1]. The output `y` is created according to the formula::\n\n y(X) = 10 * sin(pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - 0.5) ** 2 + 10 * X[:, 3] + 5 * X[:, 4] + noise * N(0, 1).\n\nOut of the `n_features` features, only 5 are actually used to compute\n`y`. The remaining features are independent of `y`.\n\nThe number of features has to be >= 5.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=10\n The number of features. Should be at least 5.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996.", + "code": "@_deprecate_positional_args\ndef make_friedman1(n_samples=100, n_features=10, *, noise=0.0,\n random_state=None):\n \"\"\"Generate the \"Friedman #1\" regression problem.\n\n This dataset is described in Friedman [1] and Breiman [2].\n\n Inputs `X` are independent features uniformly distributed on the interval\n [0, 1]. The output `y` is created according to the formula::\n\n y(X) = 10 * sin(pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - 0.5) ** 2 \\\n+ 10 * X[:, 3] + 5 * X[:, 4] + noise * N(0, 1).\n\n Out of the `n_features` features, only 5 are actually used to compute\n `y`. The remaining features are independent of `y`.\n\n The number of features has to be >= 5.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int, default=100\n The number of samples.\n\n n_features : int, default=10\n The number of features. Should be at least 5.\n\n noise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n The input samples.\n\n y : ndarray of shape (n_samples,)\n The output values.\n\n References\n ----------\n .. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n .. [2] L. Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996.\n \"\"\"\n if n_features < 5:\n raise ValueError(\"n_features must be at least five.\")\n\n generator = check_random_state(random_state)\n\n X = generator.rand(n_samples, n_features)\n y = 10 * np.sin(np.pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - 0.5) ** 2 \\\n + 10 * X[:, 3] + 5 * X[:, 4] + noise * generator.randn(n_samples)\n\n return X, y" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_friedman2", + "name": "make_friedman2", + "qname": "sklearn.datasets._samples_generator.make_friedman2", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_friedman2/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_friedman2.n_samples", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_friedman2/noise", + "name": "noise", + "qname": "sklearn.datasets._samples_generator.make_friedman2.noise", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The standard deviation of the gaussian noise applied to the output." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_friedman2/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_friedman2.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset noise. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." 
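The `make_friedman1` entry completed above states the target formula and that only the first 5 of `n_features` columns enter it. A sketch that verifies this with `noise=0.0`, under the assumption that zero noise makes the documented formula exact:

```python
# Sketch of make_friedman1 per its entry above (scikit-learn 0.24).
import numpy as np
from sklearn.datasets import make_friedman1

X, y = make_friedman1(n_samples=100, n_features=10, noise=0.0, random_state=0)
print(X.shape, y.shape)  # (100, 10) (100,)

# With noise=0, y follows the documented formula exactly; only the first
# 5 of the 10 features contribute to the target.
y_formula = (10 * np.sin(np.pi * X[:, 0] * X[:, 1])
             + 20 * (X[:, 2] - 0.5) ** 2 + 10 * X[:, 3] + 5 * X[:, 4])
assert np.allclose(y, y_formula)
```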
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate the \"Friedman #2\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\n y(X) = (X[:, 0] ** 2 + (X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) ** 2) ** 0.5 + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate the \"Friedman #2\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\n y(X) = (X[:, 0] ** 2 + (X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) ** 2) ** 0.5 + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 4)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996.", + "code": "@_deprecate_positional_args\ndef make_friedman2(n_samples=100, *, noise=0.0, random_state=None):\n \"\"\"Generate the \"Friedman #2\" regression problem.\n\n This dataset is described in Friedman [1] and Breiman [2].\n\n Inputs `X` are 4 independent features uniformly distributed on the\n intervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\n The output `y` is created according to the formula::\n\n y(X) = (X[:, 0] ** 2 + (X[:, 1] * X[:, 2] \\\n - 1 / (X[:, 1] * X[:, 3])) ** 2) ** 0.5 + noise * N(0, 1).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int, default=100\n The number of samples.\n\n noise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, 4)\n The input samples.\n\n y : ndarray of shape (n_samples,)\n The output values.\n\n References\n ----------\n .. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n .. [2] L. 
Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996.\n \"\"\"\n generator = check_random_state(random_state)\n\n X = generator.rand(n_samples, 4)\n X[:, 0] *= 100\n X[:, 1] *= 520 * np.pi\n X[:, 1] += 40 * np.pi\n X[:, 3] *= 10\n X[:, 3] += 1\n\n y = (X[:, 0] ** 2\n + (X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) ** 2) ** 0.5 \\\n + noise * generator.randn(n_samples)\n\n return X, y" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_friedman3", + "name": "make_friedman3", + "qname": "sklearn.datasets._samples_generator.make_friedman3", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_friedman3/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_friedman3.n_samples", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_friedman3/noise", + "name": "noise", + "qname": "sklearn.datasets._samples_generator.make_friedman3.noise", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The standard deviation of the gaussian noise applied to the output." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_friedman3/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_friedman3.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset noise. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate the \"Friedman #3\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\n y(X) = arctan((X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) / X[:, 0]) + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate the \"Friedman #3\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\n y(X) = arctan((X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) / X[:, 0]) + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 4)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996.", + "code": "@_deprecate_positional_args\ndef make_friedman3(n_samples=100, *, noise=0.0, random_state=None):\n \"\"\"Generate the \"Friedman #3\" regression problem.\n\n This dataset is described in Friedman [1] and Breiman [2].\n\n Inputs `X` are 4 independent features uniformly distributed on the\n intervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\n The output `y` is created according to the formula::\n\n y(X) = arctan((X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) \\\n/ X[:, 0]) + noise * N(0, 1).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int, default=100\n The number of samples.\n\n noise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, 4)\n The input samples.\n\n y : ndarray of shape (n_samples,)\n The output values.\n\n References\n ----------\n .. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n .. [2] L. 
Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996.\n \"\"\"\n generator = check_random_state(random_state)\n\n X = generator.rand(n_samples, 4)\n X[:, 0] *= 100\n X[:, 1] *= 520 * np.pi\n X[:, 1] += 40 * np.pi\n X[:, 3] *= 10\n X[:, 3] += 1\n\n y = np.arctan((X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) / X[:, 0]) \\\n + noise * generator.randn(n_samples)\n\n return X, y" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_gaussian_quantiles", + "name": "make_gaussian_quantiles", + "qname": "sklearn.datasets._samples_generator.make_gaussian_quantiles", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_gaussian_quantiles/mean", + "name": "mean", + "qname": "sklearn.datasets._samples_generator.make_gaussian_quantiles.mean", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,)", + "default_value": "None", + "description": "The mean of the multi-dimensional normal distribution.\nIf None then use the origin (0, 0, ...)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_gaussian_quantiles/cov", + "name": "cov", + "qname": "sklearn.datasets._samples_generator.make_gaussian_quantiles.cov", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "The covariance matrix will be this value times the unit matrix. This\ndataset only produces symmetric normal distributions." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_gaussian_quantiles/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_gaussian_quantiles.n_samples", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The total number of points equally divided among classes." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_gaussian_quantiles/n_features", + "name": "n_features", + "qname": "sklearn.datasets._samples_generator.make_gaussian_quantiles.n_features", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "The number of features for each sample." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_gaussian_quantiles/n_classes", + "name": "n_classes", + "qname": "sklearn.datasets._samples_generator.make_gaussian_quantiles.n_classes", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "The number of classes" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_gaussian_quantiles/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._samples_generator.make_gaussian_quantiles.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Shuffle the samples." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_gaussian_quantiles/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_gaussian_quantiles.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate isotropic Gaussian and label samples by quantile.\n\nThis classification dataset is constructed by taking a multi-dimensional\nstandard normal distribution and defining classes separated by nested\nconcentric multi-dimensional spheres such that roughly equal numbers of\nsamples are in each class (quantiles of the :math:`\\chi^2` distribution).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate isotropic Gaussian and label samples by quantile.\n\nThis classification dataset is constructed by taking a multi-dimensional\nstandard normal distribution and defining classes separated by nested\nconcentric multi-dimensional spheres such that roughly equal numbers of\nsamples are in each class (quantiles of the :math:`\\chi^2` distribution).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nmean : ndarray of shape (n_features,), default=None\n The mean of the multi-dimensional normal distribution.\n If None then use the origin (0, 0, ...).\n\ncov : float, default=1.0\n The covariance matrix will be this value times the unit matrix. This\n dataset only produces symmetric normal distributions.\n\nn_samples : int, default=100\n The total number of points equally divided among classes.\n\nn_features : int, default=2\n The number of features for each sample.\n\nn_classes : int, default=3\n The number of classes\n\nshuffle : bool, default=True\n Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels for quantile membership of each sample.\n\nNotes\n-----\nThe dataset is from Zhu et al [1].\n\nReferences\n----------\n.. [1] J. Zhu, H. Zou, S. Rosset, T. 
Hastie, \"Multi-class AdaBoost\", 2009.", + "code": "@_deprecate_positional_args\ndef make_gaussian_quantiles(*, mean=None, cov=1., n_samples=100,\n n_features=2, n_classes=3,\n shuffle=True, random_state=None):\n r\"\"\"Generate isotropic Gaussian and label samples by quantile.\n\n This classification dataset is constructed by taking a multi-dimensional\n standard normal distribution and defining classes separated by nested\n concentric multi-dimensional spheres such that roughly equal numbers of\n samples are in each class (quantiles of the :math:`\\chi^2` distribution).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n mean : ndarray of shape (n_features,), default=None\n The mean of the multi-dimensional normal distribution.\n If None then use the origin (0, 0, ...).\n\n cov : float, default=1.0\n The covariance matrix will be this value times the unit matrix. This\n dataset only produces symmetric normal distributions.\n\n n_samples : int, default=100\n The total number of points equally divided among classes.\n\n n_features : int, default=2\n The number of features for each sample.\n\n n_classes : int, default=3\n The number of classes\n\n shuffle : bool, default=True\n Shuffle the samples.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n The generated samples.\n\n y : ndarray of shape (n_samples,)\n The integer labels for quantile membership of each sample.\n\n Notes\n -----\n The dataset is from Zhu et al [1].\n\n References\n ----------\n .. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.\n\n \"\"\"\n if n_samples < n_classes:\n raise ValueError(\"n_samples must be at least n_classes\")\n\n generator = check_random_state(random_state)\n\n if mean is None:\n mean = np.zeros(n_features)\n else:\n mean = np.array(mean)\n\n # Build multivariate normal distribution\n X = generator.multivariate_normal(mean, cov * np.identity(n_features),\n (n_samples,))\n\n # Sort by distance from origin\n idx = np.argsort(np.sum((X - mean[np.newaxis, :]) ** 2, axis=1))\n X = X[idx, :]\n\n # Label by quantile\n step = n_samples // n_classes\n\n y = np.hstack([np.repeat(np.arange(n_classes), step),\n np.repeat(n_classes - 1, n_samples - step * n_classes)])\n\n if shuffle:\n X, y = util_shuffle(X, y, random_state=generator)\n\n return X, y" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_hastie_10_2", + "name": "make_hastie_10_2", + "qname": "sklearn.datasets._samples_generator.make_hastie_10_2", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_hastie_10_2/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_hastie_10_2.n_samples", + "default_value": "12000", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "12000", + "description": "The number of samples." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_hastie_10_2/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_hastie_10_2.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generates data for binary classification used in\nHastie et al. 2009, Example 10.2.\n\nThe ten features are standard independent Gaussian and\nthe target ``y`` is defined by::\n\n y[i] = 1 if np.sum(X[i] ** 2) > 9.34 else -1\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generates data for binary classification used in\nHastie et al. 2009, Example 10.2.\n\nThe ten features are standard independent Gaussian and\nthe target ``y`` is defined by::\n\n y[i] = 1 if np.sum(X[i] ** 2) > 9.34 else -1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=12000\n The number of samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 10)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] T. Hastie, R. Tibshirani and J. Friedman, \"Elements of Statistical\n Learning Ed. 2\", Springer, 2009.\n\nSee Also\n--------\nmake_gaussian_quantiles : A generalization of this dataset approach.", + "code": "@_deprecate_positional_args\ndef make_hastie_10_2(n_samples=12000, *, random_state=None):\n \"\"\"Generates data for binary classification used in\n Hastie et al. 2009, Example 10.2.\n\n The ten features are standard independent Gaussian and\n the target ``y`` is defined by::\n\n y[i] = 1 if np.sum(X[i] ** 2) > 9.34 else -1\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int, default=12000\n The number of samples.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, 10)\n The input samples.\n\n y : ndarray of shape (n_samples,)\n The output values.\n\n References\n ----------\n .. [1] T. Hastie, R. Tibshirani and J. Friedman, \"Elements of Statistical\n Learning Ed. 
2\", Springer, 2009.\n\n See Also\n --------\n make_gaussian_quantiles : A generalization of this dataset approach.\n \"\"\"\n rs = check_random_state(random_state)\n\n shape = (n_samples, 10)\n X = rs.normal(size=shape).reshape(shape)\n y = ((X ** 2.0).sum(axis=1) > 9.34).astype(np.float64, copy=False)\n y[y == 0.0] = -1.0\n\n return X, y" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_low_rank_matrix", + "name": "make_low_rank_matrix", + "qname": "sklearn.datasets._samples_generator.make_low_rank_matrix", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_low_rank_matrix/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_low_rank_matrix.n_samples", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_low_rank_matrix/n_features", + "name": "n_features", + "qname": "sklearn.datasets._samples_generator.make_low_rank_matrix.n_features", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of features." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_low_rank_matrix/effective_rank", + "name": "effective_rank", + "qname": "sklearn.datasets._samples_generator.make_low_rank_matrix.effective_rank", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "The approximate number of singular vectors required to explain most of\nthe data by linear combinations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_low_rank_matrix/tail_strength", + "name": "tail_strength", + "qname": "sklearn.datasets._samples_generator.make_low_rank_matrix.tail_strength", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "The relative importance of the fat noisy tail of the singular values\nprofile. The value should be between 0 and 1." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_low_rank_matrix/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_low_rank_matrix.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate a mostly low rank matrix with bell-shaped singular values.\n\nMost of the variance can be explained by a bell-shaped curve of width\neffective_rank: the low rank part of the singular values profile is::\n\n (1 - tail_strength) * exp(-1.0 * (i / effective_rank) ** 2)\n\nThe remaining singular values' tail is fat, decreasing as::\n\n tail_strength * exp(-0.1 * i / effective_rank).\n\nThe low rank part of the profile can be considered the structured\nsignal part of the data while the tail can be considered the noisy\npart of the data that cannot be summarized by a low number of linear\ncomponents (singular vectors).\n\nThis kind of singular profiles is often seen in practice, for instance:\n - gray level pictures of faces\n - TF-IDF vectors of text documents crawled from the web\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate a mostly low rank matrix with bell-shaped singular values.\n\nMost of the variance can be explained by a bell-shaped curve of width\neffective_rank: the low rank part of the singular values profile is::\n\n (1 - tail_strength) * exp(-1.0 * (i / effective_rank) ** 2)\n\nThe remaining singular values' tail is fat, decreasing as::\n\n tail_strength * exp(-0.1 * i / effective_rank).\n\nThe low rank part of the profile can be considered the structured\nsignal part of the data while the tail can be considered the noisy\npart of the data that cannot be summarized by a low number of linear\ncomponents (singular vectors).\n\nThis kind of singular profiles is often seen in practice, for instance:\n - gray level pictures of faces\n - TF-IDF vectors of text documents crawled from the web\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=100\n The number of features.\n\neffective_rank : int, default=10\n The approximate number of singular vectors required to explain most of\n the data by linear combinations.\n\ntail_strength : float, default=0.5\n The relative importance of the fat noisy tail of the singular values\n profile. The value should be between 0 and 1.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The matrix.", + "code": "@_deprecate_positional_args\ndef make_low_rank_matrix(n_samples=100, n_features=100, *, effective_rank=10,\n tail_strength=0.5, random_state=None):\n \"\"\"Generate a mostly low rank matrix with bell-shaped singular values.\n\n Most of the variance can be explained by a bell-shaped curve of width\n effective_rank: the low rank part of the singular values profile is::\n\n (1 - tail_strength) * exp(-1.0 * (i / effective_rank) ** 2)\n\n The remaining singular values' tail is fat, decreasing as::\n\n tail_strength * exp(-0.1 * i / effective_rank).\n\n The low rank part of the profile can be considered the structured\n signal part of the data while the tail can be considered the noisy\n part of the data that cannot be summarized by a low number of linear\n components (singular vectors).\n\n This kind of singular profiles is often seen in practice, for instance:\n - gray level pictures of faces\n - TF-IDF vectors of text documents crawled from the web\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int, default=100\n The number of samples.\n\n n_features : int, default=100\n The number of features.\n\n effective_rank : int, default=10\n The approximate number of singular vectors required to explain most of\n the data by linear combinations.\n\n tail_strength : float, default=0.5\n The relative importance of the fat noisy tail of the singular values\n profile. The value should be between 0 and 1.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n The matrix.\n \"\"\"\n generator = check_random_state(random_state)\n n = min(n_samples, n_features)\n\n # Random (ortho normal) vectors\n u, _ = linalg.qr(generator.randn(n_samples, n), mode='economic',\n check_finite=False)\n v, _ = linalg.qr(generator.randn(n_features, n), mode='economic',\n check_finite=False)\n\n # Index of the singular values\n singular_ind = np.arange(n, dtype=np.float64)\n\n # Build the singular profile by assembling signal and noise components\n low_rank = ((1 - tail_strength) *\n np.exp(-1.0 * (singular_ind / effective_rank) ** 2))\n tail = tail_strength * np.exp(-0.1 * singular_ind / effective_rank)\n s = np.identity(n) * (low_rank + tail)\n\n return np.dot(np.dot(u, s), v.T)" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_moons", + "name": "make_moons", + "qname": "sklearn.datasets._samples_generator.make_moons", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_moons/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_moons.n_samples", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or tuple of shape (2,), dtype=int", + "default_value": "100", + "description": "If int, the total number of points generated.\nIf two-element tuple, number of points in each of two moons.\n\n.. versionchanged:: 0.23\n Added two-element tuple." 
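The make_low_rank_matrix code recorded above builds the matrix as U diag(s) V^T with orthonormal U and V, so the singular values of the result are exactly the bell-plus-tail profile described in the docstring. A sketch to inspect the decay:

import numpy as np
from sklearn.datasets import make_low_rank_matrix

X = make_low_rank_matrix(n_samples=100, n_features=100,
                         effective_rank=10, tail_strength=0.5, random_state=0)
s = np.linalg.svd(X, compute_uv=False)
# Bell-shaped head up to ~effective_rank, then a slowly decaying fat tail.
print(s[0], s[10], s[50])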
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "tuple of shape (2,)" + }, + { + "kind": "NamedType", + "name": "dtype=int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_moons/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._samples_generator.make_moons.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to shuffle the samples." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_moons/noise", + "name": "noise", + "qname": "sklearn.datasets._samples_generator.make_moons.noise", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Standard deviation of Gaussian noise added to the data." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_moons/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_moons.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset shuffling and noise.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Make two interleaving half circles.\n\nA simple toy dataset to visualize clustering and classification\nalgorithms. Read more in the :ref:`User Guide `.", + "docstring": "Make two interleaving half circles.\n\nA simple toy dataset to visualize clustering and classification\nalgorithms. Read more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or tuple of shape (2,), dtype=int, default=100\n If int, the total number of points generated.\n If two-element tuple, number of points in each of two moons.\n\n .. versionchanged:: 0.23\n Added two-element tuple.\n\nshuffle : bool, default=True\n Whether to shuffle the samples.\n\nnoise : float, default=None\n Standard deviation of Gaussian noise added to the data.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 2)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels (0 or 1) for class membership of each sample.", + "code": "@_deprecate_positional_args\ndef make_moons(n_samples=100, *, shuffle=True, noise=None, random_state=None):\n \"\"\"Make two interleaving half circles.\n\n A simple toy dataset to visualize clustering and classification\n algorithms. 
Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int or tuple of shape (2,), dtype=int, default=100\n If int, the total number of points generated.\n If two-element tuple, number of points in each of two moons.\n\n .. versionchanged:: 0.23\n Added two-element tuple.\n\n shuffle : bool, default=True\n Whether to shuffle the samples.\n\n noise : float, default=None\n Standard deviation of Gaussian noise added to the data.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, 2)\n The generated samples.\n\n y : ndarray of shape (n_samples,)\n The integer labels (0 or 1) for class membership of each sample.\n \"\"\"\n\n if isinstance(n_samples, numbers.Integral):\n n_samples_out = n_samples // 2\n n_samples_in = n_samples - n_samples_out\n else:\n try:\n n_samples_out, n_samples_in = n_samples\n except ValueError as e:\n raise ValueError('`n_samples` can be either an int or '\n 'a two-element tuple.') from e\n\n generator = check_random_state(random_state)\n\n outer_circ_x = np.cos(np.linspace(0, np.pi, n_samples_out))\n outer_circ_y = np.sin(np.linspace(0, np.pi, n_samples_out))\n inner_circ_x = 1 - np.cos(np.linspace(0, np.pi, n_samples_in))\n inner_circ_y = 1 - np.sin(np.linspace(0, np.pi, n_samples_in)) - .5\n\n X = np.vstack([np.append(outer_circ_x, inner_circ_x),\n np.append(outer_circ_y, inner_circ_y)]).T\n y = np.hstack([np.zeros(n_samples_out, dtype=np.intp),\n np.ones(n_samples_in, dtype=np.intp)])\n\n if shuffle:\n X, y = util_shuffle(X, y, random_state=generator)\n\n if noise is not None:\n X += generator.normal(scale=noise, size=X.shape)\n\n return X, y" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_multilabel_classification", + "name": "make_multilabel_classification", + "qname": "sklearn.datasets._samples_generator.make_multilabel_classification", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_multilabel_classification/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_multilabel_classification.n_samples", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_multilabel_classification/n_features", + "name": "n_features", + "qname": "sklearn.datasets._samples_generator.make_multilabel_classification.n_features", + "default_value": "20", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "20", + "description": "The total number of features." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_multilabel_classification/n_classes", + "name": "n_classes", + "qname": "sklearn.datasets._samples_generator.make_multilabel_classification.n_classes", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "The number of classes of the classification problem." 
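The tuple form of `n_samples` in the make_moons record above (the 0.23 change flagged in its docstring) splits the points between the outer and inner moon. A sketch:

from sklearn.datasets import make_moons

X, y = make_moons(n_samples=(60, 40), noise=0.05, random_state=0)
print(X.shape, int(y.sum()))  # (100, 2) 40 -- 60 outer points (label 0), 40 inner (label 1)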
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_multilabel_classification/n_labels", + "name": "n_labels", + "qname": "sklearn.datasets._samples_generator.make_multilabel_classification.n_labels", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "The average number of labels per instance. More precisely, the number\nof labels per sample is drawn from a Poisson distribution with\n``n_labels`` as its expected value, but samples are bounded (using\nrejection sampling) by ``n_classes``, and must be nonzero if\n``allow_unlabeled`` is False." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_multilabel_classification/length", + "name": "length", + "qname": "sklearn.datasets._samples_generator.make_multilabel_classification.length", + "default_value": "50", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "50", + "description": "The sum of the features (number of words if documents) is drawn from\na Poisson distribution with this expected value." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_multilabel_classification/allow_unlabeled", + "name": "allow_unlabeled", + "qname": "sklearn.datasets._samples_generator.make_multilabel_classification.allow_unlabeled", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, some instances might not belong to any class." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_multilabel_classification/sparse", + "name": "sparse", + "qname": "sklearn.datasets._samples_generator.make_multilabel_classification.sparse", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If ``True``, return a sparse feature matrix\n\n.. versionadded:: 0.17\n parameter to allow *sparse* output." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_multilabel_classification/return_indicator", + "name": "return_indicator", + "qname": "sklearn.datasets._samples_generator.make_multilabel_classification.return_indicator", + "default_value": "'dense'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'dense', 'sparse'} or False", + "default_value": "'dense'", + "description": "If ``'dense'`` return ``Y`` in the dense binary indicator format. If\n``'sparse'`` return ``Y`` in the sparse binary indicator format.\n``False`` returns a list of lists of labels." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["sparse", "dense"] + }, + { + "kind": "NamedType", + "name": "False" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_multilabel_classification/return_distributions", + "name": "return_distributions", + "qname": "sklearn.datasets._samples_generator.make_multilabel_classification.return_distributions", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If ``True``, return the prior class probability and conditional\nprobabilities of features given classes, from which the data was\ndrawn." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_multilabel_classification/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_multilabel_classification.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate a random multilabel classification problem.\n\nFor each sample, the generative process is:\n - pick the number of labels: n ~ Poisson(n_labels)\n - n times, choose a class c: c ~ Multinomial(theta)\n - pick the document length: k ~ Poisson(length)\n - k times, choose a word: w ~ Multinomial(theta_c)\n\nIn the above process, rejection sampling is used to make sure that\nn is never zero or more than `n_classes`, and that the document length\nis never zero. Likewise, we reject classes which have already been chosen.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate a random multilabel classification problem.\n\nFor each sample, the generative process is:\n - pick the number of labels: n ~ Poisson(n_labels)\n - n times, choose a class c: c ~ Multinomial(theta)\n - pick the document length: k ~ Poisson(length)\n - k times, choose a word: w ~ Multinomial(theta_c)\n\nIn the above process, rejection sampling is used to make sure that\nn is never zero or more than `n_classes`, and that the document length\nis never zero. Likewise, we reject classes which have already been chosen.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=20\n The total number of features.\n\nn_classes : int, default=5\n The number of classes of the classification problem.\n\nn_labels : int, default=2\n The average number of labels per instance. 
More precisely, the number\n of labels per sample is drawn from a Poisson distribution with\n ``n_labels`` as its expected value, but samples are bounded (using\n rejection sampling) by ``n_classes``, and must be nonzero if\n ``allow_unlabeled`` is False.\n\nlength : int, default=50\n The sum of the features (number of words if documents) is drawn from\n a Poisson distribution with this expected value.\n\nallow_unlabeled : bool, default=True\n If ``True``, some instances might not belong to any class.\n\nsparse : bool, default=False\n If ``True``, return a sparse feature matrix\n\n .. versionadded:: 0.17\n parameter to allow *sparse* output.\n\nreturn_indicator : {'dense', 'sparse'} or False, default='dense'\n If ``'dense'`` return ``Y`` in the dense binary indicator format. If\n ``'sparse'`` return ``Y`` in the sparse binary indicator format.\n ``False`` returns a list of lists of labels.\n\nreturn_distributions : bool, default=False\n If ``True``, return the prior class probability and conditional\n probabilities of features given classes, from which the data was\n drawn.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n The label sets. Sparse matrix should be of CSR format.\n\np_c : ndarray of shape (n_classes,)\n The probability of each class being drawn. Only returned if\n ``return_distributions=True``.\n\np_w_c : ndarray of shape (n_features, n_classes)\n The probability of each feature being drawn given each class.\n Only returned if ``return_distributions=True``.", + "code": "@_deprecate_positional_args\ndef make_multilabel_classification(n_samples=100, n_features=20, *,\n n_classes=5,\n n_labels=2, length=50, allow_unlabeled=True,\n sparse=False, return_indicator='dense',\n return_distributions=False,\n random_state=None):\n \"\"\"Generate a random multilabel classification problem.\n\n For each sample, the generative process is:\n - pick the number of labels: n ~ Poisson(n_labels)\n - n times, choose a class c: c ~ Multinomial(theta)\n - pick the document length: k ~ Poisson(length)\n - k times, choose a word: w ~ Multinomial(theta_c)\n\n In the above process, rejection sampling is used to make sure that\n n is never zero or more than `n_classes`, and that the document length\n is never zero. Likewise, we reject classes which have already been chosen.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int, default=100\n The number of samples.\n\n n_features : int, default=20\n The total number of features.\n\n n_classes : int, default=5\n The number of classes of the classification problem.\n\n n_labels : int, default=2\n The average number of labels per instance. 
More precisely, the number\n of labels per sample is drawn from a Poisson distribution with\n ``n_labels`` as its expected value, but samples are bounded (using\n rejection sampling) by ``n_classes``, and must be nonzero if\n ``allow_unlabeled`` is False.\n\n length : int, default=50\n The sum of the features (number of words if documents) is drawn from\n a Poisson distribution with this expected value.\n\n allow_unlabeled : bool, default=True\n If ``True``, some instances might not belong to any class.\n\n sparse : bool, default=False\n If ``True``, return a sparse feature matrix\n\n .. versionadded:: 0.17\n parameter to allow *sparse* output.\n\n return_indicator : {'dense', 'sparse'} or False, default='dense'\n If ``'dense'`` return ``Y`` in the dense binary indicator format. If\n ``'sparse'`` return ``Y`` in the sparse binary indicator format.\n ``False`` returns a list of lists of labels.\n\n return_distributions : bool, default=False\n If ``True``, return the prior class probability and conditional\n probabilities of features given classes, from which the data was\n drawn.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n The generated samples.\n\n Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n The label sets. Sparse matrix should be of CSR format.\n\n p_c : ndarray of shape (n_classes,)\n The probability of each class being drawn. Only returned if\n ``return_distributions=True``.\n\n p_w_c : ndarray of shape (n_features, n_classes)\n The probability of each feature being drawn given each class.\n Only returned if ``return_distributions=True``.\n\n \"\"\"\n if n_classes < 1:\n raise ValueError(\n \"'n_classes' should be an integer greater than 0. Got {} instead.\"\n .format(n_classes)\n )\n if length < 1:\n raise ValueError(\n \"'length' should be an integer greater than 0. 
Got {} instead.\"\n .format(length)\n )\n\n generator = check_random_state(random_state)\n p_c = generator.rand(n_classes)\n p_c /= p_c.sum()\n cumulative_p_c = np.cumsum(p_c)\n p_w_c = generator.rand(n_features, n_classes)\n p_w_c /= np.sum(p_w_c, axis=0)\n\n def sample_example():\n _, n_classes = p_w_c.shape\n\n # pick a nonzero number of labels per document by rejection sampling\n y_size = n_classes + 1\n while (not allow_unlabeled and y_size == 0) or y_size > n_classes:\n y_size = generator.poisson(n_labels)\n\n # pick n classes\n y = set()\n while len(y) != y_size:\n # pick a class with probability P(c)\n c = np.searchsorted(cumulative_p_c,\n generator.rand(y_size - len(y)))\n y.update(c)\n y = list(y)\n\n # pick a non-zero document length by rejection sampling\n n_words = 0\n while n_words == 0:\n n_words = generator.poisson(length)\n\n # generate a document of length n_words\n if len(y) == 0:\n # if sample does not belong to any class, generate noise word\n words = generator.randint(n_features, size=n_words)\n return words, y\n\n # sample words with replacement from selected classes\n cumulative_p_w_sample = p_w_c.take(y, axis=1).sum(axis=1).cumsum()\n cumulative_p_w_sample /= cumulative_p_w_sample[-1]\n words = np.searchsorted(cumulative_p_w_sample, generator.rand(n_words))\n return words, y\n\n X_indices = array.array('i')\n X_indptr = array.array('i', [0])\n Y = []\n for i in range(n_samples):\n words, y = sample_example()\n X_indices.extend(words)\n X_indptr.append(len(X_indices))\n Y.append(y)\n X_data = np.ones(len(X_indices), dtype=np.float64)\n X = sp.csr_matrix((X_data, X_indices, X_indptr),\n shape=(n_samples, n_features))\n X.sum_duplicates()\n if not sparse:\n X = X.toarray()\n\n # return_indicator can be True due to backward compatibility\n if return_indicator in (True, 'sparse', 'dense'):\n lb = MultiLabelBinarizer(sparse_output=(return_indicator == 'sparse'))\n Y = lb.fit([range(n_classes)]).transform(Y)\n elif return_indicator is not False:\n raise ValueError(\"return_indicator must be either 'sparse', 'dense' \"\n 'or False.')\n if return_distributions:\n return X, Y, p_c, p_w_c\n return X, Y" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_regression", + "name": "make_regression", + "qname": "sklearn.datasets._samples_generator.make_regression", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_regression/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_regression.n_samples", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_regression/n_features", + "name": "n_features", + "qname": "sklearn.datasets._samples_generator.make_regression.n_features", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of features." 
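For the make_multilabel_classification record above: the default `return_indicator='dense'` yields `Y` as a binary indicator matrix, and `return_distributions=True` additionally returns the `p_c` and `p_w_c` arrays that the recorded generative process samples from. A sketch, with the same import-path assumption as before:

from sklearn.datasets import make_multilabel_classification

X, Y, p_c, p_w_c = make_multilabel_classification(
    n_samples=100, n_features=20, n_classes=5, n_labels=2,
    return_distributions=True, random_state=0)
print(X.shape, Y.shape)        # (100, 20) (100, 5) -- Y is a dense 0/1 indicator matrix
print(p_c.shape, p_w_c.shape)  # (5,) (20, 5) -- class prior and per-class word distributions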
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_regression/n_informative", + "name": "n_informative", + "qname": "sklearn.datasets._samples_generator.make_regression.n_informative", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "The number of informative features, i.e., the number of features used\nto build the linear model used to generate the output." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_regression/n_targets", + "name": "n_targets", + "qname": "sklearn.datasets._samples_generator.make_regression.n_targets", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "The number of regression targets, i.e., the dimension of the y output\nvector associated with a sample. By default, the output is a scalar." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_regression/bias", + "name": "bias", + "qname": "sklearn.datasets._samples_generator.make_regression.bias", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The bias term in the underlying linear model." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_regression/effective_rank", + "name": "effective_rank", + "qname": "sklearn.datasets._samples_generator.make_regression.effective_rank", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "if not None:\n The approximate number of singular vectors required to explain most\n of the input data by linear combinations. Using this kind of\n singular spectrum in the input allows the generator to reproduce\n the correlations often observed in practice.\nif None:\n The input set is well conditioned, centered and gaussian with\n unit variance." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_regression/tail_strength", + "name": "tail_strength", + "qname": "sklearn.datasets._samples_generator.make_regression.tail_strength", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "The relative importance of the fat noisy tail of the singular values\nprofile if `effective_rank` is not None. When a float, it should be\nbetween 0 and 1." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_regression/noise", + "name": "noise", + "qname": "sklearn.datasets._samples_generator.make_regression.noise", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The standard deviation of the gaussian noise applied to the output." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_regression/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._samples_generator.make_regression.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Shuffle the samples and the features." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_regression/coef", + "name": "coef", + "qname": "sklearn.datasets._samples_generator.make_regression.coef", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the coefficients of the underlying linear model are returned." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_regression/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_regression.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate a random regression problem.\n\nThe input set can either be well conditioned (by default) or have a low\nrank-fat tail singular profile. See :func:`make_low_rank_matrix` for\nmore details.\n\nThe output is generated by applying a (potentially biased) random linear\nregression model with `n_informative` nonzero regressors to the previously\ngenerated input and some gaussian centered noise with some adjustable\nscale.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate a random regression problem.\n\nThe input set can either be well conditioned (by default) or have a low\nrank-fat tail singular profile. See :func:`make_low_rank_matrix` for\nmore details.\n\nThe output is generated by applying a (potentially biased) random linear\nregression model with `n_informative` nonzero regressors to the previously\ngenerated input and some gaussian centered noise with some adjustable\nscale.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=100\n The number of features.\n\nn_informative : int, default=10\n The number of informative features, i.e., the number of features used\n to build the linear model used to generate the output.\n\nn_targets : int, default=1\n The number of regression targets, i.e., the dimension of the y output\n vector associated with a sample. By default, the output is a scalar.\n\nbias : float, default=0.0\n The bias term in the underlying linear model.\n\neffective_rank : int, default=None\n if not None:\n The approximate number of singular vectors required to explain most\n of the input data by linear combinations. 
Using this kind of\n singular spectrum in the input allows the generator to reproduce\n the correlations often observed in practice.\n if None:\n The input set is well conditioned, centered and gaussian with\n unit variance.\n\ntail_strength : float, default=0.5\n The relative importance of the fat noisy tail of the singular values\n profile if `effective_rank` is not None. When a float, it should be\n between 0 and 1.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nshuffle : bool, default=True\n Shuffle the samples and the features.\n\ncoef : bool, default=False\n If True, the coefficients of the underlying linear model are returned.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The input samples.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n The output values.\n\ncoef : ndarray of shape (n_features,) or (n_features, n_targets)\n The coefficient of the underlying linear model. It is returned only if\n coef is True.", + "code": "@_deprecate_positional_args\ndef make_regression(n_samples=100, n_features=100, *, n_informative=10,\n n_targets=1, bias=0.0, effective_rank=None,\n tail_strength=0.5, noise=0.0, shuffle=True, coef=False,\n random_state=None):\n \"\"\"Generate a random regression problem.\n\n The input set can either be well conditioned (by default) or have a low\n rank-fat tail singular profile. See :func:`make_low_rank_matrix` for\n more details.\n\n The output is generated by applying a (potentially biased) random linear\n regression model with `n_informative` nonzero regressors to the previously\n generated input and some gaussian centered noise with some adjustable\n scale.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int, default=100\n The number of samples.\n\n n_features : int, default=100\n The number of features.\n\n n_informative : int, default=10\n The number of informative features, i.e., the number of features used\n to build the linear model used to generate the output.\n\n n_targets : int, default=1\n The number of regression targets, i.e., the dimension of the y output\n vector associated with a sample. By default, the output is a scalar.\n\n bias : float, default=0.0\n The bias term in the underlying linear model.\n\n effective_rank : int, default=None\n if not None:\n The approximate number of singular vectors required to explain most\n of the input data by linear combinations. Using this kind of\n singular spectrum in the input allows the generator to reproduce\n the correlations often observed in practice.\n if None:\n The input set is well conditioned, centered and gaussian with\n unit variance.\n\n tail_strength : float, default=0.5\n The relative importance of the fat noisy tail of the singular values\n profile if `effective_rank` is not None. When a float, it should be\n between 0 and 1.\n\n noise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\n shuffle : bool, default=True\n Shuffle the samples and the features.\n\n coef : bool, default=False\n If True, the coefficients of the underlying linear model are returned.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n The input samples.\n\n y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n The output values.\n\n coef : ndarray of shape (n_features,) or (n_features, n_targets)\n The coefficient of the underlying linear model. It is returned only if\n coef is True.\n \"\"\"\n n_informative = min(n_features, n_informative)\n generator = check_random_state(random_state)\n\n if effective_rank is None:\n # Randomly generate a well conditioned input set\n X = generator.randn(n_samples, n_features)\n\n else:\n # Randomly generate a low rank, fat tail input set\n X = make_low_rank_matrix(n_samples=n_samples,\n n_features=n_features,\n effective_rank=effective_rank,\n tail_strength=tail_strength,\n random_state=generator)\n\n # Generate a ground truth model with only n_informative features being non\n # zeros (the other features are not correlated to y and should be ignored\n # by a sparsifying regularizers such as L1 or elastic net)\n ground_truth = np.zeros((n_features, n_targets))\n ground_truth[:n_informative, :] = 100 * generator.rand(n_informative,\n n_targets)\n\n y = np.dot(X, ground_truth) + bias\n\n # Add noise\n if noise > 0.0:\n y += generator.normal(scale=noise, size=y.shape)\n\n # Randomly permute samples and features\n if shuffle:\n X, y = util_shuffle(X, y, random_state=generator)\n\n indices = np.arange(n_features)\n generator.shuffle(indices)\n X[:, :] = X[:, indices]\n ground_truth = ground_truth[indices]\n\n y = np.squeeze(y)\n\n if coef:\n return X, y, np.squeeze(ground_truth)\n\n else:\n return X, y" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_s_curve", + "name": "make_s_curve", + "qname": "sklearn.datasets._samples_generator.make_s_curve", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_s_curve/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_s_curve.n_samples", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of sample points on the S curve." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_s_curve/noise", + "name": "noise", + "qname": "sklearn.datasets._samples_generator.make_s_curve.noise", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The standard deviation of the gaussian noise." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_s_curve/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_s_curve.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." 
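The make_regression entry above records both the docstring and the generator's implementation: a ground-truth weight matrix with n_informative non-zero rows drawn from [0, 100), applied to the input and optionally returned. A minimal usage sketch consistent with that signature (values are illustrative, assuming scikit-learn 0.24.x is installed):

    from sklearn.datasets import make_regression

    # 200 samples, 20 features, only 5 of which carry signal; coef=True
    # also returns the ground-truth weights (15 of the 20 entries are zero).
    X, y, w = make_regression(n_samples=200, n_features=20, n_informative=5,
                              noise=1.0, coef=True, random_state=0)
    print(X.shape, y.shape, w.shape)  # (200, 20) (200,) (20,)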
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate an S curve dataset.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate an S curve dataset.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of sample points on the S curve.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 3)\n The points.\n\nt : ndarray of shape (n_samples,)\n The univariate position of the sample according to the main dimension\n of the points in the manifold.", + "code": "@_deprecate_positional_args\ndef make_s_curve(n_samples=100, *, noise=0.0, random_state=None):\n \"\"\"Generate an S curve dataset.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int, default=100\n The number of sample points on the S curve.\n\n noise : float, default=0.0\n The standard deviation of the gaussian noise.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, 3)\n The points.\n\n t : ndarray of shape (n_samples,)\n The univariate position of the sample according to the main dimension\n of the points in the manifold.\n \"\"\"\n generator = check_random_state(random_state)\n\n t = 3 * np.pi * (generator.rand(1, n_samples) - 0.5)\n x = np.sin(t)\n y = 2.0 * generator.rand(1, n_samples)\n z = np.sign(t) * (np.cos(t) - 1)\n\n X = np.concatenate((x, y, z))\n X += noise * generator.randn(3, n_samples)\n X = X.T\n t = np.squeeze(t)\n\n return X, t" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_coded_signal", + "name": "make_sparse_coded_signal", + "qname": "sklearn.datasets._samples_generator.make_sparse_coded_signal", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_coded_signal/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_sparse_coded_signal.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of samples to generate" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_coded_signal/n_components", + "name": "n_components", + "qname": "sklearn.datasets._samples_generator.make_sparse_coded_signal.n_components", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of components in the dictionary" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_coded_signal/n_features", + "name": "n_features", + 
"qname": "sklearn.datasets._samples_generator.make_sparse_coded_signal.n_features", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of features of the dataset to generate" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_coded_signal/n_nonzero_coefs", + "name": "n_nonzero_coefs", + "qname": "sklearn.datasets._samples_generator.make_sparse_coded_signal.n_nonzero_coefs", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of active (non-zero) coefficients in each sample" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_coded_signal/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_sparse_coded_signal.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate a signal as a sparse combination of dictionary elements.\n\nReturns a matrix Y = DX, such as D is (n_features, n_components),\nX is (n_components, n_samples) and each column of X has exactly\nn_nonzero_coefs non-zero elements.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate a signal as a sparse combination of dictionary elements.\n\nReturns a matrix Y = DX, such as D is (n_features, n_components),\nX is (n_components, n_samples) and each column of X has exactly\nn_nonzero_coefs non-zero elements.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int\n Number of samples to generate\n\nn_components : int\n Number of components in the dictionary\n\nn_features : int\n Number of features of the dataset to generate\n\nn_nonzero_coefs : int\n Number of active (non-zero) coefficients in each sample\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ndata : ndarray of shape (n_features, n_samples)\n The encoded signal (Y).\n\ndictionary : ndarray of shape (n_features, n_components)\n The dictionary with normalized components (D).\n\ncode : ndarray of shape (n_components, n_samples)\n The sparse code such that each column of this matrix has exactly\n n_nonzero_coefs non-zero items (X).", + "code": "@_deprecate_positional_args\ndef make_sparse_coded_signal(n_samples, *, n_components, n_features,\n n_nonzero_coefs, random_state=None):\n \"\"\"Generate a signal as a sparse combination of dictionary elements.\n\n Returns a matrix Y = DX, such as D is (n_features, n_components),\n X is (n_components, n_samples) and each column of X has exactly\n n_nonzero_coefs non-zero elements.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int\n Number of samples to generate\n\n n_components : int\n Number of components in the dictionary\n\n n_features : int\n Number of features of the dataset to generate\n\n n_nonzero_coefs : int\n Number of active (non-zero) coefficients in each sample\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n data : ndarray of shape (n_features, n_samples)\n The encoded signal (Y).\n\n dictionary : ndarray of shape (n_features, n_components)\n The dictionary with normalized components (D).\n\n code : ndarray of shape (n_components, n_samples)\n The sparse code such that each column of this matrix has exactly\n n_nonzero_coefs non-zero items (X).\n\n \"\"\"\n generator = check_random_state(random_state)\n\n # generate dictionary\n D = generator.randn(n_features, n_components)\n D /= np.sqrt(np.sum((D ** 2), axis=0))\n\n # generate code\n X = np.zeros((n_components, n_samples))\n for i in range(n_samples):\n idx = np.arange(n_components)\n generator.shuffle(idx)\n idx = idx[:n_nonzero_coefs]\n X[idx, i] = generator.randn(n_nonzero_coefs)\n\n # encode signal\n Y = np.dot(D, X)\n\n return map(np.squeeze, (Y, D, X))" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_spd_matrix", + "name": "make_sparse_spd_matrix", + "qname": "sklearn.datasets._samples_generator.make_sparse_spd_matrix", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_spd_matrix/dim", + "name": "dim", + "qname": "sklearn.datasets._samples_generator.make_sparse_spd_matrix.dim", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "The size of the random matrix to generate." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_spd_matrix/alpha", + "name": "alpha", + "qname": "sklearn.datasets._samples_generator.make_sparse_spd_matrix.alpha", + "default_value": "0.95", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.95", + "description": "The probability that a coefficient is zero (see notes). Larger values\nenforce more sparsity. The value should be in the range 0 and 1." 
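The make_sparse_coded_signal code above builds Y = D @ X column by column, zeroing all but n_nonzero_coefs entries of each code vector. A sketch that checks that invariant (sizes are illustrative):

    import numpy as np
    from sklearn.datasets import make_sparse_coded_signal

    Y, D, X = make_sparse_coded_signal(n_samples=50, n_components=30,
                                       n_features=20, n_nonzero_coefs=3,
                                       random_state=0)
    print(Y.shape, D.shape, X.shape)  # (20, 50) (20, 30) (30, 50)
    print((X != 0).sum(axis=0))       # exactly 3 non-zeros per column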
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_spd_matrix/norm_diag", + "name": "norm_diag", + "qname": "sklearn.datasets._samples_generator.make_sparse_spd_matrix.norm_diag", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to normalize the output matrix to make the leading diagonal\nelements all 1" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_spd_matrix/smallest_coef", + "name": "smallest_coef", + "qname": "sklearn.datasets._samples_generator.make_sparse_spd_matrix.smallest_coef", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The value of the smallest coefficient between 0 and 1." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_spd_matrix/largest_coef", + "name": "largest_coef", + "qname": "sklearn.datasets._samples_generator.make_sparse_spd_matrix.largest_coef", + "default_value": "0.9", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.9", + "description": "The value of the largest coefficient between 0 and 1." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_spd_matrix/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_sparse_spd_matrix.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate a sparse symmetric definite positive matrix.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate a sparse symmetric definite positive matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndim : int, default=1\n The size of the random matrix to generate.\n\nalpha : float, default=0.95\n The probability that a coefficient is zero (see notes). Larger values\n enforce more sparsity. The value should be in the range 0 and 1.\n\nnorm_diag : bool, default=False\n Whether to normalize the output matrix to make the leading diagonal\n elements all 1\n\nsmallest_coef : float, default=0.1\n The value of the smallest coefficient between 0 and 1.\n\nlargest_coef : float, default=0.9\n The value of the largest coefficient between 0 and 1.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nprec : sparse matrix of shape (dim, dim)\n The generated matrix.\n\nNotes\n-----\nThe sparsity is actually imposed on the cholesky factor of the matrix.\nThus alpha does not translate directly into the filling fraction of\nthe matrix itself.\n\nSee Also\n--------\nmake_spd_matrix", + "code": "@_deprecate_positional_args\ndef make_sparse_spd_matrix(dim=1, *, alpha=0.95, norm_diag=False,\n smallest_coef=.1, largest_coef=.9,\n random_state=None):\n \"\"\"Generate a sparse symmetric definite positive matrix.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n dim : int, default=1\n The size of the random matrix to generate.\n\n alpha : float, default=0.95\n The probability that a coefficient is zero (see notes). Larger values\n enforce more sparsity. The value should be in the range 0 and 1.\n\n norm_diag : bool, default=False\n Whether to normalize the output matrix to make the leading diagonal\n elements all 1\n\n smallest_coef : float, default=0.1\n The value of the smallest coefficient between 0 and 1.\n\n largest_coef : float, default=0.9\n The value of the largest coefficient between 0 and 1.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n prec : sparse matrix of shape (dim, dim)\n The generated matrix.\n\n Notes\n -----\n The sparsity is actually imposed on the cholesky factor of the matrix.\n Thus alpha does not translate directly into the filling fraction of\n the matrix itself.\n\n See Also\n --------\n make_spd_matrix\n \"\"\"\n random_state = check_random_state(random_state)\n\n chol = -np.eye(dim)\n aux = random_state.rand(dim, dim)\n aux[aux < alpha] = 0\n aux[aux > alpha] = (smallest_coef\n + (largest_coef - smallest_coef)\n * random_state.rand(np.sum(aux > alpha)))\n aux = np.tril(aux, k=-1)\n\n # Permute the lines: we don't want to have asymmetries in the final\n # SPD matrix\n permutation = random_state.permutation(dim)\n aux = aux[permutation].T[permutation]\n chol += aux\n prec = np.dot(chol.T, chol)\n\n if norm_diag:\n # Form the diagonal vector into a row matrix\n d = np.diag(prec).reshape(1, prec.shape[0])\n d = 1. / np.sqrt(d)\n\n prec *= d\n prec *= d.T\n\n return prec" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_uncorrelated", + "name": "make_sparse_uncorrelated", + "qname": "sklearn.datasets._samples_generator.make_sparse_uncorrelated", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_uncorrelated/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_sparse_uncorrelated.n_samples", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of samples." 
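Per the code captured above, make_sparse_spd_matrix imposes sparsity on the Cholesky factor (chol = -I plus a thresholded, permuted lower triangle), so chol.T @ chol is symmetric positive definite by construction. A quick check; note that although the docstring declares a "sparse matrix" return type, the 0.24.2 code shown returns a dense ndarray:

    import numpy as np
    from sklearn.datasets import make_sparse_spd_matrix

    prec = make_sparse_spd_matrix(dim=10, alpha=0.95, random_state=0)
    print(np.allclose(prec, prec.T))           # True: symmetric
    print(np.linalg.eigvalsh(prec).min() > 0)  # True: positive definite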
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_uncorrelated/n_features", + "name": "n_features", + "qname": "sklearn.datasets._samples_generator.make_sparse_uncorrelated.n_features", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "The number of features." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_sparse_uncorrelated/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_sparse_uncorrelated.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate a random regression problem with sparse uncorrelated design.\n\nThis dataset is described in Celeux et al [1]. as::\n\n X ~ N(0, 1)\n y(X) = X[:, 0] + 2 * X[:, 1] - 2 * X[:, 2] - 1.5 * X[:, 3]\n\nOnly the first 4 features are informative. The remaining features are\nuseless.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate a random regression problem with sparse uncorrelated design.\n\nThis dataset is described in Celeux et al [1]. as::\n\n X ~ N(0, 1)\n y(X) = X[:, 0] + 2 * X[:, 1] - 2 * X[:, 2] - 1.5 * X[:, 3]\n\nOnly the first 4 features are informative. The remaining features are\nuseless.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=10\n The number of features.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] G. Celeux, M. El Anbari, J.-M. Marin, C. P. Robert,\n \"Regularization in regression: comparing Bayesian and frequentist\n methods in a poorly informative situation\", 2009.", + "code": "@_deprecate_positional_args\ndef make_sparse_uncorrelated(n_samples=100, n_features=10, *,\n random_state=None):\n \"\"\"Generate a random regression problem with sparse uncorrelated design.\n\n This dataset is described in Celeux et al [1]. as::\n\n X ~ N(0, 1)\n y(X) = X[:, 0] + 2 * X[:, 1] - 2 * X[:, 2] - 1.5 * X[:, 3]\n\n Only the first 4 features are informative. The remaining features are\n useless.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int, default=100\n The number of samples.\n\n n_features : int, default=10\n The number of features.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n The input samples.\n\n y : ndarray of shape (n_samples,)\n The output values.\n\n References\n ----------\n .. [1] G. Celeux, M. El Anbari, J.-M. Marin, C. P. Robert,\n \"Regularization in regression: comparing Bayesian and frequentist\n methods in a poorly informative situation\", 2009.\n \"\"\"\n generator = check_random_state(random_state)\n\n X = generator.normal(loc=0, scale=1, size=(n_samples, n_features))\n y = generator.normal(loc=(X[:, 0] +\n 2 * X[:, 1] -\n 2 * X[:, 2] -\n 1.5 * X[:, 3]), scale=np.ones(n_samples))\n\n return X, y" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_spd_matrix", + "name": "make_spd_matrix", + "qname": "sklearn.datasets._samples_generator.make_spd_matrix", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_spd_matrix/n_dim", + "name": "n_dim", + "qname": "sklearn.datasets._samples_generator.make_spd_matrix.n_dim", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The matrix dimension." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_spd_matrix/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_spd_matrix.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate a random symmetric, positive-definite matrix.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate a random symmetric, positive-definite matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_dim : int\n The matrix dimension.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_dim, n_dim)\n The random symmetric, positive-definite matrix.\n\nSee Also\n--------\nmake_sparse_spd_matrix", + "code": "@_deprecate_positional_args\ndef make_spd_matrix(n_dim, *, random_state=None):\n \"\"\"Generate a random symmetric, positive-definite matrix.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_dim : int\n The matrix dimension.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. 
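make_sparse_uncorrelated, captured above, draws X from N(0, 1) and makes y depend only on the first four columns with fixed weights (1, 2, -2, -1.5); all remaining features are pure noise. Illustrative call:

    from sklearn.datasets import make_sparse_uncorrelated

    X, y = make_sparse_uncorrelated(n_samples=100, n_features=10,
                                    random_state=0)
    print(X.shape, y.shape)  # (100, 10) (100,)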
Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_dim, n_dim)\n The random symmetric, positive-definite matrix.\n\n See Also\n --------\n make_sparse_spd_matrix\n \"\"\"\n generator = check_random_state(random_state)\n\n A = generator.rand(n_dim, n_dim)\n U, _, Vt = linalg.svd(np.dot(A.T, A), check_finite=False)\n X = np.dot(np.dot(U, 1.0 + np.diag(generator.rand(n_dim))), Vt)\n\n return X" + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_swiss_roll", + "name": "make_swiss_roll", + "qname": "sklearn.datasets._samples_generator.make_swiss_roll", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_swiss_roll/n_samples", + "name": "n_samples", + "qname": "sklearn.datasets._samples_generator.make_swiss_roll.n_samples", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of sample points on the S curve." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_swiss_roll/noise", + "name": "noise", + "qname": "sklearn.datasets._samples_generator.make_swiss_roll.noise", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The standard deviation of the gaussian noise." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.datasets._samples_generator/make_swiss_roll/random_state", + "name": "random_state", + "qname": "sklearn.datasets._samples_generator.make_swiss_roll.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset creation. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate a swiss roll dataset.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate a swiss roll dataset.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of sample points on the S curve.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 3)\n The points.\n\nt : ndarray of shape (n_samples,)\n The univariate position of the sample according to the main dimension\n of the points in the manifold.\n\nNotes\n-----\nThe algorithm is from Marsland [1].\n\nReferences\n----------\n.. [1] S. 
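In the make_spd_matrix code above, X = U diag(1 + rand) Vt where U and Vt come from the SVD of A.T @ A; since that product is symmetric positive semi-definite, U and Vt.T coincide (generically) and the eigenvalues of X land in (1, 2), which guarantees positive definiteness. A sketch:

    import numpy as np
    from sklearn.datasets import make_spd_matrix

    X = make_spd_matrix(n_dim=5, random_state=0)
    print(np.linalg.eigvalsh(X).min() > 0)  # True: positive-definite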
Marsland, \"Machine Learning: An Algorithmic Perspective\",\n Chapter 10, 2009.\n http://seat.massey.ac.nz/personal/s.r.marsland/Code/10/lle.py", + "code": "@_deprecate_positional_args\ndef make_swiss_roll(n_samples=100, *, noise=0.0, random_state=None):\n \"\"\"Generate a swiss roll dataset.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int, default=100\n The number of sample points on the S curve.\n\n noise : float, default=0.0\n The standard deviation of the gaussian noise.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, 3)\n The points.\n\n t : ndarray of shape (n_samples,)\n The univariate position of the sample according to the main dimension\n of the points in the manifold.\n\n Notes\n -----\n The algorithm is from Marsland [1].\n\n References\n ----------\n .. [1] S. Marsland, \"Machine Learning: An Algorithmic Perspective\",\n Chapter 10, 2009.\n http://seat.massey.ac.nz/personal/s.r.marsland/Code/10/lle.py\n \"\"\"\n generator = check_random_state(random_state)\n\n t = 1.5 * np.pi * (1 + 2 * generator.rand(1, n_samples))\n x = t * np.cos(t)\n y = 21 * generator.rand(1, n_samples)\n z = t * np.sin(t)\n\n X = np.concatenate((x, y, z))\n X += noise * generator.randn(3, n_samples)\n X = X.T\n t = np.squeeze(t)\n\n return X, t" + }, + { + "id": "scikit-learn/sklearn.datasets._species_distributions/_load_coverage", + "name": "_load_coverage", + "qname": "sklearn.datasets._species_distributions._load_coverage", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._species_distributions/_load_coverage/F", + "name": "F", + "qname": "sklearn.datasets._species_distributions._load_coverage.F", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._species_distributions/_load_coverage/header_length", + "name": "header_length", + "qname": "sklearn.datasets._species_distributions._load_coverage.header_length", + "default_value": "6", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._species_distributions/_load_coverage/dtype", + "name": "dtype", + "qname": "sklearn.datasets._species_distributions._load_coverage.dtype", + "default_value": "np.int16", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load a coverage file from an open file object.\n\nThis will return a numpy array of the given dtype", + "docstring": "Load a coverage file from an open file object.\n\nThis will return a numpy array of the given dtype", + "code": "def _load_coverage(F, header_length=6, dtype=np.int16):\n \"\"\"Load a coverage file from an open file object.\n\n This will return a numpy array of the given dtype\n \"\"\"\n header = [F.readline() for _ in range(header_length)]\n make_tuple = lambda t: (t.split()[0], float(t.split()[1]))\n header = dict([make_tuple(line) for line in header])\n\n M = np.loadtxt(F, dtype=dtype)\n 
nodata = int(header[b'NODATA_value'])\n if nodata != -9999:\n M[nodata] = -9999\n return M" + }, + { + "id": "scikit-learn/sklearn.datasets._species_distributions/_load_csv", + "name": "_load_csv", + "qname": "sklearn.datasets._species_distributions._load_csv", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._species_distributions/_load_csv/F", + "name": "F", + "qname": "sklearn.datasets._species_distributions._load_csv.F", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "file object", + "default_value": "", + "description": "CSV file open in byte mode." + }, + "type": { + "kind": "NamedType", + "name": "file object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load csv file.", + "docstring": "Load csv file.\n\nParameters\n----------\nF : file object\n CSV file open in byte mode.\n\nReturns\n-------\nrec : np.ndarray\n record array representing the data", + "code": "def _load_csv(F):\n \"\"\"Load csv file.\n\n Parameters\n ----------\n F : file object\n CSV file open in byte mode.\n\n Returns\n -------\n rec : np.ndarray\n record array representing the data\n \"\"\"\n names = F.readline().decode('ascii').strip().split(',')\n\n rec = np.loadtxt(F, skiprows=0, delimiter=',', dtype='a22,f4,f4')\n rec.dtype.names = names\n return rec" + }, + { + "id": "scikit-learn/sklearn.datasets._species_distributions/construct_grids", + "name": "construct_grids", + "qname": "sklearn.datasets._species_distributions.construct_grids", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._species_distributions/construct_grids/batch", + "name": "batch", + "qname": "sklearn.datasets._species_distributions.construct_grids.batch", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Batch object", + "default_value": "", + "description": "The object returned by :func:`fetch_species_distributions`" + }, + "type": { + "kind": "NamedType", + "name": "Batch object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Construct the map grid from the batch object", + "docstring": "Construct the map grid from the batch object\n\nParameters\n----------\nbatch : Batch object\n The object returned by :func:`fetch_species_distributions`\n\nReturns\n-------\n(xgrid, ygrid) : 1-D arrays\n The grid corresponding to the values in batch.coverages", + "code": "def construct_grids(batch):\n \"\"\"Construct the map grid from the batch object\n\n Parameters\n ----------\n batch : Batch object\n The object returned by :func:`fetch_species_distributions`\n\n Returns\n -------\n (xgrid, ygrid) : 1-D arrays\n The grid corresponding to the values in batch.coverages\n \"\"\"\n # x,y coordinates for corner cells\n xmin = batch.x_left_lower_corner + batch.grid_size\n xmax = xmin + (batch.Nx * batch.grid_size)\n ymin = batch.y_left_lower_corner + batch.grid_size\n ymax = ymin + (batch.Ny * batch.grid_size)\n\n # x coordinates of the grid cells\n xgrid = np.arange(xmin, xmax, batch.grid_size)\n # y coordinates of the grid cells\n ygrid = np.arange(ymin, ymax, batch.grid_size)\n\n return (xgrid, ygrid)" + }, + { + "id": "scikit-learn/sklearn.datasets._species_distributions/fetch_species_distributions", + "name": "fetch_species_distributions", + "qname": "sklearn.datasets._species_distributions.fetch_species_distributions", + "decorators": ["_deprecate_positional_args"], + 
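construct_grids, captured in full above, rebuilds the longitude/latitude axes from the Bunch metadata: each axis starts one grid_size past the lower-left corner and advances Nx (or Ny) steps. The same arithmetic, restated with the extra_params values hard-coded in fetch_species_distributions further below:

    import numpy as np

    # x_left_lower_corner=-94.8, Nx=1212, grid_size=0.05 (from the code below)
    xmin = -94.8 + 0.05                 # corner shifted by one cell
    xgrid = np.arange(xmin, xmin + 1212 * 0.05, 0.05)
    print(xgrid[0])                     # about -94.75 degrees longitude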
"parameters": [ + { + "id": "scikit-learn/sklearn.datasets._species_distributions/fetch_species_distributions/data_home", + "name": "data_home", + "qname": "sklearn.datasets._species_distributions.fetch_species_distributions.data_home", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Specify another download and cache folder for the datasets. By default\nall scikit-learn data is stored in '~/scikit_learn_data' subfolders." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._species_distributions/fetch_species_distributions/download_if_missing", + "name": "download_if_missing", + "qname": "sklearn.datasets._species_distributions.fetch_species_distributions.download_if_missing", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, raise a IOError if the data is not locally available\ninstead of trying to download the data from the source site." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Loader for species distribution dataset from Phillips et. al. (2006)\n\nRead more in the :ref:`User Guide `.", + "docstring": "Loader for species distribution dataset from Phillips et. al. (2006)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n coverages : array, shape = [14, 1592, 1212]\n These represent the 14 features measured\n at each point of the map grid.\n The latitude/longitude values for the grid are discussed below.\n Missing data is represented by the value -9999.\n train : record array, shape = (1624,)\n The training points for the data. Each point has three fields:\n\n - train['species'] is the species name\n - train['dd long'] is the longitude, in degrees\n - train['dd lat'] is the latitude, in degrees\n test : record array, shape = (620,)\n The test points for the data. Same format as the training data.\n Nx, Ny : integers\n The number of longitudes (x) and latitudes (y) in the grid\n x_left_lower_corner, y_left_lower_corner : floats\n The (x,y) position of the lower-left corner, in degrees\n grid_size : float\n The spacing between points of the grid, in degrees\n\nReferences\n----------\n\n* `\"Maximum entropy modeling of species geographic distributions\"\n `_\n S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,\n 190:231-259, 2006.\n\nNotes\n-----\n\nThis dataset represents the geographic distribution of species.\nThe dataset is provided by Phillips et. al. 
(2006).\n\nThe two species are:\n\n- `\"Bradypus variegatus\"\n `_ ,\n the Brown-throated Sloth.\n\n- `\"Microryzomys minutus\"\n `_ ,\n also known as the Forest Small Rice Rat, a rodent that lives in Peru,\n Colombia, Ecuador, Peru, and Venezuela.\n\n- For an example of using this dataset with scikit-learn, see\n :ref:`examples/applications/plot_species_distribution_modeling.py\n `.", + "code": "@_deprecate_positional_args\ndef fetch_species_distributions(*, data_home=None,\n download_if_missing=True):\n \"\"\"Loader for species distribution dataset from Phillips et. al. (2006)\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n data_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n download_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n Returns\n -------\n data : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n coverages : array, shape = [14, 1592, 1212]\n These represent the 14 features measured\n at each point of the map grid.\n The latitude/longitude values for the grid are discussed below.\n Missing data is represented by the value -9999.\n train : record array, shape = (1624,)\n The training points for the data. Each point has three fields:\n\n - train['species'] is the species name\n - train['dd long'] is the longitude, in degrees\n - train['dd lat'] is the latitude, in degrees\n test : record array, shape = (620,)\n The test points for the data. Same format as the training data.\n Nx, Ny : integers\n The number of longitudes (x) and latitudes (y) in the grid\n x_left_lower_corner, y_left_lower_corner : floats\n The (x,y) position of the lower-left corner, in degrees\n grid_size : float\n The spacing between points of the grid, in degrees\n\n References\n ----------\n\n * `\"Maximum entropy modeling of species geographic distributions\"\n `_\n S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,\n 190:231-259, 2006.\n\n Notes\n -----\n\n This dataset represents the geographic distribution of species.\n The dataset is provided by Phillips et. al. (2006).\n\n The two species are:\n\n - `\"Bradypus variegatus\"\n `_ ,\n the Brown-throated Sloth.\n\n - `\"Microryzomys minutus\"\n `_ ,\n also known as the Forest Small Rice Rat, a rodent that lives in Peru,\n Colombia, Ecuador, Peru, and Venezuela.\n\n - For an example of using this dataset with scikit-learn, see\n :ref:`examples/applications/plot_species_distribution_modeling.py\n `.\n \"\"\"\n data_home = get_data_home(data_home)\n if not exists(data_home):\n makedirs(data_home)\n\n # Define parameters for the data files. These should not be changed\n # unless the data model changes. 
They will be saved in the npz file\n # with the downloaded data.\n extra_params = dict(x_left_lower_corner=-94.8,\n Nx=1212,\n y_left_lower_corner=-56.05,\n Ny=1592,\n grid_size=0.05)\n dtype = np.int16\n\n archive_path = _pkl_filepath(data_home, DATA_ARCHIVE_NAME)\n\n if not exists(archive_path):\n if not download_if_missing:\n raise IOError(\"Data not found and `download_if_missing` is False\")\n logger.info('Downloading species data from %s to %s' % (\n SAMPLES.url, data_home))\n samples_path = _fetch_remote(SAMPLES, dirname=data_home)\n with np.load(samples_path) as X: # samples.zip is a valid npz\n for f in X.files:\n fhandle = BytesIO(X[f])\n if 'train' in f:\n train = _load_csv(fhandle)\n if 'test' in f:\n test = _load_csv(fhandle)\n remove(samples_path)\n\n logger.info('Downloading coverage data from %s to %s' % (\n COVERAGES.url, data_home))\n coverages_path = _fetch_remote(COVERAGES, dirname=data_home)\n with np.load(coverages_path) as X: # coverages.zip is a valid npz\n coverages = []\n for f in X.files:\n fhandle = BytesIO(X[f])\n logger.debug(' - converting {}'.format(f))\n coverages.append(_load_coverage(fhandle))\n coverages = np.asarray(coverages, dtype=dtype)\n remove(coverages_path)\n\n bunch = Bunch(coverages=coverages,\n test=test,\n train=train,\n **extra_params)\n joblib.dump(bunch, archive_path, compress=9)\n else:\n bunch = joblib.load(archive_path)\n\n return bunch" + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_dump_svmlight", + "name": "_dump_svmlight", + "qname": "sklearn.datasets._svmlight_format_io._dump_svmlight", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_dump_svmlight/X", + "name": "X", + "qname": "sklearn.datasets._svmlight_format_io._dump_svmlight.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_dump_svmlight/y", + "name": "y", + "qname": "sklearn.datasets._svmlight_format_io._dump_svmlight.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_dump_svmlight/f", + "name": "f", + "qname": "sklearn.datasets._svmlight_format_io._dump_svmlight.f", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_dump_svmlight/multilabel", + "name": "multilabel", + "qname": "sklearn.datasets._svmlight_format_io._dump_svmlight.multilabel", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_dump_svmlight/one_based", + "name": "one_based", + "qname": "sklearn.datasets._svmlight_format_io._dump_svmlight.one_based", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_dump_svmlight/comment", + "name": "comment", + "qname": 
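fetch_species_distributions, whose loader ends above, downloads the samples and coverages archives on first use, caches a compressed joblib pickle under data_home, and serves the cache thereafter. A sketch (the first call needs network access; the expected shapes are those stated in the docstring):

    from sklearn.datasets import fetch_species_distributions

    data = fetch_species_distributions()
    print(data.coverages.shape)               # (14, 1592, 1212)
    print(data.train.shape, data.test.shape)  # (1624,) (620,)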
"sklearn.datasets._svmlight_format_io._dump_svmlight.comment", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_dump_svmlight/query_id", + "name": "query_id", + "qname": "sklearn.datasets._svmlight_format_io._dump_svmlight.query_id", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id):\n X_is_sp = int(hasattr(X, \"tocsr\"))\n y_is_sp = int(hasattr(y, \"tocsr\"))\n if X.dtype.kind == 'i':\n value_pattern = \"%d:%d\"\n else:\n value_pattern = \"%d:%.16g\"\n\n if y.dtype.kind == 'i':\n label_pattern = \"%d\"\n else:\n label_pattern = \"%.16g\"\n\n line_pattern = \"%s\"\n if query_id is not None:\n line_pattern += \" qid:%d\"\n line_pattern += \" %s\\n\"\n\n if comment:\n f.write((\"# Generated by dump_svmlight_file from scikit-learn %s\\n\"\n % __version__).encode())\n f.write((\"# Column indices are %s-based\\n\"\n % [\"zero\", \"one\"][one_based]).encode())\n\n f.write(b\"#\\n\")\n f.writelines(b\"# %s\\n\" % line for line in comment.splitlines())\n\n for i in range(X.shape[0]):\n if X_is_sp:\n span = slice(X.indptr[i], X.indptr[i + 1])\n row = zip(X.indices[span], X.data[span])\n else:\n nz = X[i] != 0\n row = zip(np.where(nz)[0], X[i, nz])\n\n s = \" \".join(value_pattern % (j + one_based, x) for j, x in row)\n\n if multilabel:\n if y_is_sp:\n nz_labels = y[i].nonzero()[1]\n else:\n nz_labels = np.where(y[i] != 0)[0]\n labels_str = \",\".join(label_pattern % j for j in nz_labels)\n else:\n if y_is_sp:\n labels_str = label_pattern % y.data[i]\n else:\n labels_str = label_pattern % y[i]\n\n if query_id is not None:\n feat = (labels_str, query_id[i], s)\n else:\n feat = (labels_str, s)\n\n f.write((line_pattern % feat).encode('ascii'))" + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_gen_open", + "name": "_gen_open", + "qname": "sklearn.datasets._svmlight_format_io._gen_open", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_gen_open/f", + "name": "f", + "qname": "sklearn.datasets._svmlight_format_io._gen_open.f", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _gen_open(f):\n if isinstance(f, int): # file descriptor\n return io.open(f, \"rb\", closefd=False)\n elif not isinstance(f, str):\n raise TypeError(\"expected {str, int, file-like}, got %s\" % type(f))\n\n _, ext = os.path.splitext(f)\n if ext == \".gz\":\n import gzip\n return gzip.open(f, \"rb\")\n elif ext == \".bz2\":\n from bz2 import BZ2File\n return BZ2File(f, \"rb\")\n else:\n return open(f, \"rb\")" + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_load_svmlight_file", + "name": "_load_svmlight_file", + "qname": "sklearn.datasets._svmlight_format_io._load_svmlight_file", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.datasets._svmlight_format_io/_load_svmlight_file/args", + "name": "args", + "qname": "sklearn.datasets._svmlight_format_io._load_svmlight_file.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_load_svmlight_file/kwargs", + "name": "kwargs", + "qname": "sklearn.datasets._svmlight_format_io._load_svmlight_file.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _load_svmlight_file(*args, **kwargs):\n raise NotImplementedError(\n 'load_svmlight_file is currently not '\n 'compatible with PyPy (see '\n 'https://github.com/scikit-learn/scikit-learn/issues/11543 '\n 'for the status updates).')" + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_open_and_load", + "name": "_open_and_load", + "qname": "sklearn.datasets._svmlight_format_io._open_and_load", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_open_and_load/f", + "name": "f", + "qname": "sklearn.datasets._svmlight_format_io._open_and_load.f", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_open_and_load/dtype", + "name": "dtype", + "qname": "sklearn.datasets._svmlight_format_io._open_and_load.dtype", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_open_and_load/multilabel", + "name": "multilabel", + "qname": "sklearn.datasets._svmlight_format_io._open_and_load.multilabel", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_open_and_load/zero_based", + "name": "zero_based", + "qname": "sklearn.datasets._svmlight_format_io._open_and_load.zero_based", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_open_and_load/query_id", + "name": "query_id", + "qname": "sklearn.datasets._svmlight_format_io._open_and_load.query_id", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/_open_and_load/offset", + "name": "offset", + "qname": "sklearn.datasets._svmlight_format_io._open_and_load.offset", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.datasets._svmlight_format_io/_open_and_load/length", + "name": "length", + "qname": "sklearn.datasets._svmlight_format_io._open_and_load.length", + "default_value": "-1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _open_and_load(f, dtype, multilabel, zero_based, query_id,\n offset=0, length=-1):\n if hasattr(f, \"read\"):\n actual_dtype, data, ind, indptr, labels, query = \\\n _load_svmlight_file(f, dtype, multilabel, zero_based, query_id,\n offset, length)\n else:\n with closing(_gen_open(f)) as f:\n actual_dtype, data, ind, indptr, labels, query = \\\n _load_svmlight_file(f, dtype, multilabel, zero_based, query_id,\n offset, length)\n\n # convert from array.array, give data the right dtype\n if not multilabel:\n labels = np.frombuffer(labels, np.float64)\n data = np.frombuffer(data, actual_dtype)\n indices = np.frombuffer(ind, np.longlong)\n indptr = np.frombuffer(indptr, dtype=np.longlong) # never empty\n query = np.frombuffer(query, np.int64)\n\n data = np.asarray(data, dtype=dtype) # no-op for float{32,64}\n return data, indices, indptr, labels, query" + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/dump_svmlight_file", + "name": "dump_svmlight_file", + "qname": "sklearn.datasets._svmlight_format_io.dump_svmlight_file", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/dump_svmlight_file/X", + "name": "X", + "qname": "sklearn.datasets._svmlight_format_io.dump_svmlight_file.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/dump_svmlight_file/y", + "name": "y", + "qname": "sklearn.datasets._svmlight_format_io.dump_svmlight_file.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape = [n_samples (, n_labels)]", + "default_value": "", + "description": "Target values. Class labels must be an\ninteger or float, or array-like objects of integer or float for\nmultilabel classifications." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape = [n_samples (, n_labels)]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/dump_svmlight_file/f", + "name": "f", + "qname": "sklearn.datasets._svmlight_format_io.dump_svmlight_file.f", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "string or file-like in binary mode", + "default_value": "", + "description": "If string, specifies the path that will contain the data.\nIf file-like, data will be written to f. f should be opened in binary\nmode." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "file-like in binary mode" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/dump_svmlight_file/zero_based", + "name": "zero_based", + "qname": "sklearn.datasets._svmlight_format_io.dump_svmlight_file.zero_based", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "boolean", + "default_value": "True", + "description": "Whether column indices should be written zero-based (True) or one-based\n(False)." + }, + "type": { + "kind": "NamedType", + "name": "boolean" + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/dump_svmlight_file/comment", + "name": "comment", + "qname": "sklearn.datasets._svmlight_format_io.dump_svmlight_file.comment", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string", + "default_value": "None", + "description": "Comment to insert at the top of the file. This should be either a\nUnicode string, which will be encoded as UTF-8, or an ASCII byte\nstring.\nIf a comment is given, then it will be preceded by one that identifies\nthe file as having been dumped by scikit-learn. Note that not all\ntools grok comments in SVMlight files." + }, + "type": { + "kind": "NamedType", + "name": "string" + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/dump_svmlight_file/query_id", + "name": "query_id", + "qname": "sklearn.datasets._svmlight_format_io.dump_svmlight_file.query_id", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Array containing pairwise preference constraints (qid in svmlight\nformat)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/dump_svmlight_file/multilabel", + "name": "multilabel", + "qname": "sklearn.datasets._svmlight_format_io.dump_svmlight_file.multilabel", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "boolean", + "default_value": "False", + "description": "Samples may have several labels each (see\nhttps://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\n.. versionadded:: 0.17\n parameter *multilabel* to support multilabel datasets." + }, + "type": { + "kind": "NamedType", + "name": "boolean" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Dump the dataset in svmlight / libsvm file format.\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.", + "docstring": "Dump the dataset in svmlight / libsvm file format.\n\nThis format is a text-based format, with one sample per line. 
It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : {array-like, sparse matrix}, shape = [n_samples (, n_labels)]\n Target values. Class labels must be an\n integer or float, or array-like objects of integer or float for\n multilabel classifications.\n\nf : string or file-like in binary mode\n If string, specifies the path that will contain the data.\n If file-like, data will be written to f. f should be opened in binary\n mode.\n\nzero_based : boolean, default=True\n Whether column indices should be written zero-based (True) or one-based\n (False).\n\ncomment : string, default=None\n Comment to insert at the top of the file. This should be either a\n Unicode string, which will be encoded as UTF-8, or an ASCII byte\n string.\n If a comment is given, then it will be preceded by one that identifies\n the file as having been dumped by scikit-learn. Note that not all\n tools grok comments in SVMlight files.\n\nquery_id : array-like of shape (n_samples,), default=None\n Array containing pairwise preference constraints (qid in svmlight\n format).\n\nmultilabel : boolean, default=False\n Samples may have several labels each (see\n https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\n .. versionadded:: 0.17\n parameter *multilabel* to support multilabel datasets.", + "code": "@_deprecate_positional_args\ndef dump_svmlight_file(X, y, f, *, zero_based=True, comment=None,\n query_id=None,\n multilabel=False):\n \"\"\"Dump the dataset in svmlight / libsvm file format.\n\n This format is a text-based format, with one sample per line. It does\n not store zero valued features hence is suitable for sparse dataset.\n\n The first element of each line can be used to store a target variable\n to predict.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : {array-like, sparse matrix}, shape = [n_samples (, n_labels)]\n Target values. Class labels must be an\n integer or float, or array-like objects of integer or float for\n multilabel classifications.\n\n f : string or file-like in binary mode\n If string, specifies the path that will contain the data.\n If file-like, data will be written to f. f should be opened in binary\n mode.\n\n zero_based : boolean, default=True\n Whether column indices should be written zero-based (True) or one-based\n (False).\n\n comment : string, default=None\n Comment to insert at the top of the file. This should be either a\n Unicode string, which will be encoded as UTF-8, or an ASCII byte\n string.\n If a comment is given, then it will be preceded by one that identifies\n the file as having been dumped by scikit-learn. Note that not all\n tools grok comments in SVMlight files.\n\n query_id : array-like of shape (n_samples,), default=None\n Array containing pairwise preference constraints (qid in svmlight\n format).\n\n multilabel : boolean, default=False\n Samples may have several labels each (see\n https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\n .. 
versionadded:: 0.17\n parameter *multilabel* to support multilabel datasets.\n \"\"\"\n if comment is not None:\n # Convert comment string to list of lines in UTF-8.\n # If a byte string is passed, then check whether it's ASCII;\n # if a user wants to get fancy, they'll have to decode themselves.\n # Avoid mention of str and unicode types for Python 3.x compat.\n if isinstance(comment, bytes):\n comment.decode(\"ascii\") # just for the exception\n else:\n comment = comment.encode(\"utf-8\")\n if b\"\\0\" in comment:\n raise ValueError(\"comment string contains NUL byte\")\n\n yval = check_array(y, accept_sparse='csr', ensure_2d=False)\n if sp.issparse(yval):\n if yval.shape[1] != 1 and not multilabel:\n raise ValueError(\"expected y of shape (n_samples, 1),\"\n \" got %r\" % (yval.shape,))\n else:\n if yval.ndim != 1 and not multilabel:\n raise ValueError(\"expected y of shape (n_samples,), got %r\"\n % (yval.shape,))\n\n Xval = check_array(X, accept_sparse='csr')\n if Xval.shape[0] != yval.shape[0]:\n raise ValueError(\n \"X.shape[0] and y.shape[0] should be the same, got\"\n \" %r and %r instead.\" % (Xval.shape[0], yval.shape[0])\n )\n\n # We had some issues with CSR matrices with unsorted indices (e.g. #1501),\n # so sort them here, but first make sure we don't modify the user's X.\n # TODO We can do this cheaper; sorted_indices copies the whole matrix.\n if yval is y and hasattr(yval, \"sorted_indices\"):\n y = yval.sorted_indices()\n else:\n y = yval\n if hasattr(y, \"sort_indices\"):\n y.sort_indices()\n\n if Xval is X and hasattr(Xval, \"sorted_indices\"):\n X = Xval.sorted_indices()\n else:\n X = Xval\n if hasattr(X, \"sort_indices\"):\n X.sort_indices()\n\n if query_id is not None:\n query_id = np.asarray(query_id)\n if query_id.shape[0] != y.shape[0]:\n raise ValueError(\"expected query_id of shape (n_samples,), got %r\"\n % (query_id.shape,))\n\n one_based = not zero_based\n\n if hasattr(f, \"write\"):\n _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id)\n else:\n with open(f, \"wb\") as f:\n _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id)" + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_file", + "name": "load_svmlight_file", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_file", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_file/f", + "name": "f", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_file.f", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str, file-like or int", + "default_value": "", + "description": "(Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will\nbe uncompressed on the fly. If an integer is passed, it is assumed to\nbe a file descriptor. A file-like or file descriptor will not be closed\nby this function. A file-like object must be opened in binary mode." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "file-like" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_file/n_features", + "name": "n_features", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_file.n_features", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of features to use. If None, it will be inferred. This\nargument is useful to load several files that are subsets of a\nbigger sliced dataset: each subset might not have examples of\nevery feature, hence the inferred shape might vary from one\nslice to another.\nn_features is only required if ``offset`` or ``length`` are passed a\nnon-default value." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_file/dtype", + "name": "dtype", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_file.dtype", + "default_value": "np.float64", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "numpy data type", + "default_value": "np.float64", + "description": "Data type of dataset to be loaded. This will be the data type of the\noutput numpy arrays ``X`` and ``y``." + }, + "type": { + "kind": "NamedType", + "name": "numpy data type" + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_file/multilabel", + "name": "multilabel", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_file.multilabel", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Samples may have several labels each (see\nhttps://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_file/zero_based", + "name": "zero_based", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_file.zero_based", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or \"auto\"", + "default_value": "\"auto\"", + "description": "Whether column indices in f are zero-based (True) or one-based\n(False). If column indices are one-based, they are transformed to\nzero-based to match Python/NumPy conventions.\nIf set to \"auto\", a heuristic check is applied to determine this from\nthe file contents. Both kinds of files occur \"in the wild\", but they\nare unfortunately not self-identifying. Using \"auto\" or True should\nalways be safe when no ``offset`` or ``length`` is passed.\nIf ``offset`` or ``length`` are passed, the \"auto\" mode falls back\nto ``zero_based=True`` to avoid having the heuristic check yield\ninconsistent results on different segments of the file." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "\"auto\"" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_file/query_id", + "name": "query_id", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_file.query_id", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, will return the query_id array for each file." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_file/offset", + "name": "offset", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_file.offset", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Ignore the offset first bytes by seeking forward, then\ndiscarding the following bytes up until the next new line\ncharacter." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_file/length", + "name": "length", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_file.length", + "default_value": "-1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "-1", + "description": "If strictly positive, stop reading any new line of data once the\nposition in the file has reached the (offset + length) bytes threshold." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load datasets in the svmlight / libsvm format into sparse CSR matrix\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nThis format is used as the default format for both svmlight and the\nlibsvm command line programs.\n\nParsing a text based source can be expensive. When working on\nrepeatedly on the same dataset, it is recommended to wrap this\nloader with joblib.Memory.cache to store a memmapped backup of the\nCSR results of the first call and benefit from the near instantaneous\nloading of memmapped structures for the subsequent calls.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nThis implementation is written in Cython and is reasonably fast.\nHowever, a faster API-compatible loader is also available at:\n\n https://github.com/mblondel/svmlight-loader", + "docstring": "Load datasets in the svmlight / libsvm format into sparse CSR matrix\n\nThis format is a text-based format, with one sample per line. 
It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nThis format is used as the default format for both svmlight and the\nlibsvm command line programs.\n\nParsing a text based source can be expensive. When working on\nrepeatedly on the same dataset, it is recommended to wrap this\nloader with joblib.Memory.cache to store a memmapped backup of the\nCSR results of the first call and benefit from the near instantaneous\nloading of memmapped structures for the subsequent calls.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nThis implementation is written in Cython and is reasonably fast.\nHowever, a faster API-compatible loader is also available at:\n\n https://github.com/mblondel/svmlight-loader\n\nParameters\n----------\nf : str, file-like or int\n (Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will\n be uncompressed on the fly. If an integer is passed, it is assumed to\n be a file descriptor. A file-like or file descriptor will not be closed\n by this function. A file-like object must be opened in binary mode.\n\nn_features : int, default=None\n The number of features to use. If None, it will be inferred. This\n argument is useful to load several files that are subsets of a\n bigger sliced dataset: each subset might not have examples of\n every feature, hence the inferred shape might vary from one\n slice to another.\n n_features is only required if ``offset`` or ``length`` are passed a\n non-default value.\n\ndtype : numpy data type, default=np.float64\n Data type of dataset to be loaded. This will be the data type of the\n output numpy arrays ``X`` and ``y``.\n\nmultilabel : bool, default=False\n Samples may have several labels each (see\n https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\nzero_based : bool or \"auto\", default=\"auto\"\n Whether column indices in f are zero-based (True) or one-based\n (False). If column indices are one-based, they are transformed to\n zero-based to match Python/NumPy conventions.\n If set to \"auto\", a heuristic check is applied to determine this from\n the file contents. Both kinds of files occur \"in the wild\", but they\n are unfortunately not self-identifying. 
Using \"auto\" or True should\n always be safe when no ``offset`` or ``length`` is passed.\n If ``offset`` or ``length`` are passed, the \"auto\" mode falls back\n to ``zero_based=True`` to avoid having the heuristic check yield\n inconsistent results on different segments of the file.\n\nquery_id : bool, default=False\n If True, will return the query_id array for each file.\n\noffset : int, default=0\n Ignore the offset first bytes by seeking forward, then\n discarding the following bytes up until the next new line\n character.\n\nlength : int, default=-1\n If strictly positive, stop reading any new line of data once the\n position in the file has reached the (offset + length) bytes threshold.\n\nReturns\n-------\nX : scipy.sparse matrix of shape (n_samples, n_features)\n\ny : ndarray of shape (n_samples,), or, in the multilabel a list of\n tuples of length n_samples.\n\nquery_id : array of shape (n_samples,)\n query_id for each sample. Only returned when query_id is set to\n True.\n\nSee Also\n--------\nload_svmlight_files : Similar function for loading multiple files in this\n format, enforcing the same number of features/columns on all of them.\n\nExamples\n--------\nTo use joblib.Memory to cache the svmlight file::\n\n from joblib import Memory\n from .datasets import load_svmlight_file\n mem = Memory(\"./mycache\")\n\n @mem.cache\n def get_data():\n data = load_svmlight_file(\"mysvmlightfile\")\n return data[0], data[1]\n\n X, y = get_data()", + "code": "@_deprecate_positional_args\ndef load_svmlight_file(f, *, n_features=None, dtype=np.float64,\n multilabel=False, zero_based=\"auto\", query_id=False,\n offset=0, length=-1):\n \"\"\"Load datasets in the svmlight / libsvm format into sparse CSR matrix\n\n This format is a text-based format, with one sample per line. It does\n not store zero valued features hence is suitable for sparse dataset.\n\n The first element of each line can be used to store a target variable\n to predict.\n\n This format is used as the default format for both svmlight and the\n libsvm command line programs.\n\n Parsing a text based source can be expensive. When working on\n repeatedly on the same dataset, it is recommended to wrap this\n loader with joblib.Memory.cache to store a memmapped backup of the\n CSR results of the first call and benefit from the near instantaneous\n loading of memmapped structures for the subsequent calls.\n\n In case the file contains a pairwise preference constraint (known\n as \"qid\" in the svmlight format) these are ignored unless the\n query_id parameter is set to True. These pairwise preference\n constraints can be used to constraint the combination of samples\n when using pairwise loss functions (as is the case in some\n learning to rank problems) so that only pairs with the same\n query_id value are considered.\n\n This implementation is written in Cython and is reasonably fast.\n However, a faster API-compatible loader is also available at:\n\n https://github.com/mblondel/svmlight-loader\n\n Parameters\n ----------\n f : str, file-like or int\n (Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will\n be uncompressed on the fly. If an integer is passed, it is assumed to\n be a file descriptor. A file-like or file descriptor will not be closed\n by this function. A file-like object must be opened in binary mode.\n\n n_features : int, default=None\n The number of features to use. If None, it will be inferred. 
This\n argument is useful to load several files that are subsets of a\n bigger sliced dataset: each subset might not have examples of\n every feature, hence the inferred shape might vary from one\n slice to another.\n n_features is only required if ``offset`` or ``length`` are passed a\n non-default value.\n\n dtype : numpy data type, default=np.float64\n Data type of dataset to be loaded. This will be the data type of the\n output numpy arrays ``X`` and ``y``.\n\n multilabel : bool, default=False\n Samples may have several labels each (see\n https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\n zero_based : bool or \"auto\", default=\"auto\"\n Whether column indices in f are zero-based (True) or one-based\n (False). If column indices are one-based, they are transformed to\n zero-based to match Python/NumPy conventions.\n If set to \"auto\", a heuristic check is applied to determine this from\n the file contents. Both kinds of files occur \"in the wild\", but they\n are unfortunately not self-identifying. Using \"auto\" or True should\n always be safe when no ``offset`` or ``length`` is passed.\n If ``offset`` or ``length`` are passed, the \"auto\" mode falls back\n to ``zero_based=True`` to avoid having the heuristic check yield\n inconsistent results on different segments of the file.\n\n query_id : bool, default=False\n If True, will return the query_id array for each file.\n\n offset : int, default=0\n Ignore the offset first bytes by seeking forward, then\n discarding the following bytes up until the next new line\n character.\n\n length : int, default=-1\n If strictly positive, stop reading any new line of data once the\n position in the file has reached the (offset + length) bytes threshold.\n\n Returns\n -------\n X : scipy.sparse matrix of shape (n_samples, n_features)\n\n y : ndarray of shape (n_samples,), or, in the multilabel a list of\n tuples of length n_samples.\n\n query_id : array of shape (n_samples,)\n query_id for each sample. Only returned when query_id is set to\n True.\n\n See Also\n --------\n load_svmlight_files : Similar function for loading multiple files in this\n format, enforcing the same number of features/columns on all of them.\n\n Examples\n --------\n To use joblib.Memory to cache the svmlight file::\n\n from joblib import Memory\n from .datasets import load_svmlight_file\n mem = Memory(\"./mycache\")\n\n @mem.cache\n def get_data():\n data = load_svmlight_file(\"mysvmlightfile\")\n return data[0], data[1]\n\n X, y = get_data()\n \"\"\"\n return tuple(load_svmlight_files([f], n_features=n_features,\n dtype=dtype,\n multilabel=multilabel,\n zero_based=zero_based,\n query_id=query_id,\n offset=offset,\n length=length))" + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_files", + "name": "load_svmlight_files", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_files", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_files/files", + "name": "files", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_files.files", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, dtype=str, file-like or int", + "default_value": "", + "description": "(Paths of) files to load. If a path ends in \".gz\" or \".bz2\", it will\nbe uncompressed on the fly. If an integer is passed, it is assumed to\nbe a file descriptor. 
File-likes and file descriptors will not be\nclosed by this function. File-like objects must be opened in binary\nmode." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "dtype=str" + }, + { + "kind": "NamedType", + "name": "file-like" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_files/n_features", + "name": "n_features", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_files.n_features", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of features to use. If None, it will be inferred from the\nmaximum column index occurring in any of the files.\n\nThis can be set to a higher value than the actual number of features\nin any of the input files, but setting it to a lower value will cause\nan exception to be raised." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_files/dtype", + "name": "dtype", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_files.dtype", + "default_value": "np.float64", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "numpy data type", + "default_value": "np.float64", + "description": "Data type of dataset to be loaded. This will be the data type of the\noutput numpy arrays ``X`` and ``y``." + }, + "type": { + "kind": "NamedType", + "name": "numpy data type" + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_files/multilabel", + "name": "multilabel", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_files.multilabel", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Samples may have several labels each (see\nhttps://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_files/zero_based", + "name": "zero_based", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_files.zero_based", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or \"auto\"", + "default_value": "\"auto\"", + "description": "Whether column indices in f are zero-based (True) or one-based\n(False). If column indices are one-based, they are transformed to\nzero-based to match Python/NumPy conventions.\nIf set to \"auto\", a heuristic check is applied to determine this from\nthe file contents. Both kinds of files occur \"in the wild\", but they\nare unfortunately not self-identifying. Using \"auto\" or True should\nalways be safe when no offset or length is passed.\nIf offset or length are passed, the \"auto\" mode falls back\nto zero_based=True to avoid having the heuristic check yield\ninconsistent results on different segments of the file." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "\"auto\"" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_files/query_id", + "name": "query_id", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_files.query_id", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, will return the query_id array for each file." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_files/offset", + "name": "offset", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_files.offset", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Ignore the offset first bytes by seeking forward, then\ndiscarding the following bytes up until the next new line\ncharacter." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.datasets._svmlight_format_io/load_svmlight_files/length", + "name": "length", + "qname": "sklearn.datasets._svmlight_format_io.load_svmlight_files.length", + "default_value": "-1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "-1", + "description": "If strictly positive, stop reading any new line of data once the\nposition in the file has reached the (offset + length) bytes threshold." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load dataset from multiple files in SVMlight format\n\nThis function is equivalent to mapping load_svmlight_file over a list of\nfiles, except that the results are concatenated into a single, flat list\nand the samples vectors are constrained to all have the same number of\nfeatures.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.", + "docstring": "Load dataset from multiple files in SVMlight format\n\nThis function is equivalent to mapping load_svmlight_file over a list of\nfiles, except that the results are concatenated into a single, flat list\nand the samples vectors are constrained to all have the same number of\nfeatures.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nParameters\n----------\nfiles : array-like, dtype=str, file-like or int\n (Paths of) files to load. If a path ends in \".gz\" or \".bz2\", it will\n be uncompressed on the fly. If an integer is passed, it is assumed to\n be a file descriptor. File-likes and file descriptors will not be\n closed by this function. 
File-like objects must be opened in binary\n mode.\n\nn_features : int, default=None\n The number of features to use. If None, it will be inferred from the\n maximum column index occurring in any of the files.\n\n This can be set to a higher value than the actual number of features\n in any of the input files, but setting it to a lower value will cause\n an exception to be raised.\n\ndtype : numpy data type, default=np.float64\n Data type of dataset to be loaded. This will be the data type of the\n output numpy arrays ``X`` and ``y``.\n\nmultilabel : bool, default=False\n Samples may have several labels each (see\n https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\nzero_based : bool or \"auto\", default=\"auto\"\n Whether column indices in f are zero-based (True) or one-based\n (False). If column indices are one-based, they are transformed to\n zero-based to match Python/NumPy conventions.\n If set to \"auto\", a heuristic check is applied to determine this from\n the file contents. Both kinds of files occur \"in the wild\", but they\n are unfortunately not self-identifying. Using \"auto\" or True should\n always be safe when no offset or length is passed.\n If offset or length are passed, the \"auto\" mode falls back\n to zero_based=True to avoid having the heuristic check yield\n inconsistent results on different segments of the file.\n\nquery_id : bool, default=False\n If True, will return the query_id array for each file.\n\noffset : int, default=0\n Ignore the offset first bytes by seeking forward, then\n discarding the following bytes up until the next new line\n character.\n\nlength : int, default=-1\n If strictly positive, stop reading any new line of data once the\n position in the file has reached the (offset + length) bytes threshold.\n\nReturns\n-------\n[X1, y1, ..., Xn, yn]\nwhere each (Xi, yi) pair is the result from load_svmlight_file(files[i]).\n\nIf query_id is set to True, this will return instead [X1, y1, q1,\n..., Xn, yn, qn] where (Xi, yi, qi) is the result from\nload_svmlight_file(files[i])\n\nNotes\n-----\nWhen fitting a model to a matrix X_train and evaluating it against a\nmatrix X_test, it is essential that X_train and X_test have the same\nnumber of features (X_train.shape[1] == X_test.shape[1]). This may not\nbe the case if you load the files individually with load_svmlight_file.\n\nSee Also\n--------\nload_svmlight_file", + "code": "@_deprecate_positional_args\ndef load_svmlight_files(files, *, n_features=None, dtype=np.float64,\n multilabel=False, zero_based=\"auto\", query_id=False,\n offset=0, length=-1):\n \"\"\"Load dataset from multiple files in SVMlight format\n\n This function is equivalent to mapping load_svmlight_file over a list of\n files, except that the results are concatenated into a single, flat list\n and the samples vectors are constrained to all have the same number of\n features.\n\n In case the file contains a pairwise preference constraint (known\n as \"qid\" in the svmlight format) these are ignored unless the\n query_id parameter is set to True. These pairwise preference\n constraints can be used to constraint the combination of samples\n when using pairwise loss functions (as is the case in some\n learning to rank problems) so that only pairs with the same\n query_id value are considered.\n\n Parameters\n ----------\n files : array-like, dtype=str, file-like or int\n (Paths of) files to load. If a path ends in \".gz\" or \".bz2\", it will\n be uncompressed on the fly. 
If an integer is passed, it is assumed to\n be a file descriptor. File-likes and file descriptors will not be\n closed by this function. File-like objects must be opened in binary\n mode.\n\n n_features : int, default=None\n The number of features to use. If None, it will be inferred from the\n maximum column index occurring in any of the files.\n\n This can be set to a higher value than the actual number of features\n in any of the input files, but setting it to a lower value will cause\n an exception to be raised.\n\n dtype : numpy data type, default=np.float64\n Data type of dataset to be loaded. This will be the data type of the\n output numpy arrays ``X`` and ``y``.\n\n multilabel : bool, default=False\n Samples may have several labels each (see\n https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\n zero_based : bool or \"auto\", default=\"auto\"\n Whether column indices in f are zero-based (True) or one-based\n (False). If column indices are one-based, they are transformed to\n zero-based to match Python/NumPy conventions.\n If set to \"auto\", a heuristic check is applied to determine this from\n the file contents. Both kinds of files occur \"in the wild\", but they\n are unfortunately not self-identifying. Using \"auto\" or True should\n always be safe when no offset or length is passed.\n If offset or length are passed, the \"auto\" mode falls back\n to zero_based=True to avoid having the heuristic check yield\n inconsistent results on different segments of the file.\n\n query_id : bool, default=False\n If True, will return the query_id array for each file.\n\n offset : int, default=0\n Ignore the offset first bytes by seeking forward, then\n discarding the following bytes up until the next new line\n character.\n\n length : int, default=-1\n If strictly positive, stop reading any new line of data once the\n position in the file has reached the (offset + length) bytes threshold.\n\n Returns\n -------\n [X1, y1, ..., Xn, yn]\n where each (Xi, yi) pair is the result from load_svmlight_file(files[i]).\n\n If query_id is set to True, this will return instead [X1, y1, q1,\n ..., Xn, yn, qn] where (Xi, yi, qi) is the result from\n load_svmlight_file(files[i])\n\n Notes\n -----\n When fitting a model to a matrix X_train and evaluating it against a\n matrix X_test, it is essential that X_train and X_test have the same\n number of features (X_train.shape[1] == X_test.shape[1]). 
This may not\n be the case if you load the files individually with load_svmlight_file.\n\n See Also\n --------\n load_svmlight_file\n \"\"\"\n if (offset != 0 or length > 0) and zero_based == \"auto\":\n # disable heuristic search to avoid getting inconsistent results on\n # different segments of the file\n zero_based = True\n\n if (offset != 0 or length > 0) and n_features is None:\n raise ValueError(\n \"n_features is required when offset or length is specified.\")\n\n r = [_open_and_load(f, dtype, multilabel, bool(zero_based), bool(query_id),\n offset=offset, length=length)\n for f in files]\n\n if (zero_based is False or\n zero_based == \"auto\" and all(len(tmp[1]) and np.min(tmp[1]) > 0\n for tmp in r)):\n for _, indices, _, _, _ in r:\n indices -= 1\n\n n_f = max(ind[1].max() if len(ind[1]) else 0 for ind in r) + 1\n\n if n_features is None:\n n_features = n_f\n elif n_features < n_f:\n raise ValueError(\"n_features was set to {},\"\n \" but input file contains {} features\"\n .format(n_features, n_f))\n\n result = []\n for data, indices, indptr, y, query_values in r:\n shape = (indptr.shape[0] - 1, n_features)\n X = sp.csr_matrix((data, indices, indptr), shape)\n X.sort_indices()\n result += X, y\n if query_id:\n result.append(query_values)\n\n return result" + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/_download_20newsgroups", + "name": "_download_20newsgroups", + "qname": "sklearn.datasets._twenty_newsgroups._download_20newsgroups", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/_download_20newsgroups/target_dir", + "name": "target_dir", + "qname": "sklearn.datasets._twenty_newsgroups._download_20newsgroups.target_dir", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/_download_20newsgroups/cache_path", + "name": "cache_path", + "qname": "sklearn.datasets._twenty_newsgroups._download_20newsgroups.cache_path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Download the 20 newsgroups data and stored it as a zipped pickle.", + "docstring": "Download the 20 newsgroups data and stored it as a zipped pickle.", + "code": "def _download_20newsgroups(target_dir, cache_path):\n \"\"\"Download the 20 newsgroups data and stored it as a zipped pickle.\"\"\"\n train_path = os.path.join(target_dir, TRAIN_FOLDER)\n test_path = os.path.join(target_dir, TEST_FOLDER)\n\n if not os.path.exists(target_dir):\n os.makedirs(target_dir)\n\n logger.info(\"Downloading dataset from %s (14 MB)\", ARCHIVE.url)\n archive_path = _fetch_remote(ARCHIVE, dirname=target_dir)\n\n logger.debug(\"Decompressing %s\", archive_path)\n tarfile.open(archive_path, \"r:gz\").extractall(path=target_dir)\n os.remove(archive_path)\n\n # Store a zipped pickle\n cache = dict(train=load_files(train_path, encoding='latin1'),\n test=load_files(test_path, encoding='latin1'))\n compressed_content = codecs.encode(pickle.dumps(cache), 'zlib_codec')\n with open(cache_path, 'wb') as f:\n f.write(compressed_content)\n\n shutil.rmtree(target_dir)\n return cache" + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups", + "name": 
"fetch_20newsgroups", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups/data_home", + "name": "data_home", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups.data_home", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Specify a download and cache folder for the datasets. If None,\nall scikit-learn data is stored in '~/scikit_learn_data' subfolders." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups/subset", + "name": "subset", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups.subset", + "default_value": "'train'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'train', 'test', 'all'}", + "default_value": "'train'", + "description": "Select the dataset to load: 'train' for the training set, 'test'\nfor the test set, 'all' for both, with shuffled ordering." + }, + "type": { + "kind": "EnumType", + "values": ["all", "test", "train"] + } + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups/categories", + "name": "categories", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups.categories", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like, dtype=str or unicode", + "default_value": "None", + "description": "If None (default), load all the categories.\nIf not None, list of category names to load (other categories\nignored)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "dtype=str" + }, + { + "kind": "NamedType", + "name": "unicode" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups/shuffle", + "name": "shuffle", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not to shuffle the data: might be important for models that\nmake the assumption that the samples are independent and identically\ndistributed (i.i.d.), such as stochastic gradient descent." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups/random_state", + "name": "random_state", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups.random_state", + "default_value": "42", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for dataset shuffling. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups/remove", + "name": "remove", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups.remove", + "default_value": "()", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "tuple", + "default_value": "()", + "description": "May contain any subset of ('headers', 'footers', 'quotes'). Each of\nthese are kinds of text that will be detected and removed from the\nnewsgroup posts, preventing classifiers from overfitting on\nmetadata.\n\n'headers' removes newsgroup headers, 'footers' removes blocks at the\nends of posts that look like signatures, and 'quotes' removes lines\nthat appear to be quoting another post.\n\n'headers' follows an exact standard; the other filters are not always\ncorrect." + }, + "type": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups/download_if_missing", + "name": "download_if_missing", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups.download_if_missing", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, raise an IOError if the data is not locally available\ninstead of trying to download the data from the source site." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns `(data.data, data.target)` instead of a Bunch\nobject.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load the filenames and data from the 20 newsgroups dataset (classification).\n\nDownload it if necessary.\n\n================= ==========\nClasses 20\nSamples total 18846\nDimensionality 1\nFeatures text\n================= ==========\n\nRead more in the :ref:`User Guide <20newsgroups_dataset>`.", + "docstring": "Load the filenames and data from the 20 newsgroups dataset (classification).\n\nDownload it if necessary.\n\n================= ==========\nClasses 20\nSamples total 18846\nDimensionality 1\nFeatures text\n================= ==========\n\nRead more in the :ref:`User Guide <20newsgroups_dataset>`.\n\nParameters\n----------\ndata_home : str, default=None\n Specify a download and cache folder for the datasets. 
If None,\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nsubset : {'train', 'test', 'all'}, default='train'\n Select the dataset to load: 'train' for the training set, 'test'\n for the test set, 'all' for both, with shuffled ordering.\n\ncategories : array-like, dtype=str or unicode, default=None\n If None (default), load all the categories.\n If not None, list of category names to load (other categories\n ignored).\n\nshuffle : bool, default=True\n Whether or not to shuffle the data: might be important for models that\n make the assumption that the samples are independent and identically\n distributed (i.i.d.), such as stochastic gradient descent.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nremove : tuple, default=()\n May contain any subset of ('headers', 'footers', 'quotes'). Each of\n these are kinds of text that will be detected and removed from the\n newsgroup posts, preventing classifiers from overfitting on\n metadata.\n\n 'headers' removes newsgroup headers, 'footers' removes blocks at the\n ends of posts that look like signatures, and 'quotes' removes lines\n that appear to be quoting another post.\n\n 'headers' follows an exact standard; the other filters are not always\n correct.\n\ndownload_if_missing : bool, default=True\n If False, raise an IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns `(data.data, data.target)` instead of a Bunch\n object.\n\n .. versionadded:: 0.22\n\nReturns\n-------\nbunch : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : list of shape (n_samples,)\n The data list to learn.\n target: ndarray of shape (n_samples,)\n The target labels.\n filenames: list of shape (n_samples,)\n The path to the location of the data.\n DESCR: str\n The full description of the dataset.\n target_names: list of shape (n_classes,)\n The names of target classes.\n\n(data, target) : tuple if `return_X_y=True`\n .. versionadded:: 0.22", + "code": "@_deprecate_positional_args\ndef fetch_20newsgroups(*, data_home=None, subset='train', categories=None,\n shuffle=True, random_state=42,\n remove=(),\n download_if_missing=True, return_X_y=False):\n \"\"\"Load the filenames and data from the 20 newsgroups dataset \\\n(classification).\n\n Download it if necessary.\n\n ================= ==========\n Classes 20\n Samples total 18846\n Dimensionality 1\n Features text\n ================= ==========\n\n Read more in the :ref:`User Guide <20newsgroups_dataset>`.\n\n Parameters\n ----------\n data_home : str, default=None\n Specify a download and cache folder for the datasets. 
If None,\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n subset : {'train', 'test', 'all'}, default='train'\n Select the dataset to load: 'train' for the training set, 'test'\n for the test set, 'all' for both, with shuffled ordering.\n\n categories : array-like, dtype=str or unicode, default=None\n If None (default), load all the categories.\n If not None, list of category names to load (other categories\n ignored).\n\n shuffle : bool, default=True\n Whether or not to shuffle the data: might be important for models that\n make the assumption that the samples are independent and identically\n distributed (i.i.d.), such as stochastic gradient descent.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n remove : tuple, default=()\n May contain any subset of ('headers', 'footers', 'quotes'). Each of\n these are kinds of text that will be detected and removed from the\n newsgroup posts, preventing classifiers from overfitting on\n metadata.\n\n 'headers' removes newsgroup headers, 'footers' removes blocks at the\n ends of posts that look like signatures, and 'quotes' removes lines\n that appear to be quoting another post.\n\n 'headers' follows an exact standard; the other filters are not always\n correct.\n\n download_if_missing : bool, default=True\n If False, raise an IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n return_X_y : bool, default=False\n If True, returns `(data.data, data.target)` instead of a Bunch\n object.\n\n .. versionadded:: 0.22\n\n Returns\n -------\n bunch : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : list of shape (n_samples,)\n The data list to learn.\n target: ndarray of shape (n_samples,)\n The target labels.\n filenames: list of shape (n_samples,)\n The path to the location of the data.\n DESCR: str\n The full description of the dataset.\n target_names: list of shape (n_classes,)\n The names of target classes.\n\n (data, target) : tuple if `return_X_y=True`\n .. versionadded:: 0.22\n \"\"\"\n\n data_home = get_data_home(data_home=data_home)\n cache_path = _pkl_filepath(data_home, CACHE_NAME)\n twenty_home = os.path.join(data_home, \"20news_home\")\n cache = None\n if os.path.exists(cache_path):\n try:\n with open(cache_path, 'rb') as f:\n compressed_content = f.read()\n uncompressed_content = codecs.decode(\n compressed_content, 'zlib_codec')\n cache = pickle.loads(uncompressed_content)\n except Exception as e:\n print(80 * '_')\n print('Cache loading failed')\n print(80 * '_')\n print(e)\n\n if cache is None:\n if download_if_missing:\n logger.info(\"Downloading 20news dataset. 
\"\n \"This may take a few minutes.\")\n cache = _download_20newsgroups(target_dir=twenty_home,\n cache_path=cache_path)\n else:\n raise IOError('20Newsgroups dataset not found')\n\n if subset in ('train', 'test'):\n data = cache[subset]\n elif subset == 'all':\n data_lst = list()\n target = list()\n filenames = list()\n for subset in ('train', 'test'):\n data = cache[subset]\n data_lst.extend(data.data)\n target.extend(data.target)\n filenames.extend(data.filenames)\n\n data.data = data_lst\n data.target = np.array(target)\n data.filenames = np.array(filenames)\n else:\n raise ValueError(\n \"subset can only be 'train', 'test' or 'all', got '%s'\" % subset)\n\n module_path = dirname(__file__)\n with open(join(module_path, 'descr', 'twenty_newsgroups.rst')) as rst_file:\n fdescr = rst_file.read()\n\n data.DESCR = fdescr\n\n if 'headers' in remove:\n data.data = [strip_newsgroup_header(text) for text in data.data]\n if 'footers' in remove:\n data.data = [strip_newsgroup_footer(text) for text in data.data]\n if 'quotes' in remove:\n data.data = [strip_newsgroup_quoting(text) for text in data.data]\n\n if categories is not None:\n labels = [(data.target_names.index(cat), cat) for cat in categories]\n # Sort the categories to have the ordering of the labels\n labels.sort()\n labels, categories = zip(*labels)\n mask = np.in1d(data.target, labels)\n data.filenames = data.filenames[mask]\n data.target = data.target[mask]\n # searchsorted to have continuous labels\n data.target = np.searchsorted(labels, data.target)\n data.target_names = list(categories)\n # Use an object array to shuffle: avoids memory copy\n data_lst = np.array(data.data, dtype=object)\n data_lst = data_lst[mask]\n data.data = data_lst.tolist()\n\n if shuffle:\n random_state = check_random_state(random_state)\n indices = np.arange(data.target.shape[0])\n random_state.shuffle(indices)\n data.filenames = data.filenames[indices]\n data.target = data.target[indices]\n # Use an object array to shuffle: avoids memory copy\n data_lst = np.array(data.data, dtype=object)\n data_lst = data_lst[indices]\n data.data = data_lst.tolist()\n\n if return_X_y:\n return data.data, data.target\n\n return data" + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups_vectorized", + "name": "fetch_20newsgroups_vectorized", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups_vectorized", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups_vectorized/subset", + "name": "subset", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups_vectorized.subset", + "default_value": "'train'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'train', 'test', 'all'}", + "default_value": "'train'", + "description": "Select the dataset to load: 'train' for the training set, 'test'\nfor the test set, 'all' for both, with shuffled ordering." + }, + "type": { + "kind": "EnumType", + "values": ["all", "test", "train"] + } + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups_vectorized/remove", + "name": "remove", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups_vectorized.remove", + "default_value": "()", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "tuple", + "default_value": "()", + "description": "May contain any subset of ('headers', 'footers', 'quotes'). 
Each of\nthese are kinds of text that will be detected and removed from the\nnewsgroup posts, preventing classifiers from overfitting on\nmetadata.\n\n'headers' removes newsgroup headers, 'footers' removes blocks at the\nends of posts that look like signatures, and 'quotes' removes lines\nthat appear to be quoting another post." + }, + "type": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups_vectorized/data_home", + "name": "data_home", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups_vectorized.data_home", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Specify an download and cache folder for the datasets. If None,\nall scikit-learn data is stored in '~/scikit_learn_data' subfolders." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups_vectorized/download_if_missing", + "name": "download_if_missing", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups_vectorized.download_if_missing", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, raise an IOError if the data is not locally available\ninstead of trying to download the data from the source site." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups_vectorized/return_X_y", + "name": "return_X_y", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups_vectorized.return_X_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns ``(data.data, data.target)`` instead of a Bunch\nobject.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups_vectorized/normalize", + "name": "normalize", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups_vectorized.normalize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, normalizes each document's feature vector to unit norm using\n:func:`sklearn.preprocessing.normalize`.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/fetch_20newsgroups_vectorized/as_frame", + "name": "as_frame", + "qname": "sklearn.datasets._twenty_newsgroups.fetch_20newsgroups_vectorized.as_frame", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric, string, or categorical). The target is\na pandas DataFrame or Series depending on the number of\n`target_columns`.\n\n.. 
versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Load and vectorize the 20 newsgroups dataset (classification).\n\nDownload it if necessary.\n\nThis is a convenience function; the transformation is done using the\ndefault settings for\n:class:`~sklearn.feature_extraction.text.CountVectorizer`. For more\nadvanced usage (stopword filtering, n-gram extraction, etc.), combine\nfetch_20newsgroups with a custom\n:class:`~sklearn.feature_extraction.text.CountVectorizer`,\n:class:`~sklearn.feature_extraction.text.HashingVectorizer`,\n:class:`~sklearn.feature_extraction.text.TfidfTransformer` or\n:class:`~sklearn.feature_extraction.text.TfidfVectorizer`.\n\nThe resulting counts are normalized using\n:func:`sklearn.preprocessing.normalize` unless normalize is set to False.\n\n================= ==========\nClasses 20\nSamples total 18846\nDimensionality 130107\nFeatures real\n================= ==========\n\nRead more in the :ref:`User Guide <20newsgroups_dataset>`.", + "docstring": "Load and vectorize the 20 newsgroups dataset (classification).\n\nDownload it if necessary.\n\nThis is a convenience function; the transformation is done using the\ndefault settings for\n:class:`~sklearn.feature_extraction.text.CountVectorizer`. For more\nadvanced usage (stopword filtering, n-gram extraction, etc.), combine\nfetch_20newsgroups with a custom\n:class:`~sklearn.feature_extraction.text.CountVectorizer`,\n:class:`~sklearn.feature_extraction.text.HashingVectorizer`,\n:class:`~sklearn.feature_extraction.text.TfidfTransformer` or\n:class:`~sklearn.feature_extraction.text.TfidfVectorizer`.\n\nThe resulting counts are normalized using\n:func:`sklearn.preprocessing.normalize` unless normalize is set to False.\n\n================= ==========\nClasses 20\nSamples total 18846\nDimensionality 130107\nFeatures real\n================= ==========\n\nRead more in the :ref:`User Guide <20newsgroups_dataset>`.\n\nParameters\n----------\nsubset : {'train', 'test', 'all'}, default='train'\n Select the dataset to load: 'train' for the training set, 'test'\n for the test set, 'all' for both, with shuffled ordering.\n\nremove : tuple, default=()\n May contain any subset of ('headers', 'footers', 'quotes'). Each of\n these are kinds of text that will be detected and removed from the\n newsgroup posts, preventing classifiers from overfitting on\n metadata.\n\n 'headers' removes newsgroup headers, 'footers' removes blocks at the\n ends of posts that look like signatures, and 'quotes' removes lines\n that appear to be quoting another post.\n\ndata_home : str, default=None\n Specify an download and cache folder for the datasets. If None,\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise an IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\nnormalize : bool, default=True\n If True, normalizes each document's feature vector to unit norm using\n :func:`sklearn.preprocessing.normalize`.\n\n .. versionadded:: 0.22\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string, or categorical). 
The target is\n a pandas DataFrame or Series depending on the number of\n `target_columns`.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nbunch : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data: {sparse matrix, dataframe} of shape (n_samples, n_features)\n The input data matrix. If ``as_frame`` is `True`, ``data`` is\n a pandas DataFrame with sparse columns.\n target: {ndarray, series} of shape (n_samples,)\n The target labels. If ``as_frame`` is `True`, ``target`` is a\n pandas Series.\n target_names: list of shape (n_classes,)\n The names of target classes.\n DESCR: str\n The full description of the dataset.\n frame: dataframe of shape (n_samples, n_features + 1)\n Only present when `as_frame=True`. Pandas DataFrame with ``data``\n and ``target``.\n\n .. versionadded:: 0.24\n\n(data, target) : tuple if ``return_X_y`` is True\n `data` and `target` would be of the format defined in the `Bunch`\n description above.\n\n .. versionadded:: 0.20", + "code": "@_deprecate_positional_args\ndef fetch_20newsgroups_vectorized(*, subset=\"train\", remove=(), data_home=None,\n download_if_missing=True, return_X_y=False,\n normalize=True, as_frame=False):\n \"\"\"Load and vectorize the 20 newsgroups dataset (classification).\n\n Download it if necessary.\n\n This is a convenience function; the transformation is done using the\n default settings for\n :class:`~sklearn.feature_extraction.text.CountVectorizer`. For more\n advanced usage (stopword filtering, n-gram extraction, etc.), combine\n fetch_20newsgroups with a custom\n :class:`~sklearn.feature_extraction.text.CountVectorizer`,\n :class:`~sklearn.feature_extraction.text.HashingVectorizer`,\n :class:`~sklearn.feature_extraction.text.TfidfTransformer` or\n :class:`~sklearn.feature_extraction.text.TfidfVectorizer`.\n\n The resulting counts are normalized using\n :func:`sklearn.preprocessing.normalize` unless normalize is set to False.\n\n ================= ==========\n Classes 20\n Samples total 18846\n Dimensionality 130107\n Features real\n ================= ==========\n\n Read more in the :ref:`User Guide <20newsgroups_dataset>`.\n\n Parameters\n ----------\n subset : {'train', 'test', 'all'}, default='train'\n Select the dataset to load: 'train' for the training set, 'test'\n for the test set, 'all' for both, with shuffled ordering.\n\n remove : tuple, default=()\n May contain any subset of ('headers', 'footers', 'quotes'). Each of\n these are kinds of text that will be detected and removed from the\n newsgroup posts, preventing classifiers from overfitting on\n metadata.\n\n 'headers' removes newsgroup headers, 'footers' removes blocks at the\n ends of posts that look like signatures, and 'quotes' removes lines\n that appear to be quoting another post.\n\n data_home : str, default=None\n Specify an download and cache folder for the datasets. If None,\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\n download_if_missing : bool, default=True\n If False, raise an IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n return_X_y : bool, default=False\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\n normalize : bool, default=True\n If True, normalizes each document's feature vector to unit norm using\n :func:`sklearn.preprocessing.normalize`.\n\n .. 
versionadded:: 0.22\n\n as_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string, or categorical). The target is\n a pandas DataFrame or Series depending on the number of\n `target_columns`.\n\n .. versionadded:: 0.24\n\n Returns\n -------\n bunch : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data: {sparse matrix, dataframe} of shape (n_samples, n_features)\n The input data matrix. If ``as_frame`` is `True`, ``data`` is\n a pandas DataFrame with sparse columns.\n target: {ndarray, series} of shape (n_samples,)\n The target labels. If ``as_frame`` is `True`, ``target`` is a\n pandas Series.\n target_names: list of shape (n_classes,)\n The names of target classes.\n DESCR: str\n The full description of the dataset.\n frame: dataframe of shape (n_samples, n_features + 1)\n Only present when `as_frame=True`. Pandas DataFrame with ``data``\n and ``target``.\n\n .. versionadded:: 0.24\n\n (data, target) : tuple if ``return_X_y`` is True\n `data` and `target` would be of the format defined in the `Bunch`\n description above.\n\n .. versionadded:: 0.20\n \"\"\"\n data_home = get_data_home(data_home=data_home)\n filebase = '20newsgroup_vectorized'\n if remove:\n filebase += 'remove-' + ('-'.join(remove))\n target_file = _pkl_filepath(data_home, filebase + \".pkl\")\n\n # we shuffle but use a fixed seed for the memoization\n data_train = fetch_20newsgroups(data_home=data_home,\n subset='train',\n categories=None,\n shuffle=True,\n random_state=12,\n remove=remove,\n download_if_missing=download_if_missing)\n\n data_test = fetch_20newsgroups(data_home=data_home,\n subset='test',\n categories=None,\n shuffle=True,\n random_state=12,\n remove=remove,\n download_if_missing=download_if_missing)\n\n if os.path.exists(target_file):\n try:\n X_train, X_test, feature_names = joblib.load(target_file)\n except ValueError as e:\n raise ValueError(\n f\"The cached dataset located in {target_file} was fetched \"\n f\"with an older scikit-learn version and it is not compatible \"\n f\"with the scikit-learn version imported. 
You need to \"\n f\"manually delete the file: {target_file}.\"\n ) from e\n else:\n vectorizer = CountVectorizer(dtype=np.int16)\n X_train = vectorizer.fit_transform(data_train.data).tocsr()\n X_test = vectorizer.transform(data_test.data).tocsr()\n feature_names = vectorizer.get_feature_names()\n\n joblib.dump((X_train, X_test, feature_names), target_file, compress=9)\n\n # the data is stored as int16 for compactness\n # but normalize needs floats\n if normalize:\n X_train = X_train.astype(np.float64)\n X_test = X_test.astype(np.float64)\n preprocessing.normalize(X_train, copy=False)\n preprocessing.normalize(X_test, copy=False)\n\n target_names = data_train.target_names\n\n if subset == \"train\":\n data = X_train\n target = data_train.target\n elif subset == \"test\":\n data = X_test\n target = data_test.target\n elif subset == \"all\":\n data = sp.vstack((X_train, X_test)).tocsr()\n target = np.concatenate((data_train.target, data_test.target))\n else:\n raise ValueError(\"%r is not a valid subset: should be one of \"\n \"['train', 'test', 'all']\" % subset)\n\n module_path = dirname(__file__)\n with open(join(module_path, 'descr', 'twenty_newsgroups.rst')) as rst_file:\n fdescr = rst_file.read()\n\n frame = None\n target_name = ['category_class']\n\n if as_frame:\n frame, data, target = _convert_data_dataframe(\n \"fetch_20newsgroups_vectorized\",\n data,\n target,\n feature_names,\n target_names=target_name,\n sparse_data=True\n )\n\n if return_X_y:\n return data, target\n\n return Bunch(data=data,\n target=target,\n frame=frame,\n target_names=target_names,\n feature_names=feature_names,\n DESCR=fdescr)" + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/strip_newsgroup_footer", + "name": "strip_newsgroup_footer", + "qname": "sklearn.datasets._twenty_newsgroups.strip_newsgroup_footer", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/strip_newsgroup_footer/text", + "name": "text", + "qname": "sklearn.datasets._twenty_newsgroups.strip_newsgroup_footer.text", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "The text from which to remove the signature block." 
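The two fetchers recorded above serve different workflows: `fetch_20newsgroups` returns raw post text (a Bunch whose `data` is a list of strings) for use with a custom vectorizer, while `fetch_20newsgroups_vectorized` returns a precomputed sparse count matrix over all 20 classes. A minimal usage sketch against the 0.24 API; the category names are real newsgroup labels, the printed counts and shape are what the standard train split gives, and the first call downloads the archive to `~/scikit_learn_data`:

```python
from sklearn.datasets import fetch_20newsgroups, fetch_20newsgroups_vectorized

# Raw text, two categories only, with metadata stripped so a classifier
# cannot overfit on headers, signatures, or quoted replies.
raw = fetch_20newsgroups(
    subset="train",
    categories=["rec.autos", "sci.space"],
    remove=("headers", "footers", "quotes"),
    random_state=42,
)
print(len(raw.data), raw.target_names)   # 1187 ['rec.autos', 'sci.space']
print(raw.data[0][:80])                  # start of one post, a plain string

# Precomputed (and, by default, L2-normalized) token counts over all
# 20 classes; no `categories` filter exists at this level.
vec = fetch_20newsgroups_vectorized(subset="train")
print(vec.data.shape)                    # (11314, 130107) sparse CSR matrix
```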
+ }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Given text in \"news\" format, attempt to remove a signature block.\n\nAs a rough heuristic, we assume that signatures are set apart by either\na blank line or a line made of hyphens, and that it is the last such line\nin the file (disregarding blank lines at the end).", + "docstring": "Given text in \"news\" format, attempt to remove a signature block.\n\nAs a rough heuristic, we assume that signatures are set apart by either\na blank line or a line made of hyphens, and that it is the last such line\nin the file (disregarding blank lines at the end).\n\nParameters\n----------\ntext : str\n The text from which to remove the signature block.", + "code": "def strip_newsgroup_footer(text):\n \"\"\"\n Given text in \"news\" format, attempt to remove a signature block.\n\n As a rough heuristic, we assume that signatures are set apart by either\n a blank line or a line made of hyphens, and that it is the last such line\n in the file (disregarding blank lines at the end).\n\n Parameters\n ----------\n text : str\n The text from which to remove the signature block.\n \"\"\"\n lines = text.strip().split('\\n')\n for line_num in range(len(lines) - 1, -1, -1):\n line = lines[line_num]\n if line.strip().strip('-') == '':\n break\n\n if line_num > 0:\n return '\\n'.join(lines[:line_num])\n else:\n return text" + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/strip_newsgroup_header", + "name": "strip_newsgroup_header", + "qname": "sklearn.datasets._twenty_newsgroups.strip_newsgroup_header", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/strip_newsgroup_header/text", + "name": "text", + "qname": "sklearn.datasets._twenty_newsgroups.strip_newsgroup_header.text", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "The text from which to remove the signature block." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Given text in \"news\" format, strip the headers, by removing everything\nbefore the first blank line.", + "docstring": "Given text in \"news\" format, strip the headers, by removing everything\nbefore the first blank line.\n\nParameters\n----------\ntext : str\n The text from which to remove the signature block.", + "code": "def strip_newsgroup_header(text):\n \"\"\"\n Given text in \"news\" format, strip the headers, by removing everything\n before the first blank line.\n\n Parameters\n ----------\n text : str\n The text from which to remove the signature block.\n \"\"\"\n _before, _blankline, after = text.partition('\\n\\n')\n return after" + }, + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/strip_newsgroup_quoting", + "name": "strip_newsgroup_quoting", + "qname": "sklearn.datasets._twenty_newsgroups.strip_newsgroup_quoting", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets._twenty_newsgroups/strip_newsgroup_quoting/text", + "name": "text", + "qname": "sklearn.datasets._twenty_newsgroups.strip_newsgroup_quoting.text", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "The text from which to remove the signature block." 
+ }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Given text in \"news\" format, strip lines beginning with the quote\ncharacters > or |, plus lines that often introduce a quoted section\n(for example, because they contain the string 'writes:'.)", + "docstring": "Given text in \"news\" format, strip lines beginning with the quote\ncharacters > or |, plus lines that often introduce a quoted section\n(for example, because they contain the string 'writes:'.)\n\nParameters\n----------\ntext : str\n The text from which to remove the signature block.", + "code": "def strip_newsgroup_quoting(text):\n \"\"\"\n Given text in \"news\" format, strip lines beginning with the quote\n characters > or |, plus lines that often introduce a quoted section\n (for example, because they contain the string 'writes:'.)\n\n Parameters\n ----------\n text : str\n The text from which to remove the signature block.\n \"\"\"\n good_lines = [line for line in text.split('\\n')\n if not _QUOTE_RE.search(line)]\n return '\\n'.join(good_lines)" + }, + { + "id": "scikit-learn/sklearn.datasets.setup/configuration", + "name": "configuration", + "qname": "sklearn.datasets.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.datasets.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.datasets.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.datasets.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.datasets.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package='', top_path=None):\n from numpy.distutils.misc_util import Configuration\n config = Configuration('datasets', parent_package, top_path)\n config.add_data_dir('data')\n config.add_data_dir('descr')\n config.add_data_dir('images')\n config.add_data_dir(os.path.join('tests', 'data'))\n if platform.python_implementation() != 'PyPy':\n config.add_extension('_svmlight_format_fast',\n sources=['_svmlight_format_fast.pyx'],\n include_dirs=[numpy.get_include()])\n config.add_subpackage('tests')\n return config" + }, + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/fit", + "name": "fit", + "qname": "sklearn.decomposition._base._BasePCA.fit", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/fit/self", + "name": "self", + "qname": "sklearn.decomposition._base._BasePCA.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/fit/X", + "name": "X", + "qname": "sklearn.decomposition._base._BasePCA.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where 
n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/fit/y", + "name": "y", + "qname": "sklearn.decomposition._base._BasePCA.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Placeholder for fit. Subclasses should implement this method!\n\nFit the model with X.", + "docstring": "Placeholder for fit. Subclasses should implement this method!\n\nFit the model with X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the instance itself.", + "code": " @abstractmethod\n def fit(self, X, y=None):\n \"\"\"Placeholder for fit. Subclasses should implement this method!\n\n Fit the model with X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/get_covariance", + "name": "get_covariance", + "qname": "sklearn.decomposition._base._BasePCA.get_covariance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/get_covariance/self", + "name": "self", + "qname": "sklearn.decomposition._base._BasePCA.get_covariance.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute data covariance with the generative model.\n\n``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``\nwhere S**2 contains the explained variances, and sigma2 contains the\nnoise variances.", + "docstring": "Compute data covariance with the generative model.\n\n``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``\nwhere S**2 contains the explained variances, and sigma2 contains the\nnoise variances.\n\nReturns\n-------\ncov : array, shape=(n_features, n_features)\n Estimated covariance of data.", + "code": " def get_covariance(self):\n \"\"\"Compute data covariance with the generative model.\n\n ``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``\n where S**2 contains the explained variances, and sigma2 contains the\n noise variances.\n\n Returns\n -------\n cov : array, shape=(n_features, n_features)\n Estimated covariance of data.\n \"\"\"\n components_ = self.components_\n exp_var = self.explained_variance_\n if self.whiten:\n components_ = components_ * np.sqrt(exp_var[:, np.newaxis])\n exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)\n cov = np.dot(components_.T * exp_var_diff, components_)\n cov.flat[::len(cov) + 1] += self.noise_variance_ # modify diag inplace\n return cov" + }, + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/get_precision", + "name": 
"get_precision", + "qname": "sklearn.decomposition._base._BasePCA.get_precision", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/get_precision/self", + "name": "self", + "qname": "sklearn.decomposition._base._BasePCA.get_precision.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute data precision matrix with the generative model.\n\nEquals the inverse of the covariance but computed with\nthe matrix inversion lemma for efficiency.", + "docstring": "Compute data precision matrix with the generative model.\n\nEquals the inverse of the covariance but computed with\nthe matrix inversion lemma for efficiency.\n\nReturns\n-------\nprecision : array, shape=(n_features, n_features)\n Estimated precision of data.", + "code": " def get_precision(self):\n \"\"\"Compute data precision matrix with the generative model.\n\n Equals the inverse of the covariance but computed with\n the matrix inversion lemma for efficiency.\n\n Returns\n -------\n precision : array, shape=(n_features, n_features)\n Estimated precision of data.\n \"\"\"\n n_features = self.components_.shape[1]\n\n # handle corner cases first\n if self.n_components_ == 0:\n return np.eye(n_features) / self.noise_variance_\n if self.n_components_ == n_features:\n return linalg.inv(self.get_covariance())\n\n # Get precision using matrix inversion lemma\n components_ = self.components_\n exp_var = self.explained_variance_\n if self.whiten:\n components_ = components_ * np.sqrt(exp_var[:, np.newaxis])\n exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)\n precision = np.dot(components_, components_.T) / self.noise_variance_\n precision.flat[::len(precision) + 1] += 1. / exp_var_diff\n precision = np.dot(components_.T,\n np.dot(linalg.inv(precision), components_))\n precision /= -(self.noise_variance_ ** 2)\n precision.flat[::len(precision) + 1] += 1. / self.noise_variance_\n return precision" + }, + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.decomposition._base._BasePCA.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/inverse_transform/self", + "name": "self", + "qname": "sklearn.decomposition._base._BasePCA.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/inverse_transform/X", + "name": "X", + "qname": "sklearn.decomposition._base._BasePCA.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_components)", + "default_value": "", + "description": "New data, where n_samples is the number of samples\nand n_components is the number of components." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_components)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform data back to its original space.\n\nIn other words, return an input X_original whose transform would be X.", + "docstring": "Transform data back to its original space.\n\nIn other words, return an input X_original whose transform would be X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_components)\n New data, where n_samples is the number of samples\n and n_components is the number of components.\n\nReturns\n-------\nX_original array-like, shape (n_samples, n_features)\n\nNotes\n-----\nIf whitening is enabled, inverse_transform will compute the\nexact inverse operation, which includes reversing whitening.", + "code": " def inverse_transform(self, X):\n \"\"\"Transform data back to its original space.\n\n In other words, return an input X_original whose transform would be X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_components)\n New data, where n_samples is the number of samples\n and n_components is the number of components.\n\n Returns\n -------\n X_original array-like, shape (n_samples, n_features)\n\n Notes\n -----\n If whitening is enabled, inverse_transform will compute the\n exact inverse operation, which includes reversing whitening.\n \"\"\"\n if self.whiten:\n return np.dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) *\n self.components_) + self.mean_\n else:\n return np.dot(X, self.components_) + self.mean_" + }, + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/transform", + "name": "transform", + "qname": "sklearn.decomposition._base._BasePCA.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/transform/self", + "name": "self", + "qname": "sklearn.decomposition._base._BasePCA.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._base/_BasePCA/transform/X", + "name": "X", + "qname": "sklearn.decomposition._base._BasePCA.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "New data, where n_samples is the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply dimensionality reduction to X.\n\nX is projected on the first principal components previously extracted\nfrom a training set.", + "docstring": "Apply dimensionality reduction to X.\n\nX is projected on the first principal components previously extracted\nfrom a training set.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n New data, where n_samples is the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)", + "code": " def transform(self, X):\n \"\"\"Apply dimensionality reduction to X.\n\n X is projected on the first principal components previously extracted\n from a training set.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n New data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n X_new : array-like, shape (n_samples, n_components)\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False)\n if self.mean_ is not None:\n X = X - self.mean_\n X_transformed = np.dot(X, self.components_.T)\n if self.whiten:\n X_transformed /= np.sqrt(self.explained_variance_)\n return X_transformed" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "n_features", + "description": "Number of dictionary elements to extract." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/alpha", + "name": "alpha", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.alpha", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Sparsity controlling parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Maximum number of iterations to perform." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/tol", + "name": "tol", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.tol", + "default_value": "1e-08", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-8", + "description": "Tolerance for numerical error." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/fit_algorithm", + "name": "fit_algorithm", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.fit_algorithm", + "default_value": "'lars'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lars', 'cd'}", + "default_value": "'lars'", + "description": "* `'lars'`: uses the least angle regression method to solve the lasso\n problem (:func:`~sklearn.linear_model.lars_path`);\n* `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be\n faster if the estimated components are sparse.\n\n.. versionadded:: 0.17\n *cd* coordinate descent method to improve speed." + }, + "type": { + "kind": "EnumType", + "values": ["lars", "cd"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/transform_algorithm", + "name": "transform_algorithm", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.transform_algorithm", + "default_value": "'omp'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}", + "default_value": "'omp'", + "description": "Algorithm used to transform the data:\n\n- `'lars'`: uses the least angle regression method\n (:func:`~sklearn.linear_model.lars_path`);\n- `'lasso_lars'`: uses Lars to compute the Lasso solution.\n- `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'`\n will be faster if the estimated components are sparse.\n- `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n- `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\n.. versionadded:: 0.17\n *lasso_cd* coordinate descent method to improve speed." + }, + "type": { + "kind": "EnumType", + "values": ["omp", "lasso_cd", "lasso_lars", "lars", "threshold"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/transform_n_nonzero_coefs", + "name": "transform_n_nonzero_coefs", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.transform_n_nonzero_coefs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of nonzero coefficients to target in each column of the\nsolution. This is only used by `algorithm='lars'` and `algorithm='omp'`\nand is overridden by `alpha` in the `omp` case. If `None`, then\n`transform_n_nonzero_coefs=int(n_features / 10)`." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/transform_alpha", + "name": "transform_alpha", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.transform_alpha", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\npenalty applied to the L1 norm.\nIf `algorithm='threshold'`, `alpha` is the absolute value of the\nthreshold below which coefficients will be squashed to zero.\nIf `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\nthe reconstruction error targeted. In this case, it overrides\n`n_nonzero_coefs`.\nIf `None`, default to 1.0" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "Number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/code_init", + "name": "code_init", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.code_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_components)", + "default_value": "None", + "description": "Initial value for the code, for warm restart. Only used if `code_init`\nand `dict_init` are not None." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/dict_init", + "name": "dict_init", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.dict_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_features)", + "default_value": "None", + "description": "Initial values for the dictionary, for warm restart. Only used if\n`code_init` and `dict_init` are not None." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "To control the verbosity of the procedure." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/split_sign", + "name": "split_sign", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.split_sign", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to split the sparse feature vector into the concatenation of\nits negative part and its positive part. This can improve the\nperformance of downstream classifiers." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used for initializing the dictionary when ``dict_init`` is not\nspecified, randomly shuffling the data when ``shuffle`` is set to\n``True``, and updating the dictionary. Pass an int for reproducible\nresults across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/positive_code", + "name": "positive_code", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.positive_code", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to enforce positivity when finding the code.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/positive_dict", + "name": "positive_dict", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.positive_dict", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to enforce positivity when finding the dictionary\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/__init__/transform_max_iter", + "name": "transform_max_iter", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.__init__.transform_max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n`'lasso_lars'`.\n\n.. 
versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Dictionary learning\n\nFinds a dictionary (a set of atoms) that can best be used to represent data\nusing a sparse code.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=None, *, alpha=1, max_iter=1000, tol=1e-8,\n fit_algorithm='lars', transform_algorithm='omp',\n transform_n_nonzero_coefs=None, transform_alpha=None,\n n_jobs=None, code_init=None, dict_init=None, verbose=False,\n split_sign=False, random_state=None, positive_code=False,\n positive_dict=False, transform_max_iter=1000):\n\n super().__init__(\n transform_algorithm, transform_n_nonzero_coefs,\n transform_alpha, split_sign, n_jobs, positive_code,\n transform_max_iter\n )\n self.n_components = n_components\n self.alpha = alpha\n self.max_iter = max_iter\n self.tol = tol\n self.fit_algorithm = fit_algorithm\n self.code_init = code_init\n self.dict_init = dict_init\n self.verbose = verbose\n self.random_state = random_state\n self.positive_dict = positive_dict" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/fit", + "name": "fit", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/fit/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/fit/X", + "name": "X", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where `n_samples` in the number of samples\nand `n_features` is the number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/DictionaryLearning/fit/y", + "name": "y", + "qname": "sklearn.decomposition._dict_learning.DictionaryLearning.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model from data in X.", + "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where `n_samples` in the number of samples\n and `n_features` is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the object itself.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the model from data in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where `n_samples` in the number of samples\n and `n_features` is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the object itself.\n \"\"\"\n random_state = check_random_state(self.random_state)\n X = self._validate_data(X)\n if self.n_components is None:\n n_components = X.shape[1]\n else:\n n_components = self.n_components\n\n V, U, E, self.n_iter_ = dict_learning(\n X, n_components, alpha=self.alpha,\n tol=self.tol, max_iter=self.max_iter,\n method=self.fit_algorithm,\n method_max_iter=self.transform_max_iter,\n n_jobs=self.n_jobs,\n code_init=self.code_init,\n dict_init=self.dict_init,\n verbose=self.verbose,\n random_state=random_state,\n return_n_iter=True,\n positive_dict=self.positive_dict,\n positive_code=self.positive_code)\n self.components_ = U\n self.error_ = E\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of dictionary elements to extract." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/alpha", + "name": "alpha", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.alpha", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1", + "description": "Sparsity controlling parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/n_iter", + "name": "n_iter", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.n_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Total number of iterations to perform." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/fit_algorithm", + "name": "fit_algorithm", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.fit_algorithm", + "default_value": "'lars'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lars', 'cd'}", + "default_value": "'lars'", + "description": "The algorithm used:\n\n- `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`)\n- `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse." + }, + "type": { + "kind": "EnumType", + "values": ["lars", "cd"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/batch_size", + "name": "batch_size", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.batch_size", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Number of samples in each mini-batch." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to shuffle the samples before forming batches." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/dict_init", + "name": "dict_init", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.dict_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_features)", + "default_value": "None", + "description": "initial value of the dictionary for warm restart scenarios" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/transform_algorithm", + "name": "transform_algorithm", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.transform_algorithm", + "default_value": "'omp'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}", + "default_value": "'omp'", + "description": "Algorithm used to transform the data:\n\n- `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n- `'lasso_lars'`: uses Lars to compute the Lasso solution.\n- `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster\n if the estimated components are sparse.\n- `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n- `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``." + }, + "type": { + "kind": "EnumType", + "values": ["omp", "lasso_cd", "lasso_lars", "lars", "threshold"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/transform_n_nonzero_coefs", + "name": "transform_n_nonzero_coefs", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.transform_n_nonzero_coefs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of nonzero coefficients to target in each column of the\nsolution. This is only used by `algorithm='lars'` and `algorithm='omp'`\nand is overridden by `alpha` in the `omp` case. If `None`, then\n`transform_n_nonzero_coefs=int(n_features / 10)`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/transform_alpha", + "name": "transform_alpha", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.transform_alpha", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\npenalty applied to the L1 norm.\nIf `algorithm='threshold'`, `alpha` is the absolute value of the\nthreshold below which coefficients will be squashed to zero.\nIf `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\nthe reconstruction error targeted. In this case, it overrides\n`n_nonzero_coefs`.\nIf `None`, default to 1." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "To control the verbosity of the procedure." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/split_sign", + "name": "split_sign", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.split_sign", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to split the sparse feature vector into the concatenation of\nits negative part and its positive part. This can improve the\nperformance of downstream classifiers." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used for initializing the dictionary when ``dict_init`` is not\nspecified, randomly shuffling the data when ``shuffle`` is set to\n``True``, and updating the dictionary. Pass an int for reproducible\nresults across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/positive_code", + "name": "positive_code", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.positive_code", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to enforce positivity when finding the code.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/positive_dict", + "name": "positive_dict", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.positive_dict", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to enforce positivity when finding the dictionary.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/__init__/transform_max_iter", + "name": "transform_max_iter", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.__init__.transform_max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n`'lasso_lars'`.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mini-batch dictionary learning\n\nFinds a dictionary (a set of atoms) that can best be used to represent data\nusing a sparse code.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=None, *, alpha=1, n_iter=1000,\n fit_algorithm='lars', n_jobs=None, batch_size=3, shuffle=True,\n dict_init=None, transform_algorithm='omp',\n transform_n_nonzero_coefs=None, transform_alpha=None,\n verbose=False, split_sign=False, random_state=None,\n positive_code=False, positive_dict=False,\n transform_max_iter=1000):\n\n super().__init__(\n transform_algorithm, transform_n_nonzero_coefs, transform_alpha,\n split_sign, n_jobs, positive_code, transform_max_iter\n )\n self.n_components = n_components\n self.alpha = alpha\n self.n_iter = n_iter\n self.fit_algorithm = fit_algorithm\n self.dict_init = dict_init\n self.verbose = verbose\n self.shuffle = shuffle\n self.batch_size = batch_size\n self.split_sign = split_sign\n self.random_state = random_state\n self.positive_dict = positive_dict" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/fit", + "name": "fit", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/fit/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/fit/X", + "name": "X", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples in the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/fit/y", + "name": "y", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model from data in X.", + "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the model from data in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n random_state = check_random_state(self.random_state)\n X = self._validate_data(X)\n\n U, (A, B), self.n_iter_ = dict_learning_online(\n X, self.n_components, alpha=self.alpha,\n n_iter=self.n_iter, return_code=False,\n method=self.fit_algorithm,\n method_max_iter=self.transform_max_iter,\n n_jobs=self.n_jobs, dict_init=self.dict_init,\n batch_size=self.batch_size, shuffle=self.shuffle,\n verbose=self.verbose, random_state=random_state,\n return_inner_stats=True,\n return_n_iter=True,\n positive_dict=self.positive_dict,\n positive_code=self.positive_code)\n self.components_ = U\n # Keep track of the state of the algorithm to be able to do\n # some online fitting (partial_fit)\n self.inner_stats_ = (A, B)\n self.iter_offset_ = self.n_iter\n self.random_state_ = random_state\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/partial_fit", + "name": "partial_fit", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/partial_fit/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/partial_fit/X", + "name": "X", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples in the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/partial_fit/y", + "name": "y", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.partial_fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/MiniBatchDictionaryLearning/partial_fit/iter_offset", + "name": "iter_offset", + "qname": "sklearn.decomposition._dict_learning.MiniBatchDictionaryLearning.partial_fit.iter_offset", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of iteration on data batches that has been\nperformed before this call to partial_fit. This is optional:\nif no number is passed, the memory of the object is\nused." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Updates the model using the data in X as a mini-batch.", + "docstring": "Updates the model using the data in X as a mini-batch.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\niter_offset : int, default=None\n The number of iteration on data batches that has been\n performed before this call to partial_fit. This is optional:\n if no number is passed, the memory of the object is\n used.\n\nReturns\n-------\nself : object\n Returns the instance itself.", + "code": " def partial_fit(self, X, y=None, iter_offset=None):\n \"\"\"Updates the model using the data in X as a mini-batch.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n iter_offset : int, default=None\n The number of iteration on data batches that has been\n performed before this call to partial_fit. 
This is optional:\n if no number is passed, the memory of the object is\n used.\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n if not hasattr(self, 'random_state_'):\n self.random_state_ = check_random_state(self.random_state)\n if hasattr(self, 'components_'):\n dict_init = self.components_\n else:\n dict_init = self.dict_init\n inner_stats = getattr(self, 'inner_stats_', None)\n if iter_offset is None:\n iter_offset = getattr(self, 'iter_offset_', 0)\n X = self._validate_data(X, reset=(iter_offset == 0))\n U, (A, B) = dict_learning_online(\n X, self.n_components, alpha=self.alpha,\n n_iter=1, method=self.fit_algorithm,\n method_max_iter=self.transform_max_iter,\n n_jobs=self.n_jobs, dict_init=dict_init,\n batch_size=len(X), shuffle=False,\n verbose=self.verbose, return_code=False,\n iter_offset=iter_offset, random_state=self.random_state_,\n return_inner_stats=True, inner_stats=inner_stats,\n positive_dict=self.positive_dict,\n positive_code=self.positive_code)\n self.components_ = U\n\n # Keep track of the state of the algorithm to be able to do\n # some online fitting (partial_fit)\n self.inner_stats_ = (A, B)\n self.iter_offset_ = iter_offset + 1\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/__init__/dictionary", + "name": "dictionary", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.__init__.dictionary", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_features)", + "default_value": "", + "description": "The dictionary atoms used for sparse coding. Lines are assumed to be\nnormalized to unit norm." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/__init__/transform_algorithm", + "name": "transform_algorithm", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.__init__.transform_algorithm", + "default_value": "'omp'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}", + "default_value": "'omp'", + "description": "Algorithm used to transform the data:\n\n- `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n- `'lasso_lars'`: uses Lars to compute the Lasso solution;\n- `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if\n the estimated components are sparse;\n- `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n- `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``." 
+ }, + "type": { + "kind": "EnumType", + "values": ["omp", "lasso_cd", "lasso_lars", "lars", "threshold"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/__init__/transform_n_nonzero_coefs", + "name": "transform_n_nonzero_coefs", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.__init__.transform_n_nonzero_coefs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of nonzero coefficients to target in each column of the\nsolution. This is only used by `algorithm='lars'` and `algorithm='omp'`\nand is overridden by `alpha` in the `omp` case. If `None`, then\n`transform_n_nonzero_coefs=int(n_features / 10)`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/__init__/transform_alpha", + "name": "transform_alpha", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.__init__.transform_alpha", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\npenalty applied to the L1 norm.\nIf `algorithm='threshold'`, `alpha` is the absolute value of the\nthreshold below which coefficients will be squashed to zero.\nIf `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\nthe reconstruction error targeted. In this case, it overrides\n`n_nonzero_coefs`.\nIf `None`, default to 1." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/__init__/split_sign", + "name": "split_sign", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.__init__.split_sign", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to split the sparse feature vector into the concatenation of\nits negative part and its positive part. This can improve the\nperformance of downstream classifiers." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/__init__/positive_code", + "name": "positive_code", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.__init__.positive_code", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to enforce positivity when finding the code.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/__init__/transform_max_iter", + "name": "transform_max_iter", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.__init__.transform_max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n`lasso_lars`.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Sparse coding\n\nFinds a sparse representation of data against a fixed, precomputed\ndictionary.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, dictionary, *, transform_algorithm='omp',\n transform_n_nonzero_coefs=None, transform_alpha=None,\n split_sign=False, n_jobs=None, positive_code=False,\n transform_max_iter=1000):\n super().__init__(\n transform_algorithm, transform_n_nonzero_coefs,\n transform_alpha, split_sign, n_jobs, positive_code,\n transform_max_iter\n )\n self.dictionary = dictionary" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/_more_tags", + "name": "_more_tags", + "qname": "sklearn.decomposition._dict_learning.SparseCoder._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/_more_tags/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning.SparseCoder._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\"requires_fit\": False}" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/components_@getter", + "name": "components_", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.components_", + "decorators": [ + "deprecated(\"The attribute 'components_' is deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26). Use the 'dictionary' instead.\")", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/components_/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.components_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"The attribute 'components_' is deprecated \" # type: ignore\n \"in 0.24 and will be removed in 1.1 (renaming of 0.26). 
Use \"\n \"the 'dictionary' instead.\")\n @property\n def components_(self):\n return self.dictionary" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/fit", + "name": "fit", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/fit/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/fit/X", + "name": "X", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/fit/y", + "name": "y", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.", + "docstring": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : Ignored\n\ny : Ignored\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y=None):\n \"\"\"Do nothing and return the estimator unchanged.\n\n This method is just there to implement the usual API and hence\n work in pipelines.\n\n Parameters\n ----------\n X : Ignored\n\n y : Ignored\n\n Returns\n -------\n self : object\n \"\"\"\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/n_components_@getter", + "name": "n_components_", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.n_components_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/n_components_/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.n_components_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def n_components_(self):\n return self.dictionary.shape[0]" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/n_features_in_@getter", + "name": "n_features_in_", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.n_features_in_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/n_features_in_/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.n_features_in_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + 
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def n_features_in_(self):\n return self.dictionary.shape[1]" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/transform", + "name": "transform", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/transform/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/transform/X", + "name": "X", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Test data to be transformed, must have the same number of\nfeatures as the data used to train the model." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/SparseCoder/transform/y", + "name": "y", + "qname": "sklearn.decomposition._dict_learning.SparseCoder.transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Encode the data as a sparse combination of the dictionary atoms.\n\nCoding method is determined by the object parameter\n`transform_algorithm`.", + "docstring": "Encode the data as a sparse combination of the dictionary atoms.\n\nCoding method is determined by the object parameter\n`transform_algorithm`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\ny : Ignored\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed data.", + "code": " def transform(self, X, y=None):\n \"\"\"Encode the data as a sparse combination of the dictionary atoms.\n\n Coding method is determined by the object parameter\n `transform_algorithm`.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\n y : Ignored\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n return super()._transform(X, self.dictionary)" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding.__init__.self", 
+ "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/__init__/transform_algorithm", + "name": "transform_algorithm", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding.__init__.transform_algorithm", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/__init__/transform_n_nonzero_coefs", + "name": "transform_n_nonzero_coefs", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding.__init__.transform_n_nonzero_coefs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/__init__/transform_alpha", + "name": "transform_alpha", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding.__init__.transform_alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/__init__/split_sign", + "name": "split_sign", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding.__init__.split_sign", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding.__init__.n_jobs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/__init__/positive_code", + "name": "positive_code", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding.__init__.positive_code", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/__init__/transform_max_iter", + "name": "transform_max_iter", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding.__init__.transform_max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class from SparseCoder and DictionaryLearning algorithms.", + "docstring": "", + "code": " def __init__(self, transform_algorithm, transform_n_nonzero_coefs,\n transform_alpha, split_sign, n_jobs, positive_code,\n transform_max_iter):\n self.transform_algorithm = transform_algorithm\n self.transform_n_nonzero_coefs = transform_n_nonzero_coefs\n self.transform_alpha = transform_alpha\n 
self.transform_max_iter = transform_max_iter\n self.split_sign = split_sign\n self.n_jobs = n_jobs\n self.positive_code = positive_code" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/_transform", + "name": "_transform", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding._transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/_transform/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding._transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/_transform/X", + "name": "X", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding._transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/_transform/dictionary", + "name": "dictionary", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding._transform.dictionary", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private method allowing to accomodate both DictionaryLearning and\nSparseCoder.", + "docstring": "Private method allowing to accomodate both DictionaryLearning and\nSparseCoder.", + "code": " def _transform(self, X, dictionary):\n \"\"\"Private method allowing to accomodate both DictionaryLearning and\n SparseCoder.\"\"\"\n X = self._validate_data(X, reset=False)\n\n code = sparse_encode(\n X, dictionary, algorithm=self.transform_algorithm,\n n_nonzero_coefs=self.transform_n_nonzero_coefs,\n alpha=self.transform_alpha, max_iter=self.transform_max_iter,\n n_jobs=self.n_jobs, positive=self.positive_code)\n\n if self.split_sign:\n # feature vector is split into a positive and negative side\n n_samples, n_features = code.shape\n split_code = np.empty((n_samples, 2 * n_features))\n split_code[:, :n_features] = np.maximum(code, 0)\n split_code[:, n_features:] = -np.minimum(code, 0)\n code = split_code\n\n return code" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/transform", + "name": "transform", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/transform/self", + "name": "self", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_BaseSparseCoding/transform/X", + "name": "X", + "qname": "sklearn.decomposition._dict_learning._BaseSparseCoding.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Test data to be 
transformed, must have the same number of\nfeatures as the data used to train the model." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Encode the data as a sparse combination of the dictionary atoms.\n\nCoding method is determined by the object parameter\n`transform_algorithm`.", + "docstring": "Encode the data as a sparse combination of the dictionary atoms.\n\nCoding method is determined by the object parameter\n`transform_algorithm`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed data.", + "code": " def transform(self, X):\n \"\"\"Encode the data as a sparse combination of the dictionary atoms.\n\n Coding method is determined by the object parameter\n `transform_algorithm`.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n return self._transform(X, self.components_)" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_check_positive_coding", + "name": "_check_positive_coding", + "qname": "sklearn.decomposition._dict_learning._check_positive_coding", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_check_positive_coding/method", + "name": "method", + "qname": "sklearn.decomposition._dict_learning._check_positive_coding.method", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_check_positive_coding/positive", + "name": "positive", + "qname": "sklearn.decomposition._dict_learning._check_positive_coding.positive", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_positive_coding(method, positive):\n if positive and method in [\"omp\", \"lars\"]:\n raise ValueError(\n \"Positive constraint not supported for '{}' \"\n \"coding method.\".format(method)\n )" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode", + "name": "_sparse_encode", + "qname": "sklearn.decomposition._dict_learning._sparse_encode", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode/X", + "name": "X", + "qname": "sklearn.decomposition._dict_learning._sparse_encode.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Data matrix." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode/dictionary", + "name": "dictionary", + "qname": "sklearn.decomposition._dict_learning._sparse_encode.dictionary", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_features)", + "default_value": "", + "description": "The dictionary matrix against which to solve the sparse coding of\nthe data. Some of the algorithms assume normalized rows." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode/gram", + "name": "gram", + "qname": "sklearn.decomposition._dict_learning._sparse_encode.gram", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_components) or None", + "default_value": "", + "description": "Precomputed Gram matrix, `dictionary * dictionary'`\ngram can be `None` if method is 'threshold'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_components)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode/cov", + "name": "cov", + "qname": "sklearn.decomposition._dict_learning._sparse_encode.cov", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_samples)", + "default_value": "None", + "description": "Precomputed covariance, `dictionary * X'`." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode/algorithm", + "name": "algorithm", + "qname": "sklearn.decomposition._dict_learning._sparse_encode.algorithm", + "default_value": "'lasso_lars'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}", + "default_value": "'lasso_lars'", + "description": "The algorithm used:\n\n* `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n* `'lasso_lars'`: uses Lars to compute the Lasso solution;\n* `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n the estimated components are sparse;\n* `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n* `'threshold'`: squashes to zero all coefficients less than\n regularization from the projection `dictionary * data'`." + }, + "type": { + "kind": "EnumType", + "values": ["omp", "lasso_cd", "lasso_lars", "lars", "threshold"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode/regularization", + "name": "regularization", + "qname": "sklearn.decomposition._dict_learning._sparse_encode.regularization", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "None", + "description": "The regularization parameter. 
It corresponds to alpha when\nalgorithm is `'lasso_lars'`, `'lasso_cd'` or `'threshold'`.\nOtherwise it corresponds to `n_nonzero_coefs`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode/copy_cov", + "name": "copy_cov", + "qname": "sklearn.decomposition._dict_learning._sparse_encode.copy_cov", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to copy the precomputed covariance matrix; if `False`, it may\nbe overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode/init", + "name": "init", + "qname": "sklearn.decomposition._dict_learning._sparse_encode.init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_components)", + "default_value": "None", + "description": "Initialization value of the sparse code. Only used if\n`algorithm='lasso_cd'`." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._dict_learning._sparse_encode.max_iter", + "default_value": "1000", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n`'lasso_lars'`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode/check_input", + "name": "check_input", + "qname": "sklearn.decomposition._dict_learning._sparse_encode.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If `False`, the input arrays `X` and dictionary will not be checked." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._dict_learning._sparse_encode.verbose", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls the verbosity; the higher, the more messages." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_sparse_encode/positive", + "name": "positive", + "qname": "sklearn.decomposition._dict_learning._sparse_encode.positive", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generic sparse coding.\n\nEach column of the result is the solution to a Lasso problem.", + "docstring": "Generic sparse coding.\n\nEach column of the result is the solution to a Lasso problem.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\ndictionary : ndarray of shape (n_components, n_features)\n The dictionary matrix against which to solve the sparse coding of\n the data. Some of the algorithms assume normalized rows.\n\ngram : ndarray of shape (n_components, n_components) or None\n Precomputed Gram matrix, `dictionary * dictionary'`\n gram can be `None` if method is 'threshold'.\n\ncov : ndarray of shape (n_components, n_samples), default=None\n Precomputed covariance, `dictionary * X'`.\n\nalgorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='lasso_lars'\n The algorithm used:\n\n * `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n * `'lasso_lars'`: uses Lars to compute the Lasso solution;\n * `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n the estimated components are sparse;\n * `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n * `'threshold'`: squashes to zero all coefficients less than\n regularization from the projection `dictionary * data'`.\n\nregularization : int or float, default=None\n The regularization parameter. It corresponds to alpha when\n algorithm is `'lasso_lars'`, `'lasso_cd'` or `'threshold'`.\n Otherwise it corresponds to `n_nonzero_coefs`.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n Initialization value of the sparse code. Only used if\n `algorithm='lasso_cd'`.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\ncopy_cov : bool, default=True\n Whether to copy the precomputed covariance matrix; if `False`, it may\n be overwritten.\n\ncheck_input : bool, default=True\n If `False`, the input arrays `X` and dictionary will not be checked.\n\nverbose : int, default=0\n Controls the verbosity; the higher, the more messages.\n\npositive: bool, default=False\n Whether to enforce a positivity constraint on the sparse code.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\ncode : ndarray of shape (n_components, n_features)\n The sparse codes.\n\nSee Also\n--------\nsklearn.linear_model.lars_path\nsklearn.linear_model.orthogonal_mp\nsklearn.linear_model.Lasso\nSparseCoder", + "code": "def _sparse_encode(X, dictionary, gram, cov=None, algorithm='lasso_lars',\n regularization=None, copy_cov=True,\n init=None, max_iter=1000, check_input=True, verbose=0,\n positive=False):\n \"\"\"Generic sparse coding.\n\n Each column of the result is the solution to a Lasso problem.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Data matrix.\n\n dictionary : ndarray of shape (n_components, n_features)\n The dictionary matrix against which to solve the sparse coding of\n the data. Some of the algorithms assume normalized rows.\n\n gram : ndarray of shape (n_components, n_components) or None\n Precomputed Gram matrix, `dictionary * dictionary'`\n gram can be `None` if method is 'threshold'.\n\n cov : ndarray of shape (n_components, n_samples), default=None\n Precomputed covariance, `dictionary * X'`.\n\n algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, \\\n default='lasso_lars'\n The algorithm used:\n\n * `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n * `'lasso_lars'`: uses Lars to compute the Lasso solution;\n * `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n the estimated components are sparse;\n * `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n * `'threshold'`: squashes to zero all coefficients less than\n regularization from the projection `dictionary * data'`.\n\n regularization : int or float, default=None\n The regularization parameter. It corresponds to alpha when\n algorithm is `'lasso_lars'`, `'lasso_cd'` or `'threshold'`.\n Otherwise it corresponds to `n_nonzero_coefs`.\n\n init : ndarray of shape (n_samples, n_components), default=None\n Initialization value of the sparse code. Only used if\n `algorithm='lasso_cd'`.\n\n max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\n copy_cov : bool, default=True\n Whether to copy the precomputed covariance matrix; if `False`, it may\n be overwritten.\n\n check_input : bool, default=True\n If `False`, the input arrays `X` and dictionary will not be checked.\n\n verbose : int, default=0\n Controls the verbosity; the higher, the more messages.\n\n positive: bool, default=False\n Whether to enforce a positivity constraint on the sparse code.\n\n .. 
versionadded:: 0.20\n\n Returns\n -------\n code : ndarray of shape (n_components, n_features)\n The sparse codes.\n\n See Also\n --------\n sklearn.linear_model.lars_path\n sklearn.linear_model.orthogonal_mp\n sklearn.linear_model.Lasso\n SparseCoder\n \"\"\"\n if X.ndim == 1:\n X = X[:, np.newaxis]\n n_samples, n_features = X.shape\n n_components = dictionary.shape[0]\n if dictionary.shape[1] != X.shape[1]:\n raise ValueError(\"Dictionary and X have different numbers of features:\"\n \"dictionary.shape: {} X.shape{}\".format(\n dictionary.shape, X.shape))\n if cov is None and algorithm != 'lasso_cd':\n # overwriting cov is safe\n copy_cov = False\n cov = np.dot(dictionary, X.T)\n\n _check_positive_coding(algorithm, positive)\n\n if algorithm == 'lasso_lars':\n alpha = float(regularization) / n_features # account for scaling\n try:\n err_mgt = np.seterr(all='ignore')\n\n # Not passing in verbose=max(0, verbose-1) because Lars.fit already\n # corrects the verbosity level.\n lasso_lars = LassoLars(alpha=alpha, fit_intercept=False,\n verbose=verbose, normalize=False,\n precompute=gram, fit_path=False,\n positive=positive, max_iter=max_iter)\n lasso_lars.fit(dictionary.T, X.T, Xy=cov)\n new_code = lasso_lars.coef_\n finally:\n np.seterr(**err_mgt)\n\n elif algorithm == 'lasso_cd':\n alpha = float(regularization) / n_features # account for scaling\n\n # TODO: Make verbosity argument for Lasso?\n # sklearn.linear_model.coordinate_descent.enet_path has a verbosity\n # argument that we could pass in from Lasso.\n clf = Lasso(alpha=alpha, fit_intercept=False, normalize=False,\n precompute=gram, max_iter=max_iter, warm_start=True,\n positive=positive)\n\n if init is not None:\n clf.coef_ = init\n\n clf.fit(dictionary.T, X.T, check_input=check_input)\n new_code = clf.coef_\n\n elif algorithm == 'lars':\n try:\n err_mgt = np.seterr(all='ignore')\n\n # Not passing in verbose=max(0, verbose-1) because Lars.fit already\n # corrects the verbosity level.\n lars = Lars(fit_intercept=False, verbose=verbose, normalize=False,\n precompute=gram, n_nonzero_coefs=int(regularization),\n fit_path=False)\n lars.fit(dictionary.T, X.T, Xy=cov)\n new_code = lars.coef_\n finally:\n np.seterr(**err_mgt)\n\n elif algorithm == 'threshold':\n new_code = ((np.sign(cov) *\n np.maximum(np.abs(cov) - regularization, 0)).T)\n if positive:\n np.clip(new_code, 0, None, out=new_code)\n\n elif algorithm == 'omp':\n new_code = orthogonal_mp_gram(\n Gram=gram, Xy=cov, n_nonzero_coefs=int(regularization),\n tol=None, norms_squared=row_norms(X, squared=True),\n copy_Xy=copy_cov).T\n else:\n raise ValueError('Sparse coding method must be \"lasso_lars\" '\n '\"lasso_cd\", \"lasso\", \"threshold\" or \"omp\", got %s.'\n % algorithm)\n if new_code.ndim != 2:\n return new_code.reshape(n_samples, n_components)\n return new_code" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_update_dict", + "name": "_update_dict", + "qname": "sklearn.decomposition._dict_learning._update_dict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_update_dict/dictionary", + "name": "dictionary", + "qname": "sklearn.decomposition._dict_learning._update_dict.dictionary", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features, n_components)", + "default_value": "", + "description": "Value of the dictionary at the previous iteration." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_update_dict/Y", + "name": "Y", + "qname": "sklearn.decomposition._dict_learning._update_dict.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features, n_samples)", + "default_value": "", + "description": "Data matrix." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_update_dict/code", + "name": "code", + "qname": "sklearn.decomposition._dict_learning._update_dict.code", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_samples)", + "default_value": "", + "description": "Sparse coding of the data against which to optimize the dictionary." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_update_dict/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._dict_learning._update_dict.verbose", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_update_dict/return_r2", + "name": "return_r2", + "qname": "sklearn.decomposition._dict_learning._update_dict.return_r2", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to compute and return the residual sum of squares corresponding\nto the computed solution." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_update_dict/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._dict_learning._update_dict.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used for randomly initializing the dictionary. Pass an int for\nreproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/_update_dict/positive", + "name": "positive", + "qname": "sklearn.decomposition._dict_learning._update_dict.positive", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to enforce positivity when finding the dictionary.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Update the dense dictionary factor in place.", + "docstring": "Update the dense dictionary factor in place.\n\nParameters\n----------\ndictionary : ndarray of shape (n_features, n_components)\n Value of the dictionary at the previous iteration.\n\nY : ndarray of shape (n_features, n_samples)\n Data matrix.\n\ncode : ndarray of shape (n_components, n_samples)\n Sparse coding of the data against which to optimize the dictionary.\n\nverbose: bool, default=False\n Degree of output the procedure will print.\n\nreturn_r2 : bool, default=False\n Whether to compute and return the residual sum of squares corresponding\n to the computed solution.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for randomly initializing the dictionary. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\npositive : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ndictionary : ndarray of shape (n_features, n_components)\n Updated dictionary.", + "code": "def _update_dict(dictionary, Y, code, verbose=False, return_r2=False,\n random_state=None, positive=False):\n \"\"\"Update the dense dictionary factor in place.\n\n Parameters\n ----------\n dictionary : ndarray of shape (n_features, n_components)\n Value of the dictionary at the previous iteration.\n\n Y : ndarray of shape (n_features, n_samples)\n Data matrix.\n\n code : ndarray of shape (n_components, n_samples)\n Sparse coding of the data against which to optimize the dictionary.\n\n verbose: bool, default=False\n Degree of output the procedure will print.\n\n return_r2 : bool, default=False\n Whether to compute and return the residual sum of squares corresponding\n to the computed solution.\n\n random_state : int, RandomState instance or None, default=None\n Used for randomly initializing the dictionary. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n positive : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. 
versionadded:: 0.20\n\n Returns\n -------\n dictionary : ndarray of shape (n_features, n_components)\n Updated dictionary.\n \"\"\"\n n_components = len(code)\n n_features = Y.shape[0]\n random_state = check_random_state(random_state)\n # Get BLAS functions\n gemm, = linalg.get_blas_funcs(('gemm',), (dictionary, code, Y))\n ger, = linalg.get_blas_funcs(('ger',), (dictionary, code))\n nrm2, = linalg.get_blas_funcs(('nrm2',), (dictionary,))\n # Residuals, computed with BLAS for speed and efficiency\n # R <- -1.0 * U * V^T + 1.0 * Y\n # Outputs R as Fortran array for efficiency\n R = gemm(-1.0, dictionary, code, 1.0, Y)\n for k in range(n_components):\n # R <- 1.0 * U_k * V_k^T + R\n R = ger(1.0, dictionary[:, k], code[k, :], a=R, overwrite_a=True)\n dictionary[:, k] = np.dot(R, code[k, :])\n if positive:\n np.clip(dictionary[:, k], 0, None, out=dictionary[:, k])\n # Scale k'th atom\n # (U_k * U_k) ** 0.5\n atom_norm = nrm2(dictionary[:, k])\n if atom_norm < 1e-10:\n if verbose == 1:\n sys.stdout.write(\"+\")\n sys.stdout.flush()\n elif verbose:\n print(\"Adding new random atom\")\n dictionary[:, k] = random_state.randn(n_features)\n if positive:\n np.clip(dictionary[:, k], 0, None, out=dictionary[:, k])\n # Setting corresponding coefs to 0\n code[k, :] = 0.0\n # (U_k * U_k) ** 0.5\n atom_norm = nrm2(dictionary[:, k])\n dictionary[:, k] /= atom_norm\n else:\n dictionary[:, k] /= atom_norm\n # R <- -1.0 * U_k * V_k^T + R\n R = ger(-1.0, dictionary[:, k], code[k, :], a=R, overwrite_a=True)\n if return_r2:\n R = nrm2(R) ** 2.0\n return dictionary, R\n return dictionary" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning", + "name": "dict_learning", + "qname": "sklearn.decomposition._dict_learning.dict_learning", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/X", + "name": "X", + "qname": "sklearn.decomposition._dict_learning.dict_learning.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Data matrix." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._dict_learning.dict_learning.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of dictionary atoms to extract." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/alpha", + "name": "alpha", + "qname": "sklearn.decomposition._dict_learning.dict_learning.alpha", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Sparsity controlling parameter." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._dict_learning.dict_learning.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Maximum number of iterations to perform." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/tol", + "name": "tol", + "qname": "sklearn.decomposition._dict_learning.dict_learning.tol", + "default_value": "1e-08", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-8", + "description": "Tolerance for the stopping condition." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/method", + "name": "method", + "qname": "sklearn.decomposition._dict_learning.dict_learning.method", + "default_value": "'lars'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lars', 'cd'}", + "default_value": "'lars'", + "description": "The method used:\n\n* `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n* `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse." + }, + "type": { + "kind": "EnumType", + "values": ["lars", "cd"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/n_jobs", + "name": "n_jobs", + "qname": "sklearn.decomposition._dict_learning.dict_learning.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/dict_init", + "name": "dict_init", + "qname": "sklearn.decomposition._dict_learning.dict_learning.dict_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_features)", + "default_value": "None", + "description": "Initial value for the dictionary for warm restart scenarios. Only used\nif `code_init` and `dict_init` are not None." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/code_init", + "name": "code_init", + "qname": "sklearn.decomposition._dict_learning.dict_learning.code_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_components)", + "default_value": "None", + "description": "Initial value for the sparse code for warm restart scenarios. Only used\nif `code_init` and `dict_init` are not None." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/callback", + "name": "callback", + "qname": "sklearn.decomposition._dict_learning.dict_learning.callback", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "Callable that gets invoked every five iterations" + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._dict_learning.dict_learning.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "To control the verbosity of the procedure." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._dict_learning.dict_learning.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used for randomly initializing the dictionary. Pass an int for\nreproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.decomposition._dict_learning.dict_learning.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether or not to return the number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/positive_dict", + "name": "positive_dict", + "qname": "sklearn.decomposition._dict_learning.dict_learning.positive_dict", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to enforce positivity when finding the dictionary.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/positive_code", + "name": "positive_code", + "qname": "sklearn.decomposition._dict_learning.dict_learning.positive_code", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to enforce positivity when finding the code.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning/method_max_iter", + "name": "method_max_iter", + "qname": "sklearn.decomposition._dict_learning.dict_learning.method_max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Maximum number of iterations to perform.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Solves a dictionary learning matrix factorization problem.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Solves a dictionary learning matrix factorization problem.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\nn_components : int\n Number of dictionary atoms to extract.\n\nalpha : int\n Sparsity controlling parameter.\n\nmax_iter : int, default=100\n Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n Tolerance for the stopping condition.\n\nmethod : {'lars', 'cd'}, default='lars'\n The method used:\n\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n * `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n Initial value for the dictionary for warm restart scenarios. Only used\n if `code_init` and `dict_init` are not None.\n\ncode_init : ndarray of shape (n_samples, n_components), default=None\n Initial value for the sparse code for warm restart scenarios. Only used\n if `code_init` and `dict_init` are not None.\n\ncallback : callable, default=None\n Callable that gets invoked every five iterations\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for randomly initializing the dictionary. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. 
versionadded:: 0.20\n\nmethod_max_iter : int, default=1000\n Maximum number of iterations to perform.\n\n .. versionadded:: 0.22\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components)\n The sparse code factor in the matrix factorization.\n\ndictionary : ndarray of shape (n_components, n_features),\n The dictionary factor in the matrix factorization.\n\nerrors : array\n Vector of errors at each iteration.\n\nn_iter : int\n Number of iterations run. Returned only if `return_n_iter` is\n set to True.\n\nSee Also\n--------\ndict_learning_online\nDictionaryLearning\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA", + "code": "@_deprecate_positional_args\ndef dict_learning(X, n_components, *, alpha, max_iter=100, tol=1e-8,\n method='lars', n_jobs=None, dict_init=None, code_init=None,\n callback=None, verbose=False, random_state=None,\n return_n_iter=False, positive_dict=False,\n positive_code=False, method_max_iter=1000):\n \"\"\"Solves a dictionary learning matrix factorization problem.\n\n Finds the best dictionary and the corresponding sparse code for\n approximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\n where V is the dictionary and U is the sparse code.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Data matrix.\n\n n_components : int\n Number of dictionary atoms to extract.\n\n alpha : int\n Sparsity controlling parameter.\n\n max_iter : int, default=100\n Maximum number of iterations to perform.\n\n tol : float, default=1e-8\n Tolerance for the stopping condition.\n\n method : {'lars', 'cd'}, default='lars'\n The method used:\n\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n * `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse.\n\n n_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n dict_init : ndarray of shape (n_components, n_features), default=None\n Initial value for the dictionary for warm restart scenarios. Only used\n if `code_init` and `dict_init` are not None.\n\n code_init : ndarray of shape (n_samples, n_components), default=None\n Initial value for the sparse code for warm restart scenarios. Only used\n if `code_init` and `dict_init` are not None.\n\n callback : callable, default=None\n Callable that gets invoked every five iterations\n\n verbose : bool, default=False\n To control the verbosity of the procedure.\n\n random_state : int, RandomState instance or None, default=None\n Used for randomly initializing the dictionary. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n return_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\n positive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\n positive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\n method_max_iter : int, default=1000\n Maximum number of iterations to perform.\n\n .. 
versionadded:: 0.22\n\n Returns\n -------\n code : ndarray of shape (n_samples, n_components)\n The sparse code factor in the matrix factorization.\n\n dictionary : ndarray of shape (n_components, n_features),\n The dictionary factor in the matrix factorization.\n\n errors : array\n Vector of errors at each iteration.\n\n n_iter : int\n Number of iterations run. Returned only if `return_n_iter` is\n set to True.\n\n See Also\n --------\n dict_learning_online\n DictionaryLearning\n MiniBatchDictionaryLearning\n SparsePCA\n MiniBatchSparsePCA\n \"\"\"\n if method not in ('lars', 'cd'):\n raise ValueError('Coding method %r not supported as a fit algorithm.'\n % method)\n\n _check_positive_coding(method, positive_code)\n\n method = 'lasso_' + method\n\n t0 = time.time()\n # Avoid integer division problems\n alpha = float(alpha)\n random_state = check_random_state(random_state)\n\n # Init the code and the dictionary with SVD of Y\n if code_init is not None and dict_init is not None:\n code = np.array(code_init, order='F')\n # Don't copy V, it will happen below\n dictionary = dict_init\n else:\n code, S, dictionary = linalg.svd(X, full_matrices=False)\n dictionary = S[:, np.newaxis] * dictionary\n r = len(dictionary)\n if n_components <= r: # True even if n_components=None\n code = code[:, :n_components]\n dictionary = dictionary[:n_components, :]\n else:\n code = np.c_[code, np.zeros((len(code), n_components - r))]\n dictionary = np.r_[dictionary,\n np.zeros((n_components - r, dictionary.shape[1]))]\n\n # Fortran-order dict, as we are going to access its row vectors\n dictionary = np.array(dictionary, order='F')\n\n residuals = 0\n\n errors = []\n current_cost = np.nan\n\n if verbose == 1:\n print('[dict_learning]', end=' ')\n\n # If max_iter is 0, number of iterations returned should be zero\n ii = -1\n\n for ii in range(max_iter):\n dt = (time.time() - t0)\n if verbose == 1:\n sys.stdout.write(\".\")\n sys.stdout.flush()\n elif verbose:\n print(\"Iteration % 3i \"\n \"(elapsed time: % 3is, % 4.1fmn, current cost % 7.3f)\"\n % (ii, dt, dt / 60, current_cost))\n\n # Update code\n code = sparse_encode(X, dictionary, algorithm=method, alpha=alpha,\n init=code, n_jobs=n_jobs, positive=positive_code,\n max_iter=method_max_iter, verbose=verbose)\n # Update dictionary\n dictionary, residuals = _update_dict(dictionary.T, X.T, code.T,\n verbose=verbose, return_r2=True,\n random_state=random_state,\n positive=positive_dict)\n dictionary = dictionary.T\n\n # Cost function\n current_cost = 0.5 * residuals + alpha * np.sum(np.abs(code))\n errors.append(current_cost)\n\n if ii > 0:\n dE = errors[-2] - errors[-1]\n # assert(dE >= -tol * errors[-1])\n if dE < tol * errors[-1]:\n if verbose == 1:\n # A line return\n print(\"\")\n elif verbose:\n print(\"--- Convergence reached after %d iterations\" % ii)\n break\n if ii % 5 == 0 and callback is not None:\n callback(locals())\n\n if return_n_iter:\n return code, dictionary, errors, ii + 1\n else:\n return code, dictionary, errors" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online", + "name": "dict_learning_online", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/X", + "name": "X", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { 
+ "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Data matrix." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.n_components", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Number of dictionary atoms to extract." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/alpha", + "name": "alpha", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.alpha", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1", + "description": "Sparsity controlling parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/n_iter", + "name": "n_iter", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.n_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Number of mini-batch iterations to perform." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/return_code", + "name": "return_code", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.return_code", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to also return the code U or just the dictionary `V`." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/dict_init", + "name": "dict_init", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.dict_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_features)", + "default_value": "None", + "description": "Initial value for the dictionary for warm restart scenarios." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/callback", + "name": "callback", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.callback", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "callable that gets invoked every five iterations." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/batch_size", + "name": "batch_size", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.batch_size", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "The number of samples to take in each batch." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "To control the verbosity of the procedure." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/shuffle", + "name": "shuffle", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to shuffle the data before splitting it in batches." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/n_jobs", + "name": "n_jobs", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/method", + "name": "method", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.method", + "default_value": "'lars'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lars', 'cd'}", + "default_value": "'lars'", + "description": "* `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n* `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse." + }, + "type": { + "kind": "EnumType", + "values": ["lars", "cd"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/iter_offset", + "name": "iter_offset", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.iter_offset", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Number of previous iterations completed on the dictionary used for\ninitialization." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used for initializing the dictionary when ``dict_init`` is not\nspecified, randomly shuffling the data when ``shuffle`` is set to\n``True``, and updating the dictionary. Pass an int for reproducible\nresults across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/return_inner_stats", + "name": "return_inner_stats", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.return_inner_stats", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Return the inner statistics A (dictionary covariance) and B\n(data approximation). Useful to restart the algorithm in an\nonline setting. If `return_inner_stats` is `True`, `return_code` is\nignored." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/inner_stats", + "name": "inner_stats", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.inner_stats", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "tuple of (A, B) ndarrays", + "default_value": "None", + "description": "Inner sufficient statistics that are kept by the algorithm.\nPassing them at initialization is useful in online settings, to\navoid losing the history of the evolution.\n`A` `(n_components, n_components)` is the dictionary covariance matrix.\n`B` `(n_features, n_components)` is the data approximation matrix." + }, + "type": { + "kind": "NamedType", + "name": "tuple of (A, B) ndarrays" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether or not to return the number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/positive_dict", + "name": "positive_dict", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.positive_dict", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to enforce positivity when finding the dictionary.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/positive_code", + "name": "positive_code", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.positive_code", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to enforce positivity when finding the code.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/dict_learning_online/method_max_iter", + "name": "method_max_iter", + "qname": "sklearn.decomposition._dict_learning.dict_learning_online.method_max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Maximum number of iterations to perform when solving the lasso problem.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Solves a dictionary learning matrix factorization problem online.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. This is\naccomplished by repeatedly iterating over mini-batches by slicing\nthe input data.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Solves a dictionary learning matrix factorization problem online.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. This is\naccomplished by repeatedly iterating over mini-batches by slicing\nthe input data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\nn_components : int, default=2\n Number of dictionary atoms to extract.\n\nalpha : float, default=1\n Sparsity controlling parameter.\n\nn_iter : int, default=100\n Number of mini-batch iterations to perform.\n\nreturn_code : bool, default=True\n Whether to also return the code U or just the dictionary `V`.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n Initial value for the dictionary for warm restart scenarios.\n\ncallback : callable, default=None\n callable that gets invoked every five iterations.\n\nbatch_size : int, default=3\n The number of samples to take in each batch.\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nshuffle : bool, default=True\n Whether to shuffle the data before splitting it in batches.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nmethod : {'lars', 'cd'}, default='lars'\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n * `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse.\n\niter_offset : int, default=0\n Number of previous iterations completed on the dictionary used for\n initialization.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. 
Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nreturn_inner_stats : bool, default=False\n Return the inner statistics A (dictionary covariance) and B\n (data approximation). Useful to restart the algorithm in an\n online setting. If `return_inner_stats` is `True`, `return_code` is\n ignored.\n\ninner_stats : tuple of (A, B) ndarrays, default=None\n Inner sufficient statistics that are kept by the algorithm.\n Passing them at initialization is useful in online settings, to\n avoid losing the history of the evolution.\n `A` `(n_components, n_components)` is the dictionary covariance matrix.\n `B` `(n_features, n_components)` is the data approximation matrix.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\nmethod_max_iter : int, default=1000\n Maximum number of iterations to perform when solving the lasso problem.\n\n .. versionadded:: 0.22\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components),\n The sparse code (only returned if `return_code=True`).\n\ndictionary : ndarray of shape (n_components, n_features),\n The solutions to the dictionary learning problem.\n\nn_iter : int\n Number of iterations run. Returned only if `return_n_iter` is\n set to `True`.\n\nSee Also\n--------\ndict_learning\nDictionaryLearning\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA", + "code": "@_deprecate_positional_args\ndef dict_learning_online(X, n_components=2, *, alpha=1, n_iter=100,\n return_code=True, dict_init=None, callback=None,\n batch_size=3, verbose=False, shuffle=True,\n n_jobs=None, method='lars', iter_offset=0,\n random_state=None, return_inner_stats=False,\n inner_stats=None, return_n_iter=False,\n positive_dict=False, positive_code=False,\n method_max_iter=1000):\n \"\"\"Solves a dictionary learning matrix factorization problem online.\n\n Finds the best dictionary and the corresponding sparse code for\n approximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_2^2 + alpha * || U ||_1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\n where V is the dictionary and U is the sparse code. 
This is\n accomplished by repeatedly iterating over mini-batches by slicing\n the input data.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Data matrix.\n\n n_components : int, default=2\n Number of dictionary atoms to extract.\n\n alpha : float, default=1\n Sparsity controlling parameter.\n\n n_iter : int, default=100\n Number of mini-batch iterations to perform.\n\n return_code : bool, default=True\n Whether to also return the code U or just the dictionary `V`.\n\n dict_init : ndarray of shape (n_components, n_features), default=None\n Initial value for the dictionary for warm restart scenarios.\n\n callback : callable, default=None\n callable that gets invoked every five iterations.\n\n batch_size : int, default=3\n The number of samples to take in each batch.\n\n verbose : bool, default=False\n To control the verbosity of the procedure.\n\n shuffle : bool, default=True\n Whether to shuffle the data before splitting it in batches.\n\n n_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n method : {'lars', 'cd'}, default='lars'\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n * `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse.\n\n iter_offset : int, default=0\n Number of previous iterations completed on the dictionary used for\n initialization.\n\n random_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\n return_inner_stats : bool, default=False\n Return the inner statistics A (dictionary covariance) and B\n (data approximation). Useful to restart the algorithm in an\n online setting. If `return_inner_stats` is `True`, `return_code` is\n ignored.\n\n inner_stats : tuple of (A, B) ndarrays, default=None\n Inner sufficient statistics that are kept by the algorithm.\n Passing them at initialization is useful in online settings, to\n avoid losing the history of the evolution.\n `A` `(n_components, n_components)` is the dictionary covariance matrix.\n `B` `(n_features, n_components)` is the data approximation matrix.\n\n return_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\n positive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\n positive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\n method_max_iter : int, default=1000\n Maximum number of iterations to perform when solving the lasso problem.\n\n .. versionadded:: 0.22\n\n Returns\n -------\n code : ndarray of shape (n_samples, n_components),\n The sparse code (only returned if `return_code=True`).\n\n dictionary : ndarray of shape (n_components, n_features),\n The solutions to the dictionary learning problem.\n\n n_iter : int\n Number of iterations run. 
Returned only if `return_n_iter` is\n set to `True`.\n\n See Also\n --------\n dict_learning\n DictionaryLearning\n MiniBatchDictionaryLearning\n SparsePCA\n MiniBatchSparsePCA\n \"\"\"\n if n_components is None:\n n_components = X.shape[1]\n\n if method not in ('lars', 'cd'):\n raise ValueError('Coding method not supported as a fit algorithm.')\n\n _check_positive_coding(method, positive_code)\n\n method = 'lasso_' + method\n\n t0 = time.time()\n n_samples, n_features = X.shape\n # Avoid integer division problems\n alpha = float(alpha)\n random_state = check_random_state(random_state)\n\n # Init V with SVD of X\n if dict_init is not None:\n dictionary = dict_init\n else:\n _, S, dictionary = randomized_svd(X, n_components,\n random_state=random_state)\n dictionary = S[:, np.newaxis] * dictionary\n r = len(dictionary)\n if n_components <= r:\n dictionary = dictionary[:n_components, :]\n else:\n dictionary = np.r_[dictionary,\n np.zeros((n_components - r, dictionary.shape[1]))]\n\n if verbose == 1:\n print('[dict_learning]', end=' ')\n\n if shuffle:\n X_train = X.copy()\n random_state.shuffle(X_train)\n else:\n X_train = X\n\n dictionary = check_array(dictionary.T, order='F', dtype=np.float64,\n copy=False)\n dictionary = np.require(dictionary, requirements='W')\n\n X_train = check_array(X_train, order='C', dtype=np.float64, copy=False)\n\n batches = gen_batches(n_samples, batch_size)\n batches = itertools.cycle(batches)\n\n # The covariance of the dictionary\n if inner_stats is None:\n A = np.zeros((n_components, n_components))\n # The data approximation\n B = np.zeros((n_features, n_components))\n else:\n A = inner_stats[0].copy()\n B = inner_stats[1].copy()\n\n # If n_iter is zero, we need to return zero.\n ii = iter_offset - 1\n\n for ii, batch in zip(range(iter_offset, iter_offset + n_iter), batches):\n this_X = X_train[batch]\n dt = (time.time() - t0)\n if verbose == 1:\n sys.stdout.write(\".\")\n sys.stdout.flush()\n elif verbose:\n if verbose > 10 or ii % ceil(100. 
/ verbose) == 0:\n print(\"Iteration % 3i (elapsed time: % 3is, % 4.1fmn)\"\n % (ii, dt, dt / 60))\n\n this_code = sparse_encode(this_X, dictionary.T, algorithm=method,\n alpha=alpha, n_jobs=n_jobs,\n check_input=False,\n positive=positive_code,\n max_iter=method_max_iter, verbose=verbose).T\n\n # Update the auxiliary variables\n if ii < batch_size - 1:\n theta = float((ii + 1) * batch_size)\n else:\n theta = float(batch_size ** 2 + ii + 1 - batch_size)\n beta = (theta + 1 - batch_size) / (theta + 1)\n\n A *= beta\n A += np.dot(this_code, this_code.T)\n B *= beta\n B += np.dot(this_X.T, this_code.T)\n\n # Update dictionary\n dictionary = _update_dict(dictionary, B, A, verbose=verbose,\n random_state=random_state,\n positive=positive_dict)\n # XXX: Can the residuals be of any use?\n\n # Maybe we need a stopping criteria based on the amount of\n # modification in the dictionary\n if callback is not None:\n callback(locals())\n\n if return_inner_stats:\n if return_n_iter:\n return dictionary.T, (A, B), ii - iter_offset + 1\n else:\n return dictionary.T, (A, B)\n if return_code:\n if verbose > 1:\n print('Learning code...', end=' ')\n elif verbose == 1:\n print('|', end=' ')\n code = sparse_encode(X, dictionary.T, algorithm=method, alpha=alpha,\n n_jobs=n_jobs, check_input=False,\n positive=positive_code, max_iter=method_max_iter,\n verbose=verbose)\n if verbose > 1:\n dt = (time.time() - t0)\n print('done (total time: % 3is, % 4.1fmn)' % (dt, dt / 60))\n if return_n_iter:\n return code, dictionary.T, ii - iter_offset + 1\n else:\n return code, dictionary.T\n\n if return_n_iter:\n return dictionary.T, ii - iter_offset + 1\n else:\n return dictionary.T" + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode", + "name": "sparse_encode", + "qname": "sklearn.decomposition._dict_learning.sparse_encode", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/X", + "name": "X", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Data matrix." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/dictionary", + "name": "dictionary", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.dictionary", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_features)", + "default_value": "", + "description": "The dictionary matrix against which to solve the sparse coding of\nthe data. Some of the algorithms assume normalized rows for meaningful\noutput." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/gram", + "name": "gram", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.gram", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_components)", + "default_value": "None", + "description": "Precomputed Gram matrix, `dictionary * dictionary'`." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/cov", + "name": "cov", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.cov", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_samples)", + "default_value": "None", + "description": "Precomputed covariance, `dictionary' * X`." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/algorithm", + "name": "algorithm", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.algorithm", + "default_value": "'lasso_lars'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}", + "default_value": "'lasso_lars'", + "description": "The algorithm used:\n\n* `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n* `'lasso_lars'`: uses Lars to compute the Lasso solution;\n* `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n the estimated components are sparse;\n* `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n* `'threshold'`: squashes to zero all coefficients less than\n regularization from the projection `dictionary * data'`." + }, + "type": { + "kind": "EnumType", + "values": ["omp", "lasso_cd", "lasso_lars", "lars", "threshold"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/n_nonzero_coefs", + "name": "n_nonzero_coefs", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.n_nonzero_coefs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of nonzero coefficients to target in each column of the\nsolution. This is only used by `algorithm='lars'` and `algorithm='omp'`\nand is overridden by `alpha` in the `omp` case. If `None`, then\n`n_nonzero_coefs=int(n_features / 10)`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/alpha", + "name": "alpha", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.alpha", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\npenalty applied to the L1 norm.\nIf `algorithm='threshold'`, `alpha` is the absolute value of the\nthreshold below which coefficients will be squashed to zero.\nIf `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\nthe reconstruction error targeted. In this case, it overrides\n`n_nonzero_coefs`.\nIf `None`, default to 1." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/copy_cov", + "name": "copy_cov", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.copy_cov", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to copy the precomputed covariance matrix; if `False`, it may\nbe overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/init", + "name": "init", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_components)", + "default_value": "None", + "description": "Initialization value of the sparse codes. Only used if\n`algorithm='lasso_cd'`." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n`'lasso_lars'`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/n_jobs", + "name": "n_jobs", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/check_input", + "name": "check_input", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.check_input", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If `False`, the input arrays X and dictionary will not be checked." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls the verbosity; the higher, the more messages." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._dict_learning/sparse_encode/positive", + "name": "positive", + "qname": "sklearn.decomposition._dict_learning.sparse_encode.positive", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to enforce positivity when finding the encoding.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Sparse coding\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.", + "docstring": "Sparse coding\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\ndictionary : ndarray of shape (n_components, n_features)\n The dictionary matrix against which to solve the sparse coding of\n the data. Some of the algorithms assume normalized rows for meaningful\n output.\n\ngram : ndarray of shape (n_components, n_components), default=None\n Precomputed Gram matrix, `dictionary * dictionary'`.\n\ncov : ndarray of shape (n_components, n_samples), default=None\n Precomputed covariance, `dictionary' * X`.\n\nalgorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='lasso_lars'\n The algorithm used:\n\n * `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n * `'lasso_lars'`: uses Lars to compute the Lasso solution;\n * `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n the estimated components are sparse;\n * `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n * `'threshold'`: squashes to zero all coefficients less than\n regularization from the projection `dictionary * data'`.\n\nn_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `n_nonzero_coefs=int(n_features / 10)`.\n\nalpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\ncopy_cov : bool, default=True\n Whether to copy the precomputed covariance matrix; if `False`, it may\n be overwritten.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n Initialization value of the sparse codes. Only used if\n `algorithm='lasso_cd'`.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ncheck_input : bool, default=True\n If `False`, the input arrays X and dictionary will not be checked.\n\nverbose : int, default=0\n Controls the verbosity; the higher, the more messages.\n\npositive : bool, default=False\n Whether to enforce positivity when finding the encoding.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components)\n The sparse codes\n\nSee Also\n--------\nsklearn.linear_model.lars_path\nsklearn.linear_model.orthogonal_mp\nsklearn.linear_model.Lasso\nSparseCoder", + "code": "@_deprecate_positional_args\ndef sparse_encode(X, dictionary, *, gram=None, cov=None,\n algorithm='lasso_lars', n_nonzero_coefs=None, alpha=None,\n copy_cov=True, init=None, max_iter=1000, n_jobs=None,\n check_input=True, verbose=0, positive=False):\n \"\"\"Sparse coding\n\n Each row of the result is the solution to a sparse coding problem.\n The goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Data matrix.\n\n dictionary : ndarray of shape (n_components, n_features)\n The dictionary matrix against which to solve the sparse coding of\n the data. Some of the algorithms assume normalized rows for meaningful\n output.\n\n gram : ndarray of shape (n_components, n_components), default=None\n Precomputed Gram matrix, `dictionary * dictionary'`.\n\n cov : ndarray of shape (n_components, n_samples), default=None\n Precomputed covariance, `dictionary' * X`.\n\n algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, \\\n default='lasso_lars'\n The algorithm used:\n\n * `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n * `'lasso_lars'`: uses Lars to compute the Lasso solution;\n * `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n the estimated components are sparse;\n * `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n * `'threshold'`: squashes to zero all coefficients less than\n regularization from the projection `dictionary * data'`.\n\n n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `n_nonzero_coefs=int(n_features / 10)`.\n\n alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\n copy_cov : bool, default=True\n Whether to copy the precomputed covariance matrix; if `False`, it may\n be overwritten.\n\n init : ndarray of shape (n_samples, n_components), default=None\n Initialization value of the sparse codes. Only used if\n `algorithm='lasso_cd'`.\n\n max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\n n_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n check_input : bool, default=True\n If `False`, the input arrays X and dictionary will not be checked.\n\n verbose : int, default=0\n Controls the verbosity; the higher, the more messages.\n\n positive : bool, default=False\n Whether to enforce positivity when finding the encoding.\n\n .. 
versionadded:: 0.20\n\n Returns\n -------\n code : ndarray of shape (n_samples, n_components)\n The sparse codes\n\n See Also\n --------\n sklearn.linear_model.lars_path\n sklearn.linear_model.orthogonal_mp\n sklearn.linear_model.Lasso\n SparseCoder\n \"\"\"\n if check_input:\n if algorithm == 'lasso_cd':\n dictionary = check_array(dictionary, order='C', dtype='float64')\n X = check_array(X, order='C', dtype='float64')\n else:\n dictionary = check_array(dictionary)\n X = check_array(X)\n\n n_samples, n_features = X.shape\n n_components = dictionary.shape[0]\n\n if gram is None and algorithm != 'threshold':\n gram = np.dot(dictionary, dictionary.T)\n\n if cov is None and algorithm != 'lasso_cd':\n copy_cov = False\n cov = np.dot(dictionary, X.T)\n\n if algorithm in ('lars', 'omp'):\n regularization = n_nonzero_coefs\n if regularization is None:\n regularization = min(max(n_features / 10, 1), n_components)\n else:\n regularization = alpha\n if regularization is None:\n regularization = 1.\n\n if effective_n_jobs(n_jobs) == 1 or algorithm == 'threshold':\n code = _sparse_encode(X,\n dictionary, gram, cov=cov,\n algorithm=algorithm,\n regularization=regularization, copy_cov=copy_cov,\n init=init,\n max_iter=max_iter,\n check_input=False,\n verbose=verbose,\n positive=positive)\n return code\n\n # Enter parallel code block\n code = np.empty((n_samples, n_components))\n slices = list(gen_even_slices(n_samples, effective_n_jobs(n_jobs)))\n\n code_views = Parallel(n_jobs=n_jobs, verbose=verbose)(\n delayed(_sparse_encode)(\n X[this_slice], dictionary, gram,\n cov[:, this_slice] if cov is not None else None,\n algorithm,\n regularization=regularization, copy_cov=copy_cov,\n init=init[this_slice] if init is not None else None,\n max_iter=max_iter,\n check_input=False,\n verbose=verbose,\n positive=positive)\n for this_slice in slices)\n for this_slice, this_view in zip(slices, code_views):\n code[this_slice] = this_view\n return code" + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/__init__/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.__init__.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Dimensionality of latent space, the number of components\nof ``X`` that are obtained after ``transform``.\nIf None, n_components is set to the number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/__init__/tol", + "name": "tol", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.__init__.tol", + "default_value": "0.01", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float, defaul=1e-2", + "default_value": "", + "description": "Stopping tolerance for log-likelihood increase." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "defaul=1e-2" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/__init__/copy", + "name": "copy", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to make a copy of X. If ``False``, the input X gets overwritten\nduring fitting." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/__init__/noise_variance_init", + "name": "noise_variance_init", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.__init__.noise_variance_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,)", + "default_value": "None", + "description": "The initial guess of the noise variance for each feature.\nIf None, it defaults to np.ones(n_features)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/__init__/svd_method", + "name": "svd_method", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.__init__.svd_method", + "default_value": "'randomized'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lapack', 'randomized'}", + "default_value": "'randomized'", + "description": "Which SVD method to use. If 'lapack' use standard SVD from\nscipy.linalg, if 'randomized' use fast ``randomized_svd`` function.\nDefaults to 'randomized'. For most applications 'randomized' will\nbe sufficiently precise while providing significant speed gains.\nAccuracy can also be improved by setting higher values for\n`iterated_power`. If this is not sufficient, for maximum precision\nyou should choose 'lapack'." 
+ }, + "type": { + "kind": "EnumType", + "values": ["lapack", "randomized"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/__init__/iterated_power", + "name": "iterated_power", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.__init__.iterated_power", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Number of iterations for the power method. 3 by default. Only used\nif ``svd_method`` equals 'randomized'." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/__init__/rotation", + "name": "rotation", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.__init__.rotation", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'varimax', 'quartimax'}", + "default_value": "None", + "description": "If not None, apply the indicated rotation. Currently, varimax and\nquartimax are implemented. See\n`\"The varimax criterion for analytic rotation in factor analysis\"\n`_\nH. F. Kaiser, 1958.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "EnumType", + "values": ["varimax", "quartimax"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/__init__/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.__init__.random_state", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or RandomState instance", + "default_value": "0", + "description": "Only used when ``svd_method`` equals 'randomized'. Pass an int for\nreproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Factor Analysis (FA).\n\nA simple linear generative model with Gaussian latent variables.\n\nThe observations are assumed to be caused by a linear transformation of\nlower dimensional latent factors and added Gaussian noise.\nWithout loss of generality the factors are distributed according to a\nGaussian with zero mean and unit covariance. The noise is also zero mean\nand has an arbitrary diagonal covariance matrix.\n\nIf we would restrict the model further, by assuming that the Gaussian\nnoise is even isotropic (all diagonal entries are the same) we would obtain\n:class:`PPCA`.\n\nFactorAnalysis performs a maximum likelihood estimate of the so-called\n`loading` matrix, the transformation of the latent variables to the\nobserved ones, using SVD based approach.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=None, *, tol=1e-2, copy=True,\n max_iter=1000,\n noise_variance_init=None, svd_method='randomized',\n iterated_power=3, rotation=None, random_state=0):\n self.n_components = n_components\n self.copy = copy\n self.tol = tol\n self.max_iter = max_iter\n if svd_method not in ['lapack', 'randomized']:\n raise ValueError('SVD method %s is not supported. 
Please consider'\n ' the documentation' % svd_method)\n self.svd_method = svd_method\n\n self.noise_variance_init = noise_variance_init\n self.iterated_power = iterated_power\n self.random_state = random_state\n self.rotation = rotation" + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/_rotate", + "name": "_rotate", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis._rotate", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/_rotate/self", + "name": "self", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis._rotate.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/_rotate/components", + "name": "components", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis._rotate.components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/_rotate/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis._rotate.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/_rotate/tol", + "name": "tol", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis._rotate.tol", + "default_value": "1e-06", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Rotate the factor analysis solution.", + "docstring": "Rotate the factor analysis solution.", + "code": " def _rotate(self, components, n_components=None, tol=1e-6):\n \"Rotate the factor analysis solution.\"\n # note that tol is not exposed\n implemented = (\"varimax\", \"quartimax\")\n method = self.rotation\n if method in implemented:\n return _ortho_rotation(components.T, method=method,\n tol=tol)[:self.n_components]\n else:\n raise ValueError(\"'method' must be in %s, not %s\"\n % (implemented, method))" + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/fit", + "name": "fit", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/fit/self", + "name": "self", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/fit/X", + "name": "X", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": 
"Training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/fit/y", + "name": "y", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the FactorAnalysis model to X using SVD based approach", + "docstring": "Fit the FactorAnalysis model to X using SVD based approach\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : Ignored\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the FactorAnalysis model to X using SVD based approach\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : Ignored\n\n Returns\n -------\n self\n \"\"\"\n X = self._validate_data(X, copy=self.copy, dtype=np.float64)\n\n n_samples, n_features = X.shape\n n_components = self.n_components\n if n_components is None:\n n_components = n_features\n\n self.mean_ = np.mean(X, axis=0)\n X -= self.mean_\n\n # some constant terms\n nsqrt = sqrt(n_samples)\n llconst = n_features * log(2. * np.pi) + n_components\n var = np.var(X, axis=0)\n\n if self.noise_variance_init is None:\n psi = np.ones(n_features, dtype=X.dtype)\n else:\n if len(self.noise_variance_init) != n_features:\n raise ValueError(\"noise_variance_init dimension does not \"\n \"with number of features : %d != %d\" %\n (len(self.noise_variance_init), n_features))\n psi = np.array(self.noise_variance_init)\n\n loglike = []\n old_ll = -np.inf\n SMALL = 1e-12\n\n # we'll modify svd outputs to return unexplained variance\n # to allow for unified computation of loglikelihood\n if self.svd_method == 'lapack':\n def my_svd(X):\n _, s, Vt = linalg.svd(X,\n full_matrices=False,\n check_finite=False)\n return (s[:n_components], Vt[:n_components],\n squared_norm(s[n_components:]))\n elif self.svd_method == 'randomized':\n random_state = check_random_state(self.random_state)\n\n def my_svd(X):\n _, s, Vt = randomized_svd(X, n_components,\n random_state=random_state,\n n_iter=self.iterated_power)\n return s, Vt, squared_norm(X) - squared_norm(s)\n else:\n raise ValueError('SVD method %s is not supported. Please consider'\n ' the documentation' % self.svd_method)\n\n for i in range(self.max_iter):\n # SMALL helps numerics\n sqrt_psi = np.sqrt(psi) + SMALL\n s, Vt, unexp_var = my_svd(X / (sqrt_psi * nsqrt))\n s **= 2\n # Use 'maximum' here to avoid sqrt problems.\n W = np.sqrt(np.maximum(s - 1., 0.))[:, np.newaxis] * Vt\n del Vt\n W *= sqrt_psi\n\n # loglikelihood\n ll = llconst + np.sum(np.log(s))\n ll += unexp_var + np.sum(np.log(psi))\n ll *= -n_samples / 2.\n loglike.append(ll)\n if (ll - old_ll) < self.tol:\n break\n old_ll = ll\n\n psi = np.maximum(var - np.sum(W ** 2, axis=0), SMALL)\n else:\n warnings.warn('FactorAnalysis did not converge.' 
+\n ' You might want' +\n ' to increase the number of iterations.',\n ConvergenceWarning)\n\n self.components_ = W\n if self.rotation is not None:\n self.components_ = self._rotate(W)\n self.noise_variance_ = psi\n self.loglike_ = loglike\n self.n_iter_ = i + 1\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/get_covariance", + "name": "get_covariance", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.get_covariance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/get_covariance/self", + "name": "self", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.get_covariance.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute data covariance with the FactorAnalysis model.\n\n``cov = components_.T * components_ + diag(noise_variance)``", + "docstring": "Compute data covariance with the FactorAnalysis model.\n\n``cov = components_.T * components_ + diag(noise_variance)``\n\nReturns\n-------\ncov : ndarray of shape (n_features, n_features)\n Estimated covariance of data.", + "code": " def get_covariance(self):\n \"\"\"Compute data covariance with the FactorAnalysis model.\n\n ``cov = components_.T * components_ + diag(noise_variance)``\n\n Returns\n -------\n cov : ndarray of shape (n_features, n_features)\n Estimated covariance of data.\n \"\"\"\n check_is_fitted(self)\n\n cov = np.dot(self.components_.T, self.components_)\n cov.flat[::len(cov) + 1] += self.noise_variance_ # modify diag inplace\n return cov" + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/get_precision", + "name": "get_precision", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.get_precision", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/get_precision/self", + "name": "self", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.get_precision.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute data precision matrix with the FactorAnalysis model.", + "docstring": "Compute data precision matrix with the FactorAnalysis model.\n\nReturns\n-------\nprecision : ndarray of shape (n_features, n_features)\n Estimated precision of data.", + "code": " def get_precision(self):\n \"\"\"Compute data precision matrix with the FactorAnalysis model.\n\n Returns\n -------\n precision : ndarray of shape (n_features, n_features)\n Estimated precision of data.\n \"\"\"\n check_is_fitted(self)\n\n n_features = self.components_.shape[1]\n\n # handle corner cases first\n if self.n_components == 0:\n return np.diag(1. 
/ self.noise_variance_)\n if self.n_components == n_features:\n return linalg.inv(self.get_covariance())\n\n # Get precision using matrix inversion lemma\n components_ = self.components_\n precision = np.dot(components_ / self.noise_variance_, components_.T)\n precision.flat[::len(precision) + 1] += 1.\n precision = np.dot(components_.T,\n np.dot(linalg.inv(precision), components_))\n precision /= self.noise_variance_[:, np.newaxis]\n precision /= -self.noise_variance_[np.newaxis, :]\n precision.flat[::len(precision) + 1] += 1. / self.noise_variance_\n return precision" + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/score", + "name": "score", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/score/self", + "name": "self", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/score/X", + "name": "X", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The data" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/score/y", + "name": "y", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.score.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the average log-likelihood of the samples", + "docstring": "Compute the average log-likelihood of the samples\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data\n\ny : Ignored\n\nReturns\n-------\nll : float\n Average log-likelihood of the samples under the current model", + "code": " def score(self, X, y=None):\n \"\"\"Compute the average log-likelihood of the samples\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The data\n\n y : Ignored\n\n Returns\n -------\n ll : float\n Average log-likelihood of the samples under the current model\n \"\"\"\n return np.mean(self.score_samples(X))" + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/score_samples", + "name": "score_samples", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.score_samples", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/score_samples/self", + "name": "self", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/score_samples/X", + "name": 
"X", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.score_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The data" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the log-likelihood of each sample", + "docstring": "Compute the log-likelihood of each sample\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data\n\nReturns\n-------\nll : ndarray of shape (n_samples,)\n Log-likelihood of each sample under the current model", + "code": " def score_samples(self, X):\n \"\"\"Compute the log-likelihood of each sample\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The data\n\n Returns\n -------\n ll : ndarray of shape (n_samples,)\n Log-likelihood of each sample under the current model\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, reset=False)\n Xr = X - self.mean_\n precision = self.get_precision()\n n_features = X.shape[1]\n log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)\n log_like -= .5 * (n_features * log(2. * np.pi)\n - fast_logdet(precision))\n return log_like" + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/transform", + "name": "transform", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/transform/self", + "name": "self", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/FactorAnalysis/transform/X", + "name": "X", + "qname": "sklearn.decomposition._factor_analysis.FactorAnalysis.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply dimensionality reduction to X using the model.\n\nCompute the expected mean of the latent variables.\nSee Barber, 21.2.33 (or Bishop, 12.66).", + "docstring": "Apply dimensionality reduction to X using the model.\n\nCompute the expected mean of the latent variables.\nSee Barber, 21.2.33 (or Bishop, 12.66).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n The latent variables of X.", + "code": " def transform(self, X):\n \"\"\"Apply dimensionality reduction to X using the model.\n\n Compute the expected mean of the latent variables.\n See Barber, 21.2.33 (or Bishop, 12.66).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n The latent variables of X.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, reset=False)\n Ih = np.eye(len(self.components_))\n\n X_transformed = X - self.mean_\n\n Wpsi = self.components_ / self.noise_variance_\n cov_z = linalg.inv(Ih + np.dot(Wpsi, self.components_.T))\n tmp = np.dot(X_transformed, Wpsi.T)\n X_transformed = np.dot(tmp, cov_z)\n\n return X_transformed" + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/_ortho_rotation", + "name": "_ortho_rotation", + "qname": "sklearn.decomposition._factor_analysis._ortho_rotation", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/_ortho_rotation/components", + "name": "components", + "qname": "sklearn.decomposition._factor_analysis._ortho_rotation.components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/_ortho_rotation/method", + "name": "method", + "qname": "sklearn.decomposition._factor_analysis._ortho_rotation.method", + "default_value": "'varimax'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/_ortho_rotation/tol", + "name": "tol", + "qname": "sklearn.decomposition._factor_analysis._ortho_rotation.tol", + "default_value": "1e-06", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._factor_analysis/_ortho_rotation/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._factor_analysis._ortho_rotation.max_iter", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return rotated components.", + "docstring": "Return rotated components.", + "code": "def _ortho_rotation(components, method='varimax', tol=1e-6, max_iter=100):\n \"\"\"Return rotated components.\"\"\"\n nrow, ncol = components.shape\n rotation_matrix = np.eye(ncol)\n var = 0\n\n for _ in 
range(max_iter):\n comp_rot = np.dot(components, rotation_matrix)\n if method == \"varimax\":\n tmp = comp_rot * np.transpose((comp_rot ** 2).sum(axis=0) / nrow)\n elif method == \"quartimax\":\n tmp = 0\n u, s, v = np.linalg.svd(\n np.dot(components.T, comp_rot ** 3 - tmp))\n rotation_matrix = np.dot(u, v)\n var_new = np.sum(s)\n if var != 0 and var_new < var * (1 + tol):\n break\n var = var_new\n\n return np.dot(components, rotation_matrix).T" + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._fastica.FastICA.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._fastica.FastICA.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/__init__/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._fastica.FastICA.__init__.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of components to use. If None is passed, all are used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.decomposition._fastica.FastICA.__init__.algorithm", + "default_value": "'parallel'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'parallel', 'deflation'}", + "default_value": "'parallel'", + "description": "Apply parallel or deflational algorithm for FastICA." + }, + "type": { + "kind": "EnumType", + "values": ["deflation", "parallel"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/__init__/whiten", + "name": "whiten", + "qname": "sklearn.decomposition._fastica.FastICA.__init__.whiten", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If whiten is false, the data is already considered to be\nwhitened, and no whitening is performed." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/__init__/fun", + "name": "fun", + "qname": "sklearn.decomposition._fastica.FastICA.__init__.fun", + "default_value": "'logcosh'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'logcosh', 'exp', 'cube'} or callable", + "default_value": "'logcosh'", + "description": "The functional form of the G function used in the\napproximation to neg-entropy. Could be either 'logcosh', 'exp',\nor 'cube'.\nYou can also provide your own function. It should return a tuple\ncontaining the value of the function, and of its derivative, in the\npoint. 
Example::\n\n def my_g(x):\n return x ** 3, (3 * x ** 2).mean(axis=-1)" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["exp", "cube", "logcosh"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/__init__/fun_args", + "name": "fun_args", + "qname": "sklearn.decomposition._fastica.FastICA.__init__.fun_args", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Arguments to send to the functional form.\nIf empty and if fun='logcosh', fun_args will take value\n{'alpha' : 1.0}." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._fastica.FastICA.__init__.max_iter", + "default_value": "200", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "200", + "description": "Maximum number of iterations during fit." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/__init__/tol", + "name": "tol", + "qname": "sklearn.decomposition._fastica.FastICA.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance on update at each iteration." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/__init__/w_init", + "name": "w_init", + "qname": "sklearn.decomposition._fastica.FastICA.__init__.w_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_components)", + "default_value": "None", + "description": "The mixing matrix to be used to initialize the algorithm." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/__init__/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._fastica.FastICA.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used to initialize ``w_init`` when not specified, with a\nnormal distribution. Pass an int, for reproducible results\nacross multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "FastICA: a fast algorithm for Independent Component Analysis.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=None, *, algorithm='parallel', whiten=True,\n fun='logcosh', fun_args=None, max_iter=200, tol=1e-4,\n w_init=None, random_state=None):\n super().__init__()\n if max_iter < 1:\n raise ValueError(\"max_iter should be greater than 1, got \"\n \"(max_iter={})\".format(max_iter))\n self.n_components = n_components\n self.algorithm = algorithm\n self.whiten = whiten\n self.fun = fun\n self.fun_args = fun_args\n self.max_iter = max_iter\n self.tol = tol\n self.w_init = w_init\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/_fit", + "name": "_fit", + "qname": "sklearn.decomposition._fastica.FastICA._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/_fit/self", + "name": "self", + "qname": "sklearn.decomposition._fastica.FastICA._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/_fit/X", + "name": "X", + "qname": "sklearn.decomposition._fastica.FastICA._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/_fit/compute_sources", + "name": "compute_sources", + "qname": "sklearn.decomposition._fastica.FastICA._fit.compute_sources", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If False, sources are not computes but only the rotation matrix.\nThis can save memory when working with big data. Defaults to False." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model", + "docstring": "Fit the model\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ncompute_sources : bool, default=False\n If False, sources are not computes but only the rotation matrix.\n This can save memory when working with big data. 
Defaults to False.\n\nReturns\n-------\n X_new : ndarray of shape (n_samples, n_components)", + "code": " def _fit(self, X, compute_sources=False):\n \"\"\"Fit the model\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n compute_sources : bool, default=False\n If False, sources are not computes but only the rotation matrix.\n This can save memory when working with big data. Defaults to False.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n \"\"\"\n\n X = self._validate_data(X, copy=self.whiten, dtype=FLOAT_DTYPES,\n ensure_min_samples=2).T\n fun_args = {} if self.fun_args is None else self.fun_args\n random_state = check_random_state(self.random_state)\n\n alpha = fun_args.get('alpha', 1.0)\n if not 1 <= alpha <= 2:\n raise ValueError('alpha must be in [1,2]')\n\n if self.fun == 'logcosh':\n g = _logcosh\n elif self.fun == 'exp':\n g = _exp\n elif self.fun == 'cube':\n g = _cube\n elif callable(self.fun):\n def g(x, fun_args):\n return self.fun(x, **fun_args)\n else:\n exc = ValueError if isinstance(self.fun, str) else TypeError\n raise exc(\n \"Unknown function %r;\"\n \" should be one of 'logcosh', 'exp', 'cube' or callable\"\n % self.fun\n )\n\n n_samples, n_features = X.shape\n\n n_components = self.n_components\n if not self.whiten and n_components is not None:\n n_components = None\n warnings.warn('Ignoring n_components with whiten=False.')\n\n if n_components is None:\n n_components = min(n_samples, n_features)\n if (n_components > min(n_samples, n_features)):\n n_components = min(n_samples, n_features)\n warnings.warn(\n 'n_components is too large: it will be set to %s'\n % n_components\n )\n\n if self.whiten:\n # Centering the columns (ie the variables)\n X_mean = X.mean(axis=-1)\n X -= X_mean[:, np.newaxis]\n\n # Whitening and preprocessing by PCA\n u, d, _ = linalg.svd(X, full_matrices=False, check_finite=False)\n\n del _\n K = (u / d).T[:n_components] # see (6.33) p.140\n del u, d\n X1 = np.dot(K, X)\n # see (13.6) p.267 Here X1 is white and data\n # in X has been projected onto a subspace by PCA\n X1 *= np.sqrt(n_features)\n else:\n # X must be casted to floats to avoid typing issues with numpy\n # 2.0 and the line below\n X1 = as_float_array(X, copy=False) # copy has been taken care of\n\n w_init = self.w_init\n if w_init is None:\n w_init = np.asarray(random_state.normal(\n size=(n_components, n_components)), dtype=X1.dtype)\n\n else:\n w_init = np.asarray(w_init)\n if w_init.shape != (n_components, n_components):\n raise ValueError(\n 'w_init has invalid shape -- should be %(shape)s'\n % {'shape': (n_components, n_components)})\n\n kwargs = {'tol': self.tol,\n 'g': g,\n 'fun_args': fun_args,\n 'max_iter': self.max_iter,\n 'w_init': w_init}\n\n if self.algorithm == 'parallel':\n W, n_iter = _ica_par(X1, **kwargs)\n elif self.algorithm == 'deflation':\n W, n_iter = _ica_def(X1, **kwargs)\n else:\n raise ValueError('Invalid algorithm: must be either `parallel` or'\n ' `deflation`.')\n del X1\n\n if compute_sources:\n if self.whiten:\n S = np.linalg.multi_dot([W, K, X]).T\n else:\n S = np.dot(W, X).T\n else:\n S = None\n\n self.n_iter_ = n_iter\n\n if self.whiten:\n self.components_ = np.dot(W, K)\n self.mean_ = X_mean\n self.whitening_ = K\n else:\n self.components_ = W\n\n self.mixing_ = linalg.pinv(self.components_, check_finite=False)\n self._unmixing = W\n\n return S" + }, + { + "id": 
"scikit-learn/sklearn.decomposition._fastica/FastICA/fit", + "name": "fit", + "qname": "sklearn.decomposition._fastica.FastICA.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/fit/self", + "name": "self", + "qname": "sklearn.decomposition._fastica.FastICA.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/fit/X", + "name": "X", + "qname": "sklearn.decomposition._fastica.FastICA.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/fit/y", + "name": "y", + "qname": "sklearn.decomposition._fastica.FastICA.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model to X.", + "docstring": "Fit the model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the model to X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self\n \"\"\"\n self._fit(X, compute_sources=False)\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/fit_transform", + "name": "fit_transform", + "qname": "sklearn.decomposition._fastica.FastICA.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/fit_transform/self", + "name": "self", + "qname": "sklearn.decomposition._fastica.FastICA.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/fit_transform/X", + "name": "X", + "qname": "sklearn.decomposition._fastica.FastICA.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/fit_transform/y", + "name": "y", + "qname": "sklearn.decomposition._fastica.FastICA.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model and recover the sources from X.", + "docstring": "Fit the model and recover the sources from X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Fit the model and recover the sources from X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n \"\"\"\n return self._fit(X, compute_sources=True)" + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.decomposition._fastica.FastICA.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/inverse_transform/self", + "name": "self", + "qname": "sklearn.decomposition._fastica.FastICA.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/inverse_transform/X", + "name": "X", + "qname": "sklearn.decomposition._fastica.FastICA.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "Sources, where n_samples is the number of samples\nand n_components is the number of components." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/inverse_transform/copy", + "name": "copy", + "qname": "sklearn.decomposition._fastica.FastICA.inverse_transform.copy", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, data passed to fit are overwritten. Defaults to True." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform the sources back to the mixed data (apply mixing matrix).", + "docstring": "Transform the sources back to the mixed data (apply mixing matrix).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_components)\n Sources, where n_samples is the number of samples\n and n_components is the number of components.\ncopy : bool, default=True\n If False, data passed to fit are overwritten. 
Defaults to True.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_features)", + "code": " def inverse_transform(self, X, copy=True):\n \"\"\"Transform the sources back to the mixed data (apply mixing matrix).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_components)\n Sources, where n_samples is the number of samples\n and n_components is the number of components.\n copy : bool, default=True\n If False, data passed to fit are overwritten. Defaults to True.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_features)\n \"\"\"\n check_is_fitted(self)\n\n X = check_array(X, copy=(copy and self.whiten), dtype=FLOAT_DTYPES)\n X = np.dot(X, self.mixing_.T)\n if self.whiten:\n X += self.mean_\n\n return X" + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/transform", + "name": "transform", + "qname": "sklearn.decomposition._fastica.FastICA.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/transform/self", + "name": "self", + "qname": "sklearn.decomposition._fastica.FastICA.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/transform/X", + "name": "X", + "qname": "sklearn.decomposition._fastica.FastICA.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data to transform, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/FastICA/transform/copy", + "name": "copy", + "qname": "sklearn.decomposition._fastica.FastICA.transform.copy", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, data passed to fit can be overwritten. Defaults to True." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Recover the sources from X (apply the unmixing matrix).", + "docstring": "Recover the sources from X (apply the unmixing matrix).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to transform, where n_samples is the number of samples\n and n_features is the number of features.\n\ncopy : bool, default=True\n If False, data passed to fit can be overwritten. Defaults to True.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)", + "code": " def transform(self, X, copy=True):\n \"\"\"Recover the sources from X (apply the unmixing matrix).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data to transform, where n_samples is the number of samples\n and n_features is the number of features.\n\n copy : bool, default=True\n If False, data passed to fit can be overwritten. 
Defaults to True.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, copy=(copy and self.whiten),\n dtype=FLOAT_DTYPES, reset=False)\n if self.whiten:\n X -= self.mean_\n\n return np.dot(X, self.components_.T)" + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_cube", + "name": "_cube", + "qname": "sklearn.decomposition._fastica._cube", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/_cube/x", + "name": "x", + "qname": "sklearn.decomposition._fastica._cube.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_cube/fun_args", + "name": "fun_args", + "qname": "sklearn.decomposition._fastica._cube.fun_args", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _cube(x, fun_args):\n return x ** 3, (3 * x ** 2).mean(axis=-1)" + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_exp", + "name": "_exp", + "qname": "sklearn.decomposition._fastica._exp", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/_exp/x", + "name": "x", + "qname": "sklearn.decomposition._fastica._exp.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_exp/fun_args", + "name": "fun_args", + "qname": "sklearn.decomposition._fastica._exp.fun_args", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _exp(x, fun_args):\n exp = np.exp(-(x ** 2) / 2)\n gx = x * exp\n g_x = (1 - x ** 2) * exp\n return gx, g_x.mean(axis=-1)" + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_gs_decorrelation", + "name": "_gs_decorrelation", + "qname": "sklearn.decomposition._fastica._gs_decorrelation", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/_gs_decorrelation/w", + "name": "w", + "qname": "sklearn.decomposition._fastica._gs_decorrelation.w", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n,)", + "default_value": "", + "description": "Array to be orthogonalized" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n,)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_gs_decorrelation/W", + "name": "W", + "qname": "sklearn.decomposition._fastica._gs_decorrelation.W", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (p, n)", + "default_value": "", + "description": "Null space definition" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (p, n)" + } + }, + { + "id": 
"scikit-learn/sklearn.decomposition._fastica/_gs_decorrelation/j", + "name": "j", + "qname": "sklearn.decomposition._fastica._gs_decorrelation.j", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int < p", + "default_value": "", + "description": "The no of (from the first) rows of Null space W wrt which w is\northogonalized." + }, + "type": { + "kind": "NamedType", + "name": "int < p" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Orthonormalize w wrt the first j rows of W.", + "docstring": "Orthonormalize w wrt the first j rows of W.\n\nParameters\n----------\nw : ndarray of shape (n,)\n Array to be orthogonalized\n\nW : ndarray of shape (p, n)\n Null space definition\n\nj : int < p\n The no of (from the first) rows of Null space W wrt which w is\n orthogonalized.\n\nNotes\n-----\nAssumes that W is orthogonal\nw changed in place", + "code": "def _gs_decorrelation(w, W, j):\n \"\"\"\n Orthonormalize w wrt the first j rows of W.\n\n Parameters\n ----------\n w : ndarray of shape (n,)\n Array to be orthogonalized\n\n W : ndarray of shape (p, n)\n Null space definition\n\n j : int < p\n The no of (from the first) rows of Null space W wrt which w is\n orthogonalized.\n\n Notes\n -----\n Assumes that W is orthogonal\n w changed in place\n \"\"\"\n w -= np.linalg.multi_dot([w, W[:j].T, W[:j]])\n return w" + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_def", + "name": "_ica_def", + "qname": "sklearn.decomposition._fastica._ica_def", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_def/X", + "name": "X", + "qname": "sklearn.decomposition._fastica._ica_def.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_def/tol", + "name": "tol", + "qname": "sklearn.decomposition._fastica._ica_def.tol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_def/g", + "name": "g", + "qname": "sklearn.decomposition._fastica._ica_def.g", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_def/fun_args", + "name": "fun_args", + "qname": "sklearn.decomposition._fastica._ica_def.fun_args", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_def/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._fastica._ica_def.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_def/w_init", + "name": "w_init", + "qname": "sklearn.decomposition._fastica._ica_def.w_init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + 
"default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Deflationary FastICA using fun approx to neg-entropy function\n\nUsed internally by FastICA.", + "docstring": "Deflationary FastICA using fun approx to neg-entropy function\n\nUsed internally by FastICA.", + "code": "def _ica_def(X, tol, g, fun_args, max_iter, w_init):\n \"\"\"Deflationary FastICA using fun approx to neg-entropy function\n\n Used internally by FastICA.\n \"\"\"\n\n n_components = w_init.shape[0]\n W = np.zeros((n_components, n_components), dtype=X.dtype)\n n_iter = []\n\n # j is the index of the extracted component\n for j in range(n_components):\n w = w_init[j, :].copy()\n w /= np.sqrt((w ** 2).sum())\n\n for i in range(max_iter):\n gwtx, g_wtx = g(np.dot(w.T, X), fun_args)\n\n w1 = (X * gwtx).mean(axis=1) - g_wtx.mean() * w\n\n _gs_decorrelation(w1, W, j)\n\n w1 /= np.sqrt((w1 ** 2).sum())\n\n lim = np.abs(np.abs((w1 * w).sum()) - 1)\n w = w1\n if lim < tol:\n break\n\n n_iter.append(i + 1)\n W[j, :] = w\n\n return W, max(n_iter)" + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_par", + "name": "_ica_par", + "qname": "sklearn.decomposition._fastica._ica_par", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_par/X", + "name": "X", + "qname": "sklearn.decomposition._fastica._ica_par.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_par/tol", + "name": "tol", + "qname": "sklearn.decomposition._fastica._ica_par.tol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_par/g", + "name": "g", + "qname": "sklearn.decomposition._fastica._ica_par.g", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_par/fun_args", + "name": "fun_args", + "qname": "sklearn.decomposition._fastica._ica_par.fun_args", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_par/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._fastica._ica_par.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_ica_par/w_init", + "name": "w_init", + "qname": "sklearn.decomposition._fastica._ica_par.w_init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Parallel FastICA.\n\nUsed internally by FastICA --main loop", + "docstring": "Parallel FastICA.\n\nUsed internally by FastICA --main loop", + "code": "def _ica_par(X, tol, g, 
fun_args, max_iter, w_init):\n    \"\"\"Parallel FastICA.\n\n    Used internally by FastICA --main loop\n\n    \"\"\"\n    W = _sym_decorrelation(w_init)\n    del w_init\n    p_ = float(X.shape[1])\n    for ii in range(max_iter):\n        gwtx, g_wtx = g(np.dot(W, X), fun_args)\n        W1 = _sym_decorrelation(np.dot(gwtx, X.T) / p_\n                                - g_wtx[:, np.newaxis] * W)\n        del gwtx, g_wtx\n        # builtin max, abs are faster than numpy counterparts.\n        lim = max(abs(abs(np.diag(np.dot(W1, W.T))) - 1))\n        W = W1\n        if lim < tol:\n            break\n    else:\n        warnings.warn('FastICA did not converge. Consider increasing '\n                      'tolerance or the maximum number of iterations.',\n                      ConvergenceWarning)\n\n    return W, ii + 1" + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_logcosh", + "name": "_logcosh", + "qname": "sklearn.decomposition._fastica._logcosh", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/_logcosh/x", + "name": "x", + "qname": "sklearn.decomposition._fastica._logcosh.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_logcosh/fun_args", + "name": "fun_args", + "qname": "sklearn.decomposition._fastica._logcosh.fun_args", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _logcosh(x, fun_args=None):\n    alpha = fun_args.get('alpha', 1.0)  # comment it out?\n\n    x *= alpha\n    gx = np.tanh(x, x)  # apply the tanh inplace\n    g_x = np.empty(x.shape[0])\n    # XXX compute in chunks to avoid extra allocation\n    for i, gx_i in enumerate(gx):  # please don't vectorize.\n        g_x[i] = (alpha * (1 - gx_i ** 2)).mean()\n    return gx, g_x" + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/_sym_decorrelation", + "name": "_sym_decorrelation", + "qname": "sklearn.decomposition._fastica._sym_decorrelation", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/_sym_decorrelation/W", + "name": "W", + "qname": "sklearn.decomposition._fastica._sym_decorrelation.W", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Symmetric decorrelation\ni.e. W <- (W * W.T) ^{-1/2} * W", + "docstring": "Symmetric decorrelation\ni.e. W <- (W * W.T) ^{-1/2} * W", + "code": "def _sym_decorrelation(W):\n    \"\"\" Symmetric decorrelation\n    i.e. W <- (W * W.T) ^{-1/2} * W\n    \"\"\"\n    s, u = linalg.eigh(np.dot(W, W.T))\n    # u (resp. s) contains the eigenvectors (resp. the eigenvalues)\n    # of W * W.T\n    return np.linalg.multi_dot([u * (1.
/ np.sqrt(s)), u.T, W])" + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica", + "name": "fastica", + "qname": "sklearn.decomposition._fastica.fastica", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica/X", + "name": "X", + "qname": "sklearn.decomposition._fastica.fastica.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._fastica.fastica.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of components to extract. If None no dimension reduction\nis performed." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica/algorithm", + "name": "algorithm", + "qname": "sklearn.decomposition._fastica.fastica.algorithm", + "default_value": "'parallel'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'parallel', 'deflation'}", + "default_value": "'parallel'", + "description": "Apply a parallel or deflational FASTICA algorithm." + }, + "type": { + "kind": "EnumType", + "values": ["deflation", "parallel"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica/whiten", + "name": "whiten", + "qname": "sklearn.decomposition._fastica.fastica.whiten", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True perform an initial whitening of the data.\nIf False, the data is assumed to have already been\npreprocessed: it should be centered, normed and white.\nOtherwise you will get incorrect results.\nIn this case the parameter n_components will be ignored." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica/fun", + "name": "fun", + "qname": "sklearn.decomposition._fastica.fastica.fun", + "default_value": "'logcosh'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'logcosh', 'exp', 'cube'} or callable", + "default_value": "'logcosh'", + "description": "The functional form of the G function used in the\napproximation to neg-entropy. Could be either 'logcosh', 'exp',\nor 'cube'.\nYou can also provide your own function. It should return a tuple\ncontaining the value of the function, and of its derivative, in the\npoint. 
The derivative should be averaged along its last dimension.\nExample:\n\ndef my_g(x):\n return x ** 3, np.mean(3 * x ** 2, axis=-1)" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["exp", "cube", "logcosh"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica/fun_args", + "name": "fun_args", + "qname": "sklearn.decomposition._fastica.fastica.fun_args", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Arguments to send to the functional form.\nIf empty or None and if fun='logcosh', fun_args will take value\n{'alpha' : 1.0}" + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._fastica.fastica.max_iter", + "default_value": "200", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "200", + "description": "Maximum number of iterations to perform." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica/tol", + "name": "tol", + "qname": "sklearn.decomposition._fastica.fastica.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-04", + "description": "A positive scalar giving the tolerance at which the\nun-mixing matrix is considered to have converged." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica/w_init", + "name": "w_init", + "qname": "sklearn.decomposition._fastica.fastica.w_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_components)", + "default_value": "None", + "description": "Initial un-mixing array of dimension (n.comp,n.comp).\nIf None (default) then an array of normal r.v.'s is used." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._fastica.fastica.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used to initialize ``w_init`` when not specified, with a\nnormal distribution. Pass an int, for reproducible results\nacross multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica/return_X_mean", + "name": "return_X_mean", + "qname": "sklearn.decomposition._fastica.fastica.return_X_mean", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, X_mean is returned too." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica/compute_sources", + "name": "compute_sources", + "qname": "sklearn.decomposition._fastica.fastica.compute_sources", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, sources are not computed, but only the rotation matrix.\nThis can save memory when working with big data. Defaults to True." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._fastica/fastica/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.decomposition._fastica.fastica.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether or not to return the number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform Fast Independent Component Analysis.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Perform Fast Independent Component Analysis.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\nn_components : int, default=None\n Number of components to extract. If None no dimension reduction\n is performed.\n\nalgorithm : {'parallel', 'deflation'}, default='parallel'\n Apply a parallel or deflational FASTICA algorithm.\n\nwhiten : bool, default=True\n If True perform an initial whitening of the data.\n If False, the data is assumed to have already been\n preprocessed: it should be centered, normed and white.\n Otherwise you will get incorrect results.\n In this case the parameter n_components will be ignored.\n\nfun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n The functional form of the G function used in the\n approximation to neg-entropy. Could be either 'logcosh', 'exp',\n or 'cube'.\n You can also provide your own function. It should return a tuple\n containing the value of the function, and of its derivative, in the\n point. The derivative should be averaged along its last dimension.\n Example:\n\n def my_g(x):\n return x ** 3, np.mean(3 * x ** 2, axis=-1)\n\nfun_args : dict, default=None\n Arguments to send to the functional form.\n If empty or None and if fun='logcosh', fun_args will take value\n {'alpha' : 1.0}\n\nmax_iter : int, default=200\n Maximum number of iterations to perform.\n\ntol : float, default=1e-04\n A positive scalar giving the tolerance at which the\n un-mixing matrix is considered to have converged.\n\nw_init : ndarray of shape (n_components, n_components), default=None\n Initial un-mixing array of dimension (n.comp,n.comp).\n If None (default) then an array of normal r.v.'s is used.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to initialize ``w_init`` when not specified, with a\n normal distribution. Pass an int, for reproducible results\n across multiple function calls.\n See :term:`Glossary `.\n\nreturn_X_mean : bool, default=False\n If True, X_mean is returned too.\n\ncompute_sources : bool, default=True\n If False, sources are not computed, but only the rotation matrix.\n This can save memory when working with big data. 
Defaults to True.\n\nreturn_n_iter : bool, default=False\n    Whether or not to return the number of iterations.\n\nReturns\n-------\nK : ndarray of shape (n_components, n_features) or None\n    If whiten is 'True', K is the pre-whitening matrix that projects data\n    onto the first n_components principal components. If whiten is 'False',\n    K is 'None'.\n\nW : ndarray of shape (n_components, n_components)\n    The square matrix that unmixes the data after whitening.\n    The mixing matrix is the pseudo-inverse of matrix ``W K``\n    if K is not None, else it is the inverse of W.\n\nS : ndarray of shape (n_samples, n_components) or None\n    Estimated source matrix\n\nX_mean : ndarray of shape (n_features,)\n    The mean over features. Returned only if return_X_mean is True.\n\nn_iter : int\n    If the algorithm is \"deflation\", n_iter is the\n    maximum number of iterations run across all components. Else\n    it is just the number of iterations taken to converge. This is\n    returned only when return_n_iter is set to `True`.\n\nNotes\n-----\n\nThe data matrix X is considered to be a linear combination of\nnon-Gaussian (independent) components i.e. X = AS where columns of S\ncontain the independent components and A is a linear mixing\nmatrix. In short ICA attempts to `un-mix' the data by estimating an\nun-mixing matrix W where ``S = W K X.``\nWhile FastICA was proposed to estimate as many sources\nas features, it is possible to estimate fewer by setting\nn_components < n_features. In this case K is not a square matrix\nand the estimated A is the pseudo-inverse of ``W K``.\n\nThis implementation was originally made for data of shape\n[n_features, n_samples]. Now the input is transposed\nbefore the algorithm is applied. This makes it slightly\nfaster for Fortran-ordered input.\n\nImplemented using FastICA:\n*A. Hyvarinen and E. Oja, Independent Component Analysis:\nAlgorithms and Applications, Neural Networks, 13(4-5), 2000,\npp. 411-430*", + "code": "@_deprecate_positional_args\ndef fastica(X, n_components=None, *, algorithm=\"parallel\", whiten=True,\n            fun=\"logcosh\", fun_args=None, max_iter=200, tol=1e-04, w_init=None,\n            random_state=None, return_X_mean=False, compute_sources=True,\n            return_n_iter=False):\n    \"\"\"Perform Fast Independent Component Analysis.\n\n    Read more in the :ref:`User Guide `.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Training vector, where n_samples is the number of samples and\n        n_features is the number of features.\n\n    n_components : int, default=None\n        Number of components to extract. If None no dimension reduction\n        is performed.\n\n    algorithm : {'parallel', 'deflation'}, default='parallel'\n        Apply a parallel or deflational FASTICA algorithm.\n\n    whiten : bool, default=True\n        If True perform an initial whitening of the data.\n        If False, the data is assumed to have already been\n        preprocessed: it should be centered, normed and white.\n        Otherwise you will get incorrect results.\n        In this case the parameter n_components will be ignored.\n\n    fun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'\n        The functional form of the G function used in the\n        approximation to neg-entropy. Could be either 'logcosh', 'exp',\n        or 'cube'.\n        You can also provide your own function. It should return a tuple\n        containing the value of the function, and of its derivative, in the\n        point. 
The derivative should be averaged along its last dimension.\n        Example:\n\n        def my_g(x):\n            return x ** 3, np.mean(3 * x ** 2, axis=-1)\n\n    fun_args : dict, default=None\n        Arguments to send to the functional form.\n        If empty or None and if fun='logcosh', fun_args will take value\n        {'alpha' : 1.0}\n\n    max_iter : int, default=200\n        Maximum number of iterations to perform.\n\n    tol : float, default=1e-04\n        A positive scalar giving the tolerance at which the\n        un-mixing matrix is considered to have converged.\n\n    w_init : ndarray of shape (n_components, n_components), default=None\n        Initial un-mixing array of dimension (n.comp,n.comp).\n        If None (default) then an array of normal r.v.'s is used.\n\n    random_state : int, RandomState instance or None, default=None\n        Used to initialize ``w_init`` when not specified, with a\n        normal distribution. Pass an int, for reproducible results\n        across multiple function calls.\n        See :term:`Glossary `.\n\n    return_X_mean : bool, default=False\n        If True, X_mean is returned too.\n\n    compute_sources : bool, default=True\n        If False, sources are not computed, but only the rotation matrix.\n        This can save memory when working with big data. Defaults to True.\n\n    return_n_iter : bool, default=False\n        Whether or not to return the number of iterations.\n\n    Returns\n    -------\n    K : ndarray of shape (n_components, n_features) or None\n        If whiten is 'True', K is the pre-whitening matrix that projects data\n        onto the first n_components principal components. If whiten is 'False',\n        K is 'None'.\n\n    W : ndarray of shape (n_components, n_components)\n        The square matrix that unmixes the data after whitening.\n        The mixing matrix is the pseudo-inverse of matrix ``W K``\n        if K is not None, else it is the inverse of W.\n\n    S : ndarray of shape (n_samples, n_components) or None\n        Estimated source matrix\n\n    X_mean : ndarray of shape (n_features,)\n        The mean over features. Returned only if return_X_mean is True.\n\n    n_iter : int\n        If the algorithm is \"deflation\", n_iter is the\n        maximum number of iterations run across all components. Else\n        it is just the number of iterations taken to converge. This is\n        returned only when return_n_iter is set to `True`.\n\n    Notes\n    -----\n\n    The data matrix X is considered to be a linear combination of\n    non-Gaussian (independent) components i.e. X = AS where columns of S\n    contain the independent components and A is a linear mixing\n    matrix. In short ICA attempts to `un-mix' the data by estimating an\n    un-mixing matrix W where ``S = W K X.``\n    While FastICA was proposed to estimate as many sources\n    as features, it is possible to estimate fewer by setting\n    n_components < n_features. In this case K is not a square matrix\n    and the estimated A is the pseudo-inverse of ``W K``.\n\n    This implementation was originally made for data of shape\n    [n_features, n_samples]. Now the input is transposed\n    before the algorithm is applied. This makes it slightly\n    faster for Fortran-ordered input.\n\n    Implemented using FastICA:\n    *A. Hyvarinen and E. Oja, Independent Component Analysis:\n    Algorithms and Applications, Neural Networks, 13(4-5), 2000,\n    pp. 
411-430*\n\n \"\"\"\n\n est = FastICA(n_components=n_components, algorithm=algorithm,\n whiten=whiten, fun=fun, fun_args=fun_args,\n max_iter=max_iter, tol=tol, w_init=w_init,\n random_state=random_state)\n sources = est._fit(X, compute_sources=compute_sources)\n\n if whiten:\n if return_X_mean:\n if return_n_iter:\n return (est.whitening_, est._unmixing, sources, est.mean_,\n est.n_iter_)\n else:\n return est.whitening_, est._unmixing, sources, est.mean_\n else:\n if return_n_iter:\n return est.whitening_, est._unmixing, sources, est.n_iter_\n else:\n return est.whitening_, est._unmixing, sources\n\n else:\n if return_X_mean:\n if return_n_iter:\n return None, est._unmixing, sources, None, est.n_iter_\n else:\n return None, est._unmixing, sources, None\n else:\n if return_n_iter:\n return None, est._unmixing, sources, est.n_iter_\n else:\n return None, est._unmixing, sources" + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/__init__/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.__init__.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of components to keep. If ``n_components`` is ``None``,\nthen ``n_components`` is set to ``min(n_samples, n_features)``." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/__init__/whiten", + "name": "whiten", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.__init__.whiten", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When True (False by default) the ``components_`` vectors are divided\nby ``n_samples`` times ``components_`` to ensure uncorrelated outputs\nwith unit component-wise variances.\n\nWhitening will remove some information from the transformed signal\n(the relative variance scales of the components) but can sometimes\nimprove the predictive accuracy of the downstream estimators by\nmaking data respect some hard-wired assumptions." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/__init__/copy", + "name": "copy", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, X will be overwritten. ``copy=False`` can be used to\nsave memory but is unsafe for general use." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/__init__/batch_size", + "name": "batch_size", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.__init__.batch_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of samples to use for each batch. Only used when calling\n``fit``. If ``batch_size`` is ``None``, then ``batch_size``\nis inferred from the data and set to ``5 * n_features``, to provide a\nbalance between approximation accuracy and memory consumption." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Incremental principal components analysis (IPCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of\nthe data, keeping only the most significant singular vectors to\nproject the data to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nDepending on the size of the input data, this algorithm can be much more\nmemory efficient than a PCA, and allows sparse input.\n\nThis algorithm has constant memory complexity, on the order\nof ``batch_size * n_features``, enabling use of np.memmap files without\nloading the entire file into memory. For sparse matrices, the input\nis converted to dense in batches (in order to be able to subtract the\nmean) which avoids storing the entire dense matrix at any one time.\n\nThe computational overhead of each SVD is\n``O(batch_size * n_features ** 2)``, but only 2 * batch_size samples\nremain in memory at a time. There will be ``n_samples / batch_size`` SVD\ncomputations to get the principal components, versus 1 large SVD of\ncomplexity ``O(n_samples * n_features ** 2)`` for PCA.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=None, *, whiten=False, copy=True,\n batch_size=None):\n self.n_components = n_components\n self.whiten = whiten\n self.copy = copy\n self.batch_size = batch_size" + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/fit", + "name": "fit", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/fit/self", + "name": "self", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/fit/X", + "name": "X", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples and\nn_features is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/fit/y", + "name": "y", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model with X, using minibatches of size batch_size.", + "docstring": "Fit the model with X, using minibatches of size batch_size.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the model with X, using minibatches of size batch_size.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n self.components_ = None\n self.n_samples_seen_ = 0\n self.mean_ = .0\n self.var_ = .0\n self.singular_values_ = None\n self.explained_variance_ = None\n self.explained_variance_ratio_ = None\n self.noise_variance_ = None\n\n X = self._validate_data(X, accept_sparse=['csr', 'csc', 'lil'],\n copy=self.copy, dtype=[np.float64, np.float32])\n n_samples, n_features = X.shape\n\n if self.batch_size is None:\n self.batch_size_ = 5 * n_features\n else:\n self.batch_size_ = self.batch_size\n\n for batch in gen_batches(n_samples, self.batch_size_,\n min_batch_size=self.n_components or 0):\n X_batch = X[batch]\n if sparse.issparse(X_batch):\n X_batch = X_batch.toarray()\n self.partial_fit(X_batch, check_input=False)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/partial_fit", + "name": "partial_fit", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/partial_fit/self", + "name": "self", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/partial_fit/X", + "name": "X", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples and\nn_features is the number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/partial_fit/y", + "name": "y", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.partial_fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/partial_fit/check_input", + "name": "check_input", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.partial_fit.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Run check_array on X." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Incremental fit with X. All of X is processed as a single batch.", + "docstring": "Incremental fit with X. All of X is processed as a single batch.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\ncheck_input : bool, default=True\n Run check_array on X.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself.", + "code": " def partial_fit(self, X, y=None, check_input=True):\n \"\"\"Incremental fit with X. All of X is processed as a single batch.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\n check_input : bool, default=True\n Run check_array on X.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n first_pass = not hasattr(self, \"components_\")\n if check_input:\n if sparse.issparse(X):\n raise TypeError(\n \"IncrementalPCA.partial_fit does not support \"\n \"sparse input. Either convert data to dense \"\n \"or use IncrementalPCA.fit to do so in batches.\")\n X = self._validate_data(\n X, copy=self.copy, dtype=[np.float64, np.float32],\n reset=first_pass)\n n_samples, n_features = X.shape\n if first_pass:\n self.components_ = None\n\n if self.n_components is None:\n if self.components_ is None:\n self.n_components_ = min(n_samples, n_features)\n else:\n self.n_components_ = self.components_.shape[0]\n elif not 1 <= self.n_components <= n_features:\n raise ValueError(\"n_components=%r invalid for n_features=%d, need \"\n \"more rows than columns for IncrementalPCA \"\n \"processing\" % (self.n_components, n_features))\n elif not self.n_components <= n_samples:\n raise ValueError(\"n_components=%r must be less or equal to \"\n \"the batch number of samples \"\n \"%d.\" % (self.n_components, n_samples))\n else:\n self.n_components_ = self.n_components\n\n if (self.components_ is not None) and (self.components_.shape[0] !=\n self.n_components_):\n raise ValueError(\"Number of input features has changed from %i \"\n \"to %i between calls to partial_fit! 
Try \"\n \"setting n_components to a fixed value.\" %\n (self.components_.shape[0], self.n_components_))\n\n # This is the first partial_fit\n if not hasattr(self, 'n_samples_seen_'):\n self.n_samples_seen_ = 0\n self.mean_ = .0\n self.var_ = .0\n\n # Update stats - they are 0 if this is the first step\n col_mean, col_var, n_total_samples = \\\n _incremental_mean_and_var(\n X, last_mean=self.mean_, last_variance=self.var_,\n last_sample_count=np.repeat(self.n_samples_seen_, X.shape[1]))\n n_total_samples = n_total_samples[0]\n\n # Whitening\n if self.n_samples_seen_ == 0:\n # If it is the first step, simply whiten X\n X -= col_mean\n else:\n col_batch_mean = np.mean(X, axis=0)\n X -= col_batch_mean\n # Build matrix of combined previous basis and new data\n mean_correction = \\\n np.sqrt((self.n_samples_seen_ / n_total_samples) *\n n_samples) * (self.mean_ - col_batch_mean)\n X = np.vstack((self.singular_values_.reshape((-1, 1)) *\n self.components_, X, mean_correction))\n\n U, S, Vt = linalg.svd(X, full_matrices=False, check_finite=False)\n U, Vt = svd_flip(U, Vt, u_based_decision=False)\n explained_variance = S ** 2 / (n_total_samples - 1)\n explained_variance_ratio = S ** 2 / np.sum(col_var * n_total_samples)\n\n self.n_samples_seen_ = n_total_samples\n self.components_ = Vt[:self.n_components_]\n self.singular_values_ = S[:self.n_components_]\n self.mean_ = col_mean\n self.var_ = col_var\n self.explained_variance_ = explained_variance[:self.n_components_]\n self.explained_variance_ratio_ = \\\n explained_variance_ratio[:self.n_components_]\n if self.n_components_ < n_features:\n self.noise_variance_ = \\\n explained_variance[self.n_components_:].mean()\n else:\n self.noise_variance_ = 0.\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/transform", + "name": "transform", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/transform/self", + "name": "self", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._incremental_pca/IncrementalPCA/transform/X", + "name": "X", + "qname": "sklearn.decomposition._incremental_pca.IncrementalPCA.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "New data, where n_samples is the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply dimensionality reduction to X.\n\nX is projected on the first principal components previously extracted\nfrom a training set, using minibatches of size batch_size if X is\nsparse.", + "docstring": "Apply dimensionality reduction to X.\n\nX is projected on the first principal components previously extracted\nfrom a training set, using minibatches of size batch_size if X is\nsparse.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data, where n_samples is the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n\nExamples\n--------\n\n>>> import numpy as np\n>>> from sklearn.decomposition import IncrementalPCA\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2],\n... [1, 1], [2, 1], [3, 2]])\n>>> ipca = IncrementalPCA(n_components=2, batch_size=3)\n>>> ipca.fit(X)\nIncrementalPCA(batch_size=3, n_components=2)\n>>> ipca.transform(X) # doctest: +SKIP", + "code": " def transform(self, X):\n \"\"\"Apply dimensionality reduction to X.\n\n X is projected on the first principal components previously extracted\n from a training set, using minibatches of size batch_size if X is\n sparse.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n\n Examples\n --------\n\n >>> import numpy as np\n >>> from sklearn.decomposition import IncrementalPCA\n >>> X = np.array([[-1, -1], [-2, -1], [-3, -2],\n ... [1, 1], [2, 1], [3, 2]])\n >>> ipca = IncrementalPCA(n_components=2, batch_size=3)\n >>> ipca.fit(X)\n IncrementalPCA(batch_size=3, n_components=2)\n >>> ipca.transform(X) # doctest: +SKIP\n \"\"\"\n if sparse.issparse(X):\n n_samples = X.shape[0]\n output = []\n for batch in gen_batches(n_samples, self.batch_size_,\n min_batch_size=self.n_components or 0):\n output.append(super().transform(X[batch].toarray()))\n return np.vstack(output)\n else:\n return super().transform(X)" + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of components. If None, all non-zero components are kept." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/kernel", + "name": "kernel", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.kernel", + "default_value": "'linear'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'}", + "default_value": "'linear'", + "description": "Kernel used for PCA." + }, + "type": { + "kind": "EnumType", + "values": ["sigmoid", "cosine", "rbf", "linear", "precomputed", "poly"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/gamma", + "name": "gamma", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.gamma", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other\nkernels. If ``gamma`` is ``None``, then it is set to ``1/n_features``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/degree", + "name": "degree", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.degree", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Degree for poly kernels. Ignored by other kernels." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/coef0", + "name": "coef0", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.coef0", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1", + "description": "Independent term in poly and sigmoid kernels.\nIgnored by other kernels." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/kernel_params", + "name": "kernel_params", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.kernel_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Parameters (keyword arguments) and\nvalues for kernel passed as callable object.\nIgnored by other kernels." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/alpha", + "name": "alpha", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.alpha", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Hyperparameter of the ridge regression that learns the\ninverse transform (when fit_inverse_transform=True)." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/fit_inverse_transform", + "name": "fit_inverse_transform", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.fit_inverse_transform", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Learn the inverse transform for non-precomputed kernels.\n(i.e. learn to find the pre-image of a point)" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/eigen_solver", + "name": "eigen_solver", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.eigen_solver", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'dense', 'arpack'}", + "default_value": "'auto'", + "description": "Select eigensolver to use. If n_components is much less than\nthe number of training samples, arpack may be more efficient\nthan the dense eigensolver." + }, + "type": { + "kind": "EnumType", + "values": ["dense", "auto", "arpack"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/tol", + "name": "tol", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.tol", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "Convergence tolerance for arpack.\nIf 0, optimal value will be chosen by arpack." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.max_iter", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Maximum number of iterations for arpack.\nIf None, optimal value will be chosen by arpack." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/remove_zero_eig", + "name": "remove_zero_eig", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.remove_zero_eig", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, then all components with zero eigenvalues are removed, so\nthat the number of components in the output may be < n_components\n(and sometimes even zero due to numerical instability).\nWhen n_components is None, this parameter is ignored and components\nwith zero eigenvalues are removed regardless." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used when ``eigen_solver`` == 'arpack'. Pass an int for reproducible\nresults across multiple function calls.\nSee :term:`Glossary `.\n\n.. 
versionadded:: 0.18" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, input X is copied and stored by the model in the `X_fit_`\nattribute. If no further changes will be done to X, setting\n`copy_X=False` saves memory by storing a reference.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Kernel Principal component analysis (KPCA).\n\nNon-linear dimensionality reduction through the use of kernels (see\n:ref:`metrics`).\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=None, *, kernel=\"linear\",\n gamma=None, degree=3, coef0=1, kernel_params=None,\n alpha=1.0, fit_inverse_transform=False, eigen_solver='auto',\n tol=0, max_iter=None, remove_zero_eig=False,\n random_state=None, copy_X=True, n_jobs=None):\n if fit_inverse_transform and kernel == 'precomputed':\n raise ValueError(\n \"Cannot fit_inverse_transform with a precomputed kernel.\")\n self.n_components = n_components\n self.kernel = kernel\n self.kernel_params = kernel_params\n self.gamma = gamma\n self.degree = degree\n self.coef0 = coef0\n self.alpha = alpha\n self.fit_inverse_transform = fit_inverse_transform\n self.eigen_solver = eigen_solver\n self.remove_zero_eig = remove_zero_eig\n self.tol = tol\n self.max_iter = max_iter\n self.random_state = random_state\n self.n_jobs = n_jobs\n self.copy_X = copy_X" + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_fit_inverse_transform", + "name": "_fit_inverse_transform", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._fit_inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_fit_inverse_transform/self", + "name": "self", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._fit_inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_fit_inverse_transform/X_transformed", + "name": "X_transformed", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._fit_inverse_transform.X_transformed", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_fit_inverse_transform/X", + "name": "X", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._fit_inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit_inverse_transform(self, X_transformed, X):\n if hasattr(X, \"tocsr\"):\n raise NotImplementedError(\"Inverse transform not implemented for \"\n \"sparse matrices!\")\n\n n_samples = X_transformed.shape[0]\n K = self._get_kernel(X_transformed)\n K.flat[::n_samples + 1] += self.alpha\n self.dual_coef_ = linalg.solve(K, X, sym_pos=True, overwrite_a=True)\n self.X_transformed_fit_ = X_transformed" + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_fit_transform", + "name": "_fit_transform", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_fit_transform/self", + "name": "self", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_fit_transform/K", + "name": "K", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._fit_transform.K", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit's using kernel K", + "docstring": "Fit's using kernel K", + "code": " def _fit_transform(self, K):\n \"\"\" Fit's using kernel K\"\"\"\n # center kernel\n K = self._centerer.fit_transform(K)\n\n if self.n_components is None:\n n_components = K.shape[0]\n else:\n n_components = min(K.shape[0], self.n_components)\n\n # compute eigenvectors\n if self.eigen_solver == 'auto':\n if K.shape[0] > 200 and n_components < 10:\n eigen_solver = 'arpack'\n else:\n eigen_solver = 'dense'\n else:\n eigen_solver = self.eigen_solver\n\n if eigen_solver == 'dense':\n self.lambdas_, self.alphas_ = linalg.eigh(\n K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))\n elif eigen_solver == 'arpack':\n v0 = _init_arpack_v0(K.shape[0], self.random_state)\n self.lambdas_, self.alphas_ = eigsh(K, n_components,\n which=\"LA\",\n tol=self.tol,\n maxiter=self.max_iter,\n v0=v0)\n\n # make sure that the eigenvalues are ok and fix numerical issues\n self.lambdas_ = _check_psd_eigenvalues(self.lambdas_,\n enable_warnings=False)\n\n # flip eigenvectors' sign to enforce deterministic output\n self.alphas_, _ = svd_flip(self.alphas_,\n np.zeros_like(self.alphas_).T)\n\n # sort eigenvectors in descending order\n indices = self.lambdas_.argsort()[::-1]\n self.lambdas_ = self.lambdas_[indices]\n self.alphas_ = self.alphas_[:, indices]\n\n # remove eigenvectors with a zero eigenvalue (null space) if required\n if self.remove_zero_eig or self.n_components is None:\n self.alphas_ = 
self.alphas_[:, self.lambdas_ > 0]\n self.lambdas_ = self.lambdas_[self.lambdas_ > 0]\n\n # Maintenance note on Eigenvectors normalization\n # ----------------------------------------------\n # there is a link between\n # the eigenvectors of K=Phi(X)'Phi(X) and the ones of Phi(X)Phi(X)'\n # if v is an eigenvector of K\n # then Phi(X)v is an eigenvector of Phi(X)Phi(X)'\n # if u is an eigenvector of Phi(X)Phi(X)'\n # then Phi(X)'u is an eigenvector of Phi(X)'Phi(X)\n #\n # At this stage our self.alphas_ (the v) have norm 1, we need to scale\n # them so that eigenvectors in kernel feature space (the u) have norm=1\n # instead\n #\n # We COULD scale them here:\n # self.alphas_ = self.alphas_ / np.sqrt(self.lambdas_)\n #\n # But choose to perform that LATER when needed, in `fit()` and in\n # `transform()`.\n\n return K" + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_get_kernel", + "name": "_get_kernel", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._get_kernel", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_get_kernel/self", + "name": "self", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._get_kernel.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_get_kernel/X", + "name": "X", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._get_kernel.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_get_kernel/Y", + "name": "Y", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._get_kernel.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_kernel(self, X, Y=None):\n if callable(self.kernel):\n params = self.kernel_params or {}\n else:\n params = {\"gamma\": self.gamma,\n \"degree\": self.degree,\n \"coef0\": self.coef0}\n return pairwise_kernels(X, Y, metric=self.kernel,\n filter_params=True, n_jobs=self.n_jobs,\n **params)" + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_more_tags", + "name": "_more_tags", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_more_tags/self", + "name": "self", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'preserves_dtype': [np.float64, np.float32],\n 'pairwise': self.kernel == 'precomputed'}" + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._pairwise", + "decorators": [ + 
"deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/_pairwise/self", + "name": "self", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n return self.kernel == \"precomputed\"" + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/fit", + "name": "fit", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/fit/self", + "name": "self", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/fit/X", + "name": "X", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples in the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/fit/y", + "name": "y", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model from data in X.", + "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the instance itself.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the model from data in X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n X = self._validate_data(X, accept_sparse='csr', copy=self.copy_X)\n self._centerer = KernelCenterer()\n K = self._get_kernel(X)\n self._fit_transform(K)\n\n if self.fit_inverse_transform:\n # no need to use the kernel to transform X, use shortcut expression\n X_transformed = self.alphas_ * np.sqrt(self.lambdas_)\n\n self._fit_inverse_transform(X_transformed, X)\n\n self.X_fit_ = X\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/fit_transform", + "name": "fit_transform", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/fit_transform/self", + "name": "self", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/fit_transform/X", + "name": "X", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples in the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/fit_transform/y", + "name": "y", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/fit_transform/params", + "name": "params", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.fit_transform.params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model from data in X and transform X.", + "docstring": "Fit the model from data in X and transform X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)", + "code": " def fit_transform(self, X, y=None, **params):\n \"\"\"Fit the model from data in X and transform X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n \"\"\"\n self.fit(X, **params)\n\n # no need to use the kernel to transform X, use shortcut expression\n X_transformed = self.alphas_ * np.sqrt(self.lambdas_)\n\n if self.fit_inverse_transform:\n self._fit_inverse_transform(X_transformed, X)\n\n return X_transformed" + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/inverse_transform/self", + "name": "self", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/inverse_transform/X", + "name": "X", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_components)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_components)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform X back to original space.\n\n``inverse_transform`` approximates the inverse transformation using\na learned pre-image. 
The pre-image is learned by kernel ridge\nregression of the original data on their low-dimensional representation\nvectors.\n\n.. note::\n    :meth:`~sklearn.decomposition.KernelPCA.fit` internally uses a centered\n    kernel. As the centered kernel no longer contains the information\n    of the mean of kernel features, such information is not taken into\n    account in reconstruction.\n\n.. note::\n    When users want to compute inverse transformation for 'linear'\n    kernel, it is recommended that they use\n    :class:`~sklearn.decomposition.PCA` instead. Unlike\n    :class:`~sklearn.decomposition.PCA`,\n    :class:`~sklearn.decomposition.KernelPCA`'s ``inverse_transform``\n    does not reconstruct the mean of data when 'linear' kernel is used\n    due to the use of centered kernel.", + "docstring": "Transform X back to original space.\n\n``inverse_transform`` approximates the inverse transformation using\na learned pre-image. The pre-image is learned by kernel ridge\nregression of the original data on their low-dimensional representation\nvectors.\n\n.. note::\n    :meth:`~sklearn.decomposition.KernelPCA.fit` internally uses a centered\n    kernel. As the centered kernel no longer contains the information\n    of the mean of kernel features, such information is not taken into\n    account in reconstruction.\n\n.. note::\n    When users want to compute inverse transformation for 'linear'\n    kernel, it is recommended that they use\n    :class:`~sklearn.decomposition.PCA` instead. Unlike\n    :class:`~sklearn.decomposition.PCA`,\n    :class:`~sklearn.decomposition.KernelPCA`'s ``inverse_transform``\n    does not reconstruct the mean of data when 'linear' kernel is used\n    due to the use of centered kernel.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_components)\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_features)\n\nReferences\n----------\n\"Learning to Find Pre-Images\", G BakIr et al, 2004.", + "code": " def inverse_transform(self, X):\n \"\"\"Transform X back to original space.\n\n ``inverse_transform`` approximates the inverse transformation using\n a learned pre-image. The pre-image is learned by kernel ridge\n regression of the original data on their low-dimensional representation\n vectors.\n\n .. note::\n :meth:`~sklearn.decomposition.KernelPCA.fit` internally uses a centered\n kernel. As the centered kernel no longer contains the information\n of the mean of kernel features, such information is not taken into\n account in reconstruction.\n\n .. note::\n When users want to compute inverse transformation for 'linear'\n kernel, it is recommended that they use\n :class:`~sklearn.decomposition.PCA` instead. 
Unlike\n :class:`~sklearn.decomposition.PCA`,\n :class:`~sklearn.decomposition.KernelPCA`'s ``inverse_transform``\n does not reconstruct the mean of data when 'linear' kernel is used\n due to the use of centered kernel.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_components)\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_features)\n\n References\n ----------\n \"Learning to Find Pre-Images\", G BakIr et al, 2004.\n \"\"\"\n if not self.fit_inverse_transform:\n raise NotFittedError(\"The fit_inverse_transform parameter was not\"\n \" set to True when instantiating and hence \"\n \"the inverse transform is not available.\")\n\n K = self._get_kernel(X, self.X_transformed_fit_)\n return np.dot(K, self.dual_coef_)" + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/transform", + "name": "transform", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/transform/self", + "name": "self", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._kernel_pca/KernelPCA/transform/X", + "name": "X", + "qname": "sklearn.decomposition._kernel_pca.KernelPCA.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform X.", + "docstring": "Transform X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)", + "code": " def transform(self, X):\n \"\"\"Transform X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse='csr', reset=False)\n\n # Compute centered gram matrix between X and training data X_fit_\n K = self._centerer.transform(self._get_kernel(X, self.X_fit_))\n\n # scale eigenvectors (properly account for null-space for dot product)\n non_zeros = np.flatnonzero(self.lambdas_)\n scaled_alphas = np.zeros_like(self.alphas_)\n scaled_alphas[:, non_zeros] = (self.alphas_[:, non_zeros]\n / np.sqrt(self.lambdas_[non_zeros]))\n\n # Project with a scalar product between K and the scaled eigenvectors\n return np.dot(K, scaled_alphas)" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + 
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.n_components", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of topics.\n\n.. versionchanged:: 0.19\n ``n_topics`` was renamed to ``n_components``" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/doc_topic_prior", + "name": "doc_topic_prior", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.doc_topic_prior", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Prior of document topic distribution `theta`. If the value is None,\ndefaults to `1 / n_components`.\nIn [1]_, this is called `alpha`." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/topic_word_prior", + "name": "topic_word_prior", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.topic_word_prior", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Prior of topic word distribution `beta`. If the value is None, defaults\nto `1 / n_components`.\nIn [1]_, this is called `eta`." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/learning_method", + "name": "learning_method", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.learning_method", + "default_value": "'batch'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'batch', 'online'}", + "default_value": "'batch'", + "description": "Method used to update `_component`. Only used in :meth:`fit` method.\nIn general, if the data size is large, the online update will be much\nfaster than the batch update.\n\nValid options::\n\n 'batch': Batch variational Bayes method. Use all training data in\n each EM update.\n Old `components_` will be overwritten in each iteration.\n 'online': Online variational Bayes method. In each EM update, use\n mini-batch of training data to update the ``components_``\n variable incrementally. The learning rate is controlled by the\n ``learning_decay`` and the ``learning_offset`` parameters.\n\n.. versionchanged:: 0.20\n The default learning method is now ``\"batch\"``." + }, + "type": { + "kind": "EnumType", + "values": ["batch", "online"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/learning_decay", + "name": "learning_decay", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.learning_decay", + "default_value": "0.7", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.7", + "description": "It is a parameter that control learning rate in the online learning\nmethod. The value should be set between (0.5, 1.0] to guarantee\nasymptotic convergence. 
When the value is 0.0 and batch_size is\n``n_samples``, the update method is the same as batch learning. In the\nliterature, this is called kappa." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/learning_offset", + "name": "learning_offset", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.learning_offset", + "default_value": "10.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "10.", + "description": "A (positive) parameter that downweights early iterations in online\nlearning. It should be greater than 1.0. In the literature, this is\ncalled tau_0." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.max_iter", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "The maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/batch_size", + "name": "batch_size", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.batch_size", + "default_value": "128", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "128", + "description": "Number of documents to use in each EM iteration. Only used in online\nlearning." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/evaluate_every", + "name": "evaluate_every", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.evaluate_every", + "default_value": "-1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "-1", + "description": "How often to evaluate perplexity. Only used in the `fit` method.\nSet it to 0 or a negative number to not evaluate perplexity in\ntraining at all. Evaluating perplexity can help you check convergence\nin the training process, but it will also increase total training time.\nEvaluating perplexity in every iteration might increase training time\nup to two-fold." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/total_samples", + "name": "total_samples", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.total_samples", + "default_value": "1000000.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1e6", + "description": "Total number of documents. Only used in the :meth:`partial_fit` method." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/perp_tol", + "name": "perp_tol", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.perp_tol", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-1", + "description": "Perplexity tolerance in batch learning. 
Only used when\n``evaluate_every`` is greater than 0." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/mean_change_tol", + "name": "mean_change_tol", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.mean_change_tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Stopping tolerance for updating document topic distribution in E-step." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/max_doc_update_iter", + "name": "max_doc_update_iter", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.max_doc_update_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Max number of iterations for updating document topic distribution in\nthe E-step." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use in the E-step.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary <n_jobs>`\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/__init__/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Pass an int for reproducible results across multiple function calls.\nSee :term:`Glossary <random_state>`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Latent Dirichlet Allocation with online variational Bayes algorithm\n\n.. 
versionadded:: 0.17\n\nRead more in the :ref:`User Guide <LatentDirichletAllocation>`.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=10, *, doc_topic_prior=None,\n topic_word_prior=None, learning_method='batch',\n learning_decay=.7, learning_offset=10., max_iter=10,\n batch_size=128, evaluate_every=-1, total_samples=1e6,\n perp_tol=1e-1, mean_change_tol=1e-3, max_doc_update_iter=100,\n n_jobs=None, verbose=0, random_state=None):\n self.n_components = n_components\n self.doc_topic_prior = doc_topic_prior\n self.topic_word_prior = topic_word_prior\n self.learning_method = learning_method\n self.learning_decay = learning_decay\n self.learning_offset = learning_offset\n self.max_iter = max_iter\n self.batch_size = batch_size\n self.evaluate_every = evaluate_every\n self.total_samples = total_samples\n self.perp_tol = perp_tol\n self.mean_change_tol = mean_change_tol\n self.max_doc_update_iter = max_doc_update_iter\n self.n_jobs = n_jobs\n self.verbose = verbose\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_approx_bound", + "name": "_approx_bound", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._approx_bound", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_approx_bound/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._approx_bound.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_approx_bound/X", + "name": "X", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._approx_bound.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Document word matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_approx_bound/doc_topic_distr", + "name": "doc_topic_distr", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._approx_bound.doc_topic_distr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_components)", + "default_value": "", + "description": "Document topic distribution. In the literature, this is called\ngamma." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_approx_bound/sub_sampling", + "name": "sub_sampling", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._approx_bound.sub_sampling", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Compensate for subsampling of documents.\nIt is used to calculate the bound in online learning." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the variational bound.\n\nEstimate the variational bound over \"all documents\" using only the\ndocuments passed in as X. Since log-likelihood of each word cannot\nbe computed directly, we use this bound to estimate it.", + "docstring": "Estimate the variational bound.\n\nEstimate the variational bound over \"all documents\" using only the\ndocuments passed in as X. Since log-likelihood of each word cannot\nbe computed directly, we use this bound to estimate it.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ndoc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution. In the literature, this is called\n gamma.\n\nsub_sampling : bool, default=False\n Compensate for subsampling of documents.\n It is used in calculate bound in online learning.\n\nReturns\n-------\nscore : float", + "code": " def _approx_bound(self, X, doc_topic_distr, sub_sampling):\n \"\"\"Estimate the variational bound.\n\n Estimate the variational bound over \"all documents\" using only the\n documents passed in as X. Since log-likelihood of each word cannot\n be computed directly, we use this bound to estimate it.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n doc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution. In the literature, this is called\n gamma.\n\n sub_sampling : bool, default=False\n Compensate for subsampling of documents.\n It is used in calculate bound in online learning.\n\n Returns\n -------\n score : float\n\n \"\"\"\n\n def _loglikelihood(prior, distr, dirichlet_distr, size):\n # calculate log-likelihood\n score = np.sum((prior - distr) * dirichlet_distr)\n score += np.sum(gammaln(distr) - gammaln(prior))\n score += np.sum(gammaln(prior * size) - gammaln(np.sum(distr, 1)))\n return score\n\n is_sparse_x = sp.issparse(X)\n n_samples, n_components = doc_topic_distr.shape\n n_features = self.components_.shape[1]\n score = 0\n\n dirichlet_doc_topic = _dirichlet_expectation_2d(doc_topic_distr)\n dirichlet_component_ = _dirichlet_expectation_2d(self.components_)\n doc_topic_prior = self.doc_topic_prior_\n topic_word_prior = self.topic_word_prior_\n\n if is_sparse_x:\n X_data = X.data\n X_indices = X.indices\n X_indptr = X.indptr\n\n # E[log p(docs | theta, beta)]\n for idx_d in range(0, n_samples):\n if is_sparse_x:\n ids = X_indices[X_indptr[idx_d]:X_indptr[idx_d + 1]]\n cnts = X_data[X_indptr[idx_d]:X_indptr[idx_d + 1]]\n else:\n ids = np.nonzero(X[idx_d, :])[0]\n cnts = X[idx_d, ids]\n temp = (dirichlet_doc_topic[idx_d, :, np.newaxis]\n + dirichlet_component_[:, ids])\n norm_phi = logsumexp(temp, axis=0)\n score += np.dot(cnts, norm_phi)\n\n # compute E[log p(theta | alpha) - log q(theta | gamma)]\n score += _loglikelihood(doc_topic_prior, doc_topic_distr,\n dirichlet_doc_topic, self.n_components)\n\n # Compensate for the subsampling of the population of documents\n if sub_sampling:\n doc_ratio = float(self.total_samples) / n_samples\n score *= doc_ratio\n\n # E[log p(beta | eta) - log q (beta | lambda)]\n score += _loglikelihood(topic_word_prior, self.components_,\n dirichlet_component_, n_features)\n\n return score" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_check_non_neg_array", + "name": 
"_check_non_neg_array", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._check_non_neg_array", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_check_non_neg_array/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._check_non_neg_array.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_check_non_neg_array/X", + "name": "X", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._check_non_neg_array.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or sparse matrix", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_check_non_neg_array/reset_n_features", + "name": "reset_n_features", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._check_non_neg_array.reset_n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_check_non_neg_array/whom", + "name": "whom", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._check_non_neg_array.whom", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "check X format\n\ncheck X format and make sure no negative value in X.", + "docstring": "check X format\n\ncheck X format and make sure no negative value in X.\n\nParameters\n----------\nX : array-like or sparse matrix", + "code": " def _check_non_neg_array(self, X, reset_n_features, whom):\n \"\"\"check X format\n\n check X format and make sure no negative value in X.\n\n Parameters\n ----------\n X : array-like or sparse matrix\n\n \"\"\"\n X = self._validate_data(X, reset=reset_n_features,\n accept_sparse='csr')\n check_non_negative(X, whom)\n return X" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_check_params", + "name": "_check_params", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._check_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_check_params/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._check_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check model parameters.", + "docstring": "Check model parameters.", + "code": " def _check_params(self):\n \"\"\"Check model parameters.\"\"\"\n if self.n_components <= 0:\n raise ValueError(\"Invalid 'n_components' parameter: %r\"\n % 
self.n_components)\n\n if self.total_samples <= 0:\n raise ValueError(\"Invalid 'total_samples' parameter: %r\"\n % self.total_samples)\n\n if self.learning_offset < 0:\n raise ValueError(\"Invalid 'learning_offset' parameter: %r\"\n % self.learning_offset)\n\n if self.learning_method not in (\"batch\", \"online\"):\n raise ValueError(\"Invalid 'learning_method' parameter: %r\"\n % self.learning_method)" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_e_step", + "name": "_e_step", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._e_step", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_e_step/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._e_step.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_e_step/X", + "name": "X", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._e_step.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Document word matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_e_step/cal_sstats", + "name": "cal_sstats", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._e_step.cal_sstats", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Parameter that indicates whether to calculate sufficient statistics\nor not. Set ``cal_sstats`` to True when we need to run M-step." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_e_step/random_init", + "name": "random_init", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._e_step.random_init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Parameter that indicates whether to initialize document topic\ndistribution randomly in the E-step. Set it to True in training\nsteps." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_e_step/parallel", + "name": "parallel", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._e_step.parallel", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "joblib.Parallel", + "default_value": "None", + "description": "Pre-initialized instance of joblib.Parallel." 
+ }, + "type": { + "kind": "NamedType", + "name": "joblib.Parallel" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "E-step in EM update.", + "docstring": "E-step in EM update.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ncal_sstats : bool\n Parameter that indicate whether to calculate sufficient statistics\n or not. Set ``cal_sstats`` to True when we need to run M-step.\n\nrandom_init : bool\n Parameter that indicate whether to initialize document topic\n distribution randomly in the E-step. Set it to True in training\n steps.\n\nparallel : joblib.Parallel, default=None\n Pre-initialized instance of joblib.Parallel.\n\nReturns\n-------\n(doc_topic_distr, suff_stats) :\n `doc_topic_distr` is unnormalized topic distribution for each\n document. In the literature, this is called `gamma`.\n `suff_stats` is expected sufficient statistics for the M-step.\n When `cal_sstats == False`, it will be None.", + "code": " def _e_step(self, X, cal_sstats, random_init, parallel=None):\n \"\"\"E-step in EM update.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n cal_sstats : bool\n Parameter that indicate whether to calculate sufficient statistics\n or not. Set ``cal_sstats`` to True when we need to run M-step.\n\n random_init : bool\n Parameter that indicate whether to initialize document topic\n distribution randomly in the E-step. Set it to True in training\n steps.\n\n parallel : joblib.Parallel, default=None\n Pre-initialized instance of joblib.Parallel.\n\n Returns\n -------\n (doc_topic_distr, suff_stats) :\n `doc_topic_distr` is unnormalized topic distribution for each\n document. 
In the literature, this is called `gamma`.\n `suff_stats` is expected sufficient statistics for the M-step.\n When `cal_sstats == False`, it will be None.\n\n \"\"\"\n\n # Run e-step in parallel\n random_state = self.random_state_ if random_init else None\n\n # TODO: make Parallel._effective_n_jobs public instead?\n n_jobs = effective_n_jobs(self.n_jobs)\n if parallel is None:\n parallel = Parallel(n_jobs=n_jobs, verbose=max(0,\n self.verbose - 1))\n results = parallel(\n delayed(_update_doc_distribution)(X[idx_slice, :],\n self.exp_dirichlet_component_,\n self.doc_topic_prior_,\n self.max_doc_update_iter,\n self.mean_change_tol, cal_sstats,\n random_state)\n for idx_slice in gen_even_slices(X.shape[0], n_jobs))\n\n # merge result\n doc_topics, sstats_list = zip(*results)\n doc_topic_distr = np.vstack(doc_topics)\n\n if cal_sstats:\n # This step finishes computing the sufficient statistics for the\n # M-step.\n suff_stats = np.zeros(self.components_.shape)\n for sstats in sstats_list:\n suff_stats += sstats\n suff_stats *= self.exp_dirichlet_component_\n else:\n suff_stats = None\n\n return (doc_topic_distr, suff_stats)" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_em_step", + "name": "_em_step", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._em_step", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_em_step/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._em_step.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_em_step/X", + "name": "X", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._em_step.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Document word matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_em_step/total_samples", + "name": "total_samples", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._em_step.total_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Total number of documents. It is only used when\nbatch_update is `False`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_em_step/batch_update", + "name": "batch_update", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._em_step.batch_update", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Parameter that controls updating method.\n`True` for batch learning, `False` for online learning." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_em_step/parallel", + "name": "parallel", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._em_step.parallel", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "joblib.Parallel", + "default_value": "None", + "description": "Pre-initialized instance of joblib.Parallel" + }, + "type": { + "kind": "NamedType", + "name": "joblib.Parallel" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "EM update for 1 iteration.\n\nupdate `_component` by batch VB or online VB.", + "docstring": "EM update for 1 iteration.\n\nupdate `_component` by batch VB or online VB.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ntotal_samples : int\n Total number of documents. It is only used when\n batch_update is `False`.\n\nbatch_update : bool\n Parameter that controls updating method.\n `True` for batch learning, `False` for online learning.\n\nparallel : joblib.Parallel, default=None\n Pre-initialized instance of joblib.Parallel\n\nReturns\n-------\ndoc_topic_distr : ndarray of shape (n_samples, n_components)\n Unnormalized document topic distribution.", + "code": " def _em_step(self, X, total_samples, batch_update, parallel=None):\n \"\"\"EM update for 1 iteration.\n\n update `_component` by batch VB or online VB.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n total_samples : int\n Total number of documents. It is only used when\n batch_update is `False`.\n\n batch_update : bool\n Parameter that controls updating method.\n `True` for batch learning, `False` for online learning.\n\n parallel : joblib.Parallel, default=None\n Pre-initialized instance of joblib.Parallel\n\n Returns\n -------\n doc_topic_distr : ndarray of shape (n_samples, n_components)\n Unnormalized document topic distribution.\n \"\"\"\n\n # E-step\n _, suff_stats = self._e_step(X, cal_sstats=True, random_init=True,\n parallel=parallel)\n\n # M-step\n if batch_update:\n self.components_ = self.topic_word_prior_ + suff_stats\n else:\n # online update\n # In the literature, the weight is `rho`\n weight = np.power(self.learning_offset + self.n_batch_iter_,\n -self.learning_decay)\n doc_ratio = float(total_samples) / X.shape[0]\n self.components_ *= (1 - weight)\n self.components_ += (weight * (self.topic_word_prior_\n + doc_ratio * suff_stats))\n\n # update `component_` related variables\n self.exp_dirichlet_component_ = np.exp(\n _dirichlet_expectation_2d(self.components_))\n self.n_batch_iter_ += 1\n return" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_init_latent_vars", + "name": "_init_latent_vars", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._init_latent_vars", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_init_latent_vars/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._init_latent_vars.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_init_latent_vars/n_features", + "name": "n_features", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._init_latent_vars.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Initialize latent variables.", + "docstring": "Initialize latent variables.", + "code": " def _init_latent_vars(self, n_features):\n \"\"\"Initialize latent variables.\"\"\"\n\n self.random_state_ = check_random_state(self.random_state)\n self.n_batch_iter_ = 1\n self.n_iter_ = 0\n\n if self.doc_topic_prior is None:\n self.doc_topic_prior_ = 1. / self.n_components\n else:\n self.doc_topic_prior_ = self.doc_topic_prior\n\n if self.topic_word_prior is None:\n self.topic_word_prior_ = 1. / self.n_components\n else:\n self.topic_word_prior_ = self.topic_word_prior\n\n init_gamma = 100.\n init_var = 1. / init_gamma\n # In the literature, this is called `lambda`\n self.components_ = self.random_state_.gamma(\n init_gamma, init_var, (self.n_components, n_features))\n\n # In the literature, this is `exp(E[log(beta)])`\n self.exp_dirichlet_component_ = np.exp(\n _dirichlet_expectation_2d(self.components_))" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_more_tags", + "name": "_more_tags", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_more_tags/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'requires_positive_X': True}" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_perplexity_precomp_distr", + "name": "_perplexity_precomp_distr", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._perplexity_precomp_distr", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_perplexity_precomp_distr/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._perplexity_precomp_distr.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_perplexity_precomp_distr/X", + "name": "X", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._perplexity_precomp_distr.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Document word matrix." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_perplexity_precomp_distr/doc_topic_distr", + "name": "doc_topic_distr", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._perplexity_precomp_distr.doc_topic_distr", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_components)", + "default_value": "None", + "description": "Document topic distribution.\nIf it is None, it will be generated by applying transform on X." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_perplexity_precomp_distr/sub_sampling", + "name": "sub_sampling", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._perplexity_precomp_distr.sub_sampling", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate approximate perplexity for data X with ability to accept\nprecomputed doc_topic_distr\n\nPerplexity is defined as exp(-1. * log-likelihood per word)", + "docstring": "Calculate approximate perplexity for data X with ability to accept\nprecomputed doc_topic_distr\n\nPerplexity is defined as exp(-1. * log-likelihood per word)\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ndoc_topic_distr : ndarray of shape (n_samples, n_components), default=None\n Document topic distribution.\n If it is None, it will be generated by applying transform on X.\n\nReturns\n-------\nscore : float\n Perplexity score.", + "code": " def _perplexity_precomp_distr(self, X, doc_topic_distr=None,\n sub_sampling=False):\n \"\"\"Calculate approximate perplexity for data X with ability to accept\n precomputed doc_topic_distr\n\n Perplexity is defined as exp(-1. 
* log-likelihood per word)\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n doc_topic_distr : ndarray of shape (n_samples, n_components), \\\n default=None\n Document topic distribution.\n If it is None, it will be generated by applying transform on X.\n\n Returns\n -------\n score : float\n Perplexity score.\n \"\"\"\n check_is_fitted(self)\n\n X = self._check_non_neg_array(\n X, reset_n_features=True,\n whom=\"LatentDirichletAllocation.perplexity\")\n\n if doc_topic_distr is None:\n doc_topic_distr = self._unnormalized_transform(X)\n else:\n n_samples, n_components = doc_topic_distr.shape\n if n_samples != X.shape[0]:\n raise ValueError(\"Number of samples in X and doc_topic_distr\"\n \" do not match.\")\n\n if n_components != self.n_components:\n raise ValueError(\"Number of topics does not match.\")\n\n current_samples = X.shape[0]\n bound = self._approx_bound(X, doc_topic_distr, sub_sampling)\n\n if sub_sampling:\n word_cnt = X.sum() * (float(self.total_samples) / current_samples)\n else:\n word_cnt = X.sum()\n perword_bound = bound / word_cnt\n\n return np.exp(-1.0 * perword_bound)" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_unnormalized_transform", + "name": "_unnormalized_transform", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._unnormalized_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_unnormalized_transform/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._unnormalized_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/_unnormalized_transform/X", + "name": "X", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation._unnormalized_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Document word matrix." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform data X according to fitted model.", + "docstring": "Transform data X according to fitted model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nReturns\n-------\ndoc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution for X.", + "code": " def _unnormalized_transform(self, X):\n \"\"\"Transform data X according to fitted model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n Returns\n -------\n doc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution for X.\n \"\"\"\n check_is_fitted(self)\n\n # make sure feature size is the same in fitted model and in X\n X = self._check_non_neg_array(\n X, reset_n_features=True,\n whom=\"LatentDirichletAllocation.transform\")\n n_samples, n_features = X.shape\n if n_features != self.components_.shape[1]:\n raise ValueError(\n \"The provided data has %d dimensions while \"\n \"the model was trained with feature size %d.\" %\n (n_features, self.components_.shape[1]))\n\n doc_topic_distr, _ = self._e_step(X, cal_sstats=False,\n random_init=False)\n\n return doc_topic_distr" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/fit", + "name": "fit", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/fit/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/fit/X", + "name": "X", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Document word matrix." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/fit/y", + "name": "y", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Learn model for the data X with variational Bayes method.\n\nWhen `learning_method` is 'online', use mini-batch update.\nOtherwise, use batch update.", + "docstring": "Learn model for the data X with variational Bayes method.\n\nWhen `learning_method` is 'online', use mini-batch update.\nOtherwise, use batch update.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ny : Ignored\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"Learn model for the data X with variational Bayes method.\n\n When `learning_method` is 'online', use mini-batch update.\n Otherwise, use batch update.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n y : Ignored\n\n Returns\n -------\n self\n \"\"\"\n self._check_params()\n X = self._check_non_neg_array(X, reset_n_features=True,\n whom=\"LatentDirichletAllocation.fit\")\n n_samples, n_features = X.shape\n max_iter = self.max_iter\n evaluate_every = self.evaluate_every\n learning_method = self.learning_method\n\n batch_size = self.batch_size\n\n # initialize parameters\n self._init_latent_vars(n_features)\n # change to perplexity later\n last_bound = None\n n_jobs = effective_n_jobs(self.n_jobs)\n with Parallel(n_jobs=n_jobs,\n verbose=max(0, self.verbose - 1)) as parallel:\n for i in range(max_iter):\n if learning_method == 'online':\n for idx_slice in gen_batches(n_samples, batch_size):\n self._em_step(X[idx_slice, :], total_samples=n_samples,\n batch_update=False, parallel=parallel)\n else:\n # batch update\n self._em_step(X, total_samples=n_samples,\n batch_update=True, parallel=parallel)\n\n # check perplexity\n if evaluate_every > 0 and (i + 1) % evaluate_every == 0:\n doc_topics_distr, _ = self._e_step(X, cal_sstats=False,\n random_init=False,\n parallel=parallel)\n bound = self._perplexity_precomp_distr(X, doc_topics_distr,\n sub_sampling=False)\n if self.verbose:\n print('iteration: %d of max_iter: %d, perplexity: %.4f'\n % (i + 1, max_iter, bound))\n\n if last_bound and abs(last_bound - bound) < self.perp_tol:\n break\n last_bound = bound\n\n elif self.verbose:\n print('iteration: %d of max_iter: %d' % (i + 1, max_iter))\n self.n_iter_ += 1\n\n # calculate final perplexity value on train set\n doc_topics_distr, _ = self._e_step(X, cal_sstats=False,\n random_init=False,\n parallel=parallel)\n self.bound_ = self._perplexity_precomp_distr(X, doc_topics_distr,\n sub_sampling=False)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/partial_fit", + "name": "partial_fit", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.partial_fit", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/partial_fit/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/partial_fit/X", + "name": "X", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Document word matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/partial_fit/y", + "name": "y", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.partial_fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Online VB with Mini-Batch update.", + "docstring": "Online VB with Mini-Batch update.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ny : Ignored\n\nReturns\n-------\nself", + "code": " def partial_fit(self, X, y=None):\n \"\"\"Online VB with Mini-Batch update.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n y : Ignored\n\n Returns\n -------\n self\n \"\"\"\n self._check_params()\n first_time = not hasattr(self, 'components_')\n X = self._check_non_neg_array(\n X, reset_n_features=first_time,\n whom=\"LatentDirichletAllocation.partial_fit\")\n n_samples, n_features = X.shape\n batch_size = self.batch_size\n\n # initialize parameters or check\n if first_time:\n self._init_latent_vars(n_features)\n\n if n_features != self.components_.shape[1]:\n raise ValueError(\n \"The provided data has %d dimensions while \"\n \"the model was trained with feature size %d.\" %\n (n_features, self.components_.shape[1]))\n\n n_jobs = effective_n_jobs(self.n_jobs)\n with Parallel(n_jobs=n_jobs,\n verbose=max(0, self.verbose - 1)) as parallel:\n for idx_slice in gen_batches(n_samples, batch_size):\n self._em_step(X[idx_slice, :],\n total_samples=self.total_samples,\n batch_update=False,\n parallel=parallel)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/perplexity", + "name": "perplexity", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.perplexity", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/perplexity/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.perplexity.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/perplexity/X", + "name": "X", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.perplexity.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Document word matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/perplexity/sub_sampling", + "name": "sub_sampling", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.perplexity.sub_sampling", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Do sub-sampling or not." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate approximate perplexity for data X.\n\nPerplexity is defined as exp(-1. * log-likelihood per word)\n\n.. versionchanged:: 0.19\n *doc_topic_distr* argument has been deprecated and is ignored\n because user no longer has access to unnormalized distribution", + "docstring": "Calculate approximate perplexity for data X.\n\nPerplexity is defined as exp(-1. * log-likelihood per word)\n\n.. versionchanged:: 0.19\n *doc_topic_distr* argument has been deprecated and is ignored\n because user no longer has access to unnormalized distribution\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nsub_sampling : bool\n Do sub-sampling or not.\n\nReturns\n-------\nscore : float\n Perplexity score.", + "code": " def perplexity(self, X, sub_sampling=False):\n \"\"\"Calculate approximate perplexity for data X.\n\n Perplexity is defined as exp(-1. * log-likelihood per word)\n\n .. 
versionchanged:: 0.19\n *doc_topic_distr* argument has been deprecated and is ignored\n because user no longer has access to unnormalized distribution\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n sub_sampling : bool\n Do sub-sampling or not.\n\n Returns\n -------\n score : float\n Perplexity score.\n \"\"\"\n return self._perplexity_precomp_distr(X, sub_sampling=sub_sampling)" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/score", + "name": "score", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/score/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/score/X", + "name": "X", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Document word matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/score/y", + "name": "y", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.score.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate approximate log-likelihood as score.", + "docstring": "Calculate approximate log-likelihood as score.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\ny : Ignored\n\nReturns\n-------\nscore : float\n Use approximate bound as score.", + "code": " def score(self, X, y=None):\n \"\"\"Calculate approximate log-likelihood as score.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n y : Ignored\n\n Returns\n -------\n score : float\n Use approximate bound as score.\n \"\"\"\n check_is_fitted(self)\n X = self._check_non_neg_array(X, reset_n_features=False,\n whom=\"LatentDirichletAllocation.score\")\n\n doc_topic_distr = self._unnormalized_transform(X)\n score = self._approx_bound(X, doc_topic_distr, sub_sampling=False)\n return score" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/transform", + "name": "transform", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/transform/self", + "name": "self", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + 
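From the code recorded above, `perplexity(X)` is `exp(-bound / word_count)` with the same bound that `score(X)` returns when `sub_sampling=False`, so the two public methods are related as in this sketch; a property of this implementation rather than a documented API guarantee.

    import numpy as np
    from sklearn.decomposition import LatentDirichletAllocation

    X = np.random.RandomState(0).randint(0, 5, size=(20, 30))
    lda = LatentDirichletAllocation(n_components=5, random_state=0).fit(X)
    assert np.isclose(lda.perplexity(X), np.exp(-lda.score(X) / X.sum()))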
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/LatentDirichletAllocation/transform/X", + "name": "X", + "qname": "sklearn.decomposition._lda.LatentDirichletAllocation.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Document word matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform data X according to the fitted model.\n\n .. versionchanged:: 0.18\n *doc_topic_distr* is now normalized", + "docstring": "Transform data X according to the fitted model.\n\n .. versionchanged:: 0.18\n *doc_topic_distr* is now normalized\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nReturns\n-------\ndoc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution for X.", + "code": " def transform(self, X):\n \"\"\"Transform data X according to the fitted model.\n\n .. versionchanged:: 0.18\n *doc_topic_distr* is now normalized\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n Returns\n -------\n doc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution for X.\n \"\"\"\n check_is_fitted(self)\n X = self._check_non_neg_array(\n X, reset_n_features=False,\n whom=\"LatentDirichletAllocation.transform\")\n doc_topic_distr = self._unnormalized_transform(X)\n doc_topic_distr /= doc_topic_distr.sum(axis=1)[:, np.newaxis]\n return doc_topic_distr" + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/_update_doc_distribution", + "name": "_update_doc_distribution", + "qname": "sklearn.decomposition._lda._update_doc_distribution", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._lda/_update_doc_distribution/X", + "name": "X", + "qname": "sklearn.decomposition._lda._update_doc_distribution.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Document word matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/_update_doc_distribution/exp_topic_word_distr", + "name": "exp_topic_word_distr", + "qname": "sklearn.decomposition._lda._update_doc_distribution.exp_topic_word_distr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_topics, n_features)", + "default_value": "", + "description": "Exponential value of expectation of log topic word distribution.\nIn the literature, this is `exp(E[log(beta)])`." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_topics, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/_update_doc_distribution/doc_topic_prior", + "name": "doc_topic_prior", + "qname": "sklearn.decomposition._lda._update_doc_distribution.doc_topic_prior", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Prior of document topic distribution `theta`." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/_update_doc_distribution/max_iters", + "name": "max_iters", + "qname": "sklearn.decomposition._lda._update_doc_distribution.max_iters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Max number of iterations for updating document topic distribution in\nthe E-step." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/_update_doc_distribution/mean_change_tol", + "name": "mean_change_tol", + "qname": "sklearn.decomposition._lda._update_doc_distribution.mean_change_tol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Stopping tolerance for updating document topic distribution in E-setp." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/_update_doc_distribution/cal_sstats", + "name": "cal_sstats", + "qname": "sklearn.decomposition._lda._update_doc_distribution.cal_sstats", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Parameter that indicate to calculate sufficient statistics or not.\nSet `cal_sstats` to `True` when we need to run M-step." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._lda/_update_doc_distribution/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._lda._update_doc_distribution.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "RandomState instance or None", + "default_value": "", + "description": "Parameter that indicate how to initialize document topic distribution.\nSet `random_state` to None will initialize document topic distribution\nto a constant number." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "E-step: update document-topic distribution.", + "docstring": "E-step: update document-topic distribution.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nexp_topic_word_distr : ndarray of shape (n_topics, n_features)\n Exponential value of expectation of log topic word distribution.\n In the literature, this is `exp(E[log(beta)])`.\n\ndoc_topic_prior : float\n Prior of document topic distribution `theta`.\n\nmax_iters : int\n Max number of iterations for updating document topic distribution in\n the E-step.\n\nmean_change_tol : float\n Stopping tolerance for updating document topic distribution in E-setp.\n\ncal_sstats : bool\n Parameter that indicate to calculate sufficient statistics or not.\n Set `cal_sstats` to `True` when we need to run M-step.\n\nrandom_state : RandomState instance or None\n Parameter that indicate how to initialize document topic distribution.\n Set `random_state` to None will initialize document topic distribution\n to a constant number.\n\nReturns\n-------\n(doc_topic_distr, suff_stats) :\n `doc_topic_distr` is unnormalized topic distribution for each document.\n In the literature, this is `gamma`. we can calculate `E[log(theta)]`\n from it.\n `suff_stats` is expected sufficient statistics for the M-step.\n When `cal_sstats == False`, this will be None.", + "code": "def _update_doc_distribution(X, exp_topic_word_distr, doc_topic_prior,\n max_iters,\n mean_change_tol, cal_sstats, random_state):\n \"\"\"E-step: update document-topic distribution.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\n exp_topic_word_distr : ndarray of shape (n_topics, n_features)\n Exponential value of expectation of log topic word distribution.\n In the literature, this is `exp(E[log(beta)])`.\n\n doc_topic_prior : float\n Prior of document topic distribution `theta`.\n\n max_iters : int\n Max number of iterations for updating document topic distribution in\n the E-step.\n\n mean_change_tol : float\n Stopping tolerance for updating document topic distribution in E-setp.\n\n cal_sstats : bool\n Parameter that indicate to calculate sufficient statistics or not.\n Set `cal_sstats` to `True` when we need to run M-step.\n\n random_state : RandomState instance or None\n Parameter that indicate how to initialize document topic distribution.\n Set `random_state` to None will initialize document topic distribution\n to a constant number.\n\n Returns\n -------\n (doc_topic_distr, suff_stats) :\n `doc_topic_distr` is unnormalized topic distribution for each document.\n In the literature, this is `gamma`. 
we can calculate `E[log(theta)]`\n from it.\n `suff_stats` is expected sufficient statistics for the M-step.\n When `cal_sstats == False`, this will be None.\n\n \"\"\"\n is_sparse_x = sp.issparse(X)\n n_samples, n_features = X.shape\n n_topics = exp_topic_word_distr.shape[0]\n\n if random_state:\n doc_topic_distr = random_state.gamma(100., 0.01, (n_samples, n_topics))\n else:\n doc_topic_distr = np.ones((n_samples, n_topics))\n\n # In the literature, this is `exp(E[log(theta)])`\n exp_doc_topic = np.exp(_dirichlet_expectation_2d(doc_topic_distr))\n\n # diff on `component_` (only calculate it when `cal_diff` is True)\n suff_stats = np.zeros(exp_topic_word_distr.shape) if cal_sstats else None\n\n if is_sparse_x:\n X_data = X.data\n X_indices = X.indices\n X_indptr = X.indptr\n\n for idx_d in range(n_samples):\n if is_sparse_x:\n ids = X_indices[X_indptr[idx_d]:X_indptr[idx_d + 1]]\n cnts = X_data[X_indptr[idx_d]:X_indptr[idx_d + 1]]\n else:\n ids = np.nonzero(X[idx_d, :])[0]\n cnts = X[idx_d, ids]\n\n doc_topic_d = doc_topic_distr[idx_d, :]\n # The next one is a copy, since the inner loop overwrites it.\n exp_doc_topic_d = exp_doc_topic[idx_d, :].copy()\n exp_topic_word_d = exp_topic_word_distr[:, ids]\n\n # Iterate between `doc_topic_d` and `norm_phi` until convergence\n for _ in range(0, max_iters):\n last_d = doc_topic_d\n\n # The optimal phi_{dwk} is proportional to\n # exp(E[log(theta_{dk})]) * exp(E[log(beta_{dw})]).\n norm_phi = np.dot(exp_doc_topic_d, exp_topic_word_d) + EPS\n\n doc_topic_d = (exp_doc_topic_d *\n np.dot(cnts / norm_phi, exp_topic_word_d.T))\n # Note: adds doc_topic_prior to doc_topic_d, in-place.\n _dirichlet_expectation_1d(doc_topic_d, doc_topic_prior,\n exp_doc_topic_d)\n\n if mean_change(last_d, doc_topic_d) < mean_change_tol:\n break\n doc_topic_distr[idx_d, :] = doc_topic_d\n\n # Contribution of document d to the expected sufficient\n # statistics for the M step.\n if cal_sstats:\n norm_phi = np.dot(exp_doc_topic_d, exp_topic_word_d) + EPS\n suff_stats[:, ids] += np.outer(exp_doc_topic_d, cnts / norm_phi)\n\n return (doc_topic_distr, suff_stats)" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._nmf.NMF.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._nmf.NMF.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._nmf.NMF.__init__.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of components, if n_components is not set all features\nare kept." 
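A simplified, single-document restatement of the fixed-point iteration in `_update_doc_distribution` above (dense counts, no sufficient statistics). `exp_dirichlet_expectation` stands in for sklearn's private Cython helpers `_dirichlet_expectation_1d`/`_2d`; the update itself mirrors the loop shown.

    import numpy as np
    from scipy.special import psi                      # digamma

    EPS = np.finfo(np.float64).eps

    def exp_dirichlet_expectation(gamma):
        # exp(E[log(theta)]) for theta ~ Dirichlet(gamma)
        return np.exp(psi(gamma) - psi(gamma.sum()))

    def update_one_doc(cnts, exp_topic_word, prior, max_iters=100, tol=1e-3):
        gamma = np.ones(exp_topic_word.shape[0])       # random_state=None branch
        exp_gamma = exp_dirichlet_expectation(gamma)
        for _ in range(max_iters):
            last = gamma
            # optimal phi_{dwk} is proportional to exp_gamma_k * beta_{kw}
            norm_phi = exp_gamma.dot(exp_topic_word) + EPS
            gamma = prior + exp_gamma * (cnts / norm_phi).dot(exp_topic_word.T)
            exp_gamma = exp_dirichlet_expectation(gamma)
            if np.abs(last - gamma).mean() < tol:      # mean_change criterion
                break
        return gamma

    gamma = update_one_doc(np.array([3., 0., 1., 2.]),
                           np.random.RandomState(0).rand(2, 4), prior=0.5)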
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__/init", + "name": "init", + "qname": "sklearn.decomposition._nmf.NMF.__init__.init", + "default_value": "'warn'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}", + "default_value": "None", + "description": "Method used to initialize the procedure.\nDefault: None.\nValid options:\n\n- `None`: 'nndsvd' if n_components <= min(n_samples, n_features),\n otherwise random.\n\n- `'random'`: non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n- `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n- `'nndsvda'`: NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n- `'nndsvdar'` NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n- `'custom'`: use custom matrices W and H" + }, + "type": { + "kind": "EnumType", + "values": ["nndsvd", "nndsvdar", "random", "custom", "nndsvda"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__/solver", + "name": "solver", + "qname": "sklearn.decomposition._nmf.NMF.__init__.solver", + "default_value": "'cd'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'cd', 'mu'}", + "default_value": "'cd'", + "description": "Numerical solver to use:\n'cd' is a Coordinate Descent solver.\n'mu' is a Multiplicative Update solver.\n\n.. versionadded:: 0.17\n Coordinate Descent solver.\n\n.. versionadded:: 0.19\n Multiplicative Update solver." + }, + "type": { + "kind": "EnumType", + "values": ["mu", "cd"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__/beta_loss", + "name": "beta_loss", + "qname": "sklearn.decomposition._nmf.NMF.__init__.beta_loss", + "default_value": "'frobenius'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or {'frobenius', 'kullback-leibler', 'itakura-saito'}", + "default_value": "'frobenius'", + "description": "Beta divergence to be minimized, measuring the distance between X\nand the dot product WH. Note that values different from 'frobenius'\n(or 2) and 'kullback-leibler' (or 1) lead to significantly slower\nfits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\nmatrix X cannot contain zeros. Used only in 'mu' solver.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["kullback-leibler", "frobenius", "itakura-saito"] + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__/tol", + "name": "tol", + "qname": "sklearn.decomposition._nmf.NMF.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance of the stopping condition." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._nmf.NMF.__init__.max_iter", + "default_value": "200", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "200", + "description": "Maximum number of iterations before timing out." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._nmf.NMF.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used for initialisation (when ``init`` == 'nndsvdar' or\n'random'), and in Coordinate Descent. Pass an int for reproducible\nresults across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__/alpha", + "name": "alpha", + "qname": "sklearn.decomposition._nmf.NMF.__init__.alpha", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "Constant that multiplies the regularization terms. Set it to zero to\nhave no regularization.\n\n.. versionadded:: 0.17\n *alpha* used in the Coordinate Descent solver." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.decomposition._nmf.NMF.__init__.l1_ratio", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "The regularization mixing parameter, with 0 <= l1_ratio <= 1.\nFor l1_ratio = 0 the penalty is an elementwise L2 penalty\n(aka Frobenius Norm).\nFor l1_ratio = 1 it is an elementwise L1 penalty.\nFor 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n.. versionadded:: 0.17\n Regularization parameter *l1_ratio* used in the Coordinate Descent\n solver." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._nmf.NMF.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Whether to be verbose." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.decomposition._nmf.NMF.__init__.shuffle", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If true, randomize the order of coordinates in the CD solver.\n\n.. versionadded:: 0.17\n *shuffle* parameter used in the Coordinate Descent solver." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/__init__/regularization", + "name": "regularization", + "qname": "sklearn.decomposition._nmf.NMF.__init__.regularization", + "default_value": "'both'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'both', 'components', 'transformation', None}", + "default_value": "'both'", + "description": "Select whether the regularization affects the components (H), the\ntransformation (W), both or none of them.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "EnumType", + "values": ["both", "components", "transformation"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. math::\n\n 0.5 * ||X - WH||_{loss}^2 + alpha * l1_{ratio} * ||vec(W)||_1\n\n + alpha * l1_{ratio} * ||vec(H)||_1\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||W||_{Fro}^2\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=None, *, init='warn', solver='cd',\n beta_loss='frobenius', tol=1e-4, max_iter=200,\n random_state=None, alpha=0., l1_ratio=0., verbose=0,\n shuffle=False, regularization='both'):\n self.n_components = n_components\n self.init = init\n self.solver = solver\n self.beta_loss = beta_loss\n self.tol = tol\n self.max_iter = max_iter\n self.random_state = random_state\n self.alpha = alpha\n self.l1_ratio = l1_ratio\n self.verbose = verbose\n self.shuffle = shuffle\n self.regularization = regularization" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/_more_tags", + "name": "_more_tags", + "qname": "sklearn.decomposition._nmf.NMF._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/_more_tags/self", + "name": "self", + "qname": "sklearn.decomposition._nmf.NMF._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'requires_positive_X': True}" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/fit", + "name": "fit", + "qname": "sklearn.decomposition._nmf.NMF.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/fit/self", + "name": "self", + "qname": "sklearn.decomposition._nmf.NMF.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.decomposition._nmf/NMF/fit/X", + "name": "X", + "qname": "sklearn.decomposition._nmf.NMF.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Data matrix to be decomposed" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/fit/y", + "name": "y", + "qname": "sklearn.decomposition._nmf.NMF.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/fit/params", + "name": "params", + "qname": "sklearn.decomposition._nmf.NMF.fit.params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Learn a NMF model for the data X.", + "docstring": "Learn a NMF model for the data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be decomposed\n\ny : Ignored\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None, **params):\n \"\"\"Learn a NMF model for the data X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be decomposed\n\n y : Ignored\n\n Returns\n -------\n self\n \"\"\"\n self.fit_transform(X, **params)\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/fit_transform", + "name": "fit_transform", + "qname": "sklearn.decomposition._nmf.NMF.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/fit_transform/self", + "name": "self", + "qname": "sklearn.decomposition._nmf.NMF.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/fit_transform/X", + "name": "X", + "qname": "sklearn.decomposition._nmf.NMF.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Data matrix to be decomposed" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/fit_transform/y", + "name": "y", + "qname": "sklearn.decomposition._nmf.NMF.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/fit_transform/W", + "name": "W", + "qname": 
"sklearn.decomposition._nmf.NMF.fit_transform.W", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "If init='custom', it is used as initial guess for the solution." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/fit_transform/H", + "name": "H", + "qname": "sklearn.decomposition._nmf.NMF.fit_transform.H", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "If init='custom', it is used as initial guess for the solution." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Learn a NMF model for the data X and returns the transformed data.\n\nThis is more efficient than calling fit followed by transform.", + "docstring": "Learn a NMF model for the data X and returns the transformed data.\n\nThis is more efficient than calling fit followed by transform.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be decomposed\n\ny : Ignored\n\nW : array-like of shape (n_samples, n_components)\n If init='custom', it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features)\n If init='custom', it is used as initial guess for the solution.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Transformed data.", + "code": " def fit_transform(self, X, y=None, W=None, H=None):\n \"\"\"Learn a NMF model for the data X and returns the transformed data.\n\n This is more efficient than calling fit followed by transform.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be decomposed\n\n y : Ignored\n\n W : array-like of shape (n_samples, n_components)\n If init='custom', it is used as initial guess for the solution.\n\n H : array-like of shape (n_components, n_features)\n If init='custom', it is used as initial guess for the solution.\n\n Returns\n -------\n W : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n X = self._validate_data(X, accept_sparse=('csr', 'csc'),\n dtype=[np.float64, np.float32])\n\n with config_context(assume_finite=True):\n W, H, n_iter_ = non_negative_factorization(\n X=X, W=W, H=H, n_components=self.n_components, init=self.init,\n update_H=True, solver=self.solver, beta_loss=self.beta_loss,\n tol=self.tol, max_iter=self.max_iter, alpha=self.alpha,\n l1_ratio=self.l1_ratio, regularization=self.regularization,\n random_state=self.random_state, verbose=self.verbose,\n shuffle=self.shuffle)\n\n self.reconstruction_err_ = _beta_divergence(X, W, H, self.beta_loss,\n square_root=True)\n\n self.n_components_ = H.shape[0]\n self.components_ = H\n self.n_iter_ = n_iter_\n\n return W" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.decomposition._nmf.NMF.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/inverse_transform/self", + "name": "self", + "qname": 
"sklearn.decomposition._nmf.NMF.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/inverse_transform/W", + "name": "W", + "qname": "sklearn.decomposition._nmf.NMF.inverse_transform.W", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_components)", + "default_value": "", + "description": "Transformed data matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_components)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform data back to its original space.", + "docstring": "Transform data back to its original space.\n\nParameters\n----------\nW : {ndarray, sparse matrix} of shape (n_samples, n_components)\n Transformed data matrix.\n\nReturns\n-------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Data matrix of original shape.\n\n.. versionadded:: 0.18", + "code": " def inverse_transform(self, W):\n \"\"\"Transform data back to its original space.\n\n Parameters\n ----------\n W : {ndarray, sparse matrix} of shape (n_samples, n_components)\n Transformed data matrix.\n\n Returns\n -------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Data matrix of original shape.\n\n .. versionadded:: 0.18\n \"\"\"\n check_is_fitted(self)\n return np.dot(W, self.components_)" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/transform", + "name": "transform", + "qname": "sklearn.decomposition._nmf.NMF.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/transform/self", + "name": "self", + "qname": "sklearn.decomposition._nmf.NMF.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/NMF/transform/X", + "name": "X", + "qname": "sklearn.decomposition._nmf.NMF.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Data matrix to be transformed by the model." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform the data X according to the fitted NMF model.", + "docstring": "Transform the data X according to the fitted NMF model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be transformed by the model.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Transformed data.", + "code": " def transform(self, X):\n \"\"\"Transform the data X according to the fitted NMF model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data matrix to be transformed by the model.\n\n Returns\n -------\n W : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse=('csr', 'csc'),\n dtype=[np.float64, np.float32],\n reset=False)\n\n with config_context(assume_finite=True):\n W, _, n_iter_ = non_negative_factorization(\n X=X, W=None, H=self.components_,\n n_components=self.n_components_,\n init=self.init, update_H=False, solver=self.solver,\n beta_loss=self.beta_loss, tol=self.tol, max_iter=self.max_iter,\n alpha=self.alpha, l1_ratio=self.l1_ratio,\n regularization=self.regularization,\n random_state=self.random_state,\n verbose=self.verbose, shuffle=self.shuffle)\n\n return W" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_beta_divergence", + "name": "_beta_divergence", + "qname": "sklearn.decomposition._nmf._beta_divergence", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/_beta_divergence/X", + "name": "X", + "qname": "sklearn.decomposition._nmf._beta_divergence.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float or array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_beta_divergence/W", + "name": "W", + "qname": "sklearn.decomposition._nmf._beta_divergence.W", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float or array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_beta_divergence/H", + "name": "H", + "qname": "sklearn.decomposition._nmf._beta_divergence.H", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float or array-like of shape (n_components, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_beta_divergence/beta", + "name": "beta", 
+ "qname": "sklearn.decomposition._nmf._beta_divergence.beta", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float or {'frobenius', 'kullback-leibler', 'itakura-saito'}", + "default_value": "", + "description": "Parameter of the beta-divergence.\nIf beta == 2, this is half the Frobenius *squared* norm.\nIf beta == 1, this is the generalized Kullback-Leibler divergence.\nIf beta == 0, this is the Itakura-Saito divergence.\nElse, this is the general beta-divergence." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["kullback-leibler", "frobenius", "itakura-saito"] + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_beta_divergence/square_root", + "name": "square_root", + "qname": "sklearn.decomposition._nmf._beta_divergence.square_root", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, return np.sqrt(2 * res)\nFor beta == 2, it corresponds to the Frobenius norm." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the beta-divergence of X and dot(W, H).", + "docstring": "Compute the beta-divergence of X and dot(W, H).\n\nParameters\n----------\nX : float or array-like of shape (n_samples, n_features)\n\nW : float or array-like of shape (n_samples, n_components)\n\nH : float or array-like of shape (n_components, n_features)\n\nbeta : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}\n Parameter of the beta-divergence.\n If beta == 2, this is half the Frobenius *squared* norm.\n If beta == 1, this is the generalized Kullback-Leibler divergence.\n If beta == 0, this is the Itakura-Saito divergence.\n Else, this is the general beta-divergence.\n\nsquare_root : bool, default=False\n If True, return np.sqrt(2 * res)\n For beta == 2, it corresponds to the Frobenius norm.\n\nReturns\n-------\n res : float\n Beta divergence of X and np.dot(X, H).", + "code": "def _beta_divergence(X, W, H, beta, square_root=False):\n \"\"\"Compute the beta-divergence of X and dot(W, H).\n\n Parameters\n ----------\n X : float or array-like of shape (n_samples, n_features)\n\n W : float or array-like of shape (n_samples, n_components)\n\n H : float or array-like of shape (n_components, n_features)\n\n beta : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}\n Parameter of the beta-divergence.\n If beta == 2, this is half the Frobenius *squared* norm.\n If beta == 1, this is the generalized Kullback-Leibler divergence.\n If beta == 0, this is the Itakura-Saito divergence.\n Else, this is the general beta-divergence.\n\n square_root : bool, default=False\n If True, return np.sqrt(2 * res)\n For beta == 2, it corresponds to the Frobenius norm.\n\n Returns\n -------\n res : float\n Beta divergence of X and np.dot(X, H).\n \"\"\"\n beta = _beta_loss_to_float(beta)\n\n # The method can be called with scalars\n if not sp.issparse(X):\n X = np.atleast_2d(X)\n W = np.atleast_2d(W)\n H = np.atleast_2d(H)\n\n # Frobenius norm\n if beta == 2:\n # Avoid the creation of the dense np.dot(W, H) if X is sparse.\n if sp.issparse(X):\n norm_X = np.dot(X.data, X.data)\n norm_WH = trace_dot(np.linalg.multi_dot([W.T, W, H]), H)\n cross_prod = trace_dot((X * H.T), W)\n res = (norm_X + norm_WH - 2. 
* cross_prod) / 2.\n else:\n res = squared_norm(X - np.dot(W, H)) / 2.\n\n if square_root:\n return np.sqrt(res * 2)\n else:\n return res\n\n if sp.issparse(X):\n # compute np.dot(W, H) only where X is nonzero\n WH_data = _special_sparse_dot(W, H, X).data\n X_data = X.data\n else:\n WH = np.dot(W, H)\n WH_data = WH.ravel()\n X_data = X.ravel()\n\n # do not affect the zeros: here 0 ** (-1) = 0 and not infinity\n indices = X_data > EPSILON\n WH_data = WH_data[indices]\n X_data = X_data[indices]\n\n # used to avoid division by zero\n WH_data[WH_data == 0] = EPSILON\n\n # generalized Kullback-Leibler divergence\n if beta == 1:\n # fast and memory efficient computation of np.sum(np.dot(W, H))\n sum_WH = np.dot(np.sum(W, axis=0), np.sum(H, axis=1))\n # computes np.sum(X * log(X / WH)) only where X is nonzero\n div = X_data / WH_data\n res = np.dot(X_data, np.log(div))\n # add full np.sum(np.dot(W, H)) - np.sum(X)\n res += sum_WH - X_data.sum()\n\n # Itakura-Saito divergence\n elif beta == 0:\n div = X_data / WH_data\n res = np.sum(div) - np.product(X.shape) - np.sum(np.log(div))\n\n # beta-divergence, beta not in (0, 1, 2)\n else:\n if sp.issparse(X):\n # slow loop, but memory efficient computation of :\n # np.sum(np.dot(W, H) ** beta)\n sum_WH_beta = 0\n for i in range(X.shape[1]):\n sum_WH_beta += np.sum(np.dot(W, H[:, i]) ** beta)\n\n else:\n sum_WH_beta = np.sum(WH ** beta)\n\n sum_X_WH = np.dot(X_data, WH_data ** (beta - 1))\n res = (X_data ** beta).sum() - beta * sum_X_WH\n res += sum_WH_beta * (beta - 1)\n res /= beta * (beta - 1)\n\n if square_root:\n return np.sqrt(2 * res)\n else:\n return res" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_beta_loss_to_float", + "name": "_beta_loss_to_float", + "qname": "sklearn.decomposition._nmf._beta_loss_to_float", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/_beta_loss_to_float/beta_loss", + "name": "beta_loss", + "qname": "sklearn.decomposition._nmf._beta_loss_to_float.beta_loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Convert string beta_loss to float.", + "docstring": "Convert string beta_loss to float.", + "code": "def _beta_loss_to_float(beta_loss):\n \"\"\"Convert string beta_loss to float.\"\"\"\n allowed_beta_loss = {'frobenius': 2,\n 'kullback-leibler': 1,\n 'itakura-saito': 0}\n if isinstance(beta_loss, str) and beta_loss in allowed_beta_loss:\n beta_loss = allowed_beta_loss[beta_loss]\n\n if not isinstance(beta_loss, numbers.Number):\n raise ValueError('Invalid beta_loss parameter: got %r instead '\n 'of one of %r, or a float.' 
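A dense restatement of the `beta == 1` (generalized Kullback-Leibler) branch of `_beta_divergence` above, restricted to the nonzero entries of X as in the original; a sketch for intuition, not the library function.

    import numpy as np

    def kl_divergence(X, W, H, eps=np.finfo(np.float64).eps):
        WH = W @ H
        mask = X > eps                         # skip zeros: 0 * log(0 / wh) -> 0
        x, wh = X[mask], np.maximum(WH[mask], eps)
        # sum(X * log(X / WH)) over nonzeros + sum(WH) - sum(X)
        return np.dot(x, np.log(x / wh)) + WH.sum() - x.sum()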
%\n (beta_loss, allowed_beta_loss.keys()))\n return beta_loss" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_check_init", + "name": "_check_init", + "qname": "sklearn.decomposition._nmf._check_init", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/_check_init/A", + "name": "A", + "qname": "sklearn.decomposition._nmf._check_init.A", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_check_init/shape", + "name": "shape", + "qname": "sklearn.decomposition._nmf._check_init.shape", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_check_init/whom", + "name": "whom", + "qname": "sklearn.decomposition._nmf._check_init.whom", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_init(A, shape, whom):\n A = check_array(A)\n if np.shape(A) != shape:\n raise ValueError('Array with wrong shape passed to %s. Expected %s, '\n 'but got %s ' % (whom, shape, np.shape(A)))\n check_non_negative(A, whom)\n if np.max(A) == 0:\n raise ValueError('Array passed to %s is full of zeros.' % whom)" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_check_string_param", + "name": "_check_string_param", + "qname": "sklearn.decomposition._nmf._check_string_param", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/_check_string_param/solver", + "name": "solver", + "qname": "sklearn.decomposition._nmf._check_string_param.solver", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_check_string_param/regularization", + "name": "regularization", + "qname": "sklearn.decomposition._nmf._check_string_param.regularization", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_check_string_param/beta_loss", + "name": "beta_loss", + "qname": "sklearn.decomposition._nmf._check_string_param.beta_loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_check_string_param/init", + "name": "init", + "qname": "sklearn.decomposition._nmf._check_string_param.init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_string_param(solver, regularization, beta_loss, init):\n allowed_solver = ('cd', 'mu')\n if solver not in 
allowed_solver:\n raise ValueError(\n 'Invalid solver parameter: got %r instead of one of %r' %\n (solver, allowed_solver))\n\n allowed_regularization = ('both', 'components', 'transformation', None)\n if regularization not in allowed_regularization:\n raise ValueError(\n 'Invalid regularization parameter: got %r instead of one of %r' %\n (regularization, allowed_regularization))\n\n # 'mu' is the only solver that handles other beta losses than 'frobenius'\n if solver != 'mu' and beta_loss not in (2, 'frobenius'):\n raise ValueError(\n 'Invalid beta_loss parameter: solver %r does not handle beta_loss'\n ' = %r' % (solver, beta_loss))\n\n if solver == 'mu' and init == 'nndsvd':\n warnings.warn(\"The multiplicative update ('mu') solver cannot update \"\n \"zeros present in the initialization, and so leads to \"\n \"poorer results when used jointly with init='nndsvd'. \"\n \"You may try init='nndsvda' or init='nndsvdar' instead.\",\n UserWarning)\n\n beta_loss = _beta_loss_to_float(beta_loss)\n return beta_loss" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_compute_regularization", + "name": "_compute_regularization", + "qname": "sklearn.decomposition._nmf._compute_regularization", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/_compute_regularization/alpha", + "name": "alpha", + "qname": "sklearn.decomposition._nmf._compute_regularization.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_compute_regularization/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.decomposition._nmf._compute_regularization.l1_ratio", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_compute_regularization/regularization", + "name": "regularization", + "qname": "sklearn.decomposition._nmf._compute_regularization.regularization", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute L1 and L2 regularization coefficients for W and H.", + "docstring": "Compute L1 and L2 regularization coefficients for W and H.", + "code": "def _compute_regularization(alpha, l1_ratio, regularization):\n \"\"\"Compute L1 and L2 regularization coefficients for W and H.\"\"\"\n alpha_H = 0.\n alpha_W = 0.\n if regularization in ('both', 'components'):\n alpha_H = float(alpha)\n if regularization in ('both', 'transformation'):\n alpha_W = float(alpha)\n\n l1_reg_W = alpha_W * l1_ratio\n l1_reg_H = alpha_H * l1_ratio\n l2_reg_W = alpha_W * (1. - l1_ratio)\n l2_reg_H = alpha_H * (1. 
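A quick sketch of the solver/loss rule `_check_string_param` enforces, namely that only 'mu' accepts non-Frobenius losses (toy data; illustrative only):

    import numpy as np
    from sklearn.decomposition import non_negative_factorization

    X = np.abs(np.random.RandomState(0).randn(6, 5))
    try:
        non_negative_factorization(X, n_components=2, init='nndsvda',
                                   solver='cd', beta_loss='kullback-leibler')
    except ValueError as err:
        print(err)  # solver 'cd' does not handle beta_loss = 'kullback-leibler'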
- l1_ratio)\n return l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent", + "name": "_fit_coordinate_descent", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent/X", + "name": "X", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Constant matrix." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent/W", + "name": "W", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent.W", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "Initial guess for the solution." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent/H", + "name": "H", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent.H", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "Initial guess for the solution." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent/tol", + "name": "tol", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent.tol", + "default_value": "0.0001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance of the stopping condition." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent.max_iter", + "default_value": "200", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "200", + "description": "Maximum number of iterations before timing out." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent/l1_reg_W", + "name": "l1_reg_W", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent.l1_reg_W", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "L1 regularization parameter for W." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent/l1_reg_H", + "name": "l1_reg_H", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent.l1_reg_H", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "L1 regularization parameter for H." 
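A worked instance of `_compute_regularization`: with `alpha=0.5`, `l1_ratio=0.3`, and `regularization='both'` (values picked purely for illustration), both factors receive the same pair of coefficients:

    alpha, l1_ratio = 0.5, 0.3
    l1_reg_W = l1_reg_H = alpha * l1_ratio         # 0.5 * 0.3 = 0.15
    l2_reg_W = l2_reg_H = alpha * (1. - l1_ratio)  # 0.5 * 0.7 = 0.35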
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent/l2_reg_W", + "name": "l2_reg_W", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent.l2_reg_W", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "L2 regularization parameter for W." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent/l2_reg_H", + "name": "l2_reg_H", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent.l2_reg_H", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "L2 regularization parameter for H." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent/update_H", + "name": "update_H", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent.update_H", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to True, both W and H will be estimated from initial guesses.\nSet to False, only W will be estimated." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent.verbose", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent/shuffle", + "name": "shuffle", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent.shuffle", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If true, randomize the order of coordinates in the CD solver." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_coordinate_descent/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._nmf._fit_coordinate_descent.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used to randomize the coordinates in the CD solver, when\n``shuffle`` is set to ``True``. Pass an int for reproducible\nresults across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Non-negative Matrix Factorization (NMF) with Coordinate Descent\n\nThe objective function is minimized with an alternating minimization of W\nand H. 
Each minimization is done with a cyclic (up to a permutation of the\nfeatures) Coordinate Descent.", + "docstring": "Compute Non-negative Matrix Factorization (NMF) with Coordinate Descent\n\nThe objective function is minimized with an alternating minimization of W\nand H. Each minimization is done with a cyclic (up to a permutation of the\nfeatures) Coordinate Descent.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Constant matrix.\n\nW : array-like of shape (n_samples, n_components)\n Initial guess for the solution.\n\nH : array-like of shape (n_components, n_features)\n Initial guess for the solution.\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n Maximum number of iterations before timing out.\n\nl1_reg_W : float, default=0.\n L1 regularization parameter for W.\n\nl1_reg_H : float, default=0.\n L1 regularization parameter for H.\n\nl2_reg_W : float, default=0.\n L2 regularization parameter for W.\n\nl2_reg_H : float, default=0.\n L2 regularization parameter for H.\n\nupdate_H : bool, default=True\n Set to True, both W and H will be estimated from initial guesses.\n Set to False, only W will be estimated.\n\nverbose : int, default=0\n The verbosity level.\n\nshuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\nrandom_state : int, RandomState instance or None, default=None\n Used to randomize the coordinates in the CD solver, when\n ``shuffle`` is set to ``True``. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Solution to the non-negative least squares problem.\n\nH : ndarray of shape (n_components, n_features)\n Solution to the non-negative least squares problem.\n\nn_iter : int\n The number of iterations done by the algorithm.\n\nReferences\n----------\nCichocki, Andrzej, and Phan, Anh-Huy. \"Fast local algorithms for\nlarge scale nonnegative matrix and tensor factorizations.\"\nIEICE transactions on fundamentals of electronics, communications and\ncomputer sciences 92.3: 708-721, 2009.", + "code": "def _fit_coordinate_descent(X, W, H, tol=1e-4, max_iter=200, l1_reg_W=0,\n l1_reg_H=0, l2_reg_W=0, l2_reg_H=0, update_H=True,\n verbose=0, shuffle=False, random_state=None):\n \"\"\"Compute Non-negative Matrix Factorization (NMF) with Coordinate Descent\n\n The objective function is minimized with an alternating minimization of W\n and H. 
Each minimization is done with a cyclic (up to a permutation of the\n features) Coordinate Descent.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Constant matrix.\n\n W : array-like of shape (n_samples, n_components)\n Initial guess for the solution.\n\n H : array-like of shape (n_components, n_features)\n Initial guess for the solution.\n\n tol : float, default=1e-4\n Tolerance of the stopping condition.\n\n max_iter : int, default=200\n Maximum number of iterations before timing out.\n\n l1_reg_W : float, default=0.\n L1 regularization parameter for W.\n\n l1_reg_H : float, default=0.\n L1 regularization parameter for H.\n\n l2_reg_W : float, default=0.\n L2 regularization parameter for W.\n\n l2_reg_H : float, default=0.\n L2 regularization parameter for H.\n\n update_H : bool, default=True\n Set to True, both W and H will be estimated from initial guesses.\n Set to False, only W will be estimated.\n\n verbose : int, default=0\n The verbosity level.\n\n shuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\n random_state : int, RandomState instance or None, default=None\n Used to randomize the coordinates in the CD solver, when\n ``shuffle`` is set to ``True``. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n W : ndarray of shape (n_samples, n_components)\n Solution to the non-negative least squares problem.\n\n H : ndarray of shape (n_components, n_features)\n Solution to the non-negative least squares problem.\n\n n_iter : int\n The number of iterations done by the algorithm.\n\n References\n ----------\n Cichocki, Andrzej, and Phan, Anh-Huy. \"Fast local algorithms for\n large scale nonnegative matrix and tensor factorizations.\"\n IEICE transactions on fundamentals of electronics, communications and\n computer sciences 92.3: 708-721, 2009.\n \"\"\"\n # so W and Ht are both in C order in memory\n Ht = check_array(H.T, order='C')\n X = check_array(X, accept_sparse='csr')\n\n rng = check_random_state(random_state)\n\n for n_iter in range(1, max_iter + 1):\n violation = 0.\n\n # Update W\n violation += _update_coordinate_descent(X, W, Ht, l1_reg_W,\n l2_reg_W, shuffle, rng)\n # Update H\n if update_H:\n violation += _update_coordinate_descent(X.T, Ht, W, l1_reg_H,\n l2_reg_H, shuffle, rng)\n\n if n_iter == 1:\n violation_init = violation\n\n if violation_init == 0:\n break\n\n if verbose:\n print(\"violation:\", violation / violation_init)\n\n if violation / violation_init <= tol:\n if verbose:\n print(\"Converged at iteration\", n_iter + 1)\n break\n\n return W, Ht.T, n_iter" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update", + "name": "_fit_multiplicative_update", + "qname": "sklearn.decomposition._nmf._fit_multiplicative_update", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update/X", + "name": "X", + "qname": "sklearn.decomposition._nmf._fit_multiplicative_update.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Constant input matrix." 
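A sketch of reaching the coordinate-descent path above through the public function (toy data; all values illustrative):

    import numpy as np
    from sklearn.decomposition import non_negative_factorization

    X = np.abs(np.random.RandomState(0).randn(10, 8))
    W, H, n_iter = non_negative_factorization(
        X, n_components=3, init='nndsvda', solver='cd',
        tol=1e-4, max_iter=200, shuffle=True, random_state=0)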
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update/W", + "name": "W", + "qname": "sklearn.decomposition._nmf._fit_multiplicative_update.W", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "Initial guess for the solution." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update/H", + "name": "H", + "qname": "sklearn.decomposition._nmf._fit_multiplicative_update.H", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "Initial guess for the solution." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update/beta_loss", + "name": "beta_loss", + "qname": "sklearn.decomposition._nmf._fit_multiplicative_update.beta_loss", + "default_value": "'frobenius'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float or {'frobenius', 'kullback-leibler', 'itakura-saito'}", + "default_value": "'frobenius'", + "description": "String must be in {'frobenius', 'kullback-leibler', 'itakura-saito'}.\nBeta divergence to be minimized, measuring the distance between X\nand the dot product WH. Note that values different from 'frobenius'\n(or 2) and 'kullback-leibler' (or 1) lead to significantly slower\nfits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\nmatrix X cannot contain zeros." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["kullback-leibler", "frobenius", "itakura-saito"] + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._nmf._fit_multiplicative_update.max_iter", + "default_value": "200", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "200", + "description": "Number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update/tol", + "name": "tol", + "qname": "sklearn.decomposition._nmf._fit_multiplicative_update.tol", + "default_value": "0.0001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance of the stopping condition." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update/l1_reg_W", + "name": "l1_reg_W", + "qname": "sklearn.decomposition._nmf._fit_multiplicative_update.l1_reg_W", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "L1 regularization parameter for W." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update/l1_reg_H", + "name": "l1_reg_H", + "qname": "sklearn.decomposition._nmf._fit_multiplicative_update.l1_reg_H", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "L1 regularization parameter for H." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update/l2_reg_W", + "name": "l2_reg_W", + "qname": "sklearn.decomposition._nmf._fit_multiplicative_update.l2_reg_W", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "L2 regularization parameter for W." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update/l2_reg_H", + "name": "l2_reg_H", + "qname": "sklearn.decomposition._nmf._fit_multiplicative_update.l2_reg_H", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "L2 regularization parameter for H." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update/update_H", + "name": "update_H", + "qname": "sklearn.decomposition._nmf._fit_multiplicative_update.update_H", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to True, both W and H will be estimated from initial guesses.\nSet to False, only W will be estimated." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_fit_multiplicative_update/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._nmf._fit_multiplicative_update.verbose", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Non-negative Matrix Factorization with Multiplicative Update.\n\nThe objective function is _beta_divergence(X, WH) and is minimized with an\nalternating minimization of W and H. Each minimization is done with a\nMultiplicative Update.", + "docstring": "Compute Non-negative Matrix Factorization with Multiplicative Update.\n\nThe objective function is _beta_divergence(X, WH) and is minimized with an\nalternating minimization of W and H. Each minimization is done with a\nMultiplicative Update.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Constant input matrix.\n\nW : array-like of shape (n_samples, n_components)\n Initial guess for the solution.\n\nH : array-like of shape (n_components, n_features)\n Initial guess for the solution.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'\n String must be in {'frobenius', 'kullback-leibler', 'itakura-saito'}.\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. 
Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros.\n\nmax_iter : int, default=200\n Number of iterations.\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nl1_reg_W : float, default=0.\n L1 regularization parameter for W.\n\nl1_reg_H : float, default=0.\n L1 regularization parameter for H.\n\nl2_reg_W : float, default=0.\n L2 regularization parameter for W.\n\nl2_reg_H : float, default=0.\n L2 regularization parameter for H.\n\nupdate_H : bool, default=True\n Set to True, both W and H will be estimated from initial guesses.\n Set to False, only W will be estimated.\n\nverbose : int, default=0\n The verbosity level.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Solution to the non-negative least squares problem.\n\nH : ndarray of shape (n_components, n_features)\n Solution to the non-negative least squares problem.\n\nn_iter : int\n The number of iterations done by the algorithm.\n\nReferences\n----------\nFevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix\nfactorization with the beta-divergence. Neural Computation, 23(9).", + "code": "def _fit_multiplicative_update(X, W, H, beta_loss='frobenius',\n max_iter=200, tol=1e-4,\n l1_reg_W=0, l1_reg_H=0, l2_reg_W=0, l2_reg_H=0,\n update_H=True, verbose=0):\n \"\"\"Compute Non-negative Matrix Factorization with Multiplicative Update.\n\n The objective function is _beta_divergence(X, WH) and is minimized with an\n alternating minimization of W and H. Each minimization is done with a\n Multiplicative Update.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Constant input matrix.\n\n W : array-like of shape (n_samples, n_components)\n Initial guess for the solution.\n\n H : array-like of shape (n_components, n_features)\n Initial guess for the solution.\n\n beta_loss : float or {'frobenius', 'kullback-leibler', \\\n 'itakura-saito'}, default='frobenius'\n String must be in {'frobenius', 'kullback-leibler', 'itakura-saito'}.\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros.\n\n max_iter : int, default=200\n Number of iterations.\n\n tol : float, default=1e-4\n Tolerance of the stopping condition.\n\n l1_reg_W : float, default=0.\n L1 regularization parameter for W.\n\n l1_reg_H : float, default=0.\n L1 regularization parameter for H.\n\n l2_reg_W : float, default=0.\n L2 regularization parameter for W.\n\n l2_reg_H : float, default=0.\n L2 regularization parameter for H.\n\n update_H : bool, default=True\n Set to True, both W and H will be estimated from initial guesses.\n Set to False, only W will be estimated.\n\n verbose : int, default=0\n The verbosity level.\n\n Returns\n -------\n W : ndarray of shape (n_samples, n_components)\n Solution to the non-negative least squares problem.\n\n H : ndarray of shape (n_components, n_features)\n Solution to the non-negative least squares problem.\n\n n_iter : int\n The number of iterations done by the algorithm.\n\n References\n ----------\n Fevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix\n factorization with the beta-divergence. 
Neural Computation, 23(9).\n \"\"\"\n start_time = time.time()\n\n beta_loss = _beta_loss_to_float(beta_loss)\n\n # gamma for Maximization-Minimization (MM) algorithm [Fevotte 2011]\n if beta_loss < 1:\n gamma = 1. / (2. - beta_loss)\n elif beta_loss > 2:\n gamma = 1. / (beta_loss - 1.)\n else:\n gamma = 1.\n\n # used for the convergence criterion\n error_at_init = _beta_divergence(X, W, H, beta_loss, square_root=True)\n previous_error = error_at_init\n\n H_sum, HHt, XHt = None, None, None\n for n_iter in range(1, max_iter + 1):\n # update W\n # H_sum, HHt and XHt are saved and reused if not update_H\n delta_W, H_sum, HHt, XHt = _multiplicative_update_w(\n X, W, H, beta_loss, l1_reg_W, l2_reg_W, gamma,\n H_sum, HHt, XHt, update_H)\n W *= delta_W\n\n # necessary for stability with beta_loss < 1\n if beta_loss < 1:\n W[W < np.finfo(np.float64).eps] = 0.\n\n # update H\n if update_H:\n delta_H = _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H,\n l2_reg_H, gamma)\n H *= delta_H\n\n # These values will be recomputed since H changed\n H_sum, HHt, XHt = None, None, None\n\n # necessary for stability with beta_loss < 1\n if beta_loss <= 1:\n H[H < np.finfo(np.float64).eps] = 0.\n\n # test convergence criterion every 10 iterations\n if tol > 0 and n_iter % 10 == 0:\n error = _beta_divergence(X, W, H, beta_loss, square_root=True)\n\n if verbose:\n iter_time = time.time()\n print(\"Epoch %02d reached after %.3f seconds, error: %f\" %\n (n_iter, iter_time - start_time, error))\n\n if (previous_error - error) / error_at_init < tol:\n break\n previous_error = error\n\n # do not print if we have already printed in the convergence test\n if verbose and (tol == 0 or n_iter % 10 != 0):\n end_time = time.time()\n print(\"Epoch %02d reached after %.3f seconds.\" %\n (n_iter, end_time - start_time))\n\n return W, H, n_iter" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_initialize_nmf", + "name": "_initialize_nmf", + "qname": "sklearn.decomposition._nmf._initialize_nmf", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/_initialize_nmf/X", + "name": "X", + "qname": "sklearn.decomposition._nmf._initialize_nmf.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data matrix to be decomposed." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_initialize_nmf/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._nmf._initialize_nmf.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The number of components desired in the approximation." 
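A sketch of reaching the multiplicative-update loop above with a non-Frobenius loss, where the MM step size gamma also comes into play (toy data; illustrative only):

    import numpy as np
    from sklearn.decomposition import non_negative_factorization

    X = np.abs(np.random.RandomState(0).randn(10, 8))
    W, H, n_iter = non_negative_factorization(
        X, n_components=3, init='nndsvda', solver='mu',
        beta_loss='kullback-leibler', max_iter=300, random_state=0)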
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_initialize_nmf/init", + "name": "init", + "qname": "sklearn.decomposition._nmf._initialize_nmf.init", + "default_value": "'warn'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'random', 'nndsvd', 'nndsvda', 'nndsvdar'}", + "default_value": "None", + "description": "Method used to initialize the procedure.\nDefault: None.\nValid options:\n\n- None: 'nndsvd' if n_components <= min(n_samples, n_features),\n otherwise 'random'.\n\n- 'random': non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n- 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n- 'nndsvda': NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n- 'nndsvdar': NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n- 'custom': use custom matrices W and H" + }, + "type": { + "kind": "EnumType", + "values": ["nndsvdar", "random", "nndsvda", "nndsvd"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_initialize_nmf/eps", + "name": "eps", + "qname": "sklearn.decomposition._nmf._initialize_nmf.eps", + "default_value": "1e-06", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Truncate all values less then this in output to zero." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_initialize_nmf/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._nmf._initialize_nmf.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used when ``init`` == 'nndsvdar' or 'random'. Pass an int for\nreproducible results across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Algorithms for NMF initialization.\n\nComputes an initial guess for the non-negative\nrank k matrix approximation for X: X = WH.", + "docstring": "Algorithms for NMF initialization.\n\nComputes an initial guess for the non-negative\nrank k matrix approximation for X: X = WH.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix to be decomposed.\n\nn_components : int\n The number of components desired in the approximation.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar'}, default=None\n Method used to initialize the procedure.\n Default: None.\n Valid options:\n\n - None: 'nndsvd' if n_components <= min(n_samples, n_features),\n otherwise 'random'.\n\n - 'random': non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - 'nndsvda': NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - 'nndsvdar': NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - 'custom': use custom matrices W and H\n\neps : float, default=1e-6\n Truncate all values less then this in output to zero.\n\nrandom_state : int, RandomState instance or None, default=None\n Used when ``init`` == 'nndsvdar' or 'random'. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nW : array-like of shape (n_samples, n_components)\n Initial guesses for solving X ~= WH.\n\nH : array-like of shape (n_components, n_features)\n Initial guesses for solving X ~= WH.\n\nReferences\n----------\nC. Boutsidis, E. 
Gallopoulos: SVD based initialization: A head start for\nnonnegative matrix factorization - Pattern Recognition, 2008\nhttp://tinyurl.com/nndsvd", + "code": "def _initialize_nmf(X, n_components, init='warn', eps=1e-6,\n random_state=None):\n \"\"\"Algorithms for NMF initialization.\n\n Computes an initial guess for the non-negative\n rank k matrix approximation for X: X = WH.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix to be decomposed.\n\n n_components : int\n The number of components desired in the approximation.\n\n init : {'random', 'nndsvd', 'nndsvda', 'nndsvdar'}, default=None\n Method used to initialize the procedure.\n Default: None.\n Valid options:\n\n - None: 'nndsvd' if n_components <= min(n_samples, n_features),\n otherwise 'random'.\n\n - 'random': non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - 'nndsvda': NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - 'nndsvdar': NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - 'custom': use custom matrices W and H\n\n eps : float, default=1e-6\n Truncate all values less then this in output to zero.\n\n random_state : int, RandomState instance or None, default=None\n Used when ``init`` == 'nndsvdar' or 'random'. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n W : array-like of shape (n_samples, n_components)\n Initial guesses for solving X ~= WH.\n\n H : array-like of shape (n_components, n_features)\n Initial guesses for solving X ~= WH.\n\n References\n ----------\n C. Boutsidis, E. 
Gallopoulos: SVD based initialization: A head start for\n nonnegative matrix factorization - Pattern Recognition, 2008\n http://tinyurl.com/nndsvd\n \"\"\"\n if init == 'warn':\n warnings.warn((\"The 'init' value, when 'init=None' and \"\n \"n_components is less than n_samples and \"\n \"n_features, will be changed from 'nndsvd' to \"\n \"'nndsvda' in 1.1 (renaming of 0.26).\"), FutureWarning)\n init = None\n\n check_non_negative(X, \"NMF initialization\")\n n_samples, n_features = X.shape\n\n if (init is not None and init != 'random'\n and n_components > min(n_samples, n_features)):\n raise ValueError(\"init = '{}' can only be used when \"\n \"n_components <= min(n_samples, n_features)\"\n .format(init))\n\n if init is None:\n if n_components <= min(n_samples, n_features):\n init = 'nndsvd'\n else:\n init = 'random'\n\n # Random initialization\n if init == 'random':\n avg = np.sqrt(X.mean() / n_components)\n rng = check_random_state(random_state)\n H = avg * rng.randn(n_components, n_features).astype(X.dtype,\n copy=False)\n W = avg * rng.randn(n_samples, n_components).astype(X.dtype,\n copy=False)\n np.abs(H, out=H)\n np.abs(W, out=W)\n return W, H\n\n # NNDSVD initialization\n U, S, V = randomized_svd(X, n_components, random_state=random_state)\n W = np.zeros_like(U)\n H = np.zeros_like(V)\n\n # The leading singular triplet is non-negative\n # so it can be used as is for initialization.\n W[:, 0] = np.sqrt(S[0]) * np.abs(U[:, 0])\n H[0, :] = np.sqrt(S[0]) * np.abs(V[0, :])\n\n for j in range(1, n_components):\n x, y = U[:, j], V[j, :]\n\n # extract positive and negative parts of column vectors\n x_p, y_p = np.maximum(x, 0), np.maximum(y, 0)\n x_n, y_n = np.abs(np.minimum(x, 0)), np.abs(np.minimum(y, 0))\n\n # and their norms\n x_p_nrm, y_p_nrm = norm(x_p), norm(y_p)\n x_n_nrm, y_n_nrm = norm(x_n), norm(y_n)\n\n m_p, m_n = x_p_nrm * y_p_nrm, x_n_nrm * y_n_nrm\n\n # choose update\n if m_p > m_n:\n u = x_p / x_p_nrm\n v = y_p / y_p_nrm\n sigma = m_p\n else:\n u = x_n / x_n_nrm\n v = y_n / y_n_nrm\n sigma = m_n\n\n lbd = np.sqrt(S[j] * sigma)\n W[:, j] = lbd * u\n H[j, :] = lbd * v\n\n W[W < eps] = 0\n H[H < eps] = 0\n\n if init == \"nndsvd\":\n pass\n elif init == \"nndsvda\":\n avg = X.mean()\n W[W == 0] = avg\n H[H == 0] = avg\n elif init == \"nndsvdar\":\n rng = check_random_state(random_state)\n avg = X.mean()\n W[W == 0] = abs(avg * rng.randn(len(W[W == 0])) / 100)\n H[H == 0] = abs(avg * rng.randn(len(H[H == 0])) / 100)\n else:\n raise ValueError(\n 'Invalid init parameter: got %r instead of one of %r' %\n (init, (None, 'random', 'nndsvd', 'nndsvda', 'nndsvdar')))\n\n return W, H" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_h", + "name": "_multiplicative_update_h", + "qname": "sklearn.decomposition._nmf._multiplicative_update_h", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_h/X", + "name": "X", + "qname": "sklearn.decomposition._nmf._multiplicative_update_h.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_h/W", + "name": "W", + "qname": "sklearn.decomposition._nmf._multiplicative_update_h.W", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + 
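The NNDSVD branch above seeds the factorization from a truncated SVD; the key fact is that the leading singular triplet of a non-negative matrix is non-negative up to sign. A minimal NumPy sketch of that first step (toy data, illustrative only):

    import numpy as np

    X = np.abs(np.random.RandomState(0).randn(6, 5))
    U, S, Vt = np.linalg.svd(X, full_matrices=False)
    w0 = np.sqrt(S[0]) * np.abs(U[:, 0])   # seeds W[:, 0]
    h0 = np.sqrt(S[0]) * np.abs(Vt[0, :])  # seeds H[0, :]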
{ + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_h/H", + "name": "H", + "qname": "sklearn.decomposition._nmf._multiplicative_update_h.H", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_h/beta_loss", + "name": "beta_loss", + "qname": "sklearn.decomposition._nmf._multiplicative_update_h.beta_loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_h/l1_reg_H", + "name": "l1_reg_H", + "qname": "sklearn.decomposition._nmf._multiplicative_update_h.l1_reg_H", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_h/l2_reg_H", + "name": "l2_reg_H", + "qname": "sklearn.decomposition._nmf._multiplicative_update_h.l2_reg_H", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_h/gamma", + "name": "gamma", + "qname": "sklearn.decomposition._nmf._multiplicative_update_h.gamma", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Update H in Multiplicative Update NMF.", + "docstring": "Update H in Multiplicative Update NMF.", + "code": "def _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H, l2_reg_H, gamma):\n \"\"\"Update H in Multiplicative Update NMF.\"\"\"\n if beta_loss == 2:\n numerator = safe_sparse_dot(W.T, X)\n denominator = np.linalg.multi_dot([W.T, W, H])\n\n else:\n # Numerator\n WH_safe_X = _special_sparse_dot(W, H, X)\n if sp.issparse(X):\n WH_safe_X_data = WH_safe_X.data\n X_data = X.data\n else:\n WH_safe_X_data = WH_safe_X\n X_data = X\n # copy used in the Denominator\n WH = WH_safe_X.copy()\n if beta_loss - 1. < 0:\n WH[WH == 0] = EPSILON\n\n # to avoid division by zero\n if beta_loss - 2. 
< 0:\n WH_safe_X_data[WH_safe_X_data == 0] = EPSILON\n\n if beta_loss == 1:\n np.divide(X_data, WH_safe_X_data, out=WH_safe_X_data)\n elif beta_loss == 0:\n # speeds up computation time\n # refer to /numpy/numpy/issues/9363\n WH_safe_X_data **= -1\n WH_safe_X_data **= 2\n # element-wise multiplication\n WH_safe_X_data *= X_data\n else:\n WH_safe_X_data **= beta_loss - 2\n # element-wise multiplication\n WH_safe_X_data *= X_data\n\n # here numerator = dot(W.T, (dot(W, H) ** (beta_loss - 2)) * X)\n numerator = safe_sparse_dot(W.T, WH_safe_X)\n\n # Denominator\n if beta_loss == 1:\n W_sum = np.sum(W, axis=0) # shape(n_components, )\n W_sum[W_sum == 0] = 1.\n denominator = W_sum[:, np.newaxis]\n\n # beta_loss not in (1, 2)\n else:\n # computation of WtWH = dot(W.T, dot(W, H) ** beta_loss - 1)\n if sp.issparse(X):\n # memory efficient computation\n # (compute column by column, avoiding the dense matrix WH)\n WtWH = np.empty(H.shape)\n for i in range(X.shape[1]):\n WHi = np.dot(W, H[:, i])\n if beta_loss - 1 < 0:\n WHi[WHi == 0] = EPSILON\n WHi **= beta_loss - 1\n WtWH[:, i] = np.dot(W.T, WHi)\n else:\n WH **= beta_loss - 1\n WtWH = np.dot(W.T, WH)\n denominator = WtWH\n\n # Add L1 and L2 regularization\n if l1_reg_H > 0:\n denominator += l1_reg_H\n if l2_reg_H > 0:\n denominator = denominator + l2_reg_H * H\n denominator[denominator == 0] = EPSILON\n\n numerator /= denominator\n delta_H = numerator\n\n # gamma is in ]0, 1]\n if gamma != 1:\n delta_H **= gamma\n\n return delta_H" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_w", + "name": "_multiplicative_update_w", + "qname": "sklearn.decomposition._nmf._multiplicative_update_w", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_w/X", + "name": "X", + "qname": "sklearn.decomposition._nmf._multiplicative_update_w.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_w/W", + "name": "W", + "qname": "sklearn.decomposition._nmf._multiplicative_update_w.W", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_w/H", + "name": "H", + "qname": "sklearn.decomposition._nmf._multiplicative_update_w.H", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_w/beta_loss", + "name": "beta_loss", + "qname": "sklearn.decomposition._nmf._multiplicative_update_w.beta_loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_w/l1_reg_W", + "name": "l1_reg_W", + "qname": "sklearn.decomposition._nmf._multiplicative_update_w.l1_reg_W", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_w/l2_reg_W", + "name": "l2_reg_W", + "qname": "sklearn.decomposition._nmf._multiplicative_update_w.l2_reg_W", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_w/gamma", + "name": "gamma", + "qname": "sklearn.decomposition._nmf._multiplicative_update_w.gamma", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_w/H_sum", + "name": "H_sum", + "qname": "sklearn.decomposition._nmf._multiplicative_update_w.H_sum", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_w/HHt", + "name": "HHt", + "qname": "sklearn.decomposition._nmf._multiplicative_update_w.HHt", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_w/XHt", + "name": "XHt", + "qname": "sklearn.decomposition._nmf._multiplicative_update_w.XHt", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_multiplicative_update_w/update_H", + "name": "update_H", + "qname": "sklearn.decomposition._nmf._multiplicative_update_w.update_H", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Update W in Multiplicative Update NMF.", + "docstring": "Update W in Multiplicative Update NMF.", + "code": "def _multiplicative_update_w(X, W, H, beta_loss, l1_reg_W, l2_reg_W, gamma,\n H_sum=None, HHt=None, XHt=None, update_H=True):\n \"\"\"Update W in Multiplicative Update NMF.\"\"\"\n if beta_loss == 2:\n # Numerator\n if XHt is None:\n XHt = safe_sparse_dot(X, H.T)\n if update_H:\n # avoid a copy of XHt, which will be re-computed (update_H=True)\n numerator = XHt\n else:\n # preserve the XHt, which is not re-computed (update_H=False)\n numerator = XHt.copy()\n\n # Denominator\n if HHt is None:\n HHt = np.dot(H, H.T)\n denominator = np.dot(W, HHt)\n\n else:\n # Numerator\n # if X is sparse, compute WH only where X is non zero\n WH_safe_X = _special_sparse_dot(W, H, X)\n if sp.issparse(X):\n WH_safe_X_data = WH_safe_X.data\n X_data = X.data\n else:\n WH_safe_X_data = WH_safe_X\n X_data = X\n # copy used in the Denominator\n WH = WH_safe_X.copy()\n if beta_loss - 1. < 0:\n WH[WH == 0] = EPSILON\n\n # to avoid taking a negative power of zero\n if beta_loss - 2. 
< 0:\n WH_safe_X_data[WH_safe_X_data == 0] = EPSILON\n\n if beta_loss == 1:\n np.divide(X_data, WH_safe_X_data, out=WH_safe_X_data)\n elif beta_loss == 0:\n # speeds up computation time\n # refer to /numpy/numpy/issues/9363\n WH_safe_X_data **= -1\n WH_safe_X_data **= 2\n # element-wise multiplication\n WH_safe_X_data *= X_data\n else:\n WH_safe_X_data **= beta_loss - 2\n # element-wise multiplication\n WH_safe_X_data *= X_data\n\n # here numerator = dot(X * (dot(W, H) ** (beta_loss - 2)), H.T)\n numerator = safe_sparse_dot(WH_safe_X, H.T)\n\n # Denominator\n if beta_loss == 1:\n if H_sum is None:\n H_sum = np.sum(H, axis=1) # shape(n_components, )\n denominator = H_sum[np.newaxis, :]\n\n else:\n # computation of WHHt = dot(dot(W, H) ** beta_loss - 1, H.T)\n if sp.issparse(X):\n # memory efficient computation\n # (compute row by row, avoiding the dense matrix WH)\n WHHt = np.empty(W.shape)\n for i in range(X.shape[0]):\n WHi = np.dot(W[i, :], H)\n if beta_loss - 1 < 0:\n WHi[WHi == 0] = EPSILON\n WHi **= beta_loss - 1\n WHHt[i, :] = np.dot(WHi, H.T)\n else:\n WH **= beta_loss - 1\n WHHt = np.dot(WH, H.T)\n denominator = WHHt\n\n # Add L1 and L2 regularization\n if l1_reg_W > 0:\n denominator += l1_reg_W\n if l2_reg_W > 0:\n denominator = denominator + l2_reg_W * W\n denominator[denominator == 0] = EPSILON\n\n numerator /= denominator\n delta_W = numerator\n\n # gamma is in ]0, 1]\n if gamma != 1:\n delta_W **= gamma\n\n return delta_W, H_sum, HHt, XHt" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_special_sparse_dot", + "name": "_special_sparse_dot", + "qname": "sklearn.decomposition._nmf._special_sparse_dot", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/_special_sparse_dot/W", + "name": "W", + "qname": "sklearn.decomposition._nmf._special_sparse_dot.W", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_special_sparse_dot/H", + "name": "H", + "qname": "sklearn.decomposition._nmf._special_sparse_dot.H", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_special_sparse_dot/X", + "name": "X", + "qname": "sklearn.decomposition._nmf._special_sparse_dot.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes np.dot(W, H), only where X is non zero.", + "docstring": "Computes np.dot(W, H), only where X is non zero.", + "code": "def _special_sparse_dot(W, H, X):\n \"\"\"Computes np.dot(W, H), only where X is non zero.\"\"\"\n if sp.issparse(X):\n ii, jj = X.nonzero()\n n_vals = ii.shape[0]\n dot_vals = np.empty(n_vals)\n n_components = W.shape[1]\n\n batch_size = max(n_components, n_vals // n_components)\n for start in range(0, n_vals, batch_size):\n batch = slice(start, start + batch_size)\n dot_vals[batch] = np.multiply(W[ii[batch], :],\n H.T[jj[batch], :]).sum(axis=1)\n\n WH = sp.coo_matrix((dot_vals, (ii, jj)), shape=X.shape)\n return WH.tocsr()\n else:\n return np.dot(W, H)" + }, + { + "id": 
"scikit-learn/sklearn.decomposition._nmf/_update_coordinate_descent", + "name": "_update_coordinate_descent", + "qname": "sklearn.decomposition._nmf._update_coordinate_descent", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/_update_coordinate_descent/X", + "name": "X", + "qname": "sklearn.decomposition._nmf._update_coordinate_descent.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_update_coordinate_descent/W", + "name": "W", + "qname": "sklearn.decomposition._nmf._update_coordinate_descent.W", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_update_coordinate_descent/Ht", + "name": "Ht", + "qname": "sklearn.decomposition._nmf._update_coordinate_descent.Ht", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_update_coordinate_descent/l1_reg", + "name": "l1_reg", + "qname": "sklearn.decomposition._nmf._update_coordinate_descent.l1_reg", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_update_coordinate_descent/l2_reg", + "name": "l2_reg", + "qname": "sklearn.decomposition._nmf._update_coordinate_descent.l2_reg", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_update_coordinate_descent/shuffle", + "name": "shuffle", + "qname": "sklearn.decomposition._nmf._update_coordinate_descent.shuffle", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/_update_coordinate_descent/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._nmf._update_coordinate_descent.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper function for _fit_coordinate_descent.\n\nUpdate W to minimize the objective function, iterating once over all\ncoordinates. By symmetry, to update H, one can call\n_update_coordinate_descent(X.T, Ht, W, ...).", + "docstring": "Helper function for _fit_coordinate_descent.\n\nUpdate W to minimize the objective function, iterating once over all\ncoordinates. By symmetry, to update H, one can call\n_update_coordinate_descent(X.T, Ht, W, ...).", + "code": "def _update_coordinate_descent(X, W, Ht, l1_reg, l2_reg, shuffle,\n random_state):\n \"\"\"Helper function for _fit_coordinate_descent.\n\n Update W to minimize the objective function, iterating once over all\n coordinates. 
By symmetry, to update H, one can call\n _update_coordinate_descent(X.T, Ht, W, ...).\n\n \"\"\"\n n_components = Ht.shape[1]\n\n HHt = np.dot(Ht.T, Ht)\n XHt = safe_sparse_dot(X, Ht)\n\n # L2 regularization corresponds to increase of the diagonal of HHt\n if l2_reg != 0.:\n # adds l2_reg only on the diagonal\n HHt.flat[::n_components + 1] += l2_reg\n # L1 regularization corresponds to decrease of each element of XHt\n if l1_reg != 0.:\n XHt -= l1_reg\n\n if shuffle:\n permutation = random_state.permutation(n_components)\n else:\n permutation = np.arange(n_components)\n # The following seems to be required on 64-bit Windows w/ Python 3.5.\n permutation = np.asarray(permutation, dtype=np.intp)\n return _update_cdnmf_fast(W, HHt, XHt, permutation)" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization", + "name": "non_negative_factorization", + "qname": "sklearn.decomposition._nmf.non_negative_factorization", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/X", + "name": "X", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Constant matrix." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/W", + "name": "W", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.W", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "None", + "description": "If init='custom', it is used as initial guess for the solution." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/H", + "name": "H", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.H", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "None", + "description": "If init='custom', it is used as initial guess for the solution.\nIf update_H=False, it is used as a constant, to solve for W only." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of components, if n_components is not set all features\nare kept." 
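A small sketch of how the CD helper above folds L2 regularization into the Gram matrix: the penalty lands on the diagonal of `HHt` only (toy shapes; the value 0.35 is illustrative):

    import numpy as np

    n_components = 3
    Ht = np.abs(np.random.RandomState(0).randn(5, n_components))
    HHt = Ht.T @ Ht
    HHt.flat[::n_components + 1] += 0.35  # adds l2_reg to HHt[k, k] only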
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/init", + "name": "init", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.init", + "default_value": "'warn'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}", + "default_value": "None", + "description": "Method used to initialize the procedure.\n\nValid options:\n\n- None: 'nndsvd' if n_components < n_features, otherwise 'random'.\n\n- 'random': non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n- 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n- 'nndsvda': NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n- 'nndsvdar': NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n- 'custom': use custom matrices W and H if `update_H=True`. If\n `update_H=False`, then only custom matrix H is used.\n\n.. versionchanged:: 0.23\n The default value of `init` changed from 'random' to None in 0.23." + }, + "type": { + "kind": "EnumType", + "values": ["nndsvd", "nndsvdar", "random", "custom", "nndsvda"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/update_H", + "name": "update_H", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.update_H", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to True, both W and H will be estimated from initial guesses.\nSet to False, only W will be estimated." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/solver", + "name": "solver", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.solver", + "default_value": "'cd'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'cd', 'mu'}", + "default_value": "'cd'", + "description": "Numerical solver to use:\n\n- 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\n Alternating Least Squares (Fast HALS).\n\n- 'mu' is a Multiplicative Update solver.\n\n.. versionadded:: 0.17\n Coordinate Descent solver.\n\n.. versionadded:: 0.19\n Multiplicative Update solver." + }, + "type": { + "kind": "EnumType", + "values": ["mu", "cd"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/beta_loss", + "name": "beta_loss", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.beta_loss", + "default_value": "'frobenius'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or {'frobenius', 'kullback-leibler', 'itakura-saito'}", + "default_value": "'frobenius'", + "description": "Beta divergence to be minimized, measuring the distance between X\nand the dot product WH. Note that values different from 'frobenius'\n(or 2) and 'kullback-leibler' (or 1) lead to significantly slower\nfits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\nmatrix X cannot contain zeros. Used only in 'mu' solver.\n\n.. 
versionadded:: 0.19" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["kullback-leibler", "frobenius", "itakura-saito"] + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/tol", + "name": "tol", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance of the stopping condition." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.max_iter", + "default_value": "200", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "200", + "description": "Maximum number of iterations before timing out." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/alpha", + "name": "alpha", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.alpha", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "Constant that multiplies the regularization terms." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.l1_ratio", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "The regularization mixing parameter, with 0 <= l1_ratio <= 1.\nFor l1_ratio = 0 the penalty is an elementwise L2 penalty\n(aka Frobenius Norm).\nFor l1_ratio = 1 it is an elementwise L1 penalty.\nFor 0 < l1_ratio < 1, the penalty is a combination of L1 and L2." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/regularization", + "name": "regularization", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.regularization", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'both', 'components', 'transformation'}", + "default_value": "None", + "description": "Select whether the regularization affects the components (H), the\ntransformation (W), both or none of them." + }, + "type": { + "kind": "EnumType", + "values": ["both", "components", "transformation"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used for NMF initialisation (when ``init`` == 'nndsvdar' or\n'random'), and in Coordinate Descent. Pass an int for reproducible\nresults across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/non_negative_factorization/shuffle", + "name": "shuffle", + "qname": "sklearn.decomposition._nmf.non_negative_factorization.shuffle", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If true, randomize the order of coordinates in the CD solver." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Non-negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. math::\n\n 0.5 * ||X - WH||_{loss}^2 + alpha * l1_{ratio} * ||vec(W)||_1\n\n + alpha * l1_{ratio} * ||vec(H)||_1\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||W||_{Fro}^2\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe objective function is minimized with an alternating minimization of W\nand H. If H is given and update_H=False, it solves for W only.", + "docstring": "Compute Non-negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. math::\n\n 0.5 * ||X - WH||_{loss}^2 + alpha * l1_{ratio} * ||vec(W)||_1\n\n + alpha * l1_{ratio} * ||vec(H)||_1\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||W||_{Fro}^2\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe objective function is minimized with an alternating minimization of W\nand H. 
If H is given and update_H=False, it solves for W only.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Constant matrix.\n\nW : array-like of shape (n_samples, n_components), default=None\n If init='custom', it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features), default=None\n If init='custom', it is used as initial guess for the solution.\n If update_H=False, it is used as a constant, to solve for W only.\n\nn_components : int, default=None\n Number of components, if n_components is not set all features\n are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n Method used to initialize the procedure.\n\n Valid options:\n\n - None: 'nndsvd' if n_components < n_features, otherwise 'random'.\n\n - 'random': non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - 'nndsvda': NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - 'nndsvdar': NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - 'custom': use custom matrices W and H if `update_H=True`. If\n `update_H=False`, then only custom matrix H is used.\n\n .. versionchanged:: 0.23\n The default value of `init` changed from 'random' to None in 0.23.\n\nupdate_H : bool, default=True\n Set to True, both W and H will be estimated from initial guesses.\n Set to False, only W will be estimated.\n\nsolver : {'cd', 'mu'}, default='cd'\n Numerical solver to use:\n\n - 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\n Alternating Least Squares (Fast HALS).\n\n - 'mu' is a Multiplicative Update solver.\n\n .. versionadded:: 0.17\n Coordinate Descent solver.\n\n .. versionadded:: 0.19\n Multiplicative Update solver.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros. Used only in 'mu' solver.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n Maximum number of iterations before timing out.\n\nalpha : float, default=0.\n Constant that multiplies the regularization terms.\n\nl1_ratio : float, default=0.\n The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n For l1_ratio = 0 the penalty is an elementwise L2 penalty\n (aka Frobenius Norm).\n For l1_ratio = 1 it is an elementwise L1 penalty.\n For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\nregularization : {'both', 'components', 'transformation'}, default=None\n Select whether the regularization affects the components (H), the\n transformation (W), both or none of them.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for NMF initialisation (when ``init`` == 'nndsvdar' or\n 'random'), and in Coordinate Descent. 
Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary <random_state>`.\n\nverbose : int, default=0\n The verbosity level.\n\nshuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Solution to the non-negative least squares problem.\n\nH : ndarray of shape (n_components, n_features)\n Solution to the non-negative least squares problem.\n\nn_iter : int\n Actual number of iterations.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import non_negative_factorization\n>>> W, H, n_iter = non_negative_factorization(X, n_components=2,\n... init='random', random_state=0)\n\nReferences\n----------\nCichocki, Andrzej, and P. H. A. N. Anh-Huy. \"Fast local algorithms for\nlarge scale nonnegative matrix and tensor factorizations.\"\nIEICE transactions on fundamentals of electronics, communications and\ncomputer sciences 92.3: 708-721, 2009.\n\nFevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix\nfactorization with the beta-divergence. Neural Computation, 23(9).", + "code": "@_deprecate_positional_args\ndef non_negative_factorization(X, W=None, H=None, n_components=None, *,\n init='warn', update_H=True, solver='cd',\n beta_loss='frobenius', tol=1e-4,\n max_iter=200, alpha=0., l1_ratio=0.,\n regularization=None, random_state=None,\n verbose=0, shuffle=False):\n \"\"\"Compute Non-negative Matrix Factorization (NMF).\n\n Find two non-negative matrices (W, H) whose product approximates the non-\n negative matrix X. This factorization can be used for example for\n dimensionality reduction, source separation or topic extraction.\n\n The objective function is:\n\n .. math::\n\n 0.5 * ||X - WH||_{loss}^2 + alpha * l1_{ratio} * ||vec(W)||_1\n\n + alpha * l1_{ratio} * ||vec(H)||_1\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||W||_{Fro}^2\n\n + 0.5 * alpha * (1 - l1_{ratio}) * ||H||_{Fro}^2\n\n Where:\n\n :math:`||A||_{Fro}^2 = \\\\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n :math:`||vec(A)||_1 = \\\\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\n The generic norm :math:`||X - WH||_{loss}^2` may represent\n the Frobenius norm or another supported beta-divergence loss.\n The choice between options is controlled by the `beta_loss` parameter.\n\n The objective function is minimized with an alternating minimization of W\n and H. 
If H is given and update_H=False, it solves for W only.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Constant matrix.\n\n W : array-like of shape (n_samples, n_components), default=None\n If init='custom', it is used as initial guess for the solution.\n\n H : array-like of shape (n_components, n_features), default=None\n If init='custom', it is used as initial guess for the solution.\n If update_H=False, it is used as a constant, to solve for W only.\n\n n_components : int, default=None\n Number of components, if n_components is not set all features\n are kept.\n\n init : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n Method used to initialize the procedure.\n\n Valid options:\n\n - None: 'nndsvd' if n_components < n_features, otherwise 'random'.\n\n - 'random': non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - 'nndsvda': NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - 'nndsvdar': NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - 'custom': use custom matrices W and H if `update_H=True`. If\n `update_H=False`, then only custom matrix H is used.\n\n .. versionchanged:: 0.23\n The default value of `init` changed from 'random' to None in 0.23.\n\n update_H : bool, default=True\n Set to True, both W and H will be estimated from initial guesses.\n Set to False, only W will be estimated.\n\n solver : {'cd', 'mu'}, default='cd'\n Numerical solver to use:\n\n - 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\n Alternating Least Squares (Fast HALS).\n\n - 'mu' is a Multiplicative Update solver.\n\n .. versionadded:: 0.17\n Coordinate Descent solver.\n\n .. versionadded:: 0.19\n Multiplicative Update solver.\n\n beta_loss : float or {'frobenius', 'kullback-leibler', \\\n 'itakura-saito'}, default='frobenius'\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros. Used only in 'mu' solver.\n\n .. versionadded:: 0.19\n\n tol : float, default=1e-4\n Tolerance of the stopping condition.\n\n max_iter : int, default=200\n Maximum number of iterations before timing out.\n\n alpha : float, default=0.\n Constant that multiplies the regularization terms.\n\n l1_ratio : float, default=0.\n The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n For l1_ratio = 0 the penalty is an elementwise L2 penalty\n (aka Frobenius Norm).\n For l1_ratio = 1 it is an elementwise L1 penalty.\n For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n regularization : {'both', 'components', 'transformation'}, default=None\n Select whether the regularization affects the components (H), the\n transformation (W), both or none of them.\n\n random_state : int, RandomState instance or None, default=None\n Used for NMF initialisation (when ``init`` == 'nndsvdar' or\n 'random'), and in Coordinate Descent. 
Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary <random_state>`.\n\n verbose : int, default=0\n The verbosity level.\n\n shuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\n Returns\n -------\n W : ndarray of shape (n_samples, n_components)\n Solution to the non-negative least squares problem.\n\n H : ndarray of shape (n_components, n_features)\n Solution to the non-negative least squares problem.\n\n n_iter : int\n Actual number of iterations.\n\n Examples\n --------\n >>> import numpy as np\n >>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n >>> from sklearn.decomposition import non_negative_factorization\n >>> W, H, n_iter = non_negative_factorization(X, n_components=2,\n ... init='random', random_state=0)\n\n References\n ----------\n Cichocki, Andrzej, and P. H. A. N. Anh-Huy. \"Fast local algorithms for\n large scale nonnegative matrix and tensor factorizations.\"\n IEICE transactions on fundamentals of electronics, communications and\n computer sciences 92.3: 708-721, 2009.\n\n Fevotte, C., & Idier, J. (2011). Algorithms for nonnegative matrix\n factorization with the beta-divergence. Neural Computation, 23(9).\n \"\"\"\n X = check_array(X, accept_sparse=('csr', 'csc'),\n dtype=[np.float64, np.float32])\n check_non_negative(X, \"NMF (input X)\")\n beta_loss = _check_string_param(solver, regularization, beta_loss, init)\n\n if X.min() == 0 and beta_loss <= 0:\n raise ValueError(\"When beta_loss <= 0 and X contains zeros, \"\n \"the solver may diverge. Please add small values to \"\n \"X, or use a positive beta_loss.\")\n\n n_samples, n_features = X.shape\n if n_components is None:\n n_components = n_features\n\n if not isinstance(n_components, numbers.Integral) or n_components <= 0:\n raise ValueError(\"Number of components must be a positive integer;\"\n \" got (n_components=%r)\" % n_components)\n if not isinstance(max_iter, numbers.Integral) or max_iter < 0:\n raise ValueError(\"Maximum number of iterations must be a positive \"\n \"integer; got (max_iter=%r)\" % max_iter)\n if not isinstance(tol, numbers.Number) or tol < 0:\n raise ValueError(\"Tolerance for stopping criteria must be \"\n \"positive; got (tol=%r)\" % tol)\n\n # check W and H, or initialize them\n if init == 'custom' and update_H:\n _check_init(H, (n_components, n_features), \"NMF (input H)\")\n _check_init(W, (n_samples, n_components), \"NMF (input W)\")\n if H.dtype != X.dtype or W.dtype != X.dtype:\n raise TypeError(\"H and W should have the same dtype as X. Got \"\n \"H.dtype = {} and W.dtype = {}.\"\n .format(H.dtype, W.dtype))\n elif not update_H:\n _check_init(H, (n_components, n_features), \"NMF (input H)\")\n if H.dtype != X.dtype:\n raise TypeError(\"H should have the same dtype as X. 
Got H.dtype = \"\n \"{}.\".format(H.dtype))\n # 'mu' solver should not be initialized by zeros\n if solver == 'mu':\n avg = np.sqrt(X.mean() / n_components)\n W = np.full((n_samples, n_components), avg, dtype=X.dtype)\n else:\n W = np.zeros((n_samples, n_components), dtype=X.dtype)\n else:\n W, H = _initialize_nmf(X, n_components, init=init,\n random_state=random_state)\n\n l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H = _compute_regularization(\n alpha, l1_ratio, regularization)\n\n if solver == 'cd':\n W, H, n_iter = _fit_coordinate_descent(X, W, H, tol, max_iter,\n l1_reg_W, l1_reg_H,\n l2_reg_W, l2_reg_H,\n update_H=update_H,\n verbose=verbose,\n shuffle=shuffle,\n random_state=random_state)\n elif solver == 'mu':\n W, H, n_iter = _fit_multiplicative_update(X, W, H, beta_loss, max_iter,\n tol, l1_reg_W, l1_reg_H,\n l2_reg_W, l2_reg_H, update_H,\n verbose)\n\n else:\n raise ValueError(\"Invalid solver parameter '%s'.\" % solver)\n\n if n_iter == max_iter and tol > 0:\n warnings.warn(\"Maximum number of iterations %d reached. Increase it to\"\n \" improve convergence.\" % max_iter, ConvergenceWarning)\n\n return W, H, n_iter" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/norm", + "name": "norm", + "qname": "sklearn.decomposition._nmf.norm", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/norm/x", + "name": "x", + "qname": "sklearn.decomposition._nmf.norm.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "Vector for which to compute the norm." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Dot product-based Euclidean norm implementation.\n\nSee: http://fseoane.net/blog/2011/computing-the-vector-norm/", + "docstring": "Dot product-based Euclidean norm implementation.\n\nSee: http://fseoane.net/blog/2011/computing-the-vector-norm/\n\nParameters\n----------\nx : array-like\n Vector for which to compute the norm.", + "code": "def norm(x):\n \"\"\"Dot product-based Euclidean norm implementation.\n\n See: http://fseoane.net/blog/2011/computing-the-vector-norm/\n\n Parameters\n ----------\n x : array-like\n Vector for which to compute the norm.\n \"\"\"\n return sqrt(squared_norm(x))" + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/trace_dot", + "name": "trace_dot", + "qname": "sklearn.decomposition._nmf.trace_dot", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._nmf/trace_dot/X", + "name": "X", + "qname": "sklearn.decomposition._nmf.trace_dot.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "First matrix." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._nmf/trace_dot/Y", + "name": "Y", + "qname": "sklearn.decomposition._nmf.trace_dot.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "Second matrix." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Trace of np.dot(X, Y.T).", + "docstring": "Trace of np.dot(X, Y.T).\n\nParameters\n----------\nX : array-like\n First matrix.\nY : array-like\n Second matrix.", + "code": "def trace_dot(X, Y):\n \"\"\"Trace of np.dot(X, Y.T).\n\n Parameters\n ----------\n X : array-like\n First matrix.\n Y : array-like\n Second matrix.\n \"\"\"\n return np.dot(X.ravel(), Y.ravel())" + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._pca.PCA.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._pca.PCA.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/__init__/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._pca.PCA.__init__.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, float or 'mle'", + "default_value": "None", + "description": "Number of components to keep.\nif n_components is not set all components are kept::\n\n n_components == min(n_samples, n_features)\n\nIf ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's\nMLE is used to guess the dimension. Use of ``n_components == 'mle'``\nwill interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.\n\nIf ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the\nnumber of components such that the amount of variance that needs to be\nexplained is greater than the percentage specified by n_components.\n\nIf ``svd_solver == 'arpack'``, the number of components must be\nstrictly less than the minimum of n_features and n_samples.\n\nHence, the None case results in::\n\n n_components == min(n_samples, n_features) - 1" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "'mle'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/__init__/copy", + "name": "copy", + "qname": "sklearn.decomposition._pca.PCA.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, data passed to fit are overwritten and running\nfit(X).transform(X) will not yield the expected results,\nuse fit_transform(X) instead." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/__init__/whiten", + "name": "whiten", + "qname": "sklearn.decomposition._pca.PCA.__init__.whiten", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When True (False by default) the `components_` vectors are multiplied\nby the square root of n_samples and then divided by the singular values\nto ensure uncorrelated outputs with unit component-wise variances.\n\nWhitening will remove some information from the transformed signal\n(the relative variance scales of the components) but can sometime\nimprove the predictive accuracy of the downstream estimators by\nmaking their data respect some hard-wired assumptions." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/__init__/svd_solver", + "name": "svd_solver", + "qname": "sklearn.decomposition._pca.PCA.__init__.svd_solver", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'full', 'arpack', 'randomized'}", + "default_value": "'auto'", + "description": "If auto :\n The solver is selected by a default policy based on `X.shape` and\n `n_components`: if the input data is larger than 500x500 and the\n number of components to extract is lower than 80% of the smallest\n dimension of the data, then the more efficient 'randomized'\n method is enabled. Otherwise the exact full SVD is computed and\n optionally truncated afterwards.\nIf full :\n run exact full SVD calling the standard LAPACK solver via\n `scipy.linalg.svd` and select the components by postprocessing\nIf arpack :\n run SVD truncated to n_components calling ARPACK solver via\n `scipy.sparse.linalg.svds`. It requires strictly\n 0 < n_components < min(X.shape)\nIf randomized :\n run randomized SVD by the method of Halko et al.\n\n.. versionadded:: 0.18.0" + }, + "type": { + "kind": "EnumType", + "values": ["full", "auto", "randomized", "arpack"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/__init__/tol", + "name": "tol", + "qname": "sklearn.decomposition._pca.PCA.__init__.tol", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Tolerance for singular values computed by svd_solver == 'arpack'.\nMust be of range [0.0, infinity).\n\n.. versionadded:: 0.18.0" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": "Infinity", + "min_inclusive": true, + "max_inclusive": false + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/__init__/iterated_power", + "name": "iterated_power", + "qname": "sklearn.decomposition._pca.PCA.__init__.iterated_power", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or 'auto'", + "default_value": "'auto'", + "description": "Number of iterations for the power method computed by\nsvd_solver == 'randomized'.\nMust be of range [0, infinity).\n\n.. 
versionadded:: 0.18.0" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": "Infinity", + "min_inclusive": true, + "max_inclusive": false + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "'auto'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/__init__/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._pca.PCA.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used when the 'arpack' or 'randomized' solvers are used. Pass an int\nfor reproducible results across multiple function calls.\nSee :term:`Glossary `.\n\n.. versionadded:: 0.18.0" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Principal component analysis (PCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of the\ndata to project it to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nIt uses the LAPACK implementation of the full SVD or a randomized truncated\nSVD by the method of Halko et al. 2009, depending on the shape of the input\ndata and the number of components to extract.\n\nIt can also use the scipy.sparse.linalg ARPACK implementation of the\ntruncated SVD.\n\nNotice that this class does not support sparse input. 
See\n:class:`TruncatedSVD` for an alternative with sparse data.\n\nRead more in the :ref:`User Guide <PCA>`.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=None, *, copy=True, whiten=False,\n svd_solver='auto', tol=0.0, iterated_power='auto',\n random_state=None):\n self.n_components = n_components\n self.copy = copy\n self.whiten = whiten\n self.svd_solver = svd_solver\n self.tol = tol\n self.iterated_power = iterated_power\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_fit", + "name": "_fit", + "qname": "sklearn.decomposition._pca.PCA._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_fit/self", + "name": "self", + "qname": "sklearn.decomposition._pca.PCA._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_fit/X", + "name": "X", + "qname": "sklearn.decomposition._pca.PCA._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Dispatch to the right submethod depending on the chosen solver.", + "docstring": "Dispatch to the right submethod depending on the chosen solver.", + "code": " def _fit(self, X):\n \"\"\"Dispatch to the right submethod depending on the chosen solver.\"\"\"\n\n # Raise an error for sparse input.\n # This is more informative than the generic one raised by check_array.\n if issparse(X):\n raise TypeError('PCA does not support sparse input. 
See '\n 'TruncatedSVD for a possible alternative.')\n\n X = self._validate_data(X, dtype=[np.float64, np.float32],\n ensure_2d=True, copy=self.copy)\n\n # Handle n_components==None\n if self.n_components is None:\n if self.svd_solver != 'arpack':\n n_components = min(X.shape)\n else:\n n_components = min(X.shape) - 1\n else:\n n_components = self.n_components\n\n # Handle svd_solver\n self._fit_svd_solver = self.svd_solver\n if self._fit_svd_solver == 'auto':\n # Small problem or n_components == 'mle', just call full PCA\n if max(X.shape) <= 500 or n_components == 'mle':\n self._fit_svd_solver = 'full'\n elif n_components >= 1 and n_components < .8 * min(X.shape):\n self._fit_svd_solver = 'randomized'\n # This is also the case of n_components in (0,1)\n else:\n self._fit_svd_solver = 'full'\n\n # Call different fits for either full or truncated SVD\n if self._fit_svd_solver == 'full':\n return self._fit_full(X, n_components)\n elif self._fit_svd_solver in ['arpack', 'randomized']:\n return self._fit_truncated(X, n_components, self._fit_svd_solver)\n else:\n raise ValueError(\"Unrecognized svd_solver='{0}'\"\n \"\".format(self._fit_svd_solver))" + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_fit_full", + "name": "_fit_full", + "qname": "sklearn.decomposition._pca.PCA._fit_full", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_fit_full/self", + "name": "self", + "qname": "sklearn.decomposition._pca.PCA._fit_full.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_fit_full/X", + "name": "X", + "qname": "sklearn.decomposition._pca.PCA._fit_full.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_fit_full/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._pca.PCA._fit_full.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model by computing full SVD on X.", + "docstring": "Fit the model by computing full SVD on X.", + "code": " def _fit_full(self, X, n_components):\n \"\"\"Fit the model by computing full SVD on X.\"\"\"\n n_samples, n_features = X.shape\n\n if n_components == 'mle':\n if n_samples < n_features:\n raise ValueError(\"n_components='mle' is only supported \"\n \"if n_samples >= n_features\")\n elif not 0 <= n_components <= min(n_samples, n_features):\n raise ValueError(\"n_components=%r must be between 0 and \"\n \"min(n_samples, n_features)=%r with \"\n \"svd_solver='full'\"\n % (n_components, min(n_samples, n_features)))\n elif n_components >= 1:\n if not isinstance(n_components, numbers.Integral):\n raise ValueError(\"n_components=%r must be of type int \"\n \"when greater than or equal to 1, \"\n \"was of type=%r\"\n % (n_components, type(n_components)))\n\n # Center data\n self.mean_ = np.mean(X, axis=0)\n X -= self.mean_\n\n U, S, Vt = linalg.svd(X, full_matrices=False)\n # flip eigenvectors' sign to enforce deterministic output\n U, Vt = svd_flip(U, Vt)\n\n components_ = Vt\n\n # Get 
variance explained by singular values\n explained_variance_ = (S ** 2) / (n_samples - 1)\n total_var = explained_variance_.sum()\n explained_variance_ratio_ = explained_variance_ / total_var\n singular_values_ = S.copy() # Store the singular values.\n\n # Postprocess the number of components required\n if n_components == 'mle':\n n_components = \\\n _infer_dimension(explained_variance_, n_samples)\n elif 0 < n_components < 1.0:\n # number of components for which the cumulated explained\n # variance percentage is superior to the desired threshold\n # side='right' ensures that number of features selected\n # their variance is always greater than n_components float\n # passed. More discussion in issue: #15669\n ratio_cumsum = stable_cumsum(explained_variance_ratio_)\n n_components = np.searchsorted(ratio_cumsum, n_components,\n side='right') + 1\n # Compute noise covariance using Probabilistic PCA model\n # The sigma2 maximum likelihood (cf. eq. 12.46)\n if n_components < min(n_features, n_samples):\n self.noise_variance_ = explained_variance_[n_components:].mean()\n else:\n self.noise_variance_ = 0.\n\n self.n_samples_, self.n_features_ = n_samples, n_features\n self.components_ = components_[:n_components]\n self.n_components_ = n_components\n self.explained_variance_ = explained_variance_[:n_components]\n self.explained_variance_ratio_ = \\\n explained_variance_ratio_[:n_components]\n self.singular_values_ = singular_values_[:n_components]\n\n return U, S, Vt" + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_fit_truncated", + "name": "_fit_truncated", + "qname": "sklearn.decomposition._pca.PCA._fit_truncated", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_fit_truncated/self", + "name": "self", + "qname": "sklearn.decomposition._pca.PCA._fit_truncated.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_fit_truncated/X", + "name": "X", + "qname": "sklearn.decomposition._pca.PCA._fit_truncated.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_fit_truncated/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._pca.PCA._fit_truncated.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_fit_truncated/svd_solver", + "name": "svd_solver", + "qname": "sklearn.decomposition._pca.PCA._fit_truncated.svd_solver", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model by computing truncated SVD (by ARPACK or randomized)\non X.", + "docstring": "Fit the model by computing truncated SVD (by ARPACK or randomized)\non X.", + "code": " def _fit_truncated(self, X, n_components, svd_solver):\n \"\"\"Fit the model by computing truncated SVD (by ARPACK or randomized)\n on X.\n \"\"\"\n n_samples, n_features = X.shape\n\n if 
isinstance(n_components, str):\n raise ValueError(\"n_components=%r cannot be a string \"\n \"with svd_solver='%s'\"\n % (n_components, svd_solver))\n elif not 1 <= n_components <= min(n_samples, n_features):\n raise ValueError(\"n_components=%r must be between 1 and \"\n \"min(n_samples, n_features)=%r with \"\n \"svd_solver='%s'\"\n % (n_components, min(n_samples, n_features),\n svd_solver))\n elif not isinstance(n_components, numbers.Integral):\n raise ValueError(\"n_components=%r must be of type int \"\n \"when greater than or equal to 1, was of type=%r\"\n % (n_components, type(n_components)))\n elif svd_solver == 'arpack' and n_components == min(n_samples,\n n_features):\n raise ValueError(\"n_components=%r must be strictly less than \"\n \"min(n_samples, n_features)=%r with \"\n \"svd_solver='%s'\"\n % (n_components, min(n_samples, n_features),\n svd_solver))\n\n random_state = check_random_state(self.random_state)\n\n # Center data\n self.mean_ = np.mean(X, axis=0)\n X -= self.mean_\n\n if svd_solver == 'arpack':\n v0 = _init_arpack_v0(min(X.shape), random_state)\n U, S, Vt = svds(X, k=n_components, tol=self.tol, v0=v0)\n # svds doesn't abide by scipy.linalg.svd/randomized_svd\n # conventions, so reverse its outputs.\n S = S[::-1]\n # flip eigenvectors' sign to enforce deterministic output\n U, Vt = svd_flip(U[:, ::-1], Vt[::-1])\n\n elif svd_solver == 'randomized':\n # sign flipping is done inside\n U, S, Vt = randomized_svd(X, n_components=n_components,\n n_iter=self.iterated_power,\n flip_sign=True,\n random_state=random_state)\n\n self.n_samples_, self.n_features_ = n_samples, n_features\n self.components_ = Vt\n self.n_components_ = n_components\n\n # Get variance explained by singular values\n self.explained_variance_ = (S ** 2) / (n_samples - 1)\n total_var = np.var(X, ddof=1, axis=0)\n self.explained_variance_ratio_ = \\\n self.explained_variance_ / total_var.sum()\n self.singular_values_ = S.copy() # Store the singular values.\n\n if self.n_components_ < min(n_features, n_samples):\n self.noise_variance_ = (total_var.sum() -\n self.explained_variance_.sum())\n self.noise_variance_ /= min(n_features, n_samples) - n_components\n else:\n self.noise_variance_ = 0.\n\n return U, S, Vt" + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_more_tags", + "name": "_more_tags", + "qname": "sklearn.decomposition._pca.PCA._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/_more_tags/self", + "name": "self", + "qname": "sklearn.decomposition._pca.PCA._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'preserves_dtype': [np.float64, np.float32]}" + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/fit", + "name": "fit", + "qname": "sklearn.decomposition._pca.PCA.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/fit/self", + "name": "self", + "qname": "sklearn.decomposition._pca.PCA.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/fit/X", + "name": "X", + "qname": 
"sklearn.decomposition._pca.PCA.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/fit/y", + "name": "y", + "qname": "sklearn.decomposition._pca.PCA.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model with X.", + "docstring": "Fit the model with X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the model with X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n self._fit(X)\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/fit_transform", + "name": "fit_transform", + "qname": "sklearn.decomposition._pca.PCA.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/fit_transform/self", + "name": "self", + "qname": "sklearn.decomposition._pca.PCA.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/fit_transform/X", + "name": "X", + "qname": "sklearn.decomposition._pca.PCA.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/fit_transform/y", + "name": "y", + "qname": "sklearn.decomposition._pca.PCA.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model with X and apply the dimensionality reduction on X.", + "docstring": "Fit the model with X and apply the dimensionality reduction on X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed values.\n\nNotes\n-----\nThis method returns a Fortran-ordered array. To convert it to a\nC-ordered array, use 'np.ascontiguousarray'.", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Fit the model with X and apply the dimensionality reduction on X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Transformed values.\n\n Notes\n -----\n This method returns a Fortran-ordered array. To convert it to a\n C-ordered array, use 'np.ascontiguousarray'.\n \"\"\"\n U, S, Vt = self._fit(X)\n U = U[:, :self.n_components_]\n\n if self.whiten:\n # X_new = X * V / S * sqrt(n_samples) = U * sqrt(n_samples)\n U *= sqrt(X.shape[0] - 1)\n else:\n # X_new = X * V = U * S * Vt * V = U * S\n U *= S[:self.n_components_]\n\n return U" + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/score", + "name": "score", + "qname": "sklearn.decomposition._pca.PCA.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/score/self", + "name": "self", + "qname": "sklearn.decomposition._pca.PCA.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/score/X", + "name": "X", + "qname": "sklearn.decomposition._pca.PCA.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/score/y", + "name": "y", + "qname": "sklearn.decomposition._pca.PCA.score.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the average log-likelihood of all samples.\n\nSee. \"Pattern Recognition and Machine Learning\"\nby C. Bishop, 12.2.1 p. 
574\nor http://www.miketipping.com/papers/met-mppca.pdf", + "docstring": "Return the average log-likelihood of all samples.\n\nSee. \"Pattern Recognition and Machine Learning\"\nby C. Bishop, 12.2.1 p. 574\nor http://www.miketipping.com/papers/met-mppca.pdf\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\n\ny : Ignored\n\nReturns\n-------\nll : float\n Average log-likelihood of the samples under the current model.", + "code": " def score(self, X, y=None):\n \"\"\"Return the average log-likelihood of all samples.\n\n See. \"Pattern Recognition and Machine Learning\"\n by C. Bishop, 12.2.1 p. 574\n or http://www.miketipping.com/papers/met-mppca.pdf\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data.\n\n y : Ignored\n\n Returns\n -------\n ll : float\n Average log-likelihood of the samples under the current model.\n \"\"\"\n return np.mean(self.score_samples(X))" + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/score_samples", + "name": "score_samples", + "qname": "sklearn.decomposition._pca.PCA.score_samples", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/score_samples/self", + "name": "self", + "qname": "sklearn.decomposition._pca.PCA.score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/PCA/score_samples/X", + "name": "X", + "qname": "sklearn.decomposition._pca.PCA.score_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the log-likelihood of each sample.\n\nSee. \"Pattern Recognition and Machine Learning\"\nby C. Bishop, 12.2.1 p. 574\nor http://www.miketipping.com/papers/met-mppca.pdf", + "docstring": "Return the log-likelihood of each sample.\n\nSee. \"Pattern Recognition and Machine Learning\"\nby C. Bishop, 12.2.1 p. 574\nor http://www.miketipping.com/papers/met-mppca.pdf\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\n\nReturns\n-------\nll : ndarray of shape (n_samples,)\n Log-likelihood of each sample under the current model.", + "code": " def score_samples(self, X):\n \"\"\"Return the log-likelihood of each sample.\n\n See. \"Pattern Recognition and Machine Learning\"\n by C. Bishop, 12.2.1 p. 574\n or http://www.miketipping.com/papers/met-mppca.pdf\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data.\n\n Returns\n -------\n ll : ndarray of shape (n_samples,)\n Log-likelihood of each sample under the current model.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, dtype=[np.float64, np.float32], reset=False)\n Xr = X - self.mean_\n n_features = X.shape[1]\n precision = self.get_precision()\n log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)\n log_like -= .5 * (n_features * log(2. 
* np.pi) -\n fast_logdet(precision))\n return log_like" + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/_assess_dimension", + "name": "_assess_dimension", + "qname": "sklearn.decomposition._pca._assess_dimension", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._pca/_assess_dimension/spectrum", + "name": "spectrum", + "qname": "sklearn.decomposition._pca._assess_dimension.spectrum", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,)", + "default_value": "", + "description": "Data spectrum." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/_assess_dimension/rank", + "name": "rank", + "qname": "sklearn.decomposition._pca._assess_dimension.rank", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Tested rank value. It should be strictly lower than n_features,\notherwise the method isn't specified (division by zero in equation\n(31) from the paper)." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/_assess_dimension/n_samples", + "name": "n_samples", + "qname": "sklearn.decomposition._pca._assess_dimension.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the log-likelihood of a rank ``rank`` dataset.\n\nThe dataset is assumed to be embedded in gaussian noise of shape(n,\ndimf) having spectrum ``spectrum``. This implements the method of\nT. P. Minka.", + "docstring": "Compute the log-likelihood of a rank ``rank`` dataset.\n\nThe dataset is assumed to be embedded in gaussian noise of shape(n,\ndimf) having spectrum ``spectrum``. This implements the method of\nT. P. Minka.\n\nParameters\n----------\nspectrum : ndarray of shape (n_features,)\n Data spectrum.\nrank : int\n Tested rank value. It should be strictly lower than n_features,\n otherwise the method isn't specified (division by zero in equation\n (31) from the paper).\nn_samples : int\n Number of samples.\n\nReturns\n-------\nll : float\n The log-likelihood.\n\nReferences\n----------\nThis implements the method of `Thomas P. Minka:\nAutomatic Choice of Dimensionality for PCA. NIPS 2000: 598-604\n`_", + "code": "def _assess_dimension(spectrum, rank, n_samples):\n \"\"\"Compute the log-likelihood of a rank ``rank`` dataset.\n\n The dataset is assumed to be embedded in gaussian noise of shape(n,\n dimf) having spectrum ``spectrum``. This implements the method of\n T. P. Minka.\n\n Parameters\n ----------\n spectrum : ndarray of shape (n_features,)\n Data spectrum.\n rank : int\n Tested rank value. It should be strictly lower than n_features,\n otherwise the method isn't specified (division by zero in equation\n (31) from the paper).\n n_samples : int\n Number of samples.\n\n Returns\n -------\n ll : float\n The log-likelihood.\n\n References\n ----------\n This implements the method of `Thomas P. Minka:\n Automatic Choice of Dimensionality for PCA. 
NIPS 2000: 598-604\n `_\n \"\"\"\n\n n_features = spectrum.shape[0]\n if not 1 <= rank < n_features:\n raise ValueError(\"the tested rank should be in [1, n_features - 1]\")\n\n eps = 1e-15\n\n if spectrum[rank - 1] < eps:\n # When the tested rank is associated with a small eigenvalue, there's\n # no point in computing the log-likelihood: it's going to be very\n # small and won't be the max anyway. Also, it can lead to numerical\n # issues below when computing pa, in particular in log((spectrum[i] -\n # spectrum[j]) because this will take the log of something very small.\n return -np.inf\n\n pu = -rank * log(2.)\n for i in range(1, rank + 1):\n pu += (gammaln((n_features - i + 1) / 2.) -\n log(np.pi) * (n_features - i + 1) / 2.)\n\n pl = np.sum(np.log(spectrum[:rank]))\n pl = -pl * n_samples / 2.\n\n v = max(eps, np.sum(spectrum[rank:]) / (n_features - rank))\n pv = -np.log(v) * n_samples * (n_features - rank) / 2.\n\n m = n_features * rank - rank * (rank + 1.) / 2.\n pp = log(2. * np.pi) * (m + rank) / 2.\n\n pa = 0.\n spectrum_ = spectrum.copy()\n spectrum_[rank:n_features] = v\n for i in range(rank):\n for j in range(i + 1, len(spectrum)):\n pa += log((spectrum[i] - spectrum[j]) *\n (1. / spectrum_[j] - 1. / spectrum_[i])) + log(n_samples)\n\n ll = pu + pl + pv + pp - pa / 2. - rank * log(n_samples) / 2.\n\n return ll" + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/_infer_dimension", + "name": "_infer_dimension", + "qname": "sklearn.decomposition._pca._infer_dimension", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._pca/_infer_dimension/spectrum", + "name": "spectrum", + "qname": "sklearn.decomposition._pca._infer_dimension.spectrum", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._pca/_infer_dimension/n_samples", + "name": "n_samples", + "qname": "sklearn.decomposition._pca._infer_dimension.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Infers the dimension of a dataset with a given spectrum.\n\nThe returned value will be in [1, n_features - 1].", + "docstring": "Infers the dimension of a dataset with a given spectrum.\n\nThe returned value will be in [1, n_features - 1].", + "code": "def _infer_dimension(spectrum, n_samples):\n \"\"\"Infers the dimension of a dataset with a given spectrum.\n\n The returned value will be in [1, n_features - 1].\n \"\"\"\n ll = np.empty_like(spectrum)\n ll[0] = -np.inf # we don't want to return n_components = 0\n for rank in range(1, spectrum.shape[0]):\n ll[rank] = _assess_dimension(spectrum, rank, n_samples)\n return ll.argmax()" + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": 
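`_assess_dimension` and `_infer_dimension` above are the private helpers behind Minka's MLE choice of dimensionality; they are reached through the public API as `PCA(n_components='mle')`. A sketch under that assumption:

```python
# Sketch: Minka's MLE rank selection, implemented by the private helpers
# above, is exposed publicly via n_components='mle' with the full SVD solver.
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

X = load_iris().data
pca = PCA(n_components='mle', svd_solver='full').fit(X)

# _infer_dimension returns the argmax of the per-rank log-likelihoods and
# pins ll[0] to -inf, so the chosen rank lies in [1, n_features - 1].
print(pca.n_components_)
```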
"", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "number of sparse atoms to extract" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/alpha", + "name": "alpha", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.alpha", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Sparsity controlling parameter. Higher values lead to sparser\ncomponents." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/ridge_alpha", + "name": "ridge_alpha", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.ridge_alpha", + "default_value": "0.01", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.01", + "description": "Amount of ridge shrinkage to apply in order to improve\nconditioning when calling the transform method." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/n_iter", + "name": "n_iter", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.n_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "number of iterations to perform for each mini batch" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/callback", + "name": "callback", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.callback", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "callable that gets invoked every five iterations" + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/batch_size", + "name": "batch_size", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.batch_size", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "the number of features to take in each mini batch" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or bool", + "default_value": "False", + "description": "Controls the verbosity; the higher, the more messages. Defaults to 0." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "whether to shuffle the data before splitting it in batches" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/method", + "name": "method", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.method", + "default_value": "'lars'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lars', 'cd'}", + "default_value": "'lars'", + "description": "lars: uses the least angle regression method to solve the lasso problem\n(linear_model.lars_path)\ncd: uses the coordinate descent method to compute the\nLasso solution (linear_model.Lasso). Lars will be faster if\nthe estimated components are sparse." + }, + "type": { + "kind": "EnumType", + "values": ["lars", "cd"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/__init__/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used for random shuffling when ``shuffle`` is set to ``True``,\nduring online dictionary learning. Pass an int for reproducible results\nacross multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mini-batch Sparse Principal Components Analysis\n\nFinds the set of sparse components that can optimally reconstruct\nthe data. 
The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=None, *, alpha=1, ridge_alpha=0.01,\n n_iter=100, callback=None, batch_size=3, verbose=False,\n shuffle=True, n_jobs=None, method='lars', random_state=None):\n super().__init__(\n n_components=n_components, alpha=alpha, verbose=verbose,\n ridge_alpha=ridge_alpha, n_jobs=n_jobs, method=method,\n random_state=random_state)\n self.n_iter = n_iter\n self.callback = callback\n self.batch_size = batch_size\n self.shuffle = shuffle" + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/fit", + "name": "fit", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/fit/self", + "name": "self", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/fit/X", + "name": "X", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/MiniBatchSparsePCA/fit/y", + "name": "y", + "qname": "sklearn.decomposition._sparse_pca.MiniBatchSparsePCA.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model from data in X.", + "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the model from data in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n random_state = check_random_state(self.random_state)\n X = self._validate_data(X)\n\n self.mean_ = X.mean(axis=0)\n X = X - self.mean_\n\n if self.n_components is None:\n n_components = X.shape[1]\n else:\n n_components = self.n_components\n Vt, _, self.n_iter_ = dict_learning_online(\n X.T, n_components, alpha=self.alpha,\n n_iter=self.n_iter, return_code=True,\n dict_init=None, verbose=self.verbose,\n callback=self.callback,\n batch_size=self.batch_size,\n shuffle=self.shuffle,\n n_jobs=self.n_jobs, method=self.method,\n 
random_state=random_state,\n return_n_iter=True)\n self.components_ = Vt.T\n\n components_norm = np.linalg.norm(\n self.components_, axis=1)[:, np.newaxis]\n components_norm[components_norm == 0] = 1\n self.components_ /= components_norm\n self.n_components_ = len(self.components_)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.__init__.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of sparse atoms to extract." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__/alpha", + "name": "alpha", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.__init__.alpha", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1", + "description": "Sparsity controlling parameter. Higher values lead to sparser\ncomponents." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__/ridge_alpha", + "name": "ridge_alpha", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.__init__.ridge_alpha", + "default_value": "0.01", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.01", + "description": "Amount of ridge shrinkage to apply in order to improve\nconditioning when calling the transform method." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Maximum number of iterations to perform." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__/tol", + "name": "tol", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.__init__.tol", + "default_value": "1e-08", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-8", + "description": "Tolerance for the stopping condition." 
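The `MiniBatchSparsePCA.fit` entry above centers X, runs `dict_learning_online` on `X.T`, and normalizes the resulting components. A usage sketch with illustrative, made-up parameter values:

```python
# Usage sketch (illustrative values): mini-batch sparse PCA yields components
# containing exact zeros, with sparsity controlled by alpha as described above.
import numpy as np
from sklearn.decomposition import MiniBatchSparsePCA

rng = np.random.RandomState(0)
X = rng.randn(100, 20)

est = MiniBatchSparsePCA(n_components=5, alpha=1, batch_size=3,
                         n_iter=100, random_state=0)
codes = est.fit_transform(X)

print(codes.shape)                     # (100, 5)
print(np.mean(est.components_ == 0))   # fraction of exactly-zero loadings
```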
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__/method", + "name": "method", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.__init__.method", + "default_value": "'lars'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lars', 'cd'}", + "default_value": "'lars'", + "description": "lars: uses the least angle regression method to solve the lasso problem\n(linear_model.lars_path)\ncd: uses the coordinate descent method to compute the\nLasso solution (linear_model.Lasso). Lars will be faster if\nthe estimated components are sparse." + }, + "type": { + "kind": "EnumType", + "values": ["lars", "cd"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__/U_init", + "name": "U_init", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.__init__.U_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_components)", + "default_value": "None", + "description": "Initial values for the loadings for warm restart scenarios. Only used\nif `U_init` and `V_init` are not None." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__/V_init", + "name": "V_init", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.__init__.V_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components, n_features)", + "default_value": "None", + "description": "Initial values for the components for warm restart scenarios. Only used\nif `U_init` and `V_init` are not None." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__/verbose", + "name": "verbose", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or bool", + "default_value": "False", + "description": "Controls the verbosity; the higher, the more messages. Defaults to 0." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/__init__/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used during dictionary learning. 
Pass an int for reproducible results\nacross multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Sparse Principal Components Analysis (SparsePCA).\n\nFinds the set of sparse components that can optimally reconstruct\nthe data. The amount of sparseness is controllable by the coefficient\nof the L1 penalty, given by the parameter alpha.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=None, *, alpha=1, ridge_alpha=0.01,\n max_iter=1000, tol=1e-8, method='lars', n_jobs=None,\n U_init=None, V_init=None, verbose=False, random_state=None):\n self.n_components = n_components\n self.alpha = alpha\n self.ridge_alpha = ridge_alpha\n self.max_iter = max_iter\n self.tol = tol\n self.method = method\n self.n_jobs = n_jobs\n self.U_init = U_init\n self.V_init = V_init\n self.verbose = verbose\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/fit", + "name": "fit", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/fit/self", + "name": "self", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/fit/X", + "name": "X", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples\nand n_features is the number of features."
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/fit/y", + "name": "y", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model from data in X.", + "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the model from data in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n random_state = check_random_state(self.random_state)\n X = self._validate_data(X)\n\n self.mean_ = X.mean(axis=0)\n X = X - self.mean_\n\n if self.n_components is None:\n n_components = X.shape[1]\n else:\n n_components = self.n_components\n code_init = self.V_init.T if self.V_init is not None else None\n dict_init = self.U_init.T if self.U_init is not None else None\n Vt, _, E, self.n_iter_ = dict_learning(X.T, n_components,\n alpha=self.alpha,\n tol=self.tol,\n max_iter=self.max_iter,\n method=self.method,\n n_jobs=self.n_jobs,\n verbose=self.verbose,\n random_state=random_state,\n code_init=code_init,\n dict_init=dict_init,\n return_n_iter=True)\n self.components_ = Vt.T\n components_norm = np.linalg.norm(\n self.components_, axis=1)[:, np.newaxis]\n components_norm[components_norm == 0] = 1\n self.components_ /= components_norm\n self.n_components_ = len(self.components_)\n\n self.error_ = E\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/transform", + "name": "transform", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/transform/self", + "name": "self", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._sparse_pca/SparsePCA/transform/X", + "name": "X", + "qname": "sklearn.decomposition._sparse_pca.SparsePCA.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Test data to be transformed, must have the same number of\nfeatures as the data used to train the model."
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Least Squares projection of the data onto the sparse components.\n\nTo avoid instability issues in case the system is under-determined,\nregularization can be applied (Ridge regression) via the\n`ridge_alpha` parameter.\n\nNote that Sparse PCA components orthogonality is not enforced as in PCA\nhence one cannot use a simple linear projection.", + "docstring": "Least Squares projection of the data onto the sparse components.\n\nTo avoid instability issues in case the system is under-determined,\nregularization can be applied (Ridge regression) via the\n`ridge_alpha` parameter.\n\nNote that Sparse PCA components orthogonality is not enforced as in PCA\nhence one cannot use a simple linear projection.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed data.", + "code": " def transform(self, X):\n \"\"\"Least Squares projection of the data onto the sparse components.\n\n To avoid instability issues in case the system is under-determined,\n regularization can be applied (Ridge regression) via the\n `ridge_alpha` parameter.\n\n Note that Sparse PCA components orthogonality is not enforced as in PCA\n hence one cannot use a simple linear projection.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Test data to be transformed, must have the same number of\n features as the data used to train the model.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, reset=False)\n X = X - self.mean_\n\n U = ridge_regression(self.components_.T, X.T, self.ridge_alpha,\n solver='cholesky')\n\n return U" + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/__init__", + "name": "__init__", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/__init__/self", + "name": "self", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/__init__/n_components", + "name": "n_components", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.__init__.n_components", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Desired dimensionality of output data.\nMust be strictly less than the number of features.\nThe default value is useful for visualisation. For LSA, a value of\n100 is recommended." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.__init__.algorithm", + "default_value": "'randomized'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'arpack', 'randomized'}", + "default_value": "'randomized'", + "description": "SVD solver to use. Either \"arpack\" for the ARPACK wrapper in SciPy\n(scipy.sparse.linalg.svds), or \"randomized\" for the randomized\nalgorithm due to Halko (2009)." + }, + "type": { + "kind": "EnumType", + "values": ["randomized", "arpack"] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/__init__/n_iter", + "name": "n_iter", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.__init__.n_iter", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of iterations for randomized SVD solver. Not used by ARPACK. The\ndefault is larger than the default in\n:func:`~sklearn.utils.extmath.randomized_svd` to handle sparse\nmatrices that may have large slowly decaying spectrum." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/__init__/random_state", + "name": "random_state", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used during randomized svd. Pass an int for reproducible results across\nmultiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/__init__/tol", + "name": "tol", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.__init__.tol", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "Tolerance for ARPACK. 0 means machine precision. Ignored by randomized\nSVD solver." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Dimensionality reduction using truncated SVD (aka LSA).\n\nThis transformer performs linear dimensionality reduction by means of\ntruncated singular value decomposition (SVD). Contrary to PCA, this\nestimator does not center the data before computing the singular value\ndecomposition. This means it can work with sparse matrices\nefficiently.\n\nIn particular, truncated SVD works on term count/tf-idf matrices as\nreturned by the vectorizers in :mod:`sklearn.feature_extraction.text`. 
In\nthat context, it is known as latent semantic analysis (LSA).\n\nThis estimator supports two algorithms: a fast randomized SVD solver, and\na \"naive\" algorithm that uses ARPACK as an eigensolver on `X * X.T` or\n`X.T * X`, whichever is more efficient.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=2, *, algorithm=\"randomized\", n_iter=5,\n random_state=None, tol=0.):\n self.algorithm = algorithm\n self.n_components = n_components\n self.n_iter = n_iter\n self.random_state = random_state\n self.tol = tol" + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/_more_tags", + "name": "_more_tags", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/_more_tags/self", + "name": "self", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'preserves_dtype': [np.float64, np.float32]}" + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/fit", + "name": "fit", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/fit/self", + "name": "self", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/fit/X", + "name": "X", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/fit/y", + "name": "y", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit model on training data X.", + "docstring": "Fit model on training data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the transformer object.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit model on training data X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the transformer object.\n \"\"\"\n self.fit_transform(X)\n return self" + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/fit_transform", + "name": "fit_transform", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/fit_transform/self", + "name": "self", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/fit_transform/X", + "name": "X", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/fit_transform/y", + "name": "y", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit model to X and perform dimensionality reduction on X.", + "docstring": "Fit model to X and perform dimensionality reduction on X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : Ignored\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Reduced version of X. 
This will always be a dense array.", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Fit model to X and perform dimensionality reduction on X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : Ignored\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Reduced version of X. This will always be a dense array.\n \"\"\"\n X = self._validate_data(X, accept_sparse=['csr', 'csc'],\n ensure_min_features=2)\n random_state = check_random_state(self.random_state)\n\n if self.algorithm == \"arpack\":\n v0 = _init_arpack_v0(min(X.shape), random_state)\n U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol, v0=v0)\n # svds doesn't abide by scipy.linalg.svd/randomized_svd\n # conventions, so reverse its outputs.\n Sigma = Sigma[::-1]\n U, VT = svd_flip(U[:, ::-1], VT[::-1])\n\n elif self.algorithm == \"randomized\":\n k = self.n_components\n n_features = X.shape[1]\n if k >= n_features:\n raise ValueError(\"n_components must be < n_features;\"\n \" got %d >= %d\" % (k, n_features))\n U, Sigma, VT = randomized_svd(X, self.n_components,\n n_iter=self.n_iter,\n random_state=random_state)\n else:\n raise ValueError(\"unknown algorithm %r\" % self.algorithm)\n\n self.components_ = VT\n\n # As a result of the SVD approximation error on X ~ U @ Sigma @ V.T,\n # X @ V is not the same as U @ Sigma\n if self.algorithm == \"randomized\" or \\\n (self.algorithm == \"arpack\" and self.tol > 0):\n X_transformed = safe_sparse_dot(X, self.components_.T)\n else:\n X_transformed = U * Sigma\n\n # Calculate explained variance & explained variance ratio\n self.explained_variance_ = exp_var = np.var(X_transformed, axis=0)\n if sp.issparse(X):\n _, full_var = mean_variance_axis(X, axis=0)\n full_var = full_var.sum()\n else:\n full_var = np.var(X, axis=0).sum()\n self.explained_variance_ratio_ = exp_var / full_var\n self.singular_values_ = Sigma # Store the singular values.\n\n return X_transformed" + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/inverse_transform/self", + "name": "self", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/inverse_transform/X", + "name": "X", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "New data." 
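`TruncatedSVD.fit_transform` above never centers X, which is what lets it consume scipy sparse matrices directly; on tf-idf matrices this is latent semantic analysis. A sketch with a made-up toy corpus:

```python
# Sketch (toy corpus): LSA with TruncatedSVD on a sparse tf-idf matrix.
# The input stays sparse end to end; the output is always dense, as noted above.
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer

corpus = ["the cat sat on the mat",
          "the dog sat on the log",
          "cats and dogs are animals"]

X = TfidfVectorizer().fit_transform(corpus)   # sparse (3, vocab) matrix
svd = TruncatedSVD(n_components=2, algorithm="randomized",
                   n_iter=5, random_state=0)
X_reduced = svd.fit_transform(X)              # dense (3, 2) ndarray

print(svd.explained_variance_ratio_.sum())
print(svd.singular_values_)
```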
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform X back to its original space.\n\nReturns an array X_original whose transform would be X.", + "docstring": "Transform X back to its original space.\n\nReturns an array X_original whose transform would be X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_components)\n New data.\n\nReturns\n-------\nX_original : ndarray of shape (n_samples, n_features)\n Note that this is always a dense array.", + "code": " def inverse_transform(self, X):\n \"\"\"Transform X back to its original space.\n\n Returns an array X_original whose transform would be X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_components)\n New data.\n\n Returns\n -------\n X_original : ndarray of shape (n_samples, n_features)\n Note that this is always a dense array.\n \"\"\"\n X = check_array(X)\n return np.dot(X, self.components_)" + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/transform", + "name": "transform", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/transform/self", + "name": "self", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition._truncated_svd/TruncatedSVD/transform/X", + "name": "X", + "qname": "sklearn.decomposition._truncated_svd.TruncatedSVD.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "New data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform dimensionality reduction on X.", + "docstring": "Perform dimensionality reduction on X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Reduced version of X. This will always be a dense array.", + "code": " def transform(self, X):\n \"\"\"Perform dimensionality reduction on X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Reduced version of X. 
This will always be a dense array.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse=['csr', 'csc'], reset=False)\n return safe_sparse_dot(X, self.components_.T)" + }, + { + "id": "scikit-learn/sklearn.decomposition.setup/configuration", + "name": "configuration", + "qname": "sklearn.decomposition.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.decomposition.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.decomposition.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.decomposition.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.decomposition.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package=\"\", top_path=None):\n config = Configuration(\"decomposition\", parent_package, top_path)\n\n libraries = []\n if os.name == 'posix':\n libraries.append('m')\n\n config.add_extension(\"_online_lda_fast\",\n sources=[\"_online_lda_fast.pyx\"],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_extension('_cdnmf_fast',\n sources=['_cdnmf_fast.pyx'],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_subpackage(\"tests\")\n\n return config" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/__init__", + "name": "__init__", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/__init__/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/__init__/solver", + "name": "solver", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.__init__.solver", + "default_value": "'svd'", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{'svd', 'lsqr', 'eigen'}", + "default_value": "'svd'", + "description": "Solver to use, possible values:\n - 'svd': Singular value decomposition (default).\n Does not compute the covariance matrix, therefore this solver is\n recommended for data with a large number of features.\n - 'lsqr': Least squares solution.\n Can be combined with shrinkage or custom covariance estimator.\n - 'eigen': Eigenvalue decomposition.\n Can be combined with shrinkage or custom covariance estimator." 
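`transform` and `inverse_transform` above are plain matrix products with `components_`, so a round trip gives a rank-k approximation of the input. A sketch with made-up data:

```python
# Sketch (made-up data): inverse_transform(transform(X)) is the rank-k
# reconstruction X_reduced @ components_, per the entries above.
import numpy as np
from sklearn.decomposition import TruncatedSVD

rng = np.random.RandomState(0)
X = rng.rand(50, 10)

svd = TruncatedSVD(n_components=9, random_state=0)   # must be < n_features
X_back = svd.inverse_transform(svd.fit_transform(X))

# Relative reconstruction error; shrinks as n_components approaches 9.
print(np.linalg.norm(X - X_back) / np.linalg.norm(X))
```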
+ }, + "type": { + "kind": "EnumType", + "values": ["eigen", "svd", "lsqr"] + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/__init__/shrinkage", + "name": "shrinkage", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.__init__.shrinkage", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "'auto' or float", + "default_value": "None", + "description": "Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\nThis should be left to None if `covariance_estimator` is used.\nNote that shrinkage works only with 'lsqr' and 'eigen' solvers." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/__init__/priors", + "name": "priors", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.__init__.priors", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "The class prior probabilities. By default, the class proportions are\ninferred from the training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/__init__/n_components", + "name": "n_components", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.__init__.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of components (<= min(n_classes - 1, n_features)) for\ndimensionality reduction. If None, will be set to\nmin(n_classes - 1, n_features). This parameter only affects the\n`transform` method." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/__init__/store_covariance", + "name": "store_covariance", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.__init__.store_covariance", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, explicitely compute the weighted within-class covariance\nmatrix when solver is 'svd'. The matrix is always computed\nand stored for the other solvers.\n\n.. versionadded:: 0.17" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/__init__/tol", + "name": "tol", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.__init__.tol", + "default_value": "0.0001", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.0e-4", + "description": "Absolute threshold for a singular value of X to be considered\nsignificant, used to estimate the rank of X. Dimensions whose\nsingular values are non-significant are discarded. Only used if\nsolver is 'svd'.\n\n.. 
versionadded:: 0.17" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/__init__/covariance_estimator", + "name": "covariance_estimator", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.__init__.covariance_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "covariance estimator", + "default_value": "None", + "description": "If not None, `covariance_estimator` is used to estimate\nthe covariance matrices instead of relying on the empirical\ncovariance estimator (with potential shrinkage).\nThe object should have a fit method and a ``covariance_`` attribute\nlike the estimators in :mod:`sklearn.covariance`.\nif None the shrinkage parameter drives the estimate.\n\nThis should be left to None if `shrinkage` is used.\nNote that `covariance_estimator` works only with 'lsqr' and 'eigen'\nsolvers.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "covariance estimator" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Linear Discriminant Analysis\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. versionadded:: 0.17\n *LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " def __init__(self, solver='svd', shrinkage=None, priors=None,\n n_components=None, store_covariance=False, tol=1e-4,\n covariance_estimator=None):\n self.solver = solver\n self.shrinkage = shrinkage\n self.priors = priors\n self.n_components = n_components\n self.store_covariance = store_covariance # used only in svd solver\n self.tol = tol # used only in svd solver\n self.covariance_estimator = covariance_estimator" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_eigen", + "name": "_solve_eigen", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_eigen", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_eigen/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_eigen.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_eigen/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_eigen.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." 
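The `LinearDiscriminantAnalysis` constructor above only stores its parameters; the solver choice matters at fit time. `shrinkage` and `covariance_estimator` pair only with `'lsqr'` and `'eigen'`, while the default `'svd'` avoids computing a covariance matrix. A usage sketch on a standard dataset:

```python
# Usage sketch: the solver/shrinkage combinations documented above.
from sklearn.datasets import load_iris
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

X, y = load_iris(return_X_y=True)

# Ledoit-Wolf shrinkage requires the 'lsqr' or 'eigen' solver.
clf = LinearDiscriminantAnalysis(solver="lsqr", shrinkage="auto").fit(X, y)
print(clf.score(X, y))

# The default 'svd' solver supports transform-based dimensionality reduction.
reducer = LinearDiscriminantAnalysis(n_components=2).fit(X, y)
print(reducer.transform(X).shape)       # (150, 2)
```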
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_eigen/y", + "name": "y", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_eigen.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_eigen/shrinkage", + "name": "shrinkage", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_eigen.shrinkage", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "'auto', float or None", + "default_value": "", + "description": "Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage constant.\n\nShrinkage parameter is ignored if `covariance_estimator` i\nnot None" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_eigen/covariance_estimator", + "name": "covariance_estimator", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_eigen.covariance_estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator", + "default_value": "None", + "description": "If not None, `covariance_estimator` is used to estimate\nthe covariance matrices instead of relying the empirical\ncovariance estimator (with potential shrinkage).\nThe object should have a fit method and a ``covariance_`` attribute\nlike the estimators in sklearn.covariance.\nif None the shrinkage parameter drives the estimate.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "estimator" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Eigenvalue solver.\n\nThe eigenvalue solver computes the optimal solution of the Rayleigh\ncoefficient (basically the ratio of between class scatter to within\nclass scatter). This solver supports both classification and\ndimensionality reduction (with any covariance estimator).", + "docstring": "Eigenvalue solver.\n\nThe eigenvalue solver computes the optimal solution of the Rayleigh\ncoefficient (basically the ratio of between class scatter to within\nclass scatter). 
This solver supports both classification and\ndimensionality reduction (with any covariance estimator).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nshrinkage : 'auto', float or None\n Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage constant.\n\n Shrinkage parameter is ignored if `covariance_estimator` i\n not None\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nNotes\n-----\nThis solver is based on [1]_, section 3.8.3, pp. 121-124.\n\nReferences\n----------\n.. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n 0-471-05669-3.", + "code": " def _solve_eigen(self, X, y, shrinkage,\n covariance_estimator):\n \"\"\"Eigenvalue solver.\n\n The eigenvalue solver computes the optimal solution of the Rayleigh\n coefficient (basically the ratio of between class scatter to within\n class scatter). This solver supports both classification and\n dimensionality reduction (with any covariance estimator).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\n shrinkage : 'auto', float or None\n Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage constant.\n\n Shrinkage parameter is ignored if `covariance_estimator` i\n not None\n\n covariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\n Notes\n -----\n This solver is based on [1]_, section 3.8.3, pp. 121-124.\n\n References\n ----------\n .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n (Second Edition). John Wiley & Sons, Inc., New York, 2001. 
ISBN\n 0-471-05669-3.\n \"\"\"\n self.means_ = _class_means(X, y)\n self.covariance_ = _class_cov(X, y, self.priors_, shrinkage,\n covariance_estimator)\n\n Sw = self.covariance_ # within scatter\n St = _cov(X, shrinkage, covariance_estimator) # total scatter\n Sb = St - Sw # between scatter\n\n evals, evecs = linalg.eigh(Sb, Sw)\n self.explained_variance_ratio_ = np.sort(evals / np.sum(evals)\n )[::-1][:self._max_components]\n evecs = evecs[:, np.argsort(evals)[::-1]] # sort eigenvectors\n\n self.scalings_ = evecs\n self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)\n self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T)) +\n np.log(self.priors_))" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr", + "name": "_solve_lsqr", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lsqr", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lsqr.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lsqr.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr/y", + "name": "y", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lsqr.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_classes)", + "default_value": "", + "description": "Target values." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr/shrinkage", + "name": "shrinkage", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lsqr.shrinkage", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "'auto', float or None", + "default_value": "", + "description": "Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\nShrinkage parameter is ignored if `covariance_estimator` i\nnot None" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_lsqr/covariance_estimator", + "name": "covariance_estimator", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_lsqr.covariance_estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator", + "default_value": "None", + "description": "If not None, `covariance_estimator` is used to estimate\nthe covariance matrices instead of relying the empirical\ncovariance estimator (with potential shrinkage).\nThe object should have a fit method and a ``covariance_`` attribute\nlike the estimators in sklearn.covariance.\nif None the shrinkage parameter drives the estimate.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "estimator" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Least squares solver.\n\nThe least squares solver computes a straightforward solution of the\noptimal decision rule based directly on the discriminant functions. It\ncan only be used for classification (with any covariance estimator),\nbecause\nestimation of eigenvectors is not performed. Therefore, dimensionality\nreduction with the transform is not supported.", + "docstring": "Least squares solver.\n\nThe least squares solver computes a straightforward solution of the\noptimal decision rule based directly on the discriminant functions. It\ncan only be used for classification (with any covariance estimator),\nbecause\nestimation of eigenvectors is not performed. Therefore, dimensionality\nreduction with the transform is not supported.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target values.\n\nshrinkage : 'auto', float or None\n Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator` i\n not None\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n if None the shrinkage parameter drives the estimate.\n\n .. 
versionadded:: 0.24\n\nNotes\n-----\nThis solver is based on [1]_, section 2.6.2, pp. 39-41.\n\nReferences\n----------\n.. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n 0-471-05669-3.", + "code": " def _solve_lsqr(self, X, y, shrinkage, covariance_estimator):\n \"\"\"Least squares solver.\n\n The least squares solver computes a straightforward solution of the\n optimal decision rule based directly on the discriminant functions. It\n can only be used for classification (with any covariance estimator),\n because estimation of eigenvectors is not performed. Therefore, dimensionality\n reduction with the transform is not supported.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target values.\n\n shrinkage : 'auto', float or None\n Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator` is\n not None.\n\n covariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n If None, the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\n Notes\n -----\n This solver is based on [1]_, section 2.6.2, pp. 39-41.\n\n References\n ----------\n .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n 0-471-05669-3.\n \"\"\"\n self.means_ = _class_means(X, y)\n self.covariance_ = _class_cov(X, y, self.priors_, shrinkage,\n covariance_estimator)\n self.coef_ = linalg.lstsq(self.covariance_, self.means_.T)[0].T\n self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T)) +\n np.log(self.priors_))" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_svd", + "name": "_solve_svd", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_svd", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_svd/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_svd.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_svd/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_svd.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/_solve_svd/y", + "name": "y", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis._solve_svd.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "SVD solver.", + "docstring": "SVD solver.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.", + "code": " def _solve_svd(self, X, y):\n \"\"\"SVD solver.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n \"\"\"\n n_samples, n_features = X.shape\n n_classes = len(self.classes_)\n\n self.means_ = _class_means(X, y)\n if self.store_covariance:\n self.covariance_ = _class_cov(X, y, self.priors_)\n\n Xc = []\n for idx, group in enumerate(self.classes_):\n Xg = X[y == group, :]\n Xc.append(Xg - self.means_[idx])\n\n self.xbar_ = np.dot(self.priors_, self.means_)\n\n Xc = np.concatenate(Xc, axis=0)\n\n # 1) within (univariate) scaling by with classes std-dev\n std = Xc.std(axis=0)\n # avoid division by zero in normalization\n std[std == 0] = 1.\n fac = 1. / (n_samples - n_classes)\n\n # 2) Within variance scaling\n X = np.sqrt(fac) * (Xc / std)\n # SVD of centered (within)scaled data\n U, S, Vt = linalg.svd(X, full_matrices=False)\n\n rank = np.sum(S > self.tol)\n # Scaling of within covariance is: V' 1/S\n scalings = (Vt[:rank] / std).T / S[:rank]\n\n # 3) Between variance scaling\n # Scale weighted centers\n X = np.dot(((np.sqrt((n_samples * self.priors_) * fac)) *\n (self.means_ - self.xbar_).T).T, scalings)\n # Centers are living in a space with n_classes-1 dim (maximum)\n # Use SVD to find projection in the space spanned by the\n # (n_classes) centers\n _, S, Vt = linalg.svd(X, full_matrices=0)\n\n self.explained_variance_ratio_ = (S**2 / np.sum(\n S**2))[:self._max_components]\n rank = np.sum(S > self.tol * S[0])\n self.scalings_ = np.dot(scalings, Vt.T[:, :rank])\n coef = np.dot(self.means_ - self.xbar_, self.scalings_)\n self.intercept_ = (-0.5 * np.sum(coef ** 2, axis=1) +\n np.log(self.priors_))\n self.coef_ = np.dot(coef, self.scalings_.T)\n self.intercept_ -= np.dot(self.xbar_, self.coef_.T)" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/decision_function", + "name": "decision_function", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/decision_function/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/decision_function/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Array of samples (test vectors)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply decision function to an array of samples.\n\nThe decision function is equal (up to a constant factor) to the\nlog-posterior of the model, i.e. `log p(y = k | x)`. In a binary\nclassification setting this instead corresponds to the difference\n`log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.", + "docstring": "Apply decision function to an array of samples.\n\nThe decision function is equal (up to a constant factor) to the\nlog-posterior of the model, i.e. `log p(y = k | x)`. In a binary\nclassification setting this instead corresponds to the difference\n`log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples (test vectors).\n\nReturns\n-------\nC : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Decision function values related to each class, per sample.\n In the two-class case, the shape is (n_samples,), giving the\n log likelihood ratio of the positive class.", + "code": " def decision_function(self, X):\n \"\"\"Apply decision function to an array of samples.\n\n The decision function is equal (up to a constant factor) to the\n log-posterior of the model, i.e. `log p(y = k | x)`. In a binary\n classification setting this instead corresponds to the difference\n `log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Array of samples (test vectors).\n\n Returns\n -------\n C : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Decision function values related to each class, per sample.\n In the two-class case, the shape is (n_samples,), giving the\n log likelihood ratio of the positive class.\n \"\"\"\n # Only override for the doc\n return super().decision_function(X)" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/fit", + "name": "fit", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/fit/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/fit/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/fit/y", + "name": "y", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit LinearDiscriminantAnalysis model according to the given\n training data and parameters.\n\n .. versionchanged:: 0.19\n *store_covariance* has been moved to main constructor.\n\n .. versionchanged:: 0.19\n *tol* has been moved to main constructor.", + "docstring": "Fit LinearDiscriminantAnalysis model according to the given\n training data and parameters.\n\n .. versionchanged:: 0.19\n *store_covariance* has been moved to main constructor.\n\n .. versionchanged:: 0.19\n *tol* has been moved to main constructor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values.", + "code": " def fit(self, X, y):\n \"\"\"Fit LinearDiscriminantAnalysis model according to the given\n training data and parameters.\n\n .. versionchanged:: 0.19\n *store_covariance* has been moved to main constructor.\n\n .. versionchanged:: 0.19\n *tol* has been moved to main constructor.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,)\n Target values.\n \"\"\"\n X, y = self._validate_data(X, y, ensure_min_samples=2, estimator=self,\n dtype=[np.float64, np.float32])\n self.classes_ = unique_labels(y)\n n_samples, _ = X.shape\n n_classes = len(self.classes_)\n\n if n_samples == n_classes:\n raise ValueError(\"The number of samples must be more \"\n \"than the number of classes.\")\n\n if self.priors is None: # estimate priors from sample\n _, y_t = np.unique(y, return_inverse=True) # non-negative ints\n self.priors_ = np.bincount(y_t) / float(len(y))\n else:\n self.priors_ = np.asarray(self.priors)\n\n if (self.priors_ < 0).any():\n raise ValueError(\"priors must be non-negative\")\n if not np.isclose(self.priors_.sum(), 1.0):\n warnings.warn(\"The priors do not sum to 1. Renormalizing\",\n UserWarning)\n self.priors_ = self.priors_ / self.priors_.sum()\n\n # Maximum number of components no matter what n_components is\n # specified:\n max_components = min(len(self.classes_) - 1, X.shape[1])\n\n if self.n_components is None:\n self._max_components = max_components\n else:\n if self.n_components > max_components:\n raise ValueError(\n \"n_components cannot be larger than min(n_features, \"\n \"n_classes - 1).\"\n )\n self._max_components = self.n_components\n\n if self.solver == 'svd':\n if self.shrinkage is not None:\n raise NotImplementedError('shrinkage not supported')\n if self.covariance_estimator is not None:\n raise ValueError(\n 'covariance estimator '\n 'is not supported '\n 'with svd solver. 
Try another solver')\n self._solve_svd(X, y)\n elif self.solver == 'lsqr':\n self._solve_lsqr(X, y, shrinkage=self.shrinkage,\n covariance_estimator=self.covariance_estimator)\n elif self.solver == 'eigen':\n self._solve_eigen(X, y,\n shrinkage=self.shrinkage,\n covariance_estimator=self.covariance_estimator)\n else:\n raise ValueError(\"unknown solver {} (valid solvers are 'svd', \"\n \"'lsqr', and 'eigen').\".format(self.solver))\n if self.classes_.size == 2: # treat binary case as a special case\n self.coef_ = np.array(self.coef_[1, :] - self.coef_[0, :], ndmin=2,\n dtype=X.dtype)\n self.intercept_ = np.array(self.intercept_[1] - self.intercept_[0],\n ndmin=1, dtype=X.dtype)\n return self" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/predict_log_proba/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/predict_log_proba/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Estimate log probability.", + "docstring": "Estimate log probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Estimated log probabilities.", + "code": " def predict_log_proba(self, X):\n \"\"\"Estimate log probability.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n Returns\n -------\n C : ndarray of shape (n_samples, n_classes)\n Estimated log probabilities.\n \"\"\"\n prediction = self.predict_proba(X)\n prediction[prediction == 0.0] += np.finfo(prediction.dtype).tiny\n return np.log(prediction)" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/predict_proba", + "name": "predict_proba", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/predict_proba/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/predict_proba/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Estimate probability.", + "docstring": "Estimate probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Estimated probabilities.", + "code": " def predict_proba(self, X):\n \"\"\"Estimate probability.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n Returns\n -------\n C : ndarray of shape (n_samples, n_classes)\n Estimated probabilities.\n \"\"\"\n check_is_fitted(self)\n\n decision = self.decision_function(X)\n if self.classes_.size == 2:\n proba = expit(decision)\n return np.vstack([1-proba, proba]).T\n else:\n return softmax(decision)" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/transform", + "name": "transform", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/transform/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/LinearDiscriminantAnalysis/transform/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Project data to maximize class separation.", + "docstring": "Project data to maximize class separation.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed data.", + "code": " def transform(self, X):\n \"\"\"Project data to maximize class separation.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n if self.solver == 'lsqr':\n raise NotImplementedError(\"transform not implemented for 'lsqr' \"\n \"solver (use 'svd' or 'eigen').\")\n check_is_fitted(self)\n\n X = check_array(X)\n if self.solver == 'svd':\n X_new = np.dot(X - self.xbar_, self.scalings_)\n elif self.solver == 'eigen':\n X_new = np.dot(X, self.scalings_)\n\n return X_new[:, :self._max_components]" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/__init__", + "name": "__init__", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/__init__/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/__init__/priors", + "name": "priors", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.__init__.priors", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_classes,)", + "default_value": "None", + "description": "Class priors. By default, the class proportions are inferred from the\ntraining data." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/__init__/reg_param", + "name": "reg_param", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.__init__.reg_param", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Regularizes the per-class covariance estimates by transforming S2 as\n``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,\nwhere S2 corresponds to the `scaling_` attribute of a given class." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/__init__/store_covariance", + "name": "store_covariance", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.__init__.store_covariance", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the class covariance matrices are explicitely computed and\nstored in the `self.covariance_` attribute.\n\n.. 
versionadded:: 0.17" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/__init__/tol", + "name": "tol", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.0e-4", + "description": "Absolute threshold for a singular value to be considered significant,\nused to estimate the rank of `Xk` where `Xk` is the centered matrix\nof samples in class k. This parameter does not affect the\npredictions. It only controls a warning that is raised when features\nare considered to be colinear.\n\n.. versionadded:: 0.17" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Quadratic Discriminant Analysis\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n *QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, priors=None, reg_param=0., store_covariance=False,\n tol=1.0e-4):\n self.priors = np.asarray(priors) if priors is not None else None\n self.reg_param = reg_param\n self.store_covariance = store_covariance\n self.tol = tol" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/_decision_function", + "name": "_decision_function", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis._decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/_decision_function/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis._decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/_decision_function/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis._decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _decision_function(self, X):\n # return log posterior, see eq (4.12) p. 
110 of the ESL.\n check_is_fitted(self)\n\n X = check_array(X)\n norm2 = []\n for i in range(len(self.classes_)):\n R = self.rotations_[i]\n S = self.scalings_[i]\n Xm = X - self.means_[i]\n X2 = np.dot(Xm, R * (S ** (-0.5)))\n norm2.append(np.sum(X2 ** 2, axis=1))\n norm2 = np.array(norm2).T # shape = [len(X), n_classes]\n u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])\n return (-0.5 * (norm2 + u) + np.log(self.priors_))" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/decision_function", + "name": "decision_function", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/decision_function/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/decision_function/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Array of samples (test vectors)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply decision function to an array of samples.\n\nThe decision function is equal (up to a constant factor) to the\nlog-posterior of the model, i.e. `log p(y = k | x)`. In a binary\nclassification setting this instead corresponds to the difference\n`log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.", + "docstring": "Apply decision function to an array of samples.\n\nThe decision function is equal (up to a constant factor) to the\nlog-posterior of the model, i.e. `log p(y = k | x)`. In a binary\nclassification setting this instead corresponds to the difference\n`log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples (test vectors).\n\nReturns\n-------\nC : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Decision function values related to each class, per sample.\n In the two-class case, the shape is (n_samples,), giving the\n log likelihood ratio of the positive class.", + "code": " def decision_function(self, X):\n \"\"\"Apply decision function to an array of samples.\n\n The decision function is equal (up to a constant factor) to the\n log-posterior of the model, i.e. `log p(y = k | x)`. In a binary\n classification setting this instead corresponds to the difference\n `log p(y = 1 | x) - log p(y = 0 | x)`. 
See :ref:`lda_qda_math`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Array of samples (test vectors).\n\n Returns\n -------\n C : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Decision function values related to each class, per sample.\n In the two-class case, the shape is (n_samples,), giving the\n log likelihood ratio of the positive class.\n \"\"\"\n dec_func = self._decision_function(X)\n # handle special case of two classes\n if len(self.classes_) == 2:\n return dec_func[:, 1] - dec_func[:, 0]\n return dec_func" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/fit", + "name": "fit", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/fit/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/fit/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/fit/y", + "name": "y", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values (integers)" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit the model according to the given training data and parameters.\n\n .. versionchanged:: 0.19\n ``store_covariances`` has been moved to main constructor as\n ``store_covariance``\n\n .. versionchanged:: 0.19\n ``tol`` has been moved to main constructor.", + "docstring": "Fit the model according to the given training data and parameters.\n\n .. versionchanged:: 0.19\n ``store_covariances`` has been moved to main constructor as\n ``store_covariance``\n\n .. versionchanged:: 0.19\n ``tol`` has been moved to main constructor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values (integers)", + "code": " def fit(self, X, y):\n \"\"\"Fit the model according to the given training data and parameters.\n\n .. versionchanged:: 0.19\n ``store_covariances`` has been moved to main constructor as\n ``store_covariance``\n\n .. 
versionchanged:: 0.19\n ``tol`` has been moved to main constructor.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values (integers)\n \"\"\"\n X, y = self._validate_data(X, y)\n check_classification_targets(y)\n self.classes_, y = np.unique(y, return_inverse=True)\n n_samples, n_features = X.shape\n n_classes = len(self.classes_)\n if n_classes < 2:\n raise ValueError('The number of classes has to be greater than'\n ' one; got %d class' % (n_classes))\n if self.priors is None:\n self.priors_ = np.bincount(y) / float(n_samples)\n else:\n self.priors_ = self.priors\n\n cov = None\n store_covariance = self.store_covariance\n if store_covariance:\n cov = []\n means = []\n scalings = []\n rotations = []\n for ind in range(n_classes):\n Xg = X[y == ind, :]\n meang = Xg.mean(0)\n means.append(meang)\n if len(Xg) == 1:\n raise ValueError('y has only 1 sample in class %s, covariance '\n 'is ill defined.' % str(self.classes_[ind]))\n Xgc = Xg - meang\n # Xgc = U * S * V.T\n _, S, Vt = np.linalg.svd(Xgc, full_matrices=False)\n rank = np.sum(S > self.tol)\n if rank < n_features:\n warnings.warn(\"Variables are collinear\")\n S2 = (S ** 2) / (len(Xg) - 1)\n S2 = ((1 - self.reg_param) * S2) + self.reg_param\n if self.store_covariance or store_covariance:\n # cov = V * (S^2 / (n-1)) * V.T\n cov.append(np.dot(S2 * Vt.T, Vt))\n scalings.append(S2)\n rotations.append(Vt.T)\n if self.store_covariance or store_covariance:\n self.covariance_ = cov\n self.means_ = np.asarray(means)\n self.scalings_ = scalings\n self.rotations_ = rotations\n return self" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/predict", + "name": "predict", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/predict/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/predict/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Perform classification on an array of test vectors X.\n\nThe predicted class C for each sample in X is returned.", + "docstring": "Perform classification on an array of test vectors X.\n\nThe predicted class C for each sample in X is returned.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : ndarray of shape (n_samples,)", + "code": " def predict(self, X):\n \"\"\"Perform classification on an array of test vectors X.\n\n The predicted class C for each sample in X is returned.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, 
n_features)\n\n Returns\n -------\n C : ndarray of shape (n_samples,)\n \"\"\"\n d = self._decision_function(X)\n y_pred = self.classes_.take(d.argmax(1))\n return y_pred" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/predict_log_proba/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/predict_log_proba/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Array of samples/test vectors." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return log of posterior probabilities of classification.", + "docstring": "Return log of posterior probabilities of classification.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples/test vectors.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Posterior log-probabilities of classification per class.", + "code": " def predict_log_proba(self, X):\n \"\"\"Return log of posterior probabilities of classification.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Array of samples/test vectors.\n\n Returns\n -------\n C : ndarray of shape (n_samples, n_classes)\n Posterior log-probabilities of classification per class.\n \"\"\"\n # XXX : can do better to avoid precision overflows\n probas_ = self.predict_proba(X)\n return np.log(probas_)" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/predict_proba", + "name": "predict_proba", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/predict_proba/self", + "name": "self", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/QuadraticDiscriminantAnalysis/predict_proba/X", + "name": "X", + "qname": "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Array of samples/test vectors." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return posterior probabilities of classification.", + "docstring": "Return posterior probabilities of classification.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples/test vectors.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Posterior probabilities of classification per class.", + "code": " def predict_proba(self, X):\n \"\"\"Return posterior probabilities of classification.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Array of samples/test vectors.\n\n Returns\n -------\n C : ndarray of shape (n_samples, n_classes)\n Posterior probabilities of classification per class.\n \"\"\"\n values = self._decision_function(X)\n # compute the likelihood of the underlying gaussian models\n # up to a multiplicative constant.\n likelihood = np.exp(values - values.max(axis=1)[:, np.newaxis])\n # compute posterior probabilities\n return likelihood / likelihood.sum(axis=1)[:, np.newaxis]" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/_class_cov", + "name": "_class_cov", + "qname": "sklearn.discriminant_analysis._class_cov", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/_class_cov/X", + "name": "X", + "qname": "sklearn.discriminant_analysis._class_cov.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/_class_cov/y", + "name": "y", + "qname": "sklearn.discriminant_analysis._class_cov.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/_class_cov/priors", + "name": "priors", + "qname": "sklearn.discriminant_analysis._class_cov.priors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "", + "description": "Class priors." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/_class_cov/shrinkage", + "name": "shrinkage", + "qname": "sklearn.discriminant_analysis._class_cov.shrinkage", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "'auto' or float", + "default_value": "None", + "description": "Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\nShrinkage parameter is ignored if `covariance_estimator` is not None." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/_class_cov/covariance_estimator", + "name": "covariance_estimator", + "qname": "sklearn.discriminant_analysis._class_cov.covariance_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator", + "default_value": "None", + "description": "If not None, `covariance_estimator` is used to estimate\nthe covariance matrices instead of relying the empirical\ncovariance estimator (with potential shrinkage).\nThe object should have a fit method and a ``covariance_`` attribute\nlike the estimators in sklearn.covariance.\nIf None, the shrinkage parameter drives the estimate.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "estimator" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute weighted within-class covariance matrix.\n\nThe per-class covariance are weighted by the class priors.", + "docstring": "Compute weighted within-class covariance matrix.\n\nThe per-class covariance are weighted by the class priors.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\npriors : array-like of shape (n_classes,)\n Class priors.\n\nshrinkage : 'auto' or float, default=None\n Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator` is not None.\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n If None, the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ncov : array-like of shape (n_features, n_features)\n Weighted within-class covariance matrix", + "code": "def _class_cov(X, y, priors, shrinkage=None, covariance_estimator=None):\n \"\"\"Compute weighted within-class covariance matrix.\n\n The per-class covariance are weighted by the class priors.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\n priors : array-like of shape (n_classes,)\n Class priors.\n\n shrinkage : 'auto' or float, default=None\n Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator` is not None.\n\n covariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n If None, the shrinkage parameter drives the estimate.\n\n .. 
versionadded:: 0.24\n\n Returns\n -------\n cov : array-like of shape (n_features, n_features)\n Weighted within-class covariance matrix\n \"\"\"\n classes = np.unique(y)\n cov = np.zeros(shape=(X.shape[1], X.shape[1]))\n for idx, group in enumerate(classes):\n Xg = X[y == group, :]\n cov += priors[idx] * np.atleast_2d(\n _cov(Xg, shrinkage, covariance_estimator))\n return cov" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/_class_means", + "name": "_class_means", + "qname": "sklearn.discriminant_analysis._class_means", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/_class_means/X", + "name": "X", + "qname": "sklearn.discriminant_analysis._class_means.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/_class_means/y", + "name": "y", + "qname": "sklearn.discriminant_analysis._class_means.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute class means.", + "docstring": "Compute class means.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nReturns\n-------\nmeans : array-like of shape (n_classes, n_features)\n Class means.", + "code": "def _class_means(X, y):\n \"\"\"Compute class means.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\n Returns\n -------\n means : array-like of shape (n_classes, n_features)\n Class means.\n \"\"\"\n classes, y = np.unique(y, return_inverse=True)\n cnt = np.bincount(y)\n means = np.zeros(shape=(len(classes), X.shape[1]))\n np.add.at(means, y, X)\n means /= cnt[:, None]\n return means" + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/_cov", + "name": "_cov", + "qname": "sklearn.discriminant_analysis._cov", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.discriminant_analysis/_cov/X", + "name": "X", + "qname": "sklearn.discriminant_analysis._cov.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/_cov/shrinkage", + "name": "shrinkage", + "qname": "sklearn.discriminant_analysis._cov.shrinkage", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.discriminant_analysis/_cov/covariance_estimator", + "name": "covariance_estimator", + "qname": "sklearn.discriminant_analysis._cov.covariance_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + 
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate covariance matrix (using optional covariance_estimator).\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nshrinkage : {'empirical', 'auto'} or float, default=None\n Shrinkage parameter, possible values:\n - None or 'empirical': no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator`\n is not None.\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance``.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24", + "docstring": "Estimate covariance matrix (using optional covariance_estimator).\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nshrinkage : {'empirical', 'auto'} or float, default=None\n Shrinkage parameter, possible values:\n - None or 'empirical': no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator`\n is not None.\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance``.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ns : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.", + "code": "def _cov(X, shrinkage=None, covariance_estimator=None):\n \"\"\"Estimate covariance matrix (using optional covariance_estimator).\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data.\n\n shrinkage : {'empirical', 'auto'} or float, default=None\n Shrinkage parameter, possible values:\n - None or 'empirical': no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator`\n is not None.\n\n covariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance``.\n if None the shrinkage parameter drives the estimate.\n\n .. 
versionadded:: 0.24\n\n Returns\n -------\n s : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n \"\"\"\n if covariance_estimator is None:\n shrinkage = \"empirical\" if shrinkage is None else shrinkage\n if isinstance(shrinkage, str):\n if shrinkage == 'auto':\n sc = StandardScaler() # standardize features\n X = sc.fit_transform(X)\n s = ledoit_wolf(X)[0]\n # rescale\n s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :]\n elif shrinkage == 'empirical':\n s = empirical_covariance(X)\n else:\n raise ValueError('unknown shrinkage parameter')\n elif isinstance(shrinkage, float) or isinstance(shrinkage, int):\n if shrinkage < 0 or shrinkage > 1:\n raise ValueError('shrinkage parameter must be between 0 and 1')\n s = shrunk_covariance(empirical_covariance(X), shrinkage)\n else:\n raise TypeError('shrinkage must be a float or a string')\n else:\n if shrinkage is not None and shrinkage != 0:\n raise ValueError(\"covariance_estimator and shrinkage parameters \"\n \"are not None. Only one of the two can be set.\")\n covariance_estimator.fit(X)\n if not hasattr(covariance_estimator, 'covariance_'):\n raise ValueError(\"%s does not have a covariance_ attribute\" %\n covariance_estimator.__class__.__name__)\n s = covariance_estimator.covariance_\n return s" + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/__init__", + "name": "__init__", + "qname": "sklearn.dummy.DummyClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/__init__/self", + "name": "self", + "qname": "sklearn.dummy.DummyClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/__init__/strategy", + "name": "strategy", + "qname": "sklearn.dummy.DummyClassifier.__init__.strategy", + "default_value": "'prior'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{\"stratified\", \"most_frequent\", \"prior\", \"uniform\", \"constant\"}", + "default_value": "\"prior\"", + "description": "Strategy to use to generate predictions.\n\n* \"stratified\": generates predictions by respecting the training\n set's class distribution.\n* \"most_frequent\": always predicts the most frequent label in the\n training set.\n* \"prior\": always predicts the class that maximizes the class prior\n (like \"most_frequent\") and ``predict_proba`` returns the class prior.\n* \"uniform\": generates predictions uniformly at random.\n* \"constant\": always predicts a constant label that is provided by\n the user. This is useful for metrics that evaluate a non-majority\n class\n\n .. versionchanged:: 0.24\n The default value of `strategy` has changed to \"prior\" in version\n 0.24." 
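The `_cov` helper recorded above is private, but it only dispatches between public `sklearn.covariance` functions. A sketch of the three shrinkage modes through that public API (toy data; scikit-learn ~0.24 assumed):

    import numpy as np
    from sklearn.covariance import (empirical_covariance, ledoit_wolf,
                                    shrunk_covariance)
    from sklearn.preprocessing import StandardScaler

    X = np.random.RandomState(0).randn(100, 3)

    # shrinkage=None / 'empirical': plain empirical covariance.
    s_emp = empirical_covariance(X)

    # shrinkage='auto': Ledoit-Wolf on standardized features, rescaled back.
    sc = StandardScaler()
    s_lw = ledoit_wolf(sc.fit_transform(X))[0]
    s_lw = sc.scale_[:, np.newaxis] * s_lw * sc.scale_[np.newaxis, :]

    # A fixed float in [0, 1]: convex blend toward a scaled identity.
    s_fixed = shrunk_covariance(empirical_covariance(X), shrinkage=0.2)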
+ }, + "type": { + "kind": "EnumType", + "values": ["prior", "most_frequent", "stratified", "constant", "uniform"] + } + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.dummy.DummyClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the randomness to generate the predictions when\n``strategy='stratified'`` or ``strategy='uniform'``.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/__init__/constant", + "name": "constant", + "qname": "sklearn.dummy.DummyClassifier.__init__.constant", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int or str or array-like of shape (n_outputs,)", + "default_value": "", + "description": "The explicit constant as predicted by the \"constant\" strategy. This\nparameter is useful only for the \"constant\" strategy." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_outputs,)" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "DummyClassifier is a classifier that makes predictions using simple rules.\n\nThis classifier is useful as a simple baseline to compare with other\n(real) classifiers. Do not use it for real problems.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.13", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, strategy=\"prior\", random_state=None,\n constant=None):\n self.strategy = strategy\n self.random_state = random_state\n self.constant = constant" + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/_more_tags", + "name": "_more_tags", + "qname": "sklearn.dummy.DummyClassifier._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/_more_tags/self", + "name": "self", + "qname": "sklearn.dummy.DummyClassifier._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n 'poor_score': True, 'no_validation': True,\n '_xfail_checks': {\n 'check_methods_subset_invariance':\n 'fails for the predict method',\n 'check_methods_sample_order_invariance':\n 'fails for the predict method'\n }\n }" + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/fit", + "name": "fit", + "qname": "sklearn.dummy.DummyClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/fit/self", + "name": "self", + "qname": "sklearn.dummy.DummyClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/fit/X", + "name": "X", + "qname": "sklearn.dummy.DummyClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/fit/y", + "name": "y", + "qname": "sklearn.dummy.DummyClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.dummy.DummyClassifier.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit the random classifier.", + "docstring": "Fit the random classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the random classifier.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n self : object\n \"\"\"\n allowed_strategies = (\"most_frequent\", \"stratified\", \"uniform\",\n \"constant\", \"prior\")\n\n if self.strategy not in allowed_strategies:\n raise ValueError(\"Unknown strategy type: %s, expected one of %s.\"\n % (self.strategy, allowed_strategies))\n\n self._strategy = self.strategy\n\n if self._strategy == \"uniform\" and sp.issparse(y):\n y = y.toarray()\n warnings.warn('A local copy of the target data has been converted '\n 'to a numpy array. Predicting on sparse target data '\n 'with the uniform strategy would not save memory '\n 'and would be slower.',\n UserWarning)\n\n self.sparse_output_ = sp.issparse(y)\n\n if not self.sparse_output_:\n y = np.asarray(y)\n y = np.atleast_1d(y)\n\n if y.ndim == 1:\n y = np.reshape(y, (-1, 1))\n\n self.n_outputs_ = y.shape[1]\n\n self.n_features_in_ = None # No input validation is done for X\n\n check_consistent_length(X, y)\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n\n if self._strategy == \"constant\":\n if self.constant is None:\n raise ValueError(\"Constant target value has to be specified \"\n \"when the constant strategy is used.\")\n else:\n constant = np.reshape(np.atleast_1d(self.constant), (-1, 1))\n if constant.shape[0] != self.n_outputs_:\n raise ValueError(\"Constant target value should have \"\n \"shape (%d, 1).\" % self.n_outputs_)\n\n (self.classes_,\n self.n_classes_,\n self.class_prior_) = class_distribution(y, sample_weight)\n\n if self._strategy == \"constant\":\n for k in range(self.n_outputs_):\n if not any(constant[k][0] == c for c in self.classes_[k]):\n # Checking in case of constant strategy if the constant\n # provided by the user is in y.\n err_msg = (\"The constant target value must be present in \"\n \"the training data. You provided constant={}. 
\"\n \"Possible values are: {}.\"\n .format(self.constant, list(self.classes_[k])))\n raise ValueError(err_msg)\n\n if self.n_outputs_ == 1:\n self.n_classes_ = self.n_classes_[0]\n self.classes_ = self.classes_[0]\n self.class_prior_ = self.class_prior_[0]\n\n return self" + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/predict", + "name": "predict", + "qname": "sklearn.dummy.DummyClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/predict/self", + "name": "self", + "qname": "sklearn.dummy.DummyClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/predict/X", + "name": "X", + "qname": "sklearn.dummy.DummyClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Test data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Perform classification on test vectors X.", + "docstring": "Perform classification on test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test data.\n\nReturns\n-------\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Predicted target values for X.", + "code": " def predict(self, X):\n \"\"\"Perform classification on test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test data.\n\n Returns\n -------\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Predicted target values for X.\n \"\"\"\n check_is_fitted(self)\n\n # numpy random_state expects Python int and not long as size argument\n # under Windows\n n_samples = _num_samples(X)\n rs = check_random_state(self.random_state)\n\n n_classes_ = self.n_classes_\n classes_ = self.classes_\n class_prior_ = self.class_prior_\n constant = self.constant\n if self.n_outputs_ == 1:\n # Get same type even for self.n_outputs_ == 1\n n_classes_ = [n_classes_]\n classes_ = [classes_]\n class_prior_ = [class_prior_]\n constant = [constant]\n # Compute probability only once\n if self._strategy == \"stratified\":\n proba = self.predict_proba(X)\n if self.n_outputs_ == 1:\n proba = [proba]\n\n if self.sparse_output_:\n class_prob = None\n if self._strategy in (\"most_frequent\", \"prior\"):\n classes_ = [np.array([cp.argmax()]) for cp in class_prior_]\n\n elif self._strategy == \"stratified\":\n class_prob = class_prior_\n\n elif self._strategy == \"uniform\":\n raise ValueError(\"Sparse target prediction is not \"\n \"supported with the uniform strategy\")\n\n elif self._strategy == \"constant\":\n classes_ = [np.array([c]) for c in constant]\n\n y = _random_choice_csc(n_samples, classes_, class_prob,\n self.random_state)\n else:\n if self._strategy in (\"most_frequent\", \"prior\"):\n y = np.tile([classes_[k][class_prior_[k].argmax()] for\n k in range(self.n_outputs_)], [n_samples, 1])\n\n elif self._strategy == \"stratified\":\n y = np.vstack([classes_[k][proba[k].argmax(axis=1)] for\n k in range(self.n_outputs_)]).T\n\n elif self._strategy == \"uniform\":\n ret = [classes_[k][rs.randint(n_classes_[k], size=n_samples)]\n for k in 
range(self.n_outputs_)]\n y = np.vstack(ret).T\n\n elif self._strategy == \"constant\":\n y = np.tile(self.constant, (n_samples, 1))\n\n if self.n_outputs_ == 1:\n y = np.ravel(y)\n\n return y" + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.dummy.DummyClassifier.predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/predict_log_proba/self", + "name": "self", + "qname": "sklearn.dummy.DummyClassifier.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/predict_log_proba/X", + "name": "X", + "qname": "sklearn.dummy.DummyClassifier.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, object with finite length or shape}", + "default_value": "", + "description": "Training data, requires length = n_samples" + }, + "type": { + "kind": "EnumType", + "values": [] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return log probability estimates for the test vectors X.", + "docstring": "Return log probability estimates for the test vectors X.\n\nParameters\n----------\nX : {array-like, object with finite length or shape}\n Training data, requires length = n_samples\n\nReturns\n-------\nP : ndarray of shape (n_samples, n_classes) or list of such arrays\n Returns the log probability of the sample for each class in\n the model, where classes are ordered arithmetically for each\n output.", + "code": " def predict_log_proba(self, X):\n \"\"\"\n Return log probability estimates for the test vectors X.\n\n Parameters\n ----------\n X : {array-like, object with finite length or shape}\n Training data, requires length = n_samples\n\n Returns\n -------\n P : ndarray of shape (n_samples, n_classes) or list of such arrays\n Returns the log probability of the sample for each class in\n the model, where classes are ordered arithmetically for each\n output.\n \"\"\"\n proba = self.predict_proba(X)\n if self.n_outputs_ == 1:\n return np.log(proba)\n else:\n return [np.log(p) for p in proba]" + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.dummy.DummyClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.dummy.DummyClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.dummy.DummyClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Test data." 
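As the `predict` code above shows, the "stratified" and "uniform" strategies draw labels at random (from the class prior and uniformly, respectively), so `random_state` is what makes the baseline reproducible. A sketch with invented data:

    import numpy as np
    from sklearn.dummy import DummyClassifier

    X = np.zeros((8, 1))
    y = np.array([0, 0, 0, 0, 0, 1, 1, 2])

    clf = DummyClassifier(strategy="stratified", random_state=0).fit(X, y)
    print(clf.predict(X))   # random draws from the class prior,
                            # but identical on every rerun with this seed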
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return probability estimates for the test vectors X.", + "docstring": "Return probability estimates for the test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test data.\n\nReturns\n-------\nP : ndarray of shape (n_samples, n_classes) or list of such arrays\n Returns the probability of the sample for each class in\n the model, where classes are ordered arithmetically, for each\n output.", + "code": " def predict_proba(self, X):\n \"\"\"\n Return probability estimates for the test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test data.\n\n Returns\n -------\n P : ndarray of shape (n_samples, n_classes) or list of such arrays\n Returns the probability of the sample for each class in\n the model, where classes are ordered arithmetically, for each\n output.\n \"\"\"\n check_is_fitted(self)\n\n # numpy random_state expects Python int and not long as size argument\n # under Windows\n n_samples = _num_samples(X)\n rs = check_random_state(self.random_state)\n\n n_classes_ = self.n_classes_\n classes_ = self.classes_\n class_prior_ = self.class_prior_\n constant = self.constant\n if self.n_outputs_ == 1:\n # Get same type even for self.n_outputs_ == 1\n n_classes_ = [n_classes_]\n classes_ = [classes_]\n class_prior_ = [class_prior_]\n constant = [constant]\n\n P = []\n for k in range(self.n_outputs_):\n if self._strategy == \"most_frequent\":\n ind = class_prior_[k].argmax()\n out = np.zeros((n_samples, n_classes_[k]), dtype=np.float64)\n out[:, ind] = 1.0\n elif self._strategy == \"prior\":\n out = np.ones((n_samples, 1)) * class_prior_[k]\n\n elif self._strategy == \"stratified\":\n out = rs.multinomial(1, class_prior_[k], size=n_samples)\n out = out.astype(np.float64)\n\n elif self._strategy == \"uniform\":\n out = np.ones((n_samples, n_classes_[k]), dtype=np.float64)\n out /= n_classes_[k]\n\n elif self._strategy == \"constant\":\n ind = np.where(classes_[k] == constant[k])\n out = np.zeros((n_samples, n_classes_[k]), dtype=np.float64)\n out[:, ind] = 1.0\n\n P.append(out)\n\n if self.n_outputs_ == 1:\n P = P[0]\n\n return P" + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/score", + "name": "score", + "qname": "sklearn.dummy.DummyClassifier.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/score/self", + "name": "self", + "qname": "sklearn.dummy.DummyClassifier.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/score/X", + "name": "X", + "qname": "sklearn.dummy.DummyClassifier.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "None or array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Test samples. Passing None as test samples gives the same result\nas passing real test samples, since DummyClassifier\noperates independently of the sampled observations." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "None" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/score/y", + "name": "y", + "qname": "sklearn.dummy.DummyClassifier.score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "True labels for X." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.dummy/DummyClassifier/score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.dummy.DummyClassifier.score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.", + "docstring": "Returns the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.\n\nParameters\n----------\nX : None or array-like of shape (n_samples, n_features)\n Test samples. Passing None as test samples gives the same result\n as passing real test samples, since DummyClassifier\n operates independently of the sampled observations.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Mean accuracy of self.predict(X) wrt. y.", + "code": " def score(self, X, y, sample_weight=None):\n \"\"\"Returns the mean accuracy on the given test data and labels.\n\n In multi-label classification, this is the subset accuracy\n which is a harsh metric since you require for each sample that\n each label set be correctly predicted.\n\n Parameters\n ----------\n X : None or array-like of shape (n_samples, n_features)\n Test samples. Passing None as test samples gives the same result\n as passing real test samples, since DummyClassifier\n operates independently of the sampled observations.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Mean accuracy of self.predict(X) wrt. 
y.\n\n \"\"\"\n if X is None:\n X = np.zeros(shape=(len(y), 1))\n return super().score(X, y, sample_weight)" + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/__init__", + "name": "__init__", + "qname": "sklearn.dummy.DummyRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/__init__/self", + "name": "self", + "qname": "sklearn.dummy.DummyRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/__init__/strategy", + "name": "strategy", + "qname": "sklearn.dummy.DummyRegressor.__init__.strategy", + "default_value": "'mean'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{\"mean\", \"median\", \"quantile\", \"constant\"}", + "default_value": "\"mean\"", + "description": "Strategy to use to generate predictions.\n\n* \"mean\": always predicts the mean of the training set\n* \"median\": always predicts the median of the training set\n* \"quantile\": always predicts a specified quantile of the training set,\n provided with the quantile parameter.\n* \"constant\": always predicts a constant value that is provided by\n the user." + }, + "type": { + "kind": "EnumType", + "values": ["quantile", "median", "mean", "constant"] + } + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/__init__/constant", + "name": "constant", + "qname": "sklearn.dummy.DummyRegressor.__init__.constant", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int or float or array-like of shape (n_outputs,)", + "default_value": "None", + "description": "The explicit constant as predicted by the \"constant\" strategy. This\nparameter is useful only for the \"constant\" strategy." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_outputs,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/__init__/quantile", + "name": "quantile", + "qname": "sklearn.dummy.DummyRegressor.__init__.quantile", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float in [0.0, 1.0]", + "default_value": "None", + "description": "The quantile to predict using the \"quantile\" strategy. A quantile of\n0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the\nmaximum." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float in [0.0" + }, + { + "kind": "NamedType", + "name": "1.0]" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "DummyRegressor is a regressor that makes predictions using\nsimple rules.\n\nThis regressor is useful as a simple baseline to compare with other\n(real) regressors. Do not use it for real problems.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.13", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, strategy=\"mean\", constant=None, quantile=None):\n self.strategy = strategy\n self.constant = constant\n self.quantile = quantile" + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/_more_tags", + "name": "_more_tags", + "qname": "sklearn.dummy.DummyRegressor._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/_more_tags/self", + "name": "self", + "qname": "sklearn.dummy.DummyRegressor._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'poor_score': True, 'no_validation': True}" + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/fit", + "name": "fit", + "qname": "sklearn.dummy.DummyRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/fit/self", + "name": "self", + "qname": "sklearn.dummy.DummyRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/fit/X", + "name": "X", + "qname": "sklearn.dummy.DummyRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/fit/y", + "name": "y", + "qname": "sklearn.dummy.DummyRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.dummy.DummyRegressor.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit the random regressor.", + "docstring": "Fit the random regressor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the random regressor.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n self : object\n \"\"\"\n allowed_strategies = (\"mean\", \"median\", \"quantile\", \"constant\")\n if self.strategy not in allowed_strategies:\n raise ValueError(\"Unknown strategy type: %s, expected one of %s.\"\n % (self.strategy, allowed_strategies))\n\n y = check_array(y, ensure_2d=False)\n self.n_features_in_ = None # No input validation is done for X\n if len(y) == 0:\n raise ValueError(\"y must not be empty.\")\n\n if y.ndim == 1:\n y = np.reshape(y, (-1, 1))\n self.n_outputs_ = y.shape[1]\n\n check_consistent_length(X, y, sample_weight)\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n\n if self.strategy == \"mean\":\n self.constant_ = np.average(y, axis=0, weights=sample_weight)\n\n elif self.strategy == \"median\":\n if sample_weight is None:\n self.constant_ = np.median(y, axis=0)\n else:\n self.constant_ = [_weighted_percentile(y[:, k], sample_weight,\n percentile=50.)\n for k in range(self.n_outputs_)]\n\n elif self.strategy == \"quantile\":\n if self.quantile is None or not np.isscalar(self.quantile):\n raise ValueError(\"Quantile must be a scalar in the range \"\n \"[0.0, 1.0], but got %s.\" % self.quantile)\n\n percentile = self.quantile * 100.0\n if sample_weight is None:\n self.constant_ = np.percentile(y, axis=0, q=percentile)\n else:\n self.constant_ = [_weighted_percentile(y[:, k], sample_weight,\n percentile=percentile)\n for k in range(self.n_outputs_)]\n\n elif self.strategy == \"constant\":\n if self.constant is None:\n raise TypeError(\"Constant target value has to be specified \"\n \"when the constant strategy is used.\")\n\n self.constant = check_array(self.constant,\n accept_sparse=['csr', 'csc', 'coo'],\n ensure_2d=False, ensure_min_samples=0)\n\n if self.n_outputs_ != 1 and self.constant.shape[0] != y.shape[1]:\n raise ValueError(\n \"Constant target value should have \"\n \"shape (%d, 1).\" % y.shape[1])\n\n self.constant_ = self.constant\n\n self.constant_ = np.reshape(self.constant_, (1, -1))\n return self" + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/predict", + "name": "predict", + "qname": "sklearn.dummy.DummyRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/predict/self", + "name": "self", + "qname": "sklearn.dummy.DummyRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/predict/X", + "name": "X", + "qname": 
"sklearn.dummy.DummyRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Test data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/predict/return_std", + "name": "return_std", + "qname": "sklearn.dummy.DummyRegressor.predict.return_std", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return the standard deviation of posterior prediction.\nAll zeros in this case.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Perform classification on test vectors X.", + "docstring": "Perform classification on test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test data.\n\nreturn_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n All zeros in this case.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Predicted target values for X.\n\ny_std : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Standard deviation of predictive distribution of query points.", + "code": " def predict(self, X, return_std=False):\n \"\"\"\n Perform classification on test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test data.\n\n return_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n All zeros in this case.\n\n .. versionadded:: 0.20\n\n Returns\n -------\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Predicted target values for X.\n\n y_std : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Standard deviation of predictive distribution of query points.\n \"\"\"\n check_is_fitted(self)\n n_samples = _num_samples(X)\n\n y = np.full((n_samples, self.n_outputs_), self.constant_,\n dtype=np.array(self.constant_).dtype)\n y_std = np.zeros((n_samples, self.n_outputs_))\n\n if self.n_outputs_ == 1:\n y = np.ravel(y)\n y_std = np.ravel(y_std)\n\n return (y, y_std) if return_std else y" + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/score", + "name": "score", + "qname": "sklearn.dummy.DummyRegressor.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/score/self", + "name": "self", + "qname": "sklearn.dummy.DummyRegressor.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/score/X", + "name": "X", + "qname": "sklearn.dummy.DummyRegressor.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "None or array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Test samples. Passing None as test samples gives the same result\nas passing real test samples, since DummyRegressor\noperates independently of the sampled observations." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "None" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/score/y", + "name": "y", + "qname": "sklearn.dummy.DummyRegressor.score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "True values for X." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.dummy/DummyRegressor/score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.dummy.DummyRegressor.score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the coefficient of determination R^2 of the prediction.\n\nThe coefficient R^2 is defined as (1 - u/v), where u is the residual\nsum of squares ((y_true - y_pred) ** 2).sum() and v is the total\nsum of squares ((y_true - y_true.mean()) ** 2).sum().\nThe best possible score is 1.0 and it can be negative (because the\nmodel can be arbitrarily worse). A constant model that always\npredicts the expected value of y, disregarding the input features,\nwould get a R^2 score of 0.0.", + "docstring": "Returns the coefficient of determination R^2 of the prediction.\n\nThe coefficient R^2 is defined as (1 - u/v), where u is the residual\nsum of squares ((y_true - y_pred) ** 2).sum() and v is the total\nsum of squares ((y_true - y_true.mean()) ** 2).sum().\nThe best possible score is 1.0 and it can be negative (because the\nmodel can be arbitrarily worse). A constant model that always\npredicts the expected value of y, disregarding the input features,\nwould get a R^2 score of 0.0.\n\nParameters\n----------\nX : None or array-like of shape (n_samples, n_features)\n Test samples. Passing None as test samples gives the same result\n as passing real test samples, since DummyRegressor\n operates independently of the sampled observations.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True values for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n R^2 of self.predict(X) wrt. y.", + "code": " def score(self, X, y, sample_weight=None):\n \"\"\"Returns the coefficient of determination R^2 of the prediction.\n\n The coefficient R^2 is defined as (1 - u/v), where u is the residual\n sum of squares ((y_true - y_pred) ** 2).sum() and v is the total\n sum of squares ((y_true - y_true.mean()) ** 2).sum().\n The best possible score is 1.0 and it can be negative (because the\n model can be arbitrarily worse). A constant model that always\n predicts the expected value of y, disregarding the input features,\n would get a R^2 score of 0.0.\n\n Parameters\n ----------\n X : None or array-like of shape (n_samples, n_features)\n Test samples. 
Passing None as test samples gives the same result\n as passing real test samples, since DummyRegressor\n operates independently of the sampled observations.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True values for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n R^2 of self.predict(X) wrt. y.\n \"\"\"\n if X is None:\n X = np.zeros(shape=(len(y), 1))\n return super().score(X, y, sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.base_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "None", + "description": "The base estimator to fit on random subsets of the dataset.\nIf None, then the base estimator is a\n:class:`~sklearn.tree.DecisionTreeClassifier`." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.n_estimators", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "The number of base estimators in the ensemble." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.max_samples", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1.0", + "description": "The number of samples to draw from X to train each base estimator (with\nreplacement by default, see `bootstrap` for more details).\n\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__/max_features", + "name": "max_features", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.max_features", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1.0", + "description": "The number of features to draw from X to train each base estimator (\nwithout replacement by default, see `bootstrap_features` for more\ndetails).\n\n- If int, then draw `max_features` features.\n- If float, then draw `max_features * X.shape[1]` features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__/bootstrap", + "name": "bootstrap", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.bootstrap", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether samples are drawn with replacement. If False, sampling\nwithout replacement is performed." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__/bootstrap_features", + "name": "bootstrap_features", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.bootstrap_features", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether features are drawn with replacement." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__/oob_score", + "name": "oob_score", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.oob_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use out-of-bag samples to estimate\nthe generalization error. Only available if bootstrap=True." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to True, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit\na whole new ensemble. See :term:`the Glossary `.\n\n.. versionadded:: 0.17\n *warm_start* constructor parameter." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to run in parallel for both :meth:`fit` and\n:meth:`predict`. ``None`` means 1 unless in a\n:obj:`joblib.parallel_backend` context. ``-1`` means using all\nprocessors. See :term:`Glossary ` for more details." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the random resampling of the original dataset\n(sample wise and feature wise).\nIf the base estimator accepts a `random_state` attribute, a different\nseed is generated for each instance in the ensemble.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls the verbosity when fitting and predicting." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "A Bagging classifier.\n\nA Bagging classifier is an ensemble meta-estimator that fits base\nclassifiers each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.15", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self,\n base_estimator=None,\n n_estimators=10, *,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n warm_start=False,\n n_jobs=None,\n random_state=None,\n verbose=0):\n\n super().__init__(\n base_estimator,\n n_estimators=n_estimators,\n max_samples=max_samples,\n max_features=max_features,\n bootstrap=bootstrap,\n bootstrap_features=bootstrap_features,\n oob_score=oob_score,\n warm_start=warm_start,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose)" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/_set_oob_score", + "name": "_set_oob_score", + "qname": "sklearn.ensemble._bagging.BaggingClassifier._set_oob_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/_set_oob_score/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaggingClassifier._set_oob_score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/_set_oob_score/X", + "name": "X", + "qname": "sklearn.ensemble._bagging.BaggingClassifier._set_oob_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/_set_oob_score/y", + "name": "y", + "qname": "sklearn.ensemble._bagging.BaggingClassifier._set_oob_score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _set_oob_score(self, X, y):\n n_samples = y.shape[0]\n n_classes_ = self.n_classes_\n\n predictions = np.zeros((n_samples, n_classes_))\n\n for estimator, samples, features in zip(self.estimators_,\n self.estimators_samples_,\n self.estimators_features_):\n # Create mask for OOB samples\n mask = ~indices_to_mask(samples, n_samples)\n\n if hasattr(estimator, \"predict_proba\"):\n predictions[mask, :] += estimator.predict_proba(\n (X[mask, :])[:, features])\n\n else:\n p = estimator.predict((X[mask, :])[:, features])\n j = 0\n\n for i in range(n_samples):\n if mask[i]:\n predictions[i, p[j]] += 1\n j += 1\n\n if (predictions.sum(axis=1) == 0).any():\n warn(\"Some inputs do not have OOB scores. 
\"\n \"This probably means too few estimators were used \"\n \"to compute any reliable oob estimates.\")\n\n oob_decision_function = (predictions /\n predictions.sum(axis=1)[:, np.newaxis])\n oob_score = accuracy_score(y, np.argmax(predictions, axis=1))\n\n self.oob_decision_function_ = oob_decision_function\n self.oob_score_ = oob_score" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/_validate_estimator", + "name": "_validate_estimator", + "qname": "sklearn.ensemble._bagging.BaggingClassifier._validate_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/_validate_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaggingClassifier._validate_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the estimator and set the base_estimator_ attribute.", + "docstring": "Check the estimator and set the base_estimator_ attribute.", + "code": " def _validate_estimator(self):\n \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n super()._validate_estimator(\n default=DecisionTreeClassifier())" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/_validate_y", + "name": "_validate_y", + "qname": "sklearn.ensemble._bagging.BaggingClassifier._validate_y", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/_validate_y/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaggingClassifier._validate_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/_validate_y/y", + "name": "y", + "qname": "sklearn.ensemble._bagging.BaggingClassifier._validate_y.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_y(self, y):\n y = column_or_1d(y, warn=True)\n check_classification_targets(y)\n self.classes_, y = np.unique(y, return_inverse=True)\n self.n_classes_ = len(self.classes_)\n\n return y" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/decision_function", + "name": "decision_function", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.decision_function", + "decorators": ["if_delegate_has_method(delegate='base_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/decision_function/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/decision_function/X", + "name": "X", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + 
"docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrices are accepted only if\nthey are supported by the base estimator." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Average of the decision functions of the base classifiers.", + "docstring": "Average of the decision functions of the base classifiers.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\nscore : ndarray of shape (n_samples, k)\n The decision function of the input samples. The columns correspond\n to the classes in sorted order, as they appear in the attribute\n ``classes_``. Regression and binary classification are special\n cases with ``k == 1``, otherwise ``k==n_classes``.", + "code": " @if_delegate_has_method(delegate='base_estimator')\n def decision_function(self, X):\n \"\"\"Average of the decision functions of the base classifiers.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n Returns\n -------\n score : ndarray of shape (n_samples, k)\n The decision function of the input samples. The columns correspond\n to the classes in sorted order, as they appear in the attribute\n ``classes_``. Regression and binary classification are special\n cases with ``k == 1``, otherwise ``k==n_classes``.\n\n \"\"\"\n check_is_fitted(self)\n\n # Check data\n X = check_array(\n X, accept_sparse=['csr', 'csc'], dtype=None,\n force_all_finite=False\n )\n\n if self.n_features_ != X.shape[1]:\n raise ValueError(\"Number of features of the model must \"\n \"match the input. 
Model n_features is {0} and \"\n \"input n_features is {1} \"\n \"\".format(self.n_features_, X.shape[1]))\n\n # Parallel loop\n n_jobs, n_estimators, starts = _partition_estimators(self.n_estimators,\n self.n_jobs)\n\n all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n delayed(_parallel_decision_function)(\n self.estimators_[starts[i]:starts[i + 1]],\n self.estimators_features_[starts[i]:starts[i + 1]],\n X)\n for i in range(n_jobs))\n\n # Reduce\n decisions = sum(all_decisions) / self.n_estimators\n\n return decisions" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/predict", + "name": "predict", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/predict/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/predict/X", + "name": "X", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrices are accepted only if\nthey are supported by the base estimator." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class for X.\n\nThe predicted class of an input sample is computed as the class with\nthe highest mean predicted probability. If base estimators do not\nimplement a ``predict_proba`` method, then it resorts to voting.", + "docstring": "Predict class for X.\n\nThe predicted class of an input sample is computed as the class with\nthe highest mean predicted probability. If base estimators do not\nimplement a ``predict_proba`` method, then it resorts to voting.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted classes.", + "code": " def predict(self, X):\n \"\"\"Predict class for X.\n\n The predicted class of an input sample is computed as the class with\n the highest mean predicted probability. If base estimators do not\n implement a ``predict_proba`` method, then it resorts to voting.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. 
Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n The predicted classes.\n \"\"\"\n predicted_probabilitiy = self.predict_proba(X)\n return self.classes_.take((np.argmax(predicted_probabilitiy, axis=1)),\n axis=0)" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/predict_log_proba/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/predict_log_proba/X", + "name": "X", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrices are accepted only if\nthey are supported by the base estimator." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe log of the mean predicted class probabilities of the base\nestimators in the ensemble.", + "docstring": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe log of the mean predicted class probabilities of the base\nestimators in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.", + "code": " def predict_log_proba(self, X):\n \"\"\"Predict class log-probabilities for X.\n\n The predicted class log-probabilities of an input sample is computed as\n the log of the mean predicted class probabilities of the base\n estimators in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n if hasattr(self.base_estimator_, \"predict_log_proba\"):\n # Check data\n X = check_array(\n X, accept_sparse=['csr', 'csc'], dtype=None,\n force_all_finite=False\n )\n\n if self.n_features_ != X.shape[1]:\n raise ValueError(\"Number of features of the model must \"\n \"match the input. 
Model n_features is {0} \"\n \"and input n_features is {1} \"\n \"\".format(self.n_features_, X.shape[1]))\n\n # Parallel loop\n n_jobs, n_estimators, starts = _partition_estimators(\n self.n_estimators, self.n_jobs)\n\n all_log_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n delayed(_parallel_predict_log_proba)(\n self.estimators_[starts[i]:starts[i + 1]],\n self.estimators_features_[starts[i]:starts[i + 1]],\n X,\n self.n_classes_)\n for i in range(n_jobs))\n\n # Reduce\n log_proba = all_log_proba[0]\n\n for j in range(1, len(all_log_proba)):\n log_proba = np.logaddexp(log_proba, all_log_proba[j])\n\n log_proba -= np.log(self.n_estimators)\n\n return log_proba\n\n else:\n return np.log(self.predict_proba(X))" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.ensemble._bagging.BaggingClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrices are accepted only if\nthey are supported by the base estimator." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe mean predicted class probabilities of the base estimators in the\nensemble. If base estimators do not implement a ``predict_proba``\nmethod, then it resorts to voting and the predicted class probabilities\nof an input sample represents the proportion of estimators predicting\neach class.", + "docstring": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe mean predicted class probabilities of the base estimators in the\nensemble. If base estimators do not implement a ``predict_proba``\nmethod, then it resorts to voting and the predicted class probabilities\nof an input sample represents the proportion of estimators predicting\neach class.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.", + "code": " def predict_proba(self, X):\n \"\"\"Predict class probabilities for X.\n\n The predicted class probabilities of an input sample is computed as\n the mean predicted class probabilities of the base estimators in the\n ensemble. 
If base estimators do not implement a ``predict_proba``\n method, then it resorts to voting and the predicted class probabilities\n of an input sample represents the proportion of estimators predicting\n each class.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n # Check data\n X = check_array(\n X, accept_sparse=['csr', 'csc'], dtype=None,\n force_all_finite=False\n )\n\n if self.n_features_ != X.shape[1]:\n raise ValueError(\"Number of features of the model must \"\n \"match the input. Model n_features is {0} and \"\n \"input n_features is {1}.\"\n \"\".format(self.n_features_, X.shape[1]))\n\n # Parallel loop\n n_jobs, n_estimators, starts = _partition_estimators(self.n_estimators,\n self.n_jobs)\n\n all_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose,\n **self._parallel_args())(\n delayed(_parallel_predict_proba)(\n self.estimators_[starts[i]:starts[i + 1]],\n self.estimators_features_[starts[i]:starts[i + 1]],\n X,\n self.n_classes_)\n for i in range(n_jobs))\n\n # Reduce\n proba = sum(all_proba) / self.n_estimators\n\n return proba" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.base_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "None", + "description": "The base estimator to fit on random subsets of the dataset.\nIf None, then the base estimator is a\n:class:`~sklearn.tree.DecisionTreeRegressor`." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.n_estimators", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "The number of base estimators in the ensemble." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.max_samples", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1.0", + "description": "The number of samples to draw from X to train each base estimator (with\nreplacement by default, see `bootstrap` for more details).\n\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__/max_features", + "name": "max_features", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.max_features", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1.0", + "description": "The number of features to draw from X to train each base estimator (\nwithout replacement by default, see `bootstrap_features` for more\ndetails).\n\n- If int, then draw `max_features` features.\n- If float, then draw `max_features * X.shape[1]` features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__/bootstrap", + "name": "bootstrap", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.bootstrap", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether samples are drawn with replacement. If False, sampling\nwithout replacement is performed." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__/bootstrap_features", + "name": "bootstrap_features", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.bootstrap_features", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether features are drawn with replacement." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__/oob_score", + "name": "oob_score", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.oob_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use out-of-bag samples to estimate\nthe generalization error. Only available if bootstrap=True." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to True, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit\na whole new ensemble. See :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to run in parallel for both :meth:`fit` and\n:meth:`predict`. ``None`` means 1 unless in a\n:obj:`joblib.parallel_backend` context. ``-1`` means using all\nprocessors. See :term:`Glossary ` for more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the random resampling of the original dataset\n(sample wise and feature wise).\nIf the base estimator accepts a `random_state` attribute, a different\nseed is generated for each instance in the ensemble.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls the verbosity when fitting and predicting." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "A Bagging regressor.\n\nA Bagging regressor is an ensemble meta-estimator that fits base\nregressors each on random subsets of the original dataset and then\naggregate their individual predictions (either by voting or by averaging)\nto form a final prediction. Such a meta-estimator can typically be used as\na way to reduce the variance of a black-box estimator (e.g., a decision\ntree), by introducing randomization into its construction procedure and\nthen making an ensemble out of it.\n\nThis algorithm encompasses several works from the literature. When random\nsubsets of the dataset are drawn as random subsets of the samples, then\nthis algorithm is known as Pasting [1]_. If samples are drawn with\nreplacement, then the method is known as Bagging [2]_. 
When random subsets\nof the dataset are drawn as random subsets of the features, then the method\nis known as Random Subspaces [3]_. Finally, when base estimators are built\non subsets of both samples and features, then the method is known as\nRandom Patches [4]_.\n\nRead more in the :ref:`User Guide <bagging>`.\n\n.. versionadded:: 0.15", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self,\n base_estimator=None,\n n_estimators=10, *,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n warm_start=False,\n n_jobs=None,\n random_state=None,\n verbose=0):\n super().__init__(\n base_estimator,\n n_estimators=n_estimators,\n max_samples=max_samples,\n max_features=max_features,\n bootstrap=bootstrap,\n bootstrap_features=bootstrap_features,\n oob_score=oob_score,\n warm_start=warm_start,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose)" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/_set_oob_score", + "name": "_set_oob_score", + "qname": "sklearn.ensemble._bagging.BaggingRegressor._set_oob_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/_set_oob_score/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaggingRegressor._set_oob_score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/_set_oob_score/X", + "name": "X", + "qname": "sklearn.ensemble._bagging.BaggingRegressor._set_oob_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/_set_oob_score/y", + "name": "y", + "qname": "sklearn.ensemble._bagging.BaggingRegressor._set_oob_score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _set_oob_score(self, X, y):\n n_samples = y.shape[0]\n\n predictions = np.zeros((n_samples,))\n n_predictions = np.zeros((n_samples,))\n\n for estimator, samples, features in zip(self.estimators_,\n self.estimators_samples_,\n self.estimators_features_):\n # Create mask for OOB samples\n mask = ~indices_to_mask(samples, n_samples)\n\n predictions[mask] += estimator.predict((X[mask, :])[:, features])\n n_predictions[mask] += 1\n\n if (n_predictions == 0).any():\n warn(\"Some inputs do not have OOB scores. 
\"\n \"This probably means too few estimators were used \"\n \"to compute any reliable oob estimates.\")\n n_predictions[n_predictions == 0] = 1\n\n predictions /= n_predictions\n\n self.oob_prediction_ = predictions\n self.oob_score_ = r2_score(y, predictions)" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/_validate_estimator", + "name": "_validate_estimator", + "qname": "sklearn.ensemble._bagging.BaggingRegressor._validate_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/_validate_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaggingRegressor._validate_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the estimator and set the base_estimator_ attribute.", + "docstring": "Check the estimator and set the base_estimator_ attribute.", + "code": " def _validate_estimator(self):\n \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n super()._validate_estimator(\n default=DecisionTreeRegressor())" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/predict", + "name": "predict", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/predict/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaggingRegressor/predict/X", + "name": "X", + "qname": "sklearn.ensemble._bagging.BaggingRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrices are accepted only if\nthey are supported by the base estimator." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the estimators in the ensemble.", + "docstring": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the estimators in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. 
Sparse matrices are accepted only if\n they are supported by the base estimator.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted values.", + "code": " def predict(self, X):\n \"\"\"Predict regression target for X.\n\n The predicted regression target of an input sample is computed as the\n mean predicted regression targets of the estimators in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n The predicted values.\n \"\"\"\n check_is_fitted(self)\n # Check data\n X = check_array(\n X, accept_sparse=['csr', 'csc'], dtype=None,\n force_all_finite=False\n )\n\n # Parallel loop\n n_jobs, n_estimators, starts = _partition_estimators(self.n_estimators,\n self.n_jobs)\n\n all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(\n delayed(_parallel_predict_regression)(\n self.estimators_[starts[i]:starts[i + 1]],\n self.estimators_features_[starts[i]:starts[i + 1]],\n X)\n for i in range(n_jobs))\n\n # Reduce\n y_hat = sum(all_y_hat) / self.n_estimators\n\n return y_hat" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._bagging.BaseBagging.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.base_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.n_estimators", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.max_samples", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__/max_features", + "name": "max_features", + "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.max_features", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__/bootstrap", + "name": "bootstrap", + "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.bootstrap", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + 
"default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__/bootstrap_features", + "name": "bootstrap_features", + "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.bootstrap_features", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__/oob_score", + "name": "oob_score", + "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.oob_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._bagging.BaseBagging.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for Bagging meta-estimator.\n\nWarning: This class should not be used directly. 
Use derived classes\ninstead.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self,\n base_estimator=None,\n n_estimators=10, *,\n max_samples=1.0,\n max_features=1.0,\n bootstrap=True,\n bootstrap_features=False,\n oob_score=False,\n warm_start=False,\n n_jobs=None,\n random_state=None,\n verbose=0):\n super().__init__(\n base_estimator=base_estimator,\n n_estimators=n_estimators)\n\n self.max_samples = max_samples\n self.max_features = max_features\n self.bootstrap = bootstrap\n self.bootstrap_features = bootstrap_features\n self.oob_score = oob_score\n self.warm_start = warm_start\n self.n_jobs = n_jobs\n self.random_state = random_state\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_fit", + "name": "_fit", + "qname": "sklearn.ensemble._bagging.BaseBagging._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_fit/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaseBagging._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_fit/X", + "name": "X", + "qname": "sklearn.ensemble._bagging.BaseBagging._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrices are accepted only if\nthey are supported by the base estimator." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_fit/y", + "name": "y", + "qname": "sklearn.ensemble._bagging.BaseBagging._fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target values (class labels in classification, real numbers in\nregression)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_fit/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._bagging.BaseBagging._fit.max_samples", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "None", + "description": "Argument to use instead of self.max_samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_fit/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._bagging.BaseBagging._fit.max_depth", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Override value used when constructing base estimator. Only\nsupported if the base estimator has a max_depth parameter." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._bagging.BaseBagging._fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted.\nNote that this is supported only if the base estimator supports\nsample weighting." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build a Bagging ensemble of estimators from the training\n set (X, y).", + "docstring": "Build a Bagging ensemble of estimators from the training\n set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nmax_samples : int or float, default=None\n Argument to use instead of self.max_samples.\n\nmax_depth : int, default=None\n Override value used when constructing base estimator. Only\n supported if the base estimator has a max_depth parameter.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if the base estimator supports\n sample weighting.\n\nReturns\n-------\nself : object", + "code": " def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None):\n \"\"\"Build a Bagging ensemble of estimators from the training\n set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n y : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\n max_samples : int or float, default=None\n Argument to use instead of self.max_samples.\n\n max_depth : int, default=None\n Override value used when constructing base estimator. Only\n supported if the base estimator has a max_depth parameter.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n Note that this is supported only if the base estimator supports\n sample weighting.\n\n Returns\n -------\n self : object\n \"\"\"\n random_state = check_random_state(self.random_state)\n\n # Convert data (X is required to be 2d and indexable)\n X, y = self._validate_data(\n X, y, accept_sparse=['csr', 'csc'], dtype=None,\n force_all_finite=False, multi_output=True\n )\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X, dtype=None)\n\n # Remap output\n n_samples, self.n_features_ = X.shape\n self._n_samples = n_samples\n y = self._validate_y(y)\n\n # Check parameters\n self._validate_estimator()\n\n if max_depth is not None:\n self.base_estimator_.max_depth = max_depth\n\n # Validate max_samples\n if max_samples is None:\n max_samples = self.max_samples\n elif not isinstance(max_samples, numbers.Integral):\n max_samples = int(max_samples * X.shape[0])\n\n if not (0 < max_samples <= X.shape[0]):\n raise ValueError(\"max_samples must be in (0, n_samples]\")\n\n # Store validated integer row sampling value\n self._max_samples = max_samples\n\n # Validate max_features\n if isinstance(self.max_features, numbers.Integral):\n max_features = self.max_features\n elif isinstance(self.max_features, float):\n max_features = self.max_features * self.n_features_\n else:\n raise ValueError(\"max_features must be int or float\")\n\n if not (0 < max_features <= self.n_features_):\n raise ValueError(\"max_features must be in (0, n_features]\")\n\n max_features = max(1, int(max_features))\n\n # Store validated integer feature sampling value\n self._max_features = max_features\n\n # Other checks\n if not self.bootstrap and self.oob_score:\n raise ValueError(\"Out of bag estimation only available\"\n \" if bootstrap=True\")\n\n if self.warm_start and self.oob_score:\n raise ValueError(\"Out of bag estimate only available\"\n \" if warm_start=False\")\n\n if hasattr(self, \"oob_score_\") and self.warm_start:\n del self.oob_score_\n\n if not self.warm_start or not hasattr(self, 'estimators_'):\n # Free allocated memory, if any\n self.estimators_ = []\n self.estimators_features_ = []\n\n n_more_estimators = self.n_estimators - len(self.estimators_)\n\n if n_more_estimators < 0:\n raise ValueError('n_estimators=%d must be larger or equal to '\n 'len(estimators_)=%d when warm_start==True'\n % (self.n_estimators, len(self.estimators_)))\n\n elif n_more_estimators == 0:\n warn(\"Warm-start fitting without increasing n_estimators does not \"\n \"fit new trees.\")\n return self\n\n # Parallel loop\n n_jobs, n_estimators, starts = _partition_estimators(n_more_estimators,\n self.n_jobs)\n total_n_estimators = sum(n_estimators)\n\n # Advance random state to state after training\n # the first n_estimators\n if self.warm_start and len(self.estimators_) > 0:\n random_state.randint(MAX_INT, size=len(self.estimators_))\n\n seeds = random_state.randint(MAX_INT, size=n_more_estimators)\n self._seeds = seeds\n\n all_results = Parallel(n_jobs=n_jobs, verbose=self.verbose,\n **self._parallel_args())(\n delayed(_parallel_build_estimators)(\n n_estimators[i],\n self,\n X,\n y,\n sample_weight,\n seeds[starts[i]:starts[i + 1]],\n total_n_estimators,\n verbose=self.verbose)\n for i in range(n_jobs))\n\n # Reduce\n self.estimators_ += list(itertools.chain.from_iterable(\n t[0] for t in all_results))\n self.estimators_features_ += list(itertools.chain.from_iterable(\n t[1] for t in all_results))\n\n if self.oob_score:\n self._set_oob_score(X, y)\n\n return 
self" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_get_estimators_indices", + "name": "_get_estimators_indices", + "qname": "sklearn.ensemble._bagging.BaseBagging._get_estimators_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_get_estimators_indices/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaseBagging._get_estimators_indices.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_estimators_indices(self):\n # Get drawn indices along both sample and feature axes\n for seed in self._seeds:\n # Operations accessing random_state must be performed identically\n # to those in `_parallel_build_estimators()`\n feature_indices, sample_indices = _generate_bagging_indices(\n seed, self.bootstrap_features, self.bootstrap,\n self.n_features_, self._n_samples, self._max_features,\n self._max_samples)\n\n yield feature_indices, sample_indices" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_parallel_args", + "name": "_parallel_args", + "qname": "sklearn.ensemble._bagging.BaseBagging._parallel_args", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_parallel_args/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaseBagging._parallel_args.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _parallel_args(self):\n return {}" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_set_oob_score", + "name": "_set_oob_score", + "qname": "sklearn.ensemble._bagging.BaseBagging._set_oob_score", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_set_oob_score/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaseBagging._set_oob_score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_set_oob_score/X", + "name": "X", + "qname": "sklearn.ensemble._bagging.BaseBagging._set_oob_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_set_oob_score/y", + "name": "y", + "qname": "sklearn.ensemble._bagging.BaseBagging._set_oob_score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate out of bag predictions and score.", + "docstring": "Calculate out of bag predictions and score.", + "code": " @abstractmethod\n def _set_oob_score(self, X, y):\n \"\"\"Calculate out of bag predictions and score.\"\"\"" + }, + { + "id": 
"scikit-learn/sklearn.ensemble._bagging/BaseBagging/_validate_y", + "name": "_validate_y", + "qname": "sklearn.ensemble._bagging.BaseBagging._validate_y", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_validate_y/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaseBagging._validate_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/_validate_y/y", + "name": "y", + "qname": "sklearn.ensemble._bagging.BaseBagging._validate_y.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_y(self, y):\n if len(y.shape) == 1 or y.shape[1] == 1:\n return column_or_1d(y, warn=True)\n else:\n return y" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/estimators_samples_@getter", + "name": "estimators_samples_", + "qname": "sklearn.ensemble._bagging.BaseBagging.estimators_samples_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/estimators_samples_/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaseBagging.estimators_samples_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "The subset of drawn samples for each base estimator.\n\nReturns a dynamically generated list of indices identifying\nthe samples used for fitting each member of the ensemble, i.e.,\nthe in-bag samples.\n\nNote: the list is re-created at each call to the property in order\nto reduce the object memory footprint by not storing the sampling\ndata. Thus fetching the property may be slower than expected.", + "docstring": "The subset of drawn samples for each base estimator.\n\nReturns a dynamically generated list of indices identifying\nthe samples used for fitting each member of the ensemble, i.e.,\nthe in-bag samples.\n\nNote: the list is re-created at each call to the property in order\nto reduce the object memory footprint by not storing the sampling\ndata. Thus fetching the property may be slower than expected.", + "code": " @property\n def estimators_samples_(self):\n \"\"\"\n The subset of drawn samples for each base estimator.\n\n Returns a dynamically generated list of indices identifying\n the samples used for fitting each member of the ensemble, i.e.,\n the in-bag samples.\n\n Note: the list is re-created at each call to the property in order\n to reduce the object memory footprint by not storing the sampling\n data. 
Thus fetching the property may be slower than expected.\n \"\"\"\n return [sample_indices\n for _, sample_indices in self._get_estimators_indices()]" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/fit", + "name": "fit", + "qname": "sklearn.ensemble._bagging.BaseBagging.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/fit/self", + "name": "self", + "qname": "sklearn.ensemble._bagging.BaseBagging.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/fit/X", + "name": "X", + "qname": "sklearn.ensemble._bagging.BaseBagging.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrices are accepted only if\nthey are supported by the base estimator." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/fit/y", + "name": "y", + "qname": "sklearn.ensemble._bagging.BaseBagging.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target values (class labels in classification, real numbers in\nregression)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/BaseBagging/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._bagging.BaseBagging.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted.\nNote that this is supported only if the base estimator supports\nsample weighting." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build a Bagging ensemble of estimators from the training\n set (X, y).", + "docstring": "Build a Bagging ensemble of estimators from the training\n set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if the base estimator supports\n sample weighting.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Build a Bagging ensemble of estimators from the training\n set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. 
Sparse matrices are accepted only if\n they are supported by the base estimator.\n\n y : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if the base estimator supports\n sample weighting.\n\n Returns\n -------\n self : object\n \"\"\"\n return self._fit(X, y, self.max_samples, sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_generate_bagging_indices", + "name": "_generate_bagging_indices", + "qname": "sklearn.ensemble._bagging._generate_bagging_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/_generate_bagging_indices/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._bagging._generate_bagging_indices.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_generate_bagging_indices/bootstrap_features", + "name": "bootstrap_features", + "qname": "sklearn.ensemble._bagging._generate_bagging_indices.bootstrap_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_generate_bagging_indices/bootstrap_samples", + "name": "bootstrap_samples", + "qname": "sklearn.ensemble._bagging._generate_bagging_indices.bootstrap_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_generate_bagging_indices/n_features", + "name": "n_features", + "qname": "sklearn.ensemble._bagging._generate_bagging_indices.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_generate_bagging_indices/n_samples", + "name": "n_samples", + "qname": "sklearn.ensemble._bagging._generate_bagging_indices.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_generate_bagging_indices/max_features", + "name": "max_features", + "qname": "sklearn.ensemble._bagging._generate_bagging_indices.max_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_generate_bagging_indices/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._bagging._generate_bagging_indices.max_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Randomly draw feature and sample 
indices.", + "docstring": "Randomly draw feature and sample indices.", + "code": "def _generate_bagging_indices(random_state, bootstrap_features,\n bootstrap_samples, n_features, n_samples,\n max_features, max_samples):\n \"\"\"Randomly draw feature and sample indices.\"\"\"\n # Get valid random state\n random_state = check_random_state(random_state)\n\n # Draw indices\n feature_indices = _generate_indices(random_state, bootstrap_features,\n n_features, max_features)\n sample_indices = _generate_indices(random_state, bootstrap_samples,\n n_samples, max_samples)\n\n return feature_indices, sample_indices" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_generate_indices", + "name": "_generate_indices", + "qname": "sklearn.ensemble._bagging._generate_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/_generate_indices/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._bagging._generate_indices.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_generate_indices/bootstrap", + "name": "bootstrap", + "qname": "sklearn.ensemble._bagging._generate_indices.bootstrap", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_generate_indices/n_population", + "name": "n_population", + "qname": "sklearn.ensemble._bagging._generate_indices.n_population", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_generate_indices/n_samples", + "name": "n_samples", + "qname": "sklearn.ensemble._bagging._generate_indices.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Draw randomly sampled indices.", + "docstring": "Draw randomly sampled indices.", + "code": "def _generate_indices(random_state, bootstrap, n_population, n_samples):\n \"\"\"Draw randomly sampled indices.\"\"\"\n # Draw sample indices\n if bootstrap:\n indices = random_state.randint(0, n_population, n_samples)\n else:\n indices = sample_without_replacement(n_population, n_samples,\n random_state=random_state)\n\n return indices" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_build_estimators", + "name": "_parallel_build_estimators", + "qname": "sklearn.ensemble._bagging._parallel_build_estimators", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_build_estimators/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._bagging._parallel_build_estimators.n_estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_build_estimators/ensemble", + "name": "ensemble", + "qname": 
"sklearn.ensemble._bagging._parallel_build_estimators.ensemble", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_build_estimators/X", + "name": "X", + "qname": "sklearn.ensemble._bagging._parallel_build_estimators.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_build_estimators/y", + "name": "y", + "qname": "sklearn.ensemble._bagging._parallel_build_estimators.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_build_estimators/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._bagging._parallel_build_estimators.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_build_estimators/seeds", + "name": "seeds", + "qname": "sklearn.ensemble._bagging._parallel_build_estimators.seeds", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_build_estimators/total_n_estimators", + "name": "total_n_estimators", + "qname": "sklearn.ensemble._bagging._parallel_build_estimators.total_n_estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_build_estimators/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._bagging._parallel_build_estimators.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private function used to build a batch of estimators within a job.", + "docstring": "Private function used to build a batch of estimators within a job.", + "code": "def _parallel_build_estimators(n_estimators, ensemble, X, y, sample_weight,\n seeds, total_n_estimators, verbose):\n \"\"\"Private function used to build a batch of estimators within a job.\"\"\"\n # Retrieve settings\n n_samples, n_features = X.shape\n max_features = ensemble._max_features\n max_samples = ensemble._max_samples\n bootstrap = ensemble.bootstrap\n bootstrap_features = ensemble.bootstrap_features\n support_sample_weight = has_fit_parameter(ensemble.base_estimator_,\n \"sample_weight\")\n if not support_sample_weight and sample_weight is not None:\n raise ValueError(\"The base estimator doesn't support sample weight\")\n\n # Build estimators\n estimators = []\n estimators_features = []\n\n for i in range(n_estimators):\n if verbose > 1:\n print(\"Building estimator %d of %d for this parallel run \"\n \"(total %d)...\" % (i + 1, 
n_estimators, total_n_estimators))\n\n random_state = seeds[i]\n estimator = ensemble._make_estimator(append=False,\n random_state=random_state)\n\n # Draw random feature, sample indices\n features, indices = _generate_bagging_indices(random_state,\n bootstrap_features,\n bootstrap, n_features,\n n_samples, max_features,\n max_samples)\n\n # Draw samples, using sample weights, and then fit\n if support_sample_weight:\n if sample_weight is None:\n curr_sample_weight = np.ones((n_samples,))\n else:\n curr_sample_weight = sample_weight.copy()\n\n if bootstrap:\n sample_counts = np.bincount(indices, minlength=n_samples)\n curr_sample_weight *= sample_counts\n else:\n not_indices_mask = ~indices_to_mask(indices, n_samples)\n curr_sample_weight[not_indices_mask] = 0\n\n estimator.fit(X[:, features], y, sample_weight=curr_sample_weight)\n\n else:\n estimator.fit((X[indices])[:, features], y[indices])\n\n estimators.append(estimator)\n estimators_features.append(features)\n\n return estimators, estimators_features" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_decision_function", + "name": "_parallel_decision_function", + "qname": "sklearn.ensemble._bagging._parallel_decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_decision_function/estimators", + "name": "estimators", + "qname": "sklearn.ensemble._bagging._parallel_decision_function.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_decision_function/estimators_features", + "name": "estimators_features", + "qname": "sklearn.ensemble._bagging._parallel_decision_function.estimators_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_decision_function/X", + "name": "X", + "qname": "sklearn.ensemble._bagging._parallel_decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private function used to compute decisions within a job.", + "docstring": "Private function used to compute decisions within a job.", + "code": "def _parallel_decision_function(estimators, estimators_features, X):\n \"\"\"Private function used to compute decisions within a job.\"\"\"\n return sum(estimator.decision_function(X[:, features])\n for estimator, features in zip(estimators,\n estimators_features))" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_log_proba", + "name": "_parallel_predict_log_proba", + "qname": "sklearn.ensemble._bagging._parallel_predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_log_proba/estimators", + "name": "estimators", + "qname": "sklearn.ensemble._bagging._parallel_predict_log_proba.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.ensemble._bagging/_parallel_predict_log_proba/estimators_features", + "name": "estimators_features", + "qname": "sklearn.ensemble._bagging._parallel_predict_log_proba.estimators_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_log_proba/X", + "name": "X", + "qname": "sklearn.ensemble._bagging._parallel_predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_log_proba/n_classes", + "name": "n_classes", + "qname": "sklearn.ensemble._bagging._parallel_predict_log_proba.n_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private function used to compute log probabilities within a job.", + "docstring": "Private function used to compute log probabilities within a job.", + "code": "def _parallel_predict_log_proba(estimators, estimators_features, X, n_classes):\n \"\"\"Private function used to compute log probabilities within a job.\"\"\"\n n_samples = X.shape[0]\n log_proba = np.empty((n_samples, n_classes))\n log_proba.fill(-np.inf)\n all_classes = np.arange(n_classes, dtype=int)\n\n for estimator, features in zip(estimators, estimators_features):\n log_proba_estimator = estimator.predict_log_proba(X[:, features])\n\n if n_classes == len(estimator.classes_):\n log_proba = np.logaddexp(log_proba, log_proba_estimator)\n\n else:\n log_proba[:, estimator.classes_] = np.logaddexp(\n log_proba[:, estimator.classes_],\n log_proba_estimator[:, range(len(estimator.classes_))])\n\n missing = np.setdiff1d(all_classes, estimator.classes_)\n log_proba[:, missing] = np.logaddexp(log_proba[:, missing],\n -np.inf)\n\n return log_proba" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_proba", + "name": "_parallel_predict_proba", + "qname": "sklearn.ensemble._bagging._parallel_predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_proba/estimators", + "name": "estimators", + "qname": "sklearn.ensemble._bagging._parallel_predict_proba.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_proba/estimators_features", + "name": "estimators_features", + "qname": "sklearn.ensemble._bagging._parallel_predict_proba.estimators_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_proba/X", + "name": "X", + "qname": "sklearn.ensemble._bagging._parallel_predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + 
"id": "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_proba/n_classes", + "name": "n_classes", + "qname": "sklearn.ensemble._bagging._parallel_predict_proba.n_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private function used to compute (proba-)predictions within a job.", + "docstring": "Private function used to compute (proba-)predictions within a job.", + "code": "def _parallel_predict_proba(estimators, estimators_features, X, n_classes):\n \"\"\"Private function used to compute (proba-)predictions within a job.\"\"\"\n n_samples = X.shape[0]\n proba = np.zeros((n_samples, n_classes))\n\n for estimator, features in zip(estimators, estimators_features):\n if hasattr(estimator, \"predict_proba\"):\n proba_estimator = estimator.predict_proba(X[:, features])\n\n if n_classes == len(estimator.classes_):\n proba += proba_estimator\n\n else:\n proba[:, estimator.classes_] += \\\n proba_estimator[:, range(len(estimator.classes_))]\n\n else:\n # Resort to voting\n predictions = estimator.predict(X[:, features])\n\n for i in range(n_samples):\n proba[i, predictions[i]] += 1\n\n return proba" + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_regression", + "name": "_parallel_predict_regression", + "qname": "sklearn.ensemble._bagging._parallel_predict_regression", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_regression/estimators", + "name": "estimators", + "qname": "sklearn.ensemble._bagging._parallel_predict_regression.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_regression/estimators_features", + "name": "estimators_features", + "qname": "sklearn.ensemble._bagging._parallel_predict_regression.estimators_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._bagging/_parallel_predict_regression/X", + "name": "X", + "qname": "sklearn.ensemble._bagging._parallel_predict_regression.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private function used to compute predictions within a job.", + "docstring": "Private function used to compute predictions within a job.", + "code": "def _parallel_predict_regression(estimators, estimators_features, X):\n \"\"\"Private function used to compute predictions within a job.\"\"\"\n return sum(estimator.predict(X[:, features])\n for estimator, features in zip(estimators,\n estimators_features))" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__getitem__", + "name": "__getitem__", + "qname": "sklearn.ensemble._base.BaseEnsemble.__getitem__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__getitem__/self", + "name": "self", + "qname": 
"sklearn.ensemble._base.BaseEnsemble.__getitem__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__getitem__/index", + "name": "index", + "qname": "sklearn.ensemble._base.BaseEnsemble.__getitem__.index", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the index'th estimator in the ensemble.", + "docstring": "Return the index'th estimator in the ensemble.", + "code": " def __getitem__(self, index):\n \"\"\"Return the index'th estimator in the ensemble.\"\"\"\n return self.estimators_[index]" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._base.BaseEnsemble.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._base.BaseEnsemble.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.ensemble._base.BaseEnsemble.__init__.base_estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "The base estimator from which the ensemble is built." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._base.BaseEnsemble.__init__.n_estimators", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "The number of estimators in the ensemble." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__init__/estimator_params", + "name": "estimator_params", + "qname": "sklearn.ensemble._base.BaseEnsemble.__init__.estimator_params", + "default_value": "tuple()", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of str", + "default_value": "tuple()", + "description": "The list of attributes to use as parameters when instantiating a\nnew base estimator. If none are given, default parameters are used." + }, + "type": { + "kind": "NamedType", + "name": "list of str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for all ensemble classes.\n\nWarning: This class should not be used directly. 
Use derived classes\ninstead.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self, base_estimator, *, n_estimators=10,\n estimator_params=tuple()):\n # Set parameters\n self.base_estimator = base_estimator\n self.n_estimators = n_estimators\n self.estimator_params = estimator_params" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__iter__", + "name": "__iter__", + "qname": "sklearn.ensemble._base.BaseEnsemble.__iter__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__iter__/self", + "name": "self", + "qname": "sklearn.ensemble._base.BaseEnsemble.__iter__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return iterator over estimators in the ensemble.", + "docstring": "Return iterator over estimators in the ensemble.", + "code": " def __iter__(self):\n \"\"\"Return iterator over estimators in the ensemble.\"\"\"\n return iter(self.estimators_)" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__len__", + "name": "__len__", + "qname": "sklearn.ensemble._base.BaseEnsemble.__len__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/__len__/self", + "name": "self", + "qname": "sklearn.ensemble._base.BaseEnsemble.__len__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the number of estimators in the ensemble.", + "docstring": "Return the number of estimators in the ensemble.", + "code": " def __len__(self):\n \"\"\"Return the number of estimators in the ensemble.\"\"\"\n return len(self.estimators_)" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/_make_estimator", + "name": "_make_estimator", + "qname": "sklearn.ensemble._base.BaseEnsemble._make_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/_make_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._base.BaseEnsemble._make_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/_make_estimator/append", + "name": "append", + "qname": "sklearn.ensemble._base.BaseEnsemble._make_estimator.append", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/_make_estimator/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._base.BaseEnsemble._make_estimator.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Make and configure a copy of the `base_estimator_` attribute.\n\nWarning: This method should be used to properly instantiate 
new\nsub-estimators.", + "docstring": "Make and configure a copy of the `base_estimator_` attribute.\n\nWarning: This method should be used to properly instantiate new\nsub-estimators.", + "code": " def _make_estimator(self, append=True, random_state=None):\n \"\"\"Make and configure a copy of the `base_estimator_` attribute.\n\n Warning: This method should be used to properly instantiate new\n sub-estimators.\n \"\"\"\n estimator = clone(self.base_estimator_)\n estimator.set_params(**{p: getattr(self, p)\n for p in self.estimator_params})\n\n if random_state is not None:\n _set_random_states(estimator, random_state)\n\n if append:\n self.estimators_.append(estimator)\n\n return estimator" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/_validate_estimator", + "name": "_validate_estimator", + "qname": "sklearn.ensemble._base.BaseEnsemble._validate_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/_validate_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._base.BaseEnsemble._validate_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/BaseEnsemble/_validate_estimator/default", + "name": "default", + "qname": "sklearn.ensemble._base.BaseEnsemble._validate_estimator.default", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the estimator and the n_estimator attribute.\n\nSets the base_estimator_` attributes.", + "docstring": "Check the estimator and the n_estimator attribute.\n\nSets the base_estimator_` attributes.", + "code": " def _validate_estimator(self, default=None):\n \"\"\"Check the estimator and the n_estimator attribute.\n\n Sets the base_estimator_` attributes.\n \"\"\"\n if not isinstance(self.n_estimators, numbers.Integral):\n raise ValueError(\"n_estimators must be an integer, \"\n \"got {0}.\".format(type(self.n_estimators)))\n\n if self.n_estimators <= 0:\n raise ValueError(\"n_estimators must be greater than zero, \"\n \"got {0}.\".format(self.n_estimators))\n\n if self.base_estimator is not None:\n self.base_estimator_ = self.base_estimator\n else:\n self.base_estimator_ = default\n\n if self.base_estimator_ is None:\n raise ValueError(\"base_estimator cannot be None\")" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/__init__/estimators", + "name": "estimators", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble.__init__.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": 
"list of (str, estimator) tuples", + "default_value": "", + "description": "The ensemble of estimators to use in the ensemble. Each element of the\nlist is defined as a tuple of string (i.e. name of the estimator) and\nan estimator instance. An estimator can be set to `'drop'` using\n`set_params`." + }, + "type": { + "kind": "NamedType", + "name": "list of (str, estimator) tuples" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for heterogeneous ensemble of learners.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self, estimators):\n self.estimators = estimators" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/_validate_estimators", + "name": "_validate_estimators", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble._validate_estimators", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/_validate_estimators/self", + "name": "self", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble._validate_estimators.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_estimators(self):\n if self.estimators is None or len(self.estimators) == 0:\n raise ValueError(\n \"Invalid 'estimators' attribute, 'estimators' should be a list\"\n \" of (string, estimator) tuples.\"\n )\n names, estimators = zip(*self.estimators)\n # defined by MetaEstimatorMixin\n self._validate_names(names)\n\n has_estimator = any(est != 'drop' for est in estimators)\n if not has_estimator:\n raise ValueError(\n \"All estimators are dropped. At least one is required \"\n \"to be an estimator.\"\n )\n\n is_estimator_type = (is_classifier if is_classifier(self)\n else is_regressor)\n\n for est in estimators:\n if est != 'drop' and not is_estimator_type(est):\n raise ValueError(\n \"The estimator {} should be a {}.\".format(\n est.__class__.__name__, is_estimator_type.__name__[3:]\n )\n )\n\n return names, estimators" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/get_params", + "name": "get_params", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble.get_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/get_params/self", + "name": "self", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble.get_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/get_params/deep", + "name": "deep", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble.get_params.deep", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Setting it to True gets the various estimators and the parameters\nof the estimators as well." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get the parameters of an estimator from the ensemble.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `estimators` parameter.", + "docstring": "Get the parameters of an estimator from the ensemble.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `estimators` parameter.\n\nParameters\n----------\ndeep : bool, default=True\n Setting it to True gets the various estimators and the parameters\n of the estimators as well.", + "code": " def get_params(self, deep=True):\n \"\"\"\n Get the parameters of an estimator from the ensemble.\n\n Returns the parameters given in the constructor as well as the\n estimators contained within the `estimators` parameter.\n\n Parameters\n ----------\n deep : bool, default=True\n Setting it to True gets the various estimators and the parameters\n of the estimators as well.\n \"\"\"\n return super()._get_params('estimators', deep=deep)" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/named_estimators@getter", + "name": "named_estimators", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble.named_estimators", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/named_estimators/self", + "name": "self", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble.named_estimators.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def named_estimators(self):\n return Bunch(**dict(self.estimators))" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/set_params", + "name": "set_params", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble.set_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/set_params/self", + "name": "self", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble.set_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_BaseHeterogeneousEnsemble/set_params/params", + "name": "params", + "qname": "sklearn.ensemble._base._BaseHeterogeneousEnsemble.set_params.params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "keyword arguments", + "default_value": "", + "description": "Specific parameters using e.g.\n`set_params(parameter_name=new_value)`. In addition, to setting the\nparameters of the estimator, the individual estimator of the\nestimators can also be set, or can be removed by setting them to\n'drop'." + }, + "type": { + "kind": "NamedType", + "name": "keyword arguments" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Set the parameters of an estimator from the ensemble.\n\nValid parameter keys can be listed with `get_params()`. 
Note that you\ncan directly set the parameters of the estimators contained in\n`estimators`.", + "docstring": "Set the parameters of an estimator from the ensemble.\n\nValid parameter keys can be listed with `get_params()`. Note that you\ncan directly set the parameters of the estimators contained in\n`estimators`.\n\nParameters\n----------\n**params : keyword arguments\n Specific parameters using e.g.\n `set_params(parameter_name=new_value)`. In addition, to setting the\n parameters of the estimator, the individual estimator of the\n estimators can also be set, or can be removed by setting them to\n 'drop'.", + "code": " def set_params(self, **params):\n \"\"\"\n Set the parameters of an estimator from the ensemble.\n\n Valid parameter keys can be listed with `get_params()`. Note that you\n can directly set the parameters of the estimators contained in\n `estimators`.\n\n Parameters\n ----------\n **params : keyword arguments\n Specific parameters using e.g.\n `set_params(parameter_name=new_value)`. In addition, to setting the\n parameters of the estimator, the individual estimator of the\n estimators can also be set, or can be removed by setting them to\n 'drop'.\n \"\"\"\n super()._set_params('estimators', **params)\n return self" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_fit_single_estimator", + "name": "_fit_single_estimator", + "qname": "sklearn.ensemble._base._fit_single_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/_fit_single_estimator/estimator", + "name": "estimator", + "qname": "sklearn.ensemble._base._fit_single_estimator.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_fit_single_estimator/X", + "name": "X", + "qname": "sklearn.ensemble._base._fit_single_estimator.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_fit_single_estimator/y", + "name": "y", + "qname": "sklearn.ensemble._base._fit_single_estimator.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_fit_single_estimator/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._base._fit_single_estimator.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_fit_single_estimator/message_clsname", + "name": "message_clsname", + "qname": "sklearn.ensemble._base._fit_single_estimator.message_clsname", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_fit_single_estimator/message", + "name": "message", + "qname": "sklearn.ensemble._base._fit_single_estimator.message", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
"description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private function used to fit an estimator within a job.", + "docstring": "Private function used to fit an estimator within a job.", + "code": "def _fit_single_estimator(estimator, X, y, sample_weight=None,\n message_clsname=None, message=None):\n \"\"\"Private function used to fit an estimator within a job.\"\"\"\n if sample_weight is not None:\n try:\n with _print_elapsed_time(message_clsname, message):\n estimator.fit(X, y, sample_weight=sample_weight)\n except TypeError as exc:\n if \"unexpected keyword argument 'sample_weight'\" in str(exc):\n raise TypeError(\n \"Underlying estimator {} does not support sample weights.\"\n .format(estimator.__class__.__name__)\n ) from exc\n raise\n else:\n with _print_elapsed_time(message_clsname, message):\n estimator.fit(X, y)\n return estimator" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_partition_estimators", + "name": "_partition_estimators", + "qname": "sklearn.ensemble._base._partition_estimators", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/_partition_estimators/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._base._partition_estimators.n_estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_partition_estimators/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._base._partition_estimators.n_jobs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private function used to partition estimators between jobs.", + "docstring": "Private function used to partition estimators between jobs.", + "code": "def _partition_estimators(n_estimators, n_jobs):\n \"\"\"Private function used to partition estimators between jobs.\"\"\"\n # Compute the number of jobs\n n_jobs = min(effective_n_jobs(n_jobs), n_estimators)\n\n # Partition estimators between jobs\n n_estimators_per_job = np.full(n_jobs, n_estimators // n_jobs,\n dtype=int)\n n_estimators_per_job[:n_estimators % n_jobs] += 1\n starts = np.cumsum(n_estimators_per_job)\n\n return n_jobs, n_estimators_per_job.tolist(), [0] + starts.tolist()" + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_set_random_states", + "name": "_set_random_states", + "qname": "sklearn.ensemble._base._set_random_states", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._base/_set_random_states/estimator", + "name": "estimator", + "qname": "sklearn.ensemble._base._set_random_states.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator supporting get/set_params", + "default_value": "", + "description": "Estimator with potential randomness managed by random_state\nparameters." 
+ }, + "type": { + "kind": "NamedType", + "name": "estimator supporting get/set_params" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._base/_set_random_states/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._base._set_random_states.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Pseudo-random number generator to control the generation of the random\nintegers. Pass an int for reproducible output across multiple function\ncalls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Set fixed random_state parameters for an estimator.\n\nFinds all parameters ending ``random_state`` and sets them to integers\nderived from ``random_state``.", + "docstring": "Set fixed random_state parameters for an estimator.\n\nFinds all parameters ending ``random_state`` and sets them to integers\nderived from ``random_state``.\n\nParameters\n----------\nestimator : estimator supporting get/set_params\n Estimator with potential randomness managed by random_state\n parameters.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n integers. Pass an int for reproducible output across multiple function\n calls.\n See :term:`Glossary `.\n\nNotes\n-----\nThis does not necessarily set *all* ``random_state`` attributes that\ncontrol an estimator's randomness, only those accessible through\n``estimator.get_params()``. ``random_state``s not controlled include\nthose belonging to:\n\n * cross-validation splitters\n * ``scipy.stats`` rvs", + "code": "def _set_random_states(estimator, random_state=None):\n \"\"\"Set fixed random_state parameters for an estimator.\n\n Finds all parameters ending ``random_state`` and sets them to integers\n derived from ``random_state``.\n\n Parameters\n ----------\n estimator : estimator supporting get/set_params\n Estimator with potential randomness managed by random_state\n parameters.\n\n random_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n integers. Pass an int for reproducible output across multiple function\n calls.\n See :term:`Glossary `.\n\n Notes\n -----\n This does not necessarily set *all* ``random_state`` attributes that\n control an estimator's randomness, only those accessible through\n ``estimator.get_params()``. 
``random_state``s not controlled include\n those belonging to:\n\n * cross-validation splitters\n * ``scipy.stats`` rvs\n \"\"\"\n random_state = check_random_state(random_state)\n to_set = {}\n for key in sorted(estimator.get_params(deep=True)):\n if key == 'random_state' or key.endswith('__random_state'):\n to_set[key] = random_state.randint(np.iinfo(np.int32).max)\n\n if to_set:\n estimator.set_params(**to_set)" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._forest.BaseForest.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._forest.BaseForest.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.ensemble._forest.BaseForest.__init__.base_estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._forest.BaseForest.__init__.n_estimators", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/__init__/estimator_params", + "name": "estimator_params", + "qname": "sklearn.ensemble._forest.BaseForest.__init__.estimator_params", + "default_value": "tuple()", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/__init__/bootstrap", + "name": "bootstrap", + "qname": "sklearn.ensemble._forest.BaseForest.__init__.bootstrap", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/__init__/oob_score", + "name": "oob_score", + "qname": "sklearn.ensemble._forest.BaseForest.__init__.oob_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._forest.BaseForest.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._forest.BaseForest.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.ensemble._forest/BaseForest/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._forest.BaseForest.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._forest.BaseForest.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.ensemble._forest.BaseForest.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/__init__/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._forest.BaseForest.__init__.max_samples", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for forests of trees.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self,\n base_estimator,\n n_estimators=100, *,\n estimator_params=tuple(),\n bootstrap=False,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n class_weight=None,\n max_samples=None):\n super().__init__(\n base_estimator=base_estimator,\n n_estimators=n_estimators,\n estimator_params=estimator_params)\n\n self.bootstrap = bootstrap\n self.oob_score = oob_score\n self.n_jobs = n_jobs\n self.random_state = random_state\n self.verbose = verbose\n self.warm_start = warm_start\n self.class_weight = class_weight\n self.max_samples = max_samples" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/_set_oob_score", + "name": "_set_oob_score", + "qname": "sklearn.ensemble._forest.BaseForest._set_oob_score", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/_set_oob_score/self", + "name": "self", + "qname": "sklearn.ensemble._forest.BaseForest._set_oob_score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/_set_oob_score/X", + "name": "X", + "qname": "sklearn.ensemble._forest.BaseForest._set_oob_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/_set_oob_score/y", + "name": "y", + "qname": "sklearn.ensemble._forest.BaseForest._set_oob_score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": 
false, + "reexported_by": [], + "description": "Calculate out of bag predictions and score.", + "docstring": "Calculate out of bag predictions and score.", + "code": " @abstractmethod\n def _set_oob_score(self, X, y):\n \"\"\"\n Calculate out of bag predictions and score.\"\"\"" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/_validate_X_predict", + "name": "_validate_X_predict", + "qname": "sklearn.ensemble._forest.BaseForest._validate_X_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/_validate_X_predict/self", + "name": "self", + "qname": "sklearn.ensemble._forest.BaseForest._validate_X_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/_validate_X_predict/X", + "name": "X", + "qname": "sklearn.ensemble._forest.BaseForest._validate_X_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate X whenever one tries to predict, apply, predict_proba.", + "docstring": "Validate X whenever one tries to predict, apply, predict_proba.", + "code": " def _validate_X_predict(self, X):\n \"\"\"\n Validate X whenever one tries to predict, apply, predict_proba.\"\"\"\n check_is_fitted(self)\n\n return self.estimators_[0]._validate_X_predict(X, check_input=True)" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/_validate_y_class_weight", + "name": "_validate_y_class_weight", + "qname": "sklearn.ensemble._forest.BaseForest._validate_y_class_weight", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/_validate_y_class_weight/self", + "name": "self", + "qname": "sklearn.ensemble._forest.BaseForest._validate_y_class_weight.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/_validate_y_class_weight/y", + "name": "y", + "qname": "sklearn.ensemble._forest.BaseForest._validate_y_class_weight.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_y_class_weight(self, y):\n # Default implementation\n return y, None" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/apply", + "name": "apply", + "qname": "sklearn.ensemble._forest.BaseForest.apply", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/apply/self", + "name": "self", + "qname": "sklearn.ensemble._forest.BaseForest.apply.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/apply/X", + "name": "X", + "qname": "sklearn.ensemble._forest.BaseForest.apply.X", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, its dtype will be converted to\n``dtype=np.float32``. If a sparse matrix is provided, it will be\nconverted into a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply trees in the forest to X, return leaf indices.", + "docstring": "Apply trees in the forest to X, return leaf indices.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\nX_leaves : ndarray of shape (n_samples, n_estimators)\n For each datapoint x in X and for each tree in the forest,\n return the index of the leaf x ends up in.", + "code": " def apply(self, X):\n \"\"\"\n Apply trees in the forest to X, return leaf indices.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n X_leaves : ndarray of shape (n_samples, n_estimators)\n For each datapoint x in X and for each tree in the forest,\n return the index of the leaf x ends up in.\n \"\"\"\n X = self._validate_X_predict(X)\n results = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(prefer=\"threads\"))(\n delayed(tree.apply)(X, check_input=False)\n for tree in self.estimators_)\n\n return np.array(results).T" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/decision_path", + "name": "decision_path", + "qname": "sklearn.ensemble._forest.BaseForest.decision_path", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/decision_path/self", + "name": "self", + "qname": "sklearn.ensemble._forest.BaseForest.decision_path.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/decision_path/X", + "name": "X", + "qname": "sklearn.ensemble._forest.BaseForest.decision_path.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, its dtype will be converted to\n``dtype=np.float32``. If a sparse matrix is provided, it will be\nconverted into a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the decision path in the forest.\n\n.. versionadded:: 0.18", + "docstring": "Return the decision path in the forest.\n\n.. 
versionadded:: 0.18\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\nindicator : sparse matrix of shape (n_samples, n_nodes)\n Return a node indicator matrix where non zero elements indicates\n that the samples goes through the nodes. The matrix is of CSR\n format.\n\nn_nodes_ptr : ndarray of shape (n_estimators + 1,)\n The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]\n gives the indicator value for the i-th estimator.", + "code": " def decision_path(self, X):\n \"\"\"\n Return the decision path in the forest.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n indicator : sparse matrix of shape (n_samples, n_nodes)\n Return a node indicator matrix where non zero elements indicates\n that the samples goes through the nodes. The matrix is of CSR\n format.\n\n n_nodes_ptr : ndarray of shape (n_estimators + 1,)\n The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]\n gives the indicator value for the i-th estimator.\n\n \"\"\"\n X = self._validate_X_predict(X)\n indicators = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(prefer='threads'))(\n delayed(tree.decision_path)(X, check_input=False)\n for tree in self.estimators_)\n\n n_nodes = [0]\n n_nodes.extend([i.shape[1] for i in indicators])\n n_nodes_ptr = np.array(n_nodes).cumsum()\n\n return sparse_hstack(indicators).tocsr(), n_nodes_ptr" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/feature_importances_@getter", + "name": "feature_importances_", + "qname": "sklearn.ensemble._forest.BaseForest.feature_importances_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/feature_importances_/self", + "name": "self", + "qname": "sklearn.ensemble._forest.BaseForest.feature_importances_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature. It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.", + "docstring": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature. It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). 
See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n\nReturns\n-------\nfeature_importances_ : ndarray of shape (n_features,)\n The values of this array sum to 1, unless all trees are single node\n trees consisting of only the root node, in which case it will be an\n array of zeros.", + "code": " @property\n def feature_importances_(self):\n \"\"\"\n The impurity-based feature importances.\n\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n Returns\n -------\n feature_importances_ : ndarray of shape (n_features,)\n The values of this array sum to 1, unless all trees are single node\n trees consisting of only the root node, in which case it will be an\n array of zeros.\n \"\"\"\n check_is_fitted(self)\n\n all_importances = Parallel(n_jobs=self.n_jobs,\n **_joblib_parallel_args(prefer='threads'))(\n delayed(getattr)(tree, 'feature_importances_')\n for tree in self.estimators_ if tree.tree_.node_count > 1)\n\n if not all_importances:\n return np.zeros(self.n_features_, dtype=np.float64)\n\n all_importances = np.mean(all_importances,\n axis=0, dtype=np.float64)\n return all_importances / np.sum(all_importances)" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/fit", + "name": "fit", + "qname": "sklearn.ensemble._forest.BaseForest.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/fit/self", + "name": "self", + "qname": "sklearn.ensemble._forest.BaseForest.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/fit/X", + "name": "X", + "qname": "sklearn.ensemble._forest.BaseForest.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Internally, its dtype will be converted\nto ``dtype=np.float32``. If a sparse matrix is provided, it will be\nconverted into a sparse ``csc_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/fit/y", + "name": "y", + "qname": "sklearn.ensemble._forest.BaseForest.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "The target values (class labels in classification, real numbers in\nregression)." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/BaseForest/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._forest.BaseForest.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted. Splits\nthat would create child nodes with net zero or negative weight are\nignored while searching for a split in each node. In the case of\nclassification, splits are also ignored if they would result in any\nsingle class carrying a negative weight in either child node." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build a forest of trees from the training set (X, y).", + "docstring": "Build a forest of trees from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, its dtype will be converted\n to ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"\n Build a forest of trees from the training set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, its dtype will be converted\n to ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csc_matrix``.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels in classification, real numbers in\n regression).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. 
In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\n Returns\n -------\n self : object\n \"\"\"\n # Validate or convert input data\n if issparse(y):\n raise ValueError(\n \"sparse multilabel-indicator for y is not supported.\"\n )\n X, y = self._validate_data(X, y, multi_output=True,\n accept_sparse=\"csc\", dtype=DTYPE)\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n\n if issparse(X):\n # Pre-sort indices to avoid that each individual tree of the\n # ensemble sorts the indices.\n X.sort_indices()\n\n # Remap output\n self.n_features_ = X.shape[1]\n\n y = np.atleast_1d(y)\n if y.ndim == 2 and y.shape[1] == 1:\n warn(\"A column-vector y was passed when a 1d array was\"\n \" expected. Please change the shape of y to \"\n \"(n_samples,), for example using ravel().\",\n DataConversionWarning, stacklevel=2)\n\n if y.ndim == 1:\n # reshape is necessary to preserve the data contiguity against vs\n # [:, np.newaxis] that does not.\n y = np.reshape(y, (-1, 1))\n\n self.n_outputs_ = y.shape[1]\n\n y, expanded_class_weight = self._validate_y_class_weight(y)\n\n if getattr(y, \"dtype\", None) != DOUBLE or not y.flags.contiguous:\n y = np.ascontiguousarray(y, dtype=DOUBLE)\n\n if expanded_class_weight is not None:\n if sample_weight is not None:\n sample_weight = sample_weight * expanded_class_weight\n else:\n sample_weight = expanded_class_weight\n\n # Get bootstrap sample size\n n_samples_bootstrap = _get_n_samples_bootstrap(\n n_samples=X.shape[0],\n max_samples=self.max_samples\n )\n\n # Check parameters\n self._validate_estimator()\n\n if not self.bootstrap and self.oob_score:\n raise ValueError(\"Out of bag estimation only available\"\n \" if bootstrap=True\")\n\n random_state = check_random_state(self.random_state)\n\n if not self.warm_start or not hasattr(self, \"estimators_\"):\n # Free allocated memory, if any\n self.estimators_ = []\n\n n_more_estimators = self.n_estimators - len(self.estimators_)\n\n if n_more_estimators < 0:\n raise ValueError('n_estimators=%d must be larger or equal to '\n 'len(estimators_)=%d when warm_start==True'\n % (self.n_estimators, len(self.estimators_)))\n\n elif n_more_estimators == 0:\n warn(\"Warm-start fitting without increasing n_estimators does not \"\n \"fit new trees.\")\n else:\n if self.warm_start and len(self.estimators_) > 0:\n # We draw from the random state to get the random state we\n # would have got if we hadn't used a warm_start.\n random_state.randint(MAX_INT, size=len(self.estimators_))\n\n trees = [self._make_estimator(append=False,\n random_state=random_state)\n for i in range(n_more_estimators)]\n\n # Parallel loop: we prefer the threading backend as the Cython code\n # for fitting the trees is internally releasing the Python GIL\n # making threading more efficient than multiprocessing in\n # that case. 
However, for joblib 0.12+ we respect any\n # parallel_backend contexts set at a higher level,\n # since correctness does not rely on using threads.\n trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(prefer='threads'))(\n delayed(_parallel_build_trees)(\n t, self, X, y, sample_weight, i, len(trees),\n verbose=self.verbose, class_weight=self.class_weight,\n n_samples_bootstrap=n_samples_bootstrap)\n for i, t in enumerate(trees))\n\n # Collect newly grown trees\n self.estimators_.extend(trees)\n\n if self.oob_score:\n self._set_oob_score(X, y)\n\n # Decapsulate classes_ attributes\n if hasattr(self, \"classes_\") and self.n_outputs_ == 1:\n self.n_classes_ = self.n_classes_[0]\n self.classes_ = self.classes_[0]\n\n return self" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.n_estimators", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of trees in the forest.\n\n.. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/criterion", + "name": "criterion", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.criterion", + "default_value": "'gini'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"gini\", \"entropy\"}", + "default_value": "\"gini\"", + "description": "The function to measure the quality of a split. Supported criteria are\n\"gini\" for the Gini impurity and \"entropy\" for the information gain." + }, + "type": { + "kind": "EnumType", + "values": ["entropy", "gini"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.max_depth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum depth of the tree. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/min_samples_split", + "name": "min_samples_split", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.min_samples_split", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "2", + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.min_samples_leaf", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1", + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/min_weight_fraction_leaf", + "name": "min_weight_fraction_leaf", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.min_weight_fraction_leaf", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/max_features", + "name": "max_features", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.max_features", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"auto\", \"sqrt\", \"log2\"}, int or float", + "default_value": "\"auto\"", + "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n- If \"auto\", then `max_features=sqrt(n_features)`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "log2", "sqrt"] + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.max_leaf_nodes", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Grow trees with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/min_impurity_decrease", + "name": "min_impurity_decrease", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.min_impurity_decrease", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/min_impurity_split", + "name": "min_impurity_split", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.min_impurity_split", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Threshold for early stopping in tree growth. A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. 
deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/bootstrap", + "name": "bootstrap", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.bootstrap", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether bootstrap samples are used when building trees. If False, the\nwhole dataset is used to build each tree." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/oob_score", + "name": "oob_score", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.oob_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use out-of-bag samples to estimate the generalization score.\nOnly available if bootstrap=True." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n:meth:`decision_path` and :meth:`apply` are all parallelized over the\ntrees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\ncontext. ``-1`` means using all processors. See :term:`Glossary\n<n_jobs>` for more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls 3 sources of randomness:\n\n- the bootstrapping of the samples used when building trees\n (if ``bootstrap=True``)\n- the sampling of the features to consider when looking for the best\n split at each node (if ``max_features < n_features``)\n- the draw of the splits for each of the `max_features`\n\nSee :term:`Glossary <random_state>` for details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls the verbosity when fitting and predicting."
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"balanced\", \"balanced_subsample\"}, dict or list of dicts", + "default_value": "None", + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one. For\nmulti-output problems, a list of dicts can be provided in the same\norder as the columns of y.\n\nNote that for multioutput (including multilabel) weights should be\ndefined for each class of every column in its own dict. For example,\nfor four-class multilabel classification weights should be\n[{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n[{1:1}, {2:5}, {3:1}, {4:1}].\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``\n\nThe \"balanced_subsample\" mode is the same as \"balanced\" except that\nweights are computed based on the bootstrap sample for every tree\ngrown.\n\nFor multi-output, the weights of each column of y will be multiplied.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["balanced", "balanced_subsample"] + }, + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "list of dicts" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/ccp_alpha", + "name": "ccp_alpha", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.ccp_alpha", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "non-negative float", + "default_value": "0.0", + "description": "Complexity parameter used for Minimal Cost-Complexity Pruning. The\nsubtree with the largest cost complexity that is smaller than\n``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n:ref:`minimal_cost_complexity_pruning` for details.\n\n.. 
versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "non-negative float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesClassifier/__init__/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._forest.ExtraTreesClassifier.__init__.max_samples", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "None", + "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": 1.0, + "min_inclusive": false, + "max_inclusive": false + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "An extra-trees classifier.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self,\n n_estimators=100, *,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n bootstrap=False,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None):\n super().__init__(\n base_estimator=ExtraTreeClassifier(),\n n_estimators=n_estimators,\n estimator_params=(\"criterion\", \"max_depth\", \"min_samples_split\",\n \"min_samples_leaf\", \"min_weight_fraction_leaf\",\n \"max_features\", \"max_leaf_nodes\",\n \"min_impurity_decrease\", \"min_impurity_split\",\n \"random_state\", \"ccp_alpha\"),\n bootstrap=bootstrap,\n oob_score=oob_score,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n class_weight=class_weight,\n max_samples=max_samples)\n\n self.criterion = criterion\n self.max_depth = max_depth\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.max_features = max_features\n self.max_leaf_nodes = max_leaf_nodes\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.ccp_alpha = ccp_alpha" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.n_estimators", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of trees in the forest.\n\n.. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/criterion", + "name": "criterion", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.criterion", + "default_value": "'mse'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"mse\", \"mae\"}", + "default_value": "\"mse\"", + "description": "The function to measure the quality of a split. Supported criteria\nare \"mse\" for the mean squared error, which is equal to variance\nreduction as feature selection criterion, and \"mae\" for the mean\nabsolute error.\n\n.. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion." + }, + "type": { + "kind": "EnumType", + "values": ["mse", "mae"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.max_depth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum depth of the tree. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/min_samples_split", + "name": "min_samples_split", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.min_samples_split", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "2", + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.min_samples_leaf", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1", + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. 
This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/min_weight_fraction_leaf", + "name": "min_weight_fraction_leaf", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.min_weight_fraction_leaf", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/max_features", + "name": "max_features", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.max_features", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"auto\", \"sqrt\", \"log2\"}, int or float", + "default_value": "\"auto\"", + "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "log2", "sqrt"] + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.max_leaf_nodes", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Grow trees with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/min_impurity_decrease", + "name": "min_impurity_decrease", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.min_impurity_decrease", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/min_impurity_split", + "name": "min_impurity_split", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.min_impurity_split", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Threshold for early stopping in tree growth. A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/bootstrap", + "name": "bootstrap", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.bootstrap", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether bootstrap samples are used when building trees. If False, the\nwhole dataset is used to build each tree." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/oob_score", + "name": "oob_score", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.oob_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use out-of-bag samples to estimate the generalization score.\nOnly available if bootstrap=True." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n:meth:`decision_path` and :meth:`apply` are all parallelized over the\ntrees. 
``None`` means 1 unless in a :obj:`joblib.parallel_backend`\ncontext. ``-1`` means using all processors. See :term:`Glossary\n<n_jobs>` for more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls 3 sources of randomness:\n\n- the bootstrapping of the samples used when building trees\n (if ``bootstrap=True``)\n- the sampling of the features to consider when looking for the best\n split at each node (if ``max_features < n_features``)\n- the draw of the splits for each of the `max_features`\n\nSee :term:`Glossary <random_state>` for details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls the verbosity when fitting and predicting." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`the Glossary <warm_start>`." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/ccp_alpha", + "name": "ccp_alpha", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.ccp_alpha", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "non-negative float", + "default_value": "0.0", + "description": "Complexity parameter used for Minimal Cost-Complexity Pruning. The\nsubtree with the largest cost complexity that is smaller than\n``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n:ref:`minimal_cost_complexity_pruning` for details.\n\n..
versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "non-negative float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ExtraTreesRegressor/__init__/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._forest.ExtraTreesRegressor.__init__.max_samples", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "None", + "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": 1.0, + "min_inclusive": false, + "max_inclusive": false + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "An extra-trees regressor.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self,\n n_estimators=100, *,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n bootstrap=False,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n ccp_alpha=0.0,\n max_samples=None):\n super().__init__(\n base_estimator=ExtraTreeRegressor(),\n n_estimators=n_estimators,\n estimator_params=(\"criterion\", \"max_depth\", \"min_samples_split\",\n \"min_samples_leaf\", \"min_weight_fraction_leaf\",\n \"max_features\", \"max_leaf_nodes\",\n \"min_impurity_decrease\", \"min_impurity_split\",\n \"random_state\", \"ccp_alpha\"),\n bootstrap=bootstrap,\n oob_score=oob_score,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n max_samples=max_samples)\n\n self.criterion = criterion\n self.max_depth = max_depth\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.max_features = max_features\n self.max_leaf_nodes = max_leaf_nodes\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.ccp_alpha = ccp_alpha" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._forest.ForestClassifier.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__/base_estimator", + "name": 
"base_estimator", + "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.base_estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.n_estimators", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__/estimator_params", + "name": "estimator_params", + "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.estimator_params", + "default_value": "tuple()", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__/bootstrap", + "name": "bootstrap", + "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.bootstrap", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__/oob_score", + "name": "oob_score", + "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.oob_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
"description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/__init__/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._forest.ForestClassifier.__init__.max_samples", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for forest of trees-based classifiers.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self,\n base_estimator,\n n_estimators=100, *,\n estimator_params=tuple(),\n bootstrap=False,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n class_weight=None,\n max_samples=None):\n super().__init__(\n base_estimator,\n n_estimators=n_estimators,\n estimator_params=estimator_params,\n bootstrap=bootstrap,\n oob_score=oob_score,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n class_weight=class_weight,\n max_samples=max_samples)" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/_set_oob_score", + "name": "_set_oob_score", + "qname": "sklearn.ensemble._forest.ForestClassifier._set_oob_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/_set_oob_score/self", + "name": "self", + "qname": "sklearn.ensemble._forest.ForestClassifier._set_oob_score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/_set_oob_score/X", + "name": "X", + "qname": "sklearn.ensemble._forest.ForestClassifier._set_oob_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/_set_oob_score/y", + "name": "y", + "qname": "sklearn.ensemble._forest.ForestClassifier._set_oob_score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute out-of-bag score.", + "docstring": "Compute out-of-bag score.", + "code": " def _set_oob_score(self, X, y):\n \"\"\"\n Compute out-of-bag score.\"\"\"\n X = check_array(X, dtype=DTYPE, accept_sparse='csr')\n\n n_classes_ = self.n_classes_\n n_samples = y.shape[0]\n\n oob_decision_function = []\n oob_score = 0.0\n predictions = [np.zeros((n_samples, n_classes_[k]))\n for k in range(self.n_outputs_)]\n\n n_samples_bootstrap = _get_n_samples_bootstrap(\n n_samples, self.max_samples\n )\n\n for estimator in self.estimators_:\n unsampled_indices = _generate_unsampled_indices(\n estimator.random_state, n_samples, n_samples_bootstrap)\n p_estimator = estimator.predict_proba(X[unsampled_indices, :],\n check_input=False)\n\n if self.n_outputs_ == 1:\n p_estimator = [p_estimator]\n\n for k in range(self.n_outputs_):\n predictions[k][unsampled_indices, :] += p_estimator[k]\n\n for k in range(self.n_outputs_):\n if (predictions[k].sum(axis=1) == 
0).any():\n warn(\"Some inputs do not have OOB scores. \"\n \"This probably means too few trees were used \"\n \"to compute any reliable oob estimates.\")\n\n decision = (predictions[k] /\n predictions[k].sum(axis=1)[:, np.newaxis])\n oob_decision_function.append(decision)\n oob_score += np.mean(y[:, k] ==\n np.argmax(predictions[k], axis=1), axis=0)\n\n if self.n_outputs_ == 1:\n self.oob_decision_function_ = oob_decision_function[0]\n else:\n self.oob_decision_function_ = oob_decision_function\n\n self.oob_score_ = oob_score / self.n_outputs_" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/_validate_y_class_weight", + "name": "_validate_y_class_weight", + "qname": "sklearn.ensemble._forest.ForestClassifier._validate_y_class_weight", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/_validate_y_class_weight/self", + "name": "self", + "qname": "sklearn.ensemble._forest.ForestClassifier._validate_y_class_weight.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/_validate_y_class_weight/y", + "name": "y", + "qname": "sklearn.ensemble._forest.ForestClassifier._validate_y_class_weight.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_y_class_weight(self, y):\n check_classification_targets(y)\n\n y = np.copy(y)\n expanded_class_weight = None\n\n if self.class_weight is not None:\n y_original = np.copy(y)\n\n self.classes_ = []\n self.n_classes_ = []\n\n y_store_unique_indices = np.zeros(y.shape, dtype=int)\n for k in range(self.n_outputs_):\n classes_k, y_store_unique_indices[:, k] = \\\n np.unique(y[:, k], return_inverse=True)\n self.classes_.append(classes_k)\n self.n_classes_.append(classes_k.shape[0])\n y = y_store_unique_indices\n\n if self.class_weight is not None:\n valid_presets = ('balanced', 'balanced_subsample')\n if isinstance(self.class_weight, str):\n if self.class_weight not in valid_presets:\n raise ValueError('Valid presets for class_weight include '\n '\"balanced\" and \"balanced_subsample\".'\n 'Given \"%s\".'\n % self.class_weight)\n if self.warm_start:\n warn('class_weight presets \"balanced\" or '\n '\"balanced_subsample\" are '\n 'not recommended for warm_start if the fitted data '\n 'differs from the full dataset. In order to use '\n '\"balanced\" weights, use compute_class_weight '\n '(\"balanced\", classes, y). In place of y you can use '\n 'a large enough sample of the full training set '\n 'target to properly estimate the class frequency '\n 'distributions. 
Pass the resulting weights as the '\n 'class_weight parameter.')\n\n if (self.class_weight != 'balanced_subsample' or\n not self.bootstrap):\n if self.class_weight == \"balanced_subsample\":\n class_weight = \"balanced\"\n else:\n class_weight = self.class_weight\n expanded_class_weight = compute_sample_weight(class_weight,\n y_original)\n\n return y, expanded_class_weight" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/predict", + "name": "predict", + "qname": "sklearn.ensemble._forest.ForestClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/predict/self", + "name": "self", + "qname": "sklearn.ensemble._forest.ForestClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/predict/X", + "name": "X", + "qname": "sklearn.ensemble._forest.ForestClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, its dtype will be converted to\n``dtype=np.float32``. If a sparse matrix is provided, it will be\nconverted into a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class for X.\n\nThe predicted class of an input sample is a vote by the trees in\nthe forest, weighted by their probability estimates. That is,\nthe predicted class is the one with highest mean probability\nestimate across the trees.", + "docstring": "Predict class for X.\n\nThe predicted class of an input sample is a vote by the trees in\nthe forest, weighted by their probability estimates. That is,\nthe predicted class is the one with highest mean probability\nestimate across the trees.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\ny : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The predicted classes.", + "code": " def predict(self, X):\n \"\"\"\n Predict class for X.\n\n The predicted class of an input sample is a vote by the trees in\n the forest, weighted by their probability estimates. That is,\n the predicted class is the one with highest mean probability\n estimate across the trees.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. 
If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The predicted classes.\n \"\"\"\n proba = self.predict_proba(X)\n\n if self.n_outputs_ == 1:\n return self.classes_.take(np.argmax(proba, axis=1), axis=0)\n\n else:\n n_samples = proba[0].shape[0]\n # all dtypes should be the same, so just take the first\n class_type = self.classes_[0].dtype\n predictions = np.empty((n_samples, self.n_outputs_),\n dtype=class_type)\n\n for k in range(self.n_outputs_):\n predictions[:, k] = self.classes_[k].take(np.argmax(proba[k],\n axis=1),\n axis=0)\n\n return predictions" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.ensemble._forest.ForestClassifier.predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/predict_log_proba/self", + "name": "self", + "qname": "sklearn.ensemble._forest.ForestClassifier.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/predict_log_proba/X", + "name": "X", + "qname": "sklearn.ensemble._forest.ForestClassifier.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, its dtype will be converted to\n``dtype=np.float32``. If a sparse matrix is provided, it will be\nconverted into a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe log of the mean predicted class probabilities of the trees in the\nforest.", + "docstring": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe log of the mean predicted class probabilities of the trees in the\nforest.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes), or a list of n_outputs\n such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.", + "code": " def predict_log_proba(self, X):\n \"\"\"\n Predict class log-probabilities for X.\n\n The predicted class log-probabilities of an input sample is computed as\n the log of the mean predicted class probabilities of the trees in the\n forest.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. 
If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes), or a list of n_outputs\n such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n proba = self.predict_proba(X)\n\n if self.n_outputs_ == 1:\n return np.log(proba)\n\n else:\n for k in range(self.n_outputs_):\n proba[k] = np.log(proba[k])\n\n return proba" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.ensemble._forest.ForestClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.ensemble._forest.ForestClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.ensemble._forest.ForestClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, its dtype will be converted to\n``dtype=np.float32``. If a sparse matrix is provided, it will be\nconverted into a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample are computed as\nthe mean predicted class probabilities of the trees in the forest.\nThe class probability of a single tree is the fraction of samples of\nthe same class in a leaf.", + "docstring": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample are computed as\nthe mean predicted class probabilities of the trees in the forest.\nThe class probability of a single tree is the fraction of samples of\nthe same class in a leaf.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes), or a list of n_outputs\n such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.", + "code": " def predict_proba(self, X):\n \"\"\"\n Predict class probabilities for X.\n\n The predicted class probabilities of an input sample are computed as\n the mean predicted class probabilities of the trees in the forest.\n The class probability of a single tree is the fraction of samples of\n the same class in a leaf.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. 
If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes), or a list of n_outputs\n such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n # Check data\n X = self._validate_X_predict(X)\n\n # Assign chunk of trees to jobs\n n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)\n\n # avoid storing the output of every estimator by summing them here\n all_proba = [np.zeros((X.shape[0], j), dtype=np.float64)\n for j in np.atleast_1d(self.n_classes_)]\n lock = threading.Lock()\n Parallel(n_jobs=n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(require=\"sharedmem\"))(\n delayed(_accumulate_prediction)(e.predict_proba, X, all_proba,\n lock)\n for e in self.estimators_)\n\n for proba in all_proba:\n proba /= len(self.estimators_)\n\n if len(all_proba) == 1:\n return all_proba[0]\n else:\n return all_proba" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._forest.ForestRegressor.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.base_estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.n_estimators", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/__init__/estimator_params", + "name": "estimator_params", + "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.estimator_params", + "default_value": "tuple()", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/__init__/bootstrap", + "name": "bootstrap", + "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.bootstrap", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/__init__/oob_score", + "name": "oob_score", + "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.oob_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.ensemble._forest/ForestRegressor/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/__init__/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._forest.ForestRegressor.__init__.max_samples", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for forest of trees-based regressors.\n\nWarning: This class should not be used directly. 
Use derived classes\ninstead.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self,\n base_estimator,\n n_estimators=100, *,\n estimator_params=tuple(),\n bootstrap=False,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n max_samples=None):\n super().__init__(\n base_estimator,\n n_estimators=n_estimators,\n estimator_params=estimator_params,\n bootstrap=bootstrap,\n oob_score=oob_score,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n max_samples=max_samples)" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/_compute_partial_dependence_recursion", + "name": "_compute_partial_dependence_recursion", + "qname": "sklearn.ensemble._forest.ForestRegressor._compute_partial_dependence_recursion", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/_compute_partial_dependence_recursion/self", + "name": "self", + "qname": "sklearn.ensemble._forest.ForestRegressor._compute_partial_dependence_recursion.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/_compute_partial_dependence_recursion/grid", + "name": "grid", + "qname": "sklearn.ensemble._forest.ForestRegressor._compute_partial_dependence_recursion.grid", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_target_features)", + "default_value": "", + "description": "The grid points on which the partial dependence should be\nevaluated." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_target_features)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/_compute_partial_dependence_recursion/target_features", + "name": "target_features", + "qname": "sklearn.ensemble._forest.ForestRegressor._compute_partial_dependence_recursion.target_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_target_features)", + "default_value": "", + "description": "The set of target features for which the partial dependence\nshould be evaluated." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_target_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fast partial dependence computation.", + "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray of shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\nReturns\n-------\naveraged_predictions : ndarray of shape (n_samples,)\n The value of the partial dependence function on each grid point.", + "code": " def _compute_partial_dependence_recursion(self, grid, target_features):\n \"\"\"Fast partial dependence computation.\n\n Parameters\n ----------\n grid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\n target_features : ndarray of shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\n Returns\n -------\n averaged_predictions : ndarray of shape (n_samples,)\n The value of the partial dependence function on each grid point.\n \"\"\"\n grid = np.asarray(grid, dtype=DTYPE, order='C')\n averaged_predictions = np.zeros(shape=grid.shape[0],\n dtype=np.float64, order='C')\n\n for tree in self.estimators_:\n # Note: we don't sum in parallel because the GIL isn't released in\n # the fast method.\n tree.tree_.compute_partial_dependence(\n grid, target_features, averaged_predictions)\n # Average over the forest\n averaged_predictions /= len(self.estimators_)\n\n return averaged_predictions" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/_set_oob_score", + "name": "_set_oob_score", + "qname": "sklearn.ensemble._forest.ForestRegressor._set_oob_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/_set_oob_score/self", + "name": "self", + "qname": "sklearn.ensemble._forest.ForestRegressor._set_oob_score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/_set_oob_score/X", + "name": "X", + "qname": "sklearn.ensemble._forest.ForestRegressor._set_oob_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/_set_oob_score/y", + "name": "y", + "qname": "sklearn.ensemble._forest.ForestRegressor._set_oob_score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute out-of-bag scores.", + "docstring": "Compute out-of-bag scores.", + "code": " def _set_oob_score(self, X, y):\n \"\"\"\n Compute out-of-bag scores.\"\"\"\n X = check_array(X, dtype=DTYPE, accept_sparse='csr')\n\n n_samples = y.shape[0]\n\n predictions = np.zeros((n_samples, self.n_outputs_))\n n_predictions = np.zeros((n_samples, self.n_outputs_))\n\n n_samples_bootstrap = _get_n_samples_bootstrap(\n n_samples, 
self.max_samples\n )\n\n for estimator in self.estimators_:\n unsampled_indices = _generate_unsampled_indices(\n estimator.random_state, n_samples, n_samples_bootstrap)\n p_estimator = estimator.predict(\n X[unsampled_indices, :], check_input=False)\n\n if self.n_outputs_ == 1:\n p_estimator = p_estimator[:, np.newaxis]\n\n predictions[unsampled_indices, :] += p_estimator\n n_predictions[unsampled_indices, :] += 1\n\n if (n_predictions == 0).any():\n warn(\"Some inputs do not have OOB scores. \"\n \"This probably means too few trees were used \"\n \"to compute any reliable oob estimates.\")\n n_predictions[n_predictions == 0] = 1\n\n predictions /= n_predictions\n self.oob_prediction_ = predictions\n\n if self.n_outputs_ == 1:\n self.oob_prediction_ = \\\n self.oob_prediction_.reshape((n_samples, ))\n\n self.oob_score_ = 0.0\n\n for k in range(self.n_outputs_):\n self.oob_score_ += r2_score(y[:, k],\n predictions[:, k])\n\n self.oob_score_ /= self.n_outputs_" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/predict", + "name": "predict", + "qname": "sklearn.ensemble._forest.ForestRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/predict/self", + "name": "self", + "qname": "sklearn.ensemble._forest.ForestRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/ForestRegressor/predict/X", + "name": "X", + "qname": "sklearn.ensemble._forest.ForestRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, its dtype will be converted to\n``dtype=np.float32``. If a sparse matrix is provided, it will be\nconverted into a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the trees in the forest.", + "docstring": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the trees in the forest.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\ny : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The predicted values.", + "code": " def predict(self, X):\n \"\"\"\n Predict regression target for X.\n\n The predicted regression target of an input sample is computed as the\n mean predicted regression targets of the trees in the forest.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. 
If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\n Returns\n -------\n y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The predicted values.\n \"\"\"\n check_is_fitted(self)\n # Check data\n X = self._validate_X_predict(X)\n\n # Assign chunk of trees to jobs\n n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)\n\n # avoid storing the output of every estimator by summing them here\n if self.n_outputs_ > 1:\n y_hat = np.zeros((X.shape[0], self.n_outputs_), dtype=np.float64)\n else:\n y_hat = np.zeros((X.shape[0]), dtype=np.float64)\n\n # Parallel loop\n lock = threading.Lock()\n Parallel(n_jobs=n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(require=\"sharedmem\"))(\n delayed(_accumulate_prediction)(e.predict, X, [y_hat], lock)\n for e in self.estimators_)\n\n y_hat /= len(self.estimators_)\n\n return y_hat" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.n_estimators", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of trees in the forest.\n\n.. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/criterion", + "name": "criterion", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.criterion", + "default_value": "'gini'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"gini\", \"entropy\"}", + "default_value": "\"gini\"", + "description": "The function to measure the quality of a split. Supported criteria are\n\"gini\" for the Gini impurity and \"entropy\" for the information gain.\nNote: this parameter is tree-specific." + }, + "type": { + "kind": "EnumType", + "values": ["entropy", "gini"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.max_depth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum depth of the tree. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/min_samples_split", + "name": "min_samples_split", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.min_samples_split", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "2", + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.min_samples_leaf", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1", + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/min_weight_fraction_leaf", + "name": "min_weight_fraction_leaf", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.min_weight_fraction_leaf", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/max_features", + "name": "max_features", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.max_features", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"auto\", \"sqrt\", \"log2\"}, int or float", + "default_value": "\"auto\"", + "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n- If \"auto\", then `max_features=sqrt(n_features)`.\n- If \"sqrt\", then `max_features=sqrt(n_features)` (same as \"auto\").\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "log2", "sqrt"] + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.max_leaf_nodes", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Grow trees with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/min_impurity_decrease", + "name": "min_impurity_decrease", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.min_impurity_decrease", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/min_impurity_split", + "name": "min_impurity_split", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.min_impurity_split", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Threshold for early stopping in tree growth. 
A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/bootstrap", + "name": "bootstrap", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.bootstrap", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether bootstrap samples are used when building trees. If False, the\nwhole dataset is used to build each tree." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/oob_score", + "name": "oob_score", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.oob_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use out-of-bag samples to estimate the generalization score.\nOnly available if bootstrap=True." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n:meth:`decision_path` and :meth:`apply` are all parallelized over the\ntrees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\ncontext. ``-1`` means using all processors. See :term:`Glossary\n<n_jobs>` for more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls both the randomness of the bootstrapping of the samples used\nwhen building trees (if ``bootstrap=True``) and the sampling of the\nfeatures to consider when looking for the best split at each node\n(if ``max_features < n_features``).\nSee :term:`Glossary <random_state>` for details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls the verbosity when fitting and predicting."
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`the Glossary <warm_start>`." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"balanced\", \"balanced_subsample\"}, dict or list of dicts", + "default_value": "None", + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one. For\nmulti-output problems, a list of dicts can be provided in the same\norder as the columns of y.\n\nNote that for multioutput (including multilabel) weights should be\ndefined for each class of every column in its own dict. For example,\nfor four-class multilabel classification weights should be\n[{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n[{1:1}, {2:5}, {3:1}, {4:1}].\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``\n\nThe \"balanced_subsample\" mode is the same as \"balanced\" except that\nweights are computed based on the bootstrap sample for every tree\ngrown.\n\nFor multi-output, the weights of each column of y will be multiplied.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["balanced", "balanced_subsample"] + }, + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "list of dicts" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/ccp_alpha", + "name": "ccp_alpha", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.ccp_alpha", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "non-negative float", + "default_value": "0.0", + "description": "Complexity parameter used for Minimal Cost-Complexity Pruning. The\nsubtree with the largest cost complexity that is smaller than\n``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n:ref:`minimal_cost_complexity_pruning` for details.\n\n..
versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "non-negative float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestClassifier/__init__/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._forest.RandomForestClassifier.__init__.max_samples", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "None", + "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": 1.0, + "min_inclusive": false, + "max_inclusive": false + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "A random forest classifier.\n\nA random forest is a meta estimator that fits a number of decision tree\nclassifiers on various sub-samples of the dataset and uses averaging to\nimprove the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide <forest>`.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self,\n n_estimators=100, *,\n criterion=\"gini\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n bootstrap=True,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n class_weight=None,\n ccp_alpha=0.0,\n max_samples=None):\n super().__init__(\n base_estimator=DecisionTreeClassifier(),\n n_estimators=n_estimators,\n estimator_params=(\"criterion\", \"max_depth\", \"min_samples_split\",\n \"min_samples_leaf\", \"min_weight_fraction_leaf\",\n \"max_features\", \"max_leaf_nodes\",\n \"min_impurity_decrease\", \"min_impurity_split\",\n \"random_state\", \"ccp_alpha\"),\n bootstrap=bootstrap,\n oob_score=oob_score,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n class_weight=class_weight,\n max_samples=max_samples)\n\n self.criterion = criterion\n self.max_depth = max_depth\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.max_features = max_features\n self.max_leaf_nodes = max_leaf_nodes\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.ccp_alpha = ccp_alpha" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", +
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.n_estimators", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of trees in the forest.\n\n.. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/criterion", + "name": "criterion", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.criterion", + "default_value": "'mse'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"mse\", \"mae\"}", + "default_value": "\"mse\"", + "description": "The function to measure the quality of a split. Supported criteria\nare \"mse\" for the mean squared error, which is equal to variance\nreduction as feature selection criterion, and \"mae\" for the mean\nabsolute error.\n\n.. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion." + }, + "type": { + "kind": "EnumType", + "values": ["mse", "mae"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.max_depth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum depth of the tree. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/min_samples_split", + "name": "min_samples_split", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.min_samples_split", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "2", + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.min_samples_leaf", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1", + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. 
This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/min_weight_fraction_leaf", + "name": "min_weight_fraction_leaf", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.min_weight_fraction_leaf", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/max_features", + "name": "max_features", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.max_features", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"auto\", \"sqrt\", \"log2\"}, int or float", + "default_value": "\"auto\"", + "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "log2", "sqrt"] + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.max_leaf_nodes", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Grow trees with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/min_impurity_decrease", + "name": "min_impurity_decrease", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.min_impurity_decrease", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/min_impurity_split", + "name": "min_impurity_split", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.min_impurity_split", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Threshold for early stopping in tree growth. A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/bootstrap", + "name": "bootstrap", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.bootstrap", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether bootstrap samples are used when building trees. If False, the\nwhole dataset is used to build each tree." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/oob_score", + "name": "oob_score", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.oob_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use out-of-bag samples to estimate the generalization score.\nOnly available if bootstrap=True." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to run in parallel. 
:meth:`fit`, :meth:`predict`,\n:meth:`decision_path` and :meth:`apply` are all parallelized over the\ntrees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\ncontext. ``-1`` means using all processors. See :term:`Glossary\n<n_jobs>` for more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls both the randomness of the bootstrapping of the samples used\nwhen building trees (if ``bootstrap=True``) and the sampling of the\nfeatures to consider when looking for the best split at each node\n(if ``max_features < n_features``).\nSee :term:`Glossary <random_state>` for details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls the verbosity when fitting and predicting." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`the Glossary <warm_start>`." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/ccp_alpha", + "name": "ccp_alpha", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.ccp_alpha", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "non-negative float", + "default_value": "0.0", + "description": "Complexity parameter used for Minimal Cost-Complexity Pruning. The\nsubtree with the largest cost complexity that is smaller than\n``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n:ref:`minimal_cost_complexity_pruning` for details.\n\n..
versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "non-negative float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomForestRegressor/__init__/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._forest.RandomForestRegressor.__init__.max_samples", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "None", + "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0, 1)`.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": 1.0, + "min_inclusive": false, + "max_inclusive": false + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "A random forest regressor.\n\nA random forest is a meta estimator that fits a number of classifying\ndecision trees on various sub-samples of the dataset and uses averaging\nto improve the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self,\n n_estimators=100, *,\n criterion=\"mse\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=\"auto\",\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n bootstrap=True,\n oob_score=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False,\n ccp_alpha=0.0,\n max_samples=None):\n super().__init__(\n base_estimator=DecisionTreeRegressor(),\n n_estimators=n_estimators,\n estimator_params=(\"criterion\", \"max_depth\", \"min_samples_split\",\n \"min_samples_leaf\", \"min_weight_fraction_leaf\",\n \"max_features\", \"max_leaf_nodes\",\n \"min_impurity_decrease\", \"min_impurity_split\",\n \"random_state\", \"ccp_alpha\"),\n bootstrap=bootstrap,\n oob_score=oob_score,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n max_samples=max_samples)\n\n self.criterion = criterion\n self.max_depth = max_depth\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.max_features = max_features\n self.max_leaf_nodes = max_leaf_nodes\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.ccp_alpha = ccp_alpha" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + 
"default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.n_estimators", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Number of trees in the forest.\n\n.. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.max_depth", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "The maximum depth of each tree. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/min_samples_split", + "name": "min_samples_split", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.min_samples_split", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "2", + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` is the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.min_samples_leaf", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1", + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` is the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/min_weight_fraction_leaf", + "name": "min_weight_fraction_leaf", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.min_weight_fraction_leaf", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.max_leaf_nodes", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Grow trees with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/min_impurity_decrease", + "name": "min_impurity_decrease", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.min_impurity_decrease", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/min_impurity_split", + "name": "min_impurity_split", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.min_impurity_split", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Threshold for early stopping in tree growth. A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/sparse_output", + "name": "sparse_output", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.sparse_output", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not to return a sparse CSR matrix, as default behavior,\nor to return a dense array compatible with dense pipeline operators." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to run in parallel. :meth:`fit`, :meth:`transform`,\n:meth:`decision_path` and :meth:`apply` are all parallelized over the\ntrees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\ncontext. ``-1`` means using all processors. See :term:`Glossary\n` for more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the generation of the random `y` used to fit the trees\nand the draw of the splits for each feature at the trees' nodes.\nSee :term:`Glossary ` for details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls the verbosity when fitting and predicting." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "An ensemble of totally random trees.\n\nAn unsupervised transformation of a dataset to a high-dimensional\nsparse representation. A datapoint is coded according to which leaf of\neach tree it is sorted into. 
Using a one-hot encoding of the leaves,\nthis leads to a binary coding with as many ones as there are trees in\nthe forest.\n\nThe dimensionality of the resulting representation is\n``n_out <= n_estimators * max_leaf_nodes``. If ``max_leaf_nodes == None``,\nthe number of leaf nodes is at most ``n_estimators * 2 ** max_depth``.\n\nRead more in the :ref:`User Guide <random_trees_embedding>`.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self,\n n_estimators=100, *,\n max_depth=5,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n sparse_output=True,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False):\n super().__init__(\n base_estimator=ExtraTreeRegressor(),\n n_estimators=n_estimators,\n estimator_params=(\"criterion\", \"max_depth\", \"min_samples_split\",\n \"min_samples_leaf\", \"min_weight_fraction_leaf\",\n \"max_features\", \"max_leaf_nodes\",\n \"min_impurity_decrease\", \"min_impurity_split\",\n \"random_state\"),\n bootstrap=False,\n oob_score=False,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose,\n warm_start=warm_start,\n max_samples=None)\n\n self.max_depth = max_depth\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.max_leaf_nodes = max_leaf_nodes\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.sparse_output = sparse_output" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/_set_oob_score", + "name": "_set_oob_score", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding._set_oob_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/_set_oob_score/self", + "name": "self", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding._set_oob_score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/_set_oob_score/X", + "name": "X", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding._set_oob_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/_set_oob_score/y", + "name": "y", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding._set_oob_score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _set_oob_score(self, X, y):\n raise NotImplementedError(\"OOB score not supported by tree embedding\")" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/fit", + "name": "fit", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/fit/self", + "name": "self", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + 
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/fit/X", + "name": "X", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Use ``dtype=np.float32`` for maximum\nefficiency. Sparse matrices are also supported, use sparse\n``csc_matrix`` for maximum efficiency." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/fit/y", + "name": "y", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted. Splits\nthat would create child nodes with net zero or negative weight are\nignored while searching for a split in each node. In the case of\nclassification, splits are also ignored if they would result in any\nsingle class carrying a negative weight in either child node." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit estimator.", + "docstring": "Fit estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Use ``dtype=np.float32`` for maximum\n efficiency. Sparse matrices are also supported, use sparse\n ``csc_matrix`` for maximum efficiency.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y=None, sample_weight=None):\n \"\"\"\n Fit estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Use ``dtype=np.float32`` for maximum\n efficiency. Sparse matrices are also supported, use sparse\n ``csc_matrix`` for maximum efficiency.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. 
Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\n Returns\n -------\n self : object\n\n \"\"\"\n self.fit_transform(X, y, sample_weight=sample_weight)\n return self" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/fit_transform", + "name": "fit_transform", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/fit_transform/self", + "name": "self", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/fit_transform/X", + "name": "X", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data used to build forests. Use ``dtype=np.float32`` for\nmaximum efficiency." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/fit_transform/y", + "name": "y", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/fit_transform/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.fit_transform.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted. Splits\nthat would create child nodes with net zero or negative weight are\nignored while searching for a split in each node. In the case of\nclassification, splits are also ignored if they would result in any\nsingle class carrying a negative weight in either child node." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit estimator and transform dataset.", + "docstring": "Fit estimator and transform dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data used to build forests. Use ``dtype=np.float32`` for\n maximum efficiency.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. 
Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\nReturns\n-------\nX_transformed : sparse matrix of shape (n_samples, n_out)\n Transformed dataset.", + "code": " def fit_transform(self, X, y=None, sample_weight=None):\n \"\"\"\n Fit estimator and transform dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data used to build forests. Use ``dtype=np.float32`` for\n maximum efficiency.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\n Returns\n -------\n X_transformed : sparse matrix of shape (n_samples, n_out)\n Transformed dataset.\n \"\"\"\n X = check_array(X, accept_sparse=['csc'])\n if issparse(X):\n # Pre-sort indices to avoid that each individual tree of the\n # ensemble sorts the indices.\n X.sort_indices()\n\n rnd = check_random_state(self.random_state)\n y = rnd.uniform(size=X.shape[0])\n super().fit(X, y, sample_weight=sample_weight)\n\n self.one_hot_encoder_ = OneHotEncoder(sparse=self.sparse_output)\n return self.one_hot_encoder_.fit_transform(self.apply(X))" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/transform", + "name": "transform", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/transform/self", + "name": "self", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/RandomTreesEmbedding/transform/X", + "name": "X", + "qname": "sklearn.ensemble._forest.RandomTreesEmbedding.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data to be transformed. Use ``dtype=np.float32`` for maximum\nefficiency. Sparse matrices are also supported, use sparse\n``csr_matrix`` for maximum efficiency." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform dataset.", + "docstring": "Transform dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data to be transformed. Use ``dtype=np.float32`` for maximum\n efficiency. 
Sparse matrices are also supported, use sparse\n ``csr_matrix`` for maximum efficiency.\n\nReturns\n-------\nX_transformed : sparse matrix of shape (n_samples, n_out)\n Transformed dataset.", + "code": " def transform(self, X):\n \"\"\"\n Transform dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data to be transformed. Use ``dtype=np.float32`` for maximum\n efficiency. Sparse matrices are also supported, use sparse\n ``csr_matrix`` for maximum efficiency.\n\n Returns\n -------\n X_transformed : sparse matrix of shape (n_samples, n_out)\n Transformed dataset.\n \"\"\"\n check_is_fitted(self)\n return self.one_hot_encoder_.transform(self.apply(X))" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_accumulate_prediction", + "name": "_accumulate_prediction", + "qname": "sklearn.ensemble._forest._accumulate_prediction", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/_accumulate_prediction/predict", + "name": "predict", + "qname": "sklearn.ensemble._forest._accumulate_prediction.predict", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_accumulate_prediction/X", + "name": "X", + "qname": "sklearn.ensemble._forest._accumulate_prediction.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_accumulate_prediction/out", + "name": "out", + "qname": "sklearn.ensemble._forest._accumulate_prediction.out", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_accumulate_prediction/lock", + "name": "lock", + "qname": "sklearn.ensemble._forest._accumulate_prediction.lock", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "This is a utility function for joblib's Parallel.\n\nIt can't go locally in ForestClassifier or ForestRegressor, because joblib\ncomplains that it cannot pickle it when placed there.", + "docstring": "This is a utility function for joblib's Parallel.\n\nIt can't go locally in ForestClassifier or ForestRegressor, because joblib\ncomplains that it cannot pickle it when placed there.", + "code": "def _accumulate_prediction(predict, X, out, lock):\n \"\"\"\n This is a utility function for joblib's Parallel.\n\n It can't go locally in ForestClassifier or ForestRegressor, because joblib\n complains that it cannot pickle it when placed there.\n \"\"\"\n prediction = predict(X, check_input=False)\n with lock:\n if len(out) == 1:\n out[0] += prediction\n else:\n for i in range(len(out)):\n out[i] += prediction[i]" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_generate_sample_indices", + "name": "_generate_sample_indices", + "qname": "sklearn.ensemble._forest._generate_sample_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/_generate_sample_indices/random_state", + "name": 
"random_state", + "qname": "sklearn.ensemble._forest._generate_sample_indices.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_generate_sample_indices/n_samples", + "name": "n_samples", + "qname": "sklearn.ensemble._forest._generate_sample_indices.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_generate_sample_indices/n_samples_bootstrap", + "name": "n_samples_bootstrap", + "qname": "sklearn.ensemble._forest._generate_sample_indices.n_samples_bootstrap", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private function used to _parallel_build_trees function.", + "docstring": "Private function used to _parallel_build_trees function.", + "code": "def _generate_sample_indices(random_state, n_samples, n_samples_bootstrap):\n \"\"\"\n Private function used to _parallel_build_trees function.\"\"\"\n\n random_instance = check_random_state(random_state)\n sample_indices = random_instance.randint(0, n_samples, n_samples_bootstrap)\n\n return sample_indices" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_generate_unsampled_indices", + "name": "_generate_unsampled_indices", + "qname": "sklearn.ensemble._forest._generate_unsampled_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/_generate_unsampled_indices/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._forest._generate_unsampled_indices.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_generate_unsampled_indices/n_samples", + "name": "n_samples", + "qname": "sklearn.ensemble._forest._generate_unsampled_indices.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_generate_unsampled_indices/n_samples_bootstrap", + "name": "n_samples_bootstrap", + "qname": "sklearn.ensemble._forest._generate_unsampled_indices.n_samples_bootstrap", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private function used to forest._set_oob_score function.", + "docstring": "Private function used to forest._set_oob_score function.", + "code": "def _generate_unsampled_indices(random_state, n_samples, n_samples_bootstrap):\n \"\"\"\n Private function used to forest._set_oob_score function.\"\"\"\n sample_indices = _generate_sample_indices(random_state, n_samples,\n n_samples_bootstrap)\n sample_counts = np.bincount(sample_indices, minlength=n_samples)\n unsampled_mask = sample_counts == 0\n 
indices_range = np.arange(n_samples)\n unsampled_indices = indices_range[unsampled_mask]\n\n return unsampled_indices" + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_get_n_samples_bootstrap", + "name": "_get_n_samples_bootstrap", + "qname": "sklearn.ensemble._forest._get_n_samples_bootstrap", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/_get_n_samples_bootstrap/n_samples", + "name": "n_samples", + "qname": "sklearn.ensemble._forest._get_n_samples_bootstrap.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of samples in the dataset." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_get_n_samples_bootstrap/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._forest._get_n_samples_bootstrap.max_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "", + "description": "The maximum number of samples to draw from the total available:\n - if float, this indicates a fraction of the total and should be\n the interval `(0, 1)`;\n - if int, this indicates the exact number of samples;\n - if None, this indicates the total number of samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get the number of samples in a bootstrap sample.", + "docstring": "Get the number of samples in a bootstrap sample.\n\nParameters\n----------\nn_samples : int\n Number of samples in the dataset.\nmax_samples : int or float\n The maximum number of samples to draw from the total available:\n - if float, this indicates a fraction of the total and should be\n the interval `(0, 1)`;\n - if int, this indicates the exact number of samples;\n - if None, this indicates the total number of samples.\n\nReturns\n-------\nn_samples_bootstrap : int\n The total number of samples to draw for the bootstrap sample.", + "code": "def _get_n_samples_bootstrap(n_samples, max_samples):\n \"\"\"\n Get the number of samples in a bootstrap sample.\n\n Parameters\n ----------\n n_samples : int\n Number of samples in the dataset.\n max_samples : int or float\n The maximum number of samples to draw from the total available:\n - if float, this indicates a fraction of the total and should be\n the interval `(0, 1)`;\n - if int, this indicates the exact number of samples;\n - if None, this indicates the total number of samples.\n\n Returns\n -------\n n_samples_bootstrap : int\n The total number of samples to draw for the bootstrap sample.\n \"\"\"\n if max_samples is None:\n return n_samples\n\n if isinstance(max_samples, numbers.Integral):\n if not (1 <= max_samples <= n_samples):\n msg = \"`max_samples` must be in range 1 to {} but got value {}\"\n raise ValueError(msg.format(n_samples, max_samples))\n return max_samples\n\n if isinstance(max_samples, numbers.Real):\n if not (0 < max_samples < 1):\n msg = \"`max_samples` must be in range (0, 1) but got value {}\"\n raise ValueError(msg.format(max_samples))\n return round(n_samples * max_samples)\n\n msg = \"`max_samples` should be int or float, but got type '{}'\"\n raise TypeError(msg.format(type(max_samples)))" + }, + { + "id": 
"scikit-learn/sklearn.ensemble._forest/_parallel_build_trees", + "name": "_parallel_build_trees", + "qname": "sklearn.ensemble._forest._parallel_build_trees", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._forest/_parallel_build_trees/tree", + "name": "tree", + "qname": "sklearn.ensemble._forest._parallel_build_trees.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_parallel_build_trees/forest", + "name": "forest", + "qname": "sklearn.ensemble._forest._parallel_build_trees.forest", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_parallel_build_trees/X", + "name": "X", + "qname": "sklearn.ensemble._forest._parallel_build_trees.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_parallel_build_trees/y", + "name": "y", + "qname": "sklearn.ensemble._forest._parallel_build_trees.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_parallel_build_trees/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._forest._parallel_build_trees.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_parallel_build_trees/tree_idx", + "name": "tree_idx", + "qname": "sklearn.ensemble._forest._parallel_build_trees.tree_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_parallel_build_trees/n_trees", + "name": "n_trees", + "qname": "sklearn.ensemble._forest._parallel_build_trees.n_trees", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_parallel_build_trees/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._forest._parallel_build_trees.verbose", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_parallel_build_trees/class_weight", + "name": "class_weight", + "qname": "sklearn.ensemble._forest._parallel_build_trees.class_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._forest/_parallel_build_trees/n_samples_bootstrap", + "name": "n_samples_bootstrap", + "qname": 
"sklearn.ensemble._forest._parallel_build_trees.n_samples_bootstrap", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private function used to fit a single tree in parallel.", + "docstring": "Private function used to fit a single tree in parallel.", + "code": "def _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees,\n verbose=0, class_weight=None,\n n_samples_bootstrap=None):\n \"\"\"\n Private function used to fit a single tree in parallel.\"\"\"\n if verbose > 1:\n print(\"building tree %d of %d\" % (tree_idx + 1, n_trees))\n\n if forest.bootstrap:\n n_samples = X.shape[0]\n if sample_weight is None:\n curr_sample_weight = np.ones((n_samples,), dtype=np.float64)\n else:\n curr_sample_weight = sample_weight.copy()\n\n indices = _generate_sample_indices(tree.random_state, n_samples,\n n_samples_bootstrap)\n sample_counts = np.bincount(indices, minlength=n_samples)\n curr_sample_weight *= sample_counts\n\n if class_weight == 'subsample':\n with catch_warnings():\n simplefilter('ignore', DeprecationWarning)\n curr_sample_weight *= compute_sample_weight('auto', y,\n indices=indices)\n elif class_weight == 'balanced_subsample':\n curr_sample_weight *= compute_sample_weight('balanced', y,\n indices=indices)\n\n tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False)\n else:\n tree.fit(X, y, sample_weight=sample_weight, check_input=False)\n\n return tree" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/loss", + "name": "loss", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.loss", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.learning_rate", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.n_estimators", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/criterion", + "name": "criterion", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.criterion", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + 
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/min_samples_split", + "name": "min_samples_split", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.min_samples_split", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.min_samples_leaf", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/min_weight_fraction_leaf", + "name": "min_weight_fraction_leaf", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.min_weight_fraction_leaf", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.max_depth", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/min_impurity_decrease", + "name": "min_impurity_decrease", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.min_impurity_decrease", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/min_impurity_split", + "name": "min_impurity_split", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.min_impurity_split", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/init", + "name": "init", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.init", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/subsample", + "name": "subsample", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.subsample", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/max_features", + "name": "max_features", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.max_features", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/ccp_alpha", + "name": "ccp_alpha", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.ccp_alpha", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.random_state", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/alpha", + "name": "alpha", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.alpha", + "default_value": "0.9", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.max_leaf_nodes", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.n_iter_no_change", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/__init__/tol", + "name": "tol", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Abstract base class for Gradient Boosting.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self, *, loss, learning_rate, n_estimators, criterion,\n 
min_samples_split, min_samples_leaf, min_weight_fraction_leaf,\n max_depth, min_impurity_decrease, min_impurity_split,\n init, subsample, max_features, ccp_alpha,\n random_state, alpha=0.9, verbose=0, max_leaf_nodes=None,\n warm_start=False, validation_fraction=0.1,\n n_iter_no_change=None, tol=1e-4):\n\n self.n_estimators = n_estimators\n self.learning_rate = learning_rate\n self.loss = loss\n self.criterion = criterion\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.subsample = subsample\n self.max_features = max_features\n self.max_depth = max_depth\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.ccp_alpha = ccp_alpha\n self.init = init\n self.random_state = random_state\n self.alpha = alpha\n self.verbose = verbose\n self.max_leaf_nodes = max_leaf_nodes\n self.warm_start = warm_start\n self.validation_fraction = validation_fraction\n self.n_iter_no_change = n_iter_no_change\n self.tol = tol" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_check_initialized", + "name": "_check_initialized", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._check_initialized", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_check_initialized/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._check_initialized.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check that the estimator is initialized, raising an error if not.", + "docstring": "Check that the estimator is initialized, raising an error if not.", + "code": " def _check_initialized(self):\n \"\"\"Check that the estimator is initialized, raising an error if not.\"\"\"\n check_is_fitted(self)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_check_params", + "name": "_check_params", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._check_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_check_params/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._check_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check validity of parameters and raise ValueError if not valid.", + "docstring": "Check validity of parameters and raise ValueError if not valid.", + "code": " def _check_params(self):\n \"\"\"Check validity of parameters and raise ValueError if not valid.\"\"\"\n if self.n_estimators <= 0:\n raise ValueError(\"n_estimators must be greater than 0 but \"\n \"was %r\" % self.n_estimators)\n\n if self.learning_rate <= 0.0:\n raise ValueError(\"learning_rate must be greater than 0 but \"\n \"was %r\" % self.learning_rate)\n\n if (self.loss not in self._SUPPORTED_LOSS\n or self.loss not in _gb_losses.LOSS_FUNCTIONS):\n raise ValueError(\"Loss '{0:s}' not supported. 
\".format(self.loss))\n\n if self.loss == 'deviance':\n loss_class = (_gb_losses.MultinomialDeviance\n if len(self.classes_) > 2\n else _gb_losses.BinomialDeviance)\n else:\n loss_class = _gb_losses.LOSS_FUNCTIONS[self.loss]\n\n if is_classifier(self):\n self.loss_ = loss_class(self.n_classes_)\n elif self.loss in (\"huber\", \"quantile\"):\n self.loss_ = loss_class(self.alpha)\n else:\n self.loss_ = loss_class()\n\n if not (0.0 < self.subsample <= 1.0):\n raise ValueError(\"subsample must be in (0,1] but \"\n \"was %r\" % self.subsample)\n\n if self.init is not None:\n # init must be an estimator or 'zero'\n if isinstance(self.init, BaseEstimator):\n self.loss_.check_init_estimator(self.init)\n elif not (isinstance(self.init, str) and self.init == 'zero'):\n raise ValueError(\n \"The init parameter must be an estimator or 'zero'. \"\n \"Got init={}\".format(self.init)\n )\n\n if not (0.0 < self.alpha < 1.0):\n raise ValueError(\"alpha must be in (0.0, 1.0) but \"\n \"was %r\" % self.alpha)\n\n if isinstance(self.max_features, str):\n if self.max_features == \"auto\":\n if is_classifier(self):\n max_features = max(1, int(np.sqrt(self.n_features_)))\n else:\n max_features = self.n_features_\n elif self.max_features == \"sqrt\":\n max_features = max(1, int(np.sqrt(self.n_features_)))\n elif self.max_features == \"log2\":\n max_features = max(1, int(np.log2(self.n_features_)))\n else:\n raise ValueError(\"Invalid value for max_features: %r. \"\n \"Allowed string values are 'auto', 'sqrt' \"\n \"or 'log2'.\" % self.max_features)\n elif self.max_features is None:\n max_features = self.n_features_\n elif isinstance(self.max_features, numbers.Integral):\n max_features = self.max_features\n else: # float\n if 0. < self.max_features <= 1.:\n max_features = max(int(self.max_features *\n self.n_features_), 1)\n else:\n raise ValueError(\"max_features must be in (0, n_features]\")\n\n self.max_features_ = max_features\n\n if not isinstance(self.n_iter_no_change,\n (numbers.Integral, type(None))):\n raise ValueError(\"n_iter_no_change should either be None or an \"\n \"integer. %r was passed\"\n % self.n_iter_no_change)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_clear_state", + "name": "_clear_state", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._clear_state", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_clear_state/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._clear_state.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Clear the state of the gradient boosting model.", + "docstring": "Clear the state of the gradient boosting model. ", + "code": " def _clear_state(self):\n \"\"\"Clear the state of the gradient boosting model. 
\"\"\"\n if hasattr(self, 'estimators_'):\n self.estimators_ = np.empty((0, 0), dtype=object)\n if hasattr(self, 'train_score_'):\n del self.train_score_\n if hasattr(self, 'oob_improvement_'):\n del self.oob_improvement_\n if hasattr(self, 'init_'):\n del self.init_\n if hasattr(self, '_rng'):\n del self._rng" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_compute_partial_dependence_recursion", + "name": "_compute_partial_dependence_recursion", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._compute_partial_dependence_recursion", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_compute_partial_dependence_recursion/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._compute_partial_dependence_recursion.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_compute_partial_dependence_recursion/grid", + "name": "grid", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._compute_partial_dependence_recursion.grid", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_target_features)", + "default_value": "", + "description": "The grid points on which the partial dependence should be\nevaluated." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_target_features)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_compute_partial_dependence_recursion/target_features", + "name": "target_features", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._compute_partial_dependence_recursion.target_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_target_features,)", + "default_value": "", + "description": "The set of target features for which the partial dependence\nshould be evaluated." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_target_features,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fast partial dependence computation.", + "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray of shape (n_target_features,)\n The set of target features for which the partial dependence\n should be evaluated.\n\nReturns\n-------\naveraged_predictions : ndarray of shape (n_trees_per_iteration, n_samples)\n The value of the partial dependence function on each grid point.", + "code": " def _compute_partial_dependence_recursion(self, grid, target_features):\n \"\"\"Fast partial dependence computation.\n\n Parameters\n ----------\n grid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\n target_features : ndarray of shape (n_target_features,)\n The set of target features for which the partial dependence\n should be evaluated.\n\n Returns\n -------\n averaged_predictions : ndarray of shape \\\n (n_trees_per_iteration, n_samples)\n The value of the partial dependence function on each grid point.\n \"\"\"\n if self.init is not None:\n warnings.warn(\n 'Using recursion method with a non-constant init predictor '\n 'will lead to incorrect partial dependence values. '\n 'Got init=%s.' % self.init,\n UserWarning\n )\n grid = np.asarray(grid, dtype=DTYPE, order='C')\n n_estimators, n_trees_per_stage = self.estimators_.shape\n averaged_predictions = np.zeros((n_trees_per_stage, grid.shape[0]),\n dtype=np.float64, order='C')\n for stage in range(n_estimators):\n for k in range(n_trees_per_stage):\n tree = self.estimators_[stage, k].tree_\n tree.compute_partial_dependence(grid, target_features,\n averaged_predictions[k])\n averaged_predictions *= self.learning_rate\n\n return averaged_predictions" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stage", + "name": "_fit_stage", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stage", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stage/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stage.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stage/i", + "name": "i", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stage.i", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stage/X", + "name": "X", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stage.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stage/y", + "name": "y", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stage.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": 
false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stage/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stage.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stage/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stage.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stage/sample_mask", + "name": "sample_mask", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stage.sample_mask", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stage/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stage.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stage/X_csc", + "name": "X_csc", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stage.X_csc", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stage/X_csr", + "name": "X_csr", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stage.X_csr", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit another stage of ``_n_classes`` trees to the boosting model.", + "docstring": "Fit another stage of ``_n_classes`` trees to the boosting model.", + "code": " def _fit_stage(self, i, X, y, raw_predictions, sample_weight, sample_mask,\n random_state, X_csc=None, X_csr=None):\n \"\"\"Fit another stage of ``_n_classes`` trees to the boosting model.\"\"\"\n\n assert sample_mask.dtype == bool\n loss = self.loss_\n original_y = y\n\n # Need to pass a copy of raw_predictions to negative_gradient()\n # because raw_predictions is partially updated at the end of the loop\n # in update_terminal_regions(), and gradients need to be evaluated at\n # iteration i - 1.\n raw_predictions_copy = raw_predictions.copy()\n\n for k in range(loss.K):\n if loss.is_multi_class:\n y = np.array(original_y == k, dtype=np.float64)\n\n residual = loss.negative_gradient(y, raw_predictions_copy, k=k,\n sample_weight=sample_weight)\n\n # induce regression tree on residuals\n tree = DecisionTreeRegressor(\n criterion=self.criterion,\n splitter='best',\n max_depth=self.max_depth,\n min_samples_split=self.min_samples_split,\n 
min_samples_leaf=self.min_samples_leaf,\n min_weight_fraction_leaf=self.min_weight_fraction_leaf,\n min_impurity_decrease=self.min_impurity_decrease,\n min_impurity_split=self.min_impurity_split,\n max_features=self.max_features,\n max_leaf_nodes=self.max_leaf_nodes,\n random_state=random_state,\n ccp_alpha=self.ccp_alpha)\n\n if self.subsample < 1.0:\n # no inplace multiplication!\n sample_weight = sample_weight * sample_mask.astype(np.float64)\n\n X = X_csr if X_csr is not None else X\n tree.fit(X, residual, sample_weight=sample_weight,\n check_input=False)\n\n # update tree leaves\n loss.update_terminal_regions(\n tree.tree_, X, y, residual, raw_predictions, sample_weight,\n sample_mask, learning_rate=self.learning_rate, k=k)\n\n # add tree to ensemble\n self.estimators_[i, k] = tree\n\n return raw_predictions" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stages", + "name": "_fit_stages", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stages", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stages/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stages.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stages/X", + "name": "X", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stages.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stages/y", + "name": "y", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stages.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stages/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stages.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stages/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stages.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stages/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stages.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stages/X_val", + "name": "X_val", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stages.X_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + 
"default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stages/y_val", + "name": "y_val", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stages.y_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stages/sample_weight_val", + "name": "sample_weight_val", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stages.sample_weight_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stages/begin_at_stage", + "name": "begin_at_stage", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stages.begin_at_stage", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_fit_stages/monitor", + "name": "monitor", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._fit_stages.monitor", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Iteratively fits the stages.\n\nFor each stage it computes the progress (OOB, train score)\nand delegates to ``_fit_stage``.\nReturns the number of stages fit; might differ from ``n_estimators``\ndue to early stopping.", + "docstring": "Iteratively fits the stages.\n\nFor each stage it computes the progress (OOB, train score)\nand delegates to ``_fit_stage``.\nReturns the number of stages fit; might differ from ``n_estimators``\ndue to early stopping.", + "code": " def _fit_stages(self, X, y, raw_predictions, sample_weight, random_state,\n X_val, y_val, sample_weight_val,\n begin_at_stage=0, monitor=None):\n \"\"\"Iteratively fits the stages.\n\n For each stage it computes the progress (OOB, train score)\n and delegates to ``_fit_stage``.\n Returns the number of stages fit; might differ from ``n_estimators``\n due to early stopping.\n \"\"\"\n n_samples = X.shape[0]\n do_oob = self.subsample < 1.0\n sample_mask = np.ones((n_samples, ), dtype=bool)\n n_inbag = max(1, int(self.subsample * n_samples))\n loss_ = self.loss_\n\n if self.verbose:\n verbose_reporter = VerboseReporter(verbose=self.verbose)\n verbose_reporter.init(self, begin_at_stage)\n\n X_csc = csc_matrix(X) if issparse(X) else None\n X_csr = csr_matrix(X) if issparse(X) else None\n\n if self.n_iter_no_change is not None:\n loss_history = np.full(self.n_iter_no_change, np.inf)\n # We create a generator to get the predictions for X_val after\n # the addition of each successive stage\n y_val_pred_iter = self._staged_raw_predict(X_val)\n\n # perform boosting iterations\n i = begin_at_stage\n for i in range(begin_at_stage, self.n_estimators):\n\n # subsampling\n if do_oob:\n sample_mask = _random_sample_mask(n_samples, n_inbag,\n random_state)\n # OOB score before adding this stage\n old_oob_score = loss_(y[~sample_mask],\n raw_predictions[~sample_mask],\n sample_weight[~sample_mask])\n\n # fit next 
stage of trees\n raw_predictions = self._fit_stage(\n i, X, y, raw_predictions, sample_weight, sample_mask,\n random_state, X_csc, X_csr)\n\n # track deviance (= loss)\n if do_oob:\n self.train_score_[i] = loss_(y[sample_mask],\n raw_predictions[sample_mask],\n sample_weight[sample_mask])\n self.oob_improvement_[i] = (\n old_oob_score - loss_(y[~sample_mask],\n raw_predictions[~sample_mask],\n sample_weight[~sample_mask]))\n else:\n # no need to fancy index w/ no subsampling\n self.train_score_[i] = loss_(y, raw_predictions, sample_weight)\n\n if self.verbose > 0:\n verbose_reporter.update(i, self)\n\n if monitor is not None:\n early_stopping = monitor(i, self, locals())\n if early_stopping:\n break\n\n # We also provide an early stopping based on the score from\n # validation set (X_val, y_val), if n_iter_no_change is set\n if self.n_iter_no_change is not None:\n # By calling next(y_val_pred_iter), we get the predictions\n # for X_val after the addition of the current stage\n validation_loss = loss_(y_val, next(y_val_pred_iter),\n sample_weight_val)\n\n # Require validation_score to be better (less) than at least\n # one of the last n_iter_no_change evaluations\n if np.any(validation_loss + self.tol < loss_history):\n loss_history[i % len(loss_history)] = validation_loss\n else:\n break\n\n return i + 1" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_init_state", + "name": "_init_state", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._init_state", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_init_state/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._init_state.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Initialize model state and allocate model state data structures.", + "docstring": "Initialize model state and allocate model state data structures. ", + "code": " def _init_state(self):\n \"\"\"Initialize model state and allocate model state data structures. 
\"\"\"\n\n self.init_ = self.init\n if self.init_ is None:\n self.init_ = self.loss_.init_estimator()\n\n self.estimators_ = np.empty((self.n_estimators, self.loss_.K),\n dtype=object)\n self.train_score_ = np.zeros((self.n_estimators,), dtype=np.float64)\n # do oob?\n if self.subsample < 1.0:\n self.oob_improvement_ = np.zeros((self.n_estimators),\n dtype=np.float64)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_is_initialized", + "name": "_is_initialized", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._is_initialized", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_is_initialized/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._is_initialized.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _is_initialized(self):\n return len(getattr(self, 'estimators_', [])) > 0" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_make_estimator", + "name": "_make_estimator", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._make_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_make_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._make_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_make_estimator/append", + "name": "append", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._make_estimator.append", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _make_estimator(self, append=True):\n # we don't need _make_estimator\n raise NotImplementedError()" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_raw_predict", + "name": "_raw_predict", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._raw_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_raw_predict/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._raw_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_raw_predict/X", + "name": "X", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._raw_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the sum of the trees raw predictions (+ init estimator).", + "docstring": "Return the sum of the trees raw predictions (+ init estimator).", + "code": " def _raw_predict(self, X):\n 
\"\"\"Return the sum of the trees raw predictions (+ init estimator).\"\"\"\n raw_predictions = self._raw_predict_init(X)\n predict_stages(self.estimators_, X, self.learning_rate,\n raw_predictions)\n return raw_predictions" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_raw_predict_init", + "name": "_raw_predict_init", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._raw_predict_init", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_raw_predict_init/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._raw_predict_init.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_raw_predict_init/X", + "name": "X", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._raw_predict_init.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check input and compute raw predictions of the init estimator.", + "docstring": "Check input and compute raw predictions of the init estimator.", + "code": " def _raw_predict_init(self, X):\n \"\"\"Check input and compute raw predictions of the init estimator.\"\"\"\n self._check_initialized()\n X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)\n if X.shape[1] != self.n_features_:\n raise ValueError(\"X.shape[1] should be {0:d}, not {1:d}.\".format(\n self.n_features_, X.shape[1]))\n if self.init_ == 'zero':\n raw_predictions = np.zeros(shape=(X.shape[0], self.loss_.K),\n dtype=np.float64)\n else:\n raw_predictions = self.loss_.get_init_raw_predictions(\n X, self.init_).astype(np.float64)\n return raw_predictions" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_resize_state", + "name": "_resize_state", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._resize_state", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_resize_state/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._resize_state.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Add additional ``n_estimators`` entries to all attributes.", + "docstring": "Add additional ``n_estimators`` entries to all attributes.", + "code": " def _resize_state(self):\n \"\"\"Add additional ``n_estimators`` entries to all attributes.\"\"\"\n # self.n_estimators is the number of additional est to fit\n total_n_estimators = self.n_estimators\n if total_n_estimators < self.estimators_.shape[0]:\n raise ValueError('resize with smaller n_estimators %d < %d' %\n (total_n_estimators, self.estimators_[0]))\n\n self.estimators_ = np.resize(self.estimators_,\n (total_n_estimators, self.loss_.K))\n self.train_score_ = np.resize(self.train_score_, total_n_estimators)\n if (self.subsample < 1 or hasattr(self, 'oob_improvement_')):\n # if do oob resize arrays or create new if not available\n if hasattr(self, 'oob_improvement_'):\n self.oob_improvement_ = 
np.resize(self.oob_improvement_,\n total_n_estimators)\n else:\n self.oob_improvement_ = np.zeros((total_n_estimators,),\n dtype=np.float64)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_staged_raw_predict", + "name": "_staged_raw_predict", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._staged_raw_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_staged_raw_predict/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._staged_raw_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_staged_raw_predict/X", + "name": "X", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._staged_raw_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute raw predictions of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.", + "docstring": "Compute raw predictions of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nraw_predictions : generator of ndarray of shape (n_samples, k)\n The raw predictions of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification are special cases with\n ``k == 1``, otherwise ``k==n_classes``.", + "code": " def _staged_raw_predict(self, X):\n \"\"\"Compute raw predictions of ``X`` for each iteration.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n raw_predictions : generator of ndarray of shape (n_samples, k)\n The raw predictions of the input samples. 
The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification are special cases with\n ``k == 1``, otherwise ``k==n_classes``.\n \"\"\"\n X = check_array(X, dtype=DTYPE, order=\"C\", accept_sparse='csr')\n raw_predictions = self._raw_predict_init(X)\n for i in range(self.estimators_.shape[0]):\n predict_stage(self.estimators_, i, X, self.learning_rate,\n raw_predictions)\n yield raw_predictions.copy()" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_validate_y", + "name": "_validate_y", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._validate_y", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_validate_y/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._validate_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_validate_y/y", + "name": "y", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._validate_y.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_validate_y/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._validate_y.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Called by fit to validate y.", + "docstring": "Called by fit to validate y.", + "code": " @abstractmethod\n def _validate_y(self, y, sample_weight=None):\n \"\"\"Called by fit to validate y.\"\"\"" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_warn_mae_for_criterion", + "name": "_warn_mae_for_criterion", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._warn_mae_for_criterion", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/_warn_mae_for_criterion/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting._warn_mae_for_criterion.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @abstractmethod\n def _warn_mae_for_criterion(self):\n pass" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/apply", + "name": "apply", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.apply", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/apply/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.apply.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/apply/X", + "name": "X", + "qname": 
"sklearn.ensemble._gb.BaseGradientBoosting.apply.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, its dtype will be converted to\n``dtype=np.float32``. If a sparse matrix is provided, it will\nbe converted to a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply trees in the ensemble to X, return leaf indices.\n\n.. versionadded:: 0.17", + "docstring": "Apply trees in the ensemble to X, return leaf indices.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will\n be converted to a sparse ``csr_matrix``.\n\nReturns\n-------\nX_leaves : array-like of shape (n_samples, n_estimators, n_classes)\n For each datapoint x in X and for each tree in the ensemble,\n return the index of the leaf x ends up in each estimator.\n In the case of binary classification n_classes is 1.", + "code": " def apply(self, X):\n \"\"\"Apply trees in the ensemble to X, return leaf indices.\n\n .. versionadded:: 0.17\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will\n be converted to a sparse ``csr_matrix``.\n\n Returns\n -------\n X_leaves : array-like of shape (n_samples, n_estimators, n_classes)\n For each datapoint x in X and for each tree in the ensemble,\n return the index of the leaf x ends up in each estimator.\n In the case of binary classification n_classes is 1.\n \"\"\"\n\n self._check_initialized()\n X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)\n\n # n_classes will be equal to 1 in the binary classification or the\n # regression case.\n n_estimators, n_classes = self.estimators_.shape\n leaves = np.zeros((X.shape[0], n_estimators, n_classes))\n\n for i in range(n_estimators):\n for j in range(n_classes):\n estimator = self.estimators_[i, j]\n leaves[:, i, j] = estimator.apply(X, check_input=False)\n\n return leaves" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/feature_importances_@getter", + "name": "feature_importances_", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.feature_importances_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/feature_importances_/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.feature_importances_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature. 
It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.", + "docstring": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature. It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n\nReturns\n-------\nfeature_importances_ : ndarray of shape (n_features,)\n The values of this array sum to 1, unless all trees are single node\n trees consisting of only the root node, in which case it will be an\n array of zeros.", + "code": " @property\n def feature_importances_(self):\n \"\"\"The impurity-based feature importances.\n\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n Returns\n -------\n feature_importances_ : ndarray of shape (n_features,)\n The values of this array sum to 1, unless all trees are single node\n trees consisting of only the root node, in which case it will be an\n array of zeros.\n \"\"\"\n self._check_initialized()\n\n relevant_trees = [tree\n for stage in self.estimators_ for tree in stage\n if tree.tree_.node_count > 1]\n if not relevant_trees:\n # degenerate case where all trees have only one node\n return np.zeros(shape=self.n_features_, dtype=np.float64)\n\n relevant_feature_importances = [\n tree.tree_.compute_feature_importances(normalize=False)\n for tree in relevant_trees\n ]\n avg_feature_importances = np.mean(relevant_feature_importances,\n axis=0, dtype=np.float64)\n return avg_feature_importances / np.sum(avg_feature_importances)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/fit", + "name": "fit", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/fit/self", + "name": "self", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/fit/X", + "name": "X", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/fit/y", + "name": "y", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values (strings or integers in classification, real numbers\nin regression)\nFor classification, labels must correspond to classes." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted. Splits\nthat would create child nodes with net zero or negative weight are\nignored while searching for a split in each node. In the case of\nclassification, splits are also ignored if they would result in any\nsingle class carrying a negative weight in either child node." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/BaseGradientBoosting/fit/monitor", + "name": "monitor", + "qname": "sklearn.ensemble._gb.BaseGradientBoosting.fit.monitor", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "The monitor is called after each iteration with the current\niteration, a reference to the estimator and the local variables of\n``_fit_stages`` as keyword arguments ``callable(i, self,\nlocals())``. If the callable returns ``True`` the fitting procedure\nis stopped. The monitor can be used for various things such as\ncomputing held-out estimates, early stopping, model introspect, and\nsnapshoting." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the gradient boosting model.", + "docstring": "Fit the gradient boosting model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ny : array-like of shape (n_samples,)\n Target values (strings or integers in classification, real numbers\n in regression)\n For classification, labels must correspond to classes.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. 
In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\nmonitor : callable, default=None\n The monitor is called after each iteration with the current\n iteration, a reference to the estimator and the local variables of\n ``_fit_stages`` as keyword arguments ``callable(i, self,\n locals())``. If the callable returns ``True`` the fitting procedure\n is stopped. The monitor can be used for various things such as\n computing held-out estimates, early stopping, model introspect, and\n snapshoting.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None, monitor=None):\n \"\"\"Fit the gradient boosting model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n y : array-like of shape (n_samples,)\n Target values (strings or integers in classification, real numbers\n in regression)\n For classification, labels must correspond to classes.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. In the case of\n classification, splits are also ignored if they would result in any\n single class carrying a negative weight in either child node.\n\n monitor : callable, default=None\n The monitor is called after each iteration with the current\n iteration, a reference to the estimator and the local variables of\n ``_fit_stages`` as keyword arguments ``callable(i, self,\n locals())``. If the callable returns ``True`` the fitting procedure\n is stopped. The monitor can be used for various things such as\n computing held-out estimates, early stopping, model introspect, and\n snapshoting.\n\n Returns\n -------\n self : object\n \"\"\"\n if self.criterion == 'mae':\n # TODO: This should raise an error from 1.1\n self._warn_mae_for_criterion()\n\n # if not warmstart - clear the estimator state\n if not self.warm_start:\n self._clear_state()\n\n # Check input\n # Since check_array converts both X and y to the same dtype, but the\n # trees use different types for X and y, checking them separately.\n\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'],\n dtype=DTYPE, multi_output=True)\n n_samples, self.n_features_ = X.shape\n\n sample_weight_is_none = sample_weight is None\n\n sample_weight = _check_sample_weight(sample_weight, X)\n\n y = column_or_1d(y, warn=True)\n\n if is_classifier(self):\n y = self._validate_y(y, sample_weight)\n else:\n y = self._validate_y(y)\n\n if self.n_iter_no_change is not None:\n stratify = y if is_classifier(self) else None\n X, X_val, y, y_val, sample_weight, sample_weight_val = (\n train_test_split(X, y, sample_weight,\n random_state=self.random_state,\n test_size=self.validation_fraction,\n stratify=stratify))\n if is_classifier(self):\n if self._n_classes != np.unique(y).shape[0]:\n # We choose to error here. The problem is that the init\n # estimator would be trained on y, which has some missing\n # classes now, so its predictions would not have the\n # correct shape.\n raise ValueError(\n 'The training data after the early stopping split '\n 'is missing some classes. 
Try using another random '\n 'seed.'\n )\n else:\n X_val = y_val = sample_weight_val = None\n\n self._check_params()\n\n if not self._is_initialized():\n # init state\n self._init_state()\n\n # fit initial model and initialize raw predictions\n if self.init_ == 'zero':\n raw_predictions = np.zeros(shape=(X.shape[0], self.loss_.K),\n dtype=np.float64)\n else:\n # XXX clean this once we have a support_sample_weight tag\n if sample_weight_is_none:\n self.init_.fit(X, y)\n else:\n msg = (\"The initial estimator {} does not support sample \"\n \"weights.\".format(self.init_.__class__.__name__))\n try:\n self.init_.fit(X, y, sample_weight=sample_weight)\n except TypeError as e:\n # regular estimator without SW support\n raise ValueError(msg) from e\n except ValueError as e:\n if \"pass parameters to specific steps of \"\\\n \"your pipeline using the \"\\\n \"stepname__parameter\" in str(e): # pipeline\n raise ValueError(msg) from e\n else: # regular estimator whose input checking failed\n raise\n\n raw_predictions = \\\n self.loss_.get_init_raw_predictions(X, self.init_)\n\n begin_at_stage = 0\n\n # The rng state must be preserved if warm_start is True\n self._rng = check_random_state(self.random_state)\n\n else:\n # add more estimators to fitted model\n # invariant: warm_start = True\n if self.n_estimators < self.estimators_.shape[0]:\n raise ValueError('n_estimators=%d must be larger or equal to '\n 'estimators_.shape[0]=%d when '\n 'warm_start==True'\n % (self.n_estimators,\n self.estimators_.shape[0]))\n begin_at_stage = self.estimators_.shape[0]\n # The requirements of _decision_function (called in two lines\n # below) are more constrained than fit. It accepts only CSR\n # matrices.\n X = check_array(X, dtype=DTYPE, order=\"C\", accept_sparse='csr')\n raw_predictions = self._raw_predict(X)\n self._resize_state()\n\n # fit the boosting stages\n n_stages = self._fit_stages(\n X, y, raw_predictions, sample_weight, self._rng, X_val, y_val,\n sample_weight_val, begin_at_stage, monitor)\n\n # change shape of arrays after fit (early-stopping or additional ests)\n if n_stages != self.estimators_.shape[0]:\n self.estimators_ = self.estimators_[:n_stages]\n self.train_score_ = self.train_score_[:n_stages]\n if hasattr(self, 'oob_improvement_'):\n self.oob_improvement_ = self.oob_improvement_[:n_stages]\n\n self.n_estimators_ = n_stages\n return self" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/loss", + "name": "loss", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.loss", + "default_value": "'deviance'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'deviance', 'exponential'}", + "default_value": "'deviance'", + "description": "The loss function to be optimized. 'deviance' refers to\ndeviance (= logistic regression) for classification\nwith probabilistic outputs. 
For loss 'exponential' gradient\nboosting recovers the AdaBoost algorithm." + }, + "type": { + "kind": "EnumType", + "values": ["exponential", "deviance"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.learning_rate", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "Learning rate shrinks the contribution of each tree by `learning_rate`.\nThere is a trade-off between learning_rate and n_estimators." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.n_estimators", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of boosting stages to perform. Gradient boosting\nis fairly robust to over-fitting so a large number usually\nresults in better performance." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/subsample", + "name": "subsample", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.subsample", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "The fraction of samples to be used for fitting the individual base\nlearners. If smaller than 1.0 this results in Stochastic Gradient\nBoosting. `subsample` interacts with the parameter `n_estimators`.\nChoosing `subsample < 1.0` leads to a reduction of variance\nand an increase in bias." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/criterion", + "name": "criterion", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.criterion", + "default_value": "'friedman_mse'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'friedman_mse', 'mse', 'mae'}", + "default_value": "'friedman_mse'", + "description": "The function to measure the quality of a split. Supported criteria\nare 'friedman_mse' for the mean squared error with improvement\nscore by Friedman, 'mse' for mean squared error, and 'mae' for\nthe mean absolute error. The default value of 'friedman_mse' is\ngenerally the best as it can provide a better approximation in\nsome cases.\n\n.. versionadded:: 0.18\n.. deprecated:: 0.24\n `criterion='mae'` is deprecated and will be removed in version\n 1.1 (renaming of 0.26). Use `criterion='friedman_mse'` or `'mse'`\n instead, as trees should use a least-square criterion in\n Gradient Boosting." 
+ }, + "type": { + "kind": "EnumType", + "values": ["friedman_mse", "mse", "mae"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/min_samples_split", + "name": "min_samples_split", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.min_samples_split", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "2", + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.min_samples_leaf", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1", + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/min_weight_fraction_leaf", + "name": "min_weight_fraction_leaf", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.min_weight_fraction_leaf", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.max_depth", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "The maximum depth of the individual regression estimators. The maximum\ndepth limits the number of nodes in the tree. Tune this parameter\nfor best performance; the best value depends on the interaction\nof the input variables." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/min_impurity_decrease", + "name": "min_impurity_decrease", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.min_impurity_decrease", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/min_impurity_split", + "name": "min_impurity_split", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.min_impurity_split", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Threshold for early stopping in tree growth. A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/init", + "name": "init", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "estimator or 'zero'", + "default_value": "None", + "description": "An estimator object that is used to compute the initial predictions.\n``init`` has to provide :meth:`fit` and :meth:`predict_proba`. If\n'zero', the initial raw predictions are set to zero. By default, a\n``DummyEstimator`` predicting the classes priors is used." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "estimator" + }, + { + "kind": "NamedType", + "name": "'zero'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the random seed given to each Tree estimator at each\nboosting iteration.\nIn addition, it controls the random permutation of the features at\neach split (see Notes for more details).\nIt also controls the random spliting of the training data to obtain a\nvalidation set if `n_iter_no_change` is not None.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/max_features", + "name": "max_features", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.max_features", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'sqrt', 'log2'}, int or float", + "default_value": "None", + "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n- If 'auto', then `max_features=sqrt(n_features)`.\n- If 'sqrt', then `max_features=sqrt(n_features)`.\n- If 'log2', then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nChoosing `max_features < n_features` leads to a reduction of variance\nand an increase in bias.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "log2", "sqrt"] + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Enable verbose output. If 1 then it prints progress and performance\nonce in a while (the more trees the lower the frequency). If greater\nthan 1 then it prints progress and performance for every tree." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.max_leaf_nodes", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Grow trees with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just erase the\nprevious solution. See :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The proportion of training data to set aside as validation set for\nearly stopping. Must be between 0 and 1.\nOnly used if ``n_iter_no_change`` is set to an integer.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.n_iter_no_change", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "``n_iter_no_change`` is used to decide if early stopping will be used\nto terminate training when validation score is not improving. By\ndefault it is set to None to disable early stopping. If set to a\nnumber, it will set aside ``validation_fraction`` size of the training\ndata as validation and terminate training when validation score is not\nimproving in all of the previous ``n_iter_no_change`` numbers of\niterations. The split is stratified.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/tol", + "name": "tol", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance for the early stopping. When the loss is not improving\nby at least tol for ``n_iter_no_change`` iterations (if set to a\nnumber), the training stops.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/__init__/ccp_alpha", + "name": "ccp_alpha", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.__init__.ccp_alpha", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "non-negative float", + "default_value": "0.0", + "description": "Complexity parameter used for Minimal Cost-Complexity Pruning. The\nsubtree with the largest cost complexity that is smaller than\n``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n:ref:`minimal_cost_complexity_pruning` for details.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "non-negative float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Gradient Boosting for classification.\n\nGB builds an additive model in a\nforward stage-wise fashion; it allows for the optimization of\narbitrary differentiable loss functions. In each stage ``n_classes_``\nregression trees are fit on the negative gradient of the\nbinomial or multinomial deviance loss function. Binary classification\nis a special case where only a single regression tree is induced.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, loss='deviance', learning_rate=0.1, n_estimators=100,\n subsample=1.0, criterion='friedman_mse', min_samples_split=2,\n min_samples_leaf=1, min_weight_fraction_leaf=0.,\n max_depth=3, min_impurity_decrease=0.,\n min_impurity_split=None, init=None,\n random_state=None, max_features=None, verbose=0,\n max_leaf_nodes=None, warm_start=False,\n validation_fraction=0.1, n_iter_no_change=None, tol=1e-4,\n ccp_alpha=0.0):\n\n super().__init__(\n loss=loss, learning_rate=learning_rate, n_estimators=n_estimators,\n criterion=criterion, min_samples_split=min_samples_split,\n min_samples_leaf=min_samples_leaf,\n min_weight_fraction_leaf=min_weight_fraction_leaf,\n max_depth=max_depth, init=init, subsample=subsample,\n max_features=max_features,\n random_state=random_state, verbose=verbose,\n max_leaf_nodes=max_leaf_nodes,\n min_impurity_decrease=min_impurity_decrease,\n min_impurity_split=min_impurity_split,\n warm_start=warm_start, validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, tol=tol, ccp_alpha=ccp_alpha)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/_validate_y", + "name": "_validate_y", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier._validate_y", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/_validate_y/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier._validate_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/_validate_y/y", + "name": "y", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier._validate_y.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/_validate_y/sample_weight", + "name": 
"sample_weight", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier._validate_y.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_y(self, y, sample_weight):\n check_classification_targets(y)\n self.classes_, y = np.unique(y, return_inverse=True)\n n_trim_classes = np.count_nonzero(np.bincount(y, sample_weight))\n if n_trim_classes < 2:\n raise ValueError(\"y contains %d class after sample_weight \"\n \"trimmed classes with zero weights, while a \"\n \"minimum of 2 classes are required.\"\n % n_trim_classes)\n self._n_classes = len(self.classes_)\n # expose n_classes_ attribute\n self.n_classes_ = self._n_classes\n return y" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/_warn_mae_for_criterion", + "name": "_warn_mae_for_criterion", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier._warn_mae_for_criterion", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/_warn_mae_for_criterion/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier._warn_mae_for_criterion.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _warn_mae_for_criterion(self):\n # TODO: This should raise an error from 1.1\n warnings.warn(\"criterion='mae' was deprecated in version 0.24 and \"\n \"will be removed in version 1.1 (renaming of 0.26). Use \"\n \"criterion='friedman_mse' or 'mse' instead, as trees \"\n \"should use a least-square criterion in Gradient \"\n \"Boosting.\", FutureWarning)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/decision_function", + "name": "decision_function", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/decision_function/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/decision_function/X", + "name": "X", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the decision function of ``X``.", + "docstring": "Compute the decision function of ``X``.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nscore : ndarray of shape (n_samples, n_classes) or (n_samples,)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n order of the classes corresponds to that in the attribute\n :term:`classes_`. Regression and binary classification produce an\n array of shape (n_samples,).", + "code": " def decision_function(self, X):\n \"\"\"Compute the decision function of ``X``.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n score : ndarray of shape (n_samples, n_classes) or (n_samples,)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n order of the classes corresponds to that in the attribute\n :term:`classes_`. Regression and binary classification produce an\n array of shape (n_samples,).\n \"\"\"\n X = check_array(X, dtype=DTYPE, order=\"C\", accept_sparse='csr')\n raw_predictions = self._raw_predict(X)\n if raw_predictions.shape[1] == 1:\n return raw_predictions.ravel()\n return raw_predictions" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/predict", + "name": "predict", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/predict/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/predict/X", + "name": "X", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class for X.", + "docstring": "Predict class for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. 
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted values.", + "code": " def predict(self, X):\n \"\"\"Predict class for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n The predicted values.\n \"\"\"\n raw_predictions = self.decision_function(X)\n encoded_labels = \\\n self.loss_._raw_prediction_to_decision(raw_predictions)\n return self.classes_.take(encoded_labels, axis=0)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/predict_log_proba/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/predict_log_proba/X", + "name": "X", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class log-probabilities for X.", + "docstring": "Predict class log-probabilities for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nRaises\n------\nAttributeError\n If the ``loss`` does not support probabilities.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.", + "code": " def predict_log_proba(self, X):\n \"\"\"Predict class log-probabilities for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Raises\n ------\n AttributeError\n If the ``loss`` does not support probabilities.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. 
The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n proba = self.predict_proba(X)\n return np.log(proba)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class probabilities for X.", + "docstring": "Predict class probabilities for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nRaises\n------\nAttributeError\n If the ``loss`` does not support probabilities.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.", + "code": " def predict_proba(self, X):\n \"\"\"Predict class probabilities for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Raises\n ------\n AttributeError\n If the ``loss`` does not support probabilities.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. 
The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n raw_predictions = self.decision_function(X)\n try:\n return self.loss_._raw_prediction_to_proba(raw_predictions)\n except NotFittedError:\n raise\n except AttributeError as e:\n raise AttributeError('loss=%r does not support predict_proba' %\n self.loss) from e" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/staged_decision_function", + "name": "staged_decision_function", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.staged_decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/staged_decision_function/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.staged_decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/staged_decision_function/X", + "name": "X", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.staged_decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute decision function of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.", + "docstring": "Compute decision function of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nscore : generator of ndarray of shape (n_samples, k)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification are special cases with\n ``k == 1``, otherwise ``k==n_classes``.", + "code": " def staged_decision_function(self, X):\n \"\"\"Compute decision function of ``X`` for each iteration.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n score : generator of ndarray of shape (n_samples, k)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . 
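Editor's note, not part of the extracted data: per the `predict_log_proba` code captured above, log-probabilities are literally `np.log` of `predict_proba`. A quick check on synthetic data:

```python
# Relation between predict_proba and predict_log_proba documented above.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X, y = make_classification(random_state=0)
clf = GradientBoostingClassifier(random_state=0).fit(X, y)
proba = clf.predict_proba(X[:3])
log_proba = clf.predict_log_proba(X[:3])
print(np.allclose(log_proba, np.log(proba)))  # True
print(proba.sum(axis=1))                      # each row sums to 1
```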
The\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification are special cases with\n ``k == 1``, otherwise ``k==n_classes``.\n \"\"\"\n yield from self._staged_raw_predict(X)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/staged_predict", + "name": "staged_predict", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.staged_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/staged_predict/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.staged_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/staged_predict/X", + "name": "X", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.staged_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class at each stage for X.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.", + "docstring": "Predict class at each stage for X.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples.", + "code": " def staged_predict(self, X):\n \"\"\"Predict class at each stage for X.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. 
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n y : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples.\n \"\"\"\n for raw_predictions in self._staged_raw_predict(X):\n encoded_labels = \\\n self.loss_._raw_prediction_to_decision(raw_predictions)\n yield self.classes_.take(encoded_labels, axis=0)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/staged_predict_proba", + "name": "staged_predict_proba", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.staged_predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/staged_predict_proba/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.staged_predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingClassifier/staged_predict_proba/X", + "name": "X", + "qname": "sklearn.ensemble._gb.GradientBoostingClassifier.staged_predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class probabilities at each stage for X.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.", + "docstring": "Predict class probabilities at each stage for X.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples.", + "code": " def staged_predict_proba(self, X):\n \"\"\"Predict class probabilities at each stage for X.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. 
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n y : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples.\n \"\"\"\n try:\n for raw_predictions in self._staged_raw_predict(X):\n yield self.loss_._raw_prediction_to_proba(raw_predictions)\n except NotFittedError:\n raise\n except AttributeError as e:\n raise AttributeError('loss=%r does not support predict_proba' %\n self.loss) from e" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/loss", + "name": "loss", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.loss", + "default_value": "'ls'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'ls', 'lad', 'huber', 'quantile'}", + "default_value": "'ls'", + "description": "Loss function to be optimized. 'ls' refers to least squares\nregression. 'lad' (least absolute deviation) is a highly robust\nloss function solely based on order information of the input\nvariables. 'huber' is a combination of the two. 'quantile'\nallows quantile regression (use `alpha` to specify the quantile)." + }, + "type": { + "kind": "EnumType", + "values": ["quantile", "lad", "huber", "ls"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.learning_rate", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "Learning rate shrinks the contribution of each tree by `learning_rate`.\nThere is a trade-off between learning_rate and n_estimators." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.n_estimators", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of boosting stages to perform. Gradient boosting\nis fairly robust to over-fitting so a large number usually\nresults in better performance." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/subsample", + "name": "subsample", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.subsample", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "The fraction of samples to be used for fitting the individual base\nlearners. If smaller than 1.0 this results in Stochastic Gradient\nBoosting. 
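Editor's note, not part of the extracted data: the `staged_*` generators documented above exist for monitoring, yielding one prediction per boosting stage without refitting. A hedged sketch of a test-error curve:

```python
# Per-stage monitoring with staged_predict, as described above.
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=400, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
clf = GradientBoostingClassifier(n_estimators=50,
                                 random_state=0).fit(X_tr, y_tr)
test_curve = [accuracy_score(y_te, y_pred)
              for y_pred in clf.staged_predict(X_te)]
print(len(test_curve))  # 50: one entry per boosting stage
print(max(test_curve))
```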
`subsample` interacts with the parameter `n_estimators`.\nChoosing `subsample < 1.0` leads to a reduction of variance\nand an increase in bias." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/criterion", + "name": "criterion", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.criterion", + "default_value": "'friedman_mse'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'friedman_mse', 'mse', 'mae'}", + "default_value": "'friedman_mse'", + "description": "The function to measure the quality of a split. Supported criteria\nare \"friedman_mse\" for the mean squared error with improvement\nscore by Friedman, \"mse\" for mean squared error, and \"mae\" for\nthe mean absolute error. The default value of \"friedman_mse\" is\ngenerally the best as it can provide a better approximation in\nsome cases.\n\n.. versionadded:: 0.18\n.. deprecated:: 0.24\n `criterion='mae'` is deprecated and will be removed in version\n 1.1 (renaming of 0.26). The correct way of minimizing the absolute\n error is to use `loss='lad'` instead." + }, + "type": { + "kind": "EnumType", + "values": ["friedman_mse", "mse", "mae"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/min_samples_split", + "name": "min_samples_split", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.min_samples_split", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "2", + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.min_samples_leaf", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1", + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/min_weight_fraction_leaf", + "name": "min_weight_fraction_leaf", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.min_weight_fraction_leaf", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.max_depth", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Maximum depth of the individual regression estimators. The maximum\ndepth limits the number of nodes in the tree. Tune this parameter\nfor best performance; the best value depends on the interaction\nof the input variables." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/min_impurity_decrease", + "name": "min_impurity_decrease", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.min_impurity_decrease", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/min_impurity_split", + "name": "min_impurity_split", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.min_impurity_split", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Threshold for early stopping in tree growth. A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/init", + "name": "init", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "estimator or 'zero'", + "default_value": "None", + "description": "An estimator object that is used to compute the initial predictions.\n``init`` has to provide :term:`fit` and :term:`predict`. If 'zero', the\ninitial raw predictions are set to zero. By default a\n``DummyEstimator`` is used, predicting either the average target value\n(for loss='ls'), or a quantile for the other losses." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "estimator" + }, + { + "kind": "NamedType", + "name": "'zero'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the random seed given to each Tree estimator at each\nboosting iteration.\nIn addition, it controls the random permutation of the features at\neach split (see Notes for more details).\nIt also controls the random spliting of the training data to obtain a\nvalidation set if `n_iter_no_change` is not None.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/max_features", + "name": "max_features", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.max_features", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'sqrt', 'log2'}, int or float", + "default_value": "None", + "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nChoosing `max_features < n_features` leads to a reduction of variance\nand an increase in bias.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "log2", "sqrt"] + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/alpha", + "name": "alpha", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.alpha", + "default_value": "0.9", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.9", + "description": "The alpha-quantile of the huber loss function and the quantile\nloss function. Only if ``loss='huber'`` or ``loss='quantile'``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Enable verbose output. If 1 then it prints progress and performance\nonce in a while (the more trees the lower the frequency). If greater\nthan 1 then it prints progress and performance for every tree." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.max_leaf_nodes", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Grow trees with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just erase the\nprevious solution. See :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The proportion of training data to set aside as validation set for\nearly stopping. Must be between 0 and 1.\nOnly used if ``n_iter_no_change`` is set to an integer.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.n_iter_no_change", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "``n_iter_no_change`` is used to decide if early stopping will be used\nto terminate training when validation score is not improving. By\ndefault it is set to None to disable early stopping. If set to a\nnumber, it will set aside ``validation_fraction`` size of the training\ndata as validation and terminate training when validation score is not\nimproving in all of the previous ``n_iter_no_change`` numbers of\niterations.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/tol", + "name": "tol", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance for the early stopping. When the loss is not improving\nby at least tol for ``n_iter_no_change`` iterations (if set to a\nnumber), the training stops.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/__init__/ccp_alpha", + "name": "ccp_alpha", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.__init__.ccp_alpha", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "non-negative float", + "default_value": "0.0", + "description": "Complexity parameter used for Minimal Cost-Complexity Pruning. The\nsubtree with the largest cost complexity that is smaller than\n``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n:ref:`minimal_cost_complexity_pruning` for details.\n\n.. 
versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "non-negative float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Gradient Boosting for regression.\n\nGB builds an additive model in a forward stage-wise fashion;\nit allows for the optimization of arbitrary differentiable loss functions.\nIn each stage a regression tree is fit on the negative gradient of the\ngiven loss function.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, loss='ls', learning_rate=0.1, n_estimators=100,\n subsample=1.0, criterion='friedman_mse', min_samples_split=2,\n min_samples_leaf=1, min_weight_fraction_leaf=0.,\n max_depth=3, min_impurity_decrease=0.,\n min_impurity_split=None, init=None, random_state=None,\n max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None,\n warm_start=False, validation_fraction=0.1,\n n_iter_no_change=None, tol=1e-4, ccp_alpha=0.0):\n\n super().__init__(\n loss=loss, learning_rate=learning_rate, n_estimators=n_estimators,\n criterion=criterion, min_samples_split=min_samples_split,\n min_samples_leaf=min_samples_leaf,\n min_weight_fraction_leaf=min_weight_fraction_leaf,\n max_depth=max_depth, init=init, subsample=subsample,\n max_features=max_features,\n min_impurity_decrease=min_impurity_decrease,\n min_impurity_split=min_impurity_split,\n random_state=random_state, alpha=alpha, verbose=verbose,\n max_leaf_nodes=max_leaf_nodes, warm_start=warm_start,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, tol=tol, ccp_alpha=ccp_alpha)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/_validate_y", + "name": "_validate_y", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor._validate_y", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/_validate_y/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor._validate_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/_validate_y/y", + "name": "y", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor._validate_y.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/_validate_y/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor._validate_y.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_y(self, y, sample_weight=None):\n if y.dtype.kind == 'O':\n y = y.astype(DOUBLE)\n return y" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/_warn_mae_for_criterion", + "name": "_warn_mae_for_criterion", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor._warn_mae_for_criterion", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/_warn_mae_for_criterion/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor._warn_mae_for_criterion.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _warn_mae_for_criterion(self):\n # TODO: This should raise an error from 1.1\n warnings.warn(\"criterion='mae' was deprecated in version 0.24 and \"\n \"will be removed in version 1.1 (renaming of 0.26). The \"\n \"correct way of minimizing the absolute error is to use \"\n \" loss='lad' instead.\", FutureWarning)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/apply", + "name": "apply", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.apply", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/apply/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.apply.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/apply/X", + "name": "X", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.apply.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, its dtype will be converted to\n``dtype=np.float32``. If a sparse matrix is provided, it will\nbe converted to a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply trees in the ensemble to X, return leaf indices.\n\n.. versionadded:: 0.17", + "docstring": "Apply trees in the ensemble to X, return leaf indices.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. If a sparse matrix is provided, it will\n be converted to a sparse ``csr_matrix``.\n\nReturns\n-------\nX_leaves : array-like of shape (n_samples, n_estimators)\n For each datapoint x in X and for each tree in the ensemble,\n return the index of the leaf x ends up in each estimator.", + "code": " def apply(self, X):\n \"\"\"Apply trees in the ensemble to X, return leaf indices.\n\n .. versionadded:: 0.17\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, its dtype will be converted to\n ``dtype=np.float32``. 
If a sparse matrix is provided, it will\n be converted to a sparse ``csr_matrix``.\n\n Returns\n -------\n X_leaves : array-like of shape (n_samples, n_estimators)\n For each datapoint x in X and for each tree in the ensemble,\n return the index of the leaf x ends up in each estimator.\n \"\"\"\n\n leaves = super().apply(X)\n leaves = leaves.reshape(X.shape[0], self.estimators_.shape[0])\n return leaves" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/n_classes_@getter", + "name": "n_classes_", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.n_classes_", + "decorators": [ + "deprecated('Attribute n_classes_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/n_classes_/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.n_classes_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute n_classes_ was deprecated \" # type: ignore\n \"in version 0.24 and will be removed in 1.1 \"\n \"(renaming of 0.26).\")\n @property\n def n_classes_(self):\n try:\n check_is_fitted(self)\n except NotFittedError as nfe:\n raise AttributeError(\n \"{} object has no n_classes_ attribute.\"\n .format(self.__class__.__name__)\n ) from nfe\n return 1" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/predict", + "name": "predict", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/predict/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/predict/X", + "name": "X", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict regression target for X.", + "docstring": "Predict regression target for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. 
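Editor's note, not part of the extracted data: the `apply` entry above returns one leaf index per (sample, tree), reshaped to `(n_samples, n_estimators)` for the regressor. A hedged sketch:

```python
# GradientBoostingRegressor.apply as documented above.
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor

X, y = make_regression(n_samples=150, random_state=0)
reg = GradientBoostingRegressor(n_estimators=20, random_state=0).fit(X, y)
leaves = reg.apply(X)
print(leaves.shape)  # (150, 20): n_samples x n_estimators
```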
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted values.", + "code": " def predict(self, X):\n \"\"\"Predict regression target for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n The predicted values.\n \"\"\"\n X = check_array(X, dtype=DTYPE, order=\"C\", accept_sparse='csr')\n # In regression we can directly return the raw value from the trees.\n return self._raw_predict(X).ravel()" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/staged_predict", + "name": "staged_predict", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.staged_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/staged_predict/self", + "name": "self", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.staged_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/GradientBoostingRegressor/staged_predict/X", + "name": "X", + "qname": "sklearn.ensemble._gb.GradientBoostingRegressor.staged_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict regression target at each stage for X.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.", + "docstring": "Predict regression target at each stage for X.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples.", + "code": " def staged_predict(self, X):\n \"\"\"Predict regression target at each stage for X.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. 
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n y : generator of ndarray of shape (n_samples,)\n The predicted value of the input samples.\n \"\"\"\n for raw_predictions in self._staged_raw_predict(X):\n yield raw_predictions.ravel()" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/VerboseReporter/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._gb.VerboseReporter.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/VerboseReporter/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._gb.VerboseReporter.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/VerboseReporter/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._gb.VerboseReporter.__init__.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Verbosity level. If ``verbose==1`` output is printed once in a while\n(when iteration mod verbose_mod is zero); if larger than 1 then output\nis printed for each update." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Reports verbose output to stdout.", + "docstring": "", + "code": " def __init__(self, verbose):\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/VerboseReporter/init", + "name": "init", + "qname": "sklearn.ensemble._gb.VerboseReporter.init", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/VerboseReporter/init/self", + "name": "self", + "qname": "sklearn.ensemble._gb.VerboseReporter.init.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/VerboseReporter/init/est", + "name": "est", + "qname": "sklearn.ensemble._gb.VerboseReporter.init.est", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Estimator", + "default_value": "", + "description": "The estimator" + }, + "type": { + "kind": "NamedType", + "name": "Estimator" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/VerboseReporter/init/begin_at_stage", + "name": "begin_at_stage", + "qname": "sklearn.ensemble._gb.VerboseReporter.init.begin_at_stage", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "stage at which to begin reporting" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Initialize reporter", + "docstring": "Initialize reporter\n\nParameters\n----------\nest : Estimator\n The estimator\n\nbegin_at_stage : int, default=0\n stage at which to begin reporting", + "code": " def init(self, est, begin_at_stage=0):\n \"\"\"Initialize reporter\n\n Parameters\n ----------\n est : Estimator\n The estimator\n\n begin_at_stage : int, default=0\n stage at which to begin reporting\n \"\"\"\n # header fields and line format str\n header_fields = 
['Iter', 'Train Loss']\n verbose_fmt = ['{iter:>10d}', '{train_score:>16.4f}']\n # do oob?\n if est.subsample < 1:\n header_fields.append('OOB Improve')\n verbose_fmt.append('{oob_impr:>16.4f}')\n header_fields.append('Remaining Time')\n verbose_fmt.append('{remaining_time:>16s}')\n\n # print the header line\n print(('%10s ' + '%16s ' *\n (len(header_fields) - 1)) % tuple(header_fields))\n\n self.verbose_fmt = ' '.join(verbose_fmt)\n # plot verbose info each time i % verbose_mod == 0\n self.verbose_mod = 1\n self.start_time = time()\n self.begin_at_stage = begin_at_stage" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/VerboseReporter/update", + "name": "update", + "qname": "sklearn.ensemble._gb.VerboseReporter.update", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb/VerboseReporter/update/self", + "name": "self", + "qname": "sklearn.ensemble._gb.VerboseReporter.update.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/VerboseReporter/update/j", + "name": "j", + "qname": "sklearn.ensemble._gb.VerboseReporter.update.j", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The new iteration." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb/VerboseReporter/update/est", + "name": "est", + "qname": "sklearn.ensemble._gb.VerboseReporter.update.est", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Estimator", + "default_value": "", + "description": "The estimator." 
+ }, + "type": { + "kind": "NamedType", + "name": "Estimator" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Update reporter with new iteration.", + "docstring": "Update reporter with new iteration.\n\nParameters\n----------\nj : int\n The new iteration.\nest : Estimator\n The estimator.", + "code": " def update(self, j, est):\n \"\"\"Update reporter with new iteration.\n\n Parameters\n ----------\n j : int\n The new iteration.\n est : Estimator\n The estimator.\n \"\"\"\n do_oob = est.subsample < 1\n # we need to take into account if we fit additional estimators.\n i = j - self.begin_at_stage # iteration relative to the start iter\n if (i + 1) % self.verbose_mod == 0:\n oob_impr = est.oob_improvement_[j] if do_oob else 0\n remaining_time = ((est.n_estimators - (j + 1)) *\n (time() - self.start_time) / float(i + 1))\n if remaining_time > 60:\n remaining_time = '{0:.2f}m'.format(remaining_time / 60.0)\n else:\n remaining_time = '{0:.2f}s'.format(remaining_time)\n print(self.verbose_fmt.format(iter=j + 1,\n train_score=est.train_score_[j],\n oob_impr=oob_impr,\n remaining_time=remaining_time))\n if self.verbose == 1 and ((i + 1) // (self.verbose_mod * 10) > 0):\n # adjust verbose frequency (powers of 10)\n self.verbose_mod *= 10" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/__call__", + "name": "__call__", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/__call__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/__call__/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.__call__.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/__call__/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.__call__.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/__call__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.__call__.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the deviance (= 2 * negative log-likelihood).", + "docstring": "Compute the deviance (= 2 * negative log-likelihood).\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.", + "code": " def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the deviance (= 2 * negative log-likelihood).\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n # logaddexp(0, v) == log(1.0 + exp(v))\n raw_predictions = raw_predictions.ravel()\n if sample_weight is None:\n return -2 * np.mean((y * raw_predictions) -\n np.logaddexp(0, raw_predictions))\n else:\n return (-2 / sample_weight.sum() * np.sum(\n sample_weight * ((y * raw_predictions) -\n np.logaddexp(0, raw_predictions))))" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/__init__/n_classes", + "name": "n_classes", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.__init__.n_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of classes." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Binomial deviance loss function for binary classification.\n\nBinary classification is a special case; here, we only need to\nfit one tree instead of ``n_classes`` trees.", + "docstring": "", + "code": " def __init__(self, n_classes):\n if n_classes != 2:\n raise ValueError(\"{0:s} requires 2 classes; got {1:d} class(es)\"\n .format(self.__class__.__name__, n_classes))\n # we only need to fit one tree for binary clf.\n super().__init__(n_classes=1)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_raw_prediction_to_decision", + "name": "_raw_prediction_to_decision", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._raw_prediction_to_decision", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_raw_prediction_to_decision/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._raw_prediction_to_decision.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_raw_prediction_to_decision/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._raw_prediction_to_decision.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _raw_prediction_to_decision(self, raw_predictions):\n proba = self._raw_prediction_to_proba(raw_predictions)\n return np.argmax(proba, axis=1)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_raw_prediction_to_proba", + "name": "_raw_prediction_to_proba", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._raw_prediction_to_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_raw_prediction_to_proba/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._raw_prediction_to_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_raw_prediction_to_proba/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._raw_prediction_to_proba.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _raw_prediction_to_proba(self, raw_predictions):\n proba = np.ones((raw_predictions.shape[0], 2), dtype=np.float64)\n proba[:, 1] = expit(raw_predictions.ravel())\n proba[:, 0] -= proba[:, 1]\n return proba" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_update_terminal_region", + "name": "_update_terminal_region", + "qname": 
"sklearn.ensemble._gb_losses.BinomialDeviance._update_terminal_region", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_update_terminal_region/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._update_terminal_region.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_update_terminal_region/tree", + "name": "tree", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._update_terminal_region.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_update_terminal_region/terminal_regions", + "name": "terminal_regions", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._update_terminal_region.terminal_regions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_update_terminal_region/leaf", + "name": "leaf", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._update_terminal_region.leaf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_update_terminal_region/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._update_terminal_region.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_update_terminal_region/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._update_terminal_region.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_update_terminal_region/residual", + "name": "residual", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._update_terminal_region.residual", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_update_terminal_region/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._update_terminal_region.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/_update_terminal_region/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance._update_terminal_region.sample_weight", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Make a single Newton-Raphson step.\n\nour node estimate is given by:\n\n sum(w * (y - prob)) / sum(w * prob * (1 - prob))\n\nwe take advantage that: y - prob = residual", + "docstring": "Make a single Newton-Raphson step.\n\nour node estimate is given by:\n\n sum(w * (y - prob)) / sum(w * prob * (1 - prob))\n\nwe take advantage that: y - prob = residual", + "code": " def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n \"\"\"Make a single Newton-Raphson step.\n\n our node estimate is given by:\n\n sum(w * (y - prob)) / sum(w * prob * (1 - prob))\n\n we take advantage that: y - prob = residual\n \"\"\"\n terminal_region = np.where(terminal_regions == leaf)[0]\n residual = residual.take(terminal_region, axis=0)\n y = y.take(terminal_region, axis=0)\n sample_weight = sample_weight.take(terminal_region, axis=0)\n\n numerator = np.sum(sample_weight * residual)\n denominator = np.sum(sample_weight *\n (y - residual) * (1 - y + residual))\n\n # prevents overflow and division by zero\n if abs(denominator) < 1e-150:\n tree.value[leaf, 0, 0] = 0.0\n else:\n tree.value[leaf, 0, 0] = numerator / denominator" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/get_init_raw_predictions", + "name": "get_init_raw_predictions", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.get_init_raw_predictions", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/get_init_raw_predictions/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.get_init_raw_predictions.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/get_init_raw_predictions/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.get_init_raw_predictions.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/get_init_raw_predictions/estimator", + "name": "estimator", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.get_init_raw_predictions.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_init_raw_predictions(self, X, estimator):\n probas = estimator.predict_proba(X)\n proba_pos_class = probas[:, 1]\n eps = np.finfo(np.float32).eps\n proba_pos_class = np.clip(proba_pos_class, eps, 1 - eps)\n # log(x / (1 - x)) is the inverse of the sigmoid (expit) function\n raw_predictions = np.log(proba_pos_class / (1 - proba_pos_class))\n return raw_predictions.reshape(-1, 1).astype(np.float64)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/init_estimator", + "name": "init_estimator", + "qname": 
"sklearn.ensemble._gb_losses.BinomialDeviance.init_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/init_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.init_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def init_estimator(self):\n # return the most common class, taking into account the samples\n # weights\n return DummyClassifier(strategy='prior')" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/negative_gradient", + "name": "negative_gradient", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.negative_gradient", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/negative_gradient/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.negative_gradient.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/negative_gradient/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.negative_gradient.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/negative_gradient/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.negative_gradient.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble at iteration ``i - 1``." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/BinomialDeviance/negative_gradient/kargs", + "name": "kargs", + "qname": "sklearn.ensemble._gb_losses.BinomialDeviance.negative_gradient.kargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute half of the negative gradient.", + "docstring": "Compute half of the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.", + "code": " def negative_gradient(self, y, raw_predictions, **kargs):\n \"\"\"Compute half of the negative gradient.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n \"\"\"\n return y - expit(raw_predictions.ravel())" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction/_raw_prediction_to_decision", + "name": "_raw_prediction_to_decision", + "qname": "sklearn.ensemble._gb_losses.ClassificationLossFunction._raw_prediction_to_decision", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction/_raw_prediction_to_decision/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.ClassificationLossFunction._raw_prediction_to_decision.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction/_raw_prediction_to_decision/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.ClassificationLossFunction._raw_prediction_to_decision.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Template method to convert raw predictions to decisions.", + "docstring": "Template method to convert raw predictions to decisions.\n\nParameters\n----------\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nReturns\n-------\nencoded_predictions : ndarray of shape (n_samples, K)\n The predicted encoded labels.", + "code": " @abstractmethod\n def _raw_prediction_to_decision(self, raw_predictions):\n \"\"\"Template method to convert raw predictions to decisions.\n\n Parameters\n ----------\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\n Returns\n -------\n encoded_predictions : ndarray of shape (n_samples, K)\n The predicted encoded labels.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction/_raw_prediction_to_proba", + "name": "_raw_prediction_to_proba", + "qname": "sklearn.ensemble._gb_losses.ClassificationLossFunction._raw_prediction_to_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction/_raw_prediction_to_proba/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.ClassificationLossFunction._raw_prediction_to_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction/_raw_prediction_to_proba/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.ClassificationLossFunction._raw_prediction_to_proba.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. 
values from the tree leaves) of the\ntree ensemble." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Template method to convert raw predictions into probabilities.", + "docstring": "Template method to convert raw predictions into probabilities.\n\nParameters\n----------\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nReturns\n-------\nprobas : ndarray of shape (n_samples, K)\n The predicted probabilities.", + "code": " def _raw_prediction_to_proba(self, raw_predictions):\n \"\"\"Template method to convert raw predictions into probabilities.\n\n Parameters\n ----------\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\n Returns\n -------\n probas : ndarray of shape (n_samples, K)\n The predicted probabilities.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction/check_init_estimator", + "name": "check_init_estimator", + "qname": "sklearn.ensemble._gb_losses.ClassificationLossFunction.check_init_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction/check_init_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.ClassificationLossFunction.check_init_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ClassificationLossFunction/check_init_estimator/estimator", + "name": "estimator", + "qname": "sklearn.ensemble._gb_losses.ClassificationLossFunction.check_init_estimator.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "The init estimator to check." 
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Make sure estimator has fit and predict_proba methods.", + "docstring": "Make sure estimator has fit and predict_proba methods.\n\nParameters\n----------\nestimator : object\n The init estimator to check.", + "code": " def check_init_estimator(self, estimator):\n \"\"\"Make sure estimator has fit and predict_proba methods.\n\n Parameters\n ----------\n estimator : object\n The init estimator to check.\n \"\"\"\n if not (hasattr(estimator, 'fit') and\n hasattr(estimator, 'predict_proba')):\n raise ValueError(\n \"The init parameter must be a valid estimator \"\n \"and support both fit and predict_proba.\"\n )" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/__call__", + "name": "__call__", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/__call__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/__call__/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.__call__.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/__call__/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.__call__.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/__call__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.__call__.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the exponential loss", + "docstring": "Compute the exponential loss\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.", + "code": " def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the exponential loss\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n raw_predictions = raw_predictions.ravel()\n if sample_weight is None:\n return np.mean(np.exp(-(2. * y - 1.) * raw_predictions))\n else:\n return (1.0 / sample_weight.sum() * np.sum(\n sample_weight * np.exp(-(2 * y - 1) * raw_predictions)))" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/__init__/n_classes", + "name": "n_classes", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.__init__.n_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of classes." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Exponential loss function for binary classification.\n\nSame loss as AdaBoost.", + "docstring": "", + "code": " def __init__(self, n_classes):\n if n_classes != 2:\n raise ValueError(\"{0:s} requires 2 classes; got {1:d} class(es)\"\n .format(self.__class__.__name__, n_classes))\n # we only need to fit one tree for binary clf.\n super().__init__(n_classes=1)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_raw_prediction_to_decision", + "name": "_raw_prediction_to_decision", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._raw_prediction_to_decision", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_raw_prediction_to_decision/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._raw_prediction_to_decision.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_raw_prediction_to_decision/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._raw_prediction_to_decision.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _raw_prediction_to_decision(self, raw_predictions):\n return (raw_predictions.ravel() >= 0).astype(int)" + }, 
+ { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_raw_prediction_to_proba", + "name": "_raw_prediction_to_proba", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._raw_prediction_to_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_raw_prediction_to_proba/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._raw_prediction_to_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_raw_prediction_to_proba/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._raw_prediction_to_proba.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _raw_prediction_to_proba(self, raw_predictions):\n proba = np.ones((raw_predictions.shape[0], 2), dtype=np.float64)\n proba[:, 1] = expit(2.0 * raw_predictions.ravel())\n proba[:, 0] -= proba[:, 1]\n return proba" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_update_terminal_region", + "name": "_update_terminal_region", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._update_terminal_region", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_update_terminal_region/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._update_terminal_region.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_update_terminal_region/tree", + "name": "tree", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._update_terminal_region.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_update_terminal_region/terminal_regions", + "name": "terminal_regions", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._update_terminal_region.terminal_regions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_update_terminal_region/leaf", + "name": "leaf", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._update_terminal_region.leaf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_update_terminal_region/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._update_terminal_region.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + 
"default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_update_terminal_region/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._update_terminal_region.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_update_terminal_region/residual", + "name": "residual", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._update_terminal_region.residual", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_update_terminal_region/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._update_terminal_region.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/_update_terminal_region/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss._update_terminal_region.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n terminal_region = np.where(terminal_regions == leaf)[0]\n raw_predictions = raw_predictions.take(terminal_region, axis=0)\n y = y.take(terminal_region, axis=0)\n sample_weight = sample_weight.take(terminal_region, axis=0)\n\n y_ = 2. 
* y - 1.\n\n numerator = np.sum(y_ * sample_weight * np.exp(-y_ * raw_predictions))\n denominator = np.sum(sample_weight * np.exp(-y_ * raw_predictions))\n\n # prevents overflow and division by zero\n if abs(denominator) < 1e-150:\n tree.value[leaf, 0, 0] = 0.0\n else:\n tree.value[leaf, 0, 0] = numerator / denominator" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/get_init_raw_predictions", + "name": "get_init_raw_predictions", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.get_init_raw_predictions", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/get_init_raw_predictions/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.get_init_raw_predictions.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/get_init_raw_predictions/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.get_init_raw_predictions.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/get_init_raw_predictions/estimator", + "name": "estimator", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.get_init_raw_predictions.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_init_raw_predictions(self, X, estimator):\n probas = estimator.predict_proba(X)\n proba_pos_class = probas[:, 1]\n eps = np.finfo(np.float32).eps\n proba_pos_class = np.clip(proba_pos_class, eps, 1 - eps)\n # according to The Elements of Statistical Learning sec. 10.5, the\n # minimizer of the exponential loss is .5 * log odds ratio. 
So this is\n # the equivalent to .5 * binomial_deviance.get_init_raw_predictions()\n raw_predictions = .5 * np.log(proba_pos_class / (1 - proba_pos_class))\n return raw_predictions.reshape(-1, 1).astype(np.float64)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/init_estimator", + "name": "init_estimator", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.init_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/init_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.init_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def init_estimator(self):\n return DummyClassifier(strategy='prior')" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/negative_gradient", + "name": "negative_gradient", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.negative_gradient", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/negative_gradient/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.negative_gradient.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/negative_gradient/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.negative_gradient.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/negative_gradient/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.negative_gradient.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble at iteration ``i - 1``." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/ExponentialLoss/negative_gradient/kargs", + "name": "kargs", + "qname": "sklearn.ensemble._gb_losses.ExponentialLoss.negative_gradient.kargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the residual (= negative gradient).", + "docstring": "Compute the residual (= negative gradient).\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.", + "code": " def negative_gradient(self, y, raw_predictions, **kargs):\n \"\"\"Compute the residual (= negative gradient).\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n \"\"\"\n y_ = -(2. * y - 1.)\n return y_ * np.exp(y_ * raw_predictions.ravel())" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/__call__", + "name": "__call__", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/__call__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/__call__/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.__call__.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/__call__/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.__call__.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/__call__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.__call__.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the Huber loss.", + "docstring": "Compute the Huber loss.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.", + "code": " def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the Huber loss.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble.\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n raw_predictions = raw_predictions.ravel()\n diff = y - raw_predictions\n gamma = self.gamma\n if gamma is None:\n if sample_weight is None:\n gamma = np.percentile(np.abs(diff), self.alpha * 100)\n else:\n gamma = _weighted_percentile(np.abs(diff), sample_weight,\n self.alpha * 100)\n\n gamma_mask = np.abs(diff) <= gamma\n if sample_weight is None:\n sq_loss = np.sum(0.5 * diff[gamma_mask] ** 2)\n lin_loss = np.sum(gamma * (np.abs(diff[~gamma_mask]) -\n gamma / 2))\n loss = (sq_loss + lin_loss) / y.shape[0]\n else:\n sq_loss = np.sum(0.5 * sample_weight[gamma_mask] *\n diff[gamma_mask] ** 2)\n lin_loss = np.sum(gamma * sample_weight[~gamma_mask] *\n (np.abs(diff[~gamma_mask]) - gamma / 2))\n loss = (sq_loss + lin_loss) / sample_weight.sum()\n return loss" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/__init__/alpha", + "name": "alpha", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.__init__.alpha", + "default_value": "0.9", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.9", + "description": "Percentile at which to extract score." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Huber loss function for robust regression.\n\nM-Regression proposed in Friedman 2001.", + "docstring": "", + "code": " def __init__(self, alpha=0.9):\n super().__init__()\n self.alpha = alpha\n self.gamma = None" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/_update_terminal_region", + "name": "_update_terminal_region", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction._update_terminal_region", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/_update_terminal_region/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction._update_terminal_region.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/_update_terminal_region/tree", + "name": "tree", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction._update_terminal_region.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/_update_terminal_region/terminal_regions", + "name": "terminal_regions", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction._update_terminal_region.terminal_regions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/_update_terminal_region/leaf", + "name": "leaf", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction._update_terminal_region.leaf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/_update_terminal_region/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction._update_terminal_region.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/_update_terminal_region/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction._update_terminal_region.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/_update_terminal_region/residual", + "name": "residual", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction._update_terminal_region.residual", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/_update_terminal_region/raw_predictions", + "name": "raw_predictions", + "qname": 
"sklearn.ensemble._gb_losses.HuberLossFunction._update_terminal_region.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/_update_terminal_region/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction._update_terminal_region.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n terminal_region = np.where(terminal_regions == leaf)[0]\n sample_weight = sample_weight.take(terminal_region, axis=0)\n gamma = self.gamma\n diff = (y.take(terminal_region, axis=0)\n - raw_predictions.take(terminal_region, axis=0))\n median = _weighted_percentile(diff, sample_weight, percentile=50)\n diff_minus_median = diff - median\n tree.value[leaf, 0] = median + np.mean(\n np.sign(diff_minus_median) *\n np.minimum(np.abs(diff_minus_median), gamma))" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/init_estimator", + "name": "init_estimator", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.init_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/init_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.init_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def init_estimator(self):\n return DummyRegressor(strategy='quantile', quantile=.5)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/negative_gradient", + "name": "negative_gradient", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.negative_gradient", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/negative_gradient/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.negative_gradient.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/negative_gradient/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.negative_gradient.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The target labels." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/negative_gradient/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.negative_gradient.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble at iteration ``i - 1``." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/negative_gradient/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.negative_gradient.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/HuberLossFunction/negative_gradient/kargs", + "name": "kargs", + "qname": "sklearn.ensemble._gb_losses.HuberLossFunction.negative_gradient.kargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the negative gradient.", + "docstring": "Compute the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.", + "code": " def negative_gradient(self, y, raw_predictions, sample_weight=None,\n **kargs):\n \"\"\"Compute the negative gradient.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n The target labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n raw_predictions = raw_predictions.ravel()\n diff = y - raw_predictions\n if sample_weight is None:\n gamma = np.percentile(np.abs(diff), self.alpha * 100)\n else:\n gamma = _weighted_percentile(np.abs(diff), sample_weight,\n self.alpha * 100)\n gamma_mask = np.abs(diff) <= gamma\n residual = np.zeros((y.shape[0],), dtype=np.float64)\n residual[gamma_mask] = diff[gamma_mask]\n residual[~gamma_mask] = gamma * np.sign(diff[~gamma_mask])\n self.gamma = gamma\n return residual" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/__call__", + "name": "__call__", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/__call__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/__call__/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError.__call__.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/__call__/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError.__call__.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/__call__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError.__call__.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the least absolute error.", + "docstring": "Compute the least absolute error.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves).\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.", + "code": " def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the least absolute error.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves).\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n if sample_weight is None:\n return np.abs(y - raw_predictions.ravel()).mean()\n else:\n return (1 / sample_weight.sum() * np.sum(\n sample_weight * np.abs(y - raw_predictions.ravel())))" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/_update_terminal_region", + "name": "_update_terminal_region", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError._update_terminal_region", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/_update_terminal_region/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError._update_terminal_region.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/_update_terminal_region/tree", + "name": "tree", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError._update_terminal_region.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/_update_terminal_region/terminal_regions", + "name": "terminal_regions", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError._update_terminal_region.terminal_regions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/_update_terminal_region/leaf", + "name": "leaf", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError._update_terminal_region.leaf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/_update_terminal_region/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError._update_terminal_region.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/_update_terminal_region/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError._update_terminal_region.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/_update_terminal_region/residual", + "name": "residual", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError._update_terminal_region.residual", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/_update_terminal_region/raw_predictions", + "name": "raw_predictions", + "qname": 
"sklearn.ensemble._gb_losses.LeastAbsoluteError._update_terminal_region.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/_update_terminal_region/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError._update_terminal_region.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "LAD updates terminal regions to median estimates.", + "docstring": "LAD updates terminal regions to median estimates.", + "code": " def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n \"\"\"LAD updates terminal regions to median estimates.\"\"\"\n terminal_region = np.where(terminal_regions == leaf)[0]\n sample_weight = sample_weight.take(terminal_region, axis=0)\n diff = (y.take(terminal_region, axis=0) -\n raw_predictions.take(terminal_region, axis=0))\n tree.value[leaf, 0, 0] = _weighted_percentile(diff, sample_weight,\n percentile=50)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/init_estimator", + "name": "init_estimator", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError.init_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/init_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError.init_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def init_estimator(self):\n return DummyRegressor(strategy='quantile', quantile=.5)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/negative_gradient", + "name": "negative_gradient", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError.negative_gradient", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/negative_gradient/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError.negative_gradient.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/negative_gradient/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError.negative_gradient.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The target labels." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/negative_gradient/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError.negative_gradient.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble at iteration ``i - 1``." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastAbsoluteError/negative_gradient/kargs", + "name": "kargs", + "qname": "sklearn.ensemble._gb_losses.LeastAbsoluteError.negative_gradient.kargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the negative gradient.\n\n1.0 if y - raw_predictions > 0.0 else -1.0", + "docstring": "Compute the negative gradient.\n\n1.0 if y - raw_predictions > 0.0 else -1.0\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.", + "code": " def negative_gradient(self, y, raw_predictions, **kargs):\n \"\"\"Compute the negative gradient.\n\n 1.0 if y - raw_predictions > 0.0 else -1.0\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n The target labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n \"\"\"\n raw_predictions = raw_predictions.ravel()\n return 2 * (y - raw_predictions > 0) - 1" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/__call__", + "name": "__call__", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/__call__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/__call__/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.__call__.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/__call__/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.__call__.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. 
values from the tree leaves)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/__call__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.__call__.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the least squares loss.", + "docstring": "Compute the least squares loss.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves).\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.", + "code": " def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the least squares loss.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves).\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n if sample_weight is None:\n return np.mean((y - raw_predictions.ravel()) ** 2)\n else:\n return (1 / sample_weight.sum() * np.sum(\n sample_weight * ((y - raw_predictions.ravel()) ** 2)))" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/_update_terminal_region", + "name": "_update_terminal_region", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError._update_terminal_region", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/_update_terminal_region/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError._update_terminal_region.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/_update_terminal_region/tree", + "name": "tree", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError._update_terminal_region.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/_update_terminal_region/terminal_regions", + "name": "terminal_regions", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError._update_terminal_region.terminal_regions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/_update_terminal_region/leaf", + "name": "leaf", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError._update_terminal_region.leaf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/_update_terminal_region/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError._update_terminal_region.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/_update_terminal_region/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError._update_terminal_region.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/_update_terminal_region/residual", + "name": "residual", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError._update_terminal_region.residual", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/_update_terminal_region/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError._update_terminal_region.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/_update_terminal_region/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError._update_terminal_region.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n pass" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/init_estimator", + "name": "init_estimator", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.init_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/init_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.init_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def init_estimator(self):\n return DummyRegressor(strategy='mean')" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/negative_gradient", + "name": "negative_gradient", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.negative_gradient", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/negative_gradient/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.negative_gradient.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + 
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/negative_gradient/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.negative_gradient.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The target labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/negative_gradient/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.negative_gradient.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble at iteration ``i - 1``." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/negative_gradient/kargs", + "name": "kargs", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.negative_gradient.kargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute half of the negative gradient.", + "docstring": "Compute half of the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples,)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.", + "code": " def negative_gradient(self, y, raw_predictions, **kargs):\n \"\"\"Compute half of the negative gradient.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n The target labels.\n\n raw_predictions : ndarray of shape (n_samples,)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n \"\"\"\n return y - raw_predictions.ravel()" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/update_terminal_regions", + "name": "update_terminal_regions", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.update_terminal_regions", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/update_terminal_regions/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.update_terminal_regions.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/update_terminal_regions/tree", + "name": "tree", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.update_terminal_regions.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tree.Tree", + "default_value": "", + "description": "The tree object." 
+ }, + "type": { + "kind": "NamedType", + "name": "tree.Tree" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/update_terminal_regions/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.update_terminal_regions.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The data array." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/update_terminal_regions/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.update_terminal_regions.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The target labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/update_terminal_regions/residual", + "name": "residual", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.update_terminal_regions.residual", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The residuals (usually the negative gradient)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/update_terminal_regions/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.update_terminal_regions.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble at iteration ``i - 1``." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/update_terminal_regions/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.update_terminal_regions.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n,)", + "default_value": "", + "description": "The weight of each sample." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/update_terminal_regions/sample_mask", + "name": "sample_mask", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.update_terminal_regions.sample_mask", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n,)", + "default_value": "", + "description": "The sample mask to be used." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/update_terminal_regions/learning_rate", + "name": "learning_rate", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.update_terminal_regions.learning_rate", + "default_value": "0.1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "Learning rate shrinks the contribution of each tree by\n ``learning_rate``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LeastSquaresError/update_terminal_regions/k", + "name": "k", + "qname": "sklearn.ensemble._gb_losses.LeastSquaresError.update_terminal_regions.k", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The index of the estimator being updated." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Least squares does not need to update terminal regions.\n\nBut it has to update the predictions.", + "docstring": "Least squares does not need to update terminal regions.\n\nBut it has to update the predictions.\n\nParameters\n----------\ntree : tree.Tree\n The tree object.\nX : ndarray of shape (n_samples, n_features)\n The data array.\ny : ndarray of shape (n_samples,)\n The target labels.\nresidual : ndarray of shape (n_samples,)\n The residuals (usually the negative gradient).\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\nsample_weight : ndarray of shape (n,)\n The weight of each sample.\nsample_mask : ndarray of shape (n,)\n The sample mask to be used.\nlearning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by\n ``learning_rate``.\nk : int, default=0\n The index of the estimator being updated.", + "code": " def update_terminal_regions(self, tree, X, y, residual, raw_predictions,\n sample_weight, sample_mask,\n learning_rate=0.1, k=0):\n \"\"\"Least squares does not need to update terminal regions.\n\n But it has to update the predictions.\n\n Parameters\n ----------\n tree : tree.Tree\n The tree object.\n X : ndarray of shape (n_samples, n_features)\n The data array.\n y : ndarray of shape (n_samples,)\n The target labels.\n residual : ndarray of shape (n_samples,)\n The residuals (usually the negative gradient).\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n sample_weight : ndarray of shape (n,)\n The weight of each sample.\n sample_mask : ndarray of shape (n,)\n The sample mask to be used.\n learning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by\n ``learning_rate``.\n k : int, default=0\n The index of the estimator being updated.\n \"\"\"\n # update predictions\n raw_predictions[:, k] += learning_rate * tree.predict(X).ravel()" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/__call__", + "name": "__call__", + "qname": "sklearn.ensemble._gb_losses.LossFunction.__call__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/__call__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LossFunction.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/__call__/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.LossFunction.__call__.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/__call__/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.LossFunction.__call__.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/__call__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.LossFunction.__call__.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the loss.", + "docstring": "Compute the loss.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves).\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.", + "code": " @abstractmethod\n def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the loss.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves).\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._gb_losses.LossFunction.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LossFunction.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/__init__/n_classes", + "name": "n_classes", + "qname": "sklearn.ensemble._gb_losses.LossFunction.__init__.n_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of classes." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Abstract base class for various loss functions.", + "docstring": "", + "code": " def __init__(self, n_classes):\n self.K = n_classes" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/_update_terminal_region", + "name": "_update_terminal_region", + "qname": "sklearn.ensemble._gb_losses.LossFunction._update_terminal_region", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/_update_terminal_region/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LossFunction._update_terminal_region.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/_update_terminal_region/tree", + "name": "tree", + "qname": "sklearn.ensemble._gb_losses.LossFunction._update_terminal_region.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/_update_terminal_region/terminal_regions", + "name": "terminal_regions", + "qname": "sklearn.ensemble._gb_losses.LossFunction._update_terminal_region.terminal_regions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/_update_terminal_region/leaf", + "name": "leaf", + "qname": "sklearn.ensemble._gb_losses.LossFunction._update_terminal_region.leaf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/_update_terminal_region/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.LossFunction._update_terminal_region.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.ensemble._gb_losses/LossFunction/_update_terminal_region/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.LossFunction._update_terminal_region.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/_update_terminal_region/residual", + "name": "residual", + "qname": "sklearn.ensemble._gb_losses.LossFunction._update_terminal_region.residual", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/_update_terminal_region/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.LossFunction._update_terminal_region.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/_update_terminal_region/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.LossFunction._update_terminal_region.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Template method for updating terminal regions (i.e., leaves).", + "docstring": "Template method for updating terminal regions (i.e., leaves).", + "code": " @abstractmethod\n def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n \"\"\"Template method for updating terminal regions (i.e., leaves).\"\"\"" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/get_init_raw_predictions", + "name": "get_init_raw_predictions", + "qname": "sklearn.ensemble._gb_losses.LossFunction.get_init_raw_predictions", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/get_init_raw_predictions/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LossFunction.get_init_raw_predictions.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/get_init_raw_predictions/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.LossFunction.get_init_raw_predictions.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The data array." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/get_init_raw_predictions/estimator", + "name": "estimator", + "qname": "sklearn.ensemble._gb_losses.LossFunction.get_init_raw_predictions.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "The estimator to use to compute the predictions." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the initial raw predictions.", + "docstring": "Return the initial raw predictions.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data array.\nestimator : object\n The estimator to use to compute the predictions.\n\nReturns\n-------\nraw_predictions : ndarray of shape (n_samples, K)\n The initial raw predictions. K is equal to 1 for binary\n classification and regression, and equal to the number of classes\n for multiclass classification. ``raw_predictions`` is casted\n into float64.", + "code": " @abstractmethod\n def get_init_raw_predictions(self, X, estimator):\n \"\"\"Return the initial raw predictions.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The data array.\n estimator : object\n The estimator to use to compute the predictions.\n\n Returns\n -------\n raw_predictions : ndarray of shape (n_samples, K)\n The initial raw predictions. K is equal to 1 for binary\n classification and regression, and equal to the number of classes\n for multiclass classification. ``raw_predictions`` is casted\n into float64.\n \"\"\"\n pass" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/init_estimator", + "name": "init_estimator", + "qname": "sklearn.ensemble._gb_losses.LossFunction.init_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/init_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LossFunction.init_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Default ``init`` estimator for loss function.", + "docstring": "Default ``init`` estimator for loss function. ", + "code": " def init_estimator(self):\n \"\"\"Default ``init`` estimator for loss function. 
\"\"\"\n raise NotImplementedError()" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/negative_gradient", + "name": "negative_gradient", + "qname": "sklearn.ensemble._gb_losses.LossFunction.negative_gradient", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/negative_gradient/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LossFunction.negative_gradient.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/negative_gradient/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.LossFunction.negative_gradient.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The target labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/negative_gradient/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.LossFunction.negative_gradient.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble at iteration ``i - 1``." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/negative_gradient/kargs", + "name": "kargs", + "qname": "sklearn.ensemble._gb_losses.LossFunction.negative_gradient.kargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the negative gradient.", + "docstring": "Compute the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.", + "code": " @abstractmethod\n def negative_gradient(self, y, raw_predictions, **kargs):\n \"\"\"Compute the negative gradient.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n The target labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/update_terminal_regions", + "name": "update_terminal_regions", + "qname": "sklearn.ensemble._gb_losses.LossFunction.update_terminal_regions", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/update_terminal_regions/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.LossFunction.update_terminal_regions.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/update_terminal_regions/tree", + "name": "tree", + "qname": "sklearn.ensemble._gb_losses.LossFunction.update_terminal_regions.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tree.Tree", + "default_value": "", + "description": "The tree object." + }, + "type": { + "kind": "NamedType", + "name": "tree.Tree" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/update_terminal_regions/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.LossFunction.update_terminal_regions.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The data array." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/update_terminal_regions/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.LossFunction.update_terminal_regions.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The target labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/update_terminal_regions/residual", + "name": "residual", + "qname": "sklearn.ensemble._gb_losses.LossFunction.update_terminal_regions.residual", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The residuals (usually the negative gradient)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/update_terminal_regions/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.LossFunction.update_terminal_regions.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble at iteration ``i - 1``." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/update_terminal_regions/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.LossFunction.update_terminal_regions.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The weight of each sample." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/update_terminal_regions/sample_mask", + "name": "sample_mask", + "qname": "sklearn.ensemble._gb_losses.LossFunction.update_terminal_regions.sample_mask", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The sample mask to be used." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/update_terminal_regions/learning_rate", + "name": "learning_rate", + "qname": "sklearn.ensemble._gb_losses.LossFunction.update_terminal_regions.learning_rate", + "default_value": "0.1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "Learning rate shrinks the contribution of each tree by\n ``learning_rate``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/LossFunction/update_terminal_regions/k", + "name": "k", + "qname": "sklearn.ensemble._gb_losses.LossFunction.update_terminal_regions.k", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The index of the estimator being updated." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Update the terminal regions (=leaves) of the given tree and\nupdates the current predictions of the model. Traverses tree\nand invokes template method `_update_terminal_region`.", + "docstring": "Update the terminal regions (=leaves) of the given tree and\nupdates the current predictions of the model. Traverses tree\nand invokes template method `_update_terminal_region`.\n\nParameters\n----------\ntree : tree.Tree\n The tree object.\nX : ndarray of shape (n_samples, n_features)\n The data array.\ny : ndarray of shape (n_samples,)\n The target labels.\nresidual : ndarray of shape (n_samples,)\n The residuals (usually the negative gradient).\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\nsample_weight : ndarray of shape (n_samples,)\n The weight of each sample.\nsample_mask : ndarray of shape (n_samples,)\n The sample mask to be used.\nlearning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by\n ``learning_rate``.\nk : int, default=0\n The index of the estimator being updated.", + "code": " def update_terminal_regions(self, tree, X, y, residual, raw_predictions,\n sample_weight, sample_mask,\n learning_rate=0.1, k=0):\n \"\"\"Update the terminal regions (=leaves) of the given tree and\n updates the current predictions of the model. Traverses tree\n and invokes template method `_update_terminal_region`.\n\n Parameters\n ----------\n tree : tree.Tree\n The tree object.\n X : ndarray of shape (n_samples, n_features)\n The data array.\n y : ndarray of shape (n_samples,)\n The target labels.\n residual : ndarray of shape (n_samples,)\n The residuals (usually the negative gradient).\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n sample_weight : ndarray of shape (n_samples,)\n The weight of each sample.\n sample_mask : ndarray of shape (n_samples,)\n The sample mask to be used.\n learning_rate : float, default=0.1\n Learning rate shrinks the contribution of each tree by\n ``learning_rate``.\n k : int, default=0\n The index of the estimator being updated.\n\n \"\"\"\n # compute leaf for each sample in ``X``.\n terminal_regions = tree.apply(X)\n\n # mask all which are not in sample mask.\n masked_terminal_regions = terminal_regions.copy()\n masked_terminal_regions[~sample_mask] = -1\n\n # update each leaf (= perform line search)\n for leaf in np.where(tree.children_left == TREE_LEAF)[0]:\n self._update_terminal_region(tree, masked_terminal_regions,\n leaf, X, y, residual,\n raw_predictions[:, k], sample_weight)\n\n # update predictions (both in-bag and out-of-bag)\n raw_predictions[:, k] += \\\n learning_rate * tree.value[:, 0, 0].take(terminal_regions, axis=0)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/__call__", + "name": "__call__", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/__call__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/__call__/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.__call__.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True labels." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/__call__/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.__call__.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/__call__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.__call__.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the Multinomial deviance.", + "docstring": "Compute the Multinomial deviance.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.", + "code": " def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the Multinomial deviance.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n # create one-hot label encoding\n Y = np.zeros((y.shape[0], self.K), dtype=np.float64)\n for k in range(self.K):\n Y[:, k] = y == k\n\n return np.average(\n -1 * (Y * raw_predictions).sum(axis=1) +\n logsumexp(raw_predictions, axis=1),\n weights=sample_weight\n )" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/__init__/n_classes", + "name": "n_classes", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.__init__.n_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of classes." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Multinomial deviance loss function for multi-class classification.\n\nFor multi-class classification we need to fit ``n_classes`` trees at\neach stage.", + "docstring": "", + "code": " def __init__(self, n_classes):\n if n_classes < 3:\n raise ValueError(\"{0:s} requires more than 2 classes.\".format(\n self.__class__.__name__))\n super().__init__(n_classes)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_raw_prediction_to_decision", + "name": "_raw_prediction_to_decision", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._raw_prediction_to_decision", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_raw_prediction_to_decision/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._raw_prediction_to_decision.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_raw_prediction_to_decision/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._raw_prediction_to_decision.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _raw_prediction_to_decision(self, raw_predictions):\n proba = self._raw_prediction_to_proba(raw_predictions)\n return np.argmax(proba, axis=1)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_raw_prediction_to_proba", + "name": "_raw_prediction_to_proba", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._raw_prediction_to_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_raw_prediction_to_proba/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._raw_prediction_to_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_raw_prediction_to_proba/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._raw_prediction_to_proba.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _raw_prediction_to_proba(self, raw_predictions):\n return np.nan_to_num(\n np.exp(raw_predictions -\n (logsumexp(raw_predictions, axis=1)[:, np.newaxis])))" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_update_terminal_region", + "name": "_update_terminal_region", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._update_terminal_region", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_update_terminal_region/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._update_terminal_region.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_update_terminal_region/tree", + "name": "tree", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._update_terminal_region.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_update_terminal_region/terminal_regions", + "name": "terminal_regions", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._update_terminal_region.terminal_regions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_update_terminal_region/leaf", + "name": "leaf", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._update_terminal_region.leaf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_update_terminal_region/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._update_terminal_region.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_update_terminal_region/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._update_terminal_region.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_update_terminal_region/residual", + "name": "residual", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._update_terminal_region.residual", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_update_terminal_region/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._update_terminal_region.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/_update_terminal_region/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance._update_terminal_region.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": 
"", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Make a single Newton-Raphson step.", + "docstring": "Make a single Newton-Raphson step. ", + "code": " def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n \"\"\"Make a single Newton-Raphson step. \"\"\"\n terminal_region = np.where(terminal_regions == leaf)[0]\n residual = residual.take(terminal_region, axis=0)\n y = y.take(terminal_region, axis=0)\n sample_weight = sample_weight.take(terminal_region, axis=0)\n\n numerator = np.sum(sample_weight * residual)\n numerator *= (self.K - 1) / self.K\n\n denominator = np.sum(sample_weight * (y - residual) *\n (1 - y + residual))\n\n # prevents overflow and division by zero\n if abs(denominator) < 1e-150:\n tree.value[leaf, 0, 0] = 0.0\n else:\n tree.value[leaf, 0, 0] = numerator / denominator" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/get_init_raw_predictions", + "name": "get_init_raw_predictions", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.get_init_raw_predictions", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/get_init_raw_predictions/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.get_init_raw_predictions.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/get_init_raw_predictions/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.get_init_raw_predictions.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/get_init_raw_predictions/estimator", + "name": "estimator", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.get_init_raw_predictions.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_init_raw_predictions(self, X, estimator):\n probas = estimator.predict_proba(X)\n eps = np.finfo(np.float32).eps\n probas = np.clip(probas, eps, 1 - eps)\n raw_predictions = np.log(probas).astype(np.float64)\n return raw_predictions" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/init_estimator", + "name": "init_estimator", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.init_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/init_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.init_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def init_estimator(self):\n return 
DummyClassifier(strategy='prior')" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/negative_gradient", + "name": "negative_gradient", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.negative_gradient", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/negative_gradient/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.negative_gradient.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/negative_gradient/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.negative_gradient.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The target labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/negative_gradient/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.negative_gradient.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble at iteration ``i - 1``." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/negative_gradient/k", + "name": "k", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.negative_gradient.k", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The index of the class." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/MultinomialDeviance/negative_gradient/kwargs", + "name": "kwargs", + "qname": "sklearn.ensemble._gb_losses.MultinomialDeviance.negative_gradient.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute negative gradient for the ``k``-th class.", + "docstring": "Compute negative gradient for the ``k``-th class.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n\nk : int, default=0\n The index of the class.", + "code": " def negative_gradient(self, y, raw_predictions, k=0, **kwargs):\n \"\"\"Compute negative gradient for the ``k``-th class.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n The target labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n\n k : int, default=0\n The index of the class.\n \"\"\"\n return y - np.nan_to_num(np.exp(raw_predictions[:, k] -\n logsumexp(raw_predictions, axis=1)))" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/__call__", + "name": "__call__", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/__call__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/__call__/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.__call__.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/__call__/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.__call__.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/__call__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.__call__.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the Quantile loss.", + "docstring": "Compute the Quantile loss.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n True labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.", + "code": " def __call__(self, y, raw_predictions, sample_weight=None):\n \"\"\"Compute the Quantile loss.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n True labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble.\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights.\n \"\"\"\n raw_predictions = raw_predictions.ravel()\n diff = y - raw_predictions\n alpha = self.alpha\n\n mask = y > raw_predictions\n if sample_weight is None:\n loss = (alpha * diff[mask].sum() -\n (1 - alpha) * diff[~mask].sum()) / y.shape[0]\n else:\n loss = ((alpha * np.sum(sample_weight[mask] * diff[mask]) -\n (1 - alpha) * np.sum(sample_weight[~mask] *\n diff[~mask])) / sample_weight.sum())\n return loss" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/__init__/alpha", + "name": "alpha", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.__init__.alpha", + "default_value": "0.9", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.9", + "description": "The percentile." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Loss function for quantile regression.\n\nQuantile regression allows estimating the percentiles\nof the conditional distribution of the target.", + "docstring": "", + "code": " def __init__(self, alpha=0.9):\n super().__init__()\n self.alpha = alpha\n self.percentile = alpha * 100" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/_update_terminal_region", + "name": "_update_terminal_region", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction._update_terminal_region", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/_update_terminal_region/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction._update_terminal_region.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/_update_terminal_region/tree", + "name": "tree", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction._update_terminal_region.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/_update_terminal_region/terminal_regions", + "name": "terminal_regions", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction._update_terminal_region.terminal_regions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/_update_terminal_region/leaf", + "name": "leaf", +
"qname": "sklearn.ensemble._gb_losses.QuantileLossFunction._update_terminal_region.leaf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/_update_terminal_region/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction._update_terminal_region.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/_update_terminal_region/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction._update_terminal_region.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/_update_terminal_region/residual", + "name": "residual", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction._update_terminal_region.residual", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/_update_terminal_region/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction._update_terminal_region.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/_update_terminal_region/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction._update_terminal_region.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _update_terminal_region(self, tree, terminal_regions, leaf, X, y,\n residual, raw_predictions, sample_weight):\n terminal_region = np.where(terminal_regions == leaf)[0]\n diff = (y.take(terminal_region, axis=0)\n - raw_predictions.take(terminal_region, axis=0))\n sample_weight = sample_weight.take(terminal_region, axis=0)\n\n val = _weighted_percentile(diff, sample_weight, self.percentile)\n tree.value[leaf, 0] = val" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/init_estimator", + "name": "init_estimator", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.init_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/init_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.init_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + 
"docstring": "", + "code": " def init_estimator(self):\n return DummyRegressor(strategy='quantile', quantile=self.alpha)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/negative_gradient", + "name": "negative_gradient", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.negative_gradient", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/negative_gradient/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.negative_gradient.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/negative_gradient/y", + "name": "y", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.negative_gradient.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The target labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/negative_gradient/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.negative_gradient.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, K)", + "default_value": "", + "description": "The raw predictions (i.e. values from the tree leaves) of the\ntree ensemble at iteration ``i - 1``." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, K)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/QuantileLossFunction/negative_gradient/kargs", + "name": "kargs", + "qname": "sklearn.ensemble._gb_losses.QuantileLossFunction.negative_gradient.kargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the negative gradient.", + "docstring": "Compute the negative gradient.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n The target labels.\n\nraw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.", + "code": " def negative_gradient(self, y, raw_predictions, **kargs):\n \"\"\"Compute the negative gradient.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n The target labels.\n\n raw_predictions : ndarray of shape (n_samples, K)\n The raw predictions (i.e. 
values from the tree leaves) of the\n tree ensemble at iteration ``i - 1``.\n \"\"\"\n alpha = self.alpha\n raw_predictions = raw_predictions.ravel()\n mask = y > raw_predictions\n return (alpha * mask) - ((1 - alpha) * ~mask)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._gb_losses.RegressionLossFunction.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.RegressionLossFunction.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for regression loss functions.", + "docstring": "", + "code": " def __init__(self):\n super().__init__(n_classes=1)" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction/check_init_estimator", + "name": "check_init_estimator", + "qname": "sklearn.ensemble._gb_losses.RegressionLossFunction.check_init_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction/check_init_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.RegressionLossFunction.check_init_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction/check_init_estimator/estimator", + "name": "estimator", + "qname": "sklearn.ensemble._gb_losses.RegressionLossFunction.check_init_estimator.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "The init estimator to check." 
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Make sure estimator has the required fit and predict methods.", + "docstring": "Make sure estimator has the required fit and predict methods.\n\nParameters\n----------\nestimator : object\n The init estimator to check.", + "code": " def check_init_estimator(self, estimator):\n \"\"\"Make sure estimator has the required fit and predict methods.\n\n Parameters\n ----------\n estimator : object\n The init estimator to check.\n \"\"\"\n if not (hasattr(estimator, 'fit') and hasattr(estimator, 'predict')):\n raise ValueError(\n \"The init parameter must be a valid estimator and \"\n \"support both fit and predict.\"\n )" + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction/get_init_raw_predictions", + "name": "get_init_raw_predictions", + "qname": "sklearn.ensemble._gb_losses.RegressionLossFunction.get_init_raw_predictions", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction/get_init_raw_predictions/self", + "name": "self", + "qname": "sklearn.ensemble._gb_losses.RegressionLossFunction.get_init_raw_predictions.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction/get_init_raw_predictions/X", + "name": "X", + "qname": "sklearn.ensemble._gb_losses.RegressionLossFunction.get_init_raw_predictions.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._gb_losses/RegressionLossFunction/get_init_raw_predictions/estimator", + "name": "estimator", + "qname": "sklearn.ensemble._gb_losses.RegressionLossFunction.get_init_raw_predictions.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_init_raw_predictions(self, X, estimator):\n predictions = estimator.predict(X)\n return predictions.reshape(-1, 1).astype(np.float64)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/__init__/n_bins", + "name": "n_bins", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.__init__.n_bins", + "default_value": "256", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "256", + "description": "The maximum number of bins to 
use (including the bin for missing\nvalues). Should be in [3, 256]. Non-missing values are binned on\n``max_bins = n_bins - 1`` bins. The last bin is always reserved for\nmissing values. If for a given feature the number of unique values is\nless than ``max_bins``, then those unique values will be used to\ncompute the bin thresholds, instead of the quantiles. For categorical\nfeatures indicated by ``is_categorical``, see the docstring of\n``is_categorical`` for details on this procedure." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/__init__/subsample", + "name": "subsample", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.__init__.subsample", + "default_value": "int(200000.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "2e5", + "description": "If ``n_samples > subsample``, then ``subsample`` samples will be\nrandomly chosen to compute the quantiles. If ``None``, the whole data\nis used." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/__init__/is_categorical", + "name": "is_categorical", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.__init__.is_categorical", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of bool of shape (n_features,)", + "default_value": "None", + "description": "Indicates categorical features. By default, all features are\nconsidered continuous." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of bool of shape (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/__init__/known_categories", + "name": "known_categories", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.__init__.known_categories", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of {ndarray, None} of shape (n_features,)", + "default_value": "None", + "description": "For each categorical feature, the array indicates the set of unique\ncategorical values. These should be the possible values over all the\ndata, not just the training data. For continuous features, the\ncorresponding entry should be None." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "list of {ndarray, None} of shape (n_features,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.__init__.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transformer that maps a dataset into integer-valued bins.\n\nFor continuous features, the bins are created in a feature-wise fashion,\nusing quantiles so that each bin contains approximately the same number\nof samples.
For large datasets, quantiles are computed on a subset of the\ndata to speed up the binning, but the quantiles should remain stable.\n\nFor categorical features, the raw categorical values are expected to be\nin [0, 254] (this is not validated here though) and each category\ncorresponds to a bin. All categorical values must be known at\ninitialization: transform() doesn't know how to bin unknown categorical\nvalues. Note that transform() is only used on non-training data in the\ncase of early stopping.\n\nFeatures with a small number of values may be binned into fewer than\n``n_bins`` bins. The last bin (at index ``n_bins - 1``) is always reserved\nfor missing values.", + "docstring": "", + "code": " def __init__(self, n_bins=256, subsample=int(2e5), is_categorical=None,\n known_categories=None, random_state=None):\n self.n_bins = n_bins\n self.subsample = subsample\n self.is_categorical = is_categorical\n self.known_categories = known_categories\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/fit", + "name": "fit", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/fit/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/fit/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to bin."
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/fit/y", + "name": "y", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit data X by computing the binning thresholds.\n\nThe last bin is reserved for missing values, whether missing values\nare present in the data or not.", + "docstring": "Fit data X by computing the binning thresholds.\n\nThe last bin is reserved for missing values, whether missing values\nare present in the data or not.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to bin.\ny: None\n Ignored.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y=None):\n \"\"\"Fit data X by computing the binning thresholds.\n\n The last bin is reserved for missing values, whether missing values\n are present in the data or not.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to bin.\n y: None\n Ignored.\n\n Returns\n -------\n self : object\n \"\"\"\n if not (3 <= self.n_bins <= 256):\n # min is 3: at least 2 distinct bins and a missing values bin\n raise ValueError('n_bins={} should be no smaller than 3 '\n 'and no larger than 256.'.format(self.n_bins))\n\n X = check_array(X, dtype=[X_DTYPE], force_all_finite=False)\n max_bins = self.n_bins - 1\n\n rng = check_random_state(self.random_state)\n if self.subsample is not None and X.shape[0] > self.subsample:\n subset = rng.choice(X.shape[0], self.subsample, replace=False)\n X = X.take(subset, axis=0)\n\n if self.is_categorical is None:\n self.is_categorical_ = np.zeros(X.shape[1], dtype=np.uint8)\n else:\n self.is_categorical_ = np.asarray(self.is_categorical,\n dtype=np.uint8)\n\n n_features = X.shape[1]\n known_categories = self.known_categories\n if known_categories is None:\n known_categories = [None] * n_features\n\n # validate is_categorical and known_categories parameters\n for f_idx in range(n_features):\n is_categorical = self.is_categorical_[f_idx]\n known_cats = known_categories[f_idx]\n if is_categorical and known_cats is None:\n raise ValueError(\n f\"Known categories for feature {f_idx} must be provided.\"\n )\n if not is_categorical and known_cats is not None:\n raise ValueError(\n f\"Feature {f_idx} isn't marked as a categorical feature, \"\n f\"but categories were passed.\"\n )\n\n self.missing_values_bin_idx_ = self.n_bins - 1\n\n self.bin_thresholds_ = []\n n_bins_non_missing = []\n\n for f_idx in range(n_features):\n if not self.is_categorical_[f_idx]:\n thresholds = _find_binning_thresholds(X[:, f_idx], max_bins)\n n_bins_non_missing.append(thresholds.shape[0] + 1)\n else:\n # Since categories are assumed to be encoded in\n # [0, n_cats] and since n_cats <= max_bins,\n # the thresholds *are* the unique categorical values. 
This will\n # lead to the correct mapping in transform()\n thresholds = known_categories[f_idx]\n n_bins_non_missing.append(thresholds.shape[0])\n\n self.bin_thresholds_.append(thresholds)\n\n self.n_bins_non_missing_ = np.array(n_bins_non_missing,\n dtype=np.uint32)\n return self" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/make_known_categories_bitsets", + "name": "make_known_categories_bitsets", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.make_known_categories_bitsets", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/make_known_categories_bitsets/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.make_known_categories_bitsets.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Create bitsets of known categories.", + "docstring": "Create bitsets of known categories.\n\nReturns\n-------\n- known_cat_bitsets : ndarray of shape (n_categorical_features, 8)\n Array of bitsets of known categories, for each categorical feature.\n- f_idx_map : ndarray of shape (n_features,)\n Map from original feature index to the corresponding index in the\n known_cat_bitsets array.", + "code": " def make_known_categories_bitsets(self):\n \"\"\"Create bitsets of known categories.\n\n Returns\n -------\n - known_cat_bitsets : ndarray of shape (n_categorical_features, 8)\n Array of bitsets of known categories, for each categorical feature.\n - f_idx_map : ndarray of shape (n_features,)\n Map from original feature index to the corresponding index in the\n known_cat_bitsets array.\n \"\"\"\n\n categorical_features_indices = np.flatnonzero(self.is_categorical_)\n\n n_features = self.is_categorical_.size\n n_categorical_features = categorical_features_indices.size\n\n f_idx_map = np.zeros(n_features, dtype=np.uint32)\n f_idx_map[categorical_features_indices] = np.arange(\n n_categorical_features, dtype=np.uint32)\n\n known_categories = self.bin_thresholds_\n\n known_cat_bitsets = np.zeros((n_categorical_features, 8),\n dtype=X_BITSET_INNER_DTYPE)\n\n # TODO: complexity is O(n_categorical_features * 255). 
Maybe this is\n # worth cythonizing\n for mapped_f_idx, f_idx in enumerate(categorical_features_indices):\n for raw_cat_val in known_categories[f_idx]:\n set_bitset_memoryview(known_cat_bitsets[mapped_f_idx],\n raw_cat_val)\n\n return known_cat_bitsets, f_idx_map" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/transform", + "name": "transform", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/transform/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_BinMapper/transform/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._BinMapper.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to bin." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Bin data X.\n\nMissing values will be mapped to the last bin.\n\nFor categorical features, the mapping will be incorrect for unknown\ncategories. Since the BinMapper is given known_categories of the\nentire training data (i.e. before the call to train_test_split() in\ncase of early-stopping), this never happens.", + "docstring": "Bin data X.\n\nMissing values will be mapped to the last bin.\n\nFor categorical features, the mapping will be incorrect for unknown\ncategories. Since the BinMapper is given known_categories of the\nentire training data (i.e. before the call to train_test_split() in\ncase of early-stopping), this never happens.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to bin.\n\nReturns\n-------\nX_binned : array-like of shape (n_samples, n_features)\n The binned data (fortran-aligned).", + "code": " def transform(self, X):\n \"\"\"Bin data X.\n\n Missing values will be mapped to the last bin.\n\n For categorical features, the mapping will be incorrect for unknown\n categories. Since the BinMapper is given known_categories of the\n entire training data (i.e. 
before the call to train_test_split() in\n case of early-stopping), this never happens.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to bin.\n\n Returns\n -------\n X_binned : array-like of shape (n_samples, n_features)\n The binned data (fortran-aligned).\n \"\"\"\n X = check_array(X, dtype=[X_DTYPE], force_all_finite=False)\n check_is_fitted(self)\n if X.shape[1] != self.n_bins_non_missing_.shape[0]:\n raise ValueError(\n 'This estimator was fitted with {} features but {} got passed '\n 'to transform()'.format(self.n_bins_non_missing_.shape[0],\n X.shape[1])\n )\n binned = np.zeros_like(X, dtype=X_BINNED_DTYPE, order='F')\n _map_to_bins(X, self.bin_thresholds_, self.missing_values_bin_idx_,\n binned)\n return binned" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_find_binning_thresholds", + "name": "_find_binning_thresholds", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._find_binning_thresholds", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_find_binning_thresholds/col_data", + "name": "col_data", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._find_binning_thresholds.col_data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples,)", + "default_value": "", + "description": "The continuous feature to bin." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.binning/_find_binning_thresholds/max_bins", + "name": "max_bins", + "qname": "sklearn.ensemble._hist_gradient_boosting.binning._find_binning_thresholds.max_bins", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Extract quantiles from a continuous feature.\n\nMissing values are ignored for finding the thresholds.", + "docstring": "Extract quantiles from a continuous feature.\n\nMissing values are ignored for finding the thresholds.\n\nParameters\n----------\ncol_data : array-like, shape (n_samples,)\n The continuous feature to bin.\nmax_bins: int\n The maximum number of bins to use for non-missing values. If for a\n given feature the number of unique values is less than ``max_bins``,\n then those unique values will be used to compute the bin thresholds,\n instead of the quantiles\n\nReturn\n------\nbinning_thresholds : ndarray of shape(min(max_bins, n_unique_values) - 1,)\n The increasing numeric values that can be used to separate the bins.\n A given value x will be mapped into bin value i iff\n bining_thresholds[i - 1] < x <= binning_thresholds[i]", + "code": "def _find_binning_thresholds(col_data, max_bins):\n \"\"\"Extract quantiles from a continuous feature.\n\n Missing values are ignored for finding the thresholds.\n\n Parameters\n ----------\n col_data : array-like, shape (n_samples,)\n The continuous feature to bin.\n max_bins: int\n The maximum number of bins to use for non-missing values. 
If for a\n given feature the number of unique values is less than ``max_bins``,\n then those unique values will be used to compute the bin thresholds,\n instead of the quantiles\n\n Return\n ------\n binning_thresholds : ndarray of shape(min(max_bins, n_unique_values) - 1,)\n The increasing numeric values that can be used to separate the bins.\n A given value x will be mapped into bin value i iff\n bining_thresholds[i - 1] < x <= binning_thresholds[i]\n \"\"\"\n # ignore missing values when computing bin thresholds\n missing_mask = np.isnan(col_data)\n if missing_mask.any():\n col_data = col_data[~missing_mask]\n col_data = np.ascontiguousarray(col_data, dtype=X_DTYPE)\n distinct_values = np.unique(col_data)\n if len(distinct_values) <= max_bins:\n midpoints = distinct_values[:-1] + distinct_values[1:]\n midpoints *= .5\n else:\n # We sort again the data in this case. We could compute\n # approximate midpoint percentiles using the output of\n # np.unique(col_data, return_counts) instead but this is more\n # work and the performance benefit will be limited because we\n # work on a fixed-size subsample of the full data.\n percentiles = np.linspace(0, 100, num=max_bins + 1)\n percentiles = percentiles[1:-1]\n midpoints = np.percentile(col_data, percentiles,\n interpolation='midpoint').astype(X_DTYPE)\n assert midpoints.shape[0] == max_bins - 1\n\n # We avoid having +inf thresholds: +inf thresholds are only allowed in\n # a \"split on nan\" situation.\n np.clip(midpoints, a_min=None, a_max=ALMOST_INF, out=midpoints)\n return midpoints" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/loss", + "name": "loss", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.learning_rate", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.max_iter", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + 
"type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.max_leaf_nodes", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.max_depth", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.min_samples_leaf", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/l2_regularization", + "name": "l2_regularization", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.l2_regularization", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/max_bins", + "name": "max_bins", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.max_bins", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/categorical_features", + "name": "categorical_features", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.categorical_features", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/monotonic_cst", + "name": "monotonic_cst", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.monotonic_cst", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/warm_start", + "name": "warm_start", + "qname": 
"sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.warm_start", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.early_stopping", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/scoring", + "name": "scoring", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.scoring", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.validation_fraction", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.n_iter_no_change", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/tol", + "name": "tol", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.tol", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.verbose", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.__init__.random_state", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for 
histogram-based gradient boosting estimators.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self, loss, *, learning_rate, max_iter, max_leaf_nodes,\n max_depth, min_samples_leaf, l2_regularization, max_bins,\n categorical_features, monotonic_cst,\n warm_start, early_stopping, scoring,\n validation_fraction, n_iter_no_change, tol, verbose,\n random_state):\n self.loss = loss\n self.learning_rate = learning_rate\n self.max_iter = max_iter\n self.max_leaf_nodes = max_leaf_nodes\n self.max_depth = max_depth\n self.min_samples_leaf = min_samples_leaf\n self.l2_regularization = l2_regularization\n self.max_bins = max_bins\n self.monotonic_cst = monotonic_cst\n self.categorical_features = categorical_features\n self.warm_start = warm_start\n self.early_stopping = early_stopping\n self.scoring = scoring\n self.validation_fraction = validation_fraction\n self.n_iter_no_change = n_iter_no_change\n self.tol = tol\n self.verbose = verbose\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_bin_data", + "name": "_bin_data", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._bin_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_bin_data/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._bin_data.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_bin_data/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._bin_data.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_bin_data/is_training_data", + "name": "is_training_data", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._bin_data.is_training_data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Bin data X.\n\nIf is_training_data, then fit the _bin_mapper attribute.\nElse, the binned data is converted to a C-contiguous array.", + "docstring": "Bin data X.\n\nIf is_training_data, then fit the _bin_mapper attribute.\nElse, the binned data is converted to a C-contiguous array.", + "code": " def _bin_data(self, X, is_training_data):\n \"\"\"Bin data X.\n\n If is_training_data, then fit the _bin_mapper attribute.\n Else, the binned data is converted to a C-contiguous array.\n \"\"\"\n\n description = 'training' if is_training_data else 'validation'\n if self.verbose:\n print(\"Binning {:.3f} GB of {} data: \".format(\n X.nbytes / 1e9, description), end=\"\", flush=True)\n tic = time()\n if is_training_data:\n X_binned = self._bin_mapper.fit_transform(X) # F-aligned array\n else:\n X_binned = self._bin_mapper.transform(X) # 
F-aligned array\n # We convert the array to C-contiguous since predicting is faster\n # with this layout (training is faster on F-arrays though)\n X_binned = np.ascontiguousarray(X_binned)\n toc = time()\n if self.verbose:\n duration = toc - tic\n print(\"{:.3f} s\".format(duration))\n\n return X_binned" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_categories", + "name": "_check_categories", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_categories", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_categories/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_categories.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_categories/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_categories.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check and validate categorical features in X", + "docstring": "Check and validate categorical features in X\n\nReturn\n------\nis_categorical : ndarray of shape (n_features,) or None, dtype=bool\n Indicates whether a feature is categorical. If no feature is\n categorical, this is None.\nknown_categories : list of size n_features or None\n The list contains, for each feature:\n - an array of shape (n_categories,) with the unique cat values\n - None if the feature is not categorical\n None if no feature is categorical.", + "code": " def _check_categories(self, X):\n \"\"\"Check and validate categorical features in X\n\n Return\n ------\n is_categorical : ndarray of shape (n_features,) or None, dtype=bool\n Indicates whether a feature is categorical. 
If no feature is\n categorical, this is None.\n known_categories : list of size n_features or None\n The list contains, for each feature:\n - an array of shape (n_categories,) with the unique cat values\n - None if the feature is not categorical\n None if no feature is categorical.\n \"\"\"\n if self.categorical_features is None:\n return None, None\n\n categorical_features = np.asarray(self.categorical_features)\n\n if categorical_features.size == 0:\n return None, None\n\n if categorical_features.dtype.kind not in ('i', 'b'):\n raise ValueError(\"categorical_features must be an array-like of \"\n \"bools or array-like of ints.\")\n\n n_features = X.shape[1]\n\n # check for categorical features as indices\n if categorical_features.dtype.kind == 'i':\n if (np.max(categorical_features) >= n_features\n or np.min(categorical_features) < 0):\n raise ValueError(\"categorical_features set as integer \"\n \"indices must be in [0, n_features - 1]\")\n is_categorical = np.zeros(n_features, dtype=bool)\n is_categorical[categorical_features] = True\n else:\n if categorical_features.shape[0] != n_features:\n raise ValueError(\"categorical_features set as a boolean mask \"\n \"must have shape (n_features,), got: \"\n f\"{categorical_features.shape}\")\n is_categorical = categorical_features\n\n if not np.any(is_categorical):\n return None, None\n\n # compute the known categories in the training data. We need to do\n # that here instead of in the BinMapper because in case of early\n # stopping, the mapper only gets a fraction of the training data.\n known_categories = []\n\n for f_idx in range(n_features):\n if is_categorical[f_idx]:\n categories = np.unique(X[:, f_idx])\n missing = np.isnan(categories)\n if missing.any():\n categories = categories[~missing]\n\n if categories.size > self.max_bins:\n raise ValueError(\n f\"Categorical feature at index {f_idx} is \"\n f\"expected to have a \"\n f\"cardinality <= {self.max_bins}\"\n )\n\n if (categories >= self.max_bins).any():\n raise ValueError(\n f\"Categorical feature at index {f_idx} is \"\n f\"expected to be encoded with \"\n f\"values < {self.max_bins}\"\n )\n else:\n categories = None\n known_categories.append(categories)\n\n return is_categorical, known_categories" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_loss", + "name": "_check_early_stopping_loss", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_loss", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_loss/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_loss.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_loss/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_loss.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + 
"type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_loss/y_train", + "name": "y_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_loss.y_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_loss/sample_weight_train", + "name": "sample_weight_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_loss.sample_weight_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_loss/raw_predictions_val", + "name": "raw_predictions_val", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_loss.raw_predictions_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_loss/y_val", + "name": "y_val", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_loss.y_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_loss/sample_weight_val", + "name": "sample_weight_val", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_loss.sample_weight_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check if fitting should be early-stopped based on loss.\n\nScores are computed on validation data or on training data.", + "docstring": "Check if fitting should be early-stopped based on loss.\n\nScores are computed on validation data or on training data.", + "code": " def _check_early_stopping_loss(self,\n raw_predictions,\n y_train,\n sample_weight_train,\n raw_predictions_val,\n y_val,\n sample_weight_val):\n \"\"\"Check if fitting should be early-stopped based on loss.\n\n Scores are computed on validation data or on training data.\n \"\"\"\n\n self.train_score_.append(\n -self._loss(y_train, raw_predictions, sample_weight_train)\n )\n\n if self._use_validation_data:\n self.validation_score_.append(\n -self._loss(y_val, raw_predictions_val, sample_weight_val)\n )\n return self._should_stop(self.validation_score_)\n else:\n return self._should_stop(self.train_score_)" + }, + { + "id": 
"scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_scorer", + "name": "_check_early_stopping_scorer", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_scorer", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_scorer/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_scorer.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_scorer/X_binned_small_train", + "name": "X_binned_small_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_scorer.X_binned_small_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_scorer/y_small_train", + "name": "y_small_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_scorer.y_small_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_scorer/sample_weight_small_train", + "name": "sample_weight_small_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_scorer.sample_weight_small_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_scorer/X_binned_val", + "name": "X_binned_val", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_scorer.X_binned_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_scorer/y_val", + "name": "y_val", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_scorer.y_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_check_early_stopping_scorer/sample_weight_val", + "name": "sample_weight_val", + 
"qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._check_early_stopping_scorer.sample_weight_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check if fitting should be early-stopped based on scorer.\n\nScores are computed on validation data or on training data.", + "docstring": "Check if fitting should be early-stopped based on scorer.\n\nScores are computed on validation data or on training data.", + "code": " def _check_early_stopping_scorer(self, X_binned_small_train, y_small_train,\n sample_weight_small_train,\n X_binned_val, y_val, sample_weight_val):\n \"\"\"Check if fitting should be early-stopped based on scorer.\n\n Scores are computed on validation data or on training data.\n \"\"\"\n if is_classifier(self):\n y_small_train = self.classes_[y_small_train.astype(int)]\n\n if sample_weight_small_train is None:\n self.train_score_.append(\n self._scorer(self, X_binned_small_train, y_small_train)\n )\n else:\n self.train_score_.append(\n self._scorer(self, X_binned_small_train, y_small_train,\n sample_weight=sample_weight_small_train)\n )\n\n if self._use_validation_data:\n if is_classifier(self):\n y_val = self.classes_[y_val.astype(int)]\n if sample_weight_val is None:\n self.validation_score_.append(\n self._scorer(self, X_binned_val, y_val)\n )\n else:\n self.validation_score_.append(\n self._scorer(self, X_binned_val, y_val,\n sample_weight=sample_weight_val)\n )\n return self._should_stop(self.validation_score_)\n else:\n return self._should_stop(self.train_score_)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_clear_state", + "name": "_clear_state", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._clear_state", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_clear_state/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._clear_state.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Clear the state of the gradient boosting model.", + "docstring": "Clear the state of the gradient boosting model.", + "code": " def _clear_state(self):\n \"\"\"Clear the state of the gradient boosting model.\"\"\"\n for var in ('train_score_', 'validation_score_'):\n if hasattr(self, var):\n delattr(self, var)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_compute_partial_dependence_recursion", + "name": "_compute_partial_dependence_recursion", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._compute_partial_dependence_recursion", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_compute_partial_dependence_recursion/self", + "name": "self", + "qname": 
"sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._compute_partial_dependence_recursion.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_compute_partial_dependence_recursion/grid", + "name": "grid", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._compute_partial_dependence_recursion.grid", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples, n_target_features)", + "default_value": "", + "description": "The grid points on which the partial dependence should be\nevaluated." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_target_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_compute_partial_dependence_recursion/target_features", + "name": "target_features", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._compute_partial_dependence_recursion.target_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_target_features)", + "default_value": "", + "description": "The set of target features for which the partial dependence\nshould be evaluated." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_target_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fast partial dependence computation.", + "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray, shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray, shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\nReturns\n-------\naveraged_predictions : ndarray, shape (n_trees_per_iteration, n_samples)\n The value of the partial dependence function on each grid point.", + "code": " def _compute_partial_dependence_recursion(self, grid, target_features):\n \"\"\"Fast partial dependence computation.\n\n Parameters\n ----------\n grid : ndarray, shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\n target_features : ndarray, shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\n Returns\n -------\n averaged_predictions : ndarray, shape \\\n (n_trees_per_iteration, n_samples)\n The value of the partial dependence function on each grid point.\n \"\"\"\n\n if getattr(self, '_fitted_with_sw', False):\n raise NotImplementedError(\"{} does not support partial dependence \"\n \"plots with the 'recursion' method when \"\n \"sample weights were given during fit \"\n \"time.\".format(self.__class__.__name__))\n\n grid = np.asarray(grid, dtype=X_DTYPE, order='C')\n averaged_predictions = np.zeros(\n (self.n_trees_per_iteration_, grid.shape[0]), dtype=Y_DTYPE)\n\n for 
predictors_of_ith_iteration in self._predictors:\n for k, predictor in enumerate(predictors_of_ith_iteration):\n predictor.compute_partial_dependence(grid, target_features,\n averaged_predictions[k])\n # Note that the learning rate is already accounted for in the leaves\n # values.\n\n return averaged_predictions" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_encode_y", + "name": "_encode_y", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._encode_y", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_encode_y/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._encode_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_encode_y/y", + "name": "y", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._encode_y.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @abstractmethod\n def _encode_y(self, y=None):\n pass" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_get_loss", + "name": "_get_loss", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._get_loss", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_get_loss/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._get_loss.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_get_loss/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._get_loss.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @abstractmethod\n def _get_loss(self, sample_weight):\n pass" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_get_small_trainset", + "name": "_get_small_trainset", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._get_small_trainset", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_get_small_trainset/self", + "name": "self", + "qname": 
"sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._get_small_trainset.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_get_small_trainset/X_binned_train", + "name": "X_binned_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._get_small_trainset.X_binned_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_get_small_trainset/y_train", + "name": "y_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._get_small_trainset.y_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_get_small_trainset/sample_weight_train", + "name": "sample_weight_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._get_small_trainset.sample_weight_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_get_small_trainset/seed", + "name": "seed", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._get_small_trainset.seed", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the indices of the subsample set and return this set.\n\nFor efficiency, we need to subsample the training set to compute scores\nwith scorers.", + "docstring": "Compute the indices of the subsample set and return this set.\n\nFor efficiency, we need to subsample the training set to compute scores\nwith scorers.", + "code": " def _get_small_trainset(self, X_binned_train, y_train, sample_weight_train,\n seed):\n \"\"\"Compute the indices of the subsample set and return this set.\n\n For efficiency, we need to subsample the training set to compute scores\n with scorers.\n \"\"\"\n # TODO: incorporate sample_weights here in `resample`\n subsample_size = 10000\n if X_binned_train.shape[0] > subsample_size:\n indices = np.arange(X_binned_train.shape[0])\n stratify = y_train if is_classifier(self) else None\n indices = resample(indices, n_samples=subsample_size,\n replace=False, random_state=seed,\n stratify=stratify)\n X_binned_small_train = X_binned_train[indices]\n y_small_train = y_train[indices]\n if sample_weight_train is not None:\n sample_weight_small_train = sample_weight_train[indices]\n else:\n sample_weight_small_train = None\n X_binned_small_train = np.ascontiguousarray(X_binned_small_train)\n return (X_binned_small_train, 
y_small_train,\n sample_weight_small_train)\n else:\n return X_binned_train, y_train, sample_weight_train" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_is_fitted", + "name": "_is_fitted", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._is_fitted", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_is_fitted/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._is_fitted.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _is_fitted(self):\n return len(getattr(self, '_predictors', [])) > 0" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_more_tags", + "name": "_more_tags", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_more_tags/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'allow_nan': True}" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_predict_iterations", + "name": "_predict_iterations", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._predict_iterations", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_predict_iterations/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._predict_iterations.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_predict_iterations/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._predict_iterations.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_predict_iterations/predictors", + "name": "predictors", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._predict_iterations.predictors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": 
{ + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_predict_iterations/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._predict_iterations.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_predict_iterations/is_binned", + "name": "is_binned", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._predict_iterations.is_binned", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Add the predictions of the predictors to raw_predictions.", + "docstring": "Add the predictions of the predictors to raw_predictions.", + "code": " def _predict_iterations(self, X, predictors, raw_predictions, is_binned):\n \"\"\"Add the predictions of the predictors to raw_predictions.\"\"\"\n if not is_binned:\n known_cat_bitsets, f_idx_map = (\n self._bin_mapper.make_known_categories_bitsets())\n\n for predictors_of_ith_iteration in predictors:\n for k, predictor in enumerate(predictors_of_ith_iteration):\n if is_binned:\n predict = partial(\n predictor.predict_binned,\n missing_values_bin_idx=self._bin_mapper.missing_values_bin_idx_ # noqa\n )\n else:\n predict = partial(\n predictor.predict,\n known_cat_bitsets=known_cat_bitsets,\n f_idx_map=f_idx_map)\n raw_predictions[k, :] += predict(X)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_print_iteration_stats", + "name": "_print_iteration_stats", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._print_iteration_stats", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_print_iteration_stats/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._print_iteration_stats.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_print_iteration_stats/iteration_start_time", + "name": "iteration_start_time", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._print_iteration_stats.iteration_start_time", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Print info about the current fitting iteration.", + "docstring": "Print info about the current fitting iteration.", + "code": " def _print_iteration_stats(self, iteration_start_time):\n \"\"\"Print info about the current 
fitting iteration.\"\"\"\n log_msg = ''\n\n predictors_of_ith_iteration = [\n predictors_list for predictors_list in self._predictors[-1]\n if predictors_list\n ]\n n_trees = len(predictors_of_ith_iteration)\n max_depth = max(predictor.get_max_depth()\n for predictor in predictors_of_ith_iteration)\n n_leaves = sum(predictor.get_n_leaf_nodes()\n for predictor in predictors_of_ith_iteration)\n\n if n_trees == 1:\n log_msg += (\"{} tree, {} leaves, \".format(n_trees, n_leaves))\n else:\n log_msg += (\"{} trees, {} leaves \".format(n_trees, n_leaves))\n log_msg += (\"({} on avg), \".format(int(n_leaves / n_trees)))\n\n log_msg += \"max depth = {}, \".format(max_depth)\n\n if self.do_early_stopping_:\n if self.scoring == 'loss':\n factor = -1 # score_ arrays contain the negative loss\n name = 'loss'\n else:\n factor = 1\n name = 'score'\n log_msg += \"train {}: {:.5f}, \".format(name, factor *\n self.train_score_[-1])\n if self._use_validation_data:\n log_msg += \"val {}: {:.5f}, \".format(\n name, factor * self.validation_score_[-1])\n\n iteration_time = time() - iteration_start_time\n log_msg += \"in {:0.3f}s\".format(iteration_time)\n\n print(log_msg)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_raw_predict", + "name": "_raw_predict", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._raw_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_raw_predict/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._raw_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_raw_predict/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._raw_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the sum of the leaves values over all predictors.", + "docstring": "Return the sum of the leaves values over all predictors.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\nraw_predictions : array, shape (n_trees_per_iteration, n_samples)\n The raw predicted values.", + "code": " def _raw_predict(self, X):\n \"\"\"Return the sum of the leaves values over all predictors.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n raw_predictions : array, shape (n_trees_per_iteration, n_samples)\n The raw predicted values.\n \"\"\"\n is_binned = getattr(self, '_in_fit', False)\n dtype = X_BINNED_DTYPE if is_binned else X_DTYPE\n X = check_array(X, dtype=dtype, force_all_finite=False)\n check_is_fitted(self)\n if X.shape[1] != self._n_features:\n raise ValueError(\n 'X has {} features but this estimator was trained with '\n '{} features.'.format(X.shape[1], self._n_features)\n )\n n_samples = X.shape[0]\n raw_predictions = np.zeros(\n shape=(self.n_trees_per_iteration_, n_samples),\n dtype=self._baseline_prediction.dtype\n )\n raw_predictions += self._baseline_prediction\n self._predict_iterations(\n X, self._predictors, raw_predictions, is_binned\n )\n return raw_predictions" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_should_stop", + "name": "_should_stop", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._should_stop", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_should_stop/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._should_stop.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_should_stop/scores", + "name": "scores", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._should_stop.scores", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return True (do early stopping) if the last n scores aren't better\nthan the (n-1)th-to-last score, up to some tolerance.", + "docstring": "Return True (do early stopping) if the last n scores aren't better\nthan the (n-1)th-to-last score, up to some tolerance.", + "code": " def _should_stop(self, scores):\n \"\"\"\n Return True (do early stopping) if the last n scores aren't better\n than the (n-1)th-to-last score, up to some tolerance.\n \"\"\"\n reference_position = self.n_iter_no_change + 1\n if len(scores) < reference_position:\n return False\n\n # A higher score is always better. 
Higher tol means that it will be\n # harder for subsequent iteration to be considered an improvement upon\n # the reference score, and therefore it is more likely to early stop\n # because of the lack of significant improvement.\n tol = 0 if self.tol is None else self.tol\n reference_score = scores[-reference_position] + tol\n recent_scores = scores[-reference_position + 1:]\n recent_improvements = [score > reference_score\n for score in recent_scores]\n return not any(recent_improvements)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_staged_raw_predict", + "name": "_staged_raw_predict", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._staged_raw_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_staged_raw_predict/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._staged_raw_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_staged_raw_predict/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._staged_raw_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute raw predictions of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.", + "docstring": "Compute raw predictions of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\nraw_predictions : generator of ndarray of shape (n_trees_per_iteration, n_samples)\n The raw predictions of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.", + "code": " def _staged_raw_predict(self, X):\n \"\"\"Compute raw predictions of ``X`` for each iteration.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n Yields\n -------\n raw_predictions : generator of ndarray of shape \\\n (n_trees_per_iteration, n_samples)\n The raw predictions of the input samples. 
The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n X = check_array(X, dtype=X_DTYPE, force_all_finite=False)\n check_is_fitted(self)\n if X.shape[1] != self._n_features:\n raise ValueError(\n 'X has {} features but this estimator was trained with '\n '{} features.'.format(X.shape[1], self._n_features)\n )\n n_samples = X.shape[0]\n raw_predictions = np.zeros(\n shape=(self.n_trees_per_iteration_, n_samples),\n dtype=self._baseline_prediction.dtype\n )\n raw_predictions += self._baseline_prediction\n for iteration in range(len(self._predictors)):\n self._predict_iterations(\n X,\n self._predictors[iteration:iteration + 1],\n raw_predictions,\n is_binned=False\n )\n yield raw_predictions.copy()" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_validate_parameters", + "name": "_validate_parameters", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._validate_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/_validate_parameters/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting._validate_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate parameters passed to __init__.\n\nThe parameters that are directly passed to the grower are checked in\nTreeGrower.", + "docstring": "Validate parameters passed to __init__.\n\nThe parameters that are directly passed to the grower are checked in\nTreeGrower.", + "code": " def _validate_parameters(self):\n \"\"\"Validate parameters passed to __init__.\n\n The parameters that are directly passed to the grower are checked in\n TreeGrower.\"\"\"\n\n if (self.loss not in self._VALID_LOSSES and\n not isinstance(self.loss, BaseLoss)):\n raise ValueError(\n \"Loss {} is not supported for {}. 
Accepted losses: \"\n \"{}.\".format(self.loss, self.__class__.__name__,\n ', '.join(self._VALID_LOSSES)))\n\n if self.learning_rate <= 0:\n raise ValueError('learning_rate={} must '\n 'be strictly positive'.format(self.learning_rate))\n if self.max_iter < 1:\n raise ValueError('max_iter={} must not be smaller '\n 'than 1.'.format(self.max_iter))\n if self.n_iter_no_change < 0:\n raise ValueError('n_iter_no_change={} must be '\n 'positive.'.format(self.n_iter_no_change))\n if (self.validation_fraction is not None and\n self.validation_fraction <= 0):\n raise ValueError(\n 'validation_fraction={} must be strictly '\n 'positive, or None.'.format(self.validation_fraction))\n if self.tol is not None and self.tol < 0:\n raise ValueError('tol={} '\n 'must not be smaller than 0.'.format(self.tol))\n\n if not (2 <= self.max_bins <= 255):\n raise ValueError('max_bins={} should be no smaller than 2 '\n 'and no larger than 255.'.format(self.max_bins))\n\n if self.monotonic_cst is not None and self.n_trees_per_iteration_ != 1:\n raise ValueError(\n 'monotonic constraints are not supported for '\n 'multiclass classification.'\n )" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/fit", + "name": "fit", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/fit/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/fit/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/fit/y", + "name": "y", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) default=None", + "default_value": "", + "description": "Weights of training data.\n\n.. 
versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the gradient boosting model.", + "docstring": "Fit the gradient boosting model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Weights of training data.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the gradient boosting model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,) default=None\n Weights of training data.\n\n .. versionadded:: 0.23\n\n Returns\n -------\n self : object\n \"\"\"\n fit_start_time = time()\n acc_find_split_time = 0. # time spent finding the best splits\n acc_apply_split_time = 0. # time spent splitting nodes\n acc_compute_hist_time = 0. # time spent computing histograms\n # time spent predicting X for gradient and hessians update\n acc_prediction_time = 0.\n X, y = self._validate_data(X, y, dtype=[X_DTYPE],\n force_all_finite=False)\n y = self._encode_y(y)\n check_consistent_length(X, y)\n # Do not create unit sample weights by default to later skip some\n # computation\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=np.float64)\n # TODO: remove when PDP suports sample weights\n self._fitted_with_sw = True\n\n rng = check_random_state(self.random_state)\n\n # When warm starting, we want to re-use the same seed that was used\n # the first time fit was called (e.g. for subsampling or for the\n # train/val split).\n if not (self.warm_start and self._is_fitted()):\n self._random_seed = rng.randint(np.iinfo(np.uint32).max,\n dtype='u8')\n\n self._validate_parameters()\n\n # used for validation in predict\n n_samples, self._n_features = X.shape\n\n self.is_categorical_, known_categories = self._check_categories(X)\n\n # we need this stateful variable to tell raw_predict() that it was\n # called from fit() (this current method), and that the data it has\n # received is pre-binned.\n # predicting is faster on pre-binned data, so we want early stopping\n # predictions to be made on pre-binned data. 
Unfortunately the _scorer\n # can only call predict() or predict_proba(), not raw_predict(), and\n # there's no way to tell the scorer that it needs to predict binned\n # data.\n self._in_fit = True\n\n if isinstance(self.loss, str):\n self._loss = self._get_loss(sample_weight=sample_weight)\n elif isinstance(self.loss, BaseLoss):\n self._loss = self.loss\n\n if self.early_stopping == 'auto':\n self.do_early_stopping_ = n_samples > 10000\n else:\n self.do_early_stopping_ = self.early_stopping\n\n # create validation data if needed\n self._use_validation_data = self.validation_fraction is not None\n if self.do_early_stopping_ and self._use_validation_data:\n # stratify for classification\n stratify = y if hasattr(self._loss, 'predict_proba') else None\n\n # Save the state of the RNG for the training and validation split.\n # This is needed in order to have the same split when using\n # warm starting.\n\n if sample_weight is None:\n X_train, X_val, y_train, y_val = train_test_split(\n X, y, test_size=self.validation_fraction,\n stratify=stratify,\n random_state=self._random_seed)\n sample_weight_train = sample_weight_val = None\n else:\n # TODO: incorporate sample_weight in sampling here, as well as\n # stratify\n (X_train, X_val, y_train, y_val, sample_weight_train,\n sample_weight_val) = train_test_split(\n X, y, sample_weight, test_size=self.validation_fraction,\n stratify=stratify,\n random_state=self._random_seed)\n else:\n X_train, y_train, sample_weight_train = X, y, sample_weight\n X_val = y_val = sample_weight_val = None\n\n # Bin the data\n # For ease of use of the API, the user-facing GBDT classes accept the\n # parameter max_bins, which doesn't take into account the bin for\n # missing values (which is always allocated). However, since max_bins\n # isn't the true maximal number of bins, all other private classes\n # (binmapper, histbuilder...) accept n_bins instead, which is the\n # actual total number of bins. Everywhere in the code, the\n # convention is that n_bins == max_bins + 1\n n_bins = self.max_bins + 1 # + 1 for missing values\n self._bin_mapper = _BinMapper(\n n_bins=n_bins,\n is_categorical=self.is_categorical_,\n known_categories=known_categories,\n random_state=self._random_seed)\n X_binned_train = self._bin_data(X_train, is_training_data=True)\n if X_val is not None:\n X_binned_val = self._bin_data(X_val, is_training_data=False)\n else:\n X_binned_val = None\n\n # Uses binned data to check for missing values\n has_missing_values = (\n X_binned_train == self._bin_mapper.missing_values_bin_idx_).any(\n axis=0).astype(np.uint8)\n\n if self.verbose:\n print(\"Fitting gradient boosted rounds:\")\n\n n_samples = X_binned_train.shape[0]\n\n # First time calling fit, or no warm start\n if not (self._is_fitted() and self.warm_start):\n # Clear random state and score attributes\n self._clear_state()\n\n # initialize raw_predictions: those are the accumulated values\n # predicted by the trees for the training data. 
raw_predictions has\n # shape (n_trees_per_iteration, n_samples) where\n # n_trees_per_iterations is n_classes in multiclass classification,\n # else 1.\n self._baseline_prediction = self._loss.get_baseline_prediction(\n y_train, sample_weight_train, self.n_trees_per_iteration_\n )\n raw_predictions = np.zeros(\n shape=(self.n_trees_per_iteration_, n_samples),\n dtype=self._baseline_prediction.dtype\n )\n raw_predictions += self._baseline_prediction\n\n # predictors is a matrix (list of lists) of TreePredictor objects\n # with shape (n_iter_, n_trees_per_iteration)\n self._predictors = predictors = []\n\n # Initialize structures and attributes related to early stopping\n self._scorer = None # set if scoring != loss\n raw_predictions_val = None # set if scoring == loss and use val\n self.train_score_ = []\n self.validation_score_ = []\n\n if self.do_early_stopping_:\n # populate train_score and validation_score with the\n # predictions of the initial model (before the first tree)\n\n if self.scoring == 'loss':\n # we're going to compute scoring w.r.t the loss. As losses\n # take raw predictions as input (unlike the scorers), we\n # can optimize a bit and avoid repeating computing the\n # predictions of the previous trees. We'll re-use\n # raw_predictions (as it's needed for training anyway) for\n # evaluating the training loss, and create\n # raw_predictions_val for storing the raw predictions of\n # the validation data.\n\n if self._use_validation_data:\n raw_predictions_val = np.zeros(\n shape=(self.n_trees_per_iteration_,\n X_binned_val.shape[0]),\n dtype=self._baseline_prediction.dtype\n )\n\n raw_predictions_val += self._baseline_prediction\n\n self._check_early_stopping_loss(raw_predictions, y_train,\n sample_weight_train,\n raw_predictions_val, y_val,\n sample_weight_val)\n else:\n self._scorer = check_scoring(self, self.scoring)\n # _scorer is a callable with signature (est, X, y) and\n # calls est.predict() or est.predict_proba() depending on\n # its nature.\n # Unfortunately, each call to _scorer() will compute\n # the predictions of all the trees. 
So we use a subset of\n # the training set to compute train scores.\n\n # Compute the subsample set\n (X_binned_small_train,\n y_small_train,\n sample_weight_small_train) = self._get_small_trainset(\n X_binned_train, y_train, sample_weight_train,\n self._random_seed)\n\n self._check_early_stopping_scorer(\n X_binned_small_train, y_small_train,\n sample_weight_small_train,\n X_binned_val, y_val, sample_weight_val,\n )\n begin_at_stage = 0\n\n # warm start: this is not the first time fit was called\n else:\n # Check that the maximum number of iterations is not smaller\n # than the number of iterations from the previous fit\n if self.max_iter < self.n_iter_:\n raise ValueError(\n 'max_iter=%d must be larger than or equal to '\n 'n_iter_=%d when warm_start==True'\n % (self.max_iter, self.n_iter_)\n )\n\n # Convert array attributes to lists\n self.train_score_ = self.train_score_.tolist()\n self.validation_score_ = self.validation_score_.tolist()\n\n # Compute raw predictions\n raw_predictions = self._raw_predict(X_binned_train)\n if self.do_early_stopping_ and self._use_validation_data:\n raw_predictions_val = self._raw_predict(X_binned_val)\n else:\n raw_predictions_val = None\n\n if self.do_early_stopping_ and self.scoring != 'loss':\n # Compute the subsample set\n (X_binned_small_train,\n y_small_train,\n sample_weight_small_train) = self._get_small_trainset(\n X_binned_train, y_train, sample_weight_train,\n self._random_seed)\n\n # Get the predictors from the previous fit\n predictors = self._predictors\n\n begin_at_stage = self.n_iter_\n\n # initialize gradients and hessians (empty arrays).\n # shape = (n_trees_per_iteration, n_samples).\n gradients, hessians = self._loss.init_gradients_and_hessians(\n n_samples=n_samples,\n prediction_dim=self.n_trees_per_iteration_,\n sample_weight=sample_weight_train\n )\n\n for iteration in range(begin_at_stage, self.max_iter):\n\n if self.verbose:\n iteration_start_time = time()\n print(\"[{}/{}] \".format(iteration + 1, self.max_iter),\n end='', flush=True)\n\n # Update gradients and hessians, inplace\n self._loss.update_gradients_and_hessians(gradients, hessians,\n y_train, raw_predictions,\n sample_weight_train)\n\n # Append a list since there may be more than 1 predictor per iter\n predictors.append([])\n\n # Build `n_trees_per_iteration` trees.\n for k in range(self.n_trees_per_iteration_):\n grower = TreeGrower(\n X_binned_train, gradients[k, :], hessians[k, :],\n n_bins=n_bins,\n n_bins_non_missing=self._bin_mapper.n_bins_non_missing_,\n has_missing_values=has_missing_values,\n is_categorical=self.is_categorical_,\n monotonic_cst=self.monotonic_cst,\n max_leaf_nodes=self.max_leaf_nodes,\n max_depth=self.max_depth,\n min_samples_leaf=self.min_samples_leaf,\n l2_regularization=self.l2_regularization,\n shrinkage=self.learning_rate)\n grower.grow()\n\n acc_apply_split_time += grower.total_apply_split_time\n acc_find_split_time += grower.total_find_split_time\n acc_compute_hist_time += grower.total_compute_hist_time\n\n if self._loss.need_update_leaves_values:\n self._loss.update_leaves_values(grower, y_train,\n raw_predictions[k, :],\n sample_weight_train)\n\n predictor = grower.make_predictor(\n binning_thresholds=self._bin_mapper.bin_thresholds_\n )\n predictors[-1].append(predictor)\n\n # Update raw_predictions with the predictions of the newly\n # created tree.\n tic_pred = time()\n _update_raw_predictions(raw_predictions[k, :], grower)\n toc_pred = time()\n acc_prediction_time += toc_pred - tic_pred\n\n should_early_stop = False\n if 
self.do_early_stopping_:\n if self.scoring == 'loss':\n # Update raw_predictions_val with the newest tree(s)\n if self._use_validation_data:\n for k, pred in enumerate(self._predictors[-1]):\n raw_predictions_val[k, :] += (\n pred.predict_binned(\n X_binned_val,\n self._bin_mapper.missing_values_bin_idx_\n )\n )\n\n should_early_stop = self._check_early_stopping_loss(\n raw_predictions, y_train, sample_weight_train,\n raw_predictions_val, y_val, sample_weight_val\n )\n\n else:\n should_early_stop = self._check_early_stopping_scorer(\n X_binned_small_train, y_small_train,\n sample_weight_small_train,\n X_binned_val, y_val, sample_weight_val\n )\n\n if self.verbose:\n self._print_iteration_stats(iteration_start_time)\n\n # maybe we could also early stop if all the trees are stumps?\n if should_early_stop:\n break\n\n if self.verbose:\n duration = time() - fit_start_time\n n_total_leaves = sum(\n predictor.get_n_leaf_nodes()\n for predictors_at_ith_iteration in self._predictors\n for predictor in predictors_at_ith_iteration\n )\n n_predictors = sum(\n len(predictors_at_ith_iteration)\n for predictors_at_ith_iteration in self._predictors)\n print(\"Fit {} trees in {:.3f} s, ({} total leaves)\".format(\n n_predictors, duration, n_total_leaves))\n print(\"{:<32} {:.3f}s\".format('Time spent computing histograms:',\n acc_compute_hist_time))\n print(\"{:<32} {:.3f}s\".format('Time spent finding best splits:',\n acc_find_split_time))\n print(\"{:<32} {:.3f}s\".format('Time spent applying splits:',\n acc_apply_split_time))\n print(\"{:<32} {:.3f}s\".format('Time spent predicting:',\n acc_prediction_time))\n\n self.train_score_ = np.asarray(self.train_score_)\n self.validation_score_ = np.asarray(self.validation_score_)\n del self._in_fit # hard delete so we're sure it can't be used anymore\n return self" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/n_iter_@getter", + "name": "n_iter_", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.n_iter_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/BaseHistGradientBoosting/n_iter_/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.BaseHistGradientBoosting.n_iter_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def n_iter_(self):\n check_is_fitted(self)\n return len(self._predictors)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/loss", + "name": "loss", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.loss", + "default_value": "'auto'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'auto', 'binary_crossentropy', 'categorical_crossentropy'}", + "default_value": "'auto'", + "description": "The loss function to use in the boosting process. 'binary_crossentropy'\n(also known as logistic loss) is used for binary classification and\ngeneralizes to 'categorical_crossentropy' for multiclass\nclassification. 'auto' will automatically choose either loss depending\non the nature of the problem." + }, + "type": { + "kind": "EnumType", + "values": ["auto", "categorical_crossentropy", "binary_crossentropy"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.learning_rate", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The learning rate, also known as *shrinkage*. This is used as a\nmultiplicative factor for the leaves values. Use ``1`` for no\nshrinkage." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The maximum number of iterations of the boosting process, i.e. the\nmaximum number of trees for binary classification. For multiclass\nclassification, `n_classes` trees per iteration are built." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.max_leaf_nodes", + "default_value": "31", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "31", + "description": "The maximum number of leaves for each tree. Must be strictly greater\nthan 1. If None, there is no maximum limit." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.max_depth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "The maximum depth of each tree. 
The depth of a tree is the number of\nedges to go from the root to the deepest leaf.\nDepth isn't constrained by default." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.min_samples_leaf", + "default_value": "20", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "20", + "description": "The minimum number of samples per leaf. For small datasets with less\nthan a few hundred samples, it is recommended to lower this value\nsince only very shallow trees would be built." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/l2_regularization", + "name": "l2_regularization", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.l2_regularization", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "The L2 regularization parameter. Use 0 for no regularization." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/max_bins", + "name": "max_bins", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.max_bins", + "default_value": "255", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "255", + "description": "The maximum number of bins to use for non-missing values. Before\ntraining, each feature of the input array `X` is binned into\ninteger-valued bins, which allows for a much faster training stage.\nFeatures with a small number of unique values may use less than\n``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\nis always reserved for missing values. Must be no larger than 255." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/categorical_features", + "name": "categorical_features", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.categorical_features", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of {bool, int} of shape (n_features) or shape (n_categorical_features,)", + "default_value": "None.", + "description": "Indicates the categorical features.\n\n- None : no feature will be considered categorical.\n- boolean array-like : boolean mask indicating categorical features.\n- integer array-like : integer indices indicating categorical\n features.\n\nFor each categorical feature, there must be at most `max_bins` unique\ncategories, and each categorical value must be in [0, max_bins -1].\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "array-like of of shape (n_features)" + }, + { + "kind": "NamedType", + "name": "shape (n_categorical_features,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/monotonic_cst", + "name": "monotonic_cst", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.monotonic_cst", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of int of shape (n_features)", + "default_value": "None", + "description": "Indicates the monotonic constraint to enforce on each feature. -1, 1\nand 0 respectively correspond to a negative constraint, positive\nconstraint and no constraint. Read more in the :ref:`User Guide\n`.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "array-like of int of shape (n_features)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble. For results to be valid, the\nestimator should be re-trained on the same data only.\nSee :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.early_stopping", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto' or bool", + "default_value": "'auto'", + "description": "If 'auto', early stopping is enabled if the sample size is larger than\n10000. If True, early stopping is enabled, otherwise early stopping is\ndisabled.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/scoring", + "name": "scoring", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.scoring", + "default_value": "'loss'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable or None", + "default_value": "'loss'", + "description": "Scoring parameter to use for early stopping. It can be a single\nstring (see :ref:`scoring_parameter`) or a callable (see\n:ref:`scoring`). If None, the estimator's default scorer\nis used. If ``scoring='loss'``, early stopping is checked\nw.r.t the loss value. Only used if early stopping is performed." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float or None", + "default_value": "0.1", + "description": "Proportion (or absolute size) of training data to set aside as\nvalidation data for early stopping. If None, early stopping is done on\nthe training data. Only used if early stopping is performed." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.n_iter_no_change", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Used to determine when to \"early stop\". The fitting process is\nstopped when none of the last ``n_iter_no_change`` scores are better\nthan the ``n_iter_no_change - 1`` -th-to-last one, up to some\ntolerance. Only used if early stopping is performed." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/tol", + "name": "tol", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.tol", + "default_value": "1e-07", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or None", + "default_value": "1e-7", + "description": "The absolute tolerance to use when comparing scores. The higher the\ntolerance, the more likely we are to early stop: higher tolerance\nmeans that it will be harder for subsequent iterations to be\nconsidered an improvement upon the reference score." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level. If not zero, print some information about the\nfitting process." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Pseudo-random number generator to control the subsampling in the\nbinning process, and the train/validation data split if early stopping\nis enabled.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Histogram-based Gradient Boosting Classification Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingClassifier`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM `_.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_hist_gradient_boosting``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> # now you can import normally from ensemble\n >>> from sklearn.ensemble import HistGradientBoostingClassifier\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.21", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, loss='auto', *, learning_rate=0.1, max_iter=100,\n max_leaf_nodes=31, max_depth=None, min_samples_leaf=20,\n l2_regularization=0., max_bins=255,\n categorical_features=None, monotonic_cst=None,\n warm_start=False, early_stopping='auto', scoring='loss',\n validation_fraction=0.1, n_iter_no_change=10, tol=1e-7,\n verbose=0, random_state=None):\n super(HistGradientBoostingClassifier, self).__init__(\n loss=loss, learning_rate=learning_rate, max_iter=max_iter,\n max_leaf_nodes=max_leaf_nodes, max_depth=max_depth,\n min_samples_leaf=min_samples_leaf,\n l2_regularization=l2_regularization, max_bins=max_bins,\n categorical_features=categorical_features,\n monotonic_cst=monotonic_cst,\n warm_start=warm_start,\n early_stopping=early_stopping, scoring=scoring,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, tol=tol, verbose=verbose,\n random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_encode_y", + "name": "_encode_y", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier._encode_y", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_encode_y/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier._encode_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_encode_y/y", + "name": "y", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier._encode_y.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _encode_y(self, y):\n # encode classes into 0 ... n_classes - 1 and sets attributes classes_\n # and n_trees_per_iteration_\n check_classification_targets(y)\n\n label_encoder = LabelEncoder()\n encoded_y = label_encoder.fit_transform(y)\n self.classes_ = label_encoder.classes_\n n_classes = self.classes_.shape[0]\n # only 1 tree for binary classification. 
For multiclass classification,\n # we build 1 tree per class.\n self.n_trees_per_iteration_ = 1 if n_classes <= 2 else n_classes\n encoded_y = encoded_y.astype(Y_DTYPE, copy=False)\n return encoded_y" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_get_loss", + "name": "_get_loss", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier._get_loss", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_get_loss/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier._get_loss.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/_get_loss/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier._get_loss.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_loss(self, sample_weight):\n if (self.loss == 'categorical_crossentropy' and\n self.n_trees_per_iteration_ == 1):\n raise ValueError(\"'categorical_crossentropy' is not suitable for \"\n \"a binary classification problem. Please use \"\n \"'auto' or 'binary_crossentropy' instead.\")\n\n if self.loss == 'auto':\n if self.n_trees_per_iteration_ == 1:\n return _LOSSES['binary_crossentropy'](\n sample_weight=sample_weight)\n else:\n return _LOSSES['categorical_crossentropy'](\n sample_weight=sample_weight)\n\n return _LOSSES[self.loss](sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/decision_function", + "name": "decision_function", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/decision_function/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/decision_function/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." 
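The 'auto' loss resolution quoted in `_get_loss` above boils down to this rule (a direct paraphrase of the quoted code):

>>> def resolve_loss(loss, n_trees_per_iteration):
...     # one tree per iteration means a binary problem
...     if loss == 'categorical_crossentropy' and n_trees_per_iteration == 1:
...         raise ValueError("not suitable for binary classification")
...     if loss == 'auto':
...         return ('binary_crossentropy' if n_trees_per_iteration == 1
...                 else 'categorical_crossentropy')
...     return loss
>>> resolve_loss('auto', 1)
'binary_crossentropy'
>>> resolve_loss('auto', 3)
'categorical_crossentropy'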
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the decision function of ``X``.", + "docstring": "Compute the decision function of ``X``.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\ndecision : ndarray, shape (n_samples,) or (n_samples, n_trees_per_iteration)\n The raw predicted values (i.e. the sum of the trees leaves) for\n each sample. n_trees_per_iteration is equal to the number of\n classes in multiclass classification.", + "code": " def decision_function(self, X):\n \"\"\"Compute the decision function of ``X``.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n decision : ndarray, shape (n_samples,) or \\\n (n_samples, n_trees_per_iteration)\n The raw predicted values (i.e. the sum of the trees leaves) for\n each sample. n_trees_per_iteration is equal to the number of\n classes in multiclass classification.\n \"\"\"\n decision = self._raw_predict(X)\n if decision.shape[0] == 1:\n decision = decision.ravel()\n return decision.T" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/predict", + "name": "predict", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/predict/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/predict/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict classes for X.", + "docstring": "Predict classes for X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\ny : ndarray, shape (n_samples,)\n The predicted classes.", + "code": " def predict(self, X):\n \"\"\"Predict classes for X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n y : ndarray, shape (n_samples,)\n The predicted classes.\n \"\"\"\n # TODO: This could be done in parallel\n encoded_classes = np.argmax(self.predict_proba(X), axis=1)\n return self.classes_[encoded_classes]" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class probabilities for X.", + "docstring": "Predict class probabilities for X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\np : ndarray, shape (n_samples, n_classes)\n The class probabilities of the input samples.", + "code": " def predict_proba(self, X):\n \"\"\"Predict class probabilities for X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n p : ndarray, shape (n_samples, n_classes)\n The class probabilities of the input samples.\n \"\"\"\n raw_predictions = self._raw_predict(X)\n return self._loss.predict_proba(raw_predictions)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/staged_decision_function", + "name": "staged_decision_function", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.staged_decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/staged_decision_function/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.staged_decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/staged_decision_function/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.staged_decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute decision function of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.", + "docstring": "Compute decision function of ``X`` for each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\ndecision : generator of ndarray of shape (n_samples,) or (n_samples, n_trees_per_iteration)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n classes corresponds to that in the attribute :term:`classes_`.", + "code": " def staged_decision_function(self, X):\n \"\"\"Compute decision function of ``X`` for each iteration.\n\n This method allows monitoring (i.e. 
determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n Yields\n -------\n decision : generator of ndarray of shape (n_samples,) or \\\n (n_samples, n_trees_per_iteration)\n The decision function of the input samples, which corresponds to\n the raw values predicted from the trees of the ensemble . The\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n for staged_decision in self._staged_raw_predict(X):\n if staged_decision.shape[0] == 1:\n staged_decision = staged_decision.ravel()\n yield staged_decision.T" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/staged_predict", + "name": "staged_predict", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.staged_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/staged_predict/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.staged_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/staged_predict/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.staged_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict classes at each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\n.. versionadded:: 0.24", + "docstring": "Predict classes at each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted classes of the input samples, for each iteration.", + "code": " def staged_predict(self, X):\n \"\"\"Predict classes at each iteration.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n .. 
versionadded:: 0.24\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n Yields\n -------\n y : generator of ndarray of shape (n_samples,)\n The predicted classes of the input samples, for each iteration.\n \"\"\"\n for proba in self.staged_predict_proba(X):\n encoded_classes = np.argmax(proba, axis=1)\n yield self.classes_.take(encoded_classes, axis=0)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/staged_predict_proba", + "name": "staged_predict_proba", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.staged_predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/staged_predict_proba/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.staged_predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingClassifier/staged_predict_proba/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier.staged_predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class probabilities at each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.", + "docstring": "Predict class probabilities at each iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted class probabilities of the input samples,\n for each iteration.", + "code": " def staged_predict_proba(self, X):\n \"\"\"Predict class probabilities at each iteration.\n\n This method allows monitoring (i.e. 
determine error on testing set)\n after each stage.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n Yields\n -------\n y : generator of ndarray of shape (n_samples,)\n The predicted class probabilities of the input samples,\n for each iteration.\n \"\"\"\n for raw_predictions in self._staged_raw_predict(X):\n yield self._loss.predict_proba(raw_predictions)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/loss", + "name": "loss", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.loss", + "default_value": "'least_squares'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'least_squares', 'least_absolute_deviation', 'poisson'}", + "default_value": "'least_squares'", + "description": "The loss function to use in the boosting process. Note that the\n\"least squares\" and \"poisson\" losses actually implement\n\"half least squares loss\" and \"half poisson deviance\" to simplify the\ncomputation of the gradient. Furthermore, \"poisson\" loss internally\nuses a log-link and requires ``y >= 0``\n\n.. versionchanged:: 0.23\n Added option 'poisson'." + }, + "type": { + "kind": "EnumType", + "values": ["least_absolute_deviation", "poisson", "least_squares"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.learning_rate", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The learning rate, also known as *shrinkage*. This is used as a\nmultiplicative factor for the leaves values. Use ``1`` for no\nshrinkage." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The maximum number of iterations of the boosting process, i.e. the\nmaximum number of trees." 
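A typical use of the staged generators documented above is per-iteration monitoring; this sketch assumes `clf`, `X_test`, and `y_test` from an earlier fit:

>>> from sklearn.metrics import log_loss
>>> stage_losses = [log_loss(y_test, proba)
...                 for proba in clf.staged_predict_proba(X_test)]
>>> len(stage_losses) == clf.n_iter_   # one entry per boosting iteration
True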
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.max_leaf_nodes", + "default_value": "31", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "31", + "description": "The maximum number of leaves for each tree. Must be strictly greater\nthan 1. If None, there is no maximum limit." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.max_depth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "The maximum depth of each tree. The depth of a tree is the number of\nedges to go from the root to the deepest leaf.\nDepth isn't constrained by default." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.min_samples_leaf", + "default_value": "20", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "20", + "description": "The minimum number of samples per leaf. For small datasets with less\nthan a few hundred samples, it is recommended to lower this value\nsince only very shallow trees would be built." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/l2_regularization", + "name": "l2_regularization", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.l2_regularization", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "The L2 regularization parameter. Use ``0`` for no regularization\n(default)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/max_bins", + "name": "max_bins", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.max_bins", + "default_value": "255", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "255", + "description": "The maximum number of bins to use for non-missing values. 
Before\ntraining, each feature of the input array `X` is binned into\ninteger-valued bins, which allows for a much faster training stage.\nFeatures with a small number of unique values may use less than\n``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin\nis always reserved for missing values. Must be no larger than 255." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/categorical_features", + "name": "categorical_features", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.categorical_features", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of {bool, int} of shape (n_features) or shape (n_categorical_features,)", + "default_value": "None.", + "description": "Indicates the categorical features.\n\n- None : no feature will be considered categorical.\n- boolean array-like : boolean mask indicating categorical features.\n- integer array-like : integer indices indicating categorical\n features.\n\nFor each categorical feature, there must be at most `max_bins` unique\ncategories, and each categorical value must be in [0, max_bins - 1].\n\nRead more in the :ref:`User Guide <categorical_support>`.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features)" + }, + { + "kind": "NamedType", + "name": "shape (n_categorical_features,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/monotonic_cst", + "name": "monotonic_cst", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.monotonic_cst", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of int of shape (n_features)", + "default_value": "None", + "description": "Indicates the monotonic constraint to enforce on each feature. -1, 1\nand 0 respectively correspond to a negative constraint, positive\nconstraint and no constraint. Read more in the :ref:`User Guide\n<monotonic_cst_gbdt>`.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "array-like of int of shape (n_features)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble. For results to be valid, the\nestimator should be re-trained on the same data only.\nSee :term:`the Glossary <warm_start>`."
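The `categorical_features` and `monotonic_cst` parameters documented above are easiest to grasp together in code. A minimal sketch, assuming invented toy data (category codes are kept within `[0, max_bins - 1]` as the docstring requires):

```python
import numpy as np

# Experimental in 0.24: must be enabled before importing from ensemble.
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingRegressor

rng = np.random.RandomState(0)
# Toy data: column 0 holds categorical codes 0..3, columns 1-2 are continuous.
X = np.column_stack([rng.randint(0, 4, size=200),
                     rng.uniform(size=200),
                     rng.uniform(size=200)])
y = X[:, 1] - X[:, 2] + rng.normal(scale=0.1, size=200)

est = HistGradientBoostingRegressor(
    categorical_features=[True, False, False],  # boolean-mask form
    monotonic_cst=[0, 1, -1],  # no constraint / increasing / decreasing
).fit(X, y)
```

Note that a categorical feature can only carry the constraint value 0, consistent with the `TreeGrower` validation later in this file ("Categorical features cannot have monotonic constraints").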
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.early_stopping", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto' or bool", + "default_value": "'auto'", + "description": "If 'auto', early stopping is enabled if the sample size is larger than\n10000. If True, early stopping is enabled, otherwise early stopping is\ndisabled.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/scoring", + "name": "scoring", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.scoring", + "default_value": "'loss'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable or None", + "default_value": "'loss'", + "description": "Scoring parameter to use for early stopping. It can be a single\nstring (see :ref:`scoring_parameter`) or a callable (see\n:ref:`scoring`). If None, the estimator's default scorer is used. If\n``scoring='loss'``, early stopping is checked w.r.t the loss value.\nOnly used if early stopping is performed." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float or None", + "default_value": "0.1", + "description": "Proportion (or absolute size) of training data to set aside as\nvalidation data for early stopping. If None, early stopping is done on\nthe training data. Only used if early stopping is performed." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.n_iter_no_change", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Used to determine when to \"early stop\". The fitting process is\nstopped when none of the last ``n_iter_no_change`` scores are better\nthan the ``n_iter_no_change - 1`` -th-to-last one, up to some\ntolerance. Only used if early stopping is performed." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/tol", + "name": "tol", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.tol", + "default_value": "1e-07", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or None", + "default_value": "1e-7", + "description": "The absolute tolerance to use when comparing scores during early\nstopping. The higher the tolerance, the more likely we are to early\nstop: higher tolerance means that it will be harder for subsequent\niterations to be considered an improvement upon the reference score." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level. If not zero, print some information about the\nfitting process." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Pseudo-random number generator to control the subsampling in the\nbinning process, and the train/validation data split if early stopping\nis enabled.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Histogram-based Gradient Boosting Regression Tree.\n\nThis estimator is much faster than\n:class:`GradientBoostingRegressor`\nfor big datasets (n_samples >= 10 000).\n\nThis estimator has native support for missing values (NaNs). During\ntraining, the tree grower learns at each split point whether samples\nwith missing values should go to the left or right child, based on the\npotential gain. When predicting, samples with missing values are\nassigned to the left or right child consequently. If no missing values\nwere encountered for a given feature during training, then samples with\nmissing values are mapped to whichever child has the most samples.\n\nThis implementation is inspired by\n`LightGBM `_.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. 
To use it,\n you need to explicitly import ``enable_hist_gradient_boosting``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa\n >>> # now you can import normally from ensemble\n >>> from sklearn.ensemble import HistGradientBoostingRegressor\n\nRead more in the :ref:`User Guide <histogram_based_gradient_boosting>`.\n\n.. versionadded:: 0.21", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, loss='least_squares', *, learning_rate=0.1,\n max_iter=100, max_leaf_nodes=31, max_depth=None,\n min_samples_leaf=20, l2_regularization=0., max_bins=255,\n categorical_features=None, monotonic_cst=None,\n warm_start=False, early_stopping='auto',\n scoring='loss', validation_fraction=0.1,\n n_iter_no_change=10, tol=1e-7,\n verbose=0, random_state=None):\n super(HistGradientBoostingRegressor, self).__init__(\n loss=loss, learning_rate=learning_rate, max_iter=max_iter,\n max_leaf_nodes=max_leaf_nodes, max_depth=max_depth,\n min_samples_leaf=min_samples_leaf,\n l2_regularization=l2_regularization, max_bins=max_bins,\n monotonic_cst=monotonic_cst,\n categorical_features=categorical_features,\n early_stopping=early_stopping,\n warm_start=warm_start, scoring=scoring,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, tol=tol, verbose=verbose,\n random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/_encode_y", + "name": "_encode_y", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor._encode_y", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/_encode_y/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor._encode_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/_encode_y/y", + "name": "y", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor._encode_y.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _encode_y(self, y):\n # Just convert y to the expected dtype\n self.n_trees_per_iteration_ = 1\n y = y.astype(Y_DTYPE, copy=False)\n if self.loss == 'poisson':\n # Ensure y >= 0 and sum(y) > 0\n if not (np.all(y >= 0) and np.sum(y) > 0):\n raise ValueError(\"loss='poisson' requires non-negative y and \"\n \"sum(y) > 0.\")\n return y" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/_get_loss", + "name": "_get_loss", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor._get_loss", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/_get_loss/self", + "name": "self", + "qname":
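The experimental-import requirement shown in the docstring above is the one non-obvious step in using this estimator; a minimal end-to-end sketch, assuming invented synthetic data:

```python
import numpy as np

from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingRegressor

rng = np.random.RandomState(42)
X = rng.normal(size=(500, 4))
y = X @ np.array([1.0, -2.0, 0.5, 0.0]) + rng.normal(scale=0.1, size=500)

est = HistGradientBoostingRegressor(max_iter=50, random_state=0).fit(X, y)
print(est.score(X, y))  # R^2 on the training data
```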
"sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor._get_loss.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/_get_loss/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor._get_loss.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_loss(self, sample_weight):\n return _LOSSES[self.loss](sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/predict", + "name": "predict", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/predict/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/predict/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict values for X.", + "docstring": "Predict values for X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\ny : ndarray, shape (n_samples,)\n The predicted values.", + "code": " def predict(self, X):\n \"\"\"Predict values for X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n y : ndarray, shape (n_samples,)\n The predicted values.\n \"\"\"\n check_is_fitted(self)\n # Return inverse link of raw predictions after converting\n # shape (n_samples, 1) to (n_samples,)\n return self._loss.inverse_link_function(self._raw_predict(X).ravel())" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/staged_predict", + "name": "staged_predict", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.staged_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/staged_predict/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.staged_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.gradient_boosting/HistGradientBoostingRegressor/staged_predict/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor.staged_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict regression target for each iteration\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\n.. versionadded:: 0.24", + "docstring": "Predict regression target for each iteration\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each stage.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples.\n\nYields\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted values of the input samples, for each iteration.", + "code": " def staged_predict(self, X):\n \"\"\"Predict regression target for each iteration\n\n This method allows monitoring (i.e. determine error on testing set)\n after each stage.\n\n .. 
versionadded:: 0.24\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples.\n\n Yields\n -------\n y : generator of ndarray of shape (n_samples,)\n The predicted values of the input samples, for each iteration.\n \"\"\"\n for raw_predictions in self._staged_raw_predict(X):\n yield self._loss.inverse_link_function(raw_predictions.ravel())" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/X_binned", + "name": "X_binned", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.X_binned", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features), dtype=np.uint8", + "default_value": "", + "description": "The binned input samples. Must be Fortran-aligned." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "dtype=np.uint8" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/gradients", + "name": "gradients", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.gradients", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The gradients of each training sample. Those are the gradients of the\nloss w.r.t the predictions, evaluated at iteration ``i - 1``." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/hessians", + "name": "hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.hessians", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The hessians of each training sample. Those are the hessians of the\nloss w.r.t the predictions, evaluated at iteration ``i - 1``." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.max_leaf_nodes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum number of leaves for each tree. If None, there is no\nmaximum limit." 
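`staged_predict`, documented just above, yields one prediction array per boosting iteration without refitting, which makes per-stage test-error monitoring cheap. A sketch on invented data:

```python
import numpy as np

from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

rng = np.random.RandomState(0)
X = rng.normal(size=(400, 3))
y = X[:, 0] ** 2 + rng.normal(scale=0.1, size=400)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

est = HistGradientBoostingRegressor(max_iter=30).fit(X_train, y_train)
# One ndarray of predictions per iteration, computed incrementally.
errors = [mean_squared_error(y_test, y_pred)
          for y_pred in est.staged_predict(X_test)]
print(int(np.argmin(errors)) + 1, "iterations gave the lowest test MSE")
```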
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.max_depth", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum depth of each tree. The depth of a tree is the number of\nedges to go from the root to the deepest leaf.\nDepth isn't constrained by default." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.min_samples_leaf", + "default_value": "20", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "20", + "description": "The minimum number of samples per leaf." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/min_gain_to_split", + "name": "min_gain_to_split", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.min_gain_to_split", + "default_value": "0.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "The minimum gain needed to split a node. Splits with lower gain will\nbe ignored." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/n_bins", + "name": "n_bins", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.n_bins", + "default_value": "256", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "256", + "description": "The total number of bins, including the bin for missing values. Used\nto define the shape of the histograms." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/n_bins_non_missing", + "name": "n_bins_non_missing", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.n_bins_non_missing", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, dtype=np.uint32", + "default_value": "None", + "description": "For each feature, gives the number of bins actually used for\nnon-missing values. For features with a lot of unique values, this\nis equal to ``n_bins - 1``. If it's an int, all features are\nconsidered to have the same number of bins. If None, all features\nare considered to have ``n_bins - 1`` bins." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "dtype=np.uint32" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/has_missing_values", + "name": "has_missing_values", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.has_missing_values", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool or ndarray, dtype=bool", + "default_value": "False", + "description": "Whether each feature contains missing values (in the training data).\nIf it's a bool, the same value is used for all features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "dtype=bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/is_categorical", + "name": "is_categorical", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.is_categorical", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of bool of shape (n_features,)", + "default_value": "None", + "description": "Indicates categorical features." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of bool of shape (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/monotonic_cst", + "name": "monotonic_cst", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.monotonic_cst", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features,), dtype=int", + "default_value": "None", + "description": "Indicates the monotonic constraint to enforce on each feature. -1, 1\nand 0 respectively correspond to a positive constraint, negative\nconstraint and no constraint. Read more in the :ref:`User Guide\n`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + }, + { + "kind": "NamedType", + "name": "dtype=int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/l2_regularization", + "name": "l2_regularization", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.l2_regularization", + "default_value": "0.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "The L2 regularization parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/min_hessian_to_split", + "name": "min_hessian_to_split", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.min_hessian_to_split", + "default_value": "0.001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "The minimum sum of hessians needed in each node. Splits that result in\nat least one child having a sum of hessians less than\n``min_hessian_to_split`` are discarded." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/__init__/shrinkage", + "name": "shrinkage", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.__init__.shrinkage", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.", + "description": "The shrinkage parameter to apply to the leaves values, also known as\nlearning rate." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Tree grower class used to build a tree.\n\nThe tree is fitted to predict the values of a Newton-Raphson step. The\nsplits are considered in a best-first fashion, and the quality of a\nsplit is defined in splitting._split_gain.", + "docstring": "", + "code": " def __init__(self, X_binned, gradients, hessians, max_leaf_nodes=None,\n max_depth=None, min_samples_leaf=20, min_gain_to_split=0.,\n n_bins=256, n_bins_non_missing=None, has_missing_values=False,\n is_categorical=None, monotonic_cst=None,\n l2_regularization=0., min_hessian_to_split=1e-3,\n shrinkage=1.):\n\n self._validate_parameters(X_binned, max_leaf_nodes, max_depth,\n min_samples_leaf, min_gain_to_split,\n l2_regularization, min_hessian_to_split)\n\n if n_bins_non_missing is None:\n n_bins_non_missing = n_bins - 1\n\n if isinstance(n_bins_non_missing, numbers.Integral):\n n_bins_non_missing = np.array(\n [n_bins_non_missing] * X_binned.shape[1],\n dtype=np.uint32)\n else:\n n_bins_non_missing = np.asarray(n_bins_non_missing,\n dtype=np.uint32)\n\n if isinstance(has_missing_values, bool):\n has_missing_values = [has_missing_values] * X_binned.shape[1]\n has_missing_values = np.asarray(has_missing_values, dtype=np.uint8)\n\n if monotonic_cst is None:\n self.with_monotonic_cst = False\n monotonic_cst = np.full(shape=X_binned.shape[1],\n fill_value=MonotonicConstraint.NO_CST,\n dtype=np.int8)\n else:\n self.with_monotonic_cst = True\n monotonic_cst = np.asarray(monotonic_cst, dtype=np.int8)\n\n if monotonic_cst.shape[0] != X_binned.shape[1]:\n raise ValueError(\n \"monotonic_cst has shape {} but the input data \"\n \"X has {} features.\".format(\n monotonic_cst.shape[0], X_binned.shape[1]\n )\n )\n if np.any(monotonic_cst < -1) or np.any(monotonic_cst > 1):\n raise ValueError(\n \"monotonic_cst must be None or an array-like of \"\n \"-1, 0 or 1.\"\n )\n\n if is_categorical is None:\n is_categorical = np.zeros(shape=X_binned.shape[1], dtype=np.uint8)\n else:\n is_categorical = np.asarray(is_categorical, dtype=np.uint8)\n\n if np.any(np.logical_and(is_categorical == 1,\n monotonic_cst != MonotonicConstraint.NO_CST)):\n raise ValueError(\"Categorical features cannot have monotonic \"\n \"constraints.\")\n\n hessians_are_constant = hessians.shape[0] == 1\n self.histogram_builder = HistogramBuilder(\n X_binned, n_bins, gradients, hessians, hessians_are_constant)\n missing_values_bin_idx = n_bins - 1\n self.splitter = Splitter(\n X_binned, n_bins_non_missing, missing_values_bin_idx,\n has_missing_values, is_categorical, monotonic_cst,\n l2_regularization, min_hessian_to_split,\n min_samples_leaf, min_gain_to_split, hessians_are_constant)\n self.n_bins_non_missing = n_bins_non_missing\n self.missing_values_bin_idx = missing_values_bin_idx\n self.max_leaf_nodes = max_leaf_nodes\n self.has_missing_values = has_missing_values\n self.monotonic_cst = monotonic_cst\n 
self.is_categorical = is_categorical\n self.l2_regularization = l2_regularization\n self.n_features = X_binned.shape[1]\n self.max_depth = max_depth\n self.min_samples_leaf = min_samples_leaf\n self.X_binned = X_binned\n self.min_gain_to_split = min_gain_to_split\n self.shrinkage = shrinkage\n self.splittable_nodes = []\n self.finalized_leaves = []\n self.total_find_split_time = 0. # time spent finding the best splits\n self.total_compute_hist_time = 0. # time spent computing histograms\n self.total_apply_split_time = 0. # time spent splitting nodes\n self.n_categorical_splits = 0\n self._intilialize_root(gradients, hessians, hessians_are_constant)\n self.n_nodes = 1" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_apply_shrinkage", + "name": "_apply_shrinkage", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._apply_shrinkage", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_apply_shrinkage/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._apply_shrinkage.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Multiply leaves values by shrinkage parameter.\n\nThis must be done at the very end of the growing process. If this were\ndone during the growing process e.g. in finalize_leaf(), then a leaf\nwould be shrunk but its sibling would potentially not be (if it's a\nnon-leaf), which would lead to a wrong computation of the 'middle'\nvalue needed to enforce the monotonic constraints.", + "docstring": "Multiply leaves values by shrinkage parameter.\n\nThis must be done at the very end of the growing process. If this were\ndone during the growing process e.g. in finalize_leaf(), then a leaf\nwould be shrunk but its sibling would potentially not be (if it's a\nnon-leaf), which would lead to a wrong computation of the 'middle'\nvalue needed to enforce the monotonic constraints.", + "code": " def _apply_shrinkage(self):\n \"\"\"Multiply leaves values by shrinkage parameter.\n\n This must be done at the very end of the growing process. If this were\n done during the growing process e.g. 
in finalize_leaf(), then a leaf\n would be shrunk but its sibling would potentially not be (if it's a\n non-leaf), which would lead to a wrong computation of the 'middle'\n value needed to enforce the monotonic constraints.\n \"\"\"\n for leaf in self.finalized_leaves:\n leaf.value *= self.shrinkage" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_compute_best_split_and_push", + "name": "_compute_best_split_and_push", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._compute_best_split_and_push", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_compute_best_split_and_push/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._compute_best_split_and_push.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_compute_best_split_and_push/node", + "name": "node", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._compute_best_split_and_push.node", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the best possible split (SplitInfo) of a given node.\n\nAlso push it in the heap of splittable nodes if gain isn't zero.\nThe gain of a node is 0 if either all the leaves are pure\n(best gain = 0), or if no split would satisfy the constraints,\n(min_hessians_to_split, min_gain_to_split, min_samples_leaf)", + "docstring": "Compute the best possible split (SplitInfo) of a given node.\n\nAlso push it in the heap of splittable nodes if gain isn't zero.\nThe gain of a node is 0 if either all the leaves are pure\n(best gain = 0), or if no split would satisfy the constraints,\n(min_hessians_to_split, min_gain_to_split, min_samples_leaf)", + "code": " def _compute_best_split_and_push(self, node):\n \"\"\"Compute the best possible split (SplitInfo) of a given node.\n\n Also push it in the heap of splittable nodes if gain isn't zero.\n The gain of a node is 0 if either all the leaves are pure\n (best gain = 0), or if no split would satisfy the constraints,\n (min_hessians_to_split, min_gain_to_split, min_samples_leaf)\n \"\"\"\n\n node.split_info = self.splitter.find_node_split(\n node.n_samples, node.histograms, node.sum_gradients,\n node.sum_hessians, node.value, node.children_lower_bound,\n node.children_upper_bound)\n\n if node.split_info.gain <= 0: # no valid split\n self._finalize_leaf(node)\n else:\n heappush(self.splittable_nodes, node)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_finalize_leaf", + "name": "_finalize_leaf", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._finalize_leaf", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_finalize_leaf/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._finalize_leaf.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + 
{ + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_finalize_leaf/node", + "name": "node", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._finalize_leaf.node", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Make node a leaf of the tree being grown.", + "docstring": "Make node a leaf of the tree being grown.", + "code": " def _finalize_leaf(self, node):\n \"\"\"Make node a leaf of the tree being grown.\"\"\"\n\n node.is_leaf = True\n self.finalized_leaves.append(node)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_finalize_splittable_nodes", + "name": "_finalize_splittable_nodes", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._finalize_splittable_nodes", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_finalize_splittable_nodes/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._finalize_splittable_nodes.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform all splittable nodes into leaves.\n\nUsed when some constraint is met e.g. maximum number of leaves or\nmaximum depth.", + "docstring": "Transform all splittable nodes into leaves.\n\nUsed when some constraint is met e.g. maximum number of leaves or\nmaximum depth.", + "code": " def _finalize_splittable_nodes(self):\n \"\"\"Transform all splittable nodes into leaves.\n\n Used when some constraint is met e.g. 
maximum number of leaves or\n maximum depth.\"\"\"\n while len(self.splittable_nodes) > 0:\n node = self.splittable_nodes.pop()\n self._finalize_leaf(node)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_intilialize_root", + "name": "_intilialize_root", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._intilialize_root", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_intilialize_root/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._intilialize_root.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_intilialize_root/gradients", + "name": "gradients", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._intilialize_root.gradients", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_intilialize_root/hessians", + "name": "hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._intilialize_root.hessians", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_intilialize_root/hessians_are_constant", + "name": "hessians_are_constant", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._intilialize_root.hessians_are_constant", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Initialize root node and finalize it if needed.", + "docstring": "Initialize root node and finalize it if needed.", + "code": " def _intilialize_root(self, gradients, hessians, hessians_are_constant):\n \"\"\"Initialize root node and finalize it if needed.\"\"\"\n n_samples = self.X_binned.shape[0]\n depth = 0\n sum_gradients = sum_parallel(gradients)\n if self.histogram_builder.hessians_are_constant:\n sum_hessians = hessians[0] * n_samples\n else:\n sum_hessians = sum_parallel(hessians)\n self.root = TreeNode(\n depth=depth,\n sample_indices=self.splitter.partition,\n sum_gradients=sum_gradients,\n sum_hessians=sum_hessians,\n value=0\n )\n\n self.root.partition_start = 0\n self.root.partition_stop = n_samples\n\n if self.root.n_samples < 2 * self.min_samples_leaf:\n # Do not even bother computing any splitting statistics.\n self._finalize_leaf(self.root)\n return\n if sum_hessians < self.splitter.min_hessian_to_split:\n self._finalize_leaf(self.root)\n return\n\n self.root.histograms = self.histogram_builder.compute_histograms_brute(\n self.root.sample_indices)\n self._compute_best_split_and_push(self.root)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters", + "name": "_validate_parameters", + "qname": 
"sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._validate_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._validate_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/X_binned", + "name": "X_binned", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._validate_parameters.X_binned", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._validate_parameters.max_leaf_nodes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/max_depth", + "name": "max_depth", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._validate_parameters.max_depth", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._validate_parameters.min_samples_leaf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/min_gain_to_split", + "name": "min_gain_to_split", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._validate_parameters.min_gain_to_split", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/l2_regularization", + "name": "l2_regularization", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._validate_parameters.l2_regularization", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/_validate_parameters/min_hessian_to_split", + "name": "min_hessian_to_split", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower._validate_parameters.min_hessian_to_split", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", 
+ "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate parameters passed to __init__.\n\nAlso validate parameters passed to splitter.", + "docstring": "Validate parameters passed to __init__.\n\nAlso validate parameters passed to splitter.", + "code": " def _validate_parameters(self, X_binned, max_leaf_nodes, max_depth,\n min_samples_leaf, min_gain_to_split,\n l2_regularization, min_hessian_to_split):\n \"\"\"Validate parameters passed to __init__.\n\n Also validate parameters passed to splitter.\n \"\"\"\n if X_binned.dtype != np.uint8:\n raise NotImplementedError(\n \"X_binned must be of type uint8.\")\n if not X_binned.flags.f_contiguous:\n raise ValueError(\n \"X_binned should be passed as Fortran contiguous \"\n \"array for maximum efficiency.\")\n if max_leaf_nodes is not None and max_leaf_nodes <= 1:\n raise ValueError('max_leaf_nodes={} should not be'\n ' smaller than 2'.format(max_leaf_nodes))\n if max_depth is not None and max_depth < 1:\n raise ValueError('max_depth={} should not be'\n ' smaller than 1'.format(max_depth))\n if min_samples_leaf < 1:\n raise ValueError('min_samples_leaf={} should '\n 'not be smaller than 1'.format(min_samples_leaf))\n if min_gain_to_split < 0:\n raise ValueError('min_gain_to_split={} '\n 'must be positive.'.format(min_gain_to_split))\n if l2_regularization < 0:\n raise ValueError('l2_regularization={} must be '\n 'positive.'.format(l2_regularization))\n if min_hessian_to_split < 0:\n raise ValueError('min_hessian_to_split={} '\n 'must be positive.'.format(min_hessian_to_split))" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/grow", + "name": "grow", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.grow", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/grow/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.grow.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Grow the tree, from root to leaves.", + "docstring": "Grow the tree, from root to leaves.", + "code": " def grow(self):\n \"\"\"Grow the tree, from root to leaves.\"\"\"\n while self.splittable_nodes:\n self.split_next()\n\n self._apply_shrinkage()" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/make_predictor", + "name": "make_predictor", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.make_predictor", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/make_predictor/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.make_predictor.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/make_predictor/binning_thresholds", + "name": "binning_thresholds", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.make_predictor.binning_thresholds", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of floats", + "default_value": "", + "description": "Corresponds to the bin_thresholds_ attribute of the BinMapper.\nFor each feature, this stores:\n\n- the bin frontiers for continuous features\n- the unique raw category values for categorical features" + }, + "type": { + "kind": "NamedType", + "name": "array-like of floats" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Make a TreePredictor object out of the current tree.", + "docstring": "Make a TreePredictor object out of the current tree.\n\nParameters\n----------\nbinning_thresholds : array-like of floats\n Corresponds to the bin_thresholds_ attribute of the BinMapper.\n For each feature, this stores:\n\n - the bin frontiers for continuous features\n - the unique raw category values for categorical features\n\nReturns\n-------\nA TreePredictor object.", + "code": " def make_predictor(self, binning_thresholds):\n \"\"\"Make a TreePredictor object out of the current tree.\n\n Parameters\n ----------\n binning_thresholds : array-like of floats\n Corresponds to the bin_thresholds_ attribute of the BinMapper.\n For each feature, this stores:\n\n - the bin frontiers for continuous features\n - the unique raw category values for categorical features\n\n Returns\n -------\n A TreePredictor object.\n \"\"\"\n predictor_nodes = np.zeros(self.n_nodes, dtype=PREDICTOR_RECORD_DTYPE)\n binned_left_cat_bitsets = np.zeros((self.n_categorical_splits, 8),\n dtype=X_BITSET_INNER_DTYPE)\n raw_left_cat_bitsets = np.zeros((self.n_categorical_splits, 8),\n dtype=X_BITSET_INNER_DTYPE)\n _fill_predictor_arrays(predictor_nodes, binned_left_cat_bitsets,\n raw_left_cat_bitsets,\n self.root, binning_thresholds,\n self.n_bins_non_missing)\n return TreePredictor(predictor_nodes, binned_left_cat_bitsets,\n raw_left_cat_bitsets)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/split_next", + "name": "split_next", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.split_next", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeGrower/split_next/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeGrower.split_next.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Split the node with highest potential gain.", + "docstring": "Split the node with highest potential gain.\n\nReturns\n-------\nleft : TreeNode\n The resulting left child.\nright : TreeNode\n The resulting right child.", + "code": " def split_next(self):\n \"\"\"Split the node with highest potential gain.\n\n Returns\n -------\n left : TreeNode\n The resulting left child.\n right : TreeNode\n The resulting right child.\n \"\"\"\n # Consider the node with the highest loss reduction (a.k.a. 
gain)\n node = heappop(self.splittable_nodes)\n\n tic = time()\n (sample_indices_left,\n sample_indices_right,\n right_child_pos) = self.splitter.split_indices(node.split_info,\n node.sample_indices)\n self.total_apply_split_time += time() - tic\n\n depth = node.depth + 1\n n_leaf_nodes = len(self.finalized_leaves) + len(self.splittable_nodes)\n n_leaf_nodes += 2\n\n left_child_node = TreeNode(depth,\n sample_indices_left,\n node.split_info.sum_gradient_left,\n node.split_info.sum_hessian_left,\n value=node.split_info.value_left,\n )\n right_child_node = TreeNode(depth,\n sample_indices_right,\n node.split_info.sum_gradient_right,\n node.split_info.sum_hessian_right,\n value=node.split_info.value_right,\n )\n\n node.right_child = right_child_node\n node.left_child = left_child_node\n\n # set start and stop indices\n left_child_node.partition_start = node.partition_start\n left_child_node.partition_stop = node.partition_start + right_child_pos\n right_child_node.partition_start = left_child_node.partition_stop\n right_child_node.partition_stop = node.partition_stop\n\n if not self.has_missing_values[node.split_info.feature_idx]:\n # If no missing values are encountered at fit time, then samples\n # with missing values during predict() will go to whichever child\n # has the most samples.\n node.split_info.missing_go_to_left = (\n left_child_node.n_samples > right_child_node.n_samples)\n\n self.n_nodes += 2\n self.n_categorical_splits += node.split_info.is_categorical\n\n if (self.max_leaf_nodes is not None\n and n_leaf_nodes == self.max_leaf_nodes):\n self._finalize_leaf(left_child_node)\n self._finalize_leaf(right_child_node)\n self._finalize_splittable_nodes()\n return left_child_node, right_child_node\n\n if self.max_depth is not None and depth == self.max_depth:\n self._finalize_leaf(left_child_node)\n self._finalize_leaf(right_child_node)\n return left_child_node, right_child_node\n\n if left_child_node.n_samples < self.min_samples_leaf * 2:\n self._finalize_leaf(left_child_node)\n if right_child_node.n_samples < self.min_samples_leaf * 2:\n self._finalize_leaf(right_child_node)\n\n if self.with_monotonic_cst:\n # Set value bounds for respecting monotonic constraints\n # See test_nodes_values() for details\n if (self.monotonic_cst[node.split_info.feature_idx] ==\n MonotonicConstraint.NO_CST):\n lower_left = lower_right = node.children_lower_bound\n upper_left = upper_right = node.children_upper_bound\n else:\n mid = (left_child_node.value + right_child_node.value) / 2\n if (self.monotonic_cst[node.split_info.feature_idx] ==\n MonotonicConstraint.POS):\n lower_left, upper_left = node.children_lower_bound, mid\n lower_right, upper_right = mid, node.children_upper_bound\n else: # NEG\n lower_left, upper_left = mid, node.children_upper_bound\n lower_right, upper_right = node.children_lower_bound, mid\n left_child_node.set_children_bounds(lower_left, upper_left)\n right_child_node.set_children_bounds(lower_right, upper_right)\n\n # Compute histograms of children, and compute their best possible split\n # (if needed)\n should_split_left = not left_child_node.is_leaf\n should_split_right = not right_child_node.is_leaf\n if should_split_left or should_split_right:\n\n # We will compute the histograms of both nodes even if one of them\n # is a leaf, since computing the second histogram is very cheap\n # (using histogram subtraction).\n n_samples_left = left_child_node.sample_indices.shape[0]\n n_samples_right = right_child_node.sample_indices.shape[0]\n if n_samples_left < n_samples_right:\n 
smallest_child = left_child_node\n largest_child = right_child_node\n else:\n smallest_child = right_child_node\n largest_child = left_child_node\n\n # We use the brute O(n_samples) method on the child that has the\n # smallest number of samples, and the subtraction trick O(n_bins)\n # on the other one.\n tic = time()\n smallest_child.histograms = \\\n self.histogram_builder.compute_histograms_brute(\n smallest_child.sample_indices)\n largest_child.histograms = \\\n self.histogram_builder.compute_histograms_subtraction(\n node.histograms, smallest_child.histograms)\n self.total_compute_hist_time += time() - tic\n\n tic = time()\n if should_split_left:\n self._compute_best_split_and_push(left_child_node)\n if should_split_right:\n self._compute_best_split_and_push(right_child_node)\n self.total_find_split_time += time() - tic\n\n # Release memory used by histograms as they are no longer needed\n # for leaf nodes since they won't be split.\n for child in (left_child_node, right_child_node):\n if child.is_leaf:\n del child.histograms\n\n # Release memory used by histograms as they are no longer needed for\n # internal nodes once children histograms have been computed.\n del node.histograms\n\n return left_child_node, right_child_node" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__init__/depth", + "name": "depth", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.__init__.depth", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The depth of the node, i.e. its distance from the root." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__init__/sample_indices", + "name": "sample_indices", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.__init__.sample_indices", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples_at_node,), dtype=np.uint", + "default_value": "", + "description": "The indices of the samples at the node." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_at_node,)" + }, + { + "kind": "NamedType", + "name": "dtype=np.uint" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__init__/sum_gradients", + "name": "sum_gradients", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.__init__.sum_gradients", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "The sum of the gradients of the samples at the node." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__init__/sum_hessians", + "name": "sum_hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.__init__.sum_hessians", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "The sum of the hessians of the samples at the node." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__init__/value", + "name": "value", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.__init__.value", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Tree Node class used in TreeGrower.\n\nThis isn't used for prediction purposes, only for training (see\nTreePredictor).", + "docstring": "", + "code": " def __init__(self, depth, sample_indices, sum_gradients,\n sum_hessians, value=None):\n self.depth = depth\n self.sample_indices = sample_indices\n self.n_samples = sample_indices.shape[0]\n self.sum_gradients = sum_gradients\n self.sum_hessians = sum_hessians\n self.value = value\n self.is_leaf = False\n self.set_children_bounds(float('-inf'), float('+inf'))" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__lt__", + "name": "__lt__", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.__lt__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__lt__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.__lt__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/__lt__/other_node", + "name": "other_node", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.__lt__.other_node", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "TreeNode", + "default_value": "", + "description": "The node to compare with." 
+ }, + "type": { + "kind": "NamedType", + "name": "TreeNode" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Comparison for priority queue.\n\nNodes with high gain are higher priority than nodes with low gain.\n\nheapq.heappush only need the '<' operator.\nheapq.heappop take the smallest item first (smaller is higher\npriority).", + "docstring": "Comparison for priority queue.\n\nNodes with high gain are higher priority than nodes with low gain.\n\nheapq.heappush only need the '<' operator.\nheapq.heappop take the smallest item first (smaller is higher\npriority).\n\nParameters\n----------\nother_node : TreeNode\n The node to compare with.", + "code": " def __lt__(self, other_node):\n \"\"\"Comparison for priority queue.\n\n Nodes with high gain are higher priority than nodes with low gain.\n\n heapq.heappush only need the '<' operator.\n heapq.heappop take the smallest item first (smaller is higher\n priority).\n\n Parameters\n ----------\n other_node : TreeNode\n The node to compare with.\n \"\"\"\n return self.split_info.gain > other_node.split_info.gain" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/set_children_bounds", + "name": "set_children_bounds", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.set_children_bounds", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/set_children_bounds/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.set_children_bounds.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/set_children_bounds/lower", + "name": "lower", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.set_children_bounds.lower", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/TreeNode/set_children_bounds/upper", + "name": "upper", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower.TreeNode.set_children_bounds.upper", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Set children values bounds to respect monotonic constraints.", + "docstring": "Set children values bounds to respect monotonic constraints.", + "code": " def set_children_bounds(self, lower, upper):\n \"\"\"Set children values bounds to respect monotonic constraints.\"\"\"\n\n # These are bounds for the node's *children* values, not the node's\n # value. 
The bounds are used in the splitter when considering potential\n # left and right child.\n self.children_lower_bound = lower\n self.children_upper_bound = upper" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/_fill_predictor_arrays", + "name": "_fill_predictor_arrays", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower._fill_predictor_arrays", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/_fill_predictor_arrays/predictor_nodes", + "name": "predictor_nodes", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower._fill_predictor_arrays.predictor_nodes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/_fill_predictor_arrays/binned_left_cat_bitsets", + "name": "binned_left_cat_bitsets", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower._fill_predictor_arrays.binned_left_cat_bitsets", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/_fill_predictor_arrays/raw_left_cat_bitsets", + "name": "raw_left_cat_bitsets", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower._fill_predictor_arrays.raw_left_cat_bitsets", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/_fill_predictor_arrays/grower_node", + "name": "grower_node", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower._fill_predictor_arrays.grower_node", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/_fill_predictor_arrays/binning_thresholds", + "name": "binning_thresholds", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower._fill_predictor_arrays.binning_thresholds", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/_fill_predictor_arrays/n_bins_non_missing", + "name": "n_bins_non_missing", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower._fill_predictor_arrays.n_bins_non_missing", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/_fill_predictor_arrays/next_free_node_idx", + "name": "next_free_node_idx", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower._fill_predictor_arrays.next_free_node_idx", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.ensemble._hist_gradient_boosting.grower/_fill_predictor_arrays/next_free_bitset_idx", + "name": "next_free_bitset_idx", + "qname": "sklearn.ensemble._hist_gradient_boosting.grower._fill_predictor_arrays.next_free_bitset_idx", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper used in make_predictor to set the TreePredictor fields.", + "docstring": "Helper used in make_predictor to set the TreePredictor fields.", + "code": "def _fill_predictor_arrays(predictor_nodes, binned_left_cat_bitsets,\n raw_left_cat_bitsets, grower_node,\n binning_thresholds, n_bins_non_missing,\n next_free_node_idx=0, next_free_bitset_idx=0):\n \"\"\"Helper used in make_predictor to set the TreePredictor fields.\"\"\"\n node = predictor_nodes[next_free_node_idx]\n node['count'] = grower_node.n_samples\n node['depth'] = grower_node.depth\n if grower_node.split_info is not None:\n node['gain'] = grower_node.split_info.gain\n else:\n node['gain'] = -1\n\n node['value'] = grower_node.value\n\n if grower_node.is_leaf:\n # Leaf node\n node['is_leaf'] = True\n return next_free_node_idx + 1, next_free_bitset_idx\n\n split_info = grower_node.split_info\n feature_idx, bin_idx = split_info.feature_idx, split_info.bin_idx\n node['feature_idx'] = feature_idx\n node['bin_threshold'] = bin_idx\n node['missing_go_to_left'] = split_info.missing_go_to_left\n node['is_categorical'] = split_info.is_categorical\n\n if split_info.bin_idx == n_bins_non_missing[feature_idx] - 1:\n # Split is on the last non-missing bin: it's a \"split on nans\".\n # All nans go to the right, the rest go to the left.\n # Note: for categorical splits, bin_idx is 0 and we rely on the bitset\n node['num_threshold'] = np.inf\n elif split_info.is_categorical:\n categories = binning_thresholds[feature_idx]\n node['bitset_idx'] = next_free_bitset_idx\n binned_left_cat_bitsets[next_free_bitset_idx] = (\n split_info.left_cat_bitset)\n set_raw_bitset_from_binned_bitset(\n raw_left_cat_bitsets[next_free_bitset_idx],\n split_info.left_cat_bitset, categories\n )\n next_free_bitset_idx += 1\n else:\n node['num_threshold'] = binning_thresholds[feature_idx][bin_idx]\n\n next_free_node_idx += 1\n\n node['left'] = next_free_node_idx\n next_free_node_idx, next_free_bitset_idx = _fill_predictor_arrays(\n predictor_nodes, binned_left_cat_bitsets, raw_left_cat_bitsets,\n grower_node.left_child, binning_thresholds=binning_thresholds,\n n_bins_non_missing=n_bins_non_missing,\n next_free_node_idx=next_free_node_idx,\n next_free_bitset_idx=next_free_bitset_idx)\n\n node['right'] = next_free_node_idx\n return _fill_predictor_arrays(\n predictor_nodes, binned_left_cat_bitsets, raw_left_cat_bitsets,\n grower_node.right_child, binning_thresholds=binning_thresholds,\n n_bins_non_missing=n_bins_non_missing,\n next_free_node_idx=next_free_node_idx,\n next_free_bitset_idx=next_free_bitset_idx)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/__call__", + "name": "__call__", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/__call__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.__call__.self", + "default_value": null, + 
"assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/__call__/y_true", + "name": "y_true", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.__call__.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/__call__/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.__call__.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/__call__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.__call__.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the weighted average loss", + "docstring": "Return the weighted average loss", + "code": " def __call__(self, y_true, raw_predictions, sample_weight):\n \"\"\"Return the weighted average loss\"\"\"\n return np.average(self.pointwise_loss(y_true, raw_predictions),\n weights=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/__init__/hessians_are_constant", + "name": "hessians_are_constant", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.__init__.hessians_are_constant", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for a loss.", + "docstring": "", + "code": " def __init__(self, hessians_are_constant):\n self.hessians_are_constant = hessians_are_constant" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/get_baseline_prediction", + "name": "get_baseline_prediction", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.get_baseline_prediction", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/get_baseline_prediction/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.get_baseline_prediction.self", + "default_value": null, + 
"assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/get_baseline_prediction/y_train", + "name": "y_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.get_baseline_prediction.y_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples,)", + "default_value": "", + "description": "The target training values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/get_baseline_prediction/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.get_baseline_prediction.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape(n_samples,) default=None", + "default_value": "", + "description": "Weights of training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape(n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/get_baseline_prediction/prediction_dim", + "name": "prediction_dim", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.get_baseline_prediction.prediction_dim", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The dimension of one prediction: 1 for binary classification and\nregression, n_classes for multiclass classification." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return initial predictions (before the first iteration).", + "docstring": "Return initial predictions (before the first iteration).\n\nParameters\n----------\ny_train : ndarray, shape (n_samples,)\n The target training values.\n\nsample_weight : array-like of shape(n_samples,) default=None\n Weights of training data.\n\nprediction_dim : int\n The dimension of one prediction: 1 for binary classification and\n regression, n_classes for multiclass classification.\n\nReturns\n-------\nbaseline_prediction : float or ndarray, shape (1, prediction_dim)\n The baseline prediction.", + "code": " @abstractmethod\n def get_baseline_prediction(self, y_train, sample_weight, prediction_dim):\n \"\"\"Return initial predictions (before the first iteration).\n\n Parameters\n ----------\n y_train : ndarray, shape (n_samples,)\n The target training values.\n\n sample_weight : array-like of shape(n_samples,) default=None\n Weights of training data.\n\n prediction_dim : int\n The dimension of one prediction: 1 for binary classification and\n regression, n_classes for multiclass classification.\n\n Returns\n -------\n baseline_prediction : float or ndarray, shape (1, prediction_dim)\n The baseline prediction.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/init_gradients_and_hessians", + "name": "init_gradients_and_hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.init_gradients_and_hessians", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/init_gradients_and_hessians/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.init_gradients_and_hessians.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/init_gradients_and_hessians/n_samples", + "name": "n_samples", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.init_gradients_and_hessians.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The number of samples passed to `fit()`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/init_gradients_and_hessians/prediction_dim", + "name": "prediction_dim", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.init_gradients_and_hessians.prediction_dim", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The dimension of a raw prediction, i.e. the number of trees\nbuilt at each iteration. Equals 1 for regression and binary\nclassification, or K where K is the number of classes for\nmulticlass classification." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/init_gradients_and_hessians/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.init_gradients_and_hessians.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape(n_samples,) default=None", + "default_value": "", + "description": "Weights of training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape(n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return initial gradients and hessians.\n\nUnless hessians are constant, arrays are initialized with undefined\nvalues.", + "docstring": "Return initial gradients and hessians.\n\nUnless hessians are constant, arrays are initialized with undefined\nvalues.\n\nParameters\n----------\nn_samples : int\n The number of samples passed to `fit()`.\n\nprediction_dim : int\n The dimension of a raw prediction, i.e. the number of trees\n built at each iteration. Equals 1 for regression and binary\n classification, or K where K is the number of classes for\n multiclass classification.\n\nsample_weight : array-like of shape(n_samples,) default=None\n Weights of training data.\n\nReturns\n-------\ngradients : ndarray, shape (prediction_dim, n_samples)\n The initial gradients. The array is not initialized.\nhessians : ndarray, shape (prediction_dim, n_samples)\n If hessians are constant (e.g. for `LeastSquares` loss, the\n array is initialized to ``1``. Otherwise, the array is allocated\n without being initialized.", + "code": " def init_gradients_and_hessians(self, n_samples, prediction_dim,\n sample_weight):\n \"\"\"Return initial gradients and hessians.\n\n Unless hessians are constant, arrays are initialized with undefined\n values.\n\n Parameters\n ----------\n n_samples : int\n The number of samples passed to `fit()`.\n\n prediction_dim : int\n The dimension of a raw prediction, i.e. the number of trees\n built at each iteration. Equals 1 for regression and binary\n classification, or K where K is the number of classes for\n multiclass classification.\n\n sample_weight : array-like of shape(n_samples,) default=None\n Weights of training data.\n\n Returns\n -------\n gradients : ndarray, shape (prediction_dim, n_samples)\n The initial gradients. The array is not initialized.\n hessians : ndarray, shape (prediction_dim, n_samples)\n If hessians are constant (e.g. for `LeastSquares` loss, the\n array is initialized to ``1``. 
Otherwise, the array is allocated\n without being initialized.\n \"\"\"\n shape = (prediction_dim, n_samples)\n gradients = np.empty(shape=shape, dtype=G_H_DTYPE)\n\n if self.hessians_are_constant:\n # If the hessians are constant, we consider they are equal to 1.\n # - This is correct for the half LS loss\n # - For LAD loss, hessians are actually 0, but they are always\n # ignored anyway.\n hessians = np.ones(shape=(1, 1), dtype=G_H_DTYPE)\n else:\n hessians = np.empty(shape=shape, dtype=G_H_DTYPE)\n\n return gradients, hessians" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/pointwise_loss", + "name": "pointwise_loss", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.pointwise_loss", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/pointwise_loss/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.pointwise_loss.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/pointwise_loss/y_true", + "name": "y_true", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.pointwise_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/pointwise_loss/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.pointwise_loss.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return loss value for each input", + "docstring": "Return loss value for each input", + "code": " @abstractmethod\n def pointwise_loss(self, y_true, raw_predictions):\n \"\"\"Return loss value for each input\"\"\"" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/update_gradients_and_hessians", + "name": "update_gradients_and_hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.update_gradients_and_hessians", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/update_gradients_and_hessians/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.update_gradients_and_hessians.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/update_gradients_and_hessians/gradients", + "name": "gradients", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.update_gradients_and_hessians.gradients", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (prediction_dim, n_samples)", + "default_value": "", + "description": "The gradients (treated as OUT array)." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (prediction_dim, n_samples)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/update_gradients_and_hessians/hessians", + "name": "hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.update_gradients_and_hessians.hessians", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (prediction_dim, n_samples) or (1,)", + "default_value": "", + "description": "The hessians (treated as OUT array)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (prediction_dim, n_samples) or (1,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/update_gradients_and_hessians/y_true", + "name": "y_true", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.update_gradients_and_hessians.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples,)", + "default_value": "", + "description": "The true target values or each training sample." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/update_gradients_and_hessians/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.update_gradients_and_hessians.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (prediction_dim, n_samples)", + "default_value": "", + "description": "The raw_predictions (i.e. values from the trees) of the tree\nensemble at iteration ``i - 1``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (prediction_dim, n_samples)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BaseLoss/update_gradients_and_hessians/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BaseLoss.update_gradients_and_hessians.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape(n_samples,) default=None", + "default_value": "", + "description": "Weights of training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape(n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Update gradients and hessians arrays, inplace.\n\nThe gradients (resp. hessians) are the first (resp. second) order\nderivatives of the loss for each sample with respect to the\npredictions of model, evaluated at iteration ``i - 1``.", + "docstring": "Update gradients and hessians arrays, inplace.\n\nThe gradients (resp. hessians) are the first (resp. 
second) order\nderivatives of the loss for each sample with respect to the\npredictions of the model, evaluated at iteration ``i - 1``.\n\nParameters\n----------\ngradients : ndarray, shape (prediction_dim, n_samples)\n    The gradients (treated as OUT array).\n\nhessians : ndarray, shape (prediction_dim, n_samples) or (1,)\n    The hessians (treated as OUT array).\n\ny_true : ndarray, shape (n_samples,)\n    The true target values of each training sample.\n\nraw_predictions : ndarray, shape (prediction_dim, n_samples)\n    The raw_predictions (i.e. values from the trees) of the tree\n    ensemble at iteration ``i - 1``.\n\nsample_weight : array-like of shape(n_samples,) default=None\n    Weights of training data.", + "code": "    @abstractmethod\n    def update_gradients_and_hessians(self, gradients, hessians, y_true,\n                                      raw_predictions, sample_weight):\n        \"\"\"Update gradients and hessians arrays, inplace.\n\n        The gradients (resp. hessians) are the first (resp. second) order\n        derivatives of the loss for each sample with respect to the\n        predictions of the model, evaluated at iteration ``i - 1``.\n\n        Parameters\n        ----------\n        gradients : ndarray, shape (prediction_dim, n_samples)\n            The gradients (treated as OUT array).\n\n        hessians : ndarray, shape (prediction_dim, n_samples) or \\\n                (1,)\n            The hessians (treated as OUT array).\n\n        y_true : ndarray, shape (n_samples,)\n            The true target values of each training sample.\n\n        raw_predictions : ndarray, shape (prediction_dim, n_samples)\n            The raw_predictions (i.e. values from the trees) of the tree\n            ensemble at iteration ``i - 1``.\n\n        sample_weight : array-like of shape(n_samples,) default=None\n            Weights of training data.\n        \"\"\"" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/__init__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.__init__.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Binary cross-entropy loss, for binary classification.\n\nFor a given sample x_i, the binary cross-entropy loss is defined as the\nnegative log-likelihood of the model, which can be expressed as::\n\n    loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i\n\nSee The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,\nsection 4.4.1 (about logistic regression).", + "docstring": "", + "code": "    def __init__(self, sample_weight):\n        super().__init__(hessians_are_constant=False)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/get_baseline_prediction", + "name": "get_baseline_prediction", + "qname": 
"sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.get_baseline_prediction", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/get_baseline_prediction/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.get_baseline_prediction.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/get_baseline_prediction/y_train", + "name": "y_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.get_baseline_prediction.y_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/get_baseline_prediction/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.get_baseline_prediction.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/get_baseline_prediction/prediction_dim", + "name": "prediction_dim", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.get_baseline_prediction.prediction_dim", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_baseline_prediction(self, y_train, sample_weight, prediction_dim):\n if prediction_dim > 2:\n raise ValueError(\n \"loss='binary_crossentropy' is not defined for multiclass\"\n \" classification with n_classes=%d, use\"\n \" loss='categorical_crossentropy' instead\" % prediction_dim)\n proba_positive_class = np.average(y_train, weights=sample_weight)\n eps = np.finfo(y_train.dtype).eps\n proba_positive_class = np.clip(proba_positive_class, eps, 1 - eps)\n # log(x / 1 - x) is the anti function of sigmoid, or the link function\n # of the Binomial model.\n return np.log(proba_positive_class / (1 - proba_positive_class))" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/pointwise_loss", + "name": "pointwise_loss", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.pointwise_loss", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/pointwise_loss/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.pointwise_loss.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/pointwise_loss/y_true", + "name": "y_true", + "qname": 
"sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.pointwise_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/pointwise_loss/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.pointwise_loss.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def pointwise_loss(self, y_true, raw_predictions):\n # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n # logaddexp(0, x) = log(1 + exp(x))\n loss = np.logaddexp(0, raw_predictions) - y_true * raw_predictions\n return loss" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/predict_proba", + "name": "predict_proba", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/predict_proba/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/predict_proba/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.predict_proba.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def predict_proba(self, raw_predictions):\n # shape (1, n_samples) --> (n_samples,). 
reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n proba = np.empty((raw_predictions.shape[0], 2), dtype=Y_DTYPE)\n proba[:, 1] = expit(raw_predictions)\n proba[:, 0] = 1 - proba[:, 1]\n return proba" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/update_gradients_and_hessians", + "name": "update_gradients_and_hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.update_gradients_and_hessians", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/update_gradients_and_hessians/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.update_gradients_and_hessians.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/update_gradients_and_hessians/gradients", + "name": "gradients", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.update_gradients_and_hessians.gradients", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/update_gradients_and_hessians/hessians", + "name": "hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.update_gradients_and_hessians.hessians", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/update_gradients_and_hessians/y_true", + "name": "y_true", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.update_gradients_and_hessians.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/update_gradients_and_hessians/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.update_gradients_and_hessians.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/BinaryCrossEntropy/update_gradients_and_hessians/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.BinaryCrossEntropy.update_gradients_and_hessians.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def update_gradients_and_hessians(self, gradients, hessians, y_true,\n raw_predictions, sample_weight):\n # shape (1, n_samples) --> 
(n_samples,). reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n gradients = gradients.reshape(-1)\n hessians = hessians.reshape(-1)\n _update_gradients_hessians_binary_crossentropy(\n gradients, hessians, y_true, raw_predictions, sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/__init__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.__init__.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Categorical cross-entropy loss, for multiclass classification.\n\nFor a given sample x_i, the categorical cross-entropy loss is defined as\nthe negative log-likelihood of the model and generalizes the binary\ncross-entropy to more than 2 classes.", + "docstring": "", + "code": " def __init__(self, sample_weight):\n super().__init__(hessians_are_constant=False)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/get_baseline_prediction", + "name": "get_baseline_prediction", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.get_baseline_prediction", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/get_baseline_prediction/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.get_baseline_prediction.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/get_baseline_prediction/y_train", + "name": "y_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.get_baseline_prediction.y_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/get_baseline_prediction/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.get_baseline_prediction.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/get_baseline_prediction/prediction_dim", + "name": "prediction_dim", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.get_baseline_prediction.prediction_dim", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_baseline_prediction(self, y_train, sample_weight, prediction_dim):\n init_value = np.zeros(shape=(prediction_dim, 1), dtype=Y_DTYPE)\n eps = np.finfo(y_train.dtype).eps\n for k in range(prediction_dim):\n proba_kth_class = np.average(y_train == k,\n weights=sample_weight)\n proba_kth_class = np.clip(proba_kth_class, eps, 1 - eps)\n init_value[k, :] += np.log(proba_kth_class)\n\n return init_value" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/pointwise_loss", + "name": "pointwise_loss", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.pointwise_loss", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/pointwise_loss/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.pointwise_loss.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/pointwise_loss/y_true", + "name": "y_true", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.pointwise_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/pointwise_loss/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.pointwise_loss.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def pointwise_loss(self, y_true, raw_predictions):\n one_hot_true = np.zeros_like(raw_predictions)\n prediction_dim = raw_predictions.shape[0]\n for k in range(prediction_dim):\n one_hot_true[k, :] = (y_true == k)\n\n loss = (logsumexp(raw_predictions, axis=0) -\n (one_hot_true * raw_predictions).sum(axis=0))\n return loss" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/predict_proba", + "name": "predict_proba", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/predict_proba/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.predict_proba.self", + "default_value": null, + 
"assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/predict_proba/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.predict_proba.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def predict_proba(self, raw_predictions):\n # TODO: This could be done in parallel\n # compute softmax (using exp(log(softmax)))\n proba = np.exp(raw_predictions -\n logsumexp(raw_predictions, axis=0)[np.newaxis, :])\n return proba.T" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/update_gradients_and_hessians", + "name": "update_gradients_and_hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.update_gradients_and_hessians", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/update_gradients_and_hessians/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.update_gradients_and_hessians.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/update_gradients_and_hessians/gradients", + "name": "gradients", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.update_gradients_and_hessians.gradients", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/update_gradients_and_hessians/hessians", + "name": "hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.update_gradients_and_hessians.hessians", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/update_gradients_and_hessians/y_true", + "name": "y_true", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.update_gradients_and_hessians.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/update_gradients_and_hessians/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.update_gradients_and_hessians.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
"description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/CategoricalCrossEntropy/update_gradients_and_hessians/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.CategoricalCrossEntropy.update_gradients_and_hessians.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def update_gradients_and_hessians(self, gradients, hessians, y_true,\n raw_predictions, sample_weight):\n _update_gradients_hessians_categorical_crossentropy(\n gradients, hessians, y_true, raw_predictions, sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/__init__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.__init__.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Least absolute deviation, for regression.\n\nFor a given sample x_i, the loss is defined as::\n\n loss(x_i) = |y_true_i - raw_pred_i|", + "docstring": "", + "code": " def __init__(self, sample_weight):\n # If sample weights are provided, the hessians and gradients\n # are multiplied by sample_weight, which means the hessians are\n # equal to sample weights.\n super().__init__(hessians_are_constant=sample_weight is None)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/get_baseline_prediction", + "name": "get_baseline_prediction", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.get_baseline_prediction", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/get_baseline_prediction/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.get_baseline_prediction.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/get_baseline_prediction/y_train", + "name": "y_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.get_baseline_prediction.y_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + 
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/get_baseline_prediction/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.get_baseline_prediction.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/get_baseline_prediction/prediction_dim", + "name": "prediction_dim", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.get_baseline_prediction.prediction_dim", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_baseline_prediction(self, y_train, sample_weight, prediction_dim):\n if sample_weight is None:\n return np.median(y_train)\n else:\n return _weighted_percentile(y_train, sample_weight, 50)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/inverse_link_function", + "name": "inverse_link_function", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.inverse_link_function", + "decorators": ["staticmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/inverse_link_function/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.inverse_link_function.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @staticmethod\n def inverse_link_function(raw_predictions):\n return raw_predictions" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/pointwise_loss", + "name": "pointwise_loss", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.pointwise_loss", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/pointwise_loss/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.pointwise_loss.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/pointwise_loss/y_true", + "name": "y_true", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.pointwise_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/pointwise_loss/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.pointwise_loss.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def pointwise_loss(self, y_true, raw_predictions):\n # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n loss = np.abs(y_true - raw_predictions)\n return loss" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_gradients_and_hessians", + "name": "update_gradients_and_hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.update_gradients_and_hessians", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_gradients_and_hessians/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.update_gradients_and_hessians.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_gradients_and_hessians/gradients", + "name": "gradients", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.update_gradients_and_hessians.gradients", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_gradients_and_hessians/hessians", + "name": "hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.update_gradients_and_hessians.hessians", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_gradients_and_hessians/y_true", + "name": "y_true", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.update_gradients_and_hessians.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_gradients_and_hessians/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.update_gradients_and_hessians.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_gradients_and_hessians/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.update_gradients_and_hessians.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def update_gradients_and_hessians(self, gradients, hessians, y_true,\n raw_predictions, sample_weight):\n # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n gradients = gradients.reshape(-1)\n if sample_weight is None:\n _update_gradients_least_absolute_deviation(gradients, y_true,\n raw_predictions)\n else:\n hessians = hessians.reshape(-1)\n _update_gradients_hessians_least_absolute_deviation(\n gradients, hessians, y_true, raw_predictions, sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_leaves_values", + "name": "update_leaves_values", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.update_leaves_values", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_leaves_values/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.update_leaves_values.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_leaves_values/grower", + "name": "grower", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.update_leaves_values.grower", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_leaves_values/y_true", + "name": "y_true", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.update_leaves_values.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_leaves_values/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.update_leaves_values.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastAbsoluteDeviation/update_leaves_values/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastAbsoluteDeviation.update_leaves_values.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, 
+ "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def update_leaves_values(self, grower, y_true, raw_predictions,\n sample_weight):\n # Update the values predicted by the tree with\n # median(y_true - raw_predictions).\n # See note about need_update_leaves_values in BaseLoss.\n\n # TODO: ideally this should be computed in parallel over the leaves\n # using something similar to _update_raw_predictions(), but this\n # requires a cython version of median()\n for leaf in grower.finalized_leaves:\n indices = leaf.sample_indices\n if sample_weight is None:\n median_res = np.median(y_true[indices]\n - raw_predictions[indices])\n else:\n median_res = _weighted_percentile(\n y_true[indices] - raw_predictions[indices],\n sample_weight=sample_weight[indices],\n percentile=50\n )\n leaf.value = grower.shrinkage * median_res" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/__init__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.__init__.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Least squares loss, for regression.\n\nFor a given sample x_i, least squares loss is defined as::\n\n loss(x_i) = 0.5 * (y_true_i - raw_pred_i)**2\n\nThis actually computes the half least squares loss to simplify\nthe computation of the gradients and get a unit hessian (and be consistent\nwith what is done in LightGBM).", + "docstring": "", + "code": " def __init__(self, sample_weight):\n # If sample weights are provided, the hessians and gradients\n # are multiplied by sample_weight, which means the hessians are\n # equal to sample weights.\n super().__init__(hessians_are_constant=sample_weight is None)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/get_baseline_prediction", + "name": "get_baseline_prediction", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.get_baseline_prediction", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/get_baseline_prediction/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.get_baseline_prediction.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/get_baseline_prediction/y_train", + "name": 
"y_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.get_baseline_prediction.y_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/get_baseline_prediction/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.get_baseline_prediction.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/get_baseline_prediction/prediction_dim", + "name": "prediction_dim", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.get_baseline_prediction.prediction_dim", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_baseline_prediction(self, y_train, sample_weight, prediction_dim):\n return np.average(y_train, weights=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/inverse_link_function", + "name": "inverse_link_function", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.inverse_link_function", + "decorators": ["staticmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/inverse_link_function/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.inverse_link_function.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @staticmethod\n def inverse_link_function(raw_predictions):\n return raw_predictions" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/pointwise_loss", + "name": "pointwise_loss", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.pointwise_loss", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/pointwise_loss/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.pointwise_loss.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/pointwise_loss/y_true", + "name": "y_true", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.pointwise_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/pointwise_loss/raw_predictions", 
+ "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.pointwise_loss.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def pointwise_loss(self, y_true, raw_predictions):\n # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n loss = 0.5 * np.power(y_true - raw_predictions, 2)\n return loss" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/update_gradients_and_hessians", + "name": "update_gradients_and_hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.update_gradients_and_hessians", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/update_gradients_and_hessians/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.update_gradients_and_hessians.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/update_gradients_and_hessians/gradients", + "name": "gradients", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.update_gradients_and_hessians.gradients", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/update_gradients_and_hessians/hessians", + "name": "hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.update_gradients_and_hessians.hessians", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/update_gradients_and_hessians/y_true", + "name": "y_true", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.update_gradients_and_hessians.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/update_gradients_and_hessians/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.update_gradients_and_hessians.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/LeastSquares/update_gradients_and_hessians/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.LeastSquares.update_gradients_and_hessians.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": 
false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def update_gradients_and_hessians(self, gradients, hessians, y_true,\n raw_predictions, sample_weight):\n # shape (1, n_samples) --> (n_samples,). reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n gradients = gradients.reshape(-1)\n if sample_weight is None:\n _update_gradients_least_squares(gradients, y_true, raw_predictions)\n else:\n hessians = hessians.reshape(-1)\n _update_gradients_hessians_least_squares(gradients, hessians,\n y_true, raw_predictions,\n sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/__init__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.__init__.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Poisson deviance loss with log-link, for regression.\n\nFor a given sample x_i, Poisson deviance loss is defined as::\n\n loss(x_i) = y_true_i * log(y_true_i/exp(raw_pred_i))\n - y_true_i + exp(raw_pred_i))\n\nThis actually computes half the Poisson deviance to simplify\nthe computation of the gradients.", + "docstring": "", + "code": " def __init__(self, sample_weight):\n super().__init__(hessians_are_constant=False)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/get_baseline_prediction", + "name": "get_baseline_prediction", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.get_baseline_prediction", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/get_baseline_prediction/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.get_baseline_prediction.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/get_baseline_prediction/y_train", + "name": "y_train", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.get_baseline_prediction.y_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/get_baseline_prediction/sample_weight", + "name": "sample_weight", + "qname": 
"sklearn.ensemble._hist_gradient_boosting.loss.Poisson.get_baseline_prediction.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/get_baseline_prediction/prediction_dim", + "name": "prediction_dim", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.get_baseline_prediction.prediction_dim", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_baseline_prediction(self, y_train, sample_weight, prediction_dim):\n y_pred = np.average(y_train, weights=sample_weight)\n eps = np.finfo(y_train.dtype).eps\n y_pred = np.clip(y_pred, eps, None)\n return np.log(y_pred)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/pointwise_loss", + "name": "pointwise_loss", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.pointwise_loss", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/pointwise_loss/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.pointwise_loss.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/pointwise_loss/y_true", + "name": "y_true", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.pointwise_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/pointwise_loss/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.pointwise_loss.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def pointwise_loss(self, y_true, raw_predictions):\n # shape (1, n_samples) --> (n_samples,). 
reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n # TODO: For speed, we could remove the constant xlogy(y_true, y_true)\n # Advantage of this form: minimum of zero at raw_predictions = y_true.\n loss = (xlogy(y_true, y_true) - y_true * (raw_predictions + 1)\n + np.exp(raw_predictions))\n return loss" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/update_gradients_and_hessians", + "name": "update_gradients_and_hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.update_gradients_and_hessians", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/update_gradients_and_hessians/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.update_gradients_and_hessians.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/update_gradients_and_hessians/gradients", + "name": "gradients", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.update_gradients_and_hessians.gradients", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/update_gradients_and_hessians/hessians", + "name": "hessians", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.update_gradients_and_hessians.hessians", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/update_gradients_and_hessians/y_true", + "name": "y_true", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.update_gradients_and_hessians.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/update_gradients_and_hessians/raw_predictions", + "name": "raw_predictions", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.update_gradients_and_hessians.raw_predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.loss/Poisson/update_gradients_and_hessians/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._hist_gradient_boosting.loss.Poisson.update_gradients_and_hessians.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def update_gradients_and_hessians(self, gradients, hessians, y_true,\n raw_predictions, sample_weight):\n # shape (1, n_samples) --> (n_samples,). 
reshape(-1) is more likely to\n # return a view.\n raw_predictions = raw_predictions.reshape(-1)\n gradients = gradients.reshape(-1)\n hessians = hessians.reshape(-1)\n _update_gradients_hessians_poisson(gradients, hessians,\n y_true, raw_predictions,\n sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/__init__/nodes", + "name": "nodes", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.__init__.nodes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of PREDICTOR_RECORD_DTYPE", + "default_value": "", + "description": "The nodes of the tree." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of PREDICTOR_RECORD_DTYPE" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/__init__/binned_left_cat_bitsets", + "name": "binned_left_cat_bitsets", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.__init__.binned_left_cat_bitsets", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_categorical_splits, 8), dtype=uint32", + "default_value": "", + "description": "Array of bitsets for binned categories used in predict_binned when a\nsplit is categorical." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_categorical_splits, 8)" + }, + { + "kind": "NamedType", + "name": "dtype=uint32" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/__init__/raw_left_cat_bitsets", + "name": "raw_left_cat_bitsets", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.__init__.raw_left_cat_bitsets", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_categorical_splits, 8), dtype=uint32", + "default_value": "", + "description": "Array of bitsets for raw categories used in predict when a split is\ncategorical." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_categorical_splits, 8)" + }, + { + "kind": "NamedType", + "name": "dtype=uint32" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Tree class used for predictions.", + "docstring": "", + "code": " def __init__(self, nodes, binned_left_cat_bitsets,\n raw_left_cat_bitsets):\n self.nodes = nodes\n self.binned_left_cat_bitsets = binned_left_cat_bitsets\n self.raw_left_cat_bitsets = raw_left_cat_bitsets" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/compute_partial_dependence", + "name": "compute_partial_dependence", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.compute_partial_dependence", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/compute_partial_dependence/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.compute_partial_dependence.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/compute_partial_dependence/grid", + "name": "grid", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.compute_partial_dependence.grid", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples, n_target_features)", + "default_value": "", + "description": "The grid points on which the partial dependence should be\nevaluated." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_target_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/compute_partial_dependence/target_features", + "name": "target_features", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.compute_partial_dependence.target_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_target_features)", + "default_value": "", + "description": "The set of target features for which the partial dependence\nshould be evaluated." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_target_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/compute_partial_dependence/out", + "name": "out", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.compute_partial_dependence.out", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples)", + "default_value": "", + "description": "The value of the partial dependence function on each grid\npoint." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fast partial dependence computation.", + "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray, shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray, shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\nout : ndarray, shape (n_samples)\n The value of the partial dependence function on each grid\n point.", + "code": " def compute_partial_dependence(self, grid, target_features, out):\n \"\"\"Fast partial dependence computation.\n\n Parameters\n ----------\n grid : ndarray, shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\n target_features : ndarray, shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n out : ndarray, shape (n_samples)\n The value of the partial dependence function on each grid\n point.\n \"\"\"\n _compute_partial_dependence(self.nodes, grid, target_features, out)" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/get_max_depth", + "name": "get_max_depth", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.get_max_depth", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/get_max_depth/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.get_max_depth.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return maximum depth among all leaves.", + "docstring": "Return maximum depth among all leaves.", + "code": " def get_max_depth(self):\n \"\"\"Return maximum depth among all leaves.\"\"\"\n return int(self.nodes['depth'].max())" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/get_n_leaf_nodes", + "name": "get_n_leaf_nodes", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.get_n_leaf_nodes", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/get_n_leaf_nodes/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.get_n_leaf_nodes.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return number of leaves.", + "docstring": "Return number of leaves.", + "code": " def get_n_leaf_nodes(self):\n \"\"\"Return number of leaves.\"\"\"\n return int(self.nodes['is_leaf'].sum())" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/predict", + "name": "predict", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.predict", + "decorators": [], + "parameters": [ + 
{ + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/predict/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/predict/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/predict/known_cat_bitsets", + "name": "known_cat_bitsets", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.predict.known_cat_bitsets", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_categorical_features, 8)", + "default_value": "", + "description": "Array of bitsets of known categories, for each categorical feature." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_categorical_features, 8)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/predict/f_idx_map", + "name": "f_idx_map", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.predict.f_idx_map", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,)", + "default_value": "", + "description": "Map from original feature index to the corresponding index in the\nknown_cat_bitsets array." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict raw values for non-binned data.", + "docstring": "Predict raw values for non-binned data.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_features)\n The input samples.\n\nknown_cat_bitsets : ndarray of shape (n_categorical_features, 8)\n Array of bitsets of known categories, for each categorical feature.\n\nf_idx_map : ndarray of shape (n_features,)\n Map from original feature index to the corresponding index in the\n known_cat_bitsets array.\n\nReturns\n-------\ny : ndarray, shape (n_samples,)\n The raw predicted values.", + "code": " def predict(self, X, known_cat_bitsets, f_idx_map):\n \"\"\"Predict raw values for non-binned data.\n\n Parameters\n ----------\n X : ndarray, shape (n_samples, n_features)\n The input samples.\n\n known_cat_bitsets : ndarray of shape (n_categorical_features, 8)\n Array of bitsets of known categories, for each categorical feature.\n\n f_idx_map : ndarray of shape (n_features,)\n Map from original feature index to the corresponding index in the\n known_cat_bitsets array.\n\n Returns\n -------\n y : ndarray, shape (n_samples,)\n The raw predicted values.\n \"\"\"\n out = np.empty(X.shape[0], dtype=Y_DTYPE)\n _predict_from_raw_data(self.nodes, X, self.raw_left_cat_bitsets,\n known_cat_bitsets, f_idx_map, out)\n return out" + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/predict_binned", + "name": "predict_binned", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.predict_binned", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/predict_binned/self", + "name": "self", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.predict_binned.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/predict_binned/X", + "name": "X", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.predict_binned.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._hist_gradient_boosting.predictor/TreePredictor/predict_binned/missing_values_bin_idx", + "name": "missing_values_bin_idx", + "qname": "sklearn.ensemble._hist_gradient_boosting.predictor.TreePredictor.predict_binned.missing_values_bin_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "uint8", + "default_value": "", + "description": "Index of the bin that is used for missing values. This is the\nindex of the last bin and is always equal to max_bins (as passed\nto the GBDT classes), or equivalently to n_bins - 1." 
+ }, + "type": { + "kind": "NamedType", + "name": "uint8" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict raw values for binned data.", + "docstring": "Predict raw values for binned data.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_features)\n The input samples.\nmissing_values_bin_idx : uint8\n Index of the bin that is used for missing values. This is the\n index of the last bin and is always equal to max_bins (as passed\n to the GBDT classes), or equivalently to n_bins - 1.\n\nReturns\n-------\ny : ndarray, shape (n_samples,)\n The raw predicted values.", + "code": " def predict_binned(self, X, missing_values_bin_idx):\n \"\"\"Predict raw values for binned data.\n\n Parameters\n ----------\n X : ndarray, shape (n_samples, n_features)\n The input samples.\n missing_values_bin_idx : uint8\n Index of the bin that is used for missing values. This is the\n index of the last bin and is always equal to max_bins (as passed\n to the GBDT classes), or equivalently to n_bins - 1.\n\n Returns\n -------\n y : ndarray, shape (n_samples,)\n The raw predicted values.\n \"\"\"\n out = np.empty(X.shape[0], dtype=Y_DTYPE)\n _predict_from_binned_data(self.nodes, X,\n self.binned_left_cat_bitsets,\n missing_values_bin_idx, out)\n return out" + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._iforest.IsolationForest.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._iforest.IsolationForest.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._iforest.IsolationForest.__init__.n_estimators", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of base estimators in the ensemble." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/__init__/max_samples", + "name": "max_samples", + "qname": "sklearn.ensemble._iforest.IsolationForest.__init__.max_samples", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "\"auto\", int or float", + "default_value": "\"auto\"", + "description": "The number of samples to draw from X to train each base estimator.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n - If \"auto\", then `max_samples=min(256, n_samples)`.\n\nIf max_samples is larger than the number of samples provided,\nall samples will be used for all trees (no sampling)." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "\"auto\"" + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/__init__/contamination", + "name": "contamination", + "qname": "sklearn.ensemble._iforest.IsolationForest.__init__.contamination", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto' or float", + "default_value": "'auto'", + "description": "The amount of contamination of the data set, i.e. the proportion\nof outliers in the data set. Used when fitting to define the threshold\non the scores of the samples.\n\n - If 'auto', the threshold is determined as in the\n original paper.\n - If float, the contamination should be in the range [0, 0.5].\n\n.. versionchanged:: 0.22\n The default value of ``contamination`` changed from 0.1\n to ``'auto'``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": 0.5, + "min_inclusive": true, + "max_inclusive": true + }, + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/__init__/max_features", + "name": "max_features", + "qname": "sklearn.ensemble._iforest.IsolationForest.__init__.max_features", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1.0", + "description": "The number of features to draw from X to train each base estimator.\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/__init__/bootstrap", + "name": "bootstrap", + "qname": "sklearn.ensemble._iforest.IsolationForest.__init__.bootstrap", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, individual trees are fit on random subsets of the training\ndata sampled with replacement. If False, sampling without replacement\nis performed." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._iforest.IsolationForest.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to run in parallel for both :meth:`fit` and\n:meth:`predict`. ``None`` means 1 unless in a\n:obj:`joblib.parallel_backend` context. ``-1`` means using all\nprocessors. See :term:`Glossary ` for more details." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._iforest.IsolationForest.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the pseudo-randomness of the selection of the feature\nand split values for each branching step and each tree in the forest.\n\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._iforest.IsolationForest.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls the verbosity of the tree building process." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.ensemble._iforest.IsolationForest.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`the Glossary `.\n\n.. versionadded:: 0.21" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Isolation Forest Algorithm.\n\nReturn the anomaly score of each sample using the IsolationForest algorithm\n\nThe IsolationForest 'isolates' observations by randomly selecting a feature\nand then randomly selecting a split value between the maximum and minimum\nvalues of the selected feature.\n\nSince recursive partitioning can be represented by a tree structure, the\nnumber of splittings required to isolate a sample is equivalent to the path\nlength from the root node to the terminating node.\n\nThis path length, averaged over a forest of such random trees, is a\nmeasure of normality and our decision function.\n\nRandom partitioning produces noticeably shorter paths for anomalies.\nHence, when a forest of random trees collectively produce shorter path\nlengths for particular samples, they are highly likely to be anomalies.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *,\n n_estimators=100,\n max_samples=\"auto\",\n contamination=\"auto\",\n max_features=1.,\n bootstrap=False,\n n_jobs=None,\n random_state=None,\n verbose=0,\n warm_start=False):\n super().__init__(\n base_estimator=ExtraTreeRegressor(\n max_features=1,\n splitter='random',\n random_state=random_state),\n # here above max_features has no links with self.max_features\n bootstrap=bootstrap,\n bootstrap_features=False,\n n_estimators=n_estimators,\n max_samples=max_samples,\n max_features=max_features,\n warm_start=warm_start,\n n_jobs=n_jobs,\n random_state=random_state,\n verbose=verbose)\n\n self.contamination = contamination" + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_compute_chunked_score_samples", + "name": "_compute_chunked_score_samples", + "qname": "sklearn.ensemble._iforest.IsolationForest._compute_chunked_score_samples", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_compute_chunked_score_samples/self", + "name": "self", + "qname": "sklearn.ensemble._iforest.IsolationForest._compute_chunked_score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_compute_chunked_score_samples/X", + "name": "X", + "qname": "sklearn.ensemble._iforest.IsolationForest._compute_chunked_score_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _compute_chunked_score_samples(self, X):\n\n n_samples = _num_samples(X)\n\n if self._max_features == X.shape[1]:\n subsample_features = False\n else:\n subsample_features = True\n\n # We get as many rows as possible within our working_memory budget\n # (defined by sklearn.get_config()['working_memory']) to store\n # self._max_features in each row during computation.\n #\n # Note:\n # - this will get at least 1 row, even if 1 row of score will\n # exceed working_memory.\n # - this does only account for temporary memory usage while loading\n # the data needed to compute the scores -- the returned scores\n # themselves are 1D.\n\n chunk_n_rows = get_chunk_n_rows(row_bytes=16 * self._max_features,\n max_n_rows=n_samples)\n slices = gen_batches(n_samples, chunk_n_rows)\n\n scores = np.zeros(n_samples, order=\"f\")\n\n for sl in slices:\n # compute score on the slices of test samples:\n scores[sl] = self._compute_score_samples(X[sl], subsample_features)\n\n return scores" + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_compute_score_samples", + "name": "_compute_score_samples", + "qname": "sklearn.ensemble._iforest.IsolationForest._compute_score_samples", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_compute_score_samples/self", + "name": "self", + "qname": "sklearn.ensemble._iforest.IsolationForest._compute_score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.ensemble._iforest/IsolationForest/_compute_score_samples/X", + "name": "X", + "qname": "sklearn.ensemble._iforest.IsolationForest._compute_score_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or sparse matrix", + "default_value": "", + "description": "Data matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_compute_score_samples/subsample_features", + "name": "subsample_features", + "qname": "sklearn.ensemble._iforest.IsolationForest._compute_score_samples.subsample_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Whether features should be subsampled." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the score of each samples in X going through the extra trees.", + "docstring": "Compute the score of each samples in X going through the extra trees.\n\nParameters\n----------\nX : array-like or sparse matrix\n Data matrix.\n\nsubsample_features : bool\n Whether features should be subsampled.", + "code": " def _compute_score_samples(self, X, subsample_features):\n \"\"\"\n Compute the score of each samples in X going through the extra trees.\n\n Parameters\n ----------\n X : array-like or sparse matrix\n Data matrix.\n\n subsample_features : bool\n Whether features should be subsampled.\n \"\"\"\n n_samples = X.shape[0]\n\n depths = np.zeros(n_samples, order=\"f\")\n\n for tree, features in zip(self.estimators_, self.estimators_features_):\n X_subset = X[:, features] if subsample_features else X\n\n leaves_index = tree.apply(X_subset)\n node_indicator = tree.decision_path(X_subset)\n n_samples_leaf = tree.tree_.n_node_samples[leaves_index]\n\n depths += (\n np.ravel(node_indicator.sum(axis=1))\n + _average_path_length(n_samples_leaf)\n - 1.0\n )\n\n scores = 2 ** (\n -depths\n / (len(self.estimators_)\n * _average_path_length([self.max_samples_]))\n )\n return scores" + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_more_tags", + "name": "_more_tags", + "qname": "sklearn.ensemble._iforest.IsolationForest._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_more_tags/self", + "name": "self", + "qname": "sklearn.ensemble._iforest.IsolationForest._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_parallel_args", + "name": "_parallel_args", + "qname": "sklearn.ensemble._iforest.IsolationForest._parallel_args", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_parallel_args/self", + "name": "self", + "qname": 
"sklearn.ensemble._iforest.IsolationForest._parallel_args.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _parallel_args(self):\n # ExtraTreeRegressor releases the GIL, so it's more efficient to use\n # a thread-based backend rather than a process-based backend so as\n # to avoid suffering from communication overhead and extra memory\n # copies.\n return _joblib_parallel_args(prefer='threads')" + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_set_oob_score", + "name": "_set_oob_score", + "qname": "sklearn.ensemble._iforest.IsolationForest._set_oob_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_set_oob_score/self", + "name": "self", + "qname": "sklearn.ensemble._iforest.IsolationForest._set_oob_score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_set_oob_score/X", + "name": "X", + "qname": "sklearn.ensemble._iforest.IsolationForest._set_oob_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/_set_oob_score/y", + "name": "y", + "qname": "sklearn.ensemble._iforest.IsolationForest._set_oob_score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _set_oob_score(self, X, y):\n raise NotImplementedError(\"OOB score not supported by iforest\")" + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/decision_function", + "name": "decision_function", + "qname": "sklearn.ensemble._iforest.IsolationForest.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/decision_function/self", + "name": "self", + "qname": "sklearn.ensemble._iforest.IsolationForest.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/decision_function/X", + "name": "X", + "qname": "sklearn.ensemble._iforest.IsolationForest.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Average anomaly score of X of the base classifiers.\n\nThe anomaly score of an input sample is computed as\nthe mean anomaly score of the trees in the forest.\n\nThe measure of normality of an observation given a tree is the depth\nof the leaf containing this observation, which is equivalent to\nthe number of splittings required to isolate this point. In case of\nseveral observations n_left in the leaf, the average path length of\na n_left samples isolation tree is added.", + "docstring": "Average anomaly score of X of the base classifiers.\n\nThe anomaly score of an input sample is computed as\nthe mean anomaly score of the trees in the forest.\n\nThe measure of normality of an observation given a tree is the depth\nof the leaf containing this observation, which is equivalent to\nthe number of splittings required to isolate this point. In case of\nseveral observations n_left in the leaf, the average path length of\na n_left samples isolation tree is added.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nscores : ndarray of shape (n_samples,)\n The anomaly score of the input samples.\n The lower, the more abnormal. Negative scores represent outliers,\n positive scores represent inliers.", + "code": " def decision_function(self, X):\n \"\"\"\n Average anomaly score of X of the base classifiers.\n\n The anomaly score of an input sample is computed as\n the mean anomaly score of the trees in the forest.\n\n The measure of normality of an observation given a tree is the depth\n of the leaf containing this observation, which is equivalent to\n the number of splittings required to isolate this point. In case of\n several observations n_left in the leaf, the average path length of\n a n_left samples isolation tree is added.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n scores : ndarray of shape (n_samples,)\n The anomaly score of the input samples.\n The lower, the more abnormal. 
Negative scores represent outliers,\n positive scores represent inliers.\n \"\"\"\n # We subtract self.offset_ to make 0 be the threshold value for being\n # an outlier:\n\n return self.score_samples(X) - self.offset_" + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/fit", + "name": "fit", + "qname": "sklearn.ensemble._iforest.IsolationForest.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/fit/self", + "name": "self", + "qname": "sklearn.ensemble._iforest.IsolationForest.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/fit/X", + "name": "X", + "qname": "sklearn.ensemble._iforest.IsolationForest.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Use ``dtype=np.float32`` for maximum\nefficiency. Sparse matrices are also supported, use sparse\n``csc_matrix`` for maximum efficiency." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/fit/y", + "name": "y", + "qname": "sklearn.ensemble._iforest.IsolationForest.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._iforest.IsolationForest.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit estimator.", + "docstring": "Fit estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Use ``dtype=np.float32`` for maximum\n efficiency. Sparse matrices are also supported, use sparse\n ``csc_matrix`` for maximum efficiency.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\nself : object\n Fitted estimator.", + "code": " def fit(self, X, y=None, sample_weight=None):\n \"\"\"\n Fit estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Use ``dtype=np.float32`` for maximum\n efficiency. 
Sparse matrices are also supported, use sparse\n ``csc_matrix`` for maximum efficiency.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\n Returns\n -------\n self : object\n Fitted estimator.\n \"\"\"\n X = check_array(X, accept_sparse=['csc'])\n if issparse(X):\n # Pre-sort indices to avoid that each individual tree of the\n # ensemble sorts the indices.\n X.sort_indices()\n\n rnd = check_random_state(self.random_state)\n y = rnd.uniform(size=X.shape[0])\n\n # ensure that max_sample is in [1, n_samples]:\n n_samples = X.shape[0]\n\n if isinstance(self.max_samples, str):\n if self.max_samples == 'auto':\n max_samples = min(256, n_samples)\n else:\n raise ValueError('max_samples (%s) is not supported.'\n 'Valid choices are: \"auto\", int or'\n 'float' % self.max_samples)\n\n elif isinstance(self.max_samples, numbers.Integral):\n if self.max_samples > n_samples:\n warn(\"max_samples (%s) is greater than the \"\n \"total number of samples (%s). max_samples \"\n \"will be set to n_samples for estimation.\"\n % (self.max_samples, n_samples))\n max_samples = n_samples\n else:\n max_samples = self.max_samples\n else: # float\n if not 0. < self.max_samples <= 1.:\n raise ValueError(\"max_samples must be in (0, 1], got %r\"\n % self.max_samples)\n max_samples = int(self.max_samples * X.shape[0])\n\n self.max_samples_ = max_samples\n max_depth = int(np.ceil(np.log2(max(max_samples, 2))))\n super()._fit(X, y, max_samples,\n max_depth=max_depth,\n sample_weight=sample_weight)\n\n if self.contamination == \"auto\":\n # 0.5 plays a special role as described in the original paper.\n # we take the opposite as we consider the opposite of their score.\n self.offset_ = -0.5\n return self\n\n # else, define offset_ wrt contamination parameter\n self.offset_ = np.percentile(self.score_samples(X),\n 100. * self.contamination)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/predict", + "name": "predict", + "qname": "sklearn.ensemble._iforest.IsolationForest.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/predict/self", + "name": "self", + "qname": "sklearn.ensemble._iforest.IsolationForest.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/predict/X", + "name": "X", + "qname": "sklearn.ensemble._iforest.IsolationForest.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." 
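The `fit` code above resolves `max_samples` from `'auto'`, an int, or a float before training. A standalone re-implementation sketch of that branching; `resolve_max_samples` is our own name, not a library function:

```python
# Standalone re-implementation sketch of the max_samples resolution done in
# IsolationForest.fit above; resolve_max_samples is a hypothetical helper.
import numbers
import warnings

def resolve_max_samples(max_samples, n_samples):
    if isinstance(max_samples, str):
        if max_samples == "auto":
            return min(256, n_samples)
        raise ValueError('max_samples (%s) is not supported. Valid choices '
                         'are: "auto", int or float' % max_samples)
    if isinstance(max_samples, numbers.Integral):
        if max_samples > n_samples:
            warnings.warn("max_samples greater than the total number of "
                          "samples; using n_samples instead")
            return n_samples
        return max_samples
    if not 0.0 < max_samples <= 1.0:  # float case
        raise ValueError("max_samples must be in (0, 1], got %r" % max_samples)
    return int(max_samples * n_samples)

print(resolve_max_samples("auto", 1000))  # 256
print(resolve_max_samples(0.5, 1000))     # 500
# fit() then caps tree depth at ceil(log2(max(max_samples, 2))).
```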
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict if a particular sample is an outlier or not.", + "docstring": "Predict if a particular sample is an outlier or not.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n For each observation, tells whether or not (+1 or -1) it should\n be considered as an inlier according to the fitted model.", + "code": " def predict(self, X):\n \"\"\"\n Predict if a particular sample is an outlier or not.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n is_inlier : ndarray of shape (n_samples,)\n For each observation, tells whether or not (+1 or -1) it should\n be considered as an inlier according to the fitted model.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse='csr')\n is_inlier = np.ones(X.shape[0], dtype=int)\n is_inlier[self.decision_function(X) < 0] = -1\n return is_inlier" + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/score_samples", + "name": "score_samples", + "qname": "sklearn.ensemble._iforest.IsolationForest.score_samples", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/score_samples/self", + "name": "self", + "qname": "sklearn.ensemble._iforest.IsolationForest.score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/IsolationForest/score_samples/X", + "name": "X", + "qname": "sklearn.ensemble._iforest.IsolationForest.score_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Opposite of the anomaly score defined in the original paper.\n\nThe anomaly score of an input sample is computed as\nthe mean anomaly score of the trees in the forest.\n\nThe measure of normality of an observation given a tree is the depth\nof the leaf containing this observation, which is equivalent to\nthe number of splittings required to isolate this point. 
In case of\nseveral observations n_left in the leaf, the average path length of\na n_left samples isolation tree is added.", + "docstring": "Opposite of the anomaly score defined in the original paper.\n\nThe anomaly score of an input sample is computed as\nthe mean anomaly score of the trees in the forest.\n\nThe measure of normality of an observation given a tree is the depth\nof the leaf containing this observation, which is equivalent to\nthe number of splittings required to isolate this point. In case of\nseveral observations n_left in the leaf, the average path length of\na n_left samples isolation tree is added.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\nscores : ndarray of shape (n_samples,)\n The anomaly score of the input samples.\n The lower, the more abnormal.", + "code": " def score_samples(self, X):\n \"\"\"\n Opposite of the anomaly score defined in the original paper.\n\n The anomaly score of an input sample is computed as\n the mean anomaly score of the trees in the forest.\n\n The measure of normality of an observation given a tree is the depth\n of the leaf containing this observation, which is equivalent to\n the number of splittings required to isolate this point. In case of\n several observations n_left in the leaf, the average path length of\n a n_left samples isolation tree is added.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n scores : ndarray of shape (n_samples,)\n The anomaly score of the input samples.\n The lower, the more abnormal.\n \"\"\"\n # code structure from ForestClassifier/predict_proba\n\n check_is_fitted(self)\n\n # Check data\n X = check_array(X, accept_sparse='csr')\n if self.n_features_ != X.shape[1]:\n raise ValueError(\"Number of features of the model must \"\n \"match the input. 
Model n_features is {0} and \"\n \"input n_features is {1}.\"\n \"\".format(self.n_features_, X.shape[1]))\n\n # Take the opposite of the scores as bigger is better (here less\n # abnormal)\n return -self._compute_chunked_score_samples(X)" + }, + { + "id": "scikit-learn/sklearn.ensemble._iforest/_average_path_length", + "name": "_average_path_length", + "qname": "sklearn.ensemble._iforest._average_path_length", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._iforest/_average_path_length/n_samples_leaf", + "name": "n_samples_leaf", + "qname": "sklearn.ensemble._iforest._average_path_length.n_samples_leaf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "The average path length in a n_samples iTree, which is equal to\nthe average path length of an unsuccessful BST search since the\nlatter has the same structure as an isolation tree.\nParameters\n----------\nn_samples_leaf : array-like of shape (n_samples,)\n The number of training samples in each test sample leaf, for\n each estimators.", + "docstring": "The average path length in a n_samples iTree, which is equal to\nthe average path length of an unsuccessful BST search since the\nlatter has the same structure as an isolation tree.\nParameters\n----------\nn_samples_leaf : array-like of shape (n_samples,)\n The number of training samples in each test sample leaf, for\n each estimators.\n\nReturns\n-------\naverage_path_length : ndarray of shape (n_samples,)", + "code": "def _average_path_length(n_samples_leaf):\n \"\"\"\n The average path length in a n_samples iTree, which is equal to\n the average path length of an unsuccessful BST search since the\n latter has the same structure as an isolation tree.\n Parameters\n ----------\n n_samples_leaf : array-like of shape (n_samples,)\n The number of training samples in each test sample leaf, for\n each estimators.\n\n Returns\n -------\n average_path_length : ndarray of shape (n_samples,)\n \"\"\"\n\n n_samples_leaf = check_array(n_samples_leaf, ensure_2d=False)\n\n n_samples_leaf_shape = n_samples_leaf.shape\n n_samples_leaf = n_samples_leaf.reshape((1, -1))\n average_path_length = np.zeros(n_samples_leaf.shape)\n\n mask_1 = n_samples_leaf <= 1\n mask_2 = n_samples_leaf == 2\n not_mask = ~np.logical_or(mask_1, mask_2)\n\n average_path_length[mask_1] = 0.\n average_path_length[mask_2] = 1.\n average_path_length[not_mask] = (\n 2.0 * (np.log(n_samples_leaf[not_mask] - 1.0) + np.euler_gamma)\n - 2.0 * (n_samples_leaf[not_mask] - 1.0) / n_samples_leaf[not_mask]\n )\n\n return average_path_length.reshape(n_samples_leaf_shape)" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._stacking.StackingClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._stacking.StackingClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/__init__/estimators", + "name": "estimators", + "qname": 
"sklearn.ensemble._stacking.StackingClassifier.__init__.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of (str, estimator)", + "default_value": "", + "description": "Base estimators which will be stacked together. Each element of the\nlist is defined as a tuple of string (i.e. name) and an estimator\ninstance. An estimator can be set to 'drop' using `set_params`." + }, + "type": { + "kind": "NamedType", + "name": "list of (str, estimator)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/__init__/final_estimator", + "name": "final_estimator", + "qname": "sklearn.ensemble._stacking.StackingClassifier.__init__.final_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator", + "default_value": "None", + "description": "A classifier which will be used to combine the base estimators.\nThe default classifier is a\n:class:`~sklearn.linear_model.LogisticRegression`." + }, + "type": { + "kind": "NamedType", + "name": "estimator" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/__init__/cv", + "name": "cv", + "qname": "sklearn.ensemble._stacking.StackingClassifier.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy used in\n`cross_val_predict` to train `final_estimator`. Possible inputs for\ncv are:\n\n* None, to use the default 5-fold cross validation,\n* integer, to specify the number of folds in a (Stratified) KFold,\n* An object to be used as a cross-validation generator,\n* An iterable yielding train, test splits.\n\nFor integer/None inputs, if the estimator is a classifier and y is\neither binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used.\nIn all other cases, :class:`~sklearn.model_selection.KFold` is used.\nThese splitters are instantiated with `shuffle=False` so the splits\nwill be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. note::\n A larger number of split will provide no benefits if the number\n of training samples is large enough. Indeed, the training time\n will increase. ``cv`` is not used for model evaluation but for\n prediction." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/__init__/stack_method", + "name": "stack_method", + "qname": "sklearn.ensemble._stacking.StackingClassifier.__init__.stack_method", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'predict_proba', 'decision_function', 'predict'}", + "default_value": "'auto'", + "description": "Methods called for each base estimator. It can be:\n\n* if 'auto', it will try to invoke, for each estimator,\n `'predict_proba'`, `'decision_function'` or `'predict'` in that\n order.\n* otherwise, one of `'predict_proba'`, `'decision_function'` or\n `'predict'`. If the method is not implemented by the estimator, it\n will raise an error." 
+ }, + "type": { + "kind": "EnumType", + "values": ["predict_proba", "auto", "decision_function", "predict"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._stacking.StackingClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to run in parallel all `estimators` `fit`.\n`None` means 1 unless in a `joblib.parallel_backend` context. -1 means\nusing all processors. See Glossary for more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/__init__/passthrough", + "name": "passthrough", + "qname": "sklearn.ensemble._stacking.StackingClassifier.__init__.passthrough", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When False, only the predictions of estimators will be used as\ntraining data for `final_estimator`. When True, the\n`final_estimator` is trained on the predictions as well as the\noriginal training data." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._stacking.StackingClassifier.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Stack of estimators with a final classifier.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a classifier to compute the final prediction. Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.22", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimators, final_estimator=None, *, cv=None,\n stack_method='auto', n_jobs=None, passthrough=False,\n verbose=0):\n super().__init__(\n estimators=estimators,\n final_estimator=final_estimator,\n cv=cv,\n stack_method=stack_method,\n n_jobs=n_jobs,\n passthrough=passthrough,\n verbose=verbose\n )" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/_sk_visual_block_", + "name": "_sk_visual_block_", + "qname": "sklearn.ensemble._stacking.StackingClassifier._sk_visual_block_", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/_sk_visual_block_/self", + "name": "self", + "qname": "sklearn.ensemble._stacking.StackingClassifier._sk_visual_block_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _sk_visual_block_(self):\n # If final_estimator's default changes then this should be\n # updated.\n if self.final_estimator is None:\n final_estimator = LogisticRegression()\n else:\n final_estimator = self.final_estimator\n return super()._sk_visual_block_(final_estimator)" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/_validate_final_estimator", + "name": "_validate_final_estimator", + "qname": "sklearn.ensemble._stacking.StackingClassifier._validate_final_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/_validate_final_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._stacking.StackingClassifier._validate_final_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_final_estimator(self):\n self._clone_final_estimator(default=LogisticRegression())\n if not is_classifier(self.final_estimator_):\n raise ValueError(\n \"'final_estimator' parameter should be a classifier. 
Got {}\"\n .format(self.final_estimator_)\n )" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/decision_function", + "name": "decision_function", + "qname": "sklearn.ensemble._stacking.StackingClassifier.decision_function", + "decorators": ["if_delegate_has_method(delegate='final_estimator_')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/decision_function/self", + "name": "self", + "qname": "sklearn.ensemble._stacking.StackingClassifier.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/decision_function/X", + "name": "X", + "qname": "sklearn.ensemble._stacking.StackingClassifier.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict decision function for samples in X using\n`final_estimator_.decision_function`.", + "docstring": "Predict decision function for samples in X using\n`final_estimator_.decision_function`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\nReturns\n-------\ndecisions : ndarray of shape (n_samples,), (n_samples, n_classes), or (n_samples, n_classes * (n_classes-1) / 2)\n The decision function computed the final estimator.", + "code": " @if_delegate_has_method(delegate='final_estimator_')\n def decision_function(self, X):\n \"\"\"Predict decision function for samples in X using\n `final_estimator_.decision_function`.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n Returns\n -------\n decisions : ndarray of shape (n_samples,), (n_samples, n_classes), \\\n or (n_samples, n_classes * (n_classes-1) / 2)\n The decision function computed the final estimator.\n \"\"\"\n check_is_fitted(self)\n return self.final_estimator_.decision_function(self.transform(X))" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/fit", + "name": "fit", + "qname": "sklearn.ensemble._stacking.StackingClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/fit/self", + "name": "self", + "qname": "sklearn.ensemble._stacking.StackingClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/fit/X", + "name": "X", + "qname": "sklearn.ensemble._stacking.StackingClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": 
false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where `n_samples` is the number of samples and\n`n_features` is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/fit/y", + "name": "y", + "qname": "sklearn.ensemble._stacking.StackingClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._stacking.StackingClassifier.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted.\nNote that this is supported only if all underlying estimators\nsupport sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the estimators.", + "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the estimators.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n Returns\n -------\n self : object\n \"\"\"\n check_classification_targets(y)\n self._le = LabelEncoder().fit(y)\n self.classes_ = self._le.classes_\n return super().fit(X, self._le.transform(y), sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/predict", + "name": "predict", + "qname": "sklearn.ensemble._stacking.StackingClassifier.predict", + "decorators": ["if_delegate_has_method(delegate='final_estimator_')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/predict/self", + "name": "self", + "qname": "sklearn.ensemble._stacking.StackingClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/predict/X", + "name": "X", + "qname": "sklearn.ensemble._stacking.StackingClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/predict/predict_params", + "name": "predict_params", + "qname": "sklearn.ensemble._stacking.StackingClassifier.predict.predict_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "dict of str -> obj", + "default_value": "", + "description": "Parameters to the `predict` called by the `final_estimator`. Note\nthat this may be used to return uncertainties from some estimators\nwith `return_std` or `return_cov`. Be aware that it will only\naccounts for uncertainty in the final estimator." + }, + "type": { + "kind": "NamedType", + "name": "dict of str -> obj" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict target for X.", + "docstring": "Predict target for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n**predict_params : dict of str -> obj\n Parameters to the `predict` called by the `final_estimator`. Note\n that this may be used to return uncertainties from some estimators\n with `return_std` or `return_cov`. Be aware that it will only\n accounts for uncertainty in the final estimator.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n Predicted targets.", + "code": " @if_delegate_has_method(delegate='final_estimator_')\n def predict(self, X, **predict_params):\n \"\"\"Predict target for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n **predict_params : dict of str -> obj\n Parameters to the `predict` called by the `final_estimator`. 
Note\n that this may be used to return uncertainties from some estimators\n with `return_std` or `return_cov`. Be aware that it will only\n accounts for uncertainty in the final estimator.\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n Predicted targets.\n \"\"\"\n y_pred = super().predict(X, **predict_params)\n return self._le.inverse_transform(y_pred)" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.ensemble._stacking.StackingClassifier.predict_proba", + "decorators": ["if_delegate_has_method(delegate='final_estimator_')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.ensemble._stacking.StackingClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.ensemble._stacking.StackingClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class probabilities for X using\n`final_estimator_.predict_proba`.", + "docstring": "Predict class probabilities for X using\n`final_estimator_.predict_proba`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\nReturns\n-------\nprobabilities : ndarray of shape (n_samples, n_classes) or list of ndarray of shape (n_output,)\n The class probabilities of the input samples.", + "code": " @if_delegate_has_method(delegate='final_estimator_')\n def predict_proba(self, X):\n \"\"\"Predict class probabilities for X using\n `final_estimator_.predict_proba`.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n Returns\n -------\n probabilities : ndarray of shape (n_samples, n_classes) or \\\n list of ndarray of shape (n_output,)\n The class probabilities of the input samples.\n \"\"\"\n check_is_fitted(self)\n return self.final_estimator_.predict_proba(self.transform(X))" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/transform", + "name": "transform", + "qname": "sklearn.ensemble._stacking.StackingClassifier.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/transform/self", + "name": "self", + "qname": "sklearn.ensemble._stacking.StackingClassifier.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" 
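A toy sketch of the `predict` and `predict_proba` entries above: `predict` decodes labels through the internal `LabelEncoder` fitted in `fit`, while `predict_proba` delegates to `final_estimator_.predict_proba` on the stacked features. The setup is illustrative:

```python
# Toy sketch of StackingClassifier.predict / predict_proba as documented above.
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
clf = StackingClassifier(
    estimators=[("rf", RandomForestClassifier(n_estimators=10, random_state=0))],
    final_estimator=LogisticRegression(),
).fit(X, y)

print(clf.predict(X[:3]))        # labels in the original encoding, shape (3,)
print(clf.predict_proba(X[:3]))  # class probabilities, shape (3, n_classes)
```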
+ }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingClassifier/transform/X", + "name": "X", + "qname": "sklearn.ensemble._stacking.StackingClassifier.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where `n_samples` is the number of samples and\n`n_features` is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return class labels or probabilities for X for each estimator.", + "docstring": "Return class labels or probabilities for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\ny_preds : ndarray of shape (n_samples, n_estimators) or (n_samples, n_classes * n_estimators)\n Prediction outputs for each estimator.", + "code": " def transform(self, X):\n \"\"\"Return class labels or probabilities for X for each estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n Returns\n -------\n y_preds : ndarray of shape (n_samples, n_estimators) or \\\n (n_samples, n_classes * n_estimators)\n Prediction outputs for each estimator.\n \"\"\"\n return self._transform(X)" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._stacking.StackingRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._stacking.StackingRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/__init__/estimators", + "name": "estimators", + "qname": "sklearn.ensemble._stacking.StackingRegressor.__init__.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of (str, estimator)", + "default_value": "", + "description": "Base estimators which will be stacked together. Each element of the\nlist is defined as a tuple of string (i.e. name) and an estimator\ninstance. An estimator can be set to 'drop' using `set_params`." 
+ }, + "type": { + "kind": "NamedType", + "name": "list of (str, estimator)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/__init__/final_estimator", + "name": "final_estimator", + "qname": "sklearn.ensemble._stacking.StackingRegressor.__init__.final_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator", + "default_value": "None", + "description": "A regressor which will be used to combine the base estimators.\nThe default regressor is a :class:`~sklearn.linear_model.RidgeCV`." + }, + "type": { + "kind": "NamedType", + "name": "estimator" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/__init__/cv", + "name": "cv", + "qname": "sklearn.ensemble._stacking.StackingRegressor.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy used in\n`cross_val_predict` to train `final_estimator`. Possible inputs for\ncv are:\n\n* None, to use the default 5-fold cross validation,\n* integer, to specify the number of folds in a (Stratified) KFold,\n* An object to be used as a cross-validation generator,\n* An iterable yielding train, test splits.\n\nFor integer/None inputs, if the estimator is a classifier and y is\neither binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used.\nIn all other cases, :class:`~sklearn.model_selection.KFold` is used.\nThese splitters are instantiated with `shuffle=False` so the splits\nwill be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. note::\n A larger number of split will provide no benefits if the number\n of training samples is large enough. Indeed, the training time\n will increase. ``cv`` is not used for model evaluation but for\n prediction." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._stacking.StackingRegressor.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to run in parallel for `fit` of all `estimators`.\n`None` means 1 unless in a `joblib.parallel_backend` context. -1 means\nusing all processors. See Glossary for more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/__init__/passthrough", + "name": "passthrough", + "qname": "sklearn.ensemble._stacking.StackingRegressor.__init__.passthrough", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When False, only the predictions of estimators will be used as\ntraining data for `final_estimator`. When True, the\n`final_estimator` is trained on the predictions as well as the\noriginal training data." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._stacking.StackingRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Stack of estimators with a final regressor.\n\nStacked generalization consists in stacking the output of individual\nestimator and use a regressor to compute the final prediction. Stacking\nallows to use the strength of each individual estimator by using their\noutput as input of a final estimator.\n\nNote that `estimators_` are fitted on the full `X` while `final_estimator_`\nis trained using cross-validated predictions of the base estimators using\n`cross_val_predict`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimators, final_estimator=None, *, cv=None,\n n_jobs=None, passthrough=False, verbose=0):\n super().__init__(\n estimators=estimators,\n final_estimator=final_estimator,\n cv=cv,\n stack_method=\"predict\",\n n_jobs=n_jobs,\n passthrough=passthrough,\n verbose=verbose\n )" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/_sk_visual_block_", + "name": "_sk_visual_block_", + "qname": "sklearn.ensemble._stacking.StackingRegressor._sk_visual_block_", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/_sk_visual_block_/self", + "name": "self", + "qname": "sklearn.ensemble._stacking.StackingRegressor._sk_visual_block_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _sk_visual_block_(self):\n # If final_estimator's default changes then this should be\n # updated.\n if self.final_estimator is None:\n final_estimator = RidgeCV()\n else:\n final_estimator = self.final_estimator\n return super()._sk_visual_block_(final_estimator)" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/_validate_final_estimator", + "name": "_validate_final_estimator", + "qname": "sklearn.ensemble._stacking.StackingRegressor._validate_final_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/_validate_final_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._stacking.StackingRegressor._validate_final_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_final_estimator(self):\n self._clone_final_estimator(default=RidgeCV())\n if not is_regressor(self.final_estimator_):\n raise ValueError(\n \"'final_estimator' parameter should be a regressor. 
Got {}\"\n .format(self.final_estimator_)\n )" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/fit", + "name": "fit", + "qname": "sklearn.ensemble._stacking.StackingRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/fit/self", + "name": "self", + "qname": "sklearn.ensemble._stacking.StackingRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/fit/X", + "name": "X", + "qname": "sklearn.ensemble._stacking.StackingRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/fit/y", + "name": "y", + "qname": "sklearn.ensemble._stacking.StackingRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._stacking.StackingRegressor.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted.\nNote that this is supported only if all underlying estimators\nsupport sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the estimators.", + "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the estimators.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n Returns\n -------\n self : object\n \"\"\"\n y = column_or_1d(y, warn=True)\n return super().fit(X, y, sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/transform", + "name": "transform", + "qname": "sklearn.ensemble._stacking.StackingRegressor.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/transform/self", + "name": "self", + "qname": "sklearn.ensemble._stacking.StackingRegressor.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/StackingRegressor/transform/X", + "name": "X", + "qname": "sklearn.ensemble._stacking.StackingRegressor.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where `n_samples` is the number of samples and\n`n_features` is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the predictions for X for each estimator.", + "docstring": "Return the predictions for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\ny_preds : ndarray of shape (n_samples, n_estimators)\n Prediction outputs for each estimator.", + "code": " def transform(self, X):\n \"\"\"Return the predictions for X for each estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n Returns\n -------\n y_preds : ndarray of shape (n_samples, n_estimators)\n Prediction outputs for each estimator.\n \"\"\"\n return self._transform(X)" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._stacking._BaseStacking.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._stacking._BaseStacking.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/__init__/estimators", + "name": "estimators", + "qname": "sklearn.ensemble._stacking._BaseStacking.__init__.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/__init__/final_estimator", + "name": "final_estimator", + "qname": 
"sklearn.ensemble._stacking._BaseStacking.__init__.final_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/__init__/cv", + "name": "cv", + "qname": "sklearn.ensemble._stacking._BaseStacking.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/__init__/stack_method", + "name": "stack_method", + "qname": "sklearn.ensemble._stacking._BaseStacking.__init__.stack_method", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._stacking._BaseStacking.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._stacking._BaseStacking.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/__init__/passthrough", + "name": "passthrough", + "qname": "sklearn.ensemble._stacking._BaseStacking.__init__.passthrough", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for stacking method.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self, estimators, final_estimator=None, *, cv=None,\n stack_method='auto', n_jobs=None, verbose=0,\n passthrough=False):\n super().__init__(estimators=estimators)\n self.final_estimator = final_estimator\n self.cv = cv\n self.stack_method = stack_method\n self.n_jobs = n_jobs\n self.verbose = verbose\n self.passthrough = passthrough" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_clone_final_estimator", + "name": "_clone_final_estimator", + "qname": "sklearn.ensemble._stacking._BaseStacking._clone_final_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_clone_final_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._stacking._BaseStacking._clone_final_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_clone_final_estimator/default", + "name": "default", + "qname": "sklearn.ensemble._stacking._BaseStacking._clone_final_estimator.default", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} 
+ } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _clone_final_estimator(self, default):\n if self.final_estimator is not None:\n self.final_estimator_ = clone(self.final_estimator)\n else:\n self.final_estimator_ = clone(default)" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_concatenate_predictions", + "name": "_concatenate_predictions", + "qname": "sklearn.ensemble._stacking._BaseStacking._concatenate_predictions", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_concatenate_predictions/self", + "name": "self", + "qname": "sklearn.ensemble._stacking._BaseStacking._concatenate_predictions.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_concatenate_predictions/X", + "name": "X", + "qname": "sklearn.ensemble._stacking._BaseStacking._concatenate_predictions.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_concatenate_predictions/predictions", + "name": "predictions", + "qname": "sklearn.ensemble._stacking._BaseStacking._concatenate_predictions.predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Concatenate the predictions of each first layer learner and\npossibly the input dataset `X`.\n\nIf `X` is sparse and `self.passthrough` is False, the output of\n`transform` will be dense (the predictions). If `X` is sparse\nand `self.passthrough` is True, the output of `transform` will\nbe sparse.\n\nThis helper is in charge of ensuring the predictions are 2D arrays and\nit will drop one of the probability column when using probabilities\nin the binary case. Indeed, the p(y|c=0) = 1 - p(y|c=1)", + "docstring": "Concatenate the predictions of each first layer learner and\npossibly the input dataset `X`.\n\nIf `X` is sparse and `self.passthrough` is False, the output of\n`transform` will be dense (the predictions). If `X` is sparse\nand `self.passthrough` is True, the output of `transform` will\nbe sparse.\n\nThis helper is in charge of ensuring the predictions are 2D arrays and\nit will drop one of the probability column when using probabilities\nin the binary case. Indeed, the p(y|c=0) = 1 - p(y|c=1)", + "code": " def _concatenate_predictions(self, X, predictions):\n \"\"\"Concatenate the predictions of each first layer learner and\n possibly the input dataset `X`.\n\n If `X` is sparse and `self.passthrough` is False, the output of\n `transform` will be dense (the predictions). If `X` is sparse\n and `self.passthrough` is True, the output of `transform` will\n be sparse.\n\n This helper is in charge of ensuring the predictions are 2D arrays and\n it will drop one of the probability column when using probabilities\n in the binary case. 
Indeed, p(y|c=0) = 1 - p(y|c=1)\n \"\"\"\n X_meta = []\n for est_idx, preds in enumerate(predictions):\n # case where the estimator returned a 1D array\n if preds.ndim == 1:\n X_meta.append(preds.reshape(-1, 1))\n else:\n if (self.stack_method_[est_idx] == 'predict_proba' and\n len(self.classes_) == 2):\n # Remove the first column when using probabilities in\n # binary classification because both features are perfectly\n # collinear.\n X_meta.append(preds[:, 1:])\n else:\n X_meta.append(preds)\n if self.passthrough:\n X_meta.append(X)\n if sparse.issparse(X):\n return sparse.hstack(X_meta, format=X.format)\n\n return np.hstack(X_meta)" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_method_name", + "name": "_method_name", + "qname": "sklearn.ensemble._stacking._BaseStacking._method_name", + "decorators": ["staticmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_method_name/name", + "name": "name", + "qname": "sklearn.ensemble._stacking._BaseStacking._method_name.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_method_name/estimator", + "name": "estimator", + "qname": "sklearn.ensemble._stacking._BaseStacking._method_name.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_method_name/method", + "name": "method", + "qname": "sklearn.ensemble._stacking._BaseStacking._method_name.method", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @staticmethod\n def _method_name(name, estimator, method):\n if estimator == 'drop':\n return None\n if method == 'auto':\n if getattr(estimator, 'predict_proba', None):\n return 'predict_proba'\n elif getattr(estimator, 'decision_function', None):\n return 'decision_function'\n else:\n return 'predict'\n else:\n if not hasattr(estimator, method):\n raise ValueError('Underlying estimator {} does not implement '\n 'the method {}.'.format(name, method))\n return method" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_sk_visual_block_", + "name": "_sk_visual_block_", + "qname": "sklearn.ensemble._stacking._BaseStacking._sk_visual_block_", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_sk_visual_block_/self", + "name": "self", + "qname": "sklearn.ensemble._stacking._BaseStacking._sk_visual_block_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_sk_visual_block_/final_estimator", + "name": "final_estimator", + "qname": "sklearn.ensemble._stacking._BaseStacking._sk_visual_block_.final_estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + 
"type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _sk_visual_block_(self, final_estimator):\n names, estimators = zip(*self.estimators)\n parallel = _VisualBlock('parallel', estimators, names=names,\n dash_wrapped=False)\n serial = _VisualBlock('serial', (parallel, final_estimator),\n dash_wrapped=False)\n return _VisualBlock('serial', [serial])" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_transform", + "name": "_transform", + "qname": "sklearn.ensemble._stacking._BaseStacking._transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_transform/self", + "name": "self", + "qname": "sklearn.ensemble._stacking._BaseStacking._transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/_transform/X", + "name": "X", + "qname": "sklearn.ensemble._stacking._BaseStacking._transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Concatenate and return the predictions of the estimators.", + "docstring": "Concatenate and return the predictions of the estimators.", + "code": " def _transform(self, X):\n \"\"\"Concatenate and return the predictions of the estimators.\"\"\"\n check_is_fitted(self)\n predictions = [\n getattr(est, meth)(X)\n for est, meth in zip(self.estimators_, self.stack_method_)\n if est != 'drop'\n ]\n return self._concatenate_predictions(X, predictions)" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/fit", + "name": "fit", + "qname": "sklearn.ensemble._stacking._BaseStacking.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/fit/self", + "name": "self", + "qname": "sklearn.ensemble._stacking._BaseStacking.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/fit/X", + "name": "X", + "qname": "sklearn.ensemble._stacking._BaseStacking.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where `n_samples` is the number of samples and\n`n_features` is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/fit/y", + "name": "y", + "qname": "sklearn.ensemble._stacking._BaseStacking.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._stacking._BaseStacking.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or default=None", + "default_value": "", + "description": "Sample weights. If None, then samples are equally weighted.\nNote that this is supported only if all underlying estimators\nsupport sample weights.\n\n.. versionchanged:: 0.23\n when not None, `sample_weight` is passed to all underlying\n estimators" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the estimators.", + "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,) or default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n .. versionchanged:: 0.23\n when not None, `sample_weight` is passed to all underlying\n estimators\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the estimators.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,) or default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n .. versionchanged:: 0.23\n when not None, `sample_weight` is passed to all underlying\n estimators\n\n Returns\n -------\n self : object\n \"\"\"\n # all_estimators contains all estimators, the one to be fitted and the\n # 'drop' string.\n names, all_estimators = self._validate_estimators()\n self._validate_final_estimator()\n\n stack_method = [self.stack_method] * len(all_estimators)\n\n # Fit the base estimators on the whole training data. Those\n # base estimators will be used in transform, predict, and\n # predict_proba. 
They are exposed publicly.\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n delayed(_fit_single_estimator)(clone(est), X, y, sample_weight)\n for est in all_estimators if est != 'drop'\n )\n\n self.named_estimators_ = Bunch()\n est_fitted_idx = 0\n for name_est, org_est in zip(names, all_estimators):\n if org_est != 'drop':\n self.named_estimators_[name_est] = self.estimators_[\n est_fitted_idx]\n est_fitted_idx += 1\n else:\n self.named_estimators_[name_est] = 'drop'\n\n # To train the meta-classifier using the most data as possible, we use\n # a cross-validation to obtain the output of the stacked estimators.\n\n # To ensure that the data provided to each estimator are the same, we\n # need to set the random state of the cv if there is one and we need to\n # take a copy.\n cv = check_cv(self.cv, y=y, classifier=is_classifier(self))\n if hasattr(cv, 'random_state') and cv.random_state is None:\n cv.random_state = np.random.RandomState()\n\n self.stack_method_ = [\n self._method_name(name, est, meth)\n for name, est, meth in zip(names, all_estimators, stack_method)\n ]\n fit_params = ({\"sample_weight\": sample_weight}\n if sample_weight is not None\n else None)\n predictions = Parallel(n_jobs=self.n_jobs)(\n delayed(cross_val_predict)(clone(est), X, y, cv=deepcopy(cv),\n method=meth, n_jobs=self.n_jobs,\n fit_params=fit_params,\n verbose=self.verbose)\n for est, meth in zip(all_estimators, self.stack_method_)\n if est != 'drop'\n )\n\n # Only not None or not 'drop' estimators will be used in transform.\n # Remove the None from the method as well.\n self.stack_method_ = [\n meth for (meth, est) in zip(self.stack_method_, all_estimators)\n if est != 'drop'\n ]\n\n X_meta = self._concatenate_predictions(X, predictions)\n _fit_single_estimator(self.final_estimator_, X_meta, y,\n sample_weight=sample_weight)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/n_features_in_@getter", + "name": "n_features_in_", + "qname": "sklearn.ensemble._stacking._BaseStacking.n_features_in_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/n_features_in_/self", + "name": "self", + "qname": "sklearn.ensemble._stacking._BaseStacking.n_features_in_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Number of features seen during :term:`fit`.", + "docstring": "Number of features seen during :term:`fit`.", + "code": " @property\n def n_features_in_(self):\n \"\"\"Number of features seen during :term:`fit`.\"\"\"\n try:\n check_is_fitted(self)\n except NotFittedError as nfe:\n raise AttributeError(\n f\"{self.__class__.__name__} object has no attribute \"\n f\"n_features_in_\") from nfe\n return self.estimators_[0].n_features_in_" + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/predict", + "name": "predict", + "qname": "sklearn.ensemble._stacking._BaseStacking.predict", + "decorators": ["if_delegate_has_method(delegate='final_estimator_')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/predict/self", + "name": "self", + "qname": "sklearn.ensemble._stacking._BaseStacking.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + 
}, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/predict/X", + "name": "X", + "qname": "sklearn.ensemble._stacking._BaseStacking.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._stacking/_BaseStacking/predict/predict_params", + "name": "predict_params", + "qname": "sklearn.ensemble._stacking._BaseStacking.predict.predict_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "dict of str -> obj", + "default_value": "", + "description": "Parameters to the `predict` called by the `final_estimator`. Note\nthat this may be used to return uncertainties from some estimators\nwith `return_std` or `return_cov`. Be aware that it will only\naccounts for uncertainty in the final estimator." + }, + "type": { + "kind": "NamedType", + "name": "dict of str -> obj" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict target for X.", + "docstring": "Predict target for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n**predict_params : dict of str -> obj\n Parameters to the `predict` called by the `final_estimator`. Note\n that this may be used to return uncertainties from some estimators\n with `return_std` or `return_cov`. Be aware that it will only\n accounts for uncertainty in the final estimator.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n Predicted targets.", + "code": " @if_delegate_has_method(delegate='final_estimator_')\n def predict(self, X, **predict_params):\n \"\"\"Predict target for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n **predict_params : dict of str -> obj\n Parameters to the `predict` called by the `final_estimator`. Note\n that this may be used to return uncertainties from some estimators\n with `return_std` or `return_cov`. 
Be aware that it will only\n account for uncertainty in the final estimator.\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,) or (n_samples, n_output)\n Predicted targets.\n \"\"\"\n\n check_is_fitted(self)\n return self.final_estimator_.predict(\n self.transform(X), **predict_params\n )" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._voting.VotingClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._voting.VotingClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/__init__/estimators", + "name": "estimators", + "qname": "sklearn.ensemble._voting.VotingClassifier.__init__.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of (str, estimator) tuples", + "default_value": "", + "description": "Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones\nof those original estimators that will be stored in the class attribute\n``self.estimators_``. An estimator can be set to ``'drop'``\nusing ``set_params``.\n\n.. versionchanged:: 0.21\n ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n support was removed in 0.24." + }, + "type": { + "kind": "NamedType", + "name": "list of (str, estimator) tuples" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/__init__/voting", + "name": "voting", + "qname": "sklearn.ensemble._voting.VotingClassifier.__init__.voting", + "default_value": "'hard'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'hard', 'soft'}", + "default_value": "'hard'", + "description": "If 'hard', uses predicted class labels for majority rule voting.\nElse if 'soft', predicts the class label based on the argmax of\nthe sums of the predicted probabilities, which is recommended for\nan ensemble of well-calibrated classifiers." + }, + "type": { + "kind": "EnumType", + "values": ["hard", "soft"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/__init__/weights", + "name": "weights", + "qname": "sklearn.ensemble._voting.VotingClassifier.__init__.weights", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classifiers,)", + "default_value": "None", + "description": "Sequence of weights (`float` or `int`) to weight the occurrences of\npredicted class labels (`hard` voting) or class probabilities\nbefore averaging (`soft` voting). Uses uniform weights if `None`." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classifiers,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._voting.VotingClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to run in parallel for ``fit``.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. 
See :term:`Glossary `\nfor more details.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/__init__/flatten_transform", + "name": "flatten_transform", + "qname": "sklearn.ensemble._voting.VotingClassifier.__init__.flatten_transform", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Affects shape of transform output only when voting='soft'\nIf voting='soft' and flatten_transform=True, transform method returns\nmatrix with shape (n_samples, n_classifiers * n_classes). If\nflatten_transform=False, it returns\n(n_classifiers, n_samples, n_classes)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._voting.VotingClassifier.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the time elapsed while fitting will be printed as it\nis completed.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Soft Voting/Majority Rule classifier for unfitted estimators.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimators, *, voting='hard', weights=None,\n n_jobs=None, flatten_transform=True, verbose=False):\n super().__init__(estimators=estimators)\n self.voting = voting\n self.weights = weights\n self.n_jobs = n_jobs\n self.flatten_transform = flatten_transform\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/_collect_probas", + "name": "_collect_probas", + "qname": "sklearn.ensemble._voting.VotingClassifier._collect_probas", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/_collect_probas/self", + "name": "self", + "qname": "sklearn.ensemble._voting.VotingClassifier._collect_probas.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/_collect_probas/X", + "name": "X", + "qname": "sklearn.ensemble._voting.VotingClassifier._collect_probas.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Collect results from clf.predict calls.", + "docstring": "Collect results from clf.predict calls.", + "code": " def _collect_probas(self, X):\n \"\"\"Collect results from clf.predict calls.\"\"\"\n return np.asarray([clf.predict_proba(X) for clf in self.estimators_])" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/_predict_proba", + "name": "_predict_proba", + "qname": "sklearn.ensemble._voting.VotingClassifier._predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/_predict_proba/self", + "name": 
"self", + "qname": "sklearn.ensemble._voting.VotingClassifier._predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/_predict_proba/X", + "name": "X", + "qname": "sklearn.ensemble._voting.VotingClassifier._predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class probabilities for X in 'soft' voting.", + "docstring": "Predict class probabilities for X in 'soft' voting.", + "code": " def _predict_proba(self, X):\n \"\"\"Predict class probabilities for X in 'soft' voting.\"\"\"\n check_is_fitted(self)\n avg = np.average(self._collect_probas(X), axis=0,\n weights=self._weights_not_none)\n return avg" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/fit", + "name": "fit", + "qname": "sklearn.ensemble._voting.VotingClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/fit/self", + "name": "self", + "qname": "sklearn.ensemble._voting.VotingClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/fit/X", + "name": "X", + "qname": "sklearn.ensemble._voting.VotingClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/fit/y", + "name": "y", + "qname": "sklearn.ensemble._voting.VotingClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._voting.VotingClassifier.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted.\nNote that this is supported only if all underlying estimators\nsupport sample weights.\n\n.. 
versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the estimators.", + "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the estimators.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n .. versionadded:: 0.18\n\n Returns\n -------\n self : object\n\n \"\"\"\n check_classification_targets(y)\n if isinstance(y, np.ndarray) and len(y.shape) > 1 and y.shape[1] > 1:\n raise NotImplementedError('Multilabel and multi-output'\n ' classification is not supported.')\n\n if self.voting not in ('soft', 'hard'):\n raise ValueError(\"Voting must be 'soft' or 'hard'; got (voting=%r)\"\n % self.voting)\n\n self.le_ = LabelEncoder().fit(y)\n self.classes_ = self.le_.classes_\n transformed_y = self.le_.transform(y)\n\n return super().fit(X, transformed_y, sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/predict", + "name": "predict", + "qname": "sklearn.ensemble._voting.VotingClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/predict/self", + "name": "self", + "qname": "sklearn.ensemble._voting.VotingClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/predict/X", + "name": "X", + "qname": "sklearn.ensemble._voting.VotingClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class labels for X.", + "docstring": "Predict class labels for X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\nmaj : array-like of shape (n_samples,)\n Predicted class labels.", + "code": " def predict(self, X):\n \"\"\"Predict class labels for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n maj : array-like of shape (n_samples,)\n Predicted class labels.\n \"\"\"\n check_is_fitted(self)\n if self.voting == 'soft':\n maj = np.argmax(self.predict_proba(X), axis=1)\n\n else: # 'hard' voting\n predictions = self._predict(X)\n maj = np.apply_along_axis(\n lambda x: np.argmax(\n np.bincount(x, weights=self._weights_not_none)),\n axis=1, arr=predictions)\n\n maj = self.le_.inverse_transform(maj)\n\n return maj" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/predict_proba@getter", + "name": "predict_proba", + "qname": "sklearn.ensemble._voting.VotingClassifier.predict_proba", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.ensemble._voting.VotingClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute probabilities of possible outcomes for samples in X.", + "docstring": "Compute probabilities of possible outcomes for samples in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\navg : array-like of shape (n_samples, n_classes)\n Weighted average probability for each class per sample.", + "code": " @property\n def predict_proba(self):\n \"\"\"Compute probabilities of possible outcomes for samples in X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n avg : array-like of shape (n_samples, n_classes)\n Weighted average probability for each class per sample.\n \"\"\"\n if self.voting == 'hard':\n raise AttributeError(\"predict_proba is not available when\"\n \" voting=%r\" % self.voting)\n return self._predict_proba" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/transform", + "name": "transform", + "qname": "sklearn.ensemble._voting.VotingClassifier.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/transform/self", + "name": "self", + "qname": "sklearn.ensemble._voting.VotingClassifier.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingClassifier/transform/X", + "name": "X", + "qname": "sklearn.ensemble._voting.VotingClassifier.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", 
+ "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return class labels or probabilities for X for each estimator.", + "docstring": "Return class labels or probabilities for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\nReturns\n-------\nprobabilities_or_labels\n If `voting='soft'` and `flatten_transform=True`:\n returns ndarray of shape (n_classifiers, n_samples *\n n_classes), being class probabilities calculated by each\n classifier.\n If `voting='soft' and `flatten_transform=False`:\n ndarray of shape (n_classifiers, n_samples, n_classes)\n If `voting='hard'`:\n ndarray of shape (n_samples, n_classifiers), being\n class labels predicted by each classifier.", + "code": " def transform(self, X):\n \"\"\"Return class labels or probabilities for X for each estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n Returns\n -------\n probabilities_or_labels\n If `voting='soft'` and `flatten_transform=True`:\n returns ndarray of shape (n_classifiers, n_samples *\n n_classes), being class probabilities calculated by each\n classifier.\n If `voting='soft' and `flatten_transform=False`:\n ndarray of shape (n_classifiers, n_samples, n_classes)\n If `voting='hard'`:\n ndarray of shape (n_samples, n_classifiers), being\n class labels predicted by each classifier.\n \"\"\"\n check_is_fitted(self)\n\n if self.voting == 'soft':\n probas = self._collect_probas(X)\n if not self.flatten_transform:\n return probas\n return np.hstack(probas)\n\n else:\n return self._predict(X)" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._voting.VotingRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._voting.VotingRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/__init__/estimators", + "name": "estimators", + "qname": "sklearn.ensemble._voting.VotingRegressor.__init__.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of (str, estimator) tuples", + "default_value": "", + "description": "Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones\nof those original estimators that will be stored in the class attribute\n``self.estimators_``. An estimator can be set to ``'drop'`` using\n``set_params``.\n\n.. versionchanged:: 0.21\n ``'drop'`` is accepted. 
Using None was deprecated in 0.22 and\n support was removed in 0.24." + }, + "type": { + "kind": "NamedType", + "name": "list of (str, estimator) tuples" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/__init__/weights", + "name": "weights", + "qname": "sklearn.ensemble._voting.VotingRegressor.__init__.weights", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_regressors,)", + "default_value": "None", + "description": "Sequence of weights (`float` or `int`) to weight the occurrences of\npredicted values before averaging. Uses uniform weights if `None`." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_regressors,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.ensemble._voting.VotingRegressor.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to run in parallel for ``fit``.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.ensemble._voting.VotingRegressor.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the time elapsed while fitting will be printed as it\nis completed.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Prediction voting regressor for unfitted estimators.\n\nA voting regressor is an ensemble meta-estimator that fits several base\nregressors, each on the whole dataset. Then it averages the individual\npredictions to form a final prediction.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimators, *, weights=None, n_jobs=None,\n verbose=False):\n super().__init__(estimators=estimators)\n self.weights = weights\n self.n_jobs = n_jobs\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/fit", + "name": "fit", + "qname": "sklearn.ensemble._voting.VotingRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/fit/self", + "name": "self", + "qname": "sklearn.ensemble._voting.VotingRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/fit/X", + "name": "X", + "qname": "sklearn.ensemble._voting.VotingRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/fit/y", + "name": "y", + "qname": "sklearn.ensemble._voting.VotingRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._voting.VotingRegressor.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted.\nNote that this is supported only if all underlying estimators\nsupport sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the estimators.", + "docstring": "Fit the estimators.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\nReturns\n-------\nself : object\n Fitted estimator.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the estimators.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n Note that this is supported only if all underlying estimators\n support sample weights.\n\n Returns\n -------\n self : object\n Fitted estimator.\n \"\"\"\n y = column_or_1d(y, warn=True)\n return super().fit(X, y, sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/predict", + "name": "predict", + "qname": "sklearn.ensemble._voting.VotingRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/predict/self", + "name": "self", + "qname": "sklearn.ensemble._voting.VotingRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/predict/X", + "name": "X", + "qname": "sklearn.ensemble._voting.VotingRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the estimators in the ensemble.", + "docstring": "Predict regression target for X.\n\nThe predicted regression target of an input sample is computed as the\nmean predicted regression targets of the estimators in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted values.", + "code": " def predict(self, X):\n \"\"\"Predict regression target for X.\n\n The predicted regression target of an input sample is computed as the\n mean predicted regression targets of the estimators in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n The predicted values.\n \"\"\"\n check_is_fitted(self)\n return np.average(self._predict(X), axis=1,\n weights=self._weights_not_none)" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/transform", + "name": "transform", + "qname": "sklearn.ensemble._voting.VotingRegressor.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/transform/self", + "name": "self", + "qname": "sklearn.ensemble._voting.VotingRegressor.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/VotingRegressor/transform/X", + "name": "X", + "qname": "sklearn.ensemble._voting.VotingRegressor.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return predictions for X for each estimator.", + "docstring": "Return predictions for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nReturns\n-------\npredictions: ndarray of shape (n_samples, n_classifiers)\n Values predicted by each regressor.", + "code": " def transform(self, X):\n \"\"\"Return predictions for X for each estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\n Returns\n -------\n predictions: ndarray of shape (n_samples, n_classifiers)\n Values predicted by each regressor.\n \"\"\"\n check_is_fitted(self)\n return self._predict(X)" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_log_message", + "name": "_log_message", + "qname": "sklearn.ensemble._voting._BaseVoting._log_message", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_log_message/self", + "name": "self", + "qname": "sklearn.ensemble._voting._BaseVoting._log_message.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_log_message/name", + "name": "name", + "qname": "sklearn.ensemble._voting._BaseVoting._log_message.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_log_message/idx", + "name": "idx", + "qname": "sklearn.ensemble._voting._BaseVoting._log_message.idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_log_message/total", + "name": "total", + "qname": "sklearn.ensemble._voting._BaseVoting._log_message.total", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _log_message(self, name, idx, total):\n if not self.verbose:\n return None\n return '(%d of %d) Processing %s' % (idx, total, name)" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_more_tags", + "name": "_more_tags", + "qname": "sklearn.ensemble._voting._BaseVoting._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_more_tags/self", + "name": "self", + "qname": "sklearn.ensemble._voting._BaseVoting._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return 
{\"preserves_dtype\": []}" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_predict", + "name": "_predict", + "qname": "sklearn.ensemble._voting._BaseVoting._predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_predict/self", + "name": "self", + "qname": "sklearn.ensemble._voting._BaseVoting._predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_predict/X", + "name": "X", + "qname": "sklearn.ensemble._voting._BaseVoting._predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Collect results from clf.predict calls.", + "docstring": "Collect results from clf.predict calls.", + "code": " def _predict(self, X):\n \"\"\"Collect results from clf.predict calls.\"\"\"\n return np.asarray([est.predict(X) for est in self.estimators_]).T" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_sk_visual_block_", + "name": "_sk_visual_block_", + "qname": "sklearn.ensemble._voting._BaseVoting._sk_visual_block_", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_sk_visual_block_/self", + "name": "self", + "qname": "sklearn.ensemble._voting._BaseVoting._sk_visual_block_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _sk_visual_block_(self):\n names, estimators = zip(*self.estimators)\n return _VisualBlock('parallel', estimators, names=names)" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_weights_not_none@getter", + "name": "_weights_not_none", + "qname": "sklearn.ensemble._voting._BaseVoting._weights_not_none", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/_weights_not_none/self", + "name": "self", + "qname": "sklearn.ensemble._voting._BaseVoting._weights_not_none.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get the weights of not `None` estimators.", + "docstring": "Get the weights of not `None` estimators.", + "code": " @property\n def _weights_not_none(self):\n \"\"\"Get the weights of not `None` estimators.\"\"\"\n if self.weights is None:\n return None\n return [w for est, w in zip(self.estimators, self.weights)\n if est[1] != 'drop']" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/fit", + "name": "fit", + "qname": "sklearn.ensemble._voting._BaseVoting.fit", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/fit/self", + "name": "self", + "qname": "sklearn.ensemble._voting._BaseVoting.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + 
"default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/fit/X", + "name": "X", + "qname": "sklearn.ensemble._voting._BaseVoting.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/fit/y", + "name": "y", + "qname": "sklearn.ensemble._voting._BaseVoting.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._voting._BaseVoting.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get common fit operations.", + "docstring": "Get common fit operations.", + "code": " @abstractmethod\n def fit(self, X, y, sample_weight=None):\n \"\"\"Get common fit operations.\"\"\"\n names, clfs = self._validate_estimators()\n\n if (self.weights is not None and\n len(self.weights) != len(self.estimators)):\n raise ValueError('Number of `estimators` and weights must be equal'\n '; got %d weights, %d estimators'\n % (len(self.weights), len(self.estimators)))\n\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n delayed(_fit_single_estimator)(\n clone(clf), X, y,\n sample_weight=sample_weight,\n message_clsname='Voting',\n message=self._log_message(names[idx],\n idx + 1, len(clfs))\n )\n for idx, clf in enumerate(clfs) if clf != 'drop'\n )\n\n self.named_estimators_ = Bunch()\n\n # Uses 'drop' as placeholder for dropped estimators\n est_iter = iter(self.estimators_)\n for name, est in self.estimators:\n current_est = est if est == 'drop' else next(est_iter)\n self.named_estimators_[name] = current_est\n\n return self" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/fit_transform", + "name": "fit_transform", + "qname": "sklearn.ensemble._voting._BaseVoting.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/fit_transform/self", + "name": "self", + "qname": "sklearn.ensemble._voting._BaseVoting.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/fit_transform/X", + "name": "X", + "qname": "sklearn.ensemble._voting._BaseVoting.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix, dataframe} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input samples" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/fit_transform/y", + "name": "y", + "qname": "sklearn.ensemble._voting._BaseVoting.fit_transform.y", + "default_value": "None", + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Target values (None for unsupervised transformations)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/fit_transform/fit_params", + "name": "fit_params", + "qname": "sklearn.ensemble._voting._BaseVoting.fit_transform.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Additional fit parameters." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return class labels or probabilities for each estimator.\n\nReturn predictions for X for each estimator.", + "docstring": "Return class labels or probabilities for each estimator.\n\nReturn predictions for X for each estimator.\n\nParameters\n----------\nX : {array-like, sparse matrix, dataframe} of shape (n_samples, n_features)\n Input samples\n\ny : ndarray of shape (n_samples,), default=None\n Target values (None for unsupervised transformations).\n\n**fit_params : dict\n Additional fit parameters.\n\nReturns\n-------\nX_new : ndarray array of shape (n_samples, n_features_new)\n Transformed array.", + "code": " def fit_transform(self, X, y=None, **fit_params):\n \"\"\"Return class labels or probabilities for each estimator.\n\n Return predictions for X for each estimator.\n\n Parameters\n ----------\n X : {array-like, sparse matrix, dataframe} of shape \\\n (n_samples, n_features)\n Input samples\n\n y : ndarray of shape (n_samples,), default=None\n Target values (None for unsupervised transformations).\n\n **fit_params : dict\n Additional fit parameters.\n\n Returns\n -------\n X_new : ndarray array of shape (n_samples, n_features_new)\n Transformed array.\n \"\"\"\n return super().fit_transform(X, y, **fit_params)" + }, + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/n_features_in_@getter", + "name": "n_features_in_", + "qname": "sklearn.ensemble._voting._BaseVoting.n_features_in_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._voting/_BaseVoting/n_features_in_/self", + "name": "self", + "qname": "sklearn.ensemble._voting._BaseVoting.n_features_in_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def n_features_in_(self):\n # For consistency with other estimators we raise a AttributeError so\n # that hasattr() fails if the estimator isn't fitted.\n try:\n check_is_fitted(self)\n except NotFittedError as nfe:\n raise AttributeError(\n \"{} object has no n_features_in_ attribute.\"\n .format(self.__class__.__name__)\n ) from nfe\n\n return self.estimators_[0].n_features_in_" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/__init__/self", + "name": "self", + "qname": 
"sklearn.ensemble._weight_boosting.AdaBoostClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.__init__.base_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "None", + "description": "The base estimator from which the boosted ensemble is built.\nSupport for sample weighting is required, as well as proper\n``classes_`` and ``n_classes_`` attributes. If ``None``, then\nthe base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`\ninitialized with `max_depth=1`." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.__init__.n_estimators", + "default_value": "50", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "50", + "description": "The maximum number of estimators at which boosting is terminated.\nIn case of perfect fit, the learning procedure is stopped early." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.__init__.learning_rate", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.", + "description": "Weight applied to each classifier at each boosting iteration. A higher\nlearning rate increases the contribution of each classifier. There is\na trade-off between the `learning_rate` and `n_estimators` parameters." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.__init__.algorithm", + "default_value": "'SAMME.R'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'SAMME', 'SAMME.R'}", + "default_value": "'SAMME.R'", + "description": "If 'SAMME.R' then use the SAMME.R real boosting algorithm.\n``base_estimator`` must support calculation of class probabilities.\nIf 'SAMME' then use the SAMME discrete boosting algorithm.\nThe SAMME.R algorithm typically converges faster than SAMME,\nachieving a lower test error with fewer boosting iterations." 
+ }, + "type": { + "kind": "EnumType", + "values": ["SAMME.R", "SAMME"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the random seed given at each `base_estimator` at each\nboosting iteration.\nThus, it is only used when `base_estimator` exposes a `random_state`.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a meta-estimator that begins by fitting a\nclassifier on the original dataset and then fits additional copies of the\nclassifier on the same dataset but where the weights of incorrectly\nclassified instances are adjusted such that subsequent classifiers focus\nmore on difficult cases.\n\nThis class implements the algorithm known as AdaBoost-SAMME [2].\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.14", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self,\n base_estimator=None, *,\n n_estimators=50,\n learning_rate=1.,\n algorithm='SAMME.R',\n random_state=None):\n\n super().__init__(\n base_estimator=base_estimator,\n n_estimators=n_estimators,\n learning_rate=learning_rate,\n random_state=random_state)\n\n self.algorithm = algorithm" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost", + "name": "_boost", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost/iboost", + "name": "iboost", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost.iboost", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The index of the current boost iteration." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost/y", + "name": "y", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target values (class labels)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The current sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "RandomState instance", + "default_value": "", + "description": "The RandomState instance used if the base estimator accepts a\n`random_state` attribute." + }, + "type": { + "kind": "NamedType", + "name": "RandomState instance" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Implement a single boost.\n\nPerform a single boost according to the real multi-class SAMME.R\nalgorithm or to the discrete SAMME algorithm and return the updated\nsample weights.", + "docstring": "Implement a single boost.\n\nPerform a single boost according to the real multi-class SAMME.R\nalgorithm or to the discrete SAMME algorithm and return the updated\nsample weights.\n\nParameters\n----------\niboost : int\n The index of the current boost iteration.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,)\n The target values (class labels).\n\nsample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\nrandom_state : RandomState instance\n The RandomState instance used if the base estimator accepts a\n `random_state` attribute.\n\nReturns\n-------\nsample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\nestimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\nestimator_error : float\n The classification error for the current boost.\n If None then boosting has terminated early.", + "code": " def _boost(self, iboost, X, y, sample_weight, random_state):\n \"\"\"Implement a single boost.\n\n Perform a single boost according to the real multi-class SAMME.R\n algorithm or to the discrete SAMME algorithm and return the updated\n sample weights.\n\n Parameters\n ----------\n iboost : int\n The index of the current boost iteration.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\n 
y : array-like of shape (n_samples,)\n The target values (class labels).\n\n sample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\n random_state : RandomState instance\n The RandomState instance used if the base estimator accepts a\n `random_state` attribute.\n\n Returns\n -------\n sample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\n estimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\n estimator_error : float\n The classification error for the current boost.\n If None then boosting has terminated early.\n \"\"\"\n if self.algorithm == 'SAMME.R':\n return self._boost_real(iboost, X, y, sample_weight, random_state)\n\n else: # elif self.algorithm == \"SAMME\":\n return self._boost_discrete(iboost, X, y, sample_weight,\n random_state)" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_discrete", + "name": "_boost_discrete", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_discrete", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_discrete/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_discrete.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_discrete/iboost", + "name": "iboost", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_discrete.iboost", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_discrete/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_discrete.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_discrete/y", + "name": "y", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_discrete.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_discrete/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_discrete.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_discrete/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_discrete.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], 
+ "is_public": false, + "reexported_by": [], + "description": "Implement a single boost using the SAMME discrete algorithm.", + "docstring": "Implement a single boost using the SAMME discrete algorithm.", + "code": " def _boost_discrete(self, iboost, X, y, sample_weight, random_state):\n \"\"\"Implement a single boost using the SAMME discrete algorithm.\"\"\"\n estimator = self._make_estimator(random_state=random_state)\n\n estimator.fit(X, y, sample_weight=sample_weight)\n\n y_predict = estimator.predict(X)\n\n if iboost == 0:\n self.classes_ = getattr(estimator, 'classes_', None)\n self.n_classes_ = len(self.classes_)\n\n # Instances incorrectly classified\n incorrect = y_predict != y\n\n # Error fraction\n estimator_error = np.mean(\n np.average(incorrect, weights=sample_weight, axis=0))\n\n # Stop if classification is perfect\n if estimator_error <= 0:\n return sample_weight, 1., 0.\n\n n_classes = self.n_classes_\n\n # Stop if the error is at least as bad as random guessing\n if estimator_error >= 1. - (1. / n_classes):\n self.estimators_.pop(-1)\n if len(self.estimators_) == 0:\n raise ValueError('BaseClassifier in AdaBoostClassifier '\n 'ensemble is worse than random, ensemble '\n 'can not be fit.')\n return None, None, None\n\n # Boost weight using multi-class AdaBoost SAMME alg\n estimator_weight = self.learning_rate * (\n np.log((1. - estimator_error) / estimator_error) +\n np.log(n_classes - 1.))\n\n # Only boost the weights if I will fit again\n if not iboost == self.n_estimators - 1:\n # Only boost positive weights\n sample_weight *= np.exp(estimator_weight * incorrect *\n (sample_weight > 0))\n\n return sample_weight, estimator_weight, estimator_error" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_real", + "name": "_boost_real", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_real", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_real/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_real.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_real/iboost", + "name": "iboost", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_real.iboost", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_real/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_real.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_real/y", + "name": "y", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_real.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_real/sample_weight", + 
"name": "sample_weight", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_real.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_boost_real/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._boost_real.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Implement a single boost using the SAMME.R real algorithm.", + "docstring": "Implement a single boost using the SAMME.R real algorithm.", + "code": " def _boost_real(self, iboost, X, y, sample_weight, random_state):\n \"\"\"Implement a single boost using the SAMME.R real algorithm.\"\"\"\n estimator = self._make_estimator(random_state=random_state)\n\n estimator.fit(X, y, sample_weight=sample_weight)\n\n y_predict_proba = estimator.predict_proba(X)\n\n if iboost == 0:\n self.classes_ = getattr(estimator, 'classes_', None)\n self.n_classes_ = len(self.classes_)\n\n y_predict = self.classes_.take(np.argmax(y_predict_proba, axis=1),\n axis=0)\n\n # Instances incorrectly classified\n incorrect = y_predict != y\n\n # Error fraction\n estimator_error = np.mean(\n np.average(incorrect, weights=sample_weight, axis=0))\n\n # Stop if classification is perfect\n if estimator_error <= 0:\n return sample_weight, 1., 0.\n\n # Construct y coding as described in Zhu et al [2]:\n #\n # y_k = 1 if c == k else -1 / (K - 1)\n #\n # where K == n_classes_ and c, k in [0, K) are indices along the second\n # axis of the y coding with c being the index corresponding to the true\n # class label.\n n_classes = self.n_classes_\n classes = self.classes_\n y_codes = np.array([-1. / (n_classes - 1), 1.])\n y_coding = y_codes.take(classes == y[:, np.newaxis])\n\n # Displace zero probabilities so the log is defined.\n # Also fix negative elements which may occur with\n # negative sample weights.\n proba = y_predict_proba # alias for readability\n np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba)\n\n # Boost weight using multi-class AdaBoost SAMME.R alg\n estimator_weight = (-1. * self.learning_rate\n * ((n_classes - 1.) 
/ n_classes)\n * xlogy(y_coding, y_predict_proba).sum(axis=1))\n\n # Only boost the weights if it will fit again\n if not iboost == self.n_estimators - 1:\n # Only boost positive weights\n sample_weight *= np.exp(estimator_weight *\n ((sample_weight > 0) |\n (estimator_weight < 0)))\n\n return sample_weight, 1., estimator_error" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_compute_proba_from_decision", + "name": "_compute_proba_from_decision", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._compute_proba_from_decision", + "decorators": ["staticmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_compute_proba_from_decision/decision", + "name": "decision", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._compute_proba_from_decision.decision", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_compute_proba_from_decision/n_classes", + "name": "n_classes", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._compute_proba_from_decision.n_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute probabilities from the decision function.\n\nThis is based eq. (4) of [1] where:\n p(y=c|X) = exp((1 / K-1) f_c(X)) / sum_k(exp((1 / K-1) f_k(X)))\n = softmax((1 / K-1) * f(X))", + "docstring": "Compute probabilities from the decision function.\n\nThis is based eq. (4) of [1] where:\n p(y=c|X) = exp((1 / K-1) f_c(X)) / sum_k(exp((1 / K-1) f_k(X)))\n = softmax((1 / K-1) * f(X))\n\nReferences\n----------\n.. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\",\n 2009.", + "code": " @staticmethod\n def _compute_proba_from_decision(decision, n_classes):\n \"\"\"Compute probabilities from the decision function.\n\n This is based eq. (4) of [1] where:\n p(y=c|X) = exp((1 / K-1) f_c(X)) / sum_k(exp((1 / K-1) f_k(X)))\n = softmax((1 / K-1) * f(X))\n\n References\n ----------\n .. [1] J. Zhu, H. Zou, S. Rosset, T. 
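A worked sketch of the SAMME.R update from `_boost_real` above: build the y coding (`1` for the true class, `-1/(K-1)` elsewhere), clip probabilities away from zero so the log is defined, then scale each sample weight by `exp(-lr * (K-1)/K * sum_k y_k log p_k)`. The probabilities and weights below are invented.

```python
import numpy as np
from scipy.special import xlogy

classes = np.array([0, 1, 2])
y = np.array([0, 2])
proba = np.array([[0.8, 0.1, 0.1],    # predict_proba of the new estimator
                  [0.2, 0.5, 0.3]])
w, K, lr = np.array([0.5, 0.5]), 3, 1.0

y_codes = np.array([-1.0 / (K - 1), 1.0])
y_coding = y_codes.take(classes == y[:, np.newaxis])   # (n_samples, K)
proba = np.clip(proba, np.finfo(proba.dtype).eps, None)

est_weight = -lr * ((K - 1.0) / K) * xlogy(y_coding, proba).sum(axis=1)
w = w * np.exp(est_weight)
print(w / w.sum())   # confident correct predictions get down-weighted
```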
Hastie, \"Multi-class AdaBoost\",\n 2009.\n \"\"\"\n if n_classes == 2:\n decision = np.vstack([-decision, decision]).T / 2\n else:\n decision /= (n_classes - 1)\n return softmax(decision, copy=False)" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_validate_estimator", + "name": "_validate_estimator", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._validate_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/_validate_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier._validate_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the estimator and set the base_estimator_ attribute.", + "docstring": "Check the estimator and set the base_estimator_ attribute.", + "code": " def _validate_estimator(self):\n \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n super()._validate_estimator(\n default=DecisionTreeClassifier(max_depth=1))\n\n # SAMME-R requires predict_proba-enabled base estimators\n if self.algorithm == 'SAMME.R':\n if not hasattr(self.base_estimator_, 'predict_proba'):\n raise TypeError(\n \"AdaBoostClassifier with algorithm='SAMME.R' requires \"\n \"that the weak learner supports the calculation of class \"\n \"probabilities with a predict_proba method.\\n\"\n \"Please change the base estimator or set \"\n \"algorithm='SAMME' instead.\")\n if not has_fit_parameter(self.base_estimator_, \"sample_weight\"):\n raise ValueError(\"%s doesn't support sample_weight.\"\n % self.base_estimator_.__class__.__name__)" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/decision_function", + "name": "decision_function", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/decision_function/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/decision_function/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the decision function of ``X``.", + "docstring": "Compute the decision function of ``X``.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. 
Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\nscore : ndarray of shape of (n_samples, k)\n The decision function of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute.\n Binary classification is a special cases with ``k == 1``,\n otherwise ``k==n_classes``. For binary classification,\n values closer to -1 or 1 mean more like the first or second\n class in ``classes_``, respectively.", + "code": " def decision_function(self, X):\n \"\"\"Compute the decision function of ``X``.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Returns\n -------\n score : ndarray of shape of (n_samples, k)\n The decision function of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute.\n Binary classification is a special cases with ``k == 1``,\n otherwise ``k==n_classes``. For binary classification,\n values closer to -1 or 1 mean more like the first or second\n class in ``classes_``, respectively.\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n\n n_classes = self.n_classes_\n classes = self.classes_[:, np.newaxis]\n\n if self.algorithm == 'SAMME.R':\n # The weights are all 1. for SAMME.R\n pred = sum(_samme_proba(estimator, n_classes, X)\n for estimator in self.estimators_)\n else: # self.algorithm == \"SAMME\"\n pred = sum((estimator.predict(X) == classes).T * w\n for estimator, w in zip(self.estimators_,\n self.estimator_weights_))\n\n pred /= self.estimator_weights_.sum()\n if n_classes == 2:\n pred[:, 0] *= -1\n return pred.sum(axis=1)\n return pred" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/fit", + "name": "fit", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/fit/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/fit/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/fit/y", + "name": "y", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target values (class labels)." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, the sample weights are initialized to\n``1 / n_samples``." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build a boosted classifier from the training set (X, y).", + "docstring": "Build a boosted classifier from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n The target values (class labels).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, the sample weights are initialized to\n ``1 / n_samples``.\n\nReturns\n-------\nself : object\n Fitted estimator.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Build a boosted classifier from the training set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n y : array-like of shape (n_samples,)\n The target values (class labels).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, the sample weights are initialized to\n ``1 / n_samples``.\n\n Returns\n -------\n self : object\n Fitted estimator.\n \"\"\"\n # Check that algorithm is supported\n if self.algorithm not in ('SAMME', 'SAMME.R'):\n raise ValueError(\"algorithm %s is not supported\" % self.algorithm)\n\n # Fit\n return super().fit(X, y, sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/predict", + "name": "predict", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/predict/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/predict/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict classes for X.\n\nThe predicted class of an input sample is computed as the weighted mean\nprediction of the classifiers in the ensemble.", + "docstring": "Predict classes for X.\n\nThe predicted class of an input sample is computed as the weighted mean\nprediction of the classifiers in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted classes.", + "code": " def predict(self, X):\n \"\"\"Predict classes for X.\n\n The predicted class of an input sample is computed as the weighted mean\n prediction of the classifiers in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n The predicted classes.\n \"\"\"\n X = self._check_X(X)\n\n pred = self.decision_function(X)\n\n if self.n_classes_ == 2:\n return self.classes_.take(pred > 0, axis=0)\n\n return self.classes_.take(np.argmax(pred, axis=1), axis=0)" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/predict_log_proba/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/predict_log_proba/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe weighted mean predicted class log-probabilities of the classifiers\nin the ensemble.", + "docstring": "Predict class log-probabilities for X.\n\nThe predicted class log-probabilities of an input sample is computed as\nthe weighted mean predicted class log-probabilities of the classifiers\nin the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute.", + "code": " def predict_log_proba(self, X):\n \"\"\"Predict class log-probabilities for X.\n\n The predicted class log-probabilities of an input sample is computed as\n the weighted mean predicted class log-probabilities of the classifiers\n in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute.\n \"\"\"\n X = self._check_X(X)\n return np.log(self.predict_proba(X))" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe weighted mean predicted class probabilities of the classifiers\nin the ensemble.", + "docstring": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe weighted mean predicted class probabilities of the classifiers\nin the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\np : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute.", + "code": " def predict_proba(self, X):\n \"\"\"Predict class probabilities for X.\n\n The predicted class probabilities of an input sample is computed as\n the weighted mean predicted class probabilities of the classifiers\n in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Returns\n -------\n p : ndarray of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute.\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n\n n_classes = self.n_classes_\n\n if n_classes == 1:\n return np.ones((_num_samples(X), 1))\n\n decision = self.decision_function(X)\n return self._compute_proba_from_decision(decision, n_classes)" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/staged_decision_function", + "name": "staged_decision_function", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.staged_decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/staged_decision_function/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.staged_decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/staged_decision_function/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.staged_decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute decision function of ``X`` for each boosting iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each boosting iteration.", + "docstring": "Compute decision function of ``X`` for each boosting iteration.\n\nThis method allows monitoring (i.e. determine error on testing set)\nafter each boosting iteration.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nYields\n------\nscore : generator of ndarray of shape (n_samples, k)\n The decision function of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute.\n Binary classification is a special cases with ``k == 1``,\n otherwise ``k==n_classes``. For binary classification,\n values closer to -1 or 1 mean more like the first or second\n class in ``classes_``, respectively.", + "code": " def staged_decision_function(self, X):\n \"\"\"Compute decision function of ``X`` for each boosting iteration.\n\n This method allows monitoring (i.e. determine error on testing set)\n after each boosting iteration.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Yields\n ------\n score : generator of ndarray of shape (n_samples, k)\n The decision function of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute.\n Binary classification is a special cases with ``k == 1``,\n otherwise ``k==n_classes``. For binary classification,\n values closer to -1 or 1 mean more like the first or second\n class in ``classes_``, respectively.\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n\n n_classes = self.n_classes_\n classes = self.classes_[:, np.newaxis]\n pred = None\n norm = 0.\n\n for weight, estimator in zip(self.estimator_weights_,\n self.estimators_):\n norm += weight\n\n if self.algorithm == 'SAMME.R':\n # The weights are all 1. 
for SAMME.R\n current_pred = _samme_proba(estimator, n_classes, X)\n else: # elif self.algorithm == \"SAMME\":\n current_pred = estimator.predict(X)\n current_pred = (current_pred == classes).T * weight\n\n if pred is None:\n pred = current_pred\n else:\n pred += current_pred\n\n if n_classes == 2:\n tmp_pred = np.copy(pred)\n tmp_pred[:, 0] *= -1\n yield (tmp_pred / norm).sum(axis=1)\n else:\n yield pred / norm" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/staged_predict", + "name": "staged_predict", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.staged_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/staged_predict/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.staged_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/staged_predict/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.staged_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return staged predictions for X.\n\nThe predicted class of an input sample is computed as the weighted mean\nprediction of the classifiers in the ensemble.\n\nThis generator method yields the ensemble prediction after each\niteration of boosting and therefore allows monitoring, such as to\ndetermine the prediction on a test set after each boost.", + "docstring": "Return staged predictions for X.\n\nThe predicted class of an input sample is computed as the weighted mean\nprediction of the classifiers in the ensemble.\n\nThis generator method yields the ensemble prediction after each\niteration of boosting and therefore allows monitoring, such as to\ndetermine the prediction on a test set after each boost.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nYields\n------\ny : generator of ndarray of shape (n_samples,)\n The predicted classes.", + "code": " def staged_predict(self, X):\n \"\"\"Return staged predictions for X.\n\n The predicted class of an input sample is computed as the weighted mean\n prediction of the classifiers in the ensemble.\n\n This generator method yields the ensemble prediction after each\n iteration of boosting and therefore allows monitoring, such as to\n determine the prediction on a test set after each boost.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n Yields\n ------\n y : generator of ndarray of shape (n_samples,)\n The predicted classes.\n \"\"\"\n X = self._check_X(X)\n\n n_classes = self.n_classes_\n classes = self.classes_\n\n if n_classes == 2:\n for pred in self.staged_decision_function(X):\n yield np.array(classes.take(pred > 0, axis=0))\n\n else:\n for pred in self.staged_decision_function(X):\n yield np.array(classes.take(\n np.argmax(pred, axis=1), axis=0))" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/staged_predict_proba", + "name": "staged_predict_proba", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.staged_predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/staged_predict_proba/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.staged_predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostClassifier/staged_predict_proba/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostClassifier.staged_predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe weighted mean predicted class probabilities of the classifiers\nin the ensemble.\n\nThis generator method yields the ensemble predicted class probabilities\nafter each iteration of boosting and therefore allows monitoring, such\nas to determine the predicted class probabilities on a test set after\neach boost.", + "docstring": "Predict class probabilities for X.\n\nThe predicted class probabilities of an input sample is computed as\nthe weighted mean predicted class probabilities of the classifiers\nin the ensemble.\n\nThis generator method yields the ensemble predicted class probabilities\nafter each iteration of boosting and therefore allows monitoring, such\nas to determine the predicted class probabilities on a test set after\neach boost.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\nYields\n-------\np : generator of ndarray of shape (n_samples,)\n The class probabilities of the input samples. 
The order of\n outputs is the same of that of the :term:`classes_` attribute.", + "code": " def staged_predict_proba(self, X):\n \"\"\"Predict class probabilities for X.\n\n The predicted class probabilities of an input sample is computed as\n the weighted mean predicted class probabilities of the classifiers\n in the ensemble.\n\n This generator method yields the ensemble predicted class probabilities\n after each iteration of boosting and therefore allows monitoring, such\n as to determine the predicted class probabilities on a test set after\n each boost.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Yields\n -------\n p : generator of ndarray of shape (n_samples,)\n The class probabilities of the input samples. The order of\n outputs is the same of that of the :term:`classes_` attribute.\n \"\"\"\n X = self._check_X(X)\n\n n_classes = self.n_classes_\n\n for decision in self.staged_decision_function(X):\n yield self._compute_proba_from_decision(decision, n_classes)" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.__init__.base_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "None", + "description": "The base estimator from which the boosted ensemble is built.\nIf ``None``, then the base estimator is\n:class:`~sklearn.tree.DecisionTreeRegressor` initialized with\n`max_depth=3`." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.__init__.n_estimators", + "default_value": "50", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "50", + "description": "The maximum number of estimators at which boosting is terminated.\nIn case of perfect fit, the learning procedure is stopped early." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.__init__.learning_rate", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.", + "description": "Weight applied to each classifier at each boosting iteration. A higher\nlearning rate increases the contribution of each classifier. 
There is\na trade-off between the `learning_rate` and `n_estimators` parameters." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/__init__/loss", + "name": "loss", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.__init__.loss", + "default_value": "'linear'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'linear', 'square', 'exponential'}", + "default_value": "'linear'", + "description": "The loss function to use when updating the weights after each\nboosting iteration." + }, + "type": { + "kind": "EnumType", + "values": ["square", "exponential", "linear"] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the random seed given at each `base_estimator` at each\nboosting iteration.\nThus, it is only used when `base_estimator` exposes a `random_state`.\nIn addition, it controls the bootstrap of the weights used to train the\n`base_estimator` at each boosting iteration.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "An AdaBoost regressor.\n\nAn AdaBoost [1] regressor is a meta-estimator that begins by fitting a\nregressor on the original dataset and then fits additional copies of the\nregressor on the same dataset but where the weights of instances are\nadjusted according to the error of the current prediction. As such,\nsubsequent regressors focus more on difficult cases.\n\nThis class implements the algorithm known as AdaBoost.R2 [2].\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.14", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self,\n base_estimator=None, *,\n n_estimators=50,\n learning_rate=1.,\n loss='linear',\n random_state=None):\n\n super().__init__(\n base_estimator=base_estimator,\n n_estimators=n_estimators,\n learning_rate=learning_rate,\n random_state=random_state)\n\n self.loss = loss\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_boost", + "name": "_boost", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor._boost", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_boost/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor._boost.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_boost/iboost", + "name": "iboost", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor._boost.iboost", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The index of the current boost iteration." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_boost/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor._boost.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_boost/y", + "name": "y", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor._boost.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target values (class labels in classification, real numbers in\nregression)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_boost/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor._boost.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The current sample weights." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_boost/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor._boost.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "RandomState", + "default_value": "", + "description": "The RandomState instance used if the base estimator accepts a\n`random_state` attribute.\nControls also the bootstrap of the weights used to train the weak\nlearner.\nreplacement." + }, + "type": { + "kind": "NamedType", + "name": "RandomState" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Implement a single boost for regression\n\nPerform a single boost according to the AdaBoost.R2 algorithm and\nreturn the updated sample weights.", + "docstring": "Implement a single boost for regression\n\nPerform a single boost according to the AdaBoost.R2 algorithm and\nreturn the updated sample weights.\n\nParameters\n----------\niboost : int\n The index of the current boost iteration.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\nrandom_state : RandomState\n The RandomState instance used if the base estimator accepts a\n `random_state` attribute.\n Controls also the bootstrap of the weights used to train the weak\n learner.\n replacement.\n\nReturns\n-------\nsample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\nestimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\nestimator_error : float\n The regression error for the current boost.\n If None then boosting has terminated early.", + "code": " def _boost(self, iboost, X, y, sample_weight, random_state):\n \"\"\"Implement a single boost for regression\n\n Perform a single boost according to the AdaBoost.R2 algorithm and\n return the updated sample weights.\n\n Parameters\n ----------\n iboost : int\n The index of the current boost iteration.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\n y : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\n sample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\n random_state : RandomState\n The RandomState instance used if the base estimator accepts a\n `random_state` attribute.\n Controls also the bootstrap of the weights used to train the weak\n learner.\n replacement.\n\n Returns\n -------\n sample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\n estimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\n estimator_error : float\n The regression error for the current boost.\n If None then boosting has terminated early.\n \"\"\"\n estimator = self._make_estimator(random_state=random_state)\n\n # Weighted sampling of the training set with replacement\n bootstrap_idx = random_state.choice(\n np.arange(_num_samples(X)), 
size=_num_samples(X), replace=True,\n p=sample_weight\n )\n\n # Fit on the bootstrapped sample and obtain a prediction\n # for all samples in the training set\n X_ = _safe_indexing(X, bootstrap_idx)\n y_ = _safe_indexing(y, bootstrap_idx)\n estimator.fit(X_, y_)\n y_predict = estimator.predict(X)\n\n error_vect = np.abs(y_predict - y)\n sample_mask = sample_weight > 0\n masked_sample_weight = sample_weight[sample_mask]\n masked_error_vector = error_vect[sample_mask]\n\n error_max = masked_error_vector.max()\n if error_max != 0:\n masked_error_vector /= error_max\n\n if self.loss == 'square':\n masked_error_vector **= 2\n elif self.loss == 'exponential':\n masked_error_vector = 1. - np.exp(-masked_error_vector)\n\n # Calculate the average loss\n estimator_error = (masked_sample_weight * masked_error_vector).sum()\n\n if estimator_error <= 0:\n # Stop if fit is perfect\n return sample_weight, 1., 0.\n\n elif estimator_error >= 0.5:\n # Discard current estimator only if it isn't the only one\n if len(self.estimators_) > 1:\n self.estimators_.pop(-1)\n return None, None, None\n\n beta = estimator_error / (1. - estimator_error)\n\n # Boost weight using AdaBoost.R2 alg\n estimator_weight = self.learning_rate * np.log(1. / beta)\n\n if not iboost == self.n_estimators - 1:\n sample_weight[sample_mask] *= np.power(\n beta, (1. - masked_error_vector) * self.learning_rate\n )\n\n return sample_weight, estimator_weight, estimator_error" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_get_median_predict", + "name": "_get_median_predict", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor._get_median_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_get_median_predict/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor._get_median_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_get_median_predict/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor._get_median_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_get_median_predict/limit", + "name": "limit", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor._get_median_predict.limit", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_median_predict(self, X, limit):\n # Evaluate predictions of all estimators\n predictions = np.array([\n est.predict(X) for est in self.estimators_[:limit]]).T\n\n # Sort the predictions\n sorted_idx = np.argsort(predictions, axis=1)\n\n # Find index of median prediction for each sample\n weight_cdf = stable_cumsum(self.estimator_weights_[sorted_idx], axis=1)\n median_or_above = weight_cdf >= 0.5 * weight_cdf[:, -1][:, np.newaxis]\n median_idx = median_or_above.argmax(axis=1)\n\n median_estimators = sorted_idx[np.arange(_num_samples(X)), 
median_idx]\n\n # Return median predictions\n return predictions[np.arange(_num_samples(X)), median_estimators]" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_validate_estimator", + "name": "_validate_estimator", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor._validate_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/_validate_estimator/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor._validate_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the estimator and set the base_estimator_ attribute.", + "docstring": "Check the estimator and set the base_estimator_ attribute.", + "code": " def _validate_estimator(self):\n \"\"\"Check the estimator and set the base_estimator_ attribute.\"\"\"\n super()._validate_estimator(\n default=DecisionTreeRegressor(max_depth=3))" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/fit", + "name": "fit", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/fit/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/fit/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/fit/y", + "name": "y", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target values (real numbers)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, the sample weights are initialized to\n1 / n_samples." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build a boosted regressor from the training set (X, y).", + "docstring": "Build a boosted regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n The target values (real numbers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, the sample weights are initialized to\n 1 / n_samples.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Build a boosted regressor from the training set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n y : array-like of shape (n_samples,)\n The target values (real numbers).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, the sample weights are initialized to\n 1 / n_samples.\n\n Returns\n -------\n self : object\n \"\"\"\n # Check loss\n if self.loss not in ('linear', 'square', 'exponential'):\n raise ValueError(\n \"loss must be 'linear', 'square', or 'exponential'\")\n\n # Fit\n return super().fit(X, y, sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/predict", + "name": "predict", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/predict/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/predict/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict regression value for X.\n\nThe predicted regression value of an input sample is computed\nas the weighted median prediction of the classifiers in the ensemble.", + "docstring": "Predict regression value for X.\n\nThe predicted regression value of an input sample is computed\nas the weighted median prediction of the classifiers in the ensemble.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n The predicted regression values.", + "code": " def predict(self, X):\n \"\"\"Predict regression value for X.\n\n The predicted regression value of an input sample is computed\n as the weighted median prediction of the classifiers in the ensemble.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n The predicted regression values.\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n\n return self._get_median_predict(X, len(self.estimators_))" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/staged_predict", + "name": "staged_predict", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.staged_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/staged_predict/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.staged_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/AdaBoostRegressor/staged_predict/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.AdaBoostRegressor.staged_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return staged predictions for X.\n\nThe predicted regression value of an input sample is computed\nas the weighted median prediction of the classifiers in the ensemble.\n\nThis generator method yields the ensemble prediction after each\niteration of boosting and therefore allows monitoring, such as to\ndetermine the prediction on a test set after each boost.", + "docstring": "Return staged predictions for X.\n\nThe predicted regression value of an input sample is computed\nas the weighted median prediction of the classifiers in the ensemble.\n\nThis generator method yields the ensemble prediction after each\niteration of boosting and therefore allows monitoring, such as to\ndetermine the prediction on a test set after each boost.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\nYields\n-------\ny : generator of ndarray of shape (n_samples,)\n The predicted regression values.", + "code": " def staged_predict(self, X):\n \"\"\"Return staged predictions for X.\n\n The predicted regression value of an input sample is computed\n as the weighted median prediction of the classifiers in the ensemble.\n\n This generator method yields the ensemble prediction after each\n iteration of boosting and therefore allows monitoring, such as to\n determine the prediction on a test set after each boost.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\n Yields\n -------\n y : generator of ndarray of shape (n_samples,)\n The predicted regression values.\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n\n for i, _ in enumerate(self.estimators_, 1):\n yield self._get_median_predict(X, limit=i)" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/__init__", + "name": "__init__", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/__init__/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.__init__.base_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/__init__/n_estimators", + "name": "n_estimators", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.__init__.n_estimators", + "default_value": "50", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/__init__/estimator_params", + 
"name": "estimator_params", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.__init__.estimator_params", + "default_value": "tuple()", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.__init__.learning_rate", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/__init__/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for AdaBoost estimators.\n\nWarning: This class should not be used directly. Use derived classes\ninstead.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self,\n base_estimator=None, *,\n n_estimators=50,\n estimator_params=tuple(),\n learning_rate=1.,\n random_state=None):\n\n super().__init__(\n base_estimator=base_estimator,\n n_estimators=n_estimators,\n estimator_params=estimator_params)\n\n self.learning_rate = learning_rate\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/_boost", + "name": "_boost", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting._boost", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/_boost/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting._boost.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/_boost/iboost", + "name": "iboost", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting._boost.iboost", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The index of the current boost iteration." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/_boost/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting._boost.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/_boost/y", + "name": "y", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting._boost.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target values (class labels)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/_boost/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting._boost.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The current sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/_boost/random_state", + "name": "random_state", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting._boost.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "RandomState", + "default_value": "", + "description": "The current random number generator" + }, + "type": { + "kind": "NamedType", + "name": "RandomState" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Implement a single boost.\n\nWarning: This method needs to be overridden by subclasses.", + "docstring": "Implement a single boost.\n\nWarning: This method needs to be overridden by subclasses.\n\nParameters\n----------\niboost : int\n The index of the current boost iteration.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n The target values (class labels).\n\nsample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\nrandom_state : RandomState\n The current random number generator\n\nReturns\n-------\nsample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\nestimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\nerror : float\n The classification error for the current boost.\n If None then boosting has terminated early.", + "code": " @abstractmethod\n def _boost(self, iboost, X, y, sample_weight, random_state):\n \"\"\"Implement a single boost.\n\n Warning: This method needs to be overridden by subclasses.\n\n Parameters\n ----------\n iboost : int\n The index of the current boost iteration.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n y : array-like of shape (n_samples,)\n The target values (class labels).\n\n sample_weight : array-like of shape (n_samples,)\n The current sample weights.\n\n random_state : RandomState\n The current random number generator\n\n Returns\n -------\n sample_weight : array-like of shape (n_samples,) or None\n The reweighted sample weights.\n If None then boosting has terminated early.\n\n estimator_weight : float\n The weight for the current boost.\n If None then boosting has terminated early.\n\n error : float\n The classification error for the current boost.\n If None then boosting has terminated early.\n \"\"\"\n pass" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/_check_X", + "name": "_check_X", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting._check_X", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/_check_X/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting._check_X.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/_check_X/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting._check_X.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_X(self, X):\n return check_array(X, accept_sparse=['csr', 'csc'], ensure_2d=True,\n allow_nd=True, dtype=None)" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/feature_importances_@getter", + "name": "feature_importances_", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.feature_importances_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/feature_importances_/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.feature_importances_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature. It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.", + "docstring": "The impurity-based feature importances.\n\nThe higher, the more important the feature.\nThe importance of a feature is computed as the (normalized)\ntotal reduction of the criterion brought by that feature. It is also\nknown as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). 
See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n\nReturns\n-------\nfeature_importances_ : ndarray of shape (n_features,)\n The feature importances.", + "code": " @property\n def feature_importances_(self):\n \"\"\"The impurity-based feature importances.\n\n The higher, the more important the feature.\n The importance of a feature is computed as the (normalized)\n total reduction of the criterion brought by that feature. It is also\n known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n Returns\n -------\n feature_importances_ : ndarray of shape (n_features,)\n The feature importances.\n \"\"\"\n if self.estimators_ is None or len(self.estimators_) == 0:\n raise ValueError(\"Estimator not fitted, \"\n \"call `fit` before `feature_importances_`.\")\n\n try:\n norm = self.estimator_weights_.sum()\n return (sum(weight * clf.feature_importances_ for weight, clf\n in zip(self.estimator_weights_, self.estimators_))\n / norm)\n\n except AttributeError as e:\n raise AttributeError(\n \"Unable to compute feature importances \"\n \"since base_estimator does not have a \"\n \"feature_importances_ attribute\") from e" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/fit", + "name": "fit", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/fit/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/fit/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/fit/y", + "name": "y", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target values (class labels in classification, real numbers in\nregression)." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, the sample weights are initialized to\n1 / n_samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build a boosted classifier/regressor from the training set (X, y).", + "docstring": "Build a boosted classifier/regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, the sample weights are initialized to\n 1 / n_samples.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Build a boosted classifier/regressor from the training set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\n y : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, the sample weights are initialized to\n 1 / n_samples.\n\n Returns\n -------\n self : object\n \"\"\"\n # Check parameters\n if self.learning_rate <= 0:\n raise ValueError(\"learning_rate must be greater than zero\")\n\n X, y = self._validate_data(X, y,\n accept_sparse=['csr', 'csc'],\n ensure_2d=True,\n allow_nd=True,\n dtype=None,\n y_numeric=is_regressor(self))\n\n sample_weight = _check_sample_weight(sample_weight, X, np.float64)\n sample_weight /= sample_weight.sum()\n if np.any(sample_weight < 0):\n raise ValueError(\"sample_weight cannot contain negative weights\")\n\n # Check parameters\n self._validate_estimator()\n\n # Clear any previous fit results\n self.estimators_ = []\n self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64)\n self.estimator_errors_ = np.ones(self.n_estimators, dtype=np.float64)\n\n # Initializion of the random number instance that will be used to\n # generate a seed at each iteration\n random_state = check_random_state(self.random_state)\n\n for iboost in range(self.n_estimators):\n # Boosting step\n sample_weight, estimator_weight, estimator_error = self._boost(\n iboost,\n X, y,\n sample_weight,\n random_state)\n\n # Early termination\n if sample_weight is None:\n break\n\n self.estimator_weights_[iboost] = estimator_weight\n self.estimator_errors_[iboost] = estimator_error\n\n # Stop if error is zero\n if estimator_error == 0:\n break\n\n sample_weight_sum = np.sum(sample_weight)\n\n # Stop if the sum of sample weights has become non-positive\n if sample_weight_sum <= 0:\n break\n\n if iboost < self.n_estimators - 1:\n # Normalize\n sample_weight /= sample_weight_sum\n\n return self" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/staged_score", + "name": "staged_score", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.staged_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/staged_score/self", + "name": "self", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.staged_score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/staged_score/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.staged_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Sparse matrix can be CSC, CSR, COO,\nDOK, or LIL. COO, DOK, and LIL are converted to CSR." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/staged_score/y", + "name": "y", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.staged_score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Labels for X." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/BaseWeightBoosting/staged_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.ensemble._weight_boosting.BaseWeightBoosting.staged_score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return staged scores for X, y.\n\nThis generator method yields the ensemble score after each iteration of\nboosting and therefore allows monitoring, such as to determine the\nscore on a test set after each boost.", + "docstring": "Return staged scores for X, y.\n\nThis generator method yields the ensemble score after each iteration of\nboosting and therefore allows monitoring, such as to determine the\nscore on a test set after each boost.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. COO, DOK, and LIL are converted to CSR.\n\ny : array-like of shape (n_samples,)\n Labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nYields\n------\nz : float", + "code": " def staged_score(self, X, y, sample_weight=None):\n \"\"\"Return staged scores for X, y.\n\n This generator method yields the ensemble score after each iteration of\n boosting and therefore allows monitoring, such as to determine the\n score on a test set after each boost.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrix can be CSC, CSR, COO,\n DOK, or LIL. 
COO, DOK, and LIL are converted to CSR.\n\n y : array-like of shape (n_samples,)\n Labels for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Yields\n ------\n z : float\n \"\"\"\n X = self._check_X(X)\n\n for y_pred in self.staged_predict(X):\n if is_classifier(self):\n yield accuracy_score(y, y_pred, sample_weight=sample_weight)\n else:\n yield r2_score(y, y_pred, sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/_samme_proba", + "name": "_samme_proba", + "qname": "sklearn.ensemble._weight_boosting._samme_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/_samme_proba/estimator", + "name": "estimator", + "qname": "sklearn.ensemble._weight_boosting._samme_proba.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/_samme_proba/n_classes", + "name": "n_classes", + "qname": "sklearn.ensemble._weight_boosting._samme_proba.n_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble._weight_boosting/_samme_proba/X", + "name": "X", + "qname": "sklearn.ensemble._weight_boosting._samme_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate algorithm 4, step 2, equation c) of Zhu et al [1].", + "docstring": "Calculate algorithm 4, step 2, equation c) of Zhu et al [1].\n\nReferences\n----------\n.. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.", + "code": "def _samme_proba(estimator, n_classes, X):\n \"\"\"Calculate algorithm 4, step 2, equation c) of Zhu et al [1].\n\n References\n ----------\n .. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\", 2009.\n\n \"\"\"\n proba = estimator.predict_proba(X)\n\n # Displace zero probabilities so the log is defined.\n # Also fix negative elements which may occur with\n # negative sample weights.\n np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba)\n log_proba = np.log(proba)\n\n return (n_classes - 1) * (log_proba - (1. 
/ n_classes)\n * log_proba.sum(axis=1)[:, np.newaxis])" + }, + { + "id": "scikit-learn/sklearn.ensemble.setup/configuration", + "name": "configuration", + "qname": "sklearn.ensemble.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.ensemble.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.ensemble.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.ensemble.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.ensemble.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package=\"\", top_path=None):\n config = Configuration(\"ensemble\", parent_package, top_path)\n\n config.add_extension(\"_gradient_boosting\",\n sources=[\"_gradient_boosting.pyx\"],\n include_dirs=[numpy.get_include()])\n\n config.add_subpackage(\"tests\")\n\n # Histogram-based gradient boosting files\n config.add_extension(\n \"_hist_gradient_boosting._gradient_boosting\",\n sources=[\"_hist_gradient_boosting/_gradient_boosting.pyx\"],\n include_dirs=[numpy.get_include()])\n\n config.add_extension(\"_hist_gradient_boosting.histogram\",\n sources=[\"_hist_gradient_boosting/histogram.pyx\"],\n include_dirs=[numpy.get_include()])\n\n config.add_extension(\"_hist_gradient_boosting.splitting\",\n sources=[\"_hist_gradient_boosting/splitting.pyx\"],\n include_dirs=[numpy.get_include()])\n\n config.add_extension(\"_hist_gradient_boosting._binning\",\n sources=[\"_hist_gradient_boosting/_binning.pyx\"],\n include_dirs=[numpy.get_include()])\n\n config.add_extension(\"_hist_gradient_boosting._predictor\",\n sources=[\"_hist_gradient_boosting/_predictor.pyx\"],\n include_dirs=[numpy.get_include()])\n\n config.add_extension(\"_hist_gradient_boosting._loss\",\n sources=[\"_hist_gradient_boosting/_loss.pyx\"],\n include_dirs=[numpy.get_include()])\n\n config.add_extension(\"_hist_gradient_boosting._bitset\",\n sources=[\"_hist_gradient_boosting/_bitset.pyx\"],\n include_dirs=[numpy.get_include()])\n\n config.add_extension(\"_hist_gradient_boosting.common\",\n sources=[\"_hist_gradient_boosting/common.pyx\"],\n include_dirs=[numpy.get_include()])\n\n config.add_extension(\"_hist_gradient_boosting.utils\",\n sources=[\"_hist_gradient_boosting/utils.pyx\"],\n include_dirs=[numpy.get_include()])\n\n config.add_subpackage(\"_hist_gradient_boosting.tests\")\n\n return config" + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/__init__", + "name": "__init__", + "qname": "sklearn.externals._arff.ArffDecoder.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/__init__/self", + "name": "self", + "qname": "sklearn.externals._arff.ArffDecoder.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "An ARFF decoder.", + "docstring": "Constructor.", + "code": " def 
__init__(self):\n '''Constructor.'''\n self._conversors = []\n self._current_line = 0" + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode", + "name": "_decode", + "qname": "sklearn.externals._arff.ArffDecoder._decode", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode/self", + "name": "self", + "qname": "sklearn.externals._arff.ArffDecoder._decode.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode/s", + "name": "s", + "qname": "sklearn.externals._arff.ArffDecoder._decode.s", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode/encode_nominal", + "name": "encode_nominal", + "qname": "sklearn.externals._arff.ArffDecoder._decode.encode_nominal", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode/matrix_type", + "name": "matrix_type", + "qname": "sklearn.externals._arff.ArffDecoder._decode.matrix_type", + "default_value": "DENSE", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Do the job the ``encode``.", + "docstring": "Do the job the ``encode``.", + "code": " def _decode(self, s, encode_nominal=False, matrix_type=DENSE):\n '''Do the job the ``encode``.'''\n\n # Make sure this method is idempotent\n self._current_line = 0\n\n # If string, convert to a list of lines\n if isinstance(s, str):\n s = s.strip('\\r\\n ').replace('\\r\\n', '\\n').split('\\n')\n\n # Create the return object\n obj: ArffContainerType = {\n 'description': '',\n 'relation': '',\n 'attributes': [],\n 'data': []\n }\n attribute_names = {}\n\n # Create the data helper object\n data = _get_data_object_for_decoding(matrix_type)\n\n # Read all lines\n STATE = _TK_DESCRIPTION\n s = iter(s)\n for row in s:\n self._current_line += 1\n # Ignore empty lines\n row = row.strip(' \\r\\n')\n if not row: continue\n\n u_row = row.upper()\n\n # DESCRIPTION -----------------------------------------------------\n if u_row.startswith(_TK_DESCRIPTION) and STATE == _TK_DESCRIPTION:\n obj['description'] += self._decode_comment(row) + '\\n'\n # -----------------------------------------------------------------\n\n # RELATION --------------------------------------------------------\n elif u_row.startswith(_TK_RELATION):\n if STATE != _TK_DESCRIPTION:\n raise BadLayout()\n\n STATE = _TK_RELATION\n obj['relation'] = self._decode_relation(row)\n # -----------------------------------------------------------------\n\n # ATTRIBUTE -------------------------------------------------------\n elif u_row.startswith(_TK_ATTRIBUTE):\n if STATE != _TK_RELATION and STATE != _TK_ATTRIBUTE:\n raise BadLayout()\n\n STATE = _TK_ATTRIBUTE\n\n attr = self._decode_attribute(row)\n if attr[0] in attribute_names:\n raise BadAttributeName(attr[0], attribute_names[attr[0]])\n else:\n attribute_names[attr[0]] = 
self._current_line\n obj['attributes'].append(attr)\n\n if isinstance(attr[1], (list, tuple)):\n if encode_nominal:\n conversor = EncodedNominalConversor(attr[1])\n else:\n conversor = NominalConversor(attr[1])\n else:\n CONVERSOR_MAP = {'STRING': str,\n 'INTEGER': lambda x: int(float(x)),\n 'NUMERIC': float,\n 'REAL': float}\n conversor = CONVERSOR_MAP[attr[1]]\n\n self._conversors.append(conversor)\n # -----------------------------------------------------------------\n\n # DATA ------------------------------------------------------------\n elif u_row.startswith(_TK_DATA):\n if STATE != _TK_ATTRIBUTE:\n raise BadLayout()\n\n break\n # -----------------------------------------------------------------\n\n # COMMENT ---------------------------------------------------------\n elif u_row.startswith(_TK_COMMENT):\n pass\n # -----------------------------------------------------------------\n else:\n # Never found @DATA\n raise BadLayout()\n\n def stream():\n for row in s:\n self._current_line += 1\n row = row.strip()\n # Ignore empty lines and comment lines.\n if row and not row.startswith(_TK_COMMENT):\n yield row\n\n # Alter the data object\n obj['data'] = data.decode_rows(stream(), self._conversors)\n if obj['description'].endswith('\n'):\n obj['description'] = obj['description'][:-1]\n\n return obj" + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode_attribute", + "name": "_decode_attribute", + "qname": "sklearn.externals._arff.ArffDecoder._decode_attribute", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode_attribute/self", + "name": "self", + "qname": "sklearn.externals._arff.ArffDecoder._decode_attribute.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode_attribute/s", + "name": "s", + "qname": "sklearn.externals._arff.ArffDecoder._decode_attribute.s", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "(INTERNAL) Decodes an attribute line.\n\n The attribute is the most complex declaration in an arff file. All\n attributes must follow the template::\n\n @attribute <attribute-name> <datatype>\n\n where ``attribute-name`` is a string, quoted if the name contains any\n whitespace, and ``datatype`` can be:\n\n - Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n - Strings as ``STRING``.\n - Dates (NOT IMPLEMENTED).\n - Nominal attributes with format:\n\n {<nominal-name1>, <nominal-name2>, <nominal-name3>, ...}\n\n The nominal names follow the rules for the attribute names, i.e., they\n must be quoted if the name contains whitespaces.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\n\" characters.\n\n :param s: a normalized string.\n :return: a tuple (ATTRIBUTE_NAME, TYPE_OR_VALUES).", + "docstring": "(INTERNAL) Decodes an attribute line.\n\n The attribute is the most complex declaration in an arff file. 
All\n attributes must follow the template::\n\n @attribute <attribute-name> <datatype>\n\n where ``attribute-name`` is a string, quoted if the name contains any\n whitespace, and ``datatype`` can be:\n\n - Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n - Strings as ``STRING``.\n - Dates (NOT IMPLEMENTED).\n - Nominal attributes with format:\n\n {<nominal-name1>, <nominal-name2>, <nominal-name3>, ...}\n\n The nominal names follow the rules for the attribute names, i.e., they\n must be quoted if the name contains whitespaces.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\r\n\" characters.\n\n :param s: a normalized string.\n :return: a tuple (ATTRIBUTE_NAME, TYPE_OR_VALUES).\n ", + "code": " def _decode_attribute(self, s):\n '''(INTERNAL) Decodes an attribute line.\n\n The attribute is the most complex declaration in an arff file. All\n attributes must follow the template::\n\n @attribute <attribute-name> <datatype>\n\n where ``attribute-name`` is a string, quoted if the name contains any\n whitespace, and ``datatype`` can be:\n\n - Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n - Strings as ``STRING``.\n - Dates (NOT IMPLEMENTED).\n - Nominal attributes with format:\n\n {<nominal-name1>, <nominal-name2>, <nominal-name3>, ...}\n\n The nominal names follow the rules for the attribute names, i.e., they\n must be quoted if the name contains whitespaces.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\\r\\n\" characters.\n\n :param s: a normalized string.\n :return: a tuple (ATTRIBUTE_NAME, TYPE_OR_VALUES).\n '''\n _, v = s.split(' ', 1)\n v = v.strip()\n\n # Verify the general structure of declaration\n m = _RE_ATTRIBUTE.match(v)\n if not m:\n raise BadAttributeFormat()\n\n # Extracts the raw name and type\n name, type_ = m.groups()\n\n # Extracts the final name\n name = str(name.strip('\"\''))\n\n # Extracts the final type\n if type_[:1] == \"{\" and type_[-1:] == \"}\":\n try:\n type_ = _parse_values(type_.strip('{} '))\n except Exception:\n raise BadAttributeType()\n if isinstance(type_, dict):\n raise BadAttributeType()\n\n else:\n # If not nominal, verify the type name\n type_ = str(type_).upper()\n if type_ not in ['NUMERIC', 'REAL', 'INTEGER', 'STRING']:\n raise BadAttributeType()\n\n return (name, type_)" + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode_comment", + "name": "_decode_comment", + "qname": "sklearn.externals._arff.ArffDecoder._decode_comment", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode_comment/self", + "name": "self", + "qname": "sklearn.externals._arff.ArffDecoder._decode_comment.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode_comment/s", + "name": "s", + "qname": "sklearn.externals._arff.ArffDecoder._decode_comment.s", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "(INTERNAL) Decodes a comment line.\n\n Comments are single line strings starting, obligatorily, with the ``%``\n character, and can have any symbol, including whitespaces or special\n characters.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\n\" 
characters.\n\n :param s: a normalized string.\n :return: a string with the decoded comment.", + "docstring": "(INTERNAL) Decodes a comment line.\n\n Comments are single line strings starting, obligatorily, with the ``%``\n character, and can have any symbol, including whitespaces or special\n characters.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\r\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded comment.\n ", + "code": " def _decode_comment(self, s):\n '''(INTERNAL) Decodes a comment line.\n\n Comments are single line strings starting, obligatorily, with the ``%``\n character, and can have any symbol, including whitespaces or special\n characters.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\\r\\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded comment.\n '''\n res = re.sub(r'^\\%( )?', '', s)\n return res" + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode_relation", + "name": "_decode_relation", + "qname": "sklearn.externals._arff.ArffDecoder._decode_relation", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode_relation/self", + "name": "self", + "qname": "sklearn.externals._arff.ArffDecoder._decode_relation.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/_decode_relation/s", + "name": "s", + "qname": "sklearn.externals._arff.ArffDecoder._decode_relation.s", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "(INTERNAL) Decodes a relation line.\n\nThe relation declaration is a line with the format ``@RELATION\n<relation-name>``, where ``relation-name`` is a string. The string must\nstart with alphabetic character and must be quoted if the name includes\nspaces, otherwise this method will raise a `BadRelationFormat` exception.\n\nThis method must receive a normalized string, i.e., a string without\npadding, including the \"\n\" characters.\n\n:param s: a normalized string.\n:return: a string with the decoded relation name.", + "docstring": "(INTERNAL) Decodes a relation line.\n\n The relation declaration is a line with the format ``@RELATION\n <relation-name>``, where ``relation-name`` is a string. The string must\n start with alphabetic character and must be quoted if the name includes\n spaces, otherwise this method will raise a `BadRelationFormat` exception.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\r\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded relation name.\n ", + "code": " def _decode_relation(self, s):\n '''(INTERNAL) Decodes a relation line.\n\n The relation declaration is a line with the format ``@RELATION\n <relation-name>``, where ``relation-name`` is a string. 
The string must\n start with alphabetic character and must be quoted if the name includes\n spaces, otherwise this method will raise a `BadRelationFormat` exception.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\\r\\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded relation name.\n '''\n _, v = s.split(' ', 1)\n v = v.strip()\n\n if not _RE_RELATION.match(v):\n raise BadRelationFormat()\n\n res = str(v.strip('\"\\''))\n return res" + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/decode", + "name": "decode", + "qname": "sklearn.externals._arff.ArffDecoder.decode", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/decode/self", + "name": "self", + "qname": "sklearn.externals._arff.ArffDecoder.decode.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/decode/s", + "name": "s", + "qname": "sklearn.externals._arff.ArffDecoder.decode.s", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/decode/encode_nominal", + "name": "encode_nominal", + "qname": "sklearn.externals._arff.ArffDecoder.decode.encode_nominal", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffDecoder/decode/return_type", + "name": "return_type", + "qname": "sklearn.externals._arff.ArffDecoder.decode.return_type", + "default_value": "DENSE", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the Python representation of a given ARFF file.\n\nWhen a file object is passed as an argument, this method reads lines\niteratively, avoiding to load unnecessary information to the memory.\n\n:param s: a string or file object with the ARFF file.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_.", + "docstring": "Returns the Python representation of a given ARFF file.\n\nWhen a file object is passed as an argument, this method reads lines\niteratively, avoiding to load unnecessary information to the memory.\n\n:param s: a string or file object with the ARFF file.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. 
Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_.\n '''\n try:\n return self._decode(s, encode_nominal=encode_nominal,\n matrix_type=return_type)\n except ArffException as e:\n e.line = self._current_line\n raise e" + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/_encode_attribute", + "name": "_encode_attribute", + "qname": "sklearn.externals._arff.ArffEncoder._encode_attribute", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/_encode_attribute/self", + "name": "self", + "qname": "sklearn.externals._arff.ArffEncoder._encode_attribute.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/_encode_attribute/name", + "name": "name", + "qname": "sklearn.externals._arff.ArffEncoder._encode_attribute.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/_encode_attribute/type_", + "name": "type_", + "qname": "sklearn.externals._arff.ArffEncoder._encode_attribute.type_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "(INTERNAL) Encodes an attribute line.\n\nThe attribute follow the template::\n\n @attribute <attribute-name> <datatype>\n\nwhere ``attribute-name`` is a string, and ``datatype`` can be:\n\n- Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n- Strings as ``STRING``.\n- Dates (NOT IMPLEMENTED).\n- Nominal attributes with format:\n\n {<nominal-name1>, <nominal-name2>, <nominal-name3>, ...}\n\nThis method must receive a the name of the attribute and its type, if\nthe attribute type is nominal, ``type`` must be a list of values.\n\n:param name: a string.\n:param type_: a string or a list of string.\n:return: a string with the encoded attribute declaration.", + "docstring": "(INTERNAL) Encodes an attribute line.\n\nThe attribute follow the template::\n\n @attribute <attribute-name> <datatype>\n\nwhere ``attribute-name`` is a string, and ``datatype`` can be:\n\n- Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n- Strings as ``STRING``.\n- Dates (NOT IMPLEMENTED).\n- Nominal attributes with format:\n\n {<nominal-name1>, <nominal-name2>, <nominal-name3>, ...}\n\nThis method must receive a the name of the attribute and its type, if\nthe attribute type is nominal, ``type`` must be a list of values.\n\n:param name: a string.\n:param type_: a 
string or a list of string.\n:return: a string with the encoded attribute declaration.", + "code": " def _encode_attribute(self, name, type_):\n '''(INTERNAL) Encodes an attribute line.\n\n The attribute follow the template::\n\n @attribute <attribute-name> <datatype>\n\n where ``attribute-name`` is a string, and ``datatype`` can be:\n\n - Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n - Strings as ``STRING``.\n - Dates (NOT IMPLEMENTED).\n - Nominal attributes with format:\n\n {<nominal-name1>, <nominal-name2>, <nominal-name3>, ...}\n\n This method must receive a the name of the attribute and its type, if\n the attribute type is nominal, ``type`` must be a list of values.\n\n :param name: a string.\n :param type_: a string or a list of string.\n :return: a string with the encoded attribute declaration.\n '''\n for char in ' %{},':\n if char in name:\n name = '\"%s\"'%name\n break\n\n if isinstance(type_, (tuple, list)):\n type_tmp = ['%s' % encode_string(type_k) for type_k in type_]\n type_ = '{%s}'%(', '.join(type_tmp))\n\n return '%s %s %s'%(_TK_ATTRIBUTE, name, type_)" + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/_encode_comment", + "name": "_encode_comment", + "qname": "sklearn.externals._arff.ArffEncoder._encode_comment", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/_encode_comment/self", + "name": "self", + "qname": "sklearn.externals._arff.ArffEncoder._encode_comment.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/_encode_comment/s", + "name": "s", + "qname": "sklearn.externals._arff.ArffEncoder._encode_comment.s", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "(INTERNAL) Encodes a comment line.\n\nComments are single line strings starting, obligatorily, with the ``%``\ncharacter, and can have any symbol, including whitespaces or special\ncharacters.\n\nIf ``s`` is None, this method will simply return an empty comment.\n\n:param s: (OPTIONAL) string.\n:return: a string with the encoded comment line.", + "docstring": "(INTERNAL) Encodes a comment line.\n\nComments are single line strings starting, obligatorily, with the ``%``\ncharacter, and can have any symbol, including whitespaces or special\ncharacters.\n\nIf ``s`` is None, this method will simply return an empty comment.\n\n:param s: (OPTIONAL) string.\n:return: a string with the encoded comment line.", + "code": " def _encode_comment(self, s=''):\n '''(INTERNAL) Encodes a comment line.\n\n Comments are single line strings starting, obligatorily, with the ``%``\n character, and can have any symbol, including whitespaces or special\n characters.\n\n If ``s`` is None, this method will simply return an empty comment.\n\n :param s: (OPTIONAL) string.\n :return: a string with the encoded comment line.\n '''\n if s:\n return '%s %s'%(_TK_COMMENT, s)\n else:\n return '%s' % _TK_COMMENT" + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/_encode_relation", + "name": "_encode_relation", + "qname": "sklearn.externals._arff.ArffEncoder._encode_relation", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/_encode_relation/self", + "name": "self", + 
"qname": "sklearn.externals._arff.ArffEncoder._encode_relation.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/_encode_relation/name", + "name": "name", + "qname": "sklearn.externals._arff.ArffEncoder._encode_relation.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "(INTERNAL) Decodes a relation line.\n\nThe relation declaration is a line with the format ``@RELATION\n``, where ``relation-name`` is a string.\n\n:param name: a string.\n:return: a string with the encoded relation declaration.", + "docstring": "(INTERNAL) Decodes a relation line.\n\nThe relation declaration is a line with the format ``@RELATION\n``, where ``relation-name`` is a string.\n\n:param name: a string.\n:return: a string with the encoded relation declaration.", + "code": " def _encode_relation(self, name):\n '''(INTERNAL) Decodes a relation line.\n\n The relation declaration is a line with the format ``@RELATION\n ``, where ``relation-name`` is a string.\n\n :param name: a string.\n :return: a string with the encoded relation declaration.\n '''\n for char in ' %{},':\n if char in name:\n name = '\"%s\"'%name\n break\n\n return '%s %s'%(_TK_RELATION, name)" + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/encode", + "name": "encode", + "qname": "sklearn.externals._arff.ArffEncoder.encode", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/encode/self", + "name": "self", + "qname": "sklearn.externals._arff.ArffEncoder.encode.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/encode/obj", + "name": "obj", + "qname": "sklearn.externals._arff.ArffEncoder.encode.obj", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Encodes a given object to an ARFF file.\n\n:param obj: the object containing the ARFF information.\n:return: the ARFF file as an string.", + "docstring": "Encodes a given object to an ARFF file.\n\n:param obj: the object containing the ARFF information.\n:return: the ARFF file as an string.", + "code": " def encode(self, obj):\n '''Encodes a given object to an ARFF file.\n\n :param obj: the object containing the ARFF information.\n :return: the ARFF file as an string.\n '''\n data = [row for row in self.iter_encode(obj)]\n\n return '\\n'.join(data)" + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/iter_encode", + "name": "iter_encode", + "qname": "sklearn.externals._arff.ArffEncoder.iter_encode", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/iter_encode/self", + "name": "self", + "qname": "sklearn.externals._arff.ArffEncoder.iter_encode.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": 
"", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffEncoder/iter_encode/obj", + "name": "obj", + "qname": "sklearn.externals._arff.ArffEncoder.iter_encode.obj", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "The iterative version of `arff.ArffEncoder.encode`.\n\nThis encodes iteratively a given object and return, one-by-one, the\nlines of the ARFF file.\n\n:param obj: the object containing the ARFF information.\n:return: (yields) the ARFF file as strings.", + "docstring": "The iterative version of `arff.ArffEncoder.encode`.\n\nThis encodes iteratively a given object and return, one-by-one, the\nlines of the ARFF file.\n\n:param obj: the object containing the ARFF information.\n:return: (yields) the ARFF file as strings.", + "code": " def iter_encode(self, obj):\n '''The iterative version of `arff.ArffEncoder.encode`.\n\n This encodes iteratively a given object and return, one-by-one, the\n lines of the ARFF file.\n\n :param obj: the object containing the ARFF information.\n :return: (yields) the ARFF file as strings.\n '''\n # DESCRIPTION\n if obj.get('description', None):\n for row in obj['description'].split('\\n'):\n yield self._encode_comment(row)\n\n # RELATION\n if not obj.get('relation'):\n raise BadObject('Relation name not found or with invalid value.')\n\n yield self._encode_relation(obj['relation'])\n yield ''\n\n # ATTRIBUTES\n if not obj.get('attributes'):\n raise BadObject('Attributes not found.')\n\n attribute_names = set()\n for attr in obj['attributes']:\n # Verify for bad object format\n if not isinstance(attr, (tuple, list)) or \\\n len(attr) != 2 or \\\n not isinstance(attr[0], str):\n raise BadObject('Invalid attribute declaration \"%s\"'%str(attr))\n\n if isinstance(attr[1], str):\n # Verify for invalid types\n if attr[1] not in _SIMPLE_TYPES:\n raise BadObject('Invalid attribute type \"%s\"'%str(attr))\n\n # Verify for bad object format\n elif not isinstance(attr[1], (tuple, list)):\n raise BadObject('Invalid attribute type \"%s\"'%str(attr))\n\n # Verify attribute name is not used twice\n if attr[0] in attribute_names:\n raise BadObject('Trying to use attribute name \"%s\" for the '\n 'second time.' 
% str(attr[0]))\n else:\n attribute_names.add(attr[0])\n\n yield self._encode_attribute(attr[0], attr[1])\n yield ''\n attributes = obj['attributes']\n\n # DATA\n yield _TK_DATA\n if 'data' in obj:\n data = _get_data_object_for_encoding(obj.get('data'))\n yield from data.encode_data(obj.get('data'), attributes)\n\n yield ''" + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffException/__init__", + "name": "__init__", + "qname": "sklearn.externals._arff.ArffException.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/ArffException/__init__/self", + "name": "self", + "qname": "sklearn.externals._arff.ArffException.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __init__(self):\n self.line = -1" + }, + { + "id": "scikit-learn/sklearn.externals._arff/ArffException/__str__", + "name": "__str__", + "qname": "sklearn.externals._arff.ArffException.__str__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/ArffException/__str__/self", + "name": "self", + "qname": "sklearn.externals._arff.ArffException.__str__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __str__(self):\n return self.message%self.line" + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadAttributeName/__init__", + "name": "__init__", + "qname": "sklearn.externals._arff.BadAttributeName.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/BadAttributeName/__init__/self", + "name": "self", + "qname": "sklearn.externals._arff.BadAttributeName.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadAttributeName/__init__/value", + "name": "value", + "qname": "sklearn.externals._arff.BadAttributeName.__init__.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadAttributeName/__init__/value2", + "name": "value2", + "qname": "sklearn.externals._arff.BadAttributeName.__init__.value2", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Error raised when an attribute name is provided twice the attribute\ndeclaration.", + "docstring": "", + "code": " def __init__(self, value, value2):\n super().__init__()\n self.message = (\n ('Bad @ATTRIBUTE name %s at line' % value) +\n ' %d, this name is already in use in line' +\n (' %d.' 
% value2)\n )" + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadDataFormat/__init__", + "name": "__init__", + "qname": "sklearn.externals._arff.BadDataFormat.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/BadDataFormat/__init__/self", + "name": "self", + "qname": "sklearn.externals._arff.BadDataFormat.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadDataFormat/__init__/value", + "name": "value", + "qname": "sklearn.externals._arff.BadDataFormat.__init__.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Error raised when some data instance is in an invalid format.", + "docstring": "", + "code": " def __init__(self, value):\n super().__init__()\n self.message = (\n 'Bad @DATA instance format in line %d: ' +\n ('%s' % value)\n )" + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadLayout/__init__", + "name": "__init__", + "qname": "sklearn.externals._arff.BadLayout.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/BadLayout/__init__/self", + "name": "self", + "qname": "sklearn.externals._arff.BadLayout.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadLayout/__init__/msg", + "name": "msg", + "qname": "sklearn.externals._arff.BadLayout.__init__.msg", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Error raised when the layout of the ARFF file has something wrong.", + "docstring": "", + "code": " def __init__(self, msg=''):\n super().__init__()\n if msg:\n self.message = BadLayout.message + ' ' + msg.replace('%', '%%')" + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadNominalFormatting/__init__", + "name": "__init__", + "qname": "sklearn.externals._arff.BadNominalFormatting.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/BadNominalFormatting/__init__/self", + "name": "self", + "qname": "sklearn.externals._arff.BadNominalFormatting.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadNominalFormatting/__init__/value", + "name": "value", + "qname": "sklearn.externals._arff.BadNominalFormatting.__init__.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Error raised when a nominal value with space is not properly quoted.", + "docstring": "", + "code": " def __init__(self, value):\n super().__init__()\n 
self.message = (\n ('Nominal data value \"%s\" not properly quoted in line ' % value) +\n '%d.'\n )" + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadNominalValue/__init__", + "name": "__init__", + "qname": "sklearn.externals._arff.BadNominalValue.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/BadNominalValue/__init__/self", + "name": "self", + "qname": "sklearn.externals._arff.BadNominalValue.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadNominalValue/__init__/value", + "name": "value", + "qname": "sklearn.externals._arff.BadNominalValue.__init__.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Error raised when a value in used in some data instance but is not\ndeclared into it respective attribute declaration.", + "docstring": "", + "code": " def __init__(self, value):\n super().__init__()\n self.message = (\n ('Data value %s not found in nominal declaration, ' % value)\n + 'at line %d.'\n )" + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadObject/__init__", + "name": "__init__", + "qname": "sklearn.externals._arff.BadObject.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/BadObject/__init__/self", + "name": "self", + "qname": "sklearn.externals._arff.BadObject.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadObject/__init__/msg", + "name": "msg", + "qname": "sklearn.externals._arff.BadObject.__init__.msg", + "default_value": "'Invalid object.'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Error raised when the object representing the ARFF file has something\nwrong.", + "docstring": "", + "code": " def __init__(self, msg='Invalid object.'):\n self.msg = msg" + }, + { + "id": "scikit-learn/sklearn.externals._arff/BadObject/__str__", + "name": "__str__", + "qname": "sklearn.externals._arff.BadObject.__str__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/BadObject/__str__/self", + "name": "self", + "qname": "sklearn.externals._arff.BadObject.__str__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __str__(self):\n return '%s' % self.msg" + }, + { + "id": "scikit-learn/sklearn.externals._arff/COOData/decode_rows", + "name": "decode_rows", + "qname": "sklearn.externals._arff.COOData.decode_rows", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/COOData/decode_rows/self", + "name": "self", + "qname": "sklearn.externals._arff.COOData.decode_rows.self", + 
"default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/COOData/decode_rows/stream", + "name": "stream", + "qname": "sklearn.externals._arff.COOData.decode_rows.stream", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/COOData/decode_rows/conversors", + "name": "conversors", + "qname": "sklearn.externals._arff.COOData.decode_rows.conversors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def decode_rows(self, stream, conversors):\n data, rows, cols = [], [], []\n for i, row in enumerate(stream):\n values = _parse_values(row)\n if not isinstance(values, dict):\n raise BadLayout()\n if not values:\n continue\n row_cols, values = zip(*sorted(values.items()))\n try:\n values = [value if value is None else conversors[key](value)\n for key, value in zip(row_cols, values)]\n except ValueError as exc:\n if 'float: ' in str(exc):\n raise BadNumericalValue()\n raise\n except IndexError:\n # conversor out of range\n raise BadDataFormat(row)\n\n data.extend(values)\n rows.extend([i] * len(values))\n cols.extend(row_cols)\n\n return data, rows, cols" + }, + { + "id": "scikit-learn/sklearn.externals._arff/COOData/encode_data", + "name": "encode_data", + "qname": "sklearn.externals._arff.COOData.encode_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/COOData/encode_data/self", + "name": "self", + "qname": "sklearn.externals._arff.COOData.encode_data.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/COOData/encode_data/data", + "name": "data", + "qname": "sklearn.externals._arff.COOData.encode_data.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/COOData/encode_data/attributes", + "name": "attributes", + "qname": "sklearn.externals._arff.COOData.encode_data.attributes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def encode_data(self, data, attributes):\n num_attributes = len(attributes)\n new_data = []\n current_row = 0\n\n row = data.row\n col = data.col\n data = data.data\n\n # Check if the rows are sorted\n if not all(row[i] <= row[i + 1] for i in range(len(row) - 1)):\n raise ValueError(\"liac-arff can only output COO matrices with \"\n \"sorted rows.\")\n\n for v, col, row in zip(data, col, row):\n if row > current_row:\n # Add empty rows if necessary\n while current_row < row:\n yield \" \".join([\"{\", ','.join(new_data), \"}\"])\n new_data = []\n 
current_row += 1\n\n if col >= num_attributes:\n raise BadObject(\n 'Instance %d has at least %d attributes, expected %d' %\n (current_row, col + 1, num_attributes)\n )\n\n if v is None or v == '' or v != v:\n s = '?'\n else:\n s = encode_string(str(v))\n new_data.append(\"%d %s\" % (col, s))\n\n yield \" \".join([\"{\", ','.join(new_data), \"}\"])" + }, + { + "id": "scikit-learn/sklearn.externals._arff/DenseGeneratorData/_decode_values", + "name": "_decode_values", + "qname": "sklearn.externals._arff.DenseGeneratorData._decode_values", + "decorators": ["staticmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/DenseGeneratorData/_decode_values/values", + "name": "values", + "qname": "sklearn.externals._arff.DenseGeneratorData._decode_values.values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/DenseGeneratorData/_decode_values/conversors", + "name": "conversors", + "qname": "sklearn.externals._arff.DenseGeneratorData._decode_values.conversors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @staticmethod\n def _decode_values(values, conversors):\n try:\n values = [None if value is None else conversor(value)\n for conversor, value\n in zip(conversors, values)]\n except ValueError as exc:\n if 'float: ' in str(exc):\n raise BadNumericalValue()\n return values" + }, + { + "id": "scikit-learn/sklearn.externals._arff/DenseGeneratorData/decode_rows", + "name": "decode_rows", + "qname": "sklearn.externals._arff.DenseGeneratorData.decode_rows", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/DenseGeneratorData/decode_rows/self", + "name": "self", + "qname": "sklearn.externals._arff.DenseGeneratorData.decode_rows.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/DenseGeneratorData/decode_rows/stream", + "name": "stream", + "qname": "sklearn.externals._arff.DenseGeneratorData.decode_rows.stream", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/DenseGeneratorData/decode_rows/conversors", + "name": "conversors", + "qname": "sklearn.externals._arff.DenseGeneratorData.decode_rows.conversors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def decode_rows(self, stream, conversors):\n for row in stream:\n values = _parse_values(row)\n\n if isinstance(values, dict):\n if values and max(values) >= len(conversors):\n raise BadDataFormat(row)\n # XXX: int 0 is used for implicit values, not '0'\n values = [values[i] if i in values else 0 for i in\n range(len(conversors))]\n else:\n if len(values) != 
len(conversors):\n raise BadDataFormat(row)\n\n yield self._decode_values(values, conversors)" + }, + { + "id": "scikit-learn/sklearn.externals._arff/DenseGeneratorData/encode_data", + "name": "encode_data", + "qname": "sklearn.externals._arff.DenseGeneratorData.encode_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/DenseGeneratorData/encode_data/self", + "name": "self", + "qname": "sklearn.externals._arff.DenseGeneratorData.encode_data.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/DenseGeneratorData/encode_data/data", + "name": "data", + "qname": "sklearn.externals._arff.DenseGeneratorData.encode_data.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/DenseGeneratorData/encode_data/attributes", + "name": "attributes", + "qname": "sklearn.externals._arff.DenseGeneratorData.encode_data.attributes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "(INTERNAL) Encodes a line of data.\n\nData instances follow the csv format, i.e, attribute values are\ndelimited by commas. After converted from csv.\n\n:param data: a list of values.\n:param attributes: a list of attributes. Used to check if data is valid.\n:return: a string with the encoded data line.", + "docstring": "(INTERNAL) Encodes a line of data.\n\nData instances follow the csv format, i.e, attribute values are\ndelimited by commas. After converted from csv.\n\n:param data: a list of values.\n:param attributes: a list of attributes. Used to check if data is valid.\n:return: a string with the encoded data line.", + "code": " def encode_data(self, data, attributes):\n '''(INTERNAL) Encodes a line of data.\n\n Data instances follow the csv format, i.e, attribute values are\n delimited by commas. After converted from csv.\n\n :param data: a list of values.\n :param attributes: a list of attributes. 
Used to check if data is valid.\n :return: a string with the encoded data line.\n '''\n current_row = 0\n\n for inst in data:\n if len(inst) != len(attributes):\n raise BadObject(\n 'Instance %d has %d attributes, expected %d' %\n (current_row, len(inst), len(attributes))\n )\n\n new_data = []\n for value in inst:\n if value is None or value == '' or value != value:\n s = '?'\n else:\n s = encode_string(str(value))\n new_data.append(s)\n\n current_row += 1\n yield ','.join(new_data)" + }, + { + "id": "scikit-learn/sklearn.externals._arff/EncodedNominalConversor/__call__", + "name": "__call__", + "qname": "sklearn.externals._arff.EncodedNominalConversor.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/EncodedNominalConversor/__call__/self", + "name": "self", + "qname": "sklearn.externals._arff.EncodedNominalConversor.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/EncodedNominalConversor/__call__/value", + "name": "value", + "qname": "sklearn.externals._arff.EncodedNominalConversor.__call__.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __call__(self, value):\n try:\n return self.values[value]\n except KeyError:\n raise BadNominalValue(value)" + }, + { + "id": "scikit-learn/sklearn.externals._arff/EncodedNominalConversor/__init__", + "name": "__init__", + "qname": "sklearn.externals._arff.EncodedNominalConversor.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/EncodedNominalConversor/__init__/self", + "name": "self", + "qname": "sklearn.externals._arff.EncodedNominalConversor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/EncodedNominalConversor/__init__/values", + "name": "values", + "qname": "sklearn.externals._arff.EncodedNominalConversor.__init__.values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __init__(self, values):\n self.values = {v: i for i, v in enumerate(values)}\n self.values[0] = 0" + }, + { + "id": "scikit-learn/sklearn.externals._arff/LODGeneratorData/decode_rows", + "name": "decode_rows", + "qname": "sklearn.externals._arff.LODGeneratorData.decode_rows", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/LODGeneratorData/decode_rows/self", + "name": "self", + "qname": "sklearn.externals._arff.LODGeneratorData.decode_rows.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/LODGeneratorData/decode_rows/stream", + "name": "stream", + "qname": 
"sklearn.externals._arff.LODGeneratorData.decode_rows.stream", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/LODGeneratorData/decode_rows/conversors", + "name": "conversors", + "qname": "sklearn.externals._arff.LODGeneratorData.decode_rows.conversors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def decode_rows(self, stream, conversors):\n for row in stream:\n values = _parse_values(row)\n\n if not isinstance(values, dict):\n raise BadLayout()\n try:\n yield {key: None if value is None else conversors[key](value)\n for key, value in values.items()}\n except ValueError as exc:\n if 'float: ' in str(exc):\n raise BadNumericalValue()\n raise\n except IndexError:\n # conversor out of range\n raise BadDataFormat(row)" + }, + { + "id": "scikit-learn/sklearn.externals._arff/LODGeneratorData/encode_data", + "name": "encode_data", + "qname": "sklearn.externals._arff.LODGeneratorData.encode_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/LODGeneratorData/encode_data/self", + "name": "self", + "qname": "sklearn.externals._arff.LODGeneratorData.encode_data.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/LODGeneratorData/encode_data/data", + "name": "data", + "qname": "sklearn.externals._arff.LODGeneratorData.encode_data.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/LODGeneratorData/encode_data/attributes", + "name": "attributes", + "qname": "sklearn.externals._arff.LODGeneratorData.encode_data.attributes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def encode_data(self, data, attributes):\n current_row = 0\n\n num_attributes = len(attributes)\n for row in data:\n new_data = []\n\n if len(row) > 0 and max(row) >= num_attributes:\n raise BadObject(\n 'Instance %d has %d attributes, expected %d' %\n (current_row, max(row) + 1, num_attributes)\n )\n\n for col in sorted(row):\n v = row[col]\n if v is None or v == '' or v != v:\n s = '?'\n else:\n s = encode_string(str(v))\n new_data.append(\"%d %s\" % (col, s))\n\n current_row += 1\n yield \" \".join([\"{\", ','.join(new_data), \"}\"])" + }, + { + "id": "scikit-learn/sklearn.externals._arff/NominalConversor/__call__", + "name": "__call__", + "qname": "sklearn.externals._arff.NominalConversor.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/NominalConversor/__call__/self", + "name": "self", + "qname": "sklearn.externals._arff.NominalConversor.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + 
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/NominalConversor/__call__/value", + "name": "value", + "qname": "sklearn.externals._arff.NominalConversor.__call__.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __call__(self, value):\n if value not in self.values:\n if value == 0:\n # Sparse decode\n # See issue #52: nominals should take their first value when\n # unspecified in a sparse matrix. Naturally, this is consistent\n # with EncodedNominalConversor.\n return self.zero_value\n raise BadNominalValue(value)\n return str(value)" + }, + { + "id": "scikit-learn/sklearn.externals._arff/NominalConversor/__init__", + "name": "__init__", + "qname": "sklearn.externals._arff.NominalConversor.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/NominalConversor/__init__/self", + "name": "self", + "qname": "sklearn.externals._arff.NominalConversor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/NominalConversor/__init__/values", + "name": "values", + "qname": "sklearn.externals._arff.NominalConversor.__init__.values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __init__(self, values):\n self.values = set(values)\n self.zero_value = values[0]" + }, + { + "id": "scikit-learn/sklearn.externals._arff/_DataListMixin/decode_rows", + "name": "decode_rows", + "qname": "sklearn.externals._arff._DataListMixin.decode_rows", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._arff/_DataListMixin/decode_rows/self", + "name": "self", + "qname": "sklearn.externals._arff._DataListMixin.decode_rows.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/_DataListMixin/decode_rows/stream", + "name": "stream", + "qname": "sklearn.externals._arff._DataListMixin.decode_rows.stream", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._arff/_DataListMixin/decode_rows/conversors", + "name": "conversors", + "qname": "sklearn.externals._arff._DataListMixin.decode_rows.conversors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def decode_rows(self, stream, conversors):\n return list(super().decode_rows(stream, conversors))" + }, + { + "id": 
"scikit-learn/sklearn.externals._arff/_build_re_values", + "name": "_build_re_values", + "qname": "sklearn.externals._arff._build_re_values", + "decorators": [], + "parameters": [], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _build_re_values():\n quoted_re = r'''\n \" # open quote followed by zero or more of:\n (?:\n (? tol:\n print('matrix %s of the type %s is not sufficiently Hermitian:'\n % (name, M.dtype))\n print('condition: %.e < %e' % (nmd, tol))" + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/_save", + "name": "_save", + "qname": "sklearn.externals._lobpcg._save", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._lobpcg/_save/ar", + "name": "ar", + "qname": "sklearn.externals._lobpcg._save.ar", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/_save/fileName", + "name": "fileName", + "qname": "sklearn.externals._lobpcg._save.fileName", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _save(ar, fileName):\n # Used only when verbosity level > 10.\n np.savetxt(fileName, ar)" + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/bmat", + "name": "bmat", + "qname": "sklearn.externals._lobpcg.bmat", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._lobpcg/bmat/args", + "name": "args", + "qname": "sklearn.externals._lobpcg.bmat.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/bmat/kwargs", + "name": "kwargs", + "qname": "sklearn.externals._lobpcg.bmat.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def bmat(*args, **kwargs):\n import warnings\n with warnings.catch_warnings(record=True):\n warnings.filterwarnings(\n 'ignore', '.*the matrix subclass is not the recommended way.*')\n return np.bmat(*args, **kwargs)" + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/lobpcg", + "name": "lobpcg", + "qname": "sklearn.externals._lobpcg.lobpcg", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._lobpcg/lobpcg/A", + "name": "A", + "qname": "sklearn.externals._lobpcg.lobpcg.A", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{sparse matrix, dense matrix, LinearOperator}", + "default_value": "", + "description": "The symmetric linear operator of the problem, usually a\nsparse matrix. Often called the \"stiffness matrix\"." 
+ }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/lobpcg/X", + "name": "X", + "qname": "sklearn.externals._lobpcg.lobpcg.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, float32 or float64", + "default_value": "", + "description": "Initial approximation to the ``k`` eigenvectors (non-sparse). If `A`\nhas ``shape=(n,n)`` then `X` should have shape ``shape=(n,k)``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "float32" + }, + { + "kind": "NamedType", + "name": "float64" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/lobpcg/B", + "name": "B", + "qname": "sklearn.externals._lobpcg.lobpcg.B", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{dense matrix, sparse matrix, LinearOperator}", + "default_value": "", + "description": "The right hand side operator in a generalized eigenproblem.\nBy default, ``B = Identity``. Often called the \"mass matrix\"." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/lobpcg/M", + "name": "M", + "qname": "sklearn.externals._lobpcg.lobpcg.M", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{dense matrix, sparse matrix, LinearOperator}", + "default_value": "", + "description": "Preconditioner to `A`; by default ``M = Identity``.\n`M` should approximate the inverse of `A`." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/lobpcg/Y", + "name": "Y", + "qname": "sklearn.externals._lobpcg.lobpcg.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, float32 or float64", + "default_value": "", + "description": "n-by-sizeY matrix of constraints (non-sparse), sizeY < n\nThe iterations will be performed in the B-orthogonal complement\nof the column-space of Y. Y must be full rank." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "float32" + }, + { + "kind": "NamedType", + "name": "float64" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/lobpcg/tol", + "name": "tol", + "qname": "sklearn.externals._lobpcg.lobpcg.tol", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "scalar", + "default_value": "", + "description": "Solver tolerance (stopping criterion).\nThe default is ``tol=n*sqrt(eps)``." + }, + "type": { + "kind": "NamedType", + "name": "scalar" + } + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/lobpcg/maxiter", + "name": "maxiter", + "qname": "sklearn.externals._lobpcg.lobpcg.maxiter", + "default_value": "20", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Maximum number of iterations. The default is ``maxiter=min(n, 20)``." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/lobpcg/largest", + "name": "largest", + "qname": "sklearn.externals._lobpcg.lobpcg.largest", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "When True, solve for the largest eigenvalues, otherwise the smallest." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/lobpcg/verbosityLevel", + "name": "verbosityLevel", + "qname": "sklearn.externals._lobpcg.lobpcg.verbosityLevel", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Controls solver output. The default is ``verbosityLevel=0``." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/lobpcg/retLambdaHistory", + "name": "retLambdaHistory", + "qname": "sklearn.externals._lobpcg.lobpcg.retLambdaHistory", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Whether to return eigenvalue history. Default is False." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.externals._lobpcg/lobpcg/retResidualNormsHistory", + "name": "retResidualNormsHistory", + "qname": "sklearn.externals._lobpcg.lobpcg.retResidualNormsHistory", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Whether to return history of residual norms. Default is False." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Locally Optimal Block Preconditioned Conjugate Gradient Method (LOBPCG)\n\nLOBPCG is a preconditioned eigensolver for large symmetric positive\ndefinite (SPD) generalized eigenproblems.", + "docstring": "Locally Optimal Block Preconditioned Conjugate Gradient Method (LOBPCG)\n\nLOBPCG is a preconditioned eigensolver for large symmetric positive\ndefinite (SPD) generalized eigenproblems.\n\nParameters\n----------\nA : {sparse matrix, dense matrix, LinearOperator}\n The symmetric linear operator of the problem, usually a\n sparse matrix. Often called the \"stiffness matrix\".\nX : ndarray, float32 or float64\n Initial approximation to the ``k`` eigenvectors (non-sparse). If `A`\n has ``shape=(n,n)`` then `X` should have shape ``shape=(n,k)``.\nB : {dense matrix, sparse matrix, LinearOperator}, optional\n The right hand side operator in a generalized eigenproblem.\n By default, ``B = Identity``. Often called the \"mass matrix\".\nM : {dense matrix, sparse matrix, LinearOperator}, optional\n Preconditioner to `A`; by default ``M = Identity``.\n `M` should approximate the inverse of `A`.\nY : ndarray, float32 or float64, optional\n n-by-sizeY matrix of constraints (non-sparse), sizeY < n\n The iterations will be performed in the B-orthogonal complement\n of the column-space of Y. Y must be full rank.\ntol : scalar, optional\n Solver tolerance (stopping criterion).\n The default is ``tol=n*sqrt(eps)``.\nmaxiter : int, optional\n Maximum number of iterations. 
The default is ``maxiter=min(n, 20)``.\nlargest : bool, optional\n When True, solve for the largest eigenvalues, otherwise the smallest.\nverbosityLevel : int, optional\n Controls solver output. The default is ``verbosityLevel=0``.\nretLambdaHistory : bool, optional\n Whether to return eigenvalue history. Default is False.\nretResidualNormsHistory : bool, optional\n Whether to return history of residual norms. Default is False.\n\nReturns\n-------\nw : ndarray\n Array of ``k`` eigenvalues\nv : ndarray\n An array of ``k`` eigenvectors. `v` has the same shape as `X`.\nlambdas : list of ndarray, optional\n The eigenvalue history, if `retLambdaHistory` is True.\nrnorms : list of ndarray, optional\n The history of residual norms, if `retResidualNormsHistory` is True.\n\nNotes\n-----\nIf both ``retLambdaHistory`` and ``retResidualNormsHistory`` are True,\nthe return tuple has the following format\n``(lambda, V, lambda history, residual norms history)``.\n\nIn the following ``n`` denotes the matrix size and ``m`` the number\nof required eigenvalues (smallest or largest).\n\nThe LOBPCG code internally solves eigenproblems of the size ``3m`` on every\niteration by calling the \"standard\" dense eigensolver, so if ``m`` is not\nsmall enough compared to ``n``, it does not make sense to call the LOBPCG\ncode, but rather one should use the \"standard\" eigensolver, e.g. numpy or\nscipy function in this case.\nIf one calls the LOBPCG algorithm for ``5m > n``, it will most likely break\ninternally, so the code tries to call the standard function instead.\n\nIt is not that ``n`` should be large for the LOBPCG to work, but rather the\nratio ``n / m`` should be large. If you call LOBPCG with ``m=1``\nand ``n=10``, it works even though ``n`` is small. The method is intended\nfor extremely large ``n / m``, see e.g., reference [28] in\nhttps://arxiv.org/abs/0705.2626\n\nThe convergence speed depends basically on two factors:\n\n1. How well the sought eigenvalues are separated from the rest\n of the eigenvalues. One can try to vary ``m`` to make this better.\n\n2. How well conditioned the problem is. This can be changed by using proper\n preconditioning. For example, a rod vibration test problem (under tests\n directory) is ill-conditioned for large ``n``, so convergence will be\n slow, unless efficient preconditioning is used. For this specific\n problem, a good simple preconditioner function would be a linear solve\n for `A`, which is easy to code since A is tridiagonal.\n\nReferences\n----------\n.. [1] A. V. Knyazev (2001),\n Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n Block Preconditioned Conjugate Gradient Method.\n SIAM Journal on Scientific Computing 23, no. 2,\n pp. 517-541. http://dx.doi.org/10.1137/S1064827500366124\n\n.. [2] A. V. Knyazev, I. Lashuk, M. E. Argentati, and E. Ovchinnikov\n (2007), Block Locally Optimal Preconditioned Eigenvalue Xolvers\n (BLOPEX) in hypre and PETSc. https://arxiv.org/abs/0705.2626\n\n.. [3] A. V. 
Knyazev's C and MATLAB implementations:\n https://bitbucket.org/joseroman/blopex\n\nExamples\n--------\n\nSolve ``A x = lambda x`` with constraints and preconditioning.\n\n>>> import numpy as np\n>>> from scipy.sparse import spdiags, issparse\n>>> from scipy.sparse.linalg import lobpcg, LinearOperator\n>>> n = 100\n>>> vals = np.arange(1, n + 1)\n>>> A = spdiags(vals, 0, n, n)\n>>> A.toarray()\narray([[ 1., 0., 0., ..., 0., 0., 0.],\n [ 0., 2., 0., ..., 0., 0., 0.],\n [ 0., 0., 3., ..., 0., 0., 0.],\n ...,\n [ 0., 0., 0., ..., 98., 0., 0.],\n [ 0., 0., 0., ..., 0., 99., 0.],\n [ 0., 0., 0., ..., 0., 0., 100.]])\n\nConstraints:\n\n>>> Y = np.eye(n, 3)\n\nInitial guess for eigenvectors, should have linearly independent\ncolumns. Column dimension = number of requested eigenvalues.\n\n>>> X = np.random.rand(n, 3)\n\nPreconditioner is the inverse of A in this example:\n\n>>> invA = spdiags([1./vals], 0, n, n)\n\nThe preconditioner must be defined by a function:\n\n>>> def precond( x ):\n... return invA @ x\n\nThe argument x of the preconditioner function is a matrix inside `lobpcg`,\nthus the use of matrix-matrix product ``@``.\n\nThe preconditioner function is passed to lobpcg as a `LinearOperator`:\n\n>>> M = LinearOperator(matvec=precond, matmat=precond,\n... shape=(n, n), dtype=float)\n\nLet us now solve the eigenvalue problem for the matrix A:\n\n>>> eigenvalues, _ = lobpcg(A, X, Y=Y, M=M, largest=False)\n>>> eigenvalues\narray([4., 5., 6.])\n\nNote that the vectors passed in Y are the eigenvectors of the 3 smallest\neigenvalues. The results returned are orthogonal to those.", + "code": "def lobpcg(A, X,\n B=None, M=None, Y=None,\n tol=None, maxiter=20,\n largest=True, verbosityLevel=0,\n retLambdaHistory=False, retResidualNormsHistory=False):\n \"\"\"Locally Optimal Block Preconditioned Conjugate Gradient Method (LOBPCG)\n\n LOBPCG is a preconditioned eigensolver for large symmetric positive\n definite (SPD) generalized eigenproblems.\n\n Parameters\n ----------\n A : {sparse matrix, dense matrix, LinearOperator}\n The symmetric linear operator of the problem, usually a\n sparse matrix. Often called the \"stiffness matrix\".\n X : ndarray, float32 or float64\n Initial approximation to the ``k`` eigenvectors (non-sparse). If `A`\n has ``shape=(n,n)`` then `X` should have shape ``shape=(n,k)``.\n B : {dense matrix, sparse matrix, LinearOperator}, optional\n The right hand side operator in a generalized eigenproblem.\n By default, ``B = Identity``. Often called the \"mass matrix\".\n M : {dense matrix, sparse matrix, LinearOperator}, optional\n Preconditioner to `A`; by default ``M = Identity``.\n `M` should approximate the inverse of `A`.\n Y : ndarray, float32 or float64, optional\n n-by-sizeY matrix of constraints (non-sparse), sizeY < n\n The iterations will be performed in the B-orthogonal complement\n of the column-space of Y. Y must be full rank.\n tol : scalar, optional\n Solver tolerance (stopping criterion).\n The default is ``tol=n*sqrt(eps)``.\n maxiter : int, optional\n Maximum number of iterations. 
Default is False.\n\n Returns\n -------\n w : ndarray\n Array of ``k`` eigenvalues\n v : ndarray\n An array of ``k`` eigenvectors. `v` has the same shape as `X`.\n lambdas : list of ndarray, optional\n The eigenvalue history, if `retLambdaHistory` is True.\n rnorms : list of ndarray, optional\n The history of residual norms, if `retResidualNormsHistory` is True.\n\n Notes\n -----\n If both ``retLambdaHistory`` and ``retResidualNormsHistory`` are True,\n the return tuple has the following format\n ``(lambda, V, lambda history, residual norms history)``.\n\n In the following ``n`` denotes the matrix size and ``m`` the number\n of required eigenvalues (smallest or largest).\n\n The LOBPCG code internally solves eigenproblems of the size ``3m`` on every\n iteration by calling the \"standard\" dense eigensolver, so if ``m`` is not\n small enough compared to ``n``, it does not make sense to call the LOBPCG\n code, but rather one should use the \"standard\" eigensolver, e.g. numpy or\n scipy function in this case.\n If one calls the LOBPCG algorithm for ``5m > n``, it will most likely break\n internally, so the code tries to call the standard function instead.\n\n It is not that ``n`` should be large for the LOBPCG to work, but rather the\n ratio ``n / m`` should be large. If you call LOBPCG with ``m=1``\n and ``n=10``, it works even though ``n`` is small. The method is intended\n for extremely large ``n / m``, see e.g., reference [28] in\n https://arxiv.org/abs/0705.2626\n\n The convergence speed depends basically on two factors:\n\n 1. How well the sought eigenvalues are separated from the rest\n of the eigenvalues. One can try to vary ``m`` to make this better.\n\n 2. How well conditioned the problem is. This can be changed by using proper\n preconditioning. For example, a rod vibration test problem (under tests\n directory) is ill-conditioned for large ``n``, so convergence will be\n slow, unless efficient preconditioning is used. For this specific\n problem, a good simple preconditioner function would be a linear solve\n for `A`, which is easy to code since A is tridiagonal.\n\n References\n ----------\n .. [1] A. V. Knyazev (2001),\n Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n Block Preconditioned Conjugate Gradient Method.\n SIAM Journal on Scientific Computing 23, no. 2,\n pp. 517-541. http://dx.doi.org/10.1137/S1064827500366124\n\n .. [2] A. V. Knyazev, I. Lashuk, M. E. Argentati, and E. Ovchinnikov\n (2007), Block Locally Optimal Preconditioned Eigenvalue Xolvers\n (BLOPEX) in hypre and PETSc. https://arxiv.org/abs/0705.2626\n\n .. [3] A. V. Knyazev's C and MATLAB implementations:\n https://bitbucket.org/joseroman/blopex\n\n Examples\n --------\n\n Solve ``A x = lambda x`` with constraints and preconditioning.\n\n >>> import numpy as np\n >>> from scipy.sparse import spdiags, issparse\n >>> from scipy.sparse.linalg import lobpcg, LinearOperator\n >>> n = 100\n >>> vals = np.arange(1, n + 1)\n >>> A = spdiags(vals, 0, n, n)\n >>> A.toarray()\n array([[ 1., 0., 0., ..., 0., 0., 0.],\n [ 0., 2., 0., ..., 0., 0., 0.],\n [ 0., 0., 3., ..., 0., 0., 0.],\n ...,\n [ 0., 0., 0., ..., 98., 0., 0.],\n [ 0., 0., 0., ..., 0., 99., 0.],\n [ 0., 0., 0., ..., 0., 0., 100.]])\n\n Constraints:\n\n >>> Y = np.eye(n, 3)\n\n Initial guess for eigenvectors, should have linearly independent\n columns. 
Column dimension = number of requested eigenvalues.\n\n >>> X = np.random.rand(n, 3)\n\n Preconditioner is the inverse of A in this example:\n\n >>> invA = spdiags([1./vals], 0, n, n)\n\n The preconditioner must be defined by a function:\n\n >>> def precond( x ):\n ... return invA @ x\n\n The argument x of the preconditioner function is a matrix inside `lobpcg`,\n thus the use of matrix-matrix product ``@``.\n\n The preconditioner function is passed to lobpcg as a `LinearOperator`:\n\n >>> M = LinearOperator(matvec=precond, matmat=precond,\n ... shape=(n, n), dtype=float)\n\n Let us now solve the eigenvalue problem for the matrix A:\n\n >>> eigenvalues, _ = lobpcg(A, X, Y=Y, M=M, largest=False)\n >>> eigenvalues\n array([4., 5., 6.])\n\n Note that the vectors passed in Y are the eigenvectors of the 3 smallest\n eigenvalues. The results returned are orthogonal to those.\n\n \"\"\"\n blockVectorX = X\n blockVectorY = Y\n residualTolerance = tol\n maxIterations = maxiter\n\n if blockVectorY is not None:\n sizeY = blockVectorY.shape[1]\n else:\n sizeY = 0\n\n # Block size.\n if len(blockVectorX.shape) != 2:\n raise ValueError('expected rank-2 array for argument X')\n\n n, sizeX = blockVectorX.shape\n\n if verbosityLevel:\n aux = \"Solving \"\n if B is None:\n aux += \"standard\"\n else:\n aux += \"generalized\"\n aux += \" eigenvalue problem with\"\n if M is None:\n aux += \"out\"\n aux += \" preconditioning\\n\\n\"\n aux += \"matrix size %d\\n\" % n\n aux += \"block size %d\\n\\n\" % sizeX\n if blockVectorY is None:\n aux += \"No constraints\\n\\n\"\n else:\n if sizeY > 1:\n aux += \"%d constraints\\n\\n\" % sizeY\n else:\n aux += \"%d constraint\\n\\n\" % sizeY\n print(aux)\n\n A = _makeOperator(A, (n, n))\n B = _makeOperator(B, (n, n))\n M = _makeOperator(M, (n, n))\n\n if (n - sizeY) < (5 * sizeX):\n # warn('The problem size is small compared to the block size.' 
\\\n # ' Using dense eigensolver instead of LOBPCG.')\n\n sizeX = min(sizeX, n)\n\n if blockVectorY is not None:\n raise NotImplementedError('The dense eigensolver '\n 'does not support constraints.')\n\n # Define the closed range of indices of eigenvalues to return.\n if largest:\n eigvals = (n - sizeX, n-1)\n else:\n eigvals = (0, sizeX-1)\n\n A_dense = A(np.eye(n, dtype=A.dtype))\n B_dense = None if B is None else B(np.eye(n, dtype=B.dtype))\n\n vals, vecs = eigh(A_dense, B_dense, eigvals=eigvals,\n check_finite=False)\n if largest:\n # Reverse order to be compatible with eigs() in 'LM' mode.\n vals = vals[::-1]\n vecs = vecs[:, ::-1]\n\n return vals, vecs\n\n if (residualTolerance is None) or (residualTolerance <= 0.0):\n residualTolerance = np.sqrt(1e-15) * n\n\n # Apply constraints to X.\n if blockVectorY is not None:\n\n if B is not None:\n blockVectorBY = B(blockVectorY)\n else:\n blockVectorBY = blockVectorY\n\n # gramYBY is a dense array.\n gramYBY = np.dot(blockVectorY.T.conj(), blockVectorBY)\n try:\n # gramYBY is a Cholesky factor from now on...\n gramYBY = cho_factor(gramYBY)\n except LinAlgError as e:\n raise ValueError('cannot handle linearly dependent constraints') from e\n\n _applyConstraints(blockVectorX, gramYBY, blockVectorBY, blockVectorY)\n\n ##\n # B-orthonormalize X.\n blockVectorX, blockVectorBX = _b_orthonormalize(B, blockVectorX)\n\n ##\n # Compute the initial Ritz vectors: solve the eigenproblem.\n blockVectorAX = A(blockVectorX)\n gramXAX = np.dot(blockVectorX.T.conj(), blockVectorAX)\n\n _lambda, eigBlockVector = eigh(gramXAX, check_finite=False)\n ii = _get_indx(_lambda, sizeX, largest)\n _lambda = _lambda[ii]\n\n eigBlockVector = np.asarray(eigBlockVector[:, ii])\n blockVectorX = np.dot(blockVectorX, eigBlockVector)\n blockVectorAX = np.dot(blockVectorAX, eigBlockVector)\n if B is not None:\n blockVectorBX = np.dot(blockVectorBX, eigBlockVector)\n\n ##\n # Active index set.\n activeMask = np.ones((sizeX,), dtype=bool)\n\n lambdaHistory = [_lambda]\n residualNormsHistory = []\n\n previousBlockSize = sizeX\n ident = np.eye(sizeX, dtype=A.dtype)\n ident0 = np.eye(sizeX, dtype=A.dtype)\n\n ##\n # Main iteration loop.\n\n blockVectorP = None # set during iteration\n blockVectorAP = None\n blockVectorBP = None\n\n iterationNumber = -1\n restart = True\n explicitGramFlag = False\n while iterationNumber < maxIterations:\n iterationNumber += 1\n if verbosityLevel > 0:\n print('iteration %d' % iterationNumber)\n\n if B is not None:\n aux = blockVectorBX * _lambda[np.newaxis, :]\n else:\n aux = blockVectorX * _lambda[np.newaxis, :]\n\n blockVectorR = blockVectorAX - aux\n\n aux = np.sum(blockVectorR.conj() * blockVectorR, 0)\n residualNorms = np.sqrt(aux)\n\n residualNormsHistory.append(residualNorms)\n\n ii = np.where(residualNorms > residualTolerance, True, False)\n activeMask = activeMask & ii\n if verbosityLevel > 2:\n print(activeMask)\n\n currentBlockSize = activeMask.sum()\n if currentBlockSize != previousBlockSize:\n previousBlockSize = currentBlockSize\n ident = np.eye(currentBlockSize, dtype=A.dtype)\n\n if currentBlockSize == 0:\n break\n\n if verbosityLevel > 0:\n print('current block size:', currentBlockSize)\n print('eigenvalue:', _lambda)\n print('residual norms:', residualNorms)\n if verbosityLevel > 10:\n print(eigBlockVector)\n\n activeBlockVectorR = _as2d(blockVectorR[:, activeMask])\n\n if iterationNumber > 0:\n activeBlockVectorP = _as2d(blockVectorP[:, activeMask])\n activeBlockVectorAP = _as2d(blockVectorAP[:, activeMask])\n if B is not None:\n 
activeBlockVectorBP = _as2d(blockVectorBP[:, activeMask])\n\n if M is not None:\n # Apply preconditioner T to the active residuals.\n activeBlockVectorR = M(activeBlockVectorR)\n\n ##\n # Apply constraints to the preconditioned residuals.\n if blockVectorY is not None:\n _applyConstraints(activeBlockVectorR,\n gramYBY, blockVectorBY, blockVectorY)\n\n ##\n # B-orthogonalize the preconditioned residuals to X.\n if B is not None:\n activeBlockVectorR = activeBlockVectorR - \\\n np.matmul(blockVectorX,\n np.matmul(blockVectorBX.T.conj(),\n activeBlockVectorR))\n else:\n activeBlockVectorR = activeBlockVectorR - \\\n np.matmul(blockVectorX,\n np.matmul(blockVectorX.T.conj(),\n activeBlockVectorR))\n\n ##\n # B-orthonormalize the preconditioned residuals.\n aux = _b_orthonormalize(B, activeBlockVectorR)\n activeBlockVectorR, activeBlockVectorBR = aux\n\n activeBlockVectorAR = A(activeBlockVectorR)\n\n if iterationNumber > 0:\n if B is not None:\n aux = _b_orthonormalize(B, activeBlockVectorP,\n activeBlockVectorBP, retInvR=True)\n activeBlockVectorP, activeBlockVectorBP, invR, normal = aux\n else:\n aux = _b_orthonormalize(B, activeBlockVectorP, retInvR=True)\n activeBlockVectorP, _, invR, normal = aux\n # Function _b_orthonormalize returns None if Cholesky fails\n if activeBlockVectorP is not None:\n activeBlockVectorAP = activeBlockVectorAP / normal\n activeBlockVectorAP = np.dot(activeBlockVectorAP, invR)\n restart = False\n else:\n restart = True\n\n ##\n # Perform the Rayleigh Ritz Procedure:\n # Compute symmetric Gram matrices:\n\n if activeBlockVectorAR.dtype == 'float32':\n myeps = 1\n elif activeBlockVectorR.dtype == 'float32':\n myeps = 1e-4\n else:\n myeps = 1e-8\n\n if residualNorms.max() > myeps and not explicitGramFlag:\n explicitGramFlag = False\n else:\n # Once explicitGramFlag, forever explicitGramFlag.\n explicitGramFlag = True\n\n # Shared memory assignments to simplify the code\n if B is None:\n blockVectorBX = blockVectorX\n activeBlockVectorBR = activeBlockVectorR\n if not restart:\n activeBlockVectorBP = activeBlockVectorP\n\n # Common submatrices:\n gramXAR = np.dot(blockVectorX.T.conj(), activeBlockVectorAR)\n gramRAR = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAR)\n\n if explicitGramFlag:\n gramRAR = (gramRAR + gramRAR.T.conj())/2\n gramXAX = np.dot(blockVectorX.T.conj(), blockVectorAX)\n gramXAX = (gramXAX + gramXAX.T.conj())/2\n gramXBX = np.dot(blockVectorX.T.conj(), blockVectorBX)\n gramRBR = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBR)\n gramXBR = np.dot(blockVectorX.T.conj(), activeBlockVectorBR)\n else:\n gramXAX = np.diag(_lambda)\n gramXBX = ident0\n gramRBR = ident\n gramXBR = np.zeros((sizeX, currentBlockSize), dtype=A.dtype)\n\n def _handle_gramA_gramB_verbosity(gramA, gramB):\n if verbosityLevel > 0:\n _report_nonhermitian(gramA, 'gramA')\n _report_nonhermitian(gramB, 'gramB')\n if verbosityLevel > 10:\n # Note: not documented, but leave it in here for now\n np.savetxt('gramA.txt', gramA)\n np.savetxt('gramB.txt', gramB)\n\n if not restart:\n gramXAP = np.dot(blockVectorX.T.conj(), activeBlockVectorAP)\n gramRAP = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorAP)\n gramPAP = np.dot(activeBlockVectorP.T.conj(), activeBlockVectorAP)\n gramXBP = np.dot(blockVectorX.T.conj(), activeBlockVectorBP)\n gramRBP = np.dot(activeBlockVectorR.T.conj(), activeBlockVectorBP)\n if explicitGramFlag:\n gramPAP = (gramPAP + gramPAP.T.conj())/2\n gramPBP = np.dot(activeBlockVectorP.T.conj(),\n activeBlockVectorBP)\n else:\n gramPBP = ident\n\n 
gramA = bmat([[gramXAX, gramXAR, gramXAP],\n [gramXAR.T.conj(), gramRAR, gramRAP],\n [gramXAP.T.conj(), gramRAP.T.conj(), gramPAP]])\n gramB = bmat([[gramXBX, gramXBR, gramXBP],\n [gramXBR.T.conj(), gramRBR, gramRBP],\n [gramXBP.T.conj(), gramRBP.T.conj(), gramPBP]])\n\n _handle_gramA_gramB_verbosity(gramA, gramB)\n\n try:\n _lambda, eigBlockVector = eigh(gramA, gramB,\n check_finite=False)\n except LinAlgError:\n # try again after dropping the direction vectors P from RR\n restart = True\n\n if restart:\n gramA = bmat([[gramXAX, gramXAR],\n [gramXAR.T.conj(), gramRAR]])\n gramB = bmat([[gramXBX, gramXBR],\n [gramXBR.T.conj(), gramRBR]])\n\n _handle_gramA_gramB_verbosity(gramA, gramB)\n\n try:\n _lambda, eigBlockVector = eigh(gramA, gramB,\n check_finite=False)\n except LinAlgError as e:\n raise ValueError('eigh has failed in lobpcg iterations') from e\n\n ii = _get_indx(_lambda, sizeX, largest)\n if verbosityLevel > 10:\n print(ii)\n print(_lambda)\n\n _lambda = _lambda[ii]\n eigBlockVector = eigBlockVector[:, ii]\n\n lambdaHistory.append(_lambda)\n\n if verbosityLevel > 10:\n print('lambda:', _lambda)\n# # Normalize eigenvectors!\n# aux = np.sum( eigBlockVector.conj() * eigBlockVector, 0 )\n# eigVecNorms = np.sqrt( aux )\n# eigBlockVector = eigBlockVector / eigVecNorms[np.newaxis, :]\n# eigBlockVector, aux = _b_orthonormalize( B, eigBlockVector )\n\n if verbosityLevel > 10:\n print(eigBlockVector)\n\n # Compute Ritz vectors.\n if B is not None:\n if not restart:\n eigBlockVectorX = eigBlockVector[:sizeX]\n eigBlockVectorR = eigBlockVector[sizeX:sizeX+currentBlockSize]\n eigBlockVectorP = eigBlockVector[sizeX+currentBlockSize:]\n\n pp = np.dot(activeBlockVectorR, eigBlockVectorR)\n pp += np.dot(activeBlockVectorP, eigBlockVectorP)\n\n app = np.dot(activeBlockVectorAR, eigBlockVectorR)\n app += np.dot(activeBlockVectorAP, eigBlockVectorP)\n\n bpp = np.dot(activeBlockVectorBR, eigBlockVectorR)\n bpp += np.dot(activeBlockVectorBP, eigBlockVectorP)\n else:\n eigBlockVectorX = eigBlockVector[:sizeX]\n eigBlockVectorR = eigBlockVector[sizeX:]\n\n pp = np.dot(activeBlockVectorR, eigBlockVectorR)\n app = np.dot(activeBlockVectorAR, eigBlockVectorR)\n bpp = np.dot(activeBlockVectorBR, eigBlockVectorR)\n\n if verbosityLevel > 10:\n print(pp)\n print(app)\n print(bpp)\n\n blockVectorX = np.dot(blockVectorX, eigBlockVectorX) + pp\n blockVectorAX = np.dot(blockVectorAX, eigBlockVectorX) + app\n blockVectorBX = np.dot(blockVectorBX, eigBlockVectorX) + bpp\n\n blockVectorP, blockVectorAP, blockVectorBP = pp, app, bpp\n\n else:\n if not restart:\n eigBlockVectorX = eigBlockVector[:sizeX]\n eigBlockVectorR = eigBlockVector[sizeX:sizeX+currentBlockSize]\n eigBlockVectorP = eigBlockVector[sizeX+currentBlockSize:]\n\n pp = np.dot(activeBlockVectorR, eigBlockVectorR)\n pp += np.dot(activeBlockVectorP, eigBlockVectorP)\n\n app = np.dot(activeBlockVectorAR, eigBlockVectorR)\n app += np.dot(activeBlockVectorAP, eigBlockVectorP)\n else:\n eigBlockVectorX = eigBlockVector[:sizeX]\n eigBlockVectorR = eigBlockVector[sizeX:]\n\n pp = np.dot(activeBlockVectorR, eigBlockVectorR)\n app = np.dot(activeBlockVectorAR, eigBlockVectorR)\n\n if verbosityLevel > 10:\n print(pp)\n print(app)\n\n blockVectorX = np.dot(blockVectorX, eigBlockVectorX) + pp\n blockVectorAX = np.dot(blockVectorAX, eigBlockVectorX) + app\n\n blockVectorP, blockVectorAP = pp, app\n\n if B is not None:\n aux = blockVectorBX * _lambda[np.newaxis, :]\n\n else:\n aux = blockVectorX * _lambda[np.newaxis, :]\n\n blockVectorR = blockVectorAX - aux\n\n aux = 
np.sum(blockVectorR.conj() * blockVectorR, 0)\n residualNorms = np.sqrt(aux)\n\n # Future work: Need to add Postprocessing here:\n # Making sure eigenvectors \"exactly\" satisfy the blockVectorY constraints?\n # Making sure eigenvectors are \"exactly\" orthonormalized by final \"exact\" RR\n # Computing the actual true residuals\n\n if verbosityLevel > 0:\n print('final eigenvalue:', _lambda)\n print('final residual norms:', residualNorms)\n\n if retLambdaHistory:\n if retResidualNormsHistory:\n return _lambda, blockVectorX, lambdaHistory, residualNormsHistory\n else:\n return _lambda, blockVectorX, lambdaHistory\n else:\n if retResidualNormsHistory:\n return _lambda, blockVectorX, residualNormsHistory\n else:\n return _lambda, blockVectorX" + }, + { + "id": "scikit-learn/sklearn.externals._pep562/Pep562/__dir__", + "name": "__dir__", + "qname": "sklearn.externals._pep562.Pep562.__dir__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._pep562/Pep562/__dir__/self", + "name": "self", + "qname": "sklearn.externals._pep562.Pep562.__dir__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the overridden `dir` if one was provided, else apply `dir` to the module.", + "docstring": "Return the overridden `dir` if one was provided, else apply `dir` to the module.", + "code": " def __dir__(self):\n \"\"\"Return the overridden `dir` if one was provided, else apply `dir` to the module.\"\"\"\n\n return self._get_dir() if self._get_dir else dir(self._module)" + }, + { + "id": "scikit-learn/sklearn.externals._pep562/Pep562/__getattr__", + "name": "__getattr__", + "qname": "sklearn.externals._pep562.Pep562.__getattr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._pep562/Pep562/__getattr__/self", + "name": "self", + "qname": "sklearn.externals._pep562.Pep562.__getattr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._pep562/Pep562/__getattr__/name", + "name": "name", + "qname": "sklearn.externals._pep562.Pep562.__getattr__.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Attempt to retrieve the attribute from the module, and if missing, use the overridden function if present.", + "docstring": "Attempt to retrieve the attribute from the module, and if missing, use the overridden function if present.", + "code": " def __getattr__(self, name):\n \"\"\"Attempt to retrieve the attribute from the module, and if missing, use the overridden function if present.\"\"\"\n\n try:\n return getattr(self._module, name)\n except AttributeError:\n if self._get_attr:\n return self._get_attr(name)\n raise" + }, + { + "id": "scikit-learn/sklearn.externals._pep562/Pep562/__init__", + "name": "__init__", + "qname": "sklearn.externals._pep562.Pep562.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._pep562/Pep562/__init__/self", + "name": "self", + "qname": "sklearn.externals._pep562.Pep562.__init__.self", + 
"default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._pep562/Pep562/__init__/name", + "name": "name", + "qname": "sklearn.externals._pep562.Pep562.__init__.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Backport of PEP 562 .\n\nWraps the module in a class that exposes the mechanics to override `__dir__` and `__getattr__`.\nThe given module will be searched for overrides of `__dir__` and `__getattr__` and use them when needed.", + "docstring": "Acquire `__getattr__` and `__dir__`, but only replace module for versions less than Python 3.7.", + "code": " def __init__(self, name):\n \"\"\"Acquire `__getattr__` and `__dir__`, but only replace module for versions less than Python 3.7.\"\"\"\n\n self._module = sys.modules[name]\n self._get_attr = getattr(self._module, '__getattr__', None)\n self._get_dir = getattr(self._module, '__dir__', None)\n sys.modules[name] = self" + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/bytescale", + "name": "bytescale", + "qname": "sklearn.externals._pilutil.bytescale", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._pilutil/bytescale/data", + "name": "data", + "qname": "sklearn.externals._pilutil.bytescale.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "PIL image data array." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/bytescale/cmin", + "name": "cmin", + "qname": "sklearn.externals._pilutil.bytescale.cmin", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "scalar", + "default_value": "None", + "description": "Bias scaling of small values. Default is ``data.min()``." + }, + "type": { + "kind": "NamedType", + "name": "scalar" + } + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/bytescale/cmax", + "name": "cmax", + "qname": "sklearn.externals._pilutil.bytescale.cmax", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "scalar", + "default_value": "None", + "description": "Bias scaling of large values. Default is ``data.max()``." + }, + "type": { + "kind": "NamedType", + "name": "scalar" + } + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/bytescale/high", + "name": "high", + "qname": "sklearn.externals._pilutil.bytescale.high", + "default_value": "255", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "scalar", + "default_value": "None", + "description": "Scale max value to `high`. Default is 255." + }, + "type": { + "kind": "NamedType", + "name": "scalar" + } + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/bytescale/low", + "name": "low", + "qname": "sklearn.externals._pilutil.bytescale.low", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "scalar", + "default_value": "None", + "description": "Scale min value to `low`. Default is 0." 
+ }, + "type": { + "kind": "NamedType", + "name": "scalar" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Byte scales an array (image).\n\nByte scaling means converting the input image to uint8 dtype and scaling\nthe range to ``(low, high)`` (default 0-255).\nIf the input image already has dtype uint8, no scaling is done.\n\nThis function is only available if Python Imaging Library (PIL) is installed.", + "docstring": "Byte scales an array (image).\n\nByte scaling means converting the input image to uint8 dtype and scaling\nthe range to ``(low, high)`` (default 0-255).\nIf the input image already has dtype uint8, no scaling is done.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\nParameters\n----------\ndata : ndarray\n PIL image data array.\ncmin : scalar, default=None\n Bias scaling of small values. Default is ``data.min()``.\ncmax : scalar, default=None\n Bias scaling of large values. Default is ``data.max()``.\nhigh : scalar, default=None\n Scale max value to `high`. Default is 255.\nlow : scalar, default=None\n Scale min value to `low`. Default is 0.\n\nReturns\n-------\nimg_array : uint8 ndarray\n The byte-scaled array.\n\nExamples\n--------\n>>> import numpy as np\n>>> from scipy.misc import bytescale\n>>> img = np.array([[ 91.06794177, 3.39058326, 84.4221549 ],\n... [ 73.88003259, 80.91433048, 4.88878881],\n... [ 51.53875334, 34.45808177, 27.5873488 ]])\n>>> bytescale(img)\narray([[255, 0, 236],\n [205, 225, 4],\n [140, 90, 70]], dtype=uint8)\n>>> bytescale(img, high=200, low=100)\narray([[200, 100, 192],\n [180, 188, 102],\n [155, 135, 128]], dtype=uint8)\n>>> bytescale(img, cmin=0, cmax=255)\narray([[91, 3, 84],\n [74, 81, 5],\n [52, 34, 28]], dtype=uint8)", + "code": "def bytescale(data, cmin=None, cmax=None, high=255, low=0):\n \"\"\"\n Byte scales an array (image).\n\n Byte scaling means converting the input image to uint8 dtype and scaling\n the range to ``(low, high)`` (default 0-255).\n If the input image already has dtype uint8, no scaling is done.\n\n This function is only available if Python Imaging Library (PIL) is installed.\n\n Parameters\n ----------\n data : ndarray\n PIL image data array.\n cmin : scalar, default=None\n Bias scaling of small values. Default is ``data.min()``.\n cmax : scalar, default=None\n Bias scaling of large values. Default is ``data.max()``.\n high : scalar, default=None\n Scale max value to `high`. Default is 255.\n low : scalar, default=None\n Scale min value to `low`. Default is 0.\n\n Returns\n -------\n img_array : uint8 ndarray\n The byte-scaled array.\n\n Examples\n --------\n >>> import numpy as np\n >>> from scipy.misc import bytescale\n >>> img = np.array([[ 91.06794177, 3.39058326, 84.4221549 ],\n ... [ 73.88003259, 80.91433048, 4.88878881],\n ... 
[ 51.53875334, 34.45808177, 27.5873488 ]])\n >>> bytescale(img)\n array([[255, 0, 236],\n [205, 225, 4],\n [140, 90, 70]], dtype=uint8)\n >>> bytescale(img, high=200, low=100)\n array([[200, 100, 192],\n [180, 188, 102],\n [155, 135, 128]], dtype=uint8)\n >>> bytescale(img, cmin=0, cmax=255)\n array([[91, 3, 84],\n [74, 81, 5],\n [52, 34, 28]], dtype=uint8)\n\n \"\"\"\n if data.dtype == uint8:\n return data\n\n if high > 255:\n raise ValueError(\"`high` should be less than or equal to 255.\")\n if low < 0:\n raise ValueError(\"`low` should be greater than or equal to 0.\")\n if high < low:\n raise ValueError(\"`high` should be greater than or equal to `low`.\")\n\n if cmin is None:\n cmin = data.min()\n if cmax is None:\n cmax = data.max()\n\n cscale = cmax - cmin\n if cscale < 0:\n raise ValueError(\"`cmax` should be larger than `cmin`.\")\n elif cscale == 0:\n cscale = 1\n\n scale = float(high - low) / cscale\n bytedata = (data - cmin) * scale + low\n return (bytedata.clip(low, high) + 0.5).astype(uint8)" + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/fromimage", + "name": "fromimage", + "qname": "sklearn.externals._pilutil.fromimage", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._pilutil/fromimage/im", + "name": "im", + "qname": "sklearn.externals._pilutil.fromimage.im", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "PIL image", + "default_value": "", + "description": "Input image." + }, + "type": { + "kind": "NamedType", + "name": "PIL image" + } + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/fromimage/flatten", + "name": "flatten", + "qname": "sklearn.externals._pilutil.fromimage.flatten", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If true, convert the output to grey-scale." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/fromimage/mode", + "name": "mode", + "qname": "sklearn.externals._pilutil.fromimage.mode", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Mode to convert image to, e.g. ``'RGB'``. See the Notes of the\n`imread` docstring for more details." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return a copy of a PIL image as a numpy array.\n\nThis function is only available if Python Imaging Library (PIL) is installed.", + "docstring": "Return a copy of a PIL image as a numpy array.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\nParameters\n----------\nim : PIL image\n Input image.\nflatten : bool, default=False\n If true, convert the output to grey-scale.\nmode : str, default=None\n Mode to convert image to, e.g. ``'RGB'``. 
See the Notes of the\n `imread` docstring for more details.\n\nReturns\n-------\nfromimage : ndarray\n The different colour bands/channels are stored in the\n third dimension, such that a grey-image is MxN, an\n RGB-image MxNx3 and an RGBA-image MxNx4.", + "code": "def fromimage(im, flatten=False, mode=None):\n \"\"\"\n Return a copy of a PIL image as a numpy array.\n\n This function is only available if Python Imaging Library (PIL) is installed.\n\n Parameters\n ----------\n im : PIL image\n Input image.\n flatten : bool, default=False\n If true, convert the output to grey-scale.\n mode : str, default=None\n Mode to convert image to, e.g. ``'RGB'``. See the Notes of the\n `imread` docstring for more details.\n\n Returns\n -------\n fromimage : ndarray\n The different colour bands/channels are stored in the\n third dimension, such that a grey-image is MxN, an\n RGB-image MxNx3 and an RGBA-image MxNx4.\n\n \"\"\"\n if not pillow_installed:\n raise ImportError(PILLOW_ERROR_MESSAGE)\n\n if not Image.isImageType(im):\n raise TypeError(\"Input is not a PIL image.\")\n\n if mode is not None:\n if mode != im.mode:\n im = im.convert(mode)\n elif im.mode == 'P':\n # Mode 'P' means there is an indexed \"palette\". If we leave the mode\n # as 'P', then when we do `a = array(im)` below, `a` will be a 2-D\n # containing the indices into the palette, and not a 3-D array\n # containing the RGB or RGBA values.\n if 'transparency' in im.info:\n im = im.convert('RGBA')\n else:\n im = im.convert('RGB')\n\n if flatten:\n im = im.convert('F')\n elif im.mode == '1':\n # Workaround for crash in PIL. When im is 1-bit, the call array(im)\n # can cause a seg. fault, or generate garbage. See\n # https://github.com/scipy/scipy/issues/2138 and\n # https://github.com/python-pillow/Pillow/issues/350.\n #\n # This converts im from a 1-bit image to an 8-bit image.\n im = im.convert('L')\n\n a = array(im)\n return a" + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/imread", + "name": "imread", + "qname": "sklearn.externals._pilutil.imread", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._pilutil/imread/name", + "name": "name", + "qname": "sklearn.externals._pilutil.imread.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or file object", + "default_value": "", + "description": "The file name or file object to be read." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "file object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/imread/flatten", + "name": "flatten", + "qname": "sklearn.externals._pilutil.imread.flatten", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, flattens the color layers into a single gray-scale layer." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/imread/mode", + "name": "mode", + "qname": "sklearn.externals._pilutil.imread.mode", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Mode to convert image to, e.g. ``'RGB'``. See the Notes for more\ndetails." 
+ }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Read an image from a file as an array.\n\nThis function is only available if Python Imaging Library (PIL) is installed.", + "docstring": "Read an image from a file as an array.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\nParameters\n----------\nname : str or file object\n The file name or file object to be read.\nflatten : bool, default=False\n If True, flattens the color layers into a single gray-scale layer.\nmode : str, default=None\n Mode to convert image to, e.g. ``'RGB'``. See the Notes for more\n details.\n\nReturns\n-------\nimread : ndarray\n The array obtained by reading the image.\n\nNotes\n-----\n`imread` uses the Python Imaging Library (PIL) to read an image.\nThe following notes are from the PIL documentation.\n\n`mode` can be one of the following strings:\n\n* 'L' (8-bit pixels, black and white)\n* 'P' (8-bit pixels, mapped to any other mode using a color palette)\n* 'RGB' (3x8-bit pixels, true color)\n* 'RGBA' (4x8-bit pixels, true color with transparency mask)\n* 'CMYK' (4x8-bit pixels, color separation)\n* 'YCbCr' (3x8-bit pixels, color video format)\n* 'I' (32-bit signed integer pixels)\n* 'F' (32-bit floating point pixels)\n\nPIL also provides limited support for a few special modes, including\n'LA' ('L' with alpha), 'RGBX' (true color with padding) and 'RGBa'\n(true color with premultiplied alpha).\n\nWhen translating a color image to black and white (mode 'L', 'I' or\n'F'), the library uses the ITU-R 601-2 luma transform::\n\n L = R * 299/1000 + G * 587/1000 + B * 114/1000\n\nWhen `flatten` is True, the image is converted using mode 'F'.\nWhen `mode` is not None and `flatten` is True, the image is first\nconverted according to `mode`, and the result is then flattened using\nmode 'F'.", + "code": "def imread(name, flatten=False, mode=None):\n \"\"\"\n Read an image from a file as an array.\n\n This function is only available if Python Imaging Library (PIL) is installed.\n\n Parameters\n ----------\n name : str or file object\n The file name or file object to be read.\n flatten : bool, default=False\n If True, flattens the color layers into a single gray-scale layer.\n mode : str, default=None\n Mode to convert image to, e.g. ``'RGB'``. 
See the Notes for more\n details.\n\n Returns\n -------\n imread : ndarray\n The array obtained by reading the image.\n\n Notes\n -----\n `imread` uses the Python Imaging Library (PIL) to read an image.\n The following notes are from the PIL documentation.\n\n `mode` can be one of the following strings:\n\n * 'L' (8-bit pixels, black and white)\n * 'P' (8-bit pixels, mapped to any other mode using a color palette)\n * 'RGB' (3x8-bit pixels, true color)\n * 'RGBA' (4x8-bit pixels, true color with transparency mask)\n * 'CMYK' (4x8-bit pixels, color separation)\n * 'YCbCr' (3x8-bit pixels, color video format)\n * 'I' (32-bit signed integer pixels)\n * 'F' (32-bit floating point pixels)\n\n PIL also provides limited support for a few special modes, including\n 'LA' ('L' with alpha), 'RGBX' (true color with padding) and 'RGBa'\n (true color with premultiplied alpha).\n\n When translating a color image to black and white (mode 'L', 'I' or\n 'F'), the library uses the ITU-R 601-2 luma transform::\n\n L = R * 299/1000 + G * 587/1000 + B * 114/1000\n\n When `flatten` is True, the image is converted using mode 'F'.\n When `mode` is not None and `flatten` is True, the image is first\n converted according to `mode`, and the result is then flattened using\n mode 'F'.\n\n \"\"\"\n if not pillow_installed:\n raise ImportError(PILLOW_ERROR_MESSAGE)\n\n im = Image.open(name)\n return fromimage(im, flatten=flatten, mode=mode)" + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/imresize", + "name": "imresize", + "qname": "sklearn.externals._pilutil.imresize", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._pilutil/imresize/arr", + "name": "arr", + "qname": "sklearn.externals._pilutil.imresize.arr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "The array of image to be resized." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/imresize/size", + "name": "size", + "qname": "sklearn.externals._pilutil.imresize.size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, float or tuple", + "default_value": "", + "description": "* int - Percentage of current size.\n* float - Fraction of current size.\n* tuple - Size of the output image (height, width)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "tuple" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/imresize/interp", + "name": "interp", + "qname": "sklearn.externals._pilutil.imresize.interp", + "default_value": "'bilinear'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'bilinear'", + "description": "Interpolation to use for re-sizing ('nearest', 'lanczos', 'bilinear',\n'bicubic' or 'cubic')." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/imresize/mode", + "name": "mode", + "qname": "sklearn.externals._pilutil.imresize.mode", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "The PIL image mode ('P', 'L', etc.) 
to convert `arr` before resizing.\nIf ``mode=None`` (the default), 2-D images will be treated like\n``mode='L'``, i.e. casting to long integer. For 3-D and 4-D arrays,\n`mode` will be set to ``'RGB'`` and ``'RGBA'`` respectively." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Resize an image.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\n.. warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).", + "docstring": "Resize an image.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\n.. warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).\n\nParameters\n----------\narr : ndarray\n The array of image to be resized.\nsize : int, float or tuple\n * int - Percentage of current size.\n * float - Fraction of current size.\n * tuple - Size of the output image (height, width).\n\ninterp : str, default='bilinear'\n Interpolation to use for re-sizing ('nearest', 'lanczos', 'bilinear',\n 'bicubic' or 'cubic').\nmode : str, default=None\n The PIL image mode ('P', 'L', etc.) to convert `arr` before resizing.\n If ``mode=None`` (the default), 2-D images will be treated like\n ``mode='L'``, i.e. casting to long integer. For 3-D and 4-D arrays,\n `mode` will be set to ``'RGB'`` and ``'RGBA'`` respectively.\n\nReturns\n-------\nimresize : ndarray\n The resized array of image.\n\nSee Also\n--------\ntoimage : Implicitly used to convert `arr` according to `mode`.\nscipy.ndimage.zoom : More generic implementation that does not use PIL.", + "code": "def imresize(arr, size, interp='bilinear', mode=None):\n \"\"\"\n Resize an image.\n\n This function is only available if Python Imaging Library (PIL) is installed.\n\n .. warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).\n\n Parameters\n ----------\n arr : ndarray\n The array of image to be resized.\n size : int, float or tuple\n * int - Percentage of current size.\n * float - Fraction of current size.\n * tuple - Size of the output image (height, width).\n\n interp : str, default='bilinear'\n Interpolation to use for re-sizing ('nearest', 'lanczos', 'bilinear',\n 'bicubic' or 'cubic').\n mode : str, default=None\n The PIL image mode ('P', 'L', etc.) to convert `arr` before resizing.\n If ``mode=None`` (the default), 2-D images will be treated like\n ``mode='L'``, i.e. casting to long integer. 
For 3-D and 4-D arrays,\n `mode` will be set to ``'RGB'`` and ``'RGBA'`` respectively.\n\n Returns\n -------\n imresize : ndarray\n The resized array of image.\n\n See Also\n --------\n toimage : Implicitly used to convert `arr` according to `mode`.\n scipy.ndimage.zoom : More generic implementation that does not use PIL.\n\n \"\"\"\n im = toimage(arr, mode=mode)\n ts = type(size)\n if issubdtype(ts, numpy.signedinteger):\n percent = size / 100.0\n size = tuple((array(im.size)*percent).astype(int))\n elif issubdtype(type(size), numpy.floating):\n size = tuple((array(im.size)*size).astype(int))\n else:\n size = (size[1], size[0])\n func = {'nearest': 0, 'lanczos': 1, 'bilinear': 2, 'bicubic': 3, 'cubic': 3}\n imnew = im.resize(size, resample=func[interp])\n return fromimage(imnew)" + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/imsave", + "name": "imsave", + "qname": "sklearn.externals._pilutil.imsave", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._pilutil/imsave/name", + "name": "name", + "qname": "sklearn.externals._pilutil.imsave.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or file object", + "default_value": "", + "description": "Output file name or file object." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "file object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/imsave/arr", + "name": "arr", + "qname": "sklearn.externals._pilutil.imsave.arr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, MxN or MxNx3 or MxNx4", + "default_value": "", + "description": "Array containing image values. If the shape is ``MxN``, the array\nrepresents a grey-level image. Shape ``MxNx3`` stores the red, green\nand blue bands along the last dimension. An alpha layer may be\nincluded, specified as the last colour band of an ``MxNx4`` array." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "MxN" + }, + { + "kind": "NamedType", + "name": "MxNx3" + }, + { + "kind": "NamedType", + "name": "MxNx4" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/imsave/format", + "name": "format", + "qname": "sklearn.externals._pilutil.imsave.format", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Image format. If omitted, the format to use is determined from the\nfile name extension. If a file object was used instead of a file name,\nthis parameter should always be used." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Save an array as an image.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\n.. warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).", + "docstring": "Save an array as an image.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\n.. 
warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).\n\nParameters\n----------\nname : str or file object\n Output file name or file object.\narr : ndarray, MxN or MxNx3 or MxNx4\n Array containing image values. If the shape is ``MxN``, the array\n represents a grey-level image. Shape ``MxNx3`` stores the red, green\n and blue bands along the last dimension. An alpha layer may be\n included, specified as the last colour band of an ``MxNx4`` array.\nformat : str, default=None\n Image format. If omitted, the format to use is determined from the\n file name extension. If a file object was used instead of a file name,\n this parameter should always be used.\n\nExamples\n--------\nConstruct an array of gradient intensity values and save to file:\n\n>>> import numpy as np\n>>> from scipy.misc import imsave\n>>> x = np.zeros((255, 255))\n>>> x = np.zeros((255, 255), dtype=np.uint8)\n>>> x[:] = np.arange(255)\n>>> imsave('gradient.png', x)\n\nConstruct an array with three colour bands (R, G, B) and store to file:\n\n>>> rgb = np.zeros((255, 255, 3), dtype=np.uint8)\n>>> rgb[..., 0] = np.arange(255)\n>>> rgb[..., 1] = 55\n>>> rgb[..., 2] = 1 - np.arange(255)\n>>> imsave('rgb_gradient.png', rgb)", + "code": "def imsave(name, arr, format=None):\n \"\"\"\n Save an array as an image.\n\n This function is only available if Python Imaging Library (PIL) is installed.\n\n .. warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).\n\n Parameters\n ----------\n name : str or file object\n Output file name or file object.\n arr : ndarray, MxN or MxNx3 or MxNx4\n Array containing image values. If the shape is ``MxN``, the array\n represents a grey-level image. Shape ``MxNx3`` stores the red, green\n and blue bands along the last dimension. An alpha layer may be\n included, specified as the last colour band of an ``MxNx4`` array.\n format : str, default=None\n Image format. If omitted, the format to use is determined from the\n file name extension. 
If a file object was used instead of a file name,\n this parameter should always be used.\n\n Examples\n --------\n Construct an array of gradient intensity values and save to file:\n\n >>> import numpy as np\n >>> from scipy.misc import imsave\n >>> x = np.zeros((255, 255))\n >>> x = np.zeros((255, 255), dtype=np.uint8)\n >>> x[:] = np.arange(255)\n >>> imsave('gradient.png', x)\n\n Construct an array with three colour bands (R, G, B) and store to file:\n\n >>> rgb = np.zeros((255, 255, 3), dtype=np.uint8)\n >>> rgb[..., 0] = np.arange(255)\n >>> rgb[..., 1] = 55\n >>> rgb[..., 2] = 1 - np.arange(255)\n >>> imsave('rgb_gradient.png', rgb)\n\n \"\"\"\n im = toimage(arr, channel_axis=2)\n if format is None:\n im.save(name)\n else:\n im.save(name, format)\n return" + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/toimage", + "name": "toimage", + "qname": "sklearn.externals._pilutil.toimage", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals._pilutil/toimage/arr", + "name": "arr", + "qname": "sklearn.externals._pilutil.toimage.arr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/toimage/high", + "name": "high", + "qname": "sklearn.externals._pilutil.toimage.high", + "default_value": "255", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/toimage/low", + "name": "low", + "qname": "sklearn.externals._pilutil.toimage.low", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/toimage/cmin", + "name": "cmin", + "qname": "sklearn.externals._pilutil.toimage.cmin", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/toimage/cmax", + "name": "cmax", + "qname": "sklearn.externals._pilutil.toimage.cmax", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/toimage/pal", + "name": "pal", + "qname": "sklearn.externals._pilutil.toimage.pal", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/toimage/mode", + "name": "mode", + "qname": "sklearn.externals._pilutil.toimage.mode", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals._pilutil/toimage/channel_axis", + "name": "channel_axis", + "qname": "sklearn.externals._pilutil.toimage.channel_axis", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + 
"results": [], + "is_public": false, + "reexported_by": [], + "description": "Takes a numpy array and returns a PIL image.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\nThe mode of the PIL image depends on the array shape and the `pal` and\n`mode` keywords.\n\nFor 2-D arrays, if `pal` is a valid (N,3) byte-array giving the RGB values\n(from 0 to 255) then ``mode='P'``, otherwise ``mode='L'``, unless mode\nis given as 'F' or 'I' in which case a float and/or integer array is made.\n\n.. warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).", + "docstring": "Takes a numpy array and returns a PIL image.\n\nThis function is only available if Python Imaging Library (PIL) is installed.\n\nThe mode of the PIL image depends on the array shape and the `pal` and\n`mode` keywords.\n\nFor 2-D arrays, if `pal` is a valid (N,3) byte-array giving the RGB values\n(from 0 to 255) then ``mode='P'``, otherwise ``mode='L'``, unless mode\nis given as 'F' or 'I' in which case a float and/or integer array is made.\n\n.. warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).\n\nNotes\n-----\nFor 3-D arrays, the `channel_axis` argument tells which dimension of the\narray holds the channel data.\n\nFor 3-D arrays if one of the dimensions is 3, the mode is 'RGB'\nby default or 'YCbCr' if selected.\n\nThe numpy array must be either 2 dimensional or 3 dimensional.", + "code": "def toimage(arr, high=255, low=0, cmin=None, cmax=None, pal=None,\n mode=None, channel_axis=None):\n \"\"\"Takes a numpy array and returns a PIL image.\n\n This function is only available if Python Imaging Library (PIL) is installed.\n\n The mode of the PIL image depends on the array shape and the `pal` and\n `mode` keywords.\n\n For 2-D arrays, if `pal` is a valid (N,3) byte-array giving the RGB values\n (from 0 to 255) then ``mode='P'``, otherwise ``mode='L'``, unless mode\n is given as 'F' or 'I' in which case a float and/or integer array is made.\n\n .. 
warning::\n\n This function uses `bytescale` under the hood to rescale images to use\n the full (0, 255) range if ``mode`` is one of ``None, 'L', 'P', 'l'``.\n It will also cast data for 2-D images to ``uint32`` for ``mode=None``\n (which is the default).\n\n Notes\n -----\n For 3-D arrays, the `channel_axis` argument tells which dimension of the\n array holds the channel data.\n\n For 3-D arrays if one of the dimensions is 3, the mode is 'RGB'\n by default or 'YCbCr' if selected.\n\n The numpy array must be either 2 dimensional or 3 dimensional.\n\n \"\"\"\n if not pillow_installed:\n raise ImportError(PILLOW_ERROR_MESSAGE)\n\n data = asarray(arr)\n if iscomplexobj(data):\n raise ValueError(\"Cannot convert a complex-valued array.\")\n shape = list(data.shape)\n valid = len(shape) == 2 or ((len(shape) == 3) and\n ((3 in shape) or (4 in shape)))\n if not valid:\n raise ValueError(\"'arr' does not have a suitable array shape for \"\n \"any mode.\")\n if len(shape) == 2:\n shape = (shape[1], shape[0]) # columns show up first\n if mode == 'F':\n data32 = data.astype(numpy.float32)\n image = Image.frombytes(mode, shape, data32.tobytes())\n return image\n if mode in [None, 'L', 'P']:\n bytedata = bytescale(data, high=high, low=low,\n cmin=cmin, cmax=cmax)\n image = Image.frombytes('L', shape, bytedata.tobytes())\n if pal is not None:\n image.putpalette(asarray(pal, dtype=uint8).tobytes())\n # Becomes a mode='P' automagically.\n elif mode == 'P': # default gray-scale\n pal = (arange(0, 256, 1, dtype=uint8)[:, newaxis] *\n ones((3,), dtype=uint8)[newaxis, :])\n image.putpalette(asarray(pal, dtype=uint8).tobytes())\n return image\n if mode == '1': # high input gives threshold for 1\n bytedata = (data > high)\n image = Image.frombytes('1', shape, bytedata.tobytes())\n return image\n if cmin is None:\n cmin = amin(ravel(data))\n if cmax is None:\n cmax = amax(ravel(data))\n data = (data*1.0 - cmin)*(high - low)/(cmax - cmin) + low\n if mode == 'I':\n data32 = data.astype(numpy.uint32)\n image = Image.frombytes(mode, shape, data32.tobytes())\n else:\n raise ValueError(_errstr)\n return image\n\n # if here then 3-d array with a 3 or a 4 in the shape length.\n # Check for 3 in datacube shape --- 'RGB' or 'YCbCr'\n if channel_axis is None:\n if (3 in shape):\n ca = numpy.flatnonzero(asarray(shape) == 3)[0]\n else:\n ca = numpy.flatnonzero(asarray(shape) == 4)\n if len(ca):\n ca = ca[0]\n else:\n raise ValueError(\"Could not find channel dimension.\")\n else:\n ca = channel_axis\n\n numch = shape[ca]\n if numch not in [3, 4]:\n raise ValueError(\"Channel axis dimension is not valid.\")\n\n bytedata = bytescale(data, high=high, low=low, cmin=cmin, cmax=cmax)\n if ca == 2:\n strdata = bytedata.tobytes()\n shape = (shape[1], shape[0])\n elif ca == 1:\n strdata = transpose(bytedata, (0, 2, 1)).tobytes()\n shape = (shape[2], shape[0])\n elif ca == 0:\n strdata = transpose(bytedata, (1, 2, 0)).tobytes()\n shape = (shape[2], shape[1])\n if mode is None:\n if numch == 3:\n mode = 'RGB'\n else:\n mode = 'RGBA'\n\n if mode not in ['RGB', 'RGBA', 'YCbCr', 'CMYK']:\n raise ValueError(_errstr)\n\n if mode in ['RGB', 'YCbCr']:\n if numch != 3:\n raise ValueError(\"Invalid array shape for mode.\")\n if mode in ['RGBA', 'CMYK']:\n if numch != 4:\n raise ValueError(\"Invalid array shape for mode.\")\n\n # Here we know data and mode is correct\n image = Image.frombytes(mode, shape, strdata)\n return image" + }, + { + "id": "scikit-learn/sklearn.externals.conftest/pytest_ignore_collect", + "name": "pytest_ignore_collect", 
+ "qname": "sklearn.externals.conftest.pytest_ignore_collect", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.externals.conftest/pytest_ignore_collect/path", + "name": "path", + "qname": "sklearn.externals.conftest.pytest_ignore_collect.path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.externals.conftest/pytest_ignore_collect/config", + "name": "config", + "qname": "sklearn.externals.conftest.pytest_ignore_collect.config", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def pytest_ignore_collect(path, config):\n return True" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/__init__", + "name": "__init__", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/__init__/self", + "name": "self", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/__init__/dtype", + "name": "dtype", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.__init__.dtype", + "default_value": "np.float64", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dtype", + "default_value": "np.float64", + "description": "The type of feature values. Passed to Numpy array/scipy.sparse matrix\nconstructors as the dtype argument." + }, + "type": { + "kind": "NamedType", + "name": "dtype" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/__init__/separator", + "name": "separator", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.__init__.separator", + "default_value": "'='", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "\"=\"", + "description": "Separator string used when constructing new features for one-hot\ncoding." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/__init__/sparse", + "name": "sparse", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.__init__.sparse", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether transform should produce scipy.sparse matrices." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/__init__/sort", + "name": "sort", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.__init__.sort", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether ``feature_names_`` and ``vocabulary_`` should be\nsorted when fitting." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transforms lists of feature-value mappings to vectors.\n\nThis transformer turns lists of mappings (dict-like objects) of feature\nnames to feature values into Numpy arrays or scipy.sparse matrices for use\nwith scikit-learn estimators.\n\nWhen feature values are strings, this transformer will do a binary one-hot\n(aka one-of-K) coding: one boolean-valued feature is constructed for each\nof the possible string values that the feature can take on. For instance,\na feature \"f\" that can take on the values \"ham\" and \"spam\" will become two\nfeatures in the output, one signifying \"f=ham\", the other \"f=spam\".\n\nIf a feature value is a sequence or set of strings, this transformer\nwill iterate over the values and will count the occurrences of each string\nvalue.\n\nHowever, note that this transformer will only do a binary one-hot encoding\nwhen feature values are of type string. If categorical features are\nrepresented as numeric values such as int or iterables of strings, the\nDictVectorizer can be followed by\n:class:`~sklearn.preprocessing.OneHotEncoder` to complete\nbinary one-hot encoding.\n\nFeatures that do not occur in a sample (mapping) will have a zero value\nin the resulting array/matrix.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, dtype=np.float64, separator=\"=\", sparse=True,\n sort=True):\n self.dtype = dtype\n self.separator = separator\n self.sparse = sparse\n self.sort = sort" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_add_iterable_element", + "name": "_add_iterable_element", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._add_iterable_element", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_add_iterable_element/self", + "name": "self", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._add_iterable_element.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_add_iterable_element/f", + "name": "f", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._add_iterable_element.f", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_add_iterable_element/v", + "name": "v", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._add_iterable_element.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": 
false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_add_iterable_element/feature_names", + "name": "feature_names", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._add_iterable_element.feature_names", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_add_iterable_element/vocab", + "name": "vocab", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._add_iterable_element.vocab", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_add_iterable_element/fitting", + "name": "fitting", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._add_iterable_element.fitting", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_add_iterable_element/transforming", + "name": "transforming", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._add_iterable_element.transforming", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_add_iterable_element/indices", + "name": "indices", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._add_iterable_element.indices", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_add_iterable_element/values", + "name": "values", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._add_iterable_element.values", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Add feature names for iterable of strings", + "docstring": "Add feature names for iterable of strings", + "code": " def _add_iterable_element(self, f, v, feature_names, vocab, *,\n fitting=True, transforming=False,\n indices=None, values=None):\n \"\"\"Add feature names for iterable of strings\"\"\"\n for vv in v:\n if isinstance(vv, str):\n feature_name = \"%s%s%s\" % (f, self.separator, vv)\n vv = 1\n else:\n raise TypeError(f'Unsupported type {type(vv)} in iterable '\n 'value. 
Only iterables of string are '\n 'supported.')\n if fitting and feature_name not in vocab:\n vocab[feature_name] = len(feature_names)\n feature_names.append(feature_name)\n\n if transforming and feature_name in vocab:\n indices.append(vocab[feature_name])\n values.append(self.dtype(vv))\n\n return" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_more_tags/self", + "name": "self", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'X_types': [\"dict\"]}" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_transform", + "name": "_transform", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_transform/self", + "name": "self", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_transform/X", + "name": "X", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/_transform/fitting", + "name": "fitting", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer._transform.fitting", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _transform(self, X, fitting):\n # Sanity check: Python's array has no way of explicitly requesting the\n # signed 32-bit integers that scipy.sparse needs, so we use the next\n # best thing: typecode \"i\" (int). 
However, if that gives larger or\n # smaller integers than 32-bit ones, np.frombuffer screws up.\n assert array(\"i\").itemsize == 4, (\n \"sizeof(int) != 4 on your platform; please report this at\"\n \" https://github.com/scikit-learn/scikit-learn/issues and\"\n \" include the output from platform.platform() in your bug report\")\n\n dtype = self.dtype\n if fitting:\n feature_names = []\n vocab = {}\n else:\n feature_names = self.feature_names_\n vocab = self.vocabulary_\n\n transforming = True\n\n # Process everything as sparse regardless of setting\n X = [X] if isinstance(X, Mapping) else X\n\n indices = array(\"i\")\n indptr = [0]\n # XXX we could change values to an array.array as well, but it\n # would require (heuristic) conversion of dtype to typecode...\n values = []\n\n # collect all the possible feature names and build sparse matrix at\n # same time\n for x in X:\n for f, v in x.items():\n if isinstance(v, str):\n feature_name = \"%s%s%s\" % (f, self.separator, v)\n v = 1\n elif isinstance(v, Number) or (v is None):\n feature_name = f\n elif isinstance(v, Mapping):\n raise TypeError(f'Unsupported value Type {type(v)} '\n f'for {f}: {v}.\\n'\n 'Mapping objects are not supported.')\n elif isinstance(v, Iterable):\n feature_name = None\n self._add_iterable_element(f, v, feature_names, vocab,\n fitting=fitting,\n transforming=transforming,\n indices=indices, values=values)\n\n if feature_name is not None:\n if fitting and feature_name not in vocab:\n vocab[feature_name] = len(feature_names)\n feature_names.append(feature_name)\n\n if feature_name in vocab:\n indices.append(vocab[feature_name])\n values.append(self.dtype(v))\n\n indptr.append(len(indices))\n\n if len(indptr) == 1:\n raise ValueError(\"Sample sequence X is empty.\")\n\n indices = np.frombuffer(indices, dtype=np.intc)\n shape = (len(indptr) - 1, len(vocab))\n\n result_matrix = sp.csr_matrix((values, indices, indptr),\n shape=shape, dtype=dtype)\n\n # Sort everything if asked\n if fitting and self.sort:\n feature_names.sort()\n map_index = np.empty(len(feature_names), dtype=np.int32)\n for new_val, f in enumerate(feature_names):\n map_index[new_val] = vocab[f]\n vocab[f] = new_val\n result_matrix = result_matrix[:, map_index]\n\n if self.sparse:\n result_matrix.sort_indices()\n else:\n result_matrix = result_matrix.toarray()\n\n if fitting:\n self.feature_names_ = feature_names\n self.vocabulary_ = vocab\n\n return result_matrix" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/fit", + "name": "fit", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/fit/self", + "name": "self", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/fit/X", + "name": "X", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Mapping or iterable over Mappings", + "default_value": "", + "description": "Dict(s) or Mapping(s) from feature names (arbitrary Python\nobjects) to feature values (strings or convertible to dtype).\n\n.. 
versionchanged:: 0.24\n Accepts multiple string values for one categorical feature." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "Mapping" + }, + { + "kind": "NamedType", + "name": "iterable over Mappings" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/fit/y", + "name": "y", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "(ignored)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "(ignored)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Learn a list of feature name -> indices mappings.", + "docstring": "Learn a list of feature name -> indices mappings.\n\nParameters\n----------\nX : Mapping or iterable over Mappings\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\n .. versionchanged:: 0.24\n Accepts multiple string values for one categorical feature.\n\ny : (ignored)\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"Learn a list of feature name -> indices mappings.\n\n Parameters\n ----------\n X : Mapping or iterable over Mappings\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\n .. versionchanged:: 0.24\n Accepts multiple string values for one categorical feature.\n\n y : (ignored)\n\n Returns\n -------\n self\n \"\"\"\n feature_names = []\n vocab = {}\n\n for x in X:\n for f, v in x.items():\n if isinstance(v, str):\n feature_name = \"%s%s%s\" % (f, self.separator, v)\n v = 1\n elif isinstance(v, Number) or (v is None):\n feature_name = f\n elif isinstance(v, Mapping):\n raise TypeError(f'Unsupported value type {type(v)} '\n f'for {f}: {v}.\\n'\n 'Mapping objects are not supported.')\n elif isinstance(v, Iterable):\n feature_name = None\n self._add_iterable_element(f, v, feature_names, vocab)\n\n if feature_name is not None:\n if feature_name not in vocab:\n vocab[feature_name] = len(feature_names)\n feature_names.append(feature_name)\n\n if self.sort:\n feature_names.sort()\n vocab = {f: i for i, f in enumerate(feature_names)}\n\n self.feature_names_ = feature_names\n self.vocabulary_ = vocab\n\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/fit_transform", + "name": "fit_transform", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/fit_transform/self", + "name": "self", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/fit_transform/X", + "name": "X", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Mapping or iterable over Mappings", + "default_value": "", + "description": "Dict(s) or Mapping(s) from feature names 
(arbitrary Python\nobjects) to feature values (strings or convertible to dtype).\n\n.. versionchanged:: 0.24\n Accepts multiple string values for one categorical feature." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "Mapping" + }, + { + "kind": "NamedType", + "name": "iterable over Mappings" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/fit_transform/y", + "name": "y", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "(ignored)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "(ignored)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Learn a list of feature name -> indices mappings and transform X.\n\nLike fit(X) followed by transform(X), but does not require\nmaterializing X in memory.", + "docstring": "Learn a list of feature name -> indices mappings and transform X.\n\nLike fit(X) followed by transform(X), but does not require\nmaterializing X in memory.\n\nParameters\n----------\nX : Mapping or iterable over Mappings\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\n .. versionchanged:: 0.24\n Accepts multiple string values for one categorical feature.\n\ny : (ignored)\n\nReturns\n-------\nXa : {array, sparse matrix}\n Feature vectors; always 2-d.", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Learn a list of feature name -> indices mappings and transform X.\n\n Like fit(X) followed by transform(X), but does not require\n materializing X in memory.\n\n Parameters\n ----------\n X : Mapping or iterable over Mappings\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\n .. 
versionchanged:: 0.24\n Accepts multiple string values for one categorical feature.\n\n y : (ignored)\n\n Returns\n -------\n Xa : {array, sparse matrix}\n Feature vectors; always 2-d.\n \"\"\"\n return self._transform(X, fitting=True)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/get_feature_names", + "name": "get_feature_names", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.get_feature_names", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/get_feature_names/self", + "name": "self", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.get_feature_names.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns a list of feature names, ordered by their indices.\n\nIf one-of-K coding is applied to categorical features, this will\ninclude the constructed feature names but not the original ones.", + "docstring": "Returns a list of feature names, ordered by their indices.\n\nIf one-of-K coding is applied to categorical features, this will\ninclude the constructed feature names but not the original ones.", + "code": " def get_feature_names(self):\n \"\"\"Returns a list of feature names, ordered by their indices.\n\n If one-of-K coding is applied to categorical features, this will\n include the constructed feature names but not the original ones.\n \"\"\"\n return self.feature_names_" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/inverse_transform/self", + "name": "self", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/inverse_transform/X", + "name": "X", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Sample matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/inverse_transform/dict_type", + "name": "dict_type", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.inverse_transform.dict_type", + "default_value": "dict", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "type", + "default_value": "dict", + "description": "Constructor for feature mappings. Must conform to the\ncollections.Mapping API." 
+ }, + "type": { + "kind": "NamedType", + "name": "type" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform array or sparse matrix X back to feature mappings.\n\nX must have been produced by this DictVectorizer's transform or\nfit_transform method; it may only have passed through transformers\nthat preserve the number of features and their order.\n\nIn the case of one-hot/one-of-K coding, the constructed feature\nnames and values are returned rather than the original ones.", + "docstring": "Transform array or sparse matrix X back to feature mappings.\n\nX must have been produced by this DictVectorizer's transform or\nfit_transform method; it may only have passed through transformers\nthat preserve the number of features and their order.\n\nIn the case of one-hot/one-of-K coding, the constructed feature\nnames and values are returned rather than the original ones.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Sample matrix.\ndict_type : type, default=dict\n Constructor for feature mappings. Must conform to the\n collections.Mapping API.\n\nReturns\n-------\nD : list of dict_type objects of shape (n_samples,)\n Feature mappings for the samples in X.", + "code": " def inverse_transform(self, X, dict_type=dict):\n \"\"\"Transform array or sparse matrix X back to feature mappings.\n\n X must have been produced by this DictVectorizer's transform or\n fit_transform method; it may only have passed through transformers\n that preserve the number of features and their order.\n\n In the case of one-hot/one-of-K coding, the constructed feature\n names and values are returned rather than the original ones.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Sample matrix.\n dict_type : type, default=dict\n Constructor for feature mappings. 
Must conform to the\n collections.Mapping API.\n\n Returns\n -------\n D : list of dict_type objects of shape (n_samples,)\n Feature mappings for the samples in X.\n \"\"\"\n # COO matrix is not subscriptable\n X = check_array(X, accept_sparse=['csr', 'csc'])\n n_samples = X.shape[0]\n\n names = self.feature_names_\n dicts = [dict_type() for _ in range(n_samples)]\n\n if sp.issparse(X):\n for i, j in zip(*X.nonzero()):\n dicts[i][names[j]] = X[i, j]\n else:\n for i, d in enumerate(dicts):\n for j, v in enumerate(X[i, :]):\n if v != 0:\n d[names[j]] = X[i, j]\n\n return dicts" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/restrict", + "name": "restrict", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.restrict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/restrict/self", + "name": "self", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.restrict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/restrict/support", + "name": "support", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.restrict.support", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "Boolean mask or list of indices (as returned by the get_support\nmember of feature selectors)." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/restrict/indices", + "name": "indices", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.restrict.indices", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether support is a list of indices." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Restrict the features to those in support using feature selection.\n\nThis function modifies the estimator in-place.", + "docstring": "Restrict the features to those in support using feature selection.\n\nThis function modifies the estimator in-place.\n\nParameters\n----------\nsupport : array-like\n Boolean mask or list of indices (as returned by the get_support\n member of feature selectors).\nindices : bool, default=False\n Whether support is a list of indices.\n\nReturns\n-------\nself\n\nExamples\n--------\n>>> from sklearn.feature_extraction import DictVectorizer\n>>> from sklearn.feature_selection import SelectKBest, chi2\n>>> v = DictVectorizer()\n>>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n>>> X = v.fit_transform(D)\n>>> support = SelectKBest(chi2, k=2).fit(X, [0, 1])\n>>> v.get_feature_names()\n['bar', 'baz', 'foo']\n>>> v.restrict(support.get_support())\nDictVectorizer()\n>>> v.get_feature_names()\n['bar', 'foo']", + "code": " def restrict(self, support, indices=False):\n \"\"\"Restrict the features to those in support using feature selection.\n\n This function modifies the estimator in-place.\n\n Parameters\n ----------\n support : array-like\n Boolean mask or list of indices (as returned by the get_support\n member of feature selectors).\n indices : bool, default=False\n Whether support is a list of indices.\n\n Returns\n -------\n self\n\n Examples\n --------\n >>> from sklearn.feature_extraction import DictVectorizer\n >>> from sklearn.feature_selection import SelectKBest, chi2\n >>> v = DictVectorizer()\n >>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]\n >>> X = v.fit_transform(D)\n >>> support = SelectKBest(chi2, k=2).fit(X, [0, 1])\n >>> v.get_feature_names()\n ['bar', 'baz', 'foo']\n >>> v.restrict(support.get_support())\n DictVectorizer()\n >>> v.get_feature_names()\n ['bar', 'foo']\n \"\"\"\n if not indices:\n support = np.where(support)[0]\n\n names = self.feature_names_\n new_vocab = {}\n for i in support:\n new_vocab[names[i]] = len(new_vocab)\n\n self.vocabulary_ = new_vocab\n self.feature_names_ = [f for f, i in sorted(new_vocab.items(),\n key=itemgetter(1))]\n\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/transform", + "name": "transform", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/transform/self", + "name": "self", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/DictVectorizer/transform/X", + "name": "X", + "qname": "sklearn.feature_extraction._dict_vectorizer.DictVectorizer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Mapping or iterable over Mappings of shape (n_samples,)", + "default_value": "", + "description": "Dict(s) or Mapping(s) from feature names (arbitrary Python\nobjects) to feature values (strings or convertible to dtype)." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "Mapping" + }, + { + "kind": "NamedType", + "name": "iterable over Mappings of shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform feature->value dicts to array or sparse matrix.\n\nNamed features not encountered during fit or fit_transform will be\nsilently ignored.", + "docstring": "Transform feature->value dicts to array or sparse matrix.\n\nNamed features not encountered during fit or fit_transform will be\nsilently ignored.\n\nParameters\n----------\nX : Mapping or iterable over Mappings of shape (n_samples,)\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\nReturns\n-------\nXa : {array, sparse matrix}\n Feature vectors; always 2-d.", + "code": " def transform(self, X):\n \"\"\"Transform feature->value dicts to array or sparse matrix.\n\n Named features not encountered during fit or fit_transform will be\n silently ignored.\n\n Parameters\n ----------\n X : Mapping or iterable over Mappings of shape (n_samples,)\n Dict(s) or Mapping(s) from feature names (arbitrary Python\n objects) to feature values (strings or convertible to dtype).\n\n Returns\n -------\n Xa : {array, sparse matrix}\n Feature vectors; always 2-d.\n \"\"\"\n if self.sparse:\n return self._transform(X, fitting=False)\n\n else:\n dtype = self.dtype\n vocab = self.vocabulary_\n X = _tosequence(X)\n Xa = np.zeros((len(X), len(vocab)), dtype=dtype)\n\n for i, x in enumerate(X):\n for f, v in x.items():\n if isinstance(v, str):\n f = \"%s%s%s\" % (f, self.separator, v)\n v = 1\n try:\n Xa[i, vocab[f]] = dtype(v)\n except KeyError:\n pass\n\n return Xa" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/_tosequence", + "name": "_tosequence", + "qname": "sklearn.feature_extraction._dict_vectorizer._tosequence", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._dict_vectorizer/_tosequence/X", + "name": "X", + "qname": "sklearn.feature_extraction._dict_vectorizer._tosequence.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Turn X into a sequence or ndarray, avoiding a copy if possible.", + "docstring": "Turn X into a sequence or ndarray, avoiding a copy if possible.", + "code": "def _tosequence(X):\n \"\"\"Turn X into a sequence or ndarray, avoiding a copy if possible.\"\"\"\n if isinstance(X, Mapping): # single sample\n return [X]\n else:\n return tosequence(X)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/__init__", + "name": "__init__", + "qname": "sklearn.feature_extraction._hash.FeatureHasher.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/__init__/self", + "name": "self", + "qname": "sklearn.feature_extraction._hash.FeatureHasher.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/__init__/n_features", + "name": "n_features", + "qname": 
"sklearn.feature_extraction._hash.FeatureHasher.__init__.n_features", + "default_value": "2**20", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2**20", + "description": "The number of features (columns) in the output matrices. Small numbers\nof features are likely to cause hash collisions, but large numbers\nwill cause larger coefficient dimensions in linear learners." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/__init__/input_type", + "name": "input_type", + "qname": "sklearn.feature_extraction._hash.FeatureHasher.__init__.input_type", + "default_value": "'dict'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"dict\", \"pair\", \"string\"}", + "default_value": "\"dict\"", + "description": "Either \"dict\" (the default) to accept dictionaries over\n(feature_name, value); \"pair\" to accept pairs of (feature_name, value);\nor \"string\" to accept single strings.\nfeature_name should be a string, while value should be a number.\nIn the case of \"string\", a value of 1 is implied.\nThe feature_name is hashed to find the appropriate column for the\nfeature. The value's sign might be flipped in the output (but see\nnon_negative, below)." + }, + "type": { + "kind": "EnumType", + "values": ["pair", "dict", "string"] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/__init__/dtype", + "name": "dtype", + "qname": "sklearn.feature_extraction._hash.FeatureHasher.__init__.dtype", + "default_value": "np.float64", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "numpy dtype", + "default_value": "np.float64", + "description": "The type of feature values. Passed to scipy.sparse matrix constructors\nas the dtype argument. Do not set this to bool, np.boolean or any\nunsigned integer type." + }, + "type": { + "kind": "NamedType", + "name": "numpy dtype" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/__init__/alternate_sign", + "name": "alternate_sign", + "qname": "sklearn.feature_extraction._hash.FeatureHasher.__init__.alternate_sign", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "When True, an alternating sign is added to the features as to\napproximately conserve the inner product in the hashed space even for\nsmall n_features. This approach is similar to sparse random projection.\n\n.. versionchanged:: 0.19\n ``alternate_sign`` replaces the now deprecated ``non_negative``\n parameter." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Implements feature hashing, aka the hashing trick.\n\nThis class turns sequences of symbolic feature names (strings) into\nscipy.sparse matrices, using a hash function to compute the matrix column\ncorresponding to a name. The hash function employed is the signed 32-bit\nversion of Murmurhash3.\n\nFeature names of type byte string are used as-is. Unicode strings are\nconverted to UTF-8 first, but no Unicode normalization is done.\nFeature values must be (finite) numbers.\n\nThis class is a low-memory alternative to DictVectorizer and\nCountVectorizer, intended for large-scale (online) learning and situations\nwhere memory is tight, e.g. 
when running prediction code on embedded\ndevices.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_features=(2 ** 20), *, input_type=\"dict\",\n dtype=np.float64, alternate_sign=True):\n self._validate_params(n_features, input_type)\n\n self.dtype = dtype\n self.input_type = input_type\n self.n_features = n_features\n self.alternate_sign = alternate_sign" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/_more_tags", + "name": "_more_tags", + "qname": "sklearn.feature_extraction._hash.FeatureHasher._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/_more_tags/self", + "name": "self", + "qname": "sklearn.feature_extraction._hash.FeatureHasher._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'X_types': [self.input_type]}" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/_validate_params", + "name": "_validate_params", + "qname": "sklearn.feature_extraction._hash.FeatureHasher._validate_params", + "decorators": ["staticmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/_validate_params/n_features", + "name": "n_features", + "qname": "sklearn.feature_extraction._hash.FeatureHasher._validate_params.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/_validate_params/input_type", + "name": "input_type", + "qname": "sklearn.feature_extraction._hash.FeatureHasher._validate_params.input_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @staticmethod\n def _validate_params(n_features, input_type):\n # strangely, np.int16 instances are not instances of Integral,\n # while np.int64 instances are...\n if not isinstance(n_features, numbers.Integral):\n raise TypeError(\"n_features must be integral, got %r (%s).\"\n % (n_features, type(n_features)))\n elif n_features < 1 or n_features >= np.iinfo(np.int32).max + 1:\n raise ValueError(\"Invalid number of features (%d).\" % n_features)\n\n if input_type not in (\"dict\", \"pair\", \"string\"):\n raise ValueError(\"input_type must be 'dict', 'pair' or 'string',\"\n \" got %r.\" % input_type)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/fit", + "name": "fit", + "qname": "sklearn.feature_extraction._hash.FeatureHasher.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/fit/self", + "name": "self", + "qname": "sklearn.feature_extraction._hash.FeatureHasher.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + 
{ + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/fit/X", + "name": "X", + "qname": "sklearn.feature_extraction._hash.FeatureHasher.fit.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/fit/y", + "name": "y", + "qname": "sklearn.feature_extraction._hash.FeatureHasher.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "No-op.\n\nThis method doesn't do anything. It exists purely for compatibility\nwith the scikit-learn transformer API.", + "docstring": "No-op.\n\nThis method doesn't do anything. It exists purely for compatibility\nwith the scikit-learn transformer API.\n\nParameters\n----------\nX : ndarray\n\nReturns\n-------\nself : FeatureHasher", + "code": " def fit(self, X=None, y=None):\n \"\"\"No-op.\n\n This method doesn't do anything. It exists purely for compatibility\n with the scikit-learn transformer API.\n\n Parameters\n ----------\n X : ndarray\n\n Returns\n -------\n self : FeatureHasher\n\n \"\"\"\n # repeat input validation for grid search (which calls set_params)\n self._validate_params(self.n_features, self.input_type)\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/transform", + "name": "transform", + "qname": "sklearn.feature_extraction._hash.FeatureHasher.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/transform/self", + "name": "self", + "qname": "sklearn.feature_extraction._hash.FeatureHasher.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/FeatureHasher/transform/raw_X", + "name": "raw_X", + "qname": "sklearn.feature_extraction._hash.FeatureHasher.transform.raw_X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "iterable over iterable over raw features, length = n_samples", + "default_value": "", + "description": "Samples. Each sample must be iterable an (e.g., a list or tuple)\ncontaining/generating feature names (and optionally values, see\nthe input_type constructor argument) which will be hashed.\nraw_X need not support the len function, so it can be the result\nof a generator; n_samples is determined on the fly." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "iterable over iterable over raw features" + }, + { + "kind": "NamedType", + "name": "length = n_samples" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform a sequence of instances to a scipy.sparse matrix.", + "docstring": "Transform a sequence of instances to a scipy.sparse matrix.\n\nParameters\n----------\nraw_X : iterable over iterable over raw features, length = n_samples\n Samples. 
Each sample must be an iterable (e.g., a list or tuple)\n containing/generating feature names (and optionally values, see\n the input_type constructor argument) which will be hashed.\n raw_X need not support the len function, so it can be the result\n of a generator; n_samples is determined on the fly.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n Feature matrix, for use with estimators or further transformers.", + "code": " def transform(self, raw_X):\n \"\"\"Transform a sequence of instances to a scipy.sparse matrix.\n\n Parameters\n ----------\n raw_X : iterable over iterable over raw features, length = n_samples\n Samples. Each sample must be an iterable (e.g., a list or tuple)\n containing/generating feature names (and optionally values, see\n the input_type constructor argument) which will be hashed.\n raw_X need not support the len function, so it can be the result\n of a generator; n_samples is determined on the fly.\n\n Returns\n -------\n X : sparse matrix of shape (n_samples, n_features)\n Feature matrix, for use with estimators or further transformers.\n\n \"\"\"\n raw_X = iter(raw_X)\n if self.input_type == \"dict\":\n raw_X = (_iteritems(d) for d in raw_X)\n elif self.input_type == \"string\":\n raw_X = (((f, 1) for f in x) for x in raw_X)\n indices, indptr, values = \\\n _hashing_transform(raw_X, self.n_features, self.dtype,\n self.alternate_sign, seed=0)\n n_samples = indptr.shape[0] - 1\n\n if n_samples == 0:\n raise ValueError(\"Cannot vectorize empty sequence.\")\n\n X = sp.csr_matrix((values, indices, indptr), dtype=self.dtype,\n shape=(n_samples, self.n_features))\n X.sum_duplicates() # also sorts the indices\n\n return X" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/_hashing_transform", + "name": "_hashing_transform", + "qname": "sklearn.feature_extraction._hash._hashing_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._hash/_hashing_transform/args", + "name": "args", + "qname": "sklearn.feature_extraction._hash._hashing_transform.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/_hashing_transform/kwargs", + "name": "kwargs", + "qname": "sklearn.feature_extraction._hash._hashing_transform.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _hashing_transform(*args, **kwargs):\n raise NotImplementedError(\n 'FeatureHasher is not compatible with PyPy (see '\n 'https://github.com/scikit-learn/scikit-learn/issues/11540 '\n 'for the status updates).')" + }, + { + "id": "scikit-learn/sklearn.feature_extraction._hash/_iteritems", + "name": "_iteritems", + "qname": "sklearn.feature_extraction._hash._iteritems", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction._hash/_iteritems/d", + "name": "d", + "qname": "sklearn.feature_extraction._hash._iteritems.d", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + 
"reexported_by": [], + "description": "Like d.iteritems, but accepts any collections.Mapping.", + "docstring": "Like d.iteritems, but accepts any collections.Mapping.", + "code": "def _iteritems(d):\n \"\"\"Like d.iteritems, but accepts any collections.Mapping.\"\"\"\n return d.iteritems() if hasattr(d, \"iteritems\") else d.items()" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/__init__", + "name": "__init__", + "qname": "sklearn.feature_extraction.image.PatchExtractor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/__init__/self", + "name": "self", + "qname": "sklearn.feature_extraction.image.PatchExtractor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/__init__/patch_size", + "name": "patch_size", + "qname": "sklearn.feature_extraction.image.PatchExtractor.__init__.patch_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "tuple of int (patch_height, patch_width)", + "default_value": "None", + "description": "The dimensions of one patch." + }, + "type": { + "kind": "NamedType", + "name": "tuple of int (patch_height, patch_width)" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/__init__/max_patches", + "name": "max_patches", + "qname": "sklearn.feature_extraction.image.PatchExtractor.__init__.max_patches", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int or float", + "default_value": "None", + "description": "The maximum number of patches per image to extract. If max_patches is a\nfloat in (0, 1), it is taken to mean a proportion of the total number\nof patches." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.feature_extraction.image.PatchExtractor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Determines the random number generator used for random sampling when\n`max_patches` is not None. Use an int to make the randomness\ndeterministic.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Extracts patches from a collection of images\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.9", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, patch_size=None, max_patches=None,\n random_state=None):\n self.patch_size = patch_size\n self.max_patches = max_patches\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/_more_tags", + "name": "_more_tags", + "qname": "sklearn.feature_extraction.image.PatchExtractor._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/_more_tags/self", + "name": "self", + "qname": "sklearn.feature_extraction.image.PatchExtractor._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'X_types': ['3darray']}" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/fit", + "name": "fit", + "qname": "sklearn.feature_extraction.image.PatchExtractor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/fit/self", + "name": "self", + "qname": "sklearn.feature_extraction.image.PatchExtractor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/fit/X", + "name": "X", + "qname": "sklearn.feature_extraction.image.PatchExtractor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/fit/y", + "name": "y", + "qname": "sklearn.feature_extraction.image.PatchExtractor.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.", + "docstring": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.", + "code": " def fit(self, X, y=None):\n \"\"\"Do nothing and return the estimator unchanged.\n\n This method is just there to implement the usual API and hence\n work in pipelines.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n \"\"\"\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/transform", + "name": "transform", + "qname": "sklearn.feature_extraction.image.PatchExtractor.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/transform/self", + "name": "self", + "qname": "sklearn.feature_extraction.image.PatchExtractor.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/PatchExtractor/transform/X", + "name": "X", + "qname": "sklearn.feature_extraction.image.PatchExtractor.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples, image_height, image_width) or (n_samples, image_height, image_width, n_channels)", + "default_value": "", + "description": "Array of images from which to extract patches. For color images,\nthe last dimension specifies the channel: a RGB image would have\n`n_channels=3`." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, image_height, image_width) or (n_samples, image_height, image_width, n_channels)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Transforms the image samples in X into a matrix of patch data.", + "docstring": "Transforms the image samples in X into a matrix of patch data.\n\nParameters\n----------\nX : ndarray of shape (n_samples, image_height, image_width) or (n_samples, image_height, image_width, n_channels)\n Array of images from which to extract patches. 
For color images,\n the last dimension specifies the channel: a RGB image would have\n `n_channels=3`.\n\nReturns\n-------\npatches : array of shape (n_patches, patch_height, patch_width) or (n_patches, patch_height, patch_width, n_channels)\n The collection of patches extracted from the images, where\n `n_patches` is either `n_samples * max_patches` or the total\n number of patches that can be extracted.", + "code": " def transform(self, X):\n \"\"\"Transforms the image samples in X into a matrix of patch data.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, image_height, image_width) or \\\n (n_samples, image_height, image_width, n_channels)\n Array of images from which to extract patches. For color images,\n the last dimension specifies the channel: a RGB image would have\n `n_channels=3`.\n\n Returns\n -------\n patches : array of shape (n_patches, patch_height, patch_width) or \\\n (n_patches, patch_height, patch_width, n_channels)\n The collection of patches extracted from the images, where\n `n_patches` is either `n_samples * max_patches` or the total\n number of patches that can be extracted.\n \"\"\"\n self.random_state = check_random_state(self.random_state)\n n_images, i_h, i_w = X.shape[:3]\n X = np.reshape(X, (n_images, i_h, i_w, -1))\n n_channels = X.shape[-1]\n if self.patch_size is None:\n patch_size = i_h // 10, i_w // 10\n else:\n patch_size = self.patch_size\n\n # compute the dimensions of the patches array\n p_h, p_w = patch_size\n n_patches = _compute_n_patches(i_h, i_w, p_h, p_w, self.max_patches)\n patches_shape = (n_images * n_patches,) + patch_size\n if n_channels > 1:\n patches_shape += (n_channels,)\n\n # extract the patches\n patches = np.empty(patches_shape)\n for ii, image in enumerate(X):\n patches[ii * n_patches:(ii + 1) * n_patches] = extract_patches_2d(\n image, patch_size, max_patches=self.max_patches,\n random_state=self.random_state)\n return patches" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_compute_gradient_3d", + "name": "_compute_gradient_3d", + "qname": "sklearn.feature_extraction.image._compute_gradient_3d", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.image/_compute_gradient_3d/edges", + "name": "edges", + "qname": "sklearn.feature_extraction.image._compute_gradient_3d.edges", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_compute_gradient_3d/img", + "name": "img", + "qname": "sklearn.feature_extraction.image._compute_gradient_3d.img", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _compute_gradient_3d(edges, img):\n _, n_y, n_z = img.shape\n gradient = np.abs(img[edges[0] // (n_y * n_z),\n (edges[0] % (n_y * n_z)) // n_z,\n (edges[0] % (n_y * n_z)) % n_z] -\n img[edges[1] // (n_y * n_z),\n (edges[1] % (n_y * n_z)) // n_z,\n (edges[1] % (n_y * n_z)) % n_z])\n return gradient" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_compute_n_patches", + "name": "_compute_n_patches", + "qname": "sklearn.feature_extraction.image._compute_n_patches", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.feature_extraction.image/_compute_n_patches/i_h", + "name": "i_h", + "qname": "sklearn.feature_extraction.image._compute_n_patches.i_h", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The image height" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_compute_n_patches/i_w", + "name": "i_w", + "qname": "sklearn.feature_extraction.image._compute_n_patches.i_w", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The image with" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_compute_n_patches/p_h", + "name": "p_h", + "qname": "sklearn.feature_extraction.image._compute_n_patches.p_h", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The height of a patch" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_compute_n_patches/p_w", + "name": "p_w", + "qname": "sklearn.feature_extraction.image._compute_n_patches.p_w", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The width of a patch" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_compute_n_patches/max_patches", + "name": "max_patches", + "qname": "sklearn.feature_extraction.image._compute_n_patches.max_patches", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "None", + "description": "The maximum number of patches to extract. If max_patches is a float\nbetween 0 and 1, it is taken to be a proportion of the total number\nof patches." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the number of patches that will be extracted in an image.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the number of patches that will be extracted in an image.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ni_h : int\n The image height\ni_w : int\n The image with\np_h : int\n The height of a patch\np_w : int\n The width of a patch\nmax_patches : int or float, default=None\n The maximum number of patches to extract. If max_patches is a float\n between 0 and 1, it is taken to be a proportion of the total number\n of patches.", + "code": "def _compute_n_patches(i_h, i_w, p_h, p_w, max_patches=None):\n \"\"\"Compute the number of patches that will be extracted in an image.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n i_h : int\n The image height\n i_w : int\n The image with\n p_h : int\n The height of a patch\n p_w : int\n The width of a patch\n max_patches : int or float, default=None\n The maximum number of patches to extract. 
If max_patches is a float\n between 0 and 1, it is taken to be a proportion of the total number\n of patches.\n \"\"\"\n n_h = i_h - p_h + 1\n n_w = i_w - p_w + 1\n all_patches = n_h * n_w\n\n if max_patches:\n if (isinstance(max_patches, (numbers.Integral))\n and max_patches < all_patches):\n return max_patches\n elif (isinstance(max_patches, (numbers.Integral))\n and max_patches >= all_patches):\n return all_patches\n elif (isinstance(max_patches, (numbers.Real))\n and 0 < max_patches < 1):\n return int(max_patches * all_patches)\n else:\n raise ValueError(\"Invalid value for max_patches: %r\" % max_patches)\n else:\n return all_patches" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_extract_patches", + "name": "_extract_patches", + "qname": "sklearn.feature_extraction.image._extract_patches", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.image/_extract_patches/arr", + "name": "arr", + "qname": "sklearn.feature_extraction.image._extract_patches.arr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "n-dimensional array of which patches are to be extracted" + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_extract_patches/patch_shape", + "name": "patch_shape", + "qname": "sklearn.feature_extraction.image._extract_patches.patch_shape", + "default_value": "8", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or tuple of length arr.ndim, default=8", + "default_value": "", + "description": "Indicates the shape of the patches to be extracted. If an\ninteger is given, the shape will be a hypercube of\nsidelength given by its value." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "tuple of length arr.ndim" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_extract_patches/extraction_step", + "name": "extraction_step", + "qname": "sklearn.feature_extraction.image._extract_patches.extraction_step", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or tuple of length arr.ndim", + "default_value": "1", + "description": "Indicates step size at which extraction shall be performed.\nIf integer is given, then the step is uniform in all dimensions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "tuple of length arr.ndim" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Extracts patches of any n-dimensional array in place using strides.\n\nGiven an n-dimensional array it will return a 2n-dimensional array with\nthe first n dimensions indexing patch position and the last n indexing\nthe patch content. This operation is immediate (O(1)). A reshape\nperformed on the first n dimensions will cause numpy to copy data, leading\nto a list of extracted patches.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Extracts patches of any n-dimensional array in place using strides.\n\nGiven an n-dimensional array it will return a 2n-dimensional array with\nthe first n dimensions indexing patch position and the last n indexing\nthe patch content. This operation is immediate (O(1)). 
A reshape\nperformed on the first n dimensions will cause numpy to copy data, leading\nto a list of extracted patches.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\narr : ndarray\n n-dimensional array of which patches are to be extracted\n\npatch_shape : int or tuple of length arr.ndim, default=8\n Indicates the shape of the patches to be extracted. If an\n integer is given, the shape will be a hypercube of\n sidelength given by its value.\n\nextraction_step : int or tuple of length arr.ndim, default=1\n Indicates step size at which extraction shall be performed.\n If integer is given, then the step is uniform in all dimensions.\n\n\nReturns\n-------\npatches : strided ndarray\n 2n-dimensional array indexing patches on first n dimensions and\n containing patches on the last n dimensions. These dimensions\n are fake, but this way no data is copied. A simple reshape invokes\n a copying operation to obtain a list of patches:\n result.reshape([-1] + list(patch_shape))", + "code": "def _extract_patches(arr, patch_shape=8, extraction_step=1):\n \"\"\"Extracts patches of any n-dimensional array in place using strides.\n\n Given an n-dimensional array it will return a 2n-dimensional array with\n the first n dimensions indexing patch position and the last n indexing\n the patch content. This operation is immediate (O(1)). A reshape\n performed on the first n dimensions will cause numpy to copy data, leading\n to a list of extracted patches.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n arr : ndarray\n n-dimensional array of which patches are to be extracted\n\n patch_shape : int or tuple of length arr.ndim, default=8\n Indicates the shape of the patches to be extracted. If an\n integer is given, the shape will be a hypercube of\n sidelength given by its value.\n\n extraction_step : int or tuple of length arr.ndim, default=1\n Indicates step size at which extraction shall be performed.\n If integer is given, then the step is uniform in all dimensions.\n\n\n Returns\n -------\n patches : strided ndarray\n 2n-dimensional array indexing patches on first n dimensions and\n containing patches on the last n dimensions. These dimensions\n are fake, but this way no data is copied. 
A simple reshape invokes\n a copying operation to obtain a list of patches:\n result.reshape([-1] + list(patch_shape))\n \"\"\"\n\n arr_ndim = arr.ndim\n\n if isinstance(patch_shape, numbers.Number):\n patch_shape = tuple([patch_shape] * arr_ndim)\n if isinstance(extraction_step, numbers.Number):\n extraction_step = tuple([extraction_step] * arr_ndim)\n\n patch_strides = arr.strides\n\n slices = tuple(slice(None, None, st) for st in extraction_step)\n indexing_strides = arr[slices].strides\n\n patch_indices_shape = ((np.array(arr.shape) - np.array(patch_shape)) //\n np.array(extraction_step)) + 1\n\n shape = tuple(list(patch_indices_shape) + list(patch_shape))\n strides = tuple(list(indexing_strides) + list(patch_strides))\n\n patches = as_strided(arr, shape=shape, strides=strides)\n return patches" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_make_edges_3d", + "name": "_make_edges_3d", + "qname": "sklearn.feature_extraction.image._make_edges_3d", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.image/_make_edges_3d/n_x", + "name": "n_x", + "qname": "sklearn.feature_extraction.image._make_edges_3d.n_x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The size of the grid in the x direction." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_make_edges_3d/n_y", + "name": "n_y", + "qname": "sklearn.feature_extraction.image._make_edges_3d.n_y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The size of the grid in the y direction." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_make_edges_3d/n_z", + "name": "n_z", + "qname": "sklearn.feature_extraction.image._make_edges_3d.n_z", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "integer", + "default_value": "1", + "description": "The size of the grid in the z direction, defaults to 1" + }, + "type": { + "kind": "NamedType", + "name": "integer" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns a list of edges for a 3D image.", + "docstring": "Returns a list of edges for a 3D image.\n\nParameters\n----------\nn_x : int\n The size of the grid in the x direction.\nn_y : int\n The size of the grid in the y direction.\nn_z : integer, default=1\n The size of the grid in the z direction, defaults to 1", + "code": "def _make_edges_3d(n_x, n_y, n_z=1):\n \"\"\"Returns a list of edges for a 3D image.\n\n Parameters\n ----------\n n_x : int\n The size of the grid in the x direction.\n n_y : int\n The size of the grid in the y direction.\n n_z : integer, default=1\n The size of the grid in the z direction, defaults to 1\n \"\"\"\n vertices = np.arange(n_x * n_y * n_z).reshape((n_x, n_y, n_z))\n edges_deep = np.vstack((vertices[:, :, :-1].ravel(),\n vertices[:, :, 1:].ravel()))\n edges_right = np.vstack((vertices[:, :-1].ravel(),\n vertices[:, 1:].ravel()))\n edges_down = np.vstack((vertices[:-1].ravel(), vertices[1:].ravel()))\n edges = np.hstack((edges_deep, edges_right, edges_down))\n return edges" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_mask_edges_weights", + "name": "_mask_edges_weights", + "qname": "sklearn.feature_extraction.image._mask_edges_weights", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.image/_mask_edges_weights/mask", + "name": "mask", + "qname": "sklearn.feature_extraction.image._mask_edges_weights.mask", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_mask_edges_weights/edges", + "name": "edges", + "qname": "sklearn.feature_extraction.image._mask_edges_weights.edges", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_mask_edges_weights/weights", + "name": "weights", + "qname": "sklearn.feature_extraction.image._mask_edges_weights.weights", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply a mask to edges (weighted or not)", + "docstring": "Apply a mask to edges (weighted or not)", + "code": "def _mask_edges_weights(mask, edges, weights=None):\n \"\"\"Apply a mask to edges (weighted or not)\"\"\"\n inds = np.arange(mask.size)\n inds = inds[mask.ravel()]\n ind_mask = np.logical_and(np.in1d(edges[0], inds),\n np.in1d(edges[1], inds))\n edges = edges[:, ind_mask]\n if weights is not None:\n weights = weights[ind_mask]\n if len(edges.ravel()):\n maxval = edges.max()\n else:\n maxval = 0\n order = 
np.searchsorted(np.unique(edges.ravel()), np.arange(maxval + 1))\n edges = order[edges]\n if weights is None:\n return edges\n else:\n return edges, weights" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_to_graph", + "name": "_to_graph", + "qname": "sklearn.feature_extraction.image._to_graph", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.image/_to_graph/n_x", + "name": "n_x", + "qname": "sklearn.feature_extraction.image._to_graph.n_x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_to_graph/n_y", + "name": "n_y", + "qname": "sklearn.feature_extraction.image._to_graph.n_y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_to_graph/n_z", + "name": "n_z", + "qname": "sklearn.feature_extraction.image._to_graph.n_z", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_to_graph/mask", + "name": "mask", + "qname": "sklearn.feature_extraction.image._to_graph.mask", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_to_graph/img", + "name": "img", + "qname": "sklearn.feature_extraction.image._to_graph.img", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_to_graph/return_as", + "name": "return_as", + "qname": "sklearn.feature_extraction.image._to_graph.return_as", + "default_value": "sparse.coo_matrix", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/_to_graph/dtype", + "name": "dtype", + "qname": "sklearn.feature_extraction.image._to_graph.dtype", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Auxiliary function for img_to_graph and grid_to_graph", + "docstring": "Auxiliary function for img_to_graph and grid_to_graph\n ", + "code": "def _to_graph(n_x, n_y, n_z, mask=None, img=None,\n return_as=sparse.coo_matrix, dtype=None):\n \"\"\"Auxiliary function for img_to_graph and grid_to_graph\n \"\"\"\n edges = _make_edges_3d(n_x, n_y, n_z)\n\n if dtype is None:\n if img is None:\n dtype = int\n else:\n dtype = img.dtype\n\n if img is not None:\n img = np.atleast_3d(img)\n weights = _compute_gradient_3d(edges, img)\n if mask is not None:\n edges, weights = _mask_edges_weights(mask, edges, weights)\n diag = img.squeeze()[mask]\n else:\n diag = img.ravel()\n n_voxels = diag.size\n else:\n if mask is not None:\n mask = 
mask.astype(dtype=bool, copy=False)\n mask = np.asarray(mask, dtype=bool)\n edges = _mask_edges_weights(mask, edges)\n n_voxels = np.sum(mask)\n else:\n n_voxels = n_x * n_y * n_z\n weights = np.ones(edges.shape[1], dtype=dtype)\n diag = np.ones(n_voxels, dtype=dtype)\n\n diag_idx = np.arange(n_voxels)\n i_idx = np.hstack((edges[0], edges[1]))\n j_idx = np.hstack((edges[1], edges[0]))\n graph = sparse.coo_matrix((np.hstack((weights, weights, diag)),\n (np.hstack((i_idx, diag_idx)),\n np.hstack((j_idx, diag_idx)))),\n (n_voxels, n_voxels),\n dtype=dtype)\n if return_as is np.ndarray:\n return graph.toarray()\n return return_as(graph)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/extract_patches_2d", + "name": "extract_patches_2d", + "qname": "sklearn.feature_extraction.image.extract_patches_2d", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.image/extract_patches_2d/image", + "name": "image", + "qname": "sklearn.feature_extraction.image.extract_patches_2d.image", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (image_height, image_width) or (image_height, image_width, n_channels)", + "default_value": "", + "description": "The original image data. For color images, the last dimension specifies\nthe channel: a RGB image would have `n_channels=3`." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (image_height, image_width) or (image_height, image_width, n_channels)" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/extract_patches_2d/patch_size", + "name": "patch_size", + "qname": "sklearn.feature_extraction.image.extract_patches_2d.patch_size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "tuple of int (patch_height, patch_width)", + "default_value": "", + "description": "The dimensions of one patch." + }, + "type": { + "kind": "NamedType", + "name": "tuple of int (patch_height, patch_width)" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/extract_patches_2d/max_patches", + "name": "max_patches", + "qname": "sklearn.feature_extraction.image.extract_patches_2d.max_patches", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int or float", + "default_value": "None", + "description": "The maximum number of patches to extract. If `max_patches` is a float\nbetween 0 and 1, it is taken to be a proportion of the total number\nof patches." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/extract_patches_2d/random_state", + "name": "random_state", + "qname": "sklearn.feature_extraction.image.extract_patches_2d.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Determines the random number generator used for random sampling when\n`max_patches` is not None. Use an int to make the randomness\ndeterministic.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Reshape a 2D image into a collection of patches\n\nThe resulting patches are allocated in a dedicated array.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Reshape a 2D image into a collection of patches\n\nThe resulting patches are allocated in a dedicated array.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nimage : ndarray of shape (image_height, image_width) or (image_height, image_width, n_channels)\n The original image data. For color images, the last dimension specifies\n the channel: a RGB image would have `n_channels=3`.\n\npatch_size : tuple of int (patch_height, patch_width)\n The dimensions of one patch.\n\nmax_patches : int or float, default=None\n The maximum number of patches to extract. If `max_patches` is a float\n between 0 and 1, it is taken to be a proportion of the total number\n of patches.\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator used for random sampling when\n `max_patches` is not None. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nReturns\n-------\npatches : array of shape (n_patches, patch_height, patch_width) or (n_patches, patch_height, patch_width, n_channels)\n The collection of patches extracted from the image, where `n_patches`\n is either `max_patches` or the total number of patches that can be\n extracted.\n\nExamples\n--------\n>>> from sklearn.datasets import load_sample_image\n>>> from sklearn.feature_extraction import image\n>>> # Use the array data from the first image in this dataset:\n>>> one_image = load_sample_image(\"china.jpg\")\n>>> print('Image shape: {}'.format(one_image.shape))\nImage shape: (427, 640, 3)\n>>> patches = image.extract_patches_2d(one_image, (2, 2))\n>>> print('Patches shape: {}'.format(patches.shape))\nPatches shape: (272214, 2, 2, 3)\n>>> # Here are just two of these patches:\n>>> print(patches[1])\n[[[174 201 231]\n [174 201 231]]\n [[173 200 230]\n [173 200 230]]]\n>>> print(patches[800])\n[[[187 214 243]\n [188 215 244]]\n [[187 214 243]\n [188 215 244]]]", + "code": "@_deprecate_positional_args\ndef extract_patches_2d(image, patch_size, *, max_patches=None,\n random_state=None):\n \"\"\"Reshape a 2D image into a collection of patches\n\n The resulting patches are allocated in a dedicated array.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n image : ndarray of shape (image_height, image_width) or \\\n (image_height, image_width, n_channels)\n The original image data. For color images, the last dimension specifies\n the channel: a RGB image would have `n_channels=3`.\n\n patch_size : tuple of int (patch_height, patch_width)\n The dimensions of one patch.\n\n max_patches : int or float, default=None\n The maximum number of patches to extract. If `max_patches` is a float\n between 0 and 1, it is taken to be a proportion of the total number\n of patches.\n\n random_state : int, RandomState instance, default=None\n Determines the random number generator used for random sampling when\n `max_patches` is not None. 
Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\n Returns\n -------\n patches : array of shape (n_patches, patch_height, patch_width) or \\\n (n_patches, patch_height, patch_width, n_channels)\n The collection of patches extracted from the image, where `n_patches`\n is either `max_patches` or the total number of patches that can be\n extracted.\n\n Examples\n --------\n >>> from sklearn.datasets import load_sample_image\n >>> from sklearn.feature_extraction import image\n >>> # Use the array data from the first image in this dataset:\n >>> one_image = load_sample_image(\"china.jpg\")\n >>> print('Image shape: {}'.format(one_image.shape))\n Image shape: (427, 640, 3)\n >>> patches = image.extract_patches_2d(one_image, (2, 2))\n >>> print('Patches shape: {}'.format(patches.shape))\n Patches shape: (272214, 2, 2, 3)\n >>> # Here are just two of these patches:\n >>> print(patches[1])\n [[[174 201 231]\n [174 201 231]]\n [[173 200 230]\n [173 200 230]]]\n >>> print(patches[800])\n [[[187 214 243]\n [188 215 244]]\n [[187 214 243]\n [188 215 244]]]\n \"\"\"\n i_h, i_w = image.shape[:2]\n p_h, p_w = patch_size\n\n if p_h > i_h:\n raise ValueError(\"Height of the patch should be less than the height\"\n \" of the image.\")\n\n if p_w > i_w:\n raise ValueError(\"Width of the patch should be less than the width\"\n \" of the image.\")\n\n image = check_array(image, allow_nd=True)\n image = image.reshape((i_h, i_w, -1))\n n_colors = image.shape[-1]\n\n extracted_patches = _extract_patches(image,\n patch_shape=(p_h, p_w, n_colors),\n extraction_step=1)\n\n n_patches = _compute_n_patches(i_h, i_w, p_h, p_w, max_patches)\n if max_patches:\n rng = check_random_state(random_state)\n i_s = rng.randint(i_h - p_h + 1, size=n_patches)\n j_s = rng.randint(i_w - p_w + 1, size=n_patches)\n patches = extracted_patches[i_s, j_s, 0]\n else:\n patches = extracted_patches\n\n patches = patches.reshape(-1, p_h, p_w, n_colors)\n # remove the color dimension if useless\n if patches.shape[-1] == 1:\n return patches.reshape((n_patches, p_h, p_w))\n else:\n return patches" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/grid_to_graph", + "name": "grid_to_graph", + "qname": "sklearn.feature_extraction.image.grid_to_graph", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.image/grid_to_graph/n_x", + "name": "n_x", + "qname": "sklearn.feature_extraction.image.grid_to_graph.n_x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Dimension in x axis" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/grid_to_graph/n_y", + "name": "n_y", + "qname": "sklearn.feature_extraction.image.grid_to_graph.n_y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Dimension in y axis" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/grid_to_graph/n_z", + "name": "n_z", + "qname": "sklearn.feature_extraction.image.grid_to_graph.n_z", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Dimension in z axis" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, 
+ { + "id": "scikit-learn/sklearn.feature_extraction.image/grid_to_graph/mask", + "name": "mask", + "qname": "sklearn.feature_extraction.image.grid_to_graph.mask", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_x, n_y, n_z), dtype=bool", + "default_value": "None", + "description": "An optional mask of the image, to consider only part of the\npixels." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_x, n_y, n_z)" + }, + { + "kind": "NamedType", + "name": "dtype=bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/grid_to_graph/return_as", + "name": "return_as", + "qname": "sklearn.feature_extraction.image.grid_to_graph.return_as", + "default_value": "sparse.coo_matrix", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "np.ndarray or a sparse matrix class", + "default_value": "sparse.coo_matrix", + "description": "The class to use to build the returned adjacency matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "np.ndarray" + }, + { + "kind": "NamedType", + "name": "a sparse matrix class" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/grid_to_graph/dtype", + "name": "dtype", + "qname": "sklearn.feature_extraction.image.grid_to_graph.dtype", + "default_value": "int", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "dtype", + "default_value": "int", + "description": "The data of the returned sparse matrix. By default it is int" + }, + "type": { + "kind": "NamedType", + "name": "dtype" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Graph of the pixel-to-pixel connections\n\nEdges exist if 2 voxels are connected.", + "docstring": "Graph of the pixel-to-pixel connections\n\nEdges exist if 2 voxels are connected.\n\nParameters\n----------\nn_x : int\n Dimension in x axis\nn_y : int\n Dimension in y axis\nn_z : int, default=1\n Dimension in z axis\nmask : ndarray of shape (n_x, n_y, n_z), dtype=bool, default=None\n An optional mask of the image, to consider only part of the\n pixels.\nreturn_as : np.ndarray or a sparse matrix class, default=sparse.coo_matrix\n The class to use to build the returned adjacency matrix.\ndtype : dtype, default=int\n The data of the returned sparse matrix. By default it is int\n\nNotes\n-----\nFor scikit-learn versions 0.14.1 and prior, return_as=np.ndarray was\nhandled by returning a dense np.matrix instance. Going forward, np.ndarray\nreturns an np.ndarray, as expected.\n\nFor compatibility, user code relying on this method should wrap its\ncalls in ``np.asarray`` to avoid type issues.", + "code": "@_deprecate_positional_args\ndef grid_to_graph(n_x, n_y, n_z=1, *, mask=None, return_as=sparse.coo_matrix,\n dtype=int):\n \"\"\"Graph of the pixel-to-pixel connections\n\n Edges exist if 2 voxels are connected.\n\n Parameters\n ----------\n n_x : int\n Dimension in x axis\n n_y : int\n Dimension in y axis\n n_z : int, default=1\n Dimension in z axis\n mask : ndarray of shape (n_x, n_y, n_z), dtype=bool, default=None\n An optional mask of the image, to consider only part of the\n pixels.\n return_as : np.ndarray or a sparse matrix class, \\\n default=sparse.coo_matrix\n The class to use to build the returned adjacency matrix.\n dtype : dtype, default=int\n The data of the returned sparse matrix. 
By default it is int\n\n Notes\n -----\n For scikit-learn versions 0.14.1 and prior, return_as=np.ndarray was\n handled by returning a dense np.matrix instance. Going forward, np.ndarray\n returns an np.ndarray, as expected.\n\n For compatibility, user code relying on this method should wrap its\n calls in ``np.asarray`` to avoid type issues.\n \"\"\"\n return _to_graph(n_x, n_y, n_z, mask=mask, return_as=return_as,\n dtype=dtype)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/img_to_graph", + "name": "img_to_graph", + "qname": "sklearn.feature_extraction.image.img_to_graph", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.image/img_to_graph/img", + "name": "img", + "qname": "sklearn.feature_extraction.image.img_to_graph.img", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (height, width) or (height, width, channel)", + "default_value": "", + "description": "2D or 3D image." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (height, width) or (height, width, channel)" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/img_to_graph/mask", + "name": "mask", + "qname": "sklearn.feature_extraction.image.img_to_graph.mask", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "ndarray of shape (height, width) or (height, width, channel), dtype=bool", + "default_value": "None", + "description": "An optional mask of the image, to consider only part of the\npixels." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (height, width) or (height, width, channel)" + }, + { + "kind": "NamedType", + "name": "dtype=bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/img_to_graph/return_as", + "name": "return_as", + "qname": "sklearn.feature_extraction.image.img_to_graph.return_as", + "default_value": "sparse.coo_matrix", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "np.ndarray or a sparse matrix class", + "default_value": "sparse.coo_matrix", + "description": "The class to use to build the returned adjacency matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "np.ndarray" + }, + { + "kind": "NamedType", + "name": "a sparse matrix class" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/img_to_graph/dtype", + "name": "dtype", + "qname": "sklearn.feature_extraction.image.img_to_graph.dtype", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "dtype", + "default_value": "None", + "description": "The data of the returned sparse matrix. 
By default it is the\ndtype of img" + }, + "type": { + "kind": "NamedType", + "name": "dtype" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Graph of the pixel-to-pixel gradient connections\n\nEdges are weighted with the gradient values.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Graph of the pixel-to-pixel gradient connections\n\nEdges are weighted with the gradient values.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nimg : ndarray of shape (height, width) or (height, width, channel)\n 2D or 3D image.\nmask : ndarray of shape (height, width) or (height, width, channel), dtype=bool, default=None\n An optional mask of the image, to consider only part of the\n pixels.\nreturn_as : np.ndarray or a sparse matrix class, default=sparse.coo_matrix\n The class to use to build the returned adjacency matrix.\ndtype : dtype, default=None\n The data of the returned sparse matrix. By default it is the\n dtype of img\n\nNotes\n-----\nFor scikit-learn versions 0.14.1 and prior, return_as=np.ndarray was\nhandled by returning a dense np.matrix instance. Going forward, np.ndarray\nreturns an np.ndarray, as expected.\n\nFor compatibility, user code relying on this method should wrap its\ncalls in ``np.asarray`` to avoid type issues.", + "code": "@_deprecate_positional_args\ndef img_to_graph(img, *, mask=None, return_as=sparse.coo_matrix, dtype=None):\n \"\"\"Graph of the pixel-to-pixel gradient connections\n\n Edges are weighted with the gradient values.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n img : ndarray of shape (height, width) or (height, width, channel)\n 2D or 3D image.\n mask : ndarray of shape (height, width) or \\\n (height, width, channel), dtype=bool, default=None\n An optional mask of the image, to consider only part of the\n pixels.\n return_as : np.ndarray or a sparse matrix class, \\\n default=sparse.coo_matrix\n The class to use to build the returned adjacency matrix.\n dtype : dtype, default=None\n The data of the returned sparse matrix. By default it is the\n dtype of img\n\n Notes\n -----\n For scikit-learn versions 0.14.1 and prior, return_as=np.ndarray was\n handled by returning a dense np.matrix instance. Going forward, np.ndarray\n returns an np.ndarray, as expected.\n\n For compatibility, user code relying on this method should wrap its\n calls in ``np.asarray`` to avoid type issues.\n \"\"\"\n img = np.atleast_3d(img)\n n_x, n_y, n_z = img.shape\n return _to_graph(n_x, n_y, n_z, mask, img, return_as, dtype)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/reconstruct_from_patches_2d", + "name": "reconstruct_from_patches_2d", + "qname": "sklearn.feature_extraction.image.reconstruct_from_patches_2d", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.image/reconstruct_from_patches_2d/patches", + "name": "patches", + "qname": "sklearn.feature_extraction.image.reconstruct_from_patches_2d.patches", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_patches, patch_height, patch_width) or (n_patches, patch_height, patch_width, n_channels)", + "default_value": "", + "description": "The complete set of patches. If the patches contain colour information,\nchannels are indexed along the last dimension: RGB patches would\nhave `n_channels=3`." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_patches, patch_height, patch_width) or (n_patches, patch_height, patch_width, n_channels)" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.image/reconstruct_from_patches_2d/image_size", + "name": "image_size", + "qname": "sklearn.feature_extraction.image.reconstruct_from_patches_2d.image_size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "tuple of int (image_height, image_width) or (image_height, image_width, n_channels)", + "default_value": "", + "description": "The size of the image that will be reconstructed." + }, + "type": { + "kind": "NamedType", + "name": "tuple of int (image_height, image_width) or (image_height, image_width, n_channels)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Reconstruct the image from all of its patches.\n\nPatches are assumed to overlap and the image is constructed by filling in\nthe patches from left to right, top to bottom, averaging the overlapping\nregions.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Reconstruct the image from all of its patches.\n\nPatches are assumed to overlap and the image is constructed by filling in\nthe patches from left to right, top to bottom, averaging the overlapping\nregions.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npatches : ndarray of shape (n_patches, patch_height, patch_width) or (n_patches, patch_height, patch_width, n_channels)\n The complete set of patches. If the patches contain colour information,\n channels are indexed along the last dimension: RGB patches would\n have `n_channels=3`.\n\nimage_size : tuple of int (image_height, image_width) or (image_height, image_width, n_channels)\n The size of the image that will be reconstructed.\n\nReturns\n-------\nimage : ndarray of shape image_size\n The reconstructed image.", + "code": "def reconstruct_from_patches_2d(patches, image_size):\n \"\"\"Reconstruct the image from all of its patches.\n\n Patches are assumed to overlap and the image is constructed by filling in\n the patches from left to right, top to bottom, averaging the overlapping\n regions.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n patches : ndarray of shape (n_patches, patch_height, patch_width) or \\\n (n_patches, patch_height, patch_width, n_channels)\n The complete set of patches. If the patches contain colour information,\n channels are indexed along the last dimension: RGB patches would\n have `n_channels=3`.\n\n image_size : tuple of int (image_height, image_width) or \\\n (image_height, image_width, n_channels)\n The size of the image that will be reconstructed.\n\n Returns\n -------\n image : ndarray of shape image_size\n The reconstructed image.\n \"\"\"\n i_h, i_w = image_size[:2]\n p_h, p_w = patches.shape[1:3]\n img = np.zeros(image_size)\n # compute the dimensions of the patches array\n n_h = i_h - p_h + 1\n n_w = i_w - p_w + 1\n for p, (i, j) in zip(patches, product(range(n_h), range(n_w))):\n img[i:i + p_h, j:j + p_w] += p\n\n for i in range(i_h):\n for j in range(i_w):\n # divide by the amount of overlap\n # XXX: is this the most efficient way? 
memory-wise yes, cpu wise?\n img[i, j] /= float(min(i + 1, p_h, i_h - i) *\n min(j + 1, p_w, i_w - j))\n return img" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.setup/configuration", + "name": "configuration", + "qname": "sklearn.feature_extraction.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.feature_extraction.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.feature_extraction.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package='', top_path=None):\n import numpy\n from numpy.distutils.misc_util import Configuration\n\n config = Configuration('feature_extraction', parent_package, top_path)\n libraries = []\n if os.name == 'posix':\n libraries.append('m')\n\n if platform.python_implementation() != 'PyPy':\n config.add_extension('_hashing_fast',\n sources=['_hashing_fast.pyx'],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n config.add_subpackage(\"tests\")\n\n return config" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__", + "name": "__init__", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/input", + "name": "input", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.input", + "default_value": "'content'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'filename', 'file', 'content'}", + "default_value": "'content'", + "description": "- If `'filename'`, the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n- If `'file'`, the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n- If `'content'`, the input is expected to be a sequence of items that\n can be of type string or byte." 
+ }, + "type": { + "kind": "EnumType", + "values": ["filename", "file", "content"] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/encoding", + "name": "encoding", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.encoding", + "default_value": "'utf-8'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "string", + "default_value": "'utf-8'", + "description": "If bytes or files are given to analyze, this encoding is used to\ndecode." + }, + "type": { + "kind": "NamedType", + "name": "string" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/decode_error", + "name": "decode_error", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.decode_error", + "default_value": "'strict'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'strict', 'ignore', 'replace'}", + "default_value": "'strict'", + "description": "Instruction on what to do if a byte sequence is given to analyze that\ncontains characters not of the given `encoding`. By default, it is\n'strict', meaning that a UnicodeDecodeError will be raised. Other\nvalues are 'ignore' and 'replace'." + }, + "type": { + "kind": "EnumType", + "values": ["strict", "replace", "ignore"] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/strip_accents", + "name": "strip_accents", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.strip_accents", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'ascii', 'unicode'}", + "default_value": "None", + "description": "Remove accents and perform other character normalization\nduring the preprocessing step.\n'ascii' is a fast method that only works on characters that have\nan direct ASCII mapping.\n'unicode' is a slightly slower method that works on any characters.\nNone (default) does nothing.\n\nBoth 'ascii' and 'unicode' use NFKD normalization from\n:func:`unicodedata.normalize`." + }, + "type": { + "kind": "EnumType", + "values": ["unicode", "ascii"] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/lowercase", + "name": "lowercase", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.lowercase", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Convert all characters to lowercase before tokenizing." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/preprocessor", + "name": "preprocessor", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.preprocessor", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "Override the preprocessing (strip_accents and lowercase) stage while\npreserving the tokenizing and n-grams generation steps.\nOnly applies if ``analyzer is not callable``." 
+ }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/tokenizer", + "name": "tokenizer", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.tokenizer", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "Override the string tokenization step while preserving the\npreprocessing and n-grams generation steps.\nOnly applies if ``analyzer == 'word'``." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/stop_words", + "name": "stop_words", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.stop_words", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'english'}, list", + "default_value": "None", + "description": "If 'english', a built-in stop word list for English is used.\nThere are several known issues with 'english' and you should\nconsider an alternative (see :ref:`stop_words`).\n\nIf a list, that list is assumed to contain stop words, all of which\nwill be removed from the resulting tokens.\nOnly applies if ``analyzer == 'word'``.\n\nIf None, no stop words will be used. max_df can be set to a value\nin the range [0.7, 1.0) to automatically detect and filter stop\nwords based on intra corpus document frequency of terms." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["english"] + }, + { + "kind": "NamedType", + "name": "list" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/token_pattern", + "name": "token_pattern", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.token_pattern", + "default_value": "'(?u)\\\\b\\\\w\\\\w+\\\\b'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str", + "default_value": "r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"", + "description": "Regular expression denoting what constitutes a \"token\", only used\nif ``analyzer == 'word'``. The default regexp select tokens of 2\nor more alphanumeric characters (punctuation is completely ignored\nand always treated as a token separator).\n\nIf there is a capturing group in token_pattern then the\ncaptured group content, not the entire match, becomes the token.\nAt most one capturing group is permitted." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/ngram_range", + "name": "ngram_range", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.ngram_range", + "default_value": "(1, 1)", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "tuple (min_n, max_n)", + "default_value": "(1, 1)", + "description": "The lower and upper boundary of the range of n-values for different\nword n-grams or char n-grams to be extracted. All values of n such\nsuch that min_n <= n <= max_n will be used. For example an\n``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means\nunigrams and bigrams, and ``(2, 2)`` means only bigrams.\nOnly applies if ``analyzer is not callable``." 
+ }, + "type": { + "kind": "NamedType", + "name": "tuple (min_n, max_n)" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/analyzer", + "name": "analyzer", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.analyzer", + "default_value": "'word'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'word', 'char', 'char_wb'} or callable", + "default_value": "'word'", + "description": "Whether the feature should be made of word n-gram or character\nn-grams.\nOption 'char_wb' creates character n-grams only from text inside\nword boundaries; n-grams at the edges of words are padded with space.\n\nIf a callable is passed it is used to extract the sequence of features\nout of the raw, unprocessed input.\n\n.. versionchanged:: 0.21\n\nSince v0.21, if ``input`` is ``filename`` or ``file``, the data is\nfirst read from the file and then passed to the given callable\nanalyzer." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["char_wb", "word", "char"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/max_df", + "name": "max_df", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.max_df", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float in range [0.0, 1.0] or int", + "default_value": "1.0", + "description": "When building the vocabulary ignore terms that have a document\nfrequency strictly higher than the given threshold (corpus-specific\nstop words).\nIf float, the parameter represents a proportion of documents, integer\nabsolute counts.\nThis parameter is ignored if vocabulary is not None." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float in range [0.0" + }, + { + "kind": "NamedType", + "name": "1.0]" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/min_df", + "name": "min_df", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.min_df", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float in range [0.0, 1.0] or int", + "default_value": "1", + "description": "When building the vocabulary ignore terms that have a document\nfrequency strictly lower than the given threshold. This value is also\ncalled cut-off in the literature.\nIf float, the parameter represents a proportion of documents, integer\nabsolute counts.\nThis parameter is ignored if vocabulary is not None." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float in range [0.0" + }, + { + "kind": "NamedType", + "name": "1.0]" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/max_features", + "name": "max_features", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.max_features", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "If not None, build a vocabulary that only consider the top\nmax_features ordered by term frequency across the corpus.\n\nThis parameter is ignored if vocabulary is not None." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/vocabulary", + "name": "vocabulary", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.vocabulary", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "Mapping or iterable", + "default_value": "None", + "description": "Either a Mapping (e.g., a dict) where keys are terms and values are\nindices in the feature matrix, or an iterable over terms. If not\ngiven, a vocabulary is determined from the input documents. Indices\nin the mapping should not be repeated and should not have any gap\nbetween 0 and the largest index." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "Mapping" + }, + { + "kind": "NamedType", + "name": "iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/binary", + "name": "binary", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.binary", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, all non zero counts are set to 1. This is useful for discrete\nprobabilistic models that model binary events rather than integer\ncounts." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/__init__/dtype", + "name": "dtype", + "qname": "sklearn.feature_extraction.text.CountVectorizer.__init__.dtype", + "default_value": "np.int64", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "type", + "default_value": "np.int64", + "description": "Type of the matrix returned by fit_transform() or transform()." 
+ }, + "type": { + "kind": "NamedType", + "name": "type" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Convert a collection of text documents to a matrix of token counts\n\nThis implementation produces a sparse representation of the counts using\nscipy.sparse.csr_matrix.\n\nIf you do not provide an a-priori dictionary and you do not use an analyzer\nthat does some kind of feature selection then the number of features will\nbe equal to the vocabulary size found by analyzing the data.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, input='content', encoding='utf-8',\n decode_error='strict', strip_accents=None,\n lowercase=True, preprocessor=None, tokenizer=None,\n stop_words=None, token_pattern=r\"(?u)\\b\\w\\w+\\b\",\n ngram_range=(1, 1), analyzer='word',\n max_df=1.0, min_df=1, max_features=None,\n vocabulary=None, binary=False, dtype=np.int64):\n self.input = input\n self.encoding = encoding\n self.decode_error = decode_error\n self.strip_accents = strip_accents\n self.preprocessor = preprocessor\n self.tokenizer = tokenizer\n self.analyzer = analyzer\n self.lowercase = lowercase\n self.token_pattern = token_pattern\n self.stop_words = stop_words\n self.max_df = max_df\n self.min_df = min_df\n if max_df < 0 or min_df < 0:\n raise ValueError(\"negative value for max_df or min_df\")\n self.max_features = max_features\n if max_features is not None:\n if (not isinstance(max_features, numbers.Integral) or\n max_features <= 0):\n raise ValueError(\n \"max_features=%r, neither a positive integer nor None\"\n % max_features)\n self.ngram_range = ngram_range\n self.vocabulary = vocabulary\n self.binary = binary\n self.dtype = dtype" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_count_vocab", + "name": "_count_vocab", + "qname": "sklearn.feature_extraction.text.CountVectorizer._count_vocab", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_count_vocab/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.CountVectorizer._count_vocab.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_count_vocab/raw_documents", + "name": "raw_documents", + "qname": "sklearn.feature_extraction.text.CountVectorizer._count_vocab.raw_documents", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_count_vocab/fixed_vocab", + "name": "fixed_vocab", + "qname": "sklearn.feature_extraction.text.CountVectorizer._count_vocab.fixed_vocab", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Create sparse feature matrix, and vocabulary where fixed_vocab=False", + "docstring": "Create sparse feature matrix, and vocabulary where fixed_vocab=False\n ", + "code": " def _count_vocab(self, raw_documents, fixed_vocab):\n \"\"\"Create sparse feature matrix, and vocabulary where 
fixed_vocab=False\n \"\"\"\n if fixed_vocab:\n vocabulary = self.vocabulary_\n else:\n # Add a new value when a new vocabulary item is seen\n vocabulary = defaultdict()\n vocabulary.default_factory = vocabulary.__len__\n\n analyze = self.build_analyzer()\n j_indices = []\n indptr = []\n\n values = _make_int_array()\n indptr.append(0)\n for doc in raw_documents:\n feature_counter = {}\n for feature in analyze(doc):\n try:\n feature_idx = vocabulary[feature]\n if feature_idx not in feature_counter:\n feature_counter[feature_idx] = 1\n else:\n feature_counter[feature_idx] += 1\n except KeyError:\n # Ignore out-of-vocabulary items for fixed_vocab=True\n continue\n\n j_indices.extend(feature_counter.keys())\n values.extend(feature_counter.values())\n indptr.append(len(j_indices))\n\n if not fixed_vocab:\n # disable defaultdict behaviour\n vocabulary = dict(vocabulary)\n if not vocabulary:\n raise ValueError(\"empty vocabulary; perhaps the documents only\"\n \" contain stop words\")\n\n if indptr[-1] > np.iinfo(np.int32).max: # = 2**31 - 1\n if _IS_32BIT:\n raise ValueError(('sparse CSR array has {} non-zero '\n 'elements and requires 64 bit indexing, '\n 'which is unsupported with 32 bit Python.')\n .format(indptr[-1]))\n indices_dtype = np.int64\n\n else:\n indices_dtype = np.int32\n j_indices = np.asarray(j_indices, dtype=indices_dtype)\n indptr = np.asarray(indptr, dtype=indices_dtype)\n values = np.frombuffer(values, dtype=np.intc)\n\n X = sp.csr_matrix((values, j_indices, indptr),\n shape=(len(indptr) - 1, len(vocabulary)),\n dtype=self.dtype)\n X.sort_indices()\n return vocabulary, X" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_limit_features", + "name": "_limit_features", + "qname": "sklearn.feature_extraction.text.CountVectorizer._limit_features", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_limit_features/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.CountVectorizer._limit_features.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_limit_features/X", + "name": "X", + "qname": "sklearn.feature_extraction.text.CountVectorizer._limit_features.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_limit_features/vocabulary", + "name": "vocabulary", + "qname": "sklearn.feature_extraction.text.CountVectorizer._limit_features.vocabulary", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_limit_features/high", + "name": "high", + "qname": "sklearn.feature_extraction.text.CountVectorizer._limit_features.high", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_limit_features/low", + "name": "low", + "qname": 
"sklearn.feature_extraction.text.CountVectorizer._limit_features.low", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_limit_features/limit", + "name": "limit", + "qname": "sklearn.feature_extraction.text.CountVectorizer._limit_features.limit", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Remove too rare or too common features.\n\nPrune features that are non zero in more samples than high or less\ndocuments than low, modifying the vocabulary, and restricting it to\nat most the limit most frequent.\n\nThis does not prune samples with zero features.", + "docstring": "Remove too rare or too common features.\n\nPrune features that are non zero in more samples than high or less\ndocuments than low, modifying the vocabulary, and restricting it to\nat most the limit most frequent.\n\nThis does not prune samples with zero features.", + "code": " def _limit_features(self, X, vocabulary, high=None, low=None,\n limit=None):\n \"\"\"Remove too rare or too common features.\n\n Prune features that are non zero in more samples than high or less\n documents than low, modifying the vocabulary, and restricting it to\n at most the limit most frequent.\n\n This does not prune samples with zero features.\n \"\"\"\n if high is None and low is None and limit is None:\n return X, set()\n\n # Calculate a mask based on document frequencies\n dfs = _document_frequency(X)\n mask = np.ones(len(dfs), dtype=bool)\n if high is not None:\n mask &= dfs <= high\n if low is not None:\n mask &= dfs >= low\n if limit is not None and mask.sum() > limit:\n tfs = np.asarray(X.sum(axis=0)).ravel()\n mask_inds = (-tfs[mask]).argsort()[:limit]\n new_mask = np.zeros(len(dfs), dtype=bool)\n new_mask[np.where(mask)[0][mask_inds]] = True\n mask = new_mask\n\n new_indices = np.cumsum(mask) - 1 # maps old indices to new\n removed_terms = set()\n for term, old_index in list(vocabulary.items()):\n if mask[old_index]:\n vocabulary[term] = new_indices[old_index]\n else:\n del vocabulary[term]\n removed_terms.add(term)\n kept_indices = np.where(mask)[0]\n if len(kept_indices) == 0:\n raise ValueError(\"After pruning, no terms remain. 
Try a lower\"\n \" min_df or a higher max_df.\")\n return X[:, kept_indices], removed_terms" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.feature_extraction.text.CountVectorizer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_more_tags/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.CountVectorizer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'X_types': ['string']}" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_sort_features", + "name": "_sort_features", + "qname": "sklearn.feature_extraction.text.CountVectorizer._sort_features", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_sort_features/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.CountVectorizer._sort_features.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_sort_features/X", + "name": "X", + "qname": "sklearn.feature_extraction.text.CountVectorizer._sort_features.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/_sort_features/vocabulary", + "name": "vocabulary", + "qname": "sklearn.feature_extraction.text.CountVectorizer._sort_features.vocabulary", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Sort features by name\n\nReturns a reordered matrix and modifies the vocabulary in place", + "docstring": "Sort features by name\n\nReturns a reordered matrix and modifies the vocabulary in place", + "code": " def _sort_features(self, X, vocabulary):\n \"\"\"Sort features by name\n\n Returns a reordered matrix and modifies the vocabulary in place\n \"\"\"\n sorted_features = sorted(vocabulary.items())\n map_index = np.empty(len(sorted_features), dtype=X.indices.dtype)\n for new_val, (term, old_val) in enumerate(sorted_features):\n vocabulary[term] = new_val\n map_index[old_val] = new_val\n\n X.indices = map_index.take(X.indices, mode='clip')\n return X" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/fit", + "name": "fit", + "qname": "sklearn.feature_extraction.text.CountVectorizer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/fit/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.CountVectorizer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.feature_extraction.text/CountVectorizer/fit/raw_documents", + "name": "raw_documents", + "qname": "sklearn.feature_extraction.text.CountVectorizer.fit.raw_documents", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "An iterable which yields either str, unicode or file objects." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/fit/y", + "name": "y", + "qname": "sklearn.feature_extraction.text.CountVectorizer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Learn a vocabulary dictionary of all tokens in the raw documents.", + "docstring": "Learn a vocabulary dictionary of all tokens in the raw documents.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\nReturns\n-------\nself", + "code": " def fit(self, raw_documents, y=None):\n \"\"\"Learn a vocabulary dictionary of all tokens in the raw documents.\n\n Parameters\n ----------\n raw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\n Returns\n -------\n self\n \"\"\"\n self._warn_for_unused_params()\n self.fit_transform(raw_documents)\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/fit_transform", + "name": "fit_transform", + "qname": "sklearn.feature_extraction.text.CountVectorizer.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/fit_transform/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.CountVectorizer.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/fit_transform/raw_documents", + "name": "raw_documents", + "qname": "sklearn.feature_extraction.text.CountVectorizer.fit_transform.raw_documents", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "An iterable which yields either str, unicode or file objects." 
+ }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/fit_transform/y", + "name": "y", + "qname": "sklearn.feature_extraction.text.CountVectorizer.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Learn the vocabulary dictionary and return document-term matrix.\n\nThis is equivalent to fit followed by transform, but more efficiently\nimplemented.", + "docstring": "Learn the vocabulary dictionary and return document-term matrix.\n\nThis is equivalent to fit followed by transform, but more efficiently\nimplemented.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\nReturns\n-------\nX : array of shape (n_samples, n_features)\n Document-term matrix.", + "code": " def fit_transform(self, raw_documents, y=None):\n \"\"\"Learn the vocabulary dictionary and return document-term matrix.\n\n This is equivalent to fit followed by transform, but more efficiently\n implemented.\n\n Parameters\n ----------\n raw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\n Returns\n -------\n X : array of shape (n_samples, n_features)\n Document-term matrix.\n \"\"\"\n # We intentionally don't call the transform method to make\n # fit_transform overridable without unwanted side effects in\n # TfidfVectorizer.\n if isinstance(raw_documents, str):\n raise ValueError(\n \"Iterable over raw text documents expected, \"\n \"string object received.\")\n\n self._validate_params()\n self._validate_vocabulary()\n max_df = self.max_df\n min_df = self.min_df\n max_features = self.max_features\n\n vocabulary, X = self._count_vocab(raw_documents,\n self.fixed_vocabulary_)\n\n if self.binary:\n X.data.fill(1)\n\n if not self.fixed_vocabulary_:\n n_doc = X.shape[0]\n max_doc_count = (max_df\n if isinstance(max_df, numbers.Integral)\n else max_df * n_doc)\n min_doc_count = (min_df\n if isinstance(min_df, numbers.Integral)\n else min_df * n_doc)\n if max_doc_count < min_doc_count:\n raise ValueError(\n \"max_df corresponds to < documents than min_df\")\n if max_features is not None:\n X = self._sort_features(X, vocabulary)\n X, self.stop_words_ = self._limit_features(X, vocabulary,\n max_doc_count,\n min_doc_count,\n max_features)\n if max_features is None:\n X = self._sort_features(X, vocabulary)\n self.vocabulary_ = vocabulary\n\n return X" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/get_feature_names", + "name": "get_feature_names", + "qname": "sklearn.feature_extraction.text.CountVectorizer.get_feature_names", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/get_feature_names/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.CountVectorizer.get_feature_names.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Array mapping from feature integer indices to feature name.", + "docstring": "Array mapping from feature integer indices to feature name.\n\nReturns\n-------\nfeature_names : 
list\n A list of feature names.", + "code": " def get_feature_names(self):\n \"\"\"Array mapping from feature integer indices to feature name.\n\n Returns\n -------\n feature_names : list\n A list of feature names.\n \"\"\"\n\n self._check_vocabulary()\n\n return [t for t, i in sorted(self.vocabulary_.items(),\n key=itemgetter(1))]" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.feature_extraction.text.CountVectorizer.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/inverse_transform/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.CountVectorizer.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/inverse_transform/X", + "name": "X", + "qname": "sklearn.feature_extraction.text.CountVectorizer.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Document-term matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return terms per document with nonzero entries in X.", + "docstring": "Return terms per document with nonzero entries in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document-term matrix.\n\nReturns\n-------\nX_inv : list of arrays of shape (n_samples,)\n List of arrays of terms.", + "code": " def inverse_transform(self, X):\n \"\"\"Return terms per document with nonzero entries in X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document-term matrix.\n\n Returns\n -------\n X_inv : list of arrays of shape (n_samples,)\n List of arrays of terms.\n \"\"\"\n self._check_vocabulary()\n # We need CSR format for fast row manipulations.\n X = check_array(X, accept_sparse='csr')\n n_samples = X.shape[0]\n\n terms = np.array(list(self.vocabulary_.keys()))\n indices = np.array(list(self.vocabulary_.values()))\n inverse_vocabulary = terms[np.argsort(indices)]\n\n if sp.issparse(X):\n return [inverse_vocabulary[X[i, :].nonzero()[1]].ravel()\n for i in range(n_samples)]\n else:\n return [inverse_vocabulary[np.flatnonzero(X[i, :])].ravel()\n for i in range(n_samples)]" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/transform", + "name": "transform", + "qname": "sklearn.feature_extraction.text.CountVectorizer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/transform/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.CountVectorizer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/CountVectorizer/transform/raw_documents", + "name": "raw_documents", + "qname": 
"sklearn.feature_extraction.text.CountVectorizer.transform.raw_documents", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "An iterable which yields either str, unicode or file objects." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Transform documents to document-term matrix.\n\nExtract token counts out of raw text documents using the vocabulary\nfitted with fit or the one provided to the constructor.", + "docstring": "Transform documents to document-term matrix.\n\nExtract token counts out of raw text documents using the vocabulary\nfitted with fit or the one provided to the constructor.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n Document-term matrix.", + "code": " def transform(self, raw_documents):\n \"\"\"Transform documents to document-term matrix.\n\n Extract token counts out of raw text documents using the vocabulary\n fitted with fit or the one provided to the constructor.\n\n Parameters\n ----------\n raw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\n Returns\n -------\n X : sparse matrix of shape (n_samples, n_features)\n Document-term matrix.\n \"\"\"\n if isinstance(raw_documents, str):\n raise ValueError(\n \"Iterable over raw text documents expected, \"\n \"string object received.\")\n self._check_vocabulary()\n\n # use the same matrix-building strategy as fit_transform\n _, X = self._count_vocab(raw_documents, fixed_vocab=True)\n if self.binary:\n X.data.fill(1)\n return X" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__", + "name": "__init__", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/input", + "name": "input", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.input", + "default_value": "'content'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'filename', 'file', 'content'}", + "default_value": "'content'", + "description": "- If `'filename'`, the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n- If `'file'`, the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n- If `'content'`, the input is expected to be a sequence of items that\n can be of type string or byte." 
+ }, + "type": { + "kind": "EnumType", + "values": ["filename", "file", "content"] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/encoding", + "name": "encoding", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.encoding", + "default_value": "'utf-8'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "string", + "default_value": "'utf-8'", + "description": "If bytes or files are given to analyze, this encoding is used to\ndecode." + }, + "type": { + "kind": "NamedType", + "name": "string" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/decode_error", + "name": "decode_error", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.decode_error", + "default_value": "'strict'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'strict', 'ignore', 'replace'}", + "default_value": "'strict'", + "description": "Instruction on what to do if a byte sequence is given to analyze that\ncontains characters not of the given `encoding`. By default, it is\n'strict', meaning that a UnicodeDecodeError will be raised. Other\nvalues are 'ignore' and 'replace'." + }, + "type": { + "kind": "EnumType", + "values": ["strict", "replace", "ignore"] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/strip_accents", + "name": "strip_accents", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.strip_accents", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'ascii', 'unicode'}", + "default_value": "None", + "description": "Remove accents and perform other character normalization\nduring the preprocessing step.\n'ascii' is a fast method that only works on characters that have\nan direct ASCII mapping.\n'unicode' is a slightly slower method that works on any characters.\nNone (default) does nothing.\n\nBoth 'ascii' and 'unicode' use NFKD normalization from\n:func:`unicodedata.normalize`." + }, + "type": { + "kind": "EnumType", + "values": ["unicode", "ascii"] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/lowercase", + "name": "lowercase", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.lowercase", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Convert all characters to lowercase before tokenizing." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/preprocessor", + "name": "preprocessor", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.preprocessor", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "Override the preprocessing (string transformation) stage while\npreserving the tokenizing and n-grams generation steps.\nOnly applies if ``analyzer is not callable``." 
+ }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/tokenizer", + "name": "tokenizer", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.tokenizer", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "Override the string tokenization step while preserving the\npreprocessing and n-grams generation steps.\nOnly applies if ``analyzer == 'word'``." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/stop_words", + "name": "stop_words", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.stop_words", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'english'}, list", + "default_value": "None", + "description": "If 'english', a built-in stop word list for English is used.\nThere are several known issues with 'english' and you should\nconsider an alternative (see :ref:`stop_words`).\n\nIf a list, that list is assumed to contain stop words, all of which\nwill be removed from the resulting tokens.\nOnly applies if ``analyzer == 'word'``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["english"] + }, + { + "kind": "NamedType", + "name": "list" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/token_pattern", + "name": "token_pattern", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.token_pattern", + "default_value": "'(?u)\\\\b\\\\w\\\\w+\\\\b'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str", + "default_value": "r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"", + "description": "Regular expression denoting what constitutes a \"token\", only used\nif ``analyzer == 'word'``. The default regexp selects tokens of 2\nor more alphanumeric characters (punctuation is completely ignored\nand always treated as a token separator).\n\nIf there is a capturing group in token_pattern then the\ncaptured group content, not the entire match, becomes the token.\nAt most one capturing group is permitted." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/ngram_range", + "name": "ngram_range", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.ngram_range", + "default_value": "(1, 1)", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "tuple (min_n, max_n)", + "default_value": "(1, 1)", + "description": "The lower and upper boundary of the range of n-values for different\nn-grams to be extracted. All values of n such that min_n <= n <= max_n\nwill be used. For example an ``ngram_range`` of ``(1, 1)`` means only\nunigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\nonly bigrams.\nOnly applies if ``analyzer is not callable``." 
+ }, + "type": { + "kind": "NamedType", + "name": "tuple (min_n, max_n)" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/analyzer", + "name": "analyzer", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.analyzer", + "default_value": "'word'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'word', 'char', 'char_wb'} or callable", + "default_value": "'word'", + "description": "Whether the feature should be made of word or character n-grams.\nOption 'char_wb' creates character n-grams only from text inside\nword boundaries; n-grams at the edges of words are padded with space.\n\nIf a callable is passed it is used to extract the sequence of features\nout of the raw, unprocessed input.\n\n.. versionchanged:: 0.21\n Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n is first read from the file and then passed to the given callable\n analyzer." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["char_wb", "word", "char"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/n_features", + "name": "n_features", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.n_features", + "default_value": "2**20", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "(2 ** 20)", + "description": "The number of features (columns) in the output matrices. Small numbers\nof features are likely to cause hash collisions, but large numbers\nwill cause larger coefficient dimensions in linear learners." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/binary", + "name": "binary", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.binary", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, all non zero counts are set to 1. This is useful for discrete\nprobabilistic models that model binary events rather than integer\ncounts." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/norm", + "name": "norm", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.norm", + "default_value": "'l2'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'l1', 'l2'}", + "default_value": "'l2'", + "description": "Norm used to normalize term vectors. None for no normalization." + }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1"] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/alternate_sign", + "name": "alternate_sign", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.alternate_sign", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "When True, an alternating sign is added to the features so as to\napproximately conserve the inner product in the hashed space even for\nsmall n_features. This approach is similar to sparse random projection.\n\n.. 
versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/__init__/dtype", + "name": "dtype", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.__init__.dtype", + "default_value": "np.float64", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "type", + "default_value": "np.float64", + "description": "Type of the matrix returned by fit_transform() or transform()." + }, + "type": { + "kind": "NamedType", + "name": "type" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Convert a collection of text documents to a matrix of token occurrences\n\nIt turns a collection of text documents into a scipy.sparse matrix holding\ntoken occurrence counts (or binary occurrence information), possibly\nnormalized as token frequencies if norm='l1' or projected on the euclidean\nunit sphere if norm='l2'.\n\nThis text vectorizer implementation uses the hashing trick to find the\ntoken string name to feature integer index mapping.\n\nThis strategy has several advantages:\n\n- it is very low memory and scalable to large datasets, as there is no need\n to store a vocabulary dictionary in memory\n\n- it is fast to pickle and un-pickle as it holds no state besides the\n constructor parameters\n\n- it can be used in a streaming (partial fit) or parallel pipeline as there\n is no state computed during fit.\n\nThere are also a couple of cons (vs using a CountVectorizer with an\nin-memory vocabulary):\n\n- there is no way to compute the inverse transform (from feature indices to\n string feature names) which can be a problem when trying to introspect\n which features are most important to a model.\n\n- there can be collisions: distinct tokens can be mapped to the same\n feature index. However in practice this is rarely an issue if n_features\n is large enough (e.g. 
2 ** 18 for text classification problems).\n\n- no IDF weighting as this would render the transformer stateful.\n\nThe hash function employed is the signed 32-bit version of Murmurhash3.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, input='content', encoding='utf-8',\n decode_error='strict', strip_accents=None,\n lowercase=True, preprocessor=None, tokenizer=None,\n stop_words=None, token_pattern=r\"(?u)\\b\\w\\w+\\b\",\n ngram_range=(1, 1), analyzer='word', n_features=(2 ** 20),\n binary=False, norm='l2', alternate_sign=True,\n dtype=np.float64):\n self.input = input\n self.encoding = encoding\n self.decode_error = decode_error\n self.strip_accents = strip_accents\n self.preprocessor = preprocessor\n self.tokenizer = tokenizer\n self.analyzer = analyzer\n self.lowercase = lowercase\n self.token_pattern = token_pattern\n self.stop_words = stop_words\n self.n_features = n_features\n self.ngram_range = ngram_range\n self.binary = binary\n self.norm = norm\n self.alternate_sign = alternate_sign\n self.dtype = dtype" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/_get_hasher", + "name": "_get_hasher", + "qname": "sklearn.feature_extraction.text.HashingVectorizer._get_hasher", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/_get_hasher/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.HashingVectorizer._get_hasher.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_hasher(self):\n return FeatureHasher(n_features=self.n_features,\n input_type='string', dtype=self.dtype,\n alternate_sign=self.alternate_sign)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.feature_extraction.text.HashingVectorizer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/_more_tags/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.HashingVectorizer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'X_types': ['string']}" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/fit", + "name": "fit", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/fit/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/fit/X", + "name": "X", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + 
"is_public": true, + "docstring": { + "type": "ndarray of shape [n_samples, n_features]", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "n_features]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/fit/y", + "name": "y", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Does nothing: this transformer is stateless.", + "docstring": "Does nothing: this transformer is stateless.\n\nParameters\n----------\nX : ndarray of shape [n_samples, n_features]\n Training data.", + "code": " def fit(self, X, y=None):\n \"\"\"Does nothing: this transformer is stateless.\n\n Parameters\n ----------\n X : ndarray of shape [n_samples, n_features]\n Training data.\n \"\"\"\n # triggers a parameter validation\n if isinstance(X, str):\n raise ValueError(\n \"Iterable over raw text documents expected, \"\n \"string object received.\")\n\n self._warn_for_unused_params()\n self._validate_params()\n\n self._get_hasher().fit(X, y=y)\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/fit_transform", + "name": "fit_transform", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/fit_transform/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/fit_transform/X", + "name": "X", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable over raw text documents, length = n_samples", + "default_value": "", + "description": "Samples. Each sample must be a text document (either bytes or\nunicode strings, file name or file object depending on the\nconstructor argument) which will be tokenized and hashed." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "iterable over raw text documents" + }, + { + "kind": "NamedType", + "name": "length = n_samples" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/fit_transform/y", + "name": "y", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "any", + "default_value": "", + "description": "Ignored. This parameter exists only for compatibility with\nsklearn.pipeline.Pipeline." 
+ }, + "type": { + "kind": "NamedType", + "name": "any" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Transform a sequence of documents to a document-term matrix.", + "docstring": "Transform a sequence of documents to a document-term matrix.\n\nParameters\n----------\nX : iterable over raw text documents, length = n_samples\n Samples. Each sample must be a text document (either bytes or\n unicode strings, file name or file object depending on the\n constructor argument) which will be tokenized and hashed.\ny : any\n Ignored. This parameter exists only for compatibility with\n sklearn.pipeline.Pipeline.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n Document-term matrix.", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Transform a sequence of documents to a document-term matrix.\n\n Parameters\n ----------\n X : iterable over raw text documents, length = n_samples\n Samples. Each sample must be a text document (either bytes or\n unicode strings, file name or file object depending on the\n constructor argument) which will be tokenized and hashed.\n y : any\n Ignored. This parameter exists only for compatibility with\n sklearn.pipeline.Pipeline.\n\n Returns\n -------\n X : sparse matrix of shape (n_samples, n_features)\n Document-term matrix.\n \"\"\"\n return self.fit(X, y).transform(X)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/partial_fit", + "name": "partial_fit", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/partial_fit/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/partial_fit/X", + "name": "X", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape [n_samples, n_features]", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape [n_samples, n_features]" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/partial_fit/y", + "name": "y", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.partial_fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Does nothing: this transformer is stateless.\n\nThis method is just there to mark the fact that this transformer\ncan work in a streaming setup.", + "docstring": "Does nothing: this transformer is stateless.\n\nThis method is just there to mark the fact that this transformer\ncan work in a streaming setup.\n\nParameters\n----------\nX : ndarray of shape [n_samples, n_features]\n Training data.", + "code": " def partial_fit(self, X, y=None):\n \"\"\"Does nothing: this transformer is stateless.\n\n This method is just there to mark the fact that this transformer\n can work in a streaming setup.\n\n Parameters\n ----------\n X : ndarray of shape [n_samples, n_features]\n Training data.\n \"\"\"\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/transform", + "name": "transform", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/transform/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/HashingVectorizer/transform/X", + "name": "X", + "qname": "sklearn.feature_extraction.text.HashingVectorizer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable over raw text documents, length = n_samples", + "default_value": "", + "description": "Samples. Each sample must be a text document (either bytes or\nunicode strings, file name or file object depending on the\nconstructor argument) which will be tokenized and hashed." + }, + "type": { + "kind": "NamedType", + "name": "iterable over raw text documents, length = n_samples" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Transform a sequence of documents to a document-term matrix.", + "docstring": "Transform a sequence of documents to a document-term matrix.\n\nParameters\n----------\nX : iterable over raw text documents, length = n_samples\n Samples. Each sample must be a text document (either bytes or\n unicode strings, file name or file object depending on the\n constructor argument) which will be tokenized and hashed.\n\nReturns\n-------\nX : sparse matrix of shape (n_samples, n_features)\n Document-term matrix.", + "code": " def transform(self, X):\n \"\"\"Transform a sequence of documents to a document-term matrix.\n\n Parameters\n ----------\n X : iterable over raw text documents, length = n_samples\n Samples. 
Each sample must be a text document (either bytes or\n unicode strings, file name or file object depending on the\n constructor argument) which will be tokenized and hashed.\n\n Returns\n -------\n X : sparse matrix of shape (n_samples, n_features)\n Document-term matrix.\n \"\"\"\n if isinstance(X, str):\n raise ValueError(\n \"Iterable over raw text documents expected, \"\n \"string object received.\")\n\n self._validate_params()\n\n analyzer = self.build_analyzer()\n X = self._get_hasher().transform(analyzer(doc) for doc in X)\n if self.binary:\n X.data.fill(1)\n if self.norm is not None:\n X = normalize(X, norm=self.norm, copy=False)\n return X" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/__init__", + "name": "__init__", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/__init__/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/__init__/norm", + "name": "norm", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.__init__.norm", + "default_value": "'l2'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'l1', 'l2'}", + "default_value": "'l2'", + "description": "Each output row will have unit norm, either:\n* 'l2': Sum of squares of vector elements is 1. The cosine\nsimilarity between two vectors is their dot product when l2 norm has\nbeen applied.\n* 'l1': Sum of absolute values of vector elements is 1.\nSee :func:`preprocessing.normalize`" + }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1"] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/__init__/use_idf", + "name": "use_idf", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.__init__.use_idf", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Enable inverse-document-frequency reweighting." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/__init__/smooth_idf", + "name": "smooth_idf", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.__init__.smooth_idf", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Smooth idf weights by adding one to document frequencies, as if an\nextra document was seen containing every term in the collection\nexactly once. Prevents zero divisions." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/__init__/sublinear_tf", + "name": "sublinear_tf", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.__init__.sublinear_tf", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf)." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Transform a count matrix to a normalized tf or tf-idf representation\n\nTf means term-frequency while tf-idf means term-frequency times inverse\ndocument-frequency. This is a common term weighting scheme in information\nretrieval, that has also found good use in document classification.\n\nThe goal of using tf-idf instead of the raw frequencies of occurrence of a\ntoken in a given document is to scale down the impact of tokens that occur\nvery frequently in a given corpus and that are hence empirically less\ninformative than features that occur in a small fraction of the training\ncorpus.\n\nThe formula that is used to compute the tf-idf for a term t of a document d\nin a document set is tf-idf(t, d) = tf(t, d) * idf(t), and the idf is\ncomputed as idf(t) = log [ n / df(t) ] + 1 (if ``smooth_idf=False``), where\nn is the total number of documents in the document set and df(t) is the\ndocument frequency of t; the document frequency is the number of documents\nin the document set that contain the term t. The effect of adding \"1\" to\nthe idf in the equation above is that terms with zero idf, i.e., terms\nthat occur in all documents in a training set, will not be entirely\nignored.\n(Note that the idf formula above differs from the standard textbook\nnotation that defines the idf as\nidf(t) = log [ n / (df(t) + 1) ]).\n\nIf ``smooth_idf=True`` (the default), the constant \"1\" is added to the\nnumerator and denominator of the idf as if an extra document was seen\ncontaining every term in the collection exactly once, which prevents\nzero divisions: idf(t) = log [ (1 + n) / (1 + df(t)) ] + 1.\n\nFurthermore, the formulas used to compute tf and idf depend\non parameter settings that correspond to the SMART notation used in IR\nas follows:\n\nTf is \"n\" (natural) by default, \"l\" (logarithmic) when\n``sublinear_tf=True``.\nIdf is \"t\" when use_idf is given, \"n\" (none) otherwise.\nNormalization is \"c\" (cosine) when ``norm='l2'``, \"n\" (none)\nwhen ``norm=None``.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, norm='l2', use_idf=True, smooth_idf=True,\n sublinear_tf=False):\n self.norm = norm\n self.use_idf = use_idf\n self.smooth_idf = smooth_idf\n self.sublinear_tf = sublinear_tf" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.feature_extraction.text.TfidfTransformer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/_more_tags/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfTransformer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'X_types': 'sparse'}" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/fit", + "name": "fit", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/fit/self", + "name": "self", + "qname": 
"sklearn.feature_extraction.text.TfidfTransformer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/fit/X", + "name": "X", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of shape n_samples, n_features)", + "default_value": "", + "description": "A matrix of term/token counts." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "sparse matrix of shape n_samples" + }, + { + "kind": "NamedType", + "name": "n_features" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/fit/y", + "name": "y", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Learn the idf vector (global term weights).", + "docstring": "Learn the idf vector (global term weights).\n\nParameters\n----------\nX : sparse matrix of shape n_samples, n_features)\n A matrix of term/token counts.", + "code": " def fit(self, X, y=None):\n \"\"\"Learn the idf vector (global term weights).\n\n Parameters\n ----------\n X : sparse matrix of shape n_samples, n_features)\n A matrix of term/token counts.\n \"\"\"\n X = check_array(X, accept_sparse=('csr', 'csc'))\n if not sp.issparse(X):\n X = sp.csr_matrix(X)\n dtype = X.dtype if X.dtype in FLOAT_DTYPES else np.float64\n\n if self.use_idf:\n n_samples, n_features = X.shape\n df = _document_frequency(X)\n df = df.astype(dtype, **_astype_copy_false(df))\n\n # perform idf smoothing if required\n df += int(self.smooth_idf)\n n_samples += int(self.smooth_idf)\n\n # log+1 instead of log makes sure terms with zero idf don't get\n # suppressed entirely.\n idf = np.log(n_samples / df) + 1\n self._idf_diag = sp.diags(idf, offsets=0,\n shape=(n_features, n_features),\n format='csr',\n dtype=dtype)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/idf_@getter", + "name": "idf_", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.idf_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/idf_/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.idf_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def idf_(self):\n # if _idf_diag is not set, this will raise an attribute error,\n # which means hasattr(self, \"idf_\") is False\n return np.ravel(self._idf_diag.sum(axis=0))" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/idf_@setter", + "name": "idf_", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.idf_", + "decorators": ["idf_.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/idf_/self", + 
"name": "self", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.idf_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/idf_/value", + "name": "value", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.idf_.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @idf_.setter\n def idf_(self, value):\n value = np.asarray(value, dtype=np.float64)\n n_features = value.shape[0]\n self._idf_diag = sp.spdiags(value, diags=0, m=n_features,\n n=n_features, format='csr')" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/transform", + "name": "transform", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/transform/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/transform/X", + "name": "X", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of (n_samples, n_features)", + "default_value": "", + "description": "a matrix of term/token counts" + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfTransformer/transform/copy", + "name": "copy", + "qname": "sklearn.feature_extraction.text.TfidfTransformer.transform.copy", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to copy X and operate on the copy or perform in-place\noperations." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Transform a count matrix to a tf or tf-idf representation", + "docstring": "Transform a count matrix to a tf or tf-idf representation\n\nParameters\n----------\nX : sparse matrix of (n_samples, n_features)\n a matrix of term/token counts\n\ncopy : bool, default=True\n Whether to copy X and operate on the copy or perform in-place\n operations.\n\nReturns\n-------\nvectors : sparse matrix of shape (n_samples, n_features)", + "code": " def transform(self, X, copy=True):\n \"\"\"Transform a count matrix to a tf or tf-idf representation\n\n Parameters\n ----------\n X : sparse matrix of (n_samples, n_features)\n a matrix of term/token counts\n\n copy : bool, default=True\n Whether to copy X and operate on the copy or perform in-place\n operations.\n\n Returns\n -------\n vectors : sparse matrix of shape (n_samples, n_features)\n \"\"\"\n X = check_array(X, accept_sparse='csr', dtype=FLOAT_DTYPES, copy=copy)\n if not sp.issparse(X):\n X = sp.csr_matrix(X, dtype=np.float64)\n\n n_samples, n_features = X.shape\n\n if self.sublinear_tf:\n np.log(X.data, X.data)\n X.data += 1\n\n if self.use_idf:\n # idf_ being a property, the automatic attributes detection\n # does not work as usual and we need to specify the attribute\n # name:\n check_is_fitted(self, attributes=[\"idf_\"],\n msg='idf vector is not fitted')\n\n expected_n_features = self._idf_diag.shape[0]\n if n_features != expected_n_features:\n raise ValueError(\"Input has n_features=%d while the model\"\n \" has been trained with n_features=%d\" % (\n n_features, expected_n_features))\n # *= doesn't work\n X = X * self._idf_diag\n\n if self.norm:\n X = normalize(X, norm=self.norm, copy=False)\n\n return X" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__", + "name": "__init__", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/input", + "name": "input", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.input", + "default_value": "'content'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'filename', 'file', 'content'}", + "default_value": "'content'", + "description": "- If `'filename'`, the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n- If `'file'`, the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n- If `'content'`, the input is expected to be a sequence of items that\n can be of type string or byte." 
+ }, + "type": { + "kind": "EnumType", + "values": ["filename", "file", "content"] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/encoding", + "name": "encoding", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.encoding", + "default_value": "'utf-8'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str", + "default_value": "'utf-8'", + "description": "If bytes or files are given to analyze, this encoding is used to\ndecode." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/decode_error", + "name": "decode_error", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.decode_error", + "default_value": "'strict'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'strict', 'ignore', 'replace'}", + "default_value": "'strict'", + "description": "Instruction on what to do if a byte sequence is given to analyze that\ncontains characters not of the given `encoding`. By default, it is\n'strict', meaning that a UnicodeDecodeError will be raised. Other\nvalues are 'ignore' and 'replace'." + }, + "type": { + "kind": "EnumType", + "values": ["strict", "replace", "ignore"] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/strip_accents", + "name": "strip_accents", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.strip_accents", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'ascii', 'unicode'}", + "default_value": "None", + "description": "Remove accents and perform other character normalization\nduring the preprocessing step.\n'ascii' is a fast method that only works on characters that have\na direct ASCII mapping.\n'unicode' is a slightly slower method that works on any characters.\nNone (default) does nothing.\n\nBoth 'ascii' and 'unicode' use NFKD normalization from\n:func:`unicodedata.normalize`." + }, + "type": { + "kind": "EnumType", + "values": ["unicode", "ascii"] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/lowercase", + "name": "lowercase", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.lowercase", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Convert all characters to lowercase before tokenizing." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/preprocessor", + "name": "preprocessor", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.preprocessor", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "Override the preprocessing (string transformation) stage while\npreserving the tokenizing and n-grams generation steps.\nOnly applies if ``analyzer is not callable``."
+ }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/tokenizer", + "name": "tokenizer", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.tokenizer", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "Override the string tokenization step while preserving the\npreprocessing and n-grams generation steps.\nOnly applies if ``analyzer == 'word'``." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/analyzer", + "name": "analyzer", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.analyzer", + "default_value": "'word'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'word', 'char', 'char_wb'} or callable", + "default_value": "'word'", + "description": "Whether the feature should be made of word or character n-grams.\nOption 'char_wb' creates character n-grams only from text inside\nword boundaries; n-grams at the edges of words are padded with space.\n\nIf a callable is passed it is used to extract the sequence of features\nout of the raw, unprocessed input.\n\n.. versionchanged:: 0.21\n Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n is first read from the file and then passed to the given callable\n analyzer." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["char_wb", "word", "char"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/stop_words", + "name": "stop_words", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.stop_words", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'english'}, list", + "default_value": "None", + "description": "If a string, it is passed to _check_stop_list and the appropriate stop\nlist is returned. 'english' is currently the only supported string\nvalue.\nThere are several known issues with 'english' and you should\nconsider an alternative (see :ref:`stop_words`).\n\nIf a list, that list is assumed to contain stop words, all of which\nwill be removed from the resulting tokens.\nOnly applies if ``analyzer == 'word'``.\n\nIf None, no stop words will be used. max_df can be set to a value\nin the range [0.7, 1.0) to automatically detect and filter stop\nwords based on intra corpus document frequency of terms." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["english"] + }, + { + "kind": "NamedType", + "name": "list" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/token_pattern", + "name": "token_pattern", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.token_pattern", + "default_value": "'(?u)\\\\b\\\\w\\\\w+\\\\b'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str", + "default_value": "r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"", + "description": "Regular expression denoting what constitutes a \"token\", only used\nif ``analyzer == 'word'``. 
The default regexp selects tokens of 2\nor more alphanumeric characters (punctuation is completely ignored\nand always treated as a token separator).\n\nIf there is a capturing group in token_pattern then the\ncaptured group content, not the entire match, becomes the token.\nAt most one capturing group is permitted." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/ngram_range", + "name": "ngram_range", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.ngram_range", + "default_value": "(1, 1)", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "tuple (min_n, max_n)", + "default_value": "(1, 1)", + "description": "The lower and upper boundary of the range of n-values for different\nn-grams to be extracted. All values of n such that min_n <= n <= max_n\nwill be used. For example an ``ngram_range`` of ``(1, 1)`` means only\nunigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\nonly bigrams.\nOnly applies if ``analyzer is not callable``." + }, + "type": { + "kind": "NamedType", + "name": "tuple (min_n, max_n)" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/max_df", + "name": "max_df", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.max_df", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float or int", + "default_value": "1.0", + "description": "When building the vocabulary ignore terms that have a document\nfrequency strictly higher than the given threshold (corpus-specific\nstop words).\nIf float in range [0.0, 1.0], the parameter represents a proportion of\ndocuments, integer absolute counts.\nThis parameter is ignored if vocabulary is not None." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": 1.0, + "min_inclusive": true, + "max_inclusive": true + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/min_df", + "name": "min_df", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.min_df", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float or int", + "default_value": "1", + "description": "When building the vocabulary ignore terms that have a document\nfrequency strictly lower than the given threshold. This value is also\ncalled cut-off in the literature.\nIf float in range of [0.0, 1.0], the parameter represents a proportion\nof documents, integer absolute counts.\nThis parameter is ignored if vocabulary is not None." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": 1.0, + "min_inclusive": true, + "max_inclusive": true + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/max_features", + "name": "max_features", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.max_features", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "If not None, build a vocabulary that only consider the top\nmax_features ordered by term frequency across the corpus.\n\nThis parameter is ignored if vocabulary is not None." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/vocabulary", + "name": "vocabulary", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.vocabulary", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "Mapping or iterable", + "default_value": "None", + "description": "Either a Mapping (e.g., a dict) where keys are terms and values are\nindices in the feature matrix, or an iterable over terms. If not\ngiven, a vocabulary is determined from the input documents." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "Mapping" + }, + { + "kind": "NamedType", + "name": "iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/binary", + "name": "binary", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.binary", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, all non-zero term counts are set to 1. This does not mean\noutputs will have only 0/1 values, only that the tf term in tf-idf\nis binary. (Set idf and normalization to False to get 0/1 outputs)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/dtype", + "name": "dtype", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.dtype", + "default_value": "np.float64", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "dtype", + "default_value": "float64", + "description": "Type of the matrix returned by fit_transform() or transform()." + }, + "type": { + "kind": "NamedType", + "name": "dtype" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/norm", + "name": "norm", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.norm", + "default_value": "'l2'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'l1', 'l2'}", + "default_value": "'l2'", + "description": "Each output row will have unit norm, either:\n* 'l2': Sum of squares of vector elements is 1. The cosine\nsimilarity between two vectors is their dot product when l2 norm has\nbeen applied.\n* 'l1': Sum of absolute values of vector elements is 1.\nSee :func:`preprocessing.normalize`." 
+ }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1"] + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/use_idf", + "name": "use_idf", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.use_idf", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Enable inverse-document-frequency reweighting." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/smooth_idf", + "name": "smooth_idf", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.smooth_idf", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Smooth idf weights by adding one to document frequencies, as if an\nextra document was seen containing every term in the collection\nexactly once. Prevents zero divisions." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/__init__/sublinear_tf", + "name": "sublinear_tf", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.__init__.sublinear_tf", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Convert a collection of raw documents to a matrix of TF-IDF features.\n\nEquivalent to :class:`CountVectorizer` followed by\n:class:`TfidfTransformer`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, input='content', encoding='utf-8',\n decode_error='strict', strip_accents=None, lowercase=True,\n preprocessor=None, tokenizer=None, analyzer='word',\n stop_words=None, token_pattern=r\"(?u)\\b\\w\\w+\\b\",\n ngram_range=(1, 1), max_df=1.0, min_df=1,\n max_features=None, vocabulary=None, binary=False,\n dtype=np.float64, norm='l2', use_idf=True, smooth_idf=True,\n sublinear_tf=False):\n\n super().__init__(\n input=input, encoding=encoding, decode_error=decode_error,\n strip_accents=strip_accents, lowercase=lowercase,\n preprocessor=preprocessor, tokenizer=tokenizer, analyzer=analyzer,\n stop_words=stop_words, token_pattern=token_pattern,\n ngram_range=ngram_range, max_df=max_df, min_df=min_df,\n max_features=max_features, vocabulary=vocabulary, binary=binary,\n dtype=dtype)\n\n self._tfidf = TfidfTransformer(norm=norm, use_idf=use_idf,\n smooth_idf=smooth_idf,\n sublinear_tf=sublinear_tf)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/_check_params", + "name": "_check_params", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer._check_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/_check_params/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer._check_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + 
"reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_params(self):\n if self.dtype not in FLOAT_DTYPES:\n warnings.warn(\"Only {} 'dtype' should be used. {} 'dtype' will \"\n \"be converted to np.float64.\"\n .format(FLOAT_DTYPES, self.dtype),\n UserWarning)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/_more_tags/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'X_types': ['string'], '_skip_test': True}" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/fit", + "name": "fit", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/fit/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/fit/raw_documents", + "name": "raw_documents", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.fit.raw_documents", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "An iterable which yields either str, unicode or file objects." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/fit/y", + "name": "y", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "None", + "default_value": "", + "description": "This parameter is not needed to compute tfidf." 
+ }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Learn vocabulary and idf from training set.", + "docstring": "Learn vocabulary and idf from training set.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\ny : None\n This parameter is not needed to compute tfidf.\n\nReturns\n-------\nself : object\n Fitted vectorizer.", + "code": " def fit(self, raw_documents, y=None):\n \"\"\"Learn vocabulary and idf from training set.\n\n Parameters\n ----------\n raw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n y : None\n This parameter is not needed to compute tfidf.\n\n Returns\n -------\n self : object\n Fitted vectorizer.\n \"\"\"\n self._check_params()\n self._warn_for_unused_params()\n X = super().fit_transform(raw_documents)\n self._tfidf.fit(X)\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/fit_transform", + "name": "fit_transform", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/fit_transform/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/fit_transform/raw_documents", + "name": "raw_documents", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.fit_transform.raw_documents", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "An iterable which yields either str, unicode or file objects." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/fit_transform/y", + "name": "y", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "None", + "default_value": "", + "description": "This parameter is ignored." 
+ }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Learn vocabulary and idf, return document-term matrix.\n\nThis is equivalent to fit followed by transform, but more efficiently\nimplemented.", + "docstring": "Learn vocabulary and idf, return document-term matrix.\n\nThis is equivalent to fit followed by transform, but more efficiently\nimplemented.\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\ny : None\n This parameter is ignored.\n\nReturns\n-------\nX : sparse matrix of (n_samples, n_features)\n Tf-idf-weighted document-term matrix.", + "code": " def fit_transform(self, raw_documents, y=None):\n \"\"\"Learn vocabulary and idf, return document-term matrix.\n\n This is equivalent to fit followed by transform, but more efficiently\n implemented.\n\n Parameters\n ----------\n raw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n y : None\n This parameter is ignored.\n\n Returns\n -------\n X : sparse matrix of (n_samples, n_features)\n Tf-idf-weighted document-term matrix.\n \"\"\"\n self._check_params()\n X = super().fit_transform(raw_documents)\n self._tfidf.fit(X)\n # X is already a transformed view of raw_documents so\n # we set copy to False\n return self._tfidf.transform(X, copy=False)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/idf_@getter", + "name": "idf_", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.idf_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/idf_/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.idf_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def idf_(self):\n return self._tfidf.idf_" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/idf_@setter", + "name": "idf_", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.idf_", + "decorators": ["idf_.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/idf_/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.idf_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/idf_/value", + "name": "value", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.idf_.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @idf_.setter\n def idf_(self, value):\n self._validate_vocabulary()\n if hasattr(self, 'vocabulary_'):\n if len(self.vocabulary_) != len(value):\n raise ValueError(\"idf length = %d must be equal \"\n \"to vocabulary size = %d\" %\n (len(value), len(self.vocabulary_)))\n self._tfidf.idf_ = value" + }, + { + "id": 
"scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/norm@getter", + "name": "norm", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.norm", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/norm/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.norm.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def norm(self):\n return self._tfidf.norm" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/norm@setter", + "name": "norm", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.norm", + "decorators": ["norm.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/norm/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.norm.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/norm/value", + "name": "value", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.norm.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @norm.setter\n def norm(self, value):\n self._tfidf.norm = value" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/smooth_idf@getter", + "name": "smooth_idf", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.smooth_idf", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/smooth_idf/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.smooth_idf.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def smooth_idf(self):\n return self._tfidf.smooth_idf" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/smooth_idf@setter", + "name": "smooth_idf", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.smooth_idf", + "decorators": ["smooth_idf.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/smooth_idf/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.smooth_idf.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/smooth_idf/value", + "name": "value", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.smooth_idf.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", 
+ "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @smooth_idf.setter\n def smooth_idf(self, value):\n self._tfidf.smooth_idf = value" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/sublinear_tf@getter", + "name": "sublinear_tf", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.sublinear_tf", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/sublinear_tf/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.sublinear_tf.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def sublinear_tf(self):\n return self._tfidf.sublinear_tf" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/sublinear_tf@setter", + "name": "sublinear_tf", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.sublinear_tf", + "decorators": ["sublinear_tf.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/sublinear_tf/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.sublinear_tf.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/sublinear_tf/value", + "name": "value", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.sublinear_tf.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @sublinear_tf.setter\n def sublinear_tf(self, value):\n self._tfidf.sublinear_tf = value" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/transform", + "name": "transform", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/transform/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/transform/raw_documents", + "name": "raw_documents", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.transform.raw_documents", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "An iterable which yields either str, unicode or file objects." 
+ }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Transform documents to document-term matrix.\n\nUses the vocabulary and document frequencies (df) learned by fit (or\nfit_transform).", + "docstring": "Transform documents to document-term matrix.\n\nUses the vocabulary and document frequencies (df) learned by fit (or\nfit_transform).\n\nParameters\n----------\nraw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\nReturns\n-------\nX : sparse matrix of (n_samples, n_features)\n Tf-idf-weighted document-term matrix.", + "code": " def transform(self, raw_documents):\n \"\"\"Transform documents to document-term matrix.\n\n Uses the vocabulary and document frequencies (df) learned by fit (or\n fit_transform).\n\n Parameters\n ----------\n raw_documents : iterable\n An iterable which yields either str, unicode or file objects.\n\n Returns\n -------\n X : sparse matrix of (n_samples, n_features)\n Tf-idf-weighted document-term matrix.\n \"\"\"\n check_is_fitted(self, msg='The TF-IDF vectorizer is not fitted')\n\n X = super().transform(raw_documents)\n return self._tfidf.transform(X, copy=False)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/use_idf@getter", + "name": "use_idf", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.use_idf", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/use_idf/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.use_idf.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def use_idf(self):\n return self._tfidf.use_idf" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/use_idf@setter", + "name": "use_idf", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.use_idf", + "decorators": ["use_idf.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/use_idf/self", + "name": "self", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.use_idf.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/TfidfVectorizer/use_idf/value", + "name": "value", + "qname": "sklearn.feature_extraction.text.TfidfVectorizer.use_idf.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @use_idf.setter\n def use_idf(self, value):\n self._tfidf.use_idf = value" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_char_ngrams", + "name": "_char_ngrams", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._char_ngrams", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_char_ngrams/self", + "name": "self", + "qname": 
"sklearn.feature_extraction.text._VectorizerMixin._char_ngrams.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_char_ngrams/text_document", + "name": "text_document", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._char_ngrams.text_document", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Tokenize text_document into a sequence of character n-grams", + "docstring": "Tokenize text_document into a sequence of character n-grams", + "code": " def _char_ngrams(self, text_document):\n \"\"\"Tokenize text_document into a sequence of character n-grams\"\"\"\n # normalize white spaces\n text_document = self._white_spaces.sub(\" \", text_document)\n\n text_len = len(text_document)\n min_n, max_n = self.ngram_range\n if min_n == 1:\n # no need to do any slicing for unigrams\n # iterate through the string\n ngrams = list(text_document)\n min_n += 1\n else:\n ngrams = []\n\n # bind method outside of loop to reduce overhead\n ngrams_append = ngrams.append\n\n for n in range(min_n, min(max_n + 1, text_len + 1)):\n for i in range(text_len - n + 1):\n ngrams_append(text_document[i: i + n])\n return ngrams" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_char_wb_ngrams", + "name": "_char_wb_ngrams", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._char_wb_ngrams", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_char_wb_ngrams/self", + "name": "self", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._char_wb_ngrams.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_char_wb_ngrams/text_document", + "name": "text_document", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._char_wb_ngrams.text_document", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Whitespace sensitive char-n-gram tokenization.\n\nTokenize text_document into a sequence of character n-grams\noperating only inside word boundaries. n-grams at the edges\nof words are padded with space.", + "docstring": "Whitespace sensitive char-n-gram tokenization.\n\nTokenize text_document into a sequence of character n-grams\noperating only inside word boundaries. n-grams at the edges\nof words are padded with space.", + "code": " def _char_wb_ngrams(self, text_document):\n \"\"\"Whitespace sensitive char-n-gram tokenization.\n\n Tokenize text_document into a sequence of character n-grams\n operating only inside word boundaries. 
n-grams at the edges\n of words are padded with space.\"\"\"\n # normalize white spaces\n text_document = self._white_spaces.sub(\" \", text_document)\n\n min_n, max_n = self.ngram_range\n ngrams = []\n\n # bind method outside of loop to reduce overhead\n ngrams_append = ngrams.append\n\n for w in text_document.split():\n w = ' ' + w + ' '\n w_len = len(w)\n for n in range(min_n, max_n + 1):\n offset = 0\n ngrams_append(w[offset:offset + n])\n while offset + n < w_len:\n offset += 1\n ngrams_append(w[offset:offset + n])\n if offset == 0: # count a short word (w_len < n) only once\n break\n return ngrams" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_check_stop_words_consistency", + "name": "_check_stop_words_consistency", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._check_stop_words_consistency", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_check_stop_words_consistency/self", + "name": "self", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._check_stop_words_consistency.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_check_stop_words_consistency/stop_words", + "name": "stop_words", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._check_stop_words_consistency.stop_words", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_check_stop_words_consistency/preprocess", + "name": "preprocess", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._check_stop_words_consistency.preprocess", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_check_stop_words_consistency/tokenize", + "name": "tokenize", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._check_stop_words_consistency.tokenize", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check if stop words are consistent", + "docstring": "Check if stop words are consistent\n\nReturns\n-------\nis_consistent : True if stop words are consistent with the preprocessor\n and tokenizer, False if they are not, None if the check\n was previously performed, \"error\" if it could not be\n performed (e.g. because of the use of a custom\n preprocessor / tokenizer)", + "code": " def _check_stop_words_consistency(self, stop_words, preprocess, tokenize):\n \"\"\"Check if stop words are consistent\n\n Returns\n -------\n is_consistent : True if stop words are consistent with the preprocessor\n and tokenizer, False if they are not, None if the check\n was previously performed, \"error\" if it could not be\n performed (e.g. 
because of the use of a custom\n preprocessor / tokenizer)\n \"\"\"\n if id(self.stop_words) == getattr(self, '_stop_words_id', None):\n # Stop words were previously validated\n return None\n\n # NB: stop_words is validated, unlike self.stop_words\n try:\n inconsistent = set()\n for w in stop_words or ():\n tokens = list(tokenize(preprocess(w)))\n for token in tokens:\n if token not in stop_words:\n inconsistent.add(token)\n self._stop_words_id = id(self.stop_words)\n\n if inconsistent:\n warnings.warn('Your stop_words may be inconsistent with '\n 'your preprocessing. Tokenizing the stop '\n 'words generated tokens %r not in '\n 'stop_words.' % sorted(inconsistent))\n return not inconsistent\n except Exception:\n # Failed to check stop words consistency (e.g. because a custom\n # preprocessor or tokenizer was used)\n self._stop_words_id = id(self.stop_words)\n return 'error'" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_check_vocabulary", + "name": "_check_vocabulary", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._check_vocabulary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_check_vocabulary/self", + "name": "self", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._check_vocabulary.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check if vocabulary is empty or missing (not fitted)", + "docstring": "Check if vocabulary is empty or missing (not fitted)", + "code": " def _check_vocabulary(self):\n \"\"\"Check if vocabulary is empty or missing (not fitted)\"\"\"\n if not hasattr(self, 'vocabulary_'):\n self._validate_vocabulary()\n if not self.fixed_vocabulary_:\n raise NotFittedError(\"Vocabulary not fitted or provided\")\n\n if len(self.vocabulary_) == 0:\n raise ValueError(\"Vocabulary is empty\")" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_params", + "name": "_validate_params", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._validate_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_params/self", + "name": "self", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._validate_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check validity of ngram_range parameter", + "docstring": "Check validity of ngram_range parameter", + "code": " def _validate_params(self):\n \"\"\"Check validity of ngram_range parameter\"\"\"\n min_n, max_m = self.ngram_range\n if min_n > max_m:\n raise ValueError(\n \"Invalid value for ngram_range=%s \"\n \"lower boundary larger than the upper boundary.\"\n % str(self.ngram_range))" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_vocabulary", + "name": "_validate_vocabulary", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._validate_vocabulary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_validate_vocabulary/self", + "name":
"self", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._validate_vocabulary.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_vocabulary(self):\n vocabulary = self.vocabulary\n if vocabulary is not None:\n if isinstance(vocabulary, set):\n vocabulary = sorted(vocabulary)\n if not isinstance(vocabulary, Mapping):\n vocab = {}\n for i, t in enumerate(vocabulary):\n if vocab.setdefault(t, i) != i:\n msg = \"Duplicate term in vocabulary: %r\" % t\n raise ValueError(msg)\n vocabulary = vocab\n else:\n indices = set(vocabulary.values())\n if len(indices) != len(vocabulary):\n raise ValueError(\"Vocabulary contains repeated indices.\")\n for i in range(len(vocabulary)):\n if i not in indices:\n msg = (\"Vocabulary of size %d doesn't contain index \"\n \"%d.\" % (len(vocabulary), i))\n raise ValueError(msg)\n if not vocabulary:\n raise ValueError(\"empty vocabulary passed to fit\")\n self.fixed_vocabulary_ = True\n self.vocabulary_ = dict(vocabulary)\n else:\n self.fixed_vocabulary_ = False" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_warn_for_unused_params", + "name": "_warn_for_unused_params", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._warn_for_unused_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_warn_for_unused_params/self", + "name": "self", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._warn_for_unused_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _warn_for_unused_params(self):\n\n if self.tokenizer is not None and self.token_pattern is not None:\n warnings.warn(\"The parameter 'token_pattern' will not be used\"\n \" since 'tokenizer' is not None'\")\n\n if self.preprocessor is not None and callable(self.analyzer):\n warnings.warn(\"The parameter 'preprocessor' will not be used\"\n \" since 'analyzer' is callable'\")\n\n if (self.ngram_range != (1, 1) and self.ngram_range is not None\n and callable(self.analyzer)):\n warnings.warn(\"The parameter 'ngram_range' will not be used\"\n \" since 'analyzer' is callable'\")\n if self.analyzer != 'word' or callable(self.analyzer):\n if self.stop_words is not None:\n warnings.warn(\"The parameter 'stop_words' will not be used\"\n \" since 'analyzer' != 'word'\")\n if self.token_pattern is not None and \\\n self.token_pattern != r\"(?u)\\b\\w\\w+\\b\":\n warnings.warn(\"The parameter 'token_pattern' will not be used\"\n \" since 'analyzer' != 'word'\")\n if self.tokenizer is not None:\n warnings.warn(\"The parameter 'tokenizer' will not be used\"\n \" since 'analyzer' != 'word'\")" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_word_ngrams", + "name": "_word_ngrams", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._word_ngrams", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_word_ngrams/self", + "name": "self", + "qname": 
"sklearn.feature_extraction.text._VectorizerMixin._word_ngrams.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_word_ngrams/tokens", + "name": "tokens", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._word_ngrams.tokens", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/_word_ngrams/stop_words", + "name": "stop_words", + "qname": "sklearn.feature_extraction.text._VectorizerMixin._word_ngrams.stop_words", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Turn tokens into a sequence of n-grams after stop words filtering", + "docstring": "Turn tokens into a sequence of n-grams after stop words filtering", + "code": " def _word_ngrams(self, tokens, stop_words=None):\n \"\"\"Turn tokens into a sequence of n-grams after stop words filtering\"\"\"\n # handle stop words\n if stop_words is not None:\n tokens = [w for w in tokens if w not in stop_words]\n\n # handle token n-grams\n min_n, max_n = self.ngram_range\n if max_n != 1:\n original_tokens = tokens\n if min_n == 1:\n # no need to do any slicing for unigrams\n # just iterate through the original tokens\n tokens = list(original_tokens)\n min_n += 1\n else:\n tokens = []\n\n n_original_tokens = len(original_tokens)\n\n # bind method outside of loop to reduce overhead\n tokens_append = tokens.append\n space_join = \" \".join\n\n for n in range(min_n,\n min(max_n + 1, n_original_tokens + 1)):\n for i in range(n_original_tokens - n + 1):\n tokens_append(space_join(original_tokens[i: i + n]))\n\n return tokens" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/build_analyzer", + "name": "build_analyzer", + "qname": "sklearn.feature_extraction.text._VectorizerMixin.build_analyzer", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/build_analyzer/self", + "name": "self", + "qname": "sklearn.feature_extraction.text._VectorizerMixin.build_analyzer.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return a callable that handles preprocessing, tokenization\nand n-grams generation.", + "docstring": "Return a callable that handles preprocessing, tokenization\nand n-grams generation.\n\nReturns\n-------\nanalyzer: callable\n A function to handle preprocessing, tokenization\n and n-grams generation.", + "code": " def build_analyzer(self):\n \"\"\"Return a callable that handles preprocessing, tokenization\n and n-grams generation.\n\n Returns\n -------\n analyzer: callable\n A function to handle preprocessing, tokenization\n and n-grams generation.\n \"\"\"\n\n if callable(self.analyzer):\n return partial(\n _analyze, analyzer=self.analyzer, decoder=self.decode\n )\n\n preprocess = self.build_preprocessor()\n\n if 
self.analyzer == 'char':\n return partial(_analyze, ngrams=self._char_ngrams,\n preprocessor=preprocess, decoder=self.decode)\n\n elif self.analyzer == 'char_wb':\n\n return partial(_analyze, ngrams=self._char_wb_ngrams,\n preprocessor=preprocess, decoder=self.decode)\n\n elif self.analyzer == 'word':\n stop_words = self.get_stop_words()\n tokenize = self.build_tokenizer()\n self._check_stop_words_consistency(stop_words, preprocess,\n tokenize)\n return partial(_analyze, ngrams=self._word_ngrams,\n tokenizer=tokenize, preprocessor=preprocess,\n decoder=self.decode, stop_words=stop_words)\n\n else:\n raise ValueError('%s is not a valid tokenization scheme/analyzer' %\n self.analyzer)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/build_preprocessor", + "name": "build_preprocessor", + "qname": "sklearn.feature_extraction.text._VectorizerMixin.build_preprocessor", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/build_preprocessor/self", + "name": "self", + "qname": "sklearn.feature_extraction.text._VectorizerMixin.build_preprocessor.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return a function to preprocess the text before tokenization.", + "docstring": "Return a function to preprocess the text before tokenization.\n\nReturns\n-------\npreprocessor: callable\n A function to preprocess the text before tokenization.", + "code": " def build_preprocessor(self):\n \"\"\"Return a function to preprocess the text before tokenization.\n\n Returns\n -------\n preprocessor: callable\n A function to preprocess the text before tokenization.\n \"\"\"\n if self.preprocessor is not None:\n return self.preprocessor\n\n # accent stripping\n if not self.strip_accents:\n strip_accents = None\n elif callable(self.strip_accents):\n strip_accents = self.strip_accents\n elif self.strip_accents == 'ascii':\n strip_accents = strip_accents_ascii\n elif self.strip_accents == 'unicode':\n strip_accents = strip_accents_unicode\n else:\n raise ValueError('Invalid value for \"strip_accents\": %s' %\n self.strip_accents)\n\n return partial(\n _preprocess, accent_function=strip_accents, lower=self.lowercase\n )" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/build_tokenizer", + "name": "build_tokenizer", + "qname": "sklearn.feature_extraction.text._VectorizerMixin.build_tokenizer", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/build_tokenizer/self", + "name": "self", + "qname": "sklearn.feature_extraction.text._VectorizerMixin.build_tokenizer.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return a function that splits a string into a sequence of tokens.", + "docstring": "Return a function that splits a string into a sequence of tokens.\n\nReturns\n-------\ntokenizer: callable\n A function to split a string into a sequence of tokens.", + "code": " def build_tokenizer(self):\n \"\"\"Return a function that splits a string into a sequence of tokens.\n\n Returns\n -------\n tokenizer: callable\n A function 
to split a string into a sequence of tokens.\n \"\"\"\n if self.tokenizer is not None:\n return self.tokenizer\n token_pattern = re.compile(self.token_pattern)\n\n if token_pattern.groups > 1:\n raise ValueError(\n \"More than 1 capturing group in token pattern. Only a single \"\n \"group should be captured.\"\n )\n\n return token_pattern.findall" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/decode", + "name": "decode", + "qname": "sklearn.feature_extraction.text._VectorizerMixin.decode", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/decode/self", + "name": "self", + "qname": "sklearn.feature_extraction.text._VectorizerMixin.decode.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/decode/doc", + "name": "doc", + "qname": "sklearn.feature_extraction.text._VectorizerMixin.decode.doc", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "The string to decode." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Decode the input into a string of unicode symbols.\n\nThe decoding strategy depends on the vectorizer parameters.", + "docstring": "Decode the input into a string of unicode symbols.\n\nThe decoding strategy depends on the vectorizer parameters.\n\nParameters\n----------\ndoc : str\n The string to decode.\n\nReturns\n-------\ndoc: str\n A string of unicode symbols.", + "code": " def decode(self, doc):\n \"\"\"Decode the input into a string of unicode symbols.\n\n The decoding strategy depends on the vectorizer parameters.\n\n Parameters\n ----------\n doc : str\n The string to decode.\n\n Returns\n -------\n doc: str\n A string of unicode symbols.\n \"\"\"\n if self.input == 'filename':\n with open(doc, 'rb') as fh:\n doc = fh.read()\n\n elif self.input == 'file':\n doc = doc.read()\n\n if isinstance(doc, bytes):\n doc = doc.decode(self.encoding, self.decode_error)\n\n if doc is np.nan:\n raise ValueError(\"np.nan is an invalid document, expected byte or \"\n \"unicode string.\")\n\n return doc" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/get_stop_words", + "name": "get_stop_words", + "qname": "sklearn.feature_extraction.text._VectorizerMixin.get_stop_words", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_VectorizerMixin/get_stop_words/self", + "name": "self", + "qname": "sklearn.feature_extraction.text._VectorizerMixin.get_stop_words.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build or fetch the effective stop words list.", + "docstring": "Build or fetch the effective stop words list.\n\nReturns\n-------\nstop_words: list or None\n A list of stop words.", + "code": " def get_stop_words(self):\n \"\"\"Build or fetch the effective stop words list.\n\n Returns\n -------\n stop_words: list or None\n A list of stop words.\n \"\"\"\n return _check_stop_list(self.stop_words)" + }, + { + 
"id": "scikit-learn/sklearn.feature_extraction.text/_analyze", + "name": "_analyze", + "qname": "sklearn.feature_extraction.text._analyze", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_analyze/doc", + "name": "doc", + "qname": "sklearn.feature_extraction.text._analyze.doc", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_analyze/analyzer", + "name": "analyzer", + "qname": "sklearn.feature_extraction.text._analyze.analyzer", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_analyze/tokenizer", + "name": "tokenizer", + "qname": "sklearn.feature_extraction.text._analyze.tokenizer", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_analyze/ngrams", + "name": "ngrams", + "qname": "sklearn.feature_extraction.text._analyze.ngrams", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_analyze/preprocessor", + "name": "preprocessor", + "qname": "sklearn.feature_extraction.text._analyze.preprocessor", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_analyze/decoder", + "name": "decoder", + "qname": "sklearn.feature_extraction.text._analyze.decoder", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_analyze/stop_words", + "name": "stop_words", + "qname": "sklearn.feature_extraction.text._analyze.stop_words", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Chain together an optional series of text processing steps to go from\na single document to ngrams, with or without tokenizing or preprocessing.\n\nIf analyzer is used, only the decoder argument is used, as the analyzer is\nintended to replace the preprocessor, tokenizer, and ngrams steps.", + "docstring": "Chain together an optional series of text processing steps to go from\na single document to ngrams, with or without tokenizing or preprocessing.\n\nIf analyzer is used, only the decoder argument is used, as the analyzer is\nintended to replace the preprocessor, tokenizer, and ngrams steps.\n\nParameters\n----------\nanalyzer: callable, default=None\ntokenizer: callable, default=None\nngrams: callable, default=None\npreprocessor: callable, default=None\ndecoder: callable, default=None\nstop_words: list, default=None\n\nReturns\n-------\nngrams: list\n A 
sequence of tokens, possibly with pairs, triples, etc.", + "code": "def _analyze(doc, analyzer=None, tokenizer=None, ngrams=None,\n preprocessor=None, decoder=None, stop_words=None):\n \"\"\"Chain together an optional series of text processing steps to go from\n a single document to ngrams, with or without tokenizing or preprocessing.\n\n If analyzer is used, only the decoder argument is used, as the analyzer is\n intended to replace the preprocessor, tokenizer, and ngrams steps.\n\n Parameters\n ----------\n analyzer: callable, default=None\n tokenizer: callable, default=None\n ngrams: callable, default=None\n preprocessor: callable, default=None\n decoder: callable, default=None\n stop_words: list, default=None\n\n Returns\n -------\n ngrams: list\n A sequence of tokens, possibly with pairs, triples, etc.\n \"\"\"\n\n if decoder is not None:\n doc = decoder(doc)\n if analyzer is not None:\n doc = analyzer(doc)\n else:\n if preprocessor is not None:\n doc = preprocessor(doc)\n if tokenizer is not None:\n doc = tokenizer(doc)\n if ngrams is not None:\n if stop_words is not None:\n doc = ngrams(doc, stop_words)\n else:\n doc = ngrams(doc)\n return doc" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_check_stop_list", + "name": "_check_stop_list", + "qname": "sklearn.feature_extraction.text._check_stop_list", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_check_stop_list/stop", + "name": "stop", + "qname": "sklearn.feature_extraction.text._check_stop_list.stop", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_stop_list(stop):\n if stop == \"english\":\n return ENGLISH_STOP_WORDS\n elif isinstance(stop, str):\n raise ValueError(\"not a built-in stop list: %s\" % stop)\n elif stop is None:\n return None\n else: # assume it's a collection\n return frozenset(stop)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_document_frequency", + "name": "_document_frequency", + "qname": "sklearn.feature_extraction.text._document_frequency", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_document_frequency/X", + "name": "X", + "qname": "sklearn.feature_extraction.text._document_frequency.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Count the number of non-zero values for each feature in sparse X.", + "docstring": "Count the number of non-zero values for each feature in sparse X.", + "code": "def _document_frequency(X):\n \"\"\"Count the number of non-zero values for each feature in sparse X.\"\"\"\n if sp.isspmatrix_csr(X):\n return np.bincount(X.indices, minlength=X.shape[1])\n else:\n return np.diff(X.indptr)" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_make_int_array", + "name": "_make_int_array", + "qname": "sklearn.feature_extraction.text._make_int_array", + "decorators": [], + "parameters": [], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Construct an array.array of a type suitable for scipy.sparse indices.", + "docstring": "Construct an 
array.array of a type suitable for scipy.sparse indices.", + "code": "def _make_int_array():\n \"\"\"Construct an array.array of a type suitable for scipy.sparse indices.\"\"\"\n return array.array(str(\"i\"))" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_preprocess", + "name": "_preprocess", + "qname": "sklearn.feature_extraction.text._preprocess", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/_preprocess/doc", + "name": "doc", + "qname": "sklearn.feature_extraction.text._preprocess.doc", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_preprocess/accent_function", + "name": "accent_function", + "qname": "sklearn.feature_extraction.text._preprocess.accent_function", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/_preprocess/lower", + "name": "lower", + "qname": "sklearn.feature_extraction.text._preprocess.lower", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Chain together an optional series of text preprocessing steps to\napply to a document.", + "docstring": "Chain together an optional series of text preprocessing steps to\napply to a document.\n\nParameters\n----------\ndoc: str\n The string to preprocess\naccent_function: callable, default=None\n Function for handling accented characters. Common strategies include\n normalizing and removing.\nlower: bool, default=False\n Whether to use str.lower to lowercase all of the text\n\nReturns\n-------\ndoc: str\n preprocessed string", + "code": "def _preprocess(doc, accent_function=None, lower=False):\n \"\"\"Chain together an optional series of text preprocessing steps to\n apply to a document.\n\n Parameters\n ----------\n doc: str\n The string to preprocess\n accent_function: callable, default=None\n Function for handling accented characters.
Common strategies include\n normalizing and removing.\n lower: bool, default=False\n Whether to use str.lower to lowercase all of the text\n\n Returns\n -------\n doc: str\n preprocessed string\n \"\"\"\n if lower:\n doc = doc.lower()\n if accent_function is not None:\n doc = accent_function(doc)\n return doc" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/strip_accents_ascii", + "name": "strip_accents_ascii", + "qname": "sklearn.feature_extraction.text.strip_accents_ascii", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/strip_accents_ascii/s", + "name": "s", + "qname": "sklearn.feature_extraction.text.strip_accents_ascii.s", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "string", + "default_value": "", + "description": "The string to strip" + }, + "type": { + "kind": "NamedType", + "name": "string" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Transform accentuated unicode symbols into ascii or nothing\n\nWarning: this solution is only suited for languages that have a direct\ntransliteration to ASCII symbols.", + "docstring": "Transform accentuated unicode symbols into ascii or nothing\n\nWarning: this solution is only suited for languages that have a direct\ntransliteration to ASCII symbols.\n\nParameters\n----------\ns : string\n The string to strip\n\nSee Also\n--------\nstrip_accents_unicode : Remove accentuated char for any unicode symbol.", + "code": "def strip_accents_ascii(s):\n \"\"\"Transform accentuated unicode symbols into ascii or nothing\n\n Warning: this solution is only suited for languages that have a direct\n transliteration to ASCII symbols.\n\n Parameters\n ----------\n s : string\n The string to strip\n\n See Also\n --------\n strip_accents_unicode : Remove accentuated char for any unicode symbol.\n \"\"\"\n nkfd_form = unicodedata.normalize('NFKD', s)\n return nkfd_form.encode('ASCII', 'ignore').decode('ASCII')" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/strip_accents_unicode", + "name": "strip_accents_unicode", + "qname": "sklearn.feature_extraction.text.strip_accents_unicode", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/strip_accents_unicode/s", + "name": "s", + "qname": "sklearn.feature_extraction.text.strip_accents_unicode.s", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "string", + "default_value": "", + "description": "The string to strip" + }, + "type": { + "kind": "NamedType", + "name": "string" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Transform accentuated unicode symbols into their simple counterpart\n\nWarning: the python-level loop and join operations make this\nimplementation 20 times slower than the strip_accents_ascii basic\nnormalization.", + "docstring": "Transform accentuated unicode symbols into their simple counterpart\n\nWarning: the python-level loop and join operations make this\nimplementation 20 times slower than the strip_accents_ascii basic\nnormalization.\n\nParameters\n----------\ns : string\n The string to strip\n\nSee Also\n--------\nstrip_accents_ascii : Remove accentuated char for any unicode symbol that\n has a direct ASCII equivalent.", + "code": "def strip_accents_unicode(s):\n \"\"\"Transform accentuated unicode symbols into their simple counterpart\n\n Warning: the
python-level loop and join operations make this\n implementation 20 times slower than the strip_accents_ascii basic\n normalization.\n\n Parameters\n ----------\n s : string\n The string to strip\n\n See Also\n --------\n strip_accents_ascii : Remove accentuated char for any unicode symbol that\n has a direct ASCII equivalent.\n \"\"\"\n try:\n # If `s` is ASCII-compatible, then it does not contain any accented\n # characters and we can avoid an expensive list comprehension\n s.encode(\"ASCII\", errors=\"strict\")\n return s\n except UnicodeEncodeError:\n normalized = unicodedata.normalize('NFKD', s)\n return ''.join([c for c in normalized if not unicodedata.combining(c)])" + }, + { + "id": "scikit-learn/sklearn.feature_extraction.text/strip_tags", + "name": "strip_tags", + "qname": "sklearn.feature_extraction.text.strip_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_extraction.text/strip_tags/s", + "name": "s", + "qname": "sklearn.feature_extraction.text.strip_tags.s", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "string", + "default_value": "", + "description": "The string to strip" + }, + "type": { + "kind": "NamedType", + "name": "string" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Basic regexp based HTML / XML tag stripper function\n\nFor serious HTML/XML preprocessing you should rather use an external\nlibrary such as lxml or BeautifulSoup.", + "docstring": "Basic regexp based HTML / XML tag stripper function\n\nFor serious HTML/XML preprocessing you should rather use an external\nlibrary such as lxml or BeautifulSoup.\n\nParameters\n----------\ns : string\n The string to strip", + "code": "def strip_tags(s):\n \"\"\"Basic regexp based HTML / XML tag stripper function\n\n For serious HTML/XML preprocessing you should rather use an external\n library such as lxml or BeautifulSoup.\n\n Parameters\n ----------\n s : string\n The string to strip\n \"\"\"\n return re.compile(r\"<([^>]+)>\", flags=re.UNICODE).sub(\" \", s)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._base/SelectorMixin/_get_support_mask", + "name": "_get_support_mask", + "qname": "sklearn.feature_selection._base.SelectorMixin._get_support_mask", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._base/SelectorMixin/_get_support_mask/self", + "name": "self", + "qname": "sklearn.feature_selection._base.SelectorMixin._get_support_mask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get the boolean mask indicating which features are selected", + "docstring": "Get the boolean mask indicating which features are selected\n\nReturns\n-------\nsupport : boolean array of shape [# input features]\n An element is True iff its corresponding feature is selected for\n retention.", + "code": " @abstractmethod\n def _get_support_mask(self):\n \"\"\"\n Get the boolean mask indicating which features are selected\n\n Returns\n -------\n support : boolean array of shape [# input features]\n An element is True iff its corresponding feature is selected for\n retention.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.feature_selection._base/SelectorMixin/get_support", + "name": "get_support", + "qname": 
"sklearn.feature_selection._base.SelectorMixin.get_support", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._base/SelectorMixin/get_support/self", + "name": "self", + "qname": "sklearn.feature_selection._base.SelectorMixin.get_support.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._base/SelectorMixin/get_support/indices", + "name": "indices", + "qname": "sklearn.feature_selection._base.SelectorMixin.get_support.indices", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the return value will be an array of integers, rather\nthan a boolean mask." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get a mask, or integer index, of the features selected", + "docstring": "Get a mask, or integer index, of the features selected\n\nParameters\n----------\nindices : bool, default=False\n If True, the return value will be an array of integers, rather\n than a boolean mask.\n\nReturns\n-------\nsupport : array\n An index that selects the retained features from a feature vector.\n If `indices` is False, this is a boolean array of shape\n [# input features], in which an element is True iff its\n corresponding feature is selected for retention. If `indices` is\n True, this is an integer array of shape [# output features] whose\n values are indices into the input feature vector.", + "code": " def get_support(self, indices=False):\n \"\"\"\n Get a mask, or integer index, of the features selected\n\n Parameters\n ----------\n indices : bool, default=False\n If True, the return value will be an array of integers, rather\n than a boolean mask.\n\n Returns\n -------\n support : array\n An index that selects the retained features from a feature vector.\n If `indices` is False, this is a boolean array of shape\n [# input features], in which an element is True iff its\n corresponding feature is selected for retention. If `indices` is\n True, this is an integer array of shape [# output features] whose\n values are indices into the input feature vector.\n \"\"\"\n mask = self._get_support_mask()\n return mask if not indices else np.where(mask)[0]" + }, + { + "id": "scikit-learn/sklearn.feature_selection._base/SelectorMixin/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.feature_selection._base.SelectorMixin.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._base/SelectorMixin/inverse_transform/self", + "name": "self", + "qname": "sklearn.feature_selection._base.SelectorMixin.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._base/SelectorMixin/inverse_transform/X", + "name": "X", + "qname": "sklearn.feature_selection._base.SelectorMixin.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape [n_samples, n_selected_features]", + "default_value": "", + "description": "The input samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "n_selected_features]" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Reverse the transformation operation", + "docstring": "Reverse the transformation operation\n\nParameters\n----------\nX : array of shape [n_samples, n_selected_features]\n The input samples.\n\nReturns\n-------\nX_r : array of shape [n_samples, n_original_features]\n `X` with columns of zeros inserted where features would have\n been removed by :meth:`transform`.", + "code": " def inverse_transform(self, X):\n \"\"\"\n Reverse the transformation operation\n\n Parameters\n ----------\n X : array of shape [n_samples, n_selected_features]\n The input samples.\n\n Returns\n -------\n X_r : array of shape [n_samples, n_original_features]\n `X` with columns of zeros inserted where features would have\n been removed by :meth:`transform`.\n \"\"\"\n if issparse(X):\n X = X.tocsc()\n # insert additional entries in indptr:\n # e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]\n # col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]\n it = self.inverse_transform(np.diff(X.indptr).reshape(1, -1))\n col_nonzeros = it.ravel()\n indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])\n Xt = csc_matrix((X.data, X.indices, indptr),\n shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)\n return Xt\n\n support = self.get_support()\n X = check_array(X, dtype=None)\n if support.sum() != X.shape[1]:\n raise ValueError(\"X has a different shape than during fitting.\")\n\n if X.ndim == 1:\n X = X[None, :]\n Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype)\n Xt[:, support] = X\n return Xt" + }, + { + "id": "scikit-learn/sklearn.feature_selection._base/SelectorMixin/transform", + "name": "transform", + "qname": "sklearn.feature_selection._base.SelectorMixin.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._base/SelectorMixin/transform/self", + "name": "self", + "qname": "sklearn.feature_selection._base.SelectorMixin.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._base/SelectorMixin/transform/X", + "name": "X", + "qname": "sklearn.feature_selection._base.SelectorMixin.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape [n_samples, n_features]", + "default_value": "", + "description": "The input samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "n_features]" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Reduce X to the selected features.", + "docstring": "Reduce X to the selected features.\n\nParameters\n----------\nX : array of shape [n_samples, n_features]\n The input samples.\n\nReturns\n-------\nX_r : array of shape [n_samples, n_selected_features]\n The input samples with only the selected features.", + "code": " def transform(self, X):\n \"\"\"Reduce X to the selected features.\n\n Parameters\n ----------\n X : array of shape [n_samples, n_features]\n The input samples.\n\n Returns\n -------\n X_r : array of shape [n_samples, n_selected_features]\n The input samples with only the selected features.\n \"\"\"\n # note: we use _safe_tags instead of _get_tags because this is a\n # public Mixin.\n X = check_array(\n X,\n dtype=None,\n accept_sparse=\"csr\",\n force_all_finite=not _safe_tags(self, key=\"allow_nan\"),\n )\n mask = self.get_support()\n if not mask.any():\n warn(\"No features were selected: either the data is\"\n \" too noisy or the selection test too strict.\",\n UserWarning)\n return np.empty(0).reshape((X.shape[0], 0))\n if len(mask) != X.shape[1]:\n raise ValueError(\"X has a different shape than during fitting.\")\n return X[:, safe_mask(X, mask)]" + }, + { + "id": "scikit-learn/sklearn.feature_selection._base/_get_feature_importances", + "name": "_get_feature_importances", + "qname": "sklearn.feature_selection._base._get_feature_importances", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._base/_get_feature_importances/estimator", + "name": "estimator", + "qname": "sklearn.feature_selection._base._get_feature_importances.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator", + "default_value": "", + "description": "A scikit-learn estimator from which we want to get the feature\nimportances." + }, + "type": { + "kind": "NamedType", + "name": "estimator" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._base/_get_feature_importances/getter", + "name": "getter", + "qname": "sklearn.feature_selection._base._get_feature_importances.getter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "\"auto\", str or callable", + "default_value": "", + "description": "An attribute or a callable to get the feature importance. If `\"auto\"`,\n`estimator` is expected to expose `coef_` or `feature_importances_`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "\"auto\"" + }, + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._base/_get_feature_importances/transform_func", + "name": "transform_func", + "qname": "sklearn.feature_selection._base._get_feature_importances.transform_func", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{\"norm\", \"square\"}", + "default_value": "None", + "description": "The transform to apply to the feature importances. By default (`None`)\nno transformation is applied." 
+ }, + "type": { + "kind": "EnumType", + "values": ["square", "norm"] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._base/_get_feature_importances/norm_order", + "name": "norm_order", + "qname": "sklearn.feature_selection._base._get_feature_importances.norm_order", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "The norm order to apply when `transform_func=\"norm\"`. Only applied\nwhen `importances.ndim > 1`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Retrieve and aggregate (ndim > 1) the feature importances\nfrom an estimator. Also optionally applies transformation.", + "docstring": "Retrieve and aggregate (ndim > 1) the feature importances\nfrom an estimator. Also optionally applies transformation.\n\nParameters\n----------\nestimator : estimator\n A scikit-learn estimator from which we want to get the feature\n importances.\n\ngetter : \"auto\", str or callable\n An attribute or a callable to get the feature importance. If `\"auto\"`,\n `estimator` is expected to expose `coef_` or `feature_importances_`.\n\ntransform_func : {\"norm\", \"square\"}, default=None\n The transform to apply to the feature importances. By default (`None`)\n no transformation is applied.\n\nnorm_order : int, default=1\n The norm order to apply when `transform_func=\"norm\"`. Only applied\n when `importances.ndim > 1`.\n\nReturns\n-------\nimportances : ndarray of shape (n_features,)\n The feature importances, optionally transformed.", + "code": "def _get_feature_importances(estimator, getter, transform_func=None,\n norm_order=1):\n \"\"\"\n Retrieve and aggregate (ndim > 1) the feature importances\n from an estimator. Also optionally applies transformation.\n\n Parameters\n ----------\n estimator : estimator\n A scikit-learn estimator from which we want to get the feature\n importances.\n\n getter : \"auto\", str or callable\n An attribute or a callable to get the feature importance. If `\"auto\"`,\n `estimator` is expected to expose `coef_` or `feature_importances_`.\n\n transform_func : {\"norm\", \"square\"}, default=None\n The transform to apply to the feature importances. By default (`None`)\n no transformation is applied.\n\n norm_order : int, default=1\n The norm order to apply when `transform_func=\"norm\"`. Only applied\n when `importances.ndim > 1`.\n\n Returns\n -------\n importances : ndarray of shape (n_features,)\n The feature importances, optionally transformed.\n \"\"\"\n if isinstance(getter, str):\n if getter == 'auto':\n if hasattr(estimator, 'coef_'):\n getter = attrgetter('coef_')\n elif hasattr(estimator, 'feature_importances_'):\n getter = attrgetter('feature_importances_')\n else:\n raise ValueError(\n f\"when `importance_getter=='auto'`, the underlying \"\n f\"estimator {estimator.__class__.__name__} should have \"\n f\"`coef_` or `feature_importances_` attribute. 
Either \"\n f\"pass a fitted estimator to the feature selector or call fit \"\n f\"before calling transform.\"\n )\n else:\n getter = attrgetter(getter)\n elif not callable(getter):\n raise ValueError(\n '`importance_getter` has to be a string or `callable`'\n )\n importances = getter(estimator)\n\n if transform_func is None:\n return importances\n elif transform_func == \"norm\":\n if importances.ndim == 1:\n importances = np.abs(importances)\n else:\n importances = np.linalg.norm(importances, axis=0,\n ord=norm_order)\n elif transform_func == \"square\":\n if importances.ndim == 1:\n importances = safe_sqr(importances)\n else:\n importances = safe_sqr(importances).sum(axis=0)\n else:\n raise ValueError(\"Valid values for `transform_func` are \" +\n \"None, 'norm' and 'square'. Only these \" +\n \"transformations are currently supported.\")\n\n return importances" + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/__init__", + "name": "__init__", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/__init__/self", + "name": "self", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/__init__/estimator", + "name": "estimator", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "The base estimator from which the transformer is built.\nThis can be either a fitted (if ``prefit`` is set to True)\nor a non-fitted estimator. The estimator should have a\n``feature_importances_`` or ``coef_`` attribute after fitting.\nOtherwise, the ``importance_getter`` parameter should be used." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/__init__/threshold", + "name": "threshold", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.__init__.threshold", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string or float", + "default_value": "None", + "description": "The threshold value to use for feature selection. Features whose\nimportance is greater or equal are kept while the others are\ndiscarded. If \"median\" (resp. \"mean\"), then the ``threshold`` value is\nthe median (resp. the mean) of the feature importances. A scaling\nfactor (e.g., \"1.25*mean\") may also be used. If None and if the\nestimator has a parameter penalty set to l1, either explicitly\nor implicitly (e.g., Lasso), the threshold used is 1e-5.\nOtherwise, \"mean\" is used by default." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/__init__/prefit", + "name": "prefit", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.__init__.prefit", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether a prefit model is expected to be passed into the constructor\ndirectly or not. If True, ``transform`` must be called directly\nand SelectFromModel cannot be used with ``cross_val_score``,\n``GridSearchCV`` and similar utilities that clone the estimator.\nOtherwise train the model using ``fit`` and then ``transform`` to do\nfeature selection." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/__init__/norm_order", + "name": "norm_order", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.__init__.norm_order", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "non-zero int, inf, -inf", + "default_value": "1", + "description": "Order of the norm used to filter the vectors of coefficients below\n``threshold`` in the case where the ``coef_`` attribute of the\nestimator is of dimension 2." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "non-zero int" + }, + { + "kind": "NamedType", + "name": "inf" + }, + { + "kind": "NamedType", + "name": "-inf" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/__init__/max_features", + "name": "max_features", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.__init__.max_features", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum number of features to select.\nTo only select based on ``max_features``, set ``threshold=-np.inf``.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/__init__/importance_getter", + "name": "importance_getter", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.__init__.importance_getter", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'auto'", + "description": "If 'auto', uses the feature importance either through a ``coef_``\nattribute or ``feature_importances_`` attribute of estimator.\n\nAlso accepts a string that specifies an attribute name/path\nfor extracting feature importance (implemented with `attrgetter`).\nFor example, give `regressor_.coef_` in case of\n:class:`~sklearn.compose.TransformedTargetRegressor` or\n`named_steps.clf.feature_importances_` in case of\n:class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\nIf `callable`, overrides the default feature importance getter.\nThe callable is passed with the fitted estimator and it should\nreturn importance for each feature.\n\n.. 
versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Meta-transformer for selecting features based on importance weights.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimator, *, threshold=None, prefit=False,\n norm_order=1, max_features=None,\n importance_getter='auto'):\n self.estimator = estimator\n self.threshold = threshold\n self.prefit = prefit\n self.importance_getter = importance_getter\n self.norm_order = norm_order\n self.max_features = max_features" + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/_get_support_mask", + "name": "_get_support_mask", + "qname": "sklearn.feature_selection._from_model.SelectFromModel._get_support_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/_get_support_mask/self", + "name": "self", + "qname": "sklearn.feature_selection._from_model.SelectFromModel._get_support_mask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_support_mask(self):\n # SelectFromModel can directly call on transform.\n if self.prefit:\n estimator = self.estimator\n elif hasattr(self, 'estimator_'):\n estimator = self.estimator_\n else:\n raise ValueError('Either fit the model before transform or set'\n ' \"prefit=True\" while passing the fitted'\n ' estimator to the constructor.')\n scores = _get_feature_importances(\n estimator=estimator, getter=self.importance_getter,\n transform_func='norm', norm_order=self.norm_order)\n threshold = _calculate_threshold(estimator, scores, self.threshold)\n if self.max_features is not None:\n mask = np.zeros_like(scores, dtype=bool)\n candidate_indices = \\\n np.argsort(-scores, kind='mergesort')[:self.max_features]\n mask[candidate_indices] = True\n else:\n mask = np.ones_like(scores, dtype=bool)\n mask[scores < threshold] = False\n return mask" + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/_more_tags", + "name": "_more_tags", + "qname": "sklearn.feature_selection._from_model.SelectFromModel._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/_more_tags/self", + "name": "self", + "qname": "sklearn.feature_selection._from_model.SelectFromModel._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n 'allow_nan': _safe_tags(self.estimator, key=\"allow_nan\")\n }" + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/fit", + "name": "fit", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/fit/self", + 
"name": "self", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/fit/X", + "name": "X", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/fit/y", + "name": "y", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The target values (integers that correspond to classes in\nclassification, real numbers in regression)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/fit/fit_params", + "name": "fit_params", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.fit.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "Other estimator specific parameters", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Other estimator specific parameters" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the SelectFromModel meta-transformer.", + "docstring": "Fit the SelectFromModel meta-transformer.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,), default=None\n The target values (integers that correspond to classes in\n classification, real numbers in regression).\n\n**fit_params : Other estimator specific parameters\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y=None, **fit_params):\n \"\"\"Fit the SelectFromModel meta-transformer.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The training input samples.\n\n y : array-like of shape (n_samples,), default=None\n The target values (integers that correspond to classes in\n classification, real numbers in regression).\n\n **fit_params : Other estimator specific parameters\n\n Returns\n -------\n self : object\n \"\"\"\n if self.max_features is not None:\n if not isinstance(self.max_features, numbers.Integral):\n raise TypeError(\"'max_features' should be an integer between\"\n \" 0 and {} features. 
Got {!r} instead.\"\n .format(X.shape[1], self.max_features))\n elif self.max_features < 0 or self.max_features > X.shape[1]:\n raise ValueError(\"'max_features' should be between 0 and {} \"\n \"features. Got {} instead.\"\n .format(X.shape[1], self.max_features))\n\n if self.prefit:\n raise NotFittedError(\n \"Since 'prefit=True', call transform directly\")\n self.estimator_ = clone(self.estimator)\n self.estimator_.fit(X, y, **fit_params)\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/n_features_in_@getter", + "name": "n_features_in_", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.n_features_in_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/n_features_in_/self", + "name": "self", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.n_features_in_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def n_features_in_(self):\n # For consistency with other estimators we raise an AttributeError so\n # that hasattr() fails if the estimator isn't fitted.\n try:\n check_is_fitted(self)\n except NotFittedError as nfe:\n raise AttributeError(\n \"{} object has no n_features_in_ attribute.\"\n .format(self.__class__.__name__)\n ) from nfe\n\n return self.estimator_.n_features_in_" + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/partial_fit", + "name": "partial_fit", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.partial_fit", + "decorators": ["if_delegate_has_method('estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/partial_fit/self", + "name": "self", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/partial_fit/X", + "name": "X", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/partial_fit/y", + "name": "y", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.partial_fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The target values (integers that correspond to classes in\nclassification, real numbers in regression)." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/partial_fit/fit_params", + "name": "fit_params", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.partial_fit.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "Other estimator specific parameters", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Other estimator specific parameters" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the SelectFromModel meta-transformer only once.", + "docstring": "Fit the SelectFromModel meta-transformer only once.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,), default=None\n The target values (integers that correspond to classes in\n classification, real numbers in regression).\n\n**fit_params : Other estimator specific parameters\n\nReturns\n-------\nself : object", + "code": " @if_delegate_has_method('estimator')\n def partial_fit(self, X, y=None, **fit_params):\n \"\"\"Fit the SelectFromModel meta-transformer only once.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The training input samples.\n\n y : array-like of shape (n_samples,), default=None\n The target values (integers that correspond to classes in\n classification, real numbers in regression).\n\n **fit_params : Other estimator specific parameters\n\n Returns\n -------\n self : object\n \"\"\"\n if self.prefit:\n raise NotFittedError(\n \"Since 'prefit=True', call transform directly\")\n if not hasattr(self, \"estimator_\"):\n self.estimator_ = clone(self.estimator)\n self.estimator_.partial_fit(X, y, **fit_params)\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/threshold_@getter", + "name": "threshold_", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.threshold_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._from_model/SelectFromModel/threshold_/self", + "name": "self", + "qname": "sklearn.feature_selection._from_model.SelectFromModel.threshold_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def threshold_(self):\n scores = _get_feature_importances(estimator=self.estimator_,\n getter=self.importance_getter,\n transform_func='norm',\n norm_order=self.norm_order)\n return _calculate_threshold(self.estimator, scores, self.threshold)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/_calculate_threshold", + "name": "_calculate_threshold", + "qname": "sklearn.feature_selection._from_model._calculate_threshold", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._from_model/_calculate_threshold/estimator", + "name": "estimator", + "qname": "sklearn.feature_selection._from_model._calculate_threshold.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} 
+ }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/_calculate_threshold/importances", + "name": "importances", + "qname": "sklearn.feature_selection._from_model._calculate_threshold.importances", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._from_model/_calculate_threshold/threshold", + "name": "threshold", + "qname": "sklearn.feature_selection._from_model._calculate_threshold.threshold", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Interpret the threshold value", + "docstring": "Interpret the threshold value", + "code": "def _calculate_threshold(estimator, importances, threshold):\n \"\"\"Interpret the threshold value\"\"\"\n\n if threshold is None:\n # determine default from estimator\n est_name = estimator.__class__.__name__\n if ((hasattr(estimator, \"penalty\") and estimator.penalty == \"l1\") or\n \"Lasso\" in est_name):\n # the natural default threshold is 0 when l1 penalty was used\n threshold = 1e-5\n else:\n threshold = \"mean\"\n\n if isinstance(threshold, str):\n if \"*\" in threshold:\n scale, reference = threshold.split(\"*\")\n scale = float(scale.strip())\n reference = reference.strip()\n\n if reference == \"median\":\n reference = np.median(importances)\n elif reference == \"mean\":\n reference = np.mean(importances)\n else:\n raise ValueError(\"Unknown reference: \" + reference)\n\n threshold = scale * reference\n\n elif threshold == \"median\":\n threshold = np.median(importances)\n\n elif threshold == \"mean\":\n threshold = np.mean(importances)\n\n else:\n raise ValueError(\"Expected threshold='mean' or threshold='median' \"\n \"got %s\" % threshold)\n\n else:\n threshold = float(threshold)\n\n return threshold" + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi", + "name": "_compute_mi", + "qname": "sklearn.feature_selection._mutual_info._compute_mi", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi/x", + "name": "x", + "qname": "sklearn.feature_selection._mutual_info._compute_mi.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi/y", + "name": "y", + "qname": "sklearn.feature_selection._mutual_info._compute_mi.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi/x_discrete", + "name": "x_discrete", + "qname": "sklearn.feature_selection._mutual_info._compute_mi.x_discrete", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi/y_discrete", + "name": "y_discrete", + "qname": "sklearn.feature_selection._mutual_info._compute_mi.y_discrete", + 
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.feature_selection._mutual_info._compute_mi.n_neighbors", + "default_value": "3", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute mutual information between two variables.\n\nThis is a simple wrapper which selects a proper function to call based on\nwhether `x` and `y` are discrete or not.", + "docstring": "Compute mutual information between two variables.\n\nThis is a simple wrapper which selects a proper function to call based on\nwhether `x` and `y` are discrete or not.", + "code": "def _compute_mi(x, y, x_discrete, y_discrete, n_neighbors=3):\n \"\"\"Compute mutual information between two variables.\n\n This is a simple wrapper which selects a proper function to call based on\n whether `x` and `y` are discrete or not.\n \"\"\"\n if x_discrete and y_discrete:\n return mutual_info_score(x, y)\n elif x_discrete and not y_discrete:\n return _compute_mi_cd(y, x, n_neighbors)\n elif not x_discrete and y_discrete:\n return _compute_mi_cd(x, y, n_neighbors)\n else:\n return _compute_mi_cc(x, y, n_neighbors)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi_cc", + "name": "_compute_mi_cc", + "qname": "sklearn.feature_selection._mutual_info._compute_mi_cc", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi_cc/x", + "name": "x", + "qname": "sklearn.feature_selection._mutual_info._compute_mi_cc.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples,)", + "default_value": "", + "description": "Samples of two continuous random variables, must have an identical\nshape." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi_cc/y", + "name": "y", + "qname": "sklearn.feature_selection._mutual_info._compute_mi_cc.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples,)", + "default_value": "", + "description": "Samples of two continuous random variables, must have an identical\nshape." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi_cc/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.feature_selection._mutual_info._compute_mi_cc.n_neighbors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of nearest neighbors to search for each point, see [1]_." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute mutual information between two continuous variables.", + "docstring": "Compute mutual information between two continuous variables.\n\nParameters\n----------\nx, y : ndarray, shape (n_samples,)\n Samples of two continuous random variables, must have an identical\n shape.\n\nn_neighbors : int\n Number of nearest neighbors to search for each point, see [1]_.\n\nReturns\n-------\nmi : float\n Estimated mutual information. If it turned out to be negative it is\n replaced by 0.\n\nNotes\n-----\nTrue mutual information can't be negative. If its estimate by a numerical\nmethod is negative, it means (providing the method is adequate) that the\nmutual information is close to 0 and replacing it by 0 is a reasonable\nstrategy.\n\nReferences\n----------\n.. [1] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.", + "code": "def _compute_mi_cc(x, y, n_neighbors):\n \"\"\"Compute mutual information between two continuous variables.\n\n Parameters\n ----------\n x, y : ndarray, shape (n_samples,)\n Samples of two continuous random variables, must have an identical\n shape.\n\n n_neighbors : int\n Number of nearest neighbors to search for each point, see [1]_.\n\n Returns\n -------\n mi : float\n Estimated mutual information. If it turned out to be negative it is\n replaced by 0.\n\n Notes\n -----\n True mutual information can't be negative. If its estimate by a numerical\n method is negative, it means (providing the method is adequate) that the\n mutual information is close to 0 and replacing it by 0 is a reasonable\n strategy.\n\n References\n ----------\n .. [1] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.\n \"\"\"\n n_samples = x.size\n\n x = x.reshape((-1, 1))\n y = y.reshape((-1, 1))\n xy = np.hstack((x, y))\n\n # Here we rely on NearestNeighbors to select the fastest algorithm.\n nn = NearestNeighbors(metric='chebyshev', n_neighbors=n_neighbors)\n\n nn.fit(xy)\n radius = nn.kneighbors()[0]\n radius = np.nextafter(radius[:, -1], 0)\n\n # KDTree is explicitly fit to allow for the querying of number of\n # neighbors within a specified radius\n kd = KDTree(x, metric='chebyshev')\n nx = kd.query_radius(x, radius, count_only=True, return_distance=False)\n nx = np.array(nx) - 1.0\n\n kd = KDTree(y, metric='chebyshev')\n ny = kd.query_radius(y, radius, count_only=True, return_distance=False)\n ny = np.array(ny) - 1.0\n\n mi = (digamma(n_samples) + digamma(n_neighbors) -\n np.mean(digamma(nx + 1)) - np.mean(digamma(ny + 1)))\n\n return max(0, mi)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi_cd", + "name": "_compute_mi_cd", + "qname": "sklearn.feature_selection._mutual_info._compute_mi_cd", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi_cd/c", + "name": "c", + "qname": "sklearn.feature_selection._mutual_info._compute_mi_cd.c", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples,)", + "default_value": "", + "description": "Samples of a continuous random variable." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi_cd/d", + "name": "d", + "qname": "sklearn.feature_selection._mutual_info._compute_mi_cd.d", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples,)", + "default_value": "", + "description": "Samples of a discrete random variable." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_compute_mi_cd/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.feature_selection._mutual_info._compute_mi_cd.n_neighbors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of nearest neighbors to search for each point, see [1]_." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute mutual information between continuous and discrete variables.", + "docstring": "Compute mutual information between continuous and discrete variables.\n\nParameters\n----------\nc : ndarray, shape (n_samples,)\n Samples of a continuous random variable.\n\nd : ndarray, shape (n_samples,)\n Samples of a discrete random variable.\n\nn_neighbors : int\n Number of nearest neighbors to search for each point, see [1]_.\n\nReturns\n-------\nmi : float\n Estimated mutual information. If it turned out to be negative it is\n replaced by 0.\n\nNotes\n-----\nTrue mutual information can't be negative. If its estimate by a numerical\nmethod is negative, it means (providing the method is adequate) that the\nmutual information is close to 0 and replacing it by 0 is a reasonable\nstrategy.\n\nReferences\n----------\n.. [1] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014.", + "code": "def _compute_mi_cd(c, d, n_neighbors):\n \"\"\"Compute mutual information between continuous and discrete variables.\n\n Parameters\n ----------\n c : ndarray, shape (n_samples,)\n Samples of a continuous random variable.\n\n d : ndarray, shape (n_samples,)\n Samples of a discrete random variable.\n\n n_neighbors : int\n Number of nearest neighbors to search for each point, see [1]_.\n\n Returns\n -------\n mi : float\n Estimated mutual information. If it turned out to be negative it is\n replaced by 0.\n\n Notes\n -----\n True mutual information can't be negative. If its estimate by a numerical\n method is negative, it means (providing the method is adequate) that the\n mutual information is close to 0 and replacing it by 0 is a reasonable\n strategy.\n\n References\n ----------\n .. [1] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". 
PLoS ONE 9(2), 2014.\n \"\"\"\n n_samples = c.shape[0]\n c = c.reshape((-1, 1))\n\n radius = np.empty(n_samples)\n label_counts = np.empty(n_samples)\n k_all = np.empty(n_samples)\n nn = NearestNeighbors()\n for label in np.unique(d):\n mask = d == label\n count = np.sum(mask)\n if count > 1:\n k = min(n_neighbors, count - 1)\n nn.set_params(n_neighbors=k)\n nn.fit(c[mask])\n r = nn.kneighbors()[0]\n radius[mask] = np.nextafter(r[:, -1], 0)\n k_all[mask] = k\n label_counts[mask] = count\n\n # Ignore points with unique labels.\n mask = label_counts > 1\n n_samples = np.sum(mask)\n label_counts = label_counts[mask]\n k_all = k_all[mask]\n c = c[mask]\n radius = radius[mask]\n\n kd = KDTree(c)\n m_all = kd.query_radius(c, radius, count_only=True, return_distance=False)\n m_all = np.array(m_all) - 1.0\n\n mi = (digamma(n_samples) + np.mean(digamma(k_all)) -\n np.mean(digamma(label_counts)) -\n np.mean(digamma(m_all + 1)))\n\n return max(0, mi)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_estimate_mi", + "name": "_estimate_mi", + "qname": "sklearn.feature_selection._mutual_info._estimate_mi", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_estimate_mi/X", + "name": "X", + "qname": "sklearn.feature_selection._mutual_info._estimate_mi.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or sparse matrix, shape (n_samples, n_features)", + "default_value": "", + "description": "Feature matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "sparse matrix" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_estimate_mi/y", + "name": "y", + "qname": "sklearn.feature_selection._mutual_info._estimate_mi.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target vector." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_estimate_mi/discrete_features", + "name": "discrete_features", + "qname": "sklearn.feature_selection._mutual_info._estimate_mi.discrete_features", + "default_value": "'auto'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'auto', bool, array-like}", + "default_value": "'auto'", + "description": "If bool, then determines whether to consider all features discrete\nor continuous. If array, then it should be either a boolean mask\nwith shape (n_features,) or array with indices of discrete features.\nIf 'auto', it is assigned to False for dense `X` and to True for\nsparse `X`." + }, + "type": { + "kind": "EnumType", + "values": ["auto"] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_estimate_mi/discrete_target", + "name": "discrete_target", + "qname": "sklearn.feature_selection._mutual_info._estimate_mi.discrete_target", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to consider `y` as a discrete variable." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_estimate_mi/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.feature_selection._mutual_info._estimate_mi.n_neighbors", + "default_value": "3", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Number of neighbors to use for MI estimation for continuous variables,\nsee [1]_ and [2]_. Higher values reduce variance of the estimation, but\ncould introduce a bias." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_estimate_mi/copy", + "name": "copy", + "qname": "sklearn.feature_selection._mutual_info._estimate_mi.copy", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to make a copy of the given data. If set to False, the initial\ndata will be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_estimate_mi/random_state", + "name": "random_state", + "qname": "sklearn.feature_selection._mutual_info._estimate_mi.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for adding small noise to\ncontinuous variables in order to remove repeated values.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate mutual information between the features and the target.", + "docstring": "Estimate mutual information between the features and the target.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Feature matrix.\n\ny : array-like of shape (n_samples,)\n Target vector.\n\ndiscrete_features : {'auto', bool, array-like}, default='auto'\n If bool, then determines whether to consider all features discrete\n or continuous. If array, then it should be either a boolean mask\n with shape (n_features,) or array with indices of discrete features.\n If 'auto', it is assigned to False for dense `X` and to True for\n sparse `X`.\n\ndiscrete_target : bool, default=False\n Whether to consider `y` as a discrete variable.\n\nn_neighbors : int, default=3\n Number of neighbors to use for MI estimation for continuous variables,\n see [1]_ and [2]_. Higher values reduce variance of the estimation, but\n could introduce a bias.\n\ncopy : bool, default=True\n Whether to make a copy of the given data. 
If set to False, the initial\n data will be overwritten.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for adding small noise to\n continuous variables in order to remove repeated values.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nmi : ndarray, shape (n_features,)\n Estimated mutual information between each feature and the target.\n A negative value will be replaced by 0.\n\nReferences\n----------\n.. [1] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.\n.. [2] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014.", + "code": "def _estimate_mi(X, y, discrete_features='auto', discrete_target=False,\n n_neighbors=3, copy=True, random_state=None):\n \"\"\"Estimate mutual information between the features and the target.\n\n Parameters\n ----------\n X : array-like or sparse matrix, shape (n_samples, n_features)\n Feature matrix.\n\n y : array-like of shape (n_samples,)\n Target vector.\n\n discrete_features : {'auto', bool, array-like}, default='auto'\n If bool, then determines whether to consider all features discrete\n or continuous. If array, then it should be either a boolean mask\n with shape (n_features,) or array with indices of discrete features.\n If 'auto', it is assigned to False for dense `X` and to True for\n sparse `X`.\n\n discrete_target : bool, default=False\n Whether to consider `y` as a discrete variable.\n\n n_neighbors : int, default=3\n Number of neighbors to use for MI estimation for continuous variables,\n see [1]_ and [2]_. Higher values reduce variance of the estimation, but\n could introduce a bias.\n\n copy : bool, default=True\n Whether to make a copy of the given data. If set to False, the initial\n data will be overwritten.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for adding small noise to\n continuous variables in order to remove repeated values.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n mi : ndarray, shape (n_features,)\n Estimated mutual information between each feature and the target.\n A negative value will be replaced by 0.\n\n References\n ----------\n .. [1] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.\n .. [2] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". 
PLoS ONE 9(2), 2014.\n \"\"\"\n X, y = check_X_y(X, y, accept_sparse='csc', y_numeric=not discrete_target)\n n_samples, n_features = X.shape\n\n if isinstance(discrete_features, (str, bool)):\n if isinstance(discrete_features, str):\n if discrete_features == 'auto':\n discrete_features = issparse(X)\n else:\n raise ValueError(\"Invalid string value for discrete_features.\")\n discrete_mask = np.empty(n_features, dtype=bool)\n discrete_mask.fill(discrete_features)\n else:\n discrete_features = check_array(discrete_features, ensure_2d=False)\n if discrete_features.dtype != 'bool':\n discrete_mask = np.zeros(n_features, dtype=bool)\n discrete_mask[discrete_features] = True\n else:\n discrete_mask = discrete_features\n\n continuous_mask = ~discrete_mask\n if np.any(continuous_mask) and issparse(X):\n raise ValueError(\"Sparse matrix `X` can't have continuous features.\")\n\n rng = check_random_state(random_state)\n if np.any(continuous_mask):\n if copy:\n X = X.copy()\n\n if not discrete_target:\n X[:, continuous_mask] = scale(X[:, continuous_mask],\n with_mean=False, copy=False)\n\n # Add small noise to continuous features as advised in Kraskov et. al.\n X = X.astype(float, **_astype_copy_false(X))\n means = np.maximum(1, np.mean(np.abs(X[:, continuous_mask]), axis=0))\n X[:, continuous_mask] += 1e-10 * means * rng.randn(\n n_samples, np.sum(continuous_mask))\n\n if not discrete_target:\n y = scale(y, with_mean=False)\n y += 1e-10 * np.maximum(1, np.mean(np.abs(y))) * rng.randn(n_samples)\n\n mi = [_compute_mi(x, y, discrete_feature, discrete_target, n_neighbors) for\n x, discrete_feature in zip(_iterate_columns(X), discrete_mask)]\n\n return np.array(mi)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_iterate_columns", + "name": "_iterate_columns", + "qname": "sklearn.feature_selection._mutual_info._iterate_columns", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_iterate_columns/X", + "name": "X", + "qname": "sklearn.feature_selection._mutual_info._iterate_columns.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray or csc_matrix, shape (n_samples, n_features)", + "default_value": "", + "description": "Matrix over which to iterate." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "csc_matrix" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/_iterate_columns/columns", + "name": "columns", + "qname": "sklearn.feature_selection._mutual_info._iterate_columns.columns", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "iterable or None", + "default_value": "None", + "description": "Indices of columns to iterate over. If None, iterate over all columns." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "iterable" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Iterate over columns of a matrix.", + "docstring": "Iterate over columns of a matrix.\n\nParameters\n----------\nX : ndarray or csc_matrix, shape (n_samples, n_features)\n Matrix over which to iterate.\n\ncolumns : iterable or None, default=None\n Indices of columns to iterate over. 
If None, iterate over all columns.\n\nYields\n------\nx : ndarray, shape (n_samples,)\n Columns of `X` in dense format.", + "code": "def _iterate_columns(X, columns=None):\n \"\"\"Iterate over columns of a matrix.\n\n Parameters\n ----------\n X : ndarray or csc_matrix, shape (n_samples, n_features)\n Matrix over which to iterate.\n\n columns : iterable or None, default=None\n Indices of columns to iterate over. If None, iterate over all columns.\n\n Yields\n ------\n x : ndarray, shape (n_samples,)\n Columns of `X` in dense format.\n \"\"\"\n if columns is None:\n columns = range(X.shape[1])\n\n if issparse(X):\n for i in columns:\n x = np.zeros(X.shape[0])\n start_ptr, end_ptr = X.indptr[i], X.indptr[i + 1]\n x[X.indices[start_ptr:end_ptr]] = X.data[start_ptr:end_ptr]\n yield x\n else:\n for i in columns:\n yield X[:, i]" + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_classif", + "name": "mutual_info_classif", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_classif", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_classif/X", + "name": "X", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_classif.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or sparse matrix, shape (n_samples, n_features)", + "default_value": "", + "description": "Feature matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "sparse matrix" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_classif/y", + "name": "y", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_classif.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target vector." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_classif/discrete_features", + "name": "discrete_features", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_classif.discrete_features", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', bool, array-like}", + "default_value": "'auto'", + "description": "If bool, then determines whether to consider all features discrete\nor continuous. If array, then it should be either a boolean mask\nwith shape (n_features,) or array with indices of discrete features.\nIf 'auto', it is assigned to False for dense `X` and to True for\nsparse `X`." + }, + "type": { + "kind": "EnumType", + "values": ["auto"] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_classif/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_classif.n_neighbors", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Number of neighbors to use for MI estimation for continuous variables,\nsee [2]_ and [3]_. Higher values reduce variance of the estimation, but\ncould introduce a bias." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_classif/copy", + "name": "copy", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_classif.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to make a copy of the given data. If set to False, the initial\ndata will be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_classif/random_state", + "name": "random_state", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_classif.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for adding small noise to\ncontinuous variables in order to remove repeated values.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate mutual information for a discrete target variable.\n\nMutual information (MI) [1]_ between two random variables is a non-negative\nvalue, which measures the dependency between the variables. It is equal\nto zero if and only if two random variables are independent, and higher\nvalues mean higher dependency.\n\nThe function relies on nonparametric methods based on entropy estimation\nfrom k-nearest neighbors distances as described in [2]_ and [3]_. Both\nmethods are based on the idea originally proposed in [4]_.\n\nIt can be used for univariate features selection, read more in the\n:ref:`User Guide `.", + "docstring": "Estimate mutual information for a discrete target variable.\n\nMutual information (MI) [1]_ between two random variables is a non-negative\nvalue, which measures the dependency between the variables. It is equal\nto zero if and only if two random variables are independent, and higher\nvalues mean higher dependency.\n\nThe function relies on nonparametric methods based on entropy estimation\nfrom k-nearest neighbors distances as described in [2]_ and [3]_. Both\nmethods are based on the idea originally proposed in [4]_.\n\nIt can be used for univariate features selection, read more in the\n:ref:`User Guide `.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Feature matrix.\n\ny : array-like of shape (n_samples,)\n Target vector.\n\ndiscrete_features : {'auto', bool, array-like}, default='auto'\n If bool, then determines whether to consider all features discrete\n or continuous. If array, then it should be either a boolean mask\n with shape (n_features,) or array with indices of discrete features.\n If 'auto', it is assigned to False for dense `X` and to True for\n sparse `X`.\n\nn_neighbors : int, default=3\n Number of neighbors to use for MI estimation for continuous variables,\n see [2]_ and [3]_. Higher values reduce variance of the estimation, but\n could introduce a bias.\n\ncopy : bool, default=True\n Whether to make a copy of the given data. 
If set to False, the initial\n data will be overwritten.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for adding small noise to\n continuous variables in order to remove repeated values.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nmi : ndarray, shape (n_features,)\n Estimated mutual information between each feature and the target.\n\nNotes\n-----\n1. The term \"discrete features\" is used instead of naming them\n \"categorical\", because it describes the essence more accurately.\n For example, pixel intensities of an image are discrete features\n (but hardly categorical) and you will get better results if mark them\n as such. Also note, that treating a continuous variable as discrete and\n vice versa will usually give incorrect results, so be attentive about\n that.\n2. True mutual information can't be negative. If its estimate turns out\n to be negative, it is replaced by zero.\n\nReferences\n----------\n.. [1] `Mutual Information\n `_\n on Wikipedia.\n.. [2] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.\n.. [3] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014.\n.. [4] L. F. Kozachenko, N. N. Leonenko, \"Sample Estimate of the Entropy\n of a Random Vector\", Probl. Peredachi Inf., 23:2 (1987), 9-16", + "code": "@_deprecate_positional_args\ndef mutual_info_classif(X, y, *, discrete_features='auto', n_neighbors=3,\n copy=True, random_state=None):\n \"\"\"Estimate mutual information for a discrete target variable.\n\n Mutual information (MI) [1]_ between two random variables is a non-negative\n value, which measures the dependency between the variables. It is equal\n to zero if and only if two random variables are independent, and higher\n values mean higher dependency.\n\n The function relies on nonparametric methods based on entropy estimation\n from k-nearest neighbors distances as described in [2]_ and [3]_. Both\n methods are based on the idea originally proposed in [4]_.\n\n It can be used for univariate features selection, read more in the\n :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like or sparse matrix, shape (n_samples, n_features)\n Feature matrix.\n\n y : array-like of shape (n_samples,)\n Target vector.\n\n discrete_features : {'auto', bool, array-like}, default='auto'\n If bool, then determines whether to consider all features discrete\n or continuous. If array, then it should be either a boolean mask\n with shape (n_features,) or array with indices of discrete features.\n If 'auto', it is assigned to False for dense `X` and to True for\n sparse `X`.\n\n n_neighbors : int, default=3\n Number of neighbors to use for MI estimation for continuous variables,\n see [2]_ and [3]_. Higher values reduce variance of the estimation, but\n could introduce a bias.\n\n copy : bool, default=True\n Whether to make a copy of the given data. If set to False, the initial\n data will be overwritten.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for adding small noise to\n continuous variables in order to remove repeated values.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n mi : ndarray, shape (n_features,)\n Estimated mutual information between each feature and the target.\n\n Notes\n -----\n 1.
The term \"discrete features\" is used instead of naming them\n \"categorical\", because it describes the essence more accurately.\n For example, pixel intensities of an image are discrete features\n (but hardly categorical) and you will get better results if mark them\n as such. Also note, that treating a continuous variable as discrete and\n vice versa will usually give incorrect results, so be attentive about\n that.\n 2. True mutual information can't be negative. If its estimate turns out\n to be negative, it is replaced by zero.\n\n References\n ----------\n .. [1] `Mutual Information\n `_\n on Wikipedia.\n .. [2] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.\n .. [3] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014.\n .. [4] L. F. Kozachenko, N. N. Leonenko, \"Sample Estimate of the Entropy\n of a Random Vector\", Probl. Peredachi Inf., 23:2 (1987), 9-16\n \"\"\"\n check_classification_targets(y)\n return _estimate_mi(X, y, discrete_features, True, n_neighbors,\n copy, random_state)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_regression", + "name": "mutual_info_regression", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_regression", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_regression/X", + "name": "X", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_regression.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or sparse matrix, shape (n_samples, n_features)", + "default_value": "", + "description": "Feature matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "sparse matrix" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_regression/y", + "name": "y", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_regression.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target vector." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_regression/discrete_features", + "name": "discrete_features", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_regression.discrete_features", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', bool, array-like}", + "default_value": "'auto'", + "description": "If bool, then determines whether to consider all features discrete\nor continuous. If array, then it should be either a boolean mask\nwith shape (n_features,) or array with indices of discrete features.\nIf 'auto', it is assigned to False for dense `X` and to True for\nsparse `X`."
+ }, + "type": { + "kind": "EnumType", + "values": ["auto"] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_regression/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_regression.n_neighbors", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Number of neighbors to use for MI estimation for continuous variables,\nsee [2]_ and [3]_. Higher values reduce variance of the estimation, but\ncould introduce a bias." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_regression/copy", + "name": "copy", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_regression.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to make a copy of the given data. If set to False, the initial\ndata will be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._mutual_info/mutual_info_regression/random_state", + "name": "random_state", + "qname": "sklearn.feature_selection._mutual_info.mutual_info_regression.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for adding small noise to\ncontinuous variables in order to remove repeated values.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate mutual information for a continuous target variable.\n\nMutual information (MI) [1]_ between two random variables is a non-negative\nvalue, which measures the dependency between the variables. It is equal\nto zero if and only if two random variables are independent, and higher\nvalues mean higher dependency.\n\nThe function relies on nonparametric methods based on entropy estimation\nfrom k-nearest neighbors distances as described in [2]_ and [3]_. Both\nmethods are based on the idea originally proposed in [4]_.\n\nIt can be used for univariate features selection, read more in the\n:ref:`User Guide `.", + "docstring": "Estimate mutual information for a continuous target variable.\n\nMutual information (MI) [1]_ between two random variables is a non-negative\nvalue, which measures the dependency between the variables. It is equal\nto zero if and only if two random variables are independent, and higher\nvalues mean higher dependency.\n\nThe function relies on nonparametric methods based on entropy estimation\nfrom k-nearest neighbors distances as described in [2]_ and [3]_. 
Both\nmethods are based on the idea originally proposed in [4]_.\n\nIt can be used for univariate features selection, read more in the\n:ref:`User Guide `.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Feature matrix.\n\ny : array-like of shape (n_samples,)\n Target vector.\n\ndiscrete_features : {'auto', bool, array-like}, default='auto'\n If bool, then determines whether to consider all features discrete\n or continuous. If array, then it should be either a boolean mask\n with shape (n_features,) or array with indices of discrete features.\n If 'auto', it is assigned to False for dense `X` and to True for\n sparse `X`.\n\nn_neighbors : int, default=3\n Number of neighbors to use for MI estimation for continuous variables,\n see [2]_ and [3]_. Higher values reduce variance of the estimation, but\n could introduce a bias.\n\ncopy : bool, default=True\n Whether to make a copy of the given data. If set to False, the initial\n data will be overwritten.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for adding small noise to\n continuous variables in order to remove repeated values.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nmi : ndarray, shape (n_features,)\n Estimated mutual information between each feature and the target.\n\nNotes\n-----\n1. The term \"discrete features\" is used instead of naming them\n \"categorical\", because it describes the essence more accurately.\n For example, pixel intensities of an image are discrete features\n (but hardly categorical) and you will get better results if mark them\n as such. Also note, that treating a continuous variable as discrete and\n vice versa will usually give incorrect results, so be attentive about\n that.\n2. True mutual information can't be negative. If its estimate turns out\n to be negative, it is replaced by zero.\n\nReferences\n----------\n.. [1] `Mutual Information\n `_\n on Wikipedia.\n.. [2] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.\n.. [3] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014.\n.. [4] L. F. Kozachenko, N. N. Leonenko, \"Sample Estimate of the Entropy\n of a Random Vector\", Probl. Peredachi Inf., 23:2 (1987), 9-16", + "code": "@_deprecate_positional_args\ndef mutual_info_regression(X, y, *, discrete_features='auto', n_neighbors=3,\n copy=True, random_state=None):\n \"\"\"Estimate mutual information for a continuous target variable.\n\n Mutual information (MI) [1]_ between two random variables is a non-negative\n value, which measures the dependency between the variables. It is equal\n to zero if and only if two random variables are independent, and higher\n values mean higher dependency.\n\n The function relies on nonparametric methods based on entropy estimation\n from k-nearest neighbors distances as described in [2]_ and [3]_. Both\n methods are based on the idea originally proposed in [4]_.\n\n It can be used for univariate features selection, read more in the\n :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like or sparse matrix, shape (n_samples, n_features)\n Feature matrix.\n\n y : array-like of shape (n_samples,)\n Target vector.\n\n discrete_features : {'auto', bool, array-like}, default='auto'\n If bool, then determines whether to consider all features discrete\n or continuous. 
If array, then it should be either a boolean mask\n with shape (n_features,) or array with indices of discrete features.\n If 'auto', it is assigned to False for dense `X` and to True for\n sparse `X`.\n\n n_neighbors : int, default=3\n Number of neighbors to use for MI estimation for continuous variables,\n see [2]_ and [3]_. Higher values reduce variance of the estimation, but\n could introduce a bias.\n\n copy : bool, default=True\n Whether to make a copy of the given data. If set to False, the initial\n data will be overwritten.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for adding small noise to\n continuous variables in order to remove repeated values.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n mi : ndarray, shape (n_features,)\n Estimated mutual information between each feature and the target.\n\n Notes\n -----\n 1. The term \"discrete features\" is used instead of naming them\n \"categorical\", because it describes the essence more accurately.\n For example, pixel intensities of an image are discrete features\n (but hardly categorical) and you will get better results if mark them\n as such. Also note, that treating a continuous variable as discrete and\n vice versa will usually give incorrect results, so be attentive about\n that.\n 2. True mutual information can't be negative. If its estimate turns out\n to be negative, it is replaced by zero.\n\n References\n ----------\n .. [1] `Mutual Information\n `_\n on Wikipedia.\n .. [2] A. Kraskov, H. Stogbauer and P. Grassberger, \"Estimating mutual\n information\". Phys. Rev. E 69, 2004.\n .. [3] B. C. Ross \"Mutual Information between Discrete and Continuous\n Data Sets\". PLoS ONE 9(2), 2014.\n .. [4] L. F. Kozachenko, N. N. Leonenko, \"Sample Estimate of the Entropy\n of a Random Vector\", Probl. Peredachi Inf., 23:2 (1987), 9-16\n \"\"\"\n return _estimate_mi(X, y, discrete_features, False, n_neighbors,\n copy, random_state)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/__init__", + "name": "__init__", + "qname": "sklearn.feature_selection._rfe.RFE.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/__init__/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFE.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/__init__/estimator", + "name": "estimator", + "qname": "sklearn.feature_selection._rfe.RFE.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "``Estimator`` instance", + "default_value": "", + "description": "A supervised learning estimator with a ``fit`` method that provides\ninformation about feature importance\n(e.g. `coef_`, `feature_importances_`)." 
+ }, + "type": { + "kind": "NamedType", + "name": "``Estimator`` instance" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/__init__/n_features_to_select", + "name": "n_features_to_select", + "qname": "sklearn.feature_selection._rfe.RFE.__init__.n_features_to_select", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "None", + "description": "The number of features to select. If `None`, half of the features are\nselected. If integer, the parameter is the absolute number of features\nto select. If float between 0 and 1, it is the fraction of features to\nselect.\n\n.. versionchanged:: 0.24\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/__init__/step", + "name": "step", + "qname": "sklearn.feature_selection._rfe.RFE.__init__.step", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1", + "description": "If greater than or equal to 1, then ``step`` corresponds to the\n(integer) number of features to remove at each iteration.\nIf within (0.0, 1.0), then ``step`` corresponds to the percentage\n(rounded down) of features to remove at each iteration." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/__init__/verbose", + "name": "verbose", + "qname": "sklearn.feature_selection._rfe.RFE.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls verbosity of output." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/__init__/importance_getter", + "name": "importance_getter", + "qname": "sklearn.feature_selection._rfe.RFE.__init__.importance_getter", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'auto'", + "description": "If 'auto', uses the feature importance either through a `coef_`\nor `feature_importances_` attributes of estimator.\n\nAlso accepts a string that specifies an attribute name/path\nfor extracting feature importance (implemented with `attrgetter`).\nFor example, give `regressor_.coef_` in case of\n:class:`~sklearn.compose.TransformedTargetRegressor` or\n`named_steps.clf.feature_importances_` in case of\n:class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\nIf `callable`, overrides the default feature importance getter.\nThe callable is passed with the fitted estimator and it should\nreturn importance for each feature.\n\n..
versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Feature ranking with recursive feature elimination.\n\nGiven an external estimator that assigns weights to features (e.g., the\ncoefficients of a linear model), the goal of recursive feature elimination\n(RFE) is to select features by recursively considering smaller and smaller\nsets of features. First, the estimator is trained on the initial set of\nfeatures and the importance of each feature is obtained either through\nany specific attribute or callable.\nThen, the least important features are pruned from current set of features.\nThat procedure is recursively repeated on the pruned set until the desired\nnumber of features to select is eventually reached.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimator, *, n_features_to_select=None, step=1,\n verbose=0, importance_getter='auto'):\n self.estimator = estimator\n self.n_features_to_select = n_features_to_select\n self.step = step\n self.importance_getter = importance_getter\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/_estimator_type@getter", + "name": "_estimator_type", + "qname": "sklearn.feature_selection._rfe.RFE._estimator_type", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/_estimator_type/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFE._estimator_type.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def _estimator_type(self):\n return self.estimator._estimator_type" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/_fit", + "name": "_fit", + "qname": "sklearn.feature_selection._rfe.RFE._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/_fit/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFE._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/_fit/X", + "name": "X", + "qname": "sklearn.feature_selection._rfe.RFE._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/_fit/y", + "name": "y", + "qname": "sklearn.feature_selection._rfe.RFE._fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/_fit/step_score", + "name": "step_score", + "qname": "sklearn.feature_selection._rfe.RFE._fit.step_score", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
"description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit(self, X, y, step_score=None):\n # Parameter step_score controls the calculation of self.scores_\n # step_score is not exposed to users\n # and is used when implementing RFECV\n # self.scores_ will not be calculated when calling _fit through fit\n\n tags = self._get_tags()\n X, y = self._validate_data(\n X, y, accept_sparse=\"csc\",\n ensure_min_features=2,\n force_all_finite=not tags.get(\"allow_nan\", True),\n multi_output=True\n )\n error_msg = (\"n_features_to_select must be either None, a \"\n \"positive integer representing the absolute \"\n \"number of features or a float in (0.0, 1.0] \"\n \"representing a percentage of features to \"\n f\"select. Got {self.n_features_to_select}\")\n\n # Initialization\n n_features = X.shape[1]\n if self.n_features_to_select is None:\n n_features_to_select = n_features // 2\n elif self.n_features_to_select < 0:\n raise ValueError(error_msg)\n elif isinstance(self.n_features_to_select, numbers.Integral): # int\n n_features_to_select = self.n_features_to_select\n elif self.n_features_to_select > 1.0: # float > 1\n raise ValueError(error_msg)\n else: # float\n n_features_to_select = int(n_features * self.n_features_to_select)\n\n if 0.0 < self.step < 1.0:\n step = int(max(1, self.step * n_features))\n else:\n step = int(self.step)\n if step <= 0:\n raise ValueError(\"Step must be >0\")\n\n support_ = np.ones(n_features, dtype=bool)\n ranking_ = np.ones(n_features, dtype=int)\n\n if step_score:\n self.scores_ = []\n\n # Elimination\n while np.sum(support_) > n_features_to_select:\n # Remaining features\n features = np.arange(n_features)[support_]\n\n # Rank the remaining features\n estimator = clone(self.estimator)\n if self.verbose > 0:\n print(\"Fitting estimator with %d features.\" % np.sum(support_))\n\n estimator.fit(X[:, features], y)\n\n # Get importance and rank them\n importances = _get_feature_importances(\n estimator, self.importance_getter, transform_func=\"square\",\n )\n ranks = np.argsort(importances)\n\n # for sparse case ranks is matrix\n ranks = np.ravel(ranks)\n\n # Eliminate the worse features\n threshold = min(step, np.sum(support_) - n_features_to_select)\n\n # Compute step score on the previous selection iteration\n # because 'estimator' must use features\n # that have not been eliminated yet\n if step_score:\n self.scores_.append(step_score(estimator, features))\n support_[features[ranks][:threshold]] = False\n ranking_[np.logical_not(support_)] += 1\n\n # Set final attributes\n features = np.arange(n_features)[support_]\n self.estimator_ = clone(self.estimator)\n self.estimator_.fit(X[:, features], y)\n\n # Compute step score when only n_features_to_select features left\n if step_score:\n self.scores_.append(step_score(self.estimator_, features))\n self.n_features_ = support_.sum()\n self.support_ = support_\n self.ranking_ = ranking_\n\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/_get_support_mask", + "name": "_get_support_mask", + "qname": "sklearn.feature_selection._rfe.RFE._get_support_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/_get_support_mask/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFE._get_support_mask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + 
"default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_support_mask(self):\n check_is_fitted(self)\n return self.support_" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/_more_tags", + "name": "_more_tags", + "qname": "sklearn.feature_selection._rfe.RFE._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/_more_tags/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFE._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n 'poor_score': True,\n 'allow_nan': _safe_tags(self.estimator, key='allow_nan'),\n 'requires_y': True,\n }" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/classes_@getter", + "name": "classes_", + "qname": "sklearn.feature_selection._rfe.RFE.classes_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/classes_/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFE.classes_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def classes_(self):\n return self.estimator_.classes_" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/decision_function", + "name": "decision_function", + "qname": "sklearn.feature_selection._rfe.RFE.decision_function", + "decorators": ["if_delegate_has_method(delegate='estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/decision_function/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFE.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/decision_function/X", + "name": "X", + "qname": "sklearn.feature_selection._rfe.RFE.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like or sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the decision function of ``X``.", + "docstring": "Compute the decision function of ``X``.\n\nParameters\n----------\nX : {array-like or sparse matrix} of shape (n_samples, n_features)\n The input samples. 
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nscore : array, shape = [n_samples, n_classes] or [n_samples]\n The decision function of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification produce an array of shape\n [n_samples].", + "code": " @if_delegate_has_method(delegate='estimator')\n def decision_function(self, X):\n \"\"\"Compute the decision function of ``X``.\n\n Parameters\n ----------\n X : {array-like or sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n score : array, shape = [n_samples, n_classes] or [n_samples]\n The decision function of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n Regression and binary classification produce an array of shape\n [n_samples].\n \"\"\"\n check_is_fitted(self)\n return self.estimator_.decision_function(self.transform(X))" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/fit", + "name": "fit", + "qname": "sklearn.feature_selection._rfe.RFE.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/fit/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFE.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/fit/X", + "name": "X", + "qname": "sklearn.feature_selection._rfe.RFE.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/fit/y", + "name": "y", + "qname": "sklearn.feature_selection._rfe.RFE.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target values." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the RFE model and then the underlying estimator on the selected\n features.", + "docstring": "Fit the RFE model and then the underlying estimator on the selected\n features.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,)\n The target values.", + "code": " def fit(self, X, y):\n \"\"\"Fit the RFE model and then the underlying estimator on the selected\n features.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples.\n\n y : array-like of shape (n_samples,)\n The target values.\n \"\"\"\n return self._fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/predict", + "name": "predict", + "qname": "sklearn.feature_selection._rfe.RFE.predict", + "decorators": ["if_delegate_has_method(delegate='estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/predict/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFE.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/predict/X", + "name": "X", + "qname": "sklearn.feature_selection._rfe.RFE.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape [n_samples, n_features]", + "default_value": "", + "description": "The input samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "n_features]" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Reduce X to the selected features and then predict using the\n underlying estimator.", + "docstring": "Reduce X to the selected features and then predict using the\n underlying estimator.\n\nParameters\n----------\nX : array of shape [n_samples, n_features]\n The input samples.\n\nReturns\n-------\ny : array of shape [n_samples]\n The predicted target values.", + "code": " @if_delegate_has_method(delegate='estimator')\n def predict(self, X):\n \"\"\"Reduce X to the selected features and then predict using the\n underlying estimator.\n\n Parameters\n ----------\n X : array of shape [n_samples, n_features]\n The input samples.\n\n Returns\n -------\n y : array of shape [n_samples]\n The predicted target values.\n \"\"\"\n check_is_fitted(self)\n return self.estimator_.predict(self.transform(X))" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.feature_selection._rfe.RFE.predict_log_proba", + "decorators": ["if_delegate_has_method(delegate='estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/predict_log_proba/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFE.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/predict_log_proba/X", + "name": "X", + "qname": "sklearn.feature_selection._rfe.RFE.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape [n_samples, n_features]", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "n_features]" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class log-probabilities for X.", + "docstring": "Predict class log-probabilities for X.\n\nParameters\n----------\nX : array of shape [n_samples, n_features]\n The input samples.\n\nReturns\n-------\np : array of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.", + "code": " @if_delegate_has_method(delegate='estimator')\n def predict_log_proba(self, X):\n \"\"\"Predict class log-probabilities for X.\n\n Parameters\n ----------\n X : array of shape [n_samples, n_features]\n The input samples.\n\n Returns\n -------\n p : array of shape (n_samples, n_classes)\n The class log-probabilities of the input samples. 
The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n return self.estimator_.predict_log_proba(self.transform(X))" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/predict_proba", + "name": "predict_proba", + "qname": "sklearn.feature_selection._rfe.RFE.predict_proba", + "decorators": ["if_delegate_has_method(delegate='estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/predict_proba/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFE.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/predict_proba/X", + "name": "X", + "qname": "sklearn.feature_selection._rfe.RFE.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like or sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class probabilities for X.", + "docstring": "Predict class probabilities for X.\n\nParameters\n----------\nX : {array-like or sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\np : array of shape (n_samples, n_classes)\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.", + "code": " @if_delegate_has_method(delegate='estimator')\n def predict_proba(self, X):\n \"\"\"Predict class probabilities for X.\n\n Parameters\n ----------\n X : {array-like or sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n p : array of shape (n_samples, n_classes)\n The class probabilities of the input samples. 
The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n return self.estimator_.predict_proba(self.transform(X))" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/score", + "name": "score", + "qname": "sklearn.feature_selection._rfe.RFE.score", + "decorators": ["if_delegate_has_method(delegate='estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/score/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFE.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/score/X", + "name": "X", + "qname": "sklearn.feature_selection._rfe.RFE.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape [n_samples, n_features]", + "default_value": "", + "description": "The input samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "n_features]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFE/score/y", + "name": "y", + "qname": "sklearn.feature_selection._rfe.RFE.score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape [n_samples]", + "default_value": "", + "description": "The target values." + }, + "type": { + "kind": "NamedType", + "name": "array of shape [n_samples]" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Reduce X to the selected features and then return the score of the\n underlying estimator.", + "docstring": "Reduce X to the selected features and then return the score of the\n underlying estimator.\n\nParameters\n----------\nX : array of shape [n_samples, n_features]\n The input samples.\n\ny : array of shape [n_samples]\n The target values.", + "code": " @if_delegate_has_method(delegate='estimator')\n def score(self, X, y):\n \"\"\"Reduce X to the selected features and then return the score of the\n underlying estimator.\n\n Parameters\n ----------\n X : array of shape [n_samples, n_features]\n The input samples.\n\n y : array of shape [n_samples]\n The target values.\n \"\"\"\n check_is_fitted(self)\n return self.estimator_.score(self.transform(X), y)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/__init__", + "name": "__init__", + "qname": "sklearn.feature_selection._rfe.RFECV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/__init__/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFECV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/__init__/estimator", + "name": "estimator", + "qname": "sklearn.feature_selection._rfe.RFECV.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "``Estimator`` instance", + "default_value": "", + "description": "A supervised learning estimator with a ``fit`` method that 
provides\ninformation about feature importance either through a ``coef_``\nattribute or through a ``feature_importances_`` attribute." + }, + "type": { + "kind": "NamedType", + "name": "``Estimator`` instance" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/__init__/step", + "name": "step", + "qname": "sklearn.feature_selection._rfe.RFECV.__init__.step", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1", + "description": "If greater than or equal to 1, then ``step`` corresponds to the\n(integer) number of features to remove at each iteration.\nIf within (0.0, 1.0), then ``step`` corresponds to the percentage\n(rounded down) of features to remove at each iteration.\nNote that the last iteration may remove fewer than ``step`` features in\norder to reach ``min_features_to_select``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/__init__/min_features_to_select", + "name": "min_features_to_select", + "qname": "sklearn.feature_selection._rfe.RFECV.__init__.min_features_to_select", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "The minimum number of features to be selected. This number of features\nwill always be scored, even if the difference between the original\nfeature count and ``min_features_to_select`` isn't divisible by\n``step``.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/__init__/cv", + "name": "cv", + "qname": "sklearn.feature_selection._rfe.RFECV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if ``y`` is binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used. If the\nestimator is a classifier or if ``y`` is neither binary nor multiclass,\n:class:`~sklearn.model_selection.KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value of None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/__init__/scoring", + "name": "scoring", + "qname": "sklearn.feature_selection._rfe.RFECV.__init__.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string, callable or None", + "default_value": "None", + "description": "A string (see model evaluation documentation) or\na scorer callable object / function with signature\n``scorer(estimator, X, y)``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.feature_selection._rfe.RFECV.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls verbosity of output." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.feature_selection._rfe.RFECV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "Number of cores to run in parallel while fitting across folds.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/__init__/importance_getter", + "name": "importance_getter", + "qname": "sklearn.feature_selection._rfe.RFECV.__init__.importance_getter", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'auto'", + "description": "If 'auto', uses the feature importance either through a `coef_`\nor `feature_importances_` attributes of estimator.\n\nAlso accepts a string that specifies an attribute name/path\nfor extracting feature importance.\nFor example, give `regressor_.coef_` in case of\n:class:`~sklearn.compose.TransformedTargetRegressor` or\n`named_steps.clf.feature_importances_` in case of\n:class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.\n\nIf `callable`, overrides the default feature importance getter.\nThe callable is passed with the fitted estimator and it should\nreturn importance for each feature.\n\n.. 
versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Feature ranking with recursive feature elimination and cross-validated\nselection of the best number of features.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimator, *, step=1, min_features_to_select=1,\n cv=None, scoring=None, verbose=0, n_jobs=None,\n importance_getter='auto'):\n self.estimator = estimator\n self.step = step\n self.importance_getter = importance_getter\n self.cv = cv\n self.scoring = scoring\n self.verbose = verbose\n self.n_jobs = n_jobs\n self.min_features_to_select = min_features_to_select" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/fit", + "name": "fit", + "qname": "sklearn.feature_selection._rfe.RFECV.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/fit/self", + "name": "self", + "qname": "sklearn.feature_selection._rfe.RFECV.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/fit/X", + "name": "X", + "qname": "sklearn.feature_selection._rfe.RFECV.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where `n_samples` is the number of samples and\n`n_features` is the total number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/fit/y", + "name": "y", + "qname": "sklearn.feature_selection._rfe.RFECV.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values (integers for classification, real numbers for\nregression)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/RFECV/fit/groups", + "name": "groups", + "qname": "sklearn.feature_selection._rfe.RFECV.fit.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or None", + "default_value": "None", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set. Only used in conjunction with a \"Group\" :term:`cv`\ninstance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the RFE model and automatically tune the number of selected\n features.", + "docstring": "Fit the RFE model and automatically tune the number of selected\n features.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where `n_samples` is the number of samples and\n `n_features` is the total number of features.\n\ny : array-like of shape (n_samples,)\n Target values (integers for classification, real numbers for\n regression).\n\ngroups : array-like of shape (n_samples,) or None, default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n .. versionadded:: 0.20", + "code": " def fit(self, X, y, groups=None):\n \"\"\"Fit the RFE model and automatically tune the number of selected\n features.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where `n_samples` is the number of samples and\n `n_features` is the total number of features.\n\n y : array-like of shape (n_samples,)\n Target values (integers for classification, real numbers for\n regression).\n\n groups : array-like of shape (n_samples,) or None, default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n .. 
versionadded:: 0.20\n \"\"\"\n tags = self._get_tags()\n X, y = self._validate_data(\n X, y, accept_sparse=\"csr\", ensure_min_features=2,\n force_all_finite=not tags.get('allow_nan', True),\n multi_output=True\n )\n\n # Initialization\n cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))\n scorer = check_scoring(self.estimator, scoring=self.scoring)\n n_features = X.shape[1]\n\n if 0.0 < self.step < 1.0:\n step = int(max(1, self.step * n_features))\n else:\n step = int(self.step)\n if step <= 0:\n raise ValueError(\"Step must be >0\")\n\n # Build an RFE object, which will evaluate and score each possible\n # feature count, down to self.min_features_to_select\n rfe = RFE(estimator=self.estimator,\n n_features_to_select=self.min_features_to_select,\n importance_getter=self.importance_getter,\n step=self.step, verbose=self.verbose)\n\n # Determine the number of subsets of features by fitting across\n # the train folds and choosing the \"features_to_select\" parameter\n # that gives the least averaged error across all folds.\n\n # Note that joblib raises a non-picklable error for bound methods\n # even if n_jobs is set to 1 with the default multiprocessing\n # backend.\n # This branching is done so that to\n # make sure that user code that sets n_jobs to 1\n # and provides bound methods as scorers is not broken with the\n # addition of n_jobs parameter in version 0.18.\n\n if effective_n_jobs(self.n_jobs) == 1:\n parallel, func = list, _rfe_single_fit\n else:\n parallel = Parallel(n_jobs=self.n_jobs)\n func = delayed(_rfe_single_fit)\n\n scores = parallel(\n func(rfe, self.estimator, X, y, train, test, scorer)\n for train, test in cv.split(X, y, groups))\n\n scores = np.sum(scores, axis=0)\n scores_rev = scores[::-1]\n argmax_idx = len(scores) - np.argmax(scores_rev) - 1\n n_features_to_select = max(\n n_features - (argmax_idx * step),\n self.min_features_to_select)\n\n # Re-execute an elimination with best_k over the whole set\n rfe = RFE(estimator=self.estimator,\n n_features_to_select=n_features_to_select, step=self.step,\n importance_getter=self.importance_getter,\n verbose=self.verbose)\n\n rfe.fit(X, y)\n\n # Set final attributes\n self.support_ = rfe.support_\n self.n_features_ = rfe.n_features_\n self.ranking_ = rfe.ranking_\n self.estimator_ = clone(self.estimator)\n self.estimator_.fit(self.transform(X), y)\n\n # Fixing a normalization error, n is equal to get_n_splits(X, y) - 1\n # here, the scores are normalized by get_n_splits(X, y)\n self.grid_scores_ = scores[::-1] / cv.get_n_splits(X, y, groups)\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/_rfe_single_fit", + "name": "_rfe_single_fit", + "qname": "sklearn.feature_selection._rfe._rfe_single_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._rfe/_rfe_single_fit/rfe", + "name": "rfe", + "qname": "sklearn.feature_selection._rfe._rfe_single_fit.rfe", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/_rfe_single_fit/estimator", + "name": "estimator", + "qname": "sklearn.feature_selection._rfe._rfe_single_fit.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.feature_selection._rfe/_rfe_single_fit/X", + "name": "X", + "qname": "sklearn.feature_selection._rfe._rfe_single_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/_rfe_single_fit/y", + "name": "y", + "qname": "sklearn.feature_selection._rfe._rfe_single_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/_rfe_single_fit/train", + "name": "train", + "qname": "sklearn.feature_selection._rfe._rfe_single_fit.train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/_rfe_single_fit/test", + "name": "test", + "qname": "sklearn.feature_selection._rfe._rfe_single_fit.test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._rfe/_rfe_single_fit/scorer", + "name": "scorer", + "qname": "sklearn.feature_selection._rfe._rfe_single_fit.scorer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the score for a fit across one fold.", + "docstring": "Return the score for a fit across one fold.", + "code": "def _rfe_single_fit(rfe, estimator, X, y, train, test, scorer):\n \"\"\"\n Return the score for a fit across one fold.\n \"\"\"\n X_train, y_train = _safe_split(estimator, X, y, train)\n X_test, y_test = _safe_split(estimator, X, y, test, train)\n return rfe._fit(\n X_train, y_train,\n lambda estimator, features: _score(\n estimator, X_test[:, features], y_test, scorer\n )).scores_" + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/__init__", + "name": "__init__", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/__init__/self", + "name": "self", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/__init__/estimator", + "name": "estimator", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator instance", + "default_value": "", + "description": "An unfitted estimator." 
+ }, + "type": { + "kind": "NamedType", + "name": "estimator instance" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/__init__/n_features_to_select", + "name": "n_features_to_select", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector.__init__.n_features_to_select", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "None", + "description": "The number of features to select. If `None`, half of the features are\nselected. If integer, the parameter is the absolute number of features\nto select. If float between 0 and 1, it is the fraction of features to\nselect." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/__init__/direction", + "name": "direction", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector.__init__.direction", + "default_value": "'forward'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'forward', 'backward'}", + "default_value": "'forward'", + "description": "Whether to perform forward selection or backward selection." + }, + "type": { + "kind": "EnumType", + "values": ["backward", "forward"] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/__init__/scoring", + "name": "scoring", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector.__init__.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str, callable, list/tuple or dict", + "default_value": "None", + "description": "A single str (see :ref:`scoring_parameter`) or a callable\n(see :ref:`scoring`) to evaluate the predictions on the test set.\n\nNOTE that when using custom scorers, each scorer should return a single\nvalue. Metric functions returning a list/array of values can be wrapped\ninto multiple scorers that return one value each.\n\nIf None, the estimator's score method is used." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "list/tuple" + }, + { + "kind": "NamedType", + "name": "dict" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/__init__/cv", + "name": "cv", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector.__init__.cv", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "5", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. 
These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel. When evaluating a new feature to\nadd or remove, the cross-validation procedure is parallel over the\nfolds.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transformer that performs Sequential Feature Selection.\n\nThis Sequential Feature Selector adds (forward selection) or\nremoves (backward selection) features to form a feature subset in a\ngreedy fashion. At each stage, this estimator chooses the best feature to\nadd or remove based on the cross-validation score of an estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24", + "docstring": "", + "code": " def __init__(self, estimator, *, n_features_to_select=None,\n direction='forward', scoring=None, cv=5, n_jobs=None):\n\n self.estimator = estimator\n self.n_features_to_select = n_features_to_select\n self.direction = direction\n self.scoring = scoring\n self.cv = cv\n self.n_jobs = n_jobs" + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/_get_best_new_feature", + "name": "_get_best_new_feature", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector._get_best_new_feature", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/_get_best_new_feature/self", + "name": "self", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector._get_best_new_feature.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/_get_best_new_feature/estimator", + "name": "estimator", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector._get_best_new_feature.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/_get_best_new_feature/X", + "name": "X", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector._get_best_new_feature.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/_get_best_new_feature/y", + "name": "y", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector._get_best_new_feature.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/_get_best_new_feature/current_mask", + "name": "current_mask", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector._get_best_new_feature.current_mask", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_best_new_feature(self, estimator, X, y, current_mask):\n # Return the best new feature to add to the current_mask, i.e. return\n # the best new feature to add (resp. remove) when doing forward\n # selection (resp. backward selection)\n candidate_feature_indices = np.flatnonzero(~current_mask)\n scores = {}\n for feature_idx in candidate_feature_indices:\n candidate_mask = current_mask.copy()\n candidate_mask[feature_idx] = True\n if self.direction == 'backward':\n candidate_mask = ~candidate_mask\n X_new = X[:, candidate_mask]\n scores[feature_idx] = cross_val_score(\n estimator, X_new, y, cv=self.cv, scoring=self.scoring,\n n_jobs=self.n_jobs).mean()\n return max(scores, key=lambda feature_idx: scores[feature_idx])" + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/_get_support_mask", + "name": "_get_support_mask", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector._get_support_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/_get_support_mask/self", + "name": "self", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector._get_support_mask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_support_mask(self):\n check_is_fitted(self)\n return self.support_" + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/_more_tags", + "name": "_more_tags", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/_more_tags/self", + "name": "self", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n 'allow_nan': _safe_tags(self.estimator, key=\"allow_nan\"),\n 'requires_y': True,\n }" + }, + { + "id": 
"scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/fit", + "name": "fit", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/fit/self", + "name": "self", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/fit/X", + "name": "X", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._sequential/SequentialFeatureSelector/fit/y", + "name": "y", + "qname": "sklearn.feature_selection._sequential.SequentialFeatureSelector.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Learn the features to select.", + "docstring": "Learn the features to select.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors.\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y):\n \"\"\"Learn the features to select.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vectors.\n y : array-like of shape (n_samples,)\n Target values.\n\n Returns\n -------\n self : object\n \"\"\"\n tags = self._get_tags()\n X, y = self._validate_data(\n X, y, accept_sparse=\"csc\",\n ensure_min_features=2,\n force_all_finite=not tags.get(\"allow_nan\", True),\n multi_output=True\n )\n n_features = X.shape[1]\n\n error_msg = (\"n_features_to_select must be either None, an \"\n \"integer in [1, n_features - 1] \"\n \"representing the absolute \"\n \"number of features, or a float in (0, 1] \"\n \"representing a percentage of features to \"\n f\"select. Got {self.n_features_to_select}\")\n if self.n_features_to_select is None:\n self.n_features_to_select_ = n_features // 2\n elif isinstance(self.n_features_to_select, numbers.Integral):\n if not 0 < self.n_features_to_select < n_features:\n raise ValueError(error_msg)\n self.n_features_to_select_ = self.n_features_to_select\n elif isinstance(self.n_features_to_select, numbers.Real):\n if not 0 < self.n_features_to_select <= 1:\n raise ValueError(error_msg)\n self.n_features_to_select_ = int(n_features *\n self.n_features_to_select)\n else:\n raise ValueError(error_msg)\n\n if self.direction not in ('forward', 'backward'):\n raise ValueError(\n \"direction must be either 'forward' or 'backward'. 
\"\n f\"Got {self.direction}.\"\n )\n\n cloned_estimator = clone(self.estimator)\n\n # the current mask corresponds to the set of features:\n # - that we have already *selected* if we do forward selection\n # - that we have already *excluded* if we do backward selection\n current_mask = np.zeros(shape=n_features, dtype=bool)\n n_iterations = (\n self.n_features_to_select_ if self.direction == 'forward'\n else n_features - self.n_features_to_select_\n )\n for _ in range(n_iterations):\n new_feature_idx = self._get_best_new_feature(cloned_estimator, X,\n y, current_mask)\n current_mask[new_feature_idx] = True\n\n if self.direction == 'backward':\n current_mask = ~current_mask\n self.support_ = current_mask\n\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/__init__", + "name": "__init__", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/__init__/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/__init__/score_func", + "name": "score_func", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect.__init__.score_func", + "default_value": "f_classif", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "f_classif", + "description": "Function taking two arrays X and y, and returning a pair of arrays\n(scores, pvalues). For modes 'percentile' or 'kbest' it can return\na single array scores." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/__init__/mode", + "name": "mode", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect.__init__.mode", + "default_value": "'percentile'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}", + "default_value": "'percentile'", + "description": "Feature selection mode." + }, + "type": { + "kind": "EnumType", + "values": ["fdr", "fpr", "percentile", "k_best", "fwe"] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/__init__/param", + "name": "param", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect.__init__.param", + "default_value": "1e-05", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or int depending on the feature selection mode", + "default_value": "1e-5", + "description": "Parameter of the corresponding mode." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Univariate feature selector with configurable strategy.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, score_func=f_classif, *, mode='percentile', param=1e-5):\n super().__init__(score_func=score_func)\n self.mode = mode\n self.param = param" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/_check_params", + "name": "_check_params", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect._check_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/_check_params/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect._check_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/_check_params/X", + "name": "X", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect._check_params.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/_check_params/y", + "name": "y", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect._check_params.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_params(self, X, y):\n if self.mode not in self._selection_modes:\n raise ValueError(\"The mode passed should be one of %s, %r,\"\n \" (type %s) was passed.\"\n % (self._selection_modes.keys(), self.mode,\n type(self.mode)))\n\n self._make_selector()._check_params(X, y)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/_get_support_mask", + "name": "_get_support_mask", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect._get_support_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/_get_support_mask/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect._get_support_mask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_support_mask(self):\n check_is_fitted(self)\n\n selector = self._make_selector()\n selector.pvalues_ = self.pvalues_\n selector.scores_ = self.scores_\n return 
selector._get_support_mask()" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/_make_selector", + "name": "_make_selector", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect._make_selector", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/GenericUnivariateSelect/_make_selector/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.GenericUnivariateSelect._make_selector.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _make_selector(self):\n selector = self._selection_modes[self.mode](score_func=self.score_func)\n\n # Now perform some acrobatics to set the right named parameter in\n # the selector\n possible_params = selector._get_param_names()\n possible_params.remove('score_func')\n selector.set_params(**{possible_params[0]: self.param})\n\n return selector" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFdr/__init__", + "name": "__init__", + "qname": "sklearn.feature_selection._univariate_selection.SelectFdr.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFdr/__init__/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.SelectFdr.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFdr/__init__/score_func", + "name": "score_func", + "qname": "sklearn.feature_selection._univariate_selection.SelectFdr.__init__.score_func", + "default_value": "f_classif", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "f_classif", + "description": "Function taking two arrays X and y, and returning a pair of arrays\n(scores, pvalues).\nDefault is f_classif (see below \"See Also\"). The default function only\nworks with classification tasks." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFdr/__init__/alpha", + "name": "alpha", + "qname": "sklearn.feature_selection._univariate_selection.SelectFdr.__init__.alpha", + "default_value": "0.05", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "5e-2", + "description": "The highest uncorrected p-value for features to keep." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Filter: Select the p-values for an estimated false discovery rate\n\nThis uses the Benjamini-Hochberg procedure. 
``alpha`` is an upper bound\non the expected false discovery rate.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, score_func=f_classif, *, alpha=5e-2):\n super().__init__(score_func=score_func)\n self.alpha = alpha" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFdr/_get_support_mask", + "name": "_get_support_mask", + "qname": "sklearn.feature_selection._univariate_selection.SelectFdr._get_support_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFdr/_get_support_mask/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.SelectFdr._get_support_mask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_support_mask(self):\n check_is_fitted(self)\n\n n_features = len(self.pvalues_)\n sv = np.sort(self.pvalues_)\n selected = sv[sv <= float(self.alpha) / n_features *\n np.arange(1, n_features + 1)]\n if selected.size == 0:\n return np.zeros_like(self.pvalues_, dtype=bool)\n return self.pvalues_ <= selected.max()" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFpr/__init__", + "name": "__init__", + "qname": "sklearn.feature_selection._univariate_selection.SelectFpr.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFpr/__init__/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.SelectFpr.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFpr/__init__/score_func", + "name": "score_func", + "qname": "sklearn.feature_selection._univariate_selection.SelectFpr.__init__.score_func", + "default_value": "f_classif", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "f_classif", + "description": "Function taking two arrays X and y, and returning a pair of arrays\n(scores, pvalues).\nDefault is f_classif (see below \"See Also\"). The default function only\nworks with classification tasks." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFpr/__init__/alpha", + "name": "alpha", + "qname": "sklearn.feature_selection._univariate_selection.SelectFpr.__init__.alpha", + "default_value": "0.05", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "5e-2", + "description": "The highest p-value for features to be kept." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Filter: Select the p-values below alpha based on an FPR test.\n\nFPR test stands for False Positive Rate test. 
It controls the total\nnumber of false detections.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, score_func=f_classif, *, alpha=5e-2):\n super().__init__(score_func=score_func)\n self.alpha = alpha" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFpr/_get_support_mask", + "name": "_get_support_mask", + "qname": "sklearn.feature_selection._univariate_selection.SelectFpr._get_support_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFpr/_get_support_mask/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.SelectFpr._get_support_mask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_support_mask(self):\n check_is_fitted(self)\n\n return self.pvalues_ < self.alpha" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFwe/__init__", + "name": "__init__", + "qname": "sklearn.feature_selection._univariate_selection.SelectFwe.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFwe/__init__/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.SelectFwe.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFwe/__init__/score_func", + "name": "score_func", + "qname": "sklearn.feature_selection._univariate_selection.SelectFwe.__init__.score_func", + "default_value": "f_classif", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "f_classif", + "description": "Function taking two arrays X and y, and returning a pair of arrays\n(scores, pvalues).\nDefault is f_classif (see below \"See Also\"). The default function only\nworks with classification tasks." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFwe/__init__/alpha", + "name": "alpha", + "qname": "sklearn.feature_selection._univariate_selection.SelectFwe.__init__.alpha", + "default_value": "0.05", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "5e-2", + "description": "The highest uncorrected p-value for features to keep."
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Filter: Select the p-values corresponding to Family-wise error rate\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, score_func=f_classif, *, alpha=5e-2):\n super().__init__(score_func=score_func)\n self.alpha = alpha" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFwe/_get_support_mask", + "name": "_get_support_mask", + "qname": "sklearn.feature_selection._univariate_selection.SelectFwe._get_support_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectFwe/_get_support_mask/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.SelectFwe._get_support_mask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_support_mask(self):\n check_is_fitted(self)\n\n return (self.pvalues_ < self.alpha / len(self.pvalues_))" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest/__init__", + "name": "__init__", + "qname": "sklearn.feature_selection._univariate_selection.SelectKBest.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest/__init__/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.SelectKBest.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest/__init__/score_func", + "name": "score_func", + "qname": "sklearn.feature_selection._univariate_selection.SelectKBest.__init__.score_func", + "default_value": "f_classif", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "f_classif", + "description": "Function taking two arrays X and y, and returning a pair of arrays\n(scores, pvalues) or a single array with scores.\nDefault is f_classif (see below \"See Also\"). The default function only\nworks with classification tasks.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest/__init__/k", + "name": "k", + "qname": "sklearn.feature_selection._univariate_selection.SelectKBest.__init__.k", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or \"all\"", + "default_value": "10", + "description": "Number of top features to select.\nThe \"all\" option bypasses selection, for use in a parameter search." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "\"all\"" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Select features according to the k highest scores.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, score_func=f_classif, *, k=10):\n super().__init__(score_func=score_func)\n self.k = k" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest/_check_params", + "name": "_check_params", + "qname": "sklearn.feature_selection._univariate_selection.SelectKBest._check_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest/_check_params/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.SelectKBest._check_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest/_check_params/X", + "name": "X", + "qname": "sklearn.feature_selection._univariate_selection.SelectKBest._check_params.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest/_check_params/y", + "name": "y", + "qname": "sklearn.feature_selection._univariate_selection.SelectKBest._check_params.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_params(self, X, y):\n if not (self.k == \"all\" or 0 <= self.k <= X.shape[1]):\n raise ValueError(\"k should be >=0, <= n_features = %d; got %r. \"\n \"Use k='all' to return all features.\"\n % (X.shape[1], self.k))" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest/_get_support_mask", + "name": "_get_support_mask", + "qname": "sklearn.feature_selection._univariate_selection.SelectKBest._get_support_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectKBest/_get_support_mask/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.SelectKBest._get_support_mask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_support_mask(self):\n check_is_fitted(self)\n\n if self.k == 'all':\n return np.ones(self.scores_.shape, dtype=bool)\n elif self.k == 0:\n return np.zeros(self.scores_.shape, dtype=bool)\n else:\n scores = _clean_nans(self.scores_)\n mask = np.zeros(scores.shape, dtype=bool)\n\n # Request a stable sort. 
Mergesort takes more memory (~40MB per\n # megafeature on x86-64).\n mask[np.argsort(scores, kind=\"mergesort\")[-self.k:]] = 1\n return mask" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile/__init__", + "name": "__init__", + "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile/__init__/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile/__init__/score_func", + "name": "score_func", + "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile.__init__.score_func", + "default_value": "f_classif", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "f_classif", + "description": "Function taking two arrays X and y, and returning a pair of arrays\n(scores, pvalues) or a single array with scores.\nDefault is f_classif (see below \"See Also\"). The default function only\nworks with classification tasks.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile/__init__/percentile", + "name": "percentile", + "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile.__init__.percentile", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Percent of features to keep." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Select features according to a percentile of the highest scores.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, score_func=f_classif, *, percentile=10):\n super().__init__(score_func=score_func)\n self.percentile = percentile" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile/_check_params", + "name": "_check_params", + "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile._check_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile/_check_params/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile._check_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile/_check_params/X", + "name": "X", + "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile._check_params.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile/_check_params/y", + "name": "y", + "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile._check_params.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_params(self, X, y):\n if not 0 <= self.percentile <= 100:\n raise ValueError(\"percentile should be >=0, <=100; got %r\"\n % self.percentile)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile/_get_support_mask", + "name": "_get_support_mask", + "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile._get_support_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/SelectPercentile/_get_support_mask/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection.SelectPercentile._get_support_mask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_support_mask(self):\n check_is_fitted(self)\n\n # Cater for NaNs\n if self.percentile == 100:\n return np.ones(len(self.scores_), dtype=bool)\n elif self.percentile == 0:\n return np.zeros(len(self.scores_), dtype=bool)\n\n scores = _clean_nans(self.scores_)\n threshold = np.percentile(scores, 100 - self.percentile)\n mask = scores > threshold\n ties = np.where(scores == threshold)[0]\n if len(ties):\n max_feats = int(len(scores) * self.percentile / 100)\n kept_ties = ties[:max_feats - mask.sum()]\n mask[kept_ties] = True\n return mask" 
+ }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/__init__", + "name": "__init__", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/__init__/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/__init__/score_func", + "name": "score_func", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter.__init__.score_func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Function taking two arrays X and y, and returning a pair of arrays\n(scores, pvalues) or a single array with scores." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Initialize the univariate feature selection.", + "docstring": "", + "code": " def __init__(self, score_func):\n self.score_func = score_func" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/_check_params", + "name": "_check_params", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter._check_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/_check_params/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter._check_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/_check_params/X", + "name": "X", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter._check_params.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/_check_params/y", + "name": "y", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter._check_params.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_params(self, X, y):\n pass" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/_more_tags", + "name": "_more_tags", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/_more_tags/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": 
false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'requires_y': True}" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/fit", + "name": "fit", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/fit/self", + "name": "self", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/fit/X", + "name": "X", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_BaseFilter/fit/y", + "name": "y", + "qname": "sklearn.feature_selection._univariate_selection._BaseFilter.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target values (class labels in classification, real numbers in\nregression)." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Run score function on (X, y) and get the appropriate features.", + "docstring": "Run score function on (X, y) and get the appropriate features.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training input samples.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y):\n \"\"\"Run score function on (X, y) and get the appropriate features.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The training input samples.\n\n y : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'],\n multi_output=True)\n\n if not callable(self.score_func):\n raise TypeError(\"The score function should be a callable, %s (%s) \"\n \"was passed.\"\n % (self.score_func, type(self.score_func)))\n\n self._check_params(X, y)\n score_func_ret = self.score_func(X, y)\n if isinstance(score_func_ret, (list, tuple)):\n self.scores_, self.pvalues_ = score_func_ret\n self.pvalues_ = np.asarray(self.pvalues_)\n else:\n self.scores_ = score_func_ret\n self.pvalues_ = None\n\n self.scores_ = np.asarray(self.scores_)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_chisquare", + "name": "_chisquare", + "qname": "sklearn.feature_selection._univariate_selection._chisquare", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_chisquare/f_obs", + "name": "f_obs", + "qname": "sklearn.feature_selection._univariate_selection._chisquare.f_obs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_chisquare/f_exp", + "name": "f_exp", + "qname": "sklearn.feature_selection._univariate_selection._chisquare.f_exp", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fast replacement for scipy.stats.chisquare.\n\nVersion from https://github.com/scipy/scipy/pull/2525 with additional\noptimizations.", + "docstring": "Fast replacement for scipy.stats.chisquare.\n\nVersion from https://github.com/scipy/scipy/pull/2525 with additional\noptimizations.", + "code": "def _chisquare(f_obs, f_exp):\n \"\"\"Fast replacement for scipy.stats.chisquare.\n\n Version from https://github.com/scipy/scipy/pull/2525 with additional\n optimizations.\n \"\"\"\n f_obs = np.asarray(f_obs, dtype=np.float64)\n\n k = len(f_obs)\n # Reuse f_obs for chi-squared statistics\n chisq = f_obs\n chisq -= f_exp\n chisq **= 2\n with np.errstate(invalid=\"ignore\"):\n chisq /= f_exp\n chisq = chisq.sum(axis=0)\n return chisq, special.chdtrc(k - 1, chisq)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_clean_nans", + "name": "_clean_nans", + "qname": 
"sklearn.feature_selection._univariate_selection._clean_nans", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/_clean_nans/scores", + "name": "scores", + "qname": "sklearn.feature_selection._univariate_selection._clean_nans.scores", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fixes Issue #1240: NaNs can't be properly compared, so change them to the\nsmallest value of scores's dtype. -inf seems to be unreliable.", + "docstring": "Fixes Issue #1240: NaNs can't be properly compared, so change them to the\nsmallest value of scores's dtype. -inf seems to be unreliable.", + "code": "def _clean_nans(scores):\n \"\"\"\n Fixes Issue #1240: NaNs can't be properly compared, so change them to the\n smallest value of scores's dtype. -inf seems to be unreliable.\n \"\"\"\n # XXX where should this function be called? fit? scoring functions\n # themselves?\n scores = as_float_array(scores, copy=True)\n scores[np.isnan(scores)] = np.finfo(scores.dtype).min\n return scores" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/chi2", + "name": "chi2", + "qname": "sklearn.feature_selection._univariate_selection.chi2", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/chi2/X", + "name": "X", + "qname": "sklearn.feature_selection._univariate_selection.chi2.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Sample vectors." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/chi2/y", + "name": "y", + "qname": "sklearn.feature_selection._univariate_selection.chi2.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target vector (class labels)." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute chi-squared stats between each non-negative feature and class.\n\nThis score can be used to select the n_features features with the\nhighest values for the test chi-squared statistic from X, which must\ncontain only non-negative features such as booleans or frequencies\n(e.g., term counts in document classification), relative to the classes.\n\nRecall that the chi-square test measures dependence between stochastic\nvariables, so using this function \"weeds out\" the features that are the\nmost likely to be independent of class and therefore irrelevant for\nclassification.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute chi-squared stats between each non-negative feature and class.\n\nThis score can be used to select the n_features features with the\nhighest values for the test chi-squared statistic from X, which must\ncontain only non-negative features such as booleans or frequencies\n(e.g., term counts in document classification), relative to the classes.\n\nRecall that the chi-square test measures dependence between stochastic\nvariables, so using this function \"weeds out\" the features that are the\nmost likely to be independent of class and therefore irrelevant for\nclassification.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Sample vectors.\n\ny : array-like of shape (n_samples,)\n Target vector (class labels).\n\nReturns\n-------\nchi2 : ndarray of shape (n_features,)\n Chi2 statistics for each feature.\n\np_values : ndarray of shape (n_features,)\n P-values for each feature.\n\nNotes\n-----\nComplexity of this algorithm is O(n_classes * n_features).\n\nSee Also\n--------\nf_classif : ANOVA F-value between label/feature for classification tasks.\nf_regression : F-value between label/feature for regression tasks.", + "code": "def chi2(X, y):\n \"\"\"Compute chi-squared stats between each non-negative feature and class.\n\n This score can be used to select the n_features features with the\n highest values for the test chi-squared statistic from X, which must\n contain only non-negative features such as booleans or frequencies\n (e.g., term counts in document classification), relative to the classes.\n\n Recall that the chi-square test measures dependence between stochastic\n variables, so using this function \"weeds out\" the features that are the\n most likely to be independent of class and therefore irrelevant for\n classification.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Sample vectors.\n\n y : array-like of shape (n_samples,)\n Target vector (class labels).\n\n Returns\n -------\n chi2 : ndarray of shape (n_features,)\n Chi2 statistics for each feature.\n\n p_values : ndarray of shape (n_features,)\n P-values for each feature.\n\n Notes\n -----\n Complexity of this algorithm is O(n_classes * n_features).\n\n See Also\n --------\n f_classif : ANOVA F-value between label/feature for classification tasks.\n f_regression : F-value between label/feature for regression tasks.\n \"\"\"\n\n # XXX: we might want to do some of the following in logspace instead for\n # numerical stability.\n X = check_array(X, accept_sparse='csr')\n if np.any((X.data if issparse(X) else X) < 0):\n raise ValueError(\"Input X 
must be non-negative.\")\n\n Y = LabelBinarizer().fit_transform(y)\n if Y.shape[1] == 1:\n Y = np.append(1 - Y, Y, axis=1)\n\n observed = safe_sparse_dot(Y.T, X) # n_classes * n_features\n\n feature_count = X.sum(axis=0).reshape(1, -1)\n class_prob = Y.mean(axis=0).reshape(1, -1)\n expected = np.dot(class_prob.T, feature_count)\n\n return _chisquare(observed, expected)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/f_classif", + "name": "f_classif", + "qname": "sklearn.feature_selection._univariate_selection.f_classif", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/f_classif/X", + "name": "X", + "qname": "sklearn.feature_selection._univariate_selection.f_classif.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The set of regressors that will be tested sequentially." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/f_classif/y", + "name": "y", + "qname": "sklearn.feature_selection._univariate_selection.f_classif.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The target vector." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the ANOVA F-value for the provided sample.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the ANOVA F-value for the provided sample.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The set of regressors that will be tested sequentially.\n\ny : ndarray of shape (n_samples,)\n The target vector.\n\nReturns\n-------\nf_statistic : ndarray of shape (n_features,)\n F-statistic for each feature.\n\np_values : ndarray of shape (n_features,)\n P-values associated with the F-statistic.\n\nSee Also\n--------\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nf_regression : F-value between label/feature for regression tasks.", + "code": "def f_classif(X, y):\n \"\"\"Compute the ANOVA F-value for the provided sample.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The set of regressors that will be tested sequentially.\n\n y : ndarray of shape (n_samples,)\n The target vector.\n\n Returns\n -------\n f_statistic : ndarray of shape (n_features,)\n F-statistic for each feature.\n\n p_values : ndarray of shape (n_features,)\n P-values associated with the F-statistic.\n\n See Also\n --------\n chi2 : Chi-squared stats of non-negative features for classification tasks.\n f_regression : F-value between label/feature for regression tasks.\n \"\"\"\n X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'])\n args = [X[safe_mask(X, y == k)] for k in np.unique(y)]\n return f_oneway(*args)" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/f_oneway", + "name": "f_oneway", + "qname": 
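The `chi2` entry above builds the observed class-by-feature count table as `Y.T @ X` and tests it against the counts expected under independence. A minimal usage sketch, assuming scikit-learn 0.24.x is installed; the toy counts are illustrative:

    import numpy as np
    from sklearn.feature_selection import SelectKBest, chi2

    # Non-negative features (e.g. term counts) and binary class labels.
    X = np.array([[1, 0, 3],
                  [0, 2, 1],
                  [2, 0, 4],
                  [0, 3, 0]])
    y = np.array([0, 1, 0, 1])

    chi2_stats, pvalues = chi2(X, y)   # one chi2 stat / p-value per feature
    X_best = SelectKBest(chi2, k=2).fit_transform(X, y)  # keep 2 best features
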
"sklearn.feature_selection._univariate_selection.f_oneway", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/f_oneway/args", + "name": "args", + "qname": "sklearn.feature_selection._univariate_selection.f_oneway.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}", + "default_value": "", + "description": "sample1, sample2... The sample measurements should be given as\narguments." + }, + "type": { + "kind": "EnumType", + "values": [] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Performs a 1-way ANOVA.\n\nThe one-way ANOVA tests the null hypothesis that 2 or more groups have\nthe same population mean. The test is applied to samples from two or\nmore groups, possibly with differing sizes.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Performs a 1-way ANOVA.\n\nThe one-way ANOVA tests the null hypothesis that 2 or more groups have\nthe same population mean. The test is applied to samples from two or\nmore groups, possibly with differing sizes.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n*args : {array-like, sparse matrix}\n sample1, sample2... The sample measurements should be given as\n arguments.\n\nReturns\n-------\nf_statistic : float\n The computed F-value of the test.\np_value : float\n The associated p-value from the F-distribution.\n\nNotes\n-----\nThe ANOVA test has important assumptions that must be satisfied in order\nfor the associated p-value to be valid.\n\n1. The samples are independent\n2. Each sample is from a normally distributed population\n3. The population standard deviations of the groups are all equal. This\n property is known as homoscedasticity.\n\nIf these assumptions are not true for a given set of data, it may still be\npossible to use the Kruskal-Wallis H-test (`scipy.stats.kruskal`_) although\nwith some loss of power.\n\nThe algorithm is from Heiman[2], pp.394-7.\n\nSee ``scipy.stats.f_oneway`` that should give the same results while\nbeing less efficient.\n\nReferences\n----------\n\n.. [1] Lowry, Richard. \"Concepts and Applications of Inferential\n Statistics\". Chapter 14.\n http://faculty.vassar.edu/lowry/ch14pt1.html\n\n.. [2] Heiman, G.W. Research Methods in Statistics. 2002.", + "code": "def f_oneway(*args):\n \"\"\"Performs a 1-way ANOVA.\n\n The one-way ANOVA tests the null hypothesis that 2 or more groups have\n the same population mean. The test is applied to samples from two or\n more groups, possibly with differing sizes.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n *args : {array-like, sparse matrix}\n sample1, sample2... The sample measurements should be given as\n arguments.\n\n Returns\n -------\n f_statistic : float\n The computed F-value of the test.\n p_value : float\n The associated p-value from the F-distribution.\n\n Notes\n -----\n The ANOVA test has important assumptions that must be satisfied in order\n for the associated p-value to be valid.\n\n 1. The samples are independent\n 2. Each sample is from a normally distributed population\n 3. The population standard deviations of the groups are all equal. 
This\n property is known as homoscedasticity.\n\n If these assumptions are not true for a given set of data, it may still be\n possible to use the Kruskal-Wallis H-test (`scipy.stats.kruskal`_) although\n with some loss of power.\n\n The algorithm is from Heiman[2], pp.394-7.\n\n See ``scipy.stats.f_oneway`` that should give the same results while\n being less efficient.\n\n References\n ----------\n\n .. [1] Lowry, Richard. \"Concepts and Applications of Inferential\n Statistics\". Chapter 14.\n http://faculty.vassar.edu/lowry/ch14pt1.html\n\n .. [2] Heiman, G.W. Research Methods in Statistics. 2002.\n\n \"\"\"\n n_classes = len(args)\n args = [as_float_array(a) for a in args]\n n_samples_per_class = np.array([a.shape[0] for a in args])\n n_samples = np.sum(n_samples_per_class)\n ss_alldata = sum(safe_sqr(a).sum(axis=0) for a in args)\n sums_args = [np.asarray(a.sum(axis=0)) for a in args]\n square_of_sums_alldata = sum(sums_args) ** 2\n square_of_sums_args = [s ** 2 for s in sums_args]\n sstot = ss_alldata - square_of_sums_alldata / float(n_samples)\n ssbn = 0.\n for k, _ in enumerate(args):\n ssbn += square_of_sums_args[k] / n_samples_per_class[k]\n ssbn -= square_of_sums_alldata / float(n_samples)\n sswn = sstot - ssbn\n dfbn = n_classes - 1\n dfwn = n_samples - n_classes\n msb = ssbn / float(dfbn)\n msw = sswn / float(dfwn)\n constant_features_idx = np.where(msw == 0.)[0]\n if (np.nonzero(msb)[0].size != msb.size and constant_features_idx.size):\n warnings.warn(\"Features %s are constant.\" % constant_features_idx,\n UserWarning)\n f = msb / msw\n # flatten matrix to vector in sparse case\n f = np.asarray(f).ravel()\n prob = special.fdtrc(dfbn, dfwn, f)\n return f, prob" + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/f_regression", + "name": "f_regression", + "qname": "sklearn.feature_selection._univariate_selection.f_regression", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/f_regression/X", + "name": "X", + "qname": "sklearn.feature_selection._univariate_selection.f_regression.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} shape = (n_samples, n_features)", + "default_value": "", + "description": "The set of regressors that will be tested sequentially." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape = (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/f_regression/y", + "name": "y", + "qname": "sklearn.feature_selection._univariate_selection.f_regression.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape(n_samples).", + "default_value": "", + "description": "The data matrix" + }, + "type": { + "kind": "NamedType", + "name": "array of shape(n_samples)." + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._univariate_selection/f_regression/center", + "name": "center", + "qname": "sklearn.feature_selection._univariate_selection.f_regression.center", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If true, X and y will be centered." 
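As the entries above record, `f_classif` is a vectorized one-way ANOVA: it splits each feature by class label and feeds the groups to the `f_oneway` implementation shown alongside it. A cross-check against SciPy's scalar version, on random data of our own:

    import numpy as np
    from scipy import stats
    from sklearn.feature_selection import f_classif

    rng = np.random.RandomState(0)
    X = rng.randn(30, 4)
    y = rng.randint(0, 3, size=30)

    F, p = f_classif(X, y)                       # all features at once

    # Feature 0 only, via SciPy's scalar one-way ANOVA.
    groups = [X[y == k, 0] for k in np.unique(y)]
    F0, p0 = stats.f_oneway(*groups)
    assert np.allclose(F[0], F0) and np.allclose(p[0], p0)
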
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Univariate linear regression tests.\n\nLinear model for testing the individual effect of each of many regressors.\nThis is a scoring function to be used in a feature selection procedure, not\na free standing feature selection procedure.\n\nThis is done in 2 steps:\n\n1. The correlation between each regressor and the target is computed,\n that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) *\n std(y)).\n2. It is converted to an F score then to a p-value.\n\nFor more on usage see the :ref:`User Guide `.", + "docstring": "Univariate linear regression tests.\n\nLinear model for testing the individual effect of each of many regressors.\nThis is a scoring function to be used in a feature selection procedure, not\na free standing feature selection procedure.\n\nThis is done in 2 steps:\n\n1. The correlation between each regressor and the target is computed,\n that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) *\n std(y)).\n2. It is converted to an F score then to a p-value.\n\nFor more on usage see the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} shape = (n_samples, n_features)\n The set of regressors that will be tested sequentially.\n\ny : array of shape(n_samples).\n The data matrix\n\ncenter : bool, default=True\n If true, X and y will be centered.\n\nReturns\n-------\nF : array, shape=(n_features,)\n F values of features.\n\npval : array, shape=(n_features,)\n p-values of F-scores.\n\nSee Also\n--------\nmutual_info_regression : Mutual information for a continuous target.\nf_classif : ANOVA F-value between label/feature for classification tasks.\nchi2 : Chi-squared stats of non-negative features for classification tasks.\nSelectKBest : Select features based on the k highest scores.\nSelectFpr : Select features based on a false positive rate test.\nSelectFdr : Select features based on an estimated false discovery rate.\nSelectFwe : Select features based on family-wise error rate.\nSelectPercentile : Select features based on percentile of the highest\n scores.", + "code": "@_deprecate_positional_args\ndef f_regression(X, y, *, center=True):\n \"\"\"Univariate linear regression tests.\n\n Linear model for testing the individual effect of each of many regressors.\n This is a scoring function to be used in a feature selection procedure, not\n a free standing feature selection procedure.\n\n This is done in 2 steps:\n\n 1. The correlation between each regressor and the target is computed,\n that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) *\n std(y)).\n 2. 
It is converted to an F score then to a p-value.\n\n For more on usage see the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} shape = (n_samples, n_features)\n The set of regressors that will be tested sequentially.\n\n y : array of shape(n_samples).\n The data matrix\n\n center : bool, default=True\n If true, X and y will be centered.\n\n Returns\n -------\n F : array, shape=(n_features,)\n F values of features.\n\n pval : array, shape=(n_features,)\n p-values of F-scores.\n\n See Also\n --------\n mutual_info_regression : Mutual information for a continuous target.\n f_classif : ANOVA F-value between label/feature for classification tasks.\n chi2 : Chi-squared stats of non-negative features for classification tasks.\n SelectKBest : Select features based on the k highest scores.\n SelectFpr : Select features based on a false positive rate test.\n SelectFdr : Select features based on an estimated false discovery rate.\n SelectFwe : Select features based on family-wise error rate.\n SelectPercentile : Select features based on percentile of the highest\n scores.\n \"\"\"\n X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n dtype=np.float64)\n n_samples = X.shape[0]\n\n # compute centered values\n # note that E[(x - mean(x))*(y - mean(y))] = E[x*(y - mean(y))], so we\n # need not center X\n if center:\n y = y - np.mean(y)\n if issparse(X):\n X_means = X.mean(axis=0).getA1()\n else:\n X_means = X.mean(axis=0)\n # compute the scaled standard deviations via moments\n X_norms = np.sqrt(row_norms(X.T, squared=True) -\n n_samples * X_means ** 2)\n else:\n X_norms = row_norms(X.T)\n\n # compute the correlation\n corr = safe_sparse_dot(y, X)\n corr /= X_norms\n corr /= np.linalg.norm(y)\n\n # convert to p-value\n degrees_of_freedom = y.size - (2 if center else 1)\n F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom\n pv = stats.f.sf(F, 1, degrees_of_freedom)\n return F, pv" + }, + { + "id": "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/__init__", + "name": "__init__", + "qname": "sklearn.feature_selection._variance_threshold.VarianceThreshold.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/__init__/self", + "name": "self", + "qname": "sklearn.feature_selection._variance_threshold.VarianceThreshold.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/__init__/threshold", + "name": "threshold", + "qname": "sklearn.feature_selection._variance_threshold.VarianceThreshold.__init__.threshold", + "default_value": "0.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "Features with a training-set variance lower than this threshold will\nbe removed. The default is to keep all features with non-zero variance,\ni.e. remove the features that have the same value in all samples." 
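The `f_regression` docstring above spells out a two-step recipe: a Pearson correlation per regressor, then an F statistic on 1 and n - 2 degrees of freedom when `center=True`. The recipe can be replayed by hand; the data and names below are illustrative:

    import numpy as np
    from scipy import stats
    from sklearn.feature_selection import f_regression

    rng = np.random.RandomState(0)
    X = rng.randn(100, 3)
    y = X[:, 0] + 0.5 * rng.randn(100)      # only feature 0 is informative

    F, p = f_regression(X, y)

    corr = np.corrcoef(X[:, 0], y)[0, 1]    # step 1: correlation
    dof = len(y) - 2                        # center=True costs one extra dof
    F0 = corr ** 2 / (1 - corr ** 2) * dof  # step 2: convert to an F score
    assert np.allclose(F[0], F0)
    assert np.allclose(p[0], stats.f.sf(F0, 1, dof))
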
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Feature selector that removes all low-variance features.\n\nThis feature selection algorithm looks only at the features (X), not the\ndesired outputs (y), and can thus be used for unsupervised learning.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " def __init__(self, threshold=0.):\n self.threshold = threshold" + }, + { + "id": "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/_get_support_mask", + "name": "_get_support_mask", + "qname": "sklearn.feature_selection._variance_threshold.VarianceThreshold._get_support_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/_get_support_mask/self", + "name": "self", + "qname": "sklearn.feature_selection._variance_threshold.VarianceThreshold._get_support_mask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_support_mask(self):\n check_is_fitted(self)\n\n return self.variances_ > self.threshold" + }, + { + "id": "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/_more_tags", + "name": "_more_tags", + "qname": "sklearn.feature_selection._variance_threshold.VarianceThreshold._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/_more_tags/self", + "name": "self", + "qname": "sklearn.feature_selection._variance_threshold.VarianceThreshold._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'allow_nan': True}" + }, + { + "id": "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/fit", + "name": "fit", + "qname": "sklearn.feature_selection._variance_threshold.VarianceThreshold.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/fit/self", + "name": "self", + "qname": "sklearn.feature_selection._variance_threshold.VarianceThreshold.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/fit/X", + "name": "X", + "qname": "sklearn.feature_selection._variance_threshold.VarianceThreshold.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "Sample vectors from which to compute variances." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.feature_selection._variance_threshold/VarianceThreshold/fit/y", + "name": "y", + "qname": "sklearn.feature_selection._variance_threshold.VarianceThreshold.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "any", + "default_value": "None", + "description": "Ignored. This parameter exists only for compatibility with\nsklearn.pipeline.Pipeline." + }, + "type": { + "kind": "NamedType", + "name": "any" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Learn empirical variances from X.", + "docstring": "Learn empirical variances from X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Sample vectors from which to compute variances.\n\ny : any, default=None\n Ignored. This parameter exists only for compatibility with\n sklearn.pipeline.Pipeline.\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"Learn empirical variances from X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Sample vectors from which to compute variances.\n\n y : any, default=None\n Ignored. This parameter exists only for compatibility with\n sklearn.pipeline.Pipeline.\n\n Returns\n -------\n self\n \"\"\"\n X = self._validate_data(X, accept_sparse=('csr', 'csc'),\n dtype=np.float64,\n force_all_finite='allow-nan')\n\n if hasattr(X, \"toarray\"): # sparse matrix\n _, self.variances_ = mean_variance_axis(X, axis=0)\n if self.threshold == 0:\n mins, maxes = min_max_axis(X, axis=0)\n peak_to_peaks = maxes - mins\n else:\n self.variances_ = np.nanvar(X, axis=0)\n if self.threshold == 0:\n peak_to_peaks = np.ptp(X, axis=0)\n\n if self.threshold == 0:\n # Use peak-to-peak to avoid numeric precision issues\n # for constant features\n compare_arr = np.array([self.variances_, peak_to_peaks])\n self.variances_ = np.nanmin(compare_arr, axis=0)\n\n if np.all(~np.isfinite(self.variances_) |\n (self.variances_ <= self.threshold)):\n msg = \"No feature in X meets the variance threshold {0:.5f}\"\n if X.shape[0] == 1:\n msg += \" (X contains only one sample)\"\n raise ValueError(msg.format(self.threshold))\n\n return self" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/__init__/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/__init__/kernel", + "name": "kernel", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.__init__.kernel", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "kernel instance", + "default_value": "None", + "description": "The kernel specifying the covariance function of the GP. 
If None is\npassed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\nthe kernel's hyperparameters are optimized during fitting." + }, + "type": { + "kind": "NamedType", + "name": "kernel instance" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/__init__/optimizer", + "name": "optimizer", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.__init__.optimizer", + "default_value": "'fmin_l_bfgs_b'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'fmin_l_bfgs_b' or callable", + "default_value": "'fmin_l_bfgs_b'", + "description": "Can either be one of the internally supported optimizers for optimizing\nthe kernel's parameters, specified by a string, or an externally\ndefined optimizer passed as a callable. If a callable is passed, it\nmust have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be maximized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\nPer default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\nis used. If None is passed, the kernel's parameters are kept fixed.\nAvailable internal optimizers are::\n\n 'fmin_l_bfgs_b'" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'fmin_l_bfgs_b'" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/__init__/n_restarts_optimizer", + "name": "n_restarts_optimizer", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.__init__.n_restarts_optimizer", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The number of restarts of the optimizer for finding the kernel's\nparameters which maximize the log-marginal likelihood. The first run\nof the optimizer is performed from the kernel's initial parameters,\nthe remaining ones (if any) from thetas sampled log-uniform randomly\nfrom the space of allowed theta-values. If greater than 0, all bounds\nmust be finite. Note that n_restarts_optimizer=0 implies that one\nrun is performed." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/__init__/max_iter_predict", + "name": "max_iter_predict", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.__init__.max_iter_predict", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The maximum number of iterations in Newton's method for approximating\nthe posterior during predict. Smaller values will reduce computation\ntime at the cost of worse results." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If warm-starts are enabled, the solution of the last Newton iteration\non the Laplace approximation of the posterior mode is used as\ninitialization for the next call of _posterior_mode(). This can speed\nup convergence when _posterior_mode is called several times on similar\nproblems as in hyperparameter optimization. See :term:`the Glossary\n`." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/__init__/copy_X_train", + "name": "copy_X_train", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.__init__.copy_X_train", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, a persistent copy of the training data is stored in the\nobject. Otherwise, just a reference to the training data is stored,\nwhich might cause predictions to change if the data is modified\nexternally." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation used to initialize the centers.\nPass an int for reproducible results across multiple function calls.\nSee :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/__init__/multi_class", + "name": "multi_class", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.__init__.multi_class", + "default_value": "'one_vs_rest'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'one_vs_rest', 'one_vs_one'}", + "default_value": "'one_vs_rest'", + "description": "Specifies how multi-class classification problems are handled.\nSupported are 'one_vs_rest' and 'one_vs_one'. In 'one_vs_rest',\none binary Gaussian process classifier is fitted for each class, which\nis trained to separate this class from the rest. In 'one_vs_one', one\nbinary Gaussian process classifier is fitted for each pair of classes,\nwhich is trained to separate these two classes. The predictions of\nthese binary predictors are combined into multi-class predictions.\nNote that 'one_vs_one' does not support predicting probability\nestimates." 
+ }, + "type": { + "kind": "EnumType", + "values": ["one_vs_rest", "one_vs_one"] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation: the specified\nmulticlass problems are computed in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Gaussian process classification (GPC) based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\nGaussian Processes for Machine Learning (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction. For multi-class classification, several binary one-versus rest\nclassifiers are fitted. Note that this class thus does not implement\na true multi-class Laplace approximation.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, kernel=None, *, optimizer=\"fmin_l_bfgs_b\",\n n_restarts_optimizer=0, max_iter_predict=100,\n warm_start=False, copy_X_train=True, random_state=None,\n multi_class=\"one_vs_rest\", n_jobs=None):\n self.kernel = kernel\n self.optimizer = optimizer\n self.n_restarts_optimizer = n_restarts_optimizer\n self.max_iter_predict = max_iter_predict\n self.warm_start = warm_start\n self.copy_X_train = copy_X_train\n self.random_state = random_state\n self.multi_class = multi_class\n self.n_jobs = n_jobs" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/fit", + "name": "fit", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/fit/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/fit/X", + "name": "X", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or list of object", + "default_value": "", + "description": "Feature vectors or other representations of training data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/fit/y", + "name": "y", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values, must be binary" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit Gaussian process classification model", + "docstring": "Fit Gaussian process classification model\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\ny : array-like of shape (n_samples,)\n Target values, must be binary\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y):\n \"\"\"Fit Gaussian process classification model\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\n y : array-like of shape (n_samples,)\n Target values, must be binary\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n if self.kernel is None or self.kernel.requires_vector_input:\n X, y = self._validate_data(X, y, multi_output=False,\n ensure_2d=True, dtype=\"numeric\")\n else:\n X, y = self._validate_data(X, y, multi_output=False,\n ensure_2d=False, dtype=None)\n\n self.base_estimator_ = _BinaryGaussianProcessClassifierLaplace(\n kernel=self.kernel,\n optimizer=self.optimizer,\n n_restarts_optimizer=self.n_restarts_optimizer,\n max_iter_predict=self.max_iter_predict,\n warm_start=self.warm_start,\n copy_X_train=self.copy_X_train,\n random_state=self.random_state)\n\n self.classes_ = np.unique(y)\n self.n_classes_ = self.classes_.size\n if self.n_classes_ == 1:\n raise ValueError(\"GaussianProcessClassifier requires 2 or more \"\n \"distinct classes; got %d class (only class %s \"\n \"is present)\"\n % (self.n_classes_, self.classes_[0]))\n if self.n_classes_ > 2:\n if self.multi_class == \"one_vs_rest\":\n self.base_estimator_ = \\\n OneVsRestClassifier(self.base_estimator_,\n n_jobs=self.n_jobs)\n elif self.multi_class == \"one_vs_one\":\n self.base_estimator_ = \\\n OneVsOneClassifier(self.base_estimator_,\n n_jobs=self.n_jobs)\n else:\n raise ValueError(\"Unknown multi-class mode %s\"\n % self.multi_class)\n\n self.base_estimator_.fit(X, y)\n\n if self.n_classes_ > 2:\n self.log_marginal_likelihood_value_ = np.mean(\n [estimator.log_marginal_likelihood()\n for estimator in self.base_estimator_.estimators_])\n else:\n self.log_marginal_likelihood_value_ = \\\n self.base_estimator_.log_marginal_likelihood()\n\n return self" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/kernel_@getter", + "name": "kernel_", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.kernel_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/kernel_/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.kernel_.self", + 
"default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def kernel_(self):\n if self.n_classes_ == 2:\n return self.base_estimator_.kernel_\n else:\n return CompoundKernel(\n [estimator.kernel_\n for estimator in self.base_estimator_.estimators_])" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/log_marginal_likelihood", + "name": "log_marginal_likelihood", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.log_marginal_likelihood", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/log_marginal_likelihood/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.log_marginal_likelihood.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/log_marginal_likelihood/theta", + "name": "theta", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.log_marginal_likelihood.theta", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_kernel_params,)", + "default_value": "None", + "description": "Kernel hyperparameters for which the log-marginal likelihood is\nevaluated. In the case of multi-class classification, theta may\nbe the hyperparameters of the compound kernel or of an individual\nkernel. In the latter case, all individual kernel get assigned the\nsame theta values. If None, the precomputed log_marginal_likelihood\nof ``self.kernel_.theta`` is returned." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_kernel_params,)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/log_marginal_likelihood/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.log_marginal_likelihood.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the gradient of the log-marginal likelihood with respect\nto the kernel hyperparameters at position theta is returned\nadditionally. Note that gradient computation is not supported\nfor non-binary classification. If True, theta must not be None." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/log_marginal_likelihood/clone_kernel", + "name": "clone_kernel", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.log_marginal_likelihood.clone_kernel", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, the kernel attribute is copied. If False, the kernel\nattribute is modified, but may result in a performance improvement." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns log-marginal likelihood of theta for training data.\n\nIn the case of multi-class classification, the mean log-marginal\nlikelihood of the one-versus-rest classifiers are returned.", + "docstring": "Returns log-marginal likelihood of theta for training data.\n\nIn the case of multi-class classification, the mean log-marginal\nlikelihood of the one-versus-rest classifiers are returned.\n\nParameters\n----------\ntheta : array-like of shape (n_kernel_params,), default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. In the case of multi-class classification, theta may\n be the hyperparameters of the compound kernel or of an individual\n kernel. In the latter case, all individual kernel get assigned the\n same theta values. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\neval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. Note that gradient computation is not supported\n for non-binary classification. If True, theta must not be None.\n\nclone_kernel : bool, default=True\n If True, the kernel attribute is copied. If False, the kernel\n attribute is modified, but may result in a performance improvement.\n\nReturns\n-------\nlog_likelihood : float\n Log-marginal likelihood of theta for training data.\n\nlog_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when `eval_gradient` is True.", + "code": " def log_marginal_likelihood(self, theta=None, eval_gradient=False,\n clone_kernel=True):\n \"\"\"Returns log-marginal likelihood of theta for training data.\n\n In the case of multi-class classification, the mean log-marginal\n likelihood of the one-versus-rest classifiers are returned.\n\n Parameters\n ----------\n theta : array-like of shape (n_kernel_params,), default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. In the case of multi-class classification, theta may\n be the hyperparameters of the compound kernel or of an individual\n kernel. In the latter case, all individual kernel get assigned the\n same theta values. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\n eval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. Note that gradient computation is not supported\n for non-binary classification. If True, theta must not be None.\n\n clone_kernel : bool, default=True\n If True, the kernel attribute is copied. 
If False, the kernel\n attribute is modified, but may result in a performance improvement.\n\n Returns\n -------\n log_likelihood : float\n Log-marginal likelihood of theta for training data.\n\n log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when `eval_gradient` is True.\n \"\"\"\n check_is_fitted(self)\n\n if theta is None:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated for theta!=None\")\n return self.log_marginal_likelihood_value_\n\n theta = np.asarray(theta)\n if self.n_classes_ == 2:\n return self.base_estimator_.log_marginal_likelihood(\n theta, eval_gradient, clone_kernel=clone_kernel)\n else:\n if eval_gradient:\n raise NotImplementedError(\n \"Gradient of log-marginal-likelihood not implemented for \"\n \"multi-class GPC.\")\n estimators = self.base_estimator_.estimators_\n n_dims = estimators[0].kernel_.n_dims\n if theta.shape[0] == n_dims: # use same theta for all sub-kernels\n return np.mean(\n [estimator.log_marginal_likelihood(\n theta, clone_kernel=clone_kernel)\n for i, estimator in enumerate(estimators)])\n elif theta.shape[0] == n_dims * self.classes_.shape[0]:\n # theta for compound kernel\n return np.mean(\n [estimator.log_marginal_likelihood(\n theta[n_dims * i:n_dims * (i + 1)],\n clone_kernel=clone_kernel)\n for i, estimator in enumerate(estimators)])\n else:\n raise ValueError(\"Shape of theta must be either %d or %d. \"\n \"Obtained theta with shape %d.\"\n % (n_dims, n_dims * self.classes_.shape[0],\n theta.shape[0]))" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/predict", + "name": "predict", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/predict/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/predict/X", + "name": "X", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or list of object", + "default_value": "", + "description": "Query points where the GP is evaluated for classification." 
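As documented above, `log_marginal_likelihood` dispatches on the shape of `theta` in the multi-class case: a theta of length `n_dims` is broadcast to every one-vs-rest sub-kernel, while a compound theta of length `n_dims * n_classes` is split per estimator. A sketch on iris; the attribute access follows the recorded code, but the example itself is ours:

    from sklearn.datasets import load_iris
    from sklearn.gaussian_process import GaussianProcessClassifier

    X, y = load_iris(return_X_y=True)
    gpc = GaussianProcessClassifier(random_state=0).fit(X, y)

    print(gpc.log_marginal_likelihood())   # precomputed value at kernel_.theta

    # Same theta reused for every one-vs-rest sub-kernel (shape n_dims):
    theta = gpc.base_estimator_.estimators_[0].kernel_.theta
    print(gpc.log_marginal_likelihood(theta))
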
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform classification on an array of test vectors X.", + "docstring": "Perform classification on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n Predicted target values for X, values are from ``classes_``", + "code": " def predict(self, X):\n \"\"\"Perform classification on an array of test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\n Returns\n -------\n C : ndarray of shape (n_samples,)\n Predicted target values for X, values are from ``classes_``\n \"\"\"\n check_is_fitted(self)\n\n if self.kernel is None or self.kernel.requires_vector_input:\n X = check_array(X, ensure_2d=True, dtype=\"numeric\")\n else:\n X = check_array(X, ensure_2d=False, dtype=None)\n\n return self.base_estimator_.predict(X)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/GaussianProcessClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.gaussian_process._gpc.GaussianProcessClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or list of object", + "default_value": "", + "description": "Query points where the GP is evaluated for classification." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return probability estimates for the test vector X.", + "docstring": "Return probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. 
The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.", + "code": " def predict_proba(self, X):\n \"\"\"Return probability estimates for the test vector X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\n Returns\n -------\n C : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n if self.n_classes_ > 2 and self.multi_class == \"one_vs_one\":\n raise ValueError(\"one_vs_one multi-class mode does not support \"\n \"predicting probability estimates. Use \"\n \"one_vs_rest mode instead.\")\n\n if self.kernel is None or self.kernel.requires_vector_input:\n X = check_array(X, ensure_2d=True, dtype=\"numeric\")\n else:\n X = check_array(X, ensure_2d=False, dtype=None)\n\n return self.base_estimator_.predict_proba(X)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/__init__/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/__init__/kernel", + "name": "kernel", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.__init__.kernel", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "kernel instance", + "default_value": "None", + "description": "The kernel specifying the covariance function of the GP. If None is\npassed, the kernel \"1.0 * RBF(1.0)\" is used as default. Note that\nthe kernel's hyperparameters are optimized during fitting." + }, + "type": { + "kind": "NamedType", + "name": "kernel instance" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/__init__/optimizer", + "name": "optimizer", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.__init__.optimizer", + "default_value": "'fmin_l_bfgs_b'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'fmin_l_bfgs_b' or callable", + "default_value": "'fmin_l_bfgs_b'", + "description": "Can either be one of the internally supported optimizers for optimizing\nthe kernel's parameters, specified by a string, or an externally\ndefined optimizer passed as a callable. 
If a callable is passed, it\nmust have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be maximized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\nPer default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\nis used. If None is passed, the kernel's parameters are kept fixed.\nAvailable internal optimizers are::\n\n 'fmin_l_bfgs_b'" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'fmin_l_bfgs_b'" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/__init__/n_restarts_optimizer", + "name": "n_restarts_optimizer", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.__init__.n_restarts_optimizer", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The number of restarts of the optimizer for finding the kernel's\nparameters which maximize the log-marginal likelihood. The first run\nof the optimizer is performed from the kernel's initial parameters,\nthe remaining ones (if any) from thetas sampled log-uniform randomly\nfrom the space of allowed theta-values. If greater than 0, all bounds\nmust be finite. Note that n_restarts_optimizer=0 implies that one\nrun is performed." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/__init__/max_iter_predict", + "name": "max_iter_predict", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.__init__.max_iter_predict", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The maximum number of iterations in Newton's method for approximating\nthe posterior during predict. Smaller values will reduce computation\ntime at the cost of worse results." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If warm-starts are enabled, the solution of the last Newton iteration\non the Laplace approximation of the posterior mode is used as\ninitialization for the next call of _posterior_mode(). This can speed\nup convergence when _posterior_mode is called several times on similar\nproblems as in hyperparameter optimization. See :term:`the Glossary\n`." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/__init__/copy_X_train", + "name": "copy_X_train", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.__init__.copy_X_train", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, a persistent copy of the training data is stored in the\nobject. Otherwise, just a reference to the training data is stored,\nwhich might cause predictions to change if the data is modified\nexternally." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/__init__/random_state", + "name": "random_state", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation used to initialize the centers.\nPass an int for reproducible results across multiple function calls.\nSee :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Binary Gaussian process classification based on Laplace approximation.\n\nThe implementation is based on Algorithm 3.1, 3.2, and 5.1 of\n``Gaussian Processes for Machine Learning'' (GPML) by Rasmussen and\nWilliams.\n\nInternally, the Laplace approximation is used for approximating the\nnon-Gaussian posterior by a Gaussian.\n\nCurrently, the implementation is restricted to using the logistic link\nfunction.\n\n.. 
versionadded:: 0.18", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, kernel=None, *, optimizer=\"fmin_l_bfgs_b\",\n n_restarts_optimizer=0, max_iter_predict=100,\n warm_start=False, copy_X_train=True, random_state=None):\n self.kernel = kernel\n self.optimizer = optimizer\n self.n_restarts_optimizer = n_restarts_optimizer\n self.max_iter_predict = max_iter_predict\n self.warm_start = warm_start\n self.copy_X_train = copy_X_train\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/_constrained_optimization", + "name": "_constrained_optimization", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace._constrained_optimization", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/_constrained_optimization/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace._constrained_optimization.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/_constrained_optimization/obj_func", + "name": "obj_func", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace._constrained_optimization.obj_func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/_constrained_optimization/initial_theta", + "name": "initial_theta", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace._constrained_optimization.initial_theta", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/_constrained_optimization/bounds", + "name": "bounds", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace._constrained_optimization.bounds", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _constrained_optimization(self, obj_func, initial_theta, bounds):\n if self.optimizer == \"fmin_l_bfgs_b\":\n opt_res = scipy.optimize.minimize(\n obj_func, initial_theta, method=\"L-BFGS-B\", jac=True,\n bounds=bounds)\n _check_optimize_result(\"lbfgs\", opt_res)\n theta_opt, func_min = opt_res.x, opt_res.fun\n elif callable(self.optimizer):\n theta_opt, func_min = \\\n self.optimizer(obj_func, initial_theta, bounds=bounds)\n else:\n raise ValueError(\"Unknown optimizer %s.\" % self.optimizer)\n\n return theta_opt, func_min" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/_posterior_mode", + "name": "_posterior_mode", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace._posterior_mode", + "decorators": [], + 
"parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/_posterior_mode/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace._posterior_mode.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/_posterior_mode/K", + "name": "K", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace._posterior_mode.K", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/_posterior_mode/return_temporaries", + "name": "return_temporaries", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace._posterior_mode.return_temporaries", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mode-finding for binary Laplace GPC and fixed kernel.\n\nThis approximates the posterior of the latent function values for given\ninputs and target observations with a Gaussian approximation and uses\nNewton's iteration to find the mode of this approximation.", + "docstring": "Mode-finding for binary Laplace GPC and fixed kernel.\n\nThis approximates the posterior of the latent function values for given\ninputs and target observations with a Gaussian approximation and uses\nNewton's iteration to find the mode of this approximation.", + "code": " def _posterior_mode(self, K, return_temporaries=False):\n \"\"\"Mode-finding for binary Laplace GPC and fixed kernel.\n\n This approximates the posterior of the latent function values for given\n inputs and target observations with a Gaussian approximation and uses\n Newton's iteration to find the mode of this approximation.\n \"\"\"\n # Based on Algorithm 3.1 of GPML\n\n # If warm_start are enabled, we reuse the last solution for the\n # posterior mode as initialization; otherwise, we initialize with 0\n if self.warm_start and hasattr(self, \"f_cached\") \\\n and self.f_cached.shape == self.y_train_.shape:\n f = self.f_cached\n else:\n f = np.zeros_like(self.y_train_, dtype=np.float64)\n\n # Use Newton's iteration method to find mode of Laplace approximation\n log_marginal_likelihood = -np.inf\n for _ in range(self.max_iter_predict):\n # Line 4\n pi = expit(f)\n W = pi * (1 - pi)\n # Line 5\n W_sr = np.sqrt(W)\n W_sr_K = W_sr[:, np.newaxis] * K\n B = np.eye(W.shape[0]) + W_sr_K * W_sr\n L = cholesky(B, lower=True)\n # Line 6\n b = W * f + (self.y_train_ - pi)\n # Line 7\n a = b - W_sr * cho_solve((L, True), W_sr_K.dot(b))\n # Line 8\n f = K.dot(a)\n\n # Line 10: Compute log marginal likelihood in loop and use as\n # convergence criterion\n lml = -0.5 * a.T.dot(f) \\\n - np.log1p(np.exp(-(self.y_train_ * 2 - 1) * f)).sum() \\\n - np.log(np.diag(L)).sum()\n # Check if we have converged (log marginal likelihood does\n # not decrease)\n # XXX: more complex convergence criterion\n if lml - log_marginal_likelihood < 1e-10:\n break\n log_marginal_likelihood = lml\n\n self.f_cached = f 
# Remember solution for later warm-starts\n if return_temporaries:\n return log_marginal_likelihood, (pi, W_sr, L, b, a)\n else:\n return log_marginal_likelihood" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/fit", + "name": "fit", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/fit/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/fit/X", + "name": "X", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or list of object", + "default_value": "", + "description": "Feature vectors or other representations of training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/fit/y", + "name": "y", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values, must be binary" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit Gaussian process classification model", + "docstring": "Fit Gaussian process classification model\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\ny : array-like of shape (n_samples,)\n Target values, must be binary\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y):\n \"\"\"Fit Gaussian process classification model\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\n y : array-like of shape (n_samples,)\n Target values, must be binary\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n if self.kernel is None: # Use an RBF kernel as default\n self.kernel_ = C(1.0, constant_value_bounds=\"fixed\") \\\n * RBF(1.0, length_scale_bounds=\"fixed\")\n else:\n self.kernel_ = clone(self.kernel)\n\n self.rng = check_random_state(self.random_state)\n\n self.X_train_ = np.copy(X) if self.copy_X_train else X\n\n # Encode class labels and check that it is a binary classification\n # problem\n label_encoder = LabelEncoder()\n self.y_train_ = label_encoder.fit_transform(y)\n self.classes_ = label_encoder.classes_\n if self.classes_.size > 2:\n raise ValueError(\"%s supports only binary classification. 
\"\n \"y contains classes %s\"\n % (self.__class__.__name__, self.classes_))\n elif self.classes_.size == 1:\n raise ValueError(\"{0:s} requires 2 classes; got {1:d} class\"\n .format(self.__class__.__name__,\n self.classes_.size))\n\n if self.optimizer is not None and self.kernel_.n_dims > 0:\n # Choose hyperparameters based on maximizing the log-marginal\n # likelihood (potentially starting from several initial values)\n def obj_func(theta, eval_gradient=True):\n if eval_gradient:\n lml, grad = self.log_marginal_likelihood(\n theta, eval_gradient=True, clone_kernel=False)\n return -lml, -grad\n else:\n return -self.log_marginal_likelihood(theta,\n clone_kernel=False)\n\n # First optimize starting from theta specified in kernel\n optima = [self._constrained_optimization(obj_func,\n self.kernel_.theta,\n self.kernel_.bounds)]\n\n # Additional runs are performed from log-uniform chosen initial\n # theta\n if self.n_restarts_optimizer > 0:\n if not np.isfinite(self.kernel_.bounds).all():\n raise ValueError(\n \"Multiple optimizer restarts (n_restarts_optimizer>0) \"\n \"requires that all bounds are finite.\")\n bounds = self.kernel_.bounds\n for iteration in range(self.n_restarts_optimizer):\n theta_initial = np.exp(self.rng.uniform(bounds[:, 0],\n bounds[:, 1]))\n optima.append(\n self._constrained_optimization(obj_func, theta_initial,\n bounds))\n # Select result from run with minimal (negative) log-marginal\n # likelihood\n lml_values = list(map(itemgetter(1), optima))\n self.kernel_.theta = optima[np.argmin(lml_values)][0]\n self.kernel_._check_bounds_params()\n\n self.log_marginal_likelihood_value_ = -np.min(lml_values)\n else:\n self.log_marginal_likelihood_value_ = \\\n self.log_marginal_likelihood(self.kernel_.theta)\n\n # Precompute quantities required for predictions which are independent\n # of actual query points\n K = self.kernel_(self.X_train_)\n\n _, (self.pi_, self.W_sr_, self.L_, _, _) = \\\n self._posterior_mode(K, return_temporaries=True)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/log_marginal_likelihood", + "name": "log_marginal_likelihood", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.log_marginal_likelihood", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/log_marginal_likelihood/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.log_marginal_likelihood.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/log_marginal_likelihood/theta", + "name": "theta", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.log_marginal_likelihood.theta", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_kernel_params,)", + "default_value": "None", + "description": "Kernel hyperparameters for which the log-marginal likelihood is\nevaluated. If None, the precomputed log_marginal_likelihood\nof ``self.kernel_.theta`` is returned." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_kernel_params,)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/log_marginal_likelihood/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.log_marginal_likelihood.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the gradient of the log-marginal likelihood with respect\nto the kernel hyperparameters at position theta is returned\nadditionally. If True, theta must not be None." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/log_marginal_likelihood/clone_kernel", + "name": "clone_kernel", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.log_marginal_likelihood.clone_kernel", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, the kernel attribute is copied. If False, the kernel\nattribute is modified, but may result in a performance improvement." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns log-marginal likelihood of theta for training data.", + "docstring": "Returns log-marginal likelihood of theta for training data.\n\nParameters\n----------\ntheta : array-like of shape (n_kernel_params,), default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\neval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. If True, theta must not be None.\n\nclone_kernel : bool, default=True\n If True, the kernel attribute is copied. If False, the kernel\n attribute is modified, but may result in a performance improvement.\n\nReturns\n-------\nlog_likelihood : float\n Log-marginal likelihood of theta for training data.\n\nlog_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when `eval_gradient` is True.", + "code": " def log_marginal_likelihood(self, theta=None, eval_gradient=False,\n clone_kernel=True):\n \"\"\"Returns log-marginal likelihood of theta for training data.\n\n Parameters\n ----------\n theta : array-like of shape (n_kernel_params,), default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\n eval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. If True, theta must not be None.\n\n clone_kernel : bool, default=True\n If True, the kernel attribute is copied. 
If False, the kernel\n attribute is modified, but may result in a performance improvement.\n\n Returns\n -------\n log_likelihood : float\n Log-marginal likelihood of theta for training data.\n\n log_likelihood_gradient : ndarray of shape (n_kernel_params,), \\\n optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when `eval_gradient` is True.\n \"\"\"\n if theta is None:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated for theta!=None\")\n return self.log_marginal_likelihood_value_\n\n if clone_kernel:\n kernel = self.kernel_.clone_with_theta(theta)\n else:\n kernel = self.kernel_\n kernel.theta = theta\n\n if eval_gradient:\n K, K_gradient = kernel(self.X_train_, eval_gradient=True)\n else:\n K = kernel(self.X_train_)\n\n # Compute log-marginal-likelihood Z and also store some temporaries\n # which can be reused for computing Z's gradient\n Z, (pi, W_sr, L, b, a) = \\\n self._posterior_mode(K, return_temporaries=True)\n\n if not eval_gradient:\n return Z\n\n # Compute gradient based on Algorithm 5.1 of GPML\n d_Z = np.empty(theta.shape[0])\n # XXX: Get rid of the np.diag() in the next line\n R = W_sr[:, np.newaxis] * cho_solve((L, True), np.diag(W_sr)) # Line 7\n C = solve(L, W_sr[:, np.newaxis] * K) # Line 8\n # Line 9: (use einsum to compute np.diag(C.T.dot(C))))\n s_2 = -0.5 * (np.diag(K) - np.einsum('ij, ij -> j', C, C)) \\\n * (pi * (1 - pi) * (1 - 2 * pi)) # third derivative\n\n for j in range(d_Z.shape[0]):\n C = K_gradient[:, :, j] # Line 11\n # Line 12: (R.T.ravel().dot(C.ravel()) = np.trace(R.dot(C)))\n s_1 = .5 * a.T.dot(C).dot(a) - .5 * R.T.ravel().dot(C.ravel())\n\n b = C.dot(self.y_train_ - pi) # Line 13\n s_3 = b - K.dot(R.dot(b)) # Line 14\n\n d_Z[j] = s_1 + s_2.T.dot(s_3) # Line 15\n\n return Z, d_Z" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/predict", + "name": "predict", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/predict/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/predict/X", + "name": "X", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or list of object", + "default_value": "", + "description": "Query points where the GP is evaluated for classification." 
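A sketch of the hard-decision rule this predict entry describes: the MAP of the latent posterior is passed through the monotone logistic link, so the labels should match the argmax of predict_proba. The toy data and seed are assumptions::

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.gaussian_process import GaussianProcessClassifier

    X, y = make_classification(n_samples=60, random_state=1)
    gpc = GaussianProcessClassifier(random_state=1).fit(X, y)

    proba = gpc.predict_proba(X[:5])    # columns ordered as in classes_
    labels = gpc.predict(X[:5])
    # The logistic link is monotone, so both views should agree.
    assert np.array_equal(labels, gpc.classes_[proba.argmax(axis=1)])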
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform classification on an array of test vectors X.", + "docstring": "Perform classification on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n Predicted target values for X, values are from ``classes_``", + "code": " def predict(self, X):\n \"\"\"Perform classification on an array of test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\n Returns\n -------\n C : ndarray of shape (n_samples,)\n Predicted target values for X, values are from ``classes_``\n \"\"\"\n check_is_fitted(self)\n\n # As discussed on Section 3.4.2 of GPML, for making hard binary\n # decisions, it is enough to compute the MAP of the posterior and\n # pass it through the link function\n K_star = self.kernel_(self.X_train_, X) # K_star =k(x_star)\n f_star = K_star.T.dot(self.y_train_ - self.pi_) # Algorithm 3.2,Line 4\n\n return np.where(f_star > 0, self.classes_[1], self.classes_[0])" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/predict_proba", + "name": "predict_proba", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/predict_proba/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpc/_BinaryGaussianProcessClassifierLaplace/predict_proba/X", + "name": "X", + "qname": "sklearn.gaussian_process._gpc._BinaryGaussianProcessClassifierLaplace.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or list of object", + "default_value": "", + "description": "Query points where the GP is evaluated for classification." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return probability estimates for the test vector X.", + "docstring": "Return probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. 
The columns correspond to the classes in sorted\n order, as they appear in the attribute ``classes_``.", + "code": " def predict_proba(self, X):\n \"\"\"Return probability estimates for the test vector X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated for classification.\n\n Returns\n -------\n C : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute ``classes_``.\n \"\"\"\n check_is_fitted(self)\n\n # Based on Algorithm 3.2 of GPML\n K_star = self.kernel_(self.X_train_, X) # K_star =k(x_star)\n f_star = K_star.T.dot(self.y_train_ - self.pi_) # Line 4\n v = solve(self.L_, self.W_sr_[:, np.newaxis] * K_star) # Line 5\n # Line 6 (compute np.diag(v.T.dot(v)) via einsum)\n var_f_star = self.kernel_.diag(X) - np.einsum(\"ij,ij->j\", v, v)\n\n # Line 7:\n # Approximate \\int log(z) * N(z | f_star, var_f_star)\n # Approximation is due to Williams & Barber, \"Bayesian Classification\n # with Gaussian Processes\", Appendix A: Approximate the logistic\n # sigmoid by a linear combination of 5 error functions.\n # For information on how this integral can be computed see\n # blitiri.blogspot.de/2012/11/gaussian-integral-of-error-function.html\n alpha = 1 / (2 * var_f_star)\n gamma = LAMBDAS * f_star\n integrals = np.sqrt(np.pi / alpha) \\\n * erf(gamma * np.sqrt(alpha / (alpha + LAMBDAS**2))) \\\n / (2 * np.sqrt(var_f_star * 2 * np.pi))\n pi_star = (COEFS * integrals).sum(axis=0) + .5 * COEFS.sum()\n\n return np.vstack((1 - pi_star, pi_star)).T" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/__init__/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/__init__/kernel", + "name": "kernel", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.__init__.kernel", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "kernel instance", + "default_value": "None", + "description": "The kernel specifying the covariance function of the GP. If None is\npassed, the kernel ``ConstantKernel(1.0, constant_value_bounds=\"fixed\")\n* RBF(1.0, length_scale_bounds=\"fixed\")`` is used as default. Note that\nthe kernel hyperparameters are optimized during fitting unless the\nbounds are marked as \"fixed\"."
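A sketch of the default-kernel behaviour documented above: kernel=None should be interchangeable with the fixed ConstantKernel * RBF product, with alpha adding observation noise on the kernel diagonal. The sine-curve data is an arbitrary assumption::

    import numpy as np
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF, ConstantKernel

    X = np.linspace(0.0, 5.0, 20)[:, np.newaxis]
    y = np.sin(X).ravel()

    gpr_default = GaussianProcessRegressor(alpha=1e-10).fit(X, y)
    gpr_explicit = GaussianProcessRegressor(
        kernel=ConstantKernel(1.0, constant_value_bounds="fixed")
        * RBF(1.0, length_scale_bounds="fixed"),
        alpha=1e-10).fit(X, y)

    # Both kernels are fully "fixed", so no hyperparameter search runs
    # and the two models should predict identically.
    assert np.allclose(gpr_default.predict(X), gpr_explicit.predict(X))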
+ }, + "type": { + "kind": "NamedType", + "name": "kernel instance" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/__init__/alpha", + "name": "alpha", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.__init__.alpha", + "default_value": "1e-10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or ndarray of shape (n_samples,)", + "default_value": "1e-10", + "description": "Value added to the diagonal of the kernel matrix during fitting.\nThis can prevent a potential numerical issue during fitting, by\nensuring that the calculated values form a positive definite matrix.\nIt can also be interpreted as the variance of additional Gaussian\nmeasurement noise on the training observations. Note that this is\ndifferent from using a `WhiteKernel`. If an array is passed, it must\nhave the same number of entries as the data used for fitting and is\nused as datapoint-dependent noise level. Allowing to specify the\nnoise level directly as a parameter is mainly for convenience and\nfor consistency with Ridge." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/__init__/optimizer", + "name": "optimizer", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.__init__.optimizer", + "default_value": "'fmin_l_bfgs_b'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "\"fmin_l_bfgs_b\" or callable", + "default_value": "\"fmin_l_bfgs_b\"", + "description": "Can either be one of the internally supported optimizers for optimizing\nthe kernel's parameters, specified by a string, or an externally\ndefined optimizer passed as a callable. If a callable is passed, it\nmust have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be minimized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\nPer default, the 'L-BGFS-B' algorithm from scipy.optimize.minimize\nis used. If None is passed, the kernel's parameters are kept fixed.\nAvailable internal optimizers are::\n\n 'fmin_l_bfgs_b'" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "\"fmin_l_bfgs_b\"" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/__init__/n_restarts_optimizer", + "name": "n_restarts_optimizer", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.__init__.n_restarts_optimizer", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The number of restarts of the optimizer for finding the kernel's\nparameters which maximize the log-marginal likelihood. 
The first run\nof the optimizer is performed from the kernel's initial parameters,\nthe remaining ones (if any) from thetas sampled log-uniformly at\nrandom from the space of allowed theta-values. If greater than 0, all\nbounds must be finite. Note that n_restarts_optimizer == 0 implies that\none run is performed." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/__init__/normalize_y", + "name": "normalize_y", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.__init__.normalize_y", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether the target values y are normalized, i.e. whether the mean and\nvariance of the target values are set equal to 0 and 1 respectively.\nThis is recommended for cases where zero-mean, unit-variance priors are\nused. Note that, in this implementation, the normalisation is reversed\nbefore the GP predictions are reported.\n\n.. versionchanged:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/__init__/copy_X_train", + "name": "copy_X_train", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.__init__.copy_X_train", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, a persistent copy of the training data is stored in the\nobject. Otherwise, just a reference to the training data is stored,\nwhich might cause predictions to change if the data is modified\nexternally." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation used to draw the initial theta\nvalues for the optimizer restarts.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary <random_state>`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of Gaussian Processes\nfor Machine Learning (GPML) by Rasmussen and Williams.\n\nIn addition to the standard scikit-learn estimator API,\nGaussianProcessRegressor:\n\n * allows prediction without prior fitting (based on the GP prior)\n * provides an additional method sample_y(X), which evaluates samples\n drawn from the GPR (prior or posterior) at given inputs\n * exposes a method log_marginal_likelihood(theta), which can be used\n externally for other ways of selecting hyperparameters, e.g., via\n Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide <gaussian_process>`.\n\n..
versionadded:: 0.18", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, kernel=None, *, alpha=1e-10,\n optimizer=\"fmin_l_bfgs_b\", n_restarts_optimizer=0,\n normalize_y=False, copy_X_train=True, random_state=None):\n self.kernel = kernel\n self.alpha = alpha\n self.optimizer = optimizer\n self.n_restarts_optimizer = n_restarts_optimizer\n self.normalize_y = normalize_y\n self.copy_X_train = copy_X_train\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/_constrained_optimization", + "name": "_constrained_optimization", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor._constrained_optimization", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/_constrained_optimization/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor._constrained_optimization.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/_constrained_optimization/obj_func", + "name": "obj_func", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor._constrained_optimization.obj_func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/_constrained_optimization/initial_theta", + "name": "initial_theta", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor._constrained_optimization.initial_theta", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/_constrained_optimization/bounds", + "name": "bounds", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor._constrained_optimization.bounds", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _constrained_optimization(self, obj_func, initial_theta, bounds):\n if self.optimizer == \"fmin_l_bfgs_b\":\n opt_res = scipy.optimize.minimize(\n obj_func, initial_theta, method=\"L-BFGS-B\", jac=True,\n bounds=bounds)\n _check_optimize_result(\"lbfgs\", opt_res)\n theta_opt, func_min = opt_res.x, opt_res.fun\n elif callable(self.optimizer):\n theta_opt, func_min = \\\n self.optimizer(obj_func, initial_theta, bounds=bounds)\n else:\n raise ValueError(\"Unknown optimizer %s.\" % self.optimizer)\n\n return theta_opt, func_min" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/_more_tags", + "name": "_more_tags", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/_more_tags/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor._more_tags.self", + 
"default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'requires_fit': False}" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/fit", + "name": "fit", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/fit/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/fit/X", + "name": "X", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or list of object", + "default_value": "", + "description": "Feature vectors or other representations of training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/fit/y", + "name": "y", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit Gaussian process regression model.", + "docstring": "Fit Gaussian process regression model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y):\n \"\"\"Fit Gaussian process regression model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n if self.kernel is None: # Use an RBF kernel as default\n self.kernel_ = C(1.0, constant_value_bounds=\"fixed\") \\\n * RBF(1.0, length_scale_bounds=\"fixed\")\n else:\n self.kernel_ = clone(self.kernel)\n\n self._rng = check_random_state(self.random_state)\n\n if self.kernel_.requires_vector_input:\n X, y = self._validate_data(X, y, multi_output=True, y_numeric=True,\n ensure_2d=True, dtype=\"numeric\")\n else:\n X, y = self._validate_data(X, y, multi_output=True, y_numeric=True,\n ensure_2d=False, dtype=None)\n\n # Normalize target value\n if 
self.normalize_y:\n self._y_train_mean = np.mean(y, axis=0)\n self._y_train_std = _handle_zeros_in_scale(\n np.std(y, axis=0), copy=False\n )\n\n # Remove mean and make unit variance\n y = (y - self._y_train_mean) / self._y_train_std\n\n else:\n self._y_train_mean = np.zeros(1)\n self._y_train_std = 1\n\n if np.iterable(self.alpha) \\\n and self.alpha.shape[0] != y.shape[0]:\n if self.alpha.shape[0] == 1:\n self.alpha = self.alpha[0]\n else:\n raise ValueError(\"alpha must be a scalar or an array\"\n \" with same number of entries as y.(%d != %d)\"\n % (self.alpha.shape[0], y.shape[0]))\n\n self.X_train_ = np.copy(X) if self.copy_X_train else X\n self.y_train_ = np.copy(y) if self.copy_X_train else y\n\n if self.optimizer is not None and self.kernel_.n_dims > 0:\n # Choose hyperparameters based on maximizing the log-marginal\n # likelihood (potentially starting from several initial values)\n def obj_func(theta, eval_gradient=True):\n if eval_gradient:\n lml, grad = self.log_marginal_likelihood(\n theta, eval_gradient=True, clone_kernel=False)\n return -lml, -grad\n else:\n return -self.log_marginal_likelihood(theta,\n clone_kernel=False)\n\n # First optimize starting from theta specified in kernel\n optima = [(self._constrained_optimization(obj_func,\n self.kernel_.theta,\n self.kernel_.bounds))]\n\n # Additional runs are performed from log-uniform chosen initial\n # theta\n if self.n_restarts_optimizer > 0:\n if not np.isfinite(self.kernel_.bounds).all():\n raise ValueError(\n \"Multiple optimizer restarts (n_restarts_optimizer>0) \"\n \"requires that all bounds are finite.\")\n bounds = self.kernel_.bounds\n for iteration in range(self.n_restarts_optimizer):\n theta_initial = \\\n self._rng.uniform(bounds[:, 0], bounds[:, 1])\n optima.append(\n self._constrained_optimization(obj_func, theta_initial,\n bounds))\n # Select result from run with minimal (negative) log-marginal\n # likelihood\n lml_values = list(map(itemgetter(1), optima))\n self.kernel_.theta = optima[np.argmin(lml_values)][0]\n self.kernel_._check_bounds_params()\n\n self.log_marginal_likelihood_value_ = -np.min(lml_values)\n else:\n self.log_marginal_likelihood_value_ = \\\n self.log_marginal_likelihood(self.kernel_.theta,\n clone_kernel=False)\n\n # Precompute quantities required for predictions which are independent\n # of actual query points\n K = self.kernel_(self.X_train_)\n K[np.diag_indices_from(K)] += self.alpha\n try:\n self.L_ = cholesky(K, lower=True) # Line 2\n except np.linalg.LinAlgError as exc:\n exc.args = (\"The kernel, %s, is not returning a \"\n \"positive definite matrix. 
Try gradually \"\n \"increasing the 'alpha' parameter of your \"\n \"GaussianProcessRegressor estimator.\"\n % self.kernel_,) + exc.args\n raise\n self.alpha_ = cho_solve((self.L_, True), self.y_train_) # Line 3\n return self" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/log_marginal_likelihood", + "name": "log_marginal_likelihood", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.log_marginal_likelihood", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/log_marginal_likelihood/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.log_marginal_likelihood.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/log_marginal_likelihood/theta", + "name": "theta", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.log_marginal_likelihood.theta", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_kernel_params,) default=None", + "default_value": "", + "description": "Kernel hyperparameters for which the log-marginal likelihood is\nevaluated. If None, the precomputed log_marginal_likelihood\nof ``self.kernel_.theta`` is returned." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_kernel_params,)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/log_marginal_likelihood/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.log_marginal_likelihood.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the gradient of the log-marginal likelihood with respect\nto the kernel hyperparameters at position theta is returned\nadditionally. If True, theta must not be None." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/log_marginal_likelihood/clone_kernel", + "name": "clone_kernel", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.log_marginal_likelihood.clone_kernel", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, the kernel attribute is copied. If False, the kernel\nattribute is modified, but may result in a performance improvement." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns log-marginal likelihood of theta for training data.", + "docstring": "Returns log-marginal likelihood of theta for training data.\n\nParameters\n----------\ntheta : array-like of shape (n_kernel_params,) default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. 
If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\neval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. If True, theta must not be None.\n\nclone_kernel : bool, default=True\n If True, the kernel attribute is copied. If False, the kernel\n attribute is modified, but may result in a performance improvement.\n\nReturns\n-------\nlog_likelihood : float\n Log-marginal likelihood of theta for training data.\n\nlog_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when eval_gradient is True.", + "code": " def log_marginal_likelihood(self, theta=None, eval_gradient=False,\n clone_kernel=True):\n \"\"\"Returns log-marginal likelihood of theta for training data.\n\n Parameters\n ----------\n theta : array-like of shape (n_kernel_params,) default=None\n Kernel hyperparameters for which the log-marginal likelihood is\n evaluated. If None, the precomputed log_marginal_likelihood\n of ``self.kernel_.theta`` is returned.\n\n eval_gradient : bool, default=False\n If True, the gradient of the log-marginal likelihood with respect\n to the kernel hyperparameters at position theta is returned\n additionally. If True, theta must not be None.\n\n clone_kernel : bool, default=True\n If True, the kernel attribute is copied. If False, the kernel\n attribute is modified, but may result in a performance improvement.\n\n Returns\n -------\n log_likelihood : float\n Log-marginal likelihood of theta for training data.\n\n log_likelihood_gradient : ndarray of shape (n_kernel_params,), optional\n Gradient of the log-marginal likelihood with respect to the kernel\n hyperparameters at position theta.\n Only returned when eval_gradient is True.\n \"\"\"\n if theta is None:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated for theta!=None\")\n return self.log_marginal_likelihood_value_\n\n if clone_kernel:\n kernel = self.kernel_.clone_with_theta(theta)\n else:\n kernel = self.kernel_\n kernel.theta = theta\n\n if eval_gradient:\n K, K_gradient = kernel(self.X_train_, eval_gradient=True)\n else:\n K = kernel(self.X_train_)\n\n K[np.diag_indices_from(K)] += self.alpha\n try:\n L = cholesky(K, lower=True) # Line 2\n except np.linalg.LinAlgError:\n return (-np.inf, np.zeros_like(theta)) \\\n if eval_gradient else -np.inf\n\n # Support multi-dimensional output of self.y_train_\n y_train = self.y_train_\n if y_train.ndim == 1:\n y_train = y_train[:, np.newaxis]\n\n alpha = cho_solve((L, True), y_train) # Line 3\n\n # Compute log-likelihood (compare line 7)\n log_likelihood_dims = -0.5 * np.einsum(\"ik,ik->k\", y_train, alpha)\n log_likelihood_dims -= np.log(np.diag(L)).sum()\n log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)\n log_likelihood = log_likelihood_dims.sum(-1) # sum over dimensions\n\n if eval_gradient: # compare Equation 5.9 from GPML\n tmp = np.einsum(\"ik,jk->ijk\", alpha, alpha) # k: output-dimension\n tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis]\n # Compute \"0.5 * trace(tmp.dot(K_gradient))\" without\n # constructing the full matrix tmp.dot(K_gradient) since only\n # its diagonal is required\n log_likelihood_gradient_dims = \\\n 0.5 * np.einsum(\"ijl,jik->kl\", tmp, K_gradient)\n log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)\n\n if 
eval_gradient:\n return log_likelihood, log_likelihood_gradient\n else:\n return log_likelihood" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/predict", + "name": "predict", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/predict/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/predict/X", + "name": "X", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or list of object", + "default_value": "", + "description": "Query points where the GP is evaluated." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/predict/return_std", + "name": "return_std", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.predict.return_std", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the standard-deviation of the predictive distribution at\nthe query points is returned along with the mean." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/predict/return_cov", + "name": "return_cov", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.predict.return_cov", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the covariance of the joint predictive distribution at\nthe query points is returned along with the mean." 
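A sketch of the two optional return values documented above: they cannot be requested together, and the standard deviation should equal the square root of the diagonal of the full predictive covariance. The toy data and noise level are arbitrary assumptions::

    import numpy as np
    from sklearn.gaussian_process import GaussianProcessRegressor

    rng = np.random.RandomState(0)
    X = rng.uniform(0.0, 5.0, (30, 1))
    y = np.sin(X).ravel() + 0.1 * rng.randn(30)

    gpr = GaussianProcessRegressor(alpha=1e-2).fit(X, y)
    X_new = np.linspace(0.0, 5.0, 10)[:, np.newaxis]

    y_mean, y_std = gpr.predict(X_new, return_std=True)
    _, y_cov = gpr.predict(X_new, return_cov=True)
    assert np.allclose(y_std, np.sqrt(np.diag(y_cov)))
    # predict(X_new, return_std=True, return_cov=True) raises RuntimeError.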
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict using the Gaussian process regression model\n\nWe can also predict based on an unfitted model by using the GP prior.\nIn addition to the mean of the predictive distribution, also its\nstandard deviation (return_std=True) or covariance (return_cov=True).\nNote that at most one of the two can be requested.", + "docstring": "Predict using the Gaussian process regression model\n\nWe can also predict based on an unfitted model by using the GP prior.\nIn addition to the mean of the predictive distribution, also its\nstandard deviation (return_std=True) or covariance (return_cov=True).\nNote that at most one of the two can be requested.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated.\n\nreturn_std : bool, default=False\n If True, the standard-deviation of the predictive distribution at\n the query points is returned along with the mean.\n\nreturn_cov : bool, default=False\n If True, the covariance of the joint predictive distribution at\n the query points is returned along with the mean.\n\nReturns\n-------\ny_mean : ndarray of shape (n_samples, [n_output_dims])\n Mean of predictive distribution a query points.\n\ny_std : ndarray of shape (n_samples,), optional\n Standard deviation of predictive distribution at query points.\n Only returned when `return_std` is True.\n\ny_cov : ndarray of shape (n_samples, n_samples), optional\n Covariance of joint predictive distribution a query points.\n Only returned when `return_cov` is True.", + "code": " def predict(self, X, return_std=False, return_cov=False):\n \"\"\"Predict using the Gaussian process regression model\n\n We can also predict based on an unfitted model by using the GP prior.\n In addition to the mean of the predictive distribution, also its\n standard deviation (return_std=True) or covariance (return_cov=True).\n Note that at most one of the two can be requested.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated.\n\n return_std : bool, default=False\n If True, the standard-deviation of the predictive distribution at\n the query points is returned along with the mean.\n\n return_cov : bool, default=False\n If True, the covariance of the joint predictive distribution at\n the query points is returned along with the mean.\n\n Returns\n -------\n y_mean : ndarray of shape (n_samples, [n_output_dims])\n Mean of predictive distribution a query points.\n\n y_std : ndarray of shape (n_samples,), optional\n Standard deviation of predictive distribution at query points.\n Only returned when `return_std` is True.\n\n y_cov : ndarray of shape (n_samples, n_samples), optional\n Covariance of joint predictive distribution a query points.\n Only returned when `return_cov` is True.\n \"\"\"\n if return_std and return_cov:\n raise RuntimeError(\n \"Not returning standard deviation of predictions when \"\n \"returning full covariance.\")\n\n if self.kernel is None or self.kernel.requires_vector_input:\n X = check_array(X, ensure_2d=True, dtype=\"numeric\")\n else:\n X = check_array(X, ensure_2d=False, dtype=None)\n\n if not hasattr(self, \"X_train_\"): # Unfitted;predict based on GP prior\n if self.kernel is None:\n kernel = (C(1.0, constant_value_bounds=\"fixed\") *\n RBF(1.0, length_scale_bounds=\"fixed\"))\n else:\n kernel = 
self.kernel\n y_mean = np.zeros(X.shape[0])\n if return_cov:\n y_cov = kernel(X)\n return y_mean, y_cov\n elif return_std:\n y_var = kernel.diag(X)\n return y_mean, np.sqrt(y_var)\n else:\n return y_mean\n else: # Predict based on GP posterior\n K_trans = self.kernel_(X, self.X_train_)\n y_mean = K_trans.dot(self.alpha_) # Line 4 (y_mean = f_star)\n # undo normalisation\n y_mean = self._y_train_std * y_mean + self._y_train_mean\n\n if return_cov:\n # Solve K @ V = K_trans.T\n V = cho_solve((self.L_, True), K_trans.T) # Line 5\n y_cov = self.kernel_(X) - K_trans.dot(V) # Line 6\n\n # undo normalisation\n y_cov = y_cov * self._y_train_std**2\n\n return y_mean, y_cov\n elif return_std:\n # Solve K @ V = K_trans.T\n V = cho_solve((self.L_, True), K_trans.T) # Line 5\n\n # Compute variance of predictive distribution\n # Use einsum to avoid explicitly forming the large matrix\n # K_trans @ V just to extract its diagonal afterward.\n y_var = self.kernel_.diag(X)\n y_var -= np.einsum(\"ij,ji->i\", K_trans, V)\n\n # Check if any of the variances is negative because of\n # numerical issues. If yes: set the variance to 0.\n y_var_negative = y_var < 0\n if np.any(y_var_negative):\n warnings.warn(\"Predicted variances smaller than 0. \"\n \"Setting those variances to 0.\")\n y_var[y_var_negative] = 0.0\n\n # undo normalisation\n y_var = y_var * self._y_train_std**2\n\n return y_mean, np.sqrt(y_var)\n else:\n return y_mean" + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/sample_y", + "name": "sample_y", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.sample_y", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/sample_y/self", + "name": "self", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.sample_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/sample_y/X", + "name": "X", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.sample_y.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or list of object", + "default_value": "", + "description": "Query points where the GP is evaluated." 
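A sketch of sample_y on an unfitted model, where prediction falls back to the GP prior as the predict entry above notes; the query grid is an arbitrary assumption::

    import numpy as np
    from sklearn.gaussian_process import GaussianProcessRegressor

    X_query = np.linspace(0.0, 1.0, 8)[:, np.newaxis]
    gpr = GaussianProcessRegressor()  # unfitted: draws come from the prior

    draws = gpr.sample_y(X_query, n_samples=3, random_state=0)
    print(draws.shape)  # (8, 3): one column per sampled function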
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/sample_y/n_samples", + "name": "n_samples", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.sample_y.n_samples", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "The number of samples drawn from the Gaussian process" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process._gpr/GaussianProcessRegressor/sample_y/random_state", + "name": "random_state", + "qname": "sklearn.gaussian_process._gpr.GaussianProcessRegressor.sample_y.random_state", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "0", + "description": "Determines random number generation to randomly draw samples.\nPass an int for reproducible results across multiple function\ncalls.\nSee :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Draw samples from Gaussian process and evaluate at X.", + "docstring": "Draw samples from Gaussian process and evaluate at X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated.\n\nn_samples : int, default=1\n The number of samples drawn from the Gaussian process\n\nrandom_state : int, RandomState instance or None, default=0\n Determines random number generation to randomly draw samples.\n Pass an int for reproducible results across multiple function\n calls.\n See :term: `Glossary `.\n\nReturns\n-------\ny_samples : ndarray of shape (n_samples_X, [n_output_dims], n_samples)\n Values of n_samples samples drawn from Gaussian process and\n evaluated at query points.", + "code": " def sample_y(self, X, n_samples=1, random_state=0):\n \"\"\"Draw samples from Gaussian process and evaluate at X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or list of object\n Query points where the GP is evaluated.\n\n n_samples : int, default=1\n The number of samples drawn from the Gaussian process\n\n random_state : int, RandomState instance or None, default=0\n Determines random number generation to randomly draw samples.\n Pass an int for reproducible results across multiple function\n calls.\n See :term: `Glossary `.\n\n Returns\n -------\n y_samples : ndarray of shape (n_samples_X, [n_output_dims], n_samples)\n Values of n_samples samples drawn from Gaussian process and\n evaluated at query points.\n \"\"\"\n rng = check_random_state(random_state)\n\n y_mean, y_cov = self.predict(X, return_cov=True)\n if y_mean.ndim == 1:\n y_samples = rng.multivariate_normal(y_mean, y_cov, n_samples).T\n else:\n y_samples = \\\n [rng.multivariate_normal(y_mean[:, i], y_cov,\n n_samples).T[:, np.newaxis]\n for i in range(y_mean.shape[1])]\n y_samples = np.hstack(y_samples)\n return y_samples" + }, + { + "id": 
"scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__call__", + "name": "__call__", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__call__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__call__/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "None", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__call__/Y", + "name": "Y", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.__call__.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "None", + "description": "Right argument of the returned kernel k(X, Y). If None, k(X, X)\nis evaluated instead." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__call__/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.__call__.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Determines whether the gradient with respect to the log of the\nkernel hyperparameter is computed." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the kernel k(X, Y) and optionally its gradient.\n\nNote that this compound kernel returns the results of all simple kernel\nstacked along an additional axis.", + "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nNote that this compound kernel returns the results of all simple kernel\nstacked along an additional axis.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). 
If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of the\n kernel hyperparameter is computed.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y, n_kernels)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims, n_kernels), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.", + "code": " def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Note that this compound kernel returns the results of all simple kernel\n stacked along an additional axis.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object, \\\n default=None\n Left argument of the returned kernel k(X, Y)\n\n Y : array-like of shape (n_samples_X, n_features) or list of object, \\\n default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of the\n kernel hyperparameter is computed.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y, n_kernels)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape \\\n (n_samples_X, n_samples_X, n_dims, n_kernels), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.\n \"\"\"\n if eval_gradient:\n K = []\n K_grad = []\n for kernel in self.kernels:\n K_single, K_grad_single = kernel(X, Y, eval_gradient)\n K.append(K_single)\n K_grad.append(K_grad_single[..., np.newaxis])\n return np.dstack(K), np.concatenate(K_grad, 3)\n else:\n return np.dstack([kernel(X, Y, eval_gradient)\n for kernel in self.kernels])" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__eq__", + "name": "__eq__", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.__eq__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__eq__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.__eq__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__eq__/b", + "name": "b", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.__eq__.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __eq__(self, b):\n if type(self) != type(b) or len(self.kernels) != len(b.kernels):\n return False\n return np.all([self.kernels[i] == b.kernels[i]\n for i in range(len(self.kernels))])" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__init__/self", + "name": "self", + "qname": 
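To make the "stacked along an additional axis" behaviour of `CompoundKernel.__call__` concrete, here is a small sketch; the kernel choices are arbitrary:

```python
import numpy as np
from sklearn.gaussian_process.kernels import CompoundKernel, RBF, WhiteKernel

X = np.array([[0.0], [1.0], [2.0]])
kernel = CompoundKernel([RBF(length_scale=1.0), WhiteKernel(noise_level=0.5)])

# Each simple kernel contributes one slice along the trailing axis.
K = kernel(X)
print(K.shape)  # (3, 3, 2) -> (n_samples_X, n_samples_Y, n_kernels)

# Gradients are stacked the same way, one slice per kernel.
K, K_grad = kernel(X, eval_gradient=True)
print(K_grad.shape)  # (3, 3, 1, 2) -> (..., n_dims, n_kernels)
```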
"sklearn.gaussian_process.kernels.CompoundKernel.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/__init__/kernels", + "name": "kernels", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.__init__.kernels", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "list of Kernels", + "default_value": "", + "description": "The other kernels" + }, + "type": { + "kind": "NamedType", + "name": "list of Kernels" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Kernel which is composed of a set of other kernels.\n\n.. versionadded:: 0.18", + "docstring": "", + "code": " def __init__(self, kernels):\n self.kernels = kernels" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/bounds@getter", + "name": "bounds", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.bounds", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/bounds/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.bounds.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the log-transformed bounds on the theta.", + "docstring": "Returns the log-transformed bounds on the theta.\n\nReturns\n-------\nbounds : array of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta", + "code": " @property\n def bounds(self):\n \"\"\"Returns the log-transformed bounds on the theta.\n\n Returns\n -------\n bounds : array of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta\n \"\"\"\n return np.vstack([kernel.bounds for kernel in self.kernels])" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/diag", + "name": "diag", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.diag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/diag/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.diag.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/diag/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.diag.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "", + "description": "Argument to the kernel." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to `np.diag(self(X))`; however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.", + "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to `np.diag(self(X))`; however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X, n_kernels)\n Diagonal of kernel k(X, X)", + "code": " def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to `np.diag(self(X))`; however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X, n_kernels)\n Diagonal of kernel k(X, X)\n \"\"\"\n return np.vstack([kernel.diag(X) for kernel in self.kernels]).T" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/get_params", + "name": "get_params", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.get_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/get_params/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.get_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/get_params/deep", + "name": "deep", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.get_params.deep", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, will return the parameters for this estimator and\ncontained subobjects that are estimators." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Get parameters of this kernel.", + "docstring": "Get parameters of this kernel.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values.", + "code": " def get_params(self, deep=True):\n \"\"\"Get parameters of this kernel.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : dict\n Parameter names mapped to their values.\n \"\"\"\n return dict(kernels=self.kernels)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/is_stationary", + "name": "is_stationary", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.is_stationary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/is_stationary/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.is_stationary.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns whether the kernel is stationary.", + "docstring": "Returns whether the kernel is stationary. ", + "code": " def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary. \"\"\"\n return np.all([kernel.is_stationary() for kernel in self.kernels])" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/requires_vector_input@getter", + "name": "requires_vector_input", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.requires_vector_input", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/requires_vector_input/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.requires_vector_input.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns whether the kernel is defined on discrete structures.", + "docstring": "Returns whether the kernel is defined on discrete structures. ", + "code": " @property\n def requires_vector_input(self):\n \"\"\"Returns whether the kernel is defined on discrete structures. 
\"\"\"\n return np.any([kernel.requires_vector_input\n for kernel in self.kernels])" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/theta@getter", + "name": "theta", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.theta", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/theta/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.theta.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\nNote that theta are typically the log-transformed values of the\nkernel's hyperparameters as this representation of the search space\nis more amenable for hyperparameter search, as hyperparameters like\nlength-scales naturally live on a log-scale.", + "docstring": "Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\nNote that theta are typically the log-transformed values of the\nkernel's hyperparameters as this representation of the search space\nis more amenable for hyperparameter search, as hyperparameters like\nlength-scales naturally live on a log-scale.\n\nReturns\n-------\ntheta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel", + "code": " @property\n def theta(self):\n \"\"\"Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\n Note that theta are typically the log-transformed values of the\n kernel's hyperparameters as this representation of the search space\n is more amenable for hyperparameter search, as hyperparameters like\n length-scales naturally live on a log-scale.\n\n Returns\n -------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n return np.hstack([kernel.theta for kernel in self.kernels])" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/theta@setter", + "name": "theta", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.theta", + "decorators": ["theta.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/theta/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.theta.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/CompoundKernel/theta/theta", + "name": "theta", + "qname": "sklearn.gaussian_process.kernels.CompoundKernel.theta.theta", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array of shape (n_dims,)", + "default_value": "", + "description": "The non-fixed, log-transformed hyperparameters of the kernel" + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_dims,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Sets the (flattened, log-transformed) non-fixed hyperparameters.", + "docstring": "Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\nParameters\n----------\ntheta : array of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel", + "code": " 
@theta.setter\n def theta(self, theta):\n \"\"\"Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\n Parameters\n ----------\n theta : array of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n k_dims = self.k1.n_dims\n for i, kernel in enumerate(self.kernels):\n kernel.theta = theta[i * k_dims:(i + 1) * k_dims]" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__call__", + "name": "__call__", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__call__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__call__/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__call__/Y", + "name": "Y", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.__call__.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "None", + "description": "Right argument of the returned kernel k(X, Y). If None, k(X, X)\nis evaluated instead." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__call__/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.__call__.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Determines whether the gradient with respect to the log of\nthe kernel hyperparameter is computed.\nOnly supported when Y is None." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the kernel k(X, Y) and optionally its gradient.", + "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). 
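The `theta` getter above concatenates the log-transformed hyperparameters of the sub-kernels; note that the setter recorded next to it slices `theta` back into equal-sized chunks, so it implicitly assumes every sub-kernel exposes the same number of non-fixed hyperparameters. A sketch of the getter, with arbitrary values:

```python
import numpy as np
from sklearn.gaussian_process.kernels import CompoundKernel, ConstantKernel, RBF

kernel = CompoundKernel([ConstantKernel(2.0), RBF(0.5)])

# One log-value per non-fixed hyperparameter, in sub-kernel order.
print(np.allclose(kernel.theta, np.log([2.0, 0.5])))  # True
```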
If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when eval_gradient\n is True.", + "code": " def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\n Y : array-like of shape (n_samples_X, n_features) or list of object, \\\n default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when eval_gradient\n is True.\n \"\"\"\n if Y is None:\n Y = X\n elif eval_gradient:\n raise ValueError(\"Gradient can only be evaluated when Y is None.\")\n\n K = np.full((_num_samples(X), _num_samples(Y)), self.constant_value,\n dtype=np.array(self.constant_value).dtype)\n if eval_gradient:\n if not self.hyperparameter_constant_value.fixed:\n return (K, np.full((_num_samples(X), _num_samples(X), 1),\n self.constant_value,\n dtype=np.array(self.constant_value).dtype))\n else:\n return K, np.empty((_num_samples(X), _num_samples(X), 0))\n else:\n return K" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__init__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__init__/constant_value", + "name": "constant_value", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.__init__.constant_value", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "The constant value which defines the covariance:\nk(x_1, x_2) = constant_value" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__init__/constant_value_bounds", + "name": "constant_value_bounds", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.__init__.constant_value_bounds", + "default_value": "(1e-05, 100000.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "pair of floats >= 0 or \"fixed\"", + "default_value": "(1e-5, 1e5)", + "description": 
"The lower and upper bound on `constant_value`.\nIf set to \"fixed\", `constant_value` cannot be changed during\nhyperparameter tuning." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "pair of floats >= 0" + }, + { + "kind": "NamedType", + "name": "\"fixed\"" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Constant kernel.\n\nCan be used as part of a product-kernel where it scales the magnitude of\nthe other factor (kernel) or as part of a sum-kernel, where it modifies\nthe mean of the Gaussian process.\n\n.. math::\n k(x_1, x_2) = constant\\_value \\;\\forall\\; x_1, x_2\n\nAdding a constant kernel is equivalent to adding a constant::\n\n kernel = RBF() + ConstantKernel(constant_value=2)\n\nis the same as::\n\n kernel = RBF() + 2\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "", + "code": " def __init__(self, constant_value=1.0, constant_value_bounds=(1e-5, 1e5)):\n self.constant_value = constant_value\n self.constant_value_bounds = constant_value_bounds" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__repr__", + "name": "__repr__", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/__repr__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return \"{0:.3g}**2\".format(np.sqrt(self.constant_value))" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/diag", + "name": "diag", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.diag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/diag/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.diag.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/diag/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.diag.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "", + "description": "Argument to the kernel." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.", + "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)", + "code": " def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n return np.full(_num_samples(X), self.constant_value,\n dtype=np.array(self.constant_value).dtype)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/hyperparameter_constant_value@getter", + "name": "hyperparameter_constant_value", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.hyperparameter_constant_value", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ConstantKernel/hyperparameter_constant_value/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.ConstantKernel.hyperparameter_constant_value.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def hyperparameter_constant_value(self):\n return Hyperparameter(\n \"constant_value\", \"numeric\", self.constant_value_bounds)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__call__", + "name": "__call__", + "qname": "sklearn.gaussian_process.kernels.DotProduct.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__call__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.DotProduct.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__call__/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.DotProduct.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + }, + { + "id": 
"scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__call__/Y", + "name": "Y", + "qname": "sklearn.gaussian_process.kernels.DotProduct.__call__.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "Right argument of the returned kernel k(X, Y). If None, k(X, X)\nif evaluated instead." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__call__/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process.kernels.DotProduct.__call__.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Determines whether the gradient with respect to the log of\nthe kernel hyperparameter is computed.\nOnly supported when Y is None." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the kernel k(X, Y) and optionally its gradient.", + "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.", + "code": " def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when `eval_gradient`\n is True.\n \"\"\"\n X = np.atleast_2d(X)\n if Y is None:\n K = np.inner(X, X) + self.sigma_0 ** 2\n else:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated when Y is None.\")\n K = np.inner(X, Y) + self.sigma_0 ** 2\n\n if eval_gradient:\n if not self.hyperparameter_sigma_0.fixed:\n K_gradient = np.empty((K.shape[0], K.shape[1], 1))\n K_gradient[..., 0] = 2 * self.sigma_0 ** 2\n return K, K_gradient\n else:\n return K, np.empty((X.shape[0], X.shape[0], 0))\n else:\n return K" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process.kernels.DotProduct.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__init__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.DotProduct.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__init__/sigma_0", + "name": "sigma_0", + "qname": "sklearn.gaussian_process.kernels.DotProduct.__init__.sigma_0", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float >= 0", + "default_value": "1.0", + "description": "Parameter controlling the inhomogenity of the kernel. If sigma_0=0,\nthe kernel is homogenous." + }, + "type": { + "kind": "NamedType", + "name": "float >= 0" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__init__/sigma_0_bounds", + "name": "sigma_0_bounds", + "qname": "sklearn.gaussian_process.kernels.DotProduct.__init__.sigma_0_bounds", + "default_value": "(1e-05, 100000.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "pair of floats >= 0 or \"fixed\"", + "default_value": "(1e-5, 1e5)", + "description": "The lower and upper bound on 'sigma_0'.\nIf set to \"fixed\", 'sigma_0' cannot be changed during\nhyperparameter tuning." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "pair of floats >= 0" + }, + { + "kind": "NamedType", + "name": "\"fixed\"" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Dot-Product kernel.\n\nThe DotProduct kernel is non-stationary and can be obtained from linear\nregression by putting :math:`N(0, 1)` priors on the coefficients\nof :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \\sigma_0^2)`\non the bias. The DotProduct kernel is invariant to a rotation of\nthe coordinates about the origin, but not translations.\nIt is parameterized by a parameter sigma_0 :math:`\\sigma`\nwhich controls the inhomogenity of the kernel. For :math:`\\sigma_0^2 =0`,\nthe kernel is called the homogeneous linear kernel, otherwise\nit is inhomogeneous. The kernel is given by\n\n.. math::\n k(x_i, x_j) = \\sigma_0 ^ 2 + x_i \\cdot x_j\n\nThe DotProduct kernel is commonly combined with exponentiation.\n\nSee [1]_, Chapter 4, Section 4.2, for further details regarding the\nDotProduct kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18", + "docstring": "", + "code": " def __init__(self, sigma_0=1.0, sigma_0_bounds=(1e-5, 1e5)):\n self.sigma_0 = sigma_0\n self.sigma_0_bounds = sigma_0_bounds" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__repr__", + "name": "__repr__", + "qname": "sklearn.gaussian_process.kernels.DotProduct.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/__repr__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.DotProduct.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return \"{0}(sigma_0={1:.3g})\".format(\n self.__class__.__name__, self.sigma_0)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/diag", + "name": "diag", + "qname": "sklearn.gaussian_process.kernels.DotProduct.diag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/diag/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.DotProduct.diag.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/diag/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.DotProduct.diag.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)." 
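For `DotProduct`, the recorded formula k(x_i, x_j) = sigma_0^2 + x_i · x_j and the `diag` shortcut documented just below can both be verified in a few lines (toy matrix assumed):

```python
import numpy as np
from sklearn.gaussian_process.kernels import DotProduct

X = np.array([[1.0, 2.0], [3.0, 4.0]])
kernel = DotProduct(sigma_0=1.0)

K = kernel(X)
print(np.allclose(K, X @ X.T + 1.0))            # sigma_0**2 + <x_i, x_j>
print(np.allclose(kernel.diag(X), np.diag(K)))  # diag skips the full matrix
```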
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.", + "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y).\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X).", + "code": " def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y).\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X).\n \"\"\"\n return np.einsum('ij,ij->i', X, X) + self.sigma_0 ** 2" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/hyperparameter_sigma_0@getter", + "name": "hyperparameter_sigma_0", + "qname": "sklearn.gaussian_process.kernels.DotProduct.hyperparameter_sigma_0", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/hyperparameter_sigma_0/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.DotProduct.hyperparameter_sigma_0.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def hyperparameter_sigma_0(self):\n return Hyperparameter(\"sigma_0\", \"numeric\", self.sigma_0_bounds)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/is_stationary", + "name": "is_stationary", + "qname": "sklearn.gaussian_process.kernels.DotProduct.is_stationary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/DotProduct/is_stationary/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.DotProduct.is_stationary.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns whether the kernel is stationary.", + "docstring": "Returns whether the kernel is stationary. ", + "code": " def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary. 
\"\"\"\n return False" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__call__", + "name": "__call__", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__call__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__call__/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__call__/Y", + "name": "Y", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.__call__.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "Right argument of the returned kernel k(X, Y). If None, k(X, X)\nif evaluated instead." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__call__/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.__call__.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Determines whether the gradient with respect to the log of\nthe kernel hyperparameter is computed.\nOnly supported when Y is None." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the kernel k(X, Y) and optionally its gradient.", + "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when `eval_gradient`\n is True.", + "code": " def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.\n \"\"\"\n X = np.atleast_2d(X)\n if Y is None:\n dists = squareform(pdist(X, metric='euclidean'))\n arg = np.pi * dists / self.periodicity\n sin_of_arg = np.sin(arg)\n K = np.exp(- 2 * (sin_of_arg / self.length_scale) ** 2)\n else:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated when Y is None.\")\n dists = cdist(X, Y, metric='euclidean')\n K = np.exp(- 2 * (np.sin(np.pi / self.periodicity * dists)\n / self.length_scale) ** 2)\n\n if eval_gradient:\n cos_of_arg = np.cos(arg)\n # gradient with respect to length_scale\n if not self.hyperparameter_length_scale.fixed:\n length_scale_gradient = \\\n 4 / self.length_scale**2 * sin_of_arg**2 * K\n length_scale_gradient = length_scale_gradient[:, :, np.newaxis]\n else: # length_scale is kept fixed\n length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))\n # gradient with respect to p\n if not self.hyperparameter_periodicity.fixed:\n periodicity_gradient = \\\n 4 * arg / self.length_scale**2 * cos_of_arg \\\n * sin_of_arg * K\n periodicity_gradient = periodicity_gradient[:, :, np.newaxis]\n else: # p is kept fixed\n periodicity_gradient = np.empty((K.shape[0], K.shape[1], 0))\n\n return K, np.dstack((length_scale_gradient, periodicity_gradient))\n else:\n return K" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__init__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__init__/length_scale", + "name": "length_scale", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.__init__.length_scale", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float > 0", + "default_value": "1.0", + "description": "The length scale of the kernel." 
+ }, + "type": { + "kind": "NamedType", + "name": "float > 0" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__init__/periodicity", + "name": "periodicity", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.__init__.periodicity", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float > 0", + "default_value": "1.0", + "description": "The periodicity of the kernel." + }, + "type": { + "kind": "NamedType", + "name": "float > 0" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__init__/length_scale_bounds", + "name": "length_scale_bounds", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.__init__.length_scale_bounds", + "default_value": "(1e-05, 100000.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "pair of floats >= 0 or \"fixed\"", + "default_value": "(1e-5, 1e5)", + "description": "The lower and upper bound on 'length_scale'.\nIf set to \"fixed\", 'length_scale' cannot be changed during\nhyperparameter tuning." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "pair of floats >= 0" + }, + { + "kind": "NamedType", + "name": "\"fixed\"" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__init__/periodicity_bounds", + "name": "periodicity_bounds", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.__init__.periodicity_bounds", + "default_value": "(1e-05, 100000.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "pair of floats >= 0 or \"fixed\"", + "default_value": "(1e-5, 1e5)", + "description": "The lower and upper bound on 'periodicity'.\nIf set to \"fixed\", 'periodicity' cannot be changed during\nhyperparameter tuning." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "pair of floats >= 0" + }, + { + "kind": "NamedType", + "name": "\"fixed\"" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Exp-Sine-Squared kernel (aka periodic kernel).\n\nThe ExpSineSquared kernel allows one to model functions which repeat\nthemselves exactly. It is parameterized by a length scale\nparameter :math:`l>0` and a periodicity parameter :math:`p>0`.\nOnly the isotropic variant where :math:`l` is a scalar is\nsupported at the moment. The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\text{exp}\\left(-\n \\frac{ 2\\sin^2(\\pi d(x_i, x_j)/p) }{ l^ 2} \\right)\n\nwhere :math:`l` is the length scale of the kernel, :math:`p` the\nperiodicity of the kernel and :math:`d(\\\\cdot,\\\\cdot)` is the\nEuclidean distance.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18", + "docstring": "", + "code": " def __init__(self, length_scale=1.0, periodicity=1.0,\n length_scale_bounds=(1e-5, 1e5),\n periodicity_bounds=(1e-5, 1e5)):\n self.length_scale = length_scale\n self.periodicity = periodicity\n self.length_scale_bounds = length_scale_bounds\n self.periodicity_bounds = periodicity_bounds" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__repr__", + "name": "__repr__", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/__repr__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return \"{0}(length_scale={1:.3g}, periodicity={2:.3g})\".format(\n self.__class__.__name__, self.length_scale, self.periodicity)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_length_scale@getter", + "name": "hyperparameter_length_scale", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.hyperparameter_length_scale", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_length_scale/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.hyperparameter_length_scale.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the length scale", + "docstring": "Returns the length scale", + "code": " @property\n def hyperparameter_length_scale(self):\n \"\"\"Returns the length scale\"\"\"\n return Hyperparameter(\n \"length_scale\", \"numeric\", self.length_scale_bounds)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_periodicity@getter", + "name": "hyperparameter_periodicity", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.hyperparameter_periodicity", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/ExpSineSquared/hyperparameter_periodicity/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.ExpSineSquared.hyperparameter_periodicity.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def hyperparameter_periodicity(self):\n return Hyperparameter(\n \"periodicity\", \"numeric\", self.periodicity_bounds)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__call__", + "name": "__call__", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__call__/self", + "name": "self", + "qname": 
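The `ExpSineSquared` formula recorded above, k = exp(-2 sin^2(pi d / p) / l^2), implies that points exactly one period apart are perfectly correlated; a small check with arbitrary values:

```python
import numpy as np
from sklearn.gaussian_process.kernels import ExpSineSquared

kernel = ExpSineSquared(length_scale=1.0, periodicity=2.0)
X = np.array([[0.0], [2.0], [3.0]])
K = kernel(X)

print(np.isclose(K[0, 1], 1.0))  # d=2 is one full period, the sine term vanishes
expected = np.exp(-2 * np.sin(np.pi * 3.0 / 2.0) ** 2)  # d=3, l=1
print(np.isclose(K[0, 2], expected))  # True
```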
"sklearn.gaussian_process.kernels.Exponentiation.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__call__/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__call__/Y", + "name": "Y", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.__call__.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_Y, n_features) or list of object", + "default_value": "None", + "description": "Right argument of the returned kernel k(X, Y). If None, k(X, X)\nis evaluated instead." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_Y, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__call__/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.__call__.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Determines whether the gradient with respect to the log of\nthe kernel hyperparameter is computed." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the kernel k(X, Y) and optionally its gradient.", + "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_Y, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.", + "code": " def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\n Y : array-like of shape (n_samples_Y, n_features) or list of object,\\\n default=None\n Right argument of the returned kernel k(X, Y). 
If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.\n \"\"\"\n if eval_gradient:\n K, K_gradient = self.kernel(X, Y, eval_gradient=True)\n K_gradient *= \\\n self.exponent * K[:, :, np.newaxis] ** (self.exponent - 1)\n return K ** self.exponent, K_gradient\n else:\n K = self.kernel(X, Y, eval_gradient=False)\n return K ** self.exponent" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__eq__", + "name": "__eq__", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.__eq__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__eq__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.__eq__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__eq__/b", + "name": "b", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.__eq__.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __eq__(self, b):\n if type(self) != type(b):\n return False\n return (self.kernel == b.kernel and self.exponent == b.exponent)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__init__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__init__/kernel", + "name": "kernel", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.__init__.kernel", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "Kernel", + "default_value": "", + "description": "The base kernel" + }, + "type": { + "kind": "NamedType", + "name": "Kernel" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__init__/exponent", + "name": "exponent", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.__init__.exponent", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "", + "description": "The exponent for the base kernel" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "The Exponentiation kernel takes one base 
kernel and a scalar parameter\n:math:`p` and combines them via\n\n.. math::\n k_{exp}(X, Y) = k(X, Y) ^p\n\nNote that the `__pow__` magic method is overridden, so\n`Exponentiation(RBF(), 2)` is equivalent to using the ** operator\nwith `RBF() ** 2`.\n\nRead more in the :ref:`User Guide <gp_kernels>`.\n\n.. versionadded:: 0.18", + "docstring": "", + "code": " def __init__(self, kernel, exponent):\n self.kernel = kernel\n self.exponent = exponent" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__repr__", + "name": "__repr__", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/__repr__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return \"{0} ** {1}\".format(self.kernel, self.exponent)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/bounds@getter", + "name": "bounds", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.bounds", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/bounds/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.bounds.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the log-transformed bounds on the theta.", + "docstring": "Returns the log-transformed bounds on the theta.\n\nReturns\n-------\nbounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta", + "code": " @property\n def bounds(self):\n \"\"\"Returns the log-transformed bounds on the theta.\n\n Returns\n -------\n bounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta\n \"\"\"\n return self.kernel.bounds" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/diag", + "name": "diag", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.diag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/diag/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.diag.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/diag/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.diag.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "", + "description": "Argument to the kernel." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.", + "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)", + "code": " def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n return self.kernel.diag(X) ** self.exponent" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/get_params", + "name": "get_params", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.get_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/get_params/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.get_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/get_params/deep", + "name": "deep", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.get_params.deep", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, will return the parameters for this estimator and\ncontained subobjects that are estimators." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Get parameters of this kernel.", + "docstring": "Get parameters of this kernel.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values.", + "code": " def get_params(self, deep=True):\n \"\"\"Get parameters of this kernel.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : dict\n Parameter names mapped to their values.\n \"\"\"\n params = dict(kernel=self.kernel, exponent=self.exponent)\n if deep:\n deep_items = self.kernel.get_params().items()\n params.update(('kernel__' + k, val) for k, val in deep_items)\n return params" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/hyperparameters@getter", + "name": "hyperparameters", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.hyperparameters", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/hyperparameters/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.hyperparameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns a list of all hyperparameter.", + "docstring": "Returns a list of all hyperparameter.", + "code": " @property\n def hyperparameters(self):\n \"\"\"Returns a list of all hyperparameter.\"\"\"\n r = []\n for hyperparameter in self.kernel.hyperparameters:\n r.append(Hyperparameter(\"kernel__\" + hyperparameter.name,\n hyperparameter.value_type,\n hyperparameter.bounds,\n hyperparameter.n_elements))\n return r" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/is_stationary", + "name": "is_stationary", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.is_stationary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/is_stationary/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.is_stationary.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns whether the kernel is stationary.", + "docstring": "Returns whether the kernel is stationary. ", + "code": " def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary. 
\"\"\"\n return self.kernel.is_stationary()" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/requires_vector_input@getter", + "name": "requires_vector_input", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.requires_vector_input", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/requires_vector_input/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.requires_vector_input.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns whether the kernel is defined on discrete structures.", + "docstring": "Returns whether the kernel is defined on discrete structures. ", + "code": " @property\n def requires_vector_input(self):\n \"\"\"Returns whether the kernel is defined on discrete structures. \"\"\"\n return self.kernel.requires_vector_input" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/theta@getter", + "name": "theta", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.theta", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/theta/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.theta.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\nNote that theta are typically the log-transformed values of the\nkernel's hyperparameters as this representation of the search space\nis more amenable for hyperparameter search, as hyperparameters like\nlength-scales naturally live on a log-scale.", + "docstring": "Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\nNote that theta are typically the log-transformed values of the\nkernel's hyperparameters as this representation of the search space\nis more amenable for hyperparameter search, as hyperparameters like\nlength-scales naturally live on a log-scale.\n\nReturns\n-------\ntheta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel", + "code": " @property\n def theta(self):\n \"\"\"Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\n Note that theta are typically the log-transformed values of the\n kernel's hyperparameters as this representation of the search space\n is more amenable for hyperparameter search, as hyperparameters like\n length-scales naturally live on a log-scale.\n\n Returns\n -------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n return self.kernel.theta" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/theta@setter", + "name": "theta", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.theta", + "decorators": ["theta.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/theta/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.theta.self", + "default_value": null, + "assigned_by": "IMPLICIT", + 
"is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Exponentiation/theta/theta", + "name": "theta", + "qname": "sklearn.gaussian_process.kernels.Exponentiation.theta.theta", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_dims,)", + "default_value": "", + "description": "The non-fixed, log-transformed hyperparameters of the kernel" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_dims,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Sets the (flattened, log-transformed) non-fixed hyperparameters.", + "docstring": "Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\nParameters\n----------\ntheta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel", + "code": " @theta.setter\n def theta(self, theta):\n \"\"\"Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\n Parameters\n ----------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n self.kernel.theta = theta" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/GenericKernelMixin/requires_vector_input@getter", + "name": "requires_vector_input", + "qname": "sklearn.gaussian_process.kernels.GenericKernelMixin.requires_vector_input", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/GenericKernelMixin/requires_vector_input/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.GenericKernelMixin.requires_vector_input.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Whether the kernel works only on fixed-length feature vectors.", + "docstring": "Whether the kernel works only on fixed-length feature vectors.", + "code": " @property\n def requires_vector_input(self):\n \"\"\"Whether the kernel works only on fixed-length feature vectors.\"\"\"\n return False" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter/__eq__", + "name": "__eq__", + "qname": "sklearn.gaussian_process.kernels.Hyperparameter.__eq__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter/__eq__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Hyperparameter.__eq__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter/__eq__/other", + "name": "other", + "qname": "sklearn.gaussian_process.kernels.Hyperparameter.__eq__.other", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __eq__(self, other):\n return (self.name == other.name and\n self.value_type == other.value_type and\n np.all(self.bounds == other.bounds) and\n self.n_elements == 
other.n_elements and\n self.fixed == other.fixed)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter/__new__", + "name": "__new__", + "qname": "sklearn.gaussian_process.kernels.Hyperparameter.__new__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter/__new__/cls", + "name": "cls", + "qname": "sklearn.gaussian_process.kernels.Hyperparameter.__new__.cls", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter/__new__/name", + "name": "name", + "qname": "sklearn.gaussian_process.kernels.Hyperparameter.__new__.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter/__new__/value_type", + "name": "value_type", + "qname": "sklearn.gaussian_process.kernels.Hyperparameter.__new__.value_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter/__new__/bounds", + "name": "bounds", + "qname": "sklearn.gaussian_process.kernels.Hyperparameter.__new__.bounds", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter/__new__/n_elements", + "name": "n_elements", + "qname": "sklearn.gaussian_process.kernels.Hyperparameter.__new__.n_elements", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Hyperparameter/__new__/fixed", + "name": "fixed", + "qname": "sklearn.gaussian_process.kernels.Hyperparameter.__new__.fixed", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None):\n if not isinstance(bounds, str) or bounds != \"fixed\":\n bounds = np.atleast_2d(bounds)\n if n_elements > 1: # vector-valued parameter\n if bounds.shape[0] == 1:\n bounds = np.repeat(bounds, n_elements, 0)\n elif bounds.shape[0] != n_elements:\n raise ValueError(\"Bounds on %s should have either 1 or \"\n \"%d dimensions. 
Given are %d\"\n % (name, n_elements, bounds.shape[0]))\n\n if fixed is None:\n fixed = isinstance(bounds, str) and bounds == \"fixed\"\n return super(Hyperparameter, cls).__new__(\n cls, name, value_type, bounds, n_elements, fixed)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__add__", + "name": "__add__", + "qname": "sklearn.gaussian_process.kernels.Kernel.__add__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__add__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.__add__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__add__/b", + "name": "b", + "qname": "sklearn.gaussian_process.kernels.Kernel.__add__.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __add__(self, b):\n if not isinstance(b, Kernel):\n return Sum(self, ConstantKernel(b))\n return Sum(self, b)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__call__", + "name": "__call__", + "qname": "sklearn.gaussian_process.kernels.Kernel.__call__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__call__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__call__/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.Kernel.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__call__/Y", + "name": "Y", + "qname": "sklearn.gaussian_process.kernels.Kernel.__call__.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__call__/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process.kernels.Kernel.__call__.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Evaluate the kernel.", + "docstring": "Evaluate the kernel.", + "code": " @abstractmethod\n def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Evaluate the kernel.\"\"\"" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__eq__", + "name": "__eq__", + "qname": "sklearn.gaussian_process.kernels.Kernel.__eq__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__eq__/self", + "name": "self", + "qname": 
"sklearn.gaussian_process.kernels.Kernel.__eq__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__eq__/b", + "name": "b", + "qname": "sklearn.gaussian_process.kernels.Kernel.__eq__.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __eq__(self, b):\n if type(self) != type(b):\n return False\n params_a = self.get_params()\n params_b = b.get_params()\n for key in set(list(params_a.keys()) + list(params_b.keys())):\n if np.any(params_a.get(key, None) != params_b.get(key, None)):\n return False\n return True" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__mul__", + "name": "__mul__", + "qname": "sklearn.gaussian_process.kernels.Kernel.__mul__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__mul__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.__mul__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__mul__/b", + "name": "b", + "qname": "sklearn.gaussian_process.kernels.Kernel.__mul__.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __mul__(self, b):\n if not isinstance(b, Kernel):\n return Product(self, ConstantKernel(b))\n return Product(self, b)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__pow__", + "name": "__pow__", + "qname": "sklearn.gaussian_process.kernels.Kernel.__pow__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__pow__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.__pow__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__pow__/b", + "name": "b", + "qname": "sklearn.gaussian_process.kernels.Kernel.__pow__.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __pow__(self, b):\n return Exponentiation(self, b)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__radd__", + "name": "__radd__", + "qname": "sklearn.gaussian_process.kernels.Kernel.__radd__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__radd__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.__radd__.self", + "default_value": null, + "assigned_by": 
"IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__radd__/b", + "name": "b", + "qname": "sklearn.gaussian_process.kernels.Kernel.__radd__.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __radd__(self, b):\n if not isinstance(b, Kernel):\n return Sum(ConstantKernel(b), self)\n return Sum(b, self)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__repr__", + "name": "__repr__", + "qname": "sklearn.gaussian_process.kernels.Kernel.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__repr__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return \"{0}({1})\".format(self.__class__.__name__,\n \", \".join(map(\"{0:.3g}\".format, self.theta)))" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__rmul__", + "name": "__rmul__", + "qname": "sklearn.gaussian_process.kernels.Kernel.__rmul__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__rmul__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.__rmul__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/__rmul__/b", + "name": "b", + "qname": "sklearn.gaussian_process.kernels.Kernel.__rmul__.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __rmul__(self, b):\n if not isinstance(b, Kernel):\n return Product(ConstantKernel(b), self)\n return Product(b, self)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/_check_bounds_params", + "name": "_check_bounds_params", + "qname": "sklearn.gaussian_process.kernels.Kernel._check_bounds_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/_check_bounds_params/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel._check_bounds_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Called after fitting to warn if bounds may have been too tight.", + "docstring": "Called after fitting to warn if bounds may have been too tight.", + "code": " def _check_bounds_params(self):\n \"\"\"Called after fitting to warn if bounds may have 
been too tight.\"\"\"\n list_close = np.isclose(self.bounds,\n np.atleast_2d(self.theta).T)\n idx = 0\n for hyp in self.hyperparameters:\n if hyp.fixed:\n continue\n for dim in range(hyp.n_elements):\n if list_close[idx, 0]:\n warnings.warn(\"The optimal value found for \"\n \"dimension %s of parameter %s is \"\n \"close to the specified lower \"\n \"bound %s. Decreasing the bound and\"\n \" calling fit again may find a \"\n \"better value.\" %\n (dim, hyp.name, hyp.bounds[dim][0]),\n ConvergenceWarning)\n elif list_close[idx, 1]:\n warnings.warn(\"The optimal value found for \"\n \"dimension %s of parameter %s is \"\n \"close to the specified upper \"\n \"bound %s. Increasing the bound and\"\n \" calling fit again may find a \"\n \"better value.\" %\n (dim, hyp.name, hyp.bounds[dim][1]),\n ConvergenceWarning)\n idx += 1" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/bounds@getter", + "name": "bounds", + "qname": "sklearn.gaussian_process.kernels.Kernel.bounds", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/bounds/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.bounds.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the log-transformed bounds on the theta.", + "docstring": "Returns the log-transformed bounds on the theta.\n\nReturns\n-------\nbounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta", + "code": " @property\n def bounds(self):\n \"\"\"Returns the log-transformed bounds on the theta.\n\n Returns\n -------\n bounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta\n \"\"\"\n bounds = [hyperparameter.bounds\n for hyperparameter in self.hyperparameters\n if not hyperparameter.fixed]\n if len(bounds) > 0:\n return np.log(np.vstack(bounds))\n else:\n return np.array([])" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/clone_with_theta", + "name": "clone_with_theta", + "qname": "sklearn.gaussian_process.kernels.Kernel.clone_with_theta", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/clone_with_theta/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.clone_with_theta.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/clone_with_theta/theta", + "name": "theta", + "qname": "sklearn.gaussian_process.kernels.Kernel.clone_with_theta.theta", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_dims,)", + "default_value": "", + "description": "The hyperparameters" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_dims,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns a clone of self with given hyperparameters theta.", + "docstring": "Returns a clone of self with given hyperparameters theta.\n\nParameters\n----------\ntheta : ndarray of shape (n_dims,)\n The hyperparameters", + "code": " def clone_with_theta(self, 
theta):\n \"\"\"Returns a clone of self with given hyperparameters theta.\n\n Parameters\n ----------\n theta : ndarray of shape (n_dims,)\n The hyperparameters\n \"\"\"\n cloned = clone(self)\n cloned.theta = theta\n return cloned" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/diag", + "name": "diag", + "qname": "sklearn.gaussian_process.kernels.Kernel.diag", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/diag/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.diag.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/diag/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.Kernel.diag.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.", + "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples,)\n Left argument of the returned kernel k(X, Y)\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)", + "code": " @abstractmethod\n def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples,)\n Left argument of the returned kernel k(X, Y)\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/get_params", + "name": "get_params", + "qname": "sklearn.gaussian_process.kernels.Kernel.get_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/get_params/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.get_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/get_params/deep", + "name": "deep", + "qname": "sklearn.gaussian_process.kernels.Kernel.get_params.deep", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, will return the parameters for this estimator and\ncontained subobjects that are estimators." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Get parameters of this kernel.", + "docstring": "Get parameters of this kernel.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values.", + "code": " def get_params(self, deep=True):\n \"\"\"Get parameters of this kernel.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : dict\n Parameter names mapped to their values.\n \"\"\"\n params = dict()\n\n # introspect the constructor arguments to find the model parameters\n # to represent\n cls = self.__class__\n init = getattr(cls.__init__, 'deprecated_original', cls.__init__)\n init_sign = signature(init)\n args, varargs = [], []\n for parameter in init_sign.parameters.values():\n if (parameter.kind != parameter.VAR_KEYWORD and\n parameter.name != 'self'):\n args.append(parameter.name)\n if parameter.kind == parameter.VAR_POSITIONAL:\n varargs.append(parameter.name)\n\n if len(varargs) != 0:\n raise RuntimeError(\"scikit-learn kernels should always \"\n \"specify their parameters in the signature\"\n \" of their __init__ (no varargs).\"\n \" %s doesn't follow this convention.\"\n % (cls, ))\n for arg in args:\n params[arg] = getattr(self, arg)\n\n return params" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/hyperparameters@getter", + "name": "hyperparameters", + "qname": "sklearn.gaussian_process.kernels.Kernel.hyperparameters", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/hyperparameters/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.hyperparameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns a list of all hyperparameter specifications.", + "docstring": "Returns a list of all hyperparameter specifications.", + "code": " @property\n def hyperparameters(self):\n \"\"\"Returns a list of all hyperparameter specifications.\"\"\"\n r = [getattr(self, attr) for attr in dir(self)\n if attr.startswith(\"hyperparameter_\")]\n return r" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/is_stationary", + "name": "is_stationary", + "qname": "sklearn.gaussian_process.kernels.Kernel.is_stationary", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/is_stationary/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.is_stationary.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns whether the kernel is stationary.", + "docstring": "Returns whether the kernel is stationary. ", + "code": " @abstractmethod\n def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary. 
\"\"\"" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/n_dims@getter", + "name": "n_dims", + "qname": "sklearn.gaussian_process.kernels.Kernel.n_dims", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/n_dims/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.n_dims.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the number of non-fixed hyperparameters of the kernel.", + "docstring": "Returns the number of non-fixed hyperparameters of the kernel.", + "code": " @property\n def n_dims(self):\n \"\"\"Returns the number of non-fixed hyperparameters of the kernel.\"\"\"\n return self.theta.shape[0]" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/requires_vector_input@getter", + "name": "requires_vector_input", + "qname": "sklearn.gaussian_process.kernels.Kernel.requires_vector_input", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/requires_vector_input/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.requires_vector_input.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns whether the kernel is defined on fixed-length feature\nvectors or generic objects. Defaults to True for backward\ncompatibility.", + "docstring": "Returns whether the kernel is defined on fixed-length feature\nvectors or generic objects. Defaults to True for backward\ncompatibility.", + "code": " @property\n def requires_vector_input(self):\n \"\"\"Returns whether the kernel is defined on fixed-length feature\n vectors or generic objects. 
Defaults to True for backward\n compatibility.\"\"\"\n return True" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/set_params", + "name": "set_params", + "qname": "sklearn.gaussian_process.kernels.Kernel.set_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/set_params/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.set_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/set_params/params", + "name": "params", + "qname": "sklearn.gaussian_process.kernels.Kernel.set_params.params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Set the parameters of this kernel.\n\nThe method works on simple kernels as well as on nested kernels.\nThe latter have parameters of the form ``<component>__<parameter>``\nso that it's possible to update each component of a nested object.", + "docstring": "Set the parameters of this kernel.\n\nThe method works on simple kernels as well as on nested kernels.\nThe latter have parameters of the form ``<component>__<parameter>``\nso that it's possible to update each component of a nested object.\n\nReturns\n-------\nself", + "code": " def set_params(self, **params):\n \"\"\"Set the parameters of this kernel.\n\n The method works on simple kernels as well as on nested kernels.\n The latter have parameters of the form ``<component>__<parameter>``\n so that it's possible to update each component of a nested object.\n\n Returns\n -------\n self\n \"\"\"\n if not params:\n # Simple optimisation to gain speed (inspect is slow)\n return self\n valid_params = self.get_params(deep=True)\n for key, value in params.items():\n split = key.split('__', 1)\n if len(split) > 1:\n # nested objects case\n name, sub_name = split\n if name not in valid_params:\n raise ValueError('Invalid parameter %s for kernel %s. '\n 'Check the list of available parameters '\n 'with `kernel.get_params().keys()`.' %\n (name, self))\n sub_object = valid_params[name]\n sub_object.set_params(**{sub_name: value})\n else:\n # simple objects case\n if key not in valid_params:\n raise ValueError('Invalid parameter %s for kernel %s. '\n 'Check the list of available parameters '\n 'with `kernel.get_params().keys()`.' 
%\n (key, self.__class__.__name__))\n setattr(self, key, value)\n return self" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/theta@getter", + "name": "theta", + "qname": "sklearn.gaussian_process.kernels.Kernel.theta", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/theta/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.theta.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\nNote that theta are typically the log-transformed values of the\nkernel's hyperparameters as this representation of the search space\nis more amenable for hyperparameter search, as hyperparameters like\nlength-scales naturally live on a log-scale.", + "docstring": "Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\nNote that theta are typically the log-transformed values of the\nkernel's hyperparameters as this representation of the search space\nis more amenable for hyperparameter search, as hyperparameters like\nlength-scales naturally live on a log-scale.\n\nReturns\n-------\ntheta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel", + "code": " @property\n def theta(self):\n \"\"\"Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\n Note that theta are typically the log-transformed values of the\n kernel's hyperparameters as this representation of the search space\n is more amenable for hyperparameter search, as hyperparameters like\n length-scales naturally live on a log-scale.\n\n Returns\n -------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n theta = []\n params = self.get_params()\n for hyperparameter in self.hyperparameters:\n if not hyperparameter.fixed:\n theta.append(params[hyperparameter.name])\n if len(theta) > 0:\n return np.log(np.hstack(theta))\n else:\n return np.array([])" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/theta@setter", + "name": "theta", + "qname": "sklearn.gaussian_process.kernels.Kernel.theta", + "decorators": ["theta.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/theta/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Kernel.theta.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Kernel/theta/theta", + "name": "theta", + "qname": "sklearn.gaussian_process.kernels.Kernel.theta.theta", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_dims,)", + "default_value": "", + "description": "The non-fixed, log-transformed hyperparameters of the kernel" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_dims,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Sets the (flattened, log-transformed) non-fixed hyperparameters.", + "docstring": "Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\nParameters\n----------\ntheta : 
ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel", + "code": " @theta.setter\n def theta(self, theta):\n \"\"\"Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\n Parameters\n ----------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n params = self.get_params()\n i = 0\n for hyperparameter in self.hyperparameters:\n if hyperparameter.fixed:\n continue\n if hyperparameter.n_elements > 1:\n # vector-valued parameter\n params[hyperparameter.name] = np.exp(\n theta[i:i + hyperparameter.n_elements])\n i += hyperparameter.n_elements\n else:\n params[hyperparameter.name] = np.exp(theta[i])\n i += 1\n\n if i != len(theta):\n raise ValueError(\"theta has not the correct number of entries.\"\n \" Should be %d; given are %d\"\n % (i, len(theta)))\n self.set_params(**params)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/__eq__", + "name": "__eq__", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.__eq__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/__eq__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.__eq__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/__eq__/b", + "name": "b", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.__eq__.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __eq__(self, b):\n if type(self) != type(b):\n return False\n return (self.k1 == b.k1 and self.k2 == b.k2) \\\n or (self.k1 == b.k2 and self.k2 == b.k1)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/__init__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/__init__/k1", + "name": "k1", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.__init__.k1", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/__init__/k2", + "name": "k2", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.__init__.k2", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Base class for all kernel operators.\n\n.. 
versionadded:: 0.18", + "docstring": "", + "code": " def __init__(self, k1, k2):\n self.k1 = k1\n self.k2 = k2" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/bounds@getter", + "name": "bounds", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.bounds", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/bounds/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.bounds.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the log-transformed bounds on the theta.", + "docstring": "Returns the log-transformed bounds on the theta.\n\nReturns\n-------\nbounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta", + "code": " @property\n def bounds(self):\n \"\"\"Returns the log-transformed bounds on the theta.\n\n Returns\n -------\n bounds : ndarray of shape (n_dims, 2)\n The log-transformed bounds on the kernel's hyperparameters theta\n \"\"\"\n if self.k1.bounds.size == 0:\n return self.k2.bounds\n if self.k2.bounds.size == 0:\n return self.k1.bounds\n return np.vstack((self.k1.bounds, self.k2.bounds))" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/get_params", + "name": "get_params", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.get_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/get_params/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.get_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/get_params/deep", + "name": "deep", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.get_params.deep", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, will return the parameters for this estimator and\ncontained subobjects that are estimators." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Get parameters of this kernel.", + "docstring": "Get parameters of this kernel.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values.", + "code": " def get_params(self, deep=True):\n \"\"\"Get parameters of this kernel.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : dict\n Parameter names mapped to their values.\n \"\"\"\n params = dict(k1=self.k1, k2=self.k2)\n if deep:\n deep_items = self.k1.get_params().items()\n params.update(('k1__' + k, val) for k, val in deep_items)\n deep_items = self.k2.get_params().items()\n params.update(('k2__' + k, val) for k, val in deep_items)\n\n return params" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/hyperparameters@getter", + "name": "hyperparameters", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.hyperparameters", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/hyperparameters/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.hyperparameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns a list of all hyperparameter.", + "docstring": "Returns a list of all hyperparameter.", + "code": " @property\n def hyperparameters(self):\n \"\"\"Returns a list of all hyperparameter.\"\"\"\n r = [Hyperparameter(\"k1__\" + hyperparameter.name,\n hyperparameter.value_type,\n hyperparameter.bounds, hyperparameter.n_elements)\n for hyperparameter in self.k1.hyperparameters]\n\n for hyperparameter in self.k2.hyperparameters:\n r.append(Hyperparameter(\"k2__\" + hyperparameter.name,\n hyperparameter.value_type,\n hyperparameter.bounds,\n hyperparameter.n_elements))\n return r" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/is_stationary", + "name": "is_stationary", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.is_stationary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/is_stationary/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.is_stationary.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns whether the kernel is stationary.", + "docstring": "Returns whether the kernel is stationary. ", + "code": " def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary. 
\"\"\"\n return self.k1.is_stationary() and self.k2.is_stationary()" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/requires_vector_input@getter", + "name": "requires_vector_input", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.requires_vector_input", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/requires_vector_input/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.requires_vector_input.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns whether the kernel is stationary.", + "docstring": "Returns whether the kernel is stationary. ", + "code": " @property\n def requires_vector_input(self):\n \"\"\"Returns whether the kernel is stationary. \"\"\"\n return (self.k1.requires_vector_input or\n self.k2.requires_vector_input)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/theta@getter", + "name": "theta", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.theta", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/theta/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.theta.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\nNote that theta are typically the log-transformed values of the\nkernel's hyperparameters as this representation of the search space\nis more amenable for hyperparameter search, as hyperparameters like\nlength-scales naturally live on a log-scale.", + "docstring": "Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\nNote that theta are typically the log-transformed values of the\nkernel's hyperparameters as this representation of the search space\nis more amenable for hyperparameter search, as hyperparameters like\nlength-scales naturally live on a log-scale.\n\nReturns\n-------\ntheta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel", + "code": " @property\n def theta(self):\n \"\"\"Returns the (flattened, log-transformed) non-fixed hyperparameters.\n\n Note that theta are typically the log-transformed values of the\n kernel's hyperparameters as this representation of the search space\n is more amenable for hyperparameter search, as hyperparameters like\n length-scales naturally live on a log-scale.\n\n Returns\n -------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n return np.append(self.k1.theta, self.k2.theta)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/theta@setter", + "name": "theta", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.theta", + "decorators": ["theta.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/theta/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.theta.self", + "default_value": null, + "assigned_by": 
"IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/KernelOperator/theta/theta", + "name": "theta", + "qname": "sklearn.gaussian_process.kernels.KernelOperator.theta.theta", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_dims,)", + "default_value": "", + "description": "The non-fixed, log-transformed hyperparameters of the kernel" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_dims,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Sets the (flattened, log-transformed) non-fixed hyperparameters.", + "docstring": "Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\nParameters\n----------\ntheta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel", + "code": " @theta.setter\n def theta(self, theta):\n \"\"\"Sets the (flattened, log-transformed) non-fixed hyperparameters.\n\n Parameters\n ----------\n theta : ndarray of shape (n_dims,)\n The non-fixed, log-transformed hyperparameters of the kernel\n \"\"\"\n k1_dims = self.k1.n_dims\n self.k1.theta = theta[:k1_dims]\n self.k2.theta = theta[k1_dims:]" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Matern/__call__", + "name": "__call__", + "qname": "sklearn.gaussian_process.kernels.Matern.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Matern/__call__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Matern.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Matern/__call__/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.Matern.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Matern/__call__/Y", + "name": "Y", + "qname": "sklearn.gaussian_process.kernels.Matern.__call__.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "Right argument of the returned kernel k(X, Y). If None, k(X, X)\nif evaluated instead." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Matern/__call__/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process.kernels.Matern.__call__.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Determines whether the gradient with respect to the log of\nthe kernel hyperparameter is computed.\nOnly supported when Y is None." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the kernel k(X, Y) and optionally its gradient.", + "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.", + "code": " def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.\n \"\"\"\n X = np.atleast_2d(X)\n length_scale = _check_length_scale(X, self.length_scale)\n if Y is None:\n dists = pdist(X / length_scale, metric='euclidean')\n else:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated when Y is None.\")\n dists = cdist(X / length_scale, Y / length_scale,\n metric='euclidean')\n\n if self.nu == 0.5:\n K = np.exp(-dists)\n elif self.nu == 1.5:\n K = dists * math.sqrt(3)\n K = (1. + K) * np.exp(-K)\n elif self.nu == 2.5:\n K = dists * math.sqrt(5)\n K = (1. + K + K ** 2 / 3.0) * np.exp(-K)\n elif self.nu == np.inf:\n K = np.exp(-dists ** 2 / 2.0)\n else: # general case; expensive to evaluate\n K = dists\n K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan\n tmp = (math.sqrt(2 * self.nu) * K)\n K.fill((2 ** (1. 
- self.nu)) / gamma(self.nu))\n K *= tmp ** self.nu\n K *= kv(self.nu, tmp)\n\n if Y is None:\n # convert from upper-triangular matrix to square matrix\n K = squareform(K)\n np.fill_diagonal(K, 1)\n\n if eval_gradient:\n if self.hyperparameter_length_scale.fixed:\n # Hyperparameter l kept fixed\n K_gradient = np.empty((X.shape[0], X.shape[0], 0))\n return K, K_gradient\n\n # We need to recompute the pairwise dimension-wise distances\n if self.anisotropic:\n D = (X[:, np.newaxis, :] - X[np.newaxis, :, :])**2 \\\n / (length_scale ** 2)\n else:\n D = squareform(dists**2)[:, :, np.newaxis]\n\n if self.nu == 0.5:\n K_gradient = K[..., np.newaxis] * D \\\n / np.sqrt(D.sum(2))[:, :, np.newaxis]\n K_gradient[~np.isfinite(K_gradient)] = 0\n elif self.nu == 1.5:\n K_gradient = \\\n 3 * D * np.exp(-np.sqrt(3 * D.sum(-1)))[..., np.newaxis]\n elif self.nu == 2.5:\n tmp = np.sqrt(5 * D.sum(-1))[..., np.newaxis]\n K_gradient = 5.0 / 3.0 * D * (tmp + 1) * np.exp(-tmp)\n elif self.nu == np.inf:\n K_gradient = D * K[..., np.newaxis]\n else:\n # approximate gradient numerically\n def f(theta): # helper function\n return self.clone_with_theta(theta)(X, Y)\n return K, _approx_fprime(self.theta, f, 1e-10)\n\n if not self.anisotropic:\n return K, K_gradient[:, :].sum(-1)[:, :, np.newaxis]\n else:\n return K, K_gradient\n else:\n return K" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Matern/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process.kernels.Matern.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Matern/__init__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Matern.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Matern/__init__/length_scale", + "name": "length_scale", + "qname": "sklearn.gaussian_process.kernels.Matern.__init__.length_scale", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float or ndarray of shape (n_features,)", + "default_value": "1.0", + "description": "The length scale of the kernel. If a float, an isotropic kernel is\nused. If an array, an anisotropic kernel is used where each dimension\nof l defines the length-scale of the respective feature dimension." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Matern/__init__/length_scale_bounds", + "name": "length_scale_bounds", + "qname": "sklearn.gaussian_process.kernels.Matern.__init__.length_scale_bounds", + "default_value": "(1e-05, 100000.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "pair of floats >= 0 or \"fixed\"", + "default_value": "(1e-5, 1e5)", + "description": "The lower and upper bound on 'length_scale'.\nIf set to \"fixed\", 'length_scale' cannot be changed during\nhyperparameter tuning." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "pair of floats >= 0" + }, + { + "kind": "NamedType", + "name": "\"fixed\"" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Matern/__init__/nu", + "name": "nu", + "qname": "sklearn.gaussian_process.kernels.Matern.__init__.nu", + "default_value": "1.5", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.5", + "description": "The parameter nu controlling the smoothness of the learned function.\nThe smaller nu, the less smooth the approximated function is.\nFor nu=inf, the kernel becomes equivalent to the RBF kernel and for\nnu=0.5 to the absolute exponential kernel. Important intermediate\nvalues are nu=1.5 (once differentiable functions) and nu=2.5\n(twice differentiable functions). Note that values of nu not in\n[0.5, 1.5, 2.5, inf] incur a considerably higher computational cost\n(appr. 10 times higher) since they require to evaluate the modified\nBessel function. Furthermore, in contrast to l, nu is kept fixed to\nits initial value and not optimized." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Matern kernel.\n\nThe class of Matern kernels is a generalization of the :class:`RBF`.\nIt has an additional parameter :math:`\\nu` which controls the\nsmoothness of the resulting function. The smaller :math:`\\nu`,\nthe less smooth the approximated function is.\nAs :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to\nthe :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Mat\u00e9rn kernel\nbecomes identical to the absolute exponential kernel.\nImportant intermediate values are\n:math:`\\nu=1.5` (once differentiable functions)\nand :math:`\\nu=2.5` (twice differentiable functions).\n\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\n \\Bigg)^\\nu K_\\nu\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)\n\nwhere :math:`d(\\cdot,\\cdot)` is the Euclidean distance,\n:math:`K_{\\nu}(\\cdot)` is a modified Bessel function and\n:math:`\\Gamma(\\cdot)` is the gamma function.\nSee [1]_, Chapter 4, Section 4.2, for details regarding the different\nvariants of the Matern kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18", + "docstring": "", + "code": " def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5),\n nu=1.5):\n super().__init__(length_scale, length_scale_bounds)\n self.nu = nu" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Matern/__repr__", + "name": "__repr__", + "qname": "sklearn.gaussian_process.kernels.Matern.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Matern/__repr__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Matern.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n if self.anisotropic:\n return \"{0}(length_scale=[{1}], nu={2:.3g})\".format(\n self.__class__.__name__,\n \", \".join(map(\"{0:.3g}\".format, self.length_scale)),\n self.nu)\n else:\n return \"{0}(length_scale={1:.3g}, nu={2:.3g})\".format(\n self.__class__.__name__, np.ravel(self.length_scale)[0],\n self.nu)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/NormalizedKernelMixin/diag", + "name": "diag", + "qname": "sklearn.gaussian_process.kernels.NormalizedKernelMixin.diag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/NormalizedKernelMixin/diag/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.NormalizedKernelMixin.diag.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/NormalizedKernelMixin/diag/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.NormalizedKernelMixin.diag.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.", + "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)", + "code": " def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n return np.ones(X.shape[0])" + }, + { + "id": 
"scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__call__", + "name": "__call__", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__call__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__call__/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__call__/Y", + "name": "Y", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.__call__.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "Right argument of the returned kernel k(X, Y). If None, k(X, X)\nif evaluated instead." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__call__/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.__call__.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Determines whether the gradient with respect to the log of\nthe kernel hyperparameter is computed.\nOnly supported when Y is None." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the kernel k(X, Y) and optionally its gradient.", + "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when `eval_gradient`\n is True.", + "code": " def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.\n \"\"\"\n pairwise_kernels_kwargs = self.pairwise_kernels_kwargs\n if self.pairwise_kernels_kwargs is None:\n pairwise_kernels_kwargs = {}\n\n X = np.atleast_2d(X)\n K = pairwise_kernels(X, Y, metric=self.metric, gamma=self.gamma,\n filter_params=True,\n **pairwise_kernels_kwargs)\n if eval_gradient:\n if self.hyperparameter_gamma.fixed:\n return K, np.empty((X.shape[0], X.shape[0], 0))\n else:\n # approximate gradient numerically\n def f(gamma): # helper function\n return pairwise_kernels(\n X, Y, metric=self.metric, gamma=np.exp(gamma),\n filter_params=True, **pairwise_kernels_kwargs)\n return K, _approx_fprime(self.theta, f, 1e-10)\n else:\n return K" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__init__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__init__/gamma", + "name": "gamma", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.__init__.gamma", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Parameter gamma of the pairwise kernel specified by metric. It should\nbe positive." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__init__/gamma_bounds", + "name": "gamma_bounds", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.__init__.gamma_bounds", + "default_value": "(1e-05, 100000.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "pair of floats >= 0 or \"fixed\"", + "default_value": "(1e-5, 1e5)", + "description": "The lower and upper bound on 'gamma'.\nIf set to \"fixed\", 'gamma' cannot be changed during\nhyperparameter tuning." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "pair of floats >= 0" + }, + { + "kind": "NamedType", + "name": "\"fixed\"" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__init__/metric", + "name": "metric", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.__init__.metric", + "default_value": "'linear'", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{\"linear\", \"additive_chi2\", \"chi2\", \"poly\", \"polynomial\", \"rbf\", \"laplacian\", \"sigmoid\", \"cosine\"} or callable", + "default_value": "\"linear\"", + "description": "The metric to use when calculating kernel between instances in a\nfeature array. If metric is a string, it must be one of the metrics\nin pairwise.PAIRWISE_KERNEL_FUNCTIONS.\nIf metric is \"precomputed\", X is assumed to be a kernel matrix.\nAlternatively, if metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays from X as input and return a value indicating\nthe distance between them." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [ + "laplacian", + "sigmoid", + "cosine", + "chi2", + "linear", + "poly", + "rbf", + "polynomial", + "additive_chi2" + ] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__init__/pairwise_kernels_kwargs", + "name": "pairwise_kernels_kwargs", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.__init__.pairwise_kernels_kwargs", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "All entries of this dict (if any) are passed as keyword arguments to\nthe pairwise kernel function." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Wrapper for kernels in sklearn.metrics.pairwise.\n\nA thin wrapper around the functionality of the kernels in\nsklearn.metrics.pairwise.\n\nNote: Evaluation of eval_gradient is not analytic but numeric and all\n kernels support only isotropic distances. The parameter gamma is\n considered to be a hyperparameter and may be optimized. The other\n kernel parameters are set directly at initialization and are kept\n fixed.\n\n.. 
versionadded:: 0.18", + "docstring": "", + "code": " def __init__(self, gamma=1.0, gamma_bounds=(1e-5, 1e5), metric=\"linear\",\n pairwise_kernels_kwargs=None):\n self.gamma = gamma\n self.gamma_bounds = gamma_bounds\n self.metric = metric\n self.pairwise_kernels_kwargs = pairwise_kernels_kwargs" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__repr__", + "name": "__repr__", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/__repr__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return \"{0}(gamma={1}, metric={2})\".format(\n self.__class__.__name__, self.gamma, self.metric)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/diag", + "name": "diag", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.diag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/diag/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.diag.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/diag/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.diag.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.", + "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)", + "code": " def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n # We have to fall back to slow way of computing diagonal\n return np.apply_along_axis(self, 1, X).ravel()" + }, + { + "id": 
"scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/hyperparameter_gamma@getter", + "name": "hyperparameter_gamma", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.hyperparameter_gamma", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/hyperparameter_gamma/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.hyperparameter_gamma.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def hyperparameter_gamma(self):\n return Hyperparameter(\"gamma\", \"numeric\", self.gamma_bounds)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/is_stationary", + "name": "is_stationary", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.is_stationary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/PairwiseKernel/is_stationary/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.PairwiseKernel.is_stationary.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns whether the kernel is stationary.", + "docstring": "Returns whether the kernel is stationary. ", + "code": " def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary. \"\"\"\n return self.metric in [\"rbf\"]" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Product/__call__", + "name": "__call__", + "qname": "sklearn.gaussian_process.kernels.Product.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Product/__call__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Product.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Product/__call__/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.Product.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Product/__call__/Y", + "name": "Y", + "qname": "sklearn.gaussian_process.kernels.Product.__call__.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_Y, n_features) or list of object", + "default_value": "None", + "description": "Right argument of the returned kernel k(X, Y). If None, k(X, X)\nis evaluated instead." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_Y, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Product/__call__/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process.kernels.Product.__call__.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Determines whether the gradient with respect to the log of\nthe kernel hyperparameter is computed." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the kernel k(X, Y) and optionally its gradient.", + "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_Y, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.", + "code": " def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\n Y : array-like of shape (n_samples_Y, n_features) or list of object,\\\n default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when `eval_gradient`\n is True.\n \"\"\"\n if eval_gradient:\n K1, K1_gradient = self.k1(X, Y, eval_gradient=True)\n K2, K2_gradient = self.k2(X, Y, eval_gradient=True)\n return K1 * K2, np.dstack((K1_gradient * K2[:, :, np.newaxis],\n K2_gradient * K1[:, :, np.newaxis]))\n else:\n return self.k1(X, Y) * self.k2(X, Y)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Product/__repr__", + "name": "__repr__", + "qname": "sklearn.gaussian_process.kernels.Product.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Product/__repr__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Product.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return \"{0} * {1}\".format(self.k1, self.k2)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Product/diag", + "name": "diag", + "qname": "sklearn.gaussian_process.kernels.Product.diag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Product/diag/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Product.diag.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Product/diag/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.Product.diag.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "", + "description": "Argument to the kernel." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.", + "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)", + "code": " def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n return self.k1.diag(X) * self.k2.diag(X)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/__call__", + "name": "__call__", + "qname": "sklearn.gaussian_process.kernels.RBF.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/__call__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.RBF.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/__call__/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.RBF.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/__call__/Y", + "name": "Y", + "qname": "sklearn.gaussian_process.kernels.RBF.__call__.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "Right argument of the returned kernel k(X, Y). If None, k(X, X)\nif evaluated instead." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/__call__/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process.kernels.RBF.__call__.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Determines whether the gradient with respect to the log of\nthe kernel hyperparameter is computed.\nOnly supported when Y is None." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the kernel k(X, Y) and optionally its gradient.", + "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.", + "code": " def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n if evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when `eval_gradient`\n is True.\n \"\"\"\n X = np.atleast_2d(X)\n length_scale = _check_length_scale(X, self.length_scale)\n if Y is None:\n dists = pdist(X / length_scale, metric='sqeuclidean')\n K = np.exp(-.5 * dists)\n # convert from upper-triangular matrix to square matrix\n K = squareform(K)\n np.fill_diagonal(K, 1)\n else:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated when Y is None.\")\n dists = cdist(X / length_scale, Y / length_scale,\n metric='sqeuclidean')\n K = np.exp(-.5 * dists)\n\n if eval_gradient:\n if self.hyperparameter_length_scale.fixed:\n # Hyperparameter l kept fixed\n return K, np.empty((X.shape[0], X.shape[0], 0))\n elif not self.anisotropic or length_scale.shape[0] == 1:\n K_gradient = \\\n (K * squareform(dists))[:, :, np.newaxis]\n return K, K_gradient\n elif self.anisotropic:\n # We need to recompute the pairwise dimension-wise distances\n K_gradient = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 \\\n / (length_scale ** 2)\n K_gradient *= K[..., np.newaxis]\n return K, K_gradient\n else:\n return K" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process.kernels.RBF.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/__init__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.RBF.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/__init__/length_scale", + "name": "length_scale", + "qname": "sklearn.gaussian_process.kernels.RBF.__init__.length_scale", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float or ndarray of shape (n_features,)", + "default_value": "1.0", + "description": "The length scale of the kernel. If a float, an isotropic kernel is\nused. If an array, an anisotropic kernel is used where each dimension\nof l defines the length-scale of the respective feature dimension." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/__init__/length_scale_bounds", + "name": "length_scale_bounds", + "qname": "sklearn.gaussian_process.kernels.RBF.__init__.length_scale_bounds", + "default_value": "(1e-05, 100000.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "pair of floats >= 0 or \"fixed\"", + "default_value": "(1e-5, 1e5)", + "description": "The lower and upper bound on 'length_scale'.\nIf set to \"fixed\", 'length_scale' cannot be changed during\nhyperparameter tuning." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "pair of floats >= 0" + }, + { + "kind": "NamedType", + "name": "\"fixed\"" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Radial-basis function kernel (aka squared-exponential kernel).\n\nThe RBF kernel is a stationary kernel. It is also known as the\n\"squared exponential\" kernel. 
It is parameterized by a length scale\nparameter :math:`l>0`, which can either be a scalar (isotropic variant\nof the kernel) or a vector with the same number of dimensions as the inputs\nX (anisotropic variant of the kernel). The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\exp\\left(- \\frac{d(x_i, x_j)^2}{2l^2} \\right)\n\nwhere :math:`l` is the length scale of the kernel and\n:math:`d(\\cdot,\\cdot)` is the Euclidean distance.\nFor advice on how to set the length scale parameter, see e.g. [1]_.\n\nThis kernel is infinitely differentiable, which implies that GPs with this\nkernel as covariance function have mean square derivatives of all orders,\nand are thus very smooth.\nSee [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "", + "code": " def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)):\n self.length_scale = length_scale\n self.length_scale_bounds = length_scale_bounds" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/__repr__", + "name": "__repr__", + "qname": "sklearn.gaussian_process.kernels.RBF.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/__repr__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.RBF.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n if self.anisotropic:\n return \"{0}(length_scale=[{1}])\".format(\n self.__class__.__name__, \", \".join(map(\"{0:.3g}\".format,\n self.length_scale)))\n else: # isotropic\n return \"{0}(length_scale={1:.3g})\".format(\n self.__class__.__name__, np.ravel(self.length_scale)[0])" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/anisotropic@getter", + "name": "anisotropic", + "qname": "sklearn.gaussian_process.kernels.RBF.anisotropic", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/anisotropic/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.RBF.anisotropic.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def anisotropic(self):\n return np.iterable(self.length_scale) and len(self.length_scale) > 1" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/hyperparameter_length_scale@getter", + "name": "hyperparameter_length_scale", + "qname": "sklearn.gaussian_process.kernels.RBF.hyperparameter_length_scale", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RBF/hyperparameter_length_scale/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.RBF.hyperparameter_length_scale.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def 
hyperparameter_length_scale(self):\n if self.anisotropic:\n return Hyperparameter(\"length_scale\", \"numeric\",\n self.length_scale_bounds,\n len(self.length_scale))\n return Hyperparameter(\n \"length_scale\", \"numeric\", self.length_scale_bounds)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__call__", + "name": "__call__", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__call__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__call__/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__call__/Y", + "name": "Y", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.__call__.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "Right argument of the returned kernel k(X, Y). If None, k(X, X)\nis evaluated instead." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__call__/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.__call__.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Determines whether the gradient with respect to the log of\nthe kernel hyperparameter is computed.\nOnly supported when Y is None." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the kernel k(X, Y) and optionally its gradient.", + "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims)\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when eval_gradient\n is True.", + "code": " def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n Left argument of the returned kernel k(X, Y)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims)\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when eval_gradient\n is True.\n \"\"\"\n if len(np.atleast_1d(self.length_scale)) > 1:\n raise AttributeError(\n \"RationalQuadratic kernel only supports isotropic version, \"\n \"please use a single scalar for length_scale\")\n X = np.atleast_2d(X)\n if Y is None:\n dists = squareform(pdist(X, metric='sqeuclidean'))\n tmp = dists / (2 * self.alpha * self.length_scale ** 2)\n base = (1 + tmp)\n K = base ** -self.alpha\n np.fill_diagonal(K, 1)\n else:\n if eval_gradient:\n raise ValueError(\n \"Gradient can only be evaluated when Y is None.\")\n dists = cdist(X, Y, metric='sqeuclidean')\n K = (1 + dists / (2 * self.alpha * self.length_scale ** 2)) \\\n ** -self.alpha\n\n if eval_gradient:\n # gradient with respect to length_scale\n if not self.hyperparameter_length_scale.fixed:\n length_scale_gradient = \\\n dists * K / (self.length_scale ** 2 * base)\n length_scale_gradient = length_scale_gradient[:, :, np.newaxis]\n else: # l is kept fixed\n length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))\n\n # gradient with respect to alpha\n if not self.hyperparameter_alpha.fixed:\n alpha_gradient = \\\n K * (-self.alpha * np.log(base)\n + dists / (2 * self.length_scale ** 2 * base))\n alpha_gradient = alpha_gradient[:, :, np.newaxis]\n else: # alpha is kept fixed\n alpha_gradient = np.empty((K.shape[0], K.shape[1], 0))\n\n return K, np.dstack((alpha_gradient, length_scale_gradient))\n else:\n return K" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__init__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__init__/length_scale", + "name": "length_scale", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.__init__.length_scale", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float > 0", + "default_value": "1.0", + "description": "The length scale of the kernel." 
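As a runnable companion to the `RBF` and `RationalQuadratic` records above, here is a minimal sketch (assuming scikit-learn 0.24.x and NumPy are installed; the toy inputs are invented for illustration). It shows the isotropic/anisotropic switch on `RBF`, the gradient convention of one slice per log-hyperparameter, and the scale-mixture limit in which `RationalQuadratic` approaches `RBF` as `alpha` grows.

```python
import numpy as np
from sklearn.gaussian_process.kernels import RBF, RationalQuadratic

X = np.random.RandomState(0).rand(5, 2)  # 5 samples, 2 features (toy data)

# Isotropic RBF: scalar length_scale; the gradient gets one slice per
# log-transformed hyperparameter, hence shape (5, 5, 1).
iso = RBF(length_scale=1.0)
K, K_grad = iso(X, eval_gradient=True)
print(K.shape, K_grad.shape)                  # (5, 5) (5, 5, 1)

# Anisotropic RBF: one length scale per feature, so two gradient slices.
aniso = RBF(length_scale=[1.0, 2.0])
print(aniso.anisotropic)                      # True
print(aniso(X, eval_gradient=True)[1].shape)  # (5, 5, 2)

# RationalQuadratic is a scale mixture of RBFs and tends to RBF as alpha -> inf.
rq = RationalQuadratic(length_scale=1.0, alpha=1e6)
print(np.allclose(rq(X), iso(X), atol=1e-5))  # True

# Only the isotropic RationalQuadratic is supported; a vector length_scale
# raises, as the AttributeError guard in __call__ above shows.
try:
    RationalQuadratic(length_scale=[1.0, 2.0])(X)
except AttributeError as err:
    print(err)
```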
+ }, + "type": { + "kind": "NamedType", + "name": "float > 0" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__init__/alpha", + "name": "alpha", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.__init__.alpha", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float > 0", + "default_value": "1.0", + "description": "Scale mixture parameter" + }, + "type": { + "kind": "NamedType", + "name": "float > 0" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__init__/length_scale_bounds", + "name": "length_scale_bounds", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.__init__.length_scale_bounds", + "default_value": "(1e-05, 100000.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "pair of floats >= 0 or \"fixed\"", + "default_value": "(1e-5, 1e5)", + "description": "The lower and upper bound on 'length_scale'.\nIf set to \"fixed\", 'length_scale' cannot be changed during\nhyperparameter tuning." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "pair of floats >= 0" + }, + { + "kind": "NamedType", + "name": "\"fixed\"" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__init__/alpha_bounds", + "name": "alpha_bounds", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.__init__.alpha_bounds", + "default_value": "(1e-05, 100000.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "pair of floats >= 0 or \"fixed\"", + "default_value": "(1e-5, 1e5)", + "description": "The lower and upper bound on 'alpha'.\nIf set to \"fixed\", 'alpha' cannot be changed during\nhyperparameter tuning." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "pair of floats >= 0" + }, + { + "kind": "NamedType", + "name": "\"fixed\"" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Rational Quadratic kernel.\n\nThe RationalQuadratic kernel can be seen as a scale mixture (an infinite\nsum) of RBF kernels with different characteristic length scales. It is\nparameterized by a length scale parameter :math:`l>0` and a scale\nmixture parameter :math:`\\alpha>0`. Only the isotropic variant\nwhere length_scale :math:`l` is a scalar is supported at the moment.\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\left(\n 1 + \\frac{d(x_i, x_j)^2 }{ 2\\alpha l^2}\\right)^{-\\alpha}\n\nwhere :math:`\\alpha` is the scale mixture parameter, :math:`l` is\nthe length scale of the kernel and :math:`d(\\cdot,\\cdot)` is the\nEuclidean distance.\nFor advice on how to set the parameters, see e.g. [1]_.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18", + "docstring": "", + "code": " def __init__(self, length_scale=1.0, alpha=1.0,\n length_scale_bounds=(1e-5, 1e5), alpha_bounds=(1e-5, 1e5)):\n self.length_scale = length_scale\n self.alpha = alpha\n self.length_scale_bounds = length_scale_bounds\n self.alpha_bounds = alpha_bounds" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__repr__", + "name": "__repr__", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/__repr__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return \"{0}(alpha={1:.3g}, length_scale={2:.3g})\".format(\n self.__class__.__name__, self.alpha, self.length_scale)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_alpha@getter", + "name": "hyperparameter_alpha", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.hyperparameter_alpha", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_alpha/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.hyperparameter_alpha.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def hyperparameter_alpha(self):\n return Hyperparameter(\"alpha\", \"numeric\", self.alpha_bounds)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_length_scale@getter", + "name": "hyperparameter_length_scale", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.hyperparameter_length_scale", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/RationalQuadratic/hyperparameter_length_scale/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.RationalQuadratic.hyperparameter_length_scale.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def hyperparameter_length_scale(self):\n return Hyperparameter(\n \"length_scale\", \"numeric\", self.length_scale_bounds)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/StationaryKernelMixin/is_stationary", + "name": "is_stationary", + "qname": "sklearn.gaussian_process.kernels.StationaryKernelMixin.is_stationary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/StationaryKernelMixin/is_stationary/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.StationaryKernelMixin.is_stationary.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + 
"type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns whether the kernel is stationary.", + "docstring": "Returns whether the kernel is stationary. ", + "code": " def is_stationary(self):\n \"\"\"Returns whether the kernel is stationary. \"\"\"\n return True" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Sum/__call__", + "name": "__call__", + "qname": "sklearn.gaussian_process.kernels.Sum.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Sum/__call__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Sum.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Sum/__call__/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.Sum.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Sum/__call__/Y", + "name": "Y", + "qname": "sklearn.gaussian_process.kernels.Sum.__call__.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "None", + "description": "Right argument of the returned kernel k(X, Y). If None, k(X, X)\nis evaluated instead." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Sum/__call__/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process.kernels.Sum.__call__.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Determines whether the gradient with respect to the log of\nthe kernel hyperparameter is computed." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the kernel k(X, Y) and optionally its gradient.", + "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). 
If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.", + "code": " def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\n Y : array-like of shape (n_samples_X, n_features) or list of object,\\\n default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when `eval_gradient`\n is True.\n \"\"\"\n if eval_gradient:\n K1, K1_gradient = self.k1(X, Y, eval_gradient=True)\n K2, K2_gradient = self.k2(X, Y, eval_gradient=True)\n return K1 + K2, np.dstack((K1_gradient, K2_gradient))\n else:\n return self.k1(X, Y) + self.k2(X, Y)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Sum/__repr__", + "name": "__repr__", + "qname": "sklearn.gaussian_process.kernels.Sum.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Sum/__repr__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Sum.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return \"{0} + {1}\".format(self.k1, self.k2)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Sum/diag", + "name": "diag", + "qname": "sklearn.gaussian_process.kernels.Sum.diag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Sum/diag/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.Sum.diag.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/Sum/diag/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.Sum.diag.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "", + "description": "Argument to the kernel." 
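The `Sum.__call__` code above adds the two sub-kernels elementwise and stacks their gradients with `np.dstack`; a brief sketch of that behaviour (again assuming scikit-learn 0.24.x; the arrays are toy data):

```python
import numpy as np
from sklearn.gaussian_process.kernels import RBF, Sum, WhiteKernel

X = np.random.RandomState(1).rand(4, 3)

k = Sum(RBF(length_scale=1.0), WhiteKernel(noise_level=0.1))
K, K_grad = k(X, eval_gradient=True)

# K is k1(X) + k2(X); the gradient stacks one slice from the RBF part and
# one from the WhiteKernel part along the last axis.
print(K.shape, K_grad.shape)                 # (4, 4) (4, 4, 2)
print(np.allclose(K, k.k1(X) + k.k2(X)))     # True
print(np.allclose(k.diag(X), np.diag(K)))    # True, per Sum.diag

# Kernel objects also overload `+`, which builds the same Sum kernel.
print(RBF(length_scale=1.0) + WhiteKernel(noise_level=0.1))
```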
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to `np.diag(self(X))`; however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.", + "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to `np.diag(self(X))`; however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)", + "code": " def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to `np.diag(self(X))`; however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n return self.k1.diag(X) + self.k2.diag(X)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__call__", + "name": "__call__", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__call__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__call__/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "", + "description": "Left argument of the returned kernel k(X, Y)" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__call__/Y", + "name": "Y", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.__call__.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "None", + "description": "Right argument of the returned kernel k(X, Y). If None, k(X, X)\nis evaluated instead." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__call__/eval_gradient", + "name": "eval_gradient", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.__call__.eval_gradient", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Determines whether the gradient with respect to the log of\nthe kernel hyperparameter is computed.\nOnly supported when Y is None." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the kernel k(X, Y) and optionally its gradient.", + "docstring": "Return the kernel k(X, Y) and optionally its gradient.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\nY : array-like of shape (n_samples_X, n_features) or list of object, default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\neval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\nK_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. Only returned when eval_gradient\n is True.", + "code": " def __call__(self, X, Y=None, eval_gradient=False):\n \"\"\"Return the kernel k(X, Y) and optionally its gradient.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Left argument of the returned kernel k(X, Y)\n\n Y : array-like of shape (n_samples_X, n_features) or list of object,\\\n default=None\n Right argument of the returned kernel k(X, Y). If None, k(X, X)\n is evaluated instead.\n\n eval_gradient : bool, default=False\n Determines whether the gradient with respect to the log of\n the kernel hyperparameter is computed.\n Only supported when Y is None.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_Y)\n Kernel k(X, Y)\n\n K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\\\n optional\n The gradient of the kernel k(X, X) with respect to the log of the\n hyperparameter of the kernel. 
Only returned when eval_gradient\n is True.\n \"\"\"\n if Y is not None and eval_gradient:\n raise ValueError(\"Gradient can only be evaluated when Y is None.\")\n\n if Y is None:\n K = self.noise_level * np.eye(_num_samples(X))\n if eval_gradient:\n if not self.hyperparameter_noise_level.fixed:\n return (K, self.noise_level\n * np.eye(_num_samples(X))[:, :, np.newaxis])\n else:\n return K, np.empty((_num_samples(X), _num_samples(X), 0))\n else:\n return K\n else:\n return np.zeros((_num_samples(X), _num_samples(Y)))" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__init__", + "name": "__init__", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__init__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__init__/noise_level", + "name": "noise_level", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.__init__.noise_level", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Parameter controlling the noise level (variance)" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__init__/noise_level_bounds", + "name": "noise_level_bounds", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.__init__.noise_level_bounds", + "default_value": "(1e-05, 100000.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "pair of floats >= 0 or \"fixed\"", + "default_value": "(1e-5, 1e5)", + "description": "The lower and upper bound on 'noise_level'.\nIf set to \"fixed\", 'noise_level' cannot be changed during\nhyperparameter tuning." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "pair of floats >= 0" + }, + { + "kind": "NamedType", + "name": "\"fixed\"" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "White kernel.\n\nThe main use-case of this kernel is as part of a sum-kernel where it\nexplains the noise of the signal as independently and identically\nnormally-distributed. The parameter noise_level equals the variance of this\nnoise.\n\n.. math::\n k(x_1, x_2) = noise\\_level \\text{ if } x_1 == x_2 \\text{ else } 0\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18", + "docstring": "", + "code": " def __init__(self, noise_level=1.0, noise_level_bounds=(1e-5, 1e5)):\n self.noise_level = noise_level\n self.noise_level_bounds = noise_level_bounds" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__repr__", + "name": "__repr__", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/__repr__/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return \"{0}(noise_level={1:.3g})\".format(self.__class__.__name__,\n self.noise_level)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/diag", + "name": "diag", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.diag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/diag/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.diag.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/diag/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.diag.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features) or list of object", + "default_value": "", + "description": "Argument to the kernel." 
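To tie the `WhiteKernel.__call__` code above to concrete values, a small sketch (scikit-learn 0.24.x assumed; the data is invented): on a single argument the kernel contributes `noise_level` on the diagonal only, and the cross-kernel against new points is identically zero.

```python
import numpy as np
from sklearn.gaussian_process.kernels import WhiteKernel

X = np.random.RandomState(2).rand(3, 2)
Y = np.random.RandomState(3).rand(5, 2)

wk = WhiteKernel(noise_level=0.5)
print(np.allclose(wk(X), 0.5 * np.eye(3)))  # True: noise_level * I for k(X, X)
print(np.all(wk(X, Y) == 0.0))              # True: zeros for k(X, Y)
print(wk.diag(X))                           # [0.5 0.5 0.5]
```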
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + }, + { + "kind": "NamedType", + "name": "list of object" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.", + "docstring": "Returns the diagonal of the kernel k(X, X).\n\nThe result of this method is identical to np.diag(self(X)); however,\nit can be evaluated more efficiently since only the diagonal is\nevaluated.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\nReturns\n-------\nK_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)", + "code": " def diag(self, X):\n \"\"\"Returns the diagonal of the kernel k(X, X).\n\n The result of this method is identical to np.diag(self(X)); however,\n it can be evaluated more efficiently since only the diagonal is\n evaluated.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features) or list of object\n Argument to the kernel.\n\n Returns\n -------\n K_diag : ndarray of shape (n_samples_X,)\n Diagonal of kernel k(X, X)\n \"\"\"\n return np.full(_num_samples(X), self.noise_level,\n dtype=np.array(self.noise_level).dtype)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/hyperparameter_noise_level@getter", + "name": "hyperparameter_noise_level", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.hyperparameter_noise_level", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/WhiteKernel/hyperparameter_noise_level/self", + "name": "self", + "qname": "sklearn.gaussian_process.kernels.WhiteKernel.hyperparameter_noise_level.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def hyperparameter_noise_level(self):\n return Hyperparameter(\n \"noise_level\", \"numeric\", self.noise_level_bounds)" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/_approx_fprime", + "name": "_approx_fprime", + "qname": "sklearn.gaussian_process.kernels._approx_fprime", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/_approx_fprime/xk", + "name": "xk", + "qname": "sklearn.gaussian_process.kernels._approx_fprime.xk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/_approx_fprime/f", + "name": "f", + "qname": "sklearn.gaussian_process.kernels._approx_fprime.f", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/_approx_fprime/epsilon", + "name": "epsilon", + "qname": "sklearn.gaussian_process.kernels._approx_fprime.epsilon", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + 
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/_approx_fprime/args", + "name": "args", + "qname": "sklearn.gaussian_process.kernels._approx_fprime.args", + "default_value": "()", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _approx_fprime(xk, f, epsilon, args=()):\n f0 = f(*((xk,) + args))\n grad = np.zeros((f0.shape[0], f0.shape[1], len(xk)), float)\n ei = np.zeros((len(xk), ), float)\n for k in range(len(xk)):\n ei[k] = 1.0\n d = epsilon * ei\n grad[:, :, k] = (f(*((xk + d,) + args)) - f0) / d[k]\n ei[k] = 0.0\n return grad" + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/_check_length_scale", + "name": "_check_length_scale", + "qname": "sklearn.gaussian_process.kernels._check_length_scale", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/_check_length_scale/X", + "name": "X", + "qname": "sklearn.gaussian_process.kernels._check_length_scale.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.gaussian_process.kernels/_check_length_scale/length_scale", + "name": "length_scale", + "qname": "sklearn.gaussian_process.kernels._check_length_scale.length_scale", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_length_scale(X, length_scale):\n length_scale = np.squeeze(length_scale).astype(float)\n if np.ndim(length_scale) > 1:\n raise ValueError(\"length_scale cannot be of dimension greater than 1\")\n if np.ndim(length_scale) == 1 and X.shape[1] != length_scale.shape[0]:\n raise ValueError(\"Anisotropic kernel must have the same number of \"\n \"dimensions as data (%d!=%d)\"\n % (length_scale.shape[0], X.shape[1]))\n return length_scale" + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/__init__", + "name": "__init__", + "qname": "sklearn.impute._base.MissingIndicator.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/__init__/self", + "name": "self", + "qname": "sklearn.impute._base.MissingIndicator.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/__init__/missing_values", + "name": "missing_values", + "qname": "sklearn.impute._base.MissingIndicator.__init__.missing_values", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, float, string, np.nan or None", + "default_value": "np.nan", + "description": "The placeholder for the missing values. All occurrences of\n`missing_values` will be imputed. 
For pandas' dataframes with\nnullable integer dtypes with missing values, `missing_values`\nshould be set to `np.nan`, since `pd.NA` will be converted to `np.nan`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "np.nan" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/__init__/features", + "name": "features", + "qname": "sklearn.impute._base.MissingIndicator.__init__.features", + "default_value": "'missing-only'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'missing-only', 'all'}", + "default_value": "'missing-only'", + "description": "Whether the imputer mask should represent all or a subset of\nfeatures.\n\n- If 'missing-only' (default), the imputer mask will only represent\n features containing missing values during fit time.\n- If 'all', the imputer mask will represent all features." + }, + "type": { + "kind": "EnumType", + "values": ["all", "missing-only"] + } + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/__init__/sparse", + "name": "sparse", + "qname": "sklearn.impute._base.MissingIndicator.__init__.sparse", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or 'auto'", + "default_value": "'auto'", + "description": "Whether the imputer mask format should be sparse or dense.\n\n- If 'auto' (default), the imputer mask will be of same type as\n input.\n- If True, the imputer mask will be a sparse matrix.\n- If False, the imputer mask will be a numpy array." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'auto'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/__init__/error_on_new", + "name": "error_on_new", + "qname": "sklearn.impute._base.MissingIndicator.__init__.error_on_new", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, transform will raise an error when there are features with\nmissing values in transform that have no missing values in fit. This is\napplicable only when `features='missing-only'`." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Binary indicators for missing values.\n\nNote that this component typically should not be used in a vanilla\n:class:`Pipeline` consisting of transformers and a classifier, but rather\ncould be added using a :class:`FeatureUnion` or :class:`ColumnTransformer`.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.20", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, missing_values=np.nan, features=\"missing-only\",\n sparse=\"auto\", error_on_new=True):\n self.missing_values = missing_values\n self.features = features\n self.sparse = sparse\n self.error_on_new = error_on_new" + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_fit", + "name": "_fit", + "qname": "sklearn.impute._base.MissingIndicator._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_fit/self", + "name": "self", + "qname": "sklearn.impute._base.MissingIndicator._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_fit/X", + "name": "X", + "qname": "sklearn.impute._base.MissingIndicator._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "Input data, where ``n_samples`` is the number of samples and\n``n_features`` is the number of features.\nIf `precomputed` is True, then `X` is a mask of the\ninput data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_fit/y", + "name": "y", + "qname": "sklearn.impute._base.MissingIndicator._fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_fit/precomputed", + "name": "precomputed", + "qname": "sklearn.impute._base.MissingIndicator._fit.precomputed", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Whether the input data is a mask." 
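A short usage sketch for the `MissingIndicator` entries above (assuming scikit-learn 0.24.x; the toy matrix is invented): with the default `features='missing-only'`, the returned mask keeps only the columns that contained missing values at fit time.

```python
import numpy as np
from sklearn.impute import MissingIndicator

X = np.array([[np.nan, 1.0, 3.0],
              [4.0,    0.0, np.nan],
              [8.0,    1.0, 0.0]])

indicator = MissingIndicator()      # features='missing-only' by default
mask = indicator.fit_transform(X)
print(indicator.features_)          # [0 2] -- only columns 0 and 2 had NaNs
print(mask)
# [[ True False]
#  [False  True]
#  [False False]]

# features='all' keeps one boolean column per input feature instead.
print(MissingIndicator(features="all").fit_transform(X).shape)  # (3, 3)
```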
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the transformer on X.", + "docstring": "Fit the transformer on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n If `precomputed` is True, then `X` is a mask of the\n input data.\n\nprecomputed : bool\n Whether the input data is a mask.\n\nReturns\n-------\nimputer_mask : {ndarray or sparse matrix}, shape (n_samples, n_features)\n The imputer mask of the original data.", + "code": " def _fit(self, X, y=None, precomputed=False):\n \"\"\"Fit the transformer on X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n If `precomputed` is True, then `X` is a mask of the\n input data.\n\n precomputed : bool\n Whether the input data is a mask.\n\n Returns\n -------\n imputer_mask : {ndarray or sparse matrix}, shape (n_samples, \\\n n_features)\n The imputer mask of the original data.\n\n \"\"\"\n if precomputed:\n if not (hasattr(X, 'dtype') and X.dtype.kind == 'b'):\n raise ValueError(\"precomputed is True but the input data is \"\n \"not a mask\")\n self._precomputed = True\n else:\n self._precomputed = False\n\n # Need not validate X again as it would have already been validated\n # in the Imputer calling MissingIndicator\n if not self._precomputed:\n X = self._validate_input(X, in_fit=True)\n\n self._n_features = X.shape[1]\n\n if self.features not in ('missing-only', 'all'):\n raise ValueError(\"'features' has to be either 'missing-only' or \"\n \"'all'. Got {} instead.\".format(self.features))\n\n if not ((isinstance(self.sparse, str) and\n self.sparse == \"auto\") or isinstance(self.sparse, bool)):\n raise ValueError(\"'sparse' has to be a boolean or 'auto'. \"\n \"Got {!r} instead.\".format(self.sparse))\n\n missing_features_info = self._get_missing_features_info(X)\n self.features_ = missing_features_info[1]\n\n return missing_features_info[0]" + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_get_missing_features_info", + "name": "_get_missing_features_info", + "qname": "sklearn.impute._base.MissingIndicator._get_missing_features_info", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_get_missing_features_info/self", + "name": "self", + "qname": "sklearn.impute._base.MissingIndicator._get_missing_features_info.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_get_missing_features_info/X", + "name": "X", + "qname": "sklearn.impute._base.MissingIndicator._get_missing_features_info.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray or sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "The input data with missing values. Note that ``X`` has been\nchecked in ``fit`` and ``transform`` before to call this function." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the imputer mask and the indices of the features\ncontaining missing values.", + "docstring": "Compute the imputer mask and the indices of the features\ncontaining missing values.\n\nParameters\n----------\nX : {ndarray or sparse matrix}, shape (n_samples, n_features)\n The input data with missing values. Note that ``X`` has been\n checked in ``fit`` and ``transform`` before to call this function.\n\nReturns\n-------\nimputer_mask : {ndarray or sparse matrix}, shape (n_samples, n_features)\n The imputer mask of the original data.\n\nfeatures_with_missing : ndarray, shape (n_features_with_missing)\n The features containing missing values.", + "code": " def _get_missing_features_info(self, X):\n \"\"\"Compute the imputer mask and the indices of the features\n containing missing values.\n\n Parameters\n ----------\n X : {ndarray or sparse matrix}, shape (n_samples, n_features)\n The input data with missing values. Note that ``X`` has been\n checked in ``fit`` and ``transform`` before to call this function.\n\n Returns\n -------\n imputer_mask : {ndarray or sparse matrix}, shape \\\n (n_samples, n_features)\n The imputer mask of the original data.\n\n features_with_missing : ndarray, shape (n_features_with_missing)\n The features containing missing values.\n\n \"\"\"\n if not self._precomputed:\n imputer_mask = _get_mask(X, self.missing_values)\n else:\n imputer_mask = X\n\n if sp.issparse(X):\n imputer_mask.eliminate_zeros()\n\n if self.features == 'missing-only':\n n_missing = imputer_mask.getnnz(axis=0)\n\n if self.sparse is False:\n imputer_mask = imputer_mask.toarray()\n elif imputer_mask.format == 'csr':\n imputer_mask = imputer_mask.tocsc()\n else:\n if not self._precomputed:\n imputer_mask = _get_mask(X, self.missing_values)\n else:\n imputer_mask = X\n\n if self.features == 'missing-only':\n n_missing = imputer_mask.sum(axis=0)\n\n if self.sparse is True:\n imputer_mask = sp.csc_matrix(imputer_mask)\n\n if self.features == 'all':\n features_indices = np.arange(X.shape[1])\n else:\n features_indices = np.flatnonzero(n_missing)\n\n return imputer_mask, features_indices" + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_more_tags", + "name": "_more_tags", + "qname": "sklearn.impute._base.MissingIndicator._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_more_tags/self", + "name": "self", + "qname": "sklearn.impute._base.MissingIndicator._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n \"allow_nan\": True,\n \"X_types\": [\"2darray\", \"string\"],\n \"preserves_dtype\": [],\n }" + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_validate_input", + "name": "_validate_input", + "qname": "sklearn.impute._base.MissingIndicator._validate_input", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_validate_input/self", + "name": "self", + "qname": 
"sklearn.impute._base.MissingIndicator._validate_input.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_validate_input/X", + "name": "X", + "qname": "sklearn.impute._base.MissingIndicator._validate_input.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/_validate_input/in_fit", + "name": "in_fit", + "qname": "sklearn.impute._base.MissingIndicator._validate_input.in_fit", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_input(self, X, in_fit):\n if not is_scalar_nan(self.missing_values):\n force_all_finite = True\n else:\n force_all_finite = \"allow-nan\"\n X = self._validate_data(X, reset=in_fit,\n accept_sparse=('csc', 'csr'), dtype=None,\n force_all_finite=force_all_finite)\n _check_inputs_dtype(X, self.missing_values)\n if X.dtype.kind not in (\"i\", \"u\", \"f\", \"O\"):\n raise ValueError(\"MissingIndicator does not support data with \"\n \"dtype {0}. Please provide either a numeric array\"\n \" (with a floating point or integer dtype) or \"\n \"categorical data represented either as an array \"\n \"with integer dtype or an array of string values \"\n \"with an object dtype.\".format(X.dtype))\n\n if sp.issparse(X) and self.missing_values == 0:\n # missing_values = 0 not allowed with sparse data as it would\n # force densification\n raise ValueError(\"Sparse input with missing_values=0 is \"\n \"not supported. Provide a dense \"\n \"array instead.\")\n\n return X" + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/fit", + "name": "fit", + "qname": "sklearn.impute._base.MissingIndicator.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/fit/self", + "name": "self", + "qname": "sklearn.impute._base.MissingIndicator.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/fit/X", + "name": "X", + "qname": "sklearn.impute._base.MissingIndicator.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "Input data, where ``n_samples`` is the number of samples and\n``n_features`` is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/fit/y", + "name": "y", + "qname": "sklearn.impute._base.MissingIndicator.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the transformer on X.", + "docstring": "Fit the transformer on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\nReturns\n-------\nself : object\n Returns self.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the transformer on X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\n Returns\n -------\n self : object\n Returns self.\n \"\"\"\n self._fit(X, y)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/fit_transform", + "name": "fit_transform", + "qname": "sklearn.impute._base.MissingIndicator.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/fit_transform/self", + "name": "self", + "qname": "sklearn.impute._base.MissingIndicator.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/fit_transform/X", + "name": "X", + "qname": "sklearn.impute._base.MissingIndicator.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "The input data to complete." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/fit_transform/y", + "name": "y", + "qname": "sklearn.impute._base.MissingIndicator.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate missing values indicator for X.", + "docstring": "Generate missing values indicator for X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete.\n\nReturns\n-------\nXt : {ndarray or sparse matrix}, shape (n_samples, n_features) or (n_samples, n_features_with_missing)\n The missing indicator for input data. 
The data type of ``Xt``\n will be boolean.", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Generate missing values indicator for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete.\n\n Returns\n -------\n Xt : {ndarray or sparse matrix}, shape (n_samples, n_features) \\\n or (n_samples, n_features_with_missing)\n The missing indicator for input data. The data type of ``Xt``\n will be boolean.\n\n \"\"\"\n imputer_mask = self._fit(X, y)\n\n if self.features_.size < self._n_features:\n imputer_mask = imputer_mask[:, self.features_]\n\n return imputer_mask" + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/transform", + "name": "transform", + "qname": "sklearn.impute._base.MissingIndicator.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/transform/self", + "name": "self", + "qname": "sklearn.impute._base.MissingIndicator.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/MissingIndicator/transform/X", + "name": "X", + "qname": "sklearn.impute._base.MissingIndicator.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "The input data to complete." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate missing values indicator for X.", + "docstring": "Generate missing values indicator for X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete.\n\nReturns\n-------\nXt : {ndarray or sparse matrix}, shape (n_samples, n_features) or (n_samples, n_features_with_missing)\n The missing indicator for input data. The data type of ``Xt``\n will be boolean.", + "code": " def transform(self, X):\n \"\"\"Generate missing values indicator for X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete.\n\n Returns\n -------\n Xt : {ndarray or sparse matrix}, shape (n_samples, n_features) \\\n or (n_samples, n_features_with_missing)\n The missing indicator for input data. 
The data type of ``Xt``\n will be boolean.\n\n \"\"\"\n check_is_fitted(self)\n\n # Need not validate X again as it would have already been validated\n # in the Imputer calling MissingIndicator\n if not self._precomputed:\n X = self._validate_input(X, in_fit=False)\n else:\n if not (hasattr(X, 'dtype') and X.dtype.kind == 'b'):\n raise ValueError(\"precomputed is True but the input data is \"\n \"not a mask\")\n\n imputer_mask, features = self._get_missing_features_info(X)\n\n if self.features == \"missing-only\":\n features_diff_fit_trans = np.setdiff1d(features, self.features_)\n if (self.error_on_new and features_diff_fit_trans.size > 0):\n raise ValueError(\"The features {} have missing values \"\n \"in transform but have no missing values \"\n \"in fit.\".format(features_diff_fit_trans))\n\n if self.features_.size < self._n_features:\n imputer_mask = imputer_mask[:, self.features_]\n\n return imputer_mask" + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/__init__", + "name": "__init__", + "qname": "sklearn.impute._base.SimpleImputer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/__init__/self", + "name": "self", + "qname": "sklearn.impute._base.SimpleImputer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/__init__/missing_values", + "name": "missing_values", + "qname": "sklearn.impute._base.SimpleImputer.__init__.missing_values", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, float, str, np.nan or None", + "default_value": "np.nan", + "description": "The placeholder for the missing values. All occurrences of\n`missing_values` will be imputed. For pandas' dataframes with\nnullable integer dtypes with missing values, `missing_values`\nshould be set to `np.nan`, since `pd.NA` will be converted to `np.nan`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "np.nan" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/__init__/strategy", + "name": "strategy", + "qname": "sklearn.impute._base.SimpleImputer.__init__.strategy", + "default_value": "'mean'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string", + "default_value": "'mean'", + "description": "The imputation strategy.\n\n- If \"mean\", then replace missing values using the mean along\n each column. Can only be used with numeric data.\n- If \"median\", then replace missing values using the median along\n each column. Can only be used with numeric data.\n- If \"most_frequent\", then replace missing using the most frequent\n value along each column. Can be used with strings or numeric data.\n If there is more than one such value, only the smallest is returned.\n- If \"constant\", then replace missing values with fill_value. Can be\n used with strings or numeric data.\n\n.. versionadded:: 0.20\n strategy=\"constant\" for fixed value imputation." 
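A minimal usage sketch of the MissingIndicator fit/transform behavior documented above, using the public sklearn 0.24 API (the toy array is illustrative):

import numpy as np
from sklearn.impute import MissingIndicator

X = np.array([[np.nan, 1.0, 3.0],
              [4.0, 0.0, np.nan],
              [8.0, 1.0, 0.0]])

indicator = MissingIndicator()     # features="missing-only" by default
mask = indicator.fit_transform(X)  # boolean mask, one column per feature
                                   # that had missing values at fit time
print(indicator.features_)         # [0 2]
print(mask.dtype)                  # bool, as the Xt docstring above states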
+ }, + "type": { + "kind": "NamedType", + "name": "string" + } + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/__init__/fill_value", + "name": "fill_value", + "qname": "sklearn.impute._base.SimpleImputer.__init__.fill_value", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string or numerical value", + "default_value": "None", + "description": "When strategy == \"constant\", fill_value is used to replace all\noccurrences of missing_values.\nIf left to the default, fill_value will be 0 when imputing numerical\ndata and \"missing_value\" for strings or object data types." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "numerical value" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/__init__/verbose", + "name": "verbose", + "qname": "sklearn.impute._base.SimpleImputer.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "integer", + "default_value": "0", + "description": "Controls the verbosity of the imputer." + }, + "type": { + "kind": "NamedType", + "name": "integer" + } + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/__init__/copy", + "name": "copy", + "qname": "sklearn.impute._base.SimpleImputer.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "boolean", + "default_value": "True", + "description": "If True, a copy of X will be created. If False, imputation will\nbe done in-place whenever possible. Note that, in the following cases,\na new copy will always be made, even if `copy=False`:\n\n- If X is not an array of floating values;\n- If X is encoded as a CSR matrix;\n- If add_indicator=True." + }, + "type": { + "kind": "NamedType", + "name": "boolean" + } + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/__init__/add_indicator", + "name": "add_indicator", + "qname": "sklearn.impute._base.SimpleImputer.__init__.add_indicator", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "boolean", + "default_value": "False", + "description": "If True, a :class:`MissingIndicator` transform will stack onto output\nof the imputer's transform. This allows a predictive estimator\nto account for missingness despite imputation. If a feature has no\nmissing values at fit/train time, the feature won't appear on\nthe missing indicator even if there are missing values at\ntransform/test time." + }, + "type": { + "kind": "NamedType", + "name": "boolean" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Imputation transformer for completing missing values.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.20\n `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n estimator which is now removed.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, missing_values=np.nan, strategy=\"mean\",\n fill_value=None, verbose=0, copy=True, add_indicator=False):\n super().__init__(\n missing_values=missing_values,\n add_indicator=add_indicator\n )\n self.strategy = strategy\n self.fill_value = fill_value\n self.verbose = verbose\n self.copy = copy" + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_dense_fit", + "name": "_dense_fit", + "qname": "sklearn.impute._base.SimpleImputer._dense_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_dense_fit/self", + "name": "self", + "qname": "sklearn.impute._base.SimpleImputer._dense_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_dense_fit/X", + "name": "X", + "qname": "sklearn.impute._base.SimpleImputer._dense_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_dense_fit/strategy", + "name": "strategy", + "qname": "sklearn.impute._base.SimpleImputer._dense_fit.strategy", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_dense_fit/missing_values", + "name": "missing_values", + "qname": "sklearn.impute._base.SimpleImputer._dense_fit.missing_values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_dense_fit/fill_value", + "name": "fill_value", + "qname": "sklearn.impute._base.SimpleImputer._dense_fit.fill_value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the transformer on dense data.", + "docstring": "Fit the transformer on dense data.", + "code": " def _dense_fit(self, X, strategy, missing_values, fill_value):\n \"\"\"Fit the transformer on dense data.\"\"\"\n missing_mask = _get_mask(X, missing_values)\n masked_X = ma.masked_array(X, mask=missing_mask)\n\n super()._fit_indicator(missing_mask)\n\n # Mean\n if strategy == \"mean\":\n mean_masked = np.ma.mean(masked_X, axis=0)\n # Avoid the warning \"Warning: converting a masked element to nan.\"\n mean = np.ma.getdata(mean_masked)\n mean[np.ma.getmask(mean_masked)] = np.nan\n\n return mean\n\n # Median\n elif strategy == \"median\":\n median_masked = np.ma.median(masked_X, axis=0)\n # Avoid the warning \"Warning: converting a masked element to nan.\"\n median = np.ma.getdata(median_masked)\n median[np.ma.getmaskarray(median_masked)] = np.nan\n\n return median\n\n # Most frequent\n elif strategy == \"most_frequent\":\n # Avoid use of scipy.stats.mstats.mode due to the required\n # 
additional overhead and slow benchmarking performance.\n # See Issue 14325 and PR 14399 for full discussion.\n\n # To be able to access the elements by columns\n X = X.transpose()\n mask = missing_mask.transpose()\n\n if X.dtype.kind == \"O\":\n most_frequent = np.empty(X.shape[0], dtype=object)\n else:\n most_frequent = np.empty(X.shape[0])\n\n for i, (row, row_mask) in enumerate(zip(X[:], mask[:])):\n row_mask = np.logical_not(row_mask).astype(bool)\n row = row[row_mask]\n most_frequent[i] = _most_frequent(row, np.nan, 0)\n\n return most_frequent\n\n # Constant\n elif strategy == \"constant\":\n # for constant strategy, self.statistics_ is used to store\n # fill_value in each column\n return np.full(X.shape[1], fill_value, dtype=X.dtype)" + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_sparse_fit", + "name": "_sparse_fit", + "qname": "sklearn.impute._base.SimpleImputer._sparse_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_sparse_fit/self", + "name": "self", + "qname": "sklearn.impute._base.SimpleImputer._sparse_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_sparse_fit/X", + "name": "X", + "qname": "sklearn.impute._base.SimpleImputer._sparse_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_sparse_fit/strategy", + "name": "strategy", + "qname": "sklearn.impute._base.SimpleImputer._sparse_fit.strategy", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_sparse_fit/missing_values", + "name": "missing_values", + "qname": "sklearn.impute._base.SimpleImputer._sparse_fit.missing_values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_sparse_fit/fill_value", + "name": "fill_value", + "qname": "sklearn.impute._base.SimpleImputer._sparse_fit.fill_value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the transformer on sparse data.", + "docstring": "Fit the transformer on sparse data.", + "code": " def _sparse_fit(self, X, strategy, missing_values, fill_value):\n \"\"\"Fit the transformer on sparse data.\"\"\"\n missing_mask = _get_mask(X, missing_values)\n mask_data = missing_mask.data\n n_implicit_zeros = X.shape[0] - np.diff(X.indptr)\n\n statistics = np.empty(X.shape[1])\n\n if strategy == \"constant\":\n # for constant strategy, self.statistics_ is used to store\n # fill_value in each column\n statistics.fill(fill_value)\n else:\n for i in range(X.shape[1]):\n column = X.data[X.indptr[i]:X.indptr[i + 1]]\n mask_column = mask_data[X.indptr[i]:X.indptr[i + 1]]\n column = column[~mask_column]\n\n # combine explicit and implicit 
zeros\n mask_zeros = _get_mask(column, 0)\n column = column[~mask_zeros]\n n_explicit_zeros = mask_zeros.sum()\n n_zeros = n_implicit_zeros[i] + n_explicit_zeros\n\n if strategy == \"mean\":\n s = column.size + n_zeros\n statistics[i] = np.nan if s == 0 else column.sum() / s\n\n elif strategy == \"median\":\n statistics[i] = _get_median(column,\n n_zeros)\n\n elif strategy == \"most_frequent\":\n statistics[i] = _most_frequent(column,\n 0,\n n_zeros)\n super()._fit_indicator(missing_mask)\n\n return statistics" + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_validate_input", + "name": "_validate_input", + "qname": "sklearn.impute._base.SimpleImputer._validate_input", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_validate_input/self", + "name": "self", + "qname": "sklearn.impute._base.SimpleImputer._validate_input.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_validate_input/X", + "name": "X", + "qname": "sklearn.impute._base.SimpleImputer._validate_input.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/_validate_input/in_fit", + "name": "in_fit", + "qname": "sklearn.impute._base.SimpleImputer._validate_input.in_fit", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_input(self, X, in_fit):\n allowed_strategies = [\"mean\", \"median\", \"most_frequent\", \"constant\"]\n if self.strategy not in allowed_strategies:\n raise ValueError(\"Can only use these strategies: {0} \"\n \" got strategy={1}\".format(allowed_strategies,\n self.strategy))\n\n if self.strategy in (\"most_frequent\", \"constant\"):\n # If input is a list of strings, dtype = object.\n # Otherwise ValueError is raised in SimpleImputer\n # with strategy='most_frequent' or 'constant'\n # because the list is converted to Unicode numpy array\n if isinstance(X, list) and \\\n any(isinstance(elem, str) for row in X for elem in row):\n dtype = object\n else:\n dtype = None\n else:\n dtype = FLOAT_DTYPES\n\n if not is_scalar_nan(self.missing_values):\n force_all_finite = True\n else:\n force_all_finite = \"allow-nan\"\n\n try:\n X = self._validate_data(X, reset=in_fit,\n accept_sparse='csc', dtype=dtype,\n force_all_finite=force_all_finite,\n copy=self.copy)\n except ValueError as ve:\n if \"could not convert\" in str(ve):\n new_ve = ValueError(\"Cannot use {} strategy with non-numeric \"\n \"data:\\n{}\".format(self.strategy, ve))\n raise new_ve from None\n else:\n raise ve\n\n _check_inputs_dtype(X, self.missing_values)\n if X.dtype.kind not in (\"i\", \"u\", \"f\", \"O\"):\n raise ValueError(\"SimpleImputer does not support data with dtype \"\n \"{0}. 
Please provide either a numeric array (with\"\n \" a floating point or integer dtype) or \"\n \"categorical data represented either as an array \"\n \"with integer dtype or an array of string values \"\n \"with an object dtype.\".format(X.dtype))\n\n return X" + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/fit", + "name": "fit", + "qname": "sklearn.impute._base.SimpleImputer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/fit/self", + "name": "self", + "qname": "sklearn.impute._base.SimpleImputer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/fit/X", + "name": "X", + "qname": "sklearn.impute._base.SimpleImputer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "Input data, where ``n_samples`` is the number of samples and\n``n_features`` is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/fit/y", + "name": "y", + "qname": "sklearn.impute._base.SimpleImputer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the imputer on X.", + "docstring": "Fit the imputer on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\nReturns\n-------\nself : SimpleImputer", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the imputer on X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\n Returns\n -------\n self : SimpleImputer\n \"\"\"\n X = self._validate_input(X, in_fit=True)\n\n # default fill_value is 0 for numerical input and \"missing_value\"\n # otherwise\n if self.fill_value is None:\n if X.dtype.kind in (\"i\", \"u\", \"f\"):\n fill_value = 0\n else:\n fill_value = \"missing_value\"\n else:\n fill_value = self.fill_value\n\n # fill_value should be numerical in case of numerical input\n if (self.strategy == \"constant\" and\n X.dtype.kind in (\"i\", \"u\", \"f\") and\n not isinstance(fill_value, numbers.Real)):\n raise ValueError(\"'fill_value'={0} is invalid. Expected a \"\n \"numerical value when imputing numerical \"\n \"data\".format(fill_value))\n\n if sp.issparse(X):\n # missing_values = 0 not allowed with sparse data as it would\n # force densification\n if self.missing_values == 0:\n raise ValueError(\"Imputation not possible when missing_values \"\n \"== 0 and input is sparse. 
Provide a dense \"\n \"array instead.\")\n else:\n self.statistics_ = self._sparse_fit(X,\n self.strategy,\n self.missing_values,\n fill_value)\n\n else:\n self.statistics_ = self._dense_fit(X,\n self.strategy,\n self.missing_values,\n fill_value)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.impute._base.SimpleImputer.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/inverse_transform/self", + "name": "self", + "qname": "sklearn.impute._base.SimpleImputer.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/inverse_transform/X", + "name": "X", + "qname": "sklearn.impute._base.SimpleImputer.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features + n_features_missing_indicator)", + "default_value": "", + "description": "The imputed data to be reverted to original data. It has to be\nan augmented array of imputed data and the missing indicator mask." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features + n_features_missing_indicator)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Convert the data back to the original representation.\n\nInverts the `transform` operation performed on an array.\nThis operation can only be performed after :class:`SimpleImputer` is\ninstantiated with `add_indicator=True`.\n\nNote that ``inverse_transform`` can only invert the transform in\nfeatures that have binary indicators for missing values. If a feature\nhas no missing values at ``fit`` time, the feature won't have a binary\nindicator, and the imputation done at ``transform`` time won't be\ninverted.\n\n.. versionadded:: 0.24", + "docstring": "Convert the data back to the original representation.\n\nInverts the `transform` operation performed on an array.\nThis operation can only be performed after :class:`SimpleImputer` is\ninstantiated with `add_indicator=True`.\n\nNote that ``inverse_transform`` can only invert the transform in\nfeatures that have binary indicators for missing values. If a feature\nhas no missing values at ``fit`` time, the feature won't have a binary\nindicator, and the imputation done at ``transform`` time won't be\ninverted.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features + n_features_missing_indicator)\n The imputed data to be reverted to original data. It has to be\n an augmented array of imputed data and the missing indicator mask.\n\nReturns\n-------\nX_original : ndarray of shape (n_samples, n_features)\n The original X with missing values as it was prior\n to imputation.", + "code": " def inverse_transform(self, X):\n \"\"\"Convert the data back to the original representation.\n\n Inverts the `transform` operation performed on an array.\n This operation can only be performed after :class:`SimpleImputer` is\n instantiated with `add_indicator=True`.\n\n Note that ``inverse_transform`` can only invert the transform in\n features that have binary indicators for missing values. 
If a feature\n has no missing values at ``fit`` time, the feature won't have a binary\n indicator, and the imputation done at ``transform`` time won't be\n inverted.\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n X : array-like of shape \\\n (n_samples, n_features + n_features_missing_indicator)\n The imputed data to be reverted to original data. It has to be\n an augmented array of imputed data and the missing indicator mask.\n\n Returns\n -------\n X_original : ndarray of shape (n_samples, n_features)\n The original X with missing values as it was prior\n to imputation.\n \"\"\"\n check_is_fitted(self)\n\n if not self.add_indicator:\n raise ValueError(\"'inverse_transform' works only when \"\n \"'SimpleImputer' is instantiated with \"\n \"'add_indicator=True'. \"\n f\"Got 'add_indicator={self.add_indicator}' \"\n \"instead.\")\n\n n_features_missing = len(self.indicator_.features_)\n non_empty_feature_count = X.shape[1] - n_features_missing\n array_imputed = X[:, :non_empty_feature_count].copy()\n missing_mask = X[:, non_empty_feature_count:].astype(bool)\n\n n_features_original = len(self.statistics_)\n shape_original = (X.shape[0], n_features_original)\n X_original = np.zeros(shape_original)\n X_original[:, self.indicator_.features_] = missing_mask\n full_mask = X_original.astype(bool)\n\n imputed_idx, original_idx = 0, 0\n while imputed_idx < len(array_imputed.T):\n if not np.all(X_original[:, original_idx]):\n X_original[:, original_idx] = array_imputed.T[imputed_idx]\n imputed_idx += 1\n original_idx += 1\n else:\n original_idx += 1\n\n X_original[full_mask] = self.missing_values\n return X_original" + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/transform", + "name": "transform", + "qname": "sklearn.impute._base.SimpleImputer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/transform/self", + "name": "self", + "qname": "sklearn.impute._base.SimpleImputer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/SimpleImputer/transform/X", + "name": "X", + "qname": "sklearn.impute._base.SimpleImputer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "The input data to complete." 
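A minimal sketch of the SimpleImputer behavior documented above: the four strategies, the numeric fill_value default of 0, and the add_indicator / inverse_transform round trip (public sklearn 0.24 API; the data is illustrative):

import numpy as np
from sklearn.impute import SimpleImputer

X = np.array([[7.0, 2.0, 3.0],
              [4.0, np.nan, 6.0],
              [10.0, 5.0, np.nan]])

# "constant" falls back to fill_value, which defaults to 0 for numeric data.
for strategy in ("mean", "median", "most_frequent", "constant"):
    print(strategy, SimpleImputer(strategy=strategy).fit_transform(X))

# add_indicator=True stacks boolean indicator columns onto the output;
# that augmented array is what inverse_transform (new in 0.24) expects.
imp = SimpleImputer(add_indicator=True).fit(X)
Xt = imp.transform(X)               # shape (3, 3 + 2 indicator columns)
X_back = imp.inverse_transform(Xt)  # np.nan restored where indicated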
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Impute all missing values in X.", + "docstring": "Impute all missing values in X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete.", + "code": " def transform(self, X):\n \"\"\"Impute all missing values in X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data to complete.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_input(X, in_fit=False)\n statistics = self.statistics_\n\n if X.shape[1] != statistics.shape[0]:\n raise ValueError(\"X has %d features per sample, expected %d\"\n % (X.shape[1], self.statistics_.shape[0]))\n\n # compute mask before eliminating invalid features\n missing_mask = _get_mask(X, self.missing_values)\n\n # Delete the invalid columns if strategy is not constant\n if self.strategy == \"constant\":\n valid_statistics = statistics\n valid_statistics_indexes = None\n else:\n # same as np.isnan but also works for object dtypes\n invalid_mask = _get_mask(statistics, np.nan)\n valid_mask = np.logical_not(invalid_mask)\n valid_statistics = statistics[valid_mask]\n valid_statistics_indexes = np.flatnonzero(valid_mask)\n\n if invalid_mask.any():\n missing = np.arange(X.shape[1])[invalid_mask]\n if self.verbose:\n warnings.warn(\"Deleting features without \"\n \"observed values: %s\" % missing)\n X = X[:, valid_statistics_indexes]\n\n # Do actual imputation\n if sp.issparse(X):\n if self.missing_values == 0:\n raise ValueError(\"Imputation not possible when missing_values \"\n \"== 0 and input is sparse. 
Provide a dense \"\n \"array instead.\")\n else:\n # if no invalid statistics are found, use the mask computed\n # before, else recompute mask\n if valid_statistics_indexes is None:\n mask = missing_mask.data\n else:\n mask = _get_mask(X.data, self.missing_values)\n indexes = np.repeat(\n np.arange(len(X.indptr) - 1, dtype=int),\n np.diff(X.indptr))[mask]\n\n X.data[mask] = valid_statistics[indexes].astype(X.dtype,\n copy=False)\n else:\n # use mask computed before eliminating invalid mask\n if valid_statistics_indexes is None:\n mask_valid_features = missing_mask\n else:\n mask_valid_features = missing_mask[:, valid_statistics_indexes]\n n_missing = np.sum(mask_valid_features, axis=0)\n values = np.repeat(valid_statistics, n_missing)\n coordinates = np.where(mask_valid_features.transpose())[::-1]\n\n X[coordinates] = values\n\n X_indicator = super()._transform_indicator(missing_mask)\n\n return super()._concatenate_indicator(X, X_indicator)" + }, + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/__init__", + "name": "__init__", + "qname": "sklearn.impute._base._BaseImputer.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/__init__/self", + "name": "self", + "qname": "sklearn.impute._base._BaseImputer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/__init__/missing_values", + "name": "missing_values", + "qname": "sklearn.impute._base._BaseImputer.__init__.missing_values", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/__init__/add_indicator", + "name": "add_indicator", + "qname": "sklearn.impute._base._BaseImputer.__init__.add_indicator", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for all imputers.\n\nIt adds automatically support for `add_indicator`.", + "docstring": "", + "code": " def __init__(self, *, missing_values=np.nan, add_indicator=False):\n self.missing_values = missing_values\n self.add_indicator = add_indicator" + }, + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/_concatenate_indicator", + "name": "_concatenate_indicator", + "qname": "sklearn.impute._base._BaseImputer._concatenate_indicator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/_concatenate_indicator/self", + "name": "self", + "qname": "sklearn.impute._base._BaseImputer._concatenate_indicator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/_concatenate_indicator/X_imputed", + "name": "X_imputed", + "qname": "sklearn.impute._base._BaseImputer._concatenate_indicator.X_imputed", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.impute._base/_BaseImputer/_concatenate_indicator/X_indicator", + "name": "X_indicator", + "qname": "sklearn.impute._base._BaseImputer._concatenate_indicator.X_indicator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Concatenate indicator mask with the imputed data.", + "docstring": "Concatenate indicator mask with the imputed data.", + "code": " def _concatenate_indicator(self, X_imputed, X_indicator):\n \"\"\"Concatenate indicator mask with the imputed data.\"\"\"\n if not self.add_indicator:\n return X_imputed\n\n hstack = sp.hstack if sp.issparse(X_imputed) else np.hstack\n if X_indicator is None:\n raise ValueError(\n \"Data from the missing indicator are not provided. Call \"\n \"_fit_indicator and _transform_indicator in the imputer \"\n \"implementation.\"\n )\n\n return hstack((X_imputed, X_indicator))" + }, + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/_fit_indicator", + "name": "_fit_indicator", + "qname": "sklearn.impute._base._BaseImputer._fit_indicator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/_fit_indicator/self", + "name": "self", + "qname": "sklearn.impute._base._BaseImputer._fit_indicator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/_fit_indicator/X", + "name": "X", + "qname": "sklearn.impute._base._BaseImputer._fit_indicator.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit a MissingIndicator.", + "docstring": "Fit a MissingIndicator.", + "code": " def _fit_indicator(self, X):\n \"\"\"Fit a MissingIndicator.\"\"\"\n if self.add_indicator:\n self.indicator_ = MissingIndicator(\n missing_values=self.missing_values, error_on_new=False)\n self.indicator_._fit(X, precomputed=True)\n else:\n self.indicator_ = None" + }, + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.impute._base._BaseImputer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/_more_tags/self", + "name": "self", + "qname": "sklearn.impute._base._BaseImputer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'allow_nan': is_scalar_nan(self.missing_values)}" + }, + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/_transform_indicator", + "name": "_transform_indicator", + "qname": "sklearn.impute._base._BaseImputer._transform_indicator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/_transform_indicator/self", + "name": "self", + "qname": "sklearn.impute._base._BaseImputer._transform_indicator.self", + "default_value": null, + 
"assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/_BaseImputer/_transform_indicator/X", + "name": "X", + "qname": "sklearn.impute._base._BaseImputer._transform_indicator.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the indicator mask.'\n\nNote that X must be the original data as passed to the imputer before\nany imputation, since imputation may be done inplace in some cases.", + "docstring": "Compute the indicator mask.'\n\nNote that X must be the original data as passed to the imputer before\nany imputation, since imputation may be done inplace in some cases.", + "code": " def _transform_indicator(self, X):\n \"\"\"Compute the indicator mask.'\n\n Note that X must be the original data as passed to the imputer before\n any imputation, since imputation may be done inplace in some cases.\n \"\"\"\n if self.add_indicator:\n if not hasattr(self, 'indicator_'):\n raise ValueError(\n \"Make sure to call _fit_indicator before \"\n \"_transform_indicator\"\n )\n return self.indicator_.transform(X)" + }, + { + "id": "scikit-learn/sklearn.impute._base/_check_inputs_dtype", + "name": "_check_inputs_dtype", + "qname": "sklearn.impute._base._check_inputs_dtype", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/_check_inputs_dtype/X", + "name": "X", + "qname": "sklearn.impute._base._check_inputs_dtype.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/_check_inputs_dtype/missing_values", + "name": "missing_values", + "qname": "sklearn.impute._base._check_inputs_dtype.missing_values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_inputs_dtype(X, missing_values):\n if (X.dtype.kind in (\"f\", \"i\", \"u\") and\n not isinstance(missing_values, numbers.Real)):\n raise ValueError(\"'X' and 'missing_values' types are expected to be\"\n \" both numerical. 
Got X.dtype={} and \"\n \" type(missing_values)={}.\"\n .format(X.dtype, type(missing_values)))" + }, + { + "id": "scikit-learn/sklearn.impute._base/_most_frequent", + "name": "_most_frequent", + "qname": "sklearn.impute._base._most_frequent", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._base/_most_frequent/array", + "name": "array", + "qname": "sklearn.impute._base._most_frequent.array", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/_most_frequent/extra_value", + "name": "extra_value", + "qname": "sklearn.impute._base._most_frequent.extra_value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._base/_most_frequent/n_repeat", + "name": "n_repeat", + "qname": "sklearn.impute._base._most_frequent.n_repeat", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the most frequent value in a 1d array extended with\n[extra_value] * n_repeat, where extra_value is assumed to be not part\nof the array.", + "docstring": "Compute the most frequent value in a 1d array extended with\n[extra_value] * n_repeat, where extra_value is assumed to be not part\nof the array.", + "code": "def _most_frequent(array, extra_value, n_repeat):\n \"\"\"Compute the most frequent value in a 1d array extended with\n [extra_value] * n_repeat, where extra_value is assumed to be not part\n of the array.\"\"\"\n # Compute the most frequent value in array only\n if array.size > 0:\n if array.dtype == object:\n # scipy.stats.mode is slow with object dtype array.\n # Python Counter is more efficient\n counter = Counter(array)\n most_frequent_count = counter.most_common(1)[0][1]\n # tie breaking similarly to scipy.stats.mode\n most_frequent_value = min(\n value for value, count in counter.items()\n if count == most_frequent_count\n )\n else:\n mode = stats.mode(array)\n most_frequent_value = mode[0][0]\n most_frequent_count = mode[1][0]\n else:\n most_frequent_value = 0\n most_frequent_count = 0\n\n # Compare to array + [extra_value] * n_repeat\n if most_frequent_count == 0 and n_repeat == 0:\n return np.nan\n elif most_frequent_count < n_repeat:\n return extra_value\n elif most_frequent_count > n_repeat:\n return most_frequent_value\n elif most_frequent_count == n_repeat:\n # tie breaking similarly to scipy.stats.mode\n return min(most_frequent_value, extra_value)" + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__", + "name": "__init__", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/self", + "name": "self", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/estimator", + "name": "estimator", + 
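A hypothetical re-creation of the tie-breaking rule quoted in the _most_frequent entry above (the helper name here is illustrative, not sklearn API): among equally frequent values the smallest wins, matching scipy.stats.mode.

from collections import Counter

def most_frequent_tiebreak(values):
    counter = Counter(values)
    top_count = counter.most_common(1)[0][1]
    # tie breaking as described: smallest of the most common values
    return min(v for v, c in counter.items() if c == top_count)

print(most_frequent_tiebreak([2, 2, 1, 1, 3]))  # -> 1 (1 and 2 tie)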
"qname": "sklearn.impute._iterative.IterativeImputer.__init__.estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object", + "default_value": "BayesianRidge()", + "description": "The estimator to use at each step of the round-robin imputation.\nIf ``sample_posterior`` is True, the estimator must support\n``return_std`` in its ``predict`` method." + }, + "type": { + "kind": "NamedType", + "name": "estimator object" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/missing_values", + "name": "missing_values", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.missing_values", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, np.nan", + "default_value": "np.nan", + "description": "The placeholder for the missing values. All occurrences of\n`missing_values` will be imputed. For pandas' dataframes with\nnullable integer dtypes with missing values, `missing_values`\nshould be set to `np.nan`, since `pd.NA` will be converted to `np.nan`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "np.nan" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/sample_posterior", + "name": "sample_posterior", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.sample_posterior", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "boolean", + "default_value": "False", + "description": "Whether to sample from the (Gaussian) predictive posterior of the\nfitted estimator for each imputation. Estimator must support\n``return_std`` in its ``predict`` method if set to ``True``. Set to\n``True`` if using ``IterativeImputer`` for multiple imputations." + }, + "type": { + "kind": "NamedType", + "name": "boolean" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.max_iter", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Maximum number of imputation rounds to perform before returning the\nimputations computed during the final round. A round is a single\nimputation of each feature with missing values. The stopping criterion\nis met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals])) < tol`,\nwhere `X_t` is `X` at iteration `t`. Note that early stopping is only\napplied if ``sample_posterior=False``." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/tol", + "name": "tol", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Tolerance of the stopping condition." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/n_nearest_features", + "name": "n_nearest_features", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.n_nearest_features", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of other features to use to estimate the missing values of\neach feature column. Nearness between features is measured using\nthe absolute correlation coefficient between each feature pair (after\ninitial imputation). To ensure coverage of features throughout the\nimputation process, the neighbor features are not necessarily nearest,\nbut are drawn with probability proportional to correlation for each\nimputed target feature. Can provide significant speed-up when the\nnumber of features is huge. If ``None``, all features will be used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/initial_strategy", + "name": "initial_strategy", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.initial_strategy", + "default_value": "'mean'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'mean'", + "description": "Which strategy to use to initialize the missing values. Same as the\n``strategy`` parameter in :class:`~sklearn.impute.SimpleImputer`\nValid values: {\"mean\", \"median\", \"most_frequent\", or \"constant\"}." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/imputation_order", + "name": "imputation_order", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.imputation_order", + "default_value": "'ascending'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'ascending'", + "description": "The order in which the features will be imputed. Possible values:\n\n\"ascending\"\n From features with fewest missing values to most.\n\"descending\"\n From features with most missing values to fewest.\n\"roman\"\n Left to right.\n\"arabic\"\n Right to left.\n\"random\"\n A random order for each round." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/skip_complete", + "name": "skip_complete", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.skip_complete", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "boolean", + "default_value": "False", + "description": "If ``True`` then features with missing values during ``transform``\nwhich did not have any missing values during ``fit`` will be imputed\nwith the initial imputation method only. Set to ``True`` if you have\nmany features with no missing values at both ``fit`` and ``transform``\ntime to save compute." 
+ }, + "type": { + "kind": "NamedType", + "name": "boolean" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/min_value", + "name": "min_value", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.min_value", + "default_value": "-np.inf", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or array-like of shape (n_features,)", + "default_value": "-np.inf", + "description": "Minimum possible imputed value. Broadcast to shape (n_features,) if\nscalar. If array-like, expects shape (n_features,), one min value for\neach feature. The default is `-np.inf`.\n\n.. versionchanged:: 0.23\n Added support for array-like." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/max_value", + "name": "max_value", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.max_value", + "default_value": "np.inf", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or array-like of shape (n_features,)", + "default_value": "np.inf", + "description": "Maximum possible imputed value. Broadcast to shape (n_features,) if\nscalar. If array-like, expects shape (n_features,), one max value for\neach feature. The default is `np.inf`.\n\n.. versionchanged:: 0.23\n Added support for array-like." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/verbose", + "name": "verbose", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Verbosity flag, controls the debug messages that are issued\nas functions are evaluated. The higher, the more verbose. Can be 0, 1,\nor 2." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/random_state", + "name": "random_state", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "The seed of the pseudo random number generator to use. Randomizes\nselection of estimator features if n_nearest_features is not None, the\n``imputation_order`` if ``random``, and the sampling from posterior if\n``sample_posterior`` is True. Use an integer for determinism.\nSee :term:`the Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/__init__/add_indicator", + "name": "add_indicator", + "qname": "sklearn.impute._iterative.IterativeImputer.__init__.add_indicator", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "boolean", + "default_value": "False", + "description": "If True, a :class:`MissingIndicator` transform will stack onto output\nof the imputer's transform. This allows a predictive estimator\nto account for missingness despite imputation. If a feature has no\nmissing values at fit/train time, the feature won't appear on\nthe missing indicator even if there are missing values at\ntransform/test time." + }, + "type": { + "kind": "NamedType", + "name": "boolean" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_iterative_imputer``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_iterative_imputer # noqa\n >>> # now you can import normally from sklearn.impute\n >>> from sklearn.impute import IterativeImputer", + "docstring": "", + "code": " def __init__(self,\n estimator=None, *,\n missing_values=np.nan,\n sample_posterior=False,\n max_iter=10,\n tol=1e-3,\n n_nearest_features=None,\n initial_strategy=\"mean\",\n imputation_order='ascending',\n skip_complete=False,\n min_value=-np.inf,\n max_value=np.inf,\n verbose=0,\n random_state=None,\n add_indicator=False):\n super().__init__(\n missing_values=missing_values,\n add_indicator=add_indicator\n )\n\n self.estimator = estimator\n self.sample_posterior = sample_posterior\n self.max_iter = max_iter\n self.tol = tol\n self.n_nearest_features = n_nearest_features\n self.initial_strategy = initial_strategy\n self.imputation_order = imputation_order\n self.skip_complete = skip_complete\n self.min_value = min_value\n self.max_value = max_value\n self.verbose = verbose\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_abs_corr_mat", + "name": "_get_abs_corr_mat", + "qname": "sklearn.impute._iterative.IterativeImputer._get_abs_corr_mat", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_abs_corr_mat/self", + "name": "self", + "qname": "sklearn.impute._iterative.IterativeImputer._get_abs_corr_mat.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_abs_corr_mat/X_filled", + "name": "X_filled", + "qname": "sklearn.impute._iterative.IterativeImputer._get_abs_corr_mat.X_filled", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + 
"is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples, n_features)", + "default_value": "", + "description": "Input data with the most recent imputations." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_abs_corr_mat/tolerance", + "name": "tolerance", + "qname": "sklearn.impute._iterative.IterativeImputer._get_abs_corr_mat.tolerance", + "default_value": "1e-06", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "``abs_corr_mat`` can have nans, which will be replaced\nwith ``tolerance``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get absolute correlation matrix between features.", + "docstring": "Get absolute correlation matrix between features.\n\nParameters\n----------\nX_filled : ndarray, shape (n_samples, n_features)\n Input data with the most recent imputations.\n\ntolerance : float, default=1e-6\n ``abs_corr_mat`` can have nans, which will be replaced\n with ``tolerance``.\n\nReturns\n-------\nabs_corr_mat : ndarray, shape (n_features, n_features)\n Absolute correlation matrix of ``X`` at the beginning of the\n current round. The diagonal has been zeroed out and each feature's\n absolute correlations with all others have been normalized to sum\n to 1.", + "code": " def _get_abs_corr_mat(self, X_filled, tolerance=1e-6):\n \"\"\"Get absolute correlation matrix between features.\n\n Parameters\n ----------\n X_filled : ndarray, shape (n_samples, n_features)\n Input data with the most recent imputations.\n\n tolerance : float, default=1e-6\n ``abs_corr_mat`` can have nans, which will be replaced\n with ``tolerance``.\n\n Returns\n -------\n abs_corr_mat : ndarray, shape (n_features, n_features)\n Absolute correlation matrix of ``X`` at the beginning of the\n current round. The diagonal has been zeroed out and each feature's\n absolute correlations with all others have been normalized to sum\n to 1.\n \"\"\"\n n_features = X_filled.shape[1]\n if (self.n_nearest_features is None or\n self.n_nearest_features >= n_features):\n return None\n with np.errstate(invalid='ignore'):\n # if a feature in the neighboorhood has only a single value\n # (e.g., categorical feature), the std. dev. will be null and\n # np.corrcoef will raise a warning due to a division by zero\n abs_corr_mat = np.abs(np.corrcoef(X_filled.T))\n # np.corrcoef is not defined for features with zero std\n abs_corr_mat[np.isnan(abs_corr_mat)] = tolerance\n # ensures exploration, i.e. 
at least some probability of sampling\n np.clip(abs_corr_mat, tolerance, None, out=abs_corr_mat)\n # features are not their own neighbors\n np.fill_diagonal(abs_corr_mat, 0)\n # needs to sum to 1 for np.random.choice sampling\n abs_corr_mat = normalize(abs_corr_mat, norm='l1', axis=0, copy=False)\n return abs_corr_mat" + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_neighbor_feat_idx", + "name": "_get_neighbor_feat_idx", + "qname": "sklearn.impute._iterative.IterativeImputer._get_neighbor_feat_idx", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_neighbor_feat_idx/self", + "name": "self", + "qname": "sklearn.impute._iterative.IterativeImputer._get_neighbor_feat_idx.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_neighbor_feat_idx/n_features", + "name": "n_features", + "qname": "sklearn.impute._iterative.IterativeImputer._get_neighbor_feat_idx.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of features in ``X``." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_neighbor_feat_idx/feat_idx", + "name": "feat_idx", + "qname": "sklearn.impute._iterative.IterativeImputer._get_neighbor_feat_idx.feat_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Index of the feature currently being imputed." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_neighbor_feat_idx/abs_corr_mat", + "name": "abs_corr_mat", + "qname": "sklearn.impute._iterative.IterativeImputer._get_neighbor_feat_idx.abs_corr_mat", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_features, n_features)", + "default_value": "", + "description": "Absolute correlation matrix of ``X``. The diagonal has been zeroed\nout and each feature has been normalized to sum to 1. Can be None." 
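The neighbor-sampling weights from _get_abs_corr_mat above, re-created with plain NumPy as a sketch (array names and sizes are illustrative): absolute correlations, nans guarded by the tolerance, zeroed diagonal, and columns L1-normalized so they can serve as probabilities for random choice.

import numpy as np

rng = np.random.RandomState(0)
X_filled = rng.rand(50, 4)            # stand-in for the imputed data
tolerance = 1e-6

A = np.abs(np.corrcoef(X_filled.T))   # absolute feature correlations
A[np.isnan(A)] = tolerance            # constant features produce nans
np.clip(A, tolerance, None, out=A)    # keep some sampling probability
np.fill_diagonal(A, 0)                # a feature is not its own neighbor
A /= A.sum(axis=0, keepdims=True)     # each column sums to 1

feat_idx = 2
neighbors = rng.choice(np.arange(4), 2, replace=False, p=A[:, feat_idx])
print(neighbors)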
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_features, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get a list of other features to predict ``feat_idx``.\n\nIf self.n_nearest_features is less than or equal to the total\nnumber of features, then use a probability proportional to the absolute\ncorrelation between ``feat_idx`` and each other feature to randomly\nchoose a subsample of the other features (without replacement).", + "docstring": "Get a list of other features to predict ``feat_idx``.\n\nIf self.n_nearest_features is less than or equal to the total\nnumber of features, then use a probability proportional to the absolute\ncorrelation between ``feat_idx`` and each other feature to randomly\nchoose a subsample of the other features (without replacement).\n\nParameters\n----------\nn_features : int\n Number of features in ``X``.\n\nfeat_idx : int\n Index of the feature currently being imputed.\n\nabs_corr_mat : ndarray, shape (n_features, n_features)\n Absolute correlation matrix of ``X``. The diagonal has been zeroed\n out and each feature has been normalized to sum to 1. Can be None.\n\nReturns\n-------\nneighbor_feat_idx : array-like\n The features to use to impute ``feat_idx``.", + "code": " def _get_neighbor_feat_idx(self,\n n_features,\n feat_idx,\n abs_corr_mat):\n \"\"\"Get a list of other features to predict ``feat_idx``.\n\n If self.n_nearest_features is less than or equal to the total\n number of features, then use a probability proportional to the absolute\n correlation between ``feat_idx`` and each other feature to randomly\n choose a subsample of the other features (without replacement).\n\n Parameters\n ----------\n n_features : int\n Number of features in ``X``.\n\n feat_idx : int\n Index of the feature currently being imputed.\n\n abs_corr_mat : ndarray, shape (n_features, n_features)\n Absolute correlation matrix of ``X``. The diagonal has been zeroed\n out and each feature has been normalized to sum to 1. 
Can be None.\n\n Returns\n -------\n neighbor_feat_idx : array-like\n The features to use to impute ``feat_idx``.\n \"\"\"\n if (self.n_nearest_features is not None and\n self.n_nearest_features < n_features):\n p = abs_corr_mat[:, feat_idx]\n neighbor_feat_idx = self.random_state_.choice(\n np.arange(n_features), self.n_nearest_features, replace=False,\n p=p)\n else:\n inds_left = np.arange(feat_idx)\n inds_right = np.arange(feat_idx + 1, n_features)\n neighbor_feat_idx = np.concatenate((inds_left, inds_right))\n return neighbor_feat_idx" + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_ordered_idx", + "name": "_get_ordered_idx", + "qname": "sklearn.impute._iterative.IterativeImputer._get_ordered_idx", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_ordered_idx/self", + "name": "self", + "qname": "sklearn.impute._iterative.IterativeImputer._get_ordered_idx.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_get_ordered_idx/mask_missing_values", + "name": "mask_missing_values", + "qname": "sklearn.impute._iterative.IterativeImputer._get_ordered_idx.mask_missing_values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "Input data's missing indicator matrix, where \"n_samples\" is the\nnumber of samples and \"n_features\" is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Decide in what order we will update the features.\n\nAs a homage to the MICE R package, we will have 4 main options of\nhow to order the updates, and use a random order if anything else\nis specified.\n\nAlso, this function skips features which have no missing values.", + "docstring": "Decide in what order we will update the features.\n\nAs a homage to the MICE R package, we will have 4 main options of\nhow to order the updates, and use a random order if anything else\nis specified.\n\nAlso, this function skips features which have no missing values.\n\nParameters\n----------\nmask_missing_values : array-like, shape (n_samples, n_features)\n Input data's missing indicator matrix, where \"n_samples\" is the\n number of samples and \"n_features\" is the number of features.\n\nReturns\n-------\nordered_idx : ndarray, shape (n_features,)\n The order in which to impute the features.", + "code": " def _get_ordered_idx(self, mask_missing_values):\n \"\"\"Decide in what order we will update the features.\n\n As a homage to the MICE R package, we will have 4 main options of\n how to order the updates, and use a random order if anything else\n is specified.\n\n Also, this function skips features which have no missing values.\n\n Parameters\n ----------\n mask_missing_values : array-like, shape (n_samples, n_features)\n Input data's missing indicator matrix, where \"n_samples\" is the\n number of samples and \"n_features\" is the number of features.\n\n Returns\n -------\n ordered_idx : ndarray, shape (n_features,)\n The order in which to impute the 
features.\n \"\"\"\n frac_of_missing_values = mask_missing_values.mean(axis=0)\n if self.skip_complete:\n missing_values_idx = np.flatnonzero(frac_of_missing_values)\n else:\n missing_values_idx = np.arange(np.shape(frac_of_missing_values)[0])\n if self.imputation_order == 'roman':\n ordered_idx = missing_values_idx\n elif self.imputation_order == 'arabic':\n ordered_idx = missing_values_idx[::-1]\n elif self.imputation_order == 'ascending':\n n = len(frac_of_missing_values) - len(missing_values_idx)\n ordered_idx = np.argsort(frac_of_missing_values,\n kind='mergesort')[n:]\n elif self.imputation_order == 'descending':\n n = len(frac_of_missing_values) - len(missing_values_idx)\n ordered_idx = np.argsort(frac_of_missing_values,\n kind='mergesort')[n:][::-1]\n elif self.imputation_order == 'random':\n ordered_idx = missing_values_idx\n self.random_state_.shuffle(ordered_idx)\n else:\n raise ValueError(\"Got an invalid imputation order: '{0}'. It must \"\n \"be one of the following: 'roman', 'arabic', \"\n \"'ascending', 'descending', or \"\n \"'random'.\".format(self.imputation_order))\n return ordered_idx" + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_impute_one_feature", + "name": "_impute_one_feature", + "qname": "sklearn.impute._iterative.IterativeImputer._impute_one_feature", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_impute_one_feature/self", + "name": "self", + "qname": "sklearn.impute._iterative.IterativeImputer._impute_one_feature.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_impute_one_feature/X_filled", + "name": "X_filled", + "qname": "sklearn.impute._iterative.IterativeImputer._impute_one_feature.X_filled", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "Input data with the most recent imputations." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_impute_one_feature/mask_missing_values", + "name": "mask_missing_values", + "qname": "sklearn.impute._iterative.IterativeImputer._impute_one_feature.mask_missing_values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "Input data's missing indicator matrix." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_impute_one_feature/feat_idx", + "name": "feat_idx", + "qname": "sklearn.impute._iterative.IterativeImputer._impute_one_feature.feat_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Index of the feature currently being imputed." 
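To make the `_get_neighbor_feat_idx` branch above concrete: when `n_nearest_features` is smaller than the number of features, neighbors are drawn without replacement, weighted by the normalized absolute correlations. A small sketch with an invented probability column (the values are made up for illustration):

```python
import numpy as np

rng = np.random.RandomState(42)
n_features, n_nearest_features, feat_idx = 5, 2, 0
# one column of the L1-normalized absolute correlation matrix;
# the entry for feat_idx itself is 0 because the diagonal was zeroed out
p = np.array([0.0, 0.4, 0.3, 0.2, 0.1])
neighbor_feat_idx = rng.choice(np.arange(n_features), n_nearest_features,
                               replace=False, p=p)
print(neighbor_feat_idx)  # two indices drawn from {1, 2, 3, 4}
```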
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_impute_one_feature/neighbor_feat_idx", + "name": "neighbor_feat_idx", + "qname": "sklearn.impute._iterative.IterativeImputer._impute_one_feature.neighbor_feat_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "Indices of the features to be used in imputing ``feat_idx``." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_impute_one_feature/estimator", + "name": "estimator", + "qname": "sklearn.impute._iterative.IterativeImputer._impute_one_feature.estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "The estimator to use at this step of the round-robin imputation.\nIf ``sample_posterior`` is True, the estimator must support\n``return_std`` in its ``predict`` method.\nIf None, it will be cloned from self._estimator." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_impute_one_feature/fit_mode", + "name": "fit_mode", + "qname": "sklearn.impute._iterative.IterativeImputer._impute_one_feature.fit_mode", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "boolean", + "default_value": "True", + "description": "Whether to fit and predict with the estimator or just predict." + }, + "type": { + "kind": "NamedType", + "name": "boolean" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Impute a single feature from the others provided.\n\nThis function predicts the missing values of one of the features using\nthe current estimates of all the other features. The ``estimator`` must\nsupport ``return_std=True`` in its ``predict`` method for this function\nto work.", + "docstring": "Impute a single feature from the others provided.\n\nThis function predicts the missing values of one of the features using\nthe current estimates of all the other features. 
The ``estimator`` must\nsupport ``return_std=True`` in its ``predict`` method for this function\nto work.\n\nParameters\n----------\nX_filled : ndarray\n Input data with the most recent imputations.\n\nmask_missing_values : ndarray\n Input data's missing indicator matrix.\n\nfeat_idx : int\n Index of the feature currently being imputed.\n\nneighbor_feat_idx : ndarray\n Indices of the features to be used in imputing ``feat_idx``.\n\nestimator : object\n The estimator to use at this step of the round-robin imputation.\n If ``sample_posterior`` is True, the estimator must support\n ``return_std`` in its ``predict`` method.\n If None, it will be cloned from self._estimator.\n\nfit_mode : boolean, default=True\n Whether to fit and predict with the estimator or just predict.\n\nReturns\n-------\nX_filled : ndarray\n Input data with ``X_filled[missing_row_mask, feat_idx]`` updated.\n\nestimator : estimator with sklearn API\n The fitted estimator used to impute\n ``X_filled[missing_row_mask, feat_idx]``.", + "code": " def _impute_one_feature(self,\n X_filled,\n mask_missing_values,\n feat_idx,\n neighbor_feat_idx,\n estimator=None,\n fit_mode=True):\n \"\"\"Impute a single feature from the others provided.\n\n This function predicts the missing values of one of the features using\n the current estimates of all the other features. The ``estimator`` must\n support ``return_std=True`` in its ``predict`` method for this function\n to work.\n\n Parameters\n ----------\n X_filled : ndarray\n Input data with the most recent imputations.\n\n mask_missing_values : ndarray\n Input data's missing indicator matrix.\n\n feat_idx : int\n Index of the feature currently being imputed.\n\n neighbor_feat_idx : ndarray\n Indices of the features to be used in imputing ``feat_idx``.\n\n estimator : object\n The estimator to use at this step of the round-robin imputation.\n If ``sample_posterior`` is True, the estimator must support\n ``return_std`` in its ``predict`` method.\n If None, it will be cloned from self._estimator.\n\n fit_mode : boolean, default=True\n Whether to fit and predict with the estimator or just predict.\n\n Returns\n -------\n X_filled : ndarray\n Input data with ``X_filled[missing_row_mask, feat_idx]`` updated.\n\n estimator : estimator with sklearn API\n The fitted estimator used to impute\n ``X_filled[missing_row_mask, feat_idx]``.\n \"\"\"\n if estimator is None and fit_mode is False:\n raise ValueError(\"If fit_mode is False, then an already-fitted \"\n \"estimator should be passed in.\")\n\n if estimator is None:\n estimator = clone(self._estimator)\n\n missing_row_mask = mask_missing_values[:, feat_idx]\n if fit_mode:\n X_train = _safe_indexing(X_filled[:, neighbor_feat_idx],\n ~missing_row_mask)\n y_train = _safe_indexing(X_filled[:, feat_idx],\n ~missing_row_mask)\n estimator.fit(X_train, y_train)\n\n # if no missing values, don't predict\n if np.sum(missing_row_mask) == 0:\n return X_filled, estimator\n\n # get posterior samples if there is at least one missing value\n X_test = _safe_indexing(X_filled[:, neighbor_feat_idx],\n missing_row_mask)\n if self.sample_posterior:\n mus, sigmas = estimator.predict(X_test, return_std=True)\n imputed_values = np.zeros(mus.shape, dtype=X_filled.dtype)\n # two types of problems: (1) non-positive sigmas\n # (2) mus outside legal range of min_value and max_value\n # (results in inf sample)\n positive_sigmas = sigmas > 0\n imputed_values[~positive_sigmas] = mus[~positive_sigmas]\n mus_too_low = mus < self._min_value[feat_idx]\n 
imputed_values[mus_too_low] = self._min_value[feat_idx]\n mus_too_high = mus > self._max_value[feat_idx]\n imputed_values[mus_too_high] = self._max_value[feat_idx]\n # the rest can be sampled without statistical issues\n inrange_mask = positive_sigmas & ~mus_too_low & ~mus_too_high\n mus = mus[inrange_mask]\n sigmas = sigmas[inrange_mask]\n a = (self._min_value[feat_idx] - mus) / sigmas\n b = (self._max_value[feat_idx] - mus) / sigmas\n\n truncated_normal = stats.truncnorm(a=a, b=b,\n loc=mus, scale=sigmas)\n imputed_values[inrange_mask] = truncated_normal.rvs(\n random_state=self.random_state_)\n else:\n imputed_values = estimator.predict(X_test)\n imputed_values = np.clip(imputed_values,\n self._min_value[feat_idx],\n self._max_value[feat_idx])\n\n # update the feature\n X_filled[missing_row_mask, feat_idx] = imputed_values\n return X_filled, estimator" + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_initial_imputation", + "name": "_initial_imputation", + "qname": "sklearn.impute._iterative.IterativeImputer._initial_imputation", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_initial_imputation/self", + "name": "self", + "qname": "sklearn.impute._iterative.IterativeImputer._initial_imputation.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_initial_imputation/X", + "name": "X", + "qname": "sklearn.impute._iterative.IterativeImputer._initial_imputation.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples, n_features)", + "default_value": "", + "description": "Input data, where \"n_samples\" is the number of samples and\n\"n_features\" is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_initial_imputation/in_fit", + "name": "in_fit", + "qname": "sklearn.impute._iterative.IterativeImputer._initial_imputation.in_fit", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether function is called in fit." 
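The posterior-sampling branch of `_impute_one_feature` above draws each missing value from a normal distribution truncated to `[min_value, max_value]`. A self-contained sketch of that draw, with made-up means, standard deviations, and bounds, using the same `scipy.stats.truncnorm` parameterization (bounds expressed in standard-deviation units around the mean):

```python
import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
mus = np.array([0.2, 0.5, 0.9])      # predicted means for the missing entries
sigmas = np.array([0.1, 0.05, 0.2])  # predicted std. devs., all positive here
min_value, max_value = 0.0, 1.0

a = (min_value - mus) / sigmas       # lower bound in sigma units
b = (max_value - mus) / sigmas       # upper bound in sigma units
imputed = stats.truncnorm(a=a, b=b, loc=mus, scale=sigmas).rvs(random_state=rng)
assert np.all((imputed >= min_value) & (imputed <= max_value))
```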
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform initial imputation for input X.", + "docstring": "Perform initial imputation for input X.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\nin_fit : bool, default=False\n Whether function is called in fit.\n\nReturns\n-------\nXt : ndarray, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\nX_filled : ndarray, shape (n_samples, n_features)\n Input data with the most recent imputations.\n\nmask_missing_values : ndarray, shape (n_samples, n_features)\n Input data's missing indicator matrix, where \"n_samples\" is the\n number of samples and \"n_features\" is the number of features.\n\nX_missing_mask : ndarray, shape (n_samples, n_features)\n Input data's mask matrix indicating missing datapoints, where\n \"n_samples\" is the number of samples and \"n_features\" is the\n number of features.", + "code": " def _initial_imputation(self, X, in_fit=False):\n \"\"\"Perform initial imputation for input X.\n\n Parameters\n ----------\n X : ndarray, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\n in_fit : bool, default=False\n Whether function is called in fit.\n\n Returns\n -------\n Xt : ndarray, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\n X_filled : ndarray, shape (n_samples, n_features)\n Input data with the most recent imputations.\n\n mask_missing_values : ndarray, shape (n_samples, n_features)\n Input data's missing indicator matrix, where \"n_samples\" is the\n number of samples and \"n_features\" is the number of features.\n\n X_missing_mask : ndarray, shape (n_samples, n_features)\n Input data's mask matrix indicating missing datapoints, where\n \"n_samples\" is the number of samples and \"n_features\" is the\n number of features.\n \"\"\"\n if is_scalar_nan(self.missing_values):\n force_all_finite = \"allow-nan\"\n else:\n force_all_finite = True\n\n X = self._validate_data(X, dtype=FLOAT_DTYPES, order=\"F\", reset=in_fit,\n force_all_finite=force_all_finite)\n _check_inputs_dtype(X, self.missing_values)\n\n X_missing_mask = _get_mask(X, self.missing_values)\n mask_missing_values = X_missing_mask.copy()\n if self.initial_imputer_ is None:\n self.initial_imputer_ = SimpleImputer(\n missing_values=self.missing_values,\n strategy=self.initial_strategy\n )\n X_filled = self.initial_imputer_.fit_transform(X)\n else:\n X_filled = self.initial_imputer_.transform(X)\n\n valid_mask = np.flatnonzero(np.logical_not(\n np.isnan(self.initial_imputer_.statistics_)))\n Xt = X[:, valid_mask]\n mask_missing_values = mask_missing_values[:, valid_mask]\n\n return Xt, X_filled, mask_missing_values, X_missing_mask" + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_validate_limit", + "name": "_validate_limit", + "qname": "sklearn.impute._iterative.IterativeImputer._validate_limit", + "decorators": ["staticmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_validate_limit/limit", + "name": "limit", + "qname": "sklearn.impute._iterative.IterativeImputer._validate_limit.limit", + 
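The `_initial_imputation` helper above is, at its core, a `SimpleImputer` pass plus a saved missing-value mask. A compact sketch with invented data, using `np.isnan` in place of the internal `_get_mask`:

```python
import numpy as np
from sklearn.impute import SimpleImputer

X = np.array([[1.0, np.nan],
              [np.nan, 4.0],
              [3.0, 6.0]])
mask_missing_values = np.isnan(X)  # kept so observed values can be restored later
initial_imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X_filled = initial_imputer.fit_transform(X)
print(X_filled)               # nans replaced by column means: 2.0 and 5.0
print(mask_missing_values)
```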
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_validate_limit/limit_type", + "name": "limit_type", + "qname": "sklearn.impute._iterative.IterativeImputer._validate_limit.limit_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/_validate_limit/n_features", + "name": "n_features", + "qname": "sklearn.impute._iterative.IterativeImputer._validate_limit.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate the limits (min/max) of the feature values\nConverts scalar min/max limits to vectors of shape (n_features,)", + "docstring": "Validate the limits (min/max) of the feature values\nConverts scalar min/max limits to vectors of shape (n_features,)\n\nParameters\n----------\nlimit: scalar or array-like\n The user-specified limit (i.e, min_value or max_value)\nlimit_type: string, \"max\" or \"min\"\n n_features: Number of features in the dataset\n\nReturns\n-------\nlimit: ndarray, shape(n_features,)\n Array of limits, one for each feature", + "code": " @staticmethod\n def _validate_limit(limit, limit_type, n_features):\n \"\"\"Validate the limits (min/max) of the feature values\n Converts scalar min/max limits to vectors of shape (n_features,)\n\n Parameters\n ----------\n limit: scalar or array-like\n The user-specified limit (i.e, min_value or max_value)\n limit_type: string, \"max\" or \"min\"\n n_features: Number of features in the dataset\n\n Returns\n -------\n limit: ndarray, shape(n_features,)\n Array of limits, one for each feature\n \"\"\"\n limit_bound = np.inf if limit_type == \"max\" else -np.inf\n limit = limit_bound if limit is None else limit\n if np.isscalar(limit):\n limit = np.full(n_features, limit)\n limit = check_array(\n limit, force_all_finite=False, copy=False, ensure_2d=False\n )\n if not limit.shape[0] == n_features:\n raise ValueError(\n f\"'{limit_type}_value' should be of \"\n f\"shape ({n_features},) when an array-like \"\n f\"is provided. 
Got {limit.shape} instead.\"\n )\n return limit" + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/fit", + "name": "fit", + "qname": "sklearn.impute._iterative.IterativeImputer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/fit/self", + "name": "self", + "qname": "sklearn.impute._iterative.IterativeImputer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/fit/X", + "name": "X", + "qname": "sklearn.impute._iterative.IterativeImputer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "Input data, where \"n_samples\" is the number of samples and\n\"n_features\" is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/fit/y", + "name": "y", + "qname": "sklearn.impute._iterative.IterativeImputer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the imputer on X and return self.", + "docstring": "Fit the imputer on X and return self.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\ny : ignored\n\nReturns\n-------\nself : object\n Returns self.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the imputer on X and return self.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\n y : ignored\n\n Returns\n -------\n self : object\n Returns self.\n \"\"\"\n self.fit_transform(X)\n return self" + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/fit_transform", + "name": "fit_transform", + "qname": "sklearn.impute._iterative.IterativeImputer.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/fit_transform/self", + "name": "self", + "qname": "sklearn.impute._iterative.IterativeImputer.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/fit_transform/X", + "name": "X", + "qname": "sklearn.impute._iterative.IterativeImputer.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "Input data, where \"n_samples\" is the number of samples and\n\"n_features\" is the number of features."
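A simplified sketch of what `_validate_limit` above does, swapping `check_array` for `np.asarray`; the helper name `broadcast_limit` and the simplification are this annotation's, not scikit-learn's:

```python
import numpy as np

def broadcast_limit(limit, limit_type, n_features):
    # None becomes an infinite bound; scalars are broadcast per feature
    bound = np.inf if limit_type == 'max' else -np.inf
    limit = bound if limit is None else limit
    if np.isscalar(limit):
        limit = np.full(n_features, limit)
    limit = np.asarray(limit, dtype=float)
    if limit.shape[0] != n_features:
        raise ValueError(f"'{limit_type}_value' should have shape ({n_features},)")
    return limit

print(broadcast_limit(None, 'min', 3))       # [-inf -inf -inf]
print(broadcast_limit(10, 'max', 3))         # [10. 10. 10.]
print(broadcast_limit([0, 1, 2], 'max', 3))  # [0. 1. 2.]
```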
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/fit_transform/y", + "name": "y", + "qname": "sklearn.impute._iterative.IterativeImputer.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the imputer on X and return the transformed X.", + "docstring": "Fit the imputer on X and return the transformed X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\ny : ignored\n\nReturns\n-------\nXt : array-like, shape (n_samples, n_features)\n The imputed input data.", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Fit the imputer on X and return the transformed X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Input data, where \"n_samples\" is the number of samples and\n \"n_features\" is the number of features.\n\n y : ignored\n\n Returns\n -------\n Xt : array-like, shape (n_samples, n_features)\n The imputed input data.\n \"\"\"\n self.random_state_ = getattr(self, \"random_state_\",\n check_random_state(self.random_state))\n\n if self.max_iter < 0:\n raise ValueError(\n \"'max_iter' should be a positive integer. Got {} instead.\"\n .format(self.max_iter))\n\n if self.tol < 0:\n raise ValueError(\n \"'tol' should be a non-negative float. Got {} instead.\"\n .format(self.tol)\n )\n\n if self.estimator is None:\n from ..linear_model import BayesianRidge\n self._estimator = BayesianRidge()\n else:\n self._estimator = clone(self.estimator)\n\n self.imputation_sequence_ = []\n\n self.initial_imputer_ = None\n\n X, Xt, mask_missing_values, complete_mask = (\n self._initial_imputation(X, in_fit=True))\n\n super()._fit_indicator(complete_mask)\n X_indicator = super()._transform_indicator(complete_mask)\n\n if self.max_iter == 0 or np.all(mask_missing_values):\n self.n_iter_ = 0\n return super()._concatenate_indicator(Xt, X_indicator)\n\n # Edge case: a single feature. 
We return the initial ...\n if Xt.shape[1] == 1:\n self.n_iter_ = 0\n return super()._concatenate_indicator(Xt, X_indicator)\n\n self._min_value = self._validate_limit(\n self.min_value, \"min\", X.shape[1])\n self._max_value = self._validate_limit(\n self.max_value, \"max\", X.shape[1])\n\n if not np.all(np.greater(self._max_value, self._min_value)):\n raise ValueError(\n \"One (or more) features have min_value >= max_value.\")\n\n # order in which to impute\n # note this is probably too slow for large feature data (d > 100000)\n # and a better way would be good.\n # see: https://goo.gl/KyCNwj and subsequent comments\n ordered_idx = self._get_ordered_idx(mask_missing_values)\n self.n_features_with_missing_ = len(ordered_idx)\n\n abs_corr_mat = self._get_abs_corr_mat(Xt)\n\n n_samples, n_features = Xt.shape\n if self.verbose > 0:\n print(\"[IterativeImputer] Completing matrix with shape %s\"\n % (X.shape,))\n start_t = time()\n if not self.sample_posterior:\n Xt_previous = Xt.copy()\n normalized_tol = self.tol * np.max(\n np.abs(X[~mask_missing_values])\n )\n for self.n_iter_ in range(1, self.max_iter + 1):\n if self.imputation_order == 'random':\n ordered_idx = self._get_ordered_idx(mask_missing_values)\n\n for feat_idx in ordered_idx:\n neighbor_feat_idx = self._get_neighbor_feat_idx(n_features,\n feat_idx,\n abs_corr_mat)\n Xt, estimator = self._impute_one_feature(\n Xt, mask_missing_values, feat_idx, neighbor_feat_idx,\n estimator=None, fit_mode=True)\n estimator_triplet = _ImputerTriplet(feat_idx,\n neighbor_feat_idx,\n estimator)\n self.imputation_sequence_.append(estimator_triplet)\n\n if self.verbose > 1:\n print('[IterativeImputer] Ending imputation round '\n '%d/%d, elapsed time %0.2f'\n % (self.n_iter_, self.max_iter, time() - start_t))\n\n if not self.sample_posterior:\n inf_norm = np.linalg.norm(Xt - Xt_previous, ord=np.inf,\n axis=None)\n if self.verbose > 0:\n print('[IterativeImputer] '\n 'Change: {}, scaled tolerance: {} '.format(\n inf_norm, normalized_tol))\n if inf_norm < normalized_tol:\n if self.verbose > 0:\n print('[IterativeImputer] Early stopping criterion '\n 'reached.')\n break\n Xt_previous = Xt.copy()\n else:\n if not self.sample_posterior:\n warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n \" reached.\", ConvergenceWarning)\n Xt[~mask_missing_values] = X[~mask_missing_values]\n return super()._concatenate_indicator(Xt, X_indicator)" + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/transform", + "name": "transform", + "qname": "sklearn.impute._iterative.IterativeImputer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/transform/self", + "name": "self", + "qname": "sklearn.impute._iterative.IterativeImputer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._iterative/IterativeImputer/transform/X", + "name": "X", + "qname": "sklearn.impute._iterative.IterativeImputer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data to complete." 
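With `fit` and `fit_transform` catalogued above, a minimal end-to-end usage example may be useful. Note that in this release the estimator is experimental, so the `enable_iterative_imputer` import is required:

```python
import numpy as np
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer

X = np.array([[1.0, 2.0],
              [3.0, 6.0],
              [4.0, 8.0],
              [np.nan, 3.0],
              [7.0, np.nan]])
imputer = IterativeImputer(max_iter=10, random_state=0)
print(imputer.fit_transform(X))  # missing entries filled by round-robin regression
```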
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Imputes all missing values in X.\n\nNote that this is stochastic, and that if random_state is not fixed,\nrepeated calls or permuted input will yield different results.", + "docstring": "Imputes all missing values in X.\n\nNote that this is stochastic, and that if random_state is not fixed,\nrepeated calls or permuted input will yield different results.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data to complete.\n\nReturns\n-------\nXt : array-like, shape (n_samples, n_features)\n The imputed input data.", + "code": " def transform(self, X):\n \"\"\"Imputes all missing values in X.\n\n Note that this is stochastic, and that if random_state is not fixed,\n repeated calls or permuted input will yield different results.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input data to complete.\n\n Returns\n -------\n Xt : array-like, shape (n_samples, n_features)\n The imputed input data.\n \"\"\"\n check_is_fitted(self)\n\n X, Xt, mask_missing_values, complete_mask = self._initial_imputation(X)\n\n X_indicator = super()._transform_indicator(complete_mask)\n\n if self.n_iter_ == 0 or np.all(mask_missing_values):\n return super()._concatenate_indicator(Xt, X_indicator)\n\n imputations_per_round = len(self.imputation_sequence_) // self.n_iter_\n i_rnd = 0\n if self.verbose > 0:\n print(\"[IterativeImputer] Completing matrix with shape %s\"\n % (X.shape,))\n start_t = time()\n for it, estimator_triplet in enumerate(self.imputation_sequence_):\n Xt, _ = self._impute_one_feature(\n Xt,\n mask_missing_values,\n estimator_triplet.feat_idx,\n estimator_triplet.neighbor_feat_idx,\n estimator=estimator_triplet.estimator,\n fit_mode=False\n )\n if not (it + 1) % imputations_per_round:\n if self.verbose > 1:\n print('[IterativeImputer] Ending imputation round '\n '%d/%d, elapsed time %0.2f'\n % (i_rnd + 1, self.n_iter_, time() - start_t))\n i_rnd += 1\n\n Xt[~mask_missing_values] = X[~mask_missing_values]\n\n return super()._concatenate_indicator(Xt, X_indicator)" + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/__init__", + "name": "__init__", + "qname": "sklearn.impute._knn.KNNImputer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/__init__/self", + "name": "self", + "qname": "sklearn.impute._knn.KNNImputer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/__init__/missing_values", + "name": "missing_values", + "qname": "sklearn.impute._knn.KNNImputer.__init__.missing_values", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, float, str, np.nan or None", + "default_value": "np.nan", + "description": "The placeholder for the missing values. All occurrences of\n`missing_values` will be imputed. For pandas' dataframes with\nnullable integer dtypes with missing values, `missing_values`\nshould be set to np.nan, since `pd.NA` will be converted to np.nan."
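The stochasticity note in `transform` above can be observed directly: with `sample_posterior=True`, repeated `transform` calls consume the shared random state, so the draws should generally differ between calls. A short demonstration with invented data:

```python
import numpy as np
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer

X = np.array([[1.0, 2.0], [3.0, 6.0], [4.0, 8.0], [np.nan, 3.0], [7.0, np.nan]])
imputer = IterativeImputer(sample_posterior=True, random_state=0).fit(X)
print(imputer.transform(X))
print(imputer.transform(X))  # typically differs: sampling advances random_state_
```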
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "np.nan" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.impute._knn.KNNImputer.__init__.n_neighbors", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of neighboring samples to use for imputation." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/__init__/weights", + "name": "weights", + "qname": "sklearn.impute._knn.KNNImputer.__init__.weights", + "default_value": "'uniform'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'uniform', 'distance'} or callable", + "default_value": "'uniform'", + "description": "Weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. All points in each neighborhood are\n weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n- callable : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["distance", "uniform"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/__init__/metric", + "name": "metric", + "qname": "sklearn.impute._knn.KNNImputer.__init__.metric", + "default_value": "'nan_euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'nan_euclidean'} or callable", + "default_value": "'nan_euclidean'", + "description": "Distance metric for searching neighbors. Possible values:\n\n- 'nan_euclidean'\n- callable : a user-defined function which conforms to the definition\n of ``_pairwise_callable(X, Y, metric, **kwds)``. The function\n accepts two arrays, X and Y, and a `missing_values` keyword in\n `kwds` and returns a scalar distance value." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["nan_euclidean"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/__init__/copy", + "name": "copy", + "qname": "sklearn.impute._knn.KNNImputer.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, a copy of X will be created. If False, imputation will\nbe done in-place whenever possible." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/__init__/add_indicator", + "name": "add_indicator", + "qname": "sklearn.impute._knn.KNNImputer.__init__.add_indicator", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, a :class:`MissingIndicator` transform will stack onto the\noutput of the imputer's transform. 
This allows a predictive estimator\nto account for missingness despite imputation. If a feature has no\nmissing values at fit/train time, the feature won't appear on the\nmissing indicator even if there are missing values at transform/test\ntime." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Imputation for completing missing values using k-Nearest Neighbors.\n\nEach sample's missing values are imputed using the mean value from\n`n_neighbors` nearest neighbors found in the training set. Two samples are\nclose if the features that neither sample is missing are close.\n\nRead more in the :ref:`User Guide <knnimpute>`.\n\n.. versionadded:: 0.22", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, missing_values=np.nan, n_neighbors=5,\n weights=\"uniform\", metric=\"nan_euclidean\", copy=True,\n add_indicator=False):\n super().__init__(\n missing_values=missing_values,\n add_indicator=add_indicator\n )\n self.n_neighbors = n_neighbors\n self.weights = weights\n self.metric = metric\n self.copy = copy" + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/_calc_impute", + "name": "_calc_impute", + "qname": "sklearn.impute._knn.KNNImputer._calc_impute", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/_calc_impute/self", + "name": "self", + "qname": "sklearn.impute._knn.KNNImputer._calc_impute.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/_calc_impute/dist_pot_donors", + "name": "dist_pot_donors", + "qname": "sklearn.impute._knn.KNNImputer._calc_impute.dist_pot_donors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_receivers, n_potential_donors)", + "default_value": "", + "description": "Distance matrix between the receivers and potential donors from\ntraining set. There must be at least one non-nan distance between\na receiver and a potential donor." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_receivers, n_potential_donors)" + } + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/_calc_impute/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.impute._knn.KNNImputer._calc_impute.n_neighbors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of neighbors to consider." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/_calc_impute/fit_X_col", + "name": "fit_X_col", + "qname": "sklearn.impute._knn.KNNImputer._calc_impute.fit_X_col", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_potential_donors,)", + "default_value": "", + "description": "Column of potential donors from training set."
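For the `KNNImputer` constructor documented above, here is the canonical usage example from the scikit-learn docs (the data values are illustrative):

```python
import numpy as np
from sklearn.impute import KNNImputer

X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]
imputer = KNNImputer(n_neighbors=2, weights='uniform')
print(imputer.fit_transform(X))
# each nan is replaced by the uniform mean of its 2 nearest donors
# under the nan_euclidean distance
```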
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_potential_donors,)" + } + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/_calc_impute/mask_fit_X_col", + "name": "mask_fit_X_col", + "qname": "sklearn.impute._knn.KNNImputer._calc_impute.mask_fit_X_col", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_potential_donors,)", + "default_value": "", + "description": "Missing mask for fit_X_col." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_potential_donors,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper function to impute a single column.", + "docstring": "Helper function to impute a single column.\n\nParameters\n----------\ndist_pot_donors : ndarray of shape (n_receivers, n_potential_donors)\n Distance matrix between the receivers and potential donors from\n training set. There must be at least one non-nan distance between\n a receiver and a potential donor.\n\nn_neighbors : int\n Number of neighbors to consider.\n\nfit_X_col : ndarray of shape (n_potential_donors,)\n Column of potential donors from training set.\n\nmask_fit_X_col : ndarray of shape (n_potential_donors,)\n Missing mask for fit_X_col.\n\nReturns\n-------\nimputed_values: ndarray of shape (n_receivers,)\n Imputed values for receivers.", + "code": " def _calc_impute(self, dist_pot_donors, n_neighbors,\n fit_X_col, mask_fit_X_col):\n \"\"\"Helper function to impute a single column.\n\n Parameters\n ----------\n dist_pot_donors : ndarray of shape (n_receivers, n_potential_donors)\n Distance matrix between the receivers and potential donors from\n training set. There must be at least one non-nan distance between\n a receiver and a potential donor.\n\n n_neighbors : int\n Number of neighbors to consider.\n\n fit_X_col : ndarray of shape (n_potential_donors,)\n Column of potential donors from training set.\n\n mask_fit_X_col : ndarray of shape (n_potential_donors,)\n Missing mask for fit_X_col.\n\n Returns\n -------\n imputed_values: ndarray of shape (n_receivers,)\n Imputed values for receivers.\n \"\"\"\n # Get donors\n donors_idx = np.argpartition(dist_pot_donors, n_neighbors - 1,\n axis=1)[:, :n_neighbors]\n\n # Get weight matrix from the distance matrix\n donors_dist = dist_pot_donors[\n np.arange(donors_idx.shape[0])[:, None], donors_idx]\n\n weight_matrix = _get_weights(donors_dist, self.weights)\n\n # fill nans with zeros\n if weight_matrix is not None:\n weight_matrix[np.isnan(weight_matrix)] = 0.0\n\n # Retrieve donor values and calculate kNN average\n donors = fit_X_col.take(donors_idx)\n donors_mask = mask_fit_X_col.take(donors_idx)\n donors = np.ma.array(donors, mask=donors_mask)\n\n return np.ma.average(donors, axis=1, weights=weight_matrix).data" + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/fit", + "name": "fit", + "qname": "sklearn.impute._knn.KNNImputer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/fit/self", + "name": "self", + "qname": "sklearn.impute._knn.KNNImputer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/fit/X", + "name": "X", + "qname": "sklearn.impute._knn.KNNImputer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": 
false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data, where `n_samples` is the number of samples and\n`n_features` is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/fit/y", + "name": "y", + "qname": "sklearn.impute._knn.KNNImputer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the imputer on X.", + "docstring": "Fit the imputer on X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the imputer on X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n Returns\n -------\n self : object\n \"\"\"\n # Check data integrity and calling arguments\n if not is_scalar_nan(self.missing_values):\n force_all_finite = True\n else:\n force_all_finite = \"allow-nan\"\n if self.metric not in _NAN_METRICS and not callable(self.metric):\n raise ValueError(\n \"The selected metric does not support NaN values\")\n if self.n_neighbors <= 0:\n raise ValueError(\n \"Expected n_neighbors > 0. Got {}\".format(self.n_neighbors))\n\n X = self._validate_data(X, accept_sparse=False, dtype=FLOAT_DTYPES,\n force_all_finite=force_all_finite,\n copy=self.copy)\n\n _check_weights(self.weights)\n self._fit_X = X\n self._mask_fit_X = _get_mask(self._fit_X, self.missing_values)\n\n super()._fit_indicator(self._mask_fit_X)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/transform", + "name": "transform", + "qname": "sklearn.impute._knn.KNNImputer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/transform/self", + "name": "self", + "qname": "sklearn.impute._knn.KNNImputer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.impute._knn/KNNImputer/transform/X", + "name": "X", + "qname": "sklearn.impute._knn.KNNImputer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data to complete." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Impute all missing values in X.", + "docstring": "Impute all missing values in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data to complete.\n\nReturns\n-------\nX : array-like of shape (n_samples, n_output_features)\n The imputed dataset. 
`n_output_features` is the number of features\n that are not always missing during `fit`.", + "code": " def transform(self, X):\n \"\"\"Impute all missing values in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input data to complete.\n\n Returns\n -------\n X : array-like of shape (n_samples, n_output_features)\n The imputed dataset. `n_output_features` is the number of features\n that are not always missing during `fit`.\n \"\"\"\n\n check_is_fitted(self)\n if not is_scalar_nan(self.missing_values):\n force_all_finite = True\n else:\n force_all_finite = \"allow-nan\"\n X = self._validate_data(X, accept_sparse=False, dtype=FLOAT_DTYPES,\n force_all_finite=force_all_finite,\n copy=self.copy, reset=False)\n\n mask = _get_mask(X, self.missing_values)\n mask_fit_X = self._mask_fit_X\n valid_mask = ~np.all(mask_fit_X, axis=0)\n\n X_indicator = super()._transform_indicator(mask)\n\n # Removes columns where the training data is all nan\n if not np.any(mask):\n # No missing values in X\n # Remove columns where the training data is all nan\n return X[:, valid_mask]\n\n row_missing_idx = np.flatnonzero(mask.any(axis=1))\n\n non_missing_fix_X = np.logical_not(mask_fit_X)\n\n # Maps from indices from X to indices in dist matrix\n dist_idx_map = np.zeros(X.shape[0], dtype=int)\n dist_idx_map[row_missing_idx] = np.arange(row_missing_idx.shape[0])\n\n def process_chunk(dist_chunk, start):\n row_missing_chunk = row_missing_idx[start:start + len(dist_chunk)]\n\n # Find and impute missing by column\n for col in range(X.shape[1]):\n if not valid_mask[col]:\n # column was all missing during training\n continue\n\n col_mask = mask[row_missing_chunk, col]\n if not np.any(col_mask):\n # column has no missing values\n continue\n\n potential_donors_idx, = np.nonzero(non_missing_fix_X[:, col])\n\n # receivers_idx are indices in X\n receivers_idx = row_missing_chunk[np.flatnonzero(col_mask)]\n\n # distances for samples that needed imputation for column\n dist_subset = (dist_chunk[dist_idx_map[receivers_idx] - start]\n [:, potential_donors_idx])\n\n # receivers with all nan distances impute with mean\n all_nan_dist_mask = np.isnan(dist_subset).all(axis=1)\n all_nan_receivers_idx = receivers_idx[all_nan_dist_mask]\n\n if all_nan_receivers_idx.size:\n col_mean = np.ma.array(self._fit_X[:, col],\n mask=mask_fit_X[:, col]).mean()\n X[all_nan_receivers_idx, col] = col_mean\n\n if len(all_nan_receivers_idx) == len(receivers_idx):\n # all receivers imputed with mean\n continue\n\n # receivers with at least one defined distance\n receivers_idx = receivers_idx[~all_nan_dist_mask]\n dist_subset = (dist_chunk[dist_idx_map[receivers_idx]\n - start]\n [:, potential_donors_idx])\n\n n_neighbors = min(self.n_neighbors, len(potential_donors_idx))\n value = self._calc_impute(\n dist_subset,\n n_neighbors,\n self._fit_X[potential_donors_idx, col],\n mask_fit_X[potential_donors_idx, col])\n X[receivers_idx, col] = value\n\n # process in fixed-memory chunks\n gen = pairwise_distances_chunked(\n X[row_missing_idx, :],\n self._fit_X,\n metric=self.metric,\n missing_values=self.missing_values,\n force_all_finite=force_all_finite,\n reduce_func=process_chunk)\n for chunk in gen:\n # process_chunk modifies X in place. 
No return value.\n pass\n\n return super()._concatenate_indicator(X[:, valid_mask], X_indicator)" + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_grid_from_X", + "name": "_grid_from_X", + "qname": "sklearn.inspection._partial_dependence._grid_from_X", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_grid_from_X/X", + "name": "X", + "qname": "sklearn.inspection._partial_dependence._grid_from_X.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_samples, n_target_features)", + "default_value": "", + "description": "The data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_target_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_grid_from_X/percentiles", + "name": "percentiles", + "qname": "sklearn.inspection._partial_dependence._grid_from_X.percentiles", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tuple of floats", + "default_value": "", + "description": "The percentiles which are used to construct the extreme values of\nthe grid. Must be in [0, 1]." + }, + "type": { + "kind": "NamedType", + "name": "tuple of floats" + } + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_grid_from_X/grid_resolution", + "name": "grid_resolution", + "qname": "sklearn.inspection._partial_dependence._grid_from_X.grid_resolution", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The number of equally spaced points to be placed on the grid for each\nfeature." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate a grid of points based on the percentiles of X.\n\nThe grid is a cartesian product between the columns of ``values``. The\njth column of ``values`` consists of ``grid_resolution`` equally-spaced\npoints between the percentiles of the jth column of X.\nIf ``grid_resolution`` is bigger than the number of unique values in the\njth column of X, then those unique values will be used instead.", + "docstring": "Generate a grid of points based on the percentiles of X.\n\nThe grid is a cartesian product between the columns of ``values``. The\njth column of ``values`` consists of ``grid_resolution`` equally-spaced\npoints between the percentiles of the jth column of X.\nIf ``grid_resolution`` is bigger than the number of unique values in the\njth column of X, then those unique values will be used instead.\n\nParameters\n----------\nX : ndarray, shape (n_samples, n_target_features)\n The data.\n\npercentiles : tuple of floats\n The percentiles which are used to construct the extreme values of\n the grid. Must be in [0, 1].\n\ngrid_resolution : int\n The number of equally spaced points to be placed on the grid for each\n feature.\n\nReturns\n-------\ngrid : ndarray, shape (n_points, n_target_features)\n A value for each feature at each point in the grid. ``n_points`` is\n always ``<= grid_resolution ** X.shape[1]``.\n\nvalues : list of 1d ndarrays\n The values with which the grid has been created. 
The size of each\n array ``values[j]`` is either ``grid_resolution``, or the number of\n unique values in ``X[:, j]``, whichever is smaller.", + "code": "def _grid_from_X(X, percentiles, grid_resolution):\n \"\"\"Generate a grid of points based on the percentiles of X.\n\n The grid is a cartesian product between the columns of ``values``. The\n jth column of ``values`` consists of ``grid_resolution`` equally-spaced\n points between the percentiles of the jth column of X.\n If ``grid_resolution`` is bigger than the number of unique values in the\n jth column of X, then those unique values will be used instead.\n\n Parameters\n ----------\n X : ndarray, shape (n_samples, n_target_features)\n The data.\n\n percentiles : tuple of floats\n The percentiles which are used to construct the extreme values of\n the grid. Must be in [0, 1].\n\n grid_resolution : int\n The number of equally spaced points to be placed on the grid for each\n feature.\n\n Returns\n -------\n grid : ndarray, shape (n_points, n_target_features)\n A value for each feature at each point in the grid. ``n_points`` is\n always ``<= grid_resolution ** X.shape[1]``.\n\n values : list of 1d ndarrays\n The values with which the grid has been created. The size of each\n array ``values[j]`` is either ``grid_resolution``, or the number of\n unique values in ``X[:, j]``, whichever is smaller.\n \"\"\"\n if not isinstance(percentiles, Iterable) or len(percentiles) != 2:\n raise ValueError(\"'percentiles' must be a sequence of 2 elements.\")\n if not all(0 <= x <= 1 for x in percentiles):\n raise ValueError(\"'percentiles' values must be in [0, 1].\")\n if percentiles[0] >= percentiles[1]:\n raise ValueError('percentiles[0] must be strictly less '\n 'than percentiles[1].')\n\n if grid_resolution <= 1:\n raise ValueError(\"'grid_resolution' must be strictly greater than 1.\")\n\n values = []\n for feature in range(X.shape[1]):\n uniques = np.unique(_safe_indexing(X, feature, axis=1))\n if uniques.shape[0] < grid_resolution:\n # feature has low resolution, use unique vals\n axis = uniques\n else:\n # create axis based on percentiles and grid resolution\n emp_percentiles = mquantiles(\n _safe_indexing(X, feature, axis=1), prob=percentiles, axis=0\n )\n if np.allclose(emp_percentiles[0], emp_percentiles[1]):\n raise ValueError(\n 'percentiles are too close to each other, '\n 'unable to build the grid. 
Please choose percentiles '\n 'that are further apart.')\n axis = np.linspace(emp_percentiles[0],\n emp_percentiles[1],\n num=grid_resolution, endpoint=True)\n values.append(axis)\n\n return cartesian(values), values" + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_partial_dependence_brute", + "name": "_partial_dependence_brute", + "qname": "sklearn.inspection._partial_dependence._partial_dependence_brute", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_partial_dependence_brute/est", + "name": "est", + "qname": "sklearn.inspection._partial_dependence._partial_dependence_brute.est", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_partial_dependence_brute/grid", + "name": "grid", + "qname": "sklearn.inspection._partial_dependence._partial_dependence_brute.grid", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_partial_dependence_brute/features", + "name": "features", + "qname": "sklearn.inspection._partial_dependence._partial_dependence_brute.features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_partial_dependence_brute/X", + "name": "X", + "qname": "sklearn.inspection._partial_dependence._partial_dependence_brute.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_partial_dependence_brute/response_method", + "name": "response_method", + "qname": "sklearn.inspection._partial_dependence._partial_dependence_brute.response_method", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _partial_dependence_brute(est, grid, features, X, response_method):\n\n predictions = []\n averaged_predictions = []\n\n # define the prediction_method (predict, predict_proba, decision_function).\n if is_regressor(est):\n prediction_method = est.predict\n else:\n predict_proba = getattr(est, 'predict_proba', None)\n decision_function = getattr(est, 'decision_function', None)\n if response_method == 'auto':\n # try predict_proba, then decision_function if it doesn't exist\n prediction_method = predict_proba or decision_function\n else:\n prediction_method = (predict_proba if response_method ==\n 'predict_proba' else decision_function)\n if prediction_method is None:\n if response_method == 'auto':\n raise ValueError(\n 'The estimator has no predict_proba and no '\n 'decision_function method.'\n )\n elif response_method == 'predict_proba':\n raise ValueError('The estimator has no predict_proba method.')\n else:\n raise ValueError(\n 'The estimator has no decision_function method.')\n\n for new_values in 
grid:\n X_eval = X.copy()\n for i, variable in enumerate(features):\n if hasattr(X_eval, 'iloc'):\n X_eval.iloc[:, variable] = new_values[i]\n else:\n X_eval[:, variable] = new_values[i]\n\n try:\n # Note: predictions is of shape\n # (n_points,) for non-multioutput regressors\n # (n_points, n_tasks) for multioutput regressors\n # (n_points, 1) for the regressors in cross_decomposition (I think)\n # (n_points, 2) for binary classification\n # (n_points, n_classes) for multiclass classification\n pred = prediction_method(X_eval)\n\n predictions.append(pred)\n # average over samples\n averaged_predictions.append(np.mean(pred, axis=0))\n except NotFittedError as e:\n raise ValueError(\n \"'estimator' parameter must be a fitted estimator\") from e\n\n n_samples = X.shape[0]\n\n # reshape to (n_targets, n_instances, n_points) where n_targets is:\n # - 1 for non-multioutput regression and binary classification (shape is\n # already correct in those cases)\n # - n_tasks for multi-output regression\n # - n_classes for multiclass classification.\n predictions = np.array(predictions).T\n if is_regressor(est) and predictions.ndim == 2:\n # non-multioutput regression, shape is (n_instances, n_points,)\n predictions = predictions.reshape(n_samples, -1)\n elif is_classifier(est) and predictions.shape[0] == 2:\n # Binary classification, shape is (2, n_instances, n_points).\n # we output the effect of **positive** class\n predictions = predictions[1]\n predictions = predictions.reshape(n_samples, -1)\n\n # reshape averaged_predictions to (n_targets, n_points) where n_targets is:\n # - 1 for non-multioutput regression and binary classification (shape is\n # already correct in those cases)\n # - n_tasks for multi-output regression\n # - n_classes for multiclass classification.\n averaged_predictions = np.array(averaged_predictions).T\n if is_regressor(est) and averaged_predictions.ndim == 1:\n # non-multioutput regression, shape is (n_points,)\n averaged_predictions = averaged_predictions.reshape(1, -1)\n elif is_classifier(est) and averaged_predictions.shape[0] == 2:\n # Binary classification, shape is (2, n_points).\n # we output the effect of **positive** class\n averaged_predictions = averaged_predictions[1]\n averaged_predictions = averaged_predictions.reshape(1, -1)\n\n return averaged_predictions, predictions" + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_partial_dependence_recursion", + "name": "_partial_dependence_recursion", + "qname": "sklearn.inspection._partial_dependence._partial_dependence_recursion", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_partial_dependence_recursion/est", + "name": "est", + "qname": "sklearn.inspection._partial_dependence._partial_dependence_recursion.est", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_partial_dependence_recursion/grid", + "name": "grid", + "qname": "sklearn.inspection._partial_dependence._partial_dependence_recursion.grid", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/_partial_dependence_recursion/features", + "name": "features", + "qname": 
"sklearn.inspection._partial_dependence._partial_dependence_recursion.features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _partial_dependence_recursion(est, grid, features):\n averaged_predictions = est._compute_partial_dependence_recursion(grid,\n features)\n if averaged_predictions.ndim == 1:\n # reshape to (1, n_points) for consistency with\n # _partial_dependence_brute\n averaged_predictions = averaged_predictions.reshape(1, -1)\n\n return averaged_predictions" + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/partial_dependence", + "name": "partial_dependence", + "qname": "sklearn.inspection._partial_dependence.partial_dependence", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/partial_dependence/estimator", + "name": "estimator", + "qname": "sklearn.inspection._partial_dependence.partial_dependence.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "BaseEstimator", + "default_value": "", + "description": "A fitted estimator object implementing :term:`predict`,\n:term:`predict_proba`, or :term:`decision_function`.\nMultioutput-multiclass classifiers are not supported." + }, + "type": { + "kind": "NamedType", + "name": "BaseEstimator" + } + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/partial_dependence/X", + "name": "X", + "qname": "sklearn.inspection._partial_dependence.partial_dependence.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like or dataframe} of shape (n_samples, n_features)", + "default_value": "", + "description": "``X`` is used to generate a grid of values for the target\n``features`` (where the partial dependence will be evaluated), and\nalso to generate values for the complement features when the\n`method` is 'brute'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/partial_dependence/features", + "name": "features", + "qname": "sklearn.inspection._partial_dependence.partial_dependence.features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of {int, str}", + "default_value": "", + "description": "The feature (e.g. `[0]`) or pair of interacting features\n(e.g. `[(0, 1)]`) for which the partial dependency should be computed." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "array-like of" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/partial_dependence/response_method", + "name": "response_method", + "qname": "sklearn.inspection._partial_dependence.partial_dependence.response_method", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'predict_proba', 'decision_function'}", + "default_value": "'auto'", + "description": "Specifies whether to use :term:`predict_proba` or\n:term:`decision_function` as the target response. For regressors\nthis parameter is ignored and the response is always the output of\n:term:`predict`. By default, :term:`predict_proba` is tried first\nand we revert to :term:`decision_function` if it doesn't exist. If\n``method`` is 'recursion', the response is always the output of\n:term:`decision_function`." + }, + "type": { + "kind": "EnumType", + "values": ["predict_proba", "auto", "decision_function"] + } + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/partial_dependence/percentiles", + "name": "percentiles", + "qname": "sklearn.inspection._partial_dependence.partial_dependence.percentiles", + "default_value": "(0.05, 0.95)", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "tuple of float", + "default_value": "(0.05, 0.95)", + "description": "The lower and upper percentile used to create the extreme values\nfor the grid. Must be in [0, 1]." + }, + "type": { + "kind": "NamedType", + "name": "tuple of float" + } + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/partial_dependence/grid_resolution", + "name": "grid_resolution", + "qname": "sklearn.inspection._partial_dependence.partial_dependence.grid_resolution", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of equally spaced points on the grid, for each target\nfeature." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/partial_dependence/method", + "name": "method", + "qname": "sklearn.inspection._partial_dependence.partial_dependence.method", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'recursion', 'brute'}", + "default_value": "'auto'", + "description": "The method used to calculate the averaged predictions:\n\n- `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`,\n ) when `kind='average'`.\n This is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. 
Since the `'recursion'` method implicitly computes\n the average of the Individual Conditional Expectation (ICE) by\n design, it is not compatible with ICE and thus `kind` must be\n `'average'`.\n\n- `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n- `'auto'`: `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\nPlease see :ref:`this note ` for\ndifferences between the `'brute'` and `'recursion'` method." + }, + "type": { + "kind": "EnumType", + "values": ["auto", "recursion", "brute"] + } + }, + { + "id": "scikit-learn/sklearn.inspection._partial_dependence/partial_dependence/kind", + "name": "kind", + "qname": "sklearn.inspection._partial_dependence.partial_dependence.kind", + "default_value": "'legacy'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'legacy', 'average', 'individual', 'both'}", + "default_value": "'legacy'", + "description": "Whether to return the partial dependence averaged across all the\nsamples in the dataset or one line per sample or both.\nSee Returns below.\n\nNote that the fast `method='recursion'` option is only available for\n`kind='average'`. Plotting individual dependencies requires using the\nslower `method='brute'` option.\n\n.. versionadded:: 0.24\n.. deprecated:: 0.24\n `kind='legacy'` is deprecated and will be removed in version 1.1.\n `kind='average'` will be the new default. It is intended to migrate\n from the ndarray output to :class:`~sklearn.utils.Bunch` output." + }, + "type": { + "kind": "EnumType", + "values": ["both", "average", "individual", "legacy"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Partial dependence of ``features``.\n\nPartial dependence of a feature (or a set of features) corresponds to\nthe average response of an estimator for each possible value of the\nfeature.\n\nRead more in the :ref:`User Guide `.\n\n.. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.", + "docstring": "Partial dependence of ``features``.\n\nPartial dependence of a feature (or a set of features) corresponds to\nthe average response of an estimator for each possible value of the\nfeature.\n\nRead more in the :ref:`User Guide `.\n\n.. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. 
In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\nParameters\n----------\nestimator : BaseEstimator\n A fitted estimator object implementing :term:`predict`,\n :term:`predict_proba`, or :term:`decision_function`.\n Multioutput-multiclass classifiers are not supported.\n\nX : {array-like or dataframe} of shape (n_samples, n_features)\n ``X`` is used to generate a grid of values for the target\n ``features`` (where the partial dependence will be evaluated), and\n also to generate values for the complement features when the\n `method` is 'brute'.\n\nfeatures : array-like of {int, str}\n The feature (e.g. `[0]`) or pair of interacting features\n (e.g. `[(0, 1)]`) for which the partial dependency should be computed.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'}, default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. For regressors\n this parameter is ignored and the response is always the output of\n :term:`predict`. By default, :term:`predict_proba` is tried first\n and we revert to :term:`decision_function` if it doesn't exist. If\n ``method`` is 'recursion', the response is always the output of\n :term:`decision_function`.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n The lower and upper percentile used to create the extreme values\n for the grid. Must be in [0, 1].\n\ngrid_resolution : int, default=100\n The number of equally spaced points on the grid, for each target\n feature.\n\nmethod : {'auto', 'recursion', 'brute'}, default='auto'\n The method used to calculate the averaged predictions:\n\n - `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`,\n ) when `kind='average'`.\n This is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. 
Since the `'recursion'` method implicitly computes\n the average of the Individual Conditional Expectation (ICE) by\n design, it is not compatible with ICE and thus `kind` must be\n `'average'`.\n\n - `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n - `'auto'`: `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\n Please see :ref:`this note ` for\n differences between the `'brute'` and `'recursion'` method.\n\nkind : {'legacy', 'average', 'individual', 'both'}, default='legacy'\n Whether to return the partial dependence averaged across all the\n samples in the dataset or one line per sample or both.\n See Returns below.\n\n Note that the fast `method='recursion'` option is only available for\n `kind='average'`. Plotting individual dependencies requires using the\n slower `method='brute'` option.\n\n .. versionadded:: 0.24\n .. deprecated:: 0.24\n `kind='legacy'` is deprecated and will be removed in version 1.1.\n `kind='average'` will be the new default. It is intended to migrate\n from the ndarray output to :class:`~sklearn.utils.Bunch` output.\n\n\nReturns\n-------\npredictions : ndarray or :class:`~sklearn.utils.Bunch`\n\n - if `kind='legacy'`, return value is ndarray of shape (n_outputs, len(values[0]), len(values[1]), ...)\n The predictions for all the points in the grid, averaged\n over all samples in X (or over the training data if ``method``\n is 'recursion').\n\n - if `kind='individual'`, `'average'` or `'both'`, return value is :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n individual : ndarray of shape (n_outputs, n_instances, len(values[0]), len(values[1]), ...)\n The predictions for all the points in the grid for all\n samples in X. This is also known as Individual\n Conditional Expectation (ICE)\n\n average : ndarray of shape (n_outputs, len(values[0]), len(values[1]), ...)\n The predictions for all the points in the grid, averaged\n over all samples in X (or over the training data if\n ``method`` is 'recursion').\n Only available when kind='both'.\n\n values : seq of 1d ndarrays\n The values with which the grid has been created. The generated\n grid is a cartesian product of the arrays in ``values``.\n ``len(values) == len(features)``. The size of each array\n ``values[j]`` is either ``grid_resolution``, or the number of\n unique values in ``X[:, j]``, whichever is smaller.\n\n ``n_outputs`` corresponds to the number of classes in a multi-class\n setting, or to the number of tasks for multi-output regression.\n For classical regression and binary classification ``n_outputs==1``.\n ``n_values_feature_j`` corresponds to the size ``values[j]``.\n\nvalues : seq of 1d ndarrays\n The values with which the grid has been created. The generated grid\n is a cartesian product of the arrays in ``values``. ``len(values) ==\n len(features)``. The size of each array ``values[j]`` is either\n ``grid_resolution``, or the number of unique values in ``X[:, j]``,\n whichever is smaller. Only available when `kind=\"legacy\"`.\n\nSee Also\n--------\nplot_partial_dependence : Plot Partial Dependence.\nPartialDependenceDisplay : Partial Dependence visualization.\n\nExamples\n--------\n>>> X = [[0, 0, 2], [1, 0, 0]]\n>>> y = [0, 1]\n>>> from sklearn.ensemble import GradientBoostingClassifier\n>>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)\n>>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),\n... 
grid_resolution=2) # doctest: +SKIP\n(array([[-4.52..., 4.52...]]), [array([ 0., 1.])])", + "code": "@_deprecate_positional_args\ndef partial_dependence(estimator, X, features, *, response_method='auto',\n percentiles=(0.05, 0.95), grid_resolution=100,\n method='auto', kind='legacy'):\n \"\"\"Partial dependence of ``features``.\n\n Partial dependence of a feature (or a set of features) corresponds to\n the average response of an estimator for each possible value of the\n feature.\n\n Read more in the :ref:`User Guide `.\n\n .. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n Parameters\n ----------\n estimator : BaseEstimator\n A fitted estimator object implementing :term:`predict`,\n :term:`predict_proba`, or :term:`decision_function`.\n Multioutput-multiclass classifiers are not supported.\n\n X : {array-like or dataframe} of shape (n_samples, n_features)\n ``X`` is used to generate a grid of values for the target\n ``features`` (where the partial dependence will be evaluated), and\n also to generate values for the complement features when the\n `method` is 'brute'.\n\n features : array-like of {int, str}\n The feature (e.g. `[0]`) or pair of interacting features\n (e.g. `[(0, 1)]`) for which the partial dependency should be computed.\n\n response_method : {'auto', 'predict_proba', 'decision_function'}, \\\n default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. For regressors\n this parameter is ignored and the response is always the output of\n :term:`predict`. By default, :term:`predict_proba` is tried first\n and we revert to :term:`decision_function` if it doesn't exist. If\n ``method`` is 'recursion', the response is always the output of\n :term:`decision_function`.\n\n percentiles : tuple of float, default=(0.05, 0.95)\n The lower and upper percentile used to create the extreme values\n for the grid. 
Must be in [0, 1].\n\n grid_resolution : int, default=100\n The number of equally spaced points on the grid, for each target\n feature.\n\n method : {'auto', 'recursion', 'brute'}, default='auto'\n The method used to calculate the averaged predictions:\n\n - `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`,\n ) when `kind='average'`.\n This is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. Since the `'recursion'` method implicitly computes\n the average of the Individual Conditional Expectation (ICE) by\n design, it is not compatible with ICE and thus `kind` must be\n `'average'`.\n\n - `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n - `'auto'`: `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\n Please see :ref:`this note ` for\n differences between the `'brute'` and `'recursion'` method.\n\n kind : {'legacy', 'average', 'individual', 'both'}, default='legacy'\n Whether to return the partial dependence averaged across all the\n samples in the dataset or one line per sample or both.\n See Returns below.\n\n Note that the fast `method='recursion'` option is only available for\n `kind='average'`. Plotting individual dependencies requires using the\n slower `method='brute'` option.\n\n .. versionadded:: 0.24\n .. deprecated:: 0.24\n `kind='legacy'` is deprecated and will be removed in version 1.1.\n `kind='average'` will be the new default. It is intended to migrate\n from the ndarray output to :class:`~sklearn.utils.Bunch` output.\n\n\n Returns\n -------\n predictions : ndarray or :class:`~sklearn.utils.Bunch`\n\n - if `kind='legacy'`, return value is ndarray of shape (n_outputs, \\\n len(values[0]), len(values[1]), ...)\n The predictions for all the points in the grid, averaged\n over all samples in X (or over the training data if ``method``\n is 'recursion').\n\n - if `kind='individual'`, `'average'` or `'both'`, return value is \\\n :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n individual : ndarray of shape (n_outputs, n_instances, \\\n len(values[0]), len(values[1]), ...)\n The predictions for all the points in the grid for all\n samples in X. This is also known as Individual\n Conditional Expectation (ICE)\n\n average : ndarray of shape (n_outputs, len(values[0]), \\\n len(values[1]), ...)\n The predictions for all the points in the grid, averaged\n over all samples in X (or over the training data if\n ``method`` is 'recursion').\n Only available when kind='both'.\n\n values : seq of 1d ndarrays\n The values with which the grid has been created. The generated\n grid is a cartesian product of the arrays in ``values``.\n ``len(values) == len(features)``. 
The size of each array\n ``values[j]`` is either ``grid_resolution``, or the number of\n unique values in ``X[:, j]``, whichever is smaller.\n\n ``n_outputs`` corresponds to the number of classes in a multi-class\n setting, or to the number of tasks for multi-output regression.\n For classical regression and binary classification ``n_outputs==1``.\n ``n_values_feature_j`` corresponds to the size ``values[j]``.\n\n values : seq of 1d ndarrays\n The values with which the grid has been created. The generated grid\n is a cartesian product of the arrays in ``values``. ``len(values) ==\n len(features)``. The size of each array ``values[j]`` is either\n ``grid_resolution``, or the number of unique values in ``X[:, j]``,\n whichever is smaller. Only available when `kind=\"legacy\"`.\n\n See Also\n --------\n plot_partial_dependence : Plot Partial Dependence.\n PartialDependenceDisplay : Partial Dependence visualization.\n\n Examples\n --------\n >>> X = [[0, 0, 2], [1, 0, 0]]\n >>> y = [0, 1]\n >>> from sklearn.ensemble import GradientBoostingClassifier\n >>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)\n >>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),\n ... grid_resolution=2) # doctest: +SKIP\n (array([[-4.52..., 4.52...]]), [array([ 0., 1.])])\n \"\"\"\n if not (is_classifier(estimator) or is_regressor(estimator)):\n raise ValueError(\n \"'estimator' must be a fitted regressor or classifier.\"\n )\n\n if isinstance(estimator, Pipeline):\n # TODO: to be removed if/when pipeline get a `steps_` attributes\n # assuming Pipeline is the only estimator that does not store a new\n # attribute\n for est in estimator:\n # FIXME: remove the None option when it will be deprecated\n if est not in (None, 'drop'):\n check_is_fitted(est)\n else:\n check_is_fitted(estimator)\n\n if (is_classifier(estimator) and\n isinstance(estimator.classes_[0], np.ndarray)):\n raise ValueError(\n 'Multiclass-multioutput estimators are not supported'\n )\n\n # Use check_array only on lists and other non-array-likes / sparse. Do not\n # convert DataFrame into a NumPy array.\n if not(hasattr(X, '__array__') or sparse.issparse(X)):\n X = check_array(X, force_all_finite='allow-nan', dtype=object)\n\n accepted_responses = ('auto', 'predict_proba', 'decision_function')\n if response_method not in accepted_responses:\n raise ValueError(\n 'response_method {} is invalid. Accepted response_method names '\n 'are {}.'.format(response_method, ', '.join(accepted_responses)))\n\n if is_regressor(estimator) and response_method != 'auto':\n raise ValueError(\n \"The response_method parameter is ignored for regressors and \"\n \"must be 'auto'.\"\n )\n\n accepted_methods = ('brute', 'recursion', 'auto')\n if method not in accepted_methods:\n raise ValueError(\n 'method {} is invalid. 
Accepted method names are {}.'.format(\n method, ', '.join(accepted_methods)))\n\n if kind != 'average' and kind != 'legacy':\n if method == 'recursion':\n raise ValueError(\n \"The 'recursion' method only applies when 'kind' is set \"\n \"to 'average'\"\n )\n method = 'brute'\n\n if method == 'auto':\n if (isinstance(estimator, BaseGradientBoosting) and\n estimator.init is None):\n method = 'recursion'\n elif isinstance(estimator, (BaseHistGradientBoosting,\n DecisionTreeRegressor,\n RandomForestRegressor)):\n method = 'recursion'\n else:\n method = 'brute'\n\n if method == 'recursion':\n if not isinstance(estimator,\n (BaseGradientBoosting, BaseHistGradientBoosting,\n DecisionTreeRegressor, RandomForestRegressor)):\n supported_classes_recursion = (\n 'GradientBoostingClassifier',\n 'GradientBoostingRegressor',\n 'HistGradientBoostingClassifier',\n 'HistGradientBoostingRegressor',\n 'DecisionTreeRegressor',\n 'RandomForestRegressor',\n )\n raise ValueError(\n \"Only the following estimators support the 'recursion' \"\n \"method: {}. Try using method='brute'.\"\n .format(', '.join(supported_classes_recursion)))\n if response_method == 'auto':\n response_method = 'decision_function'\n\n if response_method != 'decision_function':\n raise ValueError(\n \"With the 'recursion' method, the response_method must be \"\n \"'decision_function'. Got {}.\".format(response_method)\n )\n\n if _determine_key_type(features, accept_slice=False) == 'int':\n # _get_column_indices() supports negative indexing. Here, we limit\n # the indexing to be positive. The upper bound will be checked\n # by _get_column_indices()\n if np.any(np.less(features, 0)):\n raise ValueError(\n 'all features must be in [0, {}]'.format(X.shape[1] - 1)\n )\n\n features_indices = np.asarray(\n _get_column_indices(X, features), dtype=np.int32, order='C'\n ).ravel()\n\n grid, values = _grid_from_X(\n _safe_indexing(X, features_indices, axis=1), percentiles,\n grid_resolution\n )\n\n if method == 'brute':\n averaged_predictions, predictions = _partial_dependence_brute(\n estimator, grid, features_indices, X, response_method\n )\n\n # reshape predictions to\n # (n_outputs, n_instances, n_values_feature_0, n_values_feature_1, ...)\n predictions = predictions.reshape(\n -1, X.shape[0], *[val.shape[0] for val in values]\n )\n else:\n averaged_predictions = _partial_dependence_recursion(\n estimator, grid, features_indices\n )\n\n # reshape averaged_predictions to\n # (n_outputs, n_values_feature_0, n_values_feature_1, ...)\n averaged_predictions = averaged_predictions.reshape(\n -1, *[val.shape[0] for val in values])\n\n if kind == 'legacy':\n warnings.warn(\n \"A Bunch will be returned in place of 'predictions' from version\"\n \" 1.1 (renaming of 0.26) with partial dependence results \"\n \"accessible via the 'average' key. 
In the meantime, pass \"\n \"kind='average' to get the future behaviour.\",\n FutureWarning\n )\n # TODO 1.1: Remove kind == 'legacy' section\n return averaged_predictions, values\n elif kind == 'average':\n return Bunch(average=averaged_predictions, values=values)\n elif kind == 'individual':\n return Bunch(individual=predictions, values=values)\n else: # kind='both'\n return Bunch(\n average=averaged_predictions, individual=predictions,\n values=values,\n )" + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_calculate_permutation_scores", + "name": "_calculate_permutation_scores", + "qname": "sklearn.inspection._permutation_importance._calculate_permutation_scores", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_calculate_permutation_scores/estimator", + "name": "estimator", + "qname": "sklearn.inspection._permutation_importance._calculate_permutation_scores.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_calculate_permutation_scores/X", + "name": "X", + "qname": "sklearn.inspection._permutation_importance._calculate_permutation_scores.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_calculate_permutation_scores/y", + "name": "y", + "qname": "sklearn.inspection._permutation_importance._calculate_permutation_scores.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_calculate_permutation_scores/sample_weight", + "name": "sample_weight", + "qname": "sklearn.inspection._permutation_importance._calculate_permutation_scores.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_calculate_permutation_scores/col_idx", + "name": "col_idx", + "qname": "sklearn.inspection._permutation_importance._calculate_permutation_scores.col_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_calculate_permutation_scores/random_state", + "name": "random_state", + "qname": "sklearn.inspection._permutation_importance._calculate_permutation_scores.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_calculate_permutation_scores/n_repeats", + "name": "n_repeats", + "qname": "sklearn.inspection._permutation_importance._calculate_permutation_scores.n_repeats", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
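As a usage sketch for the `partial_dependence` entry recorded above (illustrative only, not part of the captured API data; assumes scikit-learn 0.24.x):

    # Minimal sketch: pass kind='average' to get the Bunch return documented
    # above instead of the deprecated kind='legacy' (ndarray, values) tuple.
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.inspection import partial_dependence

    X = [[0, 0, 2], [1, 0, 0]]
    y = [0, 1]
    gb = GradientBoostingClassifier(random_state=0).fit(X, y)

    result = partial_dependence(gb, X, features=[0], percentiles=(0, 1),
                                grid_resolution=2, kind='average')
    print(result.average.shape)  # (1, 2): one output, two grid points
    print(result["values"])      # [array([0., 1.])]

With kind='average' the fast 'recursion' path remains eligible for this estimator; kind='individual' or kind='both' forces the slower 'brute' path, as the docstring notes.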
"description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_calculate_permutation_scores/scorer", + "name": "scorer", + "qname": "sklearn.inspection._permutation_importance._calculate_permutation_scores.scorer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate score when `col_idx` is permuted.", + "docstring": "Calculate score when `col_idx` is permuted.", + "code": "def _calculate_permutation_scores(estimator, X, y, sample_weight, col_idx,\n random_state, n_repeats, scorer):\n \"\"\"Calculate score when `col_idx` is permuted.\"\"\"\n random_state = check_random_state(random_state)\n\n # Work on a copy of X to to ensure thread-safety in case of threading based\n # parallelism. Furthermore, making a copy is also useful when the joblib\n # backend is 'loky' (default) or the old 'multiprocessing': in those cases,\n # if X is large it will be automatically be backed by a readonly memory map\n # (memmap). X.copy() on the other hand is always guaranteed to return a\n # writable data-structure whose columns can be shuffled inplace.\n X_permuted = X.copy()\n scores = np.zeros(n_repeats)\n shuffling_idx = np.arange(X.shape[0])\n for n_round in range(n_repeats):\n random_state.shuffle(shuffling_idx)\n if hasattr(X_permuted, \"iloc\"):\n col = X_permuted.iloc[shuffling_idx, col_idx]\n col.index = X_permuted.index\n X_permuted.iloc[:, col_idx] = col\n else:\n X_permuted[:, col_idx] = X_permuted[shuffling_idx, col_idx]\n feature_score = _weights_scorer(\n scorer, estimator, X_permuted, y, sample_weight\n )\n scores[n_round] = feature_score\n\n return scores" + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_weights_scorer", + "name": "_weights_scorer", + "qname": "sklearn.inspection._permutation_importance._weights_scorer", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_weights_scorer/scorer", + "name": "scorer", + "qname": "sklearn.inspection._permutation_importance._weights_scorer.scorer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_weights_scorer/estimator", + "name": "estimator", + "qname": "sklearn.inspection._permutation_importance._weights_scorer.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_weights_scorer/X", + "name": "X", + "qname": "sklearn.inspection._permutation_importance._weights_scorer.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_weights_scorer/y", + "name": "y", + "qname": "sklearn.inspection._permutation_importance._weights_scorer.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} 
+ }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/_weights_scorer/sample_weight", + "name": "sample_weight", + "qname": "sklearn.inspection._permutation_importance._weights_scorer.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _weights_scorer(scorer, estimator, X, y, sample_weight):\n if sample_weight is not None:\n return scorer(estimator, X, y, sample_weight)\n return scorer(estimator, X, y)" + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/permutation_importance", + "name": "permutation_importance", + "qname": "sklearn.inspection._permutation_importance.permutation_importance", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/permutation_importance/estimator", + "name": "estimator", + "qname": "sklearn.inspection._permutation_importance.permutation_importance.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "An estimator that has already been :term:`fitted` and is compatible\nwith :term:`scorer`." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/permutation_importance/X", + "name": "X", + "qname": "sklearn.inspection._permutation_importance.permutation_importance.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray or DataFrame, shape (n_samples, n_features)", + "default_value": "", + "description": "Data on which permutation importance will be computed." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "DataFrame" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/permutation_importance/y", + "name": "y", + "qname": "sklearn.inspection._permutation_importance.permutation_importance.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or None, shape (n_samples, ) or (n_samples, n_classes)", + "default_value": "", + "description": "Targets for supervised or `None` for unsupervised." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "None" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, ) or (n_samples, n_classes)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/permutation_importance/scoring", + "name": "scoring", + "qname": "sklearn.inspection._permutation_importance.permutation_importance.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string, callable or None", + "default_value": "None", + "description": "Scorer to use. It can be a single\nstring (see :ref:`scoring_parameter`) or a callable (see\n:ref:`scoring`). If None, the estimator's default scorer is used." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/permutation_importance/n_repeats", + "name": "n_repeats", + "qname": "sklearn.inspection._permutation_importance.permutation_importance.n_repeats", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of times to permute a feature." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/permutation_importance/n_jobs", + "name": "n_jobs", + "qname": "sklearn.inspection._permutation_importance.permutation_importance.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "Number of jobs to run in parallel. The computation is done by computing\npermutation score for each columns and parallelized over the columns.\n`None` means 1 unless in a :obj:`joblib.parallel_backend` context.\n`-1` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/permutation_importance/random_state", + "name": "random_state", + "qname": "sklearn.inspection._permutation_importance.permutation_importance.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Pseudo-random number generator to control the permutations of each\nfeature.\nPass an int to get reproducible results across function calls.\nSee :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._permutation_importance/permutation_importance/sample_weight", + "name": "sample_weight", + "qname": "sklearn.inspection._permutation_importance.permutation_importance.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights used in scoring.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Permutation importance for feature evaluation [BRE]_.\n\nThe :term:`estimator` is required to be a fitted estimator. `X` can be the\ndata set used to train the estimator or a hold-out set. The permutation\nimportance of a feature is calculated as follows. First, a baseline metric,\ndefined by :term:`scoring`, is evaluated on a (potentially different)\ndataset defined by the `X`. Next, a feature column from the validation set\nis permuted and the metric is evaluated again. 
The permutation importance\nis defined to be the difference between the baseline metric and the metric from\npermuting the feature column.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Permutation importance for feature evaluation [BRE]_.\n\nThe :term:`estimator` is required to be a fitted estimator. `X` can be the\ndata set used to train the estimator or a hold-out set. The permutation\nimportance of a feature is calculated as follows. First, a baseline metric,\ndefined by :term:`scoring`, is evaluated on a (potentially different)\ndataset defined by `X`. Next, a feature column from the validation set\nis permuted and the metric is evaluated again. The permutation importance\nis defined to be the difference between the baseline metric and the metric from\npermuting the feature column.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : object\n An estimator that has already been :term:`fitted` and is compatible\n with :term:`scorer`.\n\nX : ndarray or DataFrame, shape (n_samples, n_features)\n Data on which permutation importance will be computed.\n\ny : array-like or None, shape (n_samples, ) or (n_samples, n_classes)\n Targets for supervised or `None` for unsupervised.\n\nscoring : string, callable or None, default=None\n Scorer to use. It can be a single\n string (see :ref:`scoring_parameter`) or a callable (see\n :ref:`scoring`). If None, the estimator's default scorer is used.\n\nn_repeats : int, default=5\n Number of times to permute a feature.\n\nn_jobs : int or None, default=None\n Number of jobs to run in parallel. The computation is done by computing\n the permutation score for each column, parallelized over the columns.\n `None` means 1 unless in a :obj:`joblib.parallel_backend` context.\n `-1` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n Pseudo-random number generator to control the permutations of each\n feature.\n Pass an int to get reproducible results across function calls.\n See :term:`Glossary `.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights used in scoring.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nresult : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n importances_mean : ndarray, shape (n_features, )\n Mean of feature importance over `n_repeats`.\n importances_std : ndarray, shape (n_features, )\n Standard deviation over `n_repeats`.\n importances : ndarray, shape (n_features, n_repeats)\n Raw permutation importance scores.\n\nReferences\n----------\n.. [BRE] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32,\n 2001. https://doi.org/10.1023/A:1010933404324\n\nExamples\n--------\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.inspection import permutation_importance\n>>> X = [[1, 9, 9],[1, 9, 9],[1, 9, 9],\n... [0, 9, 9],[0, 9, 9],[0, 9, 9]]\n>>> y = [1, 1, 1, 0, 0, 0]\n>>> clf = LogisticRegression().fit(X, y)\n>>> result = permutation_importance(clf, X, y, n_repeats=10,\n... random_state=0)\n>>> result.importances_mean\narray([0.4666..., 0. , 0. ])\n>>> result.importances_std\narray([0.2211..., 0. , 0. ])", + "code": "@_deprecate_positional_args\ndef permutation_importance(estimator, X, y, *, scoring=None, n_repeats=5,\n n_jobs=None, random_state=None, sample_weight=None):\n \"\"\"Permutation importance for feature evaluation [BRE]_.\n\n The :term:`estimator` is required to be a fitted estimator. 
`X` can be the\n data set used to train the estimator or a hold-out set. The permutation\n importance of a feature is calculated as follows. First, a baseline metric,\n defined by :term:`scoring`, is evaluated on a (potentially different)\n dataset defined by `X`. Next, a feature column from the validation set\n is permuted and the metric is evaluated again. The permutation importance\n is defined to be the difference between the baseline metric and the metric from\n permuting the feature column.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : object\n An estimator that has already been :term:`fitted` and is compatible\n with :term:`scorer`.\n\n X : ndarray or DataFrame, shape (n_samples, n_features)\n Data on which permutation importance will be computed.\n\n y : array-like or None, shape (n_samples, ) or (n_samples, n_classes)\n Targets for supervised or `None` for unsupervised.\n\n scoring : string, callable or None, default=None\n Scorer to use. It can be a single\n string (see :ref:`scoring_parameter`) or a callable (see\n :ref:`scoring`). If None, the estimator's default scorer is used.\n\n n_repeats : int, default=5\n Number of times to permute a feature.\n\n n_jobs : int or None, default=None\n Number of jobs to run in parallel. The computation is done by computing\n the permutation score for each column, parallelized over the columns.\n `None` means 1 unless in a :obj:`joblib.parallel_backend` context.\n `-1` means using all processors. See :term:`Glossary `\n for more details.\n\n random_state : int, RandomState instance, default=None\n Pseudo-random number generator to control the permutations of each\n feature.\n Pass an int to get reproducible results across function calls.\n See :term:`Glossary `.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights used in scoring.\n\n .. versionadded:: 0.24\n\n Returns\n -------\n result : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n importances_mean : ndarray, shape (n_features, )\n Mean of feature importance over `n_repeats`.\n importances_std : ndarray, shape (n_features, )\n Standard deviation over `n_repeats`.\n importances : ndarray, shape (n_features, n_repeats)\n Raw permutation importance scores.\n\n References\n ----------\n .. [BRE] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32,\n 2001. https://doi.org/10.1023/A:1010933404324\n\n Examples\n --------\n >>> from sklearn.linear_model import LogisticRegression\n >>> from sklearn.inspection import permutation_importance\n >>> X = [[1, 9, 9],[1, 9, 9],[1, 9, 9],\n ... [0, 9, 9],[0, 9, 9],[0, 9, 9]]\n >>> y = [1, 1, 1, 0, 0, 0]\n >>> clf = LogisticRegression().fit(X, y)\n >>> result = permutation_importance(clf, X, y, n_repeats=10,\n ... random_state=0)\n >>> result.importances_mean\n array([0.4666..., 0. , 0. ])\n >>> result.importances_std\n array([0.2211..., 0. , 0. 
])\n \"\"\"\n if not hasattr(X, \"iloc\"):\n X = check_array(X, force_all_finite='allow-nan', dtype=None)\n\n # Precompute random seed from the random state to be used\n # to get a fresh independent RandomState instance for each\n # parallel call to _calculate_permutation_scores, irrespective of\n # the fact that variables are shared or not depending on the active\n # joblib backend (sequential, thread-based or process-based).\n random_state = check_random_state(random_state)\n random_seed = random_state.randint(np.iinfo(np.int32).max + 1)\n\n scorer = check_scoring(estimator, scoring=scoring)\n baseline_score = _weights_scorer(scorer, estimator, X, y, sample_weight)\n\n scores = Parallel(n_jobs=n_jobs)(delayed(_calculate_permutation_scores)(\n estimator, X, y, sample_weight, col_idx, random_seed, n_repeats, scorer\n ) for col_idx in range(X.shape[1]))\n\n importances = baseline_score - np.array(scores)\n return Bunch(importances_mean=np.mean(importances, axis=1),\n importances_std=np.std(importances, axis=1),\n importances=importances)" + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/__init__", + "name": "__init__", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/__init__/self", + "name": "self", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/__init__/pd_results", + "name": "pd_results", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.__init__.pd_results", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of Bunch", + "default_value": "", + "description": "Results of :func:`~sklearn.inspection.partial_dependence` for\n``features``." + }, + "type": { + "kind": "NamedType", + "name": "list of Bunch" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/__init__/features", + "name": "features", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.__init__.features", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of (int,) or list of (int, int)", + "default_value": "", + "description": "Indices of features for a given plot. A tuple of one integer will plot\na partial dependence curve of one feature. A tuple of two integers will\nplot a two-way partial dependence curve as a contour plot." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list of (int,)" + }, + { + "kind": "NamedType", + "name": "list of (int, int)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/__init__/feature_names", + "name": "feature_names", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.__init__.feature_names", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of str", + "default_value": "", + "description": "Feature names corresponding to the indices in ``features``." + }, + "type": { + "kind": "NamedType", + "name": "list of str" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/__init__/target_idx", + "name": "target_idx", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.__init__.target_idx", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "- In a multiclass setting, specifies the class for which the PDPs\n should be computed. Note that for binary classification, the\n positive class (index 1) is always used.\n- In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\nIgnored in binary classification or classical regression settings." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/__init__/pdp_lim", + "name": "pdp_lim", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.__init__.pdp_lim", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Global min and max average predictions, such that all plots will have\nthe same scale and y limits. `pdp_lim[1]` is the global min and max for\nsingle partial dependence curves. `pdp_lim[2]` is the global min and\nmax for two-way partial dependence curves." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/__init__/deciles", + "name": "deciles", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.__init__.deciles", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Deciles for feature indices in ``features``." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/__init__/kind", + "name": "kind", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.__init__.kind", + "default_value": "'average'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'average', 'individual', 'both'}", + "default_value": "'average'", + "description": " Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot.\n\nNote that the fast ``method='recursion'`` option is only available for\n``kind='average'``. 
Plotting individual dependencies requires using the\nslower ``method='brute'`` option.\n\n .. versionadded:: 0.24" + }, + "type": { + "kind": "EnumType", + "values": ["both", "average", "individual"] + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/__init__/subsample", + "name": "subsample", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.__init__.subsample", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float, int or None", + "default_value": "1000", + "description": "Sampling for ICE curves when `kind` is 'individual' or 'both'.\nIf float, should be between 0.0 and 1.0 and represent the proportion\nof the dataset to be used to plot ICE curves. If int, represents the\nmaximum absolute number of samples to use.\n\nNote that the full dataset is still used to calculate partial\ndependence when `kind='both'`.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/__init__/random_state", + "name": "random_state", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the randomness of the selected samples when subsamples is not\n`None`. See :term:`Glossary ` for details.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.plot_partial_dependence` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide `.\n\n .. 
versionadded:: 0.22", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(\n self,\n pd_results,\n *,\n features,\n feature_names,\n target_idx,\n pdp_lim,\n deciles,\n kind=\"average\",\n subsample=1000,\n random_state=None,\n ):\n self.pd_results = pd_results\n self.features = features\n self.feature_names = feature_names\n self.target_idx = target_idx\n self.pdp_lim = pdp_lim\n self.deciles = deciles\n self.kind = kind\n self.subsample = subsample\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_get_sample_count", + "name": "_get_sample_count", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._get_sample_count", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_get_sample_count/self", + "name": "self", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._get_sample_count.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_get_sample_count/n_samples", + "name": "n_samples", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._get_sample_count.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the number of samples as an integer.", + "docstring": "Compute the number of samples as an integer.", + "code": " def _get_sample_count(self, n_samples):\n \"\"\"Compute the number of samples as an integer.\"\"\"\n if isinstance(self.subsample, numbers.Integral):\n if self.subsample < n_samples:\n return self.subsample\n return n_samples\n elif isinstance(self.subsample, numbers.Real):\n return ceil(n_samples * self.subsample)\n return n_samples" + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_average_dependence", + "name": "_plot_average_dependence", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_average_dependence", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_average_dependence/self", + "name": "self", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_average_dependence.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_average_dependence/avg_preds", + "name": "avg_preds", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_average_dependence.avg_preds", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_grid_points,)", + "default_value": "", + "description": "The average predictions for all points of `feature_values` for a\ngiven feature for all samples in `X`." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_grid_points,)" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_average_dependence/feature_values", + "name": "feature_values", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_average_dependence.feature_values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_grid_points,)", + "default_value": "", + "description": "The feature values for which the predictions have been computed." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_grid_points,)" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_average_dependence/ax", + "name": "ax", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_average_dependence.ax", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Matplotlib axes", + "default_value": "", + "description": "The axis on which to plot the ICE lines." + }, + "type": { + "kind": "NamedType", + "name": "Matplotlib axes" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_average_dependence/pd_line_idx", + "name": "pd_line_idx", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_average_dependence.pd_line_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The sequential index of the plot. It will be unraveled to find the\nmatching 2D position in the grid layout." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_average_dependence/line_kw", + "name": "line_kw", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_average_dependence.line_kw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Dict with keywords passed when plotting the PD plot." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot the average partial dependence.", + "docstring": "Plot the average partial dependence.\n\nParameters\n----------\navg_preds : ndarray of shape (n_grid_points,)\n The average predictions for all points of `feature_values` for a\n given feature for all samples in `X`.\nfeature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\nax : Matplotlib axes\n The axis on which to plot the ICE lines.\npd_line_idx : int\n The sequential index of the plot. 
It will be unraveled to find the\n matching 2D position in the grid layout.\nline_kw : dict\n Dict with keywords passed when plotting the PD plot.", + "code": " def _plot_average_dependence(\n self,\n avg_preds,\n feature_values,\n ax,\n pd_line_idx,\n line_kw,\n ):\n \"\"\"Plot the average partial dependence.\n\n Parameters\n ----------\n avg_preds : ndarray of shape (n_grid_points,)\n The average predictions for all points of `feature_values` for a\n given feature for all samples in `X`.\n feature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\n ax : Matplotlib axes\n The axis on which to plot the ICE lines.\n pd_line_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\n line_kw : dict\n Dict with keywords passed when plotting the PD plot.\n \"\"\"\n line_idx = np.unravel_index(pd_line_idx, self.lines_.shape)\n self.lines_[line_idx] = ax.plot(\n feature_values,\n avg_preds,\n **line_kw,\n )[0]" + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_ice_lines", + "name": "_plot_ice_lines", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_ice_lines", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_ice_lines/self", + "name": "self", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_ice_lines.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_ice_lines/preds", + "name": "preds", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_ice_lines.preds", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_instances, n_grid_points)", + "default_value": "", + "description": "The predictions computed for all points of `feature_values` for a\ngiven feature for all samples in `X`." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_instances, n_grid_points)" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_ice_lines/feature_values", + "name": "feature_values", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_ice_lines.feature_values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_grid_points,)", + "default_value": "", + "description": "The feature values for which the predictions have been computed." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_grid_points,)" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_ice_lines/n_ice_to_plot", + "name": "n_ice_to_plot", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_ice_lines.n_ice_to_plot", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The number of ICE lines to plot." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_ice_lines/ax", + "name": "ax", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_ice_lines.ax", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Matplotlib axes", + "default_value": "", + "description": "The axis on which to plot the ICE lines." + }, + "type": { + "kind": "NamedType", + "name": "Matplotlib axes" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_ice_lines/pd_plot_idx", + "name": "pd_plot_idx", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_ice_lines.pd_plot_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The sequential index of the plot. It will be unraveled to find the\nmatching 2D position in the grid layout." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_ice_lines/n_total_lines_by_plot", + "name": "n_total_lines_by_plot", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_ice_lines.n_total_lines_by_plot", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The total number of lines expected to be plot on the axis." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_ice_lines/individual_line_kw", + "name": "individual_line_kw", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_ice_lines.individual_line_kw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Dict with keywords passed when plotting the ICE lines." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot the ICE lines.", + "docstring": "Plot the ICE lines.\n\nParameters\n----------\npreds : ndarray of shape (n_instances, n_grid_points)\n The predictions computed for all points of `feature_values` for a\n given feature for all samples in `X`.\nfeature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\nn_ice_to_plot : int\n The number of ICE lines to plot.\nax : Matplotlib axes\n The axis on which to plot the ICE lines.\npd_plot_idx : int\n The sequential index of the plot. 
It will be unraveled to find the\n matching 2D position in the grid layout.\nn_total_lines_by_plot : int\n The total number of lines expected to be plot on the axis.\nindividual_line_kw : dict\n Dict with keywords passed when plotting the ICE lines.", + "code": " def _plot_ice_lines(\n self, preds, feature_values, n_ice_to_plot,\n ax, pd_plot_idx, n_total_lines_by_plot, individual_line_kw\n ):\n \"\"\"Plot the ICE lines.\n\n Parameters\n ----------\n preds : ndarray of shape \\\n (n_instances, n_grid_points)\n The predictions computed for all points of `feature_values` for a\n given feature for all samples in `X`.\n feature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\n n_ice_to_plot : int\n The number of ICE lines to plot.\n ax : Matplotlib axes\n The axis on which to plot the ICE lines.\n pd_plot_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\n n_total_lines_by_plot : int\n The total number of lines expected to be plot on the axis.\n individual_line_kw : dict\n Dict with keywords passed when plotting the ICE lines.\n \"\"\"\n rng = check_random_state(self.random_state)\n # subsample ice\n ice_lines_idx = rng.choice(\n preds.shape[0], n_ice_to_plot, replace=False,\n )\n ice_lines_subsampled = preds[ice_lines_idx, :]\n # plot the subsampled ice\n for ice_idx, ice in enumerate(ice_lines_subsampled):\n line_idx = np.unravel_index(\n pd_plot_idx * n_total_lines_by_plot + ice_idx,\n self.lines_.shape\n )\n self.lines_[line_idx] = ax.plot(\n feature_values, ice.ravel(), **individual_line_kw\n )[0]" + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence", + "name": "_plot_one_way_partial_dependence", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/self", + "name": "self", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/preds", + "name": "preds", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.preds", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_instances, n_grid_points) or None", + "default_value": "", + "description": "The predictions computed for all points of `feature_values` for a\ngiven feature for all samples in `X`." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_instances, n_grid_points)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/avg_preds", + "name": "avg_preds", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.avg_preds", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_grid_points,)", + "default_value": "", + "description": "The average predictions for all points of `feature_values` for a\ngiven feature for all samples in `X`." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_grid_points,)" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/feature_values", + "name": "feature_values", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.feature_values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_grid_points,)", + "default_value": "", + "description": "The feature values for which the predictions have been computed." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_grid_points,)" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/feature_idx", + "name": "feature_idx", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.feature_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The index corresponding to the target feature." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/n_ice_lines", + "name": "n_ice_lines", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.n_ice_lines", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The number of ICE lines to plot." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/ax", + "name": "ax", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.ax", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Matplotlib axes", + "default_value": "", + "description": "The axis on which to plot the ICE and PDP lines." 
+ }, + "type": { + "kind": "NamedType", + "name": "Matplotlib axes" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/n_cols", + "name": "n_cols", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.n_cols", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "", + "description": "The number of column in the axis." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/pd_plot_idx", + "name": "pd_plot_idx", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.pd_plot_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The sequential index of the plot. It will be unraveled to find the\nmatching 2D position in the grid layout." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/n_lines", + "name": "n_lines", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.n_lines", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The total number of lines expected to be plot on the axis." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/individual_line_kw", + "name": "individual_line_kw", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.individual_line_kw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Dict with keywords passed when plotting the ICE lines." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_one_way_partial_dependence/line_kw", + "name": "line_kw", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_one_way_partial_dependence.line_kw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Dict with keywords passed when plotting the PD plot." 
+ }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot 1-way partial dependence: ICE and PDP.", + "docstring": "Plot 1-way partial dependence: ICE and PDP.\n\nParameters\n----------\npreds : ndarray of shape (n_instances, n_grid_points) or None\n The predictions computed for all points of `feature_values` for a\n given feature for all samples in `X`.\navg_preds : ndarray of shape (n_grid_points,)\n The average predictions for all points of `feature_values` for a\n given feature for all samples in `X`.\nfeature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\nfeature_idx : int\n The index corresponding to the target feature.\nn_ice_lines : int\n The number of ICE lines to plot.\nax : Matplotlib axes\n The axis on which to plot the ICE and PDP lines.\nn_cols : int or None\n The number of column in the axis.\npd_plot_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\nn_lines : int\n The total number of lines expected to be plot on the axis.\nindividual_line_kw : dict\n Dict with keywords passed when plotting the ICE lines.\nline_kw : dict\n Dict with keywords passed when plotting the PD plot.", + "code": " def _plot_one_way_partial_dependence(\n self,\n preds,\n avg_preds,\n feature_values,\n feature_idx,\n n_ice_lines,\n ax,\n n_cols,\n pd_plot_idx,\n n_lines,\n individual_line_kw,\n line_kw,\n ):\n \"\"\"Plot 1-way partial dependence: ICE and PDP.\n\n Parameters\n ----------\n preds : ndarray of shape \\\n (n_instances, n_grid_points) or None\n The predictions computed for all points of `feature_values` for a\n given feature for all samples in `X`.\n avg_preds : ndarray of shape (n_grid_points,)\n The average predictions for all points of `feature_values` for a\n given feature for all samples in `X`.\n feature_values : ndarray of shape (n_grid_points,)\n The feature values for which the predictions have been computed.\n feature_idx : int\n The index corresponding to the target feature.\n n_ice_lines : int\n The number of ICE lines to plot.\n ax : Matplotlib axes\n The axis on which to plot the ICE and PDP lines.\n n_cols : int or None\n The number of column in the axis.\n pd_plot_idx : int\n The sequential index of the plot. 
It will be unraveled to find the\n matching 2D position in the grid layout.\n n_lines : int\n The total number of lines expected to be plot on the axis.\n individual_line_kw : dict\n Dict with keywords passed when plotting the ICE lines.\n line_kw : dict\n Dict with keywords passed when plotting the PD plot.\n \"\"\"\n from matplotlib import transforms # noqa\n\n if self.kind in (\"individual\", \"both\"):\n self._plot_ice_lines(\n preds[self.target_idx],\n feature_values,\n n_ice_lines,\n ax,\n pd_plot_idx,\n n_lines,\n individual_line_kw,\n )\n\n if self.kind in (\"average\", \"both\"):\n # the average is stored as the last line\n if self.kind == \"average\":\n pd_line_idx = pd_plot_idx\n else:\n pd_line_idx = pd_plot_idx * n_lines + n_ice_lines\n self._plot_average_dependence(\n avg_preds[self.target_idx].ravel(),\n feature_values,\n ax,\n pd_line_idx,\n line_kw,\n )\n\n trans = transforms.blended_transform_factory(\n ax.transData, ax.transAxes\n )\n # create the decile line for the vertical axis\n vlines_idx = np.unravel_index(pd_plot_idx, self.deciles_vlines_.shape)\n self.deciles_vlines_[vlines_idx] = ax.vlines(\n self.deciles[feature_idx[0]],\n 0,\n 0.05,\n transform=trans,\n color=\"k\",\n )\n # reset ylim which was overwritten by vlines\n ax.set_ylim(self.pdp_lim[1])\n\n # Set xlabel if it is not already set\n if not ax.get_xlabel():\n ax.set_xlabel(self.feature_names[feature_idx[0]])\n\n if n_cols is None or pd_plot_idx % n_cols == 0:\n if not ax.get_ylabel():\n ax.set_ylabel('Partial dependence')\n else:\n ax.set_yticklabels([])\n\n if line_kw.get(\"label\", None) and self.kind != 'individual':\n ax.legend()" + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_two_way_partial_dependence", + "name": "_plot_two_way_partial_dependence", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_two_way_partial_dependence", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_two_way_partial_dependence/self", + "name": "self", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_two_way_partial_dependence.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_two_way_partial_dependence/avg_preds", + "name": "avg_preds", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_two_way_partial_dependence.avg_preds", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_instances, n_grid_points, n_grid_points)", + "default_value": "", + "description": "The average predictions for all points of `feature_values[0]` and\n`feature_values[1]` for some given features for all samples in `X`." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_instances, n_grid_points, n_grid_points)" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_two_way_partial_dependence/feature_values", + "name": "feature_values", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_two_way_partial_dependence.feature_values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "seq of 1d array", + "default_value": "", + "description": "A sequence of array of the feature values for which the predictions\nhave been computed." + }, + "type": { + "kind": "NamedType", + "name": "seq of 1d array" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_two_way_partial_dependence/feature_idx", + "name": "feature_idx", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_two_way_partial_dependence.feature_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tuple of int", + "default_value": "", + "description": "The indices of the target features" + }, + "type": { + "kind": "NamedType", + "name": "tuple of int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_two_way_partial_dependence/ax", + "name": "ax", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_two_way_partial_dependence.ax", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Matplotlib axes", + "default_value": "", + "description": "The axis on which to plot the ICE and PDP lines." + }, + "type": { + "kind": "NamedType", + "name": "Matplotlib axes" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_two_way_partial_dependence/pd_plot_idx", + "name": "pd_plot_idx", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_two_way_partial_dependence.pd_plot_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The sequential index of the plot. It will be unraveled to find the\nmatching 2D position in the grid layout." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_two_way_partial_dependence/Z_level", + "name": "Z_level", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_two_way_partial_dependence.Z_level", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (8, 8)", + "default_value": "", + "description": "The Z-level used to encode the average predictions." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (8, 8)" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/_plot_two_way_partial_dependence/contour_kw", + "name": "contour_kw", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay._plot_two_way_partial_dependence.contour_kw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Dict with keywords passed when plotting the contours." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot 2-way partial dependence.", + "docstring": "Plot 2-way partial dependence.\n\nParameters\n----------\navg_preds : ndarray of shape (n_instances, n_grid_points, n_grid_points)\n The average predictions for all points of `feature_values[0]` and\n `feature_values[1]` for some given features for all samples in `X`.\nfeature_values : seq of 1d array\n A sequence of array of the feature values for which the predictions\n have been computed.\nfeature_idx : tuple of int\n The indices of the target features\nax : Matplotlib axes\n The axis on which to plot the ICE and PDP lines.\npd_plot_idx : int\n The sequential index of the plot. It will be unraveled to find the\n matching 2D position in the grid layout.\nZ_level : ndarray of shape (8, 8)\n The Z-level used to encode the average predictions.\ncontour_kw : dict\n Dict with keywords passed when plotting the contours.", + "code": " def _plot_two_way_partial_dependence(\n self,\n avg_preds,\n feature_values,\n feature_idx,\n ax,\n pd_plot_idx,\n Z_level,\n contour_kw,\n ):\n \"\"\"Plot 2-way partial dependence.\n\n Parameters\n ----------\n avg_preds : ndarray of shape \\\n (n_instances, n_grid_points, n_grid_points)\n The average predictions for all points of `feature_values[0]` and\n `feature_values[1]` for some given features for all samples in `X`.\n feature_values : seq of 1d array\n A sequence of array of the feature values for which the predictions\n have been computed.\n feature_idx : tuple of int\n The indices of the target features\n ax : Matplotlib axes\n The axis on which to plot the ICE and PDP lines.\n pd_plot_idx : int\n The sequential index of the plot. 
It will be unraveled to find the\n matching 2D position in the grid layout.\n Z_level : ndarray of shape (8, 8)\n The Z-level used to encode the average predictions.\n contour_kw : dict\n Dict with keywords passed when plotting the contours.\n \"\"\"\n from matplotlib import transforms # noqa\n\n XX, YY = np.meshgrid(feature_values[0], feature_values[1])\n Z = avg_preds[self.target_idx].T\n CS = ax.contour(XX, YY, Z, levels=Z_level, linewidths=0.5, colors=\"k\")\n contour_idx = np.unravel_index(pd_plot_idx, self.contours_.shape)\n self.contours_[contour_idx] = ax.contourf(\n XX,\n YY,\n Z,\n levels=Z_level,\n vmax=Z_level[-1],\n vmin=Z_level[0],\n **contour_kw,\n )\n ax.clabel(CS, fmt=\"%2.2f\", colors=\"k\", fontsize=10, inline=True)\n\n trans = transforms.blended_transform_factory(\n ax.transData, ax.transAxes\n )\n # create the decile line for the vertical axis\n xlim, ylim = ax.get_xlim(), ax.get_ylim()\n vlines_idx = np.unravel_index(pd_plot_idx, self.deciles_vlines_.shape)\n self.deciles_vlines_[vlines_idx] = ax.vlines(\n self.deciles[feature_idx[0]], 0, 0.05, transform=trans, color=\"k\",\n )\n # create the decile line for the horizontal axis\n hlines_idx = np.unravel_index(pd_plot_idx, self.deciles_hlines_.shape)\n self.deciles_hlines_[hlines_idx] = ax.hlines(\n self.deciles[feature_idx[1]], 0, 0.05, transform=trans, color=\"k\",\n )\n # reset xlim and ylim since they are overwritten by hlines and vlines\n ax.set_xlim(xlim)\n ax.set_ylim(ylim)\n\n # set xlabel if it is not already set\n if not ax.get_xlabel():\n ax.set_xlabel(self.feature_names[feature_idx[0]])\n ax.set_ylabel(self.feature_names[feature_idx[1]])" + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot", + "name": "plot", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.plot", + "decorators": ["_deprecate_positional_args(version='1.1')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot/self", + "name": "self", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.plot.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot/ax", + "name": "ax", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.plot.ax", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "Matplotlib axes or array-like of Matplotlib axes", + "default_value": "None", + "description": "- If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. The `n_cols` parameter controls the number of\n columns in the grid.\n- If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n- If `None`, a figure and a bounding axes is created and treated\n as the single axes case." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "Matplotlib axes" + }, + { + "kind": "NamedType", + "name": "array-like of Matplotlib axes" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot/n_cols", + "name": "n_cols", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.plot.n_cols", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "The maximum number of columns in the grid plot. Only active when\n`ax` is a single axes or `None`." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot/line_kw", + "name": "line_kw", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.plot.line_kw", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Dict with keywords passed to the `matplotlib.pyplot.plot` call.\nFor one-way partial dependence plots." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/PartialDependenceDisplay/plot/contour_kw", + "name": "contour_kw", + "qname": "sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay.plot.contour_kw", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Dict with keywords passed to the `matplotlib.pyplot.contourf`\ncall for two-way partial dependence plots." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot partial dependence plots.", + "docstring": "Plot partial dependence plots.\n\nParameters\n----------\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n - If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. The `n_cols` parameter controls the number of\n columns in the grid.\n - If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n - If `None`, a figure and a bounding axes is created and treated\n as the single axes case.\n\nn_cols : int, default=3\n The maximum number of columns in the grid plot. Only active when\n `ax` is a single axes or `None`.\n\nline_kw : dict, default=None\n Dict with keywords passed to the `matplotlib.pyplot.plot` call.\n For one-way partial dependence plots.\n\ncontour_kw : dict, default=None\n Dict with keywords passed to the `matplotlib.pyplot.contourf`\n call for two-way partial dependence plots.\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`", + "code": " @_deprecate_positional_args(version=\"1.1\")\n def plot(self, *, ax=None, n_cols=3, line_kw=None, contour_kw=None):\n \"\"\"Plot partial dependence plots.\n\n Parameters\n ----------\n ax : Matplotlib axes or array-like of Matplotlib axes, default=None\n - If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. 
The `n_cols` parameter controls the number of\n columns in the grid.\n - If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n - If `None`, a figure and a bounding axes is created and treated\n as the single axes case.\n\n n_cols : int, default=3\n The maximum number of columns in the grid plot. Only active when\n `ax` is a single axes or `None`.\n\n line_kw : dict, default=None\n Dict with keywords passed to the `matplotlib.pyplot.plot` call.\n For one-way partial dependence plots.\n\n contour_kw : dict, default=None\n Dict with keywords passed to the `matplotlib.pyplot.contourf`\n call for two-way partial dependence plots.\n\n Returns\n -------\n display : :class:`~sklearn.inspection.PartialDependenceDisplay`\n \"\"\"\n\n check_matplotlib_support(\"plot_partial_dependence\")\n import matplotlib.pyplot as plt # noqa\n from matplotlib.gridspec import GridSpecFromSubplotSpec # noqa\n\n if line_kw is None:\n line_kw = {}\n if contour_kw is None:\n contour_kw = {}\n\n if ax is None:\n _, ax = plt.subplots()\n\n default_contour_kws = {\"alpha\": 0.75}\n contour_kw = {**default_contour_kws, **contour_kw}\n\n default_line_kws = {\n \"color\": \"C0\",\n \"label\": \"average\" if self.kind == \"both\" else None,\n }\n line_kw = {**default_line_kws, **line_kw}\n\n individual_line_kw = line_kw.copy()\n del individual_line_kw[\"label\"]\n\n if self.kind == 'individual' or self.kind == 'both':\n individual_line_kw['alpha'] = 0.3\n individual_line_kw['linewidth'] = 0.5\n\n n_features = len(self.features)\n if self.kind in (\"individual\", \"both\"):\n n_ice_lines = self._get_sample_count(\n len(self.pd_results[0].individual[0])\n )\n if self.kind == \"individual\":\n n_lines = n_ice_lines\n else:\n n_lines = n_ice_lines + 1\n else:\n n_ice_lines = 0\n n_lines = 1\n\n if isinstance(ax, plt.Axes):\n # If ax was set off, it has most likely been set to off\n # by a previous call to plot.\n if not ax.axison:\n raise ValueError(\"The ax was already used in another plot \"\n \"function, please set ax=display.axes_ \"\n \"instead\")\n\n ax.set_axis_off()\n self.bounding_ax_ = ax\n self.figure_ = ax.figure\n\n n_cols = min(n_cols, n_features)\n n_rows = int(np.ceil(n_features / float(n_cols)))\n\n self.axes_ = np.empty((n_rows, n_cols), dtype=object)\n if self.kind == 'average':\n self.lines_ = np.empty((n_rows, n_cols), dtype=object)\n else:\n self.lines_ = np.empty((n_rows, n_cols, n_lines), dtype=object)\n self.contours_ = np.empty((n_rows, n_cols), dtype=object)\n\n axes_ravel = self.axes_.ravel()\n\n gs = GridSpecFromSubplotSpec(n_rows, n_cols,\n subplot_spec=ax.get_subplotspec())\n for i, spec in zip(range(n_features), gs):\n axes_ravel[i] = self.figure_.add_subplot(spec)\n\n else: # array-like\n ax = np.asarray(ax, dtype=object)\n if ax.size != n_features:\n raise ValueError(\"Expected ax to have {} axes, got {}\"\n .format(n_features, ax.size))\n\n if ax.ndim == 2:\n n_cols = ax.shape[1]\n else:\n n_cols = None\n\n self.bounding_ax_ = None\n self.figure_ = ax.ravel()[0].figure\n self.axes_ = ax\n if self.kind == 'average':\n self.lines_ = np.empty_like(ax, dtype=object)\n else:\n self.lines_ = np.empty(ax.shape + (n_lines,), dtype=object)\n self.contours_ = np.empty_like(ax, dtype=object)\n\n # create contour levels for two-way plots\n if 2 in self.pdp_lim:\n Z_level = np.linspace(*self.pdp_lim[2], num=8)\n\n self.deciles_vlines_ = np.empty_like(self.axes_, dtype=object)\n self.deciles_hlines_ = np.empty_like(self.axes_, dtype=object)\n\n for 
pd_plot_idx, (axi, feature_idx, pd_result) in enumerate(\n zip(self.axes_.ravel(), self.features, self.pd_results)\n ):\n avg_preds = None\n preds = None\n feature_values = pd_result[\"values\"]\n if self.kind == 'individual':\n preds = pd_result.individual\n elif self.kind == 'average':\n avg_preds = pd_result.average\n else: # kind='both'\n avg_preds = pd_result.average\n preds = pd_result.individual\n\n if len(feature_values) == 1:\n self._plot_one_way_partial_dependence(\n preds,\n avg_preds,\n feature_values[0],\n feature_idx,\n n_ice_lines,\n axi,\n n_cols,\n pd_plot_idx,\n n_lines,\n individual_line_kw,\n line_kw,\n )\n else:\n self._plot_two_way_partial_dependence(\n avg_preds,\n feature_values,\n feature_idx,\n axi,\n pd_plot_idx,\n Z_level,\n contour_kw,\n )\n\n return self" + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence", + "name": "plot_partial_dependence", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/estimator", + "name": "estimator", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "BaseEstimator", + "default_value": "", + "description": "A fitted estimator object implementing :term:`predict`,\n:term:`predict_proba`, or :term:`decision_function`.\nMultioutput-multiclass classifiers are not supported." + }, + "type": { + "kind": "NamedType", + "name": "BaseEstimator" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/X", + "name": "X", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like or dataframe} of shape (n_samples, n_features)", + "default_value": "", + "description": "``X`` is used to generate a grid of values for the target\n``features`` (where the partial dependence will be evaluated), and\nalso to generate values for the complement features when the\n`method` is `'brute'`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/features", + "name": "features", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of {int, str, pair of int, pair of str}", + "default_value": "", + "description": "The target features for which to create the PDPs.\nIf `features[i]` is an integer or a string, a one-way PDP is created;\nif `features[i]` is a tuple, a two-way PDP is created (only supported\nwith `kind='average'`). Each tuple must be of size 2.\nif any entry is a string, then it must be in ``feature_names``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "list of" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/feature_names", + "name": "feature_names", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.feature_names", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features,), dtype=str", + "default_value": "None", + "description": "Name of each feature; `feature_names[i]` holds the name of the feature\nwith index `i`.\nBy default, the name of the feature corresponds to their numerical\nindex for NumPy array and their column name for pandas dataframe." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + }, + { + "kind": "NamedType", + "name": "dtype=str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/target", + "name": "target", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.target", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "- In a multiclass setting, specifies the class for which the PDPs\n should be computed. Note that for binary classification, the\n positive class (index 1) is always used.\n- In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\nIgnored in binary classification or classical regression settings." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/response_method", + "name": "response_method", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.response_method", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'predict_proba', 'decision_function'}", + "default_value": "'auto'", + "description": "Specifies whether to use :term:`predict_proba` or\n:term:`decision_function` as the target response. For regressors\nthis parameter is ignored and the response is always the output of\n:term:`predict`. By default, :term:`predict_proba` is tried first\nand we revert to :term:`decision_function` if it doesn't exist. If\n``method`` is `'recursion'`, the response is always the output of\n:term:`decision_function`." + }, + "type": { + "kind": "EnumType", + "values": ["predict_proba", "auto", "decision_function"] + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/n_cols", + "name": "n_cols", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.n_cols", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "The maximum number of columns in the grid plot. Only active when `ax`\nis a single axis or `None`." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/grid_resolution", + "name": "grid_resolution", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.grid_resolution", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of equally spaced points on the axes of the plots, for each\ntarget feature." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/percentiles", + "name": "percentiles", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.percentiles", + "default_value": "(0.05, 0.95)", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "tuple of float", + "default_value": "(0.05, 0.95)", + "description": "The lower and upper percentile used to create the extreme values\nfor the PDP axes. Must be in [0, 1]." + }, + "type": { + "kind": "NamedType", + "name": "tuple of float" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/method", + "name": "method", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.method", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'auto'", + "description": "The method used to calculate the averaged predictions:\n\n- `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`\n but is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. Since the `'recursion'` method implicitely computes\n the average of the ICEs by design, it is not compatible with ICE and\n thus `kind` must be `'average'`.\n\n- `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n- `'auto'`: the `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\nPlease see :ref:`this note ` for\ndifferences between the `'brute'` and `'recursion'` method." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/n_jobs", + "name": "n_jobs", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of CPUs to use to compute the partial dependences.\nComputation is parallelized over features specified by the `features`\nparameter.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/verbose", + "name": "verbose", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Verbose output during PD computations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/line_kw", + "name": "line_kw", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.line_kw", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\nFor one-way partial dependence plots." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/contour_kw", + "name": "contour_kw", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.contour_kw", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\nFor two-way partial dependence plots." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/ax", + "name": "ax", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.ax", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "Matplotlib axes or array-like of Matplotlib axes", + "default_value": "None", + "description": "- If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. The `n_cols` parameter controls the number of\n columns in the grid.\n- If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n- If `None`, a figure and a bounding axes is created and treated\n as the single axes case.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "Matplotlib axes" + }, + { + "kind": "NamedType", + "name": "array-like of Matplotlib axes" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/kind", + "name": "kind", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.kind", + "default_value": "'average'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'average', 'individual', 'both'}", + "default_value": "'average'", + "description": " Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot.\n\nNote that the fast ``method='recursion'`` option is only available for\n``kind='average'``. Plotting individual dependencies requires using the\nslower ``method='brute'`` option.\n\n .. 
versionadded:: 0.24" + }, + "type": { + "kind": "EnumType", + "values": ["both", "average", "individual"] + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/subsample", + "name": "subsample", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.subsample", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float, int or None", + "default_value": "1000", + "description": "Sampling for ICE curves when `kind` is 'individual' or 'both'.\nIf `float`, should be between 0.0 and 1.0 and represent the proportion\nof the dataset to be used to plot ICE curves. If `int`, represents the\nabsolute number samples to use.\n\nNote that the full dataset is still used to calculate averaged partial\ndependence when `kind='both'`.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.inspection._plot.partial_dependence/plot_partial_dependence/random_state", + "name": "random_state", + "qname": "sklearn.inspection._plot.partial_dependence.plot_partial_dependence.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the randomness of the selected samples when subsamples is not\n`None` and `kind` is either `'both'` or `'individual'`.\nSee :term:`Glossary ` for details.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Partial dependence (PD) and individual conditional expectation (ICE)\nplots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter.\nThe ``len(features)`` plots are arranged in a grid with ``n_cols``\ncolumns. Two-way partial dependence plots are plotted as contour plots. The\ndeciles of the feature values will be shown with tick marks on the x-axes\nfor one-way plots, and on both axes for two-way plots.\n\nRead more in the :ref:`User Guide `.\n\n.. note::\n\n :func:`plot_partial_dependence` does not support using the same axes\n with multiple calls. To plot the the partial dependence for multiple\n estimators, please pass the axes created by the first call to the\n second call::\n\n >>> from sklearn.inspection import plot_partial_dependence\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.linear_model import LinearRegression\n >>> from sklearn.ensemble import RandomForestRegressor\n >>> X, y = make_friedman1()\n >>> est1 = LinearRegression().fit(X, y)\n >>> est2 = RandomForestRegressor().fit(X, y)\n >>> disp1 = plot_partial_dependence(est1, X,\n ... [1, 2]) # doctest: +SKIP\n >>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n ... ax=disp1.axes_) # doctest: +SKIP\n\n.. 
warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.", + "docstring": "Partial dependence (PD) and individual conditional expectation (ICE)\nplots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter.\nThe ``len(features)`` plots are arranged in a grid with ``n_cols``\ncolumns. Two-way partial dependence plots are plotted as contour plots. The\ndeciles of the feature values will be shown with tick marks on the x-axes\nfor one-way plots, and on both axes for two-way plots.\n\nRead more in the :ref:`User Guide `.\n\n.. note::\n\n :func:`plot_partial_dependence` does not support using the same axes\n with multiple calls. To plot the the partial dependence for multiple\n estimators, please pass the axes created by the first call to the\n second call::\n\n >>> from sklearn.inspection import plot_partial_dependence\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.linear_model import LinearRegression\n >>> from sklearn.ensemble import RandomForestRegressor\n >>> X, y = make_friedman1()\n >>> est1 = LinearRegression().fit(X, y)\n >>> est2 = RandomForestRegressor().fit(X, y)\n >>> disp1 = plot_partial_dependence(est1, X,\n ... [1, 2]) # doctest: +SKIP\n >>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n ... ax=disp1.axes_) # doctest: +SKIP\n\n.. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. 
Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\nParameters\n----------\nestimator : BaseEstimator\n A fitted estimator object implementing :term:`predict`,\n :term:`predict_proba`, or :term:`decision_function`.\n Multioutput-multiclass classifiers are not supported.\n\nX : {array-like or dataframe} of shape (n_samples, n_features)\n ``X`` is used to generate a grid of values for the target\n ``features`` (where the partial dependence will be evaluated), and\n also to generate values for the complement features when the\n `method` is `'brute'`.\n\nfeatures : list of {int, str, pair of int, pair of str}\n The target features for which to create the PDPs.\n If `features[i]` is an integer or a string, a one-way PDP is created;\n if `features[i]` is a tuple, a two-way PDP is created (only supported\n with `kind='average'`). Each tuple must be of size 2.\n if any entry is a string, then it must be in ``feature_names``.\n\nfeature_names : array-like of shape (n_features,), dtype=str, default=None\n Name of each feature; `feature_names[i]` holds the name of the feature\n with index `i`.\n By default, the name of the feature corresponds to their numerical\n index for NumPy array and their column name for pandas dataframe.\n\ntarget : int, default=None\n - In a multiclass setting, specifies the class for which the PDPs\n should be computed. Note that for binary classification, the\n positive class (index 1) is always used.\n - In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\n Ignored in binary classification or classical regression settings.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'}, default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. For regressors\n this parameter is ignored and the response is always the output of\n :term:`predict`. By default, :term:`predict_proba` is tried first\n and we revert to :term:`decision_function` if it doesn't exist. If\n ``method`` is `'recursion'`, the response is always the output of\n :term:`decision_function`.\n\nn_cols : int, default=3\n The maximum number of columns in the grid plot. Only active when `ax`\n is a single axis or `None`.\n\ngrid_resolution : int, default=100\n The number of equally spaced points on the axes of the plots, for each\n target feature.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n The lower and upper percentile used to create the extreme values\n for the PDP axes. Must be in [0, 1].\n\nmethod : str, default='auto'\n The method used to calculate the averaged predictions:\n\n - `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`\n but is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. 
Since the `'recursion'` method implicitely computes\n the average of the ICEs by design, it is not compatible with ICE and\n thus `kind` must be `'average'`.\n\n - `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n - `'auto'`: the `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\n Please see :ref:`this note ` for\n differences between the `'brute'` and `'recursion'` method.\n\nn_jobs : int, default=None\n The number of CPUs to use to compute the partial dependences.\n Computation is parallelized over features specified by the `features`\n parameter.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n Verbose output during PD computations.\n\nline_kw : dict, default=None\n Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n For one-way partial dependence plots.\n\ncontour_kw : dict, default=None\n Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n For two-way partial dependence plots.\n\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n - If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. The `n_cols` parameter controls the number of\n columns in the grid.\n - If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n - If `None`, a figure and a bounding axes is created and treated\n as the single axes case.\n\n .. versionadded:: 0.22\n\nkind : {'average', 'individual', 'both'}, default='average'\n Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot.\n\n Note that the fast ``method='recursion'`` option is only available for\n ``kind='average'``. Plotting individual dependencies requires using the\n slower ``method='brute'`` option.\n\n .. versionadded:: 0.24\n\nsubsample : float, int or None, default=1000\n Sampling for ICE curves when `kind` is 'individual' or 'both'.\n If `float`, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to be used to plot ICE curves. If `int`, represents the\n absolute number samples to use.\n\n Note that the full dataset is still used to calculate averaged partial\n dependence when `kind='both'`.\n\n .. versionadded:: 0.24\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the selected samples when subsamples is not\n `None` and `kind` is either `'both'` or `'individual'`.\n See :term:`Glossary ` for details.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\nPartialDependenceDisplay : Partial Dependence visualization.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> X, y = make_friedman1()\n>>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n>>> plot_partial_dependence(clf, X, [0, (0, 1)]) #doctest: +SKIP", + "code": "@_deprecate_positional_args\ndef plot_partial_dependence(\n estimator,\n X,\n features,\n *,\n feature_names=None,\n target=None,\n response_method=\"auto\",\n n_cols=3,\n grid_resolution=100,\n percentiles=(0.05, 0.95),\n method=\"auto\",\n n_jobs=None,\n verbose=0,\n line_kw=None,\n contour_kw=None,\n ax=None,\n kind=\"average\",\n subsample=1000,\n random_state=None,\n):\n \"\"\"Partial dependence (PD) and individual conditional expectation (ICE)\n plots.\n\n Partial dependence plots, individual conditional expectation plots or an\n overlay of both of them can be plotted by setting the ``kind``\n parameter.\n The ``len(features)`` plots are arranged in a grid with ``n_cols``\n columns. Two-way partial dependence plots are plotted as contour plots. The\n deciles of the feature values will be shown with tick marks on the x-axes\n for one-way plots, and on both axes for two-way plots.\n\n Read more in the :ref:`User Guide `.\n\n .. note::\n\n :func:`plot_partial_dependence` does not support using the same axes\n with multiple calls. To plot the the partial dependence for multiple\n estimators, please pass the axes created by the first call to the\n second call::\n\n >>> from sklearn.inspection import plot_partial_dependence\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.linear_model import LinearRegression\n >>> from sklearn.ensemble import RandomForestRegressor\n >>> X, y = make_friedman1()\n >>> est1 = LinearRegression().fit(X, y)\n >>> est2 = RandomForestRegressor().fit(X, y)\n >>> disp1 = plot_partial_dependence(est1, X,\n ... [1, 2]) # doctest: +SKIP\n >>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n ... ax=disp1.axes_) # doctest: +SKIP\n\n .. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. 
Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n Parameters\n ----------\n estimator : BaseEstimator\n A fitted estimator object implementing :term:`predict`,\n :term:`predict_proba`, or :term:`decision_function`.\n Multioutput-multiclass classifiers are not supported.\n\n X : {array-like or dataframe} of shape (n_samples, n_features)\n ``X`` is used to generate a grid of values for the target\n ``features`` (where the partial dependence will be evaluated), and\n also to generate values for the complement features when the\n `method` is `'brute'`.\n\n features : list of {int, str, pair of int, pair of str}\n The target features for which to create the PDPs.\n If `features[i]` is an integer or a string, a one-way PDP is created;\n if `features[i]` is a tuple, a two-way PDP is created (only supported\n with `kind='average'`). Each tuple must be of size 2.\n if any entry is a string, then it must be in ``feature_names``.\n\n feature_names : array-like of shape (n_features,), dtype=str, default=None\n Name of each feature; `feature_names[i]` holds the name of the feature\n with index `i`.\n By default, the name of the feature corresponds to their numerical\n index for NumPy array and their column name for pandas dataframe.\n\n target : int, default=None\n - In a multiclass setting, specifies the class for which the PDPs\n should be computed. Note that for binary classification, the\n positive class (index 1) is always used.\n - In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\n Ignored in binary classification or classical regression settings.\n\n response_method : {'auto', 'predict_proba', 'decision_function'}, \\\n default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. For regressors\n this parameter is ignored and the response is always the output of\n :term:`predict`. By default, :term:`predict_proba` is tried first\n and we revert to :term:`decision_function` if it doesn't exist. If\n ``method`` is `'recursion'`, the response is always the output of\n :term:`decision_function`.\n\n n_cols : int, default=3\n The maximum number of columns in the grid plot. Only active when `ax`\n is a single axis or `None`.\n\n grid_resolution : int, default=100\n The number of equally spaced points on the axes of the plots, for each\n target feature.\n\n percentiles : tuple of float, default=(0.05, 0.95)\n The lower and upper percentile used to create the extreme values\n for the PDP axes. Must be in [0, 1].\n\n method : str, default='auto'\n The method used to calculate the averaged predictions:\n\n - `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`\n but is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. 
Since the `'recursion'` method implicitely computes\n the average of the ICEs by design, it is not compatible with ICE and\n thus `kind` must be `'average'`.\n\n - `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n - `'auto'`: the `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\n Please see :ref:`this note ` for\n differences between the `'brute'` and `'recursion'` method.\n\n n_jobs : int, default=None\n The number of CPUs to use to compute the partial dependences.\n Computation is parallelized over features specified by the `features`\n parameter.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n verbose : int, default=0\n Verbose output during PD computations.\n\n line_kw : dict, default=None\n Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n For one-way partial dependence plots.\n\n contour_kw : dict, default=None\n Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n For two-way partial dependence plots.\n\n ax : Matplotlib axes or array-like of Matplotlib axes, default=None\n - If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. The `n_cols` parameter controls the number of\n columns in the grid.\n - If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n - If `None`, a figure and a bounding axes is created and treated\n as the single axes case.\n\n .. versionadded:: 0.22\n\n kind : {'average', 'individual', 'both'}, default='average'\n Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot.\n\n Note that the fast ``method='recursion'`` option is only available for\n ``kind='average'``. Plotting individual dependencies requires using the\n slower ``method='brute'`` option.\n\n .. versionadded:: 0.24\n\n subsample : float, int or None, default=1000\n Sampling for ICE curves when `kind` is 'individual' or 'both'.\n If `float`, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to be used to plot ICE curves. If `int`, represents the\n absolute number samples to use.\n\n Note that the full dataset is still used to calculate averaged partial\n dependence when `kind='both'`.\n\n .. versionadded:: 0.24\n\n random_state : int, RandomState instance or None, default=None\n Controls the randomness of the selected samples when subsamples is not\n `None` and `kind` is either `'both'` or `'individual'`.\n See :term:`Glossary ` for details.\n\n .. 
versionadded:: 0.24\n\n Returns\n -------\n display : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\n See Also\n --------\n partial_dependence : Compute Partial Dependence values.\n PartialDependenceDisplay : Partial Dependence visualization.\n\n Examples\n --------\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.ensemble import GradientBoostingRegressor\n >>> X, y = make_friedman1()\n >>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n >>> plot_partial_dependence(clf, X, [0, (0, 1)]) #doctest: +SKIP\n \"\"\"\n check_matplotlib_support('plot_partial_dependence') # noqa\n import matplotlib.pyplot as plt # noqa\n\n # set target_idx for multi-class estimators\n if hasattr(estimator, 'classes_') and np.size(estimator.classes_) > 2:\n if target is None:\n raise ValueError('target must be specified for multi-class')\n target_idx = np.searchsorted(estimator.classes_, target)\n if (not (0 <= target_idx < len(estimator.classes_)) or\n estimator.classes_[target_idx] != target):\n raise ValueError('target not in est.classes_, got {}'.format(\n target))\n else:\n # regression and binary classification\n target_idx = 0\n\n # Use check_array only on lists and other non-array-likes / sparse. Do not\n # convert DataFrame into a NumPy array.\n if not(hasattr(X, '__array__') or sparse.issparse(X)):\n X = check_array(X, force_all_finite='allow-nan', dtype=object)\n n_features = X.shape[1]\n\n # convert feature_names to list\n if feature_names is None:\n if hasattr(X, \"loc\"):\n # get the column names for a pandas dataframe\n feature_names = X.columns.tolist()\n else:\n # define a list of numbered indices for a numpy array\n feature_names = [str(i) for i in range(n_features)]\n elif hasattr(feature_names, \"tolist\"):\n # convert numpy array or pandas index to a list\n feature_names = feature_names.tolist()\n if len(set(feature_names)) != len(feature_names):\n raise ValueError('feature_names should not contain duplicates.')\n\n def convert_feature(fx):\n if isinstance(fx, str):\n try:\n fx = feature_names.index(fx)\n except ValueError as e:\n raise ValueError('Feature %s not in feature_names' % fx) from e\n return int(fx)\n\n # convert features into a seq of int tuples\n tmp_features = []\n for fxs in features:\n if isinstance(fxs, (numbers.Integral, str)):\n fxs = (fxs,)\n try:\n fxs = tuple(convert_feature(fx) for fx in fxs)\n except TypeError as e:\n raise ValueError(\n 'Each entry in features must be either an int, '\n 'a string, or an iterable of size at most 2.'\n ) from e\n if not 1 <= np.size(fxs) <= 2:\n raise ValueError('Each entry in features must be either an int, '\n 'a string, or an iterable of size at most 2.')\n if kind != 'average' and np.size(fxs) > 1:\n raise ValueError(\n f\"It is not possible to display individual effects for more \"\n f\"than one feature at a time. 
Got: features={features}.\")\n tmp_features.append(fxs)\n\n features = tmp_features\n\n # Early exit if the axes does not have the correct number of axes\n if ax is not None and not isinstance(ax, plt.Axes):\n axes = np.asarray(ax, dtype=object)\n if axes.size != len(features):\n raise ValueError(\"Expected ax to have {} axes, got {}\".format(\n len(features), axes.size))\n\n for i in chain.from_iterable(features):\n if i >= len(feature_names):\n raise ValueError('All entries of features must be less than '\n 'len(feature_names) = {0}, got {1}.'\n .format(len(feature_names), i))\n\n if isinstance(subsample, numbers.Integral):\n if subsample <= 0:\n raise ValueError(\n f\"When an integer, subsample={subsample} should be positive.\"\n )\n elif isinstance(subsample, numbers.Real):\n if subsample <= 0 or subsample >= 1:\n raise ValueError(\n f\"When a floating-point, subsample={subsample} should be in \"\n f\"the (0, 1) range.\"\n )\n\n # compute predictions and/or averaged predictions\n pd_results = Parallel(n_jobs=n_jobs, verbose=verbose)(\n delayed(partial_dependence)(estimator, X, fxs,\n response_method=response_method,\n method=method,\n grid_resolution=grid_resolution,\n percentiles=percentiles,\n kind=kind)\n for fxs in features)\n\n # For multioutput regression, we can only check the validity of target\n # now that we have the predictions.\n # Also note: as multiclass-multioutput classifiers are not supported,\n # multiclass and multioutput scenario are mutually exclusive. So there is\n # no risk of overwriting target_idx here.\n pd_result = pd_results[0] # checking the first result is enough\n n_tasks = (pd_result.average.shape[0] if kind == 'average'\n else pd_result.individual.shape[0])\n if is_regressor(estimator) and n_tasks > 1:\n if target is None:\n raise ValueError(\n 'target must be specified for multi-output regressors')\n if not 0 <= target <= n_tasks:\n raise ValueError(\n 'target must be in [0, n_tasks], got {}.'.format(target))\n target_idx = target\n\n # get global min and max average predictions of PD grouped by plot type\n pdp_lim = {}\n for pdp in pd_results:\n values = pdp[\"values\"]\n preds = (pdp.average if kind == 'average' else pdp.individual)\n min_pd = preds[target_idx].min()\n max_pd = preds[target_idx].max()\n n_fx = len(values)\n old_min_pd, old_max_pd = pdp_lim.get(n_fx, (min_pd, max_pd))\n min_pd = min(min_pd, old_min_pd)\n max_pd = max(max_pd, old_max_pd)\n pdp_lim[n_fx] = (min_pd, max_pd)\n\n deciles = {}\n for fx in chain.from_iterable(features):\n if fx not in deciles:\n X_col = _safe_indexing(X, fx, axis=1)\n deciles[fx] = mquantiles(X_col, prob=np.arange(0.1, 1.0, 0.1))\n\n display = PartialDependenceDisplay(\n pd_results=pd_results,\n features=features,\n feature_names=feature_names,\n target_idx=target_idx,\n pdp_lim=pdp_lim,\n deciles=deciles,\n kind=kind,\n subsample=subsample,\n random_state=random_state,\n )\n return display.plot(\n ax=ax, n_cols=n_cols, line_kw=line_kw, contour_kw=contour_kw\n )" + }, + { + "id": "scikit-learn/sklearn.inspection.setup/configuration", + "name": "configuration", + "qname": "sklearn.inspection.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.inspection.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.inspection.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + 
"id": "scikit-learn/sklearn.inspection.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.inspection.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package=\"\", top_path=None):\n config = Configuration(\"inspection\", parent_package, top_path)\n\n config.add_subpackage('_plot')\n config.add_subpackage('_plot.tests')\n\n config.add_subpackage('tests')\n\n return config" + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/__getstate__", + "name": "__getstate__", + "qname": "sklearn.isotonic.IsotonicRegression.__getstate__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/__getstate__/self", + "name": "self", + "qname": "sklearn.isotonic.IsotonicRegression.__getstate__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Pickle-protocol - return state of the estimator.", + "docstring": "Pickle-protocol - return state of the estimator. ", + "code": " def __getstate__(self):\n \"\"\"Pickle-protocol - return state of the estimator. \"\"\"\n state = super().__getstate__()\n # remove interpolation method\n state.pop('f_', None)\n return state" + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/__init__", + "name": "__init__", + "qname": "sklearn.isotonic.IsotonicRegression.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/__init__/self", + "name": "self", + "qname": "sklearn.isotonic.IsotonicRegression.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/__init__/y_min", + "name": "y_min", + "qname": "sklearn.isotonic.IsotonicRegression.__init__.y_min", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Lower bound on the lowest predicted value (the minimum value may\nstill be higher). If not set, defaults to -inf." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/__init__/y_max", + "name": "y_max", + "qname": "sklearn.isotonic.IsotonicRegression.__init__.y_max", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Upper bound on the highest predicted value (the maximum may still be\nlower). If not set, defaults to +inf." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/__init__/increasing", + "name": "increasing", + "qname": "sklearn.isotonic.IsotonicRegression.__init__.increasing", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool or 'auto'", + "default_value": "True", + "description": "Determines whether the predictions should be constrained to increase\nor decrease with `X`. 'auto' will decide based on the Spearman\ncorrelation estimate's sign." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'auto'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/__init__/out_of_bounds", + "name": "out_of_bounds", + "qname": "sklearn.isotonic.IsotonicRegression.__init__.out_of_bounds", + "default_value": "'nan'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'nan', 'clip', 'raise'}", + "default_value": "'nan'", + "description": "Handles how `X` values outside of the training domain are handled\nduring prediction.\n\n- 'nan', predictions will be NaN.\n- 'clip', predictions will be set to the value corresponding to\n the nearest train interval endpoint.\n- 'raise', a `ValueError` is raised." + }, + "type": { + "kind": "EnumType", + "values": ["nan", "clip", "raise"] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Isotonic regression model.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, y_min=None, y_max=None, increasing=True,\n out_of_bounds='nan'):\n self.y_min = y_min\n self.y_max = y_max\n self.increasing = increasing\n self.out_of_bounds = out_of_bounds" + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/__setstate__", + "name": "__setstate__", + "qname": "sklearn.isotonic.IsotonicRegression.__setstate__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/__setstate__/self", + "name": "self", + "qname": "sklearn.isotonic.IsotonicRegression.__setstate__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/__setstate__/state", + "name": "state", + "qname": "sklearn.isotonic.IsotonicRegression.__setstate__.state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Pickle-protocol - set state of the estimator.\n\nWe need to rebuild the interpolation function.", + "docstring": "Pickle-protocol - set state of the estimator.\n\nWe need to rebuild the interpolation function.", + "code": " def __setstate__(self, state):\n \"\"\"Pickle-protocol - set state of the estimator.\n\n We need to rebuild the interpolation function.\n \"\"\"\n super().__setstate__(state)\n if hasattr(self, 'X_thresholds_') and hasattr(self, 'y_thresholds_'):\n self._build_f(self.X_thresholds_, self.y_thresholds_)" + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_build_f", + "name": "_build_f", + "qname": 
"sklearn.isotonic.IsotonicRegression._build_f", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_build_f/self", + "name": "self", + "qname": "sklearn.isotonic.IsotonicRegression._build_f.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_build_f/X", + "name": "X", + "qname": "sklearn.isotonic.IsotonicRegression._build_f.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_build_f/y", + "name": "y", + "qname": "sklearn.isotonic.IsotonicRegression._build_f.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build the f_ interp1d function.", + "docstring": "Build the f_ interp1d function.", + "code": " def _build_f(self, X, y):\n \"\"\"Build the f_ interp1d function.\"\"\"\n\n # Handle the out_of_bounds argument by setting bounds_error\n if self.out_of_bounds not in [\"raise\", \"nan\", \"clip\"]:\n raise ValueError(\"The argument ``out_of_bounds`` must be in \"\n \"'nan', 'clip', 'raise'; got {0}\"\n .format(self.out_of_bounds))\n\n bounds_error = self.out_of_bounds == \"raise\"\n if len(y) == 1:\n # single y, constant prediction\n self.f_ = lambda x: y.repeat(x.shape)\n else:\n self.f_ = interpolate.interp1d(X, y, kind='linear',\n bounds_error=bounds_error)" + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_build_y", + "name": "_build_y", + "qname": "sklearn.isotonic.IsotonicRegression._build_y", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_build_y/self", + "name": "self", + "qname": "sklearn.isotonic.IsotonicRegression._build_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_build_y/X", + "name": "X", + "qname": "sklearn.isotonic.IsotonicRegression._build_y.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_build_y/y", + "name": "y", + "qname": "sklearn.isotonic.IsotonicRegression._build_y.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_build_y/sample_weight", + "name": "sample_weight", + "qname": "sklearn.isotonic.IsotonicRegression._build_y.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_build_y/trim_duplicates", + "name": "trim_duplicates", + "qname": 
"sklearn.isotonic.IsotonicRegression._build_y.trim_duplicates", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build the y_ IsotonicRegression.", + "docstring": "Build the y_ IsotonicRegression.", + "code": " def _build_y(self, X, y, sample_weight, trim_duplicates=True):\n \"\"\"Build the y_ IsotonicRegression.\"\"\"\n self._check_input_data_shape(X)\n X = X.reshape(-1) # use 1d view\n\n # Determine increasing if auto-determination requested\n if self.increasing == 'auto':\n self.increasing_ = check_increasing(X, y)\n else:\n self.increasing_ = self.increasing\n\n # If sample_weights is passed, removed zero-weight values and clean\n # order\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n mask = sample_weight > 0\n X, y, sample_weight = X[mask], y[mask], sample_weight[mask]\n\n order = np.lexsort((y, X))\n X, y, sample_weight = [array[order] for array in [X, y, sample_weight]]\n unique_X, unique_y, unique_sample_weight = _make_unique(\n X, y, sample_weight)\n\n X = unique_X\n y = isotonic_regression(unique_y, sample_weight=unique_sample_weight,\n y_min=self.y_min, y_max=self.y_max,\n increasing=self.increasing_)\n\n # Handle the left and right bounds on X\n self.X_min_, self.X_max_ = np.min(X), np.max(X)\n\n if trim_duplicates:\n # Remove unnecessary points for faster prediction\n keep_data = np.ones((len(y),), dtype=bool)\n # Aside from the 1st and last point, remove points whose y values\n # are equal to both the point before and the point after it.\n keep_data[1:-1] = np.logical_or(\n np.not_equal(y[1:-1], y[:-2]),\n np.not_equal(y[1:-1], y[2:])\n )\n return X[keep_data], y[keep_data]\n else:\n # The ability to turn off trim_duplicates is only used to it make\n # easier to unit test that removing duplicates in y does not have\n # any impact the resulting interpolation function (besides\n # prediction speed).\n return X, y" + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_check_input_data_shape", + "name": "_check_input_data_shape", + "qname": "sklearn.isotonic.IsotonicRegression._check_input_data_shape", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_check_input_data_shape/self", + "name": "self", + "qname": "sklearn.isotonic.IsotonicRegression._check_input_data_shape.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_check_input_data_shape/X", + "name": "X", + "qname": "sklearn.isotonic.IsotonicRegression._check_input_data_shape.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_input_data_shape(self, X):\n if not (X.ndim == 1 or (X.ndim == 2 and X.shape[1] == 1)):\n msg = \"Isotonic regression input X should be a 1d array or \" \\\n \"2d array with 1 feature\"\n raise ValueError(msg)" + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_more_tags", + "name": "_more_tags", + "qname": 
"sklearn.isotonic.IsotonicRegression._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/_more_tags/self", + "name": "self", + "qname": "sklearn.isotonic.IsotonicRegression._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'X_types': ['1darray']}" + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/fit", + "name": "fit", + "qname": "sklearn.isotonic.IsotonicRegression.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/fit/self", + "name": "self", + "qname": "sklearn.isotonic.IsotonicRegression.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/fit/X", + "name": "X", + "qname": "sklearn.isotonic.IsotonicRegression.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, 1)", + "default_value": "", + "description": "Training data.\n\n.. versionchanged:: 0.24\n Also accepts 2d array with 1 feature." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, 1)" + } + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/fit/y", + "name": "y", + "qname": "sklearn.isotonic.IsotonicRegression.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Training target." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.isotonic.IsotonicRegression.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weights. If set to None, all weights will be set to 1 (equal\nweights)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit the model using X, y as training data.", + "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples,) or (n_samples, 1)\n Training data.\n\n .. versionchanged:: 0.24\n Also accepts 2d array with 1 feature.\n\ny : array-like of shape (n_samples,)\n Training target.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights. 
If set to None, all weights will be set to 1 (equal\n weights).\n\nReturns\n-------\nself : object\n Returns an instance of self.\n\nNotes\n-----\nX is stored for future use, as :meth:`transform` needs X to interpolate\nnew input data.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples,) or (n_samples, 1)\n Training data.\n\n .. versionchanged:: 0.24\n Also accepts 2d array with 1 feature.\n\n y : array-like of shape (n_samples,)\n Training target.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights. If set to None, all weights will be set to 1 (equal\n weights).\n\n Returns\n -------\n self : object\n Returns an instance of self.\n\n Notes\n -----\n X is stored for future use, as :meth:`transform` needs X to interpolate\n new input data.\n \"\"\"\n check_params = dict(accept_sparse=False, ensure_2d=False)\n X = check_array(X, dtype=[np.float64, np.float32], **check_params)\n y = check_array(y, dtype=X.dtype, **check_params)\n check_consistent_length(X, y, sample_weight)\n\n # Transform y by running the isotonic regression algorithm and\n # transform X accordingly.\n X, y = self._build_y(X, y, sample_weight)\n\n # It is necessary to store the non-redundant part of the training set\n # on the model to make it possible to support model persistence via\n # the pickle module as the object built by scipy.interp1d is not\n # picklable directly.\n self.X_thresholds_, self.y_thresholds_ = X, y\n\n # Build the interpolation function\n self._build_f(X, y)\n return self" + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/predict", + "name": "predict", + "qname": "sklearn.isotonic.IsotonicRegression.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/predict/self", + "name": "self", + "qname": "sklearn.isotonic.IsotonicRegression.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/predict/T", + "name": "T", + "qname": "sklearn.isotonic.IsotonicRegression.predict.T", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, 1)", + "default_value": "", + "description": "Data to transform." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, 1)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Predict new data by linear interpolation.", + "docstring": "Predict new data by linear interpolation.\n\nParameters\n----------\nT : array-like of shape (n_samples,) or (n_samples, 1)\n Data to transform.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n Transformed data.", + "code": " def predict(self, T):\n \"\"\"Predict new data by linear interpolation.\n\n Parameters\n ----------\n T : array-like of shape (n_samples,) or (n_samples, 1)\n Data to transform.\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,)\n Transformed data.\n \"\"\"\n return self.transform(T)" + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/transform", + "name": "transform", + "qname": "sklearn.isotonic.IsotonicRegression.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/transform/self", + "name": "self", + "qname": "sklearn.isotonic.IsotonicRegression.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.isotonic/IsotonicRegression/transform/T", + "name": "T", + "qname": "sklearn.isotonic.IsotonicRegression.transform.T", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, 1)", + "default_value": "", + "description": "Data to transform.\n\n.. versionchanged:: 0.24\n Also accepts 2d array with 1 feature." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, 1)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Transform new data by linear interpolation", + "docstring": "Transform new data by linear interpolation\n\nParameters\n----------\nT : array-like of shape (n_samples,) or (n_samples, 1)\n Data to transform.\n\n .. versionchanged:: 0.24\n Also accepts 2d array with 1 feature.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n The transformed data", + "code": " def transform(self, T):\n \"\"\"Transform new data by linear interpolation\n\n Parameters\n ----------\n T : array-like of shape (n_samples,) or (n_samples, 1)\n Data to transform.\n\n .. 
versionchanged:: 0.24\n Also accepts 2d array with 1 feature.\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,)\n The transformed data\n \"\"\"\n\n if hasattr(self, 'X_thresholds_'):\n dtype = self.X_thresholds_.dtype\n else:\n dtype = np.float64\n\n T = check_array(T, dtype=dtype, ensure_2d=False)\n\n self._check_input_data_shape(T)\n T = T.reshape(-1) # use 1d view\n\n # Handle the out_of_bounds argument by clipping if needed\n if self.out_of_bounds not in [\"raise\", \"nan\", \"clip\"]:\n raise ValueError(\"The argument ``out_of_bounds`` must be in \"\n \"'nan', 'clip', 'raise'; got {0}\"\n .format(self.out_of_bounds))\n\n if self.out_of_bounds == \"clip\":\n T = np.clip(T, self.X_min_, self.X_max_)\n\n res = self.f_(T)\n\n # on scipy 0.17, interp1d up-casts to float64, so we cast back\n res = res.astype(T.dtype)\n\n return res" + }, + { + "id": "scikit-learn/sklearn.isotonic/check_increasing", + "name": "check_increasing", + "qname": "sklearn.isotonic.check_increasing", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.isotonic/check_increasing/x", + "name": "x", + "qname": "sklearn.isotonic.check_increasing.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.isotonic/check_increasing/y", + "name": "y", + "qname": "sklearn.isotonic.check_increasing.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Training target." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Determine whether y is monotonically correlated with x.\n\ny is found increasing or decreasing with respect to x based on a Spearman\ncorrelation test.", + "docstring": "Determine whether y is monotonically correlated with x.\n\ny is found increasing or decreasing with respect to x based on a Spearman\ncorrelation test.\n\nParameters\n----------\nx : array-like of shape (n_samples,)\n Training data.\n\ny : array-like of shape (n_samples,)\n Training target.\n\nReturns\n-------\nincreasing_bool : boolean\n Whether the relationship is increasing or decreasing.\n\nNotes\n-----\nThe Spearman correlation coefficient is estimated from the data, and the\nsign of the resulting estimate is used as the result.\n\nIn the event that the 95% confidence interval based on Fisher transform\nspans zero, a warning is raised.\n\nReferences\n----------\nFisher transformation. 
Wikipedia.\nhttps://en.wikipedia.org/wiki/Fisher_transformation", + "code": "def check_increasing(x, y):\n \"\"\"Determine whether y is monotonically correlated with x.\n\n y is found increasing or decreasing with respect to x based on a Spearman\n correlation test.\n\n Parameters\n ----------\n x : array-like of shape (n_samples,)\n Training data.\n\n y : array-like of shape (n_samples,)\n Training target.\n\n Returns\n -------\n increasing_bool : boolean\n Whether the relationship is increasing or decreasing.\n\n Notes\n -----\n The Spearman correlation coefficient is estimated from the data, and the\n sign of the resulting estimate is used as the result.\n\n In the event that the 95% confidence interval based on Fisher transform\n spans zero, a warning is raised.\n\n References\n ----------\n Fisher transformation. Wikipedia.\n https://en.wikipedia.org/wiki/Fisher_transformation\n \"\"\"\n\n # Calculate Spearman rho estimate and set return accordingly.\n rho, _ = spearmanr(x, y)\n increasing_bool = rho >= 0\n\n # Run Fisher transform to get the rho CI, but handle rho=+/-1\n if rho not in [-1.0, 1.0] and len(x) > 3:\n F = 0.5 * math.log((1. + rho) / (1. - rho))\n F_se = 1 / math.sqrt(len(x) - 3)\n\n # Use a 95% CI, i.e., +/-1.96 S.E.\n # https://en.wikipedia.org/wiki/Fisher_transformation\n rho_0 = math.tanh(F - 1.96 * F_se)\n rho_1 = math.tanh(F + 1.96 * F_se)\n\n # Warn if the CI spans zero.\n if np.sign(rho_0) != np.sign(rho_1):\n warnings.warn(\"Confidence interval of the Spearman \"\n \"correlation coefficient spans zero. \"\n \"Determination of ``increasing`` may be \"\n \"suspect.\")\n\n return increasing_bool" + }, + { + "id": "scikit-learn/sklearn.isotonic/isotonic_regression", + "name": "isotonic_regression", + "qname": "sklearn.isotonic.isotonic_regression", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.isotonic/isotonic_regression/y", + "name": "y", + "qname": "sklearn.isotonic.isotonic_regression.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.isotonic/isotonic_regression/sample_weight", + "name": "sample_weight", + "qname": "sklearn.isotonic.isotonic_regression.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weights on each point of the regression.\nIf None, weight is set to 1 (equal weights)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.isotonic/isotonic_regression/y_min", + "name": "y_min", + "qname": "sklearn.isotonic.isotonic_regression.y_min", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Lower bound on the lowest predicted value (the minimum value may\nstill be higher). If not set, defaults to -inf." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.isotonic/isotonic_regression/y_max", + "name": "y_max", + "qname": "sklearn.isotonic.isotonic_regression.y_max", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Upper bound on the highest predicted value (the maximum may still be\nlower). If not set, defaults to +inf." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.isotonic/isotonic_regression/increasing", + "name": "increasing", + "qname": "sklearn.isotonic.isotonic_regression.increasing", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to compute ``y_`` is increasing (if set to True) or decreasing\n(if set to False)" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Solve the isotonic regression model.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Solve the isotonic regression model.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n The data.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights on each point of the regression.\n If None, weight is set to 1 (equal weights).\n\ny_min : float, default=None\n Lower bound on the lowest predicted value (the minimum value may\n still be higher). If not set, defaults to -inf.\n\ny_max : float, default=None\n Upper bound on the highest predicted value (the maximum may still be\n lower). If not set, defaults to +inf.\n\nincreasing : bool, default=True\n Whether to compute ``y_`` is increasing (if set to True) or decreasing\n (if set to False)\n\nReturns\n-------\ny_ : list of floats\n Isotonic fit of y.\n\nReferences\n----------\n\"Active set algorithms for isotonic regression; A unifying framework\"\nby Michael J. Best and Nilotpal Chakravarti, section 3.", + "code": "@_deprecate_positional_args\ndef isotonic_regression(y, *, sample_weight=None, y_min=None, y_max=None,\n increasing=True):\n \"\"\"Solve the isotonic regression model.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y : array-like of shape (n_samples,)\n The data.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights on each point of the regression.\n If None, weight is set to 1 (equal weights).\n\n y_min : float, default=None\n Lower bound on the lowest predicted value (the minimum value may\n still be higher). If not set, defaults to -inf.\n\n y_max : float, default=None\n Upper bound on the highest predicted value (the maximum may still be\n lower). If not set, defaults to +inf.\n\n increasing : bool, default=True\n Whether to compute ``y_`` is increasing (if set to True) or decreasing\n (if set to False)\n\n Returns\n -------\n y_ : list of floats\n Isotonic fit of y.\n\n References\n ----------\n \"Active set algorithms for isotonic regression; A unifying framework\"\n by Michael J. 
Best and Nilotpal Chakravarti, section 3.\n \"\"\"\n order = np.s_[:] if increasing else np.s_[::-1]\n y = check_array(y, ensure_2d=False, dtype=[np.float64, np.float32])\n y = np.array(y[order], dtype=y.dtype)\n sample_weight = _check_sample_weight(sample_weight, y, dtype=y.dtype)\n sample_weight = np.ascontiguousarray(sample_weight[order])\n\n _inplace_contiguous_isotonic_regression(y, sample_weight)\n if y_min is not None or y_max is not None:\n # Older versions of np.clip don't accept None as a bound, so use np.inf\n if y_min is None:\n y_min = -np.inf\n if y_max is None:\n y_max = np.inf\n np.clip(y, y_min, y_max, y)\n return y[order]" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/__init__", + "name": "__init__", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/__init__/self", + "name": "self", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/__init__/sample_steps", + "name": "sample_steps", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler.__init__.sample_steps", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Gives the number of (complex) sampling points." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/__init__/sample_interval", + "name": "sample_interval", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler.__init__.sample_interval", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Sampling interval. Must be specified when sample_steps not in {1,2,3}." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Approximate feature map for additive chi2 kernel.\n\nUses sampling the fourier transform of the kernel characteristic\nat regular intervals.\n\nSince the kernel that is to be approximated is additive, the components of\nthe input vectors can be treated separately. Each entry in the original\nspace is transformed into 2*sample_steps+1 features, where sample_steps is\na parameter of the method. Typical values of sample_steps include 1, 2 and\n3.\n\nOptimal choices for the sampling interval for certain data ranges can be\ncomputed (see the reference). 
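A minimal sketch of the `isotonic_regression` function documented above, showing the keyword-only `y_min`/`y_max` bounds; the input vector is made up.

```python
import numpy as np
from sklearn.isotonic import isotonic_regression

y = np.array([4.0, 5.0, 1.0, 6.0, 8.0, 7.0])

# Pool-adjacent-violators fit; bounds are applied by clipping afterwards
y_iso = isotonic_regression(y, y_min=2.0, y_max=7.5, increasing=True)
print(y_iso)  # non-decreasing, clipped to [2.0, 7.5]
```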
The default values should be reasonable.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, sample_steps=2, sample_interval=None):\n self.sample_steps = sample_steps\n self.sample_interval = sample_interval" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/_more_tags", + "name": "_more_tags", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/_more_tags/self", + "name": "self", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'stateless': True,\n 'requires_positive_X': True}" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/_transform_dense", + "name": "_transform_dense", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler._transform_dense", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/_transform_dense/self", + "name": "self", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler._transform_dense.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/_transform_dense/X", + "name": "X", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler._transform_dense.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _transform_dense(self, X):\n non_zero = (X != 0.0)\n X_nz = X[non_zero]\n\n X_step = np.zeros_like(X)\n X_step[non_zero] = np.sqrt(X_nz * self.sample_interval_)\n\n X_new = [X_step]\n\n log_step_nz = self.sample_interval_ * np.log(X_nz)\n step_nz = 2 * X_nz * self.sample_interval_\n\n for j in range(1, self.sample_steps):\n factor_nz = np.sqrt(step_nz /\n np.cosh(np.pi * j * self.sample_interval_))\n\n X_step = np.zeros_like(X)\n X_step[non_zero] = factor_nz * np.cos(j * log_step_nz)\n X_new.append(X_step)\n\n X_step = np.zeros_like(X)\n X_step[non_zero] = factor_nz * np.sin(j * log_step_nz)\n X_new.append(X_step)\n\n return np.hstack(X_new)" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/_transform_sparse", + "name": "_transform_sparse", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler._transform_sparse", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/_transform_sparse/self", + "name": "self", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler._transform_sparse.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/_transform_sparse/X", + "name": "X", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler._transform_sparse.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _transform_sparse(self, X):\n indices = X.indices.copy()\n indptr = X.indptr.copy()\n\n data_step = np.sqrt(X.data * self.sample_interval_)\n X_step = sp.csr_matrix((data_step, indices, indptr),\n shape=X.shape, dtype=X.dtype, copy=False)\n X_new = [X_step]\n\n log_step_nz = self.sample_interval_ * np.log(X.data)\n step_nz = 2 * X.data * self.sample_interval_\n\n for j in range(1, self.sample_steps):\n factor_nz = np.sqrt(step_nz /\n np.cosh(np.pi * j * self.sample_interval_))\n\n data_step = factor_nz * np.cos(j * log_step_nz)\n X_step = sp.csr_matrix((data_step, indices, indptr),\n shape=X.shape, dtype=X.dtype, copy=False)\n X_new.append(X_step)\n\n data_step = factor_nz * np.sin(j * log_step_nz)\n X_step = sp.csr_matrix((data_step, indices, indptr),\n shape=X.shape, dtype=X.dtype, copy=False)\n X_new.append(X_step)\n\n return sp.hstack(X_new)" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/fit", + "name": "fit", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/fit/self", + "name": "self", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/fit/X", + "name": "X", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples in the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/fit/y", + "name": "y", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Set the parameters", + "docstring": "Set the parameters\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer.", + "code": " def fit(self, X, y=None):\n \"\"\"Set the parameters\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n self : object\n Returns the transformer.\n \"\"\"\n X = self._validate_data(X, accept_sparse='csr')\n check_non_negative(X, 'X in AdditiveChi2Sampler.fit')\n\n if self.sample_interval is None:\n # See reference, figure 2 c)\n if self.sample_steps == 1:\n self.sample_interval_ = 0.8\n elif self.sample_steps == 2:\n self.sample_interval_ = 0.5\n elif self.sample_steps == 3:\n self.sample_interval_ = 0.4\n else:\n raise ValueError(\"If sample_steps is not in [1, 2, 3],\"\n \" you need to provide sample_interval\")\n else:\n self.sample_interval_ = self.sample_interval\n return self" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/transform", + "name": "transform", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/transform/self", + "name": "self", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/AdditiveChi2Sampler/transform/X", + "name": "X", + "qname": "sklearn.kernel_approximation.AdditiveChi2Sampler.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply approximate feature map to X.", + "docstring": "Apply approximate feature map to X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n\nReturns\n-------\nX_new : {ndarray, sparse matrix}, shape = (n_samples, n_features * (2*sample_steps + 1))\n Whether the return value is an array of sparse matrix depends on\n the type of the input X.", + "code": " def transform(self, X):\n \"\"\"Apply approximate feature map to X.\n\n Parameters\n ----------\n X : {array-like, sparse 
matrix} of shape (n_samples, n_features)\n\n Returns\n -------\n X_new : {ndarray, sparse matrix}, \\\n shape = (n_samples, n_features * (2*sample_steps + 1))\n Whether the return value is an array of sparse matrix depends on\n the type of the input X.\n \"\"\"\n msg = (\"%(name)s is not fitted. Call fit to set the parameters before\"\n \" calling transform\")\n check_is_fitted(self, msg=msg)\n\n X = self._validate_data(X, accept_sparse='csr', reset=False)\n check_non_negative(X, 'X in AdditiveChi2Sampler.transform')\n sparse = sp.issparse(X)\n\n # zeroth component\n # 1/cosh = sech\n # cosh(0) = 1.0\n\n transf = self._transform_sparse if sparse else self._transform_dense\n return transf(X)" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/__init__", + "name": "__init__", + "qname": "sklearn.kernel_approximation.Nystroem.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/__init__/self", + "name": "self", + "qname": "sklearn.kernel_approximation.Nystroem.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/__init__/kernel", + "name": "kernel", + "qname": "sklearn.kernel_approximation.Nystroem.__init__.kernel", + "default_value": "'rbf'", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "string or callable", + "default_value": "'rbf'", + "description": "Kernel map to be approximated. A callable should accept two arguments\nand the keyword arguments passed to this object as kernel_params, and\nshould return a floating point number." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/__init__/gamma", + "name": "gamma", + "qname": "sklearn.kernel_approximation.Nystroem.__init__.gamma", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\nand sigmoid kernels. Interpretation of the default value is left to\nthe kernel; see the documentation for sklearn.metrics.pairwise.\nIgnored by other kernels." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/__init__/coef0", + "name": "coef0", + "qname": "sklearn.kernel_approximation.Nystroem.__init__.coef0", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Zero coefficient for polynomial and sigmoid kernels.\nIgnored by other kernels." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/__init__/degree", + "name": "degree", + "qname": "sklearn.kernel_approximation.Nystroem.__init__.degree", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Degree of the polynomial kernel. Ignored by other kernels." 
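A short usage sketch of `AdditiveChi2Sampler` (fit plus transform via the inherited `fit_transform`); the non-negative toy matrix is an assumption, and the output width follows the documented `n_features * (2*sample_steps + 1)` rule.

```python
import numpy as np
from sklearn.kernel_approximation import AdditiveChi2Sampler

rng = np.random.RandomState(0)
X = rng.rand(5, 4)  # must be non-negative for the additive chi2 kernel

# sample_steps=2 maps each input feature to 2*2 + 1 = 5 output features
sampler = AdditiveChi2Sampler(sample_steps=2)
X_new = sampler.fit_transform(X)
print(X_new.shape)  # (5, 20) == (n_samples, n_features * (2*sample_steps + 1))
```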
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/__init__/kernel_params", + "name": "kernel_params", + "qname": "sklearn.kernel_approximation.Nystroem.__init__.kernel_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional parameters (keyword arguments) for kernel function passed\nas callable object." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/__init__/n_components", + "name": "n_components", + "qname": "sklearn.kernel_approximation.Nystroem.__init__.n_components", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Number of features to construct.\nHow many data points will be used to construct the mapping." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/__init__/random_state", + "name": "random_state", + "qname": "sklearn.kernel_approximation.Nystroem.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Pseudo-random number generator to control the uniform sampling without\nreplacement of n_components of the training data to construct the basis\nkernel.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.kernel_approximation.Nystroem.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation. This works by breaking\ndown the kernel matrix into n_jobs even slices and computing them in\nparallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Approximate a kernel map using a subset of the training data.\n\nConstructs an approximate feature map for an arbitrary kernel\nusing a subset of the data as basis.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.13", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, kernel=\"rbf\", *, gamma=None, coef0=None, degree=None,\n kernel_params=None, n_components=100, random_state=None,\n n_jobs=None):\n\n self.kernel = kernel\n self.gamma = gamma\n self.coef0 = coef0\n self.degree = degree\n self.kernel_params = kernel_params\n self.n_components = n_components\n self.random_state = random_state\n self.n_jobs = n_jobs" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/_get_kernel_params", + "name": "_get_kernel_params", + "qname": "sklearn.kernel_approximation.Nystroem._get_kernel_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/_get_kernel_params/self", + "name": "self", + "qname": "sklearn.kernel_approximation.Nystroem._get_kernel_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_kernel_params(self):\n params = self.kernel_params\n if params is None:\n params = {}\n if not callable(self.kernel) and self.kernel != 'precomputed':\n for param in (KERNEL_PARAMS[self.kernel]):\n if getattr(self, param) is not None:\n params[param] = getattr(self, param)\n else:\n if (self.gamma is not None or\n self.coef0 is not None or\n self.degree is not None):\n raise ValueError(\"Don't pass gamma, coef0 or degree to \"\n \"Nystroem if using a callable \"\n \"or precomputed kernel\")\n\n return params" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/_more_tags", + "name": "_more_tags", + "qname": "sklearn.kernel_approximation.Nystroem._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/_more_tags/self", + "name": "self", + "qname": "sklearn.kernel_approximation.Nystroem._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_transformer_preserve_dtypes':\n 'dtypes are preserved but not at a close enough precision',\n },\n 'preserves_dtype': [np.float64, np.float32]\n }" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/fit", + "name": "fit", + "qname": "sklearn.kernel_approximation.Nystroem.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/fit/self", + "name": "self", + "qname": "sklearn.kernel_approximation.Nystroem.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/fit/X", + "name": "X", + "qname": "sklearn.kernel_approximation.Nystroem.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/fit/y", + "name": "y", + "qname": "sklearn.kernel_approximation.Nystroem.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit estimator to data.\n\nSamples a subset of training points, computes kernel\non these and computes normalization matrix.", + "docstring": "Fit estimator to data.\n\nSamples a subset of training points, computes kernel\non these and computes normalization matrix.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit estimator to data.\n\n Samples a subset of training points, computes kernel\n on these and computes normalization matrix.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n \"\"\"\n X = self._validate_data(X, accept_sparse='csr')\n rnd = check_random_state(self.random_state)\n n_samples = X.shape[0]\n\n # get basis vectors\n if self.n_components > n_samples:\n # XXX should we just bail?\n n_components = n_samples\n warnings.warn(\"n_components > n_samples. This is not possible.\\n\"\n \"n_components was set to n_samples, which results\"\n \" in inefficient evaluation of the full kernel.\")\n\n else:\n n_components = self.n_components\n n_components = min(n_samples, n_components)\n inds = rnd.permutation(n_samples)\n basis_inds = inds[:n_components]\n basis = X[basis_inds]\n\n basis_kernel = pairwise_kernels(basis, metric=self.kernel,\n filter_params=True,\n n_jobs=self.n_jobs,\n **self._get_kernel_params())\n\n # sqrt of kernel matrix on basis vectors\n U, S, V = svd(basis_kernel)\n S = np.maximum(S, 1e-12)\n self.normalization_ = np.dot(U / np.sqrt(S), V)\n self.components_ = basis\n self.component_indices_ = inds\n return self" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/transform", + "name": "transform", + "qname": "sklearn.kernel_approximation.Nystroem.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/transform/self", + "name": "self", + "qname": "sklearn.kernel_approximation.Nystroem.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/Nystroem/transform/X", + "name": "X", + "qname": "sklearn.kernel_approximation.Nystroem.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data to transform." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply feature map to X.\n\nComputes an approximate feature map using the kernel\nbetween some training points and X.", + "docstring": "Apply feature map to X.\n\nComputes an approximate feature map using the kernel\nbetween some training points and X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to transform.\n\nReturns\n-------\nX_transformed : ndarray of shape (n_samples, n_components)\n Transformed data.", + "code": " def transform(self, X):\n \"\"\"Apply feature map to X.\n\n Computes an approximate feature map using the kernel\n between some training points and X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data to transform.\n\n Returns\n -------\n X_transformed : ndarray of shape (n_samples, n_components)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse='csr', reset=False)\n\n kernel_params = self._get_kernel_params()\n embedded = pairwise_kernels(X, self.components_,\n metric=self.kernel,\n filter_params=True,\n n_jobs=self.n_jobs,\n **kernel_params)\n return np.dot(embedded, self.normalization_.T)" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/__init__", + "name": "__init__", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/__init__/self", + "name": "self", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/__init__/gamma", + "name": "gamma", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.__init__.gamma", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Parameter of the polynomial kernel whose feature map\nwill be approximated." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/__init__/degree", + "name": "degree", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.__init__.degree", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Degree of the polynomial kernel whose feature map\nwill be approximated." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/__init__/coef0", + "name": "coef0", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.__init__.coef0", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Constant term of the polynomial kernel whose feature map\nwill be approximated." 
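A sketch of `Nystroem` checking the approximation property described above: inner products of the mapped data approximate the exact RBF kernel. The dataset and hyperparameters are illustrative assumptions.

```python
import numpy as np
from sklearn.kernel_approximation import Nystroem
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X = rng.rand(50, 4)

# 20 basis points sampled from X approximate the RBF kernel feature map
nys = Nystroem(kernel="rbf", gamma=0.5, n_components=20, random_state=0)
X_new = nys.fit_transform(X)  # shape (50, 20)

# Inner products of the mapped data approximate the exact kernel matrix
K_approx = X_new @ X_new.T
K_exact = rbf_kernel(X, gamma=0.5)
print(np.abs(K_approx - K_exact).max())  # small approximation error
```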
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/__init__/n_components", + "name": "n_components", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.__init__.n_components", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Dimensionality of the output feature space. Usually, n_components\nshould be greater than the number of features in input samples in\norder to achieve good performance. The optimal score / run time\nbalance is typically achieved around n_components = 10 * n_features,\nbut this depends on the specific dataset being used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/__init__/random_state", + "name": "random_state", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Determines random number generation for indexHash and bitHash\ninitialization. Pass an int for reproducible results across multiple\nfunction calls. See :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Polynomial kernel approximation via Tensor Sketch.\n\nImplements Tensor Sketch, which approximates the feature map\nof the polynomial kernel::\n\n K(X, Y) = (gamma * <X, Y> + coef0)^degree\n\nby efficiently computing a Count Sketch of the outer product of a\nvector with itself using Fast Fourier Transforms (FFT). Read more in the\n:ref:`User Guide `.\n\n.. versionadded:: 0.24", + "docstring": "", + "code": " def __init__(self, *, gamma=1., degree=2, coef0=0, n_components=100,\n random_state=None):\n self.gamma = gamma\n self.degree = degree\n self.coef0 = coef0\n self.n_components = n_components\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/fit", + "name": "fit", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/fit/self", + "name": "self", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/fit/X", + "name": "X", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples in the number of samples\nand n_features is the number of features."
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/fit/y", + "name": "y", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit the model with X.\n\nInitializes the internal variables. The method needs no information\nabout the distribution of data, so we only care about n_features in X.", + "docstring": "Fit the model with X.\n\nInitializes the internal variables. The method needs no information\nabout the distribution of data, so we only care about n_features in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the model with X.\n\n Initializes the internal variables. The method needs no information\n about the distribution of data, so we only care about n_features in X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n self : object\n Returns the transformer.\n \"\"\"\n if not self.degree >= 1:\n raise ValueError(f\"degree={self.degree} should be >=1.\")\n\n X = self._validate_data(X, accept_sparse=\"csc\")\n random_state = check_random_state(self.random_state)\n\n n_features = X.shape[1]\n if self.coef0 != 0:\n n_features += 1\n\n self.indexHash_ = random_state.randint(0, high=self.n_components,\n size=(self.degree, n_features))\n\n self.bitHash_ = random_state.choice(a=[-1, 1],\n size=(self.degree, n_features))\n return self" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/transform", + "name": "transform", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/transform/self", + "name": "self", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/PolynomialCountSketch/transform/X", + "name": "X", + "qname": "sklearn.kernel_approximation.PolynomialCountSketch.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like}, shape (n_samples, n_features)", + "default_value": "", + "description": "New data, where n_samples in the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Generate the feature map approximation for X.", + "docstring": "Generate the feature map approximation for X.\n\nParameters\n----------\nX : {array-like}, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)", + "code": " def transform(self, X):\n \"\"\"Generate the feature map approximation for X.\n\n Parameters\n ----------\n X : {array-like}, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n X_new : array-like, shape (n_samples, n_components)\n \"\"\"\n\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse=\"csc\", reset=False)\n\n X_gamma = np.sqrt(self.gamma) * X\n\n if sp.issparse(X_gamma) and self.coef0 != 0:\n X_gamma = sp.hstack([X_gamma, np.sqrt(self.coef0) *\n np.ones((X_gamma.shape[0], 1))],\n format=\"csc\")\n\n elif not sp.issparse(X_gamma) and self.coef0 != 0:\n X_gamma = np.hstack([X_gamma, np.sqrt(self.coef0) *\n np.ones((X_gamma.shape[0], 1))])\n\n if X_gamma.shape[1] != self.indexHash_.shape[1]:\n raise ValueError(\"Number of features of test samples does not\"\n \" match that of training samples.\")\n\n count_sketches = np.zeros(\n (X_gamma.shape[0], self.degree, self.n_components))\n\n if sp.issparse(X_gamma):\n for j in range(X_gamma.shape[1]):\n for d in range(self.degree):\n iHashIndex = self.indexHash_[d, j]\n iHashBit = self.bitHash_[d, j]\n count_sketches[:, d, iHashIndex] += \\\n (iHashBit * X_gamma[:, j]).toarray().ravel()\n\n else:\n for j in range(X_gamma.shape[1]):\n for d in range(self.degree):\n iHashIndex = self.indexHash_[d, j]\n iHashBit = self.bitHash_[d, j]\n count_sketches[:, d, iHashIndex] += \\\n iHashBit * X_gamma[:, j]\n\n # For each same, compute a count sketch of phi(x) using the polynomial\n # multiplication (via FFT) of p count sketches of x.\n count_sketches_fft = fft(count_sketches, axis=2, overwrite_x=True)\n count_sketches_fft_prod = np.prod(count_sketches_fft, axis=1)\n data_sketch = np.real(ifft(count_sketches_fft_prod, overwrite_x=True))\n\n return data_sketch" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/RBFSampler/__init__", + "name": "__init__", + "qname": "sklearn.kernel_approximation.RBFSampler.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/RBFSampler/__init__/self", + "name": "self", + "qname": "sklearn.kernel_approximation.RBFSampler.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/RBFSampler/__init__/gamma", + "name": "gamma", + "qname": "sklearn.kernel_approximation.RBFSampler.__init__.gamma", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Parameter of RBF kernel: exp(-gamma * x^2)" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": 
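A sketch of `PolynomialCountSketch` against the exact polynomial kernel `(gamma * <X, Y> + coef0)^degree` that it approximates; the dataset and the `n_components` choice are assumptions.

```python
import numpy as np
from sklearn.kernel_approximation import PolynomialCountSketch
from sklearn.metrics.pairwise import polynomial_kernel

rng = np.random.RandomState(0)
X = rng.rand(30, 5)

# TensorSketch approximation of the degree-2 polynomial kernel feature map
ps = PolynomialCountSketch(gamma=1.0, degree=2, coef0=0,
                           n_components=500, random_state=0)
X_new = ps.fit_transform(X)

K_approx = X_new @ X_new.T
K_exact = polynomial_kernel(X, degree=2, gamma=1.0, coef0=0)
print(np.abs(K_approx - K_exact).mean())  # error shrinks as n_components grows
```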
"scikit-learn/sklearn.kernel_approximation/RBFSampler/__init__/n_components", + "name": "n_components", + "qname": "sklearn.kernel_approximation.RBFSampler.__init__.n_components", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Number of Monte Carlo samples per original feature.\nEquals the dimensionality of the computed feature space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/RBFSampler/__init__/random_state", + "name": "random_state", + "qname": "sklearn.kernel_approximation.RBFSampler.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Pseudo-random number generator to control the generation of the random\nweights and random offset when fitting the training data.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Approximates feature map of an RBF kernel by Monte Carlo approximation\nof its Fourier transform.\n\nIt implements a variant of Random Kitchen Sinks.[1]\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, gamma=1., n_components=100, random_state=None):\n self.gamma = gamma\n self.n_components = n_components\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/RBFSampler/fit", + "name": "fit", + "qname": "sklearn.kernel_approximation.RBFSampler.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/RBFSampler/fit/self", + "name": "self", + "qname": "sklearn.kernel_approximation.RBFSampler.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/RBFSampler/fit/X", + "name": "X", + "qname": "sklearn.kernel_approximation.RBFSampler.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples in the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/RBFSampler/fit/y", + "name": "y", + "qname": "sklearn.kernel_approximation.RBFSampler.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit the model with X.\n\nSamples random projection according to n_features.", + "docstring": "Fit the model with X.\n\nSamples random projection according to n_features.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the model with X.\n\n Samples random projection according to n_features.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n self : object\n Returns the transformer.\n \"\"\"\n\n X = self._validate_data(X, accept_sparse='csr')\n random_state = check_random_state(self.random_state)\n n_features = X.shape[1]\n\n self.random_weights_ = (np.sqrt(2 * self.gamma) * random_state.normal(\n size=(n_features, self.n_components)))\n\n self.random_offset_ = random_state.uniform(0, 2 * np.pi,\n size=self.n_components)\n return self" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/RBFSampler/transform", + "name": "transform", + "qname": "sklearn.kernel_approximation.RBFSampler.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/RBFSampler/transform/self", + "name": "self", + "qname": "sklearn.kernel_approximation.RBFSampler.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/RBFSampler/transform/X", + "name": "X", + "qname": "sklearn.kernel_approximation.RBFSampler.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "New data, where n_samples in the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply the approximate feature map to X.", + "docstring": "Apply the approximate feature map to X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)", + "code": " def transform(self, X):\n \"\"\"Apply the approximate feature map to X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n X_new : array-like, shape (n_samples, n_components)\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, accept_sparse='csr', reset=False)\n projection = safe_sparse_dot(X, self.random_weights_)\n projection += self.random_offset_\n np.cos(projection, projection)\n projection *= np.sqrt(2.) / np.sqrt(self.n_components)\n return projection" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/__init__", + "name": "__init__", + "qname": "sklearn.kernel_approximation.SkewedChi2Sampler.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/__init__/self", + "name": "self", + "qname": "sklearn.kernel_approximation.SkewedChi2Sampler.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/__init__/skewedness", + "name": "skewedness", + "qname": "sklearn.kernel_approximation.SkewedChi2Sampler.__init__.skewedness", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "\"skewedness\" parameter of the kernel. Needs to be cross-validated." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/__init__/n_components", + "name": "n_components", + "qname": "sklearn.kernel_approximation.SkewedChi2Sampler.__init__.n_components", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "100", + "description": "number of Monte Carlo samples per original feature.\nEquals the dimensionality of the computed feature space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/__init__/random_state", + "name": "random_state", + "qname": "sklearn.kernel_approximation.SkewedChi2Sampler.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Pseudo-random number generator to control the generation of the random\nweights and random offset when fitting the training data.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
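A sketch of `RBFSampler` feeding random Fourier features to a linear classifier, in the spirit of the entries above; the four-point toy problem and hyperparameters are invented.

```python
import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier

X = np.array([[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]])
y = [0, 0, 1, 1]

# Random Fourier features: cos(X @ random_weights_ + random_offset_),
# scaled by sqrt(2 / n_components), as in the transform code above
rbf = RBFSampler(gamma=1.0, n_components=100, random_state=1)
X_feat = rbf.fit_transform(X)

clf = SGDClassifier(max_iter=100, random_state=0).fit(X_feat, y)
print(clf.score(X_feat, y))  # often 1.0 on this tiny XOR-like set
```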
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Approximates feature map of the \"skewed chi-squared\" kernel by Monte\nCarlo approximation of its Fourier transform.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, skewedness=1., n_components=100, random_state=None):\n self.skewedness = skewedness\n self.n_components = n_components\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/fit", + "name": "fit", + "qname": "sklearn.kernel_approximation.SkewedChi2Sampler.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/fit/self", + "name": "self", + "qname": "sklearn.kernel_approximation.SkewedChi2Sampler.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/fit/X", + "name": "X", + "qname": "sklearn.kernel_approximation.SkewedChi2Sampler.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples in the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/fit/y", + "name": "y", + "qname": "sklearn.kernel_approximation.SkewedChi2Sampler.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit the model with X.\n\nSamples random projection according to n_features.", + "docstring": "Fit the model with X.\n\nSamples random projection according to n_features.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the model with X.\n\n Samples random projection according to n_features.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\n Returns\n -------\n self : object\n Returns the transformer.\n \"\"\"\n\n X = self._validate_data(X)\n random_state = check_random_state(self.random_state)\n n_features = X.shape[1]\n uniform = random_state.uniform(size=(n_features, self.n_components))\n # transform by inverse CDF of sech\n self.random_weights_ = (1. / np.pi\n * np.log(np.tan(np.pi / 2. 
* uniform)))\n self.random_offset_ = random_state.uniform(0, 2 * np.pi,\n size=self.n_components)\n return self" + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/transform", + "name": "transform", + "qname": "sklearn.kernel_approximation.SkewedChi2Sampler.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/transform/self", + "name": "self", + "qname": "sklearn.kernel_approximation.SkewedChi2Sampler.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_approximation/SkewedChi2Sampler/transform/X", + "name": "X", + "qname": "sklearn.kernel_approximation.SkewedChi2Sampler.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "New data, where n_samples in the number of samples\nand n_features is the number of features. All values of X must be\nstrictly greater than \"-skewedness\"." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply the approximate feature map to X.", + "docstring": "Apply the approximate feature map to X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features. All values of X must be\n strictly greater than \"-skewedness\".\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)", + "code": " def transform(self, X):\n \"\"\"Apply the approximate feature map to X.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features. All values of X must be\n strictly greater than \"-skewedness\".\n\n Returns\n -------\n X_new : array-like, shape (n_samples, n_components)\n \"\"\"\n check_is_fitted(self)\n\n X = as_float_array(X, copy=True)\n X = self._validate_data(X, copy=False, reset=False)\n if (X <= -self.skewedness).any():\n raise ValueError(\"X may not contain entries smaller than\"\n \" -skewedness.\")\n\n X += self.skewedness\n np.log(X, X)\n projection = safe_sparse_dot(X, self.random_weights_)\n projection += self.random_offset_\n np.cos(projection, projection)\n projection *= np.sqrt(2.) 
/ np.sqrt(self.n_components)\n return projection" + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/__init__", + "name": "__init__", + "qname": "sklearn.kernel_ridge.KernelRidge.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/__init__/self", + "name": "self", + "qname": "sklearn.kernel_ridge.KernelRidge.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/__init__/alpha", + "name": "alpha", + "qname": "sklearn.kernel_ridge.KernelRidge.__init__.alpha", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float or array-like of shape (n_targets,)", + "default_value": "1.0", + "description": "Regularization strength; must be a positive float. Regularization\nimproves the conditioning of the problem and reduces the variance of\nthe estimates. Larger values specify stronger regularization.\nAlpha corresponds to ``1 / (2C)`` in other linear models such as\n:class:`~sklearn.linear_model.LogisticRegression` or\n:class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\nassumed to be specific to the targets. Hence they must correspond in\nnumber. See :ref:`ridge_regression` for formula." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_targets,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/__init__/kernel", + "name": "kernel", + "qname": "sklearn.kernel_ridge.KernelRidge.__init__.kernel", + "default_value": "'linear'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "string or callable", + "default_value": "\"linear\"", + "description": "Kernel mapping used internally. This parameter is directly passed to\n:class:`~sklearn.metrics.pairwise.pairwise_kernel`.\nIf `kernel` is a string, it must be one of the metrics\nin `pairwise.PAIRWISE_KERNEL_FUNCTIONS`.\nIf `kernel` is \"precomputed\", X is assumed to be a kernel matrix.\nAlternatively, if `kernel` is a callable function, it is called on\neach pair of instances (rows) and the resulting value recorded. The\ncallable should take two rows from X as input and return the\ncorresponding kernel value as a single number. This means that\ncallables from :mod:`sklearn.metrics.pairwise` are not allowed, as\nthey operate on matrices, not single samples. Use the string\nidentifying the kernel instead." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/__init__/gamma", + "name": "gamma", + "qname": "sklearn.kernel_ridge.KernelRidge.__init__.gamma", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\nand sigmoid kernels. Interpretation of the default value is left to\nthe kernel; see the documentation for sklearn.metrics.pairwise.\nIgnored by other kernels." 
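The entries above record SkewedChi2Sampler.fit (which draws the random projection) and SkewedChi2Sampler.transform (which applies the approximate feature map and requires every entry of X to be strictly greater than -skewedness). A minimal usage sketch against that public API; the data values are illustrative:

    import numpy as np
    from sklearn.kernel_approximation import SkewedChi2Sampler

    rng = np.random.RandomState(0)
    X = rng.uniform(size=(10, 4))        # entries in [0, 1), so strictly greater than -skewedness
    sampler = SkewedChi2Sampler(skewedness=1.0, n_components=100, random_state=0)
    X_new = sampler.fit(X).transform(X)  # approximate feature map, shape (10, 100)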
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/__init__/degree", + "name": "degree", + "qname": "sklearn.kernel_ridge.KernelRidge.__init__.degree", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "3", + "description": "Degree of the polynomial kernel. Ignored by other kernels." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/__init__/coef0", + "name": "coef0", + "qname": "sklearn.kernel_ridge.KernelRidge.__init__.coef0", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1", + "description": "Zero coefficient for polynomial and sigmoid kernels.\nIgnored by other kernels." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/__init__/kernel_params", + "name": "kernel_params", + "qname": "sklearn.kernel_ridge.KernelRidge.__init__.kernel_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "mapping of string to any", + "default_value": "None", + "description": "Additional parameters (keyword arguments) for kernel function passed\nas callable object." + }, + "type": { + "kind": "NamedType", + "name": "mapping of string to any" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Kernel ridge regression.\n\nKernel ridge regression (KRR) combines ridge regression (linear least\nsquares with l2-norm regularization) with the kernel trick. It thus\nlearns a linear function in the space induced by the respective kernel and\nthe data. For non-linear kernels, this corresponds to a non-linear\nfunction in the original space.\n\nThe form of the model learned by KRR is identical to support vector\nregression (SVR). However, different loss functions are used: KRR uses\nsquared error loss while support vector regression uses epsilon-insensitive\nloss, both combined with l2 regularization. In contrast to SVR, fitting a\nKRR model can be done in closed-form and is typically faster for\nmedium-sized datasets. 
On the other hand, the learned model is non-sparse\nand thus slower than SVR, which learns a sparse model for epsilon > 0, at\nprediction-time.\n\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape [n_samples, n_targets]).\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, alpha=1, *, kernel=\"linear\", gamma=None, degree=3,\n coef0=1, kernel_params=None):\n self.alpha = alpha\n self.kernel = kernel\n self.gamma = gamma\n self.degree = degree\n self.coef0 = coef0\n self.kernel_params = kernel_params" + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/_get_kernel", + "name": "_get_kernel", + "qname": "sklearn.kernel_ridge.KernelRidge._get_kernel", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/_get_kernel/self", + "name": "self", + "qname": "sklearn.kernel_ridge.KernelRidge._get_kernel.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/_get_kernel/X", + "name": "X", + "qname": "sklearn.kernel_ridge.KernelRidge._get_kernel.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/_get_kernel/Y", + "name": "Y", + "qname": "sklearn.kernel_ridge.KernelRidge._get_kernel.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_kernel(self, X, Y=None):\n if callable(self.kernel):\n params = self.kernel_params or {}\n else:\n params = {\"gamma\": self.gamma,\n \"degree\": self.degree,\n \"coef0\": self.coef0}\n return pairwise_kernels(X, Y, metric=self.kernel,\n filter_params=True, **params)" + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/_more_tags", + "name": "_more_tags", + "qname": "sklearn.kernel_ridge.KernelRidge._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/_more_tags/self", + "name": "self", + "qname": "sklearn.kernel_ridge.KernelRidge._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'pairwise': self.kernel == 'precomputed'}" + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.kernel_ridge.KernelRidge._pairwise", + "decorators": [ + "deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/_pairwise/self", + "name": "self", + "qname": "sklearn.kernel_ridge.KernelRidge._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + 
"default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n return self.kernel == \"precomputed\"" + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/fit", + "name": "fit", + "qname": "sklearn.kernel_ridge.KernelRidge.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/fit/self", + "name": "self", + "qname": "sklearn.kernel_ridge.KernelRidge.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/fit/X", + "name": "X", + "qname": "sklearn.kernel_ridge.KernelRidge.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data. If kernel == \"precomputed\" this is instead\na precomputed kernel matrix, of shape (n_samples, n_samples)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/fit/y", + "name": "y", + "qname": "sklearn.kernel_ridge.KernelRidge.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.kernel_ridge.KernelRidge.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float or array-like of shape (n_samples,)", + "default_value": "None", + "description": "Individual weights for each sample, ignored if None is passed." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit Kernel Ridge regression model", + "docstring": "Fit Kernel Ridge regression model\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. If kernel == \"precomputed\" this is instead\n a precomputed kernel matrix, of shape (n_samples, n_samples).\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n Individual weights for each sample, ignored if None is passed.\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Kernel Ridge regression model\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. 
If kernel == \"precomputed\" this is instead\n a precomputed kernel matrix, of shape (n_samples, n_samples).\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\n sample_weight : float or array-like of shape (n_samples,), default=None\n Individual weights for each sample, ignored if None is passed.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n # Convert data\n X, y = self._validate_data(X, y, accept_sparse=(\"csr\", \"csc\"),\n multi_output=True, y_numeric=True)\n if sample_weight is not None and not isinstance(sample_weight, float):\n sample_weight = _check_sample_weight(sample_weight, X)\n\n K = self._get_kernel(X)\n alpha = np.atleast_1d(self.alpha)\n\n ravel = False\n if len(y.shape) == 1:\n y = y.reshape(-1, 1)\n ravel = True\n\n copy = self.kernel == \"precomputed\"\n self.dual_coef_ = _solve_cholesky_kernel(K, y, alpha,\n sample_weight,\n copy)\n if ravel:\n self.dual_coef_ = self.dual_coef_.ravel()\n\n self.X_fit_ = X\n\n return self" + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/predict", + "name": "predict", + "qname": "sklearn.kernel_ridge.KernelRidge.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/predict/self", + "name": "self", + "qname": "sklearn.kernel_ridge.KernelRidge.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.kernel_ridge/KernelRidge/predict/X", + "name": "X", + "qname": "sklearn.kernel_ridge.KernelRidge.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Samples. If kernel == \"precomputed\" this is instead a\nprecomputed kernel matrix, shape = [n_samples,\nn_samples_fitted], where n_samples_fitted is the number of\nsamples used in the fitting for this estimator." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Predict using the kernel ridge model", + "docstring": "Predict using the kernel ridge model\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples. If kernel == \"precomputed\" this is instead a\n precomputed kernel matrix, shape = [n_samples,\n n_samples_fitted], where n_samples_fitted is the number of\n samples used in the fitting for this estimator.\n\nReturns\n-------\nC : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Returns predicted values.", + "code": " def predict(self, X):\n \"\"\"Predict using the kernel ridge model\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples. 
If kernel == \"precomputed\" this is instead a\n precomputed kernel matrix, shape = [n_samples,\n n_samples_fitted], where n_samples_fitted is the number of\n samples used in the fitting for this estimator.\n\n Returns\n -------\n C : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Returns predicted values.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse=(\"csr\", \"csc\"), reset=False)\n K = self._get_kernel(X, self.X_fit_)\n return np.dot(K, self.dual_coef_)" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin/_predict_proba_lr", + "name": "_predict_proba_lr", + "qname": "sklearn.linear_model._base.LinearClassifierMixin._predict_proba_lr", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin/_predict_proba_lr/self", + "name": "self", + "qname": "sklearn.linear_model._base.LinearClassifierMixin._predict_proba_lr.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin/_predict_proba_lr/X", + "name": "X", + "qname": "sklearn.linear_model._base.LinearClassifierMixin._predict_proba_lr.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Probability estimation for OvR logistic regression.\n\nPositive class probabilities are computed as\n1. / (1. + np.exp(-self.decision_function(X)));\nmulticlass is handled by normalizing that over all classes.", + "docstring": "Probability estimation for OvR logistic regression.\n\nPositive class probabilities are computed as\n1. / (1. + np.exp(-self.decision_function(X)));\nmulticlass is handled by normalizing that over all classes.", + "code": " def _predict_proba_lr(self, X):\n \"\"\"Probability estimation for OvR logistic regression.\n\n Positive class probabilities are computed as\n 1. / (1. 
+ np.exp(-self.decision_function(X)));\n multiclass is handled by normalizing that over all classes.\n \"\"\"\n prob = self.decision_function(X)\n expit(prob, out=prob)\n if prob.ndim == 1:\n return np.vstack([1 - prob, prob]).T\n else:\n # OvR normalization, like LibLinear's predict_probability\n prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))\n return prob" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin/decision_function", + "name": "decision_function", + "qname": "sklearn.linear_model._base.LinearClassifierMixin.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin/decision_function/self", + "name": "self", + "qname": "sklearn.linear_model._base.LinearClassifierMixin.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin/decision_function/X", + "name": "X", + "qname": "sklearn.linear_model._base.LinearClassifierMixin.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or sparse matrix, shape (n_samples, n_features)", + "default_value": "", + "description": "Samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "sparse matrix" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict confidence scores for samples.\n\nThe confidence score for a sample is proportional to the signed\ndistance of that sample to the hyperplane.", + "docstring": "Predict confidence scores for samples.\n\nThe confidence score for a sample is proportional to the signed\ndistance of that sample to the hyperplane.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\narray, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)\n Confidence scores per (sample, class) combination. In the binary\n case, confidence score for self.classes_[1] where >0 means this\n class would be predicted.", + "code": " def decision_function(self, X):\n \"\"\"\n Predict confidence scores for samples.\n\n The confidence score for a sample is proportional to the signed\n distance of that sample to the hyperplane.\n\n Parameters\n ----------\n X : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\n Returns\n -------\n array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)\n Confidence scores per (sample, class) combination. 
In the binary\n case, confidence score for self.classes_[1] where >0 means this\n class would be predicted.\n \"\"\"\n check_is_fitted(self)\n\n X = check_array(X, accept_sparse='csr')\n\n n_features = self.coef_.shape[1]\n if X.shape[1] != n_features:\n raise ValueError(\"X has %d features per sample; expecting %d\"\n % (X.shape[1], n_features))\n\n scores = safe_sparse_dot(X, self.coef_.T,\n dense_output=True) + self.intercept_\n return scores.ravel() if scores.shape[1] == 1 else scores" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin/predict", + "name": "predict", + "qname": "sklearn.linear_model._base.LinearClassifierMixin.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin/predict/self", + "name": "self", + "qname": "sklearn.linear_model._base.LinearClassifierMixin.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearClassifierMixin/predict/X", + "name": "X", + "qname": "sklearn.linear_model._base.LinearClassifierMixin.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or sparse matrix, shape (n_samples, n_features)", + "default_value": "", + "description": "Samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "sparse matrix" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class labels for samples in X.", + "docstring": "Predict class labels for samples in X.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\nC : array, shape [n_samples]\n Predicted class label per sample.", + "code": " def predict(self, X):\n \"\"\"\n Predict class labels for samples in X.\n\n Parameters\n ----------\n X : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\n Returns\n -------\n C : array, shape [n_samples]\n Predicted class label per sample.\n \"\"\"\n scores = self.decision_function(X)\n if len(scores.shape) == 1:\n indices = (scores > 0).astype(int)\n else:\n indices = scores.argmax(axis=1)\n return self.classes_[indices]" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/_decision_function", + "name": "_decision_function", + "qname": "sklearn.linear_model._base.LinearModel._decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/_decision_function/self", + "name": "self", + "qname": "sklearn.linear_model._base.LinearModel._decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/_decision_function/X", + "name": "X", + "qname": "sklearn.linear_model._base.LinearModel._decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + 
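The KernelRidge entries above describe a closed-form fit: the dual coefficients are solved against the kernel matrix of the training data, and predict computes the kernel between new samples and X_fit_ times dual_coef_. A short sketch of that workflow; the shapes and hyperparameters are illustrative:

    import numpy as np
    from sklearn.kernel_ridge import KernelRidge

    rng = np.random.RandomState(0)
    X = rng.randn(50, 3)
    y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.randn(50)

    model = KernelRidge(alpha=1.0, kernel="rbf", gamma=0.1)  # kernel and gamma are keyword-only
    model.fit(X, y)              # closed-form solve for dual_coef_
    y_pred = model.predict(X)    # K(X, X_fit_) @ dual_coef_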
"reexported_by": [], + "description": "", + "docstring": "", + "code": " def _decision_function(self, X):\n check_is_fitted(self)\n\n X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])\n return safe_sparse_dot(X, self.coef_.T,\n dense_output=True) + self.intercept_" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/_more_tags", + "name": "_more_tags", + "qname": "sklearn.linear_model._base.LinearModel._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._base.LinearModel._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'requires_y': True}" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/_set_intercept", + "name": "_set_intercept", + "qname": "sklearn.linear_model._base.LinearModel._set_intercept", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/_set_intercept/self", + "name": "self", + "qname": "sklearn.linear_model._base.LinearModel._set_intercept.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/_set_intercept/X_offset", + "name": "X_offset", + "qname": "sklearn.linear_model._base.LinearModel._set_intercept.X_offset", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/_set_intercept/y_offset", + "name": "y_offset", + "qname": "sklearn.linear_model._base.LinearModel._set_intercept.y_offset", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/_set_intercept/X_scale", + "name": "X_scale", + "qname": "sklearn.linear_model._base.LinearModel._set_intercept.X_scale", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Set the intercept_", + "docstring": "Set the intercept_\n ", + "code": " def _set_intercept(self, X_offset, y_offset, X_scale):\n \"\"\"Set the intercept_\n \"\"\"\n if self.fit_intercept:\n self.coef_ = self.coef_ / X_scale\n self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T)\n else:\n self.intercept_ = 0." 
+ }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/fit", + "name": "fit", + "qname": "sklearn.linear_model._base.LinearModel.fit", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/fit/self", + "name": "self", + "qname": "sklearn.linear_model._base.LinearModel.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/fit/X", + "name": "X", + "qname": "sklearn.linear_model._base.LinearModel.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/fit/y", + "name": "y", + "qname": "sklearn.linear_model._base.LinearModel.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit model.", + "docstring": "Fit model.", + "code": " @abstractmethod\n def fit(self, X, y):\n \"\"\"Fit model.\"\"\"" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/predict", + "name": "predict", + "qname": "sklearn.linear_model._base.LinearModel.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/predict/self", + "name": "self", + "qname": "sklearn.linear_model._base.LinearModel.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearModel/predict/X", + "name": "X", + "qname": "sklearn.linear_model._base.LinearModel.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or sparse matrix, shape (n_samples, n_features)", + "default_value": "", + "description": "Samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "sparse matrix" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict using the linear model.", + "docstring": "Predict using the linear model.\n\nParameters\n----------\nX : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\nC : array, shape (n_samples,)\n Returns predicted values.", + "code": " def predict(self, X):\n \"\"\"\n Predict using the linear model.\n\n Parameters\n ----------\n X : array-like or sparse matrix, shape (n_samples, n_features)\n Samples.\n\n Returns\n -------\n C : array, shape (n_samples,)\n Returns predicted values.\n \"\"\"\n return self._decision_function(X)" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearRegression/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._base.LinearRegression.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/LinearRegression/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._base.LinearRegression.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearRegression/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._base.LinearRegression.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set\nto False, no intercept will be used in calculations\n(i.e. data is expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearRegression/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._base.LinearRegression.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearRegression/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._base.LinearRegression.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, X will be copied; else, it may be overwritten." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearRegression/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._base.LinearRegression.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation. This will only provide\nspeedup for n_targets > 1 and sufficient large problems.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearRegression/__init__/positive", + "name": "positive", + "qname": "sklearn.linear_model._base.LinearRegression.__init__.positive", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, forces the coefficients to be positive. This\noption is only supported for dense arrays.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Ordinary least squares Linear Regression.\n\nLinearRegression fits a linear model with coefficients w = (w1, ..., wp)\nto minimize the residual sum of squares between the observed targets in\nthe dataset, and the targets predicted by the linear approximation.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, fit_intercept=True, normalize=False, copy_X=True,\n n_jobs=None, positive=False):\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.copy_X = copy_X\n self.n_jobs = n_jobs\n self.positive = positive" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearRegression/fit", + "name": "fit", + "qname": "sklearn.linear_model._base.LinearRegression.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/LinearRegression/fit/self", + "name": "self", + "qname": "sklearn.linear_model._base.LinearRegression.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearRegression/fit/X", + "name": "X", + "qname": "sklearn.linear_model._base.LinearRegression.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearRegression/fit/y", + "name": "y", + "qname": "sklearn.linear_model._base.LinearRegression.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values. 
Will be cast to X's dtype if necessary" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._base/LinearRegression/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._base.LinearRegression.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Individual weights for each sample\n\n.. versionadded:: 0.17\n parameter *sample_weight* support to LinearRegression." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit linear model.", + "docstring": "Fit linear model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary\n\nsample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample\n\n .. versionadded:: 0.17\n parameter *sample_weight* support to LinearRegression.\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"\n Fit linear model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary\n\n sample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample\n\n .. 
versionadded:: 0.17\n parameter *sample_weight* support to LinearRegression.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n\n n_jobs_ = self.n_jobs\n\n accept_sparse = False if self.positive else ['csr', 'csc', 'coo']\n\n X, y = self._validate_data(X, y, accept_sparse=accept_sparse,\n y_numeric=True, multi_output=True)\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=X.dtype)\n\n X, y, X_offset, y_offset, X_scale = self._preprocess_data(\n X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,\n copy=self.copy_X, sample_weight=sample_weight,\n return_mean=True)\n\n if sample_weight is not None:\n # Sample weight can be implemented via a simple rescaling.\n X, y = _rescale_data(X, y, sample_weight)\n\n if self.positive:\n if y.ndim < 2:\n self.coef_, self._residues = optimize.nnls(X, y)\n else:\n # scipy.optimize.nnls cannot handle y with shape (M, K)\n outs = Parallel(n_jobs=n_jobs_)(\n delayed(optimize.nnls)(X, y[:, j])\n for j in range(y.shape[1]))\n self.coef_, self._residues = map(np.vstack, zip(*outs))\n elif sp.issparse(X):\n X_offset_scale = X_offset / X_scale\n\n def matvec(b):\n return X.dot(b) - b.dot(X_offset_scale)\n\n def rmatvec(b):\n return X.T.dot(b) - X_offset_scale * np.sum(b)\n\n X_centered = sparse.linalg.LinearOperator(shape=X.shape,\n matvec=matvec,\n rmatvec=rmatvec)\n\n if y.ndim < 2:\n out = sparse_lsqr(X_centered, y)\n self.coef_ = out[0]\n self._residues = out[3]\n else:\n # sparse_lstsq cannot handle y with shape (M, K)\n outs = Parallel(n_jobs=n_jobs_)(\n delayed(sparse_lsqr)(X_centered, y[:, j].ravel())\n for j in range(y.shape[1]))\n self.coef_ = np.vstack([out[0] for out in outs])\n self._residues = np.vstack([out[3] for out in outs])\n else:\n self.coef_, self._residues, self.rank_, self.singular_ = \\\n linalg.lstsq(X, y)\n self.coef_ = self.coef_.T\n\n if y.ndim == 1:\n self.coef_ = np.ravel(self.coef_)\n self._set_intercept(X_offset, y_offset, X_scale)\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/SparseCoefMixin/densify", + "name": "densify", + "qname": "sklearn.linear_model._base.SparseCoefMixin.densify", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/SparseCoefMixin/densify/self", + "name": "self", + "qname": "sklearn.linear_model._base.SparseCoefMixin.densify.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Convert coefficient matrix to dense array format.\n\nConverts the ``coef_`` member (back) to a numpy.ndarray. This is the\ndefault format of ``coef_`` and is required for fitting, so calling\nthis method is only required on models that have previously been\nsparsified; otherwise, it is a no-op.", + "docstring": "Convert coefficient matrix to dense array format.\n\nConverts the ``coef_`` member (back) to a numpy.ndarray. This is the\ndefault format of ``coef_`` and is required for fitting, so calling\nthis method is only required on models that have previously been\nsparsified; otherwise, it is a no-op.\n\nReturns\n-------\nself\n Fitted estimator.", + "code": " def densify(self):\n \"\"\"\n Convert coefficient matrix to dense array format.\n\n Converts the ``coef_`` member (back) to a numpy.ndarray. 
This is the\n default format of ``coef_`` and is required for fitting, so calling\n this method is only required on models that have previously been\n sparsified; otherwise, it is a no-op.\n\n Returns\n -------\n self\n Fitted estimator.\n \"\"\"\n msg = \"Estimator, %(name)s, must be fitted before densifying.\"\n check_is_fitted(self, msg=msg)\n if sp.issparse(self.coef_):\n self.coef_ = self.coef_.toarray()\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/SparseCoefMixin/sparsify", + "name": "sparsify", + "qname": "sklearn.linear_model._base.SparseCoefMixin.sparsify", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/SparseCoefMixin/sparsify/self", + "name": "self", + "qname": "sklearn.linear_model._base.SparseCoefMixin.sparsify.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Convert coefficient matrix to sparse format.\n\nConverts the ``coef_`` member to a scipy.sparse matrix, which for\nL1-regularized models can be much more memory- and storage-efficient\nthan the usual numpy.ndarray representation.\n\nThe ``intercept_`` member is not converted.", + "docstring": "Convert coefficient matrix to sparse format.\n\nConverts the ``coef_`` member to a scipy.sparse matrix, which for\nL1-regularized models can be much more memory- and storage-efficient\nthan the usual numpy.ndarray representation.\n\nThe ``intercept_`` member is not converted.\n\nReturns\n-------\nself\n Fitted estimator.\n\nNotes\n-----\nFor non-sparse models, i.e. when there are not many zeros in ``coef_``,\nthis may actually *increase* memory usage, so use this method with\ncare. A rule of thumb is that the number of zero elements, which can\nbe computed with ``(coef_ == 0).sum()``, must be more than 50% for this\nto provide significant benefits.\n\nAfter calling this method, further fitting with the partial_fit\nmethod (if any) will not work until you call densify.", + "code": " def sparsify(self):\n \"\"\"\n Convert coefficient matrix to sparse format.\n\n Converts the ``coef_`` member to a scipy.sparse matrix, which for\n L1-regularized models can be much more memory- and storage-efficient\n than the usual numpy.ndarray representation.\n\n The ``intercept_`` member is not converted.\n\n Returns\n -------\n self\n Fitted estimator.\n\n Notes\n -----\n For non-sparse models, i.e. when there are not many zeros in ``coef_``,\n this may actually *increase* memory usage, so use this method with\n care. 
A rule of thumb is that the number of zero elements, which can\n be computed with ``(coef_ == 0).sum()``, must be more than 50% for this\n to provide significant benefits.\n\n After calling this method, further fitting with the partial_fit\n method (if any) will not work until you call densify.\n \"\"\"\n msg = \"Estimator, %(name)s, must be fitted before sparsifying.\"\n check_is_fitted(self, msg=msg)\n self.coef_ = sp.csr_matrix(self.coef_)\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_pre_fit", + "name": "_pre_fit", + "qname": "sklearn.linear_model._base._pre_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/_pre_fit/X", + "name": "X", + "qname": "sklearn.linear_model._base._pre_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_pre_fit/y", + "name": "y", + "qname": "sklearn.linear_model._base._pre_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_pre_fit/Xy", + "name": "Xy", + "qname": "sklearn.linear_model._base._pre_fit.Xy", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_pre_fit/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._base._pre_fit.precompute", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_pre_fit/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._base._pre_fit.normalize", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_pre_fit/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._base._pre_fit.fit_intercept", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_pre_fit/copy", + "name": "copy", + "qname": "sklearn.linear_model._base._pre_fit.copy", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_pre_fit/check_input", + "name": "check_input", + "qname": "sklearn.linear_model._base._pre_fit.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_pre_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._base._pre_fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": 
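The sparsify/densify entries above note the rule of thumb that more than 50% of coef_ should be zero before sparsifying pays off. A sketch using LogisticRegression, one estimator that mixes in SparseCoefMixin; the data and hyperparameters are illustrative:

    import numpy as np
    from sklearn.linear_model import LogisticRegression  # inherits SparseCoefMixin

    X = np.random.RandomState(0).randn(30, 10)
    y = (X[:, 0] > 0).astype(int)
    clf = LogisticRegression(penalty="l1", solver="liblinear", C=0.1).fit(X, y)

    clf.sparsify()           # coef_ -> scipy.sparse CSR; worthwhile when >50% zeros
    y_pred = clf.predict(X)  # prediction still works on the sparse coef_
    clf.densify()            # back to ndarray; required before any further fitting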
{ + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Aux function used at beginning of fit in linear models", + "docstring": "Aux function used at beginning of fit in linear models\n\nParameters\n----------\norder : 'F', 'C' or None, default=None\n Whether X and y will be forced to be fortran or c-style. Only relevant\n if sample_weight is not None.", + "code": "def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy,\n check_input=True, sample_weight=None):\n \"\"\"Aux function used at beginning of fit in linear models\n\n Parameters\n ----------\n order : 'F', 'C' or None, default=None\n Whether X and y will be forced to be fortran or c-style. Only relevant\n if sample_weight is not None.\n \"\"\"\n n_samples, n_features = X.shape\n\n if sparse.isspmatrix(X):\n # copy is not needed here as X is not modified inplace when X is sparse\n precompute = False\n X, y, X_offset, y_offset, X_scale = _preprocess_data(\n X, y, fit_intercept=fit_intercept, normalize=normalize,\n copy=False, return_mean=True, check_input=check_input)\n else:\n # copy was done in fit if necessary\n X, y, X_offset, y_offset, X_scale = _preprocess_data(\n X, y, fit_intercept=fit_intercept, normalize=normalize, copy=copy,\n check_input=check_input, sample_weight=sample_weight)\n if sample_weight is not None:\n X, y = _rescale_data(X, y, sample_weight=sample_weight)\n if hasattr(precompute, '__array__') and (\n fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or\n normalize and not np.allclose(X_scale, np.ones(n_features))):\n warnings.warn(\"Gram matrix was provided but X was centered\"\n \" to fit intercept, \"\n \"or X was normalized : recomputing Gram matrix.\",\n UserWarning)\n # recompute Gram\n precompute = 'auto'\n Xy = None\n\n # precompute if n_samples > n_features\n if isinstance(precompute, str) and precompute == 'auto':\n precompute = (n_samples > n_features)\n\n if precompute is True:\n # make sure that the 'precompute' array is contiguous.\n precompute = np.empty(shape=(n_features, n_features), dtype=X.dtype,\n order='C')\n np.dot(X.T, X, out=precompute)\n\n if not hasattr(precompute, '__array__'):\n Xy = None # cannot use Xy if precompute is not Gram\n\n if hasattr(precompute, '__array__') and Xy is None:\n common_dtype = np.find_common_type([X.dtype, y.dtype], [])\n if y.ndim == 1:\n # Xy is 1d, make sure it is contiguous.\n Xy = np.empty(shape=n_features, dtype=common_dtype, order='C')\n np.dot(X.T, y, out=Xy)\n else:\n # Make sure that Xy is always F contiguous even if X or y are not\n # contiguous: the goal is to make it fast to extract the data for a\n # specific target.\n n_targets = y.shape[1]\n Xy = np.empty(shape=(n_features, n_targets), dtype=common_dtype,\n order='F')\n np.dot(y.T, X, out=Xy.T)\n\n return X, y, X_offset, y_offset, X_scale, precompute, Xy" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_preprocess_data", + "name": "_preprocess_data", + "qname": "sklearn.linear_model._base._preprocess_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/_preprocess_data/X", + "name": "X", + "qname": "sklearn.linear_model._base._preprocess_data.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_preprocess_data/y", + 
"name": "y", + "qname": "sklearn.linear_model._base._preprocess_data.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_preprocess_data/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._base._preprocess_data.fit_intercept", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_preprocess_data/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._base._preprocess_data.normalize", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_preprocess_data/copy", + "name": "copy", + "qname": "sklearn.linear_model._base._preprocess_data.copy", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_preprocess_data/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._base._preprocess_data.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_preprocess_data/return_mean", + "name": "return_mean", + "qname": "sklearn.linear_model._base._preprocess_data.return_mean", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_preprocess_data/check_input", + "name": "check_input", + "qname": "sklearn.linear_model._base._preprocess_data.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Center and scale data.\n\nCenters data to have mean zero along axis 0. If fit_intercept=False or if\nthe X is a sparse matrix, no centering is done, but normalization can still\nbe applied. The function returns the statistics necessary to reconstruct\nthe input data, which are X_offset, y_offset, X_scale, such that the output\n\n X = (X - X_offset) / X_scale\n\nX_scale is the L2 norm of X - X_offset. If sample_weight is not None,\nthen the weighted mean of X and y is zero, and not the mean itself. If\nreturn_mean=True, the mean, eventually weighted, is returned, independently\nof whether X was centered (option used for optimization with sparse data in\ncoordinate_descend).\n\nThis is here because nearly all linear models will want their data to be\ncentered. This function also systematically makes y consistent with X.dtype", + "docstring": "Center and scale data.\n\nCenters data to have mean zero along axis 0. If fit_intercept=False or if\nthe X is a sparse matrix, no centering is done, but normalization can still\nbe applied. 
The function returns the statistics necessary to reconstruct\nthe input data, which are X_offset, y_offset, X_scale, such that the output\n\n X = (X - X_offset) / X_scale\n\nX_scale is the L2 norm of X - X_offset. If sample_weight is not None,\nthen the weighted mean of X and y is zero, and not the mean itself. If\nreturn_mean=True, the mean, eventually weighted, is returned, independently\nof whether X was centered (option used for optimization with sparse data in\ncoordinate_descend).\n\nThis is here because nearly all linear models will want their data to be\ncentered. This function also systematically makes y consistent with X.dtype", + "code": "def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,\n sample_weight=None, return_mean=False, check_input=True):\n \"\"\"Center and scale data.\n\n Centers data to have mean zero along axis 0. If fit_intercept=False or if\n the X is a sparse matrix, no centering is done, but normalization can still\n be applied. The function returns the statistics necessary to reconstruct\n the input data, which are X_offset, y_offset, X_scale, such that the output\n\n X = (X - X_offset) / X_scale\n\n X_scale is the L2 norm of X - X_offset. If sample_weight is not None,\n then the weighted mean of X and y is zero, and not the mean itself. If\n return_mean=True, the mean, eventually weighted, is returned, independently\n of whether X was centered (option used for optimization with sparse data in\n coordinate_descend).\n\n This is here because nearly all linear models will want their data to be\n centered. This function also systematically makes y consistent with X.dtype\n \"\"\"\n if isinstance(sample_weight, numbers.Number):\n sample_weight = None\n if sample_weight is not None:\n sample_weight = np.asarray(sample_weight)\n\n if check_input:\n X = check_array(X, copy=copy, accept_sparse=['csr', 'csc'],\n dtype=FLOAT_DTYPES)\n elif copy:\n if sp.issparse(X):\n X = X.copy()\n else:\n X = X.copy(order='K')\n\n y = np.asarray(y, dtype=X.dtype)\n\n if fit_intercept:\n if sp.issparse(X):\n X_offset, X_var = mean_variance_axis(X, axis=0)\n if not return_mean:\n X_offset[:] = X.dtype.type(0)\n\n if normalize:\n\n # TODO: f_normalize could be used here as well but the function\n # inplace_csr_row_normalize_l2 must be changed such that it\n # can return also the norms computed internally\n\n # transform variance to norm in-place\n X_var *= X.shape[0]\n X_scale = np.sqrt(X_var, X_var)\n del X_var\n X_scale[X_scale == 0] = 1\n inplace_column_scale(X, 1. 
/ X_scale)\n else:\n X_scale = np.ones(X.shape[1], dtype=X.dtype)\n\n else:\n X_offset = np.average(X, axis=0, weights=sample_weight)\n X -= X_offset\n if normalize:\n X, X_scale = f_normalize(X, axis=0, copy=False,\n return_norm=True)\n else:\n X_scale = np.ones(X.shape[1], dtype=X.dtype)\n y_offset = np.average(y, axis=0, weights=sample_weight)\n y = y - y_offset\n else:\n X_offset = np.zeros(X.shape[1], dtype=X.dtype)\n X_scale = np.ones(X.shape[1], dtype=X.dtype)\n if y.ndim == 1:\n y_offset = X.dtype.type(0)\n else:\n y_offset = np.zeros(y.shape[1], dtype=X.dtype)\n\n return X, y, X_offset, y_offset, X_scale" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_rescale_data", + "name": "_rescale_data", + "qname": "sklearn.linear_model._base._rescale_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/_rescale_data/X", + "name": "X", + "qname": "sklearn.linear_model._base._rescale_data.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_rescale_data/y", + "name": "y", + "qname": "sklearn.linear_model._base._rescale_data.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._base/_rescale_data/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._base._rescale_data.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Rescale data sample-wise by square root of sample_weight.\n\nFor many linear models, this enables easy support for sample_weight.", + "docstring": "Rescale data sample-wise by square root of sample_weight.\n\nFor many linear models, this enables easy support for sample_weight.\n\nReturns\n-------\nX_rescaled : {array-like, sparse matrix}\n\ny_rescaled : {array-like, sparse matrix}", + "code": "def _rescale_data(X, y, sample_weight):\n \"\"\"Rescale data sample-wise by square root of sample_weight.\n\n For many linear models, this enables easy support for sample_weight.\n\n Returns\n -------\n X_rescaled : {array-like, sparse matrix}\n\n y_rescaled : {array-like, sparse matrix}\n \"\"\"\n n_samples = X.shape[0]\n sample_weight = np.asarray(sample_weight)\n if sample_weight.ndim == 0:\n sample_weight = np.full(n_samples, sample_weight,\n dtype=sample_weight.dtype)\n sample_weight = np.sqrt(sample_weight)\n sw_matrix = sparse.dia_matrix((sample_weight, 0),\n shape=(n_samples, n_samples))\n X = safe_sparse_dot(sw_matrix, X)\n y = safe_sparse_dot(sw_matrix, y)\n return X, y" + }, + { + "id": "scikit-learn/sklearn.linear_model._base/make_dataset", + "name": "make_dataset", + "qname": "sklearn.linear_model._base.make_dataset", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._base/make_dataset/X", + "name": "X", + "qname": "sklearn.linear_model._base.make_dataset.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "Training data" + }, + "type": 
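The `_rescale_data` helper recorded above reduces sample weighting to plain least squares by scaling each row of X and y by the square root of its weight. A minimal NumPy sketch of that equivalence (illustrative data, not sklearn's implementation, which routes the scaling through a sparse diagonal matrix and safe_sparse_dot):

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((30, 3))
y = rng.standard_normal(30)
w = rng.uniform(0.1, 2.0, 30)                    # sample_weight

# rescale rows by sqrt(w), mirroring what _rescale_data does
Xr, yr = X * np.sqrt(w)[:, None], y * np.sqrt(w)

ols_on_rescaled = np.linalg.lstsq(Xr, yr, rcond=None)[0]
wls_direct = np.linalg.solve(X.T @ (w[:, None] * X), X.T @ (w * y))
print(np.allclose(ols_on_rescaled, wls_direct))  # True: both solve weighted least squares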
{ + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._base/make_dataset/y", + "name": "y", + "qname": "sklearn.linear_model._base.make_dataset.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, )", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, )" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._base/make_dataset/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._base.make_dataset.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "numpy array of shape (n_samples,)", + "default_value": "", + "description": "The weight of each sample" + }, + "type": { + "kind": "NamedType", + "name": "numpy array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._base/make_dataset/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._base.make_dataset.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None (default)", + "default_value": "", + "description": "Determines random number generation for dataset shuffling and noise.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None (default)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Create ``Dataset`` abstraction for sparse and dense inputs.\n\nThis also returns the ``intercept_decay`` which is different\nfor sparse datasets.", + "docstring": "Create ``Dataset`` abstraction for sparse and dense inputs.\n\nThis also returns the ``intercept_decay`` which is different\nfor sparse datasets.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data\n\ny : array-like, shape (n_samples, )\n Target values.\n\nsample_weight : numpy array of shape (n_samples,)\n The weight of each sample\n\nrandom_state : int, RandomState instance or None (default)\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ndataset\n The ``Dataset`` abstraction\nintercept_decay\n The intercept decay", + "code": "def make_dataset(X, y, sample_weight, random_state=None):\n \"\"\"Create ``Dataset`` abstraction for sparse and dense inputs.\n\n This also returns the ``intercept_decay`` which is different\n for sparse datasets.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Training data\n\n y : array-like, shape (n_samples, )\n Target values.\n\n sample_weight : numpy array of shape (n_samples,)\n The weight of each sample\n\n random_state : int, RandomState instance or None (default)\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across 
multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n dataset\n The ``Dataset`` abstraction\n intercept_decay\n The intercept decay\n \"\"\"\n\n rng = check_random_state(random_state)\n # seed should never be 0 in SequentialDataset64\n seed = rng.randint(1, np.iinfo(np.int32).max)\n\n if X.dtype == np.float32:\n CSRData = CSRDataset32\n ArrayData = ArrayDataset32\n else:\n CSRData = CSRDataset64\n ArrayData = ArrayDataset64\n\n if sp.issparse(X):\n dataset = CSRData(X.data, X.indptr, X.indices, y, sample_weight,\n seed=seed)\n intercept_decay = SPARSE_INTERCEPT_DECAY\n else:\n X = np.ascontiguousarray(X)\n dataset = ArrayData(X, y, sample_weight, seed=seed)\n intercept_decay = 1.0\n\n return dataset, intercept_decay" + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__/n_iter", + "name": "n_iter", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.n_iter", + "default_value": "300", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Stop the algorithm if w has converged." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__/alpha_1", + "name": "alpha_1", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.alpha_1", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Hyper-parameter : shape parameter for the Gamma distribution prior\nover the alpha parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__/alpha_2", + "name": "alpha_2", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.alpha_2", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Hyper-parameter : inverse scale parameter (rate parameter) for the\nGamma distribution prior over the alpha parameter." 
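As the comment in `make_dataset`'s code notes, the shuffling seed must never be 0 for `SequentialDataset64`, so it is drawn from [1, int32 max). A hedged sketch of just that seed draw, using the public `check_random_state` helper:

import numpy as np
from sklearn.utils import check_random_state

rng = check_random_state(42)                     # accepts an int, a RandomState, or None
seed = rng.randint(1, np.iinfo(np.int32).max)    # strictly positive, fits in an int32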
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__/lambda_1", + "name": "lambda_1", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.lambda_1", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Hyper-parameter : shape parameter for the Gamma distribution prior\nover the lambda parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__/lambda_2", + "name": "lambda_2", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.lambda_2", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Hyper-parameter : inverse scale parameter (rate parameter) for the\nGamma distribution prior over the lambda parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__/compute_score", + "name": "compute_score", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.compute_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, compute the objective function at each step of the model." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__/threshold_lambda", + "name": "threshold_lambda", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.threshold_lambda", + "default_value": "10000.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "10 000", + "description": "threshold for removing (pruning) weights with high precision from\nthe computation." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._bayes.ARDRegression.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Verbose mode when fitting the model." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Bayesian ARD regression.\n\nFit the weights of a regression model, using an ARD prior. The weights of\nthe regression model are assumed to be in Gaussian distributions.\nAlso estimate the parameters lambda (precisions of the distributions of the\nweights) and alpha (precision of the distribution of the noise).\nThe estimation is done by an iterative procedures (Evidence Maximization)\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,\n lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False,\n threshold_lambda=1.e+4, fit_intercept=True, normalize=False,\n copy_X=True, verbose=False):\n self.n_iter = n_iter\n self.tol = tol\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.alpha_1 = alpha_1\n self.alpha_2 = alpha_2\n self.lambda_1 = lambda_1\n self.lambda_2 = lambda_2\n self.compute_score = compute_score\n self.threshold_lambda = threshold_lambda\n self.copy_X = copy_X\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma", + "name": "_update_sigma", + "qname": "sklearn.linear_model._bayes.ARDRegression._update_sigma", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma/self", + "name": "self", + "qname": "sklearn.linear_model._bayes.ARDRegression._update_sigma.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma/X", + "name": "X", + "qname": "sklearn.linear_model._bayes.ARDRegression._update_sigma.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma/alpha_", + "name": "alpha_", + "qname": "sklearn.linear_model._bayes.ARDRegression._update_sigma.alpha_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma/lambda_", + "name": "lambda_", + "qname": 
"sklearn.linear_model._bayes.ARDRegression._update_sigma.lambda_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma/keep_lambda", + "name": "keep_lambda", + "qname": "sklearn.linear_model._bayes.ARDRegression._update_sigma.keep_lambda", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _update_sigma(self, X, alpha_, lambda_, keep_lambda):\n # See slides as referenced in the docstring note\n # this function is used when n_samples >= n_features and will\n # invert a matrix of shape (n_features, n_features)\n X_keep = X[:, keep_lambda]\n gram = np.dot(X_keep.T, X_keep)\n eye = np.eye(gram.shape[0])\n sigma_inv = lambda_[keep_lambda] * eye + alpha_ * gram\n sigma_ = pinvh(sigma_inv)\n return sigma_" + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma_woodbury", + "name": "_update_sigma_woodbury", + "qname": "sklearn.linear_model._bayes.ARDRegression._update_sigma_woodbury", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma_woodbury/self", + "name": "self", + "qname": "sklearn.linear_model._bayes.ARDRegression._update_sigma_woodbury.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma_woodbury/X", + "name": "X", + "qname": "sklearn.linear_model._bayes.ARDRegression._update_sigma_woodbury.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma_woodbury/alpha_", + "name": "alpha_", + "qname": "sklearn.linear_model._bayes.ARDRegression._update_sigma_woodbury.alpha_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma_woodbury/lambda_", + "name": "lambda_", + "qname": "sklearn.linear_model._bayes.ARDRegression._update_sigma_woodbury.lambda_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/_update_sigma_woodbury/keep_lambda", + "name": "keep_lambda", + "qname": "sklearn.linear_model._bayes.ARDRegression._update_sigma_woodbury.keep_lambda", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _update_sigma_woodbury(self, X, alpha_, lambda_, keep_lambda):\n # See slides as 
referenced in the docstring note\n # this function is used when n_samples < n_features and will invert\n # a matrix of shape (n_samples, n_samples) making use of the\n # woodbury formula:\n # https://en.wikipedia.org/wiki/Woodbury_matrix_identity\n n_samples = X.shape[0]\n X_keep = X[:, keep_lambda]\n inv_lambda = 1 / lambda_[keep_lambda].reshape(1, -1)\n sigma_ = pinvh(\n np.eye(n_samples) / alpha_ + np.dot(X_keep * inv_lambda, X_keep.T)\n )\n sigma_ = np.dot(sigma_, X_keep * inv_lambda)\n sigma_ = - np.dot(inv_lambda.reshape(-1, 1) * X_keep.T, sigma_)\n sigma_[np.diag_indices(sigma_.shape[1])] += 1. / lambda_[keep_lambda]\n return sigma_" + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/fit", + "name": "fit", + "qname": "sklearn.linear_model._bayes.ARDRegression.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/fit/self", + "name": "self", + "qname": "sklearn.linear_model._bayes.ARDRegression.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/fit/X", + "name": "X", + "qname": "sklearn.linear_model._bayes.ARDRegression.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples in the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/fit/y", + "name": "y", + "qname": "sklearn.linear_model._bayes.ARDRegression.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values (integers). Will be cast to X's dtype if necessary" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the ARDRegression model according to the given training data\nand parameters.\n\nIterative procedure to maximize the evidence", + "docstring": "Fit the ARDRegression model according to the given training data\nand parameters.\n\nIterative procedure to maximize the evidence\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\ny : array-like of shape (n_samples,)\n Target values (integers). Will be cast to X's dtype if necessary\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y):\n \"\"\"Fit the ARDRegression model according to the given training data\n and parameters.\n\n Iterative procedure to maximize the evidence\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n y : array-like of shape (n_samples,)\n Target values (integers). 
Will be cast to X's dtype if necessary\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n X, y = self._validate_data(X, y, dtype=np.float64, y_numeric=True,\n ensure_min_samples=2)\n\n n_samples, n_features = X.shape\n coef_ = np.zeros(n_features)\n\n X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data(\n X, y, self.fit_intercept, self.normalize, self.copy_X)\n\n self.X_offset_ = X_offset_\n self.X_scale_ = X_scale_\n\n # Launch the convergence loop\n keep_lambda = np.ones(n_features, dtype=bool)\n\n lambda_1 = self.lambda_1\n lambda_2 = self.lambda_2\n alpha_1 = self.alpha_1\n alpha_2 = self.alpha_2\n verbose = self.verbose\n\n # Initialization of the values of the parameters\n eps = np.finfo(np.float64).eps\n # Add `eps` in the denominator to omit division by zero if `np.var(y)`\n # is zero\n alpha_ = 1. / (np.var(y) + eps)\n lambda_ = np.ones(n_features)\n\n self.scores_ = list()\n coef_old_ = None\n\n def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):\n coef_[keep_lambda] = alpha_ * np.linalg.multi_dot([\n sigma_, X[:, keep_lambda].T, y])\n return coef_\n\n update_sigma = (self._update_sigma if n_samples >= n_features\n else self._update_sigma_woodbury)\n # Iterative procedure of ARDRegression\n for iter_ in range(self.n_iter):\n sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)\n coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)\n\n # Update alpha and lambda\n rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)\n gamma_ = 1. - lambda_[keep_lambda] * np.diag(sigma_)\n lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1) /\n ((coef_[keep_lambda]) ** 2 +\n 2. * lambda_2))\n alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1) /\n (rmse_ + 2. * alpha_2))\n\n # Prune the weights with a precision over a threshold\n keep_lambda = lambda_ < self.threshold_lambda\n coef_[~keep_lambda] = 0\n\n # Compute the objective function\n if self.compute_score:\n s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum()\n s += alpha_1 * log(alpha_) - alpha_2 * alpha_\n s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_) +\n np.sum(np.log(lambda_)))\n s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_ ** 2).sum())\n self.scores_.append(s)\n\n # Check for convergence\n if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:\n if verbose:\n print(\"Converged after %s iterations\" % iter_)\n break\n coef_old_ = np.copy(coef_)\n\n if not keep_lambda.any():\n break\n\n if keep_lambda.any():\n # update sigma and mu using updated params from the last iteration\n sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)\n coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)\n else:\n sigma_ = np.array([]).reshape(0, 0)\n\n self.coef_ = coef_\n self.alpha_ = alpha_\n self.sigma_ = sigma_\n self.lambda_ = lambda_\n self._set_intercept(X_offset_, y_offset_, X_scale_)\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/predict", + "name": "predict", + "qname": "sklearn.linear_model._bayes.ARDRegression.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/predict/self", + "name": "self", + "qname": "sklearn.linear_model._bayes.ARDRegression.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/predict/X", + "name": "X", + "qname": 
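`ARDRegression.fit` above dispatches between `_update_sigma` (inverting an (n_features, n_features) matrix) and `_update_sigma_woodbury` (inverting only an (n_samples, n_samples) matrix) depending on which dimension is smaller; the Woodbury identity guarantees the two routes agree. A standalone NumPy/SciPy check of that identity on illustrative data:

import numpy as np
from scipy.linalg import pinvh

rng = np.random.default_rng(0)
n_samples, n_features = 5, 20                    # the n_samples < n_features regime
X = rng.standard_normal((n_samples, n_features))
alpha = 2.0
lam = rng.uniform(0.5, 3.0, n_features)

# direct route: invert the (n_features, n_features) posterior precision
direct = pinvh(np.diag(lam) + alpha * X.T @ X)

# Woodbury route: only an (n_samples, n_samples) inverse is needed
inv_lam = 1.0 / lam
inner = pinvh(np.eye(n_samples) / alpha + (X * inv_lam) @ X.T)
woodbury = np.diag(inv_lam) - (inv_lam[:, None] * X.T) @ inner @ (X * inv_lam)

print(np.allclose(direct, woodbury))             # True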
"sklearn.linear_model._bayes.ARDRegression.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/ARDRegression/predict/return_std", + "name": "return_std", + "qname": "sklearn.linear_model._bayes.ARDRegression.predict.return_std", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return the standard deviation of posterior prediction." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict using the linear model.\n\nIn addition to the mean of the predictive distribution, also its\nstandard deviation can be returned.", + "docstring": "Predict using the linear model.\n\nIn addition to the mean of the predictive distribution, also its\nstandard deviation can be returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nreturn_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n\nReturns\n-------\ny_mean : array-like of shape (n_samples,)\n Mean of predictive distribution of query points.\n\ny_std : array-like of shape (n_samples,)\n Standard deviation of predictive distribution of query points.", + "code": " def predict(self, X, return_std=False):\n \"\"\"Predict using the linear model.\n\n In addition to the mean of the predictive distribution, also its\n standard deviation can be returned.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\n return_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n\n Returns\n -------\n y_mean : array-like of shape (n_samples,)\n Mean of predictive distribution of query points.\n\n y_std : array-like of shape (n_samples,)\n Standard deviation of predictive distribution of query points.\n \"\"\"\n y_mean = self._decision_function(X)\n if return_std is False:\n return y_mean\n else:\n if self.normalize:\n X = (X - self.X_offset_) / self.X_scale_\n X = X[:, self.lambda_ < self.threshold_lambda]\n sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)\n y_std = np.sqrt(sigmas_squared_data + (1. 
/ self.alpha_))\n return y_mean, y_std" + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/n_iter", + "name": "n_iter", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.n_iter", + "default_value": "300", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations. Should be greater than or equal to 1." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Stop the algorithm if w has converged." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/alpha_1", + "name": "alpha_1", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.alpha_1", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Hyper-parameter : shape parameter for the Gamma distribution prior\nover the alpha parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/alpha_2", + "name": "alpha_2", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.alpha_2", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Hyper-parameter : inverse scale parameter (rate parameter) for the\nGamma distribution prior over the alpha parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/lambda_1", + "name": "lambda_1", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.lambda_1", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Hyper-parameter : shape parameter for the Gamma distribution prior\nover the lambda parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/lambda_2", + "name": "lambda_2", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.lambda_2", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Hyper-parameter : inverse scale parameter (rate parameter) for the\nGamma distribution prior over the lambda parameter." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/alpha_init", + "name": "alpha_init", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.alpha_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Initial value for alpha (precision of the noise).\nIf not set, alpha_init is 1/Var(y).\n\n .. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/lambda_init", + "name": "lambda_init", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.lambda_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Initial value for lambda (precision of the weights).\nIf not set, lambda_init is 1.\n\n .. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/compute_score", + "name": "compute_score", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.compute_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, compute the log marginal likelihood at each iteration of the\noptimization." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model.\nThe intercept is not treated as a probabilistic parameter\nand thus has no associated variance. If set\nto False, no intercept will be used in calculations\n(i.e. data is expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, X will be copied; else, it may be overwritten." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._bayes.BayesianRidge.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Verbose mode when fitting the model." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Bayesian ridge regression.\n\nFit a Bayesian ridge model. See the Notes section for details on this\nimplementation and the optimization of the regularization parameters\nlambda (precision of the weights) and alpha (precision of the noise).\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,\n lambda_1=1.e-6, lambda_2=1.e-6, alpha_init=None,\n lambda_init=None, compute_score=False, fit_intercept=True,\n normalize=False, copy_X=True, verbose=False):\n self.n_iter = n_iter\n self.tol = tol\n self.alpha_1 = alpha_1\n self.alpha_2 = alpha_2\n self.lambda_1 = lambda_1\n self.lambda_2 = lambda_2\n self.alpha_init = alpha_init\n self.lambda_init = lambda_init\n self.compute_score = compute_score\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.copy_X = copy_X\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_log_marginal_likelihood", + "name": "_log_marginal_likelihood", + "qname": "sklearn.linear_model._bayes.BayesianRidge._log_marginal_likelihood", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_log_marginal_likelihood/self", + "name": "self", + "qname": "sklearn.linear_model._bayes.BayesianRidge._log_marginal_likelihood.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_log_marginal_likelihood/n_samples", + "name": "n_samples", + "qname": "sklearn.linear_model._bayes.BayesianRidge._log_marginal_likelihood.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_log_marginal_likelihood/n_features", + "name": "n_features", + "qname": "sklearn.linear_model._bayes.BayesianRidge._log_marginal_likelihood.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_log_marginal_likelihood/eigen_vals", + "name": "eigen_vals", + "qname": "sklearn.linear_model._bayes.BayesianRidge._log_marginal_likelihood.eigen_vals", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_log_marginal_likelihood/alpha_", + "name": "alpha_", + "qname": 
"sklearn.linear_model._bayes.BayesianRidge._log_marginal_likelihood.alpha_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_log_marginal_likelihood/lambda_", + "name": "lambda_", + "qname": "sklearn.linear_model._bayes.BayesianRidge._log_marginal_likelihood.lambda_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_log_marginal_likelihood/coef", + "name": "coef", + "qname": "sklearn.linear_model._bayes.BayesianRidge._log_marginal_likelihood.coef", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_log_marginal_likelihood/rmse", + "name": "rmse", + "qname": "sklearn.linear_model._bayes.BayesianRidge._log_marginal_likelihood.rmse", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Log marginal likelihood.", + "docstring": "Log marginal likelihood.", + "code": " def _log_marginal_likelihood(self, n_samples, n_features, eigen_vals,\n alpha_, lambda_, coef, rmse):\n \"\"\"Log marginal likelihood.\"\"\"\n alpha_1 = self.alpha_1\n alpha_2 = self.alpha_2\n lambda_1 = self.lambda_1\n lambda_2 = self.lambda_2\n\n # compute the log of the determinant of the posterior covariance.\n # posterior covariance is given by\n # sigma = (lambda_ * np.eye(n_features) + alpha_ * np.dot(X.T, X))^-1\n if n_samples > n_features:\n logdet_sigma = - np.sum(np.log(lambda_ + alpha_ * eigen_vals))\n else:\n logdet_sigma = np.full(n_features, lambda_,\n dtype=np.array(lambda_).dtype)\n logdet_sigma[:n_samples] += alpha_ * eigen_vals\n logdet_sigma = - np.sum(np.log(logdet_sigma))\n\n score = lambda_1 * log(lambda_) - lambda_2 * lambda_\n score += alpha_1 * log(alpha_) - alpha_2 * alpha_\n score += 0.5 * (n_features * log(lambda_) +\n n_samples * log(alpha_) -\n alpha_ * rmse -\n lambda_ * np.sum(coef ** 2) +\n logdet_sigma -\n n_samples * log(2 * np.pi))\n\n return score" + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_", + "name": "_update_coef_", + "qname": "sklearn.linear_model._bayes.BayesianRidge._update_coef_", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_/self", + "name": "self", + "qname": "sklearn.linear_model._bayes.BayesianRidge._update_coef_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_/X", + "name": "X", + "qname": "sklearn.linear_model._bayes.BayesianRidge._update_coef_.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_/y", + "name": "y", + "qname": "sklearn.linear_model._bayes.BayesianRidge._update_coef_.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_/n_samples", + "name": "n_samples", + "qname": "sklearn.linear_model._bayes.BayesianRidge._update_coef_.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_/n_features", + "name": "n_features", + "qname": "sklearn.linear_model._bayes.BayesianRidge._update_coef_.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_/XT_y", + "name": "XT_y", + "qname": "sklearn.linear_model._bayes.BayesianRidge._update_coef_.XT_y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_/U", + "name": "U", + "qname": "sklearn.linear_model._bayes.BayesianRidge._update_coef_.U", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_/Vh", + "name": "Vh", + "qname": "sklearn.linear_model._bayes.BayesianRidge._update_coef_.Vh", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_/eigen_vals_", + "name": "eigen_vals_", + "qname": "sklearn.linear_model._bayes.BayesianRidge._update_coef_.eigen_vals_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_/alpha_", + "name": "alpha_", + "qname": "sklearn.linear_model._bayes.BayesianRidge._update_coef_.alpha_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/_update_coef_/lambda_", + "name": "lambda_", + "qname": "sklearn.linear_model._bayes.BayesianRidge._update_coef_.lambda_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Update posterior mean and compute corresponding rmse.\n\nPosterior mean is given by coef_ = scaled_sigma_ * X.T * y where\nscaled_sigma_ = (lambda_/alpha_ * 
np.eye(n_features)\n + np.dot(X.T, X))^-1", + "docstring": "Update posterior mean and compute corresponding rmse.\n\nPosterior mean is given by coef_ = scaled_sigma_ * X.T * y where\nscaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)\n + np.dot(X.T, X))^-1", + "code": " def _update_coef_(self, X, y, n_samples, n_features, XT_y, U, Vh,\n eigen_vals_, alpha_, lambda_):\n \"\"\"Update posterior mean and compute corresponding rmse.\n\n Posterior mean is given by coef_ = scaled_sigma_ * X.T * y where\n scaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)\n + np.dot(X.T, X))^-1\n \"\"\"\n\n if n_samples > n_features:\n coef_ = np.linalg.multi_dot([Vh.T,\n Vh / (eigen_vals_ + lambda_ /\n alpha_)[:, np.newaxis],\n XT_y])\n else:\n coef_ = np.linalg.multi_dot([X.T,\n U / (eigen_vals_ + lambda_ /\n alpha_)[None, :],\n U.T, y])\n\n rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)\n\n return coef_, rmse_" + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/fit", + "name": "fit", + "qname": "sklearn.linear_model._bayes.BayesianRidge.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/fit/self", + "name": "self", + "qname": "sklearn.linear_model._bayes.BayesianRidge.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/fit/X", + "name": "X", + "qname": "sklearn.linear_model._bayes.BayesianRidge.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/fit/y", + "name": "y", + "qname": "sklearn.linear_model._bayes.BayesianRidge.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target values. Will be cast to X's dtype if necessary" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._bayes.BayesianRidge.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Individual weights for each sample\n\n.. versionadded:: 0.20\n parameter *sample_weight* support to BayesianRidge." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model", + "docstring": "Fit the model\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training data\ny : ndarray of shape (n_samples,)\n Target values. Will be cast to X's dtype if necessary\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Individual weights for each sample\n\n .. 
versionadded:: 0.20\n parameter *sample_weight* support to BayesianRidge.\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Training data\n y : ndarray of shape (n_samples,)\n Target values. Will be cast to X's dtype if necessary\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Individual weights for each sample\n\n .. versionadded:: 0.20\n parameter *sample_weight* support to BayesianRidge.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n\n if self.n_iter < 1:\n raise ValueError('n_iter should be greater than or equal to 1.'\n ' Got {!r}.'.format(self.n_iter))\n\n X, y = self._validate_data(X, y, dtype=np.float64, y_numeric=True)\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=X.dtype)\n\n X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data(\n X, y, self.fit_intercept, self.normalize, self.copy_X,\n sample_weight=sample_weight)\n\n if sample_weight is not None:\n # Sample weight can be implemented via a simple rescaling.\n X, y = _rescale_data(X, y, sample_weight)\n\n self.X_offset_ = X_offset_\n self.X_scale_ = X_scale_\n n_samples, n_features = X.shape\n\n # Initialization of the values of the parameters\n eps = np.finfo(np.float64).eps\n # Add `eps` in the denominator to omit division by zero if `np.var(y)`\n # is zero\n alpha_ = self.alpha_init\n lambda_ = self.lambda_init\n if alpha_ is None:\n alpha_ = 1. / (np.var(y) + eps)\n if lambda_ is None:\n lambda_ = 1.\n\n verbose = self.verbose\n lambda_1 = self.lambda_1\n lambda_2 = self.lambda_2\n alpha_1 = self.alpha_1\n alpha_2 = self.alpha_2\n\n self.scores_ = list()\n coef_old_ = None\n\n XT_y = np.dot(X.T, y)\n U, S, Vh = linalg.svd(X, full_matrices=False)\n eigen_vals_ = S ** 2\n\n # Convergence loop of the bayesian ridge regression\n for iter_ in range(self.n_iter):\n\n # update posterior mean coef_ based on alpha_ and lambda_ and\n # compute corresponding rmse\n coef_, rmse_ = self._update_coef_(X, y, n_samples, n_features,\n XT_y, U, Vh, eigen_vals_,\n alpha_, lambda_)\n if self.compute_score:\n # compute the log marginal likelihood\n s = self._log_marginal_likelihood(n_samples, n_features,\n eigen_vals_,\n alpha_, lambda_,\n coef_, rmse_)\n self.scores_.append(s)\n\n # Update alpha and lambda according to (MacKay, 1992)\n gamma_ = np.sum((alpha_ * eigen_vals_) /\n (lambda_ + alpha_ * eigen_vals_))\n lambda_ = ((gamma_ + 2 * lambda_1) /\n (np.sum(coef_ ** 2) + 2 * lambda_2))\n alpha_ = ((n_samples - gamma_ + 2 * alpha_1) /\n (rmse_ + 2 * alpha_2))\n\n # Check for convergence\n if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:\n if verbose:\n print(\"Convergence after \", str(iter_), \" iterations\")\n break\n coef_old_ = np.copy(coef_)\n\n self.n_iter_ = iter_ + 1\n\n # return regularization parameters and corresponding posterior mean,\n # log marginal likelihood and posterior covariance\n self.alpha_ = alpha_\n self.lambda_ = lambda_\n self.coef_, rmse_ = self._update_coef_(X, y, n_samples, n_features,\n XT_y, U, Vh, eigen_vals_,\n alpha_, lambda_)\n if self.compute_score:\n # compute the log marginal likelihood\n s = self._log_marginal_likelihood(n_samples, n_features,\n eigen_vals_,\n alpha_, lambda_,\n coef_, rmse_)\n self.scores_.append(s)\n self.scores_ = np.array(self.scores_)\n\n # posterior covariance is given by 1/alpha_ * scaled_sigma_\n 
scaled_sigma_ = np.dot(Vh.T,\n Vh / (eigen_vals_ +\n lambda_ / alpha_)[:, np.newaxis])\n self.sigma_ = (1. / alpha_) * scaled_sigma_\n\n self._set_intercept(X_offset_, y_offset_, X_scale_)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/predict", + "name": "predict", + "qname": "sklearn.linear_model._bayes.BayesianRidge.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/predict/self", + "name": "self", + "qname": "sklearn.linear_model._bayes.BayesianRidge.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/predict/X", + "name": "X", + "qname": "sklearn.linear_model._bayes.BayesianRidge.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._bayes/BayesianRidge/predict/return_std", + "name": "return_std", + "qname": "sklearn.linear_model._bayes.BayesianRidge.predict.return_std", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return the standard deviation of posterior prediction." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict using the linear model.\n\nIn addition to the mean of the predictive distribution, also its\nstandard deviation can be returned.", + "docstring": "Predict using the linear model.\n\nIn addition to the mean of the predictive distribution, also its\nstandard deviation can be returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nreturn_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n\nReturns\n-------\ny_mean : array-like of shape (n_samples,)\n Mean of predictive distribution of query points.\n\ny_std : array-like of shape (n_samples,)\n Standard deviation of predictive distribution of query points.", + "code": " def predict(self, X, return_std=False):\n \"\"\"Predict using the linear model.\n\n In addition to the mean of the predictive distribution, also its\n standard deviation can be returned.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\n return_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n\n Returns\n -------\n y_mean : array-like of shape (n_samples,)\n Mean of predictive distribution of query points.\n\n y_std : array-like of shape (n_samples,)\n Standard deviation of predictive distribution of query points.\n \"\"\"\n y_mean = self._decision_function(X)\n if return_std is False:\n return y_mean\n else:\n if self.normalize:\n X = (X - self.X_offset_) / self.X_scale_\n sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)\n y_std = np.sqrt(sigmas_squared_data + (1. 
/ self.alpha_))\n return y_mean, y_std" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.alpha", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Constant that multiplies the penalty terms. Defaults to 1.0.\nSee the notes for the exact mathematical meaning of this\nparameter. ``alpha = 0`` is equivalent to an ordinary least square,\nsolved by the :class:`LinearRegression` object. For numerical\nreasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\nGiven this, you should use the :class:`LinearRegression` object." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.l1_ratio", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For\n``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it\nis an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a\ncombination of L1 and L2." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether the intercept should be estimated or not. If ``False``, the\ndata is assumed to be already centered." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.precompute", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or array-like of shape (n_features, n_features)", + "default_value": "False", + "description": "Whether to use a precomputed Gram matrix to speed up\ncalculations. The Gram matrix can also be passed as argument.\nFor sparse input this option is always ``False`` to preserve sparsity." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance for the optimization: if the updates are\nsmaller than ``tol``, the optimization code checks the\ndual gap for optimality and continues until it is smaller\nthan ``tol``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution.\nSee :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/positive", + "name": "positive", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.positive", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, forces the coefficients to be positive." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "The seed of the pseudo random number generator that selects a random\nfeature to update. Used when ``selection`` == 'random'.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/__init__/selection", + "name": "selection", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.__init__.selection", + "default_value": "'cyclic'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'cyclic', 'random'}", + "default_value": "'cyclic'", + "description": "If set to 'random', a random coefficient is updated every iteration\nrather than looping over features sequentially by default. This\n(setting to 'random') often leads to significantly faster convergence\nespecially when tol is higher than 1e-4." + }, + "type": { + "kind": "EnumType", + "values": ["random", "cyclic"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n a * ||w||_1 + 0.5 * b * ||w||_2^2\n\nwhere::\n\n alpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. 
Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide <elastic_net>`.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, l1_ratio=0.5, fit_intercept=True,\n normalize=False, precompute=False, max_iter=1000,\n copy_X=True, tol=1e-4, warm_start=False, positive=False,\n random_state=None, selection='cyclic'):\n self.alpha = alpha\n self.l1_ratio = l1_ratio\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.precompute = precompute\n self.max_iter = max_iter\n self.copy_X = copy_X\n self.tol = tol\n self.warm_start = warm_start\n self.positive = positive\n self.random_state = random_state\n self.selection = selection" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/_decision_function", + "name": "_decision_function", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet._decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/_decision_function/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet._decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/_decision_function/X", + "name": "X", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet._decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "numpy array or scipy.sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "numpy array" + }, + { + "kind": "NamedType", + "name": "scipy.sparse matrix of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Decision function of the linear model.", + "docstring": "Decision function of the linear model.\n\nParameters\n----------\nX : numpy array or scipy.sparse matrix of shape (n_samples, n_features)\n\nReturns\n-------\nT : ndarray of shape (n_samples,)\n The predicted decision function.", + "code": " def _decision_function(self, X):\n \"\"\"Decision function of the linear model.\n\n Parameters\n ----------\n X : numpy array or scipy.sparse matrix of shape (n_samples, n_features)\n\n Returns\n -------\n T : ndarray of shape (n_samples,)\n The predicted decision function.\n \"\"\"\n check_is_fitted(self)\n if sparse.isspmatrix(X):\n return safe_sparse_dot(X, self.coef_.T,\n dense_output=True) + self.intercept_\n else:\n return super()._decision_function(X)" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/fit", + "name": "fit", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/fit/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/fit/X", + "name": 
"X", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of (n_samples, n_features)", + "default_value": "", + "description": "Data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/fit/y", + "name": "y", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target. Will be cast to X's dtype if necessary." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples,) or (n_samples, n_targets)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float or array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weight.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/fit/check_input", + "name": "check_input", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.fit.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you do." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit model with coordinate descent.", + "docstring": "Fit model with coordinate descent.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of (n_samples, n_features)\n Data.\n\ny : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)\n Target. Will be cast to X's dtype if necessary.\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n Sample weight.\n\n .. 
versionadded:: 0.23\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nNotes\n-----\n\nCoordinate descent is an algorithm that considers each column of\ndata at a time hence it will automatically convert the X input\nas a Fortran-contiguous numpy array if necessary.\n\nTo avoid memory re-allocation it is advised to allocate the\ninitial data in memory directly using that format.", + "code": " def fit(self, X, y, sample_weight=None, check_input=True):\n \"\"\"Fit model with coordinate descent.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of (n_samples, n_features)\n Data.\n\n y : {ndarray, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_targets)\n Target. Will be cast to X's dtype if necessary.\n\n sample_weight : float or array-like of shape (n_samples,), default=None\n Sample weight.\n\n .. versionadded:: 0.23\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n Notes\n -----\n\n Coordinate descent is an algorithm that considers each column of\n data at a time hence it will automatically convert the X input\n as a Fortran-contiguous numpy array if necessary.\n\n To avoid memory re-allocation it is advised to allocate the\n initial data in memory directly using that format.\n \"\"\"\n\n if self.alpha == 0:\n warnings.warn(\"With alpha=0, this algorithm does not converge \"\n \"well. You are advised to use the LinearRegression \"\n \"estimator\", stacklevel=2)\n\n if isinstance(self.precompute, str):\n raise ValueError('precompute should be one of True, False or'\n ' array-like. Got %r' % self.precompute)\n\n if (not isinstance(self.l1_ratio, numbers.Number) or\n self.l1_ratio < 0 or self.l1_ratio > 1):\n raise ValueError(\"l1_ratio must be between 0 and 1; \"\n f\"got l1_ratio={self.l1_ratio}\")\n\n # Remember if X is copied\n X_copied = False\n # We expect X and y to be float64 or float32 Fortran ordered arrays\n # when bypassing checks\n if check_input:\n X_copied = self.copy_X and self.fit_intercept\n X, y = self._validate_data(X, y, accept_sparse='csc',\n order='F',\n dtype=[np.float64, np.float32],\n copy=X_copied, multi_output=True,\n y_numeric=True)\n y = check_array(y, order='F', copy=False, dtype=X.dtype.type,\n ensure_2d=False)\n\n n_samples, n_features = X.shape\n alpha = self.alpha\n\n if isinstance(sample_weight, numbers.Number):\n sample_weight = None\n if sample_weight is not None:\n if check_input:\n if sparse.issparse(X):\n raise ValueError(\"Sample weights do not (yet) support \"\n \"sparse matrices.\")\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=X.dtype)\n # simplify things by rescaling sw to sum up to n_samples\n # => np.average(x, weights=sw) = np.mean(sw * x)\n sample_weight *= (n_samples / np.sum(sample_weight))\n # Objective function is:\n # 1/2 * np.average(squared error, weights=sw) + alpha * penalty\n # but coordinate descent minimizes:\n # 1/2 * sum(squared error) + alpha * penalty\n # enet_path therefore sets alpha = n_samples * alpha\n # With sw, enet_path should set alpha = sum(sw) * alpha\n # Therefore, we rescale alpha = sum(sw) / n_samples * alpha\n # Note: As we rescaled sample_weights to sum up to n_samples,\n # we don't need this\n # alpha *= np.sum(sample_weight) / n_samples\n\n # Ensure copying happens only once, don't do it again if done above.\n # X and y will be rescaled if sample_weight is not None, order='F'\n # ensures that the 
returned X and y are still F-contiguous.\n should_copy = self.copy_X and not X_copied\n X, y, X_offset, y_offset, X_scale, precompute, Xy = \\\n _pre_fit(X, y, None, self.precompute, self.normalize,\n self.fit_intercept, copy=should_copy,\n check_input=check_input, sample_weight=sample_weight)\n # coordinate descent needs F-ordered arrays and _pre_fit might have\n # called _rescale_data\n if check_input or sample_weight is not None:\n X, y = _set_order(X, y, order='F')\n if y.ndim == 1:\n y = y[:, np.newaxis]\n if Xy is not None and Xy.ndim == 1:\n Xy = Xy[:, np.newaxis]\n\n n_targets = y.shape[1]\n\n if self.selection not in ['cyclic', 'random']:\n raise ValueError(\"selection should be either random or cyclic.\")\n\n if not self.warm_start or not hasattr(self, \"coef_\"):\n coef_ = np.zeros((n_targets, n_features), dtype=X.dtype,\n order='F')\n else:\n coef_ = self.coef_\n if coef_.ndim == 1:\n coef_ = coef_[np.newaxis, :]\n\n dual_gaps_ = np.zeros(n_targets, dtype=X.dtype)\n self.n_iter_ = []\n\n for k in range(n_targets):\n if Xy is not None:\n this_Xy = Xy[:, k]\n else:\n this_Xy = None\n _, this_coef, this_dual_gap, this_iter = \\\n self.path(X, y[:, k],\n l1_ratio=self.l1_ratio, eps=None,\n n_alphas=None, alphas=[alpha],\n precompute=precompute, Xy=this_Xy,\n fit_intercept=False, normalize=False, copy_X=True,\n verbose=False, tol=self.tol, positive=self.positive,\n X_offset=X_offset, X_scale=X_scale,\n return_n_iter=True, coef_init=coef_[k],\n max_iter=self.max_iter,\n random_state=self.random_state,\n selection=self.selection,\n check_input=False)\n coef_[k] = this_coef[:, 0]\n dual_gaps_[k] = this_dual_gap[0]\n self.n_iter_.append(this_iter[0])\n\n if n_targets == 1:\n self.n_iter_ = self.n_iter_[0]\n self.coef_ = coef_[0]\n self.dual_gap_ = dual_gaps_[0]\n else:\n self.coef_ = coef_\n self.dual_gap_ = dual_gaps_\n\n self._set_intercept(X_offset, y_offset, X_scale)\n\n # workaround since _set_intercept will cast self.coef_ into X.dtype\n self.coef_ = np.asarray(self.coef_, dtype=X.dtype)\n\n # return self for chaining fit and predict calls\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/sparse_coef_@getter", + "name": "sparse_coef_", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.sparse_coef_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNet/sparse_coef_/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNet.sparse_coef_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Sparse representation of the fitted `coef_`.", + "docstring": "Sparse representation of the fitted `coef_`.", + "code": " @property\n def sparse_coef_(self):\n \"\"\"Sparse representation of the fitted `coef_`.\"\"\"\n return sparse.csr_matrix(self.coef_)" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.self", + "default_value": null, 
+ "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.l1_ratio", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or list of float", + "default_value": "0.5", + "description": "float between 0 and 1 passed to ElasticNet (scaling between\nl1 and l2 penalties). For ``l1_ratio = 0``\nthe penalty is an L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty.\nFor ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2\nThis parameter can be a list, in which case the different\nvalues are tested by cross-validation and the one giving the best\nprediction score is used. Note that a good choice of list of\nvalues for l1_ratio is often to put more values close to 1\n(i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n.9, .95, .99, 1]``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "list of float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/eps", + "name": "eps", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.eps", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Length of the path. ``eps=1e-3`` means that\n``alpha_min / alpha_max = 1e-3``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/n_alphas", + "name": "n_alphas", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.n_alphas", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Number of alphas along the regularization path, used for each l1_ratio." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/alphas", + "name": "alphas", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.alphas", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "None", + "description": "List of alphas where to compute the models.\nIf None alphas are set automatically." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.precompute", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto', bool or array-like of shape (n_features, n_features)", + "default_value": "'auto'", + "description": "Whether to use a precomputed Gram matrix to speed up\ncalculations. If set to ``'auto'`` let us decide. The Gram\nmatrix can also be passed as argument." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance for the optimization: if the updates are\nsmaller than ``tol``, the optimization code checks the\ndual gap for optimality and continues until it is smaller\nthan ``tol``." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/cv", + "name": "cv", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- int, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "0", + "description": "Amount of verbosity." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of CPUs to use during the cross validation.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/positive", + "name": "positive", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.positive", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, forces the coefficients to be positive." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "The seed of the pseudo random number generator that selects a random\nfeature to update. Used when ``selection`` == 'random'.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/__init__/selection", + "name": "selection", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV.__init__.selection", + "default_value": "'cyclic'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'cyclic', 'random'}", + "default_value": "'cyclic'", + "description": "If set to 'random', a random coefficient is updated every iteration\nrather than looping over features sequentially by default. This\n(setting to 'random') often leads to significantly faster convergence\nespecially when tol is higher than 1e-4." + }, + "type": { + "kind": "EnumType", + "values": ["random", "cyclic"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Elastic Net model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,\n fit_intercept=True, normalize=False, precompute='auto',\n max_iter=1000, tol=1e-4, cv=None, copy_X=True,\n verbose=0, n_jobs=None, positive=False, random_state=None,\n selection='cyclic'):\n self.l1_ratio = l1_ratio\n self.eps = eps\n self.n_alphas = n_alphas\n self.alphas = alphas\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.precompute = precompute\n self.max_iter = max_iter\n self.tol = tol\n self.cv = cv\n self.copy_X = copy_X\n self.verbose = verbose\n self.n_jobs = n_jobs\n self.positive = positive\n self.random_state = random_state\n self.selection = selection" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/_get_estimator", + "name": "_get_estimator", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV._get_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/_get_estimator/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV._get_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_estimator(self):\n return ElasticNet()" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/_is_multitask", + "name": "_is_multitask", + "qname": 
"sklearn.linear_model._coordinate_descent.ElasticNetCV._is_multitask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/_is_multitask/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV._is_multitask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _is_multitask(self):\n return False" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/_more_tags", + "name": "_more_tags", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/ElasticNetCV/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.ElasticNetCV._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'multioutput': False}" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__.alpha", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Constant that multiplies the L1 term. Defaults to 1.0.\n``alpha = 0`` is equivalent to an ordinary least square, solved\nby the :class:`LinearRegression` object. For numerical\nreasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\nGiven this, you should use the :class:`LinearRegression` object." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set\nto False, no intercept will be used in calculations\n(i.e. data is expected to be centered)." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__.precompute", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or array-like of shape (n_features, n_features)", + "default_value": "False", + "description": "Whether to use a precomputed Gram matrix to speed up\ncalculations. The Gram matrix can also be passed as argument.\nFor sparse input this option is always ``False`` to preserve sparsity." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance for the optimization: if the updates are\nsmaller than ``tol``, the optimization code checks the\ndual gap for optimality and continues until it is smaller\nthan ``tol``." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to True, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution.\nSee :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__/positive", + "name": "positive", + "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__.positive", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, forces the coefficients to be positive." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "The seed of the pseudo random number generator that selects a random\nfeature to update. Used when ``selection`` == 'random'.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/Lasso/__init__/selection", + "name": "selection", + "qname": "sklearn.linear_model._coordinate_descent.Lasso.__init__.selection", + "default_value": "'cyclic'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'cyclic', 'random'}", + "default_value": "'cyclic'", + "description": "If set to 'random', a random coefficient is updated every iteration\nrather than looping over features sequentially by default. This\n(setting to 'random') often leads to significantly faster convergence\nespecially when tol is higher than 1e-4." 
+ }, + "type": { + "kind": "EnumType", + "values": ["random", "cyclic"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Linear Model trained with L1 prior as regularizer (aka the Lasso)\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,\n precompute=False, copy_X=True, max_iter=1000,\n tol=1e-4, warm_start=False, positive=False,\n random_state=None, selection='cyclic'):\n super().__init__(\n alpha=alpha, l1_ratio=1.0, fit_intercept=fit_intercept,\n normalize=normalize, precompute=precompute, copy_X=copy_X,\n max_iter=max_iter, tol=tol, warm_start=warm_start,\n positive=positive, random_state=random_state,\n selection=selection)" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/eps", + "name": "eps", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.eps", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Length of the path. ``eps=1e-3`` means that\n``alpha_min / alpha_max = 1e-3``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/n_alphas", + "name": "n_alphas", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.n_alphas", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Number of alphas along the regularization path." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/alphas", + "name": "alphas", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.alphas", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "None", + "description": "List of alphas where to compute the models.\nIf ``None`` alphas are set automatically." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. 
If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.precompute", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto', bool or array-like of shape (n_features, n_features)", + "default_value": "'auto'", + "description": "Whether to use a precomputed Gram matrix to speed up\ncalculations. If set to ``'auto'`` let us decide. The Gram\nmatrix can also be passed as argument." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance for the optimization: if the updates are\nsmaller than ``tol``, the optimization code checks the\ndual gap for optimality and continues until it is smaller\nthan ``tol``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, X will be copied; else, it may be overwritten." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/cv", + "name": "cv", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- int, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "Amount of verbosity." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of CPUs to use during the cross validation.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/positive", + "name": "positive", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.positive", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If positive, restrict regression coefficients to be positive." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "The seed of the pseudo random number generator that selects a random\nfeature to update. Used when ``selection`` == 'random'.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/__init__/selection", + "name": "selection", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV.__init__.selection", + "default_value": "'cyclic'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'cyclic', 'random'}", + "default_value": "'cyclic'", + "description": "If set to 'random', a random coefficient is updated every iteration\nrather than looping over features sequentially by default. This\n(setting to 'random') often leads to significantly faster convergence\nespecially when tol is higher than 1e-4." + }, + "type": { + "kind": "EnumType", + "values": ["random", "cyclic"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, eps=1e-3, n_alphas=100, alphas=None,\n fit_intercept=True,\n normalize=False, precompute='auto', max_iter=1000, tol=1e-4,\n copy_X=True, cv=None, verbose=False, n_jobs=None,\n positive=False, random_state=None, selection='cyclic'):\n super().__init__(\n eps=eps, n_alphas=n_alphas, alphas=alphas,\n fit_intercept=fit_intercept, normalize=normalize,\n precompute=precompute, max_iter=max_iter, tol=tol, copy_X=copy_X,\n cv=cv, verbose=verbose, n_jobs=n_jobs, positive=positive,\n random_state=random_state, selection=selection)" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/_get_estimator", + "name": "_get_estimator", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV._get_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/_get_estimator/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV._get_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_estimator(self):\n return Lasso()" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/_is_multitask", + "name": "_is_multitask", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV._is_multitask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/_is_multitask/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV._is_multitask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _is_multitask(self):\n return False" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/_more_tags", + 
"name": "_more_tags", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LassoCV/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.LassoCV._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'multioutput': False}" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/eps", + "name": "eps", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.eps", + "default_value": "0.001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/n_alphas", + "name": "n_alphas", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.n_alphas", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/alphas", + "name": "alphas", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.alphas", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.normalize", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.precompute", + "default_value": "'auto'", + "assigned_by": "POSITION_OR_NAME", + "is_public": 
false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.max_iter", + "default_value": "1000", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.tol", + "default_value": "0.0001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.copy_X", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/cv", + "name": "cv", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.cv", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.verbose", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/positive", + "name": "positive", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.positive", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/__init__/selection", + "name": "selection", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.__init__.selection", + "default_value": "'cyclic'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + 
"default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for iterative model fitting along a regularization path.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True,\n normalize=False, precompute='auto', max_iter=1000, tol=1e-4,\n copy_X=True, cv=None, verbose=False, n_jobs=None,\n positive=False, random_state=None, selection='cyclic'):\n self.eps = eps\n self.n_alphas = n_alphas\n self.alphas = alphas\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.precompute = precompute\n self.max_iter = max_iter\n self.tol = tol\n self.copy_X = copy_X\n self.cv = cv\n self.verbose = verbose\n self.n_jobs = n_jobs\n self.positive = positive\n self.random_state = random_state\n self.selection = selection" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/_get_estimator", + "name": "_get_estimator", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV._get_estimator", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/_get_estimator/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV._get_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Model to be fitted after the best alpha has been determined.", + "docstring": "Model to be fitted after the best alpha has been determined.", + "code": " @abstractmethod\n def _get_estimator(self):\n \"\"\"Model to be fitted after the best alpha has been determined.\"\"\"" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/_is_multitask", + "name": "_is_multitask", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV._is_multitask", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/_is_multitask/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV._is_multitask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Bool indicating if class is meant for multidimensional target.", + "docstring": "Bool indicating if class is meant for multidimensional target.", + "code": " @abstractmethod\n def _is_multitask(self):\n \"\"\"Bool indicating if class is meant for multidimensional target.\"\"\"" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/fit", + "name": "fit", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/fit/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/fit/X", + "name": "X", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data. Pass directly as Fortran-contiguous data\nto avoid unnecessary memory duplication. If y is mono-output,\nX can be sparse." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/LinearModelCV/fit/y", + "name": "y", + "qname": "sklearn.linear_model._coordinate_descent.LinearModelCV.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit linear model with coordinate descent.\n\nFit is on grid of alphas and best alpha estimated by cross-validation.", + "docstring": "Fit linear model with coordinate descent.\n\nFit is on grid of alphas and best alpha estimated by cross-validation.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data\n to avoid unnecessary memory duplication. If y is mono-output,\n X can be sparse.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.", + "code": " def fit(self, X, y):\n \"\"\"Fit linear model with coordinate descent.\n\n Fit is on grid of alphas and best alpha estimated by cross-validation.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data\n to avoid unnecessary memory duplication. If y is mono-output,\n X can be sparse.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n \"\"\"\n # This makes sure that there is no duplication in memory.\n # Dealing right with copy_X is important in the following:\n # Multiple functions touch X and subsamples of X and can induce a\n # lot of duplication of memory\n copy_X = self.copy_X and self.fit_intercept\n\n check_y_params = dict(copy=False, dtype=[np.float64, np.float32],\n ensure_2d=False)\n if isinstance(X, np.ndarray) or sparse.isspmatrix(X):\n # Keep a reference to X\n reference_to_old_X = X\n # Let us not impose fortran ordering so far: it is\n # not useful for the cross-validation loop and will be done\n # by the model fitting itself\n\n # Need to validate separately here.\n # We can't pass multi_ouput=True because that would allow y to be\n # csr. 
We also want to allow y to be 64 or 32 but check_X_y only\n # allows to convert for 64.\n check_X_params = dict(accept_sparse='csc',\n dtype=[np.float64, np.float32], copy=False)\n X, y = self._validate_data(X, y,\n validate_separately=(check_X_params,\n check_y_params))\n if sparse.isspmatrix(X):\n if (hasattr(reference_to_old_X, \"data\") and\n not np.may_share_memory(reference_to_old_X.data, X.data)):\n # X is a sparse matrix and has been copied\n copy_X = False\n elif not np.may_share_memory(reference_to_old_X, X):\n # X has been copied\n copy_X = False\n del reference_to_old_X\n else:\n # Need to validate separately here.\n # We can't pass multi_ouput=True because that would allow y to be\n # csr. We also want to allow y to be 64 or 32 but check_X_y only\n # allows to convert for 64.\n check_X_params = dict(accept_sparse='csc',\n dtype=[np.float64, np.float32], order='F',\n copy=copy_X)\n X, y = self._validate_data(X, y,\n validate_separately=(check_X_params,\n check_y_params))\n copy_X = False\n\n if y.shape[0] == 0:\n raise ValueError(\"y has 0 samples: %r\" % y)\n\n if not self._is_multitask():\n if y.ndim > 1 and y.shape[1] > 1:\n raise ValueError(\"For multi-task outputs, use \"\n \"MultiTask%s\" % self.__class__.__name__)\n y = column_or_1d(y, warn=True)\n else:\n if sparse.isspmatrix(X):\n raise TypeError(\"X should be dense but a sparse matrix was\"\n \"passed\")\n elif y.ndim == 1:\n raise ValueError(\"For mono-task outputs, use \"\n \"%sCV\" % self.__class__.__name__[9:])\n\n model = self._get_estimator()\n\n if self.selection not in [\"random\", \"cyclic\"]:\n raise ValueError(\"selection should be either random or cyclic.\")\n\n if X.shape[0] != y.shape[0]:\n raise ValueError(\"X and y have inconsistent dimensions (%d != %d)\"\n % (X.shape[0], y.shape[0]))\n\n # All LinearModelCV parameters except 'cv' are acceptable\n path_params = self.get_params()\n if 'l1_ratio' in path_params:\n l1_ratios = np.atleast_1d(path_params['l1_ratio'])\n # For the first path, we need to set l1_ratio\n path_params['l1_ratio'] = l1_ratios[0]\n else:\n l1_ratios = [1, ]\n path_params.pop('cv', None)\n path_params.pop('n_jobs', None)\n\n alphas = self.alphas\n n_l1_ratio = len(l1_ratios)\n if alphas is None:\n alphas = [_alpha_grid(X, y, l1_ratio=l1_ratio,\n fit_intercept=self.fit_intercept,\n eps=self.eps, n_alphas=self.n_alphas,\n normalize=self.normalize, copy_X=self.copy_X)\n for l1_ratio in l1_ratios]\n else:\n # Making sure alphas is properly ordered.\n alphas = np.tile(np.sort(alphas)[::-1], (n_l1_ratio, 1))\n # We want n_alphas to be the number of alphas used for each l1_ratio.\n n_alphas = len(alphas[0])\n path_params.update({'n_alphas': n_alphas})\n\n path_params['copy_X'] = copy_X\n # We are not computing in parallel, we can modify X\n # inplace in the folds\n if effective_n_jobs(self.n_jobs) > 1:\n path_params['copy_X'] = False\n\n # init cross-validation generator\n cv = check_cv(self.cv)\n\n # Compute path for all folds and compute MSE to get the best alpha\n folds = list(cv.split(X, y))\n best_mse = np.inf\n\n # We do a double for loop folded in one, in order to be able to\n # iterate in parallel on l1_ratio and folds\n jobs = (delayed(_path_residuals)(X, y, train, test, self.path,\n path_params, alphas=this_alphas,\n l1_ratio=this_l1_ratio, X_order='F',\n dtype=X.dtype.type)\n for this_l1_ratio, this_alphas in zip(l1_ratios, alphas)\n for train, test in folds)\n mse_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(prefer=\"threads\"))(jobs)\n 
mse_paths = np.reshape(mse_paths, (n_l1_ratio, len(folds), -1))\n mean_mse = np.mean(mse_paths, axis=1)\n self.mse_path_ = np.squeeze(np.rollaxis(mse_paths, 2, 1))\n for l1_ratio, l1_alphas, mse_alphas in zip(l1_ratios, alphas,\n mean_mse):\n i_best_alpha = np.argmin(mse_alphas)\n this_best_mse = mse_alphas[i_best_alpha]\n if this_best_mse < best_mse:\n best_alpha = l1_alphas[i_best_alpha]\n best_l1_ratio = l1_ratio\n best_mse = this_best_mse\n\n self.l1_ratio_ = best_l1_ratio\n self.alpha_ = best_alpha\n if self.alphas is None:\n self.alphas_ = np.asarray(alphas)\n if n_l1_ratio == 1:\n self.alphas_ = self.alphas_[0]\n # Remove duplicate alphas in case alphas is provided.\n else:\n self.alphas_ = np.asarray(alphas[0])\n\n # Refit the model with the parameters selected\n common_params = {name: value\n for name, value in self.get_params().items()\n if name in model.get_params()}\n model.set_params(**common_params)\n model.alpha = best_alpha\n model.l1_ratio = best_l1_ratio\n model.copy_X = copy_X\n precompute = getattr(self, \"precompute\", None)\n if isinstance(precompute, str) and precompute == \"auto\":\n model.precompute = False\n model.fit(X, y)\n if not hasattr(self, 'l1_ratio'):\n del self.l1_ratio_\n self.coef_ = model.coef_\n self.intercept_ = model.intercept_\n self.dual_gap_ = model.dual_gap_\n self.n_iter_ = model.n_iter_\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.__init__.alpha", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Constant that multiplies the L1/L2 term. Defaults to 1.0." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.__init__.l1_ratio", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\nFor l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\nis an L2 penalty.\nFor ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2." 
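The selection step in the `fit` code above reshapes the per-job MSE paths to (n_l1_ratio, n_folds, n_alphas), averages over folds, and keeps the (l1_ratio, alpha) pair with the lowest mean MSE. A self-contained sketch of that step with synthetic `mse_paths` (in sklearn they come from `_path_residuals`):

import numpy as np

n_l1_ratio, n_folds, n_alphas = 3, 5, 20
l1_ratios = np.array([0.1, 0.5, 0.9])
alphas = np.logspace(0, -3, n_alphas)[None, :].repeat(n_l1_ratio, axis=0)
rng = np.random.RandomState(0)
mse_paths = rng.rand(n_l1_ratio, n_folds, n_alphas)   # stand-in for CV output

mean_mse = mse_paths.mean(axis=1)                     # (n_l1_ratio, n_alphas)
best_mse = np.inf
for l1_ratio, l1_alphas, mse_alphas in zip(l1_ratios, alphas, mean_mse):
    i_best = np.argmin(mse_alphas)
    if mse_alphas[i_best] < best_mse:
        best_mse = mse_alphas[i_best]
        best_alpha, best_l1_ratio = l1_alphas[i_best], l1_ratio
print(best_l1_ratio, best_alpha)                      # selected pair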
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance for the optimization: if the updates are\nsmaller than ``tol``, the optimization code checks the\ndual gap for optimality and continues until it is smaller\nthan ``tol``." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution.\nSee :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "The seed of the pseudo random number generator that selects a random\nfeature to update. Used when ``selection`` == 'random'.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/__init__/selection", + "name": "selection", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.__init__.selection", + "default_value": "'cyclic'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'cyclic', 'random'}", + "default_value": "'cyclic'", + "description": "If set to 'random', a random coefficient is updated every iteration\nrather than looping over features sequentially by default. This\n(setting to 'random') often leads to significantly faster convergence\nespecially when tol is higher than 1e-4." + }, + "type": { + "kind": "EnumType", + "values": ["random", "cyclic"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as\nregularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. 
the sum of norms of each row.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, l1_ratio=0.5, fit_intercept=True,\n normalize=False, copy_X=True, max_iter=1000, tol=1e-4,\n warm_start=False, random_state=None, selection='cyclic'):\n self.l1_ratio = l1_ratio\n self.alpha = alpha\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.max_iter = max_iter\n self.copy_X = copy_X\n self.tol = tol\n self.warm_start = warm_start\n self.random_state = random_state\n self.selection = selection" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/_more_tags", + "name": "_more_tags", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'multioutput_only': True}" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/fit", + "name": "fit", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/fit/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/fit/X", + "name": "X", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Data." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNet/fit/y", + "name": "y", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNet.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_tasks)", + "default_value": "", + "description": "Target. Will be cast to X's dtype if necessary." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_tasks)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit MultiTaskElasticNet model with coordinate descent", + "docstring": "Fit MultiTaskElasticNet model with coordinate descent\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data.\ny : ndarray of shape (n_samples, n_tasks)\n Target. 
Will be cast to X's dtype if necessary.\n\nNotes\n-----\n\nCoordinate descent is an algorithm that considers each column of\ndata at a time hence it will automatically convert the X input\nas a Fortran-contiguous numpy array if necessary.\n\nTo avoid memory re-allocation it is advised to allocate the\ninitial data in memory directly using that format.", + "code": " def fit(self, X, y):\n \"\"\"Fit MultiTaskElasticNet model with coordinate descent\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Data.\n y : ndarray of shape (n_samples, n_tasks)\n Target. Will be cast to X's dtype if necessary.\n\n Notes\n -----\n\n Coordinate descent is an algorithm that considers each column of\n data at a time hence it will automatically convert the X input\n as a Fortran-contiguous numpy array if necessary.\n\n To avoid memory re-allocation it is advised to allocate the\n initial data in memory directly using that format.\n \"\"\"\n # Need to validate separately here.\n # We can't pass multi_ouput=True because that would allow y to be csr.\n check_X_params = dict(dtype=[np.float64, np.float32], order='F',\n copy=self.copy_X and self.fit_intercept)\n check_y_params = dict(ensure_2d=False, order='F')\n X, y = self._validate_data(X, y, validate_separately=(check_X_params,\n check_y_params))\n y = y.astype(X.dtype)\n\n if hasattr(self, 'l1_ratio'):\n model_str = 'ElasticNet'\n else:\n model_str = 'Lasso'\n if y.ndim == 1:\n raise ValueError(\"For mono-task outputs, use %s\" % model_str)\n\n n_samples, n_features = X.shape\n _, n_tasks = y.shape\n\n if n_samples != y.shape[0]:\n raise ValueError(\"X and y have inconsistent dimensions (%d != %d)\"\n % (n_samples, y.shape[0]))\n\n X, y, X_offset, y_offset, X_scale = _preprocess_data(\n X, y, self.fit_intercept, self.normalize, copy=False)\n\n if not self.warm_start or not hasattr(self, \"coef_\"):\n self.coef_ = np.zeros((n_tasks, n_features), dtype=X.dtype.type,\n order='F')\n\n l1_reg = self.alpha * self.l1_ratio * n_samples\n l2_reg = self.alpha * (1.0 - self.l1_ratio) * n_samples\n\n self.coef_ = np.asfortranarray(self.coef_) # coef contiguous in memory\n\n if self.selection not in ['random', 'cyclic']:\n raise ValueError(\"selection should be either random or cyclic.\")\n random = (self.selection == 'random')\n\n self.coef_, self.dual_gap_, self.eps_, self.n_iter_ = \\\n cd_fast.enet_coordinate_descent_multi_task(\n self.coef_, l1_reg, l2_reg, X, y, self.max_iter, self.tol,\n check_random_state(self.random_state), random)\n\n self._set_intercept(X_offset, y_offset, X_scale)\n\n # return self for chaining fit and predict calls\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.l1_ratio", + 
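To make the documented objective concrete, a sketch that fits the real `MultiTaskElasticNet` on synthetic row-sparse data and evaluates the penalized loss by hand; passing Fortran-contiguous X follows the memory advice in the `fit` notes above (transposing `coef_` is needed because it is stored as (n_tasks, n_features)):

import numpy as np
from sklearn.linear_model import MultiTaskElasticNet

rng = np.random.RandomState(0)
n_samples, n_features, n_tasks = 60, 8, 3
X = np.asfortranarray(rng.randn(n_samples, n_features))  # avoids an internal copy
W_true = np.zeros((n_features, n_tasks))
W_true[:2] = rng.randn(2, n_tasks)                       # row-sparse ground truth
Y = X @ W_true + 0.01 * rng.randn(n_samples, n_tasks)

alpha, l1_ratio = 0.1, 0.5
est = MultiTaskElasticNet(alpha=alpha, l1_ratio=l1_ratio).fit(X, Y)
W = est.coef_.T                                          # (n_features, n_tasks)
R = Y - X @ W - est.intercept_
l21 = np.sum(np.sqrt(np.sum(W ** 2, axis=1)))            # ||W||_21: sum of row norms
obj = (np.sum(R ** 2) / (2 * n_samples)
       + alpha * l1_ratio * l21
       + 0.5 * alpha * (1 - l1_ratio) * np.sum(W ** 2))  # ||W||_Fro^2 term
print(obj)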
"default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or list of float", + "default_value": "0.5", + "description": "The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\nFor l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\nis an L2 penalty.\nFor ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\nThis parameter can be a list, in which case the different\nvalues are tested by cross-validation and the one giving the best\nprediction score is used. Note that a good choice of list of\nvalues for l1_ratio is often to put more values close to 1\n(i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n.9, .95, .99, 1]``" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "list of float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/eps", + "name": "eps", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.eps", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Length of the path. ``eps=1e-3`` means that\n``alpha_min / alpha_max = 1e-3``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/n_alphas", + "name": "n_alphas", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.n_alphas", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Number of alphas along the regularization path." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/alphas", + "name": "alphas", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.alphas", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "List of alphas where to compute the models.\nIf not provided, set automatically." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance for the optimization: if the updates are\nsmaller than ``tol``, the optimization code checks the\ndual gap for optimality and continues until it is smaller\nthan ``tol``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/cv", + "name": "cv", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- int, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "0", + "description": "Amount of verbosity." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of CPUs to use during the cross validation. Note that this is\nused only if multiple values for l1_ratio are given.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "The seed of the pseudo random number generator that selects a random\nfeature to update. Used when ``selection`` == 'random'.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/__init__/selection", + "name": "selection", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV.__init__.selection", + "default_value": "'cyclic'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'cyclic', 'random'}", + "default_value": "'cyclic'", + "description": "If set to 'random', a random coefficient is updated every iteration\nrather than looping over features sequentially by default. This\n(setting to 'random') often leads to significantly faster convergence\nespecially when tol is higher than 1e-4." 
+ }, + "type": { + "kind": "EnumType", + "values": ["random", "cyclic"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,\n fit_intercept=True, normalize=False,\n max_iter=1000, tol=1e-4, cv=None, copy_X=True,\n verbose=0, n_jobs=None, random_state=None,\n selection='cyclic'):\n self.l1_ratio = l1_ratio\n self.eps = eps\n self.n_alphas = n_alphas\n self.alphas = alphas\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.max_iter = max_iter\n self.tol = tol\n self.cv = cv\n self.copy_X = copy_X\n self.verbose = verbose\n self.n_jobs = n_jobs\n self.random_state = random_state\n self.selection = selection" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/_get_estimator", + "name": "_get_estimator", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV._get_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/_get_estimator/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV._get_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_estimator(self):\n return MultiTaskElasticNet()" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/_is_multitask", + "name": "_is_multitask", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV._is_multitask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/_is_multitask/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV._is_multitask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _is_multitask(self):\n return True" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/_more_tags", + "name": "_more_tags", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskElasticNetCV/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskElasticNetCV._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
"description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'multioutput_only': True}" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLasso.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLasso.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLasso.__init__.alpha", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Constant that multiplies the L1/L2 term. Defaults to 1.0." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLasso.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLasso.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLasso.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, X will be copied; else, it may be overwritten." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLasso.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLasso.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance for the optimization: if the updates are\nsmaller than ``tol``, the optimization code checks the\ndual gap for optimality and continues until it is smaller\nthan ``tol``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLasso.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution.\nSee :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLasso.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "The seed of the pseudo random number generator that selects a random\nfeature to update. Used when ``selection`` == 'random'.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__/selection", + "name": "selection", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLasso.__init__.selection", + "default_value": "'cyclic'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'cyclic', 'random'}", + "default_value": "'cyclic'", + "description": "If set to 'random', a random coefficient is updated every iteration\nrather than looping over features sequentially by default. 
This\n(setting to 'random') often leads to significantly faster convergence\nespecially when tol is higher than 1e-4" + }, + "type": { + "kind": "EnumType", + "values": ["random", "cyclic"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,\n copy_X=True, max_iter=1000, tol=1e-4, warm_start=False,\n random_state=None, selection='cyclic'):\n self.alpha = alpha\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.max_iter = max_iter\n self.copy_X = copy_X\n self.tol = tol\n self.warm_start = warm_start\n self.l1_ratio = 1.0\n self.random_state = random_state\n self.selection = selection" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/eps", + "name": "eps", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.eps", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Length of the path. ``eps=1e-3`` means that\n``alpha_min / alpha_max = 1e-3``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/n_alphas", + "name": "n_alphas", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.n_alphas", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Number of alphas along the regularization path." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/alphas", + "name": "alphas", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.alphas", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "List of alphas where to compute the models.\nIf not provided, set automatically." 
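Note in the `__init__` code above that `MultiTaskLasso` pins `self.l1_ratio = 1.0`, making it the pure L1/L2 case of the mixed-norm penalty. The practical effect of the ||W||_21 regularizer is that whole coefficient rows are zeroed jointly across tasks, as this synthetic sketch illustrates (exact recovered support depends on alpha and noise):

import numpy as np
from sklearn.linear_model import MultiTaskLasso

rng = np.random.RandomState(0)
X = rng.randn(100, 12)
W_true = np.zeros((12, 4))
W_true[[0, 3]] = rng.randn(2, 4)          # only features 0 and 3 are active
Y = X @ W_true + 0.01 * rng.randn(100, 4)

est = MultiTaskLasso(alpha=0.05).fit(X, Y)
active = np.any(est.coef_ != 0, axis=0)   # coef_ has shape (n_tasks, n_features)
print(np.where(active)[0])                # features kept jointly for all tasks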
+ }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "The tolerance for the optimization: if the updates are\nsmaller than ``tol``, the optimization code checks the\ndual gap for optimality and continues until it is smaller\nthan ``tol``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, X will be copied; else, it may be overwritten." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/cv", + "name": "cv", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- int, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "Amount of verbosity." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of CPUs to use during the cross validation. Note that this is\nused only if multiple values for l1_ratio are given.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "The seed of the pseudo random number generator that selects a random\nfeature to update. Used when ``selection`` == 'random'.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/__init__/selection", + "name": "selection", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV.__init__.selection", + "default_value": "'cyclic'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'cyclic', 'random'}", + "default_value": "'cyclic'", + "description": "If set to 'random', a random coefficient is updated every iteration\nrather than looping over features sequentially by default. This\n(setting to 'random') often leads to significantly faster convergence\nespecially when tol is higher than 1e-4." + }, + "type": { + "kind": "EnumType", + "values": ["random", "cyclic"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, eps=1e-3, n_alphas=100, alphas=None,\n fit_intercept=True,\n normalize=False, max_iter=1000, tol=1e-4, copy_X=True,\n cv=None, verbose=False, n_jobs=None, random_state=None,\n selection='cyclic'):\n super().__init__(\n eps=eps, n_alphas=n_alphas, alphas=alphas,\n fit_intercept=fit_intercept, normalize=normalize,\n max_iter=max_iter, tol=tol, copy_X=copy_X,\n cv=cv, verbose=verbose, n_jobs=n_jobs, random_state=random_state,\n selection=selection)" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/_get_estimator", + "name": "_get_estimator", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV._get_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/_get_estimator/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV._get_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_estimator(self):\n return MultiTaskLasso()" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/_is_multitask", + "name": "_is_multitask", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV._is_multitask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/_is_multitask/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV._is_multitask.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _is_multitask(self):\n return 
True" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/_more_tags", + "name": "_more_tags", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/MultiTaskLassoCV/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._coordinate_descent.MultiTaskLassoCV._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'multioutput_only': True}" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_alpha_grid", + "name": "_alpha_grid", + "qname": "sklearn.linear_model._coordinate_descent._alpha_grid", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_alpha_grid/X", + "name": "X", + "qname": "sklearn.linear_model._coordinate_descent._alpha_grid.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data. Pass directly as Fortran-contiguous data to avoid\nunnecessary memory duplication" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_alpha_grid/y", + "name": "y", + "qname": "sklearn.linear_model._coordinate_descent._alpha_grid.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target values" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_alpha_grid/Xy", + "name": "Xy", + "qname": "sklearn.linear_model._coordinate_descent._alpha_grid.Xy", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features,)", + "default_value": "None", + "description": "Xy = np.dot(X.T, y) that can be precomputed." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_alpha_grid/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._coordinate_descent._alpha_grid.l1_ratio", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "The elastic net mixing parameter, with ``0 < l1_ratio <= 1``.\nFor ``l1_ratio = 0`` the penalty is an L2 penalty. (currently not\nsupported) ``For l1_ratio = 1`` it is an L1 penalty. For\n``0 < l1_ratio <1``, the penalty is a combination of L1 and L2." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_alpha_grid/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._coordinate_descent._alpha_grid.fit_intercept", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to fit an intercept or not" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_alpha_grid/eps", + "name": "eps", + "qname": "sklearn.linear_model._coordinate_descent._alpha_grid.eps", + "default_value": "0.001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Length of the path. ``eps=1e-3`` means that\n``alpha_min / alpha_max = 1e-3``" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_alpha_grid/n_alphas", + "name": "n_alphas", + "qname": "sklearn.linear_model._coordinate_descent._alpha_grid.n_alphas", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Number of alphas along the regularization path" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_alpha_grid/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._coordinate_descent._alpha_grid.normalize", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_alpha_grid/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._coordinate_descent._alpha_grid.copy_X", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the grid of alpha values for elastic net parameter search", + "docstring": "Compute the grid of alpha values for elastic net parameter search\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication\n\ny : ndarray of shape (n_samples,)\n Target values\n\nXy : array-like of shape (n_features,), default=None\n Xy = np.dot(X.T, y) that can be precomputed.\n\nl1_ratio : float, default=1.0\n The elastic net mixing parameter, with ``0 < l1_ratio <= 1``.\n For ``l1_ratio = 0`` the penalty is an L2 penalty. (currently not\n supported) ``For l1_ratio = 1`` it is an L1 penalty. 
For\n ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2.\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``\n\nn_alphas : int, default=100\n Number of alphas along the regularization path\n\nfit_intercept : bool, default=True\n Whether to fit an intercept or not\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.", + "code": "def _alpha_grid(X, y, Xy=None, l1_ratio=1.0, fit_intercept=True,\n eps=1e-3, n_alphas=100, normalize=False, copy_X=True):\n \"\"\" Compute the grid of alpha values for elastic net parameter search\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication\n\n y : ndarray of shape (n_samples,)\n Target values\n\n Xy : array-like of shape (n_features,), default=None\n Xy = np.dot(X.T, y) that can be precomputed.\n\n l1_ratio : float, default=1.0\n The elastic net mixing parameter, with ``0 < l1_ratio <= 1``.\n For ``l1_ratio = 0`` the penalty is an L2 penalty. (currently not\n supported) ``For l1_ratio = 1`` it is an L1 penalty. For\n ``0 < l1_ratio <1``, the penalty is a combination of L1 and L2.\n\n eps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``\n\n n_alphas : int, default=100\n Number of alphas along the regularization path\n\n fit_intercept : bool, default=True\n Whether to fit an intercept or not\n\n normalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n copy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n \"\"\"\n if l1_ratio == 0:\n raise ValueError(\"Automatic alpha grid generation is not supported for\"\n \" l1_ratio=0. 
Please supply a grid by providing \"\n \"your estimator with the appropriate `alphas=` \"\n \"argument.\")\n n_samples = len(y)\n\n sparse_center = False\n if Xy is None:\n X_sparse = sparse.isspmatrix(X)\n sparse_center = X_sparse and (fit_intercept or normalize)\n X = check_array(X, accept_sparse='csc',\n copy=(copy_X and fit_intercept and not X_sparse))\n if not X_sparse:\n # X can be touched inplace thanks to the above line\n X, y, _, _, _ = _preprocess_data(X, y, fit_intercept,\n normalize, copy=False)\n Xy = safe_sparse_dot(X.T, y, dense_output=True)\n\n if sparse_center:\n # Workaround to find alpha_max for sparse matrices.\n # since we should not destroy the sparsity of such matrices.\n _, _, X_offset, _, X_scale = _preprocess_data(X, y, fit_intercept,\n normalize,\n return_mean=True)\n mean_dot = X_offset * np.sum(y)\n\n if Xy.ndim == 1:\n Xy = Xy[:, np.newaxis]\n\n if sparse_center:\n if fit_intercept:\n Xy -= mean_dot[:, np.newaxis]\n if normalize:\n Xy /= X_scale[:, np.newaxis]\n\n alpha_max = (np.sqrt(np.sum(Xy ** 2, axis=1)).max() /\n (n_samples * l1_ratio))\n\n if alpha_max <= np.finfo(float).resolution:\n alphas = np.empty(n_alphas)\n alphas.fill(np.finfo(float).resolution)\n return alphas\n\n return np.logspace(np.log10(alpha_max * eps), np.log10(alpha_max),\n num=n_alphas)[::-1]" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_path_residuals", + "name": "_path_residuals", + "qname": "sklearn.linear_model._coordinate_descent._path_residuals", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_path_residuals/X", + "name": "X", + "qname": "sklearn.linear_model._coordinate_descent._path_residuals.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_path_residuals/y", + "name": "y", + "qname": "sklearn.linear_model._coordinate_descent._path_residuals.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_path_residuals/train", + "name": "train", + "qname": "sklearn.linear_model._coordinate_descent._path_residuals.train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of indices", + "default_value": "", + "description": "The indices of the train set." + }, + "type": { + "kind": "NamedType", + "name": "list of indices" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_path_residuals/test", + "name": "test", + "qname": "sklearn.linear_model._coordinate_descent._path_residuals.test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of indices", + "default_value": "", + "description": "The indices of the test set." 
+ }, + "type": { + "kind": "NamedType", + "name": "list of indices" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_path_residuals/path", + "name": "path", + "qname": "sklearn.linear_model._coordinate_descent._path_residuals.path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Function returning a list of models on the path. See\nenet_path for an example of signature." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_path_residuals/path_params", + "name": "path_params", + "qname": "sklearn.linear_model._coordinate_descent._path_residuals.path_params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dictionary", + "default_value": "", + "description": "Parameters passed to the path function." + }, + "type": { + "kind": "NamedType", + "name": "dictionary" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_path_residuals/alphas", + "name": "alphas", + "qname": "sklearn.linear_model._coordinate_descent._path_residuals.alphas", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "Array of float that is used for cross-validation. If not\nprovided, computed using 'path'." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_path_residuals/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._coordinate_descent._path_residuals.l1_ratio", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1", + "description": "float between 0 and 1 passed to ElasticNet (scaling between\nl1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an\nL2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. For ``0\n< l1_ratio < 1``, the penalty is a combination of L1 and L2." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_path_residuals/X_order", + "name": "X_order", + "qname": "sklearn.linear_model._coordinate_descent._path_residuals.X_order", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'F', 'C'}", + "default_value": "None", + "description": "The order of the arrays expected by the path function to\navoid memory copies." + }, + "type": { + "kind": "EnumType", + "values": ["F", "C"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_path_residuals/dtype", + "name": "dtype", + "qname": "sklearn.linear_model._coordinate_descent._path_residuals.dtype", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "a numpy dtype", + "default_value": "None", + "description": "The dtype of the arrays expected by the path function to\navoid memory copies." 
+ }, + "type": { + "kind": "NamedType", + "name": "a numpy dtype" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the MSE for the models computed by 'path'.", + "docstring": "Returns the MSE for the models computed by 'path'.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\ntrain : list of indices\n The indices of the train set.\n\ntest : list of indices\n The indices of the test set.\n\npath : callable\n Function returning a list of models on the path. See\n enet_path for an example of signature.\n\npath_params : dictionary\n Parameters passed to the path function.\n\nalphas : array-like, default=None\n Array of float that is used for cross-validation. If not\n provided, computed using 'path'.\n\nl1_ratio : float, default=1\n float between 0 and 1 passed to ElasticNet (scaling between\n l1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an\n L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. For ``0\n < l1_ratio < 1``, the penalty is a combination of L1 and L2.\n\nX_order : {'F', 'C'}, default=None\n The order of the arrays expected by the path function to\n avoid memory copies.\n\ndtype : a numpy dtype, default=None\n The dtype of the arrays expected by the path function to\n avoid memory copies.", + "code": "def _path_residuals(X, y, train, test, path, path_params, alphas=None,\n l1_ratio=1, X_order=None, dtype=None):\n \"\"\"Returns the MSE for the models computed by 'path'.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\n train : list of indices\n The indices of the train set.\n\n test : list of indices\n The indices of the test set.\n\n path : callable\n Function returning a list of models on the path. See\n enet_path for an example of signature.\n\n path_params : dictionary\n Parameters passed to the path function.\n\n alphas : array-like, default=None\n Array of float that is used for cross-validation. If not\n provided, computed using 'path'.\n\n l1_ratio : float, default=1\n float between 0 and 1 passed to ElasticNet (scaling between\n l1 and l2 penalties). For ``l1_ratio = 0`` the penalty is an\n L2 penalty. For ``l1_ratio = 1`` it is an L1 penalty. For ``0\n < l1_ratio < 1``, the penalty is a combination of L1 and L2.\n\n X_order : {'F', 'C'}, default=None\n The order of the arrays expected by the path function to\n avoid memory copies.\n\n dtype : a numpy dtype, default=None\n The dtype of the arrays expected by the path function to\n avoid memory copies.\n \"\"\"\n X_train = X[train]\n y_train = y[train]\n X_test = X[test]\n y_test = y[test]\n\n if not sparse.issparse(X):\n for array, array_input in ((X_train, X), (y_train, y),\n (X_test, X), (y_test, y)):\n if array.base is not array_input and not array.flags['WRITEABLE']:\n # fancy indexing should create a writable copy but it doesn't\n # for read-only memmaps (cf. 
numpy#14132).\n array.setflags(write=True)\n\n fit_intercept = path_params['fit_intercept']\n normalize = path_params['normalize']\n\n if y.ndim == 1:\n precompute = path_params['precompute']\n else:\n # No Gram variant of multi-task exists right now.\n # Fall back to default enet_multitask\n precompute = False\n\n X_train, y_train, X_offset, y_offset, X_scale, precompute, Xy = \\\n _pre_fit(X_train, y_train, None, precompute, normalize, fit_intercept,\n copy=False)\n\n path_params = path_params.copy()\n path_params['Xy'] = Xy\n path_params['X_offset'] = X_offset\n path_params['X_scale'] = X_scale\n path_params['precompute'] = precompute\n path_params['copy_X'] = False\n path_params['alphas'] = alphas\n\n if 'l1_ratio' in path_params:\n path_params['l1_ratio'] = l1_ratio\n\n # Do the ordering and type casting here, as if it is done in the path,\n # X is copied and a reference is kept here\n X_train = check_array(X_train, accept_sparse='csc', dtype=dtype,\n order=X_order)\n alphas, coefs, _ = path(X_train, y_train, **path_params)\n del X_train, y_train\n\n if y.ndim == 1:\n # Doing this so that it becomes coherent with multioutput.\n coefs = coefs[np.newaxis, :, :]\n y_offset = np.atleast_1d(y_offset)\n y_test = y_test[:, np.newaxis]\n\n if normalize:\n nonzeros = np.flatnonzero(X_scale)\n coefs[:, nonzeros] /= X_scale[nonzeros][:, np.newaxis]\n\n intercepts = y_offset[:, np.newaxis] - np.dot(X_offset, coefs)\n X_test_coefs = safe_sparse_dot(X_test, coefs)\n residues = X_test_coefs - y_test[:, :, np.newaxis]\n residues += intercepts\n this_mses = ((residues ** 2).mean(axis=0)).mean(axis=0)\n\n return this_mses" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_set_order", + "name": "_set_order", + "qname": "sklearn.linear_model._coordinate_descent._set_order", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_set_order/X", + "name": "X", + "qname": "sklearn.linear_model._coordinate_descent._set_order.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_set_order/y", + "name": "y", + "qname": "sklearn.linear_model._coordinate_descent._set_order.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/_set_order/order", + "name": "order", + "qname": "sklearn.linear_model._coordinate_descent._set_order.order", + "default_value": "'C'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{None, 'C', 'F'}", + "default_value": "", + "description": "If 'C', dense arrays are returned as C-ordered, sparse matrices in csr\nformat. If 'F', dense arrays are return as F-ordered, sparse matrices\nin csc format." 
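[Editorial note — not part of the generated dump.] The order contract recorded for _set_order ('C' maps to C-ordered dense / CSR sparse, 'F' to F-ordered dense / CSC sparse) can be checked directly; a tiny sketch of the equivalent calls, using our own demo data:

    import numpy as np
    from scipy import sparse

    X = np.arange(6.0).reshape(3, 2)           # C-ordered by default
    Xf = np.asarray(X, order='F')              # what _set_order does for dense 'F'
    print(X.flags['C_CONTIGUOUS'], Xf.flags['F_CONTIGUOUS'])  # True True

    S = sparse.random(3, 2, density=0.5, format='csr', random_state=0)
    print(S.asformat('csc').format)            # 'csc' -- the sparse analogue of 'F'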
+ }, + "type": { + "kind": "EnumType", + "values": ["F", "C"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Change the order of X and y if necessary.", + "docstring": "Change the order of X and y if necessary.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Target values.\n\norder : {None, 'C', 'F'}\n If 'C', dense arrays are returned as C-ordered, sparse matrices in csr\n format. If 'F', dense arrays are return as F-ordered, sparse matrices\n in csc format.\n\nReturns\n-------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data with guaranteed order.\n\ny : ndarray of shape (n_samples,)\n Target values with guaranteed order.", + "code": "def _set_order(X, y, order='C'):\n \"\"\"Change the order of X and y if necessary.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : ndarray of shape (n_samples,)\n Target values.\n\n order : {None, 'C', 'F'}\n If 'C', dense arrays are returned as C-ordered, sparse matrices in csr\n format. If 'F', dense arrays are return as F-ordered, sparse matrices\n in csc format.\n\n Returns\n -------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data with guaranteed order.\n\n y : ndarray of shape (n_samples,)\n Target values with guaranteed order.\n \"\"\"\n if order not in [None, 'C', 'F']:\n raise ValueError(\"Unknown value for order. Got {} instead of \"\n \"None, 'C' or 'F'.\".format(order))\n sparse_X = sparse.issparse(X)\n sparse_y = sparse.issparse(y)\n if order is not None:\n sparse_format = \"csc\" if order == \"F\" else \"csr\"\n if sparse_X:\n # As of scipy 1.1.0, new argument copy=False by default.\n # This is what we want.\n X = X.asformat(sparse_format, **_astype_copy_false(X))\n else:\n X = np.asarray(X, order=order)\n if sparse_y:\n y = y.asformat(sparse_format)\n else:\n y = np.asarray(y, order=order)\n return X, y" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path", + "name": "enet_path", + "qname": "sklearn.linear_model._coordinate_descent.enet_path", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/X", + "name": "X", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data. Pass directly as Fortran-contiguous data to avoid\nunnecessary memory duplication. If ``y`` is mono-output then ``X``\ncan be sparse." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/y", + "name": "y", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Target values." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples,) or (n_samples, n_outputs)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.l1_ratio", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "Number between 0 and 1 passed to elastic net (scaling between\nl1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/eps", + "name": "eps", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.eps", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Length of the path. ``eps=1e-3`` means that\n``alpha_min / alpha_max = 1e-3``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/n_alphas", + "name": "n_alphas", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.n_alphas", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Number of alphas along the regularization path." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/alphas", + "name": "alphas", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.alphas", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "None", + "description": "List of alphas where to compute the models.\nIf None alphas are set automatically." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.precompute", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto', bool or array-like of shape (n_features, n_features)", + "default_value": "'auto'", + "description": "Whether to use a precomputed Gram matrix to speed up\ncalculations. If set to ``'auto'`` let us decide. The Gram\nmatrix can also be passed as argument." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/Xy", + "name": "Xy", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.Xy", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features,) or (n_features, n_outputs)", + "default_value": "None", + "description": "Xy = np.dot(X.T, y) that can be precomputed. It is useful\nonly when the Gram matrix is precomputed." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features,) or (n_features, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/coef_init", + "name": "coef_init", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.coef_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features, )", + "default_value": "None", + "description": "The initial values of the coefficients." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features, )" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "Amount of verbosity." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return the number of iterations or not." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/positive", + "name": "positive", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.positive", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If set to True, forces coefficients to be positive.\n(Only allowed when ``y.ndim == 1``)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/check_input", + "name": "check_input", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.check_input", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If set to False, the input validation checks are skipped (including the\nGram matrix when provided). It is assumed that they are handled\nby the caller." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/enet_path/params", + "name": "params", + "qname": "sklearn.linear_model._coordinate_descent.enet_path.params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "kwargs", + "default_value": "", + "description": "Keyword arguments passed to the coordinate descent solver." + }, + "type": { + "kind": "NamedType", + "name": "kwargs" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute elastic net path with coordinate descent.\n\nThe elastic net optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nFor multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute elastic net path with coordinate descent.\n\nThe elastic net optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nFor multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication. If ``y`` is mono-output then ``X``\n can be sparse.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nl1_ratio : float, default=0.5\n Number between 0 and 1 passed to elastic net (scaling between\n l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso.\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If None alphas are set automatically.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nXy : array-like of shape (n_features,) or (n_features, n_outputs), default=None\n Xy = np.dot(X.T, y) that can be precomputed. 
It is useful\n only when the Gram matrix is precomputed.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncoef_init : ndarray of shape (n_features, ), default=None\n The initial values of the coefficients.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations or not.\n\npositive : bool, default=False\n If set to True, forces coefficients to be positive.\n (Only allowed when ``y.ndim == 1``).\n\ncheck_input : bool, default=True\n If set to False, the input validation checks are skipped (including the\n Gram matrix when provided). It is assumed that they are handled\n by the caller.\n\n**params : kwargs\n Keyword arguments passed to the coordinate descent solver.\n\nReturns\n-------\nalphas : ndarray of shape (n_alphas,)\n The alphas along the path where models are computed.\n\ncoefs : ndarray of shape (n_features, n_alphas) or (n_outputs, n_features, n_alphas)\n Coefficients along the path.\n\ndual_gaps : ndarray of shape (n_alphas,)\n The dual gaps at the end of the optimization for each alpha.\n\nn_iters : list of int\n The number of iterations taken by the coordinate descent optimizer to\n reach the specified tolerance for each alpha.\n (Is returned when ``return_n_iter`` is set to True).\n\nSee Also\n--------\nMultiTaskElasticNet\nMultiTaskElasticNetCV\nElasticNet\nElasticNetCV\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n`.", + "code": "@_deprecate_positional_args\ndef enet_path(X, y, *, l1_ratio=0.5, eps=1e-3, n_alphas=100, alphas=None,\n precompute='auto', Xy=None, copy_X=True, coef_init=None,\n verbose=False, return_n_iter=False, positive=False,\n check_input=True, **params):\n \"\"\"\n Compute elastic net path with coordinate descent.\n\n The elastic net optimization function varies for mono and multi-outputs.\n\n For mono-output tasks it is::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\n For multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\n Where::\n\n ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n i.e. the sum of norm of each row.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication. If ``y`` is mono-output then ``X``\n can be sparse.\n\n y : {array-like, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_outputs)\n Target values.\n\n l1_ratio : float, default=0.5\n Number between 0 and 1 passed to elastic net (scaling between\n l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso.\n\n eps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\n n_alphas : int, default=100\n Number of alphas along the regularization path.\n\n alphas : ndarray, default=None\n List of alphas where to compute the models.\n If None alphas are set automatically.\n\n precompute : 'auto', bool or array-like of shape (n_features, n_features),\\\n default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. 
The Gram\n matrix can also be passed as argument.\n\n Xy : array-like of shape (n_features,) or (n_features, n_outputs),\\\n default=None\n Xy = np.dot(X.T, y) that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\n copy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\n coef_init : ndarray of shape (n_features, ), default=None\n The initial values of the coefficients.\n\n verbose : bool or int, default=False\n Amount of verbosity.\n\n return_n_iter : bool, default=False\n Whether to return the number of iterations or not.\n\n positive : bool, default=False\n If set to True, forces coefficients to be positive.\n (Only allowed when ``y.ndim == 1``).\n\n check_input : bool, default=True\n If set to False, the input validation checks are skipped (including the\n Gram matrix when provided). It is assumed that they are handled\n by the caller.\n\n **params : kwargs\n Keyword arguments passed to the coordinate descent solver.\n\n Returns\n -------\n alphas : ndarray of shape (n_alphas,)\n The alphas along the path where models are computed.\n\n coefs : ndarray of shape (n_features, n_alphas) or \\\n (n_outputs, n_features, n_alphas)\n Coefficients along the path.\n\n dual_gaps : ndarray of shape (n_alphas,)\n The dual gaps at the end of the optimization for each alpha.\n\n n_iters : list of int\n The number of iterations taken by the coordinate descent optimizer to\n reach the specified tolerance for each alpha.\n (Is returned when ``return_n_iter`` is set to True).\n\n See Also\n --------\n MultiTaskElasticNet\n MultiTaskElasticNetCV\n ElasticNet\n ElasticNetCV\n\n Notes\n -----\n For an example, see\n :ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n `.\n \"\"\"\n # We expect X and y to be already Fortran ordered when bypassing\n # checks\n if check_input:\n X = check_array(X, accept_sparse='csc', dtype=[np.float64, np.float32],\n order='F', copy=copy_X)\n y = check_array(y, accept_sparse='csc', dtype=X.dtype.type,\n order='F', copy=False, ensure_2d=False)\n if Xy is not None:\n # Xy should be a 1d contiguous array or a 2D C ordered array\n Xy = check_array(Xy, dtype=X.dtype.type, order='C', copy=False,\n ensure_2d=False)\n\n n_samples, n_features = X.shape\n\n multi_output = False\n if y.ndim != 1:\n multi_output = True\n _, n_outputs = y.shape\n\n if multi_output and positive:\n raise ValueError('positive=True is not allowed for multi-output'\n ' (y.ndim != 1)')\n\n # MultiTaskElasticNet does not support sparse matrices\n if not multi_output and sparse.isspmatrix(X):\n if 'X_offset' in params:\n # As sparse matrices are not actually centered we need this\n # to be passed to the CD solver.\n X_sparse_scaling = params['X_offset'] / params['X_scale']\n X_sparse_scaling = np.asarray(X_sparse_scaling, dtype=X.dtype)\n else:\n X_sparse_scaling = np.zeros(n_features, dtype=X.dtype)\n\n # X should be normalized and fit already if function is called\n # from ElasticNet.fit\n if check_input:\n X, y, X_offset, y_offset, X_scale, precompute, Xy = \\\n _pre_fit(X, y, Xy, precompute, normalize=False,\n fit_intercept=False, copy=False, check_input=check_input)\n if alphas is None:\n # No need to normalize of fit_intercept: it has been done\n # above\n alphas = _alpha_grid(X, y, Xy=Xy, l1_ratio=l1_ratio,\n fit_intercept=False, eps=eps, n_alphas=n_alphas,\n normalize=False, copy_X=False)\n else:\n alphas = np.sort(alphas)[::-1] # make sure alphas are properly ordered\n\n n_alphas = len(alphas)\n tol = params.get('tol', 
1e-4)\n max_iter = params.get('max_iter', 1000)\n dual_gaps = np.empty(n_alphas)\n n_iters = []\n\n rng = check_random_state(params.get('random_state', None))\n selection = params.get('selection', 'cyclic')\n if selection not in ['random', 'cyclic']:\n raise ValueError(\"selection should be either random or cyclic.\")\n random = (selection == 'random')\n\n if not multi_output:\n coefs = np.empty((n_features, n_alphas), dtype=X.dtype)\n else:\n coefs = np.empty((n_outputs, n_features, n_alphas),\n dtype=X.dtype)\n\n if coef_init is None:\n coef_ = np.zeros(coefs.shape[:-1], dtype=X.dtype, order='F')\n else:\n coef_ = np.asfortranarray(coef_init, dtype=X.dtype)\n\n for i, alpha in enumerate(alphas):\n l1_reg = alpha * l1_ratio * n_samples\n l2_reg = alpha * (1.0 - l1_ratio) * n_samples\n if not multi_output and sparse.isspmatrix(X):\n model = cd_fast.sparse_enet_coordinate_descent(\n coef_, l1_reg, l2_reg, X.data, X.indices,\n X.indptr, y, X_sparse_scaling,\n max_iter, tol, rng, random, positive)\n elif multi_output:\n model = cd_fast.enet_coordinate_descent_multi_task(\n coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random)\n elif isinstance(precompute, np.ndarray):\n # We expect precompute to be already Fortran ordered when bypassing\n # checks\n if check_input:\n precompute = check_array(precompute, dtype=X.dtype.type,\n order='C')\n model = cd_fast.enet_coordinate_descent_gram(\n coef_, l1_reg, l2_reg, precompute, Xy, y, max_iter,\n tol, rng, random, positive)\n elif precompute is False:\n model = cd_fast.enet_coordinate_descent(\n coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random,\n positive)\n else:\n raise ValueError(\"Precompute should be one of True, False, \"\n \"'auto' or array-like. Got %r\" % precompute)\n coef_, dual_gap_, eps_, n_iter_ = model\n coefs[..., i] = coef_\n dual_gaps[i] = dual_gap_\n n_iters.append(n_iter_)\n\n if verbose:\n if verbose > 2:\n print(model)\n elif verbose > 1:\n print('Path: %03i out of %03i' % (i, n_alphas))\n else:\n sys.stderr.write('.')\n\n if return_n_iter:\n return alphas, coefs, dual_gaps, n_iters\n return alphas, coefs, dual_gaps" + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path", + "name": "lasso_path", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path/X", + "name": "X", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data. Pass directly as Fortran-contiguous data to avoid\nunnecessary memory duplication. If ``y`` is mono-output then ``X``\ncan be sparse." 
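[Editorial note — not part of the generated dump.] Pulling the enet_path entry above together, a minimal call showing the returned triple and the role of l1_ratio; the data is synthetic and purely illustrative:

    import numpy as np
    from sklearn.linear_model import enet_path

    rng = np.random.RandomState(0)
    X = rng.randn(40, 6)
    y = X[:, 0] - X[:, 1] + 0.05 * rng.randn(40)

    # l1_ratio=0.5: an equal mix of L1 and L2 penalties along the whole path.
    alphas, coefs, dual_gaps = enet_path(X, y, l1_ratio=0.5, n_alphas=10)
    print(alphas.shape, coefs.shape, dual_gaps.shape)  # (10,) (6, 10) (10,)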
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path/y", + "name": "y", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Target values" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples,) or (n_samples, n_outputs)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path/eps", + "name": "eps", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path.eps", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Length of the path. ``eps=1e-3`` means that\n``alpha_min / alpha_max = 1e-3``" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path/n_alphas", + "name": "n_alphas", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path.n_alphas", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Number of alphas along the regularization path" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path/alphas", + "name": "alphas", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path.alphas", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "None", + "description": "List of alphas where to compute the models.\nIf ``None`` alphas are set automatically" + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path.precompute", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto', bool or array-like of shape (n_features, n_features)", + "default_value": "'auto'", + "description": "Whether to use a precomputed Gram matrix to speed up\ncalculations. If set to ``'auto'`` let us decide. The Gram\nmatrix can also be passed as argument." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path/Xy", + "name": "Xy", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path.Xy", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features,) or (n_features, n_outputs)", + "default_value": "None", + "description": "Xy = np.dot(X.T, y) that can be precomputed. It is useful\nonly when the Gram matrix is precomputed." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features,) or (n_features, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path/coef_init", + "name": "coef_init", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path.coef_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features, )", + "default_value": "None", + "description": "The initial values of the coefficients." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features, )" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "Amount of verbosity." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "whether to return the number of iterations or not." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path/positive", + "name": "positive", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path.positive", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If set to True, forces coefficients to be positive.\n(Only allowed when ``y.ndim == 1``)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._coordinate_descent/lasso_path/params", + "name": "params", + "qname": "sklearn.linear_model._coordinate_descent.lasso_path.params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "kwargs", + "default_value": "", + "description": "keyword arguments passed to the coordinate descent solver." + }, + "type": { + "kind": "NamedType", + "name": "kwargs" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Lasso path with coordinate descent\n\nThe Lasso optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nFor multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. 
the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute Lasso path with coordinate descent\n\nThe Lasso optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nFor multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication. If ``y`` is mono-output then ``X``\n can be sparse.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``\n\nn_alphas : int, default=100\n Number of alphas along the regularization path\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If ``None`` alphas are set automatically\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nXy : array-like of shape (n_features,) or (n_features, n_outputs), default=None\n Xy = np.dot(X.T, y) that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncoef_init : ndarray of shape (n_features, ), default=None\n The initial values of the coefficients.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nreturn_n_iter : bool, default=False\n whether to return the number of iterations or not.\n\npositive : bool, default=False\n If set to True, forces coefficients to be positive.\n (Only allowed when ``y.ndim == 1``).\n\n**params : kwargs\n keyword arguments passed to the coordinate descent solver.\n\nReturns\n-------\nalphas : ndarray of shape (n_alphas,)\n The alphas along the path where models are computed.\n\ncoefs : ndarray of shape (n_features, n_alphas) or (n_outputs, n_features, n_alphas)\n Coefficients along the path.\n\ndual_gaps : ndarray of shape (n_alphas,)\n The dual gaps at the end of the optimization for each alpha.\n\nn_iters : list of int\n The number of iterations taken by the coordinate descent optimizer to\n reach the specified tolerance for each alpha.\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n`.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nNote that in certain cases, the Lars solver may be significantly\nfaster to implement this functionality. In particular, linear\ninterpolation can be used to retrieve model coefficients between the\nvalues output by lars_path\n\nExamples\n--------\n\nComparing lasso_path and lars_path with interpolation:\n\n>>> X = np.array([[1, 2, 3.1], [2.3, 5.4, 4.3]]).T\n>>> y = np.array([1, 2, 3.1])\n>>> # Use lasso_path to compute a coefficient path\n>>> _, coef_path, _ = lasso_path(X, y, alphas=[5., 1., .5])\n>>> print(coef_path)\n[[0. 0. 
0.46874778]\n [0.2159048 0.4425765 0.23689075]]\n\n>>> # Now use lars_path and 1D linear interpolation to compute the\n>>> # same path\n>>> from sklearn.linear_model import lars_path\n>>> alphas, active, coef_path_lars = lars_path(X, y, method='lasso')\n>>> from scipy import interpolate\n>>> coef_path_continuous = interpolate.interp1d(alphas[::-1],\n... coef_path_lars[:, ::-1])\n>>> print(coef_path_continuous([5., 1., .5]))\n[[0. 0. 0.46915237]\n [0.2159048 0.4425765 0.23668876]]\n\nSee Also\n--------\nlars_path\nLasso\nLassoLars\nLassoCV\nLassoLarsCV\nsklearn.decomposition.sparse_encode", + "code": "@_deprecate_positional_args\ndef lasso_path(X, y, *, eps=1e-3, n_alphas=100, alphas=None,\n precompute='auto', Xy=None, copy_X=True, coef_init=None,\n verbose=False, return_n_iter=False, positive=False, **params):\n \"\"\"Compute Lasso path with coordinate descent\n\n The Lasso optimization function varies for mono and multi-outputs.\n\n For mono-output tasks it is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n For multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\n Where::\n\n ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n i.e. the sum of norm of each row.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication. If ``y`` is mono-output then ``X``\n can be sparse.\n\n y : {array-like, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_outputs)\n Target values\n\n eps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``\n\n n_alphas : int, default=100\n Number of alphas along the regularization path\n\n alphas : ndarray, default=None\n List of alphas where to compute the models.\n If ``None`` alphas are set automatically\n\n precompute : 'auto', bool or array-like of shape (n_features, n_features),\\\n default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\n Xy : array-like of shape (n_features,) or (n_features, n_outputs),\\\n default=None\n Xy = np.dot(X.T, y) that can be precomputed. 
It is useful\n only when the Gram matrix is precomputed.\n\n copy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\n coef_init : ndarray of shape (n_features, ), default=None\n The initial values of the coefficients.\n\n verbose : bool or int, default=False\n Amount of verbosity.\n\n return_n_iter : bool, default=False\n whether to return the number of iterations or not.\n\n positive : bool, default=False\n If set to True, forces coefficients to be positive.\n (Only allowed when ``y.ndim == 1``).\n\n **params : kwargs\n keyword arguments passed to the coordinate descent solver.\n\n Returns\n -------\n alphas : ndarray of shape (n_alphas,)\n The alphas along the path where models are computed.\n\n coefs : ndarray of shape (n_features, n_alphas) or \\\n (n_outputs, n_features, n_alphas)\n Coefficients along the path.\n\n dual_gaps : ndarray of shape (n_alphas,)\n The dual gaps at the end of the optimization for each alpha.\n\n n_iters : list of int\n The number of iterations taken by the coordinate descent optimizer to\n reach the specified tolerance for each alpha.\n\n Notes\n -----\n For an example, see\n :ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n `.\n\n To avoid unnecessary memory duplication the X argument of the fit method\n should be directly passed as a Fortran-contiguous numpy array.\n\n Note that in certain cases, the Lars solver may be significantly\n faster to implement this functionality. In particular, linear\n interpolation can be used to retrieve model coefficients between the\n values output by lars_path\n\n Examples\n --------\n\n Comparing lasso_path and lars_path with interpolation:\n\n >>> X = np.array([[1, 2, 3.1], [2.3, 5.4, 4.3]]).T\n >>> y = np.array([1, 2, 3.1])\n >>> # Use lasso_path to compute a coefficient path\n >>> _, coef_path, _ = lasso_path(X, y, alphas=[5., 1., .5])\n >>> print(coef_path)\n [[0. 0. 0.46874778]\n [0.2159048 0.4425765 0.23689075]]\n\n >>> # Now use lars_path and 1D linear interpolation to compute the\n >>> # same path\n >>> from sklearn.linear_model import lars_path\n >>> alphas, active, coef_path_lars = lars_path(X, y, method='lasso')\n >>> from scipy import interpolate\n >>> coef_path_continuous = interpolate.interp1d(alphas[::-1],\n ... coef_path_lars[:, ::-1])\n >>> print(coef_path_continuous([5., 1., .5]))\n [[0. 0. 
0.46915237]\n [0.2159048 0.4425765 0.23668876]]\n\n See Also\n --------\n lars_path\n Lasso\n LassoLars\n LassoCV\n LassoLarsCV\n sklearn.decomposition.sparse_encode\n \"\"\"\n return enet_path(X, y, l1_ratio=1., eps=eps, n_alphas=n_alphas,\n alphas=alphas, precompute=precompute, Xy=Xy,\n copy_X=copy_X, coef_init=coef_init, verbose=verbose,\n positive=positive, return_n_iter=return_n_iter, **params)" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor.__init__.alpha", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1", + "description": "Constant that multiplies the penalty term and thus determines the\nregularization strength. ``alpha = 0`` is equivalent to unpenalized\nGLMs. In this case, the design matrix `X` must have full column rank\n(no collinearities)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the linear predictor (X @ coef + intercept)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The maximal number of iterations for the solver." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Stopping criterion. For the lbfgs solver,\nthe iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\nwhere ``g_j`` is the j-th component of the gradient (derivative) of\nthe objective function." 
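Editorial aside on the lasso_path record captured above (a sketch, not part of the generated JSON): the snippet below re-runs the docstring's own example, computing the coordinate-descent coefficient path and reproducing it via lars_path plus 1-D linear interpolation between the LARS knots. The toy X/y arrays come straight from the captured docstring.

import numpy as np
from scipy import interpolate
from sklearn.linear_model import lasso_path, lars_path

X = np.array([[1, 2, 3.1], [2.3, 5.4, 4.3]]).T
y = np.array([1, 2, 3.1])

# Coefficient path at three alphas via coordinate descent.
_, coef_path, _ = lasso_path(X, y, alphas=[5., 1., .5])
print(coef_path)

# The same path via LARS, interpolated linearly between its knots.
alphas, active, coef_path_lars = lars_path(X, y, method='lasso')
coef_path_continuous = interpolate.interp1d(alphas[::-1],
                                            coef_path_lars[:, ::-1])
print(coef_path_continuous([5., 1., .5]))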
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If set to ``True``, reuse the solution of the previous call to ``fit``\nas initialization for ``coef_`` and ``intercept_`` ." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "For the lbfgs solver set verbose to any positive number for verbosity." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generalized Linear Model with a Gamma distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23", + "docstring": "", + "code": " def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100,\n tol=1e-4, warm_start=False, verbose=0):\n\n super().__init__(alpha=alpha, fit_intercept=fit_intercept,\n family=\"gamma\", link='log', max_iter=max_iter,\n tol=tol, warm_start=warm_start, verbose=verbose)" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/family@getter", + "name": "family", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor.family", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/family/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor.family.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def family(self):\n # Make this attribute read-only to avoid mis-uses e.g. 
in GridSearch.\n return \"gamma\"" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/family@setter", + "name": "family", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor.family", + "decorators": ["family.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/family/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor.family.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GammaRegressor/family/value", + "name": "value", + "qname": "sklearn.linear_model._glm.glm.GammaRegressor.family.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @family.setter\n def family(self, value):\n if value != \"gamma\":\n raise ValueError(\"GammaRegressor.family must be 'gamma'!\")" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.__init__.alpha", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1", + "description": "Constant that multiplies the penalty term and thus determines the\nregularization strength. ``alpha = 0`` is equivalent to unpenalized\nGLMs. In this case, the design matrix `X` must have full column rank\n(no collinearities)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the linear predictor (X @ coef + intercept)." 
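As a hedged illustration of the GammaRegressor records above (editorial sketch with hypothetical toy data, not part of the JSON payload): the class is a log-link GLM whose family attribute is locked to "gamma" by the property/setter pair just captured, and its targets must be strictly positive.

from sklearn.linear_model import GammaRegressor

X = [[1, 2], [2, 3], [3, 4], [4, 3]]   # hypothetical features
y = [19, 26, 33, 30]                   # strictly positive targets, as the Gamma family requires
clf = GammaRegressor().fit(X, y)
print(clf.score(X, y))                 # D^2, the fraction of deviance explained
print(clf.coef_, clf.intercept_)
# clf.family = "poisson"  # would raise ValueError: the setter only accepts "gamma"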
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/__init__/family", + "name": "family", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.__init__.family", + "default_value": "'normal'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'normal', 'poisson', 'gamma', 'inverse-gaussian'} or an ExponentialDispersionModel instance", + "default_value": "'normal'", + "description": "The distributional assumption of the GLM, i.e. which distribution from\nthe EDM, specifies the loss function to be minimized." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["gamma", "poisson", "inverse-gaussian", "normal"] + }, + { + "kind": "NamedType", + "name": "an ExponentialDispersionModel instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/__init__/link", + "name": "link", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.__init__.link", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'identity', 'log'} or an instance of class BaseLink", + "default_value": "'auto'", + "description": "The link function of the GLM, i.e. mapping from linear predictor\n`X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets\nthe link depending on the chosen family as follows:\n\n- 'identity' for Normal distribution\n- 'log' for Poisson, Gamma and Inverse Gaussian distributions" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "identity", "log"] + }, + { + "kind": "NamedType", + "name": "an instance of class BaseLink" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/__init__/solver", + "name": "solver", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.__init__.solver", + "default_value": "'lbfgs'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'lbfgs'", + "default_value": "'lbfgs'", + "description": "Algorithm to use in the optimization problem:\n\n'lbfgs'\n Calls scipy's L-BFGS-B optimizer." + }, + "type": { + "kind": "NamedType", + "name": "'lbfgs'" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The maximal number of iterations for the solver." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Stopping criterion. For the lbfgs solver,\nthe iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\nwhere ``g_j`` is the j-th component of the gradient (derivative) of\nthe objective function." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If set to ``True``, reuse the solution of the previous call to ``fit``\nas initialization for ``coef_`` and ``intercept_``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "For the lbfgs solver set verbose to any positive number for verbosity." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at\nfitting and predicting the mean of the target y as y_pred=h(X*w).\nTherefore, the fit minimizes the following objective function with L2\npriors as regularizer::\n\n 1/(2*sum(s)) * deviance(y, h(X*w); s)\n + 1/2 * alpha * |w|_2\n\nwith inverse link function h and s=sample_weight.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23", + "docstring": "", + "code": " def __init__(self, *, alpha=1.0,\n fit_intercept=True, family='normal', link='auto',\n solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False,\n verbose=0):\n self.alpha = alpha\n self.fit_intercept = fit_intercept\n self.family = family\n self.link = link\n self.solver = solver\n self.max_iter = max_iter\n self.tol = tol\n self.warm_start = warm_start\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/_linear_predictor", + "name": "_linear_predictor", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor._linear_predictor", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/_linear_predictor/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor._linear_predictor.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/_linear_predictor/X", + "name": "X", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor._linear_predictor.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the linear_predictor = `X @ coef_ + intercept_`.", + "docstring": "Compute the linear_predictor = `X @ coef_ + intercept_`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\ny_pred : array of shape (n_samples,)\n Returns predicted values of linear predictor.", + "code": " def _linear_predictor(self, X):\n \"\"\"Compute the linear_predictor = `X @ coef_ + intercept_`.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\n Returns\n -------\n y_pred : array of shape (n_samples,)\n Returns predicted values of linear predictor.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],\n dtype=[np.float64, np.float32], ensure_2d=True,\n allow_nd=False)\n return X @ self.coef_ + self.intercept_" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/_more_tags", + "name": "_more_tags", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n # create the _family_instance if fit wasn't called yet.\n if hasattr(self, '_family_instance'):\n _family_instance = self._family_instance\n elif isinstance(self.family, ExponentialDispersionModel):\n _family_instance = self.family\n elif self.family in EDM_DISTRIBUTIONS:\n _family_instance = EDM_DISTRIBUTIONS[self.family]()\n else:\n raise ValueError\n return {\"requires_positive_y\": not _family_instance.in_y_range(-1.0)}" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/fit", + "name": "fit", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/fit/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/fit/X", + "name": "X", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/fit/y", + "name": "y", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit a Generalized Linear Model.", + "docstring": "Fit a Generalized Linear Model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit a Generalized Linear Model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n if isinstance(self.family, ExponentialDispersionModel):\n self._family_instance = self.family\n elif self.family in EDM_DISTRIBUTIONS:\n self._family_instance = EDM_DISTRIBUTIONS[self.family]()\n else:\n raise ValueError(\n \"The family must be an instance of class\"\n \" ExponentialDispersionModel or an element of\"\n \" ['normal', 'poisson', 'gamma', 'inverse-gaussian']\"\n \"; got (family={0})\".format(self.family))\n\n # Guarantee that self._link_instance is set to an instance of\n # class BaseLink\n if isinstance(self.link, BaseLink):\n self._link_instance = self.link\n else:\n if self.link == 'auto':\n if isinstance(self._family_instance, TweedieDistribution):\n if self._family_instance.power <= 0:\n self._link_instance = IdentityLink()\n if self._family_instance.power >= 1:\n self._link_instance = LogLink()\n else:\n raise ValueError(\"No default link known for the \"\n \"specified distribution family. Please \"\n \"set link manually, i.e. 
not to 'auto'; \"\n \"got (link='auto', family={})\"\n .format(self.family))\n elif self.link == 'identity':\n self._link_instance = IdentityLink()\n elif self.link == 'log':\n self._link_instance = LogLink()\n else:\n raise ValueError(\n \"The link must be an instance of class Link or \"\n \"an element of ['auto', 'identity', 'log']; \"\n \"got (link={0})\".format(self.link))\n\n if not isinstance(self.alpha, numbers.Number) or self.alpha < 0:\n raise ValueError(\"Penalty term must be a non-negative number;\"\n \" got (alpha={0})\".format(self.alpha))\n if not isinstance(self.fit_intercept, bool):\n raise ValueError(\"The argument fit_intercept must be bool;\"\n \" got {0}\".format(self.fit_intercept))\n if self.solver not in ['lbfgs']:\n raise ValueError(\"GeneralizedLinearRegressor supports only solvers\"\n \"'lbfgs'; got {0}\".format(self.solver))\n solver = self.solver\n if (not isinstance(self.max_iter, numbers.Integral)\n or self.max_iter <= 0):\n raise ValueError(\"Maximum number of iteration must be a positive \"\n \"integer;\"\n \" got (max_iter={0!r})\".format(self.max_iter))\n if not isinstance(self.tol, numbers.Number) or self.tol <= 0:\n raise ValueError(\"Tolerance for stopping criteria must be \"\n \"positive; got (tol={0!r})\".format(self.tol))\n if not isinstance(self.warm_start, bool):\n raise ValueError(\"The argument warm_start must be bool;\"\n \" got {0}\".format(self.warm_start))\n\n family = self._family_instance\n link = self._link_instance\n\n X, y = check_X_y(X, y, accept_sparse=['csc', 'csr'],\n dtype=[np.float64, np.float32],\n y_numeric=True, multi_output=False)\n\n weights = _check_sample_weight(sample_weight, X)\n\n _, n_features = X.shape\n\n if not np.all(family.in_y_range(y)):\n raise ValueError(\"Some value(s) of y are out of the valid \"\n \"range for family {0}\"\n .format(family.__class__.__name__))\n # TODO: if alpha=0 check that X is not rank deficient\n\n # rescaling of sample_weight\n #\n # IMPORTANT NOTE: Since we want to minimize\n # 1/(2*sum(sample_weight)) * deviance + L2,\n # deviance = sum(sample_weight * unit_deviance),\n # we rescale weights such that sum(weights) = 1 and this becomes\n # 1/2*deviance + L2 with deviance=sum(weights * unit_deviance)\n weights = weights / weights.sum()\n\n if self.warm_start and hasattr(self, 'coef_'):\n if self.fit_intercept:\n coef = np.concatenate((np.array([self.intercept_]),\n self.coef_))\n else:\n coef = self.coef_\n else:\n if self.fit_intercept:\n coef = np.zeros(n_features+1)\n coef[0] = link(np.average(y, weights=weights))\n else:\n coef = np.zeros(n_features)\n\n # algorithms for optimization\n\n if solver == 'lbfgs':\n def func(coef, X, y, weights, alpha, family, link):\n y_pred, devp = _y_pred_deviance_derivative(\n coef, X, y, weights, family, link\n )\n dev = family.deviance(y, y_pred, weights)\n # offset if coef[0] is intercept\n offset = 1 if self.fit_intercept else 0\n coef_scaled = alpha * coef[offset:]\n obj = 0.5 * dev + 0.5 * (coef[offset:] @ coef_scaled)\n objp = 0.5 * devp\n objp[offset:] += coef_scaled\n return obj, objp\n\n args = (X, y, weights, self.alpha, family, link)\n\n opt_res = scipy.optimize.minimize(\n func, coef, method=\"L-BFGS-B\", jac=True,\n options={\n \"maxiter\": self.max_iter,\n \"iprint\": (self.verbose > 0) - 1,\n \"gtol\": self.tol,\n \"ftol\": 1e3*np.finfo(float).eps,\n },\n args=args)\n self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res)\n coef = opt_res.x\n\n if self.fit_intercept:\n self.intercept_ = coef[0]\n self.coef_ = coef[1:]\n else:\n # set 
intercept to zero as the other linear models do\n self.intercept_ = 0.\n self.coef_ = coef\n\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/predict", + "name": "predict", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/predict/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/predict/X", + "name": "X", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict using GLM with feature matrix X.", + "docstring": "Predict using GLM with feature matrix X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\ny_pred : array of shape (n_samples,)\n Returns predicted values.", + "code": " def predict(self, X):\n \"\"\"Predict using GLM with feature matrix X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\n Returns\n -------\n y_pred : array of shape (n_samples,)\n Returns predicted values.\n \"\"\"\n # check_array is done in _linear_predictor\n eta = self._linear_predictor(X)\n y_pred = self._link_instance.inverse(eta)\n return y_pred" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/score", + "name": "score", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/score/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/score/X", + "name": "X", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Test samples." 
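The fit and predict records above show the lbfgs-only solver minimizing 0.5 * deviance + 0.5 * alpha * |w|_2^2 after rescaling sample weights to sum to one, and predict applying the inverse link to X @ coef_ + intercept_. A minimal sketch under stated assumptions (the class is private in this version, so the import path below mirrors the record's qname; the data are hypothetical):

import numpy as np
from sklearn.linear_model._glm.glm import GeneralizedLinearRegressor

rng = np.random.RandomState(0)
X = rng.uniform(size=(100, 2))
y = rng.poisson(lam=np.exp(1.0 + X @ np.array([0.5, -0.25])))  # nonnegative counts

glm = GeneralizedLinearRegressor(family='poisson', link='auto', alpha=0.1)
glm.fit(X, y)  # link='auto' resolves to the log link for the Poisson family

# predict is the inverse link applied to the linear predictor.
eta = X @ glm.coef_ + glm.intercept_
print(np.allclose(glm.predict(X), np.exp(eta)))  # True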
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/score/y", + "name": "y", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "True values of target." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/GeneralizedLinearRegressor/score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._glm.glm.GeneralizedLinearRegressor.score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute D^2, the percentage of deviance explained.\n\nD^2 is a generalization of the coefficient of determination R^2.\nR^2 uses squared error and D^2 deviance. Note that those two are equal\nfor ``family='normal'``.\n\nD^2 is defined as\n:math:`D^2 = 1-\\frac{D(y_{true},y_{pred})}{D_{null}}`,\n:math:`D_{null}` is the null deviance, i.e. the deviance of a model\nwith intercept alone, which corresponds to :math:`y_{pred} = \\bar{y}`.\nThe mean :math:`\\bar{y}` is averaged by sample_weight.\nBest possible score is 1.0 and it can be negative (because the model\ncan be arbitrarily worse).", + "docstring": "Compute D^2, the percentage of deviance explained.\n\nD^2 is a generalization of the coefficient of determination R^2.\nR^2 uses squared error and D^2 deviance. Note that those two are equal\nfor ``family='normal'``.\n\nD^2 is defined as\n:math:`D^2 = 1-\\frac{D(y_{true},y_{pred})}{D_{null}}`,\n:math:`D_{null}` is the null deviance, i.e. the deviance of a model\nwith intercept alone, which corresponds to :math:`y_{pred} = \\bar{y}`.\nThe mean :math:`\\bar{y}` is averaged by sample_weight.\nBest possible score is 1.0 and it can be negative (because the model\ncan be arbitrarily worse).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,)\n True values of target.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n D^2 of self.predict(X) w.r.t. y.", + "code": " def score(self, X, y, sample_weight=None):\n \"\"\"Compute D^2, the percentage of deviance explained.\n\n D^2 is a generalization of the coefficient of determination R^2.\n R^2 uses squared error and D^2 deviance. Note that those two are equal\n for ``family='normal'``.\n\n D^2 is defined as\n :math:`D^2 = 1-\\\\frac{D(y_{true},y_{pred})}{D_{null}}`,\n :math:`D_{null}` is the null deviance, i.e. 
the deviance of a model\n with intercept alone, which corresponds to :math:`y_{pred} = \\\\bar{y}`.\n The mean :math:`\\\\bar{y}` is averaged by sample_weight.\n Best possible score is 1.0 and it can be negative (because the model\n can be arbitrarily worse).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Test samples.\n\n y : array-like of shape (n_samples,)\n True values of target.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n D^2 of self.predict(X) w.r.t. y.\n \"\"\"\n # Note, default score defined in RegressorMixin is R^2 score.\n # TODO: make D^2 a score function in module metrics (and thereby get\n # input validation and so on)\n weights = _check_sample_weight(sample_weight, X)\n y_pred = self.predict(X)\n dev = self._family_instance.deviance(y, y_pred, weights=weights)\n y_mean = np.average(y, weights=weights)\n dev_null = self._family_instance.deviance(y, y_mean, weights=weights)\n return 1 - dev / dev_null" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.__init__.alpha", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1", + "description": "Constant that multiplies the penalty term and thus determines the\nregularization strength. ``alpha = 0`` is equivalent to unpenalized\nGLMs. In this case, the design matrix `X` must have full column rank\n(no collinearities)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the linear predictor (X @ coef + intercept)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The maximal number of iterations for the solver." 
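The score record above defines D^2 = 1 - D(y_true, y_pred) / D_null, with D_null the deviance of an intercept-only model predicting the (weighted) mean of y. A sketch reproducing score from sklearn.metrics.mean_poisson_deviance, assuming uniform sample weights so that mean and summed unit deviances cancel in the ratio (data are hypothetical):

import numpy as np
from sklearn.linear_model import PoissonRegressor
from sklearn.metrics import mean_poisson_deviance

X = np.array([[1.0], [2.0], [3.0], [4.0]])
y = np.array([2.0, 3.0, 6.0, 8.0])
reg = PoissonRegressor().fit(X, y)

dev = mean_poisson_deviance(y, reg.predict(X))
dev_null = mean_poisson_deviance(y, np.full_like(y, y.mean()))
print(np.isclose(reg.score(X, y), 1 - dev / dev_null))  # True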
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Stopping criterion. For the lbfgs solver,\nthe iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\nwhere ``g_j`` is the j-th component of the gradient (derivative) of\nthe objective function." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If set to ``True``, reuse the solution of the previous call to ``fit``\nas initialization for ``coef_`` and ``intercept_`` ." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "For the lbfgs solver set verbose to any positive number for verbosity." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generalized Linear Model with a Poisson distribution.\n\nThis regressor uses the 'log' link function.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23", + "docstring": "", + "code": " def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100,\n tol=1e-4, warm_start=False, verbose=0):\n\n super().__init__(alpha=alpha, fit_intercept=fit_intercept,\n family=\"poisson\", link='log', max_iter=max_iter,\n tol=tol, warm_start=warm_start, verbose=verbose)" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/family@getter", + "name": "family", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.family", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/family/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.family.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def family(self):\n # Make this attribute read-only to avoid mis-uses e.g. 
in GridSearch.\n return \"poisson\"" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/family@setter", + "name": "family", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.family", + "decorators": ["family.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/family/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.family.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/PoissonRegressor/family/value", + "name": "value", + "qname": "sklearn.linear_model._glm.glm.PoissonRegressor.family.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @family.setter\n def family(self, value):\n if value != \"poisson\":\n raise ValueError(\"PoissonRegressor.family must be 'poisson'!\")" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/__init__/power", + "name": "power", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.__init__.power", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "The power determines the underlying target distribution according\nto the following table:\n\n+-------+------------------------+\n| Power | Distribution |\n+=======+========================+\n| 0 | Normal |\n+-------+------------------------+\n| 1 | Poisson |\n+-------+------------------------+\n| (1,2) | Compound Poisson Gamma |\n+-------+------------------------+\n| 2 | Gamma |\n+-------+------------------------+\n| 3 | Inverse Gaussian |\n+-------+------------------------+\n\nFor ``0 < power < 1``, no distribution exists." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.__init__.alpha", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1", + "description": "Constant that multiplies the penalty term and thus determines the\nregularization strength. ``alpha = 0`` is equivalent to unpenalized\nGLMs. In this case, the design matrix `X` must have full column rank\n(no collinearities)." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the linear predictor (X @ coef + intercept)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/__init__/link", + "name": "link", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.__init__.link", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'identity', 'log'}", + "default_value": "'auto'", + "description": "The link function of the GLM, i.e. mapping from linear predictor\n`X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets\nthe link depending on the chosen family as follows:\n\n- 'identity' for Normal distribution\n- 'log' for Poisson, Gamma and Inverse Gaussian distributions" + }, + "type": { + "kind": "EnumType", + "values": ["auto", "identity", "log"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The maximal number of iterations for the solver." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Stopping criterion. For the lbfgs solver,\nthe iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\nwhere ``g_j`` is the j-th component of the gradient (derivative) of\nthe objective function." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If set to ``True``, reuse the solution of the previous call to ``fit``\nas initialization for ``coef_`` and ``intercept_`` ." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "For the lbfgs solver set verbose to any positive number for verbosity." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generalized Linear Model with a Tweedie distribution.\n\nThis estimator can be used to model different GLMs depending on the\n``power`` parameter, which determines the underlying distribution.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23", + "docstring": "", + "code": " def __init__(self, *, power=0.0, alpha=1.0, fit_intercept=True,\n link='auto', max_iter=100, tol=1e-4,\n warm_start=False, verbose=0):\n\n super().__init__(alpha=alpha, fit_intercept=fit_intercept,\n family=TweedieDistribution(power=power), link=link,\n max_iter=max_iter, tol=tol,\n warm_start=warm_start, verbose=verbose)" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/family@getter", + "name": "family", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.family", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/family/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.family.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def family(self):\n # We use a property with a setter to make sure that the family is\n # always a Tweedie distribution, and that self.power and\n # self.family.power are identical by construction.\n dist = TweedieDistribution(power=self.power)\n # TODO: make the returned object immutable\n return dist" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/family@setter", + "name": "family", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.family", + "decorators": ["family.setter"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/family/self", + "name": "self", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.family.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/TweedieRegressor/family/value", + "name": "value", + "qname": "sklearn.linear_model._glm.glm.TweedieRegressor.family.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @family.setter\n def family(self, value):\n if isinstance(value, TweedieDistribution):\n self.power = value.power\n else:\n raise TypeError(\"TweedieRegressor.family must be of type \"\n \"TweedieDistribution!\")" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/_safe_lin_pred", + "name": "_safe_lin_pred", + "qname": "sklearn.linear_model._glm.glm._safe_lin_pred", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/_safe_lin_pred/X", + "name": "X", + "qname": "sklearn.linear_model._glm.glm._safe_lin_pred.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", 
+ "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/_safe_lin_pred/coef", + "name": "coef", + "qname": "sklearn.linear_model._glm.glm._safe_lin_pred.coef", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the linear predictor taking care if intercept is present.", + "docstring": "Compute the linear predictor taking care if intercept is present.", + "code": "def _safe_lin_pred(X, coef):\n \"\"\"Compute the linear predictor taking care if intercept is present.\"\"\"\n if coef.size == X.shape[1] + 1:\n return X @ coef[1:] + coef[0]\n else:\n return X @ coef" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/_y_pred_deviance_derivative", + "name": "_y_pred_deviance_derivative", + "qname": "sklearn.linear_model._glm.glm._y_pred_deviance_derivative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/_y_pred_deviance_derivative/coef", + "name": "coef", + "qname": "sklearn.linear_model._glm.glm._y_pred_deviance_derivative.coef", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/_y_pred_deviance_derivative/X", + "name": "X", + "qname": "sklearn.linear_model._glm.glm._y_pred_deviance_derivative.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/_y_pred_deviance_derivative/y", + "name": "y", + "qname": "sklearn.linear_model._glm.glm._y_pred_deviance_derivative.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/_y_pred_deviance_derivative/weights", + "name": "weights", + "qname": "sklearn.linear_model._glm.glm._y_pred_deviance_derivative.weights", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/_y_pred_deviance_derivative/family", + "name": "family", + "qname": "sklearn.linear_model._glm.glm._y_pred_deviance_derivative.family", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.glm/_y_pred_deviance_derivative/link", + "name": "link", + "qname": "sklearn.linear_model._glm.glm._y_pred_deviance_derivative.link", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute y_pred and the derivative of the deviance w.r.t coef.", + "docstring": "Compute y_pred and the derivative of the deviance w.r.t coef.", + "code": "def _y_pred_deviance_derivative(coef, X, 
y, weights, family, link):\n \"\"\"Compute y_pred and the derivative of the deviance w.r.t coef.\"\"\"\n lin_pred = _safe_lin_pred(X, coef)\n y_pred = link.inverse(lin_pred)\n d1 = link.inverse_derivative(lin_pred)\n temp = d1 * family.deviance_derivative(y, y_pred, weights)\n if coef.size == X.shape[1] + 1:\n devp = np.concatenate(([temp.sum()], temp @ X))\n else:\n devp = temp @ X # same as X.T @ temp\n return y_pred, devp" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/BaseLink/__call__", + "name": "__call__", + "qname": "sklearn.linear_model._glm.link.BaseLink.__call__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/BaseLink/__call__/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.BaseLink.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/BaseLink/__call__/y_pred", + "name": "y_pred", + "qname": "sklearn.linear_model._glm.link.BaseLink.__call__.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Usually the (predicted) mean." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the link function g(y_pred).\n\nThe link function links the mean y_pred=E[Y] to the so called linear\npredictor (X*w), i.e. g(y_pred) = linear predictor.", + "docstring": "Compute the link function g(y_pred).\n\nThe link function links the mean y_pred=E[Y] to the so called linear\npredictor (X*w), i.e. g(y_pred) = linear predictor.\n\nParameters\n----------\ny_pred : array of shape (n_samples,)\n Usually the (predicted) mean.", + "code": " @abstractmethod\n def __call__(self, y_pred):\n \"\"\"Compute the link function g(y_pred).\n\n The link function links the mean y_pred=E[Y] to the so called linear\n predictor (X*w), i.e. g(y_pred) = linear predictor.\n\n Parameters\n ----------\n y_pred : array of shape (n_samples,)\n Usually the (predicted) mean.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/BaseLink/derivative", + "name": "derivative", + "qname": "sklearn.linear_model._glm.link.BaseLink.derivative", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/BaseLink/derivative/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.BaseLink.derivative.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/BaseLink/derivative/y_pred", + "name": "y_pred", + "qname": "sklearn.linear_model._glm.link.BaseLink.derivative.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Usually the (predicted) mean." 
+ }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the derivative of the link g'(y_pred).", + "docstring": "Compute the derivative of the link g'(y_pred).\n\nParameters\n----------\ny_pred : array of shape (n_samples,)\n Usually the (predicted) mean.", + "code": " @abstractmethod\n def derivative(self, y_pred):\n \"\"\"Compute the derivative of the link g'(y_pred).\n\n Parameters\n ----------\n y_pred : array of shape (n_samples,)\n Usually the (predicted) mean.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/BaseLink/inverse", + "name": "inverse", + "qname": "sklearn.linear_model._glm.link.BaseLink.inverse", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/BaseLink/inverse/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.BaseLink.inverse.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/BaseLink/inverse/lin_pred", + "name": "lin_pred", + "qname": "sklearn.linear_model._glm.link.BaseLink.inverse.lin_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Usually the (fitted) linear predictor." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the inverse link function h(lin_pred).\n\nGives the inverse relationship between linear predictor and the mean\ny_pred=E[Y], i.e. h(linear predictor) = y_pred.", + "docstring": "Compute the inverse link function h(lin_pred).\n\nGives the inverse relationship between linear predictor and the mean\ny_pred=E[Y], i.e. h(linear predictor) = y_pred.\n\nParameters\n----------\nlin_pred : array of shape (n_samples,)\n Usually the (fitted) linear predictor.", + "code": " @abstractmethod\n def inverse(self, lin_pred):\n \"\"\"Compute the inverse link function h(lin_pred).\n\n Gives the inverse relationship between linear predictor and the mean\n y_pred=E[Y], i.e. 
h(linear predictor) = y_pred.\n\n Parameters\n ----------\n lin_pred : array of shape (n_samples,)\n Usually the (fitted) linear predictor.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/BaseLink/inverse_derivative", + "name": "inverse_derivative", + "qname": "sklearn.linear_model._glm.link.BaseLink.inverse_derivative", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/BaseLink/inverse_derivative/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.BaseLink.inverse_derivative.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/BaseLink/inverse_derivative/lin_pred", + "name": "lin_pred", + "qname": "sklearn.linear_model._glm.link.BaseLink.inverse_derivative.lin_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Usually the (fitted) linear predictor." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the derivative of the inverse link function h'(lin_pred).", + "docstring": "Compute the derivative of the inverse link function h'(lin_pred).\n\nParameters\n----------\nlin_pred : array of shape (n_samples,)\n Usually the (fitted) linear predictor.", + "code": " @abstractmethod\n def inverse_derivative(self, lin_pred):\n \"\"\"Compute the derivative of the inverse link function h'(lin_pred).\n\n Parameters\n ----------\n lin_pred : array of shape (n_samples,)\n Usually the (fitted) linear predictor.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/__call__", + "name": "__call__", + "qname": "sklearn.linear_model._glm.link.IdentityLink.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/__call__/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.IdentityLink.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/__call__/y_pred", + "name": "y_pred", + "qname": "sklearn.linear_model._glm.link.IdentityLink.__call__.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __call__(self, y_pred):\n return y_pred" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/derivative", + "name": "derivative", + "qname": "sklearn.linear_model._glm.link.IdentityLink.derivative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/derivative/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.IdentityLink.derivative.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.linear_model._glm.link/IdentityLink/derivative/y_pred", + "name": "y_pred", + "qname": "sklearn.linear_model._glm.link.IdentityLink.derivative.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def derivative(self, y_pred):\n return np.ones_like(y_pred)" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/inverse", + "name": "inverse", + "qname": "sklearn.linear_model._glm.link.IdentityLink.inverse", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/inverse/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.IdentityLink.inverse.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/inverse/lin_pred", + "name": "lin_pred", + "qname": "sklearn.linear_model._glm.link.IdentityLink.inverse.lin_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def inverse(self, lin_pred):\n return lin_pred" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/inverse_derivative", + "name": "inverse_derivative", + "qname": "sklearn.linear_model._glm.link.IdentityLink.inverse_derivative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/inverse_derivative/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.IdentityLink.inverse_derivative.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/IdentityLink/inverse_derivative/lin_pred", + "name": "lin_pred", + "qname": "sklearn.linear_model._glm.link.IdentityLink.inverse_derivative.lin_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def inverse_derivative(self, lin_pred):\n return np.ones_like(lin_pred)" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogLink/__call__", + "name": "__call__", + "qname": "sklearn.linear_model._glm.link.LogLink.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogLink/__call__/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.LogLink.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogLink/__call__/y_pred", + "name": "y_pred", + "qname": "sklearn.linear_model._glm.link.LogLink.__call__.y_pred", + "default_value": null, 
+ "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __call__(self, y_pred):\n return np.log(y_pred)" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogLink/derivative", + "name": "derivative", + "qname": "sklearn.linear_model._glm.link.LogLink.derivative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogLink/derivative/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.LogLink.derivative.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogLink/derivative/y_pred", + "name": "y_pred", + "qname": "sklearn.linear_model._glm.link.LogLink.derivative.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def derivative(self, y_pred):\n return 1 / y_pred" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogLink/inverse", + "name": "inverse", + "qname": "sklearn.linear_model._glm.link.LogLink.inverse", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogLink/inverse/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.LogLink.inverse.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogLink/inverse/lin_pred", + "name": "lin_pred", + "qname": "sklearn.linear_model._glm.link.LogLink.inverse.lin_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def inverse(self, lin_pred):\n return np.exp(lin_pred)" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogLink/inverse_derivative", + "name": "inverse_derivative", + "qname": "sklearn.linear_model._glm.link.LogLink.inverse_derivative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogLink/inverse_derivative/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.LogLink.inverse_derivative.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogLink/inverse_derivative/lin_pred", + "name": "lin_pred", + "qname": "sklearn.linear_model._glm.link.LogLink.inverse_derivative.lin_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + 
"code": " def inverse_derivative(self, lin_pred):\n return np.exp(lin_pred)" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogitLink/__call__", + "name": "__call__", + "qname": "sklearn.linear_model._glm.link.LogitLink.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogitLink/__call__/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.LogitLink.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogitLink/__call__/y_pred", + "name": "y_pred", + "qname": "sklearn.linear_model._glm.link.LogitLink.__call__.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __call__(self, y_pred):\n return logit(y_pred)" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogitLink/derivative", + "name": "derivative", + "qname": "sklearn.linear_model._glm.link.LogitLink.derivative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogitLink/derivative/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.LogitLink.derivative.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogitLink/derivative/y_pred", + "name": "y_pred", + "qname": "sklearn.linear_model._glm.link.LogitLink.derivative.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def derivative(self, y_pred):\n return 1 / (y_pred * (1 - y_pred))" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogitLink/inverse", + "name": "inverse", + "qname": "sklearn.linear_model._glm.link.LogitLink.inverse", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogitLink/inverse/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.LogitLink.inverse.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogitLink/inverse/lin_pred", + "name": "lin_pred", + "qname": "sklearn.linear_model._glm.link.LogitLink.inverse.lin_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def inverse(self, lin_pred):\n return expit(lin_pred)" + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogitLink/inverse_derivative", + "name": "inverse_derivative", + "qname": "sklearn.linear_model._glm.link.LogitLink.inverse_derivative", + "decorators": 
[], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogitLink/inverse_derivative/self", + "name": "self", + "qname": "sklearn.linear_model._glm.link.LogitLink.inverse_derivative.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._glm.link/LogitLink/inverse_derivative/lin_pred", + "name": "lin_pred", + "qname": "sklearn.linear_model._glm.link.LogitLink.inverse_derivative.lin_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def inverse_derivative(self, lin_pred):\n ep = expit(lin_pred)\n return ep * (1 - ep)" + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/HuberRegressor/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._huber.HuberRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._huber/HuberRegressor/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._huber.HuberRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/HuberRegressor/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.linear_model._huber.HuberRegressor.__init__.epsilon", + "default_value": "1.35", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float, greater than 1.0", + "default_value": "1.35", + "description": "The parameter epsilon controls the number of samples that should be\nclassified as outliers. The smaller the epsilon, the more robust it is\nto outliers." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "greater than 1.0" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/HuberRegressor/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._huber.HuberRegressor.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Maximum number of iterations that\n``scipy.optimize.minimize(method=\"L-BFGS-B\")`` should run for." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/HuberRegressor/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._huber.HuberRegressor.__init__.alpha", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0001", + "description": "Regularization parameter." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/HuberRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._huber.HuberRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This is useful if the stored attributes of a previously used model\nhas to be reused. If set to False, then the coefficients will\nbe rewritten for every call to fit.\nSee :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/HuberRegressor/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._huber.HuberRegressor.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not to fit the intercept. This can be set to False\nif the data is already centered around the origin." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/HuberRegressor/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._huber.HuberRegressor.__init__.tol", + "default_value": "1e-05", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-05", + "description": "The iteration will stop when\n``max{|proj g_i | i = 1, ..., n}`` <= ``tol``\nwhere pg_i is the i-th component of the projected gradient." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Linear regression model that is robust to outliers.\n\nThe Huber Regressor optimizes the squared loss for the samples where\n``|(y - X'w) / sigma| < epsilon`` and the absolute loss for the samples\nwhere ``|(y - X'w) / sigma| > epsilon``, where w and sigma are parameters\nto be optimized. The parameter sigma makes sure that if y is scaled up\nor down by a certain factor, one does not need to rescale epsilon to\nachieve the same robustness. Note that this does not take into account\nthe fact that the different features of X may be of different scales.\n\nThis makes sure that the loss function is not heavily influenced by the\noutliers while not completely ignoring their effect.\n\nRead more in the :ref:`User Guide `\n\n.. 
versionadded:: 0.18", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, epsilon=1.35, max_iter=100, alpha=0.0001,\n warm_start=False, fit_intercept=True, tol=1e-05):\n self.epsilon = epsilon\n self.max_iter = max_iter\n self.alpha = alpha\n self.warm_start = warm_start\n self.fit_intercept = fit_intercept\n self.tol = tol" + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/HuberRegressor/fit", + "name": "fit", + "qname": "sklearn.linear_model._huber.HuberRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._huber/HuberRegressor/fit/self", + "name": "self", + "qname": "sklearn.linear_model._huber.HuberRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/HuberRegressor/fit/X", + "name": "X", + "qname": "sklearn.linear_model._huber.HuberRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples in the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/HuberRegressor/fit/y", + "name": "y", + "qname": "sklearn.linear_model._huber.HuberRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples,)", + "default_value": "", + "description": "Target vector relative to X." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/HuberRegressor/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._huber.HuberRegressor.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples,)", + "default_value": "", + "description": "Weight given to each sample." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model according to the given training data.", + "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like, shape (n_samples,)\n Target vector relative to X.\n\nsample_weight : array-like, shape (n_samples,)\n Weight given to each sample.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model according to the given training data.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\n y : array-like, shape (n_samples,)\n Target vector relative to X.\n\n sample_weight : array-like, shape (n_samples,)\n Weight given to each sample.\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._validate_data(\n X, y, copy=False, accept_sparse=['csr'], y_numeric=True,\n dtype=[np.float64, np.float32])\n\n sample_weight = _check_sample_weight(sample_weight, X)\n\n if self.epsilon < 1.0:\n raise ValueError(\n \"epsilon should be greater than or equal to 1.0, got %f\"\n % self.epsilon)\n\n if self.warm_start and hasattr(self, 'coef_'):\n parameters = np.concatenate(\n (self.coef_, [self.intercept_, self.scale_]))\n else:\n if self.fit_intercept:\n parameters = np.zeros(X.shape[1] + 2)\n else:\n parameters = np.zeros(X.shape[1] + 1)\n # Make sure to initialize the scale parameter to a strictly\n # positive value:\n parameters[-1] = 1\n\n # Sigma or the scale factor should be non-negative.\n # Setting it to be zero might cause undefined bounds hence we set it\n # to a value close to zero.\n bounds = np.tile([-np.inf, np.inf], (parameters.shape[0], 1))\n bounds[-1][0] = np.finfo(np.float64).eps * 10\n\n opt_res = optimize.minimize(\n _huber_loss_and_gradient, parameters, method=\"L-BFGS-B\", jac=True,\n args=(X, y, self.epsilon, self.alpha, sample_weight),\n options={\"maxiter\": self.max_iter, \"gtol\": self.tol, \"iprint\": -1},\n bounds=bounds)\n\n parameters = opt_res.x\n\n if opt_res.status == 2:\n raise ValueError(\"HuberRegressor convergence failed:\"\n \" l-BFGS-b solver terminated with %s\"\n % opt_res.message)\n self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res, self.max_iter)\n self.scale_ = parameters[-1]\n if self.fit_intercept:\n self.intercept_ = parameters[-2]\n else:\n self.intercept_ = 0.0\n self.coef_ = parameters[:X.shape[1]]\n\n residual = np.abs(\n y - safe_sparse_dot(X, self.coef_) - self.intercept_)\n self.outliers_ = residual > self.scale_ * self.epsilon\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/_huber_loss_and_gradient", + "name": "_huber_loss_and_gradient", + "qname": "sklearn.linear_model._huber._huber_loss_and_gradient", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._huber/_huber_loss_and_gradient/w", + "name": "w", + "qname": "sklearn.linear_model._huber._huber_loss_and_gradient.w", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, shape (n_features + 1,) or (n_features + 
2,)", + "default_value": "", + "description": "Feature vector.\nw[:n_features] gives the coefficients\nw[-1] gives the scale factor and if the intercept is fit w[-2]\ngives the intercept factor." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "shape (n_features + 1,) or (n_features + 2,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/_huber_loss_and_gradient/X", + "name": "X", + "qname": "sklearn.linear_model._huber._huber_loss_and_gradient.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/_huber_loss_and_gradient/y", + "name": "y", + "qname": "sklearn.linear_model._huber._huber_loss_and_gradient.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target vector." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/_huber_loss_and_gradient/epsilon", + "name": "epsilon", + "qname": "sklearn.linear_model._huber._huber_loss_and_gradient.epsilon", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Robustness of the Huber estimator." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/_huber_loss_and_gradient/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._huber._huber_loss_and_gradient.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Regularization parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._huber/_huber_loss_and_gradient/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._huber._huber_loss_and_gradient.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Weight assigned to each sample." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the Huber loss and the gradient.", + "docstring": "Returns the Huber loss and the gradient.\n\nParameters\n----------\nw : ndarray, shape (n_features + 1,) or (n_features + 2,)\n Feature vector.\n w[:n_features] gives the coefficients\n w[-1] gives the scale factor and if the intercept is fit w[-2]\n gives the intercept factor.\n\nX : ndarray of shape (n_samples, n_features)\n Input data.\n\ny : ndarray of shape (n_samples,)\n Target vector.\n\nepsilon : float\n Robustness of the Huber estimator.\n\nalpha : float\n Regularization parameter.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Weight assigned to each sample.\n\nReturns\n-------\nloss : float\n Huber loss.\n\ngradient : ndarray, shape (len(w))\n Returns the derivative of the Huber loss with respect to each\n coefficient, intercept and the scale as a vector.", + "code": "def _huber_loss_and_gradient(w, X, y, epsilon, alpha, sample_weight=None):\n \"\"\"Returns the Huber loss and the gradient.\n\n Parameters\n ----------\n w : ndarray, shape (n_features + 1,) or (n_features + 2,)\n Feature vector.\n w[:n_features] gives the coefficients\n w[-1] gives the scale factor and if the intercept is fit w[-2]\n gives the intercept factor.\n\n X : ndarray of shape (n_samples, n_features)\n Input data.\n\n y : ndarray of shape (n_samples,)\n Target vector.\n\n epsilon : float\n Robustness of the Huber estimator.\n\n alpha : float\n Regularization parameter.\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Weight assigned to each sample.\n\n Returns\n -------\n loss : float\n Huber loss.\n\n gradient : ndarray, shape (len(w))\n Returns the derivative of the Huber loss with respect to each\n coefficient, intercept and the scale as a vector.\n \"\"\"\n _, n_features = X.shape\n fit_intercept = (n_features + 2 == w.shape[0])\n if fit_intercept:\n intercept = w[-2]\n sigma = w[-1]\n w = w[:n_features]\n n_samples = np.sum(sample_weight)\n\n # Calculate the values where |y - X'w -c / sigma| > epsilon\n # The values above this threshold are outliers.\n linear_loss = y - safe_sparse_dot(X, w)\n if fit_intercept:\n linear_loss -= intercept\n abs_linear_loss = np.abs(linear_loss)\n outliers_mask = abs_linear_loss > epsilon * sigma\n\n # Calculate the linear loss due to the outliers.\n # This is equal to (2 * M * |y - X'w -c / sigma| - M**2) * sigma\n outliers = abs_linear_loss[outliers_mask]\n num_outliers = np.count_nonzero(outliers_mask)\n n_non_outliers = X.shape[0] - num_outliers\n\n # n_sq_outliers includes the weight give to the outliers while\n # num_outliers is just the number of outliers.\n outliers_sw = sample_weight[outliers_mask]\n n_sw_outliers = np.sum(outliers_sw)\n outlier_loss = (2. 
* epsilon * np.sum(outliers_sw * outliers) -\n sigma * n_sw_outliers * epsilon ** 2)\n\n # Calculate the quadratic loss due to the non-outliers.-\n # This is equal to |(y - X'w - c)**2 / sigma**2| * sigma\n non_outliers = linear_loss[~outliers_mask]\n weighted_non_outliers = sample_weight[~outliers_mask] * non_outliers\n weighted_loss = np.dot(weighted_non_outliers.T, non_outliers)\n squared_loss = weighted_loss / sigma\n\n if fit_intercept:\n grad = np.zeros(n_features + 2)\n else:\n grad = np.zeros(n_features + 1)\n\n # Gradient due to the squared loss.\n X_non_outliers = -axis0_safe_slice(X, ~outliers_mask, n_non_outliers)\n grad[:n_features] = (\n 2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))\n\n # Gradient due to the linear loss.\n signed_outliers = np.ones_like(outliers)\n signed_outliers_mask = linear_loss[outliers_mask] < 0\n signed_outliers[signed_outliers_mask] = -1.0\n X_outliers = axis0_safe_slice(X, outliers_mask, num_outliers)\n sw_outliers = sample_weight[outliers_mask] * signed_outliers\n grad[:n_features] -= 2. * epsilon * (\n safe_sparse_dot(sw_outliers, X_outliers))\n\n # Gradient due to the penalty.\n grad[:n_features] += alpha * 2. * w\n\n # Gradient due to sigma.\n grad[-1] = n_samples\n grad[-1] -= n_sw_outliers * epsilon ** 2\n grad[-1] -= squared_loss / sigma\n\n # Gradient due to the intercept.\n if fit_intercept:\n grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma\n grad[-2] -= 2. * epsilon * np.sum(sw_outliers)\n\n loss = n_samples * sigma + squared_loss + outlier_loss\n loss += alpha * np.dot(w, w)\n return loss, grad" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._least_angle.Lars.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._least_angle.Lars.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._least_angle.Lars.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._least_angle.Lars.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "Sets the verbosity amount." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._least_angle.Lars.__init__.normalize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/__init__/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._least_angle.Lars.__init__.precompute", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool, 'auto' or array-like", + "default_value": "'auto'", + "description": "Whether to use a precomputed Gram matrix to speed up\ncalculations. If set to ``'auto'`` let us decide. The Gram\nmatrix can also be passed as argument." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "array-like" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/__init__/n_nonzero_coefs", + "name": "n_nonzero_coefs", + "qname": "sklearn.linear_model._least_angle.Lars.__init__.n_nonzero_coefs", + "default_value": "500", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "500", + "description": "Target number of non-zero coefficients. Use ``np.inf`` for no limit." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/__init__/eps", + "name": "eps", + "qname": "sklearn.linear_model._least_angle.Lars.__init__.eps", + "default_value": "np.finfo(float).eps", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "np.finfo(float).eps", + "description": "The machine-precision regularization in the computation of the\nCholesky diagonal factors. Increase this for very ill-conditioned\nsystems. Unlike the ``tol`` parameter in some iterative\noptimization-based algorithms, this parameter does not control\nthe tolerance of the optimization." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._least_angle.Lars.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, X will be copied; else, it may be overwritten." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/__init__/fit_path", + "name": "fit_path", + "qname": "sklearn.linear_model._least_angle.Lars.__init__.fit_path", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True the full path is stored in the ``coef_path_`` attribute.\nIf you compute the solution for a large problem or many targets,\nsetting ``fit_path`` to ``False`` will lead to a speedup, especially\nwith a small alpha." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/__init__/jitter", + "name": "jitter", + "qname": "sklearn.linear_model._least_angle.Lars.__init__.jitter", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Upper bound on a uniform noise parameter to be added to the\n`y` values, to satisfy the model's assumption of\none-at-a-time computations. Might help with stability.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._least_angle.Lars.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for jittering. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `. Ignored if `jitter` is None.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Least Angle Regression model a.k.a. 
LAR\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, fit_intercept=True, verbose=False, normalize=True,\n precompute='auto', n_nonzero_coefs=500,\n eps=np.finfo(float).eps, copy_X=True, fit_path=True,\n jitter=None, random_state=None):\n self.fit_intercept = fit_intercept\n self.verbose = verbose\n self.normalize = normalize\n self.precompute = precompute\n self.n_nonzero_coefs = n_nonzero_coefs\n self.eps = eps\n self.copy_X = copy_X\n self.fit_path = fit_path\n self.jitter = jitter\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/_fit", + "name": "_fit", + "qname": "sklearn.linear_model._least_angle.Lars._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/_fit/self", + "name": "self", + "qname": "sklearn.linear_model._least_angle.Lars._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/_fit/X", + "name": "X", + "qname": "sklearn.linear_model._least_angle.Lars._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/_fit/y", + "name": "y", + "qname": "sklearn.linear_model._least_angle.Lars._fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/_fit/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._least_angle.Lars._fit.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/_fit/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._least_angle.Lars._fit.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/_fit/fit_path", + "name": "fit_path", + "qname": "sklearn.linear_model._least_angle.Lars._fit.fit_path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/_fit/Xy", + "name": "Xy", + "qname": "sklearn.linear_model._least_angle.Lars._fit.Xy", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Auxiliary method to fit the model using X, y as training data", + "docstring": "Auxiliary method to fit the model using X, y as training data", + "code": " def _fit(self, X, y, max_iter, alpha, fit_path, Xy=None):\n \"\"\"Auxiliary method to fit the model using X, y as training data\"\"\"\n n_features = 
X.shape[1]\n\n X, y, X_offset, y_offset, X_scale = self._preprocess_data(\n X, y, self.fit_intercept, self.normalize, self.copy_X)\n\n if y.ndim == 1:\n y = y[:, np.newaxis]\n\n n_targets = y.shape[1]\n\n Gram = self._get_gram(self.precompute, X, y)\n\n self.alphas_ = []\n self.n_iter_ = []\n self.coef_ = np.empty((n_targets, n_features))\n\n if fit_path:\n self.active_ = []\n self.coef_path_ = []\n for k in range(n_targets):\n this_Xy = None if Xy is None else Xy[:, k]\n alphas, active, coef_path, n_iter_ = lars_path(\n X, y[:, k], Gram=Gram, Xy=this_Xy, copy_X=self.copy_X,\n copy_Gram=True, alpha_min=alpha, method=self.method,\n verbose=max(0, self.verbose - 1), max_iter=max_iter,\n eps=self.eps, return_path=True,\n return_n_iter=True, positive=self.positive)\n self.alphas_.append(alphas)\n self.active_.append(active)\n self.n_iter_.append(n_iter_)\n self.coef_path_.append(coef_path)\n self.coef_[k] = coef_path[:, -1]\n\n if n_targets == 1:\n self.alphas_, self.active_, self.coef_path_, self.coef_ = [\n a[0] for a in (self.alphas_, self.active_, self.coef_path_,\n self.coef_)]\n self.n_iter_ = self.n_iter_[0]\n else:\n for k in range(n_targets):\n this_Xy = None if Xy is None else Xy[:, k]\n alphas, _, self.coef_[k], n_iter_ = lars_path(\n X, y[:, k], Gram=Gram, Xy=this_Xy, copy_X=self.copy_X,\n copy_Gram=True, alpha_min=alpha, method=self.method,\n verbose=max(0, self.verbose - 1), max_iter=max_iter,\n eps=self.eps, return_path=False, return_n_iter=True,\n positive=self.positive)\n self.alphas_.append(alphas)\n self.n_iter_.append(n_iter_)\n if n_targets == 1:\n self.alphas_ = self.alphas_[0]\n self.n_iter_ = self.n_iter_[0]\n\n self._set_intercept(X_offset, y_offset, X_scale)\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/_get_gram", + "name": "_get_gram", + "qname": "sklearn.linear_model._least_angle.Lars._get_gram", + "decorators": ["staticmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/_get_gram/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._least_angle.Lars._get_gram.precompute", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/_get_gram/X", + "name": "X", + "qname": "sklearn.linear_model._least_angle.Lars._get_gram.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/_get_gram/y", + "name": "y", + "qname": "sklearn.linear_model._least_angle.Lars._get_gram.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @staticmethod\n def _get_gram(precompute, X, y):\n if (not hasattr(precompute, '__array__')) and (\n (precompute is True) or\n (precompute == 'auto' and X.shape[0] > X.shape[1]) or\n (precompute == 'auto' and y.shape[1] > 1)):\n precompute = np.dot(X.T, X)\n\n return precompute" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/fit", + "name": "fit", + "qname": "sklearn.linear_model._least_angle.Lars.fit", + "decorators": 
[], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/fit/self", + "name": "self", + "qname": "sklearn.linear_model._least_angle.Lars.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/fit/X", + "name": "X", + "qname": "sklearn.linear_model._least_angle.Lars.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/fit/y", + "name": "y", + "qname": "sklearn.linear_model._least_angle.Lars.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/Lars/fit/Xy", + "name": "Xy", + "qname": "sklearn.linear_model._least_angle.Lars.fit.Xy", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "None", + "description": "Xy = np.dot(X.T, y) that can be precomputed. It is useful\nonly when the Gram matrix is precomputed." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model using X, y as training data.", + "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nXy : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Xy = np.dot(X.T, y) that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\nReturns\n-------\nself : object\n returns an instance of self.", + "code": " def fit(self, X, y, Xy=None):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\n Xy : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n default=None\n Xy = np.dot(X.T, y) that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\n Returns\n -------\n self : object\n returns an instance of self.\n \"\"\"\n X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)\n\n alpha = getattr(self, 'alpha', 0.)\n if hasattr(self, 'n_nonzero_coefs'):\n alpha = 0. 
# n_nonzero_coefs parametrization takes priority\n max_iter = self.n_nonzero_coefs\n else:\n max_iter = self.max_iter\n\n if self.jitter is not None:\n rng = check_random_state(self.random_state)\n\n noise = rng.uniform(high=self.jitter, size=len(y))\n y = y + noise\n\n self._fit(X, y, max_iter=max_iter, alpha=alpha, fit_path=self.fit_path,\n Xy=Xy)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._least_angle.LarsCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._least_angle.LarsCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._least_angle.LarsCV.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._least_angle.LarsCV.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "Sets the verbosity amount." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._least_angle.LarsCV.__init__.max_iter", + "default_value": "500", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "500", + "description": "Maximum number of iterations to perform." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._least_angle.LarsCV.__init__.normalize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/__init__/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._least_angle.LarsCV.__init__.precompute", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool, 'auto' or array-like", + "default_value": "'auto'", + "description": "Whether to use a precomputed Gram matrix to speed up\ncalculations. If set to ``'auto'`` let us decide. The Gram matrix\ncannot be passed as argument since we will use only subsets of X." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "array-like" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/__init__/cv", + "name": "cv", + "qname": "sklearn.linear_model._least_angle.LarsCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/__init__/max_n_alphas", + "name": "max_n_alphas", + "qname": "sklearn.linear_model._least_angle.LarsCV.__init__.max_n_alphas", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of points on the path used to compute the\nresiduals in the cross-validation" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._least_angle.LarsCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "Number of CPUs to use during the cross validation.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/__init__/eps", + "name": "eps", + "qname": "sklearn.linear_model._least_angle.LarsCV.__init__.eps", + "default_value": "np.finfo(float).eps", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "np.finfo(float).eps", + "description": "The machine-precision regularization in the computation of the\nCholesky diagonal factors. Increase this for very ill-conditioned\nsystems. Unlike the ``tol`` parameter in some iterative\noptimization-based algorithms, this parameter does not control\nthe tolerance of the optimization." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._least_angle.LarsCV.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Cross-validated Least Angle Regression model.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, fit_intercept=True, verbose=False, max_iter=500,\n normalize=True, precompute='auto', cv=None,\n max_n_alphas=1000, n_jobs=None, eps=np.finfo(float).eps,\n copy_X=True):\n self.max_iter = max_iter\n self.cv = cv\n self.max_n_alphas = max_n_alphas\n self.n_jobs = n_jobs\n super().__init__(fit_intercept=fit_intercept,\n verbose=verbose, normalize=normalize,\n precompute=precompute,\n n_nonzero_coefs=500,\n eps=eps, copy_X=copy_X, fit_path=True)" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/_more_tags", + "name": "_more_tags", + "qname": "sklearn.linear_model._least_angle.LarsCV._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._least_angle.LarsCV._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'multioutput': False}" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/fit", + "name": "fit", + "qname": "sklearn.linear_model._least_angle.LarsCV.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/fit/self", + "name": "self", + "qname": "sklearn.linear_model._least_angle.LarsCV.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/fit/X", + "name": "X", + "qname": "sklearn.linear_model._least_angle.LarsCV.fit.X", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LarsCV/fit/y", + "name": "y", + "qname": "sklearn.linear_model._least_angle.LarsCV.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model using X, y as training data.", + "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\nself : object\n returns an instance of self.", + "code": " def fit(self, X, y):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n Returns\n -------\n self : object\n returns an instance of self.\n \"\"\"\n X, y = self._validate_data(X, y, y_numeric=True)\n X = as_float_array(X, copy=self.copy_X)\n y = as_float_array(y, copy=self.copy_X)\n\n # init cross-validation generator\n cv = check_cv(self.cv, classifier=False)\n\n # As we use cross-validation, the Gram matrix is not precomputed here\n Gram = self.precompute\n if hasattr(Gram, '__array__'):\n warnings.warn('Parameter \"precompute\" cannot be an array in '\n '%s. 
Automatically switch to \"auto\" instead.'\n % self.__class__.__name__)\n Gram = 'auto'\n\n cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n delayed(_lars_path_residues)(\n X[train], y[train], X[test], y[test], Gram=Gram, copy=False,\n method=self.method, verbose=max(0, self.verbose - 1),\n normalize=self.normalize, fit_intercept=self.fit_intercept,\n max_iter=self.max_iter, eps=self.eps, positive=self.positive)\n for train, test in cv.split(X, y))\n all_alphas = np.concatenate(list(zip(*cv_paths))[0])\n # Unique also sorts\n all_alphas = np.unique(all_alphas)\n # Take at most max_n_alphas values\n stride = int(max(1, int(len(all_alphas) / float(self.max_n_alphas))))\n all_alphas = all_alphas[::stride]\n\n mse_path = np.empty((len(all_alphas), len(cv_paths)))\n for index, (alphas, _, _, residues) in enumerate(cv_paths):\n alphas = alphas[::-1]\n residues = residues[::-1]\n if alphas[0] != 0:\n alphas = np.r_[0, alphas]\n residues = np.r_[residues[0, np.newaxis], residues]\n if alphas[-1] != all_alphas[-1]:\n alphas = np.r_[alphas, all_alphas[-1]]\n residues = np.r_[residues, residues[-1, np.newaxis]]\n this_residues = interpolate.interp1d(alphas,\n residues,\n axis=0)(all_alphas)\n this_residues **= 2\n mse_path[:, index] = np.mean(this_residues, axis=-1)\n\n mask = np.all(np.isfinite(mse_path), axis=-1)\n all_alphas = all_alphas[mask]\n mse_path = mse_path[mask]\n # Select the alpha that minimizes left-out error\n i_best_alpha = np.argmin(mse_path.mean(axis=-1))\n best_alpha = all_alphas[i_best_alpha]\n\n # Store our parameters\n self.alpha_ = best_alpha\n self.cv_alphas_ = all_alphas\n self.mse_path_ = mse_path\n\n # Now compute the full model\n # it will call a lasso internally when self if LassoLarsCV\n # as self.method == 'lasso'\n self._fit(X, y, max_iter=self.max_iter, alpha=best_alpha,\n Xy=None, fit_path=True)\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__.alpha", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Constant that multiplies the penalty term. Defaults to 1.0.\n``alpha = 0`` is equivalent to an ordinary least square, solved\nby :class:`LinearRegression`. For numerical reasons, using\n``alpha = 0`` with the LassoLars object is not advised and you\nshould prefer the LinearRegression object." 
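LarsCV.fit, shown above, collects per-fold LARS paths, interpolates squared residuals onto a common alpha grid (capped by max_n_alphas), and keeps the alpha with the lowest mean left-out error, storing alpha_, cv_alphas_ and mse_path_. A usage sketch on synthetic data reading those attributes back:

import numpy as np
from sklearn.linear_model import LarsCV

rng = np.random.RandomState(0)
X = rng.randn(100, 8)
y = X[:, 0] - 2 * X[:, 3] + 0.05 * rng.randn(100)

reg = LarsCV(cv=5).fit(X, y)
print(reg.alpha_)              # alpha minimizing mean left-out MSE
print(reg.cv_alphas_.shape)    # common alpha grid across folds
print(reg.mse_path_.shape)     # (n_cv_alphas, n_folds)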
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "Sets the verbosity amount." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__.normalize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__.precompute", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool, 'auto' or array-like", + "default_value": "'auto'", + "description": "Whether to use a precomputed Gram matrix to speed up\ncalculations. If set to ``'auto'`` let us decide. The Gram\nmatrix can also be passed as argument." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "array-like" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__.max_iter", + "default_value": "500", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "500", + "description": "Maximum number of iterations to perform." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__/eps", + "name": "eps", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__.eps", + "default_value": "np.finfo(float).eps", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "np.finfo(float).eps", + "description": "The machine-precision regularization in the computation of the\nCholesky diagonal factors. Increase this for very ill-conditioned\nsystems. Unlike the ``tol`` parameter in some iterative\noptimization-based algorithms, this parameter does not control\nthe tolerance of the optimization." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__/fit_path", + "name": "fit_path", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__.fit_path", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True`` the full path is stored in the ``coef_path_`` attribute.\nIf you compute the solution for a large problem or many targets,\nsetting ``fit_path`` to ``False`` will lead to a speedup, especially\nwith a small alpha." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__/positive", + "name": "positive", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__.positive", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Restrict coefficients to be >= 0. Be aware that you might want to\nremove fit_intercept which is set True by default.\nUnder the positive restriction the model coefficients will not converge\nto the ordinary-least-squares solution for small values of alpha.\nOnly coefficients up to the smallest alpha value (``alphas_[alphas_ >\n0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\nalgorithm are typically in congruence with the solution of the\ncoordinate descent Lasso estimator." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__/jitter", + "name": "jitter", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__.jitter", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Upper bound on a uniform noise parameter to be added to the\n`y` values, to satisfy the model's assumption of\none-at-a-time computations. Might help with stability.\n\n.. 
versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLars/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._least_angle.LassoLars.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for jittering. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary `. Ignored if `jitter` is None.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Lasso model fit with Least Angle Regression a.k.a. Lars\n\nIt is a Linear Model trained with an L1 prior as regularizer.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, fit_intercept=True, verbose=False,\n normalize=True, precompute='auto', max_iter=500,\n eps=np.finfo(float).eps, copy_X=True, fit_path=True,\n positive=False, jitter=None, random_state=None):\n self.alpha = alpha\n self.fit_intercept = fit_intercept\n self.max_iter = max_iter\n self.verbose = verbose\n self.normalize = normalize\n self.positive = positive\n self.precompute = precompute\n self.copy_X = copy_X\n self.eps = eps\n self.fit_path = fit_path\n self.jitter = jitter\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "Sets the verbosity amount." 
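LassoLars.__init__ above exposes jitter and random_state (both added in 0.23) to add small reproducible noise to y, which the docstring says may help stability with the one-at-a-time assumption. A minimal sketch; the jitter magnitude and data are arbitrary:

import numpy as np
from sklearn.linear_model import LassoLars

rng = np.random.RandomState(0)
X = rng.randn(60, 5)
y = np.round(X[:, 0])          # discrete targets can produce ties in y

# Uniform noise bounded by jitter, reproducible via random_state.
reg = LassoLars(alpha=0.05, jitter=1e-6, random_state=0).fit(X, y)
print(reg.coef_)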
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV.__init__.max_iter", + "default_value": "500", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "500", + "description": "Maximum number of iterations to perform." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV.__init__.normalize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV.__init__.precompute", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or 'auto'", + "default_value": "'auto'", + "description": "Whether to use a precomputed Gram matrix to speed up\ncalculations. If set to ``'auto'`` let us decide. The Gram matrix\ncannot be passed as argument since we will use only subsets of X." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'auto'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__/cv", + "name": "cv", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__/max_n_alphas", + "name": "max_n_alphas", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV.__init__.max_n_alphas", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of points on the path used to compute the\nresiduals in the cross-validation" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "Number of CPUs to use during the cross validation.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__/eps", + "name": "eps", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV.__init__.eps", + "default_value": "np.finfo(float).eps", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "np.finfo(float).eps", + "description": "The machine-precision regularization in the computation of the\nCholesky diagonal factors. Increase this for very ill-conditioned\nsystems. Unlike the ``tol`` parameter in some iterative\noptimization-based algorithms, this parameter does not control\nthe tolerance of the optimization." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsCV/__init__/positive", + "name": "positive", + "qname": "sklearn.linear_model._least_angle.LassoLarsCV.__init__.positive", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Restrict coefficients to be >= 0. 
Be aware that you might want to\nremove fit_intercept which is set True by default.\nUnder the positive restriction the model coefficients do not converge\nto the ordinary-least-squares solution for small values of alpha.\nOnly coefficients up to the smallest alpha value (``alphas_[alphas_ >\n0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\nalgorithm are typically in congruence with the solution of the\ncoordinate descent Lasso estimator.\nAs a consequence using LassoLarsCV only makes sense for problems where\na sparse solution is expected and/or reached." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Cross-validated Lasso, using the LARS algorithm.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, fit_intercept=True, verbose=False, max_iter=500,\n normalize=True, precompute='auto', cv=None,\n max_n_alphas=1000, n_jobs=None, eps=np.finfo(float).eps,\n copy_X=True, positive=False):\n self.fit_intercept = fit_intercept\n self.verbose = verbose\n self.max_iter = max_iter\n self.normalize = normalize\n self.precompute = precompute\n self.cv = cv\n self.max_n_alphas = max_n_alphas\n self.n_jobs = n_jobs\n self.eps = eps\n self.copy_X = copy_X\n self.positive = positive" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/__init__/criterion", + "name": "criterion", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.__init__.criterion", + "default_value": "'aic'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'bic' , 'aic'}", + "default_value": "'aic'", + "description": "The type of criterion to use." + }, + "type": { + "kind": "EnumType", + "values": ["bic", "aic"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." 
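LassoLarsCV, documented above, adds a positive flag that, per its own docstring, only makes sense when a sparse non-negative solution is expected. A usage sketch on synthetic data:

import numpy as np
from sklearn.linear_model import LassoLarsCV

rng = np.random.RandomState(0)
X = rng.randn(120, 10)
y = 3 * X[:, 1] + X[:, 4] + 0.1 * rng.randn(120)

reg = LassoLarsCV(cv=5, positive=True).fit(X, y)
print(reg.alpha_, (reg.coef_ >= 0).all())   # chosen alpha; all coefficients >= 0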
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "Sets the verbosity amount." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.__init__.normalize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/__init__/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.__init__.precompute", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool, 'auto' or array-like", + "default_value": "'auto'", + "description": "Whether to use a precomputed Gram matrix to speed up\ncalculations. If set to ``'auto'`` let us decide. The Gram\nmatrix can also be passed as argument." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "array-like" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.__init__.max_iter", + "default_value": "500", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "500", + "description": "Maximum number of iterations to perform. Can be used for\nearly stopping." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/__init__/eps", + "name": "eps", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.__init__.eps", + "default_value": "np.finfo(float).eps", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "np.finfo(float).eps", + "description": "The machine-precision regularization in the computation of the\nCholesky diagonal factors. Increase this for very ill-conditioned\nsystems. Unlike the ``tol`` parameter in some iterative\noptimization-based algorithms, this parameter does not control\nthe tolerance of the optimization." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/__init__/positive", + "name": "positive", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.__init__.positive", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Restrict coefficients to be >= 0. Be aware that you might want to\nremove fit_intercept which is set True by default.\nUnder the positive restriction the model coefficients do not converge\nto the ordinary-least-squares solution for small values of alpha.\nOnly coefficients up to the smallest alpha value (``alphas_[alphas_ >\n0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\nalgorithm are typically in congruence with the solution of the\ncoordinate descent Lasso estimator.\nAs a consequence using LassoLarsIC only makes sense for problems where\na sparse solution is expected and/or reached." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Lasso model fit with Lars using BIC or AIC for model selection\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nAIC is the Akaike information criterion and BIC is the Bayes\nInformation criterion. Such criteria are useful to select the value\nof the regularization parameter by making a trade-off between the\ngoodness of fit and the complexity of the model. 
A good model should\nexplain well the data while being simple.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, criterion='aic', *, fit_intercept=True, verbose=False,\n normalize=True, precompute='auto', max_iter=500,\n eps=np.finfo(float).eps, copy_X=True, positive=False):\n self.criterion = criterion\n self.fit_intercept = fit_intercept\n self.positive = positive\n self.max_iter = max_iter\n self.verbose = verbose\n self.normalize = normalize\n self.copy_X = copy_X\n self.precompute = precompute\n self.eps = eps\n self.fit_path = True" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/_more_tags", + "name": "_more_tags", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'multioutput': False}" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/fit", + "name": "fit", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/fit/self", + "name": "self", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/fit/X", + "name": "X", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/fit/y", + "name": "y", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "target values. Will be cast to X's dtype if necessary" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/LassoLarsIC/fit/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._least_angle.LassoLarsIC.fit.copy_X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "None", + "description": "If provided, this parameter will override the choice\nof copy_X made at instance creation.\nIf ``True``, X will be copied; else, it may be overwritten." 
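LassoLarsIC, whose constructor appears above, selects alpha by an information criterion instead of cross-validation, so it fits a single path. A sketch comparing the two documented criteria on synthetic data:

import numpy as np
from sklearn.linear_model import LassoLarsIC

rng = np.random.RandomState(0)
X = rng.randn(100, 12)
y = X[:, 2] - X[:, 7] + 0.1 * rng.randn(100)

for criterion in ("aic", "bic"):
    reg = LassoLarsIC(criterion=criterion).fit(X, y)
    print(criterion, reg.alpha_)   # BIC penalizes model complexity more strongly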
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model using X, y as training data.", + "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n training data.\n\ny : array-like of shape (n_samples,)\n target values. Will be cast to X's dtype if necessary\n\ncopy_X : bool, default=None\n If provided, this parameter will override the choice\n of copy_X made at instance creation.\n If ``True``, X will be copied; else, it may be overwritten.\n\nReturns\n-------\nself : object\n returns an instance of self.", + "code": " def fit(self, X, y, copy_X=None):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n training data.\n\n y : array-like of shape (n_samples,)\n target values. Will be cast to X's dtype if necessary\n\n copy_X : bool, default=None\n If provided, this parameter will override the choice\n of copy_X made at instance creation.\n If ``True``, X will be copied; else, it may be overwritten.\n\n Returns\n -------\n self : object\n returns an instance of self.\n \"\"\"\n if copy_X is None:\n copy_X = self.copy_X\n X, y = self._validate_data(X, y, y_numeric=True)\n\n X, y, Xmean, ymean, Xstd = LinearModel._preprocess_data(\n X, y, self.fit_intercept, self.normalize, copy_X)\n\n Gram = self.precompute\n\n alphas_, _, coef_path_, self.n_iter_ = lars_path(\n X, y, Gram=Gram, copy_X=copy_X, copy_Gram=True, alpha_min=0.0,\n method='lasso', verbose=self.verbose, max_iter=self.max_iter,\n eps=self.eps, return_n_iter=True, positive=self.positive)\n\n n_samples = X.shape[0]\n\n if self.criterion == 'aic':\n K = 2 # AIC\n elif self.criterion == 'bic':\n K = log(n_samples) # BIC\n else:\n raise ValueError('criterion should be either bic or aic')\n\n R = y[:, np.newaxis] - np.dot(X, coef_path_) # residuals\n mean_squared_error = np.mean(R ** 2, axis=0)\n sigma2 = np.var(y)\n\n df = np.zeros(coef_path_.shape[1], dtype=int) # Degrees of freedom\n for k, coef in enumerate(coef_path_.T):\n mask = np.abs(coef) > np.finfo(coef.dtype).eps\n if not np.any(mask):\n continue\n # get the number of degrees of freedom equal to:\n # Xc = X[:, mask]\n # Trace(Xc * inv(Xc.T, Xc) * Xc.T) ie the number of non-zero coefs\n df[k] = np.sum(mask)\n\n self.alphas_ = alphas_\n eps64 = np.finfo('float64').eps\n self.criterion_ = (n_samples * mean_squared_error / (sigma2 + eps64) +\n K * df) # Eqns. 
2.15--16 in (Zou et al, 2007)\n n_best = np.argmin(self.criterion_)\n\n self.alpha_ = alphas_[n_best]\n self.coef_ = coef_path_[:, n_best]\n self._set_intercept(Xmean, ymean, Xstd)\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_check_copy_and_writeable", + "name": "_check_copy_and_writeable", + "qname": "sklearn.linear_model._least_angle._check_copy_and_writeable", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_check_copy_and_writeable/array", + "name": "array", + "qname": "sklearn.linear_model._least_angle._check_copy_and_writeable.array", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_check_copy_and_writeable/copy", + "name": "copy", + "qname": "sklearn.linear_model._least_angle._check_copy_and_writeable.copy", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_copy_and_writeable(array, copy=False):\n if copy or not array.flags.writeable:\n return array.copy()\n return array" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues", + "name": "_lars_path_residues", + "qname": "sklearn.linear_model._least_angle._lars_path_residues", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues/X_train", + "name": "X_train", + "qname": "sklearn.linear_model._least_angle._lars_path_residues.X_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to fit the LARS on" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues/y_train", + "name": "y_train", + "qname": "sklearn.linear_model._least_angle._lars_path_residues.y_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target variable to fit LARS on" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues/X_test", + "name": "X_test", + "qname": "sklearn.linear_model._least_angle._lars_path_residues.X_test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to compute the residues on" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues/y_test", + "name": "y_test", + "qname": "sklearn.linear_model._least_angle._lars_path_residues.y_test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + 
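The fit code above scores each point on the path as n_samples * MSE / (sigma2 + eps) + K * df, with K = 2 for AIC and K = log(n_samples) for BIC, and df counting non-zero coefficients (Zou et al., 2007, Eqns. 2.15-16). A standalone numpy restatement of that scoring step; the path statistics below are toy values:

import numpy as np

# Toy per-alpha statistics: mean squared residuals and degrees of freedom.
n_samples = 100
mse = np.array([1.00, 0.40, 0.25, 0.24])
df = np.array([0, 1, 2, 3])
sigma2 = 1.0                      # np.var(y) in the real code

for name, K in (("aic", 2.0), ("bic", np.log(n_samples))):
    crit = n_samples * mse / (sigma2 + np.finfo("float64").eps) + K * df
    print(name, crit.argmin())    # index of the selected alpha on the path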
"default_value": "", + "description": "The target variable to compute the residues on" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues/Gram", + "name": "Gram", + "qname": "sklearn.linear_model._least_angle._lars_path_residues.Gram", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None, 'auto' or array-like of shape (n_features, n_features)", + "default_value": "None", + "description": "Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\nmatrix is precomputed from the given X, if there are more samples\nthan features" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "None" + }, + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues/copy", + "name": "copy", + "qname": "sklearn.linear_model._least_angle._lars_path_residues.copy", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether X_train, X_test, y_train and y_test should be copied;\nif False, they may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues/method", + "name": "method", + "qname": "sklearn.linear_model._least_angle._lars_path_residues.method", + "default_value": "'lars'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'lar' , 'lasso'}", + "default_value": "'lar'", + "description": "Specifies the returned model. Select ``'lar'`` for Least Angle\nRegression, ``'lasso'`` for the Lasso." + }, + "type": { + "kind": "EnumType", + "values": ["lasso", "lar"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._least_angle._lars_path_residues.verbose", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "Sets the amount of verbosity" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._least_angle._lars_path_residues.fit_intercept", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._least_angle._lars_path_residues.normalize", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._least_angle._lars_path_residues.max_iter", + "default_value": "500", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "500", + "description": "Maximum number of iterations to perform." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues/eps", + "name": "eps", + "qname": "sklearn.linear_model._least_angle._lars_path_residues.eps", + "default_value": "np.finfo(float).eps", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "np.finfo(float).eps", + "description": "The machine-precision regularization in the computation of the\nCholesky diagonal factors. Increase this for very ill-conditioned\nsystems. Unlike the ``tol`` parameter in some iterative\noptimization-based algorithms, this parameter does not control\nthe tolerance of the optimization." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_residues/positive", + "name": "positive", + "qname": "sklearn.linear_model._least_angle._lars_path_residues.positive", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Restrict coefficients to be >= 0. Be aware that you might want to\nremove fit_intercept which is set True by default.\nSee reservations for using this option in combination with method\n'lasso' for expected small values of alpha in the doc of LassoLarsCV\nand LassoLarsIC." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the residues on left-out data for a full LARS path", + "docstring": "Compute the residues on left-out data for a full LARS path\n\nParameters\n-----------\nX_train : array-like of shape (n_samples, n_features)\n The data to fit the LARS on\n\ny_train : array-like of shape (n_samples,)\n The target variable to fit LARS on\n\nX_test : array-like of shape (n_samples, n_features)\n The data to compute the residues on\n\ny_test : array-like of shape (n_samples,)\n The target variable to compute the residues on\n\nGram : None, 'auto' or array-like of shape (n_features, n_features), default=None\n Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\n matrix is precomputed from the given X, if there are more samples\n than features\n\ncopy : bool, default=True\n Whether X_train, X_test, y_train and y_test should be copied;\n if False, they may be overwritten.\n\nmethod : {'lar' , 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\nverbose : bool or int, default=False\n Sets the amount of verbosity\n\nfit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\npositive : bool, default=False\n Restrict coefficients to be >= 0. Be aware that you might want to\n remove fit_intercept which is set True by default.\n See reservations for using this option in combination with method\n 'lasso' for expected small values of alpha in the doc of LassoLarsCV\n and LassoLarsIC.\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. 
Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\nReturns\n--------\nalphas : array-like of shape (n_alphas,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter`` or ``n_features``, whichever\n is smaller.\n\nactive : list\n Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas)\n Coefficients along the path\n\nresidues : array-like of shape (n_alphas, n_samples)\n Residues of the prediction on the test data", + "code": "def _lars_path_residues(X_train, y_train, X_test, y_test, Gram=None,\n copy=True, method='lars', verbose=False,\n fit_intercept=True, normalize=True, max_iter=500,\n eps=np.finfo(float).eps, positive=False):\n \"\"\"Compute the residues on left-out data for a full LARS path\n\n Parameters\n -----------\n X_train : array-like of shape (n_samples, n_features)\n The data to fit the LARS on\n\n y_train : array-like of shape (n_samples,)\n The target variable to fit LARS on\n\n X_test : array-like of shape (n_samples, n_features)\n The data to compute the residues on\n\n y_test : array-like of shape (n_samples,)\n The target variable to compute the residues on\n\n Gram : None, 'auto' or array-like of shape (n_features, n_features), \\\n default=None\n Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\n matrix is precomputed from the given X, if there are more samples\n than features\n\n copy : bool, default=True\n Whether X_train, X_test, y_train and y_test should be copied;\n if False, they may be overwritten.\n\n method : {'lar' , 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\n verbose : bool or int, default=False\n Sets the amount of verbosity\n\n fit_intercept : bool, default=True\n whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n positive : bool, default=False\n Restrict coefficients to be >= 0. Be aware that you might want to\n remove fit_intercept which is set True by default.\n See reservations for using this option in combination with method\n 'lasso' for expected small values of alpha in the doc of LassoLarsCV\n and LassoLarsIC.\n\n normalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n max_iter : int, default=500\n Maximum number of iterations to perform.\n\n eps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. 
Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\n Returns\n --------\n alphas : array-like of shape (n_alphas,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter`` or ``n_features``, whichever\n is smaller.\n\n active : list\n Indices of active variables at the end of the path.\n\n coefs : array-like of shape (n_features, n_alphas)\n Coefficients along the path\n\n residues : array-like of shape (n_alphas, n_samples)\n Residues of the prediction on the test data\n \"\"\"\n X_train = _check_copy_and_writeable(X_train, copy)\n y_train = _check_copy_and_writeable(y_train, copy)\n X_test = _check_copy_and_writeable(X_test, copy)\n y_test = _check_copy_and_writeable(y_test, copy)\n\n if fit_intercept:\n X_mean = X_train.mean(axis=0)\n X_train -= X_mean\n X_test -= X_mean\n y_mean = y_train.mean(axis=0)\n y_train = as_float_array(y_train, copy=False)\n y_train -= y_mean\n y_test = as_float_array(y_test, copy=False)\n y_test -= y_mean\n\n if normalize:\n norms = np.sqrt(np.sum(X_train ** 2, axis=0))\n nonzeros = np.flatnonzero(norms)\n X_train[:, nonzeros] /= norms[nonzeros]\n\n alphas, active, coefs = lars_path(\n X_train, y_train, Gram=Gram, copy_X=False, copy_Gram=False,\n method=method, verbose=max(0, verbose - 1), max_iter=max_iter, eps=eps,\n positive=positive)\n if normalize:\n coefs[nonzeros] /= norms[nonzeros][:, np.newaxis]\n residues = np.dot(X_test, coefs) - y_test[:, np.newaxis]\n return alphas, active, coefs, residues.T" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver", + "name": "_lars_path_solver", + "qname": "sklearn.linear_model._least_angle._lars_path_solver", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/X", + "name": "X", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None or ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data. Note that if X is None then Gram must be specified,\ni.e., cannot be None or False." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "None" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/y", + "name": "y", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None or ndarray of shape (n_samples,)", + "default_value": "", + "description": "Input targets." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "None" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/Xy", + "name": "Xy", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.Xy", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "None", + "description": "`Xy = np.dot(X.T, y)` that can be precomputed. It is useful\nonly when the Gram matrix is precomputed." 
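_lars_path_residues above is private, but its core idea, fit a path on the training split and evaluate residues on the held-out split, can be restated with the public lars_path. A sketch that skips the centering and normalization bookkeeping the private helper performs:

import numpy as np
from sklearn.linear_model import lars_path

rng = np.random.RandomState(0)
X = rng.randn(80, 6)
y = X[:, 0] + 0.1 * rng.randn(80)
X_tr, X_te, y_tr, y_te = X[:60], X[60:], y[:60], y[60:]

alphas, active, coefs = lars_path(X_tr, y_tr, method="lasso")
residues = X_te @ coefs - y_te[:, np.newaxis]   # (n_test_samples, n_alphas)
print(alphas.shape, residues.shape)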
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/Gram", + "name": "Gram", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.Gram", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None, 'auto' or array-like of shape (n_features, n_features)", + "default_value": "None", + "description": "Precomputed Gram matrix `(X' * X)`, if ``'auto'``, the Gram\nmatrix is precomputed from the given X, if there are more samples\nthan features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "None" + }, + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/n_samples", + "name": "n_samples", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.n_samples", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "None", + "description": "Equivalent size of sample. If `None`, it will be `n_samples`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.max_iter", + "default_value": "500", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "500", + "description": "Maximum number of iterations to perform, set to infinity for no limit." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/alpha_min", + "name": "alpha_min", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.alpha_min", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "Minimum correlation along the path. It corresponds to the\nregularization parameter alpha parameter in the Lasso." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/method", + "name": "method", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.method", + "default_value": "'lar'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'lar', 'lasso'}", + "default_value": "'lar'", + "description": "Specifies the returned model. Select ``'lar'`` for Least Angle\nRegression, ``'lasso'`` for the Lasso." + }, + "type": { + "kind": "EnumType", + "values": ["lasso", "lar"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.copy_X", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``False``, ``X`` is overwritten." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/eps", + "name": "eps", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.eps", + "default_value": "np.finfo(float).eps", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "np.finfo(float).eps", + "description": "The machine-precision regularization in the computation of the\nCholesky diagonal factors. Increase this for very ill-conditioned\nsystems. Unlike the ``tol`` parameter in some iterative\noptimization-based algorithms, this parameter does not control\nthe tolerance of the optimization." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/copy_Gram", + "name": "copy_Gram", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.copy_Gram", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``False``, ``Gram`` is overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.verbose", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls output verbosity." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/return_path", + "name": "return_path", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.return_path", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``return_path==True`` returns the entire path, else returns only the\nlast point of the path." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.return_n_iter", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return the number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/_lars_path_solver/positive", + "name": "positive", + "qname": "sklearn.linear_model._least_angle._lars_path_solver.positive", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Restrict coefficients to be >= 0.\nThis option is only allowed with method 'lasso'. Note that the model\ncoefficients will not converge to the ordinary-least-squares solution\nfor small values of alpha. Only coefficients up to the smallest alpha\nvalue (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\nthe stepwise Lars-Lasso algorithm are typically in congruence with the\nsolution of the coordinate descent lasso_path function." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Least Angle Regression or Lasso path using LARS algorithm [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute Least Angle Regression or Lasso path using LARS algorithm [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : None or ndarray of shape (n_samples, n_features)\n Input data. Note that if X is None then Gram must be specified,\n i.e., cannot be None or False.\n\ny : None or ndarray of shape (n_samples,)\n Input targets.\n\nXy : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n `Xy = np.dot(X.T, y)` that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\nGram : None, 'auto' or array-like of shape (n_features, n_features), default=None\n Precomputed Gram matrix `(X' * X)`, if ``'auto'``, the Gram\n matrix is precomputed from the given X, if there are more samples\n than features.\n\nn_samples : int or float, default=None\n Equivalent size of sample. If `None`, it will be `n_samples`.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform, set to infinity for no limit.\n\nalpha_min : float, default=0\n Minimum correlation along the path. It corresponds to the\n regularization parameter alpha parameter in the Lasso.\n\nmethod : {'lar', 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\ncopy_X : bool, default=True\n If ``False``, ``X`` is overwritten.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_Gram : bool, default=True\n If ``False``, ``Gram`` is overwritten.\n\nverbose : int, default=0\n Controls output verbosity.\n\nreturn_path : bool, default=True\n If ``return_path==True`` returns the entire path, else returns only the\n last point of the path.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0.\n This option is only allowed with method 'lasso'. Note that the model\n coefficients will not converge to the ordinary-least-squares solution\n for small values of alpha. 
Only coefficients up to the smallest alpha\n value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n the stepwise Lars-Lasso algorithm are typically in congruence with the\n solution of the coordinate descent lasso_path function.\n\nReturns\n-------\nalphas : array-like of shape (n_alphas + 1,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller.\n\nactive : array-like of shape (n_alphas,)\n Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas + 1)\n Coefficients along the path\n\nn_iter : int\n Number of iterations run. Returned only if return_n_iter is set\n to True.\n\nSee Also\n--------\nlasso_path\nLassoLars\nLars\nLassoLarsCV\nLarsCV\nsklearn.decomposition.sparse_encode\n\nReferences\n----------\n.. [1] \"Least Angle Regression\", Efron et al.\n http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n.. [2] `Wikipedia entry on the Least-angle regression\n `_\n\n.. [3] `Wikipedia entry on the Lasso\n `_", + "code": "def _lars_path_solver(\n X,\n y,\n Xy=None,\n Gram=None,\n n_samples=None,\n max_iter=500,\n alpha_min=0,\n method=\"lar\",\n copy_X=True,\n eps=np.finfo(float).eps,\n copy_Gram=True,\n verbose=0,\n return_path=True,\n return_n_iter=False,\n positive=False,\n):\n \"\"\"Compute Least Angle Regression or Lasso path using LARS algorithm [1]\n\n The optimization objective for the case method='lasso' is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n in the case of method='lars', the objective function is only known in\n the form of an implicit equation (see discussion in [1])\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : None or ndarray of shape (n_samples, n_features)\n Input data. Note that if X is None then Gram must be specified,\n i.e., cannot be None or False.\n\n y : None or ndarray of shape (n_samples,)\n Input targets.\n\n Xy : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n default=None\n `Xy = np.dot(X.T, y)` that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\n Gram : None, 'auto' or array-like of shape (n_features, n_features), \\\n default=None\n Precomputed Gram matrix `(X' * X)`, if ``'auto'``, the Gram\n matrix is precomputed from the given X, if there are more samples\n than features.\n\n n_samples : int or float, default=None\n Equivalent size of sample. If `None`, it will be `n_samples`.\n\n max_iter : int, default=500\n Maximum number of iterations to perform, set to infinity for no limit.\n\n alpha_min : float, default=0\n Minimum correlation along the path. It corresponds to the\n regularization parameter alpha parameter in the Lasso.\n\n method : {'lar', 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\n copy_X : bool, default=True\n If ``False``, ``X`` is overwritten.\n\n eps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. 
Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\n copy_Gram : bool, default=True\n If ``False``, ``Gram`` is overwritten.\n\n verbose : int, default=0\n Controls output verbosity.\n\n return_path : bool, default=True\n If ``return_path==True`` returns the entire path, else returns only the\n last point of the path.\n\n return_n_iter : bool, default=False\n Whether to return the number of iterations.\n\n positive : bool, default=False\n Restrict coefficients to be >= 0.\n This option is only allowed with method 'lasso'. Note that the model\n coefficients will not converge to the ordinary-least-squares solution\n for small values of alpha. Only coefficients up to the smallest alpha\n value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n the stepwise Lars-Lasso algorithm are typically in congruence with the\n solution of the coordinate descent lasso_path function.\n\n Returns\n -------\n alphas : array-like of shape (n_alphas + 1,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller.\n\n active : array-like of shape (n_alphas,)\n Indices of active variables at the end of the path.\n\n coefs : array-like of shape (n_features, n_alphas + 1)\n Coefficients along the path\n\n n_iter : int\n Number of iterations run. Returned only if return_n_iter is set\n to True.\n\n See Also\n --------\n lasso_path\n LassoLars\n Lars\n LassoLarsCV\n LarsCV\n sklearn.decomposition.sparse_encode\n\n References\n ----------\n .. [1] \"Least Angle Regression\", Efron et al.\n http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n .. [2] `Wikipedia entry on the Least-angle regression\n `_\n\n .. [3] `Wikipedia entry on the Lasso\n `_\n\n \"\"\"\n if method == \"lar\" and positive:\n raise ValueError(\n \"Positive constraint not supported for 'lar' \" \"coding method.\"\n )\n\n n_samples = n_samples if n_samples is not None else y.size\n\n if Xy is None:\n Cov = np.dot(X.T, y)\n else:\n Cov = Xy.copy()\n\n if Gram is None or Gram is False:\n Gram = None\n if X is None:\n raise ValueError('X and Gram cannot both be unspecified.')\n elif isinstance(Gram, str) and Gram == 'auto' or Gram is True:\n if Gram is True or X.shape[0] > X.shape[1]:\n Gram = np.dot(X.T, X)\n else:\n Gram = None\n elif copy_Gram:\n Gram = Gram.copy()\n\n if Gram is None:\n n_features = X.shape[1]\n else:\n n_features = Cov.shape[0]\n if Gram.shape != (n_features, n_features):\n raise ValueError('The shapes of the inputs Gram and Xy'\n ' do not match.')\n\n if copy_X and X is not None and Gram is None:\n # force copy. setting the array to be fortran-ordered\n # speeds up the calculation of the (partial) Gram matrix\n # and allows to easily swap columns\n X = X.copy('F')\n\n max_features = min(max_iter, n_features)\n\n if return_path:\n coefs = np.zeros((max_features + 1, n_features))\n alphas = np.zeros(max_features + 1)\n else:\n coef, prev_coef = np.zeros(n_features), np.zeros(n_features)\n alpha, prev_alpha = np.array([0.]), np.array([0.]) # better ideas?\n\n n_iter, n_active = 0, 0\n active, indices = list(), np.arange(n_features)\n # holds the sign of covariance\n sign_active = np.empty(max_features, dtype=np.int8)\n drop = False\n\n # will hold the cholesky factorization. 
Only lower part is\n # referenced.\n if Gram is None:\n L = np.empty((max_features, max_features), dtype=X.dtype)\n swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (X,))\n else:\n L = np.empty((max_features, max_features), dtype=Gram.dtype)\n swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (Cov,))\n solve_cholesky, = get_lapack_funcs(('potrs',), (L,))\n\n if verbose:\n if verbose > 1:\n print(\"Step\\t\\tAdded\\t\\tDropped\\t\\tActive set size\\t\\tC\")\n else:\n sys.stdout.write('.')\n sys.stdout.flush()\n\n tiny32 = np.finfo(np.float32).tiny # to avoid division by 0 warning\n equality_tolerance = np.finfo(np.float32).eps\n\n if Gram is not None:\n Gram_copy = Gram.copy()\n Cov_copy = Cov.copy()\n\n while True:\n if Cov.size:\n if positive:\n C_idx = np.argmax(Cov)\n else:\n C_idx = np.argmax(np.abs(Cov))\n\n C_ = Cov[C_idx]\n\n if positive:\n C = C_\n else:\n C = np.fabs(C_)\n else:\n C = 0.\n\n if return_path:\n alpha = alphas[n_iter, np.newaxis]\n coef = coefs[n_iter]\n prev_alpha = alphas[n_iter - 1, np.newaxis]\n prev_coef = coefs[n_iter - 1]\n\n alpha[0] = C / n_samples\n if alpha[0] <= alpha_min + equality_tolerance: # early stopping\n if abs(alpha[0] - alpha_min) > equality_tolerance:\n # interpolation factor 0 <= ss < 1\n if n_iter > 0:\n # In the first iteration, all alphas are zero, the formula\n # below would make ss a NaN\n ss = ((prev_alpha[0] - alpha_min) /\n (prev_alpha[0] - alpha[0]))\n coef[:] = prev_coef + ss * (coef - prev_coef)\n alpha[0] = alpha_min\n if return_path:\n coefs[n_iter] = coef\n break\n\n if n_iter >= max_iter or n_active >= n_features:\n break\n if not drop:\n\n ##########################################################\n # Append x_j to the Cholesky factorization of (Xa * Xa') #\n # #\n # ( L 0 ) #\n # L -> ( ) , where L * w = Xa' x_j #\n # ( w z ) and z = ||x_j|| #\n # #\n ##########################################################\n\n if positive:\n sign_active[n_active] = np.ones_like(C_)\n else:\n sign_active[n_active] = np.sign(C_)\n m, n = n_active, C_idx + n_active\n\n Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])\n indices[n], indices[m] = indices[m], indices[n]\n Cov_not_shortened = Cov\n Cov = Cov[1:] # remove Cov[0]\n\n if Gram is None:\n X.T[n], X.T[m] = swap(X.T[n], X.T[m])\n c = nrm2(X.T[n_active]) ** 2\n L[n_active, :n_active] = \\\n np.dot(X.T[n_active], X.T[:n_active].T)\n else:\n # swap does only work inplace if matrix is fortran\n # contiguous ...\n Gram[m], Gram[n] = swap(Gram[m], Gram[n])\n Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n])\n c = Gram[n_active, n_active]\n L[n_active, :n_active] = Gram[n_active, :n_active]\n\n # Update the cholesky decomposition for the Gram matrix\n if n_active:\n linalg.solve_triangular(L[:n_active, :n_active],\n L[n_active, :n_active],\n trans=0, lower=1,\n overwrite_b=True,\n **SOLVE_TRIANGULAR_ARGS)\n\n v = np.dot(L[n_active, :n_active], L[n_active, :n_active])\n diag = max(np.sqrt(np.abs(c - v)), eps)\n L[n_active, n_active] = diag\n\n if diag < 1e-7:\n # The system is becoming too ill-conditioned.\n # We have degenerate vectors in our active set.\n # We'll 'drop for good' the last regressor added.\n\n # Note: this case is very rare. It is no longer triggered by\n # the test suite. The `equality_tolerance` margin added in 0.16\n # to get early stopping to work consistently on all versions of\n # Python including 32 bit Python under Windows seems to make it\n # very difficult to trigger the 'drop for good' strategy.\n warnings.warn('Regressors in active set degenerate. 
'\n 'Dropping a regressor, after %i iterations, '\n 'i.e. alpha=%.3e, '\n 'with an active set of %i regressors, and '\n 'the smallest cholesky pivot element being %.3e.'\n ' Reduce max_iter or increase eps parameters.'\n % (n_iter, alpha, n_active, diag),\n ConvergenceWarning)\n\n # XXX: need to figure a 'drop for good' way\n Cov = Cov_not_shortened\n Cov[0] = 0\n Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])\n continue\n\n active.append(indices[n_active])\n n_active += 1\n\n if verbose > 1:\n print(\"%s\\t\\t%s\\t\\t%s\\t\\t%s\\t\\t%s\" % (n_iter, active[-1], '',\n n_active, C))\n\n if method == 'lasso' and n_iter > 0 and prev_alpha[0] < alpha[0]:\n # alpha is increasing. This is because the updates of Cov are\n # bringing in too much numerical error that is greater than\n # than the remaining correlation with the\n # regressors. Time to bail out\n warnings.warn('Early stopping the lars path, as the residues '\n 'are small and the current value of alpha is no '\n 'longer well controlled. %i iterations, alpha=%.3e, '\n 'previous alpha=%.3e, with an active set of %i '\n 'regressors.'\n % (n_iter, alpha, prev_alpha, n_active),\n ConvergenceWarning)\n break\n\n # least squares solution\n least_squares, _ = solve_cholesky(L[:n_active, :n_active],\n sign_active[:n_active],\n lower=True)\n\n if least_squares.size == 1 and least_squares == 0:\n # This happens because sign_active[:n_active] = 0\n least_squares[...] = 1\n AA = 1.\n else:\n # is this really needed ?\n AA = 1. / np.sqrt(np.sum(least_squares * sign_active[:n_active]))\n\n if not np.isfinite(AA):\n # L is too ill-conditioned\n i = 0\n L_ = L[:n_active, :n_active].copy()\n while not np.isfinite(AA):\n L_.flat[::n_active + 1] += (2 ** i) * eps\n least_squares, _ = solve_cholesky(\n L_, sign_active[:n_active], lower=True)\n tmp = max(np.sum(least_squares * sign_active[:n_active]),\n eps)\n AA = 1. 
/ np.sqrt(tmp)\n i += 1\n least_squares *= AA\n\n if Gram is None:\n # equiangular direction of variables in the active set\n eq_dir = np.dot(X.T[:n_active].T, least_squares)\n # correlation between each unactive variables and\n # eqiangular vector\n corr_eq_dir = np.dot(X.T[n_active:], eq_dir)\n else:\n # if huge number of features, this takes 50% of time, I\n # think could be avoided if we just update it using an\n # orthogonal (QR) decomposition of X\n corr_eq_dir = np.dot(Gram[:n_active, n_active:].T,\n least_squares)\n\n g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny32))\n if positive:\n gamma_ = min(g1, C / AA)\n else:\n g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny32))\n gamma_ = min(g1, g2, C / AA)\n\n # TODO: better names for these variables: z\n drop = False\n z = -coef[active] / (least_squares + tiny32)\n z_pos = arrayfuncs.min_pos(z)\n if z_pos < gamma_:\n # some coefficients have changed sign\n idx = np.where(z == z_pos)[0][::-1]\n\n # update the sign, important for LAR\n sign_active[idx] = -sign_active[idx]\n\n if method == 'lasso':\n gamma_ = z_pos\n drop = True\n\n n_iter += 1\n\n if return_path:\n if n_iter >= coefs.shape[0]:\n del coef, alpha, prev_alpha, prev_coef\n # resize the coefs and alphas array\n add_features = 2 * max(1, (max_features - n_active))\n coefs = np.resize(coefs, (n_iter + add_features, n_features))\n coefs[-add_features:] = 0\n alphas = np.resize(alphas, n_iter + add_features)\n alphas[-add_features:] = 0\n coef = coefs[n_iter]\n prev_coef = coefs[n_iter - 1]\n else:\n # mimic the effect of incrementing n_iter on the array references\n prev_coef = coef\n prev_alpha[0] = alpha[0]\n coef = np.zeros_like(coef)\n\n coef[active] = prev_coef[active] + gamma_ * least_squares\n\n # update correlations\n Cov -= gamma_ * corr_eq_dir\n\n # See if any coefficient has changed sign\n if drop and method == 'lasso':\n\n # handle the case when idx is not length of 1\n for ii in idx:\n arrayfuncs.cholesky_delete(L[:n_active, :n_active], ii)\n\n n_active -= 1\n # handle the case when idx is not length of 1\n drop_idx = [active.pop(ii) for ii in idx]\n\n if Gram is None:\n # propagate dropped variable\n for ii in idx:\n for i in range(ii, n_active):\n X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1])\n # yeah this is stupid\n indices[i], indices[i + 1] = indices[i + 1], indices[i]\n\n # TODO: this could be updated\n residual = y - np.dot(X[:, :n_active], coef[active])\n temp = np.dot(X.T[n_active], residual)\n\n Cov = np.r_[temp, Cov]\n else:\n for ii in idx:\n for i in range(ii, n_active):\n indices[i], indices[i + 1] = indices[i + 1], indices[i]\n Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])\n Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i],\n Gram[:, i + 1])\n\n # Cov_n = Cov_j + x_j * X + increment(betas) TODO:\n # will this still work with multiple drops ?\n\n # recompute covariance. Probably could be done better\n # wrong as Xy is not swapped with the rest of variables\n\n # TODO: this could be updated\n temp = Cov_copy[drop_idx] - np.dot(Gram_copy[drop_idx], coef)\n Cov = np.r_[temp, Cov]\n\n sign_active = np.delete(sign_active, idx)\n sign_active = np.append(sign_active, 0.) 
# just to maintain size\n if verbose > 1:\n print(\"%s\\t\\t%s\\t\\t%s\\t\\t%s\\t\\t%s\" % (n_iter, '', drop_idx,\n n_active, abs(temp)))\n\n if return_path:\n # resize coefs in case of early stop\n alphas = alphas[:n_iter + 1]\n coefs = coefs[:n_iter + 1]\n\n if return_n_iter:\n return alphas, active, coefs.T, n_iter\n else:\n return alphas, active, coefs.T\n else:\n if return_n_iter:\n return alpha, active, coef, n_iter\n else:\n return alpha, active, coef" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path", + "name": "lars_path", + "qname": "sklearn.linear_model._least_angle.lars_path", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/X", + "name": "X", + "qname": "sklearn.linear_model._least_angle.lars_path.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None or array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data. Note that if X is None then the Gram matrix must be\nspecified, i.e., cannot be None or False." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "None" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/y", + "name": "y", + "qname": "sklearn.linear_model._least_angle.lars_path.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None or array-like of shape (n_samples,)", + "default_value": "", + "description": "Input targets." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "None" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/Xy", + "name": "Xy", + "qname": "sklearn.linear_model._least_angle.lars_path.Xy", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "None", + "description": "Xy = np.dot(X.T, y) that can be precomputed. It is useful\nonly when the Gram matrix is precomputed." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/Gram", + "name": "Gram", + "qname": "sklearn.linear_model._least_angle.lars_path.Gram", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "None, 'auto', array-like of shape (n_features, n_features)", + "default_value": "None", + "description": "Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\nmatrix is precomputed from the given X, if there are more samples\nthan features." 
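As a rough numerical check of the objective quoted in these docstrings, `(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1`, the LARS-lasso path should agree with coordinate-descent `Lasso` at any knot of the path. The data and the chosen knot below are arbitrary assumptions, not taken from the data file.

```python
import numpy as np
from sklearn.linear_model import lars_path, Lasso

rng = np.random.RandomState(0)
X = rng.randn(50, 5)
y = X @ np.array([1.0, -2.0, 0.0, 0.0, 3.0]) + 0.01 * rng.randn(50)

alphas, _, coefs = lars_path(X, y, method='lasso')

def objective(w, alpha):
    # (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1
    n = X.shape[0]
    return np.sum((y - X @ w) ** 2) / (2 * n) + alpha * np.abs(w).sum()

# Pick an interior knot of the path and compare with coordinate descent;
# the two objective values should match to solver precision.
k = len(alphas) // 2
cd = Lasso(alpha=alphas[k], fit_intercept=False).fit(X, y)
print(objective(coefs[:, k], alphas[k]), objective(cd.coef_, alphas[k]))
```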
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "None" + }, + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._least_angle.lars_path.max_iter", + "default_value": "500", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "500", + "description": "Maximum number of iterations to perform, set to infinity for no limit." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/alpha_min", + "name": "alpha_min", + "qname": "sklearn.linear_model._least_angle.lars_path.alpha_min", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "Minimum correlation along the path. It corresponds to the\nregularization parameter alpha parameter in the Lasso." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/method", + "name": "method", + "qname": "sklearn.linear_model._least_angle.lars_path.method", + "default_value": "'lar'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lar', 'lasso'}", + "default_value": "'lar'", + "description": "Specifies the returned model. Select ``'lar'`` for Least Angle\nRegression, ``'lasso'`` for the Lasso." + }, + "type": { + "kind": "EnumType", + "values": ["lasso", "lar"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._least_angle.lars_path.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``False``, ``X`` is overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/eps", + "name": "eps", + "qname": "sklearn.linear_model._least_angle.lars_path.eps", + "default_value": "np.finfo(float).eps", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "np.finfo(float).eps", + "description": "The machine-precision regularization in the computation of the\nCholesky diagonal factors. Increase this for very ill-conditioned\nsystems. Unlike the ``tol`` parameter in some iterative\noptimization-based algorithms, this parameter does not control\nthe tolerance of the optimization." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/copy_Gram", + "name": "copy_Gram", + "qname": "sklearn.linear_model._least_angle.lars_path.copy_Gram", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``False``, ``Gram`` is overwritten." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._least_angle.lars_path.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls output verbosity." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/return_path", + "name": "return_path", + "qname": "sklearn.linear_model._least_angle.lars_path.return_path", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``return_path==True`` returns the entire path, else returns only the\nlast point of the path." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.linear_model._least_angle.lars_path.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return the number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path/positive", + "name": "positive", + "qname": "sklearn.linear_model._least_angle.lars_path.positive", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Restrict coefficients to be >= 0.\nThis option is only allowed with method 'lasso'. Note that the model\ncoefficients will not converge to the ordinary-least-squares solution\nfor small values of alpha. Only coefficients up to the smallest alpha\nvalue (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\nthe stepwise Lars-Lasso algorithm are typically in congruence with the\nsolution of the coordinate descent lasso_path function." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Least Angle Regression or Lasso path using LARS algorithm [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute Least Angle Regression or Lasso path using LARS algorithm [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : None or array-like of shape (n_samples, n_features)\n Input data. Note that if X is None then the Gram matrix must be\n specified, i.e., cannot be None or False.\n\ny : None or array-like of shape (n_samples,)\n Input targets.\n\nXy : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Xy = np.dot(X.T, y) that can be precomputed. 
It is useful\n only when the Gram matrix is precomputed.\n\nGram : None, 'auto', array-like of shape (n_features, n_features), default=None\n Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\n matrix is precomputed from the given X, if there are more samples\n than features.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform, set to infinity for no limit.\n\nalpha_min : float, default=0\n Minimum correlation along the path. It corresponds to the\n regularization parameter alpha parameter in the Lasso.\n\nmethod : {'lar', 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\ncopy_X : bool, default=True\n If ``False``, ``X`` is overwritten.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_Gram : bool, default=True\n If ``False``, ``Gram`` is overwritten.\n\nverbose : int, default=0\n Controls output verbosity.\n\nreturn_path : bool, default=True\n If ``return_path==True`` returns the entire path, else returns only the\n last point of the path.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0.\n This option is only allowed with method 'lasso'. Note that the model\n coefficients will not converge to the ordinary-least-squares solution\n for small values of alpha. Only coefficients up to the smallest alpha\n value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n the stepwise Lars-Lasso algorithm are typically in congruence with the\n solution of the coordinate descent lasso_path function.\n\nReturns\n-------\nalphas : array-like of shape (n_alphas + 1,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller.\n\nactive : array-like of shape (n_alphas,)\n Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas + 1)\n Coefficients along the path\n\nn_iter : int\n Number of iterations run. Returned only if return_n_iter is set\n to True.\n\nSee Also\n--------\nlars_path_gram\nlasso_path\nlasso_path_gram\nLassoLars\nLars\nLassoLarsCV\nLarsCV\nsklearn.decomposition.sparse_encode\n\nReferences\n----------\n.. [1] \"Least Angle Regression\", Efron et al.\n http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n.. [2] `Wikipedia entry on the Least-angle regression\n `_\n\n.. 
[3] `Wikipedia entry on the Lasso\n `_", + "code": "@_deprecate_positional_args\ndef lars_path(\n X,\n y,\n Xy=None,\n *,\n Gram=None,\n max_iter=500,\n alpha_min=0,\n method=\"lar\",\n copy_X=True,\n eps=np.finfo(float).eps,\n copy_Gram=True,\n verbose=0,\n return_path=True,\n return_n_iter=False,\n positive=False\n):\n \"\"\"Compute Least Angle Regression or Lasso path using LARS algorithm [1]\n\n The optimization objective for the case method='lasso' is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n in the case of method='lars', the objective function is only known in\n the form of an implicit equation (see discussion in [1])\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : None or array-like of shape (n_samples, n_features)\n Input data. Note that if X is None then the Gram matrix must be\n specified, i.e., cannot be None or False.\n\n y : None or array-like of shape (n_samples,)\n Input targets.\n\n Xy : array-like of shape (n_samples,) or (n_samples, n_targets), \\\n default=None\n Xy = np.dot(X.T, y) that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\n Gram : None, 'auto', array-like of shape (n_features, n_features), \\\n default=None\n Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram\n matrix is precomputed from the given X, if there are more samples\n than features.\n\n max_iter : int, default=500\n Maximum number of iterations to perform, set to infinity for no limit.\n\n alpha_min : float, default=0\n Minimum correlation along the path. It corresponds to the\n regularization parameter alpha parameter in the Lasso.\n\n method : {'lar', 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\n copy_X : bool, default=True\n If ``False``, ``X`` is overwritten.\n\n eps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\n copy_Gram : bool, default=True\n If ``False``, ``Gram`` is overwritten.\n\n verbose : int, default=0\n Controls output verbosity.\n\n return_path : bool, default=True\n If ``return_path==True`` returns the entire path, else returns only the\n last point of the path.\n\n return_n_iter : bool, default=False\n Whether to return the number of iterations.\n\n positive : bool, default=False\n Restrict coefficients to be >= 0.\n This option is only allowed with method 'lasso'. Note that the model\n coefficients will not converge to the ordinary-least-squares solution\n for small values of alpha. Only coefficients up to the smallest alpha\n value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n the stepwise Lars-Lasso algorithm are typically in congruence with the\n solution of the coordinate descent lasso_path function.\n\n Returns\n -------\n alphas : array-like of shape (n_alphas + 1,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller.\n\n active : array-like of shape (n_alphas,)\n Indices of active variables at the end of the path.\n\n coefs : array-like of shape (n_features, n_alphas + 1)\n Coefficients along the path\n\n n_iter : int\n Number of iterations run. 
Returned only if return_n_iter is set\n to True.\n\n See Also\n --------\n lars_path_gram\n lasso_path\n lasso_path_gram\n LassoLars\n Lars\n LassoLarsCV\n LarsCV\n sklearn.decomposition.sparse_encode\n\n References\n ----------\n .. [1] \"Least Angle Regression\", Efron et al.\n http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n .. [2] `Wikipedia entry on the Least-angle regression\n `_\n\n .. [3] `Wikipedia entry on the Lasso\n `_\n\n \"\"\"\n if X is None and Gram is not None:\n raise ValueError(\n 'X cannot be None if Gram is not None'\n 'Use lars_path_gram to avoid passing X and y.'\n )\n return _lars_path_solver(\n X=X, y=y, Xy=Xy, Gram=Gram, n_samples=None, max_iter=max_iter,\n alpha_min=alpha_min, method=method, copy_X=copy_X,\n eps=eps, copy_Gram=copy_Gram, verbose=verbose, return_path=return_path,\n return_n_iter=return_n_iter, positive=positive)" + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram", + "name": "lars_path_gram", + "qname": "sklearn.linear_model._least_angle.lars_path_gram", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram/Xy", + "name": "Xy", + "qname": "sklearn.linear_model._least_angle.lars_path_gram.Xy", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Xy = np.dot(X.T, y)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram/Gram", + "name": "Gram", + "qname": "sklearn.linear_model._least_angle.lars_path_gram.Gram", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features, n_features)", + "default_value": "", + "description": "Gram = np.dot(X.T * X)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram/n_samples", + "name": "n_samples", + "qname": "sklearn.linear_model._least_angle.lars_path_gram.n_samples", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "", + "description": "Equivalent size of sample." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._least_angle.lars_path_gram.max_iter", + "default_value": "500", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "500", + "description": "Maximum number of iterations to perform, set to infinity for no limit." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram/alpha_min", + "name": "alpha_min", + "qname": "sklearn.linear_model._least_angle.lars_path_gram.alpha_min", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "Minimum correlation along the path. 
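A minimal usage sketch of the documented `lars_path` return values, assuming the bundled diabetes dataset; the assertion merely restates the docstring's claim that the alphas (maximal absolute covariances) shrink along the path toward `alpha_min`.

```python
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import lars_path

X, y = load_diabetes(return_X_y=True)
alphas, active, coefs, n_iter = lars_path(
    X, y, method='lasso', return_n_iter=True)

# alphas has n_alphas + 1 entries and decreases toward alpha_min (0 here);
# coefs is (n_features, n_alphas + 1), one column per knot of the path.
print(alphas[:3], alphas[-1])
print(len(active), coefs.shape, n_iter)
assert np.all(np.diff(alphas) <= 0)
```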
It corresponds to the\nregularization parameter alpha parameter in the Lasso." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram/method", + "name": "method", + "qname": "sklearn.linear_model._least_angle.lars_path_gram.method", + "default_value": "'lar'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lar', 'lasso'}", + "default_value": "'lar'", + "description": "Specifies the returned model. Select ``'lar'`` for Least Angle\nRegression, ``'lasso'`` for the Lasso." + }, + "type": { + "kind": "EnumType", + "values": ["lasso", "lar"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._least_angle.lars_path_gram.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``False``, ``X`` is overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram/eps", + "name": "eps", + "qname": "sklearn.linear_model._least_angle.lars_path_gram.eps", + "default_value": "np.finfo(float).eps", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "np.finfo(float).eps", + "description": "The machine-precision regularization in the computation of the\nCholesky diagonal factors. Increase this for very ill-conditioned\nsystems. Unlike the ``tol`` parameter in some iterative\noptimization-based algorithms, this parameter does not control\nthe tolerance of the optimization." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram/copy_Gram", + "name": "copy_Gram", + "qname": "sklearn.linear_model._least_angle.lars_path_gram.copy_Gram", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``False``, ``Gram`` is overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._least_angle.lars_path_gram.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls output verbosity." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram/return_path", + "name": "return_path", + "qname": "sklearn.linear_model._least_angle.lars_path_gram.return_path", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``return_path==True`` returns the entire path, else returns only the\nlast point of the path." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.linear_model._least_angle.lars_path_gram.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return the number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._least_angle/lars_path_gram/positive", + "name": "positive", + "qname": "sklearn.linear_model._least_angle.lars_path_gram.positive", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Restrict coefficients to be >= 0.\nThis option is only allowed with method 'lasso'. Note that the model\ncoefficients will not converge to the ordinary-least-squares solution\nfor small values of alpha. Only coefficients up to the smallest alpha\nvalue (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\nthe stepwise Lars-Lasso algorithm are typically in congruence with the\nsolution of the coordinate descent lasso_path function." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "lars_path in the sufficient stats mode [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide `.", + "docstring": "lars_path in the sufficient stats mode [1]\n\nThe optimization objective for the case method='lasso' is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nin the case of method='lars', the objective function is only known in\nthe form of an implicit equation (see discussion in [1])\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nXy : array-like of shape (n_samples,) or (n_samples, n_targets)\n Xy = np.dot(X.T, y).\n\nGram : array-like of shape (n_features, n_features)\n Gram = np.dot(X.T * X).\n\nn_samples : int or float\n Equivalent size of sample.\n\nmax_iter : int, default=500\n Maximum number of iterations to perform, set to infinity for no limit.\n\nalpha_min : float, default=0\n Minimum correlation along the path. It corresponds to the\n regularization parameter alpha parameter in the Lasso.\n\nmethod : {'lar', 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\ncopy_X : bool, default=True\n If ``False``, ``X`` is overwritten.\n\neps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. 
Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\ncopy_Gram : bool, default=True\n If ``False``, ``Gram`` is overwritten.\n\nverbose : int, default=0\n Controls output verbosity.\n\nreturn_path : bool, default=True\n If ``return_path==True`` returns the entire path, else returns only the\n last point of the path.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations.\n\npositive : bool, default=False\n Restrict coefficients to be >= 0.\n This option is only allowed with method 'lasso'. Note that the model\n coefficients will not converge to the ordinary-least-squares solution\n for small values of alpha. Only coefficients up to the smallest alpha\n value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n the stepwise Lars-Lasso algorithm are typically in congruence with the\n solution of the coordinate descent lasso_path function.\n\nReturns\n-------\nalphas : array-like of shape (n_alphas + 1,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller.\n\nactive : array-like of shape (n_alphas,)\n Indices of active variables at the end of the path.\n\ncoefs : array-like of shape (n_features, n_alphas + 1)\n Coefficients along the path\n\nn_iter : int\n Number of iterations run. Returned only if return_n_iter is set\n to True.\n\nSee Also\n--------\nlars_path\nlasso_path\nlasso_path_gram\nLassoLars\nLars\nLassoLarsCV\nLarsCV\nsklearn.decomposition.sparse_encode\n\nReferences\n----------\n.. [1] \"Least Angle Regression\", Efron et al.\n http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n.. [2] `Wikipedia entry on the Least-angle regression\n `_\n\n.. [3] `Wikipedia entry on the Lasso\n `_", + "code": "@_deprecate_positional_args\ndef lars_path_gram(\n Xy,\n Gram,\n *,\n n_samples,\n max_iter=500,\n alpha_min=0,\n method=\"lar\",\n copy_X=True,\n eps=np.finfo(float).eps,\n copy_Gram=True,\n verbose=0,\n return_path=True,\n return_n_iter=False,\n positive=False\n):\n \"\"\"lars_path in the sufficient stats mode [1]\n\n The optimization objective for the case method='lasso' is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\n in the case of method='lars', the objective function is only known in\n the form of an implicit equation (see discussion in [1])\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n Xy : array-like of shape (n_samples,) or (n_samples, n_targets)\n Xy = np.dot(X.T, y).\n\n Gram : array-like of shape (n_features, n_features)\n Gram = np.dot(X.T * X).\n\n n_samples : int or float\n Equivalent size of sample.\n\n max_iter : int, default=500\n Maximum number of iterations to perform, set to infinity for no limit.\n\n alpha_min : float, default=0\n Minimum correlation along the path. It corresponds to the\n regularization parameter alpha parameter in the Lasso.\n\n method : {'lar', 'lasso'}, default='lar'\n Specifies the returned model. Select ``'lar'`` for Least Angle\n Regression, ``'lasso'`` for the Lasso.\n\n copy_X : bool, default=True\n If ``False``, ``X`` is overwritten.\n\n eps : float, default=np.finfo(float).eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. 
Unlike the ``tol`` parameter in some iterative\n optimization-based algorithms, this parameter does not control\n the tolerance of the optimization.\n\n copy_Gram : bool, default=True\n If ``False``, ``Gram`` is overwritten.\n\n verbose : int, default=0\n Controls output verbosity.\n\n return_path : bool, default=True\n If ``return_path==True`` returns the entire path, else returns only the\n last point of the path.\n\n return_n_iter : bool, default=False\n Whether to return the number of iterations.\n\n positive : bool, default=False\n Restrict coefficients to be >= 0.\n This option is only allowed with method 'lasso'. Note that the model\n coefficients will not converge to the ordinary-least-squares solution\n for small values of alpha. Only coefficients up to the smallest alpha\n value (``alphas_[alphas_ > 0.].min()`` when fit_path=True) reached by\n the stepwise Lars-Lasso algorithm are typically in congruence with the\n solution of the coordinate descent lasso_path function.\n\n Returns\n -------\n alphas : array-like of shape (n_alphas + 1,)\n Maximum of covariances (in absolute value) at each iteration.\n ``n_alphas`` is either ``max_iter``, ``n_features`` or the\n number of nodes in the path with ``alpha >= alpha_min``, whichever\n is smaller.\n\n active : array-like of shape (n_alphas,)\n Indices of active variables at the end of the path.\n\n coefs : array-like of shape (n_features, n_alphas + 1)\n Coefficients along the path\n\n n_iter : int\n Number of iterations run. Returned only if return_n_iter is set\n to True.\n\n See Also\n --------\n lars_path\n lasso_path\n lasso_path_gram\n LassoLars\n Lars\n LassoLarsCV\n LarsCV\n sklearn.decomposition.sparse_encode\n\n References\n ----------\n .. [1] \"Least Angle Regression\", Efron et al.\n http://statweb.stanford.edu/~tibs/ftp/lars.pdf\n\n .. [2] `Wikipedia entry on the Least-angle regression\n `_\n\n .. [3] `Wikipedia entry on the Lasso\n `_\n\n \"\"\"\n return _lars_path_solver(\n X=None, y=None, Xy=Xy, Gram=Gram, n_samples=n_samples,\n max_iter=max_iter, alpha_min=alpha_min, method=method,\n copy_X=copy_X, eps=eps, copy_Gram=copy_Gram,\n verbose=verbose, return_path=return_path,\n return_n_iter=return_n_iter, positive=positive)" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/penalty", + "name": "penalty", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.penalty", + "default_value": "'l2'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'l1', 'l2', 'elasticnet', 'none'}", + "default_value": "'l2'", + "description": "Used to specify the norm used in the penalization. The 'newton-cg',\n'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\nonly supported by the 'saga' solver. If 'none' (not supported by the\nliblinear solver), no regularization is applied.\n\n.. 
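A short sketch of the sufficient-statistics mode on assumed synthetic data: precomputing `Gram = X'X` and `Xy = X'y` and passing them to `lars_path_gram` (with `n_samples`) should reproduce the `lars_path` result on the raw data, which is the point of the Gram-only entry point.

```python
import numpy as np
from sklearn.linear_model import lars_path, lars_path_gram

rng = np.random.RandomState(0)
X = rng.randn(100, 6)
y = X @ rng.randn(6)

# Sufficient statistics as defined in the docstring: Gram = X'X, Xy = X'y.
Gram = X.T @ X
Xy = X.T @ y

a1, _, c1 = lars_path(X, y, method='lasso')
a2, _, c2 = lars_path_gram(Xy=Xy, Gram=Gram, n_samples=X.shape[0],
                           method='lasso')
print(np.allclose(a1, a2), np.allclose(c1, c2))
```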
versionadded:: 0.19\n l1 penalty with SAGA solver (allowing 'multinomial' + L1)" + }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1", "elasticnet", "none"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/dual", + "name": "dual", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.dual", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Dual or primal formulation. Dual formulation is only implemented for\nl2 penalty with liblinear solver. Prefer dual=False when\nn_samples > n_features." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance for stopping criteria." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/C", + "name": "C", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.C", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Inverse of regularization strength; must be a positive float.\nLike in support vector machines, smaller values specify stronger\nregularization." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the decision function." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/intercept_scaling", + "name": "intercept_scaling", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.intercept_scaling", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1", + "description": "Useful only when the solver 'liblinear' is used\nand self.fit_intercept is set to True. In this case, x becomes\n[x, self.intercept_scaling],\ni.e. a \"synthetic\" feature with constant value equal to\nintercept_scaling is appended to the instance vector.\nThe intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\nNote! the synthetic feature weight is subject to l1/l2 regularization\nas all other features.\nTo lessen the effect of regularization on synthetic feature weight\n(and therefore on the intercept) intercept_scaling has to be increased." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict or 'balanced'", + "default_value": "None", + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified.\n\n.. versionadded:: 0.17\n *class_weight='balanced'*" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "'balanced'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\ndata. See :term:`Glossary ` for details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/solver", + "name": "solver", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.solver", + "default_value": "'lbfgs'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}", + "default_value": "'lbfgs'", + "description": "Algorithm to use in the optimization problem.\n\n- For small datasets, 'liblinear' is a good choice, whereas 'sag' and\n 'saga' are faster for large ones.\n- For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs'\n handle multinomial loss; 'liblinear' is limited to one-versus-rest\n schemes.\n- 'newton-cg', 'lbfgs', 'sag' and 'saga' handle L2 or no penalty\n- 'liblinear' and 'saga' also handle L1 penalty\n- 'saga' also supports 'elasticnet' penalty\n- 'liblinear' does not support setting ``penalty='none'``\n\nNote that 'sag' and 'saga' fast convergence is only guaranteed on\nfeatures with approximately the same scale. You can\npreprocess the data with a scaler from sklearn.preprocessing.\n\n.. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n.. versionadded:: 0.19\n SAGA solver.\n.. versionchanged:: 0.22\n The default solver changed from 'liblinear' to 'lbfgs' in 0.22." 
+ }, + "type": { + "kind": "EnumType", + "values": ["newton-cg", "lbfgs", "saga", "sag", "liblinear"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Maximum number of iterations taken for the solvers to converge." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/multi_class", + "name": "multi_class", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.multi_class", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'ovr', 'multinomial'}", + "default_value": "'auto'", + "description": "If the option chosen is 'ovr', then a binary problem is fit for each\nlabel. For 'multinomial' the loss minimised is the multinomial loss fit\nacross the entire probability distribution, *even when the data is\nbinary*. 'multinomial' is unavailable when solver='liblinear'.\n'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\nand otherwise selects 'multinomial'.\n\n.. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n.. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22." + }, + "type": { + "kind": "EnumType", + "values": ["multinomial", "auto", "ovr"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "For the liblinear and lbfgs solvers set verbose to any positive\nnumber for verbosity." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to True, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution.\nUseless for liblinear solver. See :term:`the Glossary `.\n\n.. versionadded:: 0.17\n *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of CPU cores used when parallelizing over classes if\nmulti_class='ovr'\". This parameter is ignored when the ``solver`` is\nset to 'liblinear' regardless of whether 'multi_class' is specified or\nnot. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\ncontext. 
``-1`` means using all processors.\nSee :term:`Glossary ` for more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/__init__/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._logistic.LogisticRegression.__init__.l1_ratio", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\nused if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\nto using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\nto using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\ncombination of L1 and L2." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Logistic Regression (aka logit, MaxEnt) classifier.\n\nIn the multiclass case, the training algorithm uses the one-vs-rest (OvR)\nscheme if the 'multi_class' option is set to 'ovr', and uses the\ncross-entropy loss if the 'multi_class' option is set to 'multinomial'.\n(Currently the 'multinomial' option is supported only by the 'lbfgs',\n'sag', 'saga' and 'newton-cg' solvers.)\n\nThis class implements regularized logistic regression using the\n'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note\nthat regularization is applied by default**. It can handle both dense\nand sparse input. Use C-ordered arrays or CSR matrices containing 64-bit\nfloats for optimal performance; any other input format will be converted\n(and copied).\n\nThe 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\nwith primal formulation, or no regularization. The 'liblinear' solver\nsupports both L1 and L2 regularization, with a dual formulation only for\nthe L2 penalty. 
The Elastic-Net regularization is only supported by the\n'saga' solver.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, penalty='l2', *, dual=False, tol=1e-4, C=1.0,\n fit_intercept=True, intercept_scaling=1, class_weight=None,\n random_state=None, solver='lbfgs', max_iter=100,\n multi_class='auto', verbose=0, warm_start=False, n_jobs=None,\n l1_ratio=None):\n\n self.penalty = penalty\n self.dual = dual\n self.tol = tol\n self.C = C\n self.fit_intercept = fit_intercept\n self.intercept_scaling = intercept_scaling\n self.class_weight = class_weight\n self.random_state = random_state\n self.solver = solver\n self.max_iter = max_iter\n self.multi_class = multi_class\n self.verbose = verbose\n self.warm_start = warm_start\n self.n_jobs = n_jobs\n self.l1_ratio = l1_ratio" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/fit", + "name": "fit", + "qname": "sklearn.linear_model._logistic.LogisticRegression.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/fit/self", + "name": "self", + "qname": "sklearn.linear_model._logistic.LogisticRegression.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/fit/X", + "name": "X", + "qname": "sklearn.linear_model._logistic.LogisticRegression.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/fit/y", + "name": "y", + "qname": "sklearn.linear_model._logistic.LogisticRegression.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target vector relative to X." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._logistic.LogisticRegression.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) default=None", + "default_value": "", + "description": "Array of weights that are assigned to individual samples.\nIf not provided, then each sample is given unit weight.\n\n.. versionadded:: 0.17\n *sample_weight* support to LogisticRegression." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model according to the given training data.", + "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\n .. versionadded:: 0.17\n *sample_weight* support to LogisticRegression.\n\nReturns\n-------\nself\n Fitted estimator.\n\nNotes\n-----\nThe SAGA solver supports both float64 and float32 bit arrays.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"\n Fit the model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target vector relative to X.\n\n sample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\n .. versionadded:: 0.17\n *sample_weight* support to LogisticRegression.\n\n Returns\n -------\n self\n Fitted estimator.\n\n Notes\n -----\n The SAGA solver supports both float64 and float32 bit arrays.\n \"\"\"\n solver = _check_solver(self.solver, self.penalty, self.dual)\n\n if not isinstance(self.C, numbers.Number) or self.C < 0:\n raise ValueError(\"Penalty term must be positive; got (C=%r)\"\n % self.C)\n if self.penalty == 'elasticnet':\n if (not isinstance(self.l1_ratio, numbers.Number) or\n self.l1_ratio < 0 or self.l1_ratio > 1):\n raise ValueError(\"l1_ratio must be between 0 and 1;\"\n \" got (l1_ratio=%r)\" % self.l1_ratio)\n elif self.l1_ratio is not None:\n warnings.warn(\"l1_ratio parameter is only used when penalty is \"\n \"'elasticnet'. Got \"\n \"(penalty={})\".format(self.penalty))\n if self.penalty == 'none':\n if self.C != 1.0: # default values\n warnings.warn(\n \"Setting penalty='none' will ignore the C and l1_ratio \"\n \"parameters\"\n )\n # Note that check for l1_ratio is done right above\n C_ = np.inf\n penalty = 'l2'\n else:\n C_ = self.C\n penalty = self.penalty\n if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0:\n raise ValueError(\"Maximum number of iteration must be positive;\"\n \" got (max_iter=%r)\" % self.max_iter)\n if not isinstance(self.tol, numbers.Number) or self.tol < 0:\n raise ValueError(\"Tolerance for stopping criteria must be \"\n \"positive; got (tol=%r)\" % self.tol)\n\n if solver == 'lbfgs':\n _dtype = np.float64\n else:\n _dtype = [np.float64, np.float32]\n\n X, y = self._validate_data(X, y, accept_sparse='csr', dtype=_dtype,\n order=\"C\",\n accept_large_sparse=solver != 'liblinear')\n check_classification_targets(y)\n self.classes_ = np.unique(y)\n\n multi_class = _check_multi_class(self.multi_class, solver,\n len(self.classes_))\n\n if solver == 'liblinear':\n if effective_n_jobs(self.n_jobs) != 1:\n warnings.warn(\"'n_jobs' > 1 does not have any effect when\"\n \" 'solver' is set to 'liblinear'. 
Got 'n_jobs'\"\n \" = {}.\".format(effective_n_jobs(self.n_jobs)))\n self.coef_, self.intercept_, n_iter_ = _fit_liblinear(\n X, y, self.C, self.fit_intercept, self.intercept_scaling,\n self.class_weight, self.penalty, self.dual, self.verbose,\n self.max_iter, self.tol, self.random_state,\n sample_weight=sample_weight)\n self.n_iter_ = np.array([n_iter_])\n return self\n\n if solver in ['sag', 'saga']:\n max_squared_sum = row_norms(X, squared=True).max()\n else:\n max_squared_sum = None\n\n n_classes = len(self.classes_)\n classes_ = self.classes_\n if n_classes < 2:\n raise ValueError(\"This solver needs samples of at least 2 classes\"\n \" in the data, but the data contains only one\"\n \" class: %r\" % classes_[0])\n\n if len(self.classes_) == 2:\n n_classes = 1\n classes_ = classes_[1:]\n\n if self.warm_start:\n warm_start_coef = getattr(self, 'coef_', None)\n else:\n warm_start_coef = None\n if warm_start_coef is not None and self.fit_intercept:\n warm_start_coef = np.append(warm_start_coef,\n self.intercept_[:, np.newaxis],\n axis=1)\n\n # Hack so that we iterate only once for the multinomial case.\n if multi_class == 'multinomial':\n classes_ = [None]\n warm_start_coef = [warm_start_coef]\n if warm_start_coef is None:\n warm_start_coef = [None] * n_classes\n\n path_func = delayed(_logistic_regression_path)\n\n # The SAG solver releases the GIL so it's more efficient to use\n # threads for this solver.\n if solver in ['sag', 'saga']:\n prefer = 'threads'\n else:\n prefer = 'processes'\n fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(prefer=prefer))(\n path_func(X, y, pos_class=class_, Cs=[C_],\n l1_ratio=self.l1_ratio, fit_intercept=self.fit_intercept,\n tol=self.tol, verbose=self.verbose, solver=solver,\n multi_class=multi_class, max_iter=self.max_iter,\n class_weight=self.class_weight, check_input=False,\n random_state=self.random_state, coef=warm_start_coef_,\n penalty=penalty, max_squared_sum=max_squared_sum,\n sample_weight=sample_weight)\n for class_, warm_start_coef_ in zip(classes_, warm_start_coef))\n\n fold_coefs_, _, n_iter_ = zip(*fold_coefs_)\n self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0]\n\n n_features = X.shape[1]\n if multi_class == 'multinomial':\n self.coef_ = fold_coefs_[0][0]\n else:\n self.coef_ = np.asarray(fold_coefs_)\n self.coef_ = self.coef_.reshape(n_classes, n_features +\n int(self.fit_intercept))\n\n if self.fit_intercept:\n self.intercept_ = self.coef_[:, -1]\n self.coef_ = self.coef_[:, :-1]\n else:\n self.intercept_ = np.zeros(n_classes)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.linear_model._logistic.LogisticRegression.predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/predict_log_proba/self", + "name": "self", + "qname": "sklearn.linear_model._logistic.LogisticRegression.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/predict_log_proba/X", + "name": "X", + "qname": "sklearn.linear_model._logistic.LogisticRegression.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape 
(n_samples, n_features)", + "default_value": "", + "description": "Vector to be scored, where `n_samples` is the number of samples and\n`n_features` is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict logarithm of probability estimates.\n\nThe returned estimates for all classes are ordered by the\nlabel of classes.", + "docstring": "Predict logarithm of probability estimates.\n\nThe returned estimates for all classes are ordered by the\nlabel of classes.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Vector to be scored, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\nT : array-like of shape (n_samples, n_classes)\n Returns the log-probability of the sample for each class in the\n model, where classes are ordered as they are in ``self.classes_``.", + "code": " def predict_log_proba(self, X):\n \"\"\"\n Predict logarithm of probability estimates.\n\n The returned estimates for all classes are ordered by the\n label of classes.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Vector to be scored, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n Returns\n -------\n T : array-like of shape (n_samples, n_classes)\n Returns the log-probability of the sample for each class in the\n model, where classes are ordered as they are in ``self.classes_``.\n \"\"\"\n return np.log(self.predict_proba(X))" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/predict_proba", + "name": "predict_proba", + "qname": "sklearn.linear_model._logistic.LogisticRegression.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/predict_proba/self", + "name": "self", + "qname": "sklearn.linear_model._logistic.LogisticRegression.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegression/predict_proba/X", + "name": "X", + "qname": "sklearn.linear_model._logistic.LogisticRegression.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Vector to be scored, where `n_samples` is the number of samples and\n`n_features` is the number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Probability estimates.\n\nThe returned estimates for all classes are ordered by the\nlabel of classes.\n\nFor a multi_class problem, if multi_class is set to be \"multinomial\"\nthe softmax function is used to find the predicted probability of\neach class.\nElse use a one-vs-rest approach, i.e calculate the probability\nof each class assuming it to be positive using the logistic function.\nand normalize these values across all the classes.", + "docstring": "Probability estimates.\n\nThe returned estimates for all classes are ordered by the\nlabel of classes.\n\nFor a multi_class problem, if multi_class is set to be \"multinomial\"\nthe softmax function is used to find the predicted probability of\neach class.\nElse use a one-vs-rest approach, i.e calculate the probability\nof each class assuming it to be positive using the logistic function.\nand normalize these values across all the classes.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Vector to be scored, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\nReturns\n-------\nT : array-like of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in ``self.classes_``.", + "code": " def predict_proba(self, X):\n \"\"\"\n Probability estimates.\n\n The returned estimates for all classes are ordered by the\n label of classes.\n\n For a multi_class problem, if multi_class is set to be \"multinomial\"\n the softmax function is used to find the predicted probability of\n each class.\n Else use a one-vs-rest approach, i.e calculate the probability\n of each class assuming it to be positive using the logistic function.\n and normalize these values across all the classes.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Vector to be scored, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\n Returns\n -------\n T : array-like of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in ``self.classes_``.\n \"\"\"\n check_is_fitted(self)\n\n ovr = (self.multi_class in [\"ovr\", \"warn\"] or\n (self.multi_class == 'auto' and (self.classes_.size <= 2 or\n self.solver == 'liblinear')))\n if ovr:\n return super()._predict_proba_lr(X)\n else:\n decision = self.decision_function(X)\n if decision.ndim == 1:\n # Workaround for multi_class=\"multinomial\" and binary outcomes\n # which requires softmax prediction with only a 1D decision.\n decision_2d = np.c_[-decision, decision]\n else:\n decision_2d = decision\n return softmax(decision_2d, copy=False)" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { 
+ "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/Cs", + "name": "Cs", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.Cs", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or list of floats", + "default_value": "10", + "description": "Each of the values in Cs describes the inverse of regularization\nstrength. If Cs is as an int, then a grid of Cs values are chosen\nin a logarithmic scale between 1e-4 and 1e4.\nLike in support vector machines, smaller values specify stronger\nregularization." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "list of floats" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the decision function." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/cv", + "name": "cv", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or cross-validation generator", + "default_value": "None", + "description": "The default cross-validation generator used is Stratified K-Folds.\nIf an integer is provided, then it is the number of folds used.\nSee the module :mod:`sklearn.model_selection` module for the\nlist of possible cross-validation objects.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/dual", + "name": "dual", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.dual", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Dual or primal formulation. Dual formulation is only implemented for\nl2 penalty with liblinear solver. Prefer dual=False when\nn_samples > n_features." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/penalty", + "name": "penalty", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.penalty", + "default_value": "'l2'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'l1', 'l2', 'elasticnet'}", + "default_value": "'l2'", + "description": "Used to specify the norm used in the penalization. The 'newton-cg',\n'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\nonly supported by the 'saga' solver." 
+ }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1", "elasticnet"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/scoring", + "name": "scoring", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "None", + "description": "A string (see model evaluation documentation) or\na scorer callable object / function with signature\n``scorer(estimator, X, y)``. For a list of scoring functions\nthat can be used, look at :mod:`sklearn.metrics`. The\ndefault scoring option used is 'accuracy'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/solver", + "name": "solver", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.solver", + "default_value": "'lbfgs'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}", + "default_value": "'lbfgs'", + "description": "Algorithm to use in the optimization problem.\n\n- For small datasets, 'liblinear' is a good choice, whereas 'sag' and\n 'saga' are faster for large ones.\n- For multiclass problems, only 'newton-cg', 'sag', 'saga' and 'lbfgs'\n handle multinomial loss; 'liblinear' is limited to one-versus-rest\n schemes.\n- 'newton-cg', 'lbfgs' and 'sag' only handle L2 penalty, whereas\n 'liblinear' and 'saga' handle L1 penalty.\n- 'liblinear' might be slower in LogisticRegressionCV because it does\n not handle warm-starting.\n\nNote that 'sag' and 'saga' fast convergence is only guaranteed on\nfeatures with approximately the same scale. You can preprocess the data\nwith a scaler from sklearn.preprocessing.\n\n.. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n.. versionadded:: 0.19\n SAGA solver." + }, + "type": { + "kind": "EnumType", + "values": ["newton-cg", "lbfgs", "saga", "sag", "liblinear"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance for stopping criteria." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Maximum number of iterations of the optimization algorithm." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict or 'balanced'", + "default_value": "None", + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified.\n\n.. versionadded:: 0.17\n class_weight == 'balanced'" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "'balanced'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of CPU cores used during the cross-validation loop.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "For the 'liblinear', 'sag' and 'lbfgs' solvers set verbose to any\npositive number for verbosity." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/refit", + "name": "refit", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.refit", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If set to True, the scores are averaged across all folds, and the\ncoefs and the C that corresponds to the best score is taken, and a\nfinal refit is done using these parameters.\nOtherwise the coefs, intercepts and C that correspond to the\nbest scores across folds are averaged." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/intercept_scaling", + "name": "intercept_scaling", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.intercept_scaling", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1", + "description": "Useful only when the solver 'liblinear' is used\nand self.fit_intercept is set to True. In this case, x becomes\n[x, self.intercept_scaling],\ni.e. 
a \"synthetic\" feature with constant value equal to\nintercept_scaling is appended to the instance vector.\nThe intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\nNote! the synthetic feature weight is subject to l1/l2 regularization\nas all other features.\nTo lessen the effect of regularization on synthetic feature weight\n(and therefore on the intercept) intercept_scaling has to be increased." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/multi_class", + "name": "multi_class", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.multi_class", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto, 'ovr', 'multinomial'}", + "default_value": "'auto'", + "description": "If the option chosen is 'ovr', then a binary problem is fit for each\nlabel. For 'multinomial' the loss minimised is the multinomial loss fit\nacross the entire probability distribution, *even when the data is\nbinary*. 'multinomial' is unavailable when solver='liblinear'.\n'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\nand otherwise selects 'multinomial'.\n\n.. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n.. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22." + }, + "type": { + "kind": "EnumType", + "values": ["auto, ", ", "] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used when `solver='sag'`, 'saga' or 'liblinear' to shuffle the data.\nNote that this only applies to the solver and not the cross-validation\ngenerator. See :term:`Glossary ` for details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/__init__/l1_ratios", + "name": "l1_ratios", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.__init__.l1_ratios", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of float", + "default_value": "None", + "description": "The list of Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``.\nOnly used if ``penalty='elasticnet'``. A value of 0 is equivalent to\nusing ``penalty='l2'``, while 1 is equivalent to using\n``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a combination\nof L1 and L2." + }, + "type": { + "kind": "NamedType", + "name": "list of float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Logistic Regression CV (aka logit, MaxEnt) classifier.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThis class implements logistic regression using liblinear, newton-cg, sag\nof lbfgs optimizer. The newton-cg, sag and lbfgs solvers support only L2\nregularization with primal formulation. 
The liblinear solver supports both\nL1 and L2 regularization, with a dual formulation only for the L2 penalty.\nElastic-Net penalty is only supported by the saga solver.\n\nFor the grid of `Cs` values and `l1_ratios` values, the best hyperparameter\nis selected by the cross-validator\n:class:`~sklearn.model_selection.StratifiedKFold`, but it can be changed\nusing the :term:`cv` parameter. The 'newton-cg', 'sag', 'saga' and 'lbfgs'\nsolvers can warm-start the coefficients (see :term:`Glossary`).\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, Cs=10, fit_intercept=True, cv=None, dual=False,\n penalty='l2', scoring=None, solver='lbfgs', tol=1e-4,\n max_iter=100, class_weight=None, n_jobs=None, verbose=0,\n refit=True, intercept_scaling=1., multi_class='auto',\n random_state=None, l1_ratios=None):\n self.Cs = Cs\n self.fit_intercept = fit_intercept\n self.cv = cv\n self.dual = dual\n self.penalty = penalty\n self.scoring = scoring\n self.tol = tol\n self.max_iter = max_iter\n self.class_weight = class_weight\n self.n_jobs = n_jobs\n self.verbose = verbose\n self.solver = solver\n self.refit = refit\n self.intercept_scaling = intercept_scaling\n self.multi_class = multi_class\n self.random_state = random_state\n self.l1_ratios = l1_ratios" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/_more_tags", + "name": "_more_tags", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/fit", + "name": "fit", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/fit/self", + "name": "self", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/fit/X", + "name": "X", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/fit/y", + "name": "y", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target vector relative to X." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) default=None", + "default_value": "", + "description": "Array of weights that are assigned to individual samples.\nIf not provided, then each sample is given unit weight." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model according to the given training data.", + "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target vector relative to X.\n\n sample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\n Returns\n -------\n self : object\n \"\"\"\n solver = _check_solver(self.solver, self.penalty, self.dual)\n\n if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0:\n raise ValueError(\"Maximum number of iteration must be positive;\"\n \" got (max_iter=%r)\" % self.max_iter)\n if not isinstance(self.tol, numbers.Number) or self.tol < 0:\n raise ValueError(\"Tolerance for stopping criteria must be \"\n \"positive; got (tol=%r)\" % self.tol)\n if self.penalty == 'elasticnet':\n if self.l1_ratios is None or len(self.l1_ratios) == 0 or any(\n (not isinstance(l1_ratio, numbers.Number) or l1_ratio < 0\n or l1_ratio > 1) for l1_ratio in self.l1_ratios):\n raise ValueError(\"l1_ratios must be a list of numbers between \"\n \"0 and 1; got (l1_ratios=%r)\" %\n self.l1_ratios)\n l1_ratios_ = self.l1_ratios\n else:\n if self.l1_ratios is not None:\n warnings.warn(\"l1_ratios parameter is only used when penalty \"\n \"is 'elasticnet'. 
Got (penalty={})\".format(\n self.penalty))\n\n l1_ratios_ = [None]\n\n if self.penalty == 'none':\n raise ValueError(\n \"penalty='none' is not useful and not supported by \"\n \"LogisticRegressionCV.\"\n )\n\n X, y = self._validate_data(X, y, accept_sparse='csr', dtype=np.float64,\n order=\"C\",\n accept_large_sparse=solver != 'liblinear')\n check_classification_targets(y)\n\n class_weight = self.class_weight\n\n # Encode for string labels\n label_encoder = LabelEncoder().fit(y)\n y = label_encoder.transform(y)\n if isinstance(class_weight, dict):\n class_weight = {label_encoder.transform([cls])[0]: v\n for cls, v in class_weight.items()}\n\n # The original class labels\n classes = self.classes_ = label_encoder.classes_\n encoded_labels = label_encoder.transform(label_encoder.classes_)\n\n multi_class = _check_multi_class(self.multi_class, solver,\n len(classes))\n\n if solver in ['sag', 'saga']:\n max_squared_sum = row_norms(X, squared=True).max()\n else:\n max_squared_sum = None\n\n # init cross-validation generator\n cv = check_cv(self.cv, y, classifier=True)\n folds = list(cv.split(X, y))\n\n # Use the label encoded classes\n n_classes = len(encoded_labels)\n\n if n_classes < 2:\n raise ValueError(\"This solver needs samples of at least 2 classes\"\n \" in the data, but the data contains only one\"\n \" class: %r\" % classes[0])\n\n if n_classes == 2:\n # OvR in case of binary problems is as good as fitting\n # the higher label\n n_classes = 1\n encoded_labels = encoded_labels[1:]\n classes = classes[1:]\n\n # We need this hack to iterate only once over labels, in the case of\n # multi_class = multinomial, without changing the value of the labels.\n if multi_class == 'multinomial':\n iter_encoded_labels = iter_classes = [None]\n else:\n iter_encoded_labels = encoded_labels\n iter_classes = classes\n\n # compute the class weights for the entire dataset y\n if class_weight == \"balanced\":\n class_weight = compute_class_weight(\n class_weight, classes=np.arange(len(self.classes_)), y=y)\n class_weight = dict(enumerate(class_weight))\n\n path_func = delayed(_log_reg_scoring_path)\n\n # The SAG solver releases the GIL so it's more efficient to use\n # threads for this solver.\n if self.solver in ['sag', 'saga']:\n prefer = 'threads'\n else:\n prefer = 'processes'\n\n fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(prefer=prefer))(\n path_func(X, y, train, test, pos_class=label, Cs=self.Cs,\n fit_intercept=self.fit_intercept, penalty=self.penalty,\n dual=self.dual, solver=solver, tol=self.tol,\n max_iter=self.max_iter, verbose=self.verbose,\n class_weight=class_weight, scoring=self.scoring,\n multi_class=multi_class,\n intercept_scaling=self.intercept_scaling,\n random_state=self.random_state,\n max_squared_sum=max_squared_sum,\n sample_weight=sample_weight,\n l1_ratio=l1_ratio\n )\n for label in iter_encoded_labels\n for train, test in folds\n for l1_ratio in l1_ratios_)\n\n # _log_reg_scoring_path will output different shapes depending on the\n # multi_class param, so we need to reshape the outputs accordingly.\n # Cs is of shape (n_classes . n_folds . n_l1_ratios, n_Cs) and all the\n # rows are equal, so we just take the first one.\n # After reshaping,\n # - scores is of shape (n_classes, n_folds, n_Cs . n_l1_ratios)\n # - coefs_paths is of shape\n # (n_classes, n_folds, n_Cs . n_l1_ratios, n_features)\n # - n_iter is of shape\n # (n_classes, n_folds, n_Cs . n_l1_ratios) or\n # (1, n_folds, n_Cs . 
n_l1_ratios)\n coefs_paths, Cs, scores, n_iter_ = zip(*fold_coefs_)\n self.Cs_ = Cs[0]\n if multi_class == 'multinomial':\n coefs_paths = np.reshape(\n coefs_paths,\n (len(folds), len(l1_ratios_) * len(self.Cs_), n_classes, -1)\n )\n # equiv to coefs_paths = np.moveaxis(coefs_paths, (0, 1, 2, 3),\n # (1, 2, 0, 3))\n coefs_paths = np.swapaxes(coefs_paths, 0, 1)\n coefs_paths = np.swapaxes(coefs_paths, 0, 2)\n self.n_iter_ = np.reshape(\n n_iter_,\n (1, len(folds), len(self.Cs_) * len(l1_ratios_))\n )\n # repeat same scores across all classes\n scores = np.tile(scores, (n_classes, 1, 1))\n else:\n coefs_paths = np.reshape(\n coefs_paths,\n (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_),\n -1)\n )\n self.n_iter_ = np.reshape(\n n_iter_,\n (n_classes, len(folds), len(self.Cs_) * len(l1_ratios_))\n )\n scores = np.reshape(scores, (n_classes, len(folds), -1))\n self.scores_ = dict(zip(classes, scores))\n self.coefs_paths_ = dict(zip(classes, coefs_paths))\n\n self.C_ = list()\n self.l1_ratio_ = list()\n self.coef_ = np.empty((n_classes, X.shape[1]))\n self.intercept_ = np.zeros(n_classes)\n for index, (cls, encoded_label) in enumerate(\n zip(iter_classes, iter_encoded_labels)):\n\n if multi_class == 'ovr':\n scores = self.scores_[cls]\n coefs_paths = self.coefs_paths_[cls]\n else:\n # For multinomial, all scores are the same across classes\n scores = scores[0]\n # coefs_paths will keep its original shape because\n # logistic_regression_path expects it this way\n\n if self.refit:\n # best_index is between 0 and (n_Cs . n_l1_ratios - 1)\n # for example, with n_cs=2 and n_l1_ratios=3\n # the layout of scores is\n # [c1, c2, c1, c2, c1, c2]\n # l1_1 , l1_2 , l1_3\n best_index = scores.sum(axis=0).argmax()\n\n best_index_C = best_index % len(self.Cs_)\n C_ = self.Cs_[best_index_C]\n self.C_.append(C_)\n\n best_index_l1 = best_index // len(self.Cs_)\n l1_ratio_ = l1_ratios_[best_index_l1]\n self.l1_ratio_.append(l1_ratio_)\n\n if multi_class == 'multinomial':\n coef_init = np.mean(coefs_paths[:, :, best_index, :],\n axis=1)\n else:\n coef_init = np.mean(coefs_paths[:, best_index, :], axis=0)\n\n # Note that y is label encoded and hence pos_class must be\n # the encoded label / None (for 'multinomial')\n w, _, _ = _logistic_regression_path(\n X, y, pos_class=encoded_label, Cs=[C_], solver=solver,\n fit_intercept=self.fit_intercept, coef=coef_init,\n max_iter=self.max_iter, tol=self.tol,\n penalty=self.penalty,\n class_weight=class_weight,\n multi_class=multi_class,\n verbose=max(0, self.verbose - 1),\n random_state=self.random_state,\n check_input=False, max_squared_sum=max_squared_sum,\n sample_weight=sample_weight,\n l1_ratio=l1_ratio_)\n w = w[0]\n\n else:\n # Take the best scores across every fold and the average of\n # all coefficients corresponding to the best scores.\n best_indices = np.argmax(scores, axis=1)\n if multi_class == 'ovr':\n w = np.mean([coefs_paths[i, best_indices[i], :]\n for i in range(len(folds))], axis=0)\n else:\n w = np.mean([coefs_paths[:, i, best_indices[i], :]\n for i in range(len(folds))], axis=0)\n\n best_indices_C = best_indices % len(self.Cs_)\n self.C_.append(np.mean(self.Cs_[best_indices_C]))\n\n if self.penalty == 'elasticnet':\n best_indices_l1 = best_indices // len(self.Cs_)\n self.l1_ratio_.append(np.mean(l1_ratios_[best_indices_l1]))\n else:\n self.l1_ratio_.append(None)\n\n if multi_class == 'multinomial':\n self.C_ = np.tile(self.C_, n_classes)\n self.l1_ratio_ = np.tile(self.l1_ratio_, n_classes)\n self.coef_ = w[:, :X.shape[1]]\n if 
self.fit_intercept:\n self.intercept_ = w[:, -1]\n else:\n self.coef_[index] = w[: X.shape[1]]\n if self.fit_intercept:\n self.intercept_[index] = w[-1]\n\n self.C_ = np.asarray(self.C_)\n self.l1_ratio_ = np.asarray(self.l1_ratio_)\n self.l1_ratios_ = np.asarray(l1_ratios_)\n # if elasticnet was used, add the l1_ratios dimension to some\n # attributes\n if self.l1_ratios is not None:\n # with n_cs=2 and n_l1_ratios=3\n # the layout of scores is\n # [c1, c2, c1, c2, c1, c2]\n # l1_1 , l1_2 , l1_3\n # To get a 2d array with the following layout\n # l1_1, l1_2, l1_3\n # c1 [[ . , . , . ],\n # c2 [ . , . , . ]]\n # We need to first reshape and then transpose.\n # The same goes for the other arrays\n for cls, coefs_path in self.coefs_paths_.items():\n self.coefs_paths_[cls] = coefs_path.reshape(\n (len(folds), self.l1_ratios_.size, self.Cs_.size, -1))\n self.coefs_paths_[cls] = np.transpose(self.coefs_paths_[cls],\n (0, 2, 1, 3))\n for cls, score in self.scores_.items():\n self.scores_[cls] = score.reshape(\n (len(folds), self.l1_ratios_.size, self.Cs_.size))\n self.scores_[cls] = np.transpose(self.scores_[cls], (0, 2, 1))\n\n self.n_iter_ = self.n_iter_.reshape(\n (-1, len(folds), self.l1_ratios_.size, self.Cs_.size))\n self.n_iter_ = np.transpose(self.n_iter_, (0, 1, 3, 2))\n\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/score", + "name": "score", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/score/self", + "name": "self", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/score/X", + "name": "X", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Test samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/score/y", + "name": "y", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "True labels for X." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/LogisticRegressionCV/score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._logistic.LogisticRegressionCV.score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the score using the `scoring` option on the given\ntest data and labels.", + "docstring": "Returns the score using the `scoring` option on the given\ntest data and labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,)\n True labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score of self.predict(X) wrt. y.", + "code": " def score(self, X, y, sample_weight=None):\n \"\"\"Returns the score using the `scoring` option on the given\n test data and labels.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test samples.\n\n y : array-like of shape (n_samples,)\n True labels for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Score of self.predict(X) wrt. y.\n\n \"\"\"\n scoring = self.scoring or 'accuracy'\n scoring = get_scorer(scoring)\n\n return scoring(self, X, y, sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_check_multi_class", + "name": "_check_multi_class", + "qname": "sklearn.linear_model._logistic._check_multi_class", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/_check_multi_class/multi_class", + "name": "multi_class", + "qname": "sklearn.linear_model._logistic._check_multi_class.multi_class", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_check_multi_class/solver", + "name": "solver", + "qname": "sklearn.linear_model._logistic._check_multi_class.solver", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_check_multi_class/n_classes", + "name": "n_classes", + "qname": "sklearn.linear_model._logistic._check_multi_class.n_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_multi_class(multi_class, solver, n_classes):\n if multi_class == 'auto':\n if solver == 'liblinear':\n multi_class = 'ovr'\n elif n_classes > 2:\n multi_class = 'multinomial'\n else:\n multi_class = 'ovr'\n if multi_class not in ('multinomial', 'ovr'):\n raise ValueError(\"multi_class should be 'multinomial', 'ovr' or \"\n \"'auto'. 
Got %s.\" % multi_class)\n if multi_class == 'multinomial' and solver == 'liblinear':\n raise ValueError(\"Solver %s does not support \"\n \"a multinomial backend.\" % solver)\n return multi_class" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_check_solver", + "name": "_check_solver", + "qname": "sklearn.linear_model._logistic._check_solver", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/_check_solver/solver", + "name": "solver", + "qname": "sklearn.linear_model._logistic._check_solver.solver", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_check_solver/penalty", + "name": "penalty", + "qname": "sklearn.linear_model._logistic._check_solver.penalty", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_check_solver/dual", + "name": "dual", + "qname": "sklearn.linear_model._logistic._check_solver.dual", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_solver(solver, penalty, dual):\n all_solvers = ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga']\n if solver not in all_solvers:\n raise ValueError(\"Logistic Regression supports only solvers in %s, got\"\n \" %s.\" % (all_solvers, solver))\n\n all_penalties = ['l1', 'l2', 'elasticnet', 'none']\n if penalty not in all_penalties:\n raise ValueError(\"Logistic Regression supports only penalties in %s,\"\n \" got %s.\" % (all_penalties, penalty))\n\n if solver not in ['liblinear', 'saga'] and penalty not in ('l2', 'none'):\n raise ValueError(\"Solver %s supports only 'l2' or 'none' penalties, \"\n \"got %s penalty.\" % (solver, penalty))\n if solver != 'liblinear' and dual:\n raise ValueError(\"Solver %s supports only \"\n \"dual=False, got dual=%s\" % (solver, dual))\n\n if penalty == 'elasticnet' and solver != 'saga':\n raise ValueError(\"Only 'saga' solver supports elasticnet penalty,\"\n \" got solver={}.\".format(solver))\n\n if solver == 'liblinear' and penalty == 'none':\n raise ValueError(\n \"penalty='none' is not supported for the liblinear solver\"\n )\n\n return solver" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_intercept_dot", + "name": "_intercept_dot", + "qname": "sklearn.linear_model._logistic._intercept_dot", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/_intercept_dot/w", + "name": "w", + "qname": "sklearn.linear_model._logistic._intercept_dot.w", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,) or (n_features + 1,)", + "default_value": "", + "description": "Coefficient vector." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,) or (n_features + 1,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_intercept_dot/X", + "name": "X", + "qname": "sklearn.linear_model._logistic._intercept_dot.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_intercept_dot/y", + "name": "y", + "qname": "sklearn.linear_model._logistic._intercept_dot.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Array of labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes y * np.dot(X, w).\n\nIt takes into consideration if the intercept should be fit or not.", + "docstring": "Computes y * np.dot(X, w).\n\nIt takes into consideration if the intercept should be fit or not.\n\nParameters\n----------\nw : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Array of labels.\n\nReturns\n-------\nw : ndarray of shape (n_features,)\n Coefficient vector without the intercept weight (w[-1]) if the\n intercept should be fit. Unchanged otherwise.\n\nc : float\n The intercept.\n\nyz : float\n y * np.dot(X, w).", + "code": "def _intercept_dot(w, X, y):\n \"\"\"Computes y * np.dot(X, w).\n\n It takes into consideration if the intercept should be fit or not.\n\n Parameters\n ----------\n w : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : ndarray of shape (n_samples,)\n Array of labels.\n\n Returns\n -------\n w : ndarray of shape (n_features,)\n Coefficient vector without the intercept weight (w[-1]) if the\n intercept should be fit. Unchanged otherwise.\n\n c : float\n The intercept.\n\n yz : float\n y * np.dot(X, w).\n \"\"\"\n c = 0.\n if w.size == X.shape[1] + 1:\n c = w[-1]\n w = w[:-1]\n\n z = safe_sparse_dot(X, w) + c\n yz = y * z\n return w, c, yz" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path", + "name": "_log_reg_scoring_path", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/X", + "name": "X", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/y", + "name": "y", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target labels." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/train", + "name": "train", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of indices", + "default_value": "", + "description": "The indices of the train set." + }, + "type": { + "kind": "NamedType", + "name": "list of indices" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/test", + "name": "test", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of indices", + "default_value": "", + "description": "The indices of the test set." + }, + "type": { + "kind": "NamedType", + "name": "list of indices" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/pos_class", + "name": "pos_class", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.pos_class", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The class with respect to which we perform a one-vs-all fit.\nIf None, then it is assumed that the given problem is binary." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/Cs", + "name": "Cs", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.Cs", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or list of floats", + "default_value": "10", + "description": "Each of the values in Cs describes the inverse of\nregularization strength. If Cs is as an int, then a grid of Cs\nvalues are chosen in a logarithmic scale between 1e-4 and 1e4.\nIf not provided, then a fixed set of values for Cs are used." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "list of floats" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/scoring", + "name": "scoring", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.scoring", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "A string (see model evaluation documentation) or\na scorer callable object / function with signature\n``scorer(estimator, X, y)``. For a list of scoring functions\nthat can be used, look at :mod:`sklearn.metrics`. The\ndefault scoring option used is accuracy_score." 
+ }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.fit_intercept", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If False, then the bias term is set to zero. Else the last\nterm of each coef_ gives us the intercept." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.max_iter", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Maximum number of iterations for the solver." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/tol", + "name": "tol", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.tol", + "default_value": "0.0001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance for stopping criteria." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/class_weight", + "name": "class_weight", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.class_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict or 'balanced'", + "default_value": "None", + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "'balanced'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.verbose", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "For the liblinear and lbfgs solvers set verbose to any positive\nnumber for verbosity." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/solver", + "name": "solver", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.solver", + "default_value": "'lbfgs'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}", + "default_value": "'lbfgs'", + "description": "Decides which solver to use." 
+ }, + "type": { + "kind": "EnumType", + "values": ["newton-cg", "lbfgs", "saga", "sag", "liblinear"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/penalty", + "name": "penalty", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.penalty", + "default_value": "'l2'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'l1', 'l2', 'elasticnet'}", + "default_value": "'l2'", + "description": "Used to specify the norm used in the penalization. The 'newton-cg',\n'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\nonly supported by the 'saga' solver." + }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1", "elasticnet"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/dual", + "name": "dual", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.dual", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Dual or primal formulation. Dual formulation is only implemented for\nl2 penalty with liblinear solver. Prefer dual=False when\nn_samples > n_features." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/intercept_scaling", + "name": "intercept_scaling", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.intercept_scaling", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.", + "description": "Useful only when the solver 'liblinear' is used\nand self.fit_intercept is set to True. In this case, x becomes\n[x, self.intercept_scaling],\ni.e. a \"synthetic\" feature with constant value equals to\nintercept_scaling is appended to the instance vector.\nThe intercept becomes intercept_scaling * synthetic feature weight\nNote! the synthetic feature weight is subject to l1/l2 regularization\nas all other features.\nTo lessen the effect of regularization on synthetic feature weight\n(and therefore on the intercept) intercept_scaling has to be increased." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/multi_class", + "name": "multi_class", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.multi_class", + "default_value": "'auto'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'auto', 'ovr', 'multinomial'}", + "default_value": "'auto'", + "description": "If the option chosen is 'ovr', then a binary problem is fit for each\nlabel. For 'multinomial' the loss minimised is the multinomial loss fit\nacross the entire probability distribution, *even when the data is\nbinary*. 'multinomial' is unavailable when solver='liblinear'." + }, + "type": { + "kind": "EnumType", + "values": ["multinomial", "auto", "ovr"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\ndata. 
See :term:`Glossary <random_state>` for details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/max_squared_sum", + "name": "max_squared_sum", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.max_squared_sum", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Maximum squared sum of X over samples. Used only in SAG solver.\nIf None, it will be computed, going through all the samples.\nThe value should be precomputed to speed up cross validation." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape(n_samples,)", + "default_value": "None", + "description": "Array of weights that are assigned to individual samples.\nIf not provided, then each sample is given unit weight." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape(n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_log_reg_scoring_path/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._logistic._log_reg_scoring_path.l1_ratio", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\nused if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\nto using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\nto using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\ncombination of L1 and L2." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes scores across logistic_regression_path", + "docstring": "Computes scores across logistic_regression_path\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target labels.\n\ntrain : list of indices\n The indices of the train set.\n\ntest : list of indices\n The indices of the test set.\n\npos_class : int, default=None\n The class with respect to which we perform a one-vs-all fit.\n If None, then it is assumed that the given problem is binary.\n\nCs : int or list of floats, default=10\n Each of the values in Cs describes the inverse of\n regularization strength. If Cs is as an int, then a grid of Cs\n values are chosen in a logarithmic scale between 1e-4 and 1e4.\n If not provided, then a fixed set of values for Cs are used.\n\nscoring : callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``. For a list of scoring functions\n that can be used, look at :mod:`sklearn.metrics`. The\n default scoring option used is accuracy_score.\n\nfit_intercept : bool, default=False\n If False, then the bias term is set to zero. 
Else the last\n term of each coef_ gives us the intercept.\n\nmax_iter : int, default=100\n Maximum number of iterations for the solver.\n\ntol : float, default=1e-4\n Tolerance for stopping criteria.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\nverbose : int, default=0\n For the liblinear and lbfgs solvers set verbose to any positive\n number for verbosity.\n\nsolver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}, default='lbfgs'\n Decides which solver to use.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver.\n\ndual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\nintercept_scaling : float, default=1.\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n The intercept becomes intercept_scaling * synthetic feature weight\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n data. See :term:`Glossary <random_state>` for details.\n\nmax_squared_sum : float, default=None\n Maximum squared sum of X over samples. Used only in SAG solver.\n If None, it will be computed, going through all the samples.\n The value should be precomputed to speed up cross validation.\n\nsample_weight : array-like of shape(n_samples,), default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nl1_ratio : float, default=None\n The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n combination of L1 and L2.\n\nReturns\n-------\ncoefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n List of coefficients for the Logistic Regression model. 
If\n fit_intercept is set to True then the second dimension will be\n n_features + 1, where the last item represents the intercept.\n\nCs : ndarray\n Grid of Cs used for cross-validation.\n\nscores : ndarray of shape (n_cs,)\n Scores obtained for each Cs.\n\nn_iter : ndarray of shape(n_cs,)\n Actual number of iteration for each Cs.", + "code": "def _log_reg_scoring_path(X, y, train, test, pos_class=None, Cs=10,\n scoring=None, fit_intercept=False,\n max_iter=100, tol=1e-4, class_weight=None,\n verbose=0, solver='lbfgs', penalty='l2',\n dual=False, intercept_scaling=1.,\n multi_class='auto', random_state=None,\n max_squared_sum=None, sample_weight=None,\n l1_ratio=None):\n \"\"\"Computes scores across logistic_regression_path\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target labels.\n\n train : list of indices\n The indices of the train set.\n\n test : list of indices\n The indices of the test set.\n\n pos_class : int, default=None\n The class with respect to which we perform a one-vs-all fit.\n If None, then it is assumed that the given problem is binary.\n\n Cs : int or list of floats, default=10\n Each of the values in Cs describes the inverse of\n regularization strength. If Cs is as an int, then a grid of Cs\n values are chosen in a logarithmic scale between 1e-4 and 1e4.\n If not provided, then a fixed set of values for Cs are used.\n\n scoring : callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``. For a list of scoring functions\n that can be used, look at :mod:`sklearn.metrics`. The\n default scoring option used is accuracy_score.\n\n fit_intercept : bool, default=False\n If False, then the bias term is set to zero. Else the last\n term of each coef_ gives us the intercept.\n\n max_iter : int, default=100\n Maximum number of iterations for the solver.\n\n tol : float, default=1e-4\n Tolerance for stopping criteria.\n\n class_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n verbose : int, default=0\n For the liblinear and lbfgs solvers set verbose to any positive\n number for verbosity.\n\n solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}, \\\n default='lbfgs'\n Decides which solver to use.\n\n penalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver.\n\n dual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\n intercept_scaling : float, default=1.\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. 
a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n The intercept becomes intercept_scaling * synthetic feature weight\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\n multi_class : {'auto', 'ovr', 'multinomial'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n\n random_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n data. See :term:`Glossary <random_state>` for details.\n\n max_squared_sum : float, default=None\n Maximum squared sum of X over samples. Used only in SAG solver.\n If None, it will be computed, going through all the samples.\n The value should be precomputed to speed up cross validation.\n\n sample_weight : array-like of shape(n_samples,), default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\n l1_ratio : float, default=None\n The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n used if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\n to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n combination of L1 and L2.\n\n Returns\n -------\n coefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n List of coefficients for the Logistic Regression model. 
If\n fit_intercept is set to True then the second dimension will be\n n_features + 1, where the last item represents the intercept.\n\n Cs : ndarray\n Grid of Cs used for cross-validation.\n\n scores : ndarray of shape (n_cs,)\n Scores obtained for each Cs.\n\n n_iter : ndarray of shape(n_cs,)\n Actual number of iteration for each Cs.\n \"\"\"\n X_train = X[train]\n X_test = X[test]\n y_train = y[train]\n y_test = y[test]\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n sample_weight = sample_weight[train]\n\n coefs, Cs, n_iter = _logistic_regression_path(\n X_train, y_train, Cs=Cs, l1_ratio=l1_ratio,\n fit_intercept=fit_intercept, solver=solver, max_iter=max_iter,\n class_weight=class_weight, pos_class=pos_class,\n multi_class=multi_class, tol=tol, verbose=verbose, dual=dual,\n penalty=penalty, intercept_scaling=intercept_scaling,\n random_state=random_state, check_input=False,\n max_squared_sum=max_squared_sum, sample_weight=sample_weight)\n\n log_reg = LogisticRegression(solver=solver, multi_class=multi_class)\n\n # The score method of Logistic Regression has a classes_ attribute.\n if multi_class == 'ovr':\n log_reg.classes_ = np.array([-1, 1])\n elif multi_class == 'multinomial':\n log_reg.classes_ = np.unique(y_train)\n else:\n raise ValueError(\"multi_class should be either multinomial or ovr, \"\n \"got %d\" % multi_class)\n\n if pos_class is not None:\n mask = (y_test == pos_class)\n y_test = np.ones(y_test.shape, dtype=np.float64)\n y_test[~mask] = -1.\n\n scores = list()\n\n scoring = get_scorer(scoring)\n for w in coefs:\n if multi_class == 'ovr':\n w = w[np.newaxis, :]\n if fit_intercept:\n log_reg.coef_ = w[:, :-1]\n log_reg.intercept_ = w[:, -1]\n else:\n log_reg.coef_ = w\n log_reg.intercept_ = 0.\n\n if scoring is None:\n scores.append(log_reg.score(X_test, y_test))\n else:\n scores.append(scoring(log_reg, X_test, y_test))\n\n return coefs, Cs, np.array(scores), n_iter" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_grad_hess", + "name": "_logistic_grad_hess", + "qname": "sklearn.linear_model._logistic._logistic_grad_hess", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_grad_hess/w", + "name": "w", + "qname": "sklearn.linear_model._logistic._logistic_grad_hess.w", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,) or (n_features + 1,)", + "default_value": "", + "description": "Coefficient vector." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,) or (n_features + 1,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_grad_hess/X", + "name": "X", + "qname": "sklearn.linear_model._logistic._logistic_grad_hess.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_grad_hess/y", + "name": "y", + "qname": "sklearn.linear_model._logistic._logistic_grad_hess.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Array of labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_grad_hess/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._logistic._logistic_grad_hess.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Regularization parameter. alpha is equal to 1 / C." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_grad_hess/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._logistic._logistic_grad_hess.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) default=None", + "default_value": "", + "description": "Array of weights that are assigned to individual samples.\nIf not provided, then each sample is given unit weight." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the gradient and the Hessian, in the case of a logistic loss.", + "docstring": "Computes the gradient and the Hessian, in the case of a logistic loss.\n\nParameters\n----------\nw : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Array of labels.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nReturns\n-------\ngrad : ndarray of shape (n_features,) or (n_features + 1,)\n Logistic gradient.\n\nHs : callable\n Function that takes the gradient as a parameter and returns the\n matrix product of the Hessian and gradient.", + "code": "def _logistic_grad_hess(w, X, y, alpha, sample_weight=None):\n \"\"\"Computes the gradient and the Hessian, in the case of a logistic loss.\n\n Parameters\n ----------\n w : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : ndarray of shape (n_samples,)\n Array of labels.\n\n alpha : float\n Regularization parameter. 
alpha is equal to 1 / C.\n\n sample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\n Returns\n -------\n grad : ndarray of shape (n_features,) or (n_features + 1,)\n Logistic gradient.\n\n Hs : callable\n Function that takes the gradient as a parameter and returns the\n matrix product of the Hessian and gradient.\n \"\"\"\n n_samples, n_features = X.shape\n grad = np.empty_like(w)\n fit_intercept = grad.shape[0] > n_features\n\n w, c, yz = _intercept_dot(w, X, y)\n\n if sample_weight is None:\n sample_weight = np.ones(y.shape[0])\n\n z = expit(yz)\n z0 = sample_weight * (z - 1) * y\n\n grad[:n_features] = safe_sparse_dot(X.T, z0) + alpha * w\n\n # Case where we fit the intercept.\n if fit_intercept:\n grad[-1] = z0.sum()\n\n # The mat-vec product of the Hessian\n d = sample_weight * z * (1 - z)\n if sparse.issparse(X):\n dX = safe_sparse_dot(sparse.dia_matrix((d, 0),\n shape=(n_samples, n_samples)), X)\n else:\n # Precompute as much as possible\n dX = d[:, np.newaxis] * X\n\n if fit_intercept:\n # Calculate the double derivative with respect to intercept\n # In the case of sparse matrices this returns a matrix object.\n dd_intercept = np.squeeze(np.array(dX.sum(axis=0)))\n\n def Hs(s):\n ret = np.empty_like(s)\n ret[:n_features] = X.T.dot(dX.dot(s[:n_features]))\n ret[:n_features] += alpha * s[:n_features]\n\n # For the fit intercept case.\n if fit_intercept:\n ret[:n_features] += s[-1] * dd_intercept\n ret[-1] = dd_intercept.dot(s[:n_features])\n ret[-1] += d.sum() * s[-1]\n return ret\n\n return grad, Hs" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_loss", + "name": "_logistic_loss", + "qname": "sklearn.linear_model._logistic._logistic_loss", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_loss/w", + "name": "w", + "qname": "sklearn.linear_model._logistic._logistic_loss.w", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,) or (n_features + 1,)", + "default_value": "", + "description": "Coefficient vector." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,) or (n_features + 1,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_loss/X", + "name": "X", + "qname": "sklearn.linear_model._logistic._logistic_loss.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_loss/y", + "name": "y", + "qname": "sklearn.linear_model._logistic._logistic_loss.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Array of labels." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_loss/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._logistic._logistic_loss.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Regularization parameter. alpha is equal to 1 / C." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_loss/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._logistic._logistic_loss.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) default=None", + "default_value": "", + "description": "Array of weights that are assigned to individual samples.\nIf not provided, then each sample is given unit weight." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the logistic loss.", + "docstring": "Computes the logistic loss.\n\nParameters\n----------\nw : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Array of labels.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nReturns\n-------\nout : float\n Logistic loss.", + "code": "def _logistic_loss(w, X, y, alpha, sample_weight=None):\n \"\"\"Computes the logistic loss.\n\n Parameters\n ----------\n w : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : ndarray of shape (n_samples,)\n Array of labels.\n\n alpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\n sample_weight : array-like of shape (n_samples,) default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\n Returns\n -------\n out : float\n Logistic loss.\n \"\"\"\n w, c, yz = _intercept_dot(w, X, y)\n\n if sample_weight is None:\n sample_weight = np.ones(y.shape[0])\n\n # Logistic loss is the negative of the log of the logistic function.\n out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)\n return out" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_loss_and_grad", + "name": "_logistic_loss_and_grad", + "qname": "sklearn.linear_model._logistic._logistic_loss_and_grad", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_loss_and_grad/w", + "name": "w", + "qname": "sklearn.linear_model._logistic._logistic_loss_and_grad.w", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,) or (n_features + 1,)", + "default_value": "", + "description": "Coefficient vector." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,) or (n_features + 1,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_loss_and_grad/X", + "name": "X", + "qname": "sklearn.linear_model._logistic._logistic_loss_and_grad.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_loss_and_grad/y", + "name": "y", + "qname": "sklearn.linear_model._logistic._logistic_loss_and_grad.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Array of labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_loss_and_grad/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._logistic._logistic_loss_and_grad.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Regularization parameter. alpha is equal to 1 / C." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_loss_and_grad/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._logistic._logistic_loss_and_grad.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Array of weights that are assigned to individual samples.\nIf not provided, then each sample is given unit weight." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the logistic loss and gradient.", + "docstring": "Computes the logistic loss and gradient.\n\nParameters\n----------\nw : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Array of labels.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nReturns\n-------\nout : float\n Logistic loss.\n\ngrad : ndarray of shape (n_features,) or (n_features + 1,)\n Logistic gradient.", + "code": "def _logistic_loss_and_grad(w, X, y, alpha, sample_weight=None):\n \"\"\"Computes the logistic loss and gradient.\n\n Parameters\n ----------\n w : ndarray of shape (n_features,) or (n_features + 1,)\n Coefficient vector.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : ndarray of shape (n_samples,)\n Array of labels.\n\n alpha : float\n Regularization parameter. 
alpha is equal to 1 / C.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\n Returns\n -------\n out : float\n Logistic loss.\n\n grad : ndarray of shape (n_features,) or (n_features + 1,)\n Logistic gradient.\n \"\"\"\n n_samples, n_features = X.shape\n grad = np.empty_like(w)\n\n w, c, yz = _intercept_dot(w, X, y)\n\n if sample_weight is None:\n sample_weight = np.ones(n_samples)\n\n # Logistic loss is the negative of the log of the logistic function.\n out = -np.sum(sample_weight * log_logistic(yz)) + .5 * alpha * np.dot(w, w)\n\n z = expit(yz)\n z0 = sample_weight * (z - 1) * y\n\n grad[:n_features] = safe_sparse_dot(X.T, z0) + alpha * w\n\n # Case where we fit the intercept.\n if grad.shape[0] > n_features:\n grad[-1] = z0.sum()\n return out, grad" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path", + "name": "_logistic_regression_path", + "qname": "sklearn.linear_model._logistic._logistic_regression_path", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/X", + "name": "X", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/y", + "name": "y", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Input data, target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/pos_class", + "name": "pos_class", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.pos_class", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The class with respect to which we perform a one-vs-all fit.\nIf None, then it is assumed that the given problem is binary." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/Cs", + "name": "Cs", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.Cs", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or array-like of shape (n_cs,)", + "default_value": "10", + "description": "List of values for the regularization parameter or integer specifying\nthe number of regularization parameters that should be used. In this\ncase, the parameters will be chosen in a logarithmic scale between\n1e-4 and 1e4." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_cs,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.fit_intercept", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to fit an intercept for the model. In this case the shape of\nthe returned array is (n_cs, n_features + 1)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.max_iter", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Maximum number of iterations for the solver." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/tol", + "name": "tol", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.tol", + "default_value": "0.0001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Stopping criterion. For the newton-cg and lbfgs solvers, the iteration\nwill stop when ``max{|g_i | i = 1, ..., n} <= tol``\nwhere ``g_i`` is the i-th component of the gradient." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.verbose", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "For the liblinear and lbfgs solvers set verbose to any positive\nnumber for verbosity." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/solver", + "name": "solver", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.solver", + "default_value": "'lbfgs'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}", + "default_value": "'lbfgs'", + "description": "Numerical solver to use." + }, + "type": { + "kind": "EnumType", + "values": ["newton-cg", "lbfgs", "saga", "sag", "liblinear"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/coef", + "name": "coef", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.coef", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features,)", + "default_value": "None", + "description": "Initialization value for coefficients of logistic regression.\nUseless for liblinear solver." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/class_weight", + "name": "class_weight", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.class_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict or 'balanced'", + "default_value": "None", + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "'balanced'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/dual", + "name": "dual", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.dual", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Dual or primal formulation. Dual formulation is only implemented for\nl2 penalty with liblinear solver. Prefer dual=False when\nn_samples > n_features." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/penalty", + "name": "penalty", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.penalty", + "default_value": "'l2'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'l1', 'l2', 'elasticnet'}", + "default_value": "'l2'", + "description": "Used to specify the norm used in the penalization. The 'newton-cg',\n'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\nonly supported by the 'saga' solver." + }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1", "elasticnet"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/intercept_scaling", + "name": "intercept_scaling", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.intercept_scaling", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.", + "description": "Useful only when the solver 'liblinear' is used\nand self.fit_intercept is set to True. In this case, x becomes\n[x, self.intercept_scaling],\ni.e. a \"synthetic\" feature with constant value equal to\nintercept_scaling is appended to the instance vector.\nThe intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\nNote! the synthetic feature weight is subject to l1/l2 regularization\nas all other features.\nTo lessen the effect of regularization on synthetic feature weight\n(and therefore on the intercept) intercept_scaling has to be increased." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/multi_class", + "name": "multi_class", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.multi_class", + "default_value": "'auto'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'ovr', 'multinomial', 'auto'}", + "default_value": "'auto'", + "description": "If the option chosen is 'ovr', then a binary problem is fit for each\nlabel. For 'multinomial' the loss minimised is the multinomial loss fit\nacross the entire probability distribution, *even when the data is\nbinary*. 'multinomial' is unavailable when solver='liblinear'.\n'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\nand otherwise selects 'multinomial'.\n\n.. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n.. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22." + }, + "type": { + "kind": "EnumType", + "values": ["multinomial", "auto", "ovr"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\ndata. See :term:`Glossary ` for details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/check_input", + "name": "check_input", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, the input arrays X and y will not be checked." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/max_squared_sum", + "name": "max_squared_sum", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.max_squared_sum", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Maximum squared sum of X over samples. Used only in SAG solver.\nIf None, it will be computed, going through all the samples.\nThe value should be precomputed to speed up cross validation." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape(n_samples,)", + "default_value": "None", + "description": "Array of weights that are assigned to individual samples.\nIf not provided, then each sample is given unit weight." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape(n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_logistic_regression_path/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._logistic._logistic_regression_path.l1_ratio", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\nused if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\nto using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\nto using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\ncombination of L1 and L2." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute a Logistic Regression model for a list of regularization\nparameters.\n\nThis is an implementation that uses the result of the previous model\nto speed up computations along the set of solutions, making it faster\nthan sequentially calling LogisticRegression for the different parameters.\nNote that there will be no speedup with liblinear solver, since it does\nnot handle warm-starting.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute a Logistic Regression model for a list of regularization\nparameters.\n\nThis is an implementation that uses the result of the previous model\nto speed up computations along the set of solutions, making it faster\nthan sequentially calling LogisticRegression for the different parameters.\nNote that there will be no speedup with liblinear solver, since it does\nnot handle warm-starting.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Input data, target values.\n\npos_class : int, default=None\n The class with respect to which we perform a one-vs-all fit.\n If None, then it is assumed that the given problem is binary.\n\nCs : int or array-like of shape (n_cs,), default=10\n List of values for the regularization parameter or integer specifying\n the number of regularization parameters that should be used. In this\n case, the parameters will be chosen in a logarithmic scale between\n 1e-4 and 1e4.\n\nfit_intercept : bool, default=True\n Whether to fit an intercept for the model. In this case the shape of\n the returned array is (n_cs, n_features + 1).\n\nmax_iter : int, default=100\n Maximum number of iterations for the solver.\n\ntol : float, default=1e-4\n Stopping criterion. 
For the newton-cg and lbfgs solvers, the iteration\n will stop when ``max{|g_i | i = 1, ..., n} <= tol``\n where ``g_i`` is the i-th component of the gradient.\n\nverbose : int, default=0\n For the liblinear and lbfgs solvers set verbose to any positive\n number for verbosity.\n\nsolver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}, default='lbfgs'\n Numerical solver to use.\n\ncoef : array-like of shape (n_features,), default=None\n Initialization value for coefficients of logistic regression.\n Useless for liblinear solver.\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\ndual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\npenalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver.\n\nintercept_scaling : float, default=1.\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equal to\n intercept_scaling is appended to the instance vector.\n The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\nmulti_class : {'ovr', 'multinomial', 'auto'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n and otherwise selects 'multinomial'.\n\n .. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n .. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n data. See :term:`Glossary ` for details.\n\ncheck_input : bool, default=True\n If False, the input arrays X and y will not be checked.\n\nmax_squared_sum : float, default=None\n Maximum squared sum of X over samples. Used only in SAG solver.\n If None, it will be computed, going through all the samples.\n The value should be precomputed to speed up cross validation.\n\nsample_weight : array-like of shape(n_samples,), default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\nl1_ratio : float, default=None\n The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n used if ``penalty='elasticnet'``. 
Setting ``l1_ratio=0`` is equivalent\n to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n combination of L1 and L2.\n\nReturns\n-------\ncoefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n List of coefficients for the Logistic Regression model. If\n fit_intercept is set to True then the second dimension will be\n n_features + 1, where the last item represents the intercept. For\n ``multiclass='multinomial'``, the shape is (n_classes, n_cs,\n n_features) or (n_classes, n_cs, n_features + 1).\n\nCs : ndarray\n Grid of Cs used for cross-validation.\n\nn_iter : array of shape (n_cs,)\n Actual number of iteration for each Cs.\n\nNotes\n-----\nYou might get slightly different results with the solver liblinear than\nwith the others since this uses LIBLINEAR which penalizes the intercept.\n\n.. versionchanged:: 0.19\n The \"copy\" parameter was removed.", + "code": "def _logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,\n max_iter=100, tol=1e-4, verbose=0,\n solver='lbfgs', coef=None,\n class_weight=None, dual=False, penalty='l2',\n intercept_scaling=1., multi_class='auto',\n random_state=None, check_input=True,\n max_squared_sum=None, sample_weight=None,\n l1_ratio=None):\n \"\"\"Compute a Logistic Regression model for a list of regularization\n parameters.\n\n This is an implementation that uses the result of the previous model\n to speed up computations along the set of solutions, making it faster\n than sequentially calling LogisticRegression for the different parameters.\n Note that there will be no speedup with liblinear solver, since it does\n not handle warm-starting.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Input data, target values.\n\n pos_class : int, default=None\n The class with respect to which we perform a one-vs-all fit.\n If None, then it is assumed that the given problem is binary.\n\n Cs : int or array-like of shape (n_cs,), default=10\n List of values for the regularization parameter or integer specifying\n the number of regularization parameters that should be used. In this\n case, the parameters will be chosen in a logarithmic scale between\n 1e-4 and 1e4.\n\n fit_intercept : bool, default=True\n Whether to fit an intercept for the model. In this case the shape of\n the returned array is (n_cs, n_features + 1).\n\n max_iter : int, default=100\n Maximum number of iterations for the solver.\n\n tol : float, default=1e-4\n Stopping criterion. 
For the newton-cg and lbfgs solvers, the iteration\n will stop when ``max{|g_i | i = 1, ..., n} <= tol``\n where ``g_i`` is the i-th component of the gradient.\n\n verbose : int, default=0\n For the liblinear and lbfgs solvers set verbose to any positive\n number for verbosity.\n\n solver : {'lbfgs', 'newton-cg', 'liblinear', 'sag', 'saga'}, \\\n default='lbfgs'\n Numerical solver to use.\n\n coef : array-like of shape (n_features,), default=None\n Initialization value for coefficients of logistic regression.\n Useless for liblinear solver.\n\n class_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``.\n\n Note that these weights will be multiplied with sample_weight (passed\n through the fit method) if sample_weight is specified.\n\n dual : bool, default=False\n Dual or primal formulation. Dual formulation is only implemented for\n l2 penalty with liblinear solver. Prefer dual=False when\n n_samples > n_features.\n\n penalty : {'l1', 'l2', 'elasticnet'}, default='l2'\n Used to specify the norm used in the penalization. The 'newton-cg',\n 'sag' and 'lbfgs' solvers support only l2 penalties. 'elasticnet' is\n only supported by the 'saga' solver.\n\n intercept_scaling : float, default=1.\n Useful only when the solver 'liblinear' is used\n and self.fit_intercept is set to True. In this case, x becomes\n [x, self.intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equal to\n intercept_scaling is appended to the instance vector.\n The intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\n Note! the synthetic feature weight is subject to l1/l2 regularization\n as all other features.\n To lessen the effect of regularization on synthetic feature weight\n (and therefore on the intercept) intercept_scaling has to be increased.\n\n multi_class : {'ovr', 'multinomial', 'auto'}, default='auto'\n If the option chosen is 'ovr', then a binary problem is fit for each\n label. For 'multinomial' the loss minimised is the multinomial loss fit\n across the entire probability distribution, *even when the data is\n binary*. 'multinomial' is unavailable when solver='liblinear'.\n 'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\n and otherwise selects 'multinomial'.\n\n .. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n .. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22.\n\n random_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\n data. See :term:`Glossary ` for details.\n\n check_input : bool, default=True\n If False, the input arrays X and y will not be checked.\n\n max_squared_sum : float, default=None\n Maximum squared sum of X over samples. Used only in SAG solver.\n If None, it will be computed, going through all the samples.\n The value should be precomputed to speed up cross validation.\n\n sample_weight : array-like of shape(n_samples,), default=None\n Array of weights that are assigned to individual samples.\n If not provided, then each sample is given unit weight.\n\n l1_ratio : float, default=None\n The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\n used if ``penalty='elasticnet'``. 
Setting ``l1_ratio=0`` is equivalent\n to using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\n to using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\n combination of L1 and L2.\n\n Returns\n -------\n coefs : ndarray of shape (n_cs, n_features) or (n_cs, n_features + 1)\n List of coefficients for the Logistic Regression model. If\n fit_intercept is set to True then the second dimension will be\n n_features + 1, where the last item represents the intercept. For\n ``multiclass='multinomial'``, the shape is (n_classes, n_cs,\n n_features) or (n_classes, n_cs, n_features + 1).\n\n Cs : ndarray\n Grid of Cs used for cross-validation.\n\n n_iter : array of shape (n_cs,)\n Actual number of iteration for each Cs.\n\n Notes\n -----\n You might get slightly different results with the solver liblinear than\n with the others since this uses LIBLINEAR which penalizes the intercept.\n\n .. versionchanged:: 0.19\n The \"copy\" parameter was removed.\n \"\"\"\n if isinstance(Cs, numbers.Integral):\n Cs = np.logspace(-4, 4, Cs)\n\n solver = _check_solver(solver, penalty, dual)\n\n # Preprocessing.\n if check_input:\n X = check_array(X, accept_sparse='csr', dtype=np.float64,\n accept_large_sparse=solver != 'liblinear')\n y = check_array(y, ensure_2d=False, dtype=None)\n check_consistent_length(X, y)\n _, n_features = X.shape\n\n classes = np.unique(y)\n random_state = check_random_state(random_state)\n\n multi_class = _check_multi_class(multi_class, solver, len(classes))\n if pos_class is None and multi_class != 'multinomial':\n if (classes.size > 2):\n raise ValueError('To fit OvR, use the pos_class argument')\n # np.unique(y) gives labels in sorted order.\n pos_class = classes[1]\n\n # If sample weights exist, convert them to array (support for lists)\n # and check length\n # Otherwise set them to 1 for all examples\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=X.dtype, copy=True)\n\n # If class_weights is a dict (provided by the user), the weights\n # are assigned to the original labels. If it is \"balanced\", then\n # the class_weights are assigned after masking the labels with a OvR.\n le = LabelEncoder()\n if isinstance(class_weight, dict) or multi_class == 'multinomial':\n class_weight_ = compute_class_weight(class_weight,\n classes=classes, y=y)\n sample_weight *= class_weight_[le.fit_transform(y)]\n\n # For doing a ovr, we need to mask the labels first. 
for the\n # multinomial case this is not necessary.\n if multi_class == 'ovr':\n w0 = np.zeros(n_features + int(fit_intercept), dtype=X.dtype)\n mask_classes = np.array([-1, 1])\n mask = (y == pos_class)\n y_bin = np.ones(y.shape, dtype=X.dtype)\n y_bin[~mask] = -1.\n # for compute_class_weight\n\n if class_weight == \"balanced\":\n class_weight_ = compute_class_weight(class_weight,\n classes=mask_classes,\n y=y_bin)\n sample_weight *= class_weight_[le.fit_transform(y_bin)]\n\n else:\n if solver not in ['sag', 'saga']:\n lbin = LabelBinarizer()\n Y_multi = lbin.fit_transform(y)\n if Y_multi.shape[1] == 1:\n Y_multi = np.hstack([1 - Y_multi, Y_multi])\n else:\n # SAG multinomial solver needs LabelEncoder, not LabelBinarizer\n le = LabelEncoder()\n Y_multi = le.fit_transform(y).astype(X.dtype, copy=False)\n\n w0 = np.zeros((classes.size, n_features + int(fit_intercept)),\n order='F', dtype=X.dtype)\n\n if coef is not None:\n # it must work both giving the bias term and not\n if multi_class == 'ovr':\n if coef.size not in (n_features, w0.size):\n raise ValueError(\n 'Initialization coef is of shape %d, expected shape '\n '%d or %d' % (coef.size, n_features, w0.size))\n w0[:coef.size] = coef\n else:\n # For binary problems coef.shape[0] should be 1, otherwise it\n # should be classes.size.\n n_classes = classes.size\n if n_classes == 2:\n n_classes = 1\n\n if (coef.shape[0] != n_classes or\n coef.shape[1] not in (n_features, n_features + 1)):\n raise ValueError(\n 'Initialization coef is of shape (%d, %d), expected '\n 'shape (%d, %d) or (%d, %d)' % (\n coef.shape[0], coef.shape[1], classes.size,\n n_features, classes.size, n_features + 1))\n\n if n_classes == 1:\n w0[0, :coef.shape[1]] = -coef\n w0[1, :coef.shape[1]] = coef\n else:\n w0[:, :coef.shape[1]] = coef\n\n if multi_class == 'multinomial':\n # scipy.optimize.minimize and newton-cg accepts only\n # ravelled parameters.\n if solver in ['lbfgs', 'newton-cg']:\n w0 = w0.ravel()\n target = Y_multi\n if solver == 'lbfgs':\n def func(x, *args): return _multinomial_loss_grad(x, *args)[0:2]\n elif solver == 'newton-cg':\n def func(x, *args): return _multinomial_loss(x, *args)[0]\n def grad(x, *args): return _multinomial_loss_grad(x, *args)[1]\n hess = _multinomial_grad_hess\n warm_start_sag = {'coef': w0.T}\n else:\n target = y_bin\n if solver == 'lbfgs':\n func = _logistic_loss_and_grad\n elif solver == 'newton-cg':\n func = _logistic_loss\n def grad(x, *args): return _logistic_loss_and_grad(x, *args)[1]\n hess = _logistic_grad_hess\n warm_start_sag = {'coef': np.expand_dims(w0, axis=1)}\n\n coefs = list()\n n_iter = np.zeros(len(Cs), dtype=np.int32)\n for i, C in enumerate(Cs):\n if solver == 'lbfgs':\n iprint = [-1, 50, 1, 100, 101][\n np.searchsorted(np.array([0, 1, 2, 3]), verbose)]\n opt_res = optimize.minimize(\n func, w0, method=\"L-BFGS-B\", jac=True,\n args=(X, target, 1. / C, sample_weight),\n options={\"iprint\": iprint, \"gtol\": tol, \"maxiter\": max_iter}\n )\n n_iter_i = _check_optimize_result(\n solver, opt_res, max_iter,\n extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n w0, loss = opt_res.x, opt_res.fun\n elif solver == 'newton-cg':\n args = (X, target, 1. 
/ C, sample_weight)\n w0, n_iter_i = _newton_cg(hess, func, grad, w0, args=args,\n maxiter=max_iter, tol=tol)\n elif solver == 'liblinear':\n coef_, intercept_, n_iter_i, = _fit_liblinear(\n X, target, C, fit_intercept, intercept_scaling, None,\n penalty, dual, verbose, max_iter, tol, random_state,\n sample_weight=sample_weight)\n if fit_intercept:\n w0 = np.concatenate([coef_.ravel(), intercept_])\n else:\n w0 = coef_.ravel()\n\n elif solver in ['sag', 'saga']:\n if multi_class == 'multinomial':\n target = target.astype(X.dtype, copy=False)\n loss = 'multinomial'\n else:\n loss = 'log'\n # alpha is for L2-norm, beta is for L1-norm\n if penalty == 'l1':\n alpha = 0.\n beta = 1. / C\n elif penalty == 'l2':\n alpha = 1. / C\n beta = 0.\n else: # Elastic-Net penalty\n alpha = (1. / C) * (1 - l1_ratio)\n beta = (1. / C) * l1_ratio\n\n w0, n_iter_i, warm_start_sag = sag_solver(\n X, target, sample_weight, loss, alpha,\n beta, max_iter, tol,\n verbose, random_state, False, max_squared_sum, warm_start_sag,\n is_saga=(solver == 'saga'))\n\n else:\n raise ValueError(\"solver must be one of {'liblinear', 'lbfgs', \"\n \"'newton-cg', 'sag'}, got '%s' instead\" % solver)\n\n if multi_class == 'multinomial':\n n_classes = max(2, classes.size)\n multi_w0 = np.reshape(w0, (n_classes, -1))\n if n_classes == 2:\n multi_w0 = multi_w0[1][np.newaxis, :]\n coefs.append(multi_w0.copy())\n else:\n coefs.append(w0.copy())\n\n n_iter[i] = n_iter_i\n\n return np.array(coefs), np.array(Cs), n_iter" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_grad_hess", + "name": "_multinomial_grad_hess", + "qname": "sklearn.linear_model._logistic._multinomial_grad_hess", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_grad_hess/w", + "name": "w", + "qname": "sklearn.linear_model._logistic._multinomial_grad_hess.w", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_classes * n_features,) or (n_classes * (n_features + 1),)", + "default_value": "", + "description": "Coefficient vector." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_classes * n_features,) or (n_classes * (n_features + 1),)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_grad_hess/X", + "name": "X", + "qname": "sklearn.linear_model._logistic._multinomial_grad_hess.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["array-like", "sparse matrix"] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_grad_hess/Y", + "name": "Y", + "qname": "sklearn.linear_model._logistic._multinomial_grad_hess.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_classes)", + "default_value": "", + "description": "Transformed labels according to the output of LabelBinarizer."
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_grad_hess/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._logistic._multinomial_grad_hess.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Regularization parameter. alpha is equal to 1 / C." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_grad_hess/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._logistic._multinomial_grad_hess.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Array of weights that are assigned to individual samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the gradient and the Hessian, in the case of a multinomial loss.", + "docstring": "Computes the gradient and the Hessian, in the case of a multinomial loss.\n\nParameters\n----------\nw : ndarray of shape (n_classes * n_features,) or\n (n_classes * (n_features + 1),)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\nY : ndarray of shape (n_samples, n_classes)\n Transformed labels according to the output of LabelBinarizer.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,)\n Array of weights that are assigned to individual samples.\n\nReturns\n-------\ngrad : ndarray of shape (n_classes * n_features,) or (n_classes * (n_features + 1),)\n Ravelled gradient of the multinomial loss.\n\nhessp : callable\n Function that takes in a vector input of shape (n_classes * n_features)\n or (n_classes * (n_features + 1)) and returns matrix-vector product\n with hessian.\n\nReferences\n----------\nBarak A. Pearlmutter (1993). Fast Exact Multiplication by the Hessian.\n http://www.bcl.hamilton.ie/~barak/papers/nc-hessian.pdf", + "code": "def _multinomial_grad_hess(w, X, Y, alpha, sample_weight):\n \"\"\"\n Computes the gradient and the Hessian, in the case of a multinomial loss.\n\n Parameters\n ----------\n w : ndarray of shape (n_classes * n_features,) or\n (n_classes * (n_features + 1),)\n Coefficient vector.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n Y : ndarray of shape (n_samples, n_classes)\n Transformed labels according to the output of LabelBinarizer.\n\n alpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\n sample_weight : array-like of shape (n_samples,)\n Array of weights that are assigned to individual samples.\n\n Returns\n -------\n grad : ndarray of shape (n_classes * n_features,) or \\\n (n_classes * (n_features + 1),)\n Ravelled gradient of the multinomial loss.\n\n hessp : callable\n Function that takes in a vector input of shape (n_classes * n_features)\n or (n_classes * (n_features + 1)) and returns matrix-vector product\n with hessian.\n\n References\n ----------\n Barak A. Pearlmutter (1993). 
Fast Exact Multiplication by the Hessian.\n http://www.bcl.hamilton.ie/~barak/papers/nc-hessian.pdf\n \"\"\"\n n_features = X.shape[1]\n n_classes = Y.shape[1]\n fit_intercept = w.size == (n_classes * (n_features + 1))\n\n # `loss` is unused. Refactoring to avoid computing it does not\n # significantly speed up the computation and decreases readability\n loss, grad, p = _multinomial_loss_grad(w, X, Y, alpha, sample_weight)\n sample_weight = sample_weight[:, np.newaxis]\n\n # Hessian-vector product derived by applying the R-operator on the gradient\n # of the multinomial loss function.\n def hessp(v):\n v = v.reshape(n_classes, -1)\n if fit_intercept:\n inter_terms = v[:, -1]\n v = v[:, :-1]\n else:\n inter_terms = 0\n # r_yhat holds the result of applying the R-operator on the multinomial\n # estimator.\n r_yhat = safe_sparse_dot(X, v.T)\n r_yhat += inter_terms\n r_yhat += (-p * r_yhat).sum(axis=1)[:, np.newaxis]\n r_yhat *= p\n r_yhat *= sample_weight\n hessProd = np.zeros((n_classes, n_features + bool(fit_intercept)))\n hessProd[:, :n_features] = safe_sparse_dot(r_yhat.T, X)\n hessProd[:, :n_features] += v * alpha\n if fit_intercept:\n hessProd[:, -1] = r_yhat.sum(axis=0)\n return hessProd.ravel()\n\n return grad, hessp" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss", + "name": "_multinomial_loss", + "qname": "sklearn.linear_model._logistic._multinomial_loss", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss/w", + "name": "w", + "qname": "sklearn.linear_model._logistic._multinomial_loss.w", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_classes * n_features,) or (n_classes * (n_features + 1),)", + "default_value": "", + "description": "Coefficient vector." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_classes * n_features,) or (n_classes * (n_features + 1),)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss/X", + "name": "X", + "qname": "sklearn.linear_model._logistic._multinomial_loss.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["array-like", "sparse matrix"] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss/Y", + "name": "Y", + "qname": "sklearn.linear_model._logistic._multinomial_loss.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_classes)", + "default_value": "", + "description": "Transformed labels according to the output of LabelBinarizer." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._logistic._multinomial_loss.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Regularization parameter. alpha is equal to 1 / C."
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._logistic._multinomial_loss.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Array of weights that are assigned to individual samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes multinomial loss and class probabilities.", + "docstring": "Computes multinomial loss and class probabilities.\n\nParameters\n----------\nw : ndarray of shape (n_classes * n_features,) or\n (n_classes * (n_features + 1),)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\nY : ndarray of shape (n_samples, n_classes)\n Transformed labels according to the output of LabelBinarizer.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,)\n Array of weights that are assigned to individual samples.\n\nReturns\n-------\nloss : float\n Multinomial loss.\n\np : ndarray of shape (n_samples, n_classes)\n Estimated class probabilities.\n\nw : ndarray of shape (n_classes, n_features)\n Reshaped param vector excluding intercept terms.\n\nReference\n---------\nBishop, C. M. (2006). Pattern recognition and machine learning.\nSpringer. (Chapter 4.3.4)", + "code": "def _multinomial_loss(w, X, Y, alpha, sample_weight):\n \"\"\"Computes multinomial loss and class probabilities.\n\n Parameters\n ----------\n w : ndarray of shape (n_classes * n_features,) or\n (n_classes * (n_features + 1),)\n Coefficient vector.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n Y : ndarray of shape (n_samples, n_classes)\n Transformed labels according to the output of LabelBinarizer.\n\n alpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\n sample_weight : array-like of shape (n_samples,)\n Array of weights that are assigned to individual samples.\n\n Returns\n -------\n loss : float\n Multinomial loss.\n\n p : ndarray of shape (n_samples, n_classes)\n Estimated class probabilities.\n\n w : ndarray of shape (n_classes, n_features)\n Reshaped param vector excluding intercept terms.\n\n Reference\n ---------\n Bishop, C. M. (2006). Pattern recognition and machine learning.\n Springer. 
(Chapter 4.3.4)\n \"\"\"\n n_classes = Y.shape[1]\n n_features = X.shape[1]\n fit_intercept = w.size == (n_classes * (n_features + 1))\n w = w.reshape(n_classes, -1)\n sample_weight = sample_weight[:, np.newaxis]\n if fit_intercept:\n intercept = w[:, -1]\n w = w[:, :-1]\n else:\n intercept = 0\n p = safe_sparse_dot(X, w.T)\n p += intercept\n p -= logsumexp(p, axis=1)[:, np.newaxis]\n loss = -(sample_weight * Y * p).sum()\n loss += 0.5 * alpha * squared_norm(w)\n p = np.exp(p, p)\n return loss, p, w" + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss_grad", + "name": "_multinomial_loss_grad", + "qname": "sklearn.linear_model._logistic._multinomial_loss_grad", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss_grad/w", + "name": "w", + "qname": "sklearn.linear_model._logistic._multinomial_loss_grad.w", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_classes * n_features,) or (n_classes * (n_features + 1),)", + "default_value": "", + "description": "Coefficient vector." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_classes * n_features,) or (n_classes * (n_features + 1),)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss_grad/X", + "name": "X", + "qname": "sklearn.linear_model._logistic._multinomial_loss_grad.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["array-like", "sparse matrix"] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss_grad/Y", + "name": "Y", + "qname": "sklearn.linear_model._logistic._multinomial_loss_grad.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_classes)", + "default_value": "", + "description": "Transformed labels according to the output of LabelBinarizer." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss_grad/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._logistic._multinomial_loss_grad.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Regularization parameter. alpha is equal to 1 / C." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._logistic/_multinomial_loss_grad/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._logistic._multinomial_loss_grad.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Array of weights that are assigned to individual samples."
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the multinomial loss, gradient and class probabilities.", + "docstring": "Computes the multinomial loss, gradient and class probabilities.\n\nParameters\n----------\nw : ndarray of shape (n_classes * n_features,) or\n (n_classes * (n_features + 1),)\n Coefficient vector.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\nY : ndarray of shape (n_samples, n_classes)\n Transformed labels according to the output of LabelBinarizer.\n\nalpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\nsample_weight : array-like of shape (n_samples,)\n Array of weights that are assigned to individual samples.\n\nReturns\n-------\nloss : float\n Multinomial loss.\n\ngrad : ndarray of shape (n_classes * n_features,) or (n_classes * (n_features + 1),)\n Ravelled gradient of the multinomial loss.\n\np : ndarray of shape (n_samples, n_classes)\n Estimated class probabilities\n\nReference\n---------\nBishop, C. M. (2006). Pattern recognition and machine learning.\nSpringer. (Chapter 4.3.4)", + "code": "def _multinomial_loss_grad(w, X, Y, alpha, sample_weight):\n \"\"\"Computes the multinomial loss, gradient and class probabilities.\n\n Parameters\n ----------\n w : ndarray of shape (n_classes * n_features,) or\n (n_classes * (n_features + 1),)\n Coefficient vector.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n Y : ndarray of shape (n_samples, n_classes)\n Transformed labels according to the output of LabelBinarizer.\n\n alpha : float\n Regularization parameter. alpha is equal to 1 / C.\n\n sample_weight : array-like of shape (n_samples,)\n Array of weights that are assigned to individual samples.\n\n Returns\n -------\n loss : float\n Multinomial loss.\n\n grad : ndarray of shape (n_classes * n_features,) or \\\n (n_classes * (n_features + 1),)\n Ravelled gradient of the multinomial loss.\n\n p : ndarray of shape (n_samples, n_classes)\n Estimated class probabilities\n\n Reference\n ---------\n Bishop, C. M. (2006). Pattern recognition and machine learning.\n Springer. 
(Chapter 4.3.4)\n \"\"\"\n n_classes = Y.shape[1]\n n_features = X.shape[1]\n fit_intercept = (w.size == n_classes * (n_features + 1))\n grad = np.zeros((n_classes, n_features + bool(fit_intercept)),\n dtype=X.dtype)\n loss, p, w = _multinomial_loss(w, X, Y, alpha, sample_weight)\n sample_weight = sample_weight[:, np.newaxis]\n diff = sample_weight * (p - Y)\n grad[:, :n_features] = safe_sparse_dot(diff.T, X)\n grad[:, :n_features] += alpha * w\n if fit_intercept:\n grad[:, -1] = diff.sum(axis=0)\n return loss, grad.ravel(), p" + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuit.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuit.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/__init__/n_nonzero_coefs", + "name": "n_nonzero_coefs", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuit.__init__.n_nonzero_coefs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Desired number of non-zero entries in the solution. If None (by\ndefault) this value is set to 10% of n_features." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuit.__init__.tol", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Maximum norm of the residual. If not None, overrides n_nonzero_coefs." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuit.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuit.__init__.normalize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/__init__/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuit.__init__.precompute", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto' or bool", + "default_value": "'auto'", + "description": "Whether to use a precomputed Gram and Xy matrix to speed up\ncalculations. Improves performance when :term:`n_targets` or\n:term:`n_samples` is very large. Note that if you already have such\nmatrices, you can pass them directly to the fit method." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "bool" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Orthogonal Matching Pursuit model (OMP).\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, n_nonzero_coefs=None, tol=None, fit_intercept=True,\n normalize=True, precompute='auto'):\n self.n_nonzero_coefs = n_nonzero_coefs\n self.tol = tol\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.precompute = precompute" + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/fit", + "name": "fit", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuit.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/fit/self", + "name": "self", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuit.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/fit/X", + "name": "X", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuit.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuit/fit/y", + "name": "y", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuit.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values. Will be cast to X's dtype if necessary" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model using X, y as training data.", + "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values. 
Will be cast to X's dtype if necessary\n\n\nReturns\n-------\nself : object\n returns an instance of self.", + "code": " def fit(self, X, y):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary\n\n\n Returns\n -------\n self : object\n returns an instance of self.\n \"\"\"\n X, y = self._validate_data(X, y, multi_output=True, y_numeric=True)\n n_features = X.shape[1]\n\n X, y, X_offset, y_offset, X_scale, Gram, Xy = \\\n _pre_fit(X, y, None, self.precompute, self.normalize,\n self.fit_intercept, copy=True)\n\n if y.ndim == 1:\n y = y[:, np.newaxis]\n\n if self.n_nonzero_coefs is None and self.tol is None:\n # default for n_nonzero_coefs is 0.1 * n_features\n # but at least one.\n self.n_nonzero_coefs_ = max(int(0.1 * n_features), 1)\n else:\n self.n_nonzero_coefs_ = self.n_nonzero_coefs\n\n if Gram is False:\n coef_, self.n_iter_ = orthogonal_mp(\n X, y, n_nonzero_coefs=self.n_nonzero_coefs_, tol=self.tol,\n precompute=False, copy_X=True,\n return_n_iter=True)\n else:\n norms_sq = np.sum(y ** 2, axis=0) if self.tol is not None else None\n\n coef_, self.n_iter_ = orthogonal_mp_gram(\n Gram, Xy=Xy, n_nonzero_coefs=self.n_nonzero_coefs_,\n tol=self.tol, norms_squared=norms_sq,\n copy_Gram=True, copy_Xy=True,\n return_n_iter=True)\n self.coef_ = coef_.T\n self._set_intercept(X_offset, y_offset, X_scale)\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/__init__/copy", + "name": "copy", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether the design matrix X must be copied by the algorithm. A false\nvalue is only helpful if X is already Fortran-ordered, otherwise a\ncopy is made anyway." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV.__init__.normalize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV.__init__.max_iter", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Maximum numbers of iterations to perform, therefore maximum features\nto include. 10% of ``n_features`` but at least 5 if available." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/__init__/cv", + "name": "cv", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of CPUs to use during the cross validation.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "Sets the verbosity amount." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Cross-validated Orthogonal Matching Pursuit model (OMP).\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, copy=True, fit_intercept=True, normalize=True,\n max_iter=None, cv=None, n_jobs=None, verbose=False):\n self.copy = copy\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.max_iter = max_iter\n self.cv = cv\n self.n_jobs = n_jobs\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/fit", + "name": "fit", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/fit/self", + "name": "self", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/fit/X", + "name": "X", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/OrthogonalMatchingPursuitCV/fit/y", + "name": "y", + "qname": "sklearn.linear_model._omp.OrthogonalMatchingPursuitCV.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values. Will be cast to X's dtype if necessary." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model using X, y as training data.", + "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values. Will be cast to X's dtype if necessary.\n\nReturns\n-------\nself : object\n returns an instance of self.", + "code": " def fit(self, X, y):\n \"\"\"Fit the model using X, y as training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data.\n\n y : array-like of shape (n_samples,)\n Target values. 
Will be cast to X's dtype if necessary.\n\n Returns\n -------\n self : object\n returns an instance of self.\n \"\"\"\n X, y = self._validate_data(X, y, y_numeric=True, ensure_min_features=2,\n estimator=self)\n X = as_float_array(X, copy=False, force_all_finite=False)\n cv = check_cv(self.cv, classifier=False)\n max_iter = (min(max(int(0.1 * X.shape[1]), 5), X.shape[1])\n if not self.max_iter\n else self.max_iter)\n cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(\n delayed(_omp_path_residues)(\n X[train], y[train], X[test], y[test], self.copy,\n self.fit_intercept, self.normalize, max_iter)\n for train, test in cv.split(X))\n\n min_early_stop = min(fold.shape[0] for fold in cv_paths)\n mse_folds = np.array([(fold[:min_early_stop] ** 2).mean(axis=1)\n for fold in cv_paths])\n best_n_nonzero_coefs = np.argmin(mse_folds.mean(axis=0)) + 1\n self.n_nonzero_coefs_ = best_n_nonzero_coefs\n omp = OrthogonalMatchingPursuit(n_nonzero_coefs=best_n_nonzero_coefs,\n fit_intercept=self.fit_intercept,\n normalize=self.normalize)\n omp.fit(X, y)\n self.coef_ = omp.coef_\n self.intercept_ = omp.intercept_\n self.n_iter_ = omp.n_iter_\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_cholesky_omp", + "name": "_cholesky_omp", + "qname": "sklearn.linear_model._omp._cholesky_omp", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._omp/_cholesky_omp/X", + "name": "X", + "qname": "sklearn.linear_model._omp._cholesky_omp.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Input dictionary. Columns are assumed to have unit norm." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_cholesky_omp/y", + "name": "y", + "qname": "sklearn.linear_model._omp._cholesky_omp.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Input targets." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_cholesky_omp/n_nonzero_coefs", + "name": "n_nonzero_coefs", + "qname": "sklearn.linear_model._omp._cholesky_omp.n_nonzero_coefs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Targeted number of non-zero elements." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_cholesky_omp/tol", + "name": "tol", + "qname": "sklearn.linear_model._omp._cholesky_omp.tol", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Targeted squared error, if not None overrides n_nonzero_coefs." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_cholesky_omp/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._omp._cholesky_omp.copy_X", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether the design matrix X must be copied by the algorithm. A false\nvalue is only helpful if X is already Fortran-ordered, otherwise a\ncopy is made anyway." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_cholesky_omp/return_path", + "name": "return_path", + "qname": "sklearn.linear_model._omp._cholesky_omp.return_path", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return every value of the nonzero coefficients along the\nforward path. Useful for cross-validation." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Orthogonal Matching Pursuit step using the Cholesky decomposition.", + "docstring": "Orthogonal Matching Pursuit step using the Cholesky decomposition.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Input dictionary. Columns are assumed to have unit norm.\n\ny : ndarray of shape (n_samples,)\n Input targets.\n\nn_nonzero_coefs : int\n Targeted number of non-zero elements.\n\ntol : float, default=None\n Targeted squared error, if not None overrides n_nonzero_coefs.\n\ncopy_X : bool, default=True\n Whether the design matrix X must be copied by the algorithm. A false\n value is only helpful if X is already Fortran-ordered, otherwise a\n copy is made anyway.\n\nreturn_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. Useful for cross-validation.\n\nReturns\n-------\ngamma : ndarray of shape (n_nonzero_coefs,)\n Non-zero elements of the solution.\n\nidx : ndarray of shape (n_nonzero_coefs,)\n Indices of the positions of the elements in gamma within the solution\n vector.\n\ncoef : ndarray of shape (n_features, n_nonzero_coefs)\n The first k values of column k correspond to the coefficient value\n for the active features at that step. The lower left triangle contains\n garbage. Only returned if ``return_path=True``.\n\nn_active : int\n Number of active features at convergence.", + "code": "def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True,\n return_path=False):\n \"\"\"Orthogonal Matching Pursuit step using the Cholesky decomposition.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Input dictionary. Columns are assumed to have unit norm.\n\n y : ndarray of shape (n_samples,)\n Input targets.\n\n n_nonzero_coefs : int\n Targeted number of non-zero elements.\n\n tol : float, default=None\n Targeted squared error, if not None overrides n_nonzero_coefs.\n\n copy_X : bool, default=True\n Whether the design matrix X must be copied by the algorithm. A false\n value is only helpful if X is already Fortran-ordered, otherwise a\n copy is made anyway.\n\n return_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. 
Useful for cross-validation.\n\n Returns\n -------\n gamma : ndarray of shape (n_nonzero_coefs,)\n Non-zero elements of the solution.\n\n idx : ndarray of shape (n_nonzero_coefs,)\n Indices of the positions of the elements in gamma within the solution\n vector.\n\n coef : ndarray of shape (n_features, n_nonzero_coefs)\n The first k values of column k correspond to the coefficient value\n for the active features at that step. The lower left triangle contains\n garbage. Only returned if ``return_path=True``.\n\n n_active : int\n Number of active features at convergence.\n \"\"\"\n if copy_X:\n X = X.copy('F')\n else: # even if we are allowed to overwrite, still copy it if bad order\n X = np.asfortranarray(X)\n\n min_float = np.finfo(X.dtype).eps\n nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (X,))\n potrs, = get_lapack_funcs(('potrs',), (X,))\n\n alpha = np.dot(X.T, y)\n residual = y\n gamma = np.empty(0)\n n_active = 0\n indices = np.arange(X.shape[1]) # keeping track of swapping\n\n max_features = X.shape[1] if tol is not None else n_nonzero_coefs\n\n L = np.empty((max_features, max_features), dtype=X.dtype)\n\n if return_path:\n coefs = np.empty_like(L)\n\n while True:\n lam = np.argmax(np.abs(np.dot(X.T, residual)))\n if lam < n_active or alpha[lam] ** 2 < min_float:\n # atom already selected or inner product too small\n warnings.warn(premature, RuntimeWarning, stacklevel=2)\n break\n\n if n_active > 0:\n # Updates the Cholesky decomposition of X' X\n L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam])\n linalg.solve_triangular(L[:n_active, :n_active],\n L[n_active, :n_active],\n trans=0, lower=1,\n overwrite_b=True,\n check_finite=False)\n v = nrm2(L[n_active, :n_active]) ** 2\n Lkk = linalg.norm(X[:, lam]) ** 2 - v\n if Lkk <= min_float: # selected atoms are dependent\n warnings.warn(premature, RuntimeWarning, stacklevel=2)\n break\n L[n_active, n_active] = sqrt(Lkk)\n else:\n L[0, 0] = linalg.norm(X[:, lam])\n\n X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam])\n alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active]\n indices[n_active], indices[lam] = indices[lam], indices[n_active]\n n_active += 1\n\n # solves LL'x = X'y as a composition of two triangular systems\n gamma, _ = potrs(L[:n_active, :n_active], alpha[:n_active], lower=True,\n overwrite_b=False)\n\n if return_path:\n coefs[:n_active, n_active - 1] = gamma\n residual = y - np.dot(X[:, :n_active], gamma)\n if tol is not None and nrm2(residual) ** 2 <= tol:\n break\n elif n_active == max_features:\n break\n\n if return_path:\n return gamma, indices[:n_active], coefs[:, :n_active], n_active\n else:\n return gamma, indices[:n_active], n_active" + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_gram_omp", + "name": "_gram_omp", + "qname": "sklearn.linear_model._omp._gram_omp", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._omp/_gram_omp/Gram", + "name": "Gram", + "qname": "sklearn.linear_model._omp._gram_omp.Gram", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features, n_features)", + "default_value": "", + "description": "Gram matrix of the input data matrix." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_gram_omp/Xy", + "name": "Xy", + "qname": "sklearn.linear_model._omp._gram_omp.Xy", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,)", + "default_value": "", + "description": "Input targets." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_gram_omp/n_nonzero_coefs", + "name": "n_nonzero_coefs", + "qname": "sklearn.linear_model._omp._gram_omp.n_nonzero_coefs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Targeted number of non-zero elements." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_gram_omp/tol_0", + "name": "tol_0", + "qname": "sklearn.linear_model._omp._gram_omp.tol_0", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Squared norm of y, required if tol is not None." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_gram_omp/tol", + "name": "tol", + "qname": "sklearn.linear_model._omp._gram_omp.tol", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Targeted squared error, if not None overrides n_nonzero_coefs." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_gram_omp/copy_Gram", + "name": "copy_Gram", + "qname": "sklearn.linear_model._omp._gram_omp.copy_Gram", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether the gram matrix must be copied by the algorithm. A false\nvalue is only helpful if it is already Fortran-ordered, otherwise a\ncopy is made anyway." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_gram_omp/copy_Xy", + "name": "copy_Xy", + "qname": "sklearn.linear_model._omp._gram_omp.copy_Xy", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether the covariance vector Xy must be copied by the algorithm.\nIf False, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_gram_omp/return_path", + "name": "return_path", + "qname": "sklearn.linear_model._omp._gram_omp.return_path", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return every value of the nonzero coefficients along the\nforward path. Useful for cross-validation." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Orthogonal Matching Pursuit step on a precomputed Gram matrix.\n\nThis function uses the Cholesky decomposition method.", + "docstring": "Orthogonal Matching Pursuit step on a precomputed Gram matrix.\n\nThis function uses the Cholesky decomposition method.\n\nParameters\n----------\nGram : ndarray of shape (n_features, n_features)\n Gram matrix of the input data matrix.\n\nXy : ndarray of shape (n_features,)\n Input targets.\n\nn_nonzero_coefs : int\n Targeted number of non-zero elements.\n\ntol_0 : float, default=None\n Squared norm of y, required if tol is not None.\n\ntol : float, default=None\n Targeted squared error, if not None overrides n_nonzero_coefs.\n\ncopy_Gram : bool, default=True\n Whether the gram matrix must be copied by the algorithm. A false\n value is only helpful if it is already Fortran-ordered, otherwise a\n copy is made anyway.\n\ncopy_Xy : bool, default=True\n Whether the covariance vector Xy must be copied by the algorithm.\n If False, it may be overwritten.\n\nreturn_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. Useful for cross-validation.\n\nReturns\n-------\ngamma : ndarray of shape (n_nonzero_coefs,)\n Non-zero elements of the solution.\n\nidx : ndarray of shape (n_nonzero_coefs,)\n Indices of the positions of the elements in gamma within the solution\n vector.\n\ncoefs : ndarray of shape (n_features, n_nonzero_coefs)\n The first k values of column k correspond to the coefficient value\n for the active features at that step. The lower left triangle contains\n garbage. Only returned if ``return_path=True``.\n\nn_active : int\n Number of active features at convergence.", + "code": "def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None,\n copy_Gram=True, copy_Xy=True, return_path=False):\n \"\"\"Orthogonal Matching Pursuit step on a precomputed Gram matrix.\n\n This function uses the Cholesky decomposition method.\n\n Parameters\n ----------\n Gram : ndarray of shape (n_features, n_features)\n Gram matrix of the input data matrix.\n\n Xy : ndarray of shape (n_features,)\n Input targets.\n\n n_nonzero_coefs : int\n Targeted number of non-zero elements.\n\n tol_0 : float, default=None\n Squared norm of y, required if tol is not None.\n\n tol : float, default=None\n Targeted squared error, if not None overrides n_nonzero_coefs.\n\n copy_Gram : bool, default=True\n Whether the gram matrix must be copied by the algorithm. A false\n value is only helpful if it is already Fortran-ordered, otherwise a\n copy is made anyway.\n\n copy_Xy : bool, default=True\n Whether the covariance vector Xy must be copied by the algorithm.\n If False, it may be overwritten.\n\n return_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. Useful for cross-validation.\n\n Returns\n -------\n gamma : ndarray of shape (n_nonzero_coefs,)\n Non-zero elements of the solution.\n\n idx : ndarray of shape (n_nonzero_coefs,)\n Indices of the positions of the elements in gamma within the solution\n vector.\n\n coefs : ndarray of shape (n_features, n_nonzero_coefs)\n The first k values of column k correspond to the coefficient value\n for the active features at that step. The lower left triangle contains\n garbage. 
Only returned if ``return_path=True``.\n\n n_active : int\n Number of active features at convergence.\n \"\"\"\n Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram)\n\n if copy_Xy or not Xy.flags.writeable:\n Xy = Xy.copy()\n\n min_float = np.finfo(Gram.dtype).eps\n nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram,))\n potrs, = get_lapack_funcs(('potrs',), (Gram,))\n\n indices = np.arange(len(Gram)) # keeping track of swapping\n alpha = Xy\n tol_curr = tol_0\n delta = 0\n gamma = np.empty(0)\n n_active = 0\n\n max_features = len(Gram) if tol is not None else n_nonzero_coefs\n\n L = np.empty((max_features, max_features), dtype=Gram.dtype)\n\n L[0, 0] = 1.\n if return_path:\n coefs = np.empty_like(L)\n\n while True:\n lam = np.argmax(np.abs(alpha))\n if lam < n_active or alpha[lam] ** 2 < min_float:\n # selected same atom twice, or inner product too small\n warnings.warn(premature, RuntimeWarning, stacklevel=3)\n break\n if n_active > 0:\n L[n_active, :n_active] = Gram[lam, :n_active]\n linalg.solve_triangular(L[:n_active, :n_active],\n L[n_active, :n_active],\n trans=0, lower=1,\n overwrite_b=True,\n check_finite=False)\n v = nrm2(L[n_active, :n_active]) ** 2\n Lkk = Gram[lam, lam] - v\n if Lkk <= min_float: # selected atoms are dependent\n warnings.warn(premature, RuntimeWarning, stacklevel=3)\n break\n L[n_active, n_active] = sqrt(Lkk)\n else:\n L[0, 0] = sqrt(Gram[lam, lam])\n\n Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam])\n Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam])\n indices[n_active], indices[lam] = indices[lam], indices[n_active]\n Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active]\n n_active += 1\n # solves LL'x = X'y as a composition of two triangular systems\n gamma, _ = potrs(L[:n_active, :n_active], Xy[:n_active], lower=True,\n overwrite_b=False)\n if return_path:\n coefs[:n_active, n_active - 1] = gamma\n beta = np.dot(Gram[:, :n_active], gamma)\n alpha = Xy - beta\n if tol is not None:\n tol_curr += delta\n delta = np.inner(gamma, beta[:n_active])\n tol_curr -= delta\n if abs(tol_curr) <= tol:\n break\n elif n_active == max_features:\n break\n\n if return_path:\n return gamma, indices[:n_active], coefs[:, :n_active], n_active\n else:\n return gamma, indices[:n_active], n_active" + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_omp_path_residues", + "name": "_omp_path_residues", + "qname": "sklearn.linear_model._omp._omp_path_residues", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._omp/_omp_path_residues/X_train", + "name": "X_train", + "qname": "sklearn.linear_model._omp._omp_path_residues.X_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to fit the LARS on." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_omp_path_residues/y_train", + "name": "y_train", + "qname": "sklearn.linear_model._omp._omp_path_residues.y_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples)", + "default_value": "", + "description": "The target variable to fit LARS on." 
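Both _cholesky_omp and _gram_omp implement the same greedy loop: select the atom most correlated with the current residual, extend a Cholesky factorization of the active block of X'X (the Lkk <= min_float guard catches dependent or re-selected atoms), and solve LL'x = X'y for the active coefficients. A didactic NumPy re-implementation of that loop, assuming unit-norm columns like the helpers above; it re-solves the active-set least squares from scratch each step instead of updating L, so it matches in output but not in cost:

import numpy as np

def naive_omp(X, y, n_nonzero_coefs):
    """Didactic OMP: same greedy selection as _cholesky_omp, but
    re-solving the active-set least squares each step instead of
    updating a Cholesky factor of the active block of X'X."""
    residual = y.astype(float)
    active, gamma = [], np.zeros(0)
    for _ in range(n_nonzero_coefs):
        lam = int(np.argmax(np.abs(X.T @ residual)))  # most correlated atom
        if lam in active:                             # dependent atom: stop
            break
        active.append(lam)
        gamma, *_ = np.linalg.lstsq(X[:, active], y, rcond=None)
        residual = y - X[:, active] @ gamma           # re-orthogonalized
    return gamma, np.array(active)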
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_omp_path_residues/X_test", + "name": "X_test", + "qname": "sklearn.linear_model._omp._omp_path_residues.X_test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to compute the residues on." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_omp_path_residues/y_test", + "name": "y_test", + "qname": "sklearn.linear_model._omp._omp_path_residues.y_test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples)", + "default_value": "", + "description": "The target variable to compute the residues on." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_omp_path_residues/copy", + "name": "copy", + "qname": "sklearn.linear_model._omp._omp_path_residues.copy", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether X_train, X_test, y_train and y_test should be copied. If\nFalse, they may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_omp_path_residues/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._omp._omp_path_residues.fit_intercept", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_omp_path_residues/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._omp._omp_path_residues.normalize", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/_omp_path_residues/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._omp._omp_path_residues.max_iter", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Maximum numbers of iterations to perform, therefore maximum features\nto include. 100 by default." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the residues on left-out data for a full LARS path.", + "docstring": "Compute the residues on left-out data for a full LARS path.\n\nParameters\n----------\nX_train : ndarray of shape (n_samples, n_features)\n The data to fit the LARS on.\n\ny_train : ndarray of shape (n_samples)\n The target variable to fit LARS on.\n\nX_test : ndarray of shape (n_samples, n_features)\n The data to compute the residues on.\n\ny_test : ndarray of shape (n_samples)\n The target variable to compute the residues on.\n\ncopy : bool, default=True\n Whether X_train, X_test, y_train and y_test should be copied. If\n False, they may be overwritten.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\nmax_iter : int, default=100\n Maximum numbers of iterations to perform, therefore maximum features\n to include. 100 by default.\n\nReturns\n-------\nresidues : ndarray of shape (n_samples, max_features)\n Residues of the prediction on the test data.", + "code": "def _omp_path_residues(X_train, y_train, X_test, y_test, copy=True,\n fit_intercept=True, normalize=True, max_iter=100):\n \"\"\"Compute the residues on left-out data for a full LARS path.\n\n Parameters\n ----------\n X_train : ndarray of shape (n_samples, n_features)\n The data to fit the LARS on.\n\n y_train : ndarray of shape (n_samples)\n The target variable to fit LARS on.\n\n X_test : ndarray of shape (n_samples, n_features)\n The data to compute the residues on.\n\n y_test : ndarray of shape (n_samples)\n The target variable to compute the residues on.\n\n copy : bool, default=True\n Whether X_train, X_test, y_train and y_test should be copied. If\n False, they may be overwritten.\n\n fit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\n normalize : bool, default=True\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n max_iter : int, default=100\n Maximum numbers of iterations to perform, therefore maximum features\n to include. 
100 by default.\n\n Returns\n -------\n residues : ndarray of shape (n_samples, max_features)\n Residues of the prediction on the test data.\n \"\"\"\n\n if copy:\n X_train = X_train.copy()\n y_train = y_train.copy()\n X_test = X_test.copy()\n y_test = y_test.copy()\n\n if fit_intercept:\n X_mean = X_train.mean(axis=0)\n X_train -= X_mean\n X_test -= X_mean\n y_mean = y_train.mean(axis=0)\n y_train = as_float_array(y_train, copy=False)\n y_train -= y_mean\n y_test = as_float_array(y_test, copy=False)\n y_test -= y_mean\n\n if normalize:\n norms = np.sqrt(np.sum(X_train ** 2, axis=0))\n nonzeros = np.flatnonzero(norms)\n X_train[:, nonzeros] /= norms[nonzeros]\n\n coefs = orthogonal_mp(X_train, y_train, n_nonzero_coefs=max_iter, tol=None,\n precompute=False, copy_X=False,\n return_path=True)\n if coefs.ndim == 1:\n coefs = coefs[:, np.newaxis]\n if normalize:\n coefs[nonzeros] /= norms[nonzeros][:, np.newaxis]\n\n return np.dot(coefs.T, X_test.T) - y_test" + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp", + "name": "orthogonal_mp", + "qname": "sklearn.linear_model._omp.orthogonal_mp", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp/X", + "name": "X", + "qname": "sklearn.linear_model._omp.orthogonal_mp.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data. Columns are assumed to have unit norm." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp/y", + "name": "y", + "qname": "sklearn.linear_model._omp.orthogonal_mp.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Input targets." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp/n_nonzero_coefs", + "name": "n_nonzero_coefs", + "qname": "sklearn.linear_model._omp.orthogonal_mp.n_nonzero_coefs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Desired number of non-zero entries in the solution. If None (by\ndefault) this value is set to 10% of n_features." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp/tol", + "name": "tol", + "qname": "sklearn.linear_model._omp.orthogonal_mp.tol", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Maximum norm of the residual. If not None, overrides n_nonzero_coefs." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp/precompute", + "name": "precompute", + "qname": "sklearn.linear_model._omp.orthogonal_mp.precompute", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto' or bool", + "default_value": "False", + "description": "Whether to perform precomputations. 
Improves performance when n_targets\nor n_samples is very large." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._omp.orthogonal_mp.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether the design matrix X must be copied by the algorithm. A false\nvalue is only helpful if X is already Fortran-ordered, otherwise a\ncopy is made anyway." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp/return_path", + "name": "return_path", + "qname": "sklearn.linear_model._omp.orthogonal_mp.return_path", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return every value of the nonzero coefficients along the\nforward path. Useful for cross-validation." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.linear_model._omp.orthogonal_mp.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether or not to return the number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Orthogonal Matching Pursuit (OMP).\n\nSolves n_targets Orthogonal Matching Pursuit problems.\nAn instance of the problem has the form:\n\nWhen parametrized by the number of non-zero coefficients using\n`n_nonzero_coefs`:\nargmin ||y - X\\gamma||^2 subject to ||\\gamma||_0 <= n_{nonzero coefs}\n\nWhen parametrized by error using the parameter `tol`:\nargmin ||\\gamma||_0 subject to ||y - X\\gamma||^2 <= tol\n\nRead more in the :ref:`User Guide `.", + "docstring": "Orthogonal Matching Pursuit (OMP).\n\nSolves n_targets Orthogonal Matching Pursuit problems.\nAn instance of the problem has the form:\n\nWhen parametrized by the number of non-zero coefficients using\n`n_nonzero_coefs`:\nargmin ||y - X\\gamma||^2 subject to ||\\gamma||_0 <= n_{nonzero coefs}\n\nWhen parametrized by error using the parameter `tol`:\nargmin ||\\gamma||_0 subject to ||y - X\\gamma||^2 <= tol\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Input data. Columns are assumed to have unit norm.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Input targets.\n\nn_nonzero_coefs : int, default=None\n Desired number of non-zero entries in the solution. If None (by\n default) this value is set to 10% of n_features.\n\ntol : float, default=None\n Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\nprecompute : 'auto' or bool, default=False\n Whether to perform precomputations. Improves performance when n_targets\n or n_samples is very large.\n\ncopy_X : bool, default=True\n Whether the design matrix X must be copied by the algorithm. 
A false\n value is only helpful if X is already Fortran-ordered, otherwise a\n copy is made anyway.\n\nreturn_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. Useful for cross-validation.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncoef : ndarray of shape (n_features,) or (n_features, n_targets)\n Coefficients of the OMP solution. If `return_path=True`, this contains\n the whole coefficient path. In this case its shape is\n (n_features, n_features) or (n_features, n_targets, n_features) and\n iterating over the last axis yields coefficients in increasing order\n of active features.\n\nn_iters : array-like or int\n Number of active features across every target. Returned only if\n `return_n_iter` is set to True.\n\nSee Also\n--------\nOrthogonalMatchingPursuit\northogonal_mp_gram\nlars_path\nsklearn.decomposition.sparse_encode\n\nNotes\n-----\nOrthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,\nMatching pursuits with time-frequency dictionaries, IEEE Transactions on\nSignal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)\n\nThis implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,\nM., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\nMatching Pursuit Technical Report - CS Technion, April 2008.\nhttps://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf", + "code": "@_deprecate_positional_args\ndef orthogonal_mp(X, y, *, n_nonzero_coefs=None, tol=None, precompute=False,\n copy_X=True, return_path=False,\n return_n_iter=False):\n r\"\"\"Orthogonal Matching Pursuit (OMP).\n\n Solves n_targets Orthogonal Matching Pursuit problems.\n An instance of the problem has the form:\n\n When parametrized by the number of non-zero coefficients using\n `n_nonzero_coefs`:\n argmin ||y - X\\gamma||^2 subject to ||\\gamma||_0 <= n_{nonzero coefs}\n\n When parametrized by error using the parameter `tol`:\n argmin ||\\gamma||_0 subject to ||y - X\\gamma||^2 <= tol\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Input data. Columns are assumed to have unit norm.\n\n y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Input targets.\n\n n_nonzero_coefs : int, default=None\n Desired number of non-zero entries in the solution. If None (by\n default) this value is set to 10% of n_features.\n\n tol : float, default=None\n Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\n precompute : 'auto' or bool, default=False\n Whether to perform precomputations. Improves performance when n_targets\n or n_samples is very large.\n\n copy_X : bool, default=True\n Whether the design matrix X must be copied by the algorithm. A false\n value is only helpful if X is already Fortran-ordered, otherwise a\n copy is made anyway.\n\n return_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. Useful for cross-validation.\n\n return_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\n Returns\n -------\n coef : ndarray of shape (n_features,) or (n_features, n_targets)\n Coefficients of the OMP solution. If `return_path=True`, this contains\n the whole coefficient path. 
In this case its shape is\n (n_features, n_features) or (n_features, n_targets, n_features) and\n iterating over the last axis yields coefficients in increasing order\n of active features.\n\n n_iters : array-like or int\n Number of active features across every target. Returned only if\n `return_n_iter` is set to True.\n\n See Also\n --------\n OrthogonalMatchingPursuit\n orthogonal_mp_gram\n lars_path\n sklearn.decomposition.sparse_encode\n\n Notes\n -----\n Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,\n Matching pursuits with time-frequency dictionaries, IEEE Transactions on\n Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n (http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)\n\n This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,\n M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\n Matching Pursuit Technical Report - CS Technion, April 2008.\n https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf\n\n \"\"\"\n X = check_array(X, order='F', copy=copy_X)\n copy_X = False\n if y.ndim == 1:\n y = y.reshape(-1, 1)\n y = check_array(y)\n if y.shape[1] > 1: # subsequent targets will be affected\n copy_X = True\n if n_nonzero_coefs is None and tol is None:\n # default for n_nonzero_coefs is 0.1 * n_features\n # but at least one.\n n_nonzero_coefs = max(int(0.1 * X.shape[1]), 1)\n if tol is not None and tol < 0:\n raise ValueError(\"Epsilon cannot be negative\")\n if tol is None and n_nonzero_coefs <= 0:\n raise ValueError(\"The number of atoms must be positive\")\n if tol is None and n_nonzero_coefs > X.shape[1]:\n raise ValueError(\"The number of atoms cannot be more than the number \"\n \"of features\")\n if precompute == 'auto':\n precompute = X.shape[0] > X.shape[1]\n if precompute:\n G = np.dot(X.T, X)\n G = np.asfortranarray(G)\n Xy = np.dot(X.T, y)\n if tol is not None:\n norms_squared = np.sum((y ** 2), axis=0)\n else:\n norms_squared = None\n return orthogonal_mp_gram(G, Xy, n_nonzero_coefs=n_nonzero_coefs,\n tol=tol, norms_squared=norms_squared,\n copy_Gram=copy_X, copy_Xy=False,\n return_path=return_path)\n\n if return_path:\n coef = np.zeros((X.shape[1], y.shape[1], X.shape[1]))\n else:\n coef = np.zeros((X.shape[1], y.shape[1]))\n n_iters = []\n\n for k in range(y.shape[1]):\n out = _cholesky_omp(\n X, y[:, k], n_nonzero_coefs, tol,\n copy_X=copy_X, return_path=return_path)\n if return_path:\n _, idx, coefs, n_iter = out\n coef = coef[:, :, :len(idx)]\n for n_active, x in enumerate(coefs.T):\n coef[idx[:n_active + 1], k, n_active] = x[:n_active + 1]\n else:\n x, idx, n_iter = out\n coef[idx, k] = x\n n_iters.append(n_iter)\n\n if y.shape[1] == 1:\n n_iters = n_iters[0]\n\n if return_n_iter:\n return np.squeeze(coef), n_iters\n else:\n return np.squeeze(coef)" + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp_gram", + "name": "orthogonal_mp_gram", + "qname": "sklearn.linear_model._omp.orthogonal_mp_gram", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp_gram/Gram", + "name": "Gram", + "qname": "sklearn.linear_model._omp.orthogonal_mp_gram.Gram", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features, n_features)", + "default_value": "", + "description": "Gram matrix of the input data: X.T * X." 
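orthogonal_mp supports both parametrizations named in its docstring: a sparsity budget via n_nonzero_coefs, or an error budget via tol (compared against the squared residual norm in _cholesky_omp's stopping rule). A small sketch on synthetic data, assuming unit-norm dictionary columns as the docstring requires:

import numpy as np
from sklearn.linear_model import orthogonal_mp

rng = np.random.RandomState(0)
X = rng.randn(100, 30)
X /= np.linalg.norm(X, axis=0)    # columns are assumed to have unit norm
y = X[:, [3, 7, 11]] @ np.array([1.0, -2.0, 0.5])

coef = orthogonal_mp(X, y, n_nonzero_coefs=3)
print(np.flatnonzero(coef))       # recovers the three informative atoms

# Error-budget form: stop once the squared residual norm reaches tol.
coef_tol = orthogonal_mp(X, y, tol=1e-10)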
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp_gram/Xy", + "name": "Xy", + "qname": "sklearn.linear_model._omp.orthogonal_mp_gram.Xy", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,) or (n_features, n_targets)", + "default_value": "", + "description": "Input targets multiplied by X: X.T * y." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,) or (n_features, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp_gram/n_nonzero_coefs", + "name": "n_nonzero_coefs", + "qname": "sklearn.linear_model._omp.orthogonal_mp_gram.n_nonzero_coefs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Desired number of non-zero entries in the solution. If None (by\ndefault) this value is set to 10% of n_features." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp_gram/tol", + "name": "tol", + "qname": "sklearn.linear_model._omp.orthogonal_mp_gram.tol", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Maximum norm of the residual. If not None, overrides n_nonzero_coefs." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp_gram/norms_squared", + "name": "norms_squared", + "qname": "sklearn.linear_model._omp.orthogonal_mp_gram.norms_squared", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_targets,)", + "default_value": "None", + "description": "Squared L2 norms of the lines of y. Required if tol is not None." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_targets,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp_gram/copy_Gram", + "name": "copy_Gram", + "qname": "sklearn.linear_model._omp.orthogonal_mp_gram.copy_Gram", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether the gram matrix must be copied by the algorithm. A false\nvalue is only helpful if it is already Fortran-ordered, otherwise a\ncopy is made anyway." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp_gram/copy_Xy", + "name": "copy_Xy", + "qname": "sklearn.linear_model._omp.orthogonal_mp_gram.copy_Xy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether the covariance vector Xy must be copied by the algorithm.\nIf False, it may be overwritten." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp_gram/return_path", + "name": "return_path", + "qname": "sklearn.linear_model._omp.orthogonal_mp_gram.return_path", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return every value of the nonzero coefficients along the\nforward path. Useful for cross-validation." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._omp/orthogonal_mp_gram/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.linear_model._omp.orthogonal_mp_gram.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether or not to return the number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Gram Orthogonal Matching Pursuit (OMP).\n\nSolves n_targets Orthogonal Matching Pursuit problems using only\nthe Gram matrix X.T * X and the product X.T * y.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Gram Orthogonal Matching Pursuit (OMP).\n\nSolves n_targets Orthogonal Matching Pursuit problems using only\nthe Gram matrix X.T * X and the product X.T * y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nGram : ndarray of shape (n_features, n_features)\n Gram matrix of the input data: X.T * X.\n\nXy : ndarray of shape (n_features,) or (n_features, n_targets)\n Input targets multiplied by X: X.T * y.\n\nn_nonzero_coefs : int, default=None\n Desired number of non-zero entries in the solution. If None (by\n default) this value is set to 10% of n_features.\n\ntol : float, default=None\n Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\nnorms_squared : array-like of shape (n_targets,), default=None\n Squared L2 norms of the lines of y. Required if tol is not None.\n\ncopy_Gram : bool, default=True\n Whether the gram matrix must be copied by the algorithm. A false\n value is only helpful if it is already Fortran-ordered, otherwise a\n copy is made anyway.\n\ncopy_Xy : bool, default=True\n Whether the covariance vector Xy must be copied by the algorithm.\n If False, it may be overwritten.\n\nreturn_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. Useful for cross-validation.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncoef : ndarray of shape (n_features,) or (n_features, n_targets)\n Coefficients of the OMP solution. If `return_path=True`, this contains\n the whole coefficient path. In this case its shape is\n (n_features, n_features) or (n_features, n_targets, n_features) and\n iterating over the last axis yields coefficients in increasing order\n of active features.\n\nn_iters : array-like or int\n Number of active features across every target. Returned only if\n `return_n_iter` is set to True.\n\nSee Also\n--------\nOrthogonalMatchingPursuit\northogonal_mp\nlars_path\nsklearn.decomposition.sparse_encode\n\nNotes\n-----\nOrthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\nMatching pursuits with time-frequency dictionaries, IEEE Transactions on\nSignal Processing, Vol. 41, No. 
12. (December 1993), pp. 3397-3415.\n(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)\n\nThis implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,\nM., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\nMatching Pursuit Technical Report - CS Technion, April 2008.\nhttps://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf", + "code": "@_deprecate_positional_args\ndef orthogonal_mp_gram(Gram, Xy, *, n_nonzero_coefs=None, tol=None,\n norms_squared=None, copy_Gram=True,\n copy_Xy=True, return_path=False,\n return_n_iter=False):\n \"\"\"Gram Orthogonal Matching Pursuit (OMP).\n\n Solves n_targets Orthogonal Matching Pursuit problems using only\n the Gram matrix X.T * X and the product X.T * y.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n Gram : ndarray of shape (n_features, n_features)\n Gram matrix of the input data: X.T * X.\n\n Xy : ndarray of shape (n_features,) or (n_features, n_targets)\n Input targets multiplied by X: X.T * y.\n\n n_nonzero_coefs : int, default=None\n Desired number of non-zero entries in the solution. If None (by\n default) this value is set to 10% of n_features.\n\n tol : float, default=None\n Maximum norm of the residual. If not None, overrides n_nonzero_coefs.\n\n norms_squared : array-like of shape (n_targets,), default=None\n Squared L2 norms of the lines of y. Required if tol is not None.\n\n copy_Gram : bool, default=True\n Whether the gram matrix must be copied by the algorithm. A false\n value is only helpful if it is already Fortran-ordered, otherwise a\n copy is made anyway.\n\n copy_Xy : bool, default=True\n Whether the covariance vector Xy must be copied by the algorithm.\n If False, it may be overwritten.\n\n return_path : bool, default=False\n Whether to return every value of the nonzero coefficients along the\n forward path. Useful for cross-validation.\n\n return_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\n Returns\n -------\n coef : ndarray of shape (n_features,) or (n_features, n_targets)\n Coefficients of the OMP solution. If `return_path=True`, this contains\n the whole coefficient path. In this case its shape is\n (n_features, n_features) or (n_features, n_targets, n_features) and\n iterating over the last axis yields coefficients in increasing order\n of active features.\n\n n_iters : array-like or int\n Number of active features across every target. Returned only if\n `return_n_iter` is set to True.\n\n See Also\n --------\n OrthogonalMatchingPursuit\n orthogonal_mp\n lars_path\n sklearn.decomposition.sparse_encode\n\n Notes\n -----\n Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,\n Matching pursuits with time-frequency dictionaries, IEEE Transactions on\n Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.\n (http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)\n\n This implementation is based on Rubinstein, R., Zibulevsky, M. 
and Elad,\n M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal\n Matching Pursuit Technical Report - CS Technion, April 2008.\n https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf\n\n \"\"\"\n Gram = check_array(Gram, order='F', copy=copy_Gram)\n Xy = np.asarray(Xy)\n if Xy.ndim > 1 and Xy.shape[1] > 1:\n # or subsequent target will be affected\n copy_Gram = True\n if Xy.ndim == 1:\n Xy = Xy[:, np.newaxis]\n if tol is not None:\n norms_squared = [norms_squared]\n if copy_Xy or not Xy.flags.writeable:\n # Make the copy once instead of many times in _gram_omp itself.\n Xy = Xy.copy()\n\n if n_nonzero_coefs is None and tol is None:\n n_nonzero_coefs = int(0.1 * len(Gram))\n if tol is not None and norms_squared is None:\n raise ValueError('Gram OMP needs the precomputed norms in order '\n 'to evaluate the error sum of squares.')\n if tol is not None and tol < 0:\n raise ValueError(\"Epsilon cannot be negative\")\n if tol is None and n_nonzero_coefs <= 0:\n raise ValueError(\"The number of atoms must be positive\")\n if tol is None and n_nonzero_coefs > len(Gram):\n raise ValueError(\"The number of atoms cannot be more than the number \"\n \"of features\")\n\n if return_path:\n coef = np.zeros((len(Gram), Xy.shape[1], len(Gram)))\n else:\n coef = np.zeros((len(Gram), Xy.shape[1]))\n\n n_iters = []\n for k in range(Xy.shape[1]):\n out = _gram_omp(\n Gram, Xy[:, k], n_nonzero_coefs,\n norms_squared[k] if tol is not None else None, tol,\n copy_Gram=copy_Gram, copy_Xy=False,\n return_path=return_path)\n if return_path:\n _, idx, coefs, n_iter = out\n coef = coef[:, :, :len(idx)]\n for n_active, x in enumerate(coefs.T):\n coef[idx[:n_active + 1], k, n_active] = x[:n_active + 1]\n else:\n x, idx, n_iter = out\n coef[idx, k] = x\n n_iters.append(n_iter)\n\n if Xy.shape[1] == 1:\n n_iters = n_iters[0]\n\n if return_n_iter:\n return np.squeeze(coef), n_iters\n else:\n return np.squeeze(coef)" + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/C", + "name": "C", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.C", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Maximum step size (regularization). Defaults to 1.0." 
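orthogonal_mp_gram solves the same problems from the precomputed quantities alone, which is exactly what orthogonal_mp does internally when precompute is enabled (it forms G = X.T @ X and Xy = X.T @ y and delegates). A sketch of the equivalence, on the same kind of synthetic unit-norm dictionary as above; norms_squared is only needed when tol is used:

import numpy as np
from sklearn.linear_model import orthogonal_mp, orthogonal_mp_gram

rng = np.random.RandomState(1)
X = rng.randn(100, 30)
X /= np.linalg.norm(X, axis=0)            # unit-norm dictionary columns
y = X[:, [2, 9]] @ np.array([1.5, -1.0])

G = X.T @ X                               # Gram matrix, (n_features, n_features)
Xy = X.T @ y                              # covariance vector, (n_features,)
coef_full = orthogonal_mp(X, y, n_nonzero_coefs=2)
coef_gram = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=2)
print(np.allclose(coef_full, coef_gram))  # same solution from either route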
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether the intercept should be estimated or not. If False, the\ndata is assumed to be already centered." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of passes over the training data (aka epochs).\nIt only impacts the behavior in the ``fit`` method, and not the\n:meth:`partial_fit` method.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or None", + "default_value": "1e-3", + "description": "The stopping criterion. If it is not None, the iterations will stop\nwhen (loss > previous_loss - tol).\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.early_stopping", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use early stopping to terminate training when validation.\nscore is not improving. If set to True, it will automatically set aside\na stratified fraction of training data as validation and terminate\ntraining when validation score is not improving by at least tol for\nn_iter_no_change consecutive epochs.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The proportion of training data to set aside as validation set for\nearly stopping. Must be between 0 and 1.\nOnly used if early_stopping is True.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.n_iter_no_change", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of iterations with no improvement to wait before early stopping.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not the training data should be shuffled after each epoch." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "integer", + "default_value": "0", + "description": "The verbosity level" + }, + "type": { + "kind": "NamedType", + "name": "integer" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/loss", + "name": "loss", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.loss", + "default_value": "'hinge'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string", + "default_value": "\"hinge\"", + "description": "The loss function to be used:\nhinge: equivalent to PA-I in the reference paper.\nsquared_hinge: equivalent to PA-II in the reference paper." + }, + "type": { + "kind": "NamedType", + "name": "string" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "The number of CPUs to use to do the OVA (One Versus All, for\nmulti-class problems) computation.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used to shuffle the training data, when ``shuffle`` is set to\n``True``. Pass an int for reproducible output across multiple\nfunction calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to True, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution.\nSee :term:`the Glossary `.\n\nRepeatedly calling fit or partial_fit when warm_start is True can\nresult in a different solution than when calling fit a single time\nbecause of the way the data is shuffled." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict, {class_label: weight} or \"balanced\" or None", + "default_value": "None", + "description": "Preset for the class_weight fit parameter.\n\nWeights associated with classes. If not given, all classes\nare supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``\n\n.. versionadded:: 0.17\n parameter *class_weight* to automatically weight samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "\"balanced\"" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/__init__/average", + "name": "average", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.__init__.average", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "When set to True, computes the averaged SGD weights and stores the\nresult in the ``coef_`` attribute. If set to an int greater than 1,\naveraging will begin once the total number of samples seen reaches\naverage. 
So average=10 will begin averaging after seeing 10 samples.\n\n.. versionadded:: 0.19\n parameter *average* to use weights averaging in SGD" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Passive Aggressive Classifier\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, C=1.0, fit_intercept=True, max_iter=1000, tol=1e-3,\n early_stopping=False, validation_fraction=0.1,\n n_iter_no_change=5, shuffle=True, verbose=0, loss=\"hinge\",\n n_jobs=None, random_state=None, warm_start=False,\n class_weight=None, average=False):\n super().__init__(\n penalty=None,\n fit_intercept=fit_intercept,\n max_iter=max_iter,\n tol=tol,\n early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change,\n shuffle=shuffle,\n verbose=verbose,\n random_state=random_state,\n eta0=1.0,\n warm_start=warm_start,\n class_weight=class_weight,\n average=average,\n n_jobs=n_jobs)\n\n self.C = C\n self.loss = loss" + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/fit", + "name": "fit", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/fit/self", + "name": "self", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/fit/X", + "name": "X", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/fit/y", + "name": "y", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "numpy array of shape [n_samples]", + "default_value": "", + "description": "Target values" + }, + "type": { + "kind": "NamedType", + "name": "numpy array of shape [n_samples]" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/fit/coef_init", + "name": "coef_init", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.fit.coef_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape = [n_classes,n_features]", + "default_value": "", + "description": "The initial coefficients to warm-start the optimization." 
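For orientation, a minimal usage sketch of the PassiveAggressiveClassifier entries recorded above (assumes scikit-learn ~0.24 and NumPy; the dataset is synthetic and purely illustrative):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import PassiveAggressiveClassifier

# Synthetic binary classification problem.
X, y = make_classification(n_samples=200, n_features=20, random_state=0)

# C bounds the step size (regularization); loss="hinge" corresponds to PA-I.
clf = PassiveAggressiveClassifier(C=1.0, loss="hinge", max_iter=1000,
                                  tol=1e-3, random_state=0)
clf.fit(X, y)
print(clf.score(X, y))

# Online variant: classes must be supplied on the first partial_fit call.
clf_online = PassiveAggressiveClassifier(random_state=0)
for X_batch, y_batch in zip(np.array_split(X, 4), np.array_split(y, 4)):
    clf_online.partial_fit(X_batch, y_batch, classes=np.unique(y))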
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape = [n_classes" + }, + { + "kind": "NamedType", + "name": "n_features]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/fit/intercept_init", + "name": "intercept_init", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.fit.intercept_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape = [n_classes]", + "default_value": "", + "description": "The initial intercept to warm-start the optimization." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape = [n_classes]" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit linear model with Passive Aggressive algorithm.", + "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\ny : numpy array of shape [n_samples]\n Target values\n\ncoef_init : array, shape = [n_classes,n_features]\n The initial coefficients to warm-start the optimization.\n\nintercept_init : array, shape = [n_classes]\n The initial intercept to warm-start the optimization.\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y, coef_init=None, intercept_init=None):\n \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\n y : numpy array of shape [n_samples]\n Target values\n\n coef_init : array, shape = [n_classes,n_features]\n The initial coefficients to warm-start the optimization.\n\n intercept_init : array, shape = [n_classes]\n The initial intercept to warm-start the optimization.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._validate_params()\n lr = \"pa1\" if self.loss == \"hinge\" else \"pa2\"\n return self._fit(X, y, alpha=1.0, C=self.C,\n loss=\"hinge\", learning_rate=lr,\n coef_init=coef_init, intercept_init=intercept_init)" + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/partial_fit", + "name": "partial_fit", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/partial_fit/self", + "name": "self", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/partial_fit/X", + "name": "X", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Subset of the training data" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": 
"EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/partial_fit/y", + "name": "y", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "numpy array of shape [n_samples]", + "default_value": "", + "description": "Subset of the target values" + }, + "type": { + "kind": "NamedType", + "name": "numpy array of shape [n_samples]" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveClassifier/partial_fit/classes", + "name": "classes", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveClassifier.partial_fit.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape = [n_classes]", + "default_value": "", + "description": "Classes across all calls to partial_fit.\nCan be obtained by via `np.unique(y_all)`, where y_all is the\ntarget vector of the entire dataset.\nThis argument is required for the first call to partial_fit\nand can be omitted in the subsequent calls.\nNote that y doesn't need to contain all labels in `classes`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape = [n_classes]" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit linear model with Passive Aggressive algorithm.", + "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Subset of the training data\n\ny : numpy array of shape [n_samples]\n Subset of the target values\n\nclasses : array, shape = [n_classes]\n Classes across all calls to partial_fit.\n Can be obtained by via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def partial_fit(self, X, y, classes=None):\n \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Subset of the training data\n\n y : numpy array of shape [n_samples]\n Subset of the target values\n\n classes : array, shape = [n_classes]\n Classes across all calls to partial_fit.\n Can be obtained by via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._validate_params(for_partial_fit=True)\n if self.class_weight == 'balanced':\n raise ValueError(\"class_weight 'balanced' is not supported for \"\n \"partial_fit. For 'balanced' weights, use \"\n \"`sklearn.utils.compute_class_weight` with \"\n \"`class_weight='balanced'`. 
In place of y you \"\n \"can use a large enough subset of the full \"\n \"training set target to properly estimate the \"\n \"class frequency distributions. Pass the \"\n \"resulting weights as the class_weight \"\n \"parameter.\")\n lr = \"pa1\" if self.loss == \"hinge\" else \"pa2\"\n return self._partial_fit(X, y, alpha=1.0, C=self.C,\n loss=\"hinge\", learning_rate=lr, max_iter=1,\n classes=classes, sample_weight=None,\n coef_init=None, intercept_init=None)" + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/C", + "name": "C", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.C", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Maximum step size (regularization). Defaults to 1.0." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether the intercept should be estimated or not. If False, the\ndata is assumed to be already centered. Defaults to True." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of passes over the training data (aka epochs).\nIt only impacts the behavior in the ``fit`` method, and not the\n:meth:`partial_fit` method.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or None", + "default_value": "1e-3", + "description": "The stopping criterion. If it is not None, the iterations will stop\nwhen (loss > previous_loss - tol).\n\n.. 
versionadded:: 0.19" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.early_stopping", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use early stopping to terminate training when validation.\nscore is not improving. If set to True, it will automatically set aside\na fraction of training data as validation and terminate\ntraining when validation score is not improving by at least tol for\nn_iter_no_change consecutive epochs.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The proportion of training data to set aside as validation set for\nearly stopping. Must be between 0 and 1.\nOnly used if early_stopping is True.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.n_iter_no_change", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of iterations with no improvement to wait before early stopping.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not the training data should be shuffled after each epoch." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "integer", + "default_value": "0", + "description": "The verbosity level" + }, + "type": { + "kind": "NamedType", + "name": "integer" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/loss", + "name": "loss", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.loss", + "default_value": "'epsilon_insensitive'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string", + "default_value": "\"epsilon_insensitive\"", + "description": "The loss function to be used:\nepsilon_insensitive: equivalent to PA-I in the reference paper.\nsquared_epsilon_insensitive: equivalent to PA-II in the reference\npaper." + }, + "type": { + "kind": "NamedType", + "name": "string" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.epsilon", + "default_value": "DEFAULT_EPSILON", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "If the difference between the current prediction and the correct label\nis below this threshold, the model is not updated." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used to shuffle the training data, when ``shuffle`` is set to\n``True``. Pass an int for reproducible output across multiple\nfunction calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to True, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution.\nSee :term:`the Glossary `.\n\nRepeatedly calling fit or partial_fit when warm_start is True can\nresult in a different solution than when calling fit a single time\nbecause of the way the data is shuffled." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/__init__/average", + "name": "average", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.__init__.average", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "When set to True, computes the averaged SGD weights and stores the\nresult in the ``coef_`` attribute. If set to an int greater than 1,\naveraging will begin once the total number of samples seen reaches\naverage. So average=10 will begin averaging after seeing 10 samples.\n\n.. versionadded:: 0.19\n parameter *average* to use weights averaging in SGD" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Passive Aggressive Regressor\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, C=1.0, fit_intercept=True, max_iter=1000, tol=1e-3,\n early_stopping=False, validation_fraction=0.1,\n n_iter_no_change=5, shuffle=True, verbose=0,\n loss=\"epsilon_insensitive\", epsilon=DEFAULT_EPSILON,\n random_state=None, warm_start=False,\n average=False):\n super().__init__(\n penalty=None,\n l1_ratio=0,\n epsilon=epsilon,\n eta0=1.0,\n fit_intercept=fit_intercept,\n max_iter=max_iter,\n tol=tol,\n early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change,\n shuffle=shuffle,\n verbose=verbose,\n random_state=random_state,\n warm_start=warm_start,\n average=average)\n self.C = C\n self.loss = loss" + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/fit", + "name": "fit", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/fit/self", + "name": "self", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/fit/X", + "name": "X", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/fit/y", + "name": "y", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "numpy array of shape [n_samples]", + "default_value": "", + "description": "Target values" + }, 
+ "type": { + "kind": "NamedType", + "name": "numpy array of shape [n_samples]" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/fit/coef_init", + "name": "coef_init", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.fit.coef_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape = [n_features]", + "default_value": "", + "description": "The initial coefficients to warm-start the optimization." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape = [n_features]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/fit/intercept_init", + "name": "intercept_init", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.fit.intercept_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape = [1]", + "default_value": "", + "description": "The initial intercept to warm-start the optimization." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape = [1]" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit linear model with Passive Aggressive algorithm.", + "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\ny : numpy array of shape [n_samples]\n Target values\n\ncoef_init : array, shape = [n_features]\n The initial coefficients to warm-start the optimization.\n\nintercept_init : array, shape = [1]\n The initial intercept to warm-start the optimization.\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y, coef_init=None, intercept_init=None):\n \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data\n\n y : numpy array of shape [n_samples]\n Target values\n\n coef_init : array, shape = [n_features]\n The initial coefficients to warm-start the optimization.\n\n intercept_init : array, shape = [1]\n The initial intercept to warm-start the optimization.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._validate_params()\n lr = \"pa1\" if self.loss == \"epsilon_insensitive\" else \"pa2\"\n return self._fit(X, y, alpha=1.0, C=self.C,\n loss=\"epsilon_insensitive\",\n learning_rate=lr,\n coef_init=coef_init,\n intercept_init=intercept_init)" + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/partial_fit", + "name": "partial_fit", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/partial_fit/self", + "name": "self", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/partial_fit/X", + "name": "X", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Subset of training data" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._passive_aggressive/PassiveAggressiveRegressor/partial_fit/y", + "name": "y", + "qname": "sklearn.linear_model._passive_aggressive.PassiveAggressiveRegressor.partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "numpy array of shape [n_samples]", + "default_value": "", + "description": "Subset of target values" + }, + "type": { + "kind": "NamedType", + "name": "numpy array of shape [n_samples]" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit linear model with Passive Aggressive algorithm.", + "docstring": "Fit linear model with Passive Aggressive algorithm.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Subset of training data\n\ny : numpy array of shape [n_samples]\n Subset of target values\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def partial_fit(self, X, y):\n \"\"\"Fit linear model with Passive Aggressive algorithm.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Subset of training data\n\n y : numpy array of shape [n_samples]\n Subset of target values\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._validate_params(for_partial_fit=True)\n lr = \"pa1\" if self.loss == \"epsilon_insensitive\" else \"pa2\"\n return self._partial_fit(X, y, alpha=1.0, C=self.C,\n loss=\"epsilon_insensitive\",\n learning_rate=lr, max_iter=1,\n sample_weight=None,\n coef_init=None, intercept_init=None)" + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/penalty", + "name": "penalty", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.penalty", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'l2','l1','elasticnet'}", + "default_value": "None", + "description": "The penalty (aka regularization term) to be used." 
+ }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1", "elasticnet"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.alpha", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0001", + "description": "Constant that multiplies the regularization term if regularization is\nused." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.l1_ratio", + "default_value": "0.15", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.15", + "description": "The Elastic Net mixing parameter, with `0 <= l1_ratio <= 1`.\n`l1_ratio=0` corresponds to L2 penalty, `l1_ratio=1` to L1.\nOnly used if `penalty='elasticnet'`.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether the intercept should be estimated or not. If False, the\ndata is assumed to be already centered." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of passes over the training data (aka epochs).\nIt only impacts the behavior in the ``fit`` method, and not the\n:meth:`partial_fit` method.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "The stopping criterion. If it is not None, the iterations will stop\nwhen (loss > previous_loss - tol).\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not the training data should be shuffled after each epoch." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/eta0", + "name": "eta0", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.eta0", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "double", + "default_value": "1", + "description": "Constant by which the updates are multiplied." + }, + "type": { + "kind": "NamedType", + "name": "double" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of CPUs to use to do the OVA (One Versus All, for\nmulti-class problems) computation.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.random_state", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used to shuffle the training data, when ``shuffle`` is set to\n``True``. Pass an int for reproducible output across multiple\nfunction calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.early_stopping", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use early stopping to terminate training when validation.\nscore is not improving. If set to True, it will automatically set aside\na stratified fraction of training data as validation and terminate\ntraining when validation score is not improving by at least tol for\nn_iter_no_change consecutive epochs.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The proportion of training data to set aside as validation set for\nearly stopping. 
Must be between 0 and 1.\nOnly used if early_stopping is True.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.n_iter_no_change", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of iterations with no improvement to wait before early stopping.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict, {class_label: weight} or \"balanced\"", + "default_value": "None", + "description": "Preset for the class_weight fit parameter.\n\nWeights associated with classes. If not given, all classes\nare supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "\"balanced\"" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._perceptron/Perceptron/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._perceptron.Perceptron.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to True, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution. See\n:term:`the Glossary `." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perceptron\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, penalty=None, alpha=0.0001, l1_ratio=0.15,\n fit_intercept=True,\n max_iter=1000, tol=1e-3, shuffle=True, verbose=0, eta0=1.0,\n n_jobs=None, random_state=0, early_stopping=False,\n validation_fraction=0.1, n_iter_no_change=5,\n class_weight=None, warm_start=False):\n super().__init__(\n loss=\"perceptron\", penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,\n fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,\n shuffle=shuffle, verbose=verbose, random_state=random_state,\n learning_rate=\"constant\", eta0=eta0, early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, power_t=0.5,\n warm_start=warm_start, class_weight=class_weight, n_jobs=n_jobs)" + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__.base_estimator", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "None", + "description": "Base estimator object which implements the following methods:\n\n * `fit(X, y)`: Fit model to given training data and target values.\n * `score(X, y)`: Returns the mean accuracy on the given test data,\n which is used for the stop criterion defined by `stop_score`.\n Additionally, the score is used to decide which of two equally\n large consensus sets is chosen as the better one.\n * `predict(X)`: Returns predicted values using the linear model,\n which is used to compute residual error using loss function.\n\nIf `base_estimator` is None, then\n:class:`~sklearn.linear_model.LinearRegression` is used for\ntarget values of dtype float.\n\nNote that the current implementation only supports regression\nestimators." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__/min_samples", + "name": "min_samples", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__.min_samples", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int (>= 1) or float ([0, 1])", + "default_value": "None", + "description": "Minimum number of samples chosen randomly from original data. Treated\nas an absolute number of samples for `min_samples >= 1`, treated as a\nrelative number `ceil(min_samples * X.shape[0]`) for\n`min_samples < 1`. This is typically chosen as the minimal number of\nsamples necessary to estimate the given `base_estimator`. 
By default a\n``sklearn.linear_model.LinearRegression()`` estimator is assumed and\n`min_samples` is chosen as ``X.shape[1] + 1``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int (>= 1)" + }, + { + "kind": "NamedType", + "name": "float ([0, 1])" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__/residual_threshold", + "name": "residual_threshold", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__.residual_threshold", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Maximum residual for a data sample to be classified as an inlier.\nBy default the threshold is chosen as the MAD (median absolute\ndeviation) of the target values `y`." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__/is_data_valid", + "name": "is_data_valid", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__.is_data_valid", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "This function is called with the randomly selected data before the\nmodel is fitted to it: `is_data_valid(X, y)`. If its return value is\nFalse the current randomly chosen sub-sample is skipped." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__/is_model_valid", + "name": "is_model_valid", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__.is_model_valid", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "This function is called with the estimated model and the randomly\nselected data: `is_model_valid(model, X, y)`. If its return value is\nFalse the current randomly chosen sub-sample is skipped.\nRejecting samples with this function is computationally costlier than\nwith `is_data_valid`. `is_model_valid` should therefore only be used if\nthe estimated model is needed for making the rejection decision." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__/max_trials", + "name": "max_trials", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__.max_trials", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Maximum number of iterations for random sample selection." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__/max_skips", + "name": "max_skips", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__.max_skips", + "default_value": "np.inf", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "np.inf", + "description": "Maximum number of iterations that can be skipped due to finding zero\ninliers or invalid data defined by ``is_data_valid`` or invalid models\ndefined by ``is_model_valid``.\n\n.. 
versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__/stop_n_inliers", + "name": "stop_n_inliers", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__.stop_n_inliers", + "default_value": "np.inf", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "np.inf", + "description": "Stop iteration if at least this number of inliers are found." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__/stop_score", + "name": "stop_score", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__.stop_score", + "default_value": "np.inf", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "np.inf", + "description": "Stop iteration if score is greater equal than this threshold." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__/stop_probability", + "name": "stop_probability", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__.stop_probability", + "default_value": "0.99", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float in range [0, 1]", + "default_value": "0.99", + "description": "RANSAC iteration stops if at least one outlier-free set of the training\ndata is sampled in RANSAC. This requires to generate at least N\nsamples (iterations)::\n\n N >= log(1 - probability) / log(1 - e**m)\n\nwhere the probability (confidence) is typically set to high value such\nas 0.99 (the default) and e is the current fraction of inliers w.r.t.\nthe total number of samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float in range [0" + }, + { + "kind": "NamedType", + "name": "1]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__/loss", + "name": "loss", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__.loss", + "default_value": "'absolute_loss'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string, callable", + "default_value": "'absolute_loss'", + "description": "String inputs, \"absolute_loss\" and \"squared_loss\" are supported which\nfind the absolute loss and squared loss per sample\nrespectively.\n\nIf ``loss`` is a callable, then it should be a function that takes\ntwo arrays as inputs, the true and predicted value and returns a 1-D\narray with the i-th value of the array corresponding to the loss\non ``X[i]``.\n\nIf the loss on a sample is greater than the ``residual_threshold``,\nthen this sample is classified as an outlier.\n\n.. 
versionadded:: 0.18" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "The generator used to initialize the centers.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "RANSAC (RANdom SAmple Consensus) algorithm.\n\nRANSAC is an iterative algorithm for the robust estimation of parameters\nfrom a subset of inliers from the complete data set.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, base_estimator=None, *, min_samples=None,\n residual_threshold=None, is_data_valid=None,\n is_model_valid=None, max_trials=100, max_skips=np.inf,\n stop_n_inliers=np.inf, stop_score=np.inf,\n stop_probability=0.99, loss='absolute_loss',\n random_state=None):\n\n self.base_estimator = base_estimator\n self.min_samples = min_samples\n self.residual_threshold = residual_threshold\n self.is_data_valid = is_data_valid\n self.is_model_valid = is_model_valid\n self.max_trials = max_trials\n self.max_skips = max_skips\n self.stop_n_inliers = stop_n_inliers\n self.stop_score = stop_score\n self.stop_probability = stop_probability\n self.random_state = random_state\n self.loss = loss" + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/_more_tags", + "name": "_more_tags", + "qname": "sklearn.linear_model._ransac.RANSACRegressor._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._ransac.RANSACRegressor._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/fit", + "name": "fit", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/fit/self", + "name": "self", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/fit/X", + "name": "X", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.fit.X", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or sparse matrix, shape [n_samples, n_features]", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "sparse matrix" + }, + { + "kind": "NamedType", + "name": "shape [n_samples" + }, + { + "kind": "NamedType", + "name": "n_features]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/fit/y", + "name": "y", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Individual weights for each sample\nraises error if sample_weight is passed and base_estimator\nfit method does not support it.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit estimator using RANSAC algorithm.", + "docstring": "Fit estimator using RANSAC algorithm.\n\nParameters\n----------\nX : array-like or sparse matrix, shape [n_samples, n_features]\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample\n raises error if sample_weight is passed and base_estimator\n fit method does not support it.\n\n .. versionadded:: 0.18\n\nRaises\n------\nValueError\n If no valid consensus set could be found. This occurs if\n `is_data_valid` and `is_model_valid` return False for all\n `max_trials` randomly chosen sub-samples.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit estimator using RANSAC algorithm.\n\n Parameters\n ----------\n X : array-like or sparse matrix, shape [n_samples, n_features]\n Training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample\n raises error if sample_weight is passed and base_estimator\n fit method does not support it.\n\n .. versionadded:: 0.18\n\n Raises\n ------\n ValueError\n If no valid consensus set could be found. 
This occurs if\n `is_data_valid` and `is_model_valid` return False for all\n `max_trials` randomly chosen sub-samples.\n\n \"\"\"\n # Need to validate separately here.\n # We can't pass multi_output=True because that would allow y to be csr.\n check_X_params = dict(accept_sparse='csr')\n check_y_params = dict(ensure_2d=False)\n X, y = self._validate_data(X, y, validate_separately=(check_X_params,\n check_y_params))\n check_consistent_length(X, y)\n\n if self.base_estimator is not None:\n base_estimator = clone(self.base_estimator)\n else:\n base_estimator = LinearRegression()\n\n if self.min_samples is None:\n # assume linear model by default\n min_samples = X.shape[1] + 1\n elif 0 < self.min_samples < 1:\n min_samples = np.ceil(self.min_samples * X.shape[0])\n elif self.min_samples >= 1:\n if self.min_samples % 1 != 0:\n raise ValueError(\"Absolute number of samples must be an \"\n \"integer value.\")\n min_samples = self.min_samples\n else:\n raise ValueError(\"Value for `min_samples` must be scalar and \"\n \"positive.\")\n if min_samples > X.shape[0]:\n raise ValueError(\"`min_samples` may not be larger than number \"\n \"of samples: n_samples = %d.\" % (X.shape[0]))\n\n if self.stop_probability < 0 or self.stop_probability > 1:\n raise ValueError(\"`stop_probability` must be in range [0, 1].\")\n\n if self.residual_threshold is None:\n # MAD (median absolute deviation)\n residual_threshold = np.median(np.abs(y - np.median(y)))\n else:\n residual_threshold = self.residual_threshold\n\n if self.loss == \"absolute_loss\":\n if y.ndim == 1:\n loss_function = lambda y_true, y_pred: np.abs(y_true - y_pred)\n else:\n loss_function = lambda \\\n y_true, y_pred: np.sum(np.abs(y_true - y_pred), axis=1)\n\n elif self.loss == \"squared_loss\":\n if y.ndim == 1:\n loss_function = lambda y_true, y_pred: (y_true - y_pred) ** 2\n else:\n loss_function = lambda \\\n y_true, y_pred: np.sum((y_true - y_pred) ** 2, axis=1)\n\n elif callable(self.loss):\n loss_function = self.loss\n\n else:\n raise ValueError(\n \"loss should be 'absolute_loss', 'squared_loss' or a callable.\"\n \"Got %s. \" % self.loss)\n\n\n random_state = check_random_state(self.random_state)\n\n try: # Not all estimators accept a random_state\n base_estimator.set_params(random_state=random_state)\n except ValueError:\n pass\n\n estimator_fit_has_sample_weight = has_fit_parameter(base_estimator,\n \"sample_weight\")\n estimator_name = type(base_estimator).__name__\n if (sample_weight is not None and not\n estimator_fit_has_sample_weight):\n raise ValueError(\"%s does not support sample_weight. 
Samples\"\n \" weights are only used for the calibration\"\n \" itself.\" % estimator_name)\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n\n n_inliers_best = 1\n score_best = -np.inf\n inlier_mask_best = None\n X_inlier_best = None\n y_inlier_best = None\n inlier_best_idxs_subset = None\n self.n_skips_no_inliers_ = 0\n self.n_skips_invalid_data_ = 0\n self.n_skips_invalid_model_ = 0\n\n # number of data samples\n n_samples = X.shape[0]\n sample_idxs = np.arange(n_samples)\n\n self.n_trials_ = 0\n max_trials = self.max_trials\n while self.n_trials_ < max_trials:\n self.n_trials_ += 1\n\n if (self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +\n self.n_skips_invalid_model_) > self.max_skips:\n break\n\n # choose random sample set\n subset_idxs = sample_without_replacement(n_samples, min_samples,\n random_state=random_state)\n X_subset = X[subset_idxs]\n y_subset = y[subset_idxs]\n\n # check if random sample set is valid\n if (self.is_data_valid is not None\n and not self.is_data_valid(X_subset, y_subset)):\n self.n_skips_invalid_data_ += 1\n continue\n\n # fit model for current random sample set\n if sample_weight is None:\n base_estimator.fit(X_subset, y_subset)\n else:\n base_estimator.fit(X_subset, y_subset,\n sample_weight=sample_weight[subset_idxs])\n\n # check if estimated model is valid\n if (self.is_model_valid is not None and not\n self.is_model_valid(base_estimator, X_subset, y_subset)):\n self.n_skips_invalid_model_ += 1\n continue\n\n # residuals of all data for current random sample model\n y_pred = base_estimator.predict(X)\n residuals_subset = loss_function(y, y_pred)\n\n # classify data into inliers and outliers\n inlier_mask_subset = residuals_subset < residual_threshold\n n_inliers_subset = np.sum(inlier_mask_subset)\n\n # less inliers -> skip current random sample\n if n_inliers_subset < n_inliers_best:\n self.n_skips_no_inliers_ += 1\n continue\n\n # extract inlier data set\n inlier_idxs_subset = sample_idxs[inlier_mask_subset]\n X_inlier_subset = X[inlier_idxs_subset]\n y_inlier_subset = y[inlier_idxs_subset]\n\n # score of inlier data set\n score_subset = base_estimator.score(X_inlier_subset,\n y_inlier_subset)\n\n # same number of inliers but worse score -> skip current random\n # sample\n if (n_inliers_subset == n_inliers_best\n and score_subset < score_best):\n continue\n\n # save current random sample as best sample\n n_inliers_best = n_inliers_subset\n score_best = score_subset\n inlier_mask_best = inlier_mask_subset\n X_inlier_best = X_inlier_subset\n y_inlier_best = y_inlier_subset\n inlier_best_idxs_subset = inlier_idxs_subset\n\n max_trials = min(\n max_trials,\n _dynamic_max_trials(n_inliers_best, n_samples,\n min_samples, self.stop_probability))\n\n # break if sufficient number of inliers or score is reached\n if n_inliers_best >= self.stop_n_inliers or \\\n score_best >= self.stop_score:\n break\n\n # if none of the iterations met the required criteria\n if inlier_mask_best is None:\n if ((self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +\n self.n_skips_invalid_model_) > self.max_skips):\n raise ValueError(\n \"RANSAC skipped more iterations than `max_skips` without\"\n \" finding a valid consensus set. Iterations were skipped\"\n \" because each randomly chosen sub-sample failed the\"\n \" passing criteria. See estimator attributes for\"\n \" diagnostics (n_skips*).\")\n else:\n raise ValueError(\n \"RANSAC could not find a valid consensus set. 
All\"\n \" `max_trials` iterations were skipped because each\"\n \" randomly chosen sub-sample failed the passing criteria.\"\n \" See estimator attributes for diagnostics (n_skips*).\")\n else:\n if (self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +\n self.n_skips_invalid_model_) > self.max_skips:\n warnings.warn(\"RANSAC found a valid consensus set but exited\"\n \" early due to skipping more iterations than\"\n \" `max_skips`. See estimator attributes for\"\n \" diagnostics (n_skips*).\",\n ConvergenceWarning)\n\n # estimate final model using all inliers\n if sample_weight is None:\n base_estimator.fit(X_inlier_best, y_inlier_best)\n else:\n base_estimator.fit(\n X_inlier_best,\n y_inlier_best,\n sample_weight=sample_weight[inlier_best_idxs_subset])\n\n self.estimator_ = base_estimator\n self.inlier_mask_ = inlier_mask_best\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/predict", + "name": "predict", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/predict/self", + "name": "self", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/predict/X", + "name": "X", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "numpy array of shape [n_samples, n_features]", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "numpy array of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "n_features]" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict using the estimated model.\n\nThis is a wrapper for `estimator_.predict(X)`.", + "docstring": "Predict using the estimated model.\n\nThis is a wrapper for `estimator_.predict(X)`.\n\nParameters\n----------\nX : numpy array of shape [n_samples, n_features]\n\nReturns\n-------\ny : array, shape = [n_samples] or [n_samples, n_targets]\n Returns predicted values.", + "code": " def predict(self, X):\n \"\"\"Predict using the estimated model.\n\n This is a wrapper for `estimator_.predict(X)`.\n\n Parameters\n ----------\n X : numpy array of shape [n_samples, n_features]\n\n Returns\n -------\n y : array, shape = [n_samples] or [n_samples, n_targets]\n Returns predicted values.\n \"\"\"\n check_is_fitted(self)\n\n return self.estimator_.predict(X)" + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/score", + "name": "score", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/score/self", + "name": "self", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/score/X", + "name": "X", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.score.X", + 
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "numpy array or sparse matrix of shape [n_samples, n_features]", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "numpy array" + }, + { + "kind": "NamedType", + "name": "sparse matrix of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "n_features]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/RANSACRegressor/score/y", + "name": "y", + "qname": "sklearn.linear_model._ransac.RANSACRegressor.score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape = [n_samples] or [n_samples, n_targets]", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + }, + { + "kind": "NamedType", + "name": "[n_samples" + }, + { + "kind": "NamedType", + "name": "n_targets]" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the score of the prediction.\n\nThis is a wrapper for `estimator_.score(X, y)`.", + "docstring": "Returns the score of the prediction.\n\nThis is a wrapper for `estimator_.score(X, y)`.\n\nParameters\n----------\nX : numpy array or sparse matrix of shape [n_samples, n_features]\n Training data.\n\ny : array, shape = [n_samples] or [n_samples, n_targets]\n Target values.\n\nReturns\n-------\nz : float\n Score of the prediction.", + "code": " def score(self, X, y):\n \"\"\"Returns the score of the prediction.\n\n This is a wrapper for `estimator_.score(X, y)`.\n\n Parameters\n ----------\n X : numpy array or sparse matrix of shape [n_samples, n_features]\n Training data.\n\n y : array, shape = [n_samples] or [n_samples, n_targets]\n Target values.\n\n Returns\n -------\n z : float\n Score of the prediction.\n \"\"\"\n check_is_fitted(self)\n\n return self.estimator_.score(X, y)" + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/_dynamic_max_trials", + "name": "_dynamic_max_trials", + "qname": "sklearn.linear_model._ransac._dynamic_max_trials", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ransac/_dynamic_max_trials/n_inliers", + "name": "n_inliers", + "qname": "sklearn.linear_model._ransac._dynamic_max_trials.n_inliers", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of inliers in the data." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/_dynamic_max_trials/n_samples", + "name": "n_samples", + "qname": "sklearn.linear_model._ransac._dynamic_max_trials.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Total number of samples in the data." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/_dynamic_max_trials/min_samples", + "name": "min_samples", + "qname": "sklearn.linear_model._ransac._dynamic_max_trials.min_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Minimum number of samples chosen randomly from original data." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ransac/_dynamic_max_trials/probability", + "name": "probability", + "qname": "sklearn.linear_model._ransac._dynamic_max_trials.probability", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Probability (confidence) that one outlier-free sample is generated." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Determine number trials such that at least one outlier-free subset is\nsampled for the given inlier/outlier ratio.", + "docstring": "Determine number trials such that at least one outlier-free subset is\nsampled for the given inlier/outlier ratio.\n\nParameters\n----------\nn_inliers : int\n Number of inliers in the data.\n\nn_samples : int\n Total number of samples in the data.\n\nmin_samples : int\n Minimum number of samples chosen randomly from original data.\n\nprobability : float\n Probability (confidence) that one outlier-free sample is generated.\n\nReturns\n-------\ntrials : int\n Number of trials.", + "code": "def _dynamic_max_trials(n_inliers, n_samples, min_samples, probability):\n \"\"\"Determine number trials such that at least one outlier-free subset is\n sampled for the given inlier/outlier ratio.\n\n Parameters\n ----------\n n_inliers : int\n Number of inliers in the data.\n\n n_samples : int\n Total number of samples in the data.\n\n min_samples : int\n Minimum number of samples chosen randomly from original data.\n\n probability : float\n Probability (confidence) that one outlier-free sample is generated.\n\n Returns\n -------\n trials : int\n Number of trials.\n\n \"\"\"\n inlier_ratio = n_inliers / float(n_samples)\n nom = max(_EPSILON, 1 - probability)\n denom = max(_EPSILON, 1 - inlier_ratio ** min_samples)\n if nom == 1:\n return 0\n if denom == 1:\n return float('inf')\n return abs(float(np.ceil(np.log(nom) / np.log(denom))))" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._ridge.Ridge.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._ridge.Ridge.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge.Ridge.__init__.alpha", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{float, ndarray of shape (n_targets,)}", + "default_value": "1.0", + "description": "Regularization strength; must be a positive float. 
Regularization\nimproves the conditioning of the problem and reduces the variance of\nthe estimates. Larger values specify stronger regularization.\nAlpha corresponds to ``1 / (2C)`` in other linear models such as\n:class:`~sklearn.linear_model.LogisticRegression` or\n:class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\nassumed to be specific to the targets. Hence they must correspond in\nnumber." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._ridge.Ridge.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to fit the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. ``X`` and ``y`` are expected to be centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._ridge.Ridge.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._ridge.Ridge.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._ridge.Ridge.__init__.max_iter", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Maximum number of iterations for conjugate gradient solver.\nFor 'sparse_cg' and 'lsqr' solvers, the default value is determined\nby scipy.sparse.linalg. For 'sag' solver, the default value is 1000." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._ridge.Ridge.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Precision of the solution." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/__init__/solver", + "name": "solver", + "qname": "sklearn.linear_model._ridge.Ridge.__init__.solver", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}", + "default_value": "'auto'", + "description": "Solver to use in the computational routines:\n\n- 'auto' chooses the solver automatically based on the type of data.\n\n- 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. More stable for singular matrices than 'cholesky'.\n\n- 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution.\n\n- 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n- 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n procedure.\n\n- 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its improved, unbiased version named SAGA. Both methods also use an\n iterative procedure, and are often faster than other solvers when\n both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\nAll last five solvers support both dense and sparse data. However, only\n'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is\nTrue.\n\n.. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n.. versionadded:: 0.19\n SAGA solver." + }, + "type": { + "kind": "EnumType", + "values": ["svd", "sparse_cg", "cholesky", "saga", "auto", "sag", "lsqr"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._ridge.Ridge.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\nSee :term:`Glossary ` for details.\n\n.. versionadded:: 0.17\n `random_state` to support Stochastic Average Gradient." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Linear least squares with l2 regularization.\n\nMinimizes the objective function::\n\n||y - Xw||^2_2 + alpha * ||w||^2_2\n\nThis model solves a regression model where the loss function is\nthe linear least squares function and regularization is given by\nthe l2-norm. 
Also known as Ridge Regression or Tikhonov regularization.\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape (n_samples, n_targets)).\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,\n copy_X=True, max_iter=None, tol=1e-3, solver=\"auto\",\n random_state=None):\n super().__init__(\n alpha=alpha, fit_intercept=fit_intercept,\n normalize=normalize, copy_X=copy_X,\n max_iter=max_iter, tol=tol, solver=solver,\n random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/fit", + "name": "fit", + "qname": "sklearn.linear_model._ridge.Ridge.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/fit/self", + "name": "self", + "qname": "sklearn.linear_model._ridge.Ridge.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/fit/X", + "name": "X", + "qname": "sklearn.linear_model._ridge.Ridge.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/fit/y", + "name": "y", + "qname": "sklearn.linear_model._ridge.Ridge.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/Ridge/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._ridge.Ridge.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float or ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Individual weights for each sample. If given a float, every sample\nwill have the same weight." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit Ridge regression model.", + "docstring": "Fit Ridge regression model.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. 
If given a float, every sample\n will have the same weight.\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Ridge regression model.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data\n\n y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\n sample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n return super().fit(X, y, sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.__init__.alpha", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Regularization strength; must be a positive float. Regularization\nimproves the conditioning of the problem and reduces the variance of\nthe estimates. Larger values specify stronger regularization.\nAlpha corresponds to ``1 / (2C)`` in other linear models such as\n:class:`~sklearn.linear_model.LogisticRegression` or\n:class:`~sklearn.svm.LinearSVC`." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set to false, no\nintercept will be used in calculations (e.g. data is expected to be\nalready centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.__init__.max_iter", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Maximum number of iterations for conjugate gradient solver.\nThe default value is determined by scipy.sparse.linalg." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Precision of the solution." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict or 'balanced'", + "default_value": "None", + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "'balanced'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/__init__/solver", + "name": "solver", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.__init__.solver", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}", + "default_value": "'auto'", + "description": "Solver to use in the computational routines:\n\n- 'auto' chooses the solver automatically based on the type of data.\n\n- 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. More stable for singular matrices than 'cholesky'.\n\n- 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution.\n\n- 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n- 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. 
It is the fastest and uses an iterative\n procedure.\n\n- 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its unbiased and more flexible version named SAGA. Both methods\n use an iterative procedure, and are often faster than other solvers\n when both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver." + }, + "type": { + "kind": "EnumType", + "values": ["svd", "sparse_cg", "cholesky", "saga", "auto", "sag", "lsqr"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\nSee :term:`Glossary ` for details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Classifier using Ridge regression.\n\nThis classifier first converts the target values into ``{-1, 1}`` and\nthen treats the problem as a regression task (multi-output regression in\nthe multiclass case).\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,\n copy_X=True, max_iter=None, tol=1e-3, class_weight=None,\n solver=\"auto\", random_state=None):\n super().__init__(\n alpha=alpha, fit_intercept=fit_intercept, normalize=normalize,\n copy_X=copy_X, max_iter=max_iter, tol=tol, solver=solver,\n random_state=random_state)\n self.class_weight = class_weight" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/classes_@getter", + "name": "classes_", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.classes_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/classes_/self", + "name": "self", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.classes_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def classes_(self):\n return self._label_binarizer.classes_" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/fit", + "name": "fit", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/fit/self", + "name": "self", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/fit/X", + "name": 
"X", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/fit/y", + "name": "y", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifier/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._ridge.RidgeClassifier.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float or ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Individual weights for each sample. If given a float, every sample\nwill have the same weight.\n\n.. versionadded:: 0.17\n *sample_weight* support to Classifier." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit Ridge classifier model.", + "docstring": "Fit Ridge classifier model.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Target values.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\n .. versionadded:: 0.17\n *sample_weight* support to Classifier.\n\nReturns\n-------\nself : object\n Instance of the estimator.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Ridge classifier model.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : ndarray of shape (n_samples,)\n Target values.\n\n sample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\n .. 
versionadded:: 0.17\n *sample_weight* support to Classifier.\n\n Returns\n -------\n self : object\n Instance of the estimator.\n \"\"\"\n _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X),\n self.solver)\n X, y = self._validate_data(X, y, accept_sparse=_accept_sparse,\n multi_output=True, y_numeric=False)\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)\n Y = self._label_binarizer.fit_transform(y)\n if not self._label_binarizer.y_type_.startswith('multilabel'):\n y = column_or_1d(y, warn=True)\n else:\n # we don't (yet) support multi-label classification in Ridge\n raise ValueError(\n \"%s doesn't support multi-label classification\" % (\n self.__class__.__name__))\n\n if self.class_weight:\n # modify the sample weights with the corresponding class weight\n sample_weight = (sample_weight *\n compute_sample_weight(self.class_weight, y))\n\n super().fit(X, Y, sample_weight=sample_weight)\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/__init__/alphas", + "name": "alphas", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.__init__.alphas", + "default_value": "(0.1, 1.0, 10.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_alphas,)", + "default_value": "(0.1, 1.0, 10.0)", + "description": "Array of alpha values to try.\nRegularization strength; must be a positive float. Regularization\nimproves the conditioning of the problem and reduces the variance of\nthe estimates. Larger values specify stronger regularization.\nAlpha corresponds to ``1 / (2C)`` in other linear models such as\n:class:`~sklearn.linear_model.LogisticRegression` or\n:class:`~sklearn.svm.LinearSVC`." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_alphas,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered)." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/__init__/scoring", + "name": "scoring", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.__init__.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string, callable", + "default_value": "None", + "description": "A string (see model evaluation documentation) or\na scorer callable object / function with signature\n``scorer(estimator, X, y)``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/__init__/cv", + "name": "cv", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the efficient Leave-One-Out cross-validation\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict or 'balanced'", + "default_value": "None", + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "'balanced'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/__init__/store_cv_values", + "name": "store_cv_values", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.__init__.store_cv_values", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Flag indicating if the cross-validation values corresponding to\neach alpha should be stored in the ``cv_values_`` attribute (see\nbelow). This flag is only compatible with ``cv=None`` (i.e. using\nLeave-One-Out Cross-Validation)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Ridge classifier with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nBy default, it performs Leave-One-Out Cross-Validation. 
Currently,\nonly the n_features > n_samples case is handled efficiently.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, alphas=(0.1, 1.0, 10.0), *, fit_intercept=True,\n normalize=False, scoring=None, cv=None, class_weight=None,\n store_cv_values=False):\n super().__init__(\n alphas=alphas, fit_intercept=fit_intercept, normalize=normalize,\n scoring=scoring, cv=cv, store_cv_values=store_cv_values)\n self.class_weight = class_weight" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/_more_tags", + "name": "_more_tags", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/classes_@getter", + "name": "classes_", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.classes_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/classes_/self", + "name": "self", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.classes_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def classes_(self):\n return self._label_binarizer.classes_" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/fit", + "name": "fit", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/fit/self", + "name": "self", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/fit/X", + "name": "X", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples\nand n_features is the number of features. When using GCV,\nwill be cast to float64 if necessary." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/fit/y", + "name": "y", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target values. Will be cast to X's dtype if necessary." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/RidgeClassifierCV/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._ridge.RidgeClassifierCV.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float or ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Individual weights for each sample. If given a float, every sample\nwill have the same weight." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit Ridge classifier with cv.", + "docstring": "Fit Ridge classifier with cv.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features. When using GCV,\n will be cast to float64 if necessary.\n\ny : ndarray of shape (n_samples,)\n Target values. Will be cast to X's dtype if necessary.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Ridge classifier with cv.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features. When using GCV,\n will be cast to float64 if necessary.\n\n y : ndarray of shape (n_samples,)\n Target values. Will be cast to X's dtype if necessary.\n\n sample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. 
If given a float, every sample\n will have the same weight.\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'],\n multi_output=True, y_numeric=False)\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)\n Y = self._label_binarizer.fit_transform(y)\n if not self._label_binarizer.y_type_.startswith('multilabel'):\n y = column_or_1d(y, warn=True)\n\n if self.class_weight:\n # modify the sample weights with the corresponding class weight\n sample_weight = (sample_weight *\n compute_sample_weight(self.class_weight, y))\n\n target = Y if self.cv is None else y\n _BaseRidgeCV.fit(self, X, target, sample_weight=sample_weight)\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._ridge._BaseRidge.__init__", + "decorators": ["abstractmethod", "_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._BaseRidge.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge._BaseRidge.__init__.alpha", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._ridge._BaseRidge.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._ridge._BaseRidge.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._ridge._BaseRidge.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._ridge._BaseRidge.__init__.max_iter", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._ridge._BaseRidge.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.linear_model._ridge/_BaseRidge/__init__/solver", + "name": "solver", + "qname": "sklearn.linear_model._ridge._BaseRidge.__init__.solver", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._ridge._BaseRidge.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @abstractmethod\n @_deprecate_positional_args\n def __init__(self, alpha=1.0, *, fit_intercept=True, normalize=False,\n copy_X=True, max_iter=None, tol=1e-3, solver=\"auto\",\n random_state=None):\n self.alpha = alpha\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.copy_X = copy_X\n self.max_iter = max_iter\n self.tol = tol\n self.solver = solver\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/fit", + "name": "fit", + "qname": "sklearn.linear_model._ridge._BaseRidge.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/fit/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._BaseRidge.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/fit/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._BaseRidge.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/fit/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._BaseRidge.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidge/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._ridge._BaseRidge.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def fit(self, X, y, sample_weight=None):\n\n # all other solvers work at both float precision levels\n _dtype = [np.float64, np.float32]\n _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X),\n self.solver)\n X, y = self._validate_data(X, y,\n accept_sparse=_accept_sparse,\n dtype=_dtype,\n multi_output=True, y_numeric=True)\n if sparse.issparse(X) and self.fit_intercept:\n if self.solver not in ['auto', 'sparse_cg', 'sag']:\n raise ValueError(\n \"solver='{}' does not support fitting the intercept \"\n \"on sparse data. 
Please set the solver to 'auto', \"\n \"'sparse_cg', or 'sag', or set `fit_intercept=False`\"\n .format(self.solver))\n if (self.solver == 'sag' and self.max_iter is None and\n self.tol > 1e-4):\n warnings.warn(\n '\"sag\" solver requires many iterations to fit '\n 'an intercept with sparse inputs. Either set the '\n 'solver to \"auto\" or \"sparse_cg\", or set a low '\n '\"tol\" and a high \"max_iter\" (especially if inputs are '\n 'not standardized).')\n solver = 'sag'\n else:\n solver = 'sparse_cg'\n else:\n solver = self.solver\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=X.dtype)\n\n # when X is sparse we only remove offset from y\n X, y, X_offset, y_offset, X_scale = self._preprocess_data(\n X, y, self.fit_intercept, self.normalize, self.copy_X,\n sample_weight=sample_weight, return_mean=True)\n\n if solver == 'sag' and sparse.issparse(X) and self.fit_intercept:\n self.coef_, self.n_iter_, self.intercept_ = _ridge_regression(\n X, y, alpha=self.alpha, sample_weight=sample_weight,\n max_iter=self.max_iter, tol=self.tol, solver='sag',\n random_state=self.random_state, return_n_iter=True,\n return_intercept=True, check_input=False)\n # add the offset which was subtracted by _preprocess_data\n self.intercept_ += y_offset\n\n else:\n if sparse.issparse(X) and self.fit_intercept:\n # required to fit intercept with sparse_cg solver\n params = {'X_offset': X_offset, 'X_scale': X_scale}\n else:\n # for dense matrices or when intercept is set to 0\n params = {}\n\n self.coef_, self.n_iter_ = _ridge_regression(\n X, y, alpha=self.alpha, sample_weight=sample_weight,\n max_iter=self.max_iter, tol=self.tol, solver=solver,\n random_state=self.random_state, return_n_iter=True,\n return_intercept=False, check_input=False, **params)\n self._set_intercept(X_offset, y_offset, X_scale)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__/alphas", + "name": "alphas", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.__init__.alphas", + "default_value": "(0.1, 1.0, 10.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + 
}, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__/scoring", + "name": "scoring", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.__init__.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__/cv", + "name": "cv", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__/gcv_mode", + "name": "gcv_mode", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.__init__.gcv_mode", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__/store_cv_values", + "name": "store_cv_values", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.__init__.store_cv_values", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/__init__/alpha_per_target", + "name": "alpha_per_target", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.__init__.alpha_per_target", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, alphas=(0.1, 1.0, 10.0), *,\n fit_intercept=True, normalize=False, scoring=None,\n cv=None, gcv_mode=None, store_cv_values=False,\n alpha_per_target=False):\n self.alphas = np.asarray(alphas)\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.scoring = scoring\n self.cv = cv\n self.gcv_mode = gcv_mode\n self.store_cv_values = store_cv_values\n self.alpha_per_target = alpha_per_target" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/fit", + "name": "fit", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/fit/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/fit/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data. If using GCV, will be cast to float64\nif necessary." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/fit/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values. Will be cast to X's dtype if necessary." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_BaseRidgeCV/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._ridge._BaseRidgeCV.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float or ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Individual weights for each sample. If given a float, every sample\nwill have the same weight." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit Ridge regression model with cv.", + "docstring": "Fit Ridge regression model with cv.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training data. If using GCV, will be cast to float64\n if necessary.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\nReturns\n-------\nself : object\n\nNotes\n-----\nWhen sample_weight is provided, the selected hyperparameter may depend\non whether we use leave-one-out cross-validation (cv=None or cv='auto')\nor another form of cross-validation, because only leave-one-out\ncross-validation takes the sample weights into account when computing\nthe validation score.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Ridge regression model with cv.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Training data. If using GCV, will be cast to float64\n if necessary.\n\n y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to X's dtype if necessary.\n\n sample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. 
If given a float, every sample\n will have the same weight.\n\n Returns\n -------\n self : object\n\n Notes\n -----\n When sample_weight is provided, the selected hyperparameter may depend\n on whether we use leave-one-out cross-validation (cv=None or cv='auto')\n or another form of cross-validation, because only leave-one-out\n cross-validation takes the sample weights into account when computing\n the validation score.\n \"\"\"\n cv = self.cv\n if cv is None:\n estimator = _RidgeGCV(self.alphas,\n fit_intercept=self.fit_intercept,\n normalize=self.normalize,\n scoring=self.scoring,\n gcv_mode=self.gcv_mode,\n store_cv_values=self.store_cv_values,\n is_clf=is_classifier(self),\n alpha_per_target=self.alpha_per_target)\n estimator.fit(X, y, sample_weight=sample_weight)\n self.alpha_ = estimator.alpha_\n self.best_score_ = estimator.best_score_\n if self.store_cv_values:\n self.cv_values_ = estimator.cv_values_\n else:\n if self.store_cv_values:\n raise ValueError(\"cv!=None and store_cv_values=True\"\n \" are incompatible\")\n if self.alpha_per_target:\n raise ValueError(\"cv!=None and alpha_per_target=True\"\n \" are incompatible\")\n parameters = {'alpha': self.alphas}\n solver = 'sparse_cg' if sparse.issparse(X) else 'auto'\n model = RidgeClassifier if is_classifier(self) else Ridge\n gs = GridSearchCV(model(fit_intercept=self.fit_intercept,\n normalize=self.normalize,\n solver=solver),\n parameters, cv=cv, scoring=self.scoring)\n gs.fit(X, y, sample_weight=sample_weight)\n estimator = gs.best_estimator_\n self.alpha_ = gs.best_estimator_.alpha\n self.best_score_ = gs.best_score_\n\n self.coef_ = estimator.coef_\n self.intercept_ = estimator.intercept_\n self.n_features_in_ = estimator.n_features_in_\n\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityClassifier/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._ridge._IdentityClassifier.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityClassifier/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._IdentityClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityClassifier/__init__/classes", + "name": "classes", + "qname": "sklearn.linear_model._ridge._IdentityClassifier.__init__.classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fake classifier which will directly output the prediction.\n\nWe inherit from LinearClassifierMixin to get the proper shape for the\noutput `y`.", + "docstring": "", + "code": " def __init__(self, classes):\n self.classes_ = classes" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityClassifier/decision_function", + "name": "decision_function", + "qname": "sklearn.linear_model._ridge._IdentityClassifier.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityClassifier/decision_function/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._IdentityClassifier.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + 
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityClassifier/decision_function/y_predict", + "name": "y_predict", + "qname": "sklearn.linear_model._ridge._IdentityClassifier.decision_function.y_predict", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def decision_function(self, y_predict):\n return y_predict" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityRegressor/decision_function", + "name": "decision_function", + "qname": "sklearn.linear_model._ridge._IdentityRegressor.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityRegressor/decision_function/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._IdentityRegressor.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityRegressor/decision_function/y_predict", + "name": "y_predict", + "qname": "sklearn.linear_model._ridge._IdentityRegressor.decision_function.y_predict", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def decision_function(self, y_predict):\n return y_predict" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityRegressor/predict", + "name": "predict", + "qname": "sklearn.linear_model._ridge._IdentityRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityRegressor/predict/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._IdentityRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_IdentityRegressor/predict/y_predict", + "name": "y_predict", + "qname": "sklearn.linear_model._ridge._IdentityRegressor.predict.y_predict", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def predict(self, y_predict):\n return y_predict" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._ridge._RidgeGCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._RidgeGCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/__init__/alphas", + "name": "alphas", + "qname": "sklearn.linear_model._ridge._RidgeGCV.__init__.alphas", + "default_value": "(0.1, 1.0, 10.0)", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._ridge._RidgeGCV.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/__init__/normalize", + "name": "normalize", + "qname": "sklearn.linear_model._ridge._RidgeGCV.__init__.normalize", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/__init__/scoring", + "name": "scoring", + "qname": "sklearn.linear_model._ridge._RidgeGCV.__init__.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._ridge._RidgeGCV.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/__init__/gcv_mode", + "name": "gcv_mode", + "qname": "sklearn.linear_model._ridge._RidgeGCV.__init__.gcv_mode", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/__init__/store_cv_values", + "name": "store_cv_values", + "qname": "sklearn.linear_model._ridge._RidgeGCV.__init__.store_cv_values", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/__init__/is_clf", + "name": "is_clf", + "qname": "sklearn.linear_model._ridge._RidgeGCV.__init__.is_clf", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/__init__/alpha_per_target", + "name": "alpha_per_target", + "qname": "sklearn.linear_model._ridge._RidgeGCV.__init__.alpha_per_target", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Ridge regression with built-in Leave-one-out Cross-Validation.\n\nThis class is not intended to be used directly. 
Use RidgeCV instead.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, alphas=(0.1, 1.0, 10.0), *,\n fit_intercept=True, normalize=False,\n scoring=None, copy_X=True,\n gcv_mode=None, store_cv_values=False,\n is_clf=False, alpha_per_target=False):\n self.alphas = np.asarray(alphas)\n self.fit_intercept = fit_intercept\n self.normalize = normalize\n self.scoring = scoring\n self.copy_X = copy_X\n self.gcv_mode = gcv_mode\n self.store_cv_values = store_cv_values\n self.is_clf = is_clf\n self.alpha_per_target = alpha_per_target" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_compute_covariance", + "name": "_compute_covariance", + "qname": "sklearn.linear_model._ridge._RidgeGCV._compute_covariance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_compute_covariance/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._RidgeGCV._compute_covariance.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_compute_covariance/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._RidgeGCV._compute_covariance.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "The preprocessed design matrix." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_compute_covariance/sqrt_sw", + "name": "sqrt_sw", + "qname": "sklearn.linear_model._ridge._RidgeGCV._compute_covariance.sqrt_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "square roots of sample weights" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes covariance matrix X^TX with possible centering.", + "docstring": "Computes covariance matrix X^TX with possible centering.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n The preprocessed design matrix.\n\nsqrt_sw : ndarray of shape (n_samples,)\n square roots of sample weights\n\nReturns\n-------\ncovariance : ndarray of shape (n_features, n_features)\n The covariance matrix.\nX_mean : ndarray of shape (n_feature,)\n The weighted mean of ``X`` for each feature.\n\nNotes\n-----\nSince X is sparse it has not been centered in preprocessing, but it has\nbeen scaled by sqrt(sample weights).\n\nWhen self.fit_intercept is False no centering is done.\n\nThe centered X is never actually computed because centering would break\nthe sparsity of X.", + "code": " def _compute_covariance(self, X, sqrt_sw):\n \"\"\"Computes covariance matrix X^TX with possible centering.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n The preprocessed design matrix.\n\n sqrt_sw : ndarray of shape (n_samples,)\n square roots of sample weights\n\n Returns\n -------\n covariance : ndarray of shape (n_features, n_features)\n The covariance matrix.\n X_mean : ndarray of shape (n_feature,)\n The weighted mean of ``X`` for each 
feature.\n\n Notes\n -----\n Since X is sparse it has not been centered in preprocessing, but it has\n been scaled by sqrt(sample weights).\n\n When self.fit_intercept is False no centering is done.\n\n The centered X is never actually computed because centering would break\n the sparsity of X.\n \"\"\"\n if not self.fit_intercept:\n # in this case centering has been done in preprocessing\n # or we are not fitting an intercept.\n X_mean = np.zeros(X.shape[1], dtype=X.dtype)\n return safe_sparse_dot(X.T, X, dense_output=True), X_mean\n # this function only gets called for sparse X\n n_samples = X.shape[0]\n sample_weight_matrix = sparse.dia_matrix(\n (sqrt_sw, 0), shape=(n_samples, n_samples))\n X_weighted = sample_weight_matrix.dot(X)\n X_mean, _ = mean_variance_axis(X_weighted, axis=0)\n X_mean = X_mean * n_samples / sqrt_sw.dot(sqrt_sw)\n weight_sum = sqrt_sw.dot(sqrt_sw)\n return (safe_sparse_dot(X.T, X, dense_output=True) -\n weight_sum * np.outer(X_mean, X_mean),\n X_mean)" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_compute_gram", + "name": "_compute_gram", + "qname": "sklearn.linear_model._ridge._RidgeGCV._compute_gram", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_compute_gram/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._RidgeGCV._compute_gram.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_compute_gram/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._RidgeGCV._compute_gram.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The preprocessed design matrix." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_compute_gram/sqrt_sw", + "name": "sqrt_sw", + "qname": "sklearn.linear_model._ridge._RidgeGCV._compute_gram.sqrt_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "square roots of sample weights" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the Gram matrix XX^T with possible centering.", + "docstring": "Computes the Gram matrix XX^T with possible centering.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The preprocessed design matrix.\n\nsqrt_sw : ndarray of shape (n_samples,)\n square roots of sample weights\n\nReturns\n-------\ngram : ndarray of shape (n_samples, n_samples)\n The Gram matrix.\nX_mean : ndarray of shape (n_feature,)\n The weighted mean of ``X`` for each feature.\n\nNotes\n-----\nWhen X is dense the centering has been done in preprocessing\nso the mean is 0 and we just compute XX^T.\n\nWhen X is sparse it has not been centered in preprocessing, but it has\nbeen scaled by sqrt(sample weights).\n\nWhen self.fit_intercept is False no centering is done.\n\nThe centered X is never actually computed because centering would break\nthe sparsity of X.", + "code": " def _compute_gram(self, X, sqrt_sw):\n \"\"\"Computes the Gram matrix XX^T with possible centering.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The preprocessed design matrix.\n\n sqrt_sw : ndarray of shape (n_samples,)\n square roots of sample weights\n\n Returns\n -------\n gram : ndarray of shape (n_samples, n_samples)\n The Gram matrix.\n X_mean : ndarray of shape (n_feature,)\n The weighted mean of ``X`` for each feature.\n\n Notes\n -----\n When X is dense the centering has been done in preprocessing\n so the mean is 0 and we just compute XX^T.\n\n When X is sparse it has not been centered in preprocessing, but it has\n been scaled by sqrt(sample weights).\n\n When self.fit_intercept is False no centering is done.\n\n The centered X is never actually computed because centering would break\n the sparsity of X.\n \"\"\"\n center = self.fit_intercept and sparse.issparse(X)\n if not center:\n # in this case centering has been done in preprocessing\n # or we are not fitting an intercept.\n X_mean = np.zeros(X.shape[1], dtype=X.dtype)\n return safe_sparse_dot(X, X.T, dense_output=True), X_mean\n # X is sparse\n n_samples = X.shape[0]\n sample_weight_matrix = sparse.dia_matrix(\n (sqrt_sw, 0), shape=(n_samples, n_samples))\n X_weighted = sample_weight_matrix.dot(X)\n X_mean, _ = mean_variance_axis(X_weighted, axis=0)\n X_mean *= n_samples / sqrt_sw.dot(sqrt_sw)\n X_mX = sqrt_sw[:, None] * safe_sparse_dot(\n X_mean, X.T, dense_output=True)\n X_mX_m = np.outer(sqrt_sw, sqrt_sw) * np.dot(X_mean, X_mean)\n return (safe_sparse_dot(X, X.T, dense_output=True) + X_mX_m\n - X_mX - X_mX.T, X_mean)" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_decomp_diag", + "name": "_decomp_diag", + "qname": "sklearn.linear_model._ridge._RidgeGCV._decomp_diag", + "decorators": ["staticmethod"], + "parameters": 
[ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_decomp_diag/v_prime", + "name": "v_prime", + "qname": "sklearn.linear_model._ridge._RidgeGCV._decomp_diag.v_prime", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_decomp_diag/Q", + "name": "Q", + "qname": "sklearn.linear_model._ridge._RidgeGCV._decomp_diag.Q", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @staticmethod\n def _decomp_diag(v_prime, Q):\n # compute diagonal of the matrix: dot(Q, dot(diag(v_prime), Q^T))\n return (v_prime * Q ** 2).sum(axis=-1)" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_diag_dot", + "name": "_diag_dot", + "qname": "sklearn.linear_model._ridge._RidgeGCV._diag_dot", + "decorators": ["staticmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_diag_dot/D", + "name": "D", + "qname": "sklearn.linear_model._ridge._RidgeGCV._diag_dot.D", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_diag_dot/B", + "name": "B", + "qname": "sklearn.linear_model._ridge._RidgeGCV._diag_dot.B", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @staticmethod\n def _diag_dot(D, B):\n # compute dot(diag(D), B)\n if len(B.shape) > 1:\n # handle case where B is > 1-d\n D = D[(slice(None), ) + (np.newaxis, ) * (len(B.shape) - 1)]\n return D * B" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_eigen_decompose_covariance", + "name": "_eigen_decompose_covariance", + "qname": "sklearn.linear_model._ridge._RidgeGCV._eigen_decompose_covariance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_eigen_decompose_covariance/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._RidgeGCV._eigen_decompose_covariance.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_eigen_decompose_covariance/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._RidgeGCV._eigen_decompose_covariance.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_eigen_decompose_covariance/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._RidgeGCV._eigen_decompose_covariance.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + 
}, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_eigen_decompose_covariance/sqrt_sw", + "name": "sqrt_sw", + "qname": "sklearn.linear_model._ridge._RidgeGCV._eigen_decompose_covariance.sqrt_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Eigendecomposition of X^T.X, used when n_samples > n_features\nand X is sparse.", + "docstring": "Eigendecomposition of X^T.X, used when n_samples > n_features\nand X is sparse.", + "code": " def _eigen_decompose_covariance(self, X, y, sqrt_sw):\n \"\"\"Eigendecomposition of X^T.X, used when n_samples > n_features\n and X is sparse.\n \"\"\"\n n_samples, n_features = X.shape\n cov = np.empty((n_features + 1, n_features + 1), dtype=X.dtype)\n cov[:-1, :-1], X_mean = self._compute_covariance(X, sqrt_sw)\n if not self.fit_intercept:\n cov = cov[:-1, :-1]\n # to emulate centering X with sample weights,\n # ie removing the weighted average, we add a column\n # containing the square roots of the sample weights.\n # by centering, it is orthogonal to the other columns\n # when all samples have the same weight we add a column of 1\n else:\n cov[-1] = 0\n cov[:, -1] = 0\n cov[-1, -1] = sqrt_sw.dot(sqrt_sw)\n nullspace_dim = max(0, n_features - n_samples)\n eigvals, V = linalg.eigh(cov)\n # remove eigenvalues and vectors in the null space of X^T.X\n eigvals = eigvals[nullspace_dim:]\n V = V[:, nullspace_dim:]\n return X_mean, eigvals, V, X" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_eigen_decompose_gram", + "name": "_eigen_decompose_gram", + "qname": "sklearn.linear_model._ridge._RidgeGCV._eigen_decompose_gram", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_eigen_decompose_gram/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._RidgeGCV._eigen_decompose_gram.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_eigen_decompose_gram/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._RidgeGCV._eigen_decompose_gram.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_eigen_decompose_gram/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._RidgeGCV._eigen_decompose_gram.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_eigen_decompose_gram/sqrt_sw", + "name": "sqrt_sw", + "qname": "sklearn.linear_model._ridge._RidgeGCV._eigen_decompose_gram.sqrt_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Eigendecomposition of X.X^T, used when n_samples <= n_features.", + "docstring": "Eigendecomposition of X.X^T, used 
when n_samples <= n_features.", + "code": " def _eigen_decompose_gram(self, X, y, sqrt_sw):\n \"\"\"Eigendecomposition of X.X^T, used when n_samples <= n_features.\"\"\"\n # if X is dense it has already been centered in preprocessing\n K, X_mean = self._compute_gram(X, sqrt_sw)\n if self.fit_intercept:\n # to emulate centering X with sample weights,\n # ie removing the weighted average, we add a column\n # containing the square roots of the sample weights.\n # by centering, it is orthogonal to the other columns\n K += np.outer(sqrt_sw, sqrt_sw)\n eigvals, Q = linalg.eigh(K)\n QT_y = np.dot(Q.T, y)\n return X_mean, eigvals, Q, QT_y" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance", + "name": "_solve_eigen_covariance", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance/sqrt_sw", + "name": "sqrt_sw", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance.sqrt_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance/X_mean", + "name": "X_mean", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance.X_mean", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance/eigvals", + "name": "eigvals", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance.eigvals", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance/V", + "name": "V", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance.V", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X^T.X\n(n_samples > n_features and X is sparse).", + "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X^T.X\n(n_samples > n_features and X is sparse).", + "code": " def _solve_eigen_covariance(\n self, alpha, y, sqrt_sw, X_mean, eigvals, V, X):\n \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n Used when we have a decomposition of X^T.X\n (n_samples > n_features and X is sparse).\n \"\"\"\n if self.fit_intercept:\n return self._solve_eigen_covariance_intercept(\n alpha, y, sqrt_sw, X_mean, eigvals, V, X)\n return self._solve_eigen_covariance_no_intercept(\n alpha, y, sqrt_sw, X_mean, eigvals, V, X)" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_intercept", + "name": "_solve_eigen_covariance_intercept", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_intercept", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_intercept/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_intercept.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_intercept/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_intercept.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_intercept/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_intercept.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_intercept/sqrt_sw", + "name": "sqrt_sw", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_intercept.sqrt_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_intercept/X_mean", + "name": "X_mean", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_intercept.X_mean", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_intercept/eigvals", 
+ "name": "eigvals", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_intercept.eigvals", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_intercept/V", + "name": "V", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_intercept.V", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_intercept/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_intercept.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X^T.X\n(n_samples > n_features and X is sparse),\nand we are fitting an intercept.", + "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X^T.X\n(n_samples > n_features and X is sparse),\nand we are fitting an intercept.", + "code": " def _solve_eigen_covariance_intercept(\n self, alpha, y, sqrt_sw, X_mean, eigvals, V, X):\n \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n Used when we have a decomposition of X^T.X\n (n_samples > n_features and X is sparse),\n and we are fitting an intercept.\n \"\"\"\n # the vector [0, 0, ..., 0, 1]\n # is the eigenvector of X^TX which\n # corresponds to the intercept; we cancel the regularization on\n # this dimension. the corresponding eigenvalue is\n # sum(sample_weight), e.g. 
n when uniform sample weights.\n intercept_sv = np.zeros(V.shape[0])\n intercept_sv[-1] = 1\n intercept_dim = _find_smallest_angle(intercept_sv, V)\n w = 1 / (eigvals + alpha)\n w[intercept_dim] = 1 / eigvals[intercept_dim]\n A = (V * w).dot(V.T)\n # add a column to X containing the square roots of sample weights\n X_op = _X_CenterStackOp(X, X_mean, sqrt_sw)\n AXy = A.dot(X_op.T.dot(y))\n y_hat = X_op.dot(AXy)\n hat_diag = self._sparse_multidot_diag(X, A, X_mean, sqrt_sw)\n # return (1 - hat_diag), (y - y_hat)\n if len(y.shape) != 1:\n # handle case where y is 2-d\n hat_diag = hat_diag[:, np.newaxis]\n return (1 - hat_diag) / alpha, (y - y_hat) / alpha" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_no_intercept", + "name": "_solve_eigen_covariance_no_intercept", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_no_intercept", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_no_intercept/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_no_intercept.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_no_intercept/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_no_intercept.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_no_intercept/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_no_intercept.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_no_intercept/sqrt_sw", + "name": "sqrt_sw", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_no_intercept.sqrt_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_no_intercept/X_mean", + "name": "X_mean", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_no_intercept.X_mean", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_no_intercept/eigvals", + "name": "eigvals", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_no_intercept.eigvals", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_no_intercept/V", + "name": "V", + "qname": 
"sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_no_intercept.V", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_covariance_no_intercept/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_covariance_no_intercept.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X^T.X\n(n_samples > n_features and X is sparse), and not fitting an intercept.", + "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X^T.X\n(n_samples > n_features and X is sparse), and not fitting an intercept.", + "code": " def _solve_eigen_covariance_no_intercept(\n self, alpha, y, sqrt_sw, X_mean, eigvals, V, X):\n \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n Used when we have a decomposition of X^T.X\n (n_samples > n_features and X is sparse), and not fitting an intercept.\n \"\"\"\n w = 1 / (eigvals + alpha)\n A = (V * w).dot(V.T)\n AXy = A.dot(safe_sparse_dot(X.T, y, dense_output=True))\n y_hat = safe_sparse_dot(X, AXy, dense_output=True)\n hat_diag = self._sparse_multidot_diag(X, A, X_mean, sqrt_sw)\n if len(y.shape) != 1:\n # handle case where y is 2-d\n hat_diag = hat_diag[:, np.newaxis]\n return (1 - hat_diag) / alpha, (y - y_hat) / alpha" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_gram", + "name": "_solve_eigen_gram", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_gram", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_gram/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_gram.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_gram/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_gram.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_gram/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_gram.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_gram/sqrt_sw", + "name": "sqrt_sw", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_gram.sqrt_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_gram/X_mean", + "name": "X_mean", + "qname": 
"sklearn.linear_model._ridge._RidgeGCV._solve_eigen_gram.X_mean", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_gram/eigvals", + "name": "eigvals", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_gram.eigvals", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_gram/Q", + "name": "Q", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_gram.Q", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_eigen_gram/QT_y", + "name": "QT_y", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_eigen_gram.QT_y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X.X^T (n_samples <= n_features).", + "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have a decomposition of X.X^T (n_samples <= n_features).", + "code": " def _solve_eigen_gram(self, alpha, y, sqrt_sw, X_mean, eigvals, Q, QT_y):\n \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n Used when we have a decomposition of X.X^T (n_samples <= n_features).\n \"\"\"\n w = 1. / (eigvals + alpha)\n if self.fit_intercept:\n # the vector containing the square roots of the sample weights (1\n # when no sample weights) is the eigenvector of XX^T which\n # corresponds to the intercept; we cancel the regularization on\n # this dimension. 
the corresponding eigenvalue is\n # sum(sample_weight).\n normalized_sw = sqrt_sw / np.linalg.norm(sqrt_sw)\n intercept_dim = _find_smallest_angle(normalized_sw, Q)\n w[intercept_dim] = 0 # cancel regularization for the intercept\n\n c = np.dot(Q, self._diag_dot(w, QT_y))\n G_inverse_diag = self._decomp_diag(w, Q)\n # handle case where y is 2-d\n if len(y.shape) != 1:\n G_inverse_diag = G_inverse_diag[:, np.newaxis]\n return G_inverse_diag, c" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_svd_design_matrix", + "name": "_solve_svd_design_matrix", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_svd_design_matrix", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_svd_design_matrix/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_svd_design_matrix.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_svd_design_matrix/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_svd_design_matrix.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_svd_design_matrix/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_svd_design_matrix.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_svd_design_matrix/sqrt_sw", + "name": "sqrt_sw", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_svd_design_matrix.sqrt_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_svd_design_matrix/X_mean", + "name": "X_mean", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_svd_design_matrix.X_mean", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_svd_design_matrix/singvals_sq", + "name": "singvals_sq", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_svd_design_matrix.singvals_sq", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_svd_design_matrix/U", + "name": "U", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_svd_design_matrix.U", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_solve_svd_design_matrix/UT_y", + "name": "UT_y", + "qname": "sklearn.linear_model._ridge._RidgeGCV._solve_svd_design_matrix.UT_y", + 
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have an SVD decomposition of X\n(n_samples > n_features and X is dense).", + "docstring": "Compute dual coefficients and diagonal of G^-1.\n\nUsed when we have an SVD decomposition of X\n(n_samples > n_features and X is dense).", + "code": " def _solve_svd_design_matrix(\n self, alpha, y, sqrt_sw, X_mean, singvals_sq, U, UT_y):\n \"\"\"Compute dual coefficients and diagonal of G^-1.\n\n Used when we have an SVD decomposition of X\n (n_samples > n_features and X is dense).\n \"\"\"\n w = ((singvals_sq + alpha) ** -1) - (alpha ** -1)\n if self.fit_intercept:\n # detect intercept column\n normalized_sw = sqrt_sw / np.linalg.norm(sqrt_sw)\n intercept_dim = _find_smallest_angle(normalized_sw, U)\n # cancel the regularization for the intercept\n w[intercept_dim] = - (alpha ** -1)\n c = np.dot(U, self._diag_dot(w, UT_y)) + (alpha ** -1) * y\n G_inverse_diag = self._decomp_diag(w, U) + (alpha ** -1)\n if len(y.shape) != 1:\n # handle case where y is 2-d\n G_inverse_diag = G_inverse_diag[:, np.newaxis]\n return G_inverse_diag, c" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_sparse_multidot_diag", + "name": "_sparse_multidot_diag", + "qname": "sklearn.linear_model._ridge._RidgeGCV._sparse_multidot_diag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_sparse_multidot_diag/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._RidgeGCV._sparse_multidot_diag.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_sparse_multidot_diag/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._RidgeGCV._sparse_multidot_diag.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_sparse_multidot_diag/A", + "name": "A", + "qname": "sklearn.linear_model._ridge._RidgeGCV._sparse_multidot_diag.A", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_sparse_multidot_diag/X_mean", + "name": "X_mean", + "qname": "sklearn.linear_model._ridge._RidgeGCV._sparse_multidot_diag.X_mean", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_sparse_multidot_diag/sqrt_sw", + "name": "sqrt_sw", + "qname": 
"sklearn.linear_model._ridge._RidgeGCV._sparse_multidot_diag.sqrt_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,)", + "default_value": "", + "description": "square roots of sample weights" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the diagonal of (X - X_mean).dot(A).dot((X - X_mean).T)\nwithout explicitely centering X nor computing X.dot(A)\nwhen X is sparse.", + "docstring": "Compute the diagonal of (X - X_mean).dot(A).dot((X - X_mean).T)\nwithout explicitely centering X nor computing X.dot(A)\nwhen X is sparse.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n\nA : ndarray of shape (n_features, n_features)\n\nX_mean : ndarray of shape (n_features,)\n\nsqrt_sw : ndarray of shape (n_features,)\n square roots of sample weights\n\nReturns\n-------\ndiag : np.ndarray, shape (n_samples,)\n The computed diagonal.", + "code": " def _sparse_multidot_diag(self, X, A, X_mean, sqrt_sw):\n \"\"\"Compute the diagonal of (X - X_mean).dot(A).dot((X - X_mean).T)\n without explicitely centering X nor computing X.dot(A)\n when X is sparse.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n\n A : ndarray of shape (n_features, n_features)\n\n X_mean : ndarray of shape (n_features,)\n\n sqrt_sw : ndarray of shape (n_features,)\n square roots of sample weights\n\n Returns\n -------\n diag : np.ndarray, shape (n_samples,)\n The computed diagonal.\n \"\"\"\n intercept_col = scale = sqrt_sw\n batch_size = X.shape[1]\n diag = np.empty(X.shape[0], dtype=X.dtype)\n for start in range(0, X.shape[0], batch_size):\n batch = slice(start, min(X.shape[0], start + batch_size), 1)\n X_batch = np.empty(\n (X[batch].shape[0], X.shape[1] + self.fit_intercept),\n dtype=X.dtype\n )\n if self.fit_intercept:\n X_batch[:, :-1] = X[batch].A - X_mean * scale[batch][:, None]\n X_batch[:, -1] = intercept_col[batch]\n else:\n X_batch = X[batch].A\n diag[batch] = (X_batch.dot(A) * X_batch).sum(axis=1)\n return diag" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_svd_decompose_design_matrix", + "name": "_svd_decompose_design_matrix", + "qname": "sklearn.linear_model._ridge._RidgeGCV._svd_decompose_design_matrix", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_svd_decompose_design_matrix/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._RidgeGCV._svd_decompose_design_matrix.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_svd_decompose_design_matrix/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._RidgeGCV._svd_decompose_design_matrix.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_svd_decompose_design_matrix/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._RidgeGCV._svd_decompose_design_matrix.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
"description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/_svd_decompose_design_matrix/sqrt_sw", + "name": "sqrt_sw", + "qname": "sklearn.linear_model._ridge._RidgeGCV._svd_decompose_design_matrix.sqrt_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _svd_decompose_design_matrix(self, X, y, sqrt_sw):\n # X already centered\n X_mean = np.zeros(X.shape[1], dtype=X.dtype)\n if self.fit_intercept:\n # to emulate fit_intercept=True situation, add a column\n # containing the square roots of the sample weights\n # by centering, the other columns are orthogonal to that one\n intercept_column = sqrt_sw[:, None]\n X = np.hstack((X, intercept_column))\n U, singvals, _ = linalg.svd(X, full_matrices=0)\n singvals_sq = singvals ** 2\n UT_y = np.dot(U.T, y)\n return X_mean, singvals_sq, U, UT_y" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/fit", + "name": "fit", + "qname": "sklearn.linear_model._ridge._RidgeGCV.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/fit/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._RidgeGCV.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/fit/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._RidgeGCV.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data. Will be cast to float64 if necessary." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/fit/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._RidgeGCV.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values. Will be cast to float64 if necessary." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_RidgeGCV/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._ridge._RidgeGCV.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float or ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Individual weights for each sample. If given a float, every sample\nwill have the same weight." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit Ridge regression model with gcv.", + "docstring": "Fit Ridge regression model with gcv.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data. Will be cast to float64 if necessary.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to float64 if necessary.\n\nsample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Ridge regression model with gcv.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data. Will be cast to float64 if necessary.\n\n y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values. Will be cast to float64 if necessary.\n\n sample_weight : float or ndarray of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight.\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'],\n dtype=[np.float64],\n multi_output=True, y_numeric=True)\n\n # alpha_per_target cannot be used in classifier mode. All subclasses\n # of _RidgeGCV that are classifiers keep alpha_per_target at its\n # default value: False, so the condition below should never happen.\n assert not (self.is_clf and self.alpha_per_target)\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=X.dtype)\n\n if np.any(self.alphas <= 0):\n raise ValueError(\n \"alphas must be positive. 
Got {} containing some \"\n \"negative or null value instead.\".format(self.alphas))\n\n X, y, X_offset, y_offset, X_scale = LinearModel._preprocess_data(\n X, y, self.fit_intercept, self.normalize, self.copy_X,\n sample_weight=sample_weight)\n\n gcv_mode = _check_gcv_mode(X, self.gcv_mode)\n\n if gcv_mode == 'eigen':\n decompose = self._eigen_decompose_gram\n solve = self._solve_eigen_gram\n elif gcv_mode == 'svd':\n if sparse.issparse(X):\n decompose = self._eigen_decompose_covariance\n solve = self._solve_eigen_covariance\n else:\n decompose = self._svd_decompose_design_matrix\n solve = self._solve_svd_design_matrix\n\n n_samples = X.shape[0]\n\n if sample_weight is not None:\n X, y = _rescale_data(X, y, sample_weight)\n sqrt_sw = np.sqrt(sample_weight)\n else:\n sqrt_sw = np.ones(n_samples, dtype=X.dtype)\n\n X_mean, *decomposition = decompose(X, y, sqrt_sw)\n\n scorer = check_scoring(self, scoring=self.scoring, allow_none=True)\n error = scorer is None\n\n n_y = 1 if len(y.shape) == 1 else y.shape[1]\n n_alphas = 1 if np.ndim(self.alphas) == 0 else len(self.alphas)\n\n if self.store_cv_values:\n self.cv_values_ = np.empty(\n (n_samples * n_y, n_alphas), dtype=X.dtype)\n\n best_coef, best_score, best_alpha = None, None, None\n\n for i, alpha in enumerate(np.atleast_1d(self.alphas)):\n G_inverse_diag, c = solve(\n float(alpha), y, sqrt_sw, X_mean, *decomposition)\n if error:\n squared_errors = (c / G_inverse_diag) ** 2\n if self.alpha_per_target:\n alpha_score = -squared_errors.mean(axis=0)\n else:\n alpha_score = -squared_errors.mean()\n if self.store_cv_values:\n self.cv_values_[:, i] = squared_errors.ravel()\n else:\n predictions = y - (c / G_inverse_diag)\n if self.store_cv_values:\n self.cv_values_[:, i] = predictions.ravel()\n\n if self.is_clf:\n identity_estimator = _IdentityClassifier(\n classes=np.arange(n_y)\n )\n alpha_score = scorer(identity_estimator,\n predictions, y.argmax(axis=1))\n else:\n identity_estimator = _IdentityRegressor()\n if self.alpha_per_target:\n alpha_score = np.array([\n scorer(identity_estimator,\n predictions[:, j], y[:, j])\n for j in range(n_y)\n ])\n else:\n alpha_score = scorer(identity_estimator,\n predictions.ravel(), y.ravel())\n\n # Keep track of the best model\n if best_score is None:\n # initialize\n if self.alpha_per_target and n_y > 1:\n best_coef = c\n best_score = np.atleast_1d(alpha_score)\n best_alpha = np.full(n_y, alpha)\n else:\n best_coef = c\n best_score = alpha_score\n best_alpha = alpha\n else:\n # update\n if self.alpha_per_target and n_y > 1:\n to_update = alpha_score > best_score\n best_coef[:, to_update] = c[:, to_update]\n best_score[to_update] = alpha_score[to_update]\n best_alpha[to_update] = alpha\n elif alpha_score > best_score:\n best_coef, best_score, best_alpha = c, alpha_score, alpha\n\n self.alpha_ = best_alpha\n self.best_score_ = best_score\n self.dual_coef_ = best_coef\n self.coef_ = safe_sparse_dot(self.dual_coef_.T, X)\n\n X_offset += X_mean * X_scale\n self._set_intercept(X_offset, y_offset, X_scale)\n\n if self.store_cv_values:\n if len(y.shape) == 1:\n cv_values_shape = n_samples, n_alphas\n else:\n cv_values_shape = n_samples, n_y, n_alphas\n self.cv_values_ = self.cv_values_.reshape(cv_values_shape)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._ridge._XT_CenterStackOp.__init__", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._XT_CenterStackOp.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/__init__/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._XT_CenterStackOp.__init__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/__init__/X_mean", + "name": "X_mean", + "qname": "sklearn.linear_model._ridge._XT_CenterStackOp.__init__.X_mean", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/__init__/sqrt_sw", + "name": "sqrt_sw", + "qname": "sklearn.linear_model._ridge._XT_CenterStackOp.__init__.sqrt_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Behaves as transposed centered and scaled X with an intercept column.\n\nThis operator behaves as\nnp.hstack([X - sqrt_sw[:, None] * X_mean, sqrt_sw[:, None]]).T", + "docstring": "", + "code": " def __init__(self, X, X_mean, sqrt_sw):\n n_samples, n_features = X.shape\n super().__init__(X.dtype, (n_features + 1, n_samples))\n self.X = X\n self.X_mean = X_mean\n self.sqrt_sw = sqrt_sw" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/_matmat", + "name": "_matmat", + "qname": "sklearn.linear_model._ridge._XT_CenterStackOp._matmat", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/_matmat/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._XT_CenterStackOp._matmat.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/_matmat/v", + "name": "v", + "qname": "sklearn.linear_model._ridge._XT_CenterStackOp._matmat.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _matmat(self, v):\n n_features = self.shape[0]\n res = np.empty((n_features, v.shape[1]), dtype=self.X.dtype)\n res[:-1] = (\n safe_sparse_dot(self.X.T, v, dense_output=True) -\n self.X_mean[:, None] * self.sqrt_sw.dot(v)\n )\n res[-1] = np.dot(self.sqrt_sw, v)\n return res" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/_matvec", + "name": "_matvec", + "qname": "sklearn.linear_model._ridge._XT_CenterStackOp._matvec", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/_matvec/self", + "name": "self", + "qname": 
"sklearn.linear_model._ridge._XT_CenterStackOp._matvec.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_XT_CenterStackOp/_matvec/v", + "name": "v", + "qname": "sklearn.linear_model._ridge._XT_CenterStackOp._matvec.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _matvec(self, v):\n v = v.ravel()\n n_features = self.shape[0]\n res = np.empty(n_features, dtype=self.X.dtype)\n res[:-1] = (\n safe_sparse_dot(self.X.T, v, dense_output=True) -\n (self.X_mean * self.sqrt_sw.dot(v))\n )\n res[-1] = np.dot(v, self.sqrt_sw)\n return res" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/__init__/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp.__init__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/__init__/X_mean", + "name": "X_mean", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp.__init__.X_mean", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/__init__/sqrt_sw", + "name": "sqrt_sw", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp.__init__.sqrt_sw", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Behaves as centered and scaled X with an added intercept column.\n\nThis operator behaves as\nnp.hstack([X - sqrt_sw[:, None] * X_mean, sqrt_sw[:, None]])", + "docstring": "", + "code": " def __init__(self, X, X_mean, sqrt_sw):\n n_samples, n_features = X.shape\n super().__init__(X.dtype, (n_samples, n_features + 1))\n self.X = X\n self.X_mean = X_mean\n self.sqrt_sw = sqrt_sw" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/_matmat", + "name": "_matmat", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp._matmat", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/_matmat/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp._matmat.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": 
false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/_matmat/v", + "name": "v", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp._matmat.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _matmat(self, v):\n return (\n safe_sparse_dot(self.X, v[:-1], dense_output=True) -\n self.sqrt_sw[:, None] * self.X_mean.dot(v[:-1]) + v[-1] *\n self.sqrt_sw[:, None])" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/_matvec", + "name": "_matvec", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp._matvec", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/_matvec/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp._matvec.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/_matvec/v", + "name": "v", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp._matvec.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _matvec(self, v):\n v = v.ravel()\n return safe_sparse_dot(\n self.X, v[:-1], dense_output=True\n ) - self.sqrt_sw * self.X_mean.dot(v[:-1]) + v[-1] * self.sqrt_sw" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/_transpose", + "name": "_transpose", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp._transpose", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_X_CenterStackOp/_transpose/self", + "name": "self", + "qname": "sklearn.linear_model._ridge._X_CenterStackOp._transpose.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _transpose(self):\n return _XT_CenterStackOp(self.X, self.X_mean, self.sqrt_sw)" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_check_gcv_mode", + "name": "_check_gcv_mode", + "qname": "sklearn.linear_model._ridge._check_gcv_mode", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_check_gcv_mode/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._check_gcv_mode.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_check_gcv_mode/gcv_mode", + "name": "gcv_mode", + "qname": "sklearn.linear_model._ridge._check_gcv_mode.gcv_mode", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": 
"", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_gcv_mode(X, gcv_mode):\n possible_gcv_modes = [None, 'auto', 'svd', 'eigen']\n if gcv_mode not in possible_gcv_modes:\n raise ValueError(\n \"Unknown value for 'gcv_mode'. \"\n \"Got {} instead of one of {}\" .format(\n gcv_mode, possible_gcv_modes))\n if gcv_mode in ['eigen', 'svd']:\n return gcv_mode\n # if X has more rows than columns, use decomposition of X^T.X,\n # otherwise X.X^T\n if X.shape[0] > X.shape[1]:\n return 'svd'\n return 'eigen'" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_find_smallest_angle", + "name": "_find_smallest_angle", + "qname": "sklearn.linear_model._ridge._find_smallest_angle", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_find_smallest_angle/query", + "name": "query", + "qname": "sklearn.linear_model._ridge._find_smallest_angle.query", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Normalized query vector." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_find_smallest_angle/vectors", + "name": "vectors", + "qname": "sklearn.linear_model._ridge._find_smallest_angle.vectors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Vectors to which we compare query, as columns. Must be normalized." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Find the column of vectors that is most aligned with the query.\n\nBoth query and the columns of vectors must have their l2 norm equal to 1.", + "docstring": "Find the column of vectors that is most aligned with the query.\n\nBoth query and the columns of vectors must have their l2 norm equal to 1.\n\nParameters\n----------\nquery : ndarray of shape (n_samples,)\n Normalized query vector.\n\nvectors : ndarray of shape (n_samples, n_features)\n Vectors to which we compare query, as columns. Must be normalized.", + "code": "def _find_smallest_angle(query, vectors):\n \"\"\"Find the column of vectors that is most aligned with the query.\n\n Both query and the columns of vectors must have their l2 norm equal to 1.\n\n Parameters\n ----------\n query : ndarray of shape (n_samples,)\n Normalized query vector.\n\n vectors : ndarray of shape (n_samples, n_features)\n Vectors to which we compare query, as columns. 
Must be normalized.\n \"\"\"\n abs_cosine = np.abs(query.dot(vectors))\n index = np.argmax(abs_cosine)\n return index" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_get_valid_accept_sparse", + "name": "_get_valid_accept_sparse", + "qname": "sklearn.linear_model._ridge._get_valid_accept_sparse", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_get_valid_accept_sparse/is_X_sparse", + "name": "is_X_sparse", + "qname": "sklearn.linear_model._ridge._get_valid_accept_sparse.is_X_sparse", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_get_valid_accept_sparse/solver", + "name": "solver", + "qname": "sklearn.linear_model._ridge._get_valid_accept_sparse.solver", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _get_valid_accept_sparse(is_X_sparse, solver):\n if is_X_sparse and solver in ['auto', 'sag', 'saga']:\n return 'csr'\n else:\n return ['csr', 'csc', 'coo']" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression", + "name": "_ridge_regression", + "qname": "sklearn.linear_model._ridge._ridge_regression", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._ridge_regression.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._ridge_regression.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge._ridge_regression.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._ridge._ridge_regression.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/solver", + "name": "solver", + "qname": "sklearn.linear_model._ridge._ridge_regression.solver", + "default_value": "'auto'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._ridge._ridge_regression.max_iter", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + 
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/tol", + "name": "tol", + "qname": "sklearn.linear_model._ridge._ridge_regression.tol", + "default_value": "0.001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._ridge._ridge_regression.verbose", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._ridge._ridge_regression.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.linear_model._ridge._ridge_regression.return_n_iter", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/return_intercept", + "name": "return_intercept", + "qname": "sklearn.linear_model._ridge._ridge_regression.return_intercept", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/X_scale", + "name": "X_scale", + "qname": "sklearn.linear_model._ridge._ridge_regression.X_scale", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/X_offset", + "name": "X_offset", + "qname": "sklearn.linear_model._ridge._ridge_regression.X_offset", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_ridge_regression/check_input", + "name": "check_input", + "qname": "sklearn.linear_model._ridge._ridge_regression.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _ridge_regression(X, y, alpha, sample_weight=None, solver='auto',\n max_iter=None, tol=1e-3, verbose=0, random_state=None,\n return_n_iter=False, return_intercept=False,\n X_scale=None, X_offset=None, check_input=True):\n\n has_sw = sample_weight is not None\n\n if solver == 'auto':\n if return_intercept:\n # only sag supports fitting intercept directly\n solver = \"sag\"\n elif not sparse.issparse(X):\n solver = 
\"cholesky\"\n else:\n solver = \"sparse_cg\"\n\n if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag', 'saga'):\n raise ValueError(\"Known solvers are 'sparse_cg', 'cholesky', 'svd'\"\n \" 'lsqr', 'sag' or 'saga'. Got %s.\" % solver)\n\n if return_intercept and solver != 'sag':\n raise ValueError(\"In Ridge, only 'sag' solver can directly fit the \"\n \"intercept. Please change solver to 'sag' or set \"\n \"return_intercept=False.\")\n\n if check_input:\n _dtype = [np.float64, np.float32]\n _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X), solver)\n X = check_array(X, accept_sparse=_accept_sparse, dtype=_dtype,\n order=\"C\")\n y = check_array(y, dtype=X.dtype, ensure_2d=False, order=None)\n check_consistent_length(X, y)\n\n n_samples, n_features = X.shape\n\n if y.ndim > 2:\n raise ValueError(\"Target y has the wrong shape %s\" % str(y.shape))\n\n ravel = False\n if y.ndim == 1:\n y = y.reshape(-1, 1)\n ravel = True\n\n n_samples_, n_targets = y.shape\n\n if n_samples != n_samples_:\n raise ValueError(\"Number of samples in X and y does not correspond:\"\n \" %d != %d\" % (n_samples, n_samples_))\n\n if has_sw:\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n if solver not in ['sag', 'saga']:\n # SAG supports sample_weight directly. For other solvers,\n # we implement sample_weight via a simple rescaling.\n X, y = _rescale_data(X, y, sample_weight)\n\n # There should be either 1 or n_targets penalties\n alpha = np.asarray(alpha, dtype=X.dtype).ravel()\n if alpha.size not in [1, n_targets]:\n raise ValueError(\"Number of targets and number of penalties \"\n \"do not correspond: %d != %d\"\n % (alpha.size, n_targets))\n\n if alpha.size == 1 and n_targets > 1:\n alpha = np.repeat(alpha, n_targets)\n\n n_iter = None\n if solver == 'sparse_cg':\n coef = _solve_sparse_cg(X, y, alpha,\n max_iter=max_iter,\n tol=tol,\n verbose=verbose,\n X_offset=X_offset,\n X_scale=X_scale)\n\n elif solver == 'lsqr':\n coef, n_iter = _solve_lsqr(X, y, alpha, max_iter, tol)\n\n elif solver == 'cholesky':\n if n_features > n_samples:\n K = safe_sparse_dot(X, X.T, dense_output=True)\n try:\n dual_coef = _solve_cholesky_kernel(K, y, alpha)\n\n coef = safe_sparse_dot(X.T, dual_coef, dense_output=True).T\n except linalg.LinAlgError:\n # use SVD solver if matrix is singular\n solver = 'svd'\n else:\n try:\n coef = _solve_cholesky(X, y, alpha)\n except linalg.LinAlgError:\n # use SVD solver if matrix is singular\n solver = 'svd'\n\n elif solver in ['sag', 'saga']:\n # precompute max_squared_sum for all targets\n max_squared_sum = row_norms(X, squared=True).max()\n\n coef = np.empty((y.shape[1], n_features), dtype=X.dtype)\n n_iter = np.empty(y.shape[1], dtype=np.int32)\n intercept = np.zeros((y.shape[1], ), dtype=X.dtype)\n for i, (alpha_i, target) in enumerate(zip(alpha, y.T)):\n init = {'coef': np.zeros((n_features + int(return_intercept), 1),\n dtype=X.dtype)}\n coef_, n_iter_, _ = sag_solver(\n X, target.ravel(), sample_weight, 'squared', alpha_i, 0,\n max_iter, tol, verbose, random_state, False, max_squared_sum,\n init, is_saga=solver == 'saga')\n if return_intercept:\n coef[i] = coef_[:-1]\n intercept[i] = coef_[-1]\n else:\n coef[i] = coef_\n n_iter[i] = n_iter_\n\n if intercept.shape[0] == 1:\n intercept = intercept[0]\n coef = np.asarray(coef)\n\n if solver == 'svd':\n if sparse.issparse(X):\n raise TypeError('SVD solver does not support sparse'\n ' inputs currently')\n coef = _solve_svd(X, y, alpha)\n\n if ravel:\n # When y was passed as a 1d-array, we flatten 
the coefficients.\n coef = coef.ravel()\n\n if return_n_iter and return_intercept:\n return coef, n_iter, intercept\n elif return_intercept:\n return coef, intercept\n elif return_n_iter:\n return coef, n_iter\n else:\n return coef" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_cholesky", + "name": "_solve_cholesky", + "qname": "sklearn.linear_model._ridge._solve_cholesky", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_cholesky/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._solve_cholesky.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_cholesky/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._solve_cholesky.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_cholesky/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge._solve_cholesky.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _solve_cholesky(X, y, alpha):\n # w = inv(X^t X + alpha*Id) * X.T y\n n_features = X.shape[1]\n n_targets = y.shape[1]\n\n A = safe_sparse_dot(X.T, X, dense_output=True)\n Xy = safe_sparse_dot(X.T, y, dense_output=True)\n\n one_alpha = np.array_equal(alpha, len(alpha) * [alpha[0]])\n\n if one_alpha:\n A.flat[::n_features + 1] += alpha[0]\n return linalg.solve(A, Xy, sym_pos=True,\n overwrite_a=True).T\n else:\n coefs = np.empty([n_targets, n_features], dtype=X.dtype)\n for coef, target, current_alpha in zip(coefs, Xy.T, alpha):\n A.flat[::n_features + 1] += current_alpha\n coef[:] = linalg.solve(A, target, sym_pos=True,\n overwrite_a=False).ravel()\n A.flat[::n_features + 1] -= current_alpha\n return coefs" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_cholesky_kernel", + "name": "_solve_cholesky_kernel", + "qname": "sklearn.linear_model._ridge._solve_cholesky_kernel", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_cholesky_kernel/K", + "name": "K", + "qname": "sklearn.linear_model._ridge._solve_cholesky_kernel.K", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_cholesky_kernel/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._solve_cholesky_kernel.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_cholesky_kernel/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge._solve_cholesky_kernel.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.linear_model._ridge/_solve_cholesky_kernel/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._ridge._solve_cholesky_kernel.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_cholesky_kernel/copy", + "name": "copy", + "qname": "sklearn.linear_model._ridge._solve_cholesky_kernel.copy", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _solve_cholesky_kernel(K, y, alpha, sample_weight=None, copy=False):\n # dual_coef = inv(X X^t + alpha*Id) y\n n_samples = K.shape[0]\n n_targets = y.shape[1]\n\n if copy:\n K = K.copy()\n\n alpha = np.atleast_1d(alpha)\n one_alpha = (alpha == alpha[0]).all()\n has_sw = isinstance(sample_weight, np.ndarray) \\\n or sample_weight not in [1.0, None]\n\n if has_sw:\n # Unlike other solvers, we need to support sample_weight directly\n # because K might be a pre-computed kernel.\n sw = np.sqrt(np.atleast_1d(sample_weight))\n y = y * sw[:, np.newaxis]\n K *= np.outer(sw, sw)\n\n if one_alpha:\n # Only one penalty, we can solve multi-target problems in one time.\n K.flat[::n_samples + 1] += alpha[0]\n\n try:\n # Note: we must use overwrite_a=False in order to be able to\n # use the fall-back solution below in case a LinAlgError\n # is raised\n dual_coef = linalg.solve(K, y, sym_pos=True,\n overwrite_a=False)\n except np.linalg.LinAlgError:\n warnings.warn(\"Singular matrix in solving dual problem. Using \"\n \"least-squares solution instead.\")\n dual_coef = linalg.lstsq(K, y)[0]\n\n # K is expensive to compute and store in memory so change it back in\n # case it was user-given.\n K.flat[::n_samples + 1] -= alpha[0]\n\n if has_sw:\n dual_coef *= sw[:, np.newaxis]\n\n return dual_coef\n else:\n # One penalty per target. 
We need to solve each target separately.\n dual_coefs = np.empty([n_targets, n_samples], K.dtype)\n\n for dual_coef, target, current_alpha in zip(dual_coefs, y.T, alpha):\n K.flat[::n_samples + 1] += current_alpha\n\n dual_coef[:] = linalg.solve(K, target, sym_pos=True,\n overwrite_a=False).ravel()\n\n K.flat[::n_samples + 1] -= current_alpha\n\n if has_sw:\n dual_coefs *= sw[np.newaxis, :]\n\n return dual_coefs.T" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_lsqr", + "name": "_solve_lsqr", + "qname": "sklearn.linear_model._ridge._solve_lsqr", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_lsqr/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._solve_lsqr.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_lsqr/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._solve_lsqr.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_lsqr/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge._solve_lsqr.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_lsqr/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._ridge._solve_lsqr.max_iter", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_lsqr/tol", + "name": "tol", + "qname": "sklearn.linear_model._ridge._solve_lsqr.tol", + "default_value": "0.001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _solve_lsqr(X, y, alpha, max_iter=None, tol=1e-3):\n n_samples, n_features = X.shape\n coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)\n n_iter = np.empty(y.shape[1], dtype=np.int32)\n\n # According to the lsqr documentation, alpha = damp^2.\n sqrt_alpha = np.sqrt(alpha)\n\n for i in range(y.shape[1]):\n y_column = y[:, i]\n info = sp_linalg.lsqr(X, y_column, damp=sqrt_alpha[i],\n atol=tol, btol=tol, iter_lim=max_iter)\n coefs[i] = info[0]\n n_iter[i] = info[2]\n\n return coefs, n_iter" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_sparse_cg", + "name": "_solve_sparse_cg", + "qname": "sklearn.linear_model._ridge._solve_sparse_cg", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_sparse_cg/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._solve_sparse_cg.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_sparse_cg/y", + "name": "y", + "qname": 
"sklearn.linear_model._ridge._solve_sparse_cg.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_sparse_cg/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge._solve_sparse_cg.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_sparse_cg/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._ridge._solve_sparse_cg.max_iter", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_sparse_cg/tol", + "name": "tol", + "qname": "sklearn.linear_model._ridge._solve_sparse_cg.tol", + "default_value": "0.001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_sparse_cg/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._ridge._solve_sparse_cg.verbose", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_sparse_cg/X_offset", + "name": "X_offset", + "qname": "sklearn.linear_model._ridge._solve_sparse_cg.X_offset", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_sparse_cg/X_scale", + "name": "X_scale", + "qname": "sklearn.linear_model._ridge._solve_sparse_cg.X_scale", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _solve_sparse_cg(X, y, alpha, max_iter=None, tol=1e-3, verbose=0,\n X_offset=None, X_scale=None):\n\n def _get_rescaled_operator(X):\n\n X_offset_scale = X_offset / X_scale\n\n def matvec(b):\n return X.dot(b) - b.dot(X_offset_scale)\n\n def rmatvec(b):\n return X.T.dot(b) - X_offset_scale * np.sum(b)\n\n X1 = sparse.linalg.LinearOperator(shape=X.shape,\n matvec=matvec,\n rmatvec=rmatvec)\n return X1\n\n n_samples, n_features = X.shape\n\n if X_offset is None or X_scale is None:\n X1 = sp_linalg.aslinearoperator(X)\n else:\n X1 = _get_rescaled_operator(X)\n\n coefs = np.empty((y.shape[1], n_features), dtype=X.dtype)\n\n if n_features > n_samples:\n def create_mv(curr_alpha):\n def _mv(x):\n return X1.matvec(X1.rmatvec(x)) + curr_alpha * x\n return _mv\n else:\n def create_mv(curr_alpha):\n def _mv(x):\n return X1.rmatvec(X1.matvec(x)) + curr_alpha * x\n return _mv\n\n for i in range(y.shape[1]):\n y_column = y[:, i]\n\n mv = create_mv(alpha[i])\n if n_features > n_samples:\n # kernel ridge\n # w = X.T * inv(X X^t + alpha*Id) y\n C = sp_linalg.LinearOperator(\n (n_samples, n_samples), 
matvec=mv, dtype=X.dtype)\n # FIXME atol\n try:\n coef, info = sp_linalg.cg(C, y_column, tol=tol, atol='legacy')\n except TypeError:\n # old scipy\n coef, info = sp_linalg.cg(C, y_column, tol=tol)\n coefs[i] = X1.rmatvec(coef)\n else:\n # linear ridge\n # w = inv(X^t X + alpha*Id) * X.T y\n y_column = X1.rmatvec(y_column)\n C = sp_linalg.LinearOperator(\n (n_features, n_features), matvec=mv, dtype=X.dtype)\n # FIXME atol\n try:\n coefs[i], info = sp_linalg.cg(C, y_column, maxiter=max_iter,\n tol=tol, atol='legacy')\n except TypeError:\n # old scipy\n coefs[i], info = sp_linalg.cg(C, y_column, maxiter=max_iter,\n tol=tol)\n\n if info < 0:\n raise ValueError(\"Failed with error code %d\" % info)\n\n if max_iter is None and info > 0 and verbose:\n warnings.warn(\"sparse_cg did not converge after %d iterations.\" %\n info, ConvergenceWarning)\n\n return coefs" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_svd", + "name": "_solve_svd", + "qname": "sklearn.linear_model._ridge._solve_svd", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_svd/X", + "name": "X", + "qname": "sklearn.linear_model._ridge._solve_svd.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_svd/y", + "name": "y", + "qname": "sklearn.linear_model._ridge._solve_svd.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/_solve_svd/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge._solve_svd.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _solve_svd(X, y, alpha):\n U, s, Vt = linalg.svd(X, full_matrices=False)\n idx = s > 1e-15 # same default value as scipy.linalg.pinv\n s_nnz = s[idx][:, np.newaxis]\n UTy = np.dot(U.T, y)\n d = np.zeros((s.size, alpha.size), dtype=X.dtype)\n d[idx] = s_nnz / (s_nnz ** 2 + alpha)\n d_UT_y = d * UTy\n return np.dot(Vt.T, d_UT_y).T" + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/ridge_regression", + "name": "ridge_regression", + "qname": "sklearn.linear_model._ridge.ridge_regression", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._ridge/ridge_regression/X", + "name": "X", + "qname": "sklearn.linear_model._ridge.ridge_regression.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix, LinearOperator} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/ridge_regression/y", + "name": "y", + "qname": "sklearn.linear_model._ridge.ridge_regression.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + 
"type": "ndarray of shape (n_samples,) or (n_samples, n_targets)", + "default_value": "", + "description": "Target values" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,) or (n_samples, n_targets)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/ridge_regression/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._ridge.ridge_regression.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float or array-like of shape (n_targets,)", + "default_value": "", + "description": "Regularization strength; must be a positive float. Regularization\nimproves the conditioning of the problem and reduces the variance of\nthe estimates. Larger values specify stronger regularization.\nAlpha corresponds to ``1 / (2C)`` in other linear models such as\n:class:`~sklearn.linear_model.LogisticRegression` or\n:class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\nassumed to be specific to the targets. Hence they must correspond in\nnumber." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_targets,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/ridge_regression/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._ridge.ridge_regression.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or array-like of shape (n_samples,)", + "default_value": "None", + "description": "Individual weights for each sample. If given a float, every sample\nwill have the same weight. If sample_weight is not None and\nsolver='auto', the solver will be set to 'cholesky'.\n\n.. versionadded:: 0.17" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/ridge_regression/solver", + "name": "solver", + "qname": "sklearn.linear_model._ridge.ridge_regression.solver", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}", + "default_value": "'auto'", + "description": "Solver to use in the computational routines:\n\n- 'auto' chooses the solver automatically based on the type of data.\n\n- 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. More stable for singular matrices than 'cholesky'.\n\n- 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution via a Cholesky decomposition of\n dot(X.T, X)\n\n- 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n- 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n procedure.\n\n- 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its improved, unbiased version named SAGA. Both methods also use an\n iterative procedure, and are often faster than other solvers when\n both n_samples and n_features are large. 
Note that the fast\n convergence of 'sag' and 'saga' is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\nThe last five solvers support both dense and sparse data. However, only\n'sag' and 'sparse_cg' support sparse input when `fit_intercept` is\nTrue.\n\n.. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n.. versionadded:: 0.19\n SAGA solver." + }, + "type": { + "kind": "EnumType", + "values": ["svd", "sparse_cg", "cholesky", "saga", "auto", "sag", "lsqr"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/ridge_regression/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._ridge.ridge_regression.max_iter", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Maximum number of iterations for conjugate gradient solver.\nFor the 'sparse_cg' and 'lsqr' solvers, the default value is determined\nby scipy.sparse.linalg. For the 'sag' and 'saga' solvers, the default value\nis 1000." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/ridge_regression/tol", + "name": "tol", + "qname": "sklearn.linear_model._ridge.ridge_regression.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Precision of the solution." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/ridge_regression/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._ridge.ridge_regression.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Verbosity level. Setting verbose > 0 will display additional\ninformation depending on the solver used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/ridge_regression/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._ridge.ridge_regression.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\nSee :term:`Glossary <random_state>` for details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/ridge_regression/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.linear_model._ridge.ridge_regression.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the method also returns `n_iter`, the actual number of\niterations performed by the solver.\n\n.. 
versionadded:: 0.17" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/ridge_regression/return_intercept", + "name": "return_intercept", + "qname": "sklearn.linear_model._ridge.ridge_regression.return_intercept", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True and if X is sparse, the method also returns the intercept,\nand the solver is automatically changed to 'sag'. This is only a\ntemporary fix for fitting the intercept with sparse data. For dense\ndata, use sklearn.linear_model._preprocess_data before your regression.\n\n.. versionadded:: 0.17" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._ridge/ridge_regression/check_input", + "name": "check_input", + "qname": "sklearn.linear_model._ridge.ridge_regression.check_input", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, the input arrays X and y will not be checked.\n\n.. versionadded:: 0.21" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Solve the ridge equation by the method of normal equations.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Solve the ridge equation by the method of normal equations.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {ndarray, sparse matrix, LinearOperator} of shape (n_samples, n_features)\n Training data\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\nalpha : float or array-like of shape (n_targets,)\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n assumed to be specific to the targets. Hence they must correspond in\n number.\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight. If sample_weight is not None and\n solver='auto', the solver will be set to 'cholesky'.\n\n .. versionadded:: 0.17\n\nsolver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, default='auto'\n Solver to use in the computational routines:\n\n - 'auto' chooses the solver automatically based on the type of data.\n\n - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. More stable for singular matrices than 'cholesky'.\n\n - 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution via a Cholesky decomposition of\n dot(X.T, X)\n\n - 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n - 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. 
It is the fastest and uses an iterative\n procedure.\n\n - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its improved, unbiased version named SAGA. Both methods also use an\n iterative procedure, and are often faster than other solvers when\n both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n\n All last five solvers support both dense and sparse data. However, only\n 'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is\n True.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\nmax_iter : int, default=None\n Maximum number of iterations for conjugate gradient solver.\n For the 'sparse_cg' and 'lsqr' solvers, the default value is determined\n by scipy.sparse.linalg. For 'sag' and saga solver, the default value is\n 1000.\n\ntol : float, default=1e-3\n Precision of the solution.\n\nverbose : int, default=0\n Verbosity level. Setting verbose > 0 will display additional\n information depending on the solver used.\n\nrandom_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n See :term:`Glossary ` for details.\n\nreturn_n_iter : bool, default=False\n If True, the method also returns `n_iter`, the actual number of\n iteration performed by the solver.\n\n .. versionadded:: 0.17\n\nreturn_intercept : bool, default=False\n If True and if X is sparse, the method also returns the intercept,\n and the solver is automatically changed to 'sag'. This is only a\n temporary fix for fitting the intercept with sparse data. For dense\n data, use sklearn.linear_model._preprocess_data before your regression.\n\n .. versionadded:: 0.17\n\ncheck_input : bool, default=True\n If False, the input arrays X and y will not be checked.\n\n .. versionadded:: 0.21\n\nReturns\n-------\ncoef : ndarray of shape (n_features,) or (n_targets, n_features)\n Weight vector(s).\n\nn_iter : int, optional\n The actual number of iteration performed by the solver.\n Only returned if `return_n_iter` is True.\n\nintercept : float or ndarray of shape (n_targets,)\n The intercept of the model. Only returned if `return_intercept`\n is True and if X is a scipy sparse array.\n\nNotes\n-----\nThis function won't compute the intercept.", + "code": "@_deprecate_positional_args\ndef ridge_regression(X, y, alpha, *, sample_weight=None, solver='auto',\n max_iter=None, tol=1e-3, verbose=0, random_state=None,\n return_n_iter=False, return_intercept=False,\n check_input=True):\n \"\"\"Solve the ridge equation by the method of normal equations.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix, LinearOperator} of shape \\\n (n_samples, n_features)\n Training data\n\n y : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\n alpha : float or array-like of shape (n_targets,)\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n assumed to be specific to the targets. 
Hence they must correspond in\n number.\n\n sample_weight : float or array-like of shape (n_samples,), default=None\n Individual weights for each sample. If given a float, every sample\n will have the same weight. If sample_weight is not None and\n solver='auto', the solver will be set to 'cholesky'.\n\n .. versionadded:: 0.17\n\n solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, \\\n default='auto'\n Solver to use in the computational routines:\n\n - 'auto' chooses the solver automatically based on the type of data.\n\n - 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. More stable for singular matrices than 'cholesky'.\n\n - 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution via a Cholesky decomposition of\n dot(X.T, X)\n\n - 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n - 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n procedure.\n\n - 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its improved, unbiased version named SAGA. Both methods also use an\n iterative procedure, and are often faster than other solvers when\n both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n\n All last five solvers support both dense and sparse data. However, only\n 'sag' and 'sparse_cg' supports sparse input when `fit_intercept` is\n True.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\n max_iter : int, default=None\n Maximum number of iterations for conjugate gradient solver.\n For the 'sparse_cg' and 'lsqr' solvers, the default value is determined\n by scipy.sparse.linalg. For 'sag' and saga solver, the default value is\n 1000.\n\n tol : float, default=1e-3\n Precision of the solution.\n\n verbose : int, default=0\n Verbosity level. Setting verbose > 0 will display additional\n information depending on the solver used.\n\n random_state : int, RandomState instance, default=None\n Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\n See :term:`Glossary ` for details.\n\n return_n_iter : bool, default=False\n If True, the method also returns `n_iter`, the actual number of\n iteration performed by the solver.\n\n .. versionadded:: 0.17\n\n return_intercept : bool, default=False\n If True and if X is sparse, the method also returns the intercept,\n and the solver is automatically changed to 'sag'. This is only a\n temporary fix for fitting the intercept with sparse data. For dense\n data, use sklearn.linear_model._preprocess_data before your regression.\n\n .. versionadded:: 0.17\n\n check_input : bool, default=True\n If False, the input arrays X and y will not be checked.\n\n .. versionadded:: 0.21\n\n Returns\n -------\n coef : ndarray of shape (n_features,) or (n_targets, n_features)\n Weight vector(s).\n\n n_iter : int, optional\n The actual number of iteration performed by the solver.\n Only returned if `return_n_iter` is True.\n\n intercept : float or ndarray of shape (n_targets,)\n The intercept of the model. 
Only returned if `return_intercept`\n is True and if X is a scipy sparse array.\n\n Notes\n -----\n This function won't compute the intercept.\n \"\"\"\n return _ridge_regression(X, y, alpha,\n sample_weight=sample_weight,\n solver=solver,\n max_iter=max_iter,\n tol=tol,\n verbose=verbose,\n random_state=random_state,\n return_n_iter=return_n_iter,\n return_intercept=return_intercept,\n X_scale=None,\n X_offset=None,\n check_input=check_input)" + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/get_auto_step_size", + "name": "get_auto_step_size", + "qname": "sklearn.linear_model._sag.get_auto_step_size", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._sag/get_auto_step_size/max_squared_sum", + "name": "max_squared_sum", + "qname": "sklearn.linear_model._sag.get_auto_step_size.max_squared_sum", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Maximum squared sum of X over samples." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/get_auto_step_size/alpha_scaled", + "name": "alpha_scaled", + "qname": "sklearn.linear_model._sag.get_auto_step_size.alpha_scaled", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Constant that multiplies the regularization term, scaled by\n1. / n_samples, the number of samples." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/get_auto_step_size/loss", + "name": "loss", + "qname": "sklearn.linear_model._sag.get_auto_step_size.loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'log', 'squared', 'multinomial'}", + "default_value": "", + "description": "The loss function used in SAG solver." + }, + "type": { + "kind": "EnumType", + "values": ["multinomial", "log", "squared"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/get_auto_step_size/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._sag.get_auto_step_size.fit_intercept", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Specifies if a constant (a.k.a. bias or intercept) will be\nadded to the decision function." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/get_auto_step_size/n_samples", + "name": "n_samples", + "qname": "sklearn.linear_model._sag.get_auto_step_size.n_samples", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of rows in X. Useful if is_saga=True." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/get_auto_step_size/is_saga", + "name": "is_saga", + "qname": "sklearn.linear_model._sag.get_auto_step_size.is_saga", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return step size for the SAGA algorithm or the SAG\nalgorithm." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute automatic step size for SAG solver.\n\nThe step size is set to 1 / (alpha_scaled + L + fit_intercept) where L is\nthe max sum of squares for over all samples.", + "docstring": "Compute automatic step size for SAG solver.\n\nThe step size is set to 1 / (alpha_scaled + L + fit_intercept) where L is\nthe max sum of squares for over all samples.\n\nParameters\n----------\nmax_squared_sum : float\n Maximum squared sum of X over samples.\n\nalpha_scaled : float\n Constant that multiplies the regularization term, scaled by\n 1. / n_samples, the number of samples.\n\nloss : {'log', 'squared', 'multinomial'}\n The loss function used in SAG solver.\n\nfit_intercept : bool\n Specifies if a constant (a.k.a. bias or intercept) will be\n added to the decision function.\n\nn_samples : int, default=None\n Number of rows in X. Useful if is_saga=True.\n\nis_saga : bool, default=False\n Whether to return step size for the SAGA algorithm or the SAG\n algorithm.\n\nReturns\n-------\nstep_size : float\n Step size used in SAG solver.\n\nReferences\n----------\nSchmidt, M., Roux, N. L., & Bach, F. (2013).\nMinimizing finite sums with the stochastic average gradient\nhttps://hal.inria.fr/hal-00860051/document\n\nDefazio, A., Bach F. & Lacoste-Julien S. (2014).\nSAGA: A Fast Incremental Gradient Method With Support\nfor Non-Strongly Convex Composite Objectives\nhttps://arxiv.org/abs/1407.0202", + "code": "def get_auto_step_size(max_squared_sum, alpha_scaled, loss, fit_intercept,\n n_samples=None,\n is_saga=False):\n \"\"\"Compute automatic step size for SAG solver.\n\n The step size is set to 1 / (alpha_scaled + L + fit_intercept) where L is\n the max sum of squares for over all samples.\n\n Parameters\n ----------\n max_squared_sum : float\n Maximum squared sum of X over samples.\n\n alpha_scaled : float\n Constant that multiplies the regularization term, scaled by\n 1. / n_samples, the number of samples.\n\n loss : {'log', 'squared', 'multinomial'}\n The loss function used in SAG solver.\n\n fit_intercept : bool\n Specifies if a constant (a.k.a. bias or intercept) will be\n added to the decision function.\n\n n_samples : int, default=None\n Number of rows in X. Useful if is_saga=True.\n\n is_saga : bool, default=False\n Whether to return step size for the SAGA algorithm or the SAG\n algorithm.\n\n Returns\n -------\n step_size : float\n Step size used in SAG solver.\n\n References\n ----------\n Schmidt, M., Roux, N. L., & Bach, F. (2013).\n Minimizing finite sums with the stochastic average gradient\n https://hal.inria.fr/hal-00860051/document\n\n Defazio, A., Bach F. & Lacoste-Julien S. (2014).\n SAGA: A Fast Incremental Gradient Method With Support\n for Non-Strongly Convex Composite Objectives\n https://arxiv.org/abs/1407.0202\n \"\"\"\n if loss in ('log', 'multinomial'):\n L = (0.25 * (max_squared_sum + int(fit_intercept)) + alpha_scaled)\n elif loss == 'squared':\n # inverse Lipschitz constant for squared loss\n L = max_squared_sum + int(fit_intercept) + alpha_scaled\n else:\n raise ValueError(\"Unknown loss function for SAG solver, got %s \"\n \"instead of 'log' or 'squared'\" % loss)\n if is_saga:\n # SAGA theoretical step size is 1/3L or 1 / (2 * (L + mu n))\n # See Defazio et al. 2014\n mun = min(2 * n_samples * alpha_scaled, L)\n step = 1. 
/ (2 * L + mun)\n else:\n # SAG theoretical step size is 1/16L but it is recommended to use 1 / L\n # see http://www.birs.ca//workshops//2014/14w5003/files/schmidt.pdf,\n # slide 65\n step = 1. / L\n return step" + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver", + "name": "sag_solver", + "qname": "sklearn.linear_model._sag.sag_solver", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/X", + "name": "X", + "qname": "sklearn.linear_model._sag.sag_solver.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/y", + "name": "y", + "qname": "sklearn.linear_model._sag.sag_solver.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target values. With loss='multinomial', y must be label encoded\n(see preprocessing.LabelEncoder)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._sag.sag_solver.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weights applied to individual samples (1. for unweighted)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/loss", + "name": "loss", + "qname": "sklearn.linear_model._sag.sag_solver.loss", + "default_value": "'log'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'log', 'squared', 'multinomial'}", + "default_value": "'log'", + "description": "Loss function that will be optimized:\n-'log' is the binary logistic loss, as used in LogisticRegression.\n-'squared' is the squared loss, as used in Ridge.\n-'multinomial' is the multinomial logistic loss, as used in\n LogisticRegression.\n\n.. versionadded:: 0.18\n *loss='multinomial'*" + }, + "type": { + "kind": "EnumType", + "values": ["multinomial", "log", "squared"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._sag.sag_solver.alpha", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.", + "description": "L2 regularization term in the objective function\n``(0.5 * alpha * || W ||_F^2)``." 
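# --- Illustrative aside: making the step-size branches above concrete.
# Hedged: sklearn.linear_model._sag is a private module, so this direct call
# is a sketch rather than a supported API; the numbers are arbitrary.
from sklearn.linear_model._sag import get_auto_step_size

# For loss='squared', the code above sets
#   L = max_squared_sum + int(fit_intercept) + alpha_scaled
# and plain SAG (is_saga=False) returns step = 1 / L.
step = get_auto_step_size(max_squared_sum=25.0, alpha_scaled=0.01,
                          loss='squared', fit_intercept=True,
                          n_samples=100, is_saga=False)
assert abs(step - 1.0 / (25.0 + 1 + 0.01)) < 1e-12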
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/beta", + "name": "beta", + "qname": "sklearn.linear_model._sag.sag_solver.beta", + "default_value": "0.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.", + "description": "L1 regularization term in the objective function\n``(beta * || W ||_1)``. Only applied if ``is_saga`` is set to True." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._sag.sag_solver.max_iter", + "default_value": "1000", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The max number of passes over the training data if the stopping\ncriteria is not reached." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/tol", + "name": "tol", + "qname": "sklearn.linear_model._sag.sag_solver.tol", + "default_value": "0.001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "double", + "default_value": "0.001", + "description": "The stopping criteria for the weights. The iterations will stop when\nmax(change in weights) / max(weights) < tol." + }, + "type": { + "kind": "NamedType", + "name": "double" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._sag.sag_solver.verbose", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._sag.sag_solver.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used when shuffling the data. Pass an int for reproducible output\nacross multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/check_input", + "name": "check_input", + "qname": "sklearn.linear_model._sag.sag_solver.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, the input arrays X and y will not be checked." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/max_squared_sum", + "name": "max_squared_sum", + "qname": "sklearn.linear_model._sag.sag_solver.max_squared_sum", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Maximum squared sum of X over samples. If None, it will be computed,\ngoing through all the samples. 
The value should be precomputed\nto speed up cross validation." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/warm_start_mem", + "name": "warm_start_mem", + "qname": "sklearn.linear_model._sag.sag_solver.warm_start_mem", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "The initialization parameters used for warm starting. Warm starting is\ncurrently used in LogisticRegression but not in Ridge.\nIt contains:\n - 'coef': the weight vector, with the intercept in last line\n if the intercept is fitted.\n - 'gradient_memory': the scalar gradient for all seen samples.\n - 'sum_gradient': the sum of gradient over all seen samples,\n for each feature.\n - 'intercept_sum_gradient': the sum of gradient over all seen\n samples, for the intercept.\n - 'seen': array of boolean describing the seen samples.\n - 'num_seen': the number of seen samples." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._sag/sag_solver/is_saga", + "name": "is_saga", + "qname": "sklearn.linear_model._sag.sag_solver.is_saga", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use the SAGA algorithm or the SAG algorithm. SAGA behaves\nbetter in the first epochs, and allow for l1 regularisation." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "SAG solver for Ridge and LogisticRegression.\n\nSAG stands for Stochastic Average Gradient: the gradient of the loss is\nestimated each sample at a time and the model is updated along the way with\na constant learning rate.\n\nIMPORTANT NOTE: 'sag' solver converges faster on columns that are on the\nsame scale. You can normalize the data by using\nsklearn.preprocessing.StandardScaler on your data before passing it to the\nfit method.\n\nThis implementation works with data represented as dense numpy arrays or\nsparse scipy arrays of floating point values for the features. It will\nfit the data according to squared loss or log loss.\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using the squared euclidean norm L2.\n\n.. versionadded:: 0.17", + "docstring": "SAG solver for Ridge and LogisticRegression.\n\nSAG stands for Stochastic Average Gradient: the gradient of the loss is\nestimated each sample at a time and the model is updated along the way with\na constant learning rate.\n\nIMPORTANT NOTE: 'sag' solver converges faster on columns that are on the\nsame scale. You can normalize the data by using\nsklearn.preprocessing.StandardScaler on your data before passing it to the\nfit method.\n\nThis implementation works with data represented as dense numpy arrays or\nsparse scipy arrays of floating point values for the features. It will\nfit the data according to squared loss or log loss.\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using the squared euclidean norm L2.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Target values. 
With loss='multinomial', y must be label encoded\n (see preprocessing.LabelEncoder).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nloss : {'log', 'squared', 'multinomial'}, default='log'\n Loss function that will be optimized:\n -'log' is the binary logistic loss, as used in LogisticRegression.\n -'squared' is the squared loss, as used in Ridge.\n -'multinomial' is the multinomial logistic loss, as used in\n LogisticRegression.\n\n .. versionadded:: 0.18\n *loss='multinomial'*\n\nalpha : float, default=1.\n L2 regularization term in the objective function\n ``(0.5 * alpha * || W ||_F^2)``.\n\nbeta : float, default=0.\n L1 regularization term in the objective function\n ``(beta * || W ||_1)``. Only applied if ``is_saga`` is set to True.\n\nmax_iter : int, default=1000\n The max number of passes over the training data if the stopping\n criteria is not reached.\n\ntol : double, default=0.001\n The stopping criteria for the weights. The iterations will stop when\n max(change in weights) / max(weights) < tol.\n\nverbose : int, default=0\n The verbosity level.\n\nrandom_state : int, RandomState instance or None, default=None\n Used when shuffling the data. Pass an int for reproducible output\n across multiple function calls.\n See :term:`Glossary `.\n\ncheck_input : bool, default=True\n If False, the input arrays X and y will not be checked.\n\nmax_squared_sum : float, default=None\n Maximum squared sum of X over samples. If None, it will be computed,\n going through all the samples. The value should be precomputed\n to speed up cross validation.\n\nwarm_start_mem : dict, default=None\n The initialization parameters used for warm starting. Warm starting is\n currently used in LogisticRegression but not in Ridge.\n It contains:\n - 'coef': the weight vector, with the intercept in last line\n if the intercept is fitted.\n - 'gradient_memory': the scalar gradient for all seen samples.\n - 'sum_gradient': the sum of gradient over all seen samples,\n for each feature.\n - 'intercept_sum_gradient': the sum of gradient over all seen\n samples, for the intercept.\n - 'seen': array of boolean describing the seen samples.\n - 'num_seen': the number of seen samples.\n\nis_saga : bool, default=False\n Whether to use the SAGA algorithm or the SAG algorithm. SAGA behaves\n better in the first epochs, and allow for l1 regularisation.\n\nReturns\n-------\ncoef_ : ndarray of shape (n_features,)\n Weight vector.\n\nn_iter_ : int\n The number of full pass on all samples.\n\nwarm_start_mem : dict\n Contains a 'coef' key with the fitted result, and possibly the\n fitted intercept at the end of the array. Contains also other keys\n used for warm starting.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import linear_model\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> X = rng.randn(n_samples, n_features)\n>>> y = rng.randn(n_samples)\n>>> clf = linear_model.Ridge(solver='sag')\n>>> clf.fit(X, y)\nRidge(solver='sag')\n\n>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n>>> y = np.array([1, 1, 2, 2])\n>>> clf = linear_model.LogisticRegression(\n... solver='sag', multi_class='multinomial')\n>>> clf.fit(X, y)\nLogisticRegression(multi_class='multinomial', solver='sag')\n\nReferences\n----------\nSchmidt, M., Roux, N. L., & Bach, F. (2013).\nMinimizing finite sums with the stochastic average gradient\nhttps://hal.inria.fr/hal-00860051/document\n\nDefazio, A., Bach F. & Lacoste-Julien S. 
(2014).\nSAGA: A Fast Incremental Gradient Method With Support\nfor Non-Strongly Convex Composite Objectives\nhttps://arxiv.org/abs/1407.0202\n\nSee Also\n--------\nRidge, SGDRegressor, ElasticNet, Lasso, SVR,\nLogisticRegression, SGDClassifier, LinearSVC, Perceptron", + "code": "@_deprecate_positional_args\ndef sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0.,\n max_iter=1000, tol=0.001, verbose=0, random_state=None,\n check_input=True, max_squared_sum=None,\n warm_start_mem=None,\n is_saga=False):\n \"\"\"SAG solver for Ridge and LogisticRegression.\n\n SAG stands for Stochastic Average Gradient: the gradient of the loss is\n estimated each sample at a time and the model is updated along the way with\n a constant learning rate.\n\n IMPORTANT NOTE: 'sag' solver converges faster on columns that are on the\n same scale. You can normalize the data by using\n sklearn.preprocessing.StandardScaler on your data before passing it to the\n fit method.\n\n This implementation works with data represented as dense numpy arrays or\n sparse scipy arrays of floating point values for the features. It will\n fit the data according to squared loss or log loss.\n\n The regularizer is a penalty added to the loss function that shrinks model\n parameters towards the zero vector using the squared euclidean norm L2.\n\n .. versionadded:: 0.17\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n y : ndarray of shape (n_samples,)\n Target values. With loss='multinomial', y must be label encoded\n (see preprocessing.LabelEncoder).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n loss : {'log', 'squared', 'multinomial'}, default='log'\n Loss function that will be optimized:\n -'log' is the binary logistic loss, as used in LogisticRegression.\n -'squared' is the squared loss, as used in Ridge.\n -'multinomial' is the multinomial logistic loss, as used in\n LogisticRegression.\n\n .. versionadded:: 0.18\n *loss='multinomial'*\n\n alpha : float, default=1.\n L2 regularization term in the objective function\n ``(0.5 * alpha * || W ||_F^2)``.\n\n beta : float, default=0.\n L1 regularization term in the objective function\n ``(beta * || W ||_1)``. Only applied if ``is_saga`` is set to True.\n\n max_iter : int, default=1000\n The max number of passes over the training data if the stopping\n criteria is not reached.\n\n tol : double, default=0.001\n The stopping criteria for the weights. The iterations will stop when\n max(change in weights) / max(weights) < tol.\n\n verbose : int, default=0\n The verbosity level.\n\n random_state : int, RandomState instance or None, default=None\n Used when shuffling the data. Pass an int for reproducible output\n across multiple function calls.\n See :term:`Glossary `.\n\n check_input : bool, default=True\n If False, the input arrays X and y will not be checked.\n\n max_squared_sum : float, default=None\n Maximum squared sum of X over samples. If None, it will be computed,\n going through all the samples. The value should be precomputed\n to speed up cross validation.\n\n warm_start_mem : dict, default=None\n The initialization parameters used for warm starting. 
Warm starting is\n currently used in LogisticRegression but not in Ridge.\n It contains:\n - 'coef': the weight vector, with the intercept in last line\n if the intercept is fitted.\n - 'gradient_memory': the scalar gradient for all seen samples.\n - 'sum_gradient': the sum of gradient over all seen samples,\n for each feature.\n - 'intercept_sum_gradient': the sum of gradient over all seen\n samples, for the intercept.\n - 'seen': array of boolean describing the seen samples.\n - 'num_seen': the number of seen samples.\n\n is_saga : bool, default=False\n Whether to use the SAGA algorithm or the SAG algorithm. SAGA behaves\n better in the first epochs, and allow for l1 regularisation.\n\n Returns\n -------\n coef_ : ndarray of shape (n_features,)\n Weight vector.\n\n n_iter_ : int\n The number of full pass on all samples.\n\n warm_start_mem : dict\n Contains a 'coef' key with the fitted result, and possibly the\n fitted intercept at the end of the array. Contains also other keys\n used for warm starting.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn import linear_model\n >>> n_samples, n_features = 10, 5\n >>> rng = np.random.RandomState(0)\n >>> X = rng.randn(n_samples, n_features)\n >>> y = rng.randn(n_samples)\n >>> clf = linear_model.Ridge(solver='sag')\n >>> clf.fit(X, y)\n Ridge(solver='sag')\n\n >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])\n >>> y = np.array([1, 1, 2, 2])\n >>> clf = linear_model.LogisticRegression(\n ... solver='sag', multi_class='multinomial')\n >>> clf.fit(X, y)\n LogisticRegression(multi_class='multinomial', solver='sag')\n\n References\n ----------\n Schmidt, M., Roux, N. L., & Bach, F. (2013).\n Minimizing finite sums with the stochastic average gradient\n https://hal.inria.fr/hal-00860051/document\n\n Defazio, A., Bach F. & Lacoste-Julien S. 
(2014).\n SAGA: A Fast Incremental Gradient Method With Support\n for Non-Strongly Convex Composite Objectives\n https://arxiv.org/abs/1407.0202\n\n See Also\n --------\n Ridge, SGDRegressor, ElasticNet, Lasso, SVR,\n LogisticRegression, SGDClassifier, LinearSVC, Perceptron\n \"\"\"\n if warm_start_mem is None:\n warm_start_mem = {}\n # Ridge default max_iter is None\n if max_iter is None:\n max_iter = 1000\n\n if check_input:\n _dtype = [np.float64, np.float32]\n X = check_array(X, dtype=_dtype, accept_sparse='csr', order='C')\n y = check_array(y, dtype=_dtype, ensure_2d=False, order='C')\n\n n_samples, n_features = X.shape[0], X.shape[1]\n # As in SGD, the alpha is scaled by n_samples.\n alpha_scaled = float(alpha) / n_samples\n beta_scaled = float(beta) / n_samples\n\n # if loss == 'multinomial', y should be label encoded.\n n_classes = int(y.max()) + 1 if loss == 'multinomial' else 1\n\n # initialization\n sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)\n\n if 'coef' in warm_start_mem.keys():\n coef_init = warm_start_mem['coef']\n else:\n # assume fit_intercept is False\n coef_init = np.zeros((n_features, n_classes), dtype=X.dtype,\n order='C')\n\n # coef_init contains possibly the intercept_init at the end.\n # Note that Ridge centers the data before fitting, so fit_intercept=False.\n fit_intercept = coef_init.shape[0] == (n_features + 1)\n if fit_intercept:\n intercept_init = coef_init[-1, :]\n coef_init = coef_init[:-1, :]\n else:\n intercept_init = np.zeros(n_classes, dtype=X.dtype)\n\n if 'intercept_sum_gradient' in warm_start_mem.keys():\n intercept_sum_gradient = warm_start_mem['intercept_sum_gradient']\n else:\n intercept_sum_gradient = np.zeros(n_classes, dtype=X.dtype)\n\n if 'gradient_memory' in warm_start_mem.keys():\n gradient_memory_init = warm_start_mem['gradient_memory']\n else:\n gradient_memory_init = np.zeros((n_samples, n_classes),\n dtype=X.dtype, order='C')\n if 'sum_gradient' in warm_start_mem.keys():\n sum_gradient_init = warm_start_mem['sum_gradient']\n else:\n sum_gradient_init = np.zeros((n_features, n_classes),\n dtype=X.dtype, order='C')\n\n if 'seen' in warm_start_mem.keys():\n seen_init = warm_start_mem['seen']\n else:\n seen_init = np.zeros(n_samples, dtype=np.int32, order='C')\n\n if 'num_seen' in warm_start_mem.keys():\n num_seen_init = warm_start_mem['num_seen']\n else:\n num_seen_init = 0\n\n dataset, intercept_decay = make_dataset(X, y, sample_weight, random_state)\n\n if max_squared_sum is None:\n max_squared_sum = row_norms(X, squared=True).max()\n step_size = get_auto_step_size(max_squared_sum, alpha_scaled, loss,\n fit_intercept, n_samples=n_samples,\n is_saga=is_saga)\n if step_size * alpha_scaled == 1:\n raise ZeroDivisionError(\"Current sag implementation does not handle \"\n \"the case step_size * alpha_scaled == 1\")\n\n sag = sag64 if X.dtype == np.float64 else sag32\n num_seen, n_iter_ = sag(dataset, coef_init,\n intercept_init, n_samples,\n n_features, n_classes, tol,\n max_iter,\n loss,\n step_size, alpha_scaled,\n beta_scaled,\n sum_gradient_init,\n gradient_memory_init,\n seen_init,\n num_seen_init,\n fit_intercept,\n intercept_sum_gradient,\n intercept_decay,\n is_saga,\n verbose)\n\n if n_iter_ == max_iter:\n warnings.warn(\"The max_iter was reached which means \"\n \"the coef_ did not converge\", ConvergenceWarning)\n\n if fit_intercept:\n coef_init = np.vstack((coef_init, intercept_init))\n\n warm_start_mem = {'coef': coef_init, 'sum_gradient': sum_gradient_init,\n 'intercept_sum_gradient': 
intercept_sum_gradient,\n 'gradient_memory': gradient_memory_init,\n 'seen': seen_init, 'num_seen': num_seen}\n\n if loss == 'multinomial':\n coef_ = coef_init.T\n else:\n coef_ = coef_init[:, 0]\n\n return coef_, n_iter_, warm_start_mem" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/loss", + "name": "loss", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/penalty", + "name": "penalty", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.penalty", + "default_value": "'l2'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.alpha", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/C", + "name": "C", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.C", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.l1_ratio", + "default_value": "0.15", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/tol", + 
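# --- Illustrative aside: a direct usage sketch for sag_solver, complementing
# the estimator-level examples embedded in the docstring above. Hedged: the
# module is private, so this is a sketch, not a supported API; data synthetic.
import numpy as np
from sklearn.linear_model._sag import sag_solver

rng = np.random.RandomState(0)
X = rng.randn(20, 3)
y = rng.randn(20)
# loss='squared' fits a ridge-style problem; the call returns the weight
# vector, the number of full passes over the data, and the warm-start
# memory dict described in the docstring.
coef, n_iter, warm_start_mem = sag_solver(X, y, loss='squared',
                                          alpha=1.0, random_state=0)
assert coef.shape == (3,) and 'coef' in warm_start_mem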
"name": "tol", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.epsilon", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.learning_rate", + "default_value": "'optimal'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/eta0", + "name": "eta0", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.eta0", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/power_t", + "name": "power_t", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.power_t", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.early_stopping", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.validation_fraction", + "default_value": 
"0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.n_iter_no_change", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/__init__/average", + "name": "average", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.__init__.average", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for SGD classification and regression.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, loss, *, penalty='l2', alpha=0.0001, C=1.0,\n l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=1e-3,\n shuffle=True, verbose=0, epsilon=0.1, random_state=None,\n learning_rate=\"optimal\", eta0=0.0, power_t=0.5,\n early_stopping=False, validation_fraction=0.1,\n n_iter_no_change=5, warm_start=False, average=False):\n self.loss = loss\n self.penalty = penalty\n self.learning_rate = learning_rate\n self.epsilon = epsilon\n self.alpha = alpha\n self.C = C\n self.l1_ratio = l1_ratio\n self.fit_intercept = fit_intercept\n self.shuffle = shuffle\n self.random_state = random_state\n self.verbose = verbose\n self.eta0 = eta0\n self.power_t = power_t\n self.early_stopping = early_stopping\n self.validation_fraction = validation_fraction\n self.n_iter_no_change = n_iter_no_change\n self.warm_start = warm_start\n self.average = average\n self.max_iter = max_iter\n self.tol = tol\n # current tests expect init to do parameter validation\n # but we are not allowed to set attributes\n self._validate_params()" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_allocate_parameter_mem", + "name": "_allocate_parameter_mem", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._allocate_parameter_mem", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_allocate_parameter_mem/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._allocate_parameter_mem.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_allocate_parameter_mem/n_classes", + "name": "n_classes", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._allocate_parameter_mem.n_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + 
"default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_allocate_parameter_mem/n_features", + "name": "n_features", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._allocate_parameter_mem.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_allocate_parameter_mem/coef_init", + "name": "coef_init", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._allocate_parameter_mem.coef_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_allocate_parameter_mem/intercept_init", + "name": "intercept_init", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._allocate_parameter_mem.intercept_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Allocate mem for parameters; initialize if provided.", + "docstring": "Allocate mem for parameters; initialize if provided.", + "code": " def _allocate_parameter_mem(self, n_classes, n_features, coef_init=None,\n intercept_init=None):\n \"\"\"Allocate mem for parameters; initialize if provided.\"\"\"\n if n_classes > 2:\n # allocate coef_ for multi-class\n if coef_init is not None:\n coef_init = np.asarray(coef_init, order=\"C\")\n if coef_init.shape != (n_classes, n_features):\n raise ValueError(\"Provided ``coef_`` does not match \"\n \"dataset. 
\")\n self.coef_ = coef_init\n else:\n self.coef_ = np.zeros((n_classes, n_features),\n dtype=np.float64, order=\"C\")\n\n # allocate intercept_ for multi-class\n if intercept_init is not None:\n intercept_init = np.asarray(intercept_init, order=\"C\")\n if intercept_init.shape != (n_classes, ):\n raise ValueError(\"Provided intercept_init \"\n \"does not match dataset.\")\n self.intercept_ = intercept_init\n else:\n self.intercept_ = np.zeros(n_classes, dtype=np.float64,\n order=\"C\")\n else:\n # allocate coef_ for binary problem\n if coef_init is not None:\n coef_init = np.asarray(coef_init, dtype=np.float64,\n order=\"C\")\n coef_init = coef_init.ravel()\n if coef_init.shape != (n_features,):\n raise ValueError(\"Provided coef_init does not \"\n \"match dataset.\")\n self.coef_ = coef_init\n else:\n self.coef_ = np.zeros(n_features,\n dtype=np.float64,\n order=\"C\")\n\n # allocate intercept_ for binary problem\n if intercept_init is not None:\n intercept_init = np.asarray(intercept_init, dtype=np.float64)\n if intercept_init.shape != (1,) and intercept_init.shape != ():\n raise ValueError(\"Provided intercept_init \"\n \"does not match dataset.\")\n self.intercept_ = intercept_init.reshape(1,)\n else:\n self.intercept_ = np.zeros(1, dtype=np.float64, order=\"C\")\n\n # initialize average parameters\n if self.average > 0:\n self._standard_coef = self.coef_\n self._standard_intercept = self.intercept_\n self._average_coef = np.zeros(self.coef_.shape,\n dtype=np.float64,\n order=\"C\")\n self._average_intercept = np.zeros(self._standard_intercept.shape,\n dtype=np.float64,\n order=\"C\")" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_learning_rate_type", + "name": "_get_learning_rate_type", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._get_learning_rate_type", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_learning_rate_type/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._get_learning_rate_type.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_learning_rate_type/learning_rate", + "name": "learning_rate", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._get_learning_rate_type.learning_rate", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_learning_rate_type(self, learning_rate):\n try:\n return LEARNING_RATE_TYPES[learning_rate]\n except KeyError as e:\n raise ValueError(\"learning rate %s \"\n \"is not supported. 
\" % learning_rate) from e" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_loss_function", + "name": "_get_loss_function", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._get_loss_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_loss_function/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._get_loss_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_loss_function/loss", + "name": "loss", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._get_loss_function.loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get concrete ``LossFunction`` object for str ``loss``.", + "docstring": "Get concrete ``LossFunction`` object for str ``loss``. ", + "code": " def _get_loss_function(self, loss):\n \"\"\"Get concrete ``LossFunction`` object for str ``loss``. \"\"\"\n try:\n loss_ = self.loss_functions[loss]\n loss_class, args = loss_[0], loss_[1:]\n if loss in ('huber', 'epsilon_insensitive',\n 'squared_epsilon_insensitive'):\n args = (self.epsilon, )\n return loss_class(*args)\n except KeyError as e:\n raise ValueError(\"The loss %s is not supported. \" % loss) from e" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_penalty_type", + "name": "_get_penalty_type", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._get_penalty_type", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_penalty_type/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._get_penalty_type.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_get_penalty_type/penalty", + "name": "penalty", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._get_penalty_type.penalty", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_penalty_type(self, penalty):\n penalty = str(penalty).lower()\n try:\n return PENALTY_TYPES[penalty]\n except KeyError as e:\n raise ValueError(\"Penalty %s is not supported. 
\" % penalty) from e" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_score_cb", + "name": "_make_validation_score_cb", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._make_validation_score_cb", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_score_cb/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._make_validation_score_cb.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_score_cb/validation_mask", + "name": "validation_mask", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._make_validation_score_cb.validation_mask", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_score_cb/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._make_validation_score_cb.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_score_cb/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._make_validation_score_cb.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_score_cb/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._make_validation_score_cb.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_score_cb/classes", + "name": "classes", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._make_validation_score_cb.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _make_validation_score_cb(self, validation_mask, X, y, sample_weight,\n classes=None):\n if not self.early_stopping:\n return None\n\n return _ValidationScoreCallback(\n self, X[validation_mask], y[validation_mask],\n sample_weight[validation_mask], classes=classes)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_split", + "name": "_make_validation_split", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._make_validation_split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_split/self", + "name": "self", + "qname": 
"sklearn.linear_model._stochastic_gradient.BaseSGD._make_validation_split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_make_validation_split/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._make_validation_split.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, )", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, )" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Split the dataset between training set and validation set.", + "docstring": "Split the dataset between training set and validation set.\n\nParameters\n----------\ny : ndarray of shape (n_samples, )\n Target values.\n\nReturns\n-------\nvalidation_mask : ndarray of shape (n_samples, )\n Equal to 1 on the validation set, 0 on the training set.", + "code": " def _make_validation_split(self, y):\n \"\"\"Split the dataset between training set and validation set.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples, )\n Target values.\n\n Returns\n -------\n validation_mask : ndarray of shape (n_samples, )\n Equal to 1 on the validation set, 0 on the training set.\n \"\"\"\n n_samples = y.shape[0]\n validation_mask = np.zeros(n_samples, dtype=np.uint8)\n if not self.early_stopping:\n # use the full set for training, with an empty validation set\n return validation_mask\n\n if is_classifier(self):\n splitter_type = StratifiedShuffleSplit\n else:\n splitter_type = ShuffleSplit\n cv = splitter_type(test_size=self.validation_fraction,\n random_state=self.random_state)\n idx_train, idx_val = next(cv.split(np.zeros(shape=(y.shape[0], 1)), y))\n if idx_train.shape[0] == 0 or idx_val.shape[0] == 0:\n raise ValueError(\n \"Splitting %d samples into a train set and a validation set \"\n \"with validation_fraction=%r led to an empty set (%d and %d \"\n \"samples). 
Please either change validation_fraction, increase \"\n \"number of samples, or disable early_stopping.\"\n % (n_samples, self.validation_fraction, idx_train.shape[0],\n idx_val.shape[0]))\n\n validation_mask[idx_val] = 1\n return validation_mask" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_validate_params", + "name": "_validate_params", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._validate_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_validate_params/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._validate_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/_validate_params/for_partial_fit", + "name": "for_partial_fit", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD._validate_params.for_partial_fit", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate input params.", + "docstring": "Validate input params. ", + "code": " def _validate_params(self, for_partial_fit=False):\n \"\"\"Validate input params. \"\"\"\n if not isinstance(self.shuffle, bool):\n raise ValueError(\"shuffle must be either True or False\")\n if not isinstance(self.early_stopping, bool):\n raise ValueError(\"early_stopping must be either True or False\")\n if self.early_stopping and for_partial_fit:\n raise ValueError(\"early_stopping should be False with partial_fit\")\n if self.max_iter is not None and self.max_iter <= 0:\n raise ValueError(\"max_iter must be > zero. Got %f\" % self.max_iter)\n if not (0.0 <= self.l1_ratio <= 1.0):\n raise ValueError(\"l1_ratio must be in [0, 1]\")\n if self.alpha < 0.0:\n raise ValueError(\"alpha must be >= 0\")\n if self.n_iter_no_change < 1:\n raise ValueError(\"n_iter_no_change must be >= 1\")\n if not (0.0 < self.validation_fraction < 1.0):\n raise ValueError(\"validation_fraction must be in range (0, 1)\")\n if self.learning_rate in (\"constant\", \"invscaling\", \"adaptive\"):\n if self.eta0 <= 0.0:\n raise ValueError(\"eta0 must be > 0\")\n if self.learning_rate == \"optimal\" and self.alpha == 0:\n raise ValueError(\"alpha must be > 0 since \"\n \"learning_rate is 'optimal'. alpha is used \"\n \"to compute the optimal learning rate.\")\n\n # raises ValueError if not registered\n self._get_penalty_type(self.penalty)\n self._get_learning_rate_type(self.learning_rate)\n\n if self.loss not in self.loss_functions:\n raise ValueError(\"The loss %s is not supported. 
\" % self.loss)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/average_coef_@getter", + "name": "average_coef_", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.average_coef_", + "decorators": [ + "deprecated('Attribute average_coef_ was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/average_coef_/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.average_coef_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute average_coef_ was deprecated \" # type: ignore\n \"in version 0.23 and will be removed in 1.0 \"\n \"(renaming of 0.25).\")\n @property\n def average_coef_(self):\n return self._average_coef" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/average_intercept_@getter", + "name": "average_intercept_", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.average_intercept_", + "decorators": [ + "deprecated('Attribute average_intercept_ was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/average_intercept_/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.average_intercept_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute average_intercept_ was deprecated \" # type: ignore\n \"in version 0.23 and will be removed in 1.0 \"\n \"(renaming of 0.25).\")\n @property\n def average_intercept_(self):\n return self._average_intercept" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/fit", + "name": "fit", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.fit", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/fit/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/fit/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/fit/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": 
false, + "reexported_by": [], + "description": "Fit model.", + "docstring": "Fit model.", + "code": " @abstractmethod\n def fit(self, X, y):\n \"\"\"Fit model.\"\"\"" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/set_params", + "name": "set_params", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.set_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/set_params/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.set_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/set_params/kwargs", + "name": "kwargs", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.set_params.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Estimator parameters." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Set and validate the parameters of estimator.", + "docstring": "Set and validate the parameters of estimator.\n\nParameters\n----------\n**kwargs : dict\n Estimator parameters.\n\nReturns\n-------\nself : object\n Estimator instance.", + "code": " def set_params(self, **kwargs):\n \"\"\"Set and validate the parameters of estimator.\n\n Parameters\n ----------\n **kwargs : dict\n Estimator parameters.\n\n Returns\n -------\n self : object\n Estimator instance.\n \"\"\"\n super().set_params(**kwargs)\n self._validate_params()\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/standard_coef_@getter", + "name": "standard_coef_", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.standard_coef_", + "decorators": [ + "deprecated('Attribute standard_coef_ was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/standard_coef_/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.standard_coef_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute standard_coef_ was deprecated \" # type: ignore\n \"in version 0.23 and will be removed in 1.0 \"\n \"(renaming of 0.25).\")\n @property\n def standard_coef_(self):\n return self._standard_coef" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/standard_intercept_@getter", + "name": "standard_intercept_", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.standard_intercept_", + "decorators": [ + "deprecated('Attribute standard_intercept_ was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGD/standard_intercept_/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGD.standard_intercept_.self", + 
"default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"Attribute standard_intercept_ was deprecated \"\n \"in version 0.23 and will be removed in 1.0 (renaming of 0.25).\"\n )\n @property\n def standard_intercept_(self):\n return self._standard_intercept" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__", + "decorators": ["abstractmethod", "_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/loss", + "name": "loss", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.loss", + "default_value": "'hinge'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/penalty", + "name": "penalty", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.penalty", + "default_value": "'l2'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.alpha", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.l1_ratio", + "default_value": "0.15", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, 
+ "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.epsilon", + "default_value": "DEFAULT_EPSILON", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.learning_rate", + "default_value": "'optimal'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/eta0", + "name": "eta0", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.eta0", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/power_t", + "name": "power_t", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.power_t", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + 
}, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.early_stopping", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.n_iter_no_change", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/__init__/average", + "name": "average", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.__init__.average", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @abstractmethod\n @_deprecate_positional_args\n def __init__(self, loss=\"hinge\", *, penalty='l2', alpha=0.0001,\n l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=1e-3,\n shuffle=True, verbose=0, epsilon=DEFAULT_EPSILON, n_jobs=None,\n random_state=None, learning_rate=\"optimal\", eta0=0.0,\n power_t=0.5, early_stopping=False,\n validation_fraction=0.1, n_iter_no_change=5,\n class_weight=None, warm_start=False, average=False):\n\n super().__init__(\n loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,\n fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,\n shuffle=shuffle, verbose=verbose, epsilon=epsilon,\n random_state=random_state, learning_rate=learning_rate, eta0=eta0,\n power_t=power_t, early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, warm_start=warm_start,\n average=average)\n self.class_weight = class_weight\n self.n_jobs = n_jobs" + }, + { + "id": 
"scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit", + "name": "_fit", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit/C", + "name": "C", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit.C", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit/loss", + "name": "loss", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit.loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit/learning_rate", + "name": "learning_rate", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit.learning_rate", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit/coef_init", + "name": "coef_init", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit.coef_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit/intercept_init", + "name": "intercept_init", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit.intercept_init", + "default_value": "None", + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit(self, X, y, alpha, C, loss, learning_rate, coef_init=None,\n intercept_init=None, sample_weight=None):\n self._validate_params()\n if hasattr(self, \"classes_\"):\n self.classes_ = None\n\n X, y = self._validate_data(X, y, accept_sparse='csr',\n dtype=np.float64, order=\"C\",\n accept_large_sparse=False)\n\n # labels can be encoded as float, int, or string literals\n # np.unique sorts in asc order; largest class id is positive class\n classes = np.unique(y)\n\n if self.warm_start and hasattr(self, \"coef_\"):\n if coef_init is None:\n coef_init = self.coef_\n if intercept_init is None:\n intercept_init = self.intercept_\n else:\n self.coef_ = None\n self.intercept_ = None\n\n if self.average > 0:\n self._standard_coef = self.coef_\n self._standard_intercept = self.intercept_\n self._average_coef = None\n self._average_intercept = None\n\n # Clear iteration count for multiple call to fit.\n self.t_ = 1.0\n\n self._partial_fit(X, y, alpha, C, loss, learning_rate, self.max_iter,\n classes, sample_weight, coef_init, intercept_init)\n\n if (self.tol is not None and self.tol > -np.inf\n and self.n_iter_ == self.max_iter):\n warnings.warn(\"Maximum number of iteration reached before \"\n \"convergence. 
Consider increasing max_iter to \"\n \"improve the fit.\",\n ConvergenceWarning)\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_binary", + "name": "_fit_binary", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_binary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_binary/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_binary.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_binary/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_binary.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_binary/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_binary.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_binary/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_binary.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_binary/C", + "name": "C", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_binary.C", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_binary/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_binary.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_binary/learning_rate", + "name": "learning_rate", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_binary.learning_rate", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_binary/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_binary.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + 
"results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit a binary classifier on X and y.", + "docstring": "Fit a binary classifier on X and y. ", + "code": " def _fit_binary(self, X, y, alpha, C, sample_weight,\n learning_rate, max_iter):\n \"\"\"Fit a binary classifier on X and y. \"\"\"\n coef, intercept, n_iter_ = fit_binary(self, 1, X, y, alpha, C,\n learning_rate, max_iter,\n self._expanded_class_weight[1],\n self._expanded_class_weight[0],\n sample_weight,\n random_state=self.random_state)\n\n self.t_ += n_iter_ * X.shape[0]\n self.n_iter_ = n_iter_\n\n # need to be 2d\n if self.average > 0:\n if self.average <= self.t_ - 1:\n self.coef_ = self._average_coef.reshape(1, -1)\n self.intercept_ = self._average_intercept\n else:\n self.coef_ = self._standard_coef.reshape(1, -1)\n self._standard_intercept = np.atleast_1d(intercept)\n self.intercept_ = self._standard_intercept\n else:\n self.coef_ = coef.reshape(1, -1)\n # intercept is a float, need to convert it to an array of length 1\n self.intercept_ = np.atleast_1d(intercept)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_multiclass", + "name": "_fit_multiclass", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_multiclass", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_multiclass/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_multiclass.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_multiclass/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_multiclass.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_multiclass/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_multiclass.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_multiclass/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_multiclass.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_multiclass/C", + "name": "C", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_multiclass.C", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_multiclass/learning_rate", + "name": "learning_rate", + "qname": 
"sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_multiclass.learning_rate", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_multiclass/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_multiclass.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_fit_multiclass/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._fit_multiclass.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit a multi-class classifier by combining binary classifiers\n\nEach binary classifier predicts one class versus all others. This\nstrategy is called OvA (One versus All) or OvR (One versus Rest).", + "docstring": "Fit a multi-class classifier by combining binary classifiers\n\nEach binary classifier predicts one class versus all others. This\nstrategy is called OvA (One versus All) or OvR (One versus Rest).", + "code": " def _fit_multiclass(self, X, y, alpha, C, learning_rate,\n sample_weight, max_iter):\n \"\"\"Fit a multi-class classifier by combining binary classifiers\n\n Each binary classifier predicts one class versus all others. 
This\n strategy is called OvA (One versus All) or OvR (One versus Rest).\n \"\"\"\n # Precompute the validation split using the multiclass labels\n # to ensure proper balancing of the classes.\n validation_mask = self._make_validation_split(y)\n\n # Use joblib to fit OvA in parallel.\n # Pick the random seed for each job outside of fit_binary to avoid\n # sharing the estimator random state between threads which could lead\n # to non-deterministic behavior\n random_state = check_random_state(self.random_state)\n seeds = random_state.randint(MAX_INT, size=len(self.classes_))\n result = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,\n **_joblib_parallel_args(require=\"sharedmem\"))(\n delayed(fit_binary)(self, i, X, y, alpha, C, learning_rate,\n max_iter, self._expanded_class_weight[i],\n 1., sample_weight,\n validation_mask=validation_mask,\n random_state=seed)\n for i, seed in enumerate(seeds))\n\n # take the maximum of n_iter_ over every binary fit\n n_iter_ = 0.\n for i, (_, intercept, n_iter_i) in enumerate(result):\n self.intercept_[i] = intercept\n n_iter_ = max(n_iter_, n_iter_i)\n\n self.t_ += n_iter_ * X.shape[0]\n self.n_iter_ = n_iter_\n\n if self.average > 0:\n if self.average <= self.t_ - 1.0:\n self.coef_ = self._average_coef\n self.intercept_ = self._average_intercept\n else:\n self.coef_ = self._standard_coef\n self._standard_intercept = np.atleast_1d(self.intercept_)\n self.intercept_ = self._standard_intercept" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit", + "name": "_partial_fit", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._partial_fit.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit/C", + "name": "C", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._partial_fit.C", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + 
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit/loss", + "name": "loss", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._partial_fit.loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit/learning_rate", + "name": "learning_rate", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._partial_fit.learning_rate", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._partial_fit.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit/classes", + "name": "classes", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._partial_fit.classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._partial_fit.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit/coef_init", + "name": "coef_init", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._partial_fit.coef_init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/_partial_fit/intercept_init", + "name": "intercept_init", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier._partial_fit.intercept_init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _partial_fit(self, X, y, alpha, C,\n loss, learning_rate, max_iter,\n classes, sample_weight,\n coef_init, intercept_init):\n X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64,\n order=\"C\", accept_large_sparse=False)\n\n n_samples, n_features = X.shape\n\n _check_partial_fit_first_call(self, classes)\n\n n_classes = self.classes_.shape[0]\n\n # Allocate datastructures from input arguments\n 
self._expanded_class_weight = compute_class_weight(\n self.class_weight, classes=self.classes_, y=y)\n sample_weight = _check_sample_weight(sample_weight, X)\n\n if getattr(self, \"coef_\", None) is None or coef_init is not None:\n self._allocate_parameter_mem(n_classes, n_features,\n coef_init, intercept_init)\n elif n_features != self.coef_.shape[-1]:\n raise ValueError(\"Number of features %d does not match previous \"\n \"data %d.\" % (n_features, self.coef_.shape[-1]))\n\n self.loss_function_ = self._get_loss_function(loss)\n if not hasattr(self, \"t_\"):\n self.t_ = 1.0\n\n # delegate to concrete training procedure\n if n_classes > 2:\n self._fit_multiclass(X, y, alpha=alpha, C=C,\n learning_rate=learning_rate,\n sample_weight=sample_weight,\n max_iter=max_iter)\n elif n_classes == 2:\n self._fit_binary(X, y, alpha=alpha, C=C,\n learning_rate=learning_rate,\n sample_weight=sample_weight,\n max_iter=max_iter)\n else:\n raise ValueError(\n \"The number of classes has to be greater than one;\"\n \" got %d class\" % n_classes)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/fit", + "name": "fit", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/fit/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/fit/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/fit/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/fit/coef_init", + "name": "coef_init", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.fit.coef_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_classes, n_features)", + "default_value": "None", + "description": "The initial coefficients to warm-start the optimization." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_classes, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/fit/intercept_init", + "name": "intercept_init", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.fit.intercept_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_classes,)", + "default_value": "None", + "description": "The initial intercept to warm-start the optimization." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples,)", + "default_value": "None", + "description": "Weights applied to individual samples.\nIf not provided, uniform weights are assumed. These weights will\nbe multiplied with class_weight (passed through the\nconstructor) if class_weight is specified." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit linear model with Stochastic Gradient Descent.", + "docstring": "Fit linear model with Stochastic Gradient Descent.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data.\n\ny : ndarray of shape (n_samples,)\n Target values.\n\ncoef_init : ndarray of shape (n_classes, n_features), default=None\n The initial coefficients to warm-start the optimization.\n\nintercept_init : ndarray of shape (n_classes,), default=None\n The initial intercept to warm-start the optimization.\n\nsample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed. These weights will\n be multiplied with class_weight (passed through the\n constructor) if class_weight is specified.\n\nReturns\n-------\nself :\n Returns an instance of self.", + "code": " def fit(self, X, y, coef_init=None, intercept_init=None,\n sample_weight=None):\n \"\"\"Fit linear model with Stochastic Gradient Descent.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data.\n\n y : ndarray of shape (n_samples,)\n Target values.\n\n coef_init : ndarray of shape (n_classes, n_features), default=None\n The initial coefficients to warm-start the optimization.\n\n intercept_init : ndarray of shape (n_classes,), default=None\n The initial intercept to warm-start the optimization.\n\n sample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed. 
These weights will\n be multiplied with class_weight (passed through the\n constructor) if class_weight is specified.\n\n Returns\n -------\n self :\n Returns an instance of self.\n \"\"\"\n return self._fit(X, y, alpha=self.alpha, C=1.0,\n loss=self.loss, learning_rate=self.learning_rate,\n coef_init=coef_init, intercept_init=intercept_init,\n sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/partial_fit", + "name": "partial_fit", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/partial_fit/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/partial_fit/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "Subset of the training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/partial_fit/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Subset of the target values." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/partial_fit/classes", + "name": "classes", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.partial_fit.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_classes,)", + "default_value": "None", + "description": "Classes across all calls to partial_fit.\nCan be obtained via `np.unique(y_all)`, where y_all is the\ntarget vector of the entire dataset.\nThis argument is required for the first call to partial_fit\nand can be omitted in the subsequent calls.\nNote that y doesn't need to contain all labels in `classes`." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDClassifier/partial_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDClassifier.partial_fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples,)", + "default_value": "None", + "description": "Weights applied to individual samples.\nIf not provided, uniform weights are assumed."
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform one epoch of stochastic gradient descent on given samples.\n\nInternally, this method uses ``max_iter = 1``. Therefore, it is not\nguaranteed that a minimum of the cost function is reached after calling\nit once. Matters such as objective convergence and early stopping\nshould be handled by the user.", + "docstring": "Perform one epoch of stochastic gradient descent on given samples.\n\nInternally, this method uses ``max_iter = 1``. Therefore, it is not\nguaranteed that a minimum of the cost function is reached after calling\nit once. Matters such as objective convergence and early stopping\nshould be handled by the user.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Subset of the training data.\n\ny : ndarray of shape (n_samples,)\n Subset of the target values.\n\nclasses : ndarray of shape (n_classes,), default=None\n Classes across all calls to partial_fit.\n Can be obtained by via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\nsample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed.\n\nReturns\n-------\nself :\n Returns an instance of self.", + "code": " def partial_fit(self, X, y, classes=None, sample_weight=None):\n \"\"\"Perform one epoch of stochastic gradient descent on given samples.\n\n Internally, this method uses ``max_iter = 1``. Therefore, it is not\n guaranteed that a minimum of the cost function is reached after calling\n it once. Matters such as objective convergence and early stopping\n should be handled by the user.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Subset of the training data.\n\n y : ndarray of shape (n_samples,)\n Subset of the target values.\n\n classes : ndarray of shape (n_classes,), default=None\n Classes across all calls to partial_fit.\n Can be obtained by via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\n sample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed.\n\n Returns\n -------\n self :\n Returns an instance of self.\n \"\"\"\n self._validate_params(for_partial_fit=True)\n if self.class_weight in ['balanced']:\n raise ValueError(\"class_weight '{0}' is not supported for \"\n \"partial_fit. In order to use 'balanced' weights,\"\n \" use compute_class_weight('{0}', \"\n \"classes=classes, y=y). \"\n \"In place of y you can us a large enough sample \"\n \"of the full training set target to properly \"\n \"estimate the class frequency distributions. 
\"\n \"Pass the resulting weights as the class_weight \"\n \"parameter.\".format(self.class_weight))\n return self._partial_fit(X, y, alpha=self.alpha, C=1.0, loss=self.loss,\n learning_rate=self.learning_rate, max_iter=1,\n classes=classes, sample_weight=sample_weight,\n coef_init=None, intercept_init=None)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__", + "decorators": ["abstractmethod", "_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/loss", + "name": "loss", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.loss", + "default_value": "'squared_loss'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/penalty", + "name": "penalty", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.penalty", + "default_value": "'l2'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.alpha", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.l1_ratio", + "default_value": "0.15", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/tol", + "name": "tol", + "qname": 
"sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.epsilon", + "default_value": "DEFAULT_EPSILON", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.learning_rate", + "default_value": "'invscaling'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/eta0", + "name": "eta0", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.eta0", + "default_value": "0.01", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/power_t", + "name": "power_t", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.power_t", + "default_value": "0.25", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.early_stopping", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.n_iter_no_change", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/__init__/average", + "name": "average", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.__init__.average", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @abstractmethod\n @_deprecate_positional_args\n def __init__(self, loss=\"squared_loss\", *, penalty=\"l2\", alpha=0.0001,\n l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=1e-3,\n shuffle=True, verbose=0, epsilon=DEFAULT_EPSILON,\n random_state=None, learning_rate=\"invscaling\", eta0=0.01,\n power_t=0.25, early_stopping=False, validation_fraction=0.1,\n n_iter_no_change=5, warm_start=False, average=False):\n super().__init__(\n loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,\n fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,\n shuffle=shuffle, verbose=verbose, epsilon=epsilon,\n random_state=random_state, learning_rate=learning_rate, eta0=eta0,\n power_t=power_t, early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, warm_start=warm_start,\n average=average)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_decision_function", + "name": "_decision_function", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_decision_function/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_decision_function/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + 
"is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict using the linear model", + "docstring": "Predict using the linear model\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n\nReturns\n-------\nndarray of shape (n_samples,)\n Predicted target values per element in X.", + "code": " def _decision_function(self, X):\n \"\"\"Predict using the linear model\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n\n Returns\n -------\n ndarray of shape (n_samples,)\n Predicted target values per element in X.\n \"\"\"\n check_is_fitted(self)\n\n X = check_array(X, accept_sparse='csr')\n\n scores = safe_sparse_dot(X, self.coef_.T,\n dense_output=True) + self.intercept_\n return scores.ravel()" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit", + "name": "_fit", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit/C", + "name": "C", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit.C", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit/loss", + "name": "loss", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit.loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": 
"", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit/learning_rate", + "name": "learning_rate", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit.learning_rate", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit/coef_init", + "name": "coef_init", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit.coef_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit/intercept_init", + "name": "intercept_init", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit.intercept_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit(self, X, y, alpha, C, loss, learning_rate, coef_init=None,\n intercept_init=None, sample_weight=None):\n self._validate_params()\n if self.warm_start and getattr(self, \"coef_\", None) is not None:\n if coef_init is None:\n coef_init = self.coef_\n if intercept_init is None:\n intercept_init = self.intercept_\n else:\n self.coef_ = None\n self.intercept_ = None\n\n # Clear iteration count for multiple call to fit.\n self.t_ = 1.0\n\n self._partial_fit(X, y, alpha, C, loss, learning_rate,\n self.max_iter, sample_weight, coef_init,\n intercept_init)\n\n if (self.tol is not None and self.tol > -np.inf\n and self.n_iter_ == self.max_iter):\n warnings.warn(\"Maximum number of iteration reached before \"\n \"convergence. 
Consider increasing max_iter to \"\n \"improve the fit.\",\n ConvergenceWarning)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit_regressor", + "name": "_fit_regressor", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit_regressor", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit_regressor/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit_regressor.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit_regressor/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit_regressor.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit_regressor/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit_regressor.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit_regressor/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit_regressor.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit_regressor/C", + "name": "C", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit_regressor.C", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit_regressor/loss", + "name": "loss", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit_regressor.loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit_regressor/learning_rate", + "name": "learning_rate", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit_regressor.learning_rate", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit_regressor/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit_regressor.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, 
+ "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_fit_regressor/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._fit_regressor.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit_regressor(self, X, y, alpha, C, loss, learning_rate,\n sample_weight, max_iter):\n dataset, intercept_decay = make_dataset(X, y, sample_weight)\n\n loss_function = self._get_loss_function(loss)\n penalty_type = self._get_penalty_type(self.penalty)\n learning_rate_type = self._get_learning_rate_type(learning_rate)\n\n if not hasattr(self, \"t_\"):\n self.t_ = 1.0\n\n validation_mask = self._make_validation_split(y)\n validation_score_cb = self._make_validation_score_cb(\n validation_mask, X, y, sample_weight)\n\n random_state = check_random_state(self.random_state)\n # numpy mtrand expects a C long which is a signed 32 bit integer under\n # Windows\n seed = random_state.randint(0, np.iinfo(np.int32).max)\n\n tol = self.tol if self.tol is not None else -np.inf\n\n if self.average:\n coef = self._standard_coef\n intercept = self._standard_intercept\n average_coef = self._average_coef\n average_intercept = self._average_intercept\n else:\n coef = self.coef_\n intercept = self.intercept_\n average_coef = None # Not used\n average_intercept = [0] # Not used\n\n coef, intercept, average_coef, average_intercept, self.n_iter_ = \\\n _plain_sgd(coef,\n intercept[0],\n average_coef,\n average_intercept[0],\n loss_function,\n penalty_type,\n alpha, C,\n self.l1_ratio,\n dataset,\n validation_mask, self.early_stopping,\n validation_score_cb,\n int(self.n_iter_no_change),\n max_iter, tol,\n int(self.fit_intercept),\n int(self.verbose),\n int(self.shuffle),\n seed,\n 1.0, 1.0,\n learning_rate_type,\n self.eta0, self.power_t, self.t_,\n intercept_decay, self.average)\n\n self.t_ += self.n_iter_ * X.shape[0]\n\n if self.average > 0:\n self._average_intercept = np.atleast_1d(average_intercept)\n self._standard_intercept = np.atleast_1d(intercept)\n\n if self.average <= self.t_ - 1.0:\n # made enough updates for averaging to be taken into account\n self.coef_ = average_coef\n self.intercept_ = np.atleast_1d(average_intercept)\n else:\n self.coef_ = coef\n self.intercept_ = np.atleast_1d(intercept)\n\n else:\n self.intercept_ = np.atleast_1d(intercept)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit", + "name": "_partial_fit", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": 
false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._partial_fit.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit/C", + "name": "C", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._partial_fit.C", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit/loss", + "name": "loss", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._partial_fit.loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit/learning_rate", + "name": "learning_rate", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._partial_fit.learning_rate", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._partial_fit.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._partial_fit.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit/coef_init", + "name": "coef_init", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._partial_fit.coef_init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/_partial_fit/intercept_init", + "name": "intercept_init", + "qname": 
"sklearn.linear_model._stochastic_gradient.BaseSGDRegressor._partial_fit.intercept_init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _partial_fit(self, X, y, alpha, C, loss, learning_rate,\n max_iter, sample_weight, coef_init, intercept_init):\n X, y = self._validate_data(X, y, accept_sparse=\"csr\", copy=False,\n order='C', dtype=np.float64,\n accept_large_sparse=False)\n y = y.astype(np.float64, copy=False)\n\n n_samples, n_features = X.shape\n\n sample_weight = _check_sample_weight(sample_weight, X)\n\n # Allocate datastructures from input arguments\n if getattr(self, \"coef_\", None) is None:\n self._allocate_parameter_mem(1, n_features, coef_init,\n intercept_init)\n elif n_features != self.coef_.shape[-1]:\n raise ValueError(\"Number of features %d does not match previous \"\n \"data %d.\" % (n_features, self.coef_.shape[-1]))\n if self.average > 0 and getattr(self, \"_average_coef\", None) is None:\n self._average_coef = np.zeros(n_features,\n dtype=np.float64,\n order=\"C\")\n self._average_intercept = np.zeros(1, dtype=np.float64, order=\"C\")\n\n self._fit_regressor(X, y, alpha, C, loss, learning_rate,\n sample_weight, max_iter)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/fit", + "name": "fit", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/fit/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/fit/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "Training data" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/fit/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target values" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/fit/coef_init", + "name": "coef_init", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.fit.coef_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,)", + "default_value": "None", + "description": "The initial coefficients to warm-start 
the optimization." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/fit/intercept_init", + "name": "intercept_init", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.fit.intercept_init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (1,)", + "default_value": "None", + "description": "The initial intercept to warm-start the optimization." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (1,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples,)", + "default_value": "None", + "description": "Weights applied to individual samples (1. for unweighted)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit linear model with Stochastic Gradient Descent.", + "docstring": "Fit linear model with Stochastic Gradient Descent.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data\n\ny : ndarray of shape (n_samples,)\n Target values\n\ncoef_init : ndarray of shape (n_features,), default=None\n The initial coefficients to warm-start the optimization.\n\nintercept_init : ndarray of shape (1,), default=None\n The initial intercept to warm-start the optimization.\n\nsample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y, coef_init=None, intercept_init=None,\n sample_weight=None):\n \"\"\"Fit linear model with Stochastic Gradient Descent.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data\n\n y : ndarray of shape (n_samples,)\n Target values\n\n coef_init : ndarray of shape (n_features,), default=None\n The initial coefficients to warm-start the optimization.\n\n intercept_init : ndarray of shape (1,), default=None\n The initial intercept to warm-start the optimization.\n\n sample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n return self._fit(X, y, alpha=self.alpha, C=1.0,\n loss=self.loss, learning_rate=self.learning_rate,\n coef_init=coef_init,\n intercept_init=intercept_init,\n sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/partial_fit", + "name": "partial_fit", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/partial_fit/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/partial_fit/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "Subset of training data" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/partial_fit/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "numpy array of shape (n_samples,)", + "default_value": "", + "description": "Subset of target values" + }, + "type": { + "kind": "NamedType", + "name": "numpy array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/partial_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.partial_fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples,)", + "default_value": "None", + "description": "Weights applied to individual samples.\nIf not provided, uniform weights are assumed." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform one epoch of stochastic gradient descent on given samples.\n\nInternally, this method uses ``max_iter = 1``. Therefore, it is not\nguaranteed that a minimum of the cost function is reached after calling\nit once. Matters such as objective convergence and early stopping\nshould be handled by the user.", + "docstring": "Perform one epoch of stochastic gradient descent on given samples.\n\nInternally, this method uses ``max_iter = 1``. Therefore, it is not\nguaranteed that a minimum of the cost function is reached after calling\nit once. 
Matters such as objective convergence and early stopping\nshould be handled by the user.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Subset of training data\n\ny : numpy array of shape (n_samples,)\n Subset of target values\n\nsample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed.\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def partial_fit(self, X, y, sample_weight=None):\n \"\"\"Perform one epoch of stochastic gradient descent on given samples.\n\n Internally, this method uses ``max_iter = 1``. Therefore, it is not\n guaranteed that a minimum of the cost function is reached after calling\n it once. Matters such as objective convergence and early stopping\n should be handled by the user.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Subset of training data\n\n y : numpy array of shape (n_samples,)\n Subset of target values\n\n sample_weight : array-like, shape (n_samples,), default=None\n Weights applied to individual samples.\n If not provided, uniform weights are assumed.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._validate_params(for_partial_fit=True)\n return self._partial_fit(X, y, self.alpha, C=1.0,\n loss=self.loss,\n learning_rate=self.learning_rate, max_iter=1,\n sample_weight=sample_weight, coef_init=None,\n intercept_init=None)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/predict", + "name": "predict", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/predict/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/BaseSGDRegressor/predict/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.BaseSGDRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict using the linear model", + "docstring": "Predict using the linear model\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n\nReturns\n-------\nndarray of shape (n_samples,)\n Predicted target values per element in X.", + "code": " def predict(self, X):\n \"\"\"Predict using the linear model\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n\n Returns\n -------\n ndarray of shape (n_samples,)\n Predicted target values per element in X.\n \"\"\"\n return self._decision_function(X)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__", + "name": "__init__", + "qname": 
"sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/loss", + "name": "loss", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.loss", + "default_value": "'hinge'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'hinge'", + "description": "The loss function to be used. Defaults to 'hinge', which gives a\nlinear SVM.\n\nThe possible options are 'hinge', 'log', 'modified_huber',\n'squared_hinge', 'perceptron', or a regression loss: 'squared_loss',\n'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\n\nThe 'log' loss gives logistic regression, a probabilistic classifier.\n'modified_huber' is another smooth loss that brings tolerance to\noutliers as well as probability estimates.\n'squared_hinge' is like hinge but is quadratically penalized.\n'perceptron' is the linear loss used by the perceptron algorithm.\nThe other losses are designed for regression but can be useful in\nclassification as well; see\n:class:`~sklearn.linear_model.SGDRegressor` for a description.\n\nMore details about the losses formulas can be found in the\n:ref:`User Guide `." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/penalty", + "name": "penalty", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.penalty", + "default_value": "'l2'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'l2', 'l1', 'elasticnet'}", + "default_value": "'l2'", + "description": "The penalty (aka regularization term) to be used. Defaults to 'l2'\nwhich is the standard regularizer for linear SVM models. 'l1' and\n'elasticnet' might bring sparsity to the model (feature selection)\nnot achievable with 'l2'." + }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1", "elasticnet"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.alpha", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0001", + "description": "Constant that multiplies the regularization term. The higher the\nvalue, the stronger the regularization.\nAlso used to compute the learning rate when set to `learning_rate` is\nset to 'optimal'." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.l1_ratio", + "default_value": "0.15", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.15", + "description": "The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\nl1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\nOnly used if `penalty` is 'elasticnet'." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether the intercept should be estimated or not. If False, the\ndata is assumed to be already centered." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of passes over the training data (aka epochs).\nIt only impacts the behavior in the ``fit`` method, and not the\n:meth:`partial_fit` method.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "The stopping criterion. If it is not None, training will stop\nwhen (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\nepochs.\nConvergence is checked against the training loss or the\nvalidation loss depending on the `early_stopping` parameter.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not the training data should be shuffled after each epoch." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.epsilon", + "default_value": "DEFAULT_EPSILON", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\nFor 'huber', determines the threshold at which it becomes less\nimportant to get the prediction exactly right.\nFor epsilon-insensitive, any differences between the current prediction\nand the correct label are ignored if they are less than this threshold." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of CPUs to use to do the OVA (One Versus All, for\nmulti-class problems) computation.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used for shuffling the data, when ``shuffle`` is set to ``True``.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.learning_rate", + "default_value": "'optimal'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'optimal'", + "description": "The learning rate schedule:\n\n- 'constant': `eta = eta0`\n- 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n where t0 is chosen by a heuristic proposed by Leon Bottou.\n- 'invscaling': `eta = eta0 / pow(t, power_t)`\n- 'adaptive': eta = eta0, as long as the training keeps decreasing.\n Each time n_iter_no_change consecutive epochs fail to decrease the\n training loss by tol or fail to increase validation score by tol if\n early_stopping is True, the current learning rate is divided by 5.\n\n .. 
versionadded:: 0.20\n Added 'adaptive' option" + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/eta0", + "name": "eta0", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.eta0", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "double", + "default_value": "0.0", + "description": "The initial learning rate for the 'constant', 'invscaling' or\n'adaptive' schedules. The default value is 0.0 as eta0 is not used by\nthe default schedule 'optimal'." + }, + "type": { + "kind": "NamedType", + "name": "double" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/power_t", + "name": "power_t", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.power_t", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "double", + "default_value": "0.5", + "description": "The exponent for inverse scaling learning rate [default 0.5]." + }, + "type": { + "kind": "NamedType", + "name": "double" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.early_stopping", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use early stopping to terminate training when validation\nscore is not improving. If set to True, it will automatically set aside\na stratified fraction of training data as validation and terminate\ntraining when validation score returned by the `score` method is not\nimproving by at least tol for n_iter_no_change consecutive epochs.\n\n.. versionadded:: 0.20\n Added 'early_stopping' option" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The proportion of training data to set aside as validation set for\nearly stopping. Must be between 0 and 1.\nOnly used if `early_stopping` is True.\n\n.. versionadded:: 0.20\n Added 'validation_fraction' option" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.n_iter_no_change", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of iterations with no improvement to wait before stopping\nfitting.\nConvergence is checked against the training loss or the\nvalidation loss depending on the `early_stopping` parameter.\n\n.. 
versionadded:: 0.20\n Added 'n_iter_no_change' option" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict, {class_label: weight} or \"balanced\"", + "default_value": "None", + "description": "Preset for the class_weight fit parameter.\n\nWeights associated with classes. If not given, all classes\nare supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "\"balanced\"" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to True, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution.\nSee :term:`the Glossary `.\n\nRepeatedly calling fit or partial_fit when warm_start is True can\nresult in a different solution than when calling fit a single time\nbecause of the way the data is shuffled.\nIf a dynamic learning rate is used, the learning rate is adapted\ndepending on the number of samples already seen. Calling ``fit`` resets\nthis counter, while ``partial_fit`` will result in increasing the\nexisting counter." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/__init__/average", + "name": "average", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.__init__.average", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "When set to True, computes the averaged SGD weights across all\nupdates and stores the result in the ``coef_`` attribute. If set to\nan int greater than 1, averaging will begin once the total number of\nsamples seen reaches `average`. So ``average=10`` will begin\naveraging after seeing 10 samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Linear classifiers (SVM, logistic regression, etc.) with SGD training.\n\nThis estimator implements regularized linear models with stochastic\ngradient descent (SGD) learning: the gradient of the loss is estimated\none sample at a time and the model is updated along the way with a\ndecreasing strength schedule (aka learning rate). 
SGD allows minibatch\n(online/out-of-core) learning via the `partial_fit` method.\nFor best results using the default learning rate schedule, the data should\nhave zero mean and unit variance.\n\nThis implementation works with data represented as dense or sparse arrays\nof floating point values for the features. The model it fits can be\ncontrolled with the loss parameter; by default, it fits a linear support\nvector machine (SVM).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nRead more in the :ref:`User Guide <sgd>`.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, loss=\"hinge\", *, penalty='l2', alpha=0.0001,\n l1_ratio=0.15,\n fit_intercept=True, max_iter=1000, tol=1e-3, shuffle=True,\n verbose=0, epsilon=DEFAULT_EPSILON, n_jobs=None,\n random_state=None, learning_rate=\"optimal\", eta0=0.0,\n power_t=0.5, early_stopping=False, validation_fraction=0.1,\n n_iter_no_change=5, class_weight=None, warm_start=False,\n average=False):\n super().__init__(\n loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,\n fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,\n shuffle=shuffle, verbose=verbose, epsilon=epsilon, n_jobs=n_jobs,\n random_state=random_state, learning_rate=learning_rate, eta0=eta0,\n power_t=power_t, early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, class_weight=class_weight,\n warm_start=warm_start, average=average)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_check_proba", + "name": "_check_proba", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier._check_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_check_proba/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier._check_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_proba(self):\n if self.loss not in (\"log\", \"modified_huber\"):\n raise AttributeError(\"probability estimates are not available for\"\n \" loss=%r\" % self.loss)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_more_tags", + "name": "_more_tags", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 
'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_predict_log_proba", + "name": "_predict_log_proba", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier._predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_predict_log_proba/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier._predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_predict_log_proba/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier._predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _predict_log_proba(self, X):\n return np.log(self.predict_proba(X))" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_predict_proba", + "name": "_predict_proba", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier._predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_predict_proba/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier._predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/_predict_proba/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier._predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _predict_proba(self, X):\n check_is_fitted(self)\n\n if self.loss == \"log\":\n return self._predict_proba_lr(X)\n\n elif self.loss == \"modified_huber\":\n binary = (len(self.classes_) == 2)\n scores = self.decision_function(X)\n\n if binary:\n prob2 = np.ones((scores.shape[0], 2))\n prob = prob2[:, 1]\n else:\n prob = scores\n\n np.clip(scores, -1, 1, prob)\n prob += 1.\n prob /= 2.\n\n if binary:\n prob2[:, 0] -= prob\n prob = prob2\n else:\n # the above might assign zero to all classes, which doesn't\n # normalize neatly; work around this to produce uniform\n # probabilities\n prob_sum = prob.sum(axis=1)\n all_zero = (prob_sum == 0)\n if np.any(all_zero):\n prob[all_zero, :] = 1\n prob_sum[all_zero] = len(self.classes_)\n\n # normalize\n prob /= prob_sum.reshape((prob.shape[0], -1))\n\n return prob\n\n else:\n raise NotImplementedError(\"predict_(log_)proba only supported when\"\n \" loss='log' or loss='modified_huber' \"\n \"(%r given)\" % self.loss)" + }, + { + "id": 
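Since the SGDClassifier description above notes that the default learning rate schedule works best on data with zero mean and unit variance, a typical usage pairs the estimator with a scaler. A minimal sketch on synthetic data, using the standard scikit-learn utilities make_classification, StandardScaler and make_pipeline (which are not part of this module dump):

```python
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Synthetic data, purely for illustration.
X, y = make_classification(n_samples=200, n_features=20, random_state=0)

# Scale features so the default 'optimal' schedule behaves well.
clf = make_pipeline(
    StandardScaler(),
    SGDClassifier(loss="hinge", penalty="l2", max_iter=1000,
                  tol=1e-3, random_state=0))
clf.fit(X, y)
print(clf.score(X, y))
```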
"scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/predict_log_proba@getter", + "name": "predict_log_proba", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.predict_log_proba", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/predict_log_proba/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Log of probability estimates.\n\nThis method is only available for log loss and modified Huber loss.\n\nWhen loss=\"modified_huber\", probability estimates may be hard zeros\nand ones, so taking the logarithm is not possible.\n\nSee ``predict_proba`` for details.", + "docstring": "Log of probability estimates.\n\nThis method is only available for log loss and modified Huber loss.\n\nWhen loss=\"modified_huber\", probability estimates may be hard zeros\nand ones, so taking the logarithm is not possible.\n\nSee ``predict_proba`` for details.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data for prediction.\n\nReturns\n-------\nT : array-like, shape (n_samples, n_classes)\n Returns the log-probability of the sample for each class in the\n model, where classes are ordered as they are in\n `self.classes_`.", + "code": " @property\n def predict_log_proba(self):\n \"\"\"Log of probability estimates.\n\n This method is only available for log loss and modified Huber loss.\n\n When loss=\"modified_huber\", probability estimates may be hard zeros\n and ones, so taking the logarithm is not possible.\n\n See ``predict_proba`` for details.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data for prediction.\n\n Returns\n -------\n T : array-like, shape (n_samples, n_classes)\n Returns the log-probability of the sample for each class in the\n model, where classes are ordered as they are in\n `self.classes_`.\n \"\"\"\n self._check_proba()\n return self._predict_log_proba" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/predict_proba@getter", + "name": "predict_proba", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.predict_proba", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.SGDClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Probability estimates.\n\nThis method is only available for log loss and modified Huber loss.\n\nMulticlass probability estimates are derived from binary (one-vs.-rest)\nestimates by simple normalization, as recommended by Zadrozny and\nElkan.\n\nBinary probability estimates for loss=\"modified_huber\" are given by\n(clip(decision_function(X), -1, 1) + 1) / 2. 
For other loss functions\nit is necessary to perform proper probability calibration by wrapping\nthe classifier with\n:class:`~sklearn.calibration.CalibratedClassifierCV` instead.", + "docstring": "Probability estimates.\n\nThis method is only available for log loss and modified Huber loss.\n\nMulticlass probability estimates are derived from binary (one-vs.-rest)\nestimates by simple normalization, as recommended by Zadrozny and\nElkan.\n\nBinary probability estimates for loss=\"modified_huber\" are given by\n(clip(decision_function(X), -1, 1) + 1) / 2. For other loss functions\nit is necessary to perform proper probability calibration by wrapping\nthe classifier with\n:class:`~sklearn.calibration.CalibratedClassifierCV` instead.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data for prediction.\n\nReturns\n-------\nndarray of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in `self.classes_`.\n\nReferences\n----------\nZadrozny and Elkan, \"Transforming classifier scores into multiclass\nprobability estimates\", SIGKDD'02,\nhttp://www.research.ibm.com/people/z/zadrozny/kdd2002-Transf.pdf\n\nThe justification for the formula in the loss=\"modified_huber\"\ncase is in the appendix B in:\nhttp://jmlr.csail.mit.edu/papers/volume2/zhang02c/zhang02c.pdf", + "code": " @property\n def predict_proba(self):\n \"\"\"Probability estimates.\n\n This method is only available for log loss and modified Huber loss.\n\n Multiclass probability estimates are derived from binary (one-vs.-rest)\n estimates by simple normalization, as recommended by Zadrozny and\n Elkan.\n\n Binary probability estimates for loss=\"modified_huber\" are given by\n (clip(decision_function(X), -1, 1) + 1) / 2. 
For other loss functions\n it is necessary to perform proper probability calibration by wrapping\n the classifier with\n :class:`~sklearn.calibration.CalibratedClassifierCV` instead.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Input data for prediction.\n\n Returns\n -------\n ndarray of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in `self.classes_`.\n\n References\n ----------\n Zadrozny and Elkan, \"Transforming classifier scores into multiclass\n probability estimates\", SIGKDD'02,\n http://www.research.ibm.com/people/z/zadrozny/kdd2002-Transf.pdf\n\n The justification for the formula in the loss=\"modified_huber\"\n case is in the appendix B in:\n http://jmlr.csail.mit.edu/papers/volume2/zhang02c/zhang02c.pdf\n \"\"\"\n self._check_proba()\n return self._predict_proba" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/loss", + "name": "loss", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.loss", + "default_value": "'squared_loss'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'squared_loss'", + "description": "The loss function to be used. The possible values are 'squared_loss',\n'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'\n\nThe 'squared_loss' refers to the ordinary least squares fit.\n'huber' modifies 'squared_loss' to focus less on getting outliers\ncorrect by switching from squared to linear loss past a distance of\nepsilon. 'epsilon_insensitive' ignores errors less than epsilon and is\nlinear past that; this is the loss function used in SVR.\n'squared_epsilon_insensitive' is the same but becomes squared loss past\na tolerance of epsilon.\n\nMore details about the losses formulas can be found in the\n:ref:`User Guide `." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/penalty", + "name": "penalty", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.penalty", + "default_value": "'l2'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'l2', 'l1', 'elasticnet'}", + "default_value": "'l2'", + "description": "The penalty (aka regularization term) to be used. Defaults to 'l2'\nwhich is the standard regularizer for linear SVM models. 'l1' and\n'elasticnet' might bring sparsity to the model (feature selection)\nnot achievable with 'l2'." 
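The binary modified_huber formula quoted in the predict_proba docstring, (clip(decision_function(X), -1, 1) + 1) / 2, can be checked directly against the estimator. A sketch on synthetic binary data, assuming the 0.24 behavior of `_predict_proba` shown above:

```python
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

X, y = make_classification(n_samples=100, random_state=0)
clf = SGDClassifier(loss="modified_huber", random_state=0).fit(X, y)

# Positive-class probability per the documented formula.
manual = (np.clip(clf.decision_function(X), -1, 1) + 1) / 2
assert np.allclose(manual, clf.predict_proba(X)[:, 1])
```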
+ }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1", "elasticnet"] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.alpha", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0001", + "description": "Constant that multiplies the regularization term. The higher the\nvalue, the stronger the regularization.\nAlso used to compute the learning rate when set to `learning_rate` is\nset to 'optimal'." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/l1_ratio", + "name": "l1_ratio", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.l1_ratio", + "default_value": "0.15", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.15", + "description": "The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.\nl1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.\nOnly used if `penalty` is 'elasticnet'." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether the intercept should be estimated or not. If False, the\ndata is assumed to be already centered." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of passes over the training data (aka epochs).\nIt only impacts the behavior in the ``fit`` method, and not the\n:meth:`partial_fit` method.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "The stopping criterion. If it is not None, training will stop\nwhen (loss > best_loss - tol) for ``n_iter_no_change`` consecutive\nepochs.\nConvergence is checked against the training loss or the\nvalidation loss depending on the `early_stopping` parameter.\n\n.. 
versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not the training data should be shuffled after each epoch." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.epsilon", + "default_value": "DEFAULT_EPSILON", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "Epsilon in the epsilon-insensitive loss functions; only if `loss` is\n'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.\nFor 'huber', determines the threshold at which it becomes less\nimportant to get the prediction exactly right.\nFor epsilon-insensitive, any differences between the current prediction\nand the correct label are ignored if they are less than this threshold." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Used for shuffling the data, when ``shuffle`` is set to ``True``.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.learning_rate", + "default_value": "'invscaling'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string", + "default_value": "'invscaling'", + "description": "The learning rate schedule:\n\n- 'constant': `eta = eta0`\n- 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n where t0 is chosen by a heuristic proposed by Leon Bottou.\n- 'invscaling': `eta = eta0 / pow(t, power_t)`\n- 'adaptive': eta = eta0, as long as the training keeps decreasing.\n Each time n_iter_no_change consecutive epochs fail to decrease the\n training loss by tol or fail to increase validation score by tol if\n early_stopping is True, the current learning rate is divided by 5.\n\n .. 
versionadded:: 0.20\n Added 'adaptive' option" + }, + "type": { + "kind": "NamedType", + "name": "string" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/eta0", + "name": "eta0", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.eta0", + "default_value": "0.01", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "double", + "default_value": "0.01", + "description": "The initial learning rate for the 'constant', 'invscaling' or\n'adaptive' schedules. The default value is 0.01." + }, + "type": { + "kind": "NamedType", + "name": "double" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/power_t", + "name": "power_t", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.power_t", + "default_value": "0.25", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "double", + "default_value": "0.25", + "description": "The exponent for inverse scaling learning rate." + }, + "type": { + "kind": "NamedType", + "name": "double" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.early_stopping", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use early stopping to terminate training when validation\nscore is not improving. If set to True, it will automatically set aside\na fraction of training data as validation and terminate\ntraining when validation score returned by the `score` method is not\nimproving by at least `tol` for `n_iter_no_change` consecutive\nepochs.\n\n.. versionadded:: 0.20\n Added 'early_stopping' option" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The proportion of training data to set aside as validation set for\nearly stopping. Must be between 0 and 1.\nOnly used if `early_stopping` is True.\n\n.. versionadded:: 0.20\n Added 'validation_fraction' option" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.n_iter_no_change", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of iterations with no improvement to wait before stopping\nfitting.\nConvergence is checked against the training loss or the\nvalidation loss depending on the `early_stopping` parameter.\n\n.. 
versionadded:: 0.20\n Added 'n_iter_no_change' option" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to True, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution.\nSee :term:`the Glossary `.\n\nRepeatedly calling fit or partial_fit when warm_start is True can\nresult in a different solution than when calling fit a single time\nbecause of the way the data is shuffled.\nIf a dynamic learning rate is used, the learning rate is adapted\ndepending on the number of samples already seen. Calling ``fit`` resets\nthis counter, while ``partial_fit`` will result in increasing the\nexisting counter." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/__init__/average", + "name": "average", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor.__init__.average", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or int", + "default_value": "False", + "description": "When set to True, computes the averaged SGD weights accross all\nupdates and stores the result in the ``coef_`` attribute. If set to\nan int greater than 1, averaging will begin once the total number of\nsamples seen reaches `average`. So ``average=10`` will begin\naveraging after seeing 10 samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Linear model fitted by minimizing a regularized empirical loss with SGD\n\nSGD stands for Stochastic Gradient Descent: the gradient of the loss is\nestimated each sample at a time and the model is updated along the way with\na decreasing strength schedule (aka learning rate).\n\nThe regularizer is a penalty added to the loss function that shrinks model\nparameters towards the zero vector using either the squared euclidean norm\nL2 or the absolute norm L1 or a combination of both (Elastic Net). 
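The 'invscaling' schedule listed in the learning_rate description above, and used as SGDRegressor's default with eta0=0.01 and power_t=0.25, is simple enough to evaluate by hand. A tiny numpy sketch of how the step size decays with the update counter t:

```python
import numpy as np

eta0, power_t = 0.01, 0.25   # SGDRegressor defaults shown above
t = np.arange(1, 8)          # running count of weight updates

# 'invscaling' from the docstring: eta = eta0 / pow(t, power_t)
eta = eta0 / np.power(t, power_t)
print(np.round(eta, 5))      # strictly decreasing step sizes
```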
If the\nparameter update crosses the 0.0 value because of the regularizer, the\nupdate is truncated to 0.0 to allow for learning sparse models and achieve\nonline feature selection.\n\nThis implementation works with data represented as dense numpy arrays of\nfloating point values for the features.\n\nRead more in the :ref:`User Guide <sgd>`.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, loss=\"squared_loss\", *, penalty=\"l2\", alpha=0.0001,\n l1_ratio=0.15, fit_intercept=True, max_iter=1000, tol=1e-3,\n shuffle=True, verbose=0, epsilon=DEFAULT_EPSILON,\n random_state=None, learning_rate=\"invscaling\", eta0=0.01,\n power_t=0.25, early_stopping=False, validation_fraction=0.1,\n n_iter_no_change=5, warm_start=False, average=False):\n super().__init__(\n loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio,\n fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,\n shuffle=shuffle, verbose=verbose, epsilon=epsilon,\n random_state=random_state, learning_rate=learning_rate, eta0=eta0,\n power_t=power_t, early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n n_iter_no_change=n_iter_no_change, warm_start=warm_start,\n average=average)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/_more_tags", + "name": "_more_tags", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/SGDRegressor/_more_tags/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient.SGDRegressor._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback/__call__", + "name": "__call__", + "qname": "sklearn.linear_model._stochastic_gradient._ValidationScoreCallback.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback/__call__/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient._ValidationScoreCallback.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback/__call__/coef", + "name": "coef", + "qname": "sklearn.linear_model._stochastic_gradient._ValidationScoreCallback.__call__.coef", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback/__call__/intercept", + "name": "intercept", + "qname": "sklearn.linear_model._stochastic_gradient._ValidationScoreCallback.__call__.intercept", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": 
"" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __call__(self, coef, intercept):\n est = self.estimator\n est.coef_ = coef.reshape(1, -1)\n est.intercept_ = np.atleast_1d(intercept)\n return est.score(self.X_val, self.y_val, self.sample_weight_val)" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._stochastic_gradient._ValidationScoreCallback.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._stochastic_gradient._ValidationScoreCallback.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback/__init__/estimator", + "name": "estimator", + "qname": "sklearn.linear_model._stochastic_gradient._ValidationScoreCallback.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback/__init__/X_val", + "name": "X_val", + "qname": "sklearn.linear_model._stochastic_gradient._ValidationScoreCallback.__init__.X_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback/__init__/y_val", + "name": "y_val", + "qname": "sklearn.linear_model._stochastic_gradient._ValidationScoreCallback.__init__.y_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback/__init__/sample_weight_val", + "name": "sample_weight_val", + "qname": "sklearn.linear_model._stochastic_gradient._ValidationScoreCallback.__init__.sample_weight_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_ValidationScoreCallback/__init__/classes", + "name": "classes", + "qname": "sklearn.linear_model._stochastic_gradient._ValidationScoreCallback.__init__.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Callback for early stopping based on validation score", + "docstring": "", + "code": " def __init__(self, estimator, X_val, y_val, sample_weight_val,\n classes=None):\n self.estimator = clone(estimator)\n self.estimator.t_ = 1 # to pass check_is_fitted\n if classes is not None:\n self.estimator.classes_ = classes\n self.X_val = X_val\n self.y_val = y_val\n 
self.sample_weight_val = sample_weight_val" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_prepare_fit_binary", + "name": "_prepare_fit_binary", + "qname": "sklearn.linear_model._stochastic_gradient._prepare_fit_binary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_prepare_fit_binary/est", + "name": "est", + "qname": "sklearn.linear_model._stochastic_gradient._prepare_fit_binary.est", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_prepare_fit_binary/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient._prepare_fit_binary.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/_prepare_fit_binary/i", + "name": "i", + "qname": "sklearn.linear_model._stochastic_gradient._prepare_fit_binary.i", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Initialization for fit_binary.\n\nReturns y, coef, intercept, average_coef, average_intercept.", + "docstring": "Initialization for fit_binary.\n\nReturns y, coef, intercept, average_coef, average_intercept.", + "code": "def _prepare_fit_binary(est, y, i):\n \"\"\"Initialization for fit_binary.\n\n Returns y, coef, intercept, average_coef, average_intercept.\n \"\"\"\n y_i = np.ones(y.shape, dtype=np.float64, order=\"C\")\n y_i[y != est.classes_[i]] = -1.0\n average_intercept = 0\n average_coef = None\n\n if len(est.classes_) == 2:\n if not est.average:\n coef = est.coef_.ravel()\n intercept = est.intercept_[0]\n else:\n coef = est._standard_coef.ravel()\n intercept = est._standard_intercept[0]\n average_coef = est._average_coef.ravel()\n average_intercept = est._average_intercept[0]\n else:\n if not est.average:\n coef = est.coef_[i]\n intercept = est.intercept_[i]\n else:\n coef = est._standard_coef[i]\n intercept = est._standard_intercept[i]\n average_coef = est._average_coef[i]\n average_intercept = est._average_intercept[i]\n\n return y_i, coef, intercept, average_coef, average_intercept" + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary", + "name": "fit_binary", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary/est", + "name": "est", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary.est", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Estimator object", + "default_value": "", + "description": "The estimator to fit" + }, + "type": { + "kind": "NamedType", + "name": "Estimator object" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary/i", + "name": "i", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary.i", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + 
"description": "Index of the positive class" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary/X", + "name": "X", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "numpy array or sparse matrix of shape [n_samples,n_features]", + "default_value": "", + "description": "Training data" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "numpy array" + }, + { + "kind": "NamedType", + "name": "sparse matrix of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "n_features]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary/y", + "name": "y", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "numpy array of shape [n_samples, ]", + "default_value": "", + "description": "Target values" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "numpy array of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary/alpha", + "name": "alpha", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "The regularization parameter" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary/C", + "name": "C", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary.C", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Maximum step size for passive aggressive" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary/learning_rate", + "name": "learning_rate", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary.learning_rate", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "string", + "default_value": "", + "description": "The learning rate. Accepted values are 'constant', 'optimal',\n'invscaling', 'pa1' and 'pa2'." 
+ }, + "type": { + "kind": "NamedType", + "name": "string" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The maximum number of iterations (epochs)" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary/pos_weight", + "name": "pos_weight", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary.pos_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "The weight of the positive class" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary/neg_weight", + "name": "neg_weight", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary.neg_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "The weight of the negative class" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary/sample_weight", + "name": "sample_weight", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "numpy array of shape [n_samples, ]", + "default_value": "", + "description": "The weight of each sample" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "numpy array of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary/validation_mask", + "name": "validation_mask", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary.validation_mask", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "numpy array of shape [n_samples, ]", + "default_value": "None", + "description": "Precomputed validation mask in case _fit_binary is called in the\ncontext of a one-vs-rest reduction." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "numpy array of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._stochastic_gradient/fit_binary/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._stochastic_gradient.fit_binary.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "If int, random_state is the seed used by the random number generator;\nIf RandomState instance, random_state is the random number generator;\nIf None, the random number generator is the RandomState instance used\nby `np.random`." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit a single binary classifier.\n\nThe i'th class is considered the \"positive\" class.", + "docstring": "Fit a single binary classifier.\n\nThe i'th class is considered the \"positive\" class.\n\nParameters\n----------\nest : Estimator object\n The estimator to fit\n\ni : int\n Index of the positive class\n\nX : numpy array or sparse matrix of shape [n_samples,n_features]\n Training data\n\ny : numpy array of shape [n_samples, ]\n Target values\n\nalpha : float\n The regularization parameter\n\nC : float\n Maximum step size for passive aggressive\n\nlearning_rate : string\n The learning rate. Accepted values are 'constant', 'optimal',\n 'invscaling', 'pa1' and 'pa2'.\n\nmax_iter : int\n The maximum number of iterations (epochs)\n\npos_weight : float\n The weight of the positive class\n\nneg_weight : float\n The weight of the negative class\n\nsample_weight : numpy array of shape [n_samples, ]\n The weight of each sample\n\nvalidation_mask : numpy array of shape [n_samples, ], default=None\n Precomputed validation mask in case _fit_binary is called in the\n context of a one-vs-rest reduction.\n\nrandom_state : int, RandomState instance, default=None\n If int, random_state is the seed used by the random number generator;\n If RandomState instance, random_state is the random number generator;\n If None, the random number generator is the RandomState instance used\n by `np.random`.", + "code": "def fit_binary(est, i, X, y, alpha, C, learning_rate, max_iter,\n pos_weight, neg_weight, sample_weight, validation_mask=None,\n random_state=None):\n \"\"\"Fit a single binary classifier.\n\n The i'th class is considered the \"positive\" class.\n\n Parameters\n ----------\n est : Estimator object\n The estimator to fit\n\n i : int\n Index of the positive class\n\n X : numpy array or sparse matrix of shape [n_samples,n_features]\n Training data\n\n y : numpy array of shape [n_samples, ]\n Target values\n\n alpha : float\n The regularization parameter\n\n C : float\n Maximum step size for passive aggressive\n\n learning_rate : string\n The learning rate. 
Accepted values are 'constant', 'optimal',\n 'invscaling', 'pa1' and 'pa2'.\n\n max_iter : int\n The maximum number of iterations (epochs)\n\n pos_weight : float\n The weight of the positive class\n\n neg_weight : float\n The weight of the negative class\n\n sample_weight : numpy array of shape [n_samples, ]\n The weight of each sample\n\n validation_mask : numpy array of shape [n_samples, ], default=None\n Precomputed validation mask in case _fit_binary is called in the\n context of a one-vs-rest reduction.\n\n random_state : int, RandomState instance, default=None\n If int, random_state is the seed used by the random number generator;\n If RandomState instance, random_state is the random number generator;\n If None, the random number generator is the RandomState instance used\n by `np.random`.\n \"\"\"\n # if average is not true, average_coef, and average_intercept will be\n # unused\n y_i, coef, intercept, average_coef, average_intercept = \\\n _prepare_fit_binary(est, y, i)\n assert y_i.shape[0] == y.shape[0] == sample_weight.shape[0]\n\n random_state = check_random_state(random_state)\n dataset, intercept_decay = make_dataset(\n X, y_i, sample_weight, random_state=random_state)\n\n penalty_type = est._get_penalty_type(est.penalty)\n learning_rate_type = est._get_learning_rate_type(learning_rate)\n\n if validation_mask is None:\n validation_mask = est._make_validation_split(y_i)\n classes = np.array([-1, 1], dtype=y_i.dtype)\n validation_score_cb = est._make_validation_score_cb(\n validation_mask, X, y_i, sample_weight, classes=classes)\n\n # numpy mtrand expects a C long which is a signed 32 bit integer under\n # Windows\n seed = random_state.randint(MAX_INT)\n\n tol = est.tol if est.tol is not None else -np.inf\n\n coef, intercept, average_coef, average_intercept, n_iter_ = _plain_sgd(\n coef, intercept, average_coef, average_intercept, est.loss_function_,\n penalty_type, alpha, C, est.l1_ratio, dataset, validation_mask,\n est.early_stopping, validation_score_cb, int(est.n_iter_no_change),\n max_iter, tol, int(est.fit_intercept), int(est.verbose),\n int(est.shuffle), seed, pos_weight, neg_weight, learning_rate_type,\n est.eta0, est.power_t, est.t_, intercept_decay, est.average)\n\n if est.average:\n if len(est.classes_) == 2:\n est._average_intercept[0] = average_intercept\n else:\n est._average_intercept[i] = average_intercept\n\n return coef, intercept, n_iter_" + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/__init__", + "name": "__init__", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/__init__/self", + "name": "self", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations." 
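fit_binary above is invoked once per class in the one-vs-rest reduction that SGDClassifier uses for multiclass problems; from the public API this shows up as one coef_ row and one decision_function column per class. A sketch with synthetic three-class data:

```python
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

X, y = make_classification(n_samples=150, n_features=8, n_informative=5,
                           n_classes=3, random_state=0)
clf = SGDClassifier(random_state=0).fit(X, y)

# One binary classifier per class (one-vs-rest).
print(clf.coef_.shape)                 # (3, 8)
print(clf.decision_function(X).shape)  # (150, 3)
```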
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/__init__/copy_X", + "name": "copy_X", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.__init__.copy_X", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, X will be copied; else, it may be overwritten." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/__init__/max_subpopulation", + "name": "max_subpopulation", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.__init__.max_subpopulation", + "default_value": "10000.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1e4", + "description": "Instead of computing with a set of cardinality 'n choose k', where n is\nthe number of samples and k is the number of subsamples (at least\nnumber of features), consider only a stochastic subpopulation of a\ngiven maximal size if 'n choose k' is larger than max_subpopulation.\nFor other than small problem sizes this parameter will determine\nmemory usage and runtime if n_subsamples is not changed." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/__init__/n_subsamples", + "name": "n_subsamples", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.__init__.n_subsamples", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of samples to calculate the parameters. This is at least the\nnumber of features (plus 1 if fit_intercept=True) and the number of\nsamples as a maximum. A lower number leads to a higher breakdown\npoint and a low efficiency while a high number leads to a low\nbreakdown point and a high efficiency. If None, take the\nminimum number of subsamples leading to maximal robustness.\nIf n_subsamples is set to n_samples, Theil-Sen is identical to least\nsquares." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.__init__.max_iter", + "default_value": "300", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations for the calculation of spatial median." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/__init__/tol", + "name": "tol", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.e-3", + "description": "Tolerance when calculating spatial median." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "A random number generator instance to define the state of the random\npermutations generator. Pass an int for reproducible output across\nmultiple function calls.\nSee :term:`Glossary `" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of CPUs to use during the cross validation.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Verbose mode when fitting the model." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Theil-Sen Estimator: robust multivariate regression model.\n\nThe algorithm calculates least square solutions on subsets with size\nn_subsamples of the samples in X. Any value of n_subsamples between the\nnumber of features and samples leads to an estimator with a compromise\nbetween robustness and efficiency. Since the number of least square\nsolutions is \"n_samples choose n_subsamples\", it can be extremely large\nand can therefore be limited with max_subpopulation. If this limit is\nreached, the subsets are chosen randomly. 
In a final step, the spatial\nmedian (or L1 median) of all least square solutions is calculated.\n\nRead more in the :ref:`User Guide <theil_sen_regression>`.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, fit_intercept=True, copy_X=True,\n max_subpopulation=1e4, n_subsamples=None, max_iter=300,\n tol=1.e-3, random_state=None, n_jobs=None, verbose=False):\n self.fit_intercept = fit_intercept\n self.copy_X = copy_X\n self.max_subpopulation = int(max_subpopulation)\n self.n_subsamples = n_subsamples\n self.max_iter = max_iter\n self.tol = tol\n self.random_state = random_state\n self.n_jobs = n_jobs\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/_check_subparams", + "name": "_check_subparams", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor._check_subparams", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/_check_subparams/self", + "name": "self", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor._check_subparams.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/_check_subparams/n_samples", + "name": "n_samples", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor._check_subparams.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/_check_subparams/n_features", + "name": "n_features", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor._check_subparams.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_subparams(self, n_samples, n_features):\n n_subsamples = self.n_subsamples\n\n if self.fit_intercept:\n n_dim = n_features + 1\n else:\n n_dim = n_features\n\n if n_subsamples is not None:\n if n_subsamples > n_samples:\n raise ValueError(\"Invalid parameter since n_subsamples > \"\n \"n_samples ({0} > {1}).\".format(n_subsamples,\n n_samples))\n if n_samples >= n_features:\n if n_dim > n_subsamples:\n plus_1 = \"+1\" if self.fit_intercept else \"\"\n raise ValueError(\"Invalid parameter since n_features{0} \"\n \"> n_subsamples ({1} > {2}).\"\n \"\".format(plus_1, n_dim, n_subsamples))\n else: # if n_samples < n_features\n if n_subsamples != n_samples:\n raise ValueError(\"Invalid parameter since n_subsamples != \"\n \"n_samples ({0} != {1}) while n_samples \"\n \"< n_features.\".format(n_subsamples,\n n_samples))\n else:\n n_subsamples = min(n_dim, n_samples)\n\n if self.max_subpopulation <= 0:\n raise ValueError(\"Subpopulation must be strictly positive \"\n \"({0} <= 0).\".format(self.max_subpopulation))\n\n all_combinations = max(1, np.rint(binom(n_samples, n_subsamples)))\n n_subpopulation = int(min(self.max_subpopulation, all_combinations))\n\n return n_subsamples, n_subpopulation" + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/fit", + "name": "fit", + "qname": 
"sklearn.linear_model._theil_sen.TheilSenRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/fit/self", + "name": "self", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/fit/X", + "name": "X", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/TheilSenRegressor/fit/y", + "name": "y", + "qname": "sklearn.linear_model._theil_sen.TheilSenRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit linear model.", + "docstring": "Fit linear model.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training data.\ny : ndarray of shape (n_samples,)\n Target values.\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y):\n \"\"\"Fit linear model.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Training data.\n y : ndarray of shape (n_samples,)\n Target values.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n random_state = check_random_state(self.random_state)\n X, y = self._validate_data(X, y, y_numeric=True)\n n_samples, n_features = X.shape\n n_subsamples, self.n_subpopulation_ = self._check_subparams(n_samples,\n n_features)\n self.breakdown_ = _breakdown_point(n_samples, n_subsamples)\n\n if self.verbose:\n print(\"Breakdown point: {0}\".format(self.breakdown_))\n print(\"Number of samples: {0}\".format(n_samples))\n tol_outliers = int(self.breakdown_ * n_samples)\n print(\"Tolerable outliers: {0}\".format(tol_outliers))\n print(\"Number of subpopulations: {0}\".format(\n self.n_subpopulation_))\n\n # Determine indices of subpopulation\n if np.rint(binom(n_samples, n_subsamples)) <= self.max_subpopulation:\n indices = list(combinations(range(n_samples), n_subsamples))\n else:\n indices = [random_state.choice(n_samples, size=n_subsamples,\n replace=False)\n for _ in range(self.n_subpopulation_)]\n\n n_jobs = effective_n_jobs(self.n_jobs)\n index_list = np.array_split(indices, n_jobs)\n weights = Parallel(n_jobs=n_jobs,\n verbose=self.verbose)(\n delayed(_lstsq)(X, y, index_list[job], self.fit_intercept)\n for job in range(n_jobs))\n weights = np.vstack(weights)\n self.n_iter_, coefs = _spatial_median(weights,\n max_iter=self.max_iter,\n tol=self.tol)\n\n if self.fit_intercept:\n self.intercept_ = coefs[0]\n self.coef_ = coefs[1:]\n else:\n self.intercept_ = 0.\n self.coef_ = coefs\n\n return self" + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_breakdown_point", + "name": "_breakdown_point", + "qname": 
"sklearn.linear_model._theil_sen._breakdown_point", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_breakdown_point/n_samples", + "name": "n_samples", + "qname": "sklearn.linear_model._theil_sen._breakdown_point.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_breakdown_point/n_subsamples", + "name": "n_subsamples", + "qname": "sklearn.linear_model._theil_sen._breakdown_point.n_subsamples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of subsamples to consider." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Approximation of the breakdown point.", + "docstring": "Approximation of the breakdown point.\n\nParameters\n----------\nn_samples : int\n Number of samples.\n\nn_subsamples : int\n Number of subsamples to consider.\n\nReturns\n-------\nbreakdown_point : float\n Approximation of breakdown point.", + "code": "def _breakdown_point(n_samples, n_subsamples):\n \"\"\"Approximation of the breakdown point.\n\n Parameters\n ----------\n n_samples : int\n Number of samples.\n\n n_subsamples : int\n Number of subsamples to consider.\n\n Returns\n -------\n breakdown_point : float\n Approximation of breakdown point.\n \"\"\"\n return 1 - (0.5 ** (1 / n_subsamples) * (n_samples - n_subsamples + 1) +\n n_subsamples - 1) / n_samples" + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_lstsq", + "name": "_lstsq", + "qname": "sklearn.linear_model._theil_sen._lstsq", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_lstsq/X", + "name": "X", + "qname": "sklearn.linear_model._theil_sen._lstsq.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Design matrix, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_lstsq/y", + "name": "y", + "qname": "sklearn.linear_model._theil_sen._lstsq.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target vector, where n_samples is the number of samples." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_lstsq/indices", + "name": "indices", + "qname": "sklearn.linear_model._theil_sen._lstsq.indices", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_subpopulation, n_subsamples)", + "default_value": "", + "description": "Indices of all subsamples with respect to the chosen subpopulation." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_subpopulation, n_subsamples)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_lstsq/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.linear_model._theil_sen._lstsq.fit_intercept", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Fit intercept or not." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Least Squares Estimator for TheilSenRegressor class.\n\nThis function calculates the least squares method on a subset of rows of X\nand y defined by the indices array. Optionally, an intercept column is\nadded if intercept is set to true.", + "docstring": "Least Squares Estimator for TheilSenRegressor class.\n\nThis function calculates the least squares method on a subset of rows of X\nand y defined by the indices array. Optionally, an intercept column is\nadded if intercept is set to true.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Design matrix, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : ndarray of shape (n_samples,)\n Target vector, where n_samples is the number of samples.\n\nindices : ndarray of shape (n_subpopulation, n_subsamples)\n Indices of all subsamples with respect to the chosen subpopulation.\n\nfit_intercept : bool\n Fit intercept or not.\n\nReturns\n-------\nweights : ndarray of shape (n_subpopulation, n_features + intercept)\n Solution matrix of n_subpopulation solved least square problems.", + "code": "def _lstsq(X, y, indices, fit_intercept):\n \"\"\"Least Squares Estimator for TheilSenRegressor class.\n\n This function calculates the least squares method on a subset of rows of X\n and y defined by the indices array. 
Optionally, an intercept column is\n added if intercept is set to true.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Design matrix, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : ndarray of shape (n_samples,)\n Target vector, where n_samples is the number of samples.\n\n indices : ndarray of shape (n_subpopulation, n_subsamples)\n Indices of all subsamples with respect to the chosen subpopulation.\n\n fit_intercept : bool\n Fit intercept or not.\n\n Returns\n -------\n weights : ndarray of shape (n_subpopulation, n_features + intercept)\n Solution matrix of n_subpopulation solved least square problems.\n \"\"\"\n fit_intercept = int(fit_intercept)\n n_features = X.shape[1] + fit_intercept\n n_subsamples = indices.shape[1]\n weights = np.empty((indices.shape[0], n_features))\n X_subpopulation = np.ones((n_subsamples, n_features))\n # gelss need to pad y_subpopulation to be of the max dim of X_subpopulation\n y_subpopulation = np.zeros((max(n_subsamples, n_features)))\n lstsq, = get_lapack_funcs(('gelss',), (X_subpopulation, y_subpopulation))\n\n for index, subset in enumerate(indices):\n X_subpopulation[:, fit_intercept:] = X[subset, :]\n y_subpopulation[:n_subsamples] = y[subset]\n weights[index] = lstsq(X_subpopulation,\n y_subpopulation)[1][:n_features]\n\n return weights" + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_modified_weiszfeld_step", + "name": "_modified_weiszfeld_step", + "qname": "sklearn.linear_model._theil_sen._modified_weiszfeld_step", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_modified_weiszfeld_step/X", + "name": "X", + "qname": "sklearn.linear_model._theil_sen._modified_weiszfeld_step.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_modified_weiszfeld_step/x_old", + "name": "x_old", + "qname": "sklearn.linear_model._theil_sen._modified_weiszfeld_step.x_old", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape = (n_features,)", + "default_value": "", + "description": "Current start vector." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape = (n_features,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Modified Weiszfeld step.\n\nThis function defines one iteration step in order to approximate the\nspatial median (L1 median). It is a form of an iteratively re-weighted\nleast squares method.", + "docstring": "Modified Weiszfeld step.\n\nThis function defines one iteration step in order to approximate the\nspatial median (L1 median). 
It is a form of an iteratively re-weighted\nleast squares method.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\nx_old : ndarray of shape = (n_features,)\n Current start vector.\n\nReturns\n-------\nx_new : ndarray of shape (n_features,)\n New iteration step.\n\nReferences\n----------\n- On Computation of Spatial Median for Robust Data Mining, 2005\n T. K\u00e4rkk\u00e4inen and S. \u00c4yr\u00e4m\u00f6\n http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf", + "code": "def _modified_weiszfeld_step(X, x_old):\n \"\"\"Modified Weiszfeld step.\n\n This function defines one iteration step in order to approximate the\n spatial median (L1 median). It is a form of an iteratively re-weighted\n least squares method.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n x_old : ndarray of shape = (n_features,)\n Current start vector.\n\n Returns\n -------\n x_new : ndarray of shape (n_features,)\n New iteration step.\n\n References\n ----------\n - On Computation of Spatial Median for Robust Data Mining, 2005\n T. K\u00e4rkk\u00e4inen and S. \u00c4yr\u00e4m\u00f6\n http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf\n \"\"\"\n diff = X - x_old\n diff_norm = np.sqrt(np.sum(diff ** 2, axis=1))\n mask = diff_norm >= _EPSILON\n # x_old equals one of our samples\n is_x_old_in_X = int(mask.sum() < X.shape[0])\n\n diff = diff[mask]\n diff_norm = diff_norm[mask][:, np.newaxis]\n quotient_norm = linalg.norm(np.sum(diff / diff_norm, axis=0))\n\n if quotient_norm > _EPSILON: # to avoid division by zero\n new_direction = (np.sum(X[mask, :] / diff_norm, axis=0)\n / np.sum(1 / diff_norm, axis=0))\n else:\n new_direction = 1.\n quotient_norm = 1.\n\n return (max(0., 1. - is_x_old_in_X / quotient_norm) * new_direction\n + min(1., is_x_old_in_X / quotient_norm) * x_old)" + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_spatial_median", + "name": "_spatial_median", + "qname": "sklearn.linear_model._theil_sen._spatial_median", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_spatial_median/X", + "name": "X", + "qname": "sklearn.linear_model._theil_sen._spatial_median.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_spatial_median/max_iter", + "name": "max_iter", + "qname": "sklearn.linear_model._theil_sen._spatial_median.max_iter", + "default_value": "300", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations." 
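The `_modified_weiszfeld_step` entry above describes one step of an iteratively re-weighted scheme. The toy demo below is a simplified sketch of that idea on synthetic 2-D data: it uses the plain Weiszfeld update and assumes the iterate never coincides with a sample point, unlike the guarded private implementation recorded above.

```python
import numpy as np

def weiszfeld_step(X, x_old):
    # One plain Weiszfeld update: re-weight samples by inverse distance.
    d = np.sqrt(((X - x_old) ** 2).sum(axis=1))
    w = 1.0 / d                      # assumes no zero distances
    return (X * w[:, None]).sum(axis=0) / w.sum()

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 2))
X[:10] += 50                         # a few gross outliers
x = X.mean(axis=0)                   # the mean is dragged toward the outliers
for _ in range(100):
    x = weiszfeld_step(X, x)
print(X.mean(axis=0), x)             # the spatial median stays near the origin
```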
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.linear_model._theil_sen/_spatial_median/tol", + "name": "tol", + "qname": "sklearn.linear_model._theil_sen._spatial_median.tol", + "default_value": "0.001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.e-3", + "description": "Stop the algorithm if spatial_median has converged." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Spatial median (L1 median).\n\nThe spatial median is member of a class of so-called M-estimators which\nare defined by an optimization problem. Given a number of p points in an\nn-dimensional space, the point x minimizing the sum of all distances to the\np other points is called spatial median.", + "docstring": "Spatial median (L1 median).\n\nThe spatial median is member of a class of so-called M-estimators which\nare defined by an optimization problem. Given a number of p points in an\nn-dimensional space, the point x minimizing the sum of all distances to the\np other points is called spatial median.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\nmax_iter : int, default=300\n Maximum number of iterations.\n\ntol : float, default=1.e-3\n Stop the algorithm if spatial_median has converged.\n\nReturns\n-------\nspatial_median : ndarray of shape = (n_features,)\n Spatial median.\n\nn_iter : int\n Number of iterations needed.\n\nReferences\n----------\n- On Computation of Spatial Median for Robust Data Mining, 2005\n T. K\u00e4rkk\u00e4inen and S. \u00c4yr\u00e4m\u00f6\n http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf", + "code": "def _spatial_median(X, max_iter=300, tol=1.e-3):\n \"\"\"Spatial median (L1 median).\n\n The spatial median is member of a class of so-called M-estimators which\n are defined by an optimization problem. Given a number of p points in an\n n-dimensional space, the point x minimizing the sum of all distances to the\n p other points is called spatial median.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n max_iter : int, default=300\n Maximum number of iterations.\n\n tol : float, default=1.e-3\n Stop the algorithm if spatial_median has converged.\n\n Returns\n -------\n spatial_median : ndarray of shape = (n_features,)\n Spatial median.\n\n n_iter : int\n Number of iterations needed.\n\n References\n ----------\n - On Computation of Spatial Median for Robust Data Mining, 2005\n T. K\u00e4rkk\u00e4inen and S. 
\u00c4yr\u00e4m\u00f6\n http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf\n \"\"\"\n if X.shape[1] == 1:\n return 1, np.median(X.ravel(), keepdims=True)\n\n tol **= 2 # We are computing the tol on the squared norm\n spatial_median_old = np.mean(X, axis=0)\n\n for n_iter in range(max_iter):\n spatial_median = _modified_weiszfeld_step(X, spatial_median_old)\n if np.sum((spatial_median_old - spatial_median) ** 2) < tol:\n break\n else:\n spatial_median_old = spatial_median\n else:\n warnings.warn(\"Maximum number of iterations {max_iter} reached in \"\n \"spatial median for TheilSen regressor.\"\n \"\".format(max_iter=max_iter), ConvergenceWarning)\n return n_iter, spatial_median" + }, + { + "id": "scikit-learn/sklearn.linear_model.setup/configuration", + "name": "configuration", + "qname": "sklearn.linear_model.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.linear_model.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.linear_model.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.linear_model.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.linear_model.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package='', top_path=None):\n from numpy.distutils.misc_util import Configuration\n\n config = Configuration('linear_model', parent_package, top_path)\n\n libraries = []\n if os.name == 'posix':\n libraries.append('m')\n\n config.add_extension('_cd_fast',\n sources=['_cd_fast.pyx'],\n include_dirs=numpy.get_include(),\n libraries=libraries)\n\n config.add_extension('_sgd_fast',\n sources=['_sgd_fast.pyx'],\n include_dirs=numpy.get_include(),\n libraries=libraries)\n\n # generate sag_fast from template\n templates = ['sklearn/linear_model/_sag_fast.pyx.tp']\n gen_from_templates(templates, top_path)\n\n config.add_extension('_sag_fast',\n sources=['_sag_fast.pyx'],\n include_dirs=numpy.get_include())\n\n # add other directories\n config.add_subpackage('tests')\n config.add_subpackage('_glm')\n config.add_subpackage('_glm/tests')\n\n return config" + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/__init__", + "name": "__init__", + "qname": "sklearn.manifold._isomap.Isomap.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/__init__/self", + "name": "self", + "qname": "sklearn.manifold._isomap.Isomap.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.manifold._isomap.Isomap.__init__.n_neighbors", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "number of neighbors to consider for each point." 
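With the Theil-Sen records (estimator, `_check_subparams`, `fit`, and the spatial-median helpers) complete above, here is a minimal end-to-end usage sketch of the public estimator. The synthetic data, seed, and the OLS comparison are illustrative assumptions, not part of the API record:

```python
import numpy as np
from sklearn.linear_model import LinearRegression, TheilSenRegressor

rng = np.random.default_rng(0)
X = rng.uniform(0, 10, size=(100, 1))
y = 3.0 * X.ravel() + rng.normal(scale=0.5, size=100)
y[X.ravel() > 8] += 40               # corrupt ~20% of the targets

ts = TheilSenRegressor(random_state=0).fit(X, y)
ols = LinearRegression().fit(X, y)
print(ts.coef_, ols.coef_)           # Theil-Sen should stay near 3.0; OLS is pulled up
print(ts.breakdown_, ts.n_subpopulation_)  # attributes set by fit(), per the record above
```

The corrupted fraction here sits below the ~0.29 breakdown point computed earlier for n_subsamples=2, which is why the robust fit is expected to hold.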
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/__init__/n_components", + "name": "n_components", + "qname": "sklearn.manifold._isomap.Isomap.__init__.n_components", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "number of coordinates for the manifold" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/__init__/eigen_solver", + "name": "eigen_solver", + "qname": "sklearn.manifold._isomap.Isomap.__init__.eigen_solver", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'arpack', 'dense'}", + "default_value": "'auto'", + "description": "'auto' : Attempt to choose the most efficient solver\nfor the given problem.\n\n'arpack' : Use Arnoldi decomposition to find the eigenvalues\nand eigenvectors.\n\n'dense' : Use a direct solver (i.e. LAPACK)\nfor the eigenvalue decomposition." + }, + "type": { + "kind": "EnumType", + "values": ["dense", "auto", "arpack"] + } + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/__init__/tol", + "name": "tol", + "qname": "sklearn.manifold._isomap.Isomap.__init__.tol", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "Convergence tolerance passed to arpack or lobpcg.\nnot used if eigen_solver == 'dense'." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.manifold._isomap.Isomap.__init__.max_iter", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Maximum number of iterations for the arpack solver.\nnot used if eigen_solver == 'dense'." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/__init__/path_method", + "name": "path_method", + "qname": "sklearn.manifold._isomap.Isomap.__init__.path_method", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'FW', 'D'}", + "default_value": "'auto'", + "description": "Method to use in finding shortest path.\n\n'auto' : attempt to choose the best algorithm automatically.\n\n'FW' : Floyd-Warshall algorithm.\n\n'D' : Dijkstra's algorithm." + }, + "type": { + "kind": "EnumType", + "values": ["auto", "D", "FW"] + } + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/__init__/neighbors_algorithm", + "name": "neighbors_algorithm", + "qname": "sklearn.manifold._isomap.Isomap.__init__.neighbors_algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'brute', 'kd_tree', 'ball_tree'}", + "default_value": "'auto'", + "description": "Algorithm to use for nearest neighbors search,\npassed to neighbors.NearestNeighbors instance." 
+ }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.manifold._isomap.Isomap.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "The number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/__init__/metric", + "name": "metric", + "qname": "sklearn.manifold._isomap.Isomap.__init__.metric", + "default_value": "'minkowski'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string, or callable", + "default_value": "\"minkowski\"", + "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string or callable, it must be one of\nthe options allowed by :func:`sklearn.metrics.pairwise_distances` for\nits metric parameter.\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square. X may be a :term:`Glossary `.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/__init__/p", + "name": "p", + "qname": "sklearn.manifold._isomap.Isomap.__init__.p", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Parameter for the Minkowski metric from\nsklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/__init__/metric_params", + "name": "metric_params", + "qname": "sklearn.manifold._isomap.Isomap.__init__.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional keyword arguments for the metric function.\n\n.. 
versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Isomap Embedding\n\nNon-linear dimensionality reduction through Isometric Mapping\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, n_neighbors=5, n_components=2, eigen_solver='auto',\n tol=0, max_iter=None, path_method='auto',\n neighbors_algorithm='auto', n_jobs=None, metric='minkowski',\n p=2, metric_params=None):\n self.n_neighbors = n_neighbors\n self.n_components = n_components\n self.eigen_solver = eigen_solver\n self.tol = tol\n self.max_iter = max_iter\n self.path_method = path_method\n self.neighbors_algorithm = neighbors_algorithm\n self.n_jobs = n_jobs\n self.metric = metric\n self.p = p\n self.metric_params = metric_params" + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/_fit_transform", + "name": "_fit_transform", + "qname": "sklearn.manifold._isomap.Isomap._fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/_fit_transform/self", + "name": "self", + "qname": "sklearn.manifold._isomap.Isomap._fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/_fit_transform/X", + "name": "X", + "qname": "sklearn.manifold._isomap.Isomap._fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit_transform(self, X):\n self.nbrs_ = NearestNeighbors(n_neighbors=self.n_neighbors,\n algorithm=self.neighbors_algorithm,\n metric=self.metric, p=self.p,\n metric_params=self.metric_params,\n n_jobs=self.n_jobs)\n self.nbrs_.fit(X)\n self.n_features_in_ = self.nbrs_.n_features_in_\n\n self.kernel_pca_ = KernelPCA(n_components=self.n_components,\n kernel=\"precomputed\",\n eigen_solver=self.eigen_solver,\n tol=self.tol, max_iter=self.max_iter,\n n_jobs=self.n_jobs)\n\n kng = kneighbors_graph(self.nbrs_, self.n_neighbors,\n metric=self.metric, p=self.p,\n metric_params=self.metric_params,\n mode='distance', n_jobs=self.n_jobs)\n\n self.dist_matrix_ = graph_shortest_path(kng,\n method=self.path_method,\n directed=False)\n G = self.dist_matrix_ ** 2\n G *= -0.5\n\n self.embedding_ = self.kernel_pca_.fit_transform(G)" + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/fit", + "name": "fit", + "qname": "sklearn.manifold._isomap.Isomap.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/fit/self", + "name": "self", + "qname": "sklearn.manifold._isomap.Isomap.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/fit/X", + "name": "X", + "qname": "sklearn.manifold._isomap.Isomap.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse graph, BallTree, KDTree, NearestNeighbors}", + "default_value": "", + 
"description": "Sample data, shape = (n_samples, n_features), in the form of a\nnumpy array, sparse graph, precomputed tree, or NearestNeighbors\nobject." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/fit/y", + "name": "y", + "qname": "sklearn.manifold._isomap.Isomap.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the embedding vectors for data X", + "docstring": "Compute the embedding vectors for data X\n\nParameters\n----------\nX : {array-like, sparse graph, BallTree, KDTree, NearestNeighbors}\n Sample data, shape = (n_samples, n_features), in the form of a\n numpy array, sparse graph, precomputed tree, or NearestNeighbors\n object.\n\ny : Ignored\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y=None):\n \"\"\"Compute the embedding vectors for data X\n\n Parameters\n ----------\n X : {array-like, sparse graph, BallTree, KDTree, NearestNeighbors}\n Sample data, shape = (n_samples, n_features), in the form of a\n numpy array, sparse graph, precomputed tree, or NearestNeighbors\n object.\n\n y : Ignored\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._fit_transform(X)\n return self" + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/fit_transform", + "name": "fit_transform", + "qname": "sklearn.manifold._isomap.Isomap.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/fit_transform/self", + "name": "self", + "qname": "sklearn.manifold._isomap.Isomap.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/fit_transform/X", + "name": "X", + "qname": "sklearn.manifold._isomap.Isomap.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse graph, BallTree, KDTree}", + "default_value": "", + "description": "Training vector, where n_samples in the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/fit_transform/y", + "name": "y", + "qname": "sklearn.manifold._isomap.Isomap.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model from data in X and transform X.", + "docstring": "Fit the model from data in X and transform X.\n\nParameters\n----------\nX : {array-like, sparse graph, BallTree, KDTree}\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\ny : Ignored\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Fit the model from data in X and transform X.\n\n Parameters\n ----------\n X : {array-like, sparse graph, BallTree, KDTree}\n Training vector, where n_samples in the number of samples\n and n_features is the number of features.\n\n y : Ignored\n\n Returns\n -------\n X_new : array-like, shape (n_samples, n_components)\n \"\"\"\n self._fit_transform(X)\n return self.embedding_" + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/reconstruction_error", + "name": "reconstruction_error", + "qname": "sklearn.manifold._isomap.Isomap.reconstruction_error", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/reconstruction_error/self", + "name": "self", + "qname": "sklearn.manifold._isomap.Isomap.reconstruction_error.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the reconstruction error for the embedding.", + "docstring": "Compute the reconstruction error for the embedding.\n\nReturns\n-------\nreconstruction_error : float\n\nNotes\n-----\nThe cost function of an isomap embedding is\n\n``E = frobenius_norm[K(D) - K(D_fit)] / n_samples``\n\nWhere D is the matrix of distances for the input data X,\nD_fit is the matrix of distances for the output embedding X_fit,\nand K is the isomap kernel:\n\n``K(D) = -0.5 * (I - 1/n_samples) * D^2 * (I - 1/n_samples)``", + "code": " def reconstruction_error(self):\n \"\"\"Compute the reconstruction error for the embedding.\n\n Returns\n -------\n reconstruction_error : float\n\n Notes\n -----\n The cost function of an isomap embedding is\n\n ``E = frobenius_norm[K(D) - K(D_fit)] / n_samples``\n\n Where D is the matrix of distances for the input data X,\n D_fit is the matrix of distances for the output embedding X_fit,\n and K is the isomap kernel:\n\n ``K(D) = -0.5 * (I - 1/n_samples) * D^2 * (I - 1/n_samples)``\n \"\"\"\n G = -0.5 * self.dist_matrix_ ** 2\n G_center = KernelCenterer().fit_transform(G)\n evals = self.kernel_pca_.lambdas_\n return np.sqrt(np.sum(G_center ** 2) - np.sum(evals ** 2)) / G.shape[0]" + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/transform", + "name": "transform", + "qname": "sklearn.manifold._isomap.Isomap.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/transform/self", + "name": "self", + "qname": 
"sklearn.manifold._isomap.Isomap.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._isomap/Isomap/transform/X", + "name": "X", + "qname": "sklearn.manifold._isomap.Isomap.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_queries, n_features)", + "default_value": "", + "description": "If neighbors_algorithm='precomputed', X is assumed to be a\ndistance matrix or a sparse graph of shape\n(n_queries, n_samples_fit)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_queries, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform X.\n\nThis is implemented by linking the points X into the graph of geodesic\ndistances of the training data. First the `n_neighbors` nearest\nneighbors of X are found in the training data, and from these the\nshortest geodesic distances from each point in X to each point in\nthe training data are computed in order to construct the kernel.\nThe embedding of X is the projection of this kernel onto the\nembedding vectors of the training set.", + "docstring": "Transform X.\n\nThis is implemented by linking the points X into the graph of geodesic\ndistances of the training data. First the `n_neighbors` nearest\nneighbors of X are found in the training data, and from these the\nshortest geodesic distances from each point in X to each point in\nthe training data are computed in order to construct the kernel.\nThe embedding of X is the projection of this kernel onto the\nembedding vectors of the training set.\n\nParameters\n----------\nX : array-like, shape (n_queries, n_features)\n If neighbors_algorithm='precomputed', X is assumed to be a\n distance matrix or a sparse graph of shape\n (n_queries, n_samples_fit).\n\nReturns\n-------\nX_new : array-like, shape (n_queries, n_components)", + "code": " def transform(self, X):\n \"\"\"Transform X.\n\n This is implemented by linking the points X into the graph of geodesic\n distances of the training data. First the `n_neighbors` nearest\n neighbors of X are found in the training data, and from these the\n shortest geodesic distances from each point in X to each point in\n the training data are computed in order to construct the kernel.\n The embedding of X is the projection of this kernel onto the\n embedding vectors of the training set.\n\n Parameters\n ----------\n X : array-like, shape (n_queries, n_features)\n If neighbors_algorithm='precomputed', X is assumed to be a\n distance matrix or a sparse graph of shape\n (n_queries, n_samples_fit).\n\n Returns\n -------\n X_new : array-like, shape (n_queries, n_components)\n \"\"\"\n check_is_fitted(self)\n distances, indices = self.nbrs_.kneighbors(X, return_distance=True)\n\n # Create the graph of shortest distances from X to\n # training data via the nearest neighbors of X.\n # This can be done as a single array operation, but it potentially\n # takes a lot of memory. 
To avoid that, use a loop:\n\n n_samples_fit = self.nbrs_.n_samples_fit_\n n_queries = distances.shape[0]\n G_X = np.zeros((n_queries, n_samples_fit))\n for i in range(n_queries):\n G_X[i] = np.min(self.dist_matrix_[indices[i]] +\n distances[i][:, None], 0)\n\n G_X **= 2\n G_X *= -0.5\n\n return self.kernel_pca_.transform(G_X)" + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__", + "name": "__init__", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__/self", + "name": "self", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__.n_neighbors", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "number of neighbors to consider for each point." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__/n_components", + "name": "n_components", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__.n_components", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "number of coordinates for the manifold" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__/reg", + "name": "reg", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__.reg", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "regularization constant, multiplies the trace of the local covariance\nmatrix of the distances." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__/eigen_solver", + "name": "eigen_solver", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__.eigen_solver", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'arpack', 'dense'}", + "default_value": "'auto'", + "description": "auto : algorithm will attempt to choose the best method for input data\n\narpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n\ndense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems." 
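Following the `transform` and `reconstruction_error` entries above, this sketch exercises the out-of-sample path: new queries are linked into the fitted geodesic graph rather than refit. The split and sizes are illustrative assumptions:

```python
from sklearn.datasets import make_s_curve
from sklearn.manifold import Isomap

X, _ = make_s_curve(n_samples=400, random_state=1)
X_train, X_new = X[:300], X[300:]
iso = Isomap(n_neighbors=8, n_components=2).fit(X_train)
print(iso.transform(X_new).shape)   # (100, 2), projected via the precomputed kernel
print(iso.reconstruction_error())   # scalar embedding-quality measure from above
```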
+ }, + "type": { + "kind": "EnumType", + "values": ["dense", "auto", "arpack"] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__/tol", + "name": "tol", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__.tol", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Tolerance for 'arpack' method\nNot used if eigen_solver=='dense'." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "maximum number of iterations for the arpack solver.\nNot used if eigen_solver=='dense'." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__/method", + "name": "method", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__.method", + "default_value": "'standard'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'standard', 'hessian', 'modified', 'ltsa'}", + "default_value": "'standard'", + "description": "- `standard`: use the standard locally linear embedding algorithm. see\n reference [1]_\n- `hessian`: use the Hessian eigenmap method. This method requires\n ``n_neighbors > n_components * (1 + (n_components + 1) / 2``. see\n reference [2]_\n- `modified`: use the modified locally linear embedding algorithm.\n see reference [3]_\n- `ltsa`: use local tangent space alignment algorithm. 
see\n reference [4]_" + }, + "type": { + "kind": "EnumType", + "values": ["modified", "standard", "ltsa", "hessian"] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__/hessian_tol", + "name": "hessian_tol", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__.hessian_tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance for Hessian eigenmapping method.\nOnly used if ``method == 'hessian'``" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__/modified_tol", + "name": "modified_tol", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__.modified_tol", + "default_value": "1e-12", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-12", + "description": "Tolerance for modified LLE method.\nOnly used if ``method == 'modified'``" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__/neighbors_algorithm", + "name": "neighbors_algorithm", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__.neighbors_algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'brute', 'kd_tree', 'ball_tree'}", + "default_value": "'auto'", + "description": "algorithm to use for nearest neighbors search,\npassed to neighbors.NearestNeighbors instance" + }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__/random_state", + "name": "random_state", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Determines the random number generator when\n``eigen_solver`` == 'arpack'. Pass an int for reproducible results\nacross multiple function calls. See :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "The number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." 
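The `method` entry above ties the choice of algorithm to a constraint on `n_neighbors`. A small sketch of that interplay (the instances are illustrative; per the record, `hessian` needs `n_neighbors > n_components * (1 + (n_components + 1) / 2)`, i.e. more than 5 when `n_components=2`, and the check fires at fit time):

```python
from sklearn.manifold import LocallyLinearEmbedding

lle_std = LocallyLinearEmbedding(n_neighbors=5, n_components=2,
                                 method='standard')   # no extra constraint
lle_hes = LocallyLinearEmbedding(n_neighbors=6, n_components=2,
                                 method='hessian')    # 6 is the minimum here
```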
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Locally Linear Embedding\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, n_neighbors=5, n_components=2, reg=1E-3,\n eigen_solver='auto', tol=1E-6, max_iter=100,\n method='standard', hessian_tol=1E-4, modified_tol=1E-12,\n neighbors_algorithm='auto', random_state=None, n_jobs=None):\n self.n_neighbors = n_neighbors\n self.n_components = n_components\n self.reg = reg\n self.eigen_solver = eigen_solver\n self.tol = tol\n self.max_iter = max_iter\n self.method = method\n self.hessian_tol = hessian_tol\n self.modified_tol = modified_tol\n self.random_state = random_state\n self.neighbors_algorithm = neighbors_algorithm\n self.n_jobs = n_jobs" + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/_fit_transform", + "name": "_fit_transform", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding._fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/_fit_transform/self", + "name": "self", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding._fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/_fit_transform/X", + "name": "X", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding._fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit_transform(self, X):\n self.nbrs_ = NearestNeighbors(n_neighbors=self.n_neighbors,\n algorithm=self.neighbors_algorithm,\n n_jobs=self.n_jobs)\n\n random_state = check_random_state(self.random_state)\n X = self._validate_data(X, dtype=float)\n self.nbrs_.fit(X)\n self.embedding_, self.reconstruction_error_ = \\\n locally_linear_embedding(\n X=self.nbrs_, n_neighbors=self.n_neighbors,\n n_components=self.n_components,\n eigen_solver=self.eigen_solver, tol=self.tol,\n max_iter=self.max_iter, method=self.method,\n hessian_tol=self.hessian_tol, modified_tol=self.modified_tol,\n random_state=random_state, reg=self.reg, n_jobs=self.n_jobs)" + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/fit", + "name": "fit", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/fit/self", + "name": "self", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/fit/X", + "name": "X", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.fit.X", + "default_value": null, + 
"assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape [n_samples, n_features]", + "default_value": "", + "description": "training set." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "n_features]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/fit/y", + "name": "y", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the embedding vectors for data X", + "docstring": "Compute the embedding vectors for data X\n\nParameters\n----------\nX : array-like of shape [n_samples, n_features]\n training set.\n\ny : Ignored\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, X, y=None):\n \"\"\"Compute the embedding vectors for data X\n\n Parameters\n ----------\n X : array-like of shape [n_samples, n_features]\n training set.\n\n y : Ignored\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self._fit_transform(X)\n return self" + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/fit_transform", + "name": "fit_transform", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/fit_transform/self", + "name": "self", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/fit_transform/X", + "name": "X", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape [n_samples, n_features]", + "default_value": "", + "description": "training set." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape [n_samples" + }, + { + "kind": "NamedType", + "name": "n_features]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/fit_transform/y", + "name": "y", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the embedding vectors for data X and transform X.", + "docstring": "Compute the embedding vectors for data X and transform X.\n\nParameters\n----------\nX : array-like of shape [n_samples, n_features]\n training set.\n\ny : Ignored\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Compute the embedding vectors for data X and transform X.\n\n Parameters\n ----------\n X : array-like of shape [n_samples, n_features]\n training set.\n\n y : Ignored\n\n Returns\n -------\n X_new : array-like, shape (n_samples, n_components)\n \"\"\"\n self._fit_transform(X)\n return self.embedding_" + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/transform", + "name": "transform", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/transform/self", + "name": "self", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/LocallyLinearEmbedding/transform/X", + "name": "X", + "qname": "sklearn.manifold._locally_linear.LocallyLinearEmbedding.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform new points into embedding space.", + "docstring": "Transform new points into embedding space.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nX_new : array, shape = [n_samples, n_components]\n\nNotes\n-----\nBecause of scaling performed by this method, it is discouraged to use\nit together with methods that are not scale-invariant (like SVMs)", + "code": " def transform(self, X):\n \"\"\"\n Transform new points into embedding space.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n X_new : array, shape = [n_samples, n_components]\n\n Notes\n -----\n Because of scaling performed by this method, it is discouraged to use\n it together with methods that are not scale-invariant (like SVMs)\n \"\"\"\n check_is_fitted(self)\n\n X = check_array(X)\n ind = self.nbrs_.kneighbors(X, n_neighbors=self.n_neighbors,\n return_distance=False)\n weights = 
barycenter_weights(X, self.nbrs_._fit_X, ind, reg=self.reg)\n X_new = np.empty((X.shape[0], self.n_components))\n for i in range(X.shape[0]):\n X_new[i] = np.dot(self.embedding_[ind[i]].T, weights[i])\n return X_new" + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/barycenter_kneighbors_graph", + "name": "barycenter_kneighbors_graph", + "qname": "sklearn.manifold._locally_linear.barycenter_kneighbors_graph", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._locally_linear/barycenter_kneighbors_graph/X", + "name": "X", + "qname": "sklearn.manifold._locally_linear.barycenter_kneighbors_graph.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, NearestNeighbors}", + "default_value": "", + "description": "Sample data, shape = (n_samples, n_features), in the form of a\nnumpy array or a NearestNeighbors object." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/barycenter_kneighbors_graph/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.manifold._locally_linear.barycenter_kneighbors_graph.n_neighbors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of neighbors for each sample." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/barycenter_kneighbors_graph/reg", + "name": "reg", + "qname": "sklearn.manifold._locally_linear.barycenter_kneighbors_graph.reg", + "default_value": "0.001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Amount of regularization when solving the least-squares\nproblem. Only relevant if mode='barycenter'. If None, use the\ndefault." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/barycenter_kneighbors_graph/n_jobs", + "name": "n_jobs", + "qname": "sklearn.manifold._locally_linear.barycenter_kneighbors_graph.n_jobs", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the barycenter weighted graph of k-Neighbors for points in X", + "docstring": "Computes the barycenter weighted graph of k-Neighbors for points in X\n\nParameters\n----------\nX : {array-like, NearestNeighbors}\n Sample data, shape = (n_samples, n_features), in the form of a\n numpy array or a NearestNeighbors object.\n\nn_neighbors : int\n Number of neighbors for each sample.\n\nreg : float, default=1e-3\n Amount of regularization when solving the least-squares\n problem. Only relevant if mode='barycenter'. 
If None, use the\n default.\n\nn_jobs : int or None, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nA : sparse matrix in CSR format, shape = [n_samples, n_samples]\n A[i, j] is assigned the weight of edge that connects i to j.\n\nSee Also\n--------\nsklearn.neighbors.kneighbors_graph\nsklearn.neighbors.radius_neighbors_graph", + "code": "def barycenter_kneighbors_graph(X, n_neighbors, reg=1e-3, n_jobs=None):\n \"\"\"Computes the barycenter weighted graph of k-Neighbors for points in X\n\n Parameters\n ----------\n X : {array-like, NearestNeighbors}\n Sample data, shape = (n_samples, n_features), in the form of a\n numpy array or a NearestNeighbors object.\n\n n_neighbors : int\n Number of neighbors for each sample.\n\n reg : float, default=1e-3\n Amount of regularization when solving the least-squares\n problem. Only relevant if mode='barycenter'. If None, use the\n default.\n\n n_jobs : int or None, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Returns\n -------\n A : sparse matrix in CSR format, shape = [n_samples, n_samples]\n A[i, j] is assigned the weight of edge that connects i to j.\n\n See Also\n --------\n sklearn.neighbors.kneighbors_graph\n sklearn.neighbors.radius_neighbors_graph\n \"\"\"\n knn = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs).fit(X)\n X = knn._fit_X\n n_samples = knn.n_samples_fit_\n ind = knn.kneighbors(X, return_distance=False)[:, 1:]\n data = barycenter_weights(X, X, ind, reg=reg)\n indptr = np.arange(0, n_samples * n_neighbors + 1, n_neighbors)\n return csr_matrix((data.ravel(), ind.ravel(), indptr),\n shape=(n_samples, n_samples))" + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/barycenter_weights", + "name": "barycenter_weights", + "qname": "sklearn.manifold._locally_linear.barycenter_weights", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._locally_linear/barycenter_weights/X", + "name": "X", + "qname": "sklearn.manifold._locally_linear.barycenter_weights.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_dim)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_dim)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/barycenter_weights/Y", + "name": "Y", + "qname": "sklearn.manifold._locally_linear.barycenter_weights.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_dim)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_dim)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/barycenter_weights/indices", + "name": "indices", + "qname": "sklearn.manifold._locally_linear.barycenter_weights.indices", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + 
"is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_dim)", + "default_value": "", + "description": "Indices of the points in Y used to compute the barycenter" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_dim)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/barycenter_weights/reg", + "name": "reg", + "qname": "sklearn.manifold._locally_linear.barycenter_weights.reg", + "default_value": "0.001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "amount of regularization to add for the problem to be\nwell-posed in the case of n_neighbors > n_dim" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute barycenter weights of X from Y along the first axis\n\nWe estimate the weights to assign to each point in Y[indices] to recover\nthe point X[i]. The barycenter weights sum to 1.", + "docstring": "Compute barycenter weights of X from Y along the first axis\n\nWe estimate the weights to assign to each point in Y[indices] to recover\nthe point X[i]. The barycenter weights sum to 1.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_dim)\n\nY : array-like, shape (n_samples, n_dim)\n\nindices : array-like, shape (n_samples, n_dim)\n Indices of the points in Y used to compute the barycenter\n\nreg : float, default=1e-3\n amount of regularization to add for the problem to be\n well-posed in the case of n_neighbors > n_dim\n\nReturns\n-------\nB : array-like, shape (n_samples, n_neighbors)\n\nNotes\n-----\nSee developers note for more information.", + "code": "def barycenter_weights(X, Y, indices, reg=1e-3):\n \"\"\"Compute barycenter weights of X from Y along the first axis\n\n We estimate the weights to assign to each point in Y[indices] to recover\n the point X[i]. 
The barycenter weights sum to 1.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_dim)\n\n Y : array-like, shape (n_samples, n_dim)\n\n indices : array-like, shape (n_samples, n_dim)\n Indices of the points in Y used to compute the barycenter\n\n reg : float, default=1e-3\n amount of regularization to add for the problem to be\n well-posed in the case of n_neighbors > n_dim\n\n Returns\n -------\n B : array-like, shape (n_samples, n_neighbors)\n\n Notes\n -----\n See developers note for more information.\n \"\"\"\n X = check_array(X, dtype=FLOAT_DTYPES)\n Y = check_array(Y, dtype=FLOAT_DTYPES)\n indices = check_array(indices, dtype=int)\n\n n_samples, n_neighbors = indices.shape\n assert X.shape[0] == n_samples\n\n B = np.empty((n_samples, n_neighbors), dtype=X.dtype)\n v = np.ones(n_neighbors, dtype=X.dtype)\n\n # this might raise a LinalgError if G is singular and has trace\n # zero\n for i, ind in enumerate(indices):\n A = Y[ind]\n C = A - X[i] # broadcasting\n G = np.dot(C, C.T)\n trace = np.trace(G)\n if trace > 0:\n R = reg * trace\n else:\n R = reg\n G.flat[::n_neighbors + 1] += R\n w = solve(G, v, sym_pos=True)\n B[i, :] = w / np.sum(w)\n return B" + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding", + "name": "locally_linear_embedding", + "qname": "sklearn.manifold._locally_linear.locally_linear_embedding", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding/X", + "name": "X", + "qname": "sklearn.manifold._locally_linear.locally_linear_embedding.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, NearestNeighbors}", + "default_value": "", + "description": "Sample data, shape = (n_samples, n_features), in the form of a\nnumpy array or a NearestNeighbors object." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.manifold._locally_linear.locally_linear_embedding.n_neighbors", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "number of neighbors to consider for each point." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding/n_components", + "name": "n_components", + "qname": "sklearn.manifold._locally_linear.locally_linear_embedding.n_components", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "number of coordinates for the manifold." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding/reg", + "name": "reg", + "qname": "sklearn.manifold._locally_linear.locally_linear_embedding.reg", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "regularization constant, multiplies the trace of the local covariance\nmatrix of the distances." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding/eigen_solver", + "name": "eigen_solver", + "qname": "sklearn.manifold._locally_linear.locally_linear_embedding.eigen_solver", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'arpack', 'dense'}", + "default_value": "'auto'", + "description": "auto : algorithm will attempt to choose the best method for input data\n\narpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n\ndense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems." + }, + "type": { + "kind": "EnumType", + "values": ["dense", "auto", "arpack"] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding/tol", + "name": "tol", + "qname": "sklearn.manifold._locally_linear.locally_linear_embedding.tol", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Tolerance for 'arpack' method\nNot used if eigen_solver=='dense'." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding/max_iter", + "name": "max_iter", + "qname": "sklearn.manifold._locally_linear.locally_linear_embedding.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "maximum number of iterations for the arpack solver." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding/method", + "name": "method", + "qname": "sklearn.manifold._locally_linear.locally_linear_embedding.method", + "default_value": "'standard'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'standard', 'hessian', 'modified', 'ltsa'}", + "default_value": "'standard'", + "description": "standard : use the standard locally linear embedding algorithm.\n see reference [1]_\nhessian : use the Hessian eigenmap method. 
This method requires\n n_neighbors > n_components * (1 + (n_components + 1) / 2.\n see reference [2]_\nmodified : use the modified locally linear embedding algorithm.\n see reference [3]_\nltsa : use local tangent space alignment algorithm\n see reference [4]_" + }, + "type": { + "kind": "EnumType", + "values": ["modified", "standard", "ltsa", "hessian"] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding/hessian_tol", + "name": "hessian_tol", + "qname": "sklearn.manifold._locally_linear.locally_linear_embedding.hessian_tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance for Hessian eigenmapping method.\nOnly used if method == 'hessian'" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding/modified_tol", + "name": "modified_tol", + "qname": "sklearn.manifold._locally_linear.locally_linear_embedding.modified_tol", + "default_value": "1e-12", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-12", + "description": "Tolerance for modified LLE method.\nOnly used if method == 'modified'" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding/random_state", + "name": "random_state", + "qname": "sklearn.manifold._locally_linear.locally_linear_embedding.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Determines the random number generator when ``solver`` == 'arpack'.\nPass an int for reproducible results across multiple function calls.\nSee :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/locally_linear_embedding/n_jobs", + "name": "n_jobs", + "qname": "sklearn.manifold._locally_linear.locally_linear_embedding.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform a Locally Linear Embedding analysis on the data.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Perform a Locally Linear Embedding analysis on the data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, NearestNeighbors}\n Sample data, shape = (n_samples, n_features), in the form of a\n numpy array or a NearestNeighbors object.\n\nn_neighbors : int\n number of neighbors to consider for each point.\n\nn_components : int\n number of coordinates for the manifold.\n\nreg : float, default=1e-3\n regularization constant, multiplies the trace of the local covariance\n matrix of the distances.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n auto : algorithm will attempt to choose the best method for input data\n\n arpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n\n dense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems.\n\ntol : float, default=1e-6\n Tolerance for 'arpack' method\n Not used if eigen_solver=='dense'.\n\nmax_iter : int, default=100\n maximum number of iterations for the arpack solver.\n\nmethod : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n standard : use the standard locally linear embedding algorithm.\n see reference [1]_\n hessian : use the Hessian eigenmap method. This method requires\n n_neighbors > n_components * (1 + (n_components + 1) / 2.\n see reference [2]_\n modified : use the modified locally linear embedding algorithm.\n see reference [3]_\n ltsa : use local tangent space alignment algorithm\n see reference [4]_\n\nhessian_tol : float, default=1e-4\n Tolerance for Hessian eigenmapping method.\n Only used if method == 'hessian'\n\nmodified_tol : float, default=1e-12\n Tolerance for modified LLE method.\n Only used if method == 'modified'\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator when ``solver`` == 'arpack'.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nn_jobs : int or None, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nY : array-like, shape [n_samples, n_components]\n Embedding vectors.\n\nsquared_error : float\n Reconstruction error for the embedding vectors. Equivalent to\n ``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.\n\nReferences\n----------\n\n.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n by locally linear embedding. Science 290:2323 (2000).\n.. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally\n linear embedding techniques for high-dimensional data.\n Proc Natl Acad Sci U S A. 100:5591 (2003).\n.. [3] Zhang, Z. & Wang, J. 
MLLE: Modified Locally Linear\n Embedding Using Multiple Weights.\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382\n.. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n dimensionality reduction via tangent space alignment.\n Journal of Shanghai Univ. 8:406 (2004)", + "code": "@_deprecate_positional_args\ndef locally_linear_embedding(\n X, *, n_neighbors, n_components, reg=1e-3, eigen_solver='auto',\n tol=1e-6, max_iter=100, method='standard', hessian_tol=1E-4,\n modified_tol=1E-12, random_state=None, n_jobs=None):\n \"\"\"Perform a Locally Linear Embedding analysis on the data.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, NearestNeighbors}\n Sample data, shape = (n_samples, n_features), in the form of a\n numpy array or a NearestNeighbors object.\n\n n_neighbors : int\n number of neighbors to consider for each point.\n\n n_components : int\n number of coordinates for the manifold.\n\n reg : float, default=1e-3\n regularization constant, multiplies the trace of the local covariance\n matrix of the distances.\n\n eigen_solver : {'auto', 'arpack', 'dense'}, default='auto'\n auto : algorithm will attempt to choose the best method for input data\n\n arpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n\n dense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems.\n\n tol : float, default=1e-6\n Tolerance for 'arpack' method\n Not used if eigen_solver=='dense'.\n\n max_iter : int, default=100\n maximum number of iterations for the arpack solver.\n\n method : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'\n standard : use the standard locally linear embedding algorithm.\n see reference [1]_\n hessian : use the Hessian eigenmap method. This method requires\n n_neighbors > n_components * (1 + (n_components + 1) / 2.\n see reference [2]_\n modified : use the modified locally linear embedding algorithm.\n see reference [3]_\n ltsa : use local tangent space alignment algorithm\n see reference [4]_\n\n hessian_tol : float, default=1e-4\n Tolerance for Hessian eigenmapping method.\n Only used if method == 'hessian'\n\n modified_tol : float, default=1e-12\n Tolerance for modified LLE method.\n Only used if method == 'modified'\n\n random_state : int, RandomState instance, default=None\n Determines the random number generator when ``solver`` == 'arpack'.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\n n_jobs : int or None, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Returns\n -------\n Y : array-like, shape [n_samples, n_components]\n Embedding vectors.\n\n squared_error : float\n Reconstruction error for the embedding vectors. Equivalent to\n ``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.\n\n References\n ----------\n\n .. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction\n by locally linear embedding. Science 290:2323 (2000).\n .. [2] Donoho, D. & Grimes, C. 
Hessian eigenmaps: Locally\n linear embedding techniques for high-dimensional data.\n Proc Natl Acad Sci U S A. 100:5591 (2003).\n .. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear\n Embedding Using Multiple Weights.\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382\n .. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear\n dimensionality reduction via tangent space alignment.\n Journal of Shanghai Univ. 8:406 (2004)\n \"\"\"\n if eigen_solver not in ('auto', 'arpack', 'dense'):\n raise ValueError(\"unrecognized eigen_solver '%s'\" % eigen_solver)\n\n if method not in ('standard', 'hessian', 'modified', 'ltsa'):\n raise ValueError(\"unrecognized method '%s'\" % method)\n\n nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs)\n nbrs.fit(X)\n X = nbrs._fit_X\n\n N, d_in = X.shape\n\n if n_components > d_in:\n raise ValueError(\"output dimension must be less than or equal \"\n \"to input dimension\")\n if n_neighbors >= N:\n raise ValueError(\n \"Expected n_neighbors <= n_samples, \"\n \" but n_samples = %d, n_neighbors = %d\" %\n (N, n_neighbors)\n )\n\n if n_neighbors <= 0:\n raise ValueError(\"n_neighbors must be positive\")\n\n M_sparse = (eigen_solver != 'dense')\n\n if method == 'standard':\n W = barycenter_kneighbors_graph(\n nbrs, n_neighbors=n_neighbors, reg=reg, n_jobs=n_jobs)\n\n # we'll compute M = (I-W)'(I-W)\n # depending on the solver, we'll do this differently\n if M_sparse:\n M = eye(*W.shape, format=W.format) - W\n M = (M.T * M).tocsr()\n else:\n M = (W.T * W - W.T - W).toarray()\n M.flat[::M.shape[0] + 1] += 1 # W = W - I = W - I\n\n elif method == 'hessian':\n dp = n_components * (n_components + 1) // 2\n\n if n_neighbors <= n_components + dp:\n raise ValueError(\"for method='hessian', n_neighbors must be \"\n \"greater than \"\n \"[n_components * (n_components + 3) / 2]\")\n\n neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,\n return_distance=False)\n neighbors = neighbors[:, 1:]\n\n Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)\n Yi[:, 0] = 1\n\n M = np.zeros((N, N), dtype=np.float64)\n\n use_svd = (n_neighbors > d_in)\n\n for i in range(N):\n Gi = X[neighbors[i]]\n Gi -= Gi.mean(0)\n\n # build Hessian estimator\n if use_svd:\n U = svd(Gi, full_matrices=0)[0]\n else:\n Ci = np.dot(Gi, Gi.T)\n U = eigh(Ci)[1][:, ::-1]\n\n Yi[:, 1:1 + n_components] = U[:, :n_components]\n\n j = 1 + n_components\n for k in range(n_components):\n Yi[:, j:j + n_components - k] = (U[:, k:k + 1] *\n U[:, k:n_components])\n j += n_components - k\n\n Q, R = qr(Yi)\n\n w = Q[:, n_components + 1:]\n S = w.sum(0)\n\n S[np.where(abs(S) < hessian_tol)] = 1\n w /= S\n\n nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])\n M[nbrs_x, nbrs_y] += np.dot(w, w.T)\n\n if M_sparse:\n M = csr_matrix(M)\n\n elif method == 'modified':\n if n_neighbors < n_components:\n raise ValueError(\"modified LLE requires \"\n \"n_neighbors >= n_components\")\n\n neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,\n return_distance=False)\n neighbors = neighbors[:, 1:]\n\n # find the eigenvectors and eigenvalues of each local covariance\n # matrix. 
We want V[i] to be a [n_neighbors x n_neighbors] matrix,\n # where the columns are eigenvectors\n V = np.zeros((N, n_neighbors, n_neighbors))\n nev = min(d_in, n_neighbors)\n evals = np.zeros([N, nev])\n\n # choose the most efficient way to find the eigenvectors\n use_svd = (n_neighbors > d_in)\n\n if use_svd:\n for i in range(N):\n X_nbrs = X[neighbors[i]] - X[i]\n V[i], evals[i], _ = svd(X_nbrs,\n full_matrices=True)\n evals **= 2\n else:\n for i in range(N):\n X_nbrs = X[neighbors[i]] - X[i]\n C_nbrs = np.dot(X_nbrs, X_nbrs.T)\n evi, vi = eigh(C_nbrs)\n evals[i] = evi[::-1]\n V[i] = vi[:, ::-1]\n\n # find regularized weights: this is like normal LLE.\n # because we've already computed the SVD of each covariance matrix,\n # it's faster to use this rather than np.linalg.solve\n reg = 1E-3 * evals.sum(1)\n\n tmp = np.dot(V.transpose(0, 2, 1), np.ones(n_neighbors))\n tmp[:, :nev] /= evals + reg[:, None]\n tmp[:, nev:] /= reg[:, None]\n\n w_reg = np.zeros((N, n_neighbors))\n for i in range(N):\n w_reg[i] = np.dot(V[i], tmp[i])\n w_reg /= w_reg.sum(1)[:, None]\n\n # calculate eta: the median of the ratio of small to large eigenvalues\n # across the points. This is used to determine s_i, below\n rho = evals[:, n_components:].sum(1) / evals[:, :n_components].sum(1)\n eta = np.median(rho)\n\n # find s_i, the size of the \"almost null space\" for each point:\n # this is the size of the largest set of eigenvalues\n # such that Sum[v; v in set]/Sum[v; v not in set] < eta\n s_range = np.zeros(N, dtype=int)\n evals_cumsum = stable_cumsum(evals, 1)\n eta_range = evals_cumsum[:, -1:] / evals_cumsum[:, :-1] - 1\n for i in range(N):\n s_range[i] = np.searchsorted(eta_range[i, ::-1], eta)\n s_range += n_neighbors - nev # number of zero eigenvalues\n\n # Now calculate M.\n # This is the [N x N] matrix whose null space is the desired embedding\n M = np.zeros((N, N), dtype=np.float64)\n for i in range(N):\n s_i = s_range[i]\n\n # select bottom s_i eigenvectors and calculate alpha\n Vi = V[i, :, n_neighbors - s_i:]\n alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)\n\n # compute Householder matrix which satisfies\n # Hi*Vi.T*ones(n_neighbors) = alpha_i*ones(s)\n # using prescription from paper\n h = np.full(s_i, alpha_i) - np.dot(Vi.T, np.ones(n_neighbors))\n\n norm_h = np.linalg.norm(h)\n if norm_h < modified_tol:\n h *= 0\n else:\n h /= norm_h\n\n # Householder matrix is\n # >> Hi = np.identity(s_i) - 2*np.outer(h,h)\n # Then the weight matrix is\n # >> Wi = np.dot(Vi,Hi) + (1-alpha_i) * w_reg[i,:,None]\n # We do this much more efficiently:\n Wi = (Vi - 2 * np.outer(np.dot(Vi, h), h) +\n (1 - alpha_i) * w_reg[i, :, None])\n\n # Update M as follows:\n # >> W_hat = np.zeros( (N,s_i) )\n # >> W_hat[neighbors[i],:] = Wi\n # >> W_hat[i] -= 1\n # >> M += np.dot(W_hat,W_hat.T)\n # We can do this much more efficiently:\n nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])\n M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)\n Wi_sum1 = Wi.sum(1)\n M[i, neighbors[i]] -= Wi_sum1\n M[neighbors[i], i] -= Wi_sum1\n M[i, i] += s_i\n\n if M_sparse:\n M = csr_matrix(M)\n\n elif method == 'ltsa':\n neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,\n return_distance=False)\n neighbors = neighbors[:, 1:]\n\n M = np.zeros((N, N))\n\n use_svd = (n_neighbors > d_in)\n\n for i in range(N):\n Xi = X[neighbors[i]]\n Xi -= Xi.mean(0)\n\n # compute n_components largest eigenvalues of Xi * Xi^T\n if use_svd:\n v = svd(Xi, full_matrices=True)[0]\n else:\n Ci = np.dot(Xi, Xi.T)\n v = eigh(Ci)[1][:, ::-1]\n\n Gi = 
np.zeros((n_neighbors, n_components + 1))\n Gi[:, 1:] = v[:, :n_components]\n Gi[:, 0] = 1. / np.sqrt(n_neighbors)\n\n GiGiT = np.dot(Gi, Gi.T)\n\n nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])\n M[nbrs_x, nbrs_y] -= GiGiT\n M[neighbors[i], neighbors[i]] += 1\n\n return null_space(M, n_components, k_skip=1, eigen_solver=eigen_solver,\n tol=tol, max_iter=max_iter, random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/null_space", + "name": "null_space", + "qname": "sklearn.manifold._locally_linear.null_space", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._locally_linear/null_space/M", + "name": "M", + "qname": "sklearn.manifold._locally_linear.null_space.M", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array, matrix, sparse matrix, LinearOperator}", + "default_value": "", + "description": "Input covariance matrix: should be symmetric positive semi-definite" + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/null_space/k", + "name": "k", + "qname": "sklearn.manifold._locally_linear.null_space.k", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of eigenvalues/vectors to return" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/null_space/k_skip", + "name": "k_skip", + "qname": "sklearn.manifold._locally_linear.null_space.k_skip", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Number of low eigenvalues to skip." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/null_space/eigen_solver", + "name": "eigen_solver", + "qname": "sklearn.manifold._locally_linear.null_space.eigen_solver", + "default_value": "'arpack'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'auto', 'arpack', 'dense'}", + "default_value": "'arpack'", + "description": "auto : algorithm will attempt to choose the best method for input data\narpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\ndense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems." + }, + "type": { + "kind": "EnumType", + "values": ["dense", "auto", "arpack"] + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/null_space/tol", + "name": "tol", + "qname": "sklearn.manifold._locally_linear.null_space.tol", + "default_value": "1e-06", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Tolerance for 'arpack' method.\nNot used if eigen_solver=='dense'." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/null_space/max_iter", + "name": "max_iter", + "qname": "sklearn.manifold._locally_linear.null_space.max_iter", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Maximum number of iterations for 'arpack' method.\nNot used if eigen_solver=='dense'" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._locally_linear/null_space/random_state", + "name": "random_state", + "qname": "sklearn.manifold._locally_linear.null_space.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Determines the random number generator when ``solver`` == 'arpack'.\nPass an int for reproducible results across multiple function calls.\nSee :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Find the null space of a matrix M.", + "docstring": "Find the null space of a matrix M.\n\nParameters\n----------\nM : {array, matrix, sparse matrix, LinearOperator}\n Input covariance matrix: should be symmetric positive semi-definite\n\nk : int\n Number of eigenvalues/vectors to return\n\nk_skip : int, default=1\n Number of low eigenvalues to skip.\n\neigen_solver : {'auto', 'arpack', 'dense'}, default='arpack'\n auto : algorithm will attempt to choose the best method for input data\n arpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n dense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems.\n\ntol : float, default=1e-6\n Tolerance for 'arpack' method.\n Not used if eigen_solver=='dense'.\n\nmax_iter : int, default=100\n Maximum number of iterations for 'arpack' method.\n Not used if eigen_solver=='dense'\n\nrandom_state : int, RandomState instance, default=None\n Determines the random number generator when ``solver`` == 'arpack'.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.", + "code": "def null_space(M, k, k_skip=1, eigen_solver='arpack', tol=1E-6, max_iter=100,\n random_state=None):\n \"\"\"\n Find the null space of a matrix M.\n\n Parameters\n ----------\n M : {array, matrix, sparse matrix, LinearOperator}\n Input covariance matrix: should be symmetric positive semi-definite\n\n k : int\n Number of eigenvalues/vectors to return\n\n k_skip : int, default=1\n Number of low eigenvalues to skip.\n\n eigen_solver : {'auto', 'arpack', 'dense'}, default='arpack'\n auto : algorithm will attempt to choose the best method for input data\n arpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. 
It is\n best to try several random seeds in order to check results.\n dense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems.\n\n tol : float, default=1e-6\n Tolerance for 'arpack' method.\n Not used if eigen_solver=='dense'.\n\n max_iter : int, default=100\n Maximum number of iterations for 'arpack' method.\n Not used if eigen_solver=='dense'\n\n random_state : int, RandomState instance, default=None\n Determines the random number generator when ``solver`` == 'arpack'.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n \"\"\"\n if eigen_solver == 'auto':\n if M.shape[0] > 200 and k + k_skip < 10:\n eigen_solver = 'arpack'\n else:\n eigen_solver = 'dense'\n\n if eigen_solver == 'arpack':\n v0 = _init_arpack_v0(M.shape[0], random_state)\n try:\n eigen_values, eigen_vectors = eigsh(M, k + k_skip, sigma=0.0,\n tol=tol, maxiter=max_iter,\n v0=v0)\n except RuntimeError as e:\n raise ValueError(\n \"Error in determining null-space with ARPACK. Error message: \"\n \"'%s'. Note that eigen_solver='arpack' can fail when the \"\n \"weight matrix is singular or otherwise ill-behaved. In that \"\n \"case, eigen_solver='dense' is recommended. See online \"\n \"documentation for more information.\" % e\n ) from e\n\n return eigen_vectors[:, k_skip:], np.sum(eigen_values[k_skip:])\n elif eigen_solver == 'dense':\n if hasattr(M, 'toarray'):\n M = M.toarray()\n eigen_values, eigen_vectors = eigh(\n M, eigvals=(k_skip, k + k_skip - 1), overwrite_a=True)\n index = np.argsort(np.abs(eigen_values))\n return eigen_vectors[:, index], np.sum(eigen_values)\n else:\n raise ValueError(\"Unrecognized eigen_solver '%s'\" % eigen_solver)" + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/__init__", + "name": "__init__", + "qname": "sklearn.manifold._mds.MDS.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/__init__/self", + "name": "self", + "qname": "sklearn.manifold._mds.MDS.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/__init__/n_components", + "name": "n_components", + "qname": "sklearn.manifold._mds.MDS.__init__.n_components", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Number of dimensions in which to immerse the dissimilarities." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/__init__/metric", + "name": "metric", + "qname": "sklearn.manifold._mds.MDS.__init__.metric", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``True``, perform metric MDS; otherwise, perform nonmetric MDS." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/__init__/n_init", + "name": "n_init", + "qname": "sklearn.manifold._mds.MDS.__init__.n_init", + "default_value": "4", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "4", + "description": "Number of times the SMACOF algorithm will be run with different\ninitializations. The final results will be the best output of the runs,\ndetermined by the run with the smallest final stress." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.manifold._mds.MDS.__init__.max_iter", + "default_value": "300", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations of the SMACOF algorithm for a single run." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/__init__/verbose", + "name": "verbose", + "qname": "sklearn.manifold._mds.MDS.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Level of verbosity." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/__init__/eps", + "name": "eps", + "qname": "sklearn.manifold._mds.MDS.__init__.eps", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Relative tolerance with respect to stress at which to declare\nconvergence." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.manifold._mds.MDS.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation. If multiple\ninitializations are used (``n_init``), each run of the algorithm is\ncomputed in parallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/__init__/random_state", + "name": "random_state", + "qname": "sklearn.manifold._mds.MDS.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines the random number generator used to initialize the centers.\nPass an int for reproducible results across multiple function calls.\nSee :term: `Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/__init__/dissimilarity", + "name": "dissimilarity", + "qname": "sklearn.manifold._mds.MDS.__init__.dissimilarity", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'euclidean', 'precomputed'}", + "default_value": "'euclidean'", + "description": "Dissimilarity measure to use:\n\n- 'euclidean':\n Pairwise Euclidean distances between points in the dataset.\n\n- 'precomputed':\n Pre-computed dissimilarities are passed directly to ``fit`` and\n ``fit_transform``." + }, + "type": { + "kind": "EnumType", + "values": ["precomputed", "euclidean"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Multidimensional scaling.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=2, *, metric=True, n_init=4,\n max_iter=300, verbose=0, eps=1e-3, n_jobs=None,\n random_state=None, dissimilarity=\"euclidean\"):\n self.n_components = n_components\n self.dissimilarity = dissimilarity\n self.metric = metric\n self.n_init = n_init\n self.max_iter = max_iter\n self.eps = eps\n self.verbose = verbose\n self.n_jobs = n_jobs\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/_more_tags", + "name": "_more_tags", + "qname": "sklearn.manifold._mds.MDS._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/_more_tags/self", + "name": "self", + "qname": "sklearn.manifold._mds.MDS._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'pairwise': self.dissimilarity == 'precomputed'}" + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.manifold._mds.MDS._pairwise", + "decorators": [ + "deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/_pairwise/self", + "name": "self", + "qname": "sklearn.manifold._mds.MDS._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n return self.dissimilarity == \"precomputed\"" + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/fit", + "name": "fit", + "qname": "sklearn.manifold._mds.MDS.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/fit/self", + "name": "self", + "qname": "sklearn.manifold._mds.MDS.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": 
false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/fit/X", + "name": "X", + "qname": "sklearn.manifold._mds.MDS.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or (n_samples, n_samples)", + "default_value": "", + "description": "Input data. If ``dissimilarity=='precomputed'``, the input should\nbe the dissimilarity matrix." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features) or (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/fit/y", + "name": "y", + "qname": "sklearn.manifold._mds.MDS.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/fit/init", + "name": "init", + "qname": "sklearn.manifold._mds.MDS.fit.init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Starting configuration of the embedding to initialize the SMACOF\nalgorithm. By default, the algorithm is initialized with a randomly\nchosen array." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the position of the points in the embedding space.", + "docstring": "Computes the position of the points in the embedding space.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples, n_samples)\n Input data. If ``dissimilarity=='precomputed'``, the input should\n be the dissimilarity matrix.\n\ny : Ignored\n\ninit : ndarray of shape (n_samples,), default=None\n Starting configuration of the embedding to initialize the SMACOF\n algorithm. By default, the algorithm is initialized with a randomly\n chosen array.", + "code": " def fit(self, X, y=None, init=None):\n \"\"\"\n Computes the position of the points in the embedding space.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or \\\n (n_samples, n_samples)\n Input data. If ``dissimilarity=='precomputed'``, the input should\n be the dissimilarity matrix.\n\n y : Ignored\n\n init : ndarray of shape (n_samples,), default=None\n Starting configuration of the embedding to initialize the SMACOF\n algorithm. 
By default, the algorithm is initialized with a randomly\n chosen array.\n \"\"\"\n self.fit_transform(X, init=init)\n return self" + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/fit_transform", + "name": "fit_transform", + "qname": "sklearn.manifold._mds.MDS.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/fit_transform/self", + "name": "self", + "qname": "sklearn.manifold._mds.MDS.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/fit_transform/X", + "name": "X", + "qname": "sklearn.manifold._mds.MDS.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or (n_samples, n_samples)", + "default_value": "", + "description": "Input data. If ``dissimilarity=='precomputed'``, the input should\nbe the dissimilarity matrix." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features) or (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/fit_transform/y", + "name": "y", + "qname": "sklearn.manifold._mds.MDS.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/MDS/fit_transform/init", + "name": "init", + "qname": "sklearn.manifold._mds.MDS.fit_transform.init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Starting configuration of the embedding to initialize the SMACOF\nalgorithm. By default, the algorithm is initialized with a randomly\nchosen array." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the data from X, and returns the embedded coordinates.", + "docstring": "Fit the data from X, and returns the embedded coordinates.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples, n_samples)\n Input data. If ``dissimilarity=='precomputed'``, the input should\n be the dissimilarity matrix.\n\ny : Ignored\n\ninit : ndarray of shape (n_samples,), default=None\n Starting configuration of the embedding to initialize the SMACOF\n algorithm. By default, the algorithm is initialized with a randomly\n chosen array.", + "code": " def fit_transform(self, X, y=None, init=None):\n \"\"\"\n Fit the data from X, and returns the embedded coordinates.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or \\\n (n_samples, n_samples)\n Input data. If ``dissimilarity=='precomputed'``, the input should\n be the dissimilarity matrix.\n\n y : Ignored\n\n init : ndarray of shape (n_samples,), default=None\n Starting configuration of the embedding to initialize the SMACOF\n algorithm. 
By default, the algorithm is initialized with a randomly\n chosen array.\n \"\"\"\n X = self._validate_data(X)\n if X.shape[0] == X.shape[1] and self.dissimilarity != \"precomputed\":\n warnings.warn(\"The MDS API has changed. ``fit`` now constructs an\"\n \" dissimilarity matrix from data. To use a custom \"\n \"dissimilarity matrix, set \"\n \"``dissimilarity='precomputed'``.\")\n\n if self.dissimilarity == \"precomputed\":\n self.dissimilarity_matrix_ = X\n elif self.dissimilarity == \"euclidean\":\n self.dissimilarity_matrix_ = euclidean_distances(X)\n else:\n raise ValueError(\"Proximity must be 'precomputed' or 'euclidean'.\"\n \" Got %s instead\" % str(self.dissimilarity))\n\n self.embedding_, self.stress_, self.n_iter_ = smacof(\n self.dissimilarity_matrix_, metric=self.metric,\n n_components=self.n_components, init=init, n_init=self.n_init,\n n_jobs=self.n_jobs, max_iter=self.max_iter, verbose=self.verbose,\n eps=self.eps, random_state=self.random_state,\n return_n_iter=True)\n\n return self.embedding_" + }, + { + "id": "scikit-learn/sklearn.manifold._mds/_smacof_single", + "name": "_smacof_single", + "qname": "sklearn.manifold._mds._smacof_single", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._mds/_smacof_single/dissimilarities", + "name": "dissimilarities", + "qname": "sklearn.manifold._mds._smacof_single.dissimilarities", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_samples)", + "default_value": "", + "description": "Pairwise dissimilarities between the points. Must be symmetric." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/_smacof_single/metric", + "name": "metric", + "qname": "sklearn.manifold._mds._smacof_single.metric", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Compute metric or nonmetric SMACOF algorithm." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/_smacof_single/n_components", + "name": "n_components", + "qname": "sklearn.manifold._mds._smacof_single.n_components", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Number of dimensions in which to immerse the dissimilarities. If an\n``init`` array is provided, this option is overridden and the shape of\n``init`` is used to determine the dimensionality of the embedding\nspace." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/_smacof_single/init", + "name": "init", + "qname": "sklearn.manifold._mds._smacof_single.init", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_components)", + "default_value": "None", + "description": "Starting configuration of the embedding to initialize the algorithm. By\ndefault, the algorithm is initialized with a randomly chosen array." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/_smacof_single/max_iter", + "name": "max_iter", + "qname": "sklearn.manifold._mds._smacof_single.max_iter", + "default_value": "300", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations of the SMACOF algorithm for a single run." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/_smacof_single/verbose", + "name": "verbose", + "qname": "sklearn.manifold._mds._smacof_single.verbose", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Level of verbosity." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/_smacof_single/eps", + "name": "eps", + "qname": "sklearn.manifold._mds._smacof_single.eps", + "default_value": "0.001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Relative tolerance with respect to stress at which to declare\nconvergence." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/_smacof_single/random_state", + "name": "random_state", + "qname": "sklearn.manifold._mds._smacof_single.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines the random number generator used to initialize the centers.\nPass an int for reproducible results across multiple function calls.\nSee :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes multidimensional scaling using SMACOF algorithm.", + "docstring": "Computes multidimensional scaling using SMACOF algorithm.\n\nParameters\n----------\ndissimilarities : ndarray of shape (n_samples, n_samples)\n Pairwise dissimilarities between the points. Must be symmetric.\n\nmetric : bool, default=True\n Compute metric or nonmetric SMACOF algorithm.\n\nn_components : int, default=2\n Number of dimensions in which to immerse the dissimilarities. If an\n ``init`` array is provided, this option is overridden and the shape of\n ``init`` is used to determine the dimensionality of the embedding\n space.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n Starting configuration of the embedding to initialize the algorithm. 
By\n default, the algorithm is initialized with a randomly chosen array.\n\nmax_iter : int, default=300\n Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n Level of verbosity.\n\neps : float, default=1e-3\n Relative tolerance with respect to stress at which to declare\n convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_components)\n Coordinates of the points in a ``n_components``-space.\n\nstress : float\n The final value of the stress (sum of squared distance of the\n disparities and the distances for all constrained points).\n\nn_iter : int\n The number of iterations corresponding to the best stress.", + "code": "def _smacof_single(dissimilarities, metric=True, n_components=2, init=None,\n max_iter=300, verbose=0, eps=1e-3, random_state=None):\n \"\"\"Computes multidimensional scaling using SMACOF algorithm.\n\n Parameters\n ----------\n dissimilarities : ndarray of shape (n_samples, n_samples)\n Pairwise dissimilarities between the points. Must be symmetric.\n\n metric : bool, default=True\n Compute metric or nonmetric SMACOF algorithm.\n\n n_components : int, default=2\n Number of dimensions in which to immerse the dissimilarities. If an\n ``init`` array is provided, this option is overridden and the shape of\n ``init`` is used to determine the dimensionality of the embedding\n space.\n\n init : ndarray of shape (n_samples, n_components), default=None\n Starting configuration of the embedding to initialize the algorithm. By\n default, the algorithm is initialized with a randomly chosen array.\n\n max_iter : int, default=300\n Maximum number of iterations of the SMACOF algorithm for a single run.\n\n verbose : int, default=0\n Level of verbosity.\n\n eps : float, default=1e-3\n Relative tolerance with respect to stress at which to declare\n convergence.\n\n random_state : int, RandomState instance or None, default=None\n Determines the random number generator used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_components)\n Coordinates of the points in a ``n_components``-space.\n\n stress : float\n The final value of the stress (sum of squared distance of the\n disparities and the distances for all constrained points).\n\n n_iter : int\n The number of iterations corresponding to the best stress.\n \"\"\"\n dissimilarities = check_symmetric(dissimilarities, raise_exception=True)\n\n n_samples = dissimilarities.shape[0]\n random_state = check_random_state(random_state)\n\n sim_flat = ((1 - np.tri(n_samples)) * dissimilarities).ravel()\n sim_flat_w = sim_flat[sim_flat != 0]\n if init is None:\n # Randomly choose initial configuration\n X = random_state.rand(n_samples * n_components)\n X = X.reshape((n_samples, n_components))\n else:\n # overrides the parameter p\n n_components = init.shape[1]\n if n_samples != init.shape[0]:\n raise ValueError(\"init matrix should be of shape (%d, %d)\" %\n (n_samples, n_components))\n X = init\n\n old_stress = None\n ir = IsotonicRegression()\n for it in range(max_iter):\n # Compute distance and monotonic regression\n dis = euclidean_distances(X)\n\n if metric:\n disparities = dissimilarities\n else:\n dis_flat = 
dis.ravel()\n # dissimilarities with 0 are considered as missing values\n dis_flat_w = dis_flat[sim_flat != 0]\n\n # Compute the disparities using a monotonic regression\n disparities_flat = ir.fit_transform(sim_flat_w, dis_flat_w)\n disparities = dis_flat.copy()\n disparities[sim_flat != 0] = disparities_flat\n disparities = disparities.reshape((n_samples, n_samples))\n disparities *= np.sqrt((n_samples * (n_samples - 1) / 2) /\n (disparities ** 2).sum())\n\n # Compute stress\n stress = ((dis.ravel() - disparities.ravel()) ** 2).sum() / 2\n\n # Update X using the Guttman transform\n dis[dis == 0] = 1e-5\n ratio = disparities / dis\n B = - ratio\n B[np.arange(len(B)), np.arange(len(B))] += ratio.sum(axis=1)\n X = 1. / n_samples * np.dot(B, X)\n\n dis = np.sqrt((X ** 2).sum(axis=1)).sum()\n if verbose >= 2:\n print('it: %d, stress %s' % (it, stress))\n if old_stress is not None:\n if(old_stress - stress / dis) < eps:\n if verbose:\n print('breaking at iteration %d with stress %s' % (it,\n stress))\n break\n old_stress = stress / dis\n\n return X, stress, it + 1" + }, + { + "id": "scikit-learn/sklearn.manifold._mds/smacof", + "name": "smacof", + "qname": "sklearn.manifold._mds.smacof", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._mds/smacof/dissimilarities", + "name": "dissimilarities", + "qname": "sklearn.manifold._mds.smacof.dissimilarities", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_samples)", + "default_value": "", + "description": "Pairwise dissimilarities between the points. Must be symmetric." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/smacof/metric", + "name": "metric", + "qname": "sklearn.manifold._mds.smacof.metric", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Compute metric or nonmetric SMACOF algorithm." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/smacof/n_components", + "name": "n_components", + "qname": "sklearn.manifold._mds.smacof.n_components", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Number of dimensions in which to immerse the dissimilarities. If an\n``init`` array is provided, this option is overridden and the shape of\n``init`` is used to determine the dimensionality of the embedding\nspace." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/smacof/init", + "name": "init", + "qname": "sklearn.manifold._mds.smacof.init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_components)", + "default_value": "None", + "description": "Starting configuration of the embedding to initialize the algorithm. By\ndefault, the algorithm is initialized with a randomly chosen array." 
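The loop recorded in ``_smacof_single`` above alternates a stress computation with a Guttman-transform update. Below is a minimal NumPy sketch of one metric-MDS iteration mirroring that code; the toy data and variable names are ours, not part of the dump.

import numpy as np
from sklearn.metrics import euclidean_distances

rng = np.random.RandomState(0)
D = euclidean_distances(rng.rand(6, 2))              # target dissimilarities (symmetric)
X = rng.rand(6, 2)                                   # current configuration

dis = euclidean_distances(X)
stress = ((dis.ravel() - D.ravel()) ** 2).sum() / 2  # raw stress, as in the code above
dis[dis == 0] = 1e-5                                 # guard against division by zero
ratio = D / dis
B = -ratio
B[np.arange(6), np.arange(6)] += ratio.sum(axis=1)
X_new = B.dot(X) / 6                                 # Guttman transform update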
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/smacof/n_init", + "name": "n_init", + "qname": "sklearn.manifold._mds.smacof.n_init", + "default_value": "8", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "8", + "description": "Number of times the SMACOF algorithm will be run with different\ninitializations. The final results will be the best output of the runs,\ndetermined by the run with the smallest final stress. If ``init`` is\nprovided, this option is overridden and a single run is performed." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/smacof/n_jobs", + "name": "n_jobs", + "qname": "sklearn.manifold._mds.smacof.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation. If multiple\ninitializations are used (``n_init``), each run of the algorithm is\ncomputed in parallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/smacof/max_iter", + "name": "max_iter", + "qname": "sklearn.manifold._mds.smacof.max_iter", + "default_value": "300", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations of the SMACOF algorithm for a single run." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/smacof/verbose", + "name": "verbose", + "qname": "sklearn.manifold._mds.smacof.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Level of verbosity." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/smacof/eps", + "name": "eps", + "qname": "sklearn.manifold._mds.smacof.eps", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Relative tolerance with respect to stress at which to declare\nconvergence." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/smacof/random_state", + "name": "random_state", + "qname": "sklearn.manifold._mds.smacof.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines the random number generator used to initialize the centers.\nPass an int for reproducible results across multiple function calls.\nSee :term: `Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._mds/smacof/return_n_iter", + "name": "return_n_iter", + "qname": "sklearn.manifold._mds.smacof.return_n_iter", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether or not to return the number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes multidimensional scaling using the SMACOF algorithm.\n\nThe SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a\nmultidimensional scaling algorithm which minimizes an objective function\n(the *stress*) using a majorization technique. Stress majorization, also\nknown as the Guttman Transform, guarantees a monotone convergence of\nstress, and is more powerful than traditional techniques such as gradient\ndescent.\n\nThe SMACOF algorithm for metric MDS can summarized by the following steps:\n\n1. Set an initial start configuration, randomly or not.\n2. Compute the stress\n3. Compute the Guttman Transform\n4. Iterate 2 and 3 until convergence.\n\nThe nonmetric algorithm adds a monotonic regression step before computing\nthe stress.", + "docstring": "Computes multidimensional scaling using the SMACOF algorithm.\n\nThe SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a\nmultidimensional scaling algorithm which minimizes an objective function\n(the *stress*) using a majorization technique. Stress majorization, also\nknown as the Guttman Transform, guarantees a monotone convergence of\nstress, and is more powerful than traditional techniques such as gradient\ndescent.\n\nThe SMACOF algorithm for metric MDS can summarized by the following steps:\n\n1. Set an initial start configuration, randomly or not.\n2. Compute the stress\n3. Compute the Guttman Transform\n4. Iterate 2 and 3 until convergence.\n\nThe nonmetric algorithm adds a monotonic regression step before computing\nthe stress.\n\nParameters\n----------\ndissimilarities : ndarray of shape (n_samples, n_samples)\n Pairwise dissimilarities between the points. Must be symmetric.\n\nmetric : bool, default=True\n Compute metric or nonmetric SMACOF algorithm.\n\nn_components : int, default=2\n Number of dimensions in which to immerse the dissimilarities. If an\n ``init`` array is provided, this option is overridden and the shape of\n ``init`` is used to determine the dimensionality of the embedding\n space.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n Starting configuration of the embedding to initialize the algorithm. By\n default, the algorithm is initialized with a randomly chosen array.\n\nn_init : int, default=8\n Number of times the SMACOF algorithm will be run with different\n initializations. The final results will be the best output of the runs,\n determined by the run with the smallest final stress. If ``init`` is\n provided, this option is overridden and a single run is performed.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. 
If multiple\n initializations are used (``n_init``), each run of the algorithm is\n computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nmax_iter : int, default=300\n Maximum number of iterations of the SMACOF algorithm for a single run.\n\nverbose : int, default=0\n Level of verbosity.\n\neps : float, default=1e-3\n Relative tolerance with respect to stress at which to declare\n convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_components)\n Coordinates of the points in a ``n_components``-space.\n\nstress : float\n The final value of the stress (sum of squared distance of the\n disparities and the distances for all constrained points).\n\nn_iter : int\n The number of iterations corresponding to the best stress. Returned\n only if ``return_n_iter`` is set to ``True``.\n\nNotes\n-----\n\"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\nGroenen P. Springer Series in Statistics (1997)\n\n\"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\nPsychometrika, 29 (1964)\n\n\"Multidimensional scaling by optimizing goodness of fit to a nonmetric\nhypothesis\" Kruskal, J. Psychometrika, 29, (1964)", + "code": "@_deprecate_positional_args\ndef smacof(dissimilarities, *, metric=True, n_components=2, init=None,\n n_init=8, n_jobs=None, max_iter=300, verbose=0, eps=1e-3,\n random_state=None, return_n_iter=False):\n \"\"\"Computes multidimensional scaling using the SMACOF algorithm.\n\n The SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a\n multidimensional scaling algorithm which minimizes an objective function\n (the *stress*) using a majorization technique. Stress majorization, also\n known as the Guttman Transform, guarantees a monotone convergence of\n stress, and is more powerful than traditional techniques such as gradient\n descent.\n\n The SMACOF algorithm for metric MDS can summarized by the following steps:\n\n 1. Set an initial start configuration, randomly or not.\n 2. Compute the stress\n 3. Compute the Guttman Transform\n 4. Iterate 2 and 3 until convergence.\n\n The nonmetric algorithm adds a monotonic regression step before computing\n the stress.\n\n Parameters\n ----------\n dissimilarities : ndarray of shape (n_samples, n_samples)\n Pairwise dissimilarities between the points. Must be symmetric.\n\n metric : bool, default=True\n Compute metric or nonmetric SMACOF algorithm.\n\n n_components : int, default=2\n Number of dimensions in which to immerse the dissimilarities. If an\n ``init`` array is provided, this option is overridden and the shape of\n ``init`` is used to determine the dimensionality of the embedding\n space.\n\n init : ndarray of shape (n_samples, n_components), default=None\n Starting configuration of the embedding to initialize the algorithm. By\n default, the algorithm is initialized with a randomly chosen array.\n\n n_init : int, default=8\n Number of times the SMACOF algorithm will be run with different\n initializations. The final results will be the best output of the runs,\n determined by the run with the smallest final stress. 
If ``init`` is\n provided, this option is overridden and a single run is performed.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation. If multiple\n initializations are used (``n_init``), each run of the algorithm is\n computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n max_iter : int, default=300\n Maximum number of iterations of the SMACOF algorithm for a single run.\n\n verbose : int, default=0\n Level of verbosity.\n\n eps : float, default=1e-3\n Relative tolerance with respect to stress at which to declare\n convergence.\n\n random_state : int, RandomState instance or None, default=None\n Determines the random number generator used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\n return_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_components)\n Coordinates of the points in a ``n_components``-space.\n\n stress : float\n The final value of the stress (sum of squared distance of the\n disparities and the distances for all constrained points).\n\n n_iter : int\n The number of iterations corresponding to the best stress. Returned\n only if ``return_n_iter`` is set to ``True``.\n\n Notes\n -----\n \"Modern Multidimensional Scaling - Theory and Applications\" Borg, I.;\n Groenen P. Springer Series in Statistics (1997)\n\n \"Nonmetric multidimensional scaling: a numerical method\" Kruskal, J.\n Psychometrika, 29 (1964)\n\n \"Multidimensional scaling by optimizing goodness of fit to a nonmetric\n hypothesis\" Kruskal, J. Psychometrika, 29, (1964)\n \"\"\"\n\n dissimilarities = check_array(dissimilarities)\n random_state = check_random_state(random_state)\n\n if hasattr(init, '__array__'):\n init = np.asarray(init).copy()\n if not n_init == 1:\n warnings.warn(\n 'Explicit initial positions passed: '\n 'performing only one init of the MDS instead of %d'\n % n_init)\n n_init = 1\n\n best_pos, best_stress = None, None\n\n if effective_n_jobs(n_jobs) == 1:\n for it in range(n_init):\n pos, stress, n_iter_ = _smacof_single(\n dissimilarities, metric=metric,\n n_components=n_components, init=init,\n max_iter=max_iter, verbose=verbose,\n eps=eps, random_state=random_state)\n if best_stress is None or stress < best_stress:\n best_stress = stress\n best_pos = pos.copy()\n best_iter = n_iter_\n else:\n seeds = random_state.randint(np.iinfo(np.int32).max, size=n_init)\n results = Parallel(n_jobs=n_jobs, verbose=max(verbose - 1, 0))(\n delayed(_smacof_single)(\n dissimilarities, metric=metric, n_components=n_components,\n init=init, max_iter=max_iter, verbose=verbose, eps=eps,\n random_state=seed)\n for seed in seeds)\n positions, stress, n_iters = zip(*results)\n best = np.argmin(stress)\n best_stress = stress[best]\n best_pos = positions[best]\n best_iter = n_iters[best]\n\n if return_n_iter:\n return best_pos, best_stress, best_iter\n else:\n return best_pos, best_stress" + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/__init__", + "name": "__init__", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/__init__/self", + "name": "self", + "qname": 
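A short call sketch for the ``smacof`` function documented above (assuming scikit-learn 0.24; the toy dissimilarity matrix is illustrative). With ``return_n_iter=True`` it returns the embedding, the best stress, and the iteration count of the best run, matching the Returns section recorded above.

import numpy as np
from sklearn.manifold import smacof
from sklearn.metrics import euclidean_distances

D = euclidean_distances(np.random.RandomState(0).rand(10, 3))  # symmetric dissimilarities
pos, stress, n_iter = smacof(D, n_components=2, n_init=4,
                             random_state=0, return_n_iter=True)
print(pos.shape, stress, n_iter)                    # (10, 2), best stress, its iterations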
"sklearn.manifold._spectral_embedding.SpectralEmbedding.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/__init__/n_components", + "name": "n_components", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.__init__.n_components", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "The dimension of the projected subspace." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/__init__/affinity", + "name": "affinity", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.__init__.affinity", + "default_value": "'nearest_neighbors'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'nearest_neighbors', 'rbf', 'precomputed', 'precomputed_nearest_neighbors'} or callable", + "default_value": "'nearest_neighbors'", + "description": "How to construct the affinity matrix.\n - 'nearest_neighbors' : construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf' : construct the affinity matrix by computing a radial basis\n function (RBF) kernel.\n - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.\n - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\n of precomputed nearest neighbors, and constructs the affinity matrix\n by selecting the ``n_neighbors`` nearest neighbors.\n - callable : use passed in function as affinity\n the function takes in data matrix (n_samples, n_features)\n and return affinity matrix (n_samples, n_samples)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["precomputed_nearest_neighbors", "precomputed", "rbf", "nearest_neighbors"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/__init__/gamma", + "name": "gamma", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.__init__.gamma", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Kernel coefficient for rbf kernel. If None, gamma will be set to\n1/n_features." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/__init__/random_state", + "name": "random_state", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines the random number generator used for the initialization of\nthe lobpcg eigenvectors when ``solver`` == 'amg'. Pass an int for\nreproducible results across multiple function calls.\nSee :term: `Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/__init__/eigen_solver", + "name": "eigen_solver", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.__init__.eigen_solver", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'arpack', 'lobpcg', 'amg'}", + "default_value": "None", + "description": "The eigenvalue decomposition strategy to use. AMG requires pyamg\nto be installed. It can be faster on very large, sparse problems.\nIf None, then ``'arpack'`` is used." + }, + "type": { + "kind": "EnumType", + "values": ["lobpcg", "arpack", "amg"] + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.__init__.n_neighbors", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of nearest neighbors for nearest_neighbors graph building.\nIf None, n_neighbors will be set to max(n_samples/10, 1)." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Spectral embedding for non-linear dimensionality reduction.\n\nForms an affinity matrix given by the specified function and\napplies spectral decomposition to the corresponding graph laplacian.\nThe resulting transformation is given by the value of the\neigenvectors for each data point.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=2, *, affinity=\"nearest_neighbors\",\n gamma=None, random_state=None, eigen_solver=None,\n n_neighbors=None, n_jobs=None):\n self.n_components = n_components\n self.affinity = affinity\n self.gamma = gamma\n self.random_state = random_state\n self.eigen_solver = eigen_solver\n self.n_neighbors = n_neighbors\n self.n_jobs = n_jobs" + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_get_affinity_matrix", + "name": "_get_affinity_matrix", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding._get_affinity_matrix", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_get_affinity_matrix/self", + "name": "self", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding._get_affinity_matrix.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_get_affinity_matrix/X", + "name": "X", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding._get_affinity_matrix.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_get_affinity_matrix/Y", + "name": "Y", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding._get_affinity_matrix.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate the affinity matrix from data\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : array-like of shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\nY: Ignored", + "docstring": "Calculate the affinity matrix from data\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : array-like of shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\nY: Ignored\n\nReturns\n-------\naffinity_matrix of shape (n_samples, n_samples)", + "code": " def _get_affinity_matrix(self, X, Y=None):\n \"\"\"Calculate the affinity matrix from data\n Parameters\n ----------\n X : array-like of shape (n_samples, 
n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : array-like of shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\n Y: Ignored\n\n Returns\n -------\n affinity_matrix of shape (n_samples, n_samples)\n \"\"\"\n if self.affinity == 'precomputed':\n self.affinity_matrix_ = X\n return self.affinity_matrix_\n if self.affinity == 'precomputed_nearest_neighbors':\n estimator = NearestNeighbors(n_neighbors=self.n_neighbors,\n n_jobs=self.n_jobs,\n metric=\"precomputed\").fit(X)\n connectivity = estimator.kneighbors_graph(X=X, mode='connectivity')\n self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)\n return self.affinity_matrix_\n if self.affinity == 'nearest_neighbors':\n if sparse.issparse(X):\n warnings.warn(\"Nearest neighbors affinity currently does \"\n \"not support sparse input, falling back to \"\n \"rbf affinity\")\n self.affinity = \"rbf\"\n else:\n self.n_neighbors_ = (self.n_neighbors\n if self.n_neighbors is not None\n else max(int(X.shape[0] / 10), 1))\n self.affinity_matrix_ = kneighbors_graph(X, self.n_neighbors_,\n include_self=True,\n n_jobs=self.n_jobs)\n # currently only symmetric affinity_matrix supported\n self.affinity_matrix_ = 0.5 * (self.affinity_matrix_ +\n self.affinity_matrix_.T)\n return self.affinity_matrix_\n if self.affinity == 'rbf':\n self.gamma_ = (self.gamma\n if self.gamma is not None else 1.0 / X.shape[1])\n self.affinity_matrix_ = rbf_kernel(X, gamma=self.gamma_)\n return self.affinity_matrix_\n self.affinity_matrix_ = self.affinity(X)\n return self.affinity_matrix_" + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_more_tags", + "name": "_more_tags", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_more_tags/self", + "name": "self", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'pairwise': self.affinity in [\"precomputed\",\n \"precomputed_nearest_neighbors\"]}" + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding._pairwise", + "decorators": [ + "deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/_pairwise/self", + "name": "self", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n 
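The 'rbf' branch of ``_get_affinity_matrix`` above defaults ``gamma`` to 1 / n_features. A minimal sketch of that branch using the public ``rbf_kernel`` helper; the toy data and names are ours.

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

X = np.random.RandomState(0).rand(30, 5)
gamma = 1.0 / X.shape[1]                   # default used when gamma is None
A = rbf_kernel(X, gamma=gamma)             # dense (30, 30) affinity matrix
assert np.allclose(A, A.T)                 # rbf affinities are symmetric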
@property\n def _pairwise(self):\n return self.affinity in [\"precomputed\",\n \"precomputed_nearest_neighbors\"]" + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/fit", + "name": "fit", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/fit/self", + "name": "self", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/fit/X", + "name": "X", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples\nand n_features is the number of features.\n\nIf affinity is \"precomputed\"\nX : {array-like, sparse matrix}, shape (n_samples, n_samples),\nInterpret X as precomputed adjacency graph computed from\nsamples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/fit/y", + "name": "y", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model from data in X.", + "docstring": "Fit the model from data in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : {array-like, sparse matrix}, shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\ny : Ignored\n\nReturns\n-------\nself : object\n Returns the instance itself.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the model from data in X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : {array-like, sparse matrix}, shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\n y : Ignored\n\n Returns\n -------\n self : object\n Returns the instance itself.\n \"\"\"\n\n X = self._validate_data(X, accept_sparse='csr', ensure_min_samples=2,\n estimator=self)\n\n random_state = check_random_state(self.random_state)\n if isinstance(self.affinity, str):\n if self.affinity not in {\"nearest_neighbors\", \"rbf\", \"precomputed\",\n \"precomputed_nearest_neighbors\"}:\n raise ValueError((\"%s is not a valid affinity. 
Expected \"\n \"'precomputed', 'rbf', 'nearest_neighbors' \"\n \"or a callable.\") % self.affinity)\n elif not callable(self.affinity):\n raise ValueError((\"'affinity' is expected to be an affinity \"\n \"name or a callable. Got: %s\") % self.affinity)\n\n affinity_matrix = self._get_affinity_matrix(X)\n self.embedding_ = spectral_embedding(affinity_matrix,\n n_components=self.n_components,\n eigen_solver=self.eigen_solver,\n random_state=random_state)\n return self" + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/fit_transform", + "name": "fit_transform", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/fit_transform/self", + "name": "self", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/fit_transform/X", + "name": "X", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples\nand n_features is the number of features.\n\nIf affinity is \"precomputed\"\nX : {array-like, sparse matrix} of shape (n_samples, n_samples),\nInterpret X as precomputed adjacency graph computed from\nsamples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/SpectralEmbedding/fit_transform/y", + "name": "y", + "qname": "sklearn.manifold._spectral_embedding.SpectralEmbedding.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model from data in X and transform X.", + "docstring": "Fit the model from data in X and transform X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : {array-like, sparse matrix} of shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\ny : Ignored\n\nReturns\n-------\nX_new : array-like of shape (n_samples, n_components)", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Fit the model from data in X and transform X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples\n and n_features is the number of features.\n\n If affinity is \"precomputed\"\n X : {array-like, sparse matrix} of shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\n y : Ignored\n\n Returns\n -------\n X_new : array-like of shape (n_samples, n_components)\n \"\"\"\n self.fit(X)\n return self.embedding_" + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/_graph_connected_component", + "name": "_graph_connected_component", + "qname": "sklearn.manifold._spectral_embedding._graph_connected_component", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/_graph_connected_component/graph", + "name": "graph", + "qname": "sklearn.manifold._spectral_embedding._graph_connected_component.graph", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_samples)", + "default_value": "", + "description": "Adjacency matrix of the graph, non-zero weight means an edge\nbetween the nodes." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/_graph_connected_component/node_id", + "name": "node_id", + "qname": "sklearn.manifold._spectral_embedding._graph_connected_component.node_id", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The index of the query node of the graph." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Find the largest graph connected components that contains one\ngiven node.", + "docstring": "Find the largest graph connected components that contains one\ngiven node.\n\nParameters\n----------\ngraph : array-like of shape (n_samples, n_samples)\n Adjacency matrix of the graph, non-zero weight means an edge\n between the nodes.\n\nnode_id : int\n The index of the query node of the graph.\n\nReturns\n-------\nconnected_components_matrix : array-like of shape (n_samples,)\n An array of bool value indicating the indexes of the nodes\n belonging to the largest connected components of the given query\n node.", + "code": "def _graph_connected_component(graph, node_id):\n \"\"\"Find the largest graph connected components that contains one\n given node.\n\n Parameters\n ----------\n graph : array-like of shape (n_samples, n_samples)\n Adjacency matrix of the graph, non-zero weight means an edge\n between the nodes.\n\n node_id : int\n The index of the query node of the graph.\n\n Returns\n -------\n connected_components_matrix : array-like of shape (n_samples,)\n An array of bool value indicating the indexes of the nodes\n belonging to the largest connected components of the given query\n node.\n \"\"\"\n n_node = graph.shape[0]\n if sparse.issparse(graph):\n # speed up row-wise access to boolean connection mask\n graph = graph.tocsr()\n connected_nodes = np.zeros(n_node, dtype=bool)\n nodes_to_explore = np.zeros(n_node, dtype=bool)\n nodes_to_explore[node_id] = True\n for _ in range(n_node):\n last_num_component = connected_nodes.sum()\n np.logical_or(connected_nodes, nodes_to_explore, out=connected_nodes)\n if last_num_component >= connected_nodes.sum():\n break\n indices = np.where(nodes_to_explore)[0]\n nodes_to_explore.fill(False)\n for i in indices:\n if sparse.issparse(graph):\n neighbors = graph[i].toarray().ravel()\n else:\n neighbors = graph[i]\n np.logical_or(nodes_to_explore, neighbors, out=nodes_to_explore)\n return connected_nodes" + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/_graph_is_connected", + "name": "_graph_is_connected", + "qname": "sklearn.manifold._spectral_embedding._graph_is_connected", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/_graph_is_connected/graph", + "name": "graph", + "qname": "sklearn.manifold._spectral_embedding._graph_is_connected.graph", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_samples)", + "default_value": "", + "description": "Adjacency matrix of the graph, non-zero weight means an edge\nbetween the nodes." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_samples)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return whether the graph is connected (True) or Not (False).", + "docstring": "Return whether the graph is connected (True) or Not (False).\n\nParameters\n----------\ngraph : {array-like, sparse matrix} of shape (n_samples, n_samples)\n Adjacency matrix of the graph, non-zero weight means an edge\n between the nodes.\n\nReturns\n-------\nis_connected : bool\n True means the graph is fully connected and False means not.", + "code": "def _graph_is_connected(graph):\n \"\"\" Return whether the graph is connected (True) or Not (False).\n\n Parameters\n ----------\n graph : {array-like, sparse matrix} of shape (n_samples, n_samples)\n Adjacency matrix of the graph, non-zero weight means an edge\n between the nodes.\n\n Returns\n -------\n is_connected : bool\n True means the graph is fully connected and False means not.\n \"\"\"\n if sparse.isspmatrix(graph):\n # sparse graph, find all the connected components\n n_connected_components, _ = connected_components(graph)\n return n_connected_components == 1\n else:\n # dense graph, find all connected components start from node 0\n return _graph_connected_component(graph, 0).sum() == graph.shape[0]" + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/_set_diag", + "name": "_set_diag", + "qname": "sklearn.manifold._spectral_embedding._set_diag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/_set_diag/laplacian", + "name": "laplacian", + "qname": "sklearn.manifold._spectral_embedding._set_diag.laplacian", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix}", + "default_value": "", + "description": "The graph laplacian." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/_set_diag/value", + "name": "value", + "qname": "sklearn.manifold._spectral_embedding._set_diag.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "The value of the diagonal." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/_set_diag/norm_laplacian", + "name": "norm_laplacian", + "qname": "sklearn.manifold._spectral_embedding._set_diag.norm_laplacian", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Whether the value of the diagonal should be changed or not." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Set the diagonal of the laplacian matrix and convert it to a\nsparse format well suited for eigenvalue decomposition.", + "docstring": "Set the diagonal of the laplacian matrix and convert it to a\nsparse format well suited for eigenvalue decomposition.\n\nParameters\n----------\nlaplacian : {ndarray, sparse matrix}\n The graph laplacian.\n\nvalue : float\n The value of the diagonal.\n\nnorm_laplacian : bool\n Whether the value of the diagonal should be changed or not.\n\nReturns\n-------\nlaplacian : {array, sparse matrix}\n An array of matrix in a form that is well suited to fast\n eigenvalue decomposition, depending on the band width of the\n matrix.", + "code": "def _set_diag(laplacian, value, norm_laplacian):\n \"\"\"Set the diagonal of the laplacian matrix and convert it to a\n sparse format well suited for eigenvalue decomposition.\n\n Parameters\n ----------\n laplacian : {ndarray, sparse matrix}\n The graph laplacian.\n\n value : float\n The value of the diagonal.\n\n norm_laplacian : bool\n Whether the value of the diagonal should be changed or not.\n\n Returns\n -------\n laplacian : {array, sparse matrix}\n An array of matrix in a form that is well suited to fast\n eigenvalue decomposition, depending on the band width of the\n matrix.\n \"\"\"\n n_nodes = laplacian.shape[0]\n # We need all entries in the diagonal to values\n if not sparse.isspmatrix(laplacian):\n if norm_laplacian:\n laplacian.flat[::n_nodes + 1] = value\n else:\n laplacian = laplacian.tocoo()\n if norm_laplacian:\n diag_idx = (laplacian.row == laplacian.col)\n laplacian.data[diag_idx] = value\n # If the matrix has a small number of diagonals (as in the\n # case of structured matrices coming from images), the\n # dia format might be best suited for matvec products:\n n_diags = np.unique(laplacian.row - laplacian.col).size\n if n_diags <= 7:\n # 3 or less outer diagonals on each side\n laplacian = laplacian.todia()\n else:\n # csr has the fastest matvec and is thus best suited to\n # arpack\n laplacian = laplacian.tocsr()\n return laplacian" + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/spectral_embedding", + "name": "spectral_embedding", + "qname": "sklearn.manifold._spectral_embedding.spectral_embedding", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/spectral_embedding/adjacency", + "name": "adjacency", + "qname": "sklearn.manifold._spectral_embedding.spectral_embedding.adjacency", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse graph} of shape (n_samples, n_samples)", + "default_value": "", + "description": "The adjacency matrix of the graph to embed." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_samples)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/spectral_embedding/n_components", + "name": "n_components", + "qname": "sklearn.manifold._spectral_embedding.spectral_embedding.n_components", + "default_value": "8", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "8", + "description": "The dimension of the projection subspace." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/spectral_embedding/eigen_solver", + "name": "eigen_solver", + "qname": "sklearn.manifold._spectral_embedding.spectral_embedding.eigen_solver", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'arpack', 'lobpcg', 'amg'}", + "default_value": "None", + "description": "The eigenvalue decomposition strategy to use. AMG requires pyamg\nto be installed. It can be faster on very large, sparse problems,\nbut may also lead to instabilities. If None, then ``'arpack'`` is\nused." + }, + "type": { + "kind": "EnumType", + "values": ["lobpcg", "arpack", "amg"] + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/spectral_embedding/random_state", + "name": "random_state", + "qname": "sklearn.manifold._spectral_embedding.spectral_embedding.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines the random number generator used for the initialization of\nthe lobpcg eigenvectors decomposition when ``solver`` == 'amg'. Pass\nan int for reproducible results across multiple function calls.\nSee :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/spectral_embedding/eigen_tol", + "name": "eigen_tol", + "qname": "sklearn.manifold._spectral_embedding.spectral_embedding.eigen_tol", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Stopping criterion for eigendecomposition of the Laplacian matrix\nwhen using arpack eigen_solver." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/spectral_embedding/norm_laplacian", + "name": "norm_laplacian", + "qname": "sklearn.manifold._spectral_embedding.spectral_embedding.norm_laplacian", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, then compute normalized Laplacian." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.manifold._spectral_embedding/spectral_embedding/drop_first", + "name": "drop_first", + "qname": "sklearn.manifold._spectral_embedding.spectral_embedding.drop_first", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to drop the first eigenvector. For spectral embedding, this\nshould be True as the first eigenvector should be constant vector for\nconnected graph, but for spectral clustering, this should be kept as\nFalse to retain the first eigenvector." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Project the sample on the first eigenvectors of the graph Laplacian.\n\nThe adjacency matrix is used to compute a normalized graph Laplacian\nwhose spectrum (especially the eigenvectors associated to the\nsmallest eigenvalues) has an interpretation in terms of minimal\nnumber of cuts necessary to split the graph into comparably sized\ncomponents.\n\nThis embedding can also 'work' even if the ``adjacency`` variable is\nnot strictly the adjacency matrix of a graph but more generally\nan affinity or similarity matrix between samples (for instance the\nheat kernel of a euclidean distance matrix or a k-NN matrix).\n\nHowever care must taken to always make the affinity matrix symmetric\nso that the eigenvector decomposition works as expected.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Project the sample on the first eigenvectors of the graph Laplacian.\n\nThe adjacency matrix is used to compute a normalized graph Laplacian\nwhose spectrum (especially the eigenvectors associated to the\nsmallest eigenvalues) has an interpretation in terms of minimal\nnumber of cuts necessary to split the graph into comparably sized\ncomponents.\n\nThis embedding can also 'work' even if the ``adjacency`` variable is\nnot strictly the adjacency matrix of a graph but more generally\nan affinity or similarity matrix between samples (for instance the\nheat kernel of a euclidean distance matrix or a k-NN matrix).\n\nHowever care must taken to always make the affinity matrix symmetric\nso that the eigenvector decomposition works as expected.\n\nNote : Laplacian Eigenmaps is the actual algorithm implemented here.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nadjacency : {array-like, sparse graph} of shape (n_samples, n_samples)\n The adjacency matrix of the graph to embed.\n\nn_components : int, default=8\n The dimension of the projection subspace.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. If None, then ``'arpack'`` is\n used.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the random number generator used for the initialization of\n the lobpcg eigenvectors decomposition when ``solver`` == 'amg'. Pass\n an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\neigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when using arpack eigen_solver.\n\nnorm_laplacian : bool, default=True\n If True, then compute normalized Laplacian.\n\ndrop_first : bool, default=True\n Whether to drop the first eigenvector. For spectral embedding, this\n should be True as the first eigenvector should be constant vector for\n connected graph, but for spectral clustering, this should be kept as\n False to retain the first eigenvector.\n\nReturns\n-------\nembedding : ndarray of shape (n_samples, n_components)\n The reduced samples.\n\nNotes\n-----\nSpectral Embedding (Laplacian Eigenmaps) is most useful when the graph\nhas one connected component. 
If the graph has many components, the first\nfew eigenvectors will simply uncover the connected components of the graph.\n\nReferences\n----------\n* https://en.wikipedia.org/wiki/LOBPCG\n\n* Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n Block Preconditioned Conjugate Gradient Method\n Andrew V. Knyazev\n https://doi.org/10.1137%2FS1064827500366124", + "code": "@_deprecate_positional_args\ndef spectral_embedding(adjacency, *, n_components=8, eigen_solver=None,\n random_state=None, eigen_tol=0.0,\n norm_laplacian=True, drop_first=True):\n \"\"\"Project the sample on the first eigenvectors of the graph Laplacian.\n\n The adjacency matrix is used to compute a normalized graph Laplacian\n whose spectrum (especially the eigenvectors associated to the\n smallest eigenvalues) has an interpretation in terms of minimal\n number of cuts necessary to split the graph into comparably sized\n components.\n\n This embedding can also 'work' even if the ``adjacency`` variable is\n not strictly the adjacency matrix of a graph but more generally\n an affinity or similarity matrix between samples (for instance the\n heat kernel of a euclidean distance matrix or a k-NN matrix).\n\n However, care must be taken to always make the affinity matrix symmetric\n so that the eigenvector decomposition works as expected.\n\n Note: Laplacian Eigenmaps is the actual algorithm implemented here.\n\n Read more in the :ref:`User Guide <spectral_embedding>`.\n\n Parameters\n ----------\n adjacency : {array-like, sparse graph} of shape (n_samples, n_samples)\n The adjacency matrix of the graph to embed.\n\n n_components : int, default=8\n The dimension of the projection subspace.\n\n eigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. If None, then ``'arpack'`` is\n used.\n\n random_state : int, RandomState instance or None, default=None\n Determines the random number generator used for the initialization of\n the lobpcg eigenvectors decomposition when ``eigen_solver`` == 'amg'. Pass\n an int for reproducible results across multiple function calls.\n See :term:`Glossary <random_state>`.\n\n eigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when using arpack eigen_solver.\n\n norm_laplacian : bool, default=True\n If True, then compute normalized Laplacian.\n\n drop_first : bool, default=True\n Whether to drop the first eigenvector. For spectral embedding, this\n should be True as the first eigenvector should be a constant vector for\n a connected graph, but for spectral clustering, this should be kept as\n False to retain the first eigenvector.\n\n Returns\n -------\n embedding : ndarray of shape (n_samples, n_components)\n The reduced samples.\n\n Notes\n -----\n Spectral Embedding (Laplacian Eigenmaps) is most useful when the graph\n has one connected component. If the graph has many components, the first\n few eigenvectors will simply uncover the connected components of the graph.\n\n References\n ----------\n * https://en.wikipedia.org/wiki/LOBPCG\n\n * Toward the Optimal Preconditioned Eigensolver: Locally Optimal\n Block Preconditioned Conjugate Gradient Method\n Andrew V.
Knyazev\n https://doi.org/10.1137%2FS1064827500366124\n \"\"\"\n adjacency = check_symmetric(adjacency)\n\n try:\n from pyamg import smoothed_aggregation_solver\n except ImportError as e:\n if eigen_solver == \"amg\":\n raise ValueError(\"The eigen_solver was set to 'amg', but pyamg is \"\n \"not available.\") from e\n\n if eigen_solver is None:\n eigen_solver = 'arpack'\n elif eigen_solver not in ('arpack', 'lobpcg', 'amg'):\n raise ValueError(\"Unknown value for eigen_solver: '%s'.\"\n \"Should be 'amg', 'arpack', or 'lobpcg'\"\n % eigen_solver)\n\n random_state = check_random_state(random_state)\n\n n_nodes = adjacency.shape[0]\n # Whether to drop the first eigenvector\n if drop_first:\n n_components = n_components + 1\n\n if not _graph_is_connected(adjacency):\n warnings.warn(\"Graph is not fully connected, spectral embedding\"\n \" may not work as expected.\")\n\n laplacian, dd = csgraph_laplacian(adjacency, normed=norm_laplacian,\n return_diag=True)\n if (eigen_solver == 'arpack' or eigen_solver != 'lobpcg' and\n (not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)):\n # lobpcg used with eigen_solver='amg' has bugs for low number of nodes\n # for details see the source code in scipy:\n # https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen\n # /lobpcg/lobpcg.py#L237\n # or matlab:\n # https://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m\n laplacian = _set_diag(laplacian, 1, norm_laplacian)\n\n # Here we'll use shift-invert mode for fast eigenvalues\n # (see https://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html\n # for a short explanation of what this means)\n # Because the normalized Laplacian has eigenvalues between 0 and 2,\n # I - L has eigenvalues between -1 and 1. ARPACK is most efficient\n # when finding eigenvalues of largest magnitude (keyword which='LM')\n # and when these eigenvalues are very large compared to the rest.\n # For very large, very sparse graphs, I - L can have many, many\n # eigenvalues very near 1.0. This leads to slow convergence. So\n # instead, we'll use ARPACK's shift-invert mode, asking for the\n # eigenvalues near 1.0. This effectively spreads-out the spectrum\n # near 1.0 and leads to much faster convergence: potentially an\n # orders-of-magnitude speedup over simply using keyword which='LA'\n # in standard mode.\n try:\n # We are computing the opposite of the laplacian inplace so as\n # to spare a memory allocation of a possibly very large array\n laplacian *= -1\n v0 = _init_arpack_v0(laplacian.shape[0], random_state)\n _, diffusion_map = eigsh(\n laplacian, k=n_components, sigma=1.0, which='LM',\n tol=eigen_tol, v0=v0)\n embedding = diffusion_map.T[n_components::-1]\n if norm_laplacian:\n embedding = embedding / dd\n except RuntimeError:\n # When submatrices are exactly singular, an LU decomposition\n # in arpack fails. We fallback to lobpcg\n eigen_solver = \"lobpcg\"\n # Revert the laplacian to its opposite to have lobpcg work\n laplacian *= -1\n\n elif eigen_solver == 'amg':\n # Use AMG to get a preconditioner and speed up the eigenvalue\n # problem.\n if not sparse.issparse(laplacian):\n warnings.warn(\"AMG works better for sparse matrices\")\n # lobpcg needs double precision floats\n laplacian = check_array(laplacian, dtype=np.float64,\n accept_sparse=True)\n laplacian = _set_diag(laplacian, 1, norm_laplacian)\n\n # The Laplacian matrix is always singular, having at least one zero\n # eigenvalue, corresponding to the trivial eigenvector, which is a\n # constant. 
Using a singular matrix for preconditioning may result in\n # random failures in LOBPCG and is not supported by the existing\n # theory:\n # see https://doi.org/10.1007/s10208-015-9297-1\n # Shift the Laplacian so its diagonal is not all ones. The shift\n # does change the eigenpairs however, so we'll feed the shifted\n # matrix to the solver and afterward set it back to the original.\n diag_shift = 1e-5 * sparse.eye(laplacian.shape[0])\n laplacian += diag_shift\n ml = smoothed_aggregation_solver(check_array(laplacian,\n accept_sparse='csr'))\n laplacian -= diag_shift\n\n M = ml.aspreconditioner()\n X = random_state.rand(laplacian.shape[0], n_components + 1)\n X[:, 0] = dd.ravel()\n _, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-5,\n largest=False)\n embedding = diffusion_map.T\n if norm_laplacian:\n embedding = embedding / dd\n if embedding.shape[0] == 1:\n raise ValueError\n\n if eigen_solver == \"lobpcg\":\n # lobpcg needs double precision floats\n laplacian = check_array(laplacian, dtype=np.float64,\n accept_sparse=True)\n if n_nodes < 5 * n_components + 1:\n # see note above under arpack why lobpcg has problems with small\n # number of nodes\n # lobpcg will fallback to eigh, so we short circuit it\n if sparse.isspmatrix(laplacian):\n laplacian = laplacian.toarray()\n _, diffusion_map = eigh(laplacian)\n embedding = diffusion_map.T[:n_components]\n if norm_laplacian:\n embedding = embedding / dd\n else:\n laplacian = _set_diag(laplacian, 1, norm_laplacian)\n # We increase the number of eigenvectors requested, as lobpcg\n # doesn't behave well in low dimension\n X = random_state.rand(laplacian.shape[0], n_components + 1)\n X[:, 0] = dd.ravel()\n _, diffusion_map = lobpcg(laplacian, X, tol=1e-15,\n largest=False, maxiter=2000)\n embedding = diffusion_map.T[:n_components]\n if norm_laplacian:\n embedding = embedding / dd\n if embedding.shape[0] == 1:\n raise ValueError\n\n embedding = _deterministic_vector_sign_flip(embedding)\n if drop_first:\n return embedding[1:n_components].T\n else:\n return embedding[:n_components].T" + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__", + "name": "__init__", + "qname": "sklearn.manifold._t_sne.TSNE.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/self", + "name": "self", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/n_components", + "name": "n_components", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.n_components", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Dimension of the embedded space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/perplexity", + "name": "perplexity", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.perplexity", + "default_value": "30.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "30.0", + "description": "The perplexity is related to the number of nearest neighbors that\nis used in other manifold learning algorithms. Larger datasets\nusually require a larger perplexity.
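Editor's note: the long comment in the `code` field above motivates ARPACK's shift-invert mode. A standalone sketch of that trick, not taken from the recorded source: the eigenvalues of -L lie in [-2, 0] for a normalized Laplacian L, so asking `eigsh` for eigenvalues of -L closest to sigma=1.0 (which is outside the spectrum) recovers the smallest eigenvalues of L with fast convergence.

```python
# Shift-invert sketch: smallest eigenvalues of L via eigsh on -L.
import numpy as np
from scipy.sparse.csgraph import laplacian
from scipy.sparse.linalg import eigsh
from sklearn.datasets import make_blobs
from sklearn.neighbors import kneighbors_graph

X, _ = make_blobs(n_samples=300, centers=1, random_state=0)
A = kneighbors_graph(X, n_neighbors=10)
A = 0.5 * (A + A.T)                 # symmetric affinity
L = laplacian(A, normed=True)       # spectrum lies in [0, 2]

# Eigenvalues of -L closest to sigma=1.0 are the largest of -L,
# i.e. the smallest of L; shift-invert spreads them out around sigma.
vals, vecs = eigsh(-L, k=5, sigma=1.0, which='LM')
print(np.sort(-vals))               # ~ the 5 smallest eigenvalues of L
```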
Consider selecting a value\nbetween 5 and 50. Different values can result in significantly\ndifferent results." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/early_exaggeration", + "name": "early_exaggeration", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.early_exaggeration", + "default_value": "12.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "12.0", + "description": "Controls how tight natural clusters in the original space are in\nthe embedded space and how much space will be between them. For\nlarger values, the space between natural clusters will be larger\nin the embedded space. Again, the choice of this parameter is not\nvery critical. If the cost function increases during initial\noptimization, the early exaggeration factor or the learning rate\nmight be too high." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.learning_rate", + "default_value": "200.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "200.0", + "description": "The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\nthe learning rate is too high, the data may look like a 'ball' with any\npoint approximately equidistant from its nearest neighbours. If the\nlearning rate is too low, most points may look compressed in a dense\ncloud with few outliers. If the cost function gets stuck in a bad local\nminimum increasing the learning rate may help." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 10.0, + "max": 1000.0, + "min_inclusive": true, + "max_inclusive": true + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/n_iter", + "name": "n_iter", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.n_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Maximum number of iterations for the optimization. Should be at\nleast 250." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/n_iter_without_progress", + "name": "n_iter_without_progress", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.n_iter_without_progress", + "default_value": "300", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations without progress before we abort the\noptimization, used after 250 initial iterations with early\nexaggeration. Note that progress is only checked every 50 iterations so\nthis value is rounded to the next multiple of 50.\n\n.. versionadded:: 0.17\n parameter *n_iter_without_progress* to control stopping criteria." 
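Editor's note: the entries above are `sklearn.manifold.TSNE` constructor parameters. A hedged sketch of a typical perplexity sweep over the suggested 5-50 range; the digits subset and the explicit defaults are illustrative, assuming the 0.24 API recorded here (`n_iter` must be at least 250, which `_fit` enforces):

```python
# Sketch: explore perplexity on a small dataset; everything after
# n_components is keyword-only in 0.24.
from sklearn.datasets import load_digits
from sklearn.manifold import TSNE

X = load_digits().data[:200]
for perplexity in (5, 30, 50):
    tsne = TSNE(n_components=2, perplexity=perplexity,
                learning_rate=200.0, n_iter=1000,  # n_iter >= 250 required
                random_state=0)
    emb = tsne.fit_transform(X)
    print(perplexity, emb.shape)  # different perplexities, same shape
```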
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/min_grad_norm", + "name": "min_grad_norm", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.min_grad_norm", + "default_value": "1e-07", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-7", + "description": "If the gradient norm is below this threshold, the optimization will\nbe stopped." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/metric", + "name": "metric", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.metric", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'euclidean'", + "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string, it must be one of the options\nallowed by scipy.spatial.distance.pdist for its metric parameter, or\na metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\nIf metric is \"precomputed\", X is assumed to be a distance matrix.\nAlternatively, if metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays from X as input and return a value indicating\nthe distance between them. The default is \"euclidean\" which is\ninterpreted as squared euclidean distance." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/init", + "name": "init", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.init", + "default_value": "'random'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'random', 'pca'} or ndarray of shape (n_samples, n_components)", + "default_value": "'random'", + "description": "Initialization of embedding. Possible options are 'random', 'pca',\nand a numpy array of shape (n_samples, n_components).\nPCA initialization cannot be used with precomputed distances and is\nusually more globally stable than random initialization." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["pca", "random"] + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_components)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/verbose", + "name": "verbose", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/random_state", + "name": "random_state", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines the random number generator. Pass an int for reproducible\nresults across multiple function calls. Note that different\ninitializations might result in different local minima of the cost\nfunction. See :term: `Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/method", + "name": "method", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.method", + "default_value": "'barnes_hut'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'barnes_hut'", + "description": "By default the gradient calculation algorithm uses Barnes-Hut\napproximation running in O(NlogN) time. method='exact'\nwill run on the slower, but exact, algorithm in O(N^2) time. The\nexact algorithm should be used when nearest-neighbor errors need\nto be better than 3%. However, the exact method cannot scale to\nmillions of examples.\n\n.. versionadded:: 0.17\n Approximate optimization *method* via the Barnes-Hut." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/angle", + "name": "angle", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.angle", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "Only used if method='barnes_hut'\nThis is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n'angle' is the angular size (referred to as theta in [3]) of a distant\nnode as measured from a point. If this size is below 'angle' then it is\nused as a summary node of all points contained within it.\nThis method is not very sensitive to changes in this parameter\nin the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing\ncomputation time and angle greater 0.8 has quickly increasing error." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search. This parameter\nhas no impact when ``metric=\"precomputed\"`` or\n(``metric=\"euclidean\"`` and ``method=\"exact\"``).\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/__init__/square_distances", + "name": "square_distances", + "qname": "sklearn.manifold._t_sne.TSNE.__init__.square_distances", + "default_value": "'legacy'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "True or 'legacy'", + "default_value": "'legacy'", + "description": "Whether TSNE should square the distance values. ``'legacy'`` means\nthat distance values are squared only when ``metric=\"euclidean\"``.\n``True`` means that distance values are squared for all metrics.\n\n.. versionadded:: 0.24\n Added to provide backward compatibility during deprecation of\n legacy squaring behavior.\n.. deprecated:: 0.24\n Legacy squaring behavior was deprecated in 0.24. The ``'legacy'``\n value will be removed in 1.1 (renaming of 0.26), at which point the\n default value will change to ``True``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "True" + }, + { + "kind": "NamedType", + "name": "'legacy'" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "t-distributed Stochastic Neighbor Embedding.\n\nt-SNE [1] is a tool to visualize high-dimensional data. It converts\nsimilarities between data points to joint probabilities and tries\nto minimize the Kullback-Leibler divergence between the joint\nprobabilities of the low-dimensional embedding and the\nhigh-dimensional data. t-SNE has a cost function that is not convex,\ni.e. with different initializations we can get different results.\n\nIt is highly recommended to use another dimensionality reduction\nmethod (e.g. PCA for dense data or TruncatedSVD for sparse data)\nto reduce the number of dimensions to a reasonable amount (e.g. 50)\nif the number of features is very high. This will suppress some\nnoise and speed up the computation of pairwise distances between\nsamples. For more tips see Laurens van der Maaten's FAQ [2].\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=2, *, perplexity=30.0,\n early_exaggeration=12.0, learning_rate=200.0, n_iter=1000,\n n_iter_without_progress=300, min_grad_norm=1e-7,\n metric=\"euclidean\", init=\"random\", verbose=0,\n random_state=None, method='barnes_hut', angle=0.5,\n n_jobs=None, square_distances='legacy'):\n self.n_components = n_components\n self.perplexity = perplexity\n self.early_exaggeration = early_exaggeration\n self.learning_rate = learning_rate\n self.n_iter = n_iter\n self.n_iter_without_progress = n_iter_without_progress\n self.min_grad_norm = min_grad_norm\n self.metric = metric\n self.init = init\n self.verbose = verbose\n self.random_state = random_state\n self.method = method\n self.angle = angle\n self.n_jobs = n_jobs\n # TODO Revisit deprecation of square_distances for 1.1-1.3 (#12401)\n self.square_distances = square_distances" + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/_fit", + "name": "_fit", + "qname": "sklearn.manifold._t_sne.TSNE._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/_fit/self", + "name": "self", + "qname": "sklearn.manifold._t_sne.TSNE._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/_fit/X", + "name": "X", + "qname": "sklearn.manifold._t_sne.TSNE._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/_fit/skip_num_points", + "name": "skip_num_points", + "qname": "sklearn.manifold._t_sne.TSNE._fit.skip_num_points", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private function to fit the model using X as training data.", + "docstring": "Private function to fit the model using X as training data.", + "code": " def _fit(self, X, skip_num_points=0):\n \"\"\"Private function to fit the model using X as training data.\"\"\"\n\n if 
self.method not in ['barnes_hut', 'exact']:\n raise ValueError(\"'method' must be 'barnes_hut' or 'exact'\")\n if self.angle < 0.0 or self.angle > 1.0:\n raise ValueError(\"'angle' must be between 0.0 - 1.0\")\n if self.square_distances not in [True, 'legacy']:\n raise ValueError(\"'square_distances' must be True or 'legacy'.\")\n if self.metric != \"euclidean\" and self.square_distances is not True:\n warnings.warn(\n \"'square_distances' has been introduced in 0.24 to help phase \"\n \"out legacy squaring behavior. The 'legacy' setting will be \"\n \"removed in 1.1 (renaming of 0.26), and the default setting \"\n \"will be changed to True. In 1.3, 'square_distances' will be \"\n \"removed altogether, and distances will be squared by \"\n \"default. Set 'square_distances'=True to silence this \"\n \"warning.\",\n FutureWarning\n )\n if self.method == 'barnes_hut':\n X = self._validate_data(X, accept_sparse=['csr'],\n ensure_min_samples=2,\n dtype=[np.float32, np.float64])\n else:\n X = self._validate_data(X, accept_sparse=['csr', 'csc', 'coo'],\n dtype=[np.float32, np.float64])\n if self.metric == \"precomputed\":\n if isinstance(self.init, str) and self.init == 'pca':\n raise ValueError(\"The parameter init=\\\"pca\\\" cannot be \"\n \"used with metric=\\\"precomputed\\\".\")\n if X.shape[0] != X.shape[1]:\n raise ValueError(\"X should be a square distance matrix\")\n\n check_non_negative(X, \"TSNE.fit(). With metric='precomputed', X \"\n \"should contain positive distances.\")\n\n if self.method == \"exact\" and issparse(X):\n raise TypeError(\n 'TSNE with method=\"exact\" does not accept sparse '\n 'precomputed distance matrix. Use method=\"barnes_hut\" '\n 'or provide the dense distance matrix.')\n\n if self.method == 'barnes_hut' and self.n_components > 3:\n raise ValueError(\"'n_components' should be inferior to 4 for the \"\n \"barnes_hut algorithm as it relies on \"\n \"quad-tree or oct-tree.\")\n random_state = check_random_state(self.random_state)\n\n if self.early_exaggeration < 1.0:\n raise ValueError(\"early_exaggeration must be at least 1, but is {}\"\n .format(self.early_exaggeration))\n\n if self.n_iter < 250:\n raise ValueError(\"n_iter should be at least 250\")\n\n n_samples = X.shape[0]\n\n neighbors_nn = None\n if self.method == \"exact\":\n # Retrieve the distance matrix, either using the precomputed one or\n # computing it.\n if self.metric == \"precomputed\":\n distances = X\n else:\n if self.verbose:\n print(\"[t-SNE] Computing pairwise distances...\")\n\n if self.metric == \"euclidean\":\n # Euclidean is squared here, rather than using **= 2,\n # because euclidean_distances already calculates\n # squared distances, and returns np.sqrt(dist) for\n # squared=False.\n # Also, Euclidean is slower for n_jobs>1, so don't set here\n distances = pairwise_distances(X, metric=self.metric,\n squared=True)\n else:\n distances = pairwise_distances(X, metric=self.metric,\n n_jobs=self.n_jobs)\n\n if np.any(distances < 0):\n raise ValueError(\"All distances should be positive, the \"\n \"metric given is not correct\")\n\n if self.metric != \"euclidean\" and self.square_distances is True:\n distances **= 2\n\n # compute the joint probability distribution for the input space\n P = _joint_probabilities(distances, self.perplexity, self.verbose)\n assert np.all(np.isfinite(P)), \"All probabilities should be finite\"\n assert np.all(P >= 0), \"All probabilities should be non-negative\"\n assert np.all(P <= 1), (\"All probabilities should be less \"\n \"than or equal to one\")\n\n
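Editor's note: the validation code above spells out the precomputed-distance path: X must be a square, non-negative matrix, `init="pca"` is rejected, and passing `square_distances=True` avoids the FutureWarning for non-Euclidean metrics. A hedged usage sketch (illustrative data and metric, assumed 0.24 behavior):

```python
# Sketch: feed TSNE a precomputed cosine-distance matrix.
from sklearn.datasets import load_digits
from sklearn.manifold import TSNE
from sklearn.metrics import pairwise_distances

X = load_digits().data[:200]
D = pairwise_distances(X, metric="cosine")   # square (200, 200), >= 0

tsne = TSNE(metric="precomputed", square_distances=True, random_state=0)
emb = tsne.fit_transform(D)
print(emb.shape)  # (200, 2)
```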
else:\n # Compute the number of nearest neighbors to find.\n # LvdM uses 3 * perplexity as the number of neighbors.\n # In the event that we have very small # of points\n # set the neighbors to n - 1.\n n_neighbors = min(n_samples - 1, int(3. * self.perplexity + 1))\n\n if self.verbose:\n print(\"[t-SNE] Computing {} nearest neighbors...\"\n .format(n_neighbors))\n\n # Find the nearest neighbors for every point\n knn = NearestNeighbors(algorithm='auto',\n n_jobs=self.n_jobs,\n n_neighbors=n_neighbors,\n metric=self.metric)\n t0 = time()\n knn.fit(X)\n duration = time() - t0\n if self.verbose:\n print(\"[t-SNE] Indexed {} samples in {:.3f}s...\".format(\n n_samples, duration))\n\n t0 = time()\n distances_nn = knn.kneighbors_graph(mode='distance')\n duration = time() - t0\n if self.verbose:\n print(\"[t-SNE] Computed neighbors for {} samples \"\n \"in {:.3f}s...\".format(n_samples, duration))\n\n # Free the memory used by the ball_tree\n del knn\n\n if self.square_distances is True or self.metric == \"euclidean\":\n # knn returns the euclidean distance but we need it squared\n # to be consistent with the 'exact' method. Note that\n # the method was derived using the euclidean metric as in the\n # input space. Not sure of the implication of using a different\n # metric.\n distances_nn.data **= 2\n\n # compute the joint probability distribution for the input space\n P = _joint_probabilities_nn(distances_nn, self.perplexity,\n self.verbose)\n\n if isinstance(self.init, np.ndarray):\n X_embedded = self.init\n elif self.init == 'pca':\n pca = PCA(n_components=self.n_components, svd_solver='randomized',\n random_state=random_state)\n X_embedded = pca.fit_transform(X).astype(np.float32, copy=False)\n elif self.init == 'random':\n # The embedding is initialized with iid samples from Gaussians with\n # standard deviation 1e-4.\n X_embedded = 1e-4 * random_state.randn(\n n_samples, self.n_components).astype(np.float32)\n else:\n raise ValueError(\"'init' must be 'pca', 'random', or \"\n \"a numpy array\")\n\n # Degrees of freedom of the Student's t-distribution.
The suggestion\n # degrees_of_freedom = n_components - 1 comes from\n # \"Learning a Parametric Embedding by Preserving Local Structure\"\n # Laurens van der Maaten, 2009.\n degrees_of_freedom = max(self.n_components - 1, 1)\n\n return self._tsne(P, degrees_of_freedom, n_samples,\n X_embedded=X_embedded,\n neighbors=neighbors_nn,\n skip_num_points=skip_num_points)" + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/_tsne", + "name": "_tsne", + "qname": "sklearn.manifold._t_sne.TSNE._tsne", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/_tsne/self", + "name": "self", + "qname": "sklearn.manifold._t_sne.TSNE._tsne.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/_tsne/P", + "name": "P", + "qname": "sklearn.manifold._t_sne.TSNE._tsne.P", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/_tsne/degrees_of_freedom", + "name": "degrees_of_freedom", + "qname": "sklearn.manifold._t_sne.TSNE._tsne.degrees_of_freedom", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/_tsne/n_samples", + "name": "n_samples", + "qname": "sklearn.manifold._t_sne.TSNE._tsne.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/_tsne/X_embedded", + "name": "X_embedded", + "qname": "sklearn.manifold._t_sne.TSNE._tsne.X_embedded", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/_tsne/neighbors", + "name": "neighbors", + "qname": "sklearn.manifold._t_sne.TSNE._tsne.neighbors", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/_tsne/skip_num_points", + "name": "skip_num_points", + "qname": "sklearn.manifold._t_sne.TSNE._tsne.skip_num_points", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Runs t-SNE.", + "docstring": "Runs t-SNE.", + "code": " def _tsne(self, P, degrees_of_freedom, n_samples, X_embedded,\n neighbors=None, skip_num_points=0):\n \"\"\"Runs t-SNE.\"\"\"\n # t-SNE minimizes the Kullback-Leibler divergence of the Gaussians P\n # and the Student's t-distributions Q.
The optimization algorithm that\n # we use is batch gradient descent with two stages:\n # * initial optimization with early exaggeration and momentum at 0.5\n # * final optimization with momentum at 0.8\n params = X_embedded.ravel()\n\n opt_args = {\n \"it\": 0,\n \"n_iter_check\": self._N_ITER_CHECK,\n \"min_grad_norm\": self.min_grad_norm,\n \"learning_rate\": self.learning_rate,\n \"verbose\": self.verbose,\n \"kwargs\": dict(skip_num_points=skip_num_points),\n \"args\": [P, degrees_of_freedom, n_samples, self.n_components],\n \"n_iter_without_progress\": self._EXPLORATION_N_ITER,\n \"n_iter\": self._EXPLORATION_N_ITER,\n \"momentum\": 0.5,\n }\n if self.method == 'barnes_hut':\n obj_func = _kl_divergence_bh\n opt_args['kwargs']['angle'] = self.angle\n # Repeat verbose argument for _kl_divergence_bh\n opt_args['kwargs']['verbose'] = self.verbose\n # Get the number of threads for gradient computation here to\n # avoid recomputing it at each iteration.\n opt_args['kwargs']['num_threads'] = _openmp_effective_n_threads()\n else:\n obj_func = _kl_divergence\n\n # Learning schedule (part 1): do 250 iteration with lower momentum but\n # higher learning rate controlled via the early exaggeration parameter\n P *= self.early_exaggeration\n params, kl_divergence, it = _gradient_descent(obj_func, params,\n **opt_args)\n if self.verbose:\n print(\"[t-SNE] KL divergence after %d iterations with early \"\n \"exaggeration: %f\" % (it + 1, kl_divergence))\n\n # Learning schedule (part 2): disable early exaggeration and finish\n # optimization with a higher momentum at 0.8\n P /= self.early_exaggeration\n remaining = self.n_iter - self._EXPLORATION_N_ITER\n if it < self._EXPLORATION_N_ITER or remaining > 0:\n opt_args['n_iter'] = self.n_iter\n opt_args['it'] = it + 1\n opt_args['momentum'] = 0.8\n opt_args['n_iter_without_progress'] = self.n_iter_without_progress\n params, kl_divergence, it = _gradient_descent(obj_func, params,\n **opt_args)\n\n # Save the final number of iterations\n self.n_iter_ = it\n\n if self.verbose:\n print(\"[t-SNE] KL divergence after %d iterations: %f\"\n % (it + 1, kl_divergence))\n\n X_embedded = params.reshape(n_samples, self.n_components)\n self.kl_divergence_ = kl_divergence\n\n return X_embedded" + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/fit", + "name": "fit", + "qname": "sklearn.manifold._t_sne.TSNE.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/fit/self", + "name": "self", + "qname": "sklearn.manifold._t_sne.TSNE.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/fit/X", + "name": "X", + "qname": "sklearn.manifold._t_sne.TSNE.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features) or (n_samples, n_samples)", + "default_value": "", + "description": "If the metric is 'precomputed' X must be a square distance\nmatrix. Otherwise it contains a sample per row. If the method\nis 'exact', X may be a sparse matrix of type 'csr', 'csc'\nor 'coo'. If the method is 'barnes_hut' and the metric is\n'precomputed', X may be a precomputed sparse graph." 
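Editor's note: the `_tsne` body above runs the two-stage schedule (early exaggeration with momentum 0.5, then momentum 0.8) and stores `kl_divergence_` and `n_iter_`. A hedged sketch of inspecting those fitted attributes, assuming the 0.24 API recorded here:

```python
# Sketch: fit and inspect the attributes assigned in _tsne above.
from sklearn.datasets import load_digits
from sklearn.manifold import TSNE

tsne = TSNE(random_state=0, verbose=1)  # verbose prints the KL trace
emb = tsne.fit_transform(load_digits().data[:200])

print(tsne.kl_divergence_)     # final KL(P || Q) after optimization
print(tsne.n_iter_)            # last iteration actually run
print(tsne.embedding_ is emb)  # True: fit_transform stores and returns it
```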
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features) or (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/fit/y", + "name": "y", + "qname": "sklearn.manifold._t_sne.TSNE.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit X into an embedded space.", + "docstring": "Fit X into an embedded space.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row. If the method\n is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n or 'coo'. If the method is 'barnes_hut' and the metric is\n 'precomputed', X may be a precomputed sparse graph.\n\ny : Ignored", + "code": " def fit(self, X, y=None):\n \"\"\"Fit X into an embedded space.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row. If the method\n is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n or 'coo'. If the method is 'barnes_hut' and the metric is\n 'precomputed', X may be a precomputed sparse graph.\n\n y : Ignored\n \"\"\"\n self.fit_transform(X)\n return self" + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/fit_transform", + "name": "fit_transform", + "qname": "sklearn.manifold._t_sne.TSNE.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/fit_transform/self", + "name": "self", + "qname": "sklearn.manifold._t_sne.TSNE.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/fit_transform/X", + "name": "X", + "qname": "sklearn.manifold._t_sne.TSNE.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features) or (n_samples, n_samples)", + "default_value": "", + "description": "If the metric is 'precomputed' X must be a square distance\nmatrix. Otherwise it contains a sample per row. If the method\nis 'exact', X may be a sparse matrix of type 'csr', 'csc'\nor 'coo'. If the method is 'barnes_hut' and the metric is\n'precomputed', X may be a precomputed sparse graph." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features) or (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/TSNE/fit_transform/y", + "name": "y", + "qname": "sklearn.manifold._t_sne.TSNE.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit X into an embedded space and return that transformed\noutput.", + "docstring": "Fit X into an embedded space and return that transformed\noutput.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row. If the method\n is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n or 'coo'. If the method is 'barnes_hut' and the metric is\n 'precomputed', X may be a precomputed sparse graph.\n\ny : Ignored\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Embedding of the training data in low-dimensional space.", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Fit X into an embedded space and return that transformed\n output.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row. If the method\n is 'exact', X may be a sparse matrix of type 'csr', 'csc'\n or 'coo'. If the method is 'barnes_hut' and the metric is\n 'precomputed', X may be a precomputed sparse graph.\n\n y : Ignored\n\n Returns\n -------\n X_new : ndarray of shape (n_samples, n_components)\n Embedding of the training data in low-dimensional space.\n \"\"\"\n embedding = self._fit(X)\n self.embedding_ = embedding\n return self.embedding_" + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent", + "name": "_gradient_descent", + "qname": "sklearn.manifold._t_sne._gradient_descent", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent/objective", + "name": "objective", + "qname": "sklearn.manifold._t_sne._gradient_descent.objective", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Should return a tuple of cost and gradient for a given parameter\nvector. When expensive to compute, the cost can optionally\nbe None and can be computed every n_iter_check steps using\nthe objective_error function." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent/p0", + "name": "p0", + "qname": "sklearn.manifold._t_sne._gradient_descent.p0", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_params,)", + "default_value": "", + "description": "Initial parameter vector." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_params,)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent/it", + "name": "it", + "qname": "sklearn.manifold._t_sne._gradient_descent.it", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Current number of iterations (this function will be called more than\nonce during the optimization)." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent/n_iter", + "name": "n_iter", + "qname": "sklearn.manifold._t_sne._gradient_descent.n_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Maximum number of gradient descent iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent/n_iter_check", + "name": "n_iter_check", + "qname": "sklearn.manifold._t_sne._gradient_descent.n_iter_check", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Number of iterations before evaluating the global error. If the error\nis sufficiently low, we abort the optimization." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent/n_iter_without_progress", + "name": "n_iter_without_progress", + "qname": "sklearn.manifold._t_sne._gradient_descent.n_iter_without_progress", + "default_value": "300", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "300", + "description": "Maximum number of iterations without progress before we abort the\noptimization." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent/momentum", + "name": "momentum", + "qname": "sklearn.manifold._t_sne._gradient_descent.momentum", + "default_value": "0.8", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float within (0.0, 1.0)", + "default_value": "0.8", + "description": "The momentum generates a weight for previous gradients that decays\nexponentially." + }, + "type": { + "kind": "NamedType", + "name": "float within (0.0, 1.0)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent/learning_rate", + "name": "learning_rate", + "qname": "sklearn.manifold._t_sne._gradient_descent.learning_rate", + "default_value": "200.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "200.0", + "description": "The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\nthe learning rate is too high, the data may look like a 'ball' with any\npoint approximately equidistant from its nearest neighbours. If the\nlearning rate is too low, most points may look compressed in a dense\ncloud with few outliers." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 10.0, + "max": 1000.0, + "min_inclusive": true, + "max_inclusive": true + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent/min_gain", + "name": "min_gain", + "qname": "sklearn.manifold._t_sne._gradient_descent.min_gain", + "default_value": "0.01", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.01", + "description": "Minimum individual gain for each parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent/min_grad_norm", + "name": "min_grad_norm", + "qname": "sklearn.manifold._t_sne._gradient_descent.min_grad_norm", + "default_value": "1e-07", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-7", + "description": "If the gradient norm is below this threshold, the optimization will\nbe aborted." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent/verbose", + "name": "verbose", + "qname": "sklearn.manifold._t_sne._gradient_descent.verbose", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent/args", + "name": "args", + "qname": "sklearn.manifold._t_sne._gradient_descent.args", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "sequence", + "default_value": "None", + "description": "Arguments to pass to objective function." + }, + "type": { + "kind": "NamedType", + "name": "sequence" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_gradient_descent/kwargs", + "name": "kwargs", + "qname": "sklearn.manifold._t_sne._gradient_descent.kwargs", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Keyword arguments to pass to objective function." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Batch gradient descent with momentum and individual gains.", + "docstring": "Batch gradient descent with momentum and individual gains.\n\nParameters\n----------\nobjective : callable\n Should return a tuple of cost and gradient for a given parameter\n vector. When expensive to compute, the cost can optionally\n be None and can be computed every n_iter_check steps using\n the objective_error function.\n\np0 : array-like of shape (n_params,)\n Initial parameter vector.\n\nit : int\n Current number of iterations (this function will be called more than\n once during the optimization).\n\nn_iter : int\n Maximum number of gradient descent iterations.\n\nn_iter_check : int, default=1\n Number of iterations before evaluating the global error. 
If the error\n is sufficiently low, we abort the optimization.\n\nn_iter_without_progress : int, default=300\n Maximum number of iterations without progress before we abort the\n optimization.\n\nmomentum : float within (0.0, 1.0), default=0.8\n The momentum generates a weight for previous gradients that decays\n exponentially.\n\nlearning_rate : float, default=200.0\n The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n the learning rate is too high, the data may look like a 'ball' with any\n point approximately equidistant from its nearest neighbours. If the\n learning rate is too low, most points may look compressed in a dense\n cloud with few outliers.\n\nmin_gain : float, default=0.01\n Minimum individual gain for each parameter.\n\nmin_grad_norm : float, default=1e-7\n If the gradient norm is below this threshold, the optimization will\n be aborted.\n\nverbose : int, default=0\n Verbosity level.\n\nargs : sequence, default=None\n Arguments to pass to objective function.\n\nkwargs : dict, default=None\n Keyword arguments to pass to objective function.\n\nReturns\n-------\np : ndarray of shape (n_params,)\n Optimum parameters.\n\nerror : float\n Optimum.\n\ni : int\n Last iteration.", + "code": "def _gradient_descent(objective, p0, it, n_iter,\n n_iter_check=1, n_iter_without_progress=300,\n momentum=0.8, learning_rate=200.0, min_gain=0.01,\n min_grad_norm=1e-7, verbose=0, args=None, kwargs=None):\n \"\"\"Batch gradient descent with momentum and individual gains.\n\n Parameters\n ----------\n objective : callable\n Should return a tuple of cost and gradient for a given parameter\n vector. When expensive to compute, the cost can optionally\n be None and can be computed every n_iter_check steps using\n the objective_error function.\n\n p0 : array-like of shape (n_params,)\n Initial parameter vector.\n\n it : int\n Current number of iterations (this function will be called more than\n once during the optimization).\n\n n_iter : int\n Maximum number of gradient descent iterations.\n\n n_iter_check : int, default=1\n Number of iterations before evaluating the global error. If the error\n is sufficiently low, we abort the optimization.\n\n n_iter_without_progress : int, default=300\n Maximum number of iterations without progress before we abort the\n optimization.\n\n momentum : float within (0.0, 1.0), default=0.8\n The momentum generates a weight for previous gradients that decays\n exponentially.\n\n learning_rate : float, default=200.0\n The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If\n the learning rate is too high, the data may look like a 'ball' with any\n point approximately equidistant from its nearest neighbours. 
If the\n learning rate is too low, most points may look compressed in a dense\n cloud with few outliers.\n\n min_gain : float, default=0.01\n Minimum individual gain for each parameter.\n\n min_grad_norm : float, default=1e-7\n If the gradient norm is below this threshold, the optimization will\n be aborted.\n\n verbose : int, default=0\n Verbosity level.\n\n args : sequence, default=None\n Arguments to pass to objective function.\n\n kwargs : dict, default=None\n Keyword arguments to pass to objective function.\n\n Returns\n -------\n p : ndarray of shape (n_params,)\n Optimum parameters.\n\n error : float\n Optimum.\n\n i : int\n Last iteration.\n \"\"\"\n if args is None:\n args = []\n if kwargs is None:\n kwargs = {}\n\n p = p0.copy().ravel()\n update = np.zeros_like(p)\n gains = np.ones_like(p)\n error = np.finfo(float).max\n best_error = np.finfo(float).max\n best_iter = i = it\n\n tic = time()\n for i in range(it, n_iter):\n check_convergence = (i + 1) % n_iter_check == 0\n # only compute the error when needed\n kwargs['compute_error'] = check_convergence or i == n_iter - 1\n\n error, grad = objective(p, *args, **kwargs)\n grad_norm = linalg.norm(grad)\n\n inc = update * grad < 0.0\n dec = np.invert(inc)\n gains[inc] += 0.2\n gains[dec] *= 0.8\n np.clip(gains, min_gain, np.inf, out=gains)\n grad *= gains\n update = momentum * update - learning_rate * grad\n p += update\n\n if check_convergence:\n toc = time()\n duration = toc - tic\n tic = toc\n\n if verbose >= 2:\n print(\"[t-SNE] Iteration %d: error = %.7f,\"\n \" gradient norm = %.7f\"\n \" (%s iterations in %0.3fs)\"\n % (i + 1, error, grad_norm, n_iter_check, duration))\n\n if error < best_error:\n best_error = error\n best_iter = i\n elif i - best_iter > n_iter_without_progress:\n if verbose >= 2:\n print(\"[t-SNE] Iteration %d: did not make any progress \"\n \"during the last %d episodes. Finished.\"\n % (i + 1, n_iter_without_progress))\n break\n if grad_norm <= min_grad_norm:\n if verbose >= 2:\n print(\"[t-SNE] Iteration %d: gradient norm %f. Finished.\"\n % (i + 1, grad_norm))\n break\n\n return p, error, i" + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_joint_probabilities", + "name": "_joint_probabilities", + "qname": "sklearn.manifold._t_sne._joint_probabilities", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._t_sne/_joint_probabilities/distances", + "name": "distances", + "qname": "sklearn.manifold._t_sne._joint_probabilities.distances", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples * (n_samples-1) / 2,)", + "default_value": "", + "description": "Distances of samples are stored as condensed matrices, i.e.\nwe omit the diagonal and duplicate entries and store everything\nin a one-dimensional array." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples * (n_samples-1) / 2,)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_joint_probabilities/desired_perplexity", + "name": "desired_perplexity", + "qname": "sklearn.manifold._t_sne._joint_probabilities.desired_perplexity", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Desired perplexity of the joint probability distributions." 
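Editor's note: the `_gradient_descent` body above couples momentum with per-parameter "gains". A standalone numpy rendering of one update step (illustrative values; the logic mirrors the recorded code): gains grow additively when the gradient keeps its sign relative to the running update and shrink multiplicatively when it flips, are clipped at `min_gain`, and then a momentum step is applied.

```python
# Sketch: one momentum-with-gains update step, as in the code above.
import numpy as np

update = np.array([ 0.1, -0.2,  0.0])
grad   = np.array([-0.5, -0.3,  0.4])
gains  = np.ones(3)
momentum, learning_rate, min_gain = 0.8, 200.0, 0.01

inc = update * grad < 0.0   # gradient still points "with" the update
dec = np.invert(inc)
gains[inc] += 0.2
gains[dec] *= 0.8
np.clip(gains, min_gain, np.inf, out=gains)

update = momentum * update - learning_rate * (gains * grad)
print(gains, update)
```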
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_joint_probabilities/verbose", + "name": "verbose", + "qname": "sklearn.manifold._t_sne._joint_probabilities.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute joint probabilities p_ij from distances.", + "docstring": "Compute joint probabilities p_ij from distances.\n\nParameters\n----------\ndistances : ndarray of shape (n_samples * (n_samples-1) / 2,)\n Distances of samples are stored as condensed matrices, i.e.\n we omit the diagonal and duplicate entries and store everything\n in a one-dimensional array.\n\ndesired_perplexity : float\n Desired perplexity of the joint probability distributions.\n\nverbose : int\n Verbosity level.\n\nReturns\n-------\nP : ndarray of shape (n_samples * (n_samples-1) / 2,)\n Condensed joint probability matrix.", + "code": "def _joint_probabilities(distances, desired_perplexity, verbose):\n \"\"\"Compute joint probabilities p_ij from distances.\n\n Parameters\n ----------\n distances : ndarray of shape (n_samples * (n_samples-1) / 2,)\n Distances of samples are stored as condensed matrices, i.e.\n we omit the diagonal and duplicate entries and store everything\n in a one-dimensional array.\n\n desired_perplexity : float\n Desired perplexity of the joint probability distributions.\n\n verbose : int\n Verbosity level.\n\n Returns\n -------\n P : ndarray of shape (n_samples * (n_samples-1) / 2,)\n Condensed joint probability matrix.\n \"\"\"\n # Compute conditional probabilities such that they approximately match\n # the desired perplexity\n distances = distances.astype(np.float32, copy=False)\n conditional_P = _utils._binary_search_perplexity(\n distances, desired_perplexity, verbose)\n P = conditional_P + conditional_P.T\n sum_P = np.maximum(np.sum(P), MACHINE_EPSILON)\n P = np.maximum(squareform(P) / sum_P, MACHINE_EPSILON)\n return P" + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_joint_probabilities_nn", + "name": "_joint_probabilities_nn", + "qname": "sklearn.manifold._t_sne._joint_probabilities_nn", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._t_sne/_joint_probabilities_nn/distances", + "name": "distances", + "qname": "sklearn.manifold._t_sne._joint_probabilities_nn.distances", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_samples)", + "default_value": "", + "description": "Distances of samples to its n_neighbors nearest neighbors. All other\ndistances are left to zero (and are not materialized in memory).\nMatrix should be of CSR format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_joint_probabilities_nn/desired_perplexity", + "name": "desired_perplexity", + "qname": "sklearn.manifold._t_sne._joint_probabilities_nn.desired_perplexity", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Desired perplexity of the joint probability distributions." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_joint_probabilities_nn/verbose", + "name": "verbose", + "qname": "sklearn.manifold._t_sne._joint_probabilities_nn.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute joint probabilities p_ij from distances using just nearest\nneighbors.\n\nThis method is approximately equal to _joint_probabilities. The latter\nis O(N), but limiting the joint probability to nearest neighbors improves\nthis substantially to O(uN).", + "docstring": "Compute joint probabilities p_ij from distances using just nearest\nneighbors.\n\nThis method is approximately equal to _joint_probabilities. The latter\nis O(N), but limiting the joint probability to nearest neighbors improves\nthis substantially to O(uN).\n\nParameters\n----------\ndistances : sparse matrix of shape (n_samples, n_samples)\n Distances of samples to its n_neighbors nearest neighbors. All other\n distances are left to zero (and are not materialized in memory).\n Matrix should be of CSR format.\n\ndesired_perplexity : float\n Desired perplexity of the joint probability distributions.\n\nverbose : int\n Verbosity level.\n\nReturns\n-------\nP : sparse matrix of shape (n_samples, n_samples)\n Condensed joint probability matrix with only nearest neighbors. Matrix\n will be of CSR format.", + "code": "def _joint_probabilities_nn(distances, desired_perplexity, verbose):\n \"\"\"Compute joint probabilities p_ij from distances using just nearest\n neighbors.\n\n This method is approximately equal to _joint_probabilities. The latter\n is O(N), but limiting the joint probability to nearest neighbors improves\n this substantially to O(uN).\n\n Parameters\n ----------\n distances : sparse matrix of shape (n_samples, n_samples)\n Distances of samples to its n_neighbors nearest neighbors. All other\n distances are left to zero (and are not materialized in memory).\n Matrix should be of CSR format.\n\n desired_perplexity : float\n Desired perplexity of the joint probability distributions.\n\n verbose : int\n Verbosity level.\n\n Returns\n -------\n P : sparse matrix of shape (n_samples, n_samples)\n Condensed joint probability matrix with only nearest neighbors. 
Matrix\n will be of CSR format.\n \"\"\"\n t0 = time()\n # Compute conditional probabilities such that they approximately match\n # the desired perplexity\n distances.sort_indices()\n n_samples = distances.shape[0]\n distances_data = distances.data.reshape(n_samples, -1)\n distances_data = distances_data.astype(np.float32, copy=False)\n conditional_P = _utils._binary_search_perplexity(\n distances_data, desired_perplexity, verbose)\n assert np.all(np.isfinite(conditional_P)), \\\n \"All probabilities should be finite\"\n\n # Symmetrize the joint probability distribution using sparse operations\n P = csr_matrix((conditional_P.ravel(), distances.indices,\n distances.indptr),\n shape=(n_samples, n_samples))\n P = P + P.T\n\n # Normalize the joint probability distribution\n sum_P = np.maximum(P.sum(), MACHINE_EPSILON)\n P /= sum_P\n\n assert np.all(np.abs(P.data) <= 1.0)\n if verbose >= 2:\n duration = time() - t0\n print(\"[t-SNE] Computed conditional probabilities in {:.3f}s\"\n .format(duration))\n return P" + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence", + "name": "_kl_divergence", + "qname": "sklearn.manifold._t_sne._kl_divergence", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence/params", + "name": "params", + "qname": "sklearn.manifold._t_sne._kl_divergence.params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_params,)", + "default_value": "", + "description": "Unraveled embedding." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_params,)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence/P", + "name": "P", + "qname": "sklearn.manifold._t_sne._kl_divergence.P", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples * (n_samples-1) / 2,)", + "default_value": "", + "description": "Condensed joint probability matrix." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples * (n_samples-1) / 2,)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence/degrees_of_freedom", + "name": "degrees_of_freedom", + "qname": "sklearn.manifold._t_sne._kl_divergence.degrees_of_freedom", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Degrees of freedom of the Student's-t distribution." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence/n_samples", + "name": "n_samples", + "qname": "sklearn.manifold._t_sne._kl_divergence.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence/n_components", + "name": "n_components", + "qname": "sklearn.manifold._t_sne._kl_divergence.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Dimension of the embedded space." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence/skip_num_points", + "name": "skip_num_points", + "qname": "sklearn.manifold._t_sne._kl_divergence.skip_num_points", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "This does not compute the gradient for points with indices below\n`skip_num_points`. This is useful when computing transforms of new\ndata where you'd like to keep the old data fixed." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence/compute_error", + "name": "compute_error", + "qname": "sklearn.manifold._t_sne._kl_divergence.compute_error", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "t-SNE objective function: gradient of the KL divergence\nof p_ijs and q_ijs and the absolute error.", + "docstring": "t-SNE objective function: gradient of the KL divergence\nof p_ijs and q_ijs and the absolute error.\n\nParameters\n----------\nparams : ndarray of shape (n_params,)\n Unraveled embedding.\n\nP : ndarray of shape (n_samples * (n_samples-1) / 2,)\n Condensed joint probability matrix.\n\ndegrees_of_freedom : int\n Degrees of freedom of the Student's-t distribution.\n\nn_samples : int\n Number of samples.\n\nn_components : int\n Dimension of the embedded space.\n\nskip_num_points : int, default=0\n This does not compute the gradient for points with indices below\n `skip_num_points`. This is useful when computing transforms of new\n data where you'd like to keep the old data fixed.\n\ncompute_error: bool, default=True\n If False, the kl_divergence is not computed and returns NaN.\n\nReturns\n-------\nkl_divergence : float\n Kullback-Leibler divergence of p_ij and q_ij.\n\ngrad : ndarray of shape (n_params,)\n Unraveled gradient of the Kullback-Leibler divergence with respect to\n the embedding.", + "code": "def _kl_divergence(params, P, degrees_of_freedom, n_samples, n_components,\n skip_num_points=0, compute_error=True):\n \"\"\"t-SNE objective function: gradient of the KL divergence\n of p_ijs and q_ijs and the absolute error.\n\n Parameters\n ----------\n params : ndarray of shape (n_params,)\n Unraveled embedding.\n\n P : ndarray of shape (n_samples * (n_samples-1) / 2,)\n Condensed joint probability matrix.\n\n degrees_of_freedom : int\n Degrees of freedom of the Student's-t distribution.\n\n n_samples : int\n Number of samples.\n\n n_components : int\n Dimension of the embedded space.\n\n skip_num_points : int, default=0\n This does not compute the gradient for points with indices below\n `skip_num_points`. 
This is useful when computing transforms of new\n data where you'd like to keep the old data fixed.\n\n compute_error: bool, default=True\n If False, the kl_divergence is not computed and returns NaN.\n\n Returns\n -------\n kl_divergence : float\n Kullback-Leibler divergence of p_ij and q_ij.\n\n grad : ndarray of shape (n_params,)\n Unraveled gradient of the Kullback-Leibler divergence with respect to\n the embedding.\n \"\"\"\n X_embedded = params.reshape(n_samples, n_components)\n\n # Q is a heavy-tailed distribution: Student's t-distribution\n dist = pdist(X_embedded, \"sqeuclidean\")\n dist /= degrees_of_freedom\n dist += 1.\n dist **= (degrees_of_freedom + 1.0) / -2.0\n Q = np.maximum(dist / (2.0 * np.sum(dist)), MACHINE_EPSILON)\n\n # Optimization trick below: np.dot(x, y) is faster than\n # np.sum(x * y) because it calls BLAS\n\n # Objective: C (Kullback-Leibler divergence of P and Q)\n if compute_error:\n kl_divergence = 2.0 * np.dot(\n P, np.log(np.maximum(P, MACHINE_EPSILON) / Q))\n else:\n kl_divergence = np.nan\n\n # Gradient: dC/dY\n # pdist always returns double precision distances. Thus we need to take\n grad = np.ndarray((n_samples, n_components), dtype=params.dtype)\n PQd = squareform((P - Q) * dist)\n for i in range(skip_num_points, n_samples):\n grad[i] = np.dot(np.ravel(PQd[i], order='K'),\n X_embedded[i] - X_embedded)\n grad = grad.ravel()\n c = 2.0 * (degrees_of_freedom + 1.0) / degrees_of_freedom\n grad *= c\n\n return kl_divergence, grad" + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence_bh", + "name": "_kl_divergence_bh", + "qname": "sklearn.manifold._t_sne._kl_divergence_bh", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence_bh/params", + "name": "params", + "qname": "sklearn.manifold._t_sne._kl_divergence_bh.params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_params,)", + "default_value": "", + "description": "Unraveled embedding." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_params,)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence_bh/P", + "name": "P", + "qname": "sklearn.manifold._t_sne._kl_divergence_bh.P", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_sample)", + "default_value": "", + "description": "Sparse approximate joint probability matrix, computed only for the\nk nearest-neighbors and symmetrized. Matrix should be of CSR format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_sample)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence_bh/degrees_of_freedom", + "name": "degrees_of_freedom", + "qname": "sklearn.manifold._t_sne._kl_divergence_bh.degrees_of_freedom", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Degrees of freedom of the Student's-t distribution." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence_bh/n_samples", + "name": "n_samples", + "qname": "sklearn.manifold._t_sne._kl_divergence_bh.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of samples." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence_bh/n_components", + "name": "n_components", + "qname": "sklearn.manifold._t_sne._kl_divergence_bh.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Dimension of the embedded space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence_bh/angle", + "name": "angle", + "qname": "sklearn.manifold._t_sne._kl_divergence_bh.angle", + "default_value": "0.5", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n'angle' is the angular size (referred to as theta in [3]) of a distant\nnode as measured from a point. If this size is below 'angle' then it is\nused as a summary node of all points contained within it.\nThis method is not very sensitive to changes in this parameter\nin the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing\ncomputation time and angle greater 0.8 has quickly increasing error." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence_bh/skip_num_points", + "name": "skip_num_points", + "qname": "sklearn.manifold._t_sne._kl_divergence_bh.skip_num_points", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "This does not compute the gradient for points with indices below\n`skip_num_points`. This is useful when computing transforms of new\ndata where you'd like to keep the old data fixed." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence_bh/verbose", + "name": "verbose", + "qname": "sklearn.manifold._t_sne._kl_divergence_bh.verbose", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "False", + "description": "Verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence_bh/compute_error", + "name": "compute_error", + "qname": "sklearn.manifold._t_sne._kl_divergence_bh.compute_error", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/_kl_divergence_bh/num_threads", + "name": "num_threads", + "qname": "sklearn.manifold._t_sne._kl_divergence_bh.num_threads", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Number of threads used to compute the gradient. This is set here to\navoid calling _openmp_effective_n_threads for each gradient step." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "t-SNE objective function: KL divergence of p_ijs and q_ijs.\n\nUses Barnes-Hut tree methods to calculate the gradient that\nruns in O(NlogN) instead of O(N^2).", + "docstring": "t-SNE objective function: KL divergence of p_ijs and q_ijs.\n\nUses Barnes-Hut tree methods to calculate the gradient that\nruns in O(NlogN) instead of O(N^2).\n\nParameters\n----------\nparams : ndarray of shape (n_params,)\n Unraveled embedding.\n\nP : sparse matrix of shape (n_samples, n_sample)\n Sparse approximate joint probability matrix, computed only for the\n k nearest-neighbors and symmetrized. Matrix should be of CSR format.\n\ndegrees_of_freedom : int\n Degrees of freedom of the Student's-t distribution.\n\nn_samples : int\n Number of samples.\n\nn_components : int\n Dimension of the embedded space.\n\nangle : float, default=0.5\n This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n 'angle' is the angular size (referred to as theta in [3]) of a distant\n node as measured from a point. If this size is below 'angle' then it is\n used as a summary node of all points contained within it.\n This method is not very sensitive to changes in this parameter\n in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing\n computation time and angle greater 0.8 has quickly increasing error.\n\nskip_num_points : int, default=0\n This does not compute the gradient for points with indices below\n `skip_num_points`. This is useful when computing transforms of new\n data where you'd like to keep the old data fixed.\n\nverbose : int, default=False\n Verbosity level.\n\ncompute_error: bool, default=True\n If False, the kl_divergence is not computed and returns NaN.\n\nnum_threads : int, default=1\n Number of threads used to compute the gradient. This is set here to\n avoid calling _openmp_effective_n_threads for each gradient step.\n\nReturns\n-------\nkl_divergence : float\n Kullback-Leibler divergence of p_ij and q_ij.\n\ngrad : ndarray of shape (n_params,)\n Unraveled gradient of the Kullback-Leibler divergence with respect to\n the embedding.", + "code": "def _kl_divergence_bh(params, P, degrees_of_freedom, n_samples, n_components,\n angle=0.5, skip_num_points=0, verbose=False,\n compute_error=True, num_threads=1):\n \"\"\"t-SNE objective function: KL divergence of p_ijs and q_ijs.\n\n Uses Barnes-Hut tree methods to calculate the gradient that\n runs in O(NlogN) instead of O(N^2).\n\n Parameters\n ----------\n params : ndarray of shape (n_params,)\n Unraveled embedding.\n\n P : sparse matrix of shape (n_samples, n_sample)\n Sparse approximate joint probability matrix, computed only for the\n k nearest-neighbors and symmetrized. Matrix should be of CSR format.\n\n degrees_of_freedom : int\n Degrees of freedom of the Student's-t distribution.\n\n n_samples : int\n Number of samples.\n\n n_components : int\n Dimension of the embedded space.\n\n angle : float, default=0.5\n This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.\n 'angle' is the angular size (referred to as theta in [3]) of a distant\n node as measured from a point. If this size is below 'angle' then it is\n used as a summary node of all points contained within it.\n This method is not very sensitive to changes in this parameter\n in the range of 0.2 - 0.8. 
Angle less than 0.2 has quickly increasing\n computation time and angle greater 0.8 has quickly increasing error.\n\n skip_num_points : int, default=0\n This does not compute the gradient for points with indices below\n `skip_num_points`. This is useful when computing transforms of new\n data where you'd like to keep the old data fixed.\n\n verbose : int, default=False\n Verbosity level.\n\n compute_error: bool, default=True\n If False, the kl_divergence is not computed and returns NaN.\n\n num_threads : int, default=1\n Number of threads used to compute the gradient. This is set here to\n avoid calling _openmp_effective_n_threads for each gradient step.\n\n Returns\n -------\n kl_divergence : float\n Kullback-Leibler divergence of p_ij and q_ij.\n\n grad : ndarray of shape (n_params,)\n Unraveled gradient of the Kullback-Leibler divergence with respect to\n the embedding.\n \"\"\"\n params = params.astype(np.float32, copy=False)\n X_embedded = params.reshape(n_samples, n_components)\n\n val_P = P.data.astype(np.float32, copy=False)\n neighbors = P.indices.astype(np.int64, copy=False)\n indptr = P.indptr.astype(np.int64, copy=False)\n\n grad = np.zeros(X_embedded.shape, dtype=np.float32)\n error = _barnes_hut_tsne.gradient(val_P, X_embedded, neighbors, indptr,\n grad, angle, n_components, verbose,\n dof=degrees_of_freedom,\n compute_error=compute_error,\n num_threads=num_threads)\n c = 2.0 * (degrees_of_freedom + 1.0) / degrees_of_freedom\n grad = grad.ravel()\n grad *= c\n\n return error, grad" + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/trustworthiness", + "name": "trustworthiness", + "qname": "sklearn.manifold._t_sne.trustworthiness", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold._t_sne/trustworthiness/X", + "name": "X", + "qname": "sklearn.manifold._t_sne.trustworthiness.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features) or (n_samples, n_samples)", + "default_value": "", + "description": "If the metric is 'precomputed' X must be a square distance\nmatrix. Otherwise it contains a sample per row." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features) or (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/trustworthiness/X_embedded", + "name": "X_embedded", + "qname": "sklearn.manifold._t_sne.trustworthiness.X_embedded", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_components)", + "default_value": "", + "description": "Embedding of the training data in low-dimensional space." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/trustworthiness/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.manifold._t_sne.trustworthiness.n_neighbors", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of neighbors k that will be considered." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.manifold._t_sne/trustworthiness/metric", + "name": "metric", + "qname": "sklearn.manifold._t_sne.trustworthiness.metric", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'euclidean'", + "description": "Which metric to use for computing pairwise distances between samples\nfrom the original input space. If metric is 'precomputed', X must be a\nmatrix of pairwise distances or squared distances. Otherwise, see the\ndocumentation of argument metric in sklearn.pairwise.pairwise_distances\nfor a list of available metrics.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Expresses to what extent the local structure is retained.\n\nThe trustworthiness is within [0, 1]. It is defined as\n\n.. math::\n\n T(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n \\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\nwhere for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\nneighbors in the output space, and every sample j is its :math:`r(i, j)`-th\nnearest neighbor in the input space. In other words, any unexpected nearest\nneighbors in the output space are penalised in proportion to their rank in\nthe input space.\n\n* \"Neighborhood Preservation in Nonlinear Projection Methods: An\n Experimental Study\"\n J. Venna, S. Kaski\n* \"Learning a Parametric Embedding by Preserving Local Structure\"\n L.J.P. van der Maaten", + "docstring": "Expresses to what extent the local structure is retained.\n\nThe trustworthiness is within [0, 1]. It is defined as\n\n.. math::\n\n T(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n \\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\nwhere for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\nneighbors in the output space, and every sample j is its :math:`r(i, j)`-th\nnearest neighbor in the input space. In other words, any unexpected nearest\nneighbors in the output space are penalised in proportion to their rank in\nthe input space.\n\n* \"Neighborhood Preservation in Nonlinear Projection Methods: An\n Experimental Study\"\n J. Venna, S. Kaski\n* \"Learning a Parametric Embedding by Preserving Local Structure\"\n L.J.P. van der Maaten\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row.\n\nX_embedded : ndarray of shape (n_samples, n_components)\n Embedding of the training data in low-dimensional space.\n\nn_neighbors : int, default=5\n Number of neighbors k that will be considered.\n\nmetric : str or callable, default='euclidean'\n Which metric to use for computing pairwise distances between samples\n from the original input space. If metric is 'precomputed', X must be a\n matrix of pairwise distances or squared distances. Otherwise, see the\n documentation of argument metric in sklearn.pairwise.pairwise_distances\n for a list of available metrics.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\ntrustworthiness : float\n Trustworthiness of the low-dimensional embedding.", + "code": "@_deprecate_positional_args\ndef trustworthiness(X, X_embedded, *, n_neighbors=5, metric='euclidean'):\n r\"\"\"Expresses to what extent the local structure is retained.\n\n The trustworthiness is within [0, 1]. It is defined as\n\n .. math::\n\n T(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n \\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\n where for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\n neighbors in the output space, and every sample j is its :math:`r(i, j)`-th\n nearest neighbor in the input space. In other words, any unexpected nearest\n neighbors in the output space are penalised in proportion to their rank in\n the input space.\n\n * \"Neighborhood Preservation in Nonlinear Projection Methods: An\n Experimental Study\"\n J. Venna, S. Kaski\n * \"Learning a Parametric Embedding by Preserving Local Structure\"\n L.J.P. van der Maaten\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row.\n\n X_embedded : ndarray of shape (n_samples, n_components)\n Embedding of the training data in low-dimensional space.\n\n n_neighbors : int, default=5\n Number of neighbors k that will be considered.\n\n metric : str or callable, default='euclidean'\n Which metric to use for computing pairwise distances between samples\n from the original input space. If metric is 'precomputed', X must be a\n matrix of pairwise distances or squared distances. Otherwise, see the\n documentation of argument metric in sklearn.pairwise.pairwise_distances\n for a list of available metrics.\n\n .. 
versionadded:: 0.20\n\n Returns\n -------\n trustworthiness : float\n Trustworthiness of the low-dimensional embedding.\n \"\"\"\n dist_X = pairwise_distances(X, metric=metric)\n if metric == 'precomputed':\n dist_X = dist_X.copy()\n # we set the diagonal to np.inf to exclude the points themselves from\n # their own neighborhood\n np.fill_diagonal(dist_X, np.inf)\n ind_X = np.argsort(dist_X, axis=1)\n # `ind_X[i]` is the index of sorted distances between i and other samples\n ind_X_embedded = NearestNeighbors(n_neighbors=n_neighbors).fit(\n X_embedded).kneighbors(return_distance=False)\n\n # We build an inverted index of neighbors in the input space: For sample i,\n # we define `inverted_index[i]` as the inverted index of sorted distances:\n # inverted_index[i][ind_X[i]] = np.arange(1, n_sample + 1)\n n_samples = X.shape[0]\n inverted_index = np.zeros((n_samples, n_samples), dtype=int)\n ordered_indices = np.arange(n_samples + 1)\n inverted_index[ordered_indices[:-1, np.newaxis],\n ind_X] = ordered_indices[1:]\n ranks = inverted_index[ordered_indices[:-1, np.newaxis],\n ind_X_embedded] - n_neighbors\n t = np.sum(ranks[ranks > 0])\n t = 1.0 - t * (2.0 / (n_samples * n_neighbors *\n (2.0 * n_samples - 3.0 * n_neighbors - 1.0)))\n return t" + }, + { + "id": "scikit-learn/sklearn.manifold.setup/configuration", + "name": "configuration", + "qname": "sklearn.manifold.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.manifold.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.manifold.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.manifold.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.manifold.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package=\"\", top_path=None):\n from numpy.distutils.misc_util import Configuration\n\n config = Configuration(\"manifold\", parent_package, top_path)\n\n libraries = []\n if os.name == 'posix':\n libraries.append('m')\n\n config.add_extension(\"_utils\",\n sources=[\"_utils.pyx\"],\n include_dirs=[numpy.get_include()],\n libraries=libraries,\n extra_compile_args=[\"-O3\"])\n\n config.add_extension(\"_barnes_hut_tsne\",\n sources=[\"_barnes_hut_tsne.pyx\"],\n include_dirs=[numpy.get_include()],\n libraries=libraries,\n extra_compile_args=['-O3'])\n\n config.add_subpackage('tests')\n\n return config" + }, + { + "id": "scikit-learn/sklearn.metrics._base/_average_binary_score", + "name": "_average_binary_score", + "qname": "sklearn.metrics._base._average_binary_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._base/_average_binary_score/binary_metric", + "name": "binary_metric", + "qname": "sklearn.metrics._base._average_binary_score.binary_metric", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable, returns shape [n_classes]", + "default_value": "", + "description": "The binary metric function to use." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "returns shape [n_classes]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._base/_average_binary_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._base._average_binary_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape = [n_samples] or [n_samples, n_classes]", + "default_value": "", + "description": "True binary labels in binary label indicators." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + }, + { + "kind": "NamedType", + "name": "[n_samples" + }, + { + "kind": "NamedType", + "name": "n_classes]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._base/_average_binary_score/y_score", + "name": "y_score", + "qname": "sklearn.metrics._base._average_binary_score.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape = [n_samples] or [n_samples, n_classes]", + "default_value": "", + "description": "Target scores, can either be probability estimates of the positive\nclass, confidence values, or binary decisions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + }, + { + "kind": "NamedType", + "name": "[n_samples" + }, + { + "kind": "NamedType", + "name": "n_classes]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._base/_average_binary_score/average", + "name": "average", + "qname": "sklearn.metrics._base._average_binary_score.average", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "string, [None, 'micro', 'macro' (default), 'samples', 'weighted']", + "default_value": "", + "description": "If ``None``, the scores for each class are returned. Otherwise,\nthis determines the type of averaging performed on the data:\n\n``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n``'samples'``:\n Calculate metrics for each instance, and find their average.\n\nWill be ignored when ``y_true`` is binary." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "[None" + }, + { + "kind": "NamedType", + "name": "'micro'" + }, + { + "kind": "NamedType", + "name": "'macro' (default)" + }, + { + "kind": "NamedType", + "name": "'samples'" + }, + { + "kind": "NamedType", + "name": "'weighted']" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._base/_average_binary_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._base._average_binary_score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Average a binary metric for multilabel classification.", + "docstring": "Average a binary metric for multilabel classification.\n\nParameters\n----------\ny_true : array, shape = [n_samples] or [n_samples, n_classes]\n True binary labels in binary label indicators.\n\ny_score : array, shape = [n_samples] or [n_samples, n_classes]\n Target scores, can either be probability estimates of the positive\n class, confidence values, or binary decisions.\n\naverage : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']\n If ``None``, the scores for each class are returned. Otherwise,\n this determines the type of averaging performed on the data:\n\n ``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n ``'samples'``:\n Calculate metrics for each instance, and find their average.\n\n Will be ignored when ``y_true`` is binary.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nbinary_metric : callable, returns shape [n_classes]\n The binary metric function to use.\n\nReturns\n-------\nscore : float or array of shape [n_classes]\n If not ``None``, average the score, else return the score for each\n classes.", + "code": "def _average_binary_score(binary_metric, y_true, y_score, average,\n sample_weight=None):\n \"\"\"Average a binary metric for multilabel classification.\n\n Parameters\n ----------\n y_true : array, shape = [n_samples] or [n_samples, n_classes]\n True binary labels in binary label indicators.\n\n y_score : array, shape = [n_samples] or [n_samples, n_classes]\n Target scores, can either be probability estimates of the positive\n class, confidence values, or binary decisions.\n\n average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']\n If ``None``, the scores for each class are returned. Otherwise,\n this determines the type of averaging performed on the data:\n\n ``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. 
This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n ``'samples'``:\n Calculate metrics for each instance, and find their average.\n\n Will be ignored when ``y_true`` is binary.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n binary_metric : callable, returns shape [n_classes]\n The binary metric function to use.\n\n Returns\n -------\n score : float or array of shape [n_classes]\n If not ``None``, average the score, else return the score for each\n classes.\n\n \"\"\"\n average_options = (None, 'micro', 'macro', 'weighted', 'samples')\n if average not in average_options:\n raise ValueError('average has to be one of {0}'\n ''.format(average_options))\n\n y_type = type_of_target(y_true)\n if y_type not in (\"binary\", \"multilabel-indicator\"):\n raise ValueError(\"{0} format is not supported\".format(y_type))\n\n if y_type == \"binary\":\n return binary_metric(y_true, y_score, sample_weight=sample_weight)\n\n check_consistent_length(y_true, y_score, sample_weight)\n y_true = check_array(y_true)\n y_score = check_array(y_score)\n\n not_average_axis = 1\n score_weight = sample_weight\n average_weight = None\n\n if average == \"micro\":\n if score_weight is not None:\n score_weight = np.repeat(score_weight, y_true.shape[1])\n y_true = y_true.ravel()\n y_score = y_score.ravel()\n\n elif average == 'weighted':\n if score_weight is not None:\n average_weight = np.sum(np.multiply(\n y_true, np.reshape(score_weight, (-1, 1))), axis=0)\n else:\n average_weight = np.sum(y_true, axis=0)\n if np.isclose(average_weight.sum(), 0.0):\n return 0\n\n elif average == 'samples':\n # swap average_weight <-> score_weight\n average_weight = score_weight\n score_weight = None\n not_average_axis = 0\n\n if y_true.ndim == 1:\n y_true = y_true.reshape((-1, 1))\n\n if y_score.ndim == 1:\n y_score = y_score.reshape((-1, 1))\n\n n_classes = y_score.shape[not_average_axis]\n score = np.zeros((n_classes,))\n for c in range(n_classes):\n y_true_c = y_true.take([c], axis=not_average_axis).ravel()\n y_score_c = y_score.take([c], axis=not_average_axis).ravel()\n score[c] = binary_metric(y_true_c, y_score_c,\n sample_weight=score_weight)\n\n # Average the results\n if average is not None:\n if average_weight is not None:\n # Scores with 0 weights are forced to be 0, preventing the average\n # score from being affected by 0-weighted NaN elements.\n average_weight = np.asarray(average_weight)\n score[average_weight == 0] = 0\n return np.average(score, weights=average_weight)\n else:\n return score" + }, + { + "id": "scikit-learn/sklearn.metrics._base/_average_multiclass_ovo_score", + "name": "_average_multiclass_ovo_score", + "qname": "sklearn.metrics._base._average_multiclass_ovo_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._base/_average_multiclass_ovo_score/binary_metric", + "name": "binary_metric", + "qname": "sklearn.metrics._base._average_multiclass_ovo_score.binary_metric", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "The binary metric function to use that accepts the following as input:\n y_true_target : array, shape = [n_samples_target]\n Some sub-array of y_true for a pair of classes designated\n positive and negative in the one-vs-one scheme.\n y_score_target : array, shape 
= [n_samples_target]\n Scores corresponding to the probability estimates\n of a sample belonging to the designated positive class label" + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.metrics._base/_average_multiclass_ovo_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._base._average_multiclass_ovo_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "True multiclass labels." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._base/_average_multiclass_ovo_score/y_score", + "name": "y_score", + "qname": "sklearn.metrics._base._average_multiclass_ovo_score.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_classes)", + "default_value": "", + "description": "Target scores corresponding to probability estimates of a sample\nbelonging to a particular class." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._base/_average_multiclass_ovo_score/average", + "name": "average", + "qname": "sklearn.metrics._base._average_multiclass_ovo_score.average", + "default_value": "'macro'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'macro', 'weighted'}", + "default_value": "'macro'", + "description": "Determines the type of averaging performed on the pairwise binary\nmetric scores:\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account. Classes\n are assumed to be uniformly distributed.\n``'weighted'``:\n Calculate metrics for each label, taking into account the\n prevalence of the classes." + }, + "type": { + "kind": "EnumType", + "values": ["macro", "weighted"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Average one-versus-one scores for multiclass classification.\n\nUses the binary metric for one-vs-one multiclass classification,\nwhere the score is computed according to the Hand & Till (2001) algorithm.", + "docstring": "Average one-versus-one scores for multiclass classification.\n\nUses the binary metric for one-vs-one multiclass classification,\nwhere the score is computed according to the Hand & Till (2001) algorithm.\n\nParameters\n----------\nbinary_metric : callable\n The binary metric function to use that accepts the following as input:\n y_true_target : array, shape = [n_samples_target]\n Some sub-array of y_true for a pair of classes designated\n positive and negative in the one-vs-one scheme.\n y_score_target : array, shape = [n_samples_target]\n Scores corresponding to the probability estimates\n of a sample belonging to the designated positive class label\n\ny_true : array-like of shape (n_samples,)\n True multiclass labels.\n\ny_score : array-like of shape (n_samples, n_classes)\n Target scores corresponding to probability estimates of a sample\n belonging to a particular class.\n\naverage : {'macro', 'weighted'}, default='macro'\n Determines the type of averaging performed on the pairwise binary\n metric scores:\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. 
This does not take label imbalance into account. Classes\n are assumed to be uniformly distributed.\n ``'weighted'``:\n Calculate metrics for each label, taking into account the\n prevalence of the classes.\n\nReturns\n-------\nscore : float\n Average of the pairwise binary metric scores.", + "code": "def _average_multiclass_ovo_score(binary_metric, y_true, y_score,\n average='macro'):\n \"\"\"Average one-versus-one scores for multiclass classification.\n\n Uses the binary metric for one-vs-one multiclass classification,\n where the score is computed according to the Hand & Till (2001) algorithm.\n\n Parameters\n ----------\n binary_metric : callable\n The binary metric function to use that accepts the following as input:\n y_true_target : array, shape = [n_samples_target]\n Some sub-array of y_true for a pair of classes designated\n positive and negative in the one-vs-one scheme.\n y_score_target : array, shape = [n_samples_target]\n Scores corresponding to the probability estimates\n of a sample belonging to the designated positive class label\n\n y_true : array-like of shape (n_samples,)\n True multiclass labels.\n\n y_score : array-like of shape (n_samples, n_classes)\n Target scores corresponding to probability estimates of a sample\n belonging to a particular class.\n\n average : {'macro', 'weighted'}, default='macro'\n Determines the type of averaging performed on the pairwise binary\n metric scores:\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account. Classes\n are assumed to be uniformly distributed.\n ``'weighted'``:\n Calculate metrics for each label, taking into account the\n prevalence of the classes.\n\n Returns\n -------\n score : float\n Average of the pairwise binary metric scores.\n \"\"\"\n check_consistent_length(y_true, y_score)\n\n y_true_unique = np.unique(y_true)\n n_classes = y_true_unique.shape[0]\n n_pairs = n_classes * (n_classes - 1) // 2\n pair_scores = np.empty(n_pairs)\n\n is_weighted = average == \"weighted\"\n prevalence = np.empty(n_pairs) if is_weighted else None\n\n # Compute scores treating a as positive class and b as negative class,\n # then b as positive class and a as negative class\n for ix, (a, b) in enumerate(combinations(y_true_unique, 2)):\n a_mask = y_true == a\n b_mask = y_true == b\n ab_mask = np.logical_or(a_mask, b_mask)\n\n if is_weighted:\n prevalence[ix] = np.average(ab_mask)\n\n a_true = a_mask[ab_mask]\n b_true = b_mask[ab_mask]\n\n a_true_score = binary_metric(a_true, y_score[ab_mask, a])\n b_true_score = binary_metric(b_true, y_score[ab_mask, b])\n pair_scores[ix] = (a_true_score + b_true_score) / 2\n\n return np.average(pair_scores, weights=prevalence)" + }, + { + "id": "scikit-learn/sklearn.metrics._base/_check_pos_label_consistency", + "name": "_check_pos_label_consistency", + "qname": "sklearn.metrics._base._check_pos_label_consistency", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._base/_check_pos_label_consistency/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._base._check_pos_label_consistency.pos_label", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, str or None", + "default_value": "", + "description": "The positive label." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._base/_check_pos_label_consistency/y_true", + "name": "y_true", + "qname": "sklearn.metrics._base._check_pos_label_consistency.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The target vector." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check if `pos_label` need to be specified or not.\n\nIn binary classification, we fix `pos_label=1` if the labels are in the set\n{-1, 1} or {0, 1}. Otherwise, we raise an error asking to specify the\n`pos_label` parameters.", + "docstring": "Check if `pos_label` need to be specified or not.\n\nIn binary classification, we fix `pos_label=1` if the labels are in the set\n{-1, 1} or {0, 1}. Otherwise, we raise an error asking to specify the\n`pos_label` parameters.\n\nParameters\n----------\npos_label : int, str or None\n The positive label.\ny_true : ndarray of shape (n_samples,)\n The target vector.\n\nReturns\n-------\npos_label : int\n If `pos_label` can be inferred, it will be returned.\n\nRaises\n------\nValueError\n In the case that `y_true` does not have label in {-1, 1} or {0, 1},\n it will raise a `ValueError`.", + "code": "def _check_pos_label_consistency(pos_label, y_true):\n \"\"\"Check if `pos_label` need to be specified or not.\n\n In binary classification, we fix `pos_label=1` if the labels are in the set\n {-1, 1} or {0, 1}. 
Otherwise, we raise an error asking to specify the\n `pos_label` parameters.\n\n Parameters\n ----------\n pos_label : int, str or None\n The positive label.\n y_true : ndarray of shape (n_samples,)\n The target vector.\n\n Returns\n -------\n pos_label : int\n If `pos_label` can be inferred, it will be returned.\n\n Raises\n ------\n ValueError\n In the case that `y_true` does not have label in {-1, 1} or {0, 1},\n it will raise a `ValueError`.\n \"\"\"\n # ensure binary classification if pos_label is not specified\n # classes.dtype.kind in ('O', 'U', 'S') is required to avoid\n # triggering a FutureWarning by calling np.array_equal(a, b)\n # when elements in the two arrays are not comparable.\n classes = np.unique(y_true)\n if (pos_label is None and (\n classes.dtype.kind in 'OUS' or\n not (np.array_equal(classes, [0, 1]) or\n np.array_equal(classes, [-1, 1]) or\n np.array_equal(classes, [0]) or\n np.array_equal(classes, [-1]) or\n np.array_equal(classes, [1])))):\n classes_repr = \", \".join(repr(c) for c in classes)\n raise ValueError(\n f\"y_true takes value in {{{classes_repr}}} and pos_label is not \"\n f\"specified: either make y_true take value in {{0, 1}} or \"\n f\"{{-1, 1}} or pass pos_label explicitly.\"\n )\n elif pos_label is None:\n pos_label = 1.0\n\n return pos_label" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_check_set_wise_labels", + "name": "_check_set_wise_labels", + "qname": "sklearn.metrics._classification._check_set_wise_labels", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/_check_set_wise_labels/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification._check_set_wise_labels.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_check_set_wise_labels/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification._check_set_wise_labels.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_check_set_wise_labels/average", + "name": "average", + "qname": "sklearn.metrics._classification._check_set_wise_labels.average", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_check_set_wise_labels/labels", + "name": "labels", + "qname": "sklearn.metrics._classification._check_set_wise_labels.labels", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_check_set_wise_labels/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._classification._check_set_wise_labels.pos_label", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validation associated with set-wise metrics.\n\nReturns identified labels.", + 
"docstring": "Validation associated with set-wise metrics.\n\nReturns identified labels.", + "code": "def _check_set_wise_labels(y_true, y_pred, average, labels, pos_label):\n \"\"\"Validation associated with set-wise metrics.\n\n Returns identified labels.\n \"\"\"\n average_options = (None, 'micro', 'macro', 'weighted', 'samples')\n if average not in average_options and average != 'binary':\n raise ValueError('average has to be one of ' +\n str(average_options))\n\n y_type, y_true, y_pred = _check_targets(y_true, y_pred)\n # Convert to Python primitive type to avoid NumPy type / Python str\n # comparison. See https://github.com/numpy/numpy/issues/6784\n present_labels = unique_labels(y_true, y_pred).tolist()\n if average == 'binary':\n if y_type == 'binary':\n if pos_label not in present_labels:\n if len(present_labels) >= 2:\n raise ValueError(\n f\"pos_label={pos_label} is not a valid label. It \"\n f\"should be one of {present_labels}\"\n )\n labels = [pos_label]\n else:\n average_options = list(average_options)\n if y_type == 'multiclass':\n average_options.remove('samples')\n raise ValueError(\"Target is %s but average='binary'. Please \"\n \"choose another average setting, one of %r.\"\n % (y_type, average_options))\n elif pos_label not in (None, 1):\n warnings.warn(\"Note that pos_label (set to %r) is ignored when \"\n \"average != 'binary' (got %r). You may use \"\n \"labels=[pos_label] to specify a single positive class.\"\n % (pos_label, average), UserWarning)\n return labels" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_check_targets", + "name": "_check_targets", + "qname": "sklearn.metrics._classification._check_targets", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/_check_targets/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification._check_targets.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_check_targets/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification._check_targets.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check that y_true and y_pred belong to the same classification task.\n\nThis converts multiclass or binary types to a common shape, and raises a\nValueError for a mix of multilabel and multiclass targets, a mix of\nmultilabel formats, for the presence of continuous-valued or multioutput\ntargets, or for targets of different lengths.\n\nColumn vectors are squeezed to 1d, while multilabel formats are returned\nas CSR sparse label indicators.", + "docstring": "Check that y_true and y_pred belong to the same classification task.\n\nThis converts multiclass or binary types to a common shape, and raises a\nValueError for a mix of multilabel and multiclass targets, a mix of\nmultilabel formats, for the presence of continuous-valued or multioutput\ntargets, or for targets of different lengths.\n\nColumn vectors are squeezed to 1d, while multilabel formats are returned\nas CSR sparse label 
indicators.\n\nParameters\n----------\ny_true : array-like\n\ny_pred : array-like\n\nReturns\n-------\ntype_true : one of {'multilabel-indicator', 'multiclass', 'binary'}\n The type of the true target data, as output by\n ``utils.multiclass.type_of_target``.\n\ny_true : array or indicator matrix\n\ny_pred : array or indicator matrix", + "code": "def _check_targets(y_true, y_pred):\n \"\"\"Check that y_true and y_pred belong to the same classification task.\n\n This converts multiclass or binary types to a common shape, and raises a\n ValueError for a mix of multilabel and multiclass targets, a mix of\n multilabel formats, for the presence of continuous-valued or multioutput\n targets, or for targets of different lengths.\n\n Column vectors are squeezed to 1d, while multilabel formats are returned\n as CSR sparse label indicators.\n\n Parameters\n ----------\n y_true : array-like\n\n y_pred : array-like\n\n Returns\n -------\n type_true : one of {'multilabel-indicator', 'multiclass', 'binary'}\n The type of the true target data, as output by\n ``utils.multiclass.type_of_target``.\n\n y_true : array or indicator matrix\n\n y_pred : array or indicator matrix\n \"\"\"\n check_consistent_length(y_true, y_pred)\n type_true = type_of_target(y_true)\n type_pred = type_of_target(y_pred)\n\n y_type = {type_true, type_pred}\n if y_type == {\"binary\", \"multiclass\"}:\n y_type = {\"multiclass\"}\n\n if len(y_type) > 1:\n raise ValueError(\"Classification metrics can't handle a mix of {0} \"\n \"and {1} targets\".format(type_true, type_pred))\n\n # We can't have more than one value on y_type => The set is no more needed\n y_type = y_type.pop()\n\n # No metrics support \"multiclass-multioutput\" format\n if (y_type not in [\"binary\", \"multiclass\", \"multilabel-indicator\"]):\n raise ValueError(\"{0} is not supported\".format(y_type))\n\n if y_type in [\"binary\", \"multiclass\"]:\n y_true = column_or_1d(y_true)\n y_pred = column_or_1d(y_pred)\n if y_type == \"binary\":\n try:\n unique_values = np.union1d(y_true, y_pred)\n except TypeError as e:\n # We expect y_true and y_pred to be of the same data type.\n # If `y_true` was provided to the classifier as strings,\n # `y_pred` given by the classifier will also be encoded with\n # strings. So we raise a meaningful error\n raise TypeError(\n f\"Labels in y_true and y_pred should be of the same type. \"\n f\"Got y_true={np.unique(y_true)} and \"\n f\"y_pred={np.unique(y_pred)}. 
Make sure that the \"\n f\"predictions provided by the classifier coincides with \"\n f\"the true labels.\"\n ) from e\n if len(unique_values) > 2:\n y_type = \"multiclass\"\n\n if y_type.startswith('multilabel'):\n y_true = csr_matrix(y_true)\n y_pred = csr_matrix(y_pred)\n y_type = 'multilabel-indicator'\n\n return y_type, y_true, y_pred" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_check_zero_division", + "name": "_check_zero_division", + "qname": "sklearn.metrics._classification._check_zero_division", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/_check_zero_division/zero_division", + "name": "zero_division", + "qname": "sklearn.metrics._classification._check_zero_division.zero_division", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_zero_division(zero_division):\n if isinstance(zero_division, str) and zero_division == \"warn\":\n return\n elif isinstance(zero_division, (int, float)) and zero_division in [0, 1]:\n return\n raise ValueError('Got zero_division={0}.'\n ' Must be one of [\"warn\", 0, 1]'.format(zero_division))" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_prf_divide", + "name": "_prf_divide", + "qname": "sklearn.metrics._classification._prf_divide", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/_prf_divide/numerator", + "name": "numerator", + "qname": "sklearn.metrics._classification._prf_divide.numerator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_prf_divide/denominator", + "name": "denominator", + "qname": "sklearn.metrics._classification._prf_divide.denominator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_prf_divide/metric", + "name": "metric", + "qname": "sklearn.metrics._classification._prf_divide.metric", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_prf_divide/modifier", + "name": "modifier", + "qname": "sklearn.metrics._classification._prf_divide.modifier", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_prf_divide/average", + "name": "average", + "qname": "sklearn.metrics._classification._prf_divide.average", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_prf_divide/warn_for", + "name": "warn_for", + "qname": "sklearn.metrics._classification._prf_divide.warn_for", + "default_value": null, + "assigned_by": 
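A note on the validators recorded above: `_check_targets` is the common gatekeeper for every classification metric in this module, so its contract is easiest to see through a public metric. A minimal sketch follows (it uses only the public `accuracy_score` documented later in this file; the quoted error messages are the ones the recorded code produces):

import numpy as np
from sklearn.metrics import accuracy_score

# A binary y_true scored against a three-class y_pred: both are coerced
# to a common 1d "multiclass" representation and compared element-wise.
print(accuracy_score([0, 1, 1, 0], [0, 1, 2, 0]))  # 0.75

# Continuous targets are rejected by _check_targets:
# ValueError: continuous is not supported
try:
    accuracy_score([0.5, 1.2], [0.4, 1.3])
except ValueError as exc:
    print(exc)

# Mixing a multilabel indicator matrix with a multiclass vector is rejected:
# ValueError: Classification metrics can't handle a mix of
# multilabel-indicator and multiclass targets
try:
    accuracy_score(np.array([[0, 1], [1, 0], [1, 1]]), [0, 1, 2])
except ValueError as exc:
    print(exc)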
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_prf_divide/zero_division", + "name": "zero_division", + "qname": "sklearn.metrics._classification._prf_divide.zero_division", + "default_value": "'warn'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Performs division and handles divide-by-zero.\n\nOn zero-division, sets the corresponding result elements equal to\n0 or 1 (according to ``zero_division``). Plus, if\n``zero_division != \"warn\"`` raises a warning.\n\nThe metric, modifier and average arguments are used only for determining\nan appropriate warning.", + "docstring": "Performs division and handles divide-by-zero.\n\nOn zero-division, sets the corresponding result elements equal to\n0 or 1 (according to ``zero_division``). Plus, if\n``zero_division != \"warn\"`` raises a warning.\n\nThe metric, modifier and average arguments are used only for determining\nan appropriate warning.", + "code": "def _prf_divide(numerator, denominator, metric,\n modifier, average, warn_for, zero_division=\"warn\"):\n \"\"\"Performs division and handles divide-by-zero.\n\n On zero-division, sets the corresponding result elements equal to\n 0 or 1 (according to ``zero_division``). Plus, if\n ``zero_division != \"warn\"`` raises a warning.\n\n The metric, modifier and average arguments are used only for determining\n an appropriate warning.\n \"\"\"\n mask = denominator == 0.0\n denominator = denominator.copy()\n denominator[mask] = 1 # avoid infs/nans\n result = numerator / denominator\n\n if not np.any(mask):\n return result\n\n # if ``zero_division=1``, set those with denominator == 0 equal to 1\n result[mask] = 0.0 if zero_division in [\"warn\", 0] else 1.0\n\n # the user will be removing warnings if zero_division is set to something\n # different than its default value. If we are computing only f-score\n # the warning will be raised only if precision and recall are ill-defined\n if zero_division != \"warn\" or metric not in warn_for:\n return result\n\n # build appropriate warning\n # E.g. \"Precision and F-score are ill-defined and being set to 0.0 in\n # labels with no predicted samples. 
Use ``zero_division`` parameter to\n # control this behavior.\"\n\n if metric in warn_for and 'f-score' in warn_for:\n msg_start = '{0} and F-score are'.format(metric.title())\n elif metric in warn_for:\n msg_start = '{0} is'.format(metric.title())\n elif 'f-score' in warn_for:\n msg_start = 'F-score is'\n else:\n return result\n\n _warn_prf(average, modifier, msg_start, len(result))\n\n return result" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_warn_prf", + "name": "_warn_prf", + "qname": "sklearn.metrics._classification._warn_prf", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/_warn_prf/average", + "name": "average", + "qname": "sklearn.metrics._classification._warn_prf.average", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_warn_prf/modifier", + "name": "modifier", + "qname": "sklearn.metrics._classification._warn_prf.modifier", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_warn_prf/msg_start", + "name": "msg_start", + "qname": "sklearn.metrics._classification._warn_prf.msg_start", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_warn_prf/result_size", + "name": "result_size", + "qname": "sklearn.metrics._classification._warn_prf.result_size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _warn_prf(average, modifier, msg_start, result_size):\n axis0, axis1 = 'sample', 'label'\n if average == 'samples':\n axis0, axis1 = axis1, axis0\n msg = ('{0} ill-defined and being set to 0.0 {{0}} '\n 'no {1} {2}s. 
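Taken together, the `_check_set_wise_labels` and `_prf_divide` entries above explain the two most common surprises with the precision/recall family: `average='binary'` being refused on non-binary targets, and the `zero_division` handling when a denominator is zero. A short sketch through the public `precision_score`, which routes through both helpers (expected outputs follow from the recorded code):

from sklearn.metrics import precision_score

# _check_set_wise_labels: average='binary' is refused for multiclass targets.
# ValueError: Target is multiclass but average='binary'. Please choose
# another average setting, ...
try:
    precision_score([0, 1, 2], [0, 2, 1], average='binary')
except ValueError as exc:
    print(exc)

# _prf_divide: no predicted positives, so the denominator is zero.
# Default zero_division="warn": emits UndefinedMetricWarning, returns 0.0.
print(precision_score([0, 1, 1], [0, 0, 0]))                   # 0.0
# zero_division=1: the ill-defined value is set to 1.0, with no warning.
print(precision_score([0, 1, 1], [0, 0, 0], zero_division=1))  # 1.0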
Use `zero_division` parameter to control'\n ' this behavior.'.format(msg_start, modifier, axis0))\n if result_size == 1:\n msg = msg.format('due to')\n else:\n msg = msg.format('in {0}s with'.format(axis1))\n warnings.warn(msg, UndefinedMetricWarning, stacklevel=2)" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_weighted_sum", + "name": "_weighted_sum", + "qname": "sklearn.metrics._classification._weighted_sum", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/_weighted_sum/sample_score", + "name": "sample_score", + "qname": "sklearn.metrics._classification._weighted_sum.sample_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_weighted_sum/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification._weighted_sum.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._classification/_weighted_sum/normalize", + "name": "normalize", + "qname": "sklearn.metrics._classification._weighted_sum.normalize", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _weighted_sum(sample_score, sample_weight, normalize=False):\n if normalize:\n return np.average(sample_score, weights=sample_weight)\n elif sample_weight is not None:\n return np.dot(sample_score, sample_weight)\n else:\n return sample_score.sum()" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/accuracy_score", + "name": "accuracy_score", + "qname": "sklearn.metrics._classification.accuracy_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/accuracy_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.accuracy_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Ground truth (correct) labels." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/accuracy_score/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.accuracy_score.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Predicted labels, as returned by a classifier." 
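`_weighted_sum` above is the small reduction shared by several metrics in this module: a weighted mean when `normalize=True`, a dot product with the weights when `normalize=False`, and a plain sum when unweighted. A NumPy-only sketch of the three branches, checked against the public `accuracy_score`:

import numpy as np
from sklearn.metrics import accuracy_score

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])
w = np.array([1.0, 1.0, 2.0, 1.0])
score = y_true == y_pred  # per-sample 0/1 correctness

# normalize=True: weighted mean of the per-sample scores.
print(np.average(score, weights=w))                                      # 0.6
print(accuracy_score(y_true, y_pred, sample_weight=w))                   # 0.6
# normalize=False with weights: weighted count of correct samples.
print(np.dot(score, w))                                                  # 3.0
print(accuracy_score(y_true, y_pred, normalize=False, sample_weight=w))  # 3.0
# normalize=False without weights: plain count.
print(score.sum())                                                       # 3
print(accuracy_score(y_true, y_pred, normalize=False))                   # 3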
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/accuracy_score/normalize", + "name": "normalize", + "qname": "sklearn.metrics._classification.accuracy_score.normalize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``False``, return the number of correctly classified samples.\nOtherwise, return the fraction of correctly classified samples." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/accuracy_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.accuracy_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Accuracy classification score.\n\nIn multilabel classification, this function computes subset accuracy:\nthe set of labels predicted for a sample must *exactly* match the\ncorresponding set of labels in y_true.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Accuracy classification score.\n\nIn multilabel classification, this function computes subset accuracy:\nthe set of labels predicted for a sample must *exactly* match the\ncorresponding set of labels in y_true.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\nnormalize : bool, default=True\n If ``False``, return the number of correctly classified samples.\n Otherwise, return the fraction of correctly classified samples.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n If ``normalize == True``, return the fraction of correctly\n classified samples (float), else returns the number of correctly\n classified samples (int).\n\n The best performance is 1 with ``normalize == True`` and the number\n of samples with ``normalize == False``.\n\nSee Also\n--------\njaccard_score, hamming_loss, zero_one_loss\n\nNotes\n-----\nIn binary and multiclass classification, this function is equal\nto the ``jaccard_score`` function.\n\nExamples\n--------\n>>> from sklearn.metrics import accuracy_score\n>>> y_pred = [0, 2, 1, 3]\n>>> y_true = [0, 1, 2, 3]\n>>> accuracy_score(y_true, y_pred)\n0.5\n>>> accuracy_score(y_true, y_pred, normalize=False)\n2\n\nIn the multilabel case with binary label indicators:\n\n>>> import numpy as np\n>>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))\n0.5", + "code": "@_deprecate_positional_args\ndef accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):\n \"\"\"Accuracy classification score.\n\n In multilabel classification, this function computes subset accuracy:\n the set of labels predicted for a sample must *exactly* match the\n corresponding set of labels in y_true.\n\n Read more in the 
:ref:`User Guide `.\n\n Parameters\n ----------\n y_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\n y_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\n normalize : bool, default=True\n If ``False``, return the number of correctly classified samples.\n Otherwise, return the fraction of correctly classified samples.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n If ``normalize == True``, return the fraction of correctly\n classified samples (float), else returns the number of correctly\n classified samples (int).\n\n The best performance is 1 with ``normalize == True`` and the number\n of samples with ``normalize == False``.\n\n See Also\n --------\n jaccard_score, hamming_loss, zero_one_loss\n\n Notes\n -----\n In binary and multiclass classification, this function is equal\n to the ``jaccard_score`` function.\n\n Examples\n --------\n >>> from sklearn.metrics import accuracy_score\n >>> y_pred = [0, 2, 1, 3]\n >>> y_true = [0, 1, 2, 3]\n >>> accuracy_score(y_true, y_pred)\n 0.5\n >>> accuracy_score(y_true, y_pred, normalize=False)\n 2\n\n In the multilabel case with binary label indicators:\n\n >>> import numpy as np\n >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))\n 0.5\n \"\"\"\n\n # Compute accuracy for each possible representation\n y_type, y_true, y_pred = _check_targets(y_true, y_pred)\n check_consistent_length(y_true, y_pred, sample_weight)\n if y_type.startswith('multilabel'):\n differing_labels = count_nonzero(y_true - y_pred, axis=1)\n score = differing_labels == 0\n else:\n score = y_true == y_pred\n\n return _weighted_sum(score, sample_weight, normalize)" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/balanced_accuracy_score", + "name": "balanced_accuracy_score", + "qname": "sklearn.metrics._classification.balanced_accuracy_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/balanced_accuracy_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.balanced_accuracy_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "NamedType", + "name": "1d array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/balanced_accuracy_score/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.balanced_accuracy_score.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like", + "default_value": "", + "description": "Estimated targets as returned by a classifier." + }, + "type": { + "kind": "NamedType", + "name": "1d array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/balanced_accuracy_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.balanced_accuracy_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
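The multilabel branch of `accuracy_score` above is strict subset accuracy: a sample counts as correct only if its whole label row matches. A sketch reproducing the docstring's multilabel example by hand:

import numpy as np
from sklearn.metrics import accuracy_score

y_true = np.array([[0, 1], [1, 1]])
y_pred = np.ones((2, 2))

# A row is correct only when every label in it agrees.
row_exact = (y_true != y_pred).sum(axis=1) == 0  # [False, True]
print(row_exact.mean())                          # 0.5
print(accuracy_score(y_true, y_pred))            # 0.5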
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/balanced_accuracy_score/adjusted", + "name": "adjusted", + "qname": "sklearn.metrics._classification.balanced_accuracy_score.adjusted", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When true, the result is adjusted for chance, so that random\nperformance would score 0, while keeping perfect performance at a score\nof 1." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the balanced accuracy.\n\nThe balanced accuracy in binary and multiclass classification problems to\ndeal with imbalanced datasets. It is defined as the average of recall\nobtained on each class.\n\nThe best value is 1 and the worst value is 0 when ``adjusted=False``.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "docstring": "Compute the balanced accuracy.\n\nThe balanced accuracy in binary and multiclass classification problems to\ndeal with imbalanced datasets. It is defined as the average of recall\nobtained on each class.\n\nThe best value is 1 and the worst value is 0 when ``adjusted=False``.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\ny_true : 1d array-like\n Ground truth (correct) target values.\n\ny_pred : 1d array-like\n Estimated targets as returned by a classifier.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nadjusted : bool, default=False\n When true, the result is adjusted for chance, so that random\n performance would score 0, while keeping perfect performance at a score\n of 1.\n\nReturns\n-------\nbalanced_accuracy : float\n\nSee Also\n--------\nrecall_score, roc_auc_score\n\nNotes\n-----\nSome literature promotes alternative definitions of balanced accuracy. Our\ndefinition is equivalent to :func:`accuracy_score` with class-balanced\nsample weights, and shares desirable properties with the binary case.\nSee the :ref:`User Guide `.\n\nReferences\n----------\n.. [1] Brodersen, K.H.; Ong, C.S.; Stephan, K.E.; Buhmann, J.M. (2010).\n The balanced accuracy and its posterior distribution.\n Proceedings of the 20th International Conference on Pattern\n Recognition, 3121-24.\n.. [2] John. D. Kelleher, Brian Mac Namee, Aoife D'Arcy, (2015).\n `Fundamentals of Machine Learning for Predictive Data Analytics:\n Algorithms, Worked Examples, and Case Studies\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import balanced_accuracy_score\n>>> y_true = [0, 1, 0, 0, 1, 0]\n>>> y_pred = [0, 1, 0, 0, 0, 1]\n>>> balanced_accuracy_score(y_true, y_pred)\n0.625", + "code": "@_deprecate_positional_args\ndef balanced_accuracy_score(y_true, y_pred, *, sample_weight=None,\n adjusted=False):\n \"\"\"Compute the balanced accuracy.\n\n The balanced accuracy in binary and multiclass classification problems to\n deal with imbalanced datasets. It is defined as the average of recall\n obtained on each class.\n\n The best value is 1 and the worst value is 0 when ``adjusted=False``.\n\n Read more in the :ref:`User Guide `.\n\n .. 
versionadded:: 0.20\n\n Parameters\n ----------\n y_true : 1d array-like\n Ground truth (correct) target values.\n\n y_pred : 1d array-like\n Estimated targets as returned by a classifier.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n adjusted : bool, default=False\n When true, the result is adjusted for chance, so that random\n performance would score 0, while keeping perfect performance at a score\n of 1.\n\n Returns\n -------\n balanced_accuracy : float\n\n See Also\n --------\n recall_score, roc_auc_score\n\n Notes\n -----\n Some literature promotes alternative definitions of balanced accuracy. Our\n definition is equivalent to :func:`accuracy_score` with class-balanced\n sample weights, and shares desirable properties with the binary case.\n See the :ref:`User Guide `.\n\n References\n ----------\n .. [1] Brodersen, K.H.; Ong, C.S.; Stephan, K.E.; Buhmann, J.M. (2010).\n The balanced accuracy and its posterior distribution.\n Proceedings of the 20th International Conference on Pattern\n Recognition, 3121-24.\n .. [2] John. D. Kelleher, Brian Mac Namee, Aoife D'Arcy, (2015).\n `Fundamentals of Machine Learning for Predictive Data Analytics:\n Algorithms, Worked Examples, and Case Studies\n `_.\n\n Examples\n --------\n >>> from sklearn.metrics import balanced_accuracy_score\n >>> y_true = [0, 1, 0, 0, 1, 0]\n >>> y_pred = [0, 1, 0, 0, 0, 1]\n >>> balanced_accuracy_score(y_true, y_pred)\n 0.625\n\n \"\"\"\n C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)\n with np.errstate(divide='ignore', invalid='ignore'):\n per_class = np.diag(C) / C.sum(axis=1)\n if np.any(np.isnan(per_class)):\n warnings.warn('y_pred contains classes not in y_true')\n per_class = per_class[~np.isnan(per_class)]\n score = np.mean(per_class)\n if adjusted:\n n_classes = len(per_class)\n chance = 1 / n_classes\n score -= chance\n score /= 1 - chance\n return score" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/brier_score_loss", + "name": "brier_score_loss", + "qname": "sklearn.metrics._classification.brier_score_loss", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/brier_score_loss/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.brier_score_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "True targets." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/brier_score_loss/y_prob", + "name": "y_prob", + "qname": "sklearn.metrics._classification.brier_score_loss.y_prob", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Probabilities of the positive class." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/brier_score_loss/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.brier_score_loss.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
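Since `balanced_accuracy_score` above is defined as the average of per-class recall, it coincides with macro-averaged recall, and `adjusted=True` rescales the result so chance level maps to 0. A short check of both claims on the docstring's own example data:

from sklearn.metrics import balanced_accuracy_score, recall_score

y_true = [0, 1, 0, 0, 1, 0]
y_pred = [0, 1, 0, 0, 0, 1]

# Per-class recalls are 3/4 (class 0) and 1/2 (class 1); their mean is 0.625.
print(recall_score(y_true, y_pred, average='macro'))           # 0.625
print(balanced_accuracy_score(y_true, y_pred))                 # 0.625
# adjusted=True: (score - 1/n_classes) / (1 - 1/n_classes) = (0.625 - 0.5) / 0.5
print(balanced_accuracy_score(y_true, y_pred, adjusted=True))  # 0.25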
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/brier_score_loss/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._classification.brier_score_loss.pos_label", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or str", + "default_value": "None", + "description": "Label of the positive class. `pos_label` will be infered in the\nfollowing manner:\n\n* if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1;\n* else if `y_true` contains string, an error will be raised and\n `pos_label` should be explicitely specified;\n* otherwise, `pos_label` defaults to the greater label,\n i.e. `np.unique(y_true)[-1]`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the Brier score loss.\n\nThe smaller the Brier score loss, the better, hence the naming with \"loss\".\nThe Brier score measures the mean squared difference between the predicted\nprobability and the actual outcome. The Brier score always\ntakes on a value between zero and one, since this is the largest\npossible difference between a predicted probability (which must be\nbetween zero and one) and the actual outcome (which can take on values\nof only 0 and 1). It can be decomposed is the sum of refinement loss and\ncalibration loss.\n\nThe Brier score is appropriate for binary and categorical outcomes that\ncan be structured as true or false, but is inappropriate for ordinal\nvariables which can take on three or more values (this is because the\nBrier score assumes that all possible outcomes are equivalently\n\"distant\" from one another). Which label is considered to be the positive\nlabel is controlled via the parameter `pos_label`, which defaults to\nthe greater label unless `y_true` is all 0 or all -1, in which case\n`pos_label` defaults to 1.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the Brier score loss.\n\nThe smaller the Brier score loss, the better, hence the naming with \"loss\".\nThe Brier score measures the mean squared difference between the predicted\nprobability and the actual outcome. The Brier score always\ntakes on a value between zero and one, since this is the largest\npossible difference between a predicted probability (which must be\nbetween zero and one) and the actual outcome (which can take on values\nof only 0 and 1). It can be decomposed is the sum of refinement loss and\ncalibration loss.\n\nThe Brier score is appropriate for binary and categorical outcomes that\ncan be structured as true or false, but is inappropriate for ordinal\nvariables which can take on three or more values (this is because the\nBrier score assumes that all possible outcomes are equivalently\n\"distant\" from one another). 
Which label is considered to be the positive\nlabel is controlled via the parameter `pos_label`, which defaults to\nthe greater label unless `y_true` is all 0 or all -1, in which case\n`pos_label` defaults to 1.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array of shape (n_samples,)\n True targets.\n\ny_prob : array of shape (n_samples,)\n Probabilities of the positive class.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\npos_label : int or str, default=None\n Label of the positive class. `pos_label` will be infered in the\n following manner:\n\n * if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1;\n * else if `y_true` contains string, an error will be raised and\n `pos_label` should be explicitely specified;\n * otherwise, `pos_label` defaults to the greater label,\n i.e. `np.unique(y_true)[-1]`.\n\nReturns\n-------\nscore : float\n Brier score loss.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import brier_score_loss\n>>> y_true = np.array([0, 1, 1, 0])\n>>> y_true_categorical = np.array([\"spam\", \"ham\", \"ham\", \"spam\"])\n>>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])\n>>> brier_score_loss(y_true, y_prob)\n0.037...\n>>> brier_score_loss(y_true, 1-y_prob, pos_label=0)\n0.037...\n>>> brier_score_loss(y_true_categorical, y_prob, pos_label=\"ham\")\n0.037...\n>>> brier_score_loss(y_true, np.array(y_prob) > 0.5)\n0.0\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Brier score\n `_.", + "code": "@_deprecate_positional_args\ndef brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None):\n \"\"\"Compute the Brier score loss.\n\n The smaller the Brier score loss, the better, hence the naming with \"loss\".\n The Brier score measures the mean squared difference between the predicted\n probability and the actual outcome. The Brier score always\n takes on a value between zero and one, since this is the largest\n possible difference between a predicted probability (which must be\n between zero and one) and the actual outcome (which can take on values\n of only 0 and 1). It can be decomposed is the sum of refinement loss and\n calibration loss.\n\n The Brier score is appropriate for binary and categorical outcomes that\n can be structured as true or false, but is inappropriate for ordinal\n variables which can take on three or more values (this is because the\n Brier score assumes that all possible outcomes are equivalently\n \"distant\" from one another). Which label is considered to be the positive\n label is controlled via the parameter `pos_label`, which defaults to\n the greater label unless `y_true` is all 0 or all -1, in which case\n `pos_label` defaults to 1.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array of shape (n_samples,)\n True targets.\n\n y_prob : array of shape (n_samples,)\n Probabilities of the positive class.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n pos_label : int or str, default=None\n Label of the positive class. `pos_label` will be infered in the\n following manner:\n\n * if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1;\n * else if `y_true` contains string, an error will be raised and\n `pos_label` should be explicitely specified;\n * otherwise, `pos_label` defaults to the greater label,\n i.e. 
`np.unique(y_true)[-1]`.\n\n Returns\n -------\n score : float\n Brier score loss.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.metrics import brier_score_loss\n >>> y_true = np.array([0, 1, 1, 0])\n >>> y_true_categorical = np.array([\"spam\", \"ham\", \"ham\", \"spam\"])\n >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])\n >>> brier_score_loss(y_true, y_prob)\n 0.037...\n >>> brier_score_loss(y_true, 1-y_prob, pos_label=0)\n 0.037...\n >>> brier_score_loss(y_true_categorical, y_prob, pos_label=\"ham\")\n 0.037...\n >>> brier_score_loss(y_true, np.array(y_prob) > 0.5)\n 0.0\n\n References\n ----------\n .. [1] `Wikipedia entry for the Brier score\n `_.\n \"\"\"\n y_true = column_or_1d(y_true)\n y_prob = column_or_1d(y_prob)\n assert_all_finite(y_true)\n assert_all_finite(y_prob)\n check_consistent_length(y_true, y_prob, sample_weight)\n\n y_type = type_of_target(y_true)\n if y_type != \"binary\":\n raise ValueError(\n f\"Only binary classification is supported. The type of the target \"\n f\"is {y_type}.\"\n )\n\n if y_prob.max() > 1:\n raise ValueError(\"y_prob contains values greater than 1.\")\n if y_prob.min() < 0:\n raise ValueError(\"y_prob contains values less than 0.\")\n\n try:\n pos_label = _check_pos_label_consistency(pos_label, y_true)\n except ValueError:\n classes = np.unique(y_true)\n if classes.dtype.kind not in ('O', 'U', 'S'):\n # for backward compatibility, if classes are not string then\n # `pos_label` will correspond to the greater label\n pos_label = classes[-1]\n else:\n raise\n y_true = np.array(y_true == pos_label, int)\n return np.average((y_true - y_prob) ** 2, weights=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/classification_report", + "name": "classification_report", + "qname": "sklearn.metrics._classification.classification_report", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/classification_report/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.classification_report.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/classification_report/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.classification_report.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Estimated targets as returned by a classifier." 
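Because the Brier score above is just the (optionally weighted) mean squared difference between the predicted probability of the positive class and the 0/1 outcome, it is easy to verify by hand. A sketch reproducing the docstring's 0.037... value and the explicit `pos_label` case for string targets:

import numpy as np
from sklearn.metrics import brier_score_loss

y_true = np.array([0, 1, 1, 0])
y_prob = np.array([0.1, 0.9, 0.8, 0.3])

# Mean squared difference between probability and outcome:
# (0.1**2 + 0.1**2 + 0.2**2 + 0.3**2) / 4 = 0.0375
print(np.mean((y_true - y_prob) ** 2))   # 0.0375
print(brier_score_loss(y_true, y_prob))  # 0.0375

# With string labels, pos_label must be passed explicitly; the positive
# class is encoded as 1 and the same computation runs.
y_cat = np.array(["spam", "ham", "ham", "spam"])
print(brier_score_loss(y_cat, y_prob, pos_label="ham"))  # 0.0375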
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/classification_report/labels", + "name": "labels", + "qname": "sklearn.metrics._classification.classification_report.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_labels,)", + "default_value": "None", + "description": "Optional list of label indices to include in the report." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_labels,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/classification_report/target_names", + "name": "target_names", + "qname": "sklearn.metrics._classification.classification_report.target_names", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of str of shape (n_labels,)", + "default_value": "None", + "description": "Optional display names matching the labels (same order)." + }, + "type": { + "kind": "NamedType", + "name": "list of str of shape (n_labels,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/classification_report/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.classification_report.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/classification_report/digits", + "name": "digits", + "qname": "sklearn.metrics._classification.classification_report.digits", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Number of digits for formatting output floating point values.\nWhen ``output_dict`` is ``True``, this will be ignored and the\nreturned values will not be rounded." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/classification_report/output_dict", + "name": "output_dict", + "qname": "sklearn.metrics._classification.classification_report.output_dict", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, return output as dict.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/classification_report/zero_division", + "name": "zero_division", + "qname": "sklearn.metrics._classification.classification_report.zero_division", + "default_value": "'warn'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "\"warn\", 0 or 1", + "default_value": "\"warn\"", + "description": "Sets the value to return when there is a zero division. If set to\n\"warn\", this acts as 0, but warnings are also raised." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "\"warn\"" + }, + { + "kind": "NamedType", + "name": "0" + }, + { + "kind": "NamedType", + "name": "1" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build a text report showing the main classification metrics.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Build a text report showing the main classification metrics.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nlabels : array-like of shape (n_labels,), default=None\n Optional list of label indices to include in the report.\n\ntarget_names : list of str of shape (n_labels,), default=None\n Optional display names matching the labels (same order).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\ndigits : int, default=2\n Number of digits for formatting output floating point values.\n When ``output_dict`` is ``True``, this will be ignored and the\n returned values will not be rounded.\n\noutput_dict : bool, default=False\n If True, return output as dict.\n\n .. versionadded:: 0.20\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division. If set to\n \"warn\", this acts as 0, but warnings are also raised.\n\nReturns\n-------\nreport : string / dict\n Text summary of the precision, recall, F1 score for each class.\n Dictionary returned if output_dict is True. Dictionary has the\n following structure::\n\n {'label 1': {'precision':0.5,\n 'recall':1.0,\n 'f1-score':0.67,\n 'support':1},\n 'label 2': { ... 
},\n ...\n }\n\n The reported averages include macro average (averaging the unweighted\n mean per label), weighted average (averaging the support-weighted mean\n per label), and sample average (only for multilabel classification).\n Micro average (averaging the total true positives, false negatives and\n false positives) is only shown for multi-label or multi-class\n with a subset of classes, because it corresponds to accuracy\n otherwise and would be the same for all metrics.\n See also :func:`precision_recall_fscore_support` for more details\n on averages.\n\n Note that in binary classification, recall of the positive class\n is also known as \"sensitivity\"; recall of the negative class is\n \"specificity\".\n\nSee Also\n--------\nprecision_recall_fscore_support, confusion_matrix,\nmultilabel_confusion_matrix\n\nExamples\n--------\n>>> from sklearn.metrics import classification_report\n>>> y_true = [0, 1, 2, 2, 2]\n>>> y_pred = [0, 0, 2, 2, 1]\n>>> target_names = ['class 0', 'class 1', 'class 2']\n>>> print(classification_report(y_true, y_pred, target_names=target_names))\n precision recall f1-score support\n\n class 0 0.50 1.00 0.67 1\n class 1 0.00 0.00 0.00 1\n class 2 1.00 0.67 0.80 3\n\n accuracy 0.60 5\n macro avg 0.50 0.56 0.49 5\nweighted avg 0.70 0.60 0.61 5\n\n>>> y_pred = [1, 1, 0]\n>>> y_true = [1, 1, 1]\n>>> print(classification_report(y_true, y_pred, labels=[1, 2, 3]))\n precision recall f1-score support\n\n 1 1.00 0.67 0.80 3\n 2 0.00 0.00 0.00 0\n 3 0.00 0.00 0.00 0\n\n micro avg 1.00 0.67 0.80 3\n macro avg 0.33 0.22 0.27 3\nweighted avg 1.00 0.67 0.80 3\n", + "code": "@_deprecate_positional_args\ndef classification_report(y_true, y_pred, *, labels=None, target_names=None,\n sample_weight=None, digits=2, output_dict=False,\n zero_division=\"warn\"):\n \"\"\"Build a text report showing the main classification metrics.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\n y_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\n labels : array-like of shape (n_labels,), default=None\n Optional list of label indices to include in the report.\n\n target_names : list of str of shape (n_labels,), default=None\n Optional display names matching the labels (same order).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n digits : int, default=2\n Number of digits for formatting output floating point values.\n When ``output_dict`` is ``True``, this will be ignored and the\n returned values will not be rounded.\n\n output_dict : bool, default=False\n If True, return output as dict.\n\n .. versionadded:: 0.20\n\n zero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division. If set to\n \"warn\", this acts as 0, but warnings are also raised.\n\n Returns\n -------\n report : string / dict\n Text summary of the precision, recall, F1 score for each class.\n Dictionary returned if output_dict is True. Dictionary has the\n following structure::\n\n {'label 1': {'precision':0.5,\n 'recall':1.0,\n 'f1-score':0.67,\n 'support':1},\n 'label 2': { ... 
},\n ...\n }\n\n The reported averages include macro average (averaging the unweighted\n mean per label), weighted average (averaging the support-weighted mean\n per label), and sample average (only for multilabel classification).\n Micro average (averaging the total true positives, false negatives and\n false positives) is only shown for multi-label or multi-class\n with a subset of classes, because it corresponds to accuracy\n otherwise and would be the same for all metrics.\n See also :func:`precision_recall_fscore_support` for more details\n on averages.\n\n Note that in binary classification, recall of the positive class\n is also known as \"sensitivity\"; recall of the negative class is\n \"specificity\".\n\n See Also\n --------\n precision_recall_fscore_support, confusion_matrix,\n multilabel_confusion_matrix\n\n Examples\n --------\n >>> from sklearn.metrics import classification_report\n >>> y_true = [0, 1, 2, 2, 2]\n >>> y_pred = [0, 0, 2, 2, 1]\n >>> target_names = ['class 0', 'class 1', 'class 2']\n >>> print(classification_report(y_true, y_pred, target_names=target_names))\n precision recall f1-score support\n \n class 0 0.50 1.00 0.67 1\n class 1 0.00 0.00 0.00 1\n class 2 1.00 0.67 0.80 3\n \n accuracy 0.60 5\n macro avg 0.50 0.56 0.49 5\n weighted avg 0.70 0.60 0.61 5\n \n >>> y_pred = [1, 1, 0]\n >>> y_true = [1, 1, 1]\n >>> print(classification_report(y_true, y_pred, labels=[1, 2, 3]))\n precision recall f1-score support\n \n 1 1.00 0.67 0.80 3\n 2 0.00 0.00 0.00 0\n 3 0.00 0.00 0.00 0\n \n micro avg 1.00 0.67 0.80 3\n macro avg 0.33 0.22 0.27 3\n weighted avg 1.00 0.67 0.80 3\n \n \"\"\"\n\n y_type, y_true, y_pred = _check_targets(y_true, y_pred)\n\n if labels is None:\n labels = unique_labels(y_true, y_pred)\n labels_given = False\n else:\n labels = np.asarray(labels)\n labels_given = True\n\n # labelled micro average\n micro_is_accuracy = ((y_type == 'multiclass' or y_type == 'binary') and\n (not labels_given or\n (set(labels) == set(unique_labels(y_true, y_pred)))))\n\n if target_names is not None and len(labels) != len(target_names):\n if labels_given:\n warnings.warn(\n \"labels size, {0}, does not match size of target_names, {1}\"\n .format(len(labels), len(target_names))\n )\n else:\n raise ValueError(\n \"Number of classes, {0}, does not match size of \"\n \"target_names, {1}. 
Try specifying the labels \"\n \"parameter\".format(len(labels), len(target_names))\n )\n if target_names is None:\n target_names = ['%s' % l for l in labels]\n\n headers = [\"precision\", \"recall\", \"f1-score\", \"support\"]\n # compute per-class results without averaging\n p, r, f1, s = precision_recall_fscore_support(y_true, y_pred,\n labels=labels,\n average=None,\n sample_weight=sample_weight,\n zero_division=zero_division)\n rows = zip(target_names, p, r, f1, s)\n\n if y_type.startswith('multilabel'):\n average_options = ('micro', 'macro', 'weighted', 'samples')\n else:\n average_options = ('micro', 'macro', 'weighted')\n\n if output_dict:\n report_dict = {label[0]: label[1:] for label in rows}\n for label, scores in report_dict.items():\n report_dict[label] = dict(zip(headers,\n [i.item() for i in scores]))\n else:\n longest_last_line_heading = 'weighted avg'\n name_width = max(len(cn) for cn in target_names)\n width = max(name_width, len(longest_last_line_heading), digits)\n head_fmt = '{:>{width}s} ' + ' {:>9}' * len(headers)\n report = head_fmt.format('', *headers, width=width)\n report += '\\n\\n'\n row_fmt = '{:>{width}s} ' + ' {:>9.{digits}f}' * 3 + ' {:>9}\\n'\n for row in rows:\n report += row_fmt.format(*row, width=width, digits=digits)\n report += '\\n'\n\n # compute all applicable averages\n for average in average_options:\n if average.startswith('micro') and micro_is_accuracy:\n line_heading = 'accuracy'\n else:\n line_heading = average + ' avg'\n\n # compute averages with specified averaging method\n avg_p, avg_r, avg_f1, _ = precision_recall_fscore_support(\n y_true, y_pred, labels=labels,\n average=average, sample_weight=sample_weight,\n zero_division=zero_division)\n avg = [avg_p, avg_r, avg_f1, np.sum(s)]\n\n if output_dict:\n report_dict[line_heading] = dict(\n zip(headers, [i.item() for i in avg]))\n else:\n if line_heading == 'accuracy':\n row_fmt_accuracy = '{:>{width}s} ' + \\\n ' {:>9.{digits}}' * 2 + ' {:>9.{digits}f}' + \\\n ' {:>9}\\n'\n report += row_fmt_accuracy.format(line_heading, '', '',\n *avg[2:], width=width,\n digits=digits)\n else:\n report += row_fmt.format(line_heading, *avg,\n width=width, digits=digits)\n\n if output_dict:\n if 'accuracy' in report_dict.keys():\n report_dict['accuracy'] = report_dict['accuracy']['precision']\n return report_dict\n else:\n return report" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/cohen_kappa_score", + "name": "cohen_kappa_score", + "qname": "sklearn.metrics._classification.cohen_kappa_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/cohen_kappa_score/y1", + "name": "y1", + "qname": "sklearn.metrics._classification.cohen_kappa_score.y1", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Labels assigned by the first annotator." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/cohen_kappa_score/y2", + "name": "y2", + "qname": "sklearn.metrics._classification.cohen_kappa_score.y2", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "Labels assigned by the second annotator. 
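When the report is consumed programmatically, the `output_dict=True` form documented above is more robust than parsing the text table: per-class entries are keyed by the stringified label, the average rows by their heading, and `'accuracy'` is collapsed to a bare float. A sketch on the docstring's example data:

from sklearn.metrics import classification_report

y_true = [0, 1, 2, 2, 2]
y_pred = [0, 0, 2, 2, 1]

report = classification_report(y_true, y_pred, output_dict=True)
# Per-class entries are dicts with precision/recall/f1-score/support.
print(report['2']['f1-score'])        # 0.8
# Micro average equals accuracy here, so it is reported as a single float.
print(report['accuracy'])             # 0.6
print(report['macro avg']['recall'])  # 0.5555...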
The kappa statistic is\nsymmetric, so swapping ``y1`` and ``y2`` doesn't change the value." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/cohen_kappa_score/labels", + "name": "labels", + "qname": "sklearn.metrics._classification.cohen_kappa_score.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "List of labels to index the matrix. This may be used to select a\nsubset of labels. If None, all labels that appear at least once in\n``y1`` or ``y2`` are used." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/cohen_kappa_score/weights", + "name": "weights", + "qname": "sklearn.metrics._classification.cohen_kappa_score.weights", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'linear', 'quadratic'}", + "default_value": "None", + "description": "Weighting type to calculate the score. None means no weighted;\n\"linear\" means linear weighted; \"quadratic\" means quadratic weighted." + }, + "type": { + "kind": "EnumType", + "values": ["quadratic", "linear"] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/cohen_kappa_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.cohen_kappa_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Cohen's kappa: a statistic that measures inter-annotator agreement.\n\nThis function computes Cohen's kappa [1]_, a score that expresses the level\nof agreement between two annotators on a classification problem. It is\ndefined as\n\n.. math::\n \\kappa = (p_o - p_e) / (1 - p_e)\n\nwhere :math:`p_o` is the empirical probability of agreement on the label\nassigned to any sample (the observed agreement ratio), and :math:`p_e` is\nthe expected agreement when both annotators assign labels randomly.\n:math:`p_e` is estimated using a per-annotator empirical prior over the\nclass labels [2]_.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Cohen's kappa: a statistic that measures inter-annotator agreement.\n\nThis function computes Cohen's kappa [1]_, a score that expresses the level\nof agreement between two annotators on a classification problem. It is\ndefined as\n\n.. math::\n \\kappa = (p_o - p_e) / (1 - p_e)\n\nwhere :math:`p_o` is the empirical probability of agreement on the label\nassigned to any sample (the observed agreement ratio), and :math:`p_e` is\nthe expected agreement when both annotators assign labels randomly.\n:math:`p_e` is estimated using a per-annotator empirical prior over the\nclass labels [2]_.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny1 : array of shape (n_samples,)\n Labels assigned by the first annotator.\n\ny2 : array of shape (n_samples,)\n Labels assigned by the second annotator. 
The kappa statistic is\n symmetric, so swapping ``y1`` and ``y2`` doesn't change the value.\n\nlabels : array-like of shape (n_classes,), default=None\n List of labels to index the matrix. This may be used to select a\n subset of labels. If None, all labels that appear at least once in\n ``y1`` or ``y2`` are used.\n\nweights : {'linear', 'quadratic'}, default=None\n Weighting type to calculate the score. None means no weighted;\n \"linear\" means linear weighted; \"quadratic\" means quadratic weighted.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nkappa : float\n The kappa statistic, which is a number between -1 and 1. The maximum\n value means complete agreement; zero or lower means chance agreement.\n\nReferences\n----------\n.. [1] J. Cohen (1960). \"A coefficient of agreement for nominal scales\".\n Educational and Psychological Measurement 20(1):37-46.\n doi:10.1177/001316446002000104.\n.. [2] `R. Artstein and M. Poesio (2008). \"Inter-coder agreement for\n computational linguistics\". Computational Linguistics 34(4):555-596\n `_.\n.. [3] `Wikipedia entry for the Cohen's kappa\n `_.", + "code": "@_deprecate_positional_args\ndef cohen_kappa_score(y1, y2, *, labels=None, weights=None,\n sample_weight=None):\n r\"\"\"Cohen's kappa: a statistic that measures inter-annotator agreement.\n\n This function computes Cohen's kappa [1]_, a score that expresses the level\n of agreement between two annotators on a classification problem. It is\n defined as\n\n .. math::\n \\kappa = (p_o - p_e) / (1 - p_e)\n\n where :math:`p_o` is the empirical probability of agreement on the label\n assigned to any sample (the observed agreement ratio), and :math:`p_e` is\n the expected agreement when both annotators assign labels randomly.\n :math:`p_e` is estimated using a per-annotator empirical prior over the\n class labels [2]_.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y1 : array of shape (n_samples,)\n Labels assigned by the first annotator.\n\n y2 : array of shape (n_samples,)\n Labels assigned by the second annotator. The kappa statistic is\n symmetric, so swapping ``y1`` and ``y2`` doesn't change the value.\n\n labels : array-like of shape (n_classes,), default=None\n List of labels to index the matrix. This may be used to select a\n subset of labels. If None, all labels that appear at least once in\n ``y1`` or ``y2`` are used.\n\n weights : {'linear', 'quadratic'}, default=None\n Weighting type to calculate the score. None means no weighted;\n \"linear\" means linear weighted; \"quadratic\" means quadratic weighted.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n kappa : float\n The kappa statistic, which is a number between -1 and 1. The maximum\n value means complete agreement; zero or lower means chance agreement.\n\n References\n ----------\n .. [1] J. Cohen (1960). \"A coefficient of agreement for nominal scales\".\n Educational and Psychological Measurement 20(1):37-46.\n doi:10.1177/001316446002000104.\n .. [2] `R. Artstein and M. Poesio (2008). \"Inter-coder agreement for\n computational linguistics\". Computational Linguistics 34(4):555-596\n `_.\n .. 
[3] `Wikipedia entry for the Cohen's kappa\n `_.\n \"\"\"\n confusion = confusion_matrix(y1, y2, labels=labels,\n sample_weight=sample_weight)\n n_classes = confusion.shape[0]\n sum0 = np.sum(confusion, axis=0)\n sum1 = np.sum(confusion, axis=1)\n expected = np.outer(sum0, sum1) / np.sum(sum0)\n\n if weights is None:\n w_mat = np.ones([n_classes, n_classes], dtype=int)\n w_mat.flat[:: n_classes + 1] = 0\n elif weights == \"linear\" or weights == \"quadratic\":\n w_mat = np.zeros([n_classes, n_classes], dtype=int)\n w_mat += np.arange(n_classes)\n if weights == \"linear\":\n w_mat = np.abs(w_mat - w_mat.T)\n else:\n w_mat = (w_mat - w_mat.T) ** 2\n else:\n raise ValueError(\"Unknown kappa weighting type.\")\n\n k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)\n return 1 - k" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/confusion_matrix", + "name": "confusion_matrix", + "qname": "sklearn.metrics._classification.confusion_matrix", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/confusion_matrix/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.confusion_matrix.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/confusion_matrix/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.confusion_matrix.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Estimated targets as returned by a classifier." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/confusion_matrix/labels", + "name": "labels", + "qname": "sklearn.metrics._classification.confusion_matrix.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes)", + "default_value": "None", + "description": "List of labels to index the matrix. This may be used to reorder\nor select a subset of labels.\nIf ``None`` is given, those that appear at least once\nin ``y_true`` or ``y_pred`` are used in sorted order." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/confusion_matrix/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.confusion_matrix.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights.\n\n.. 
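The kappa formula above, kappa = (p_o - p_e) / (1 - p_e), can be reproduced directly from the confusion matrix between the two annotators. A sketch of the unweighted case, mirroring the `weights=None` branch of the recorded code:

import numpy as np
from sklearn.metrics import cohen_kappa_score, confusion_matrix

y1 = [0, 1, 1, 0]
y2 = [0, 1, 0, 0]

C = confusion_matrix(y1, y2).astype(float)
n = C.sum()
p_o = np.trace(C) / n                                # observed agreement: 0.75
p_e = np.dot(C.sum(axis=1), C.sum(axis=0)) / n ** 2  # chance agreement: 0.5
print((p_o - p_e) / (1 - p_e))    # 0.5
print(cohen_kappa_score(y1, y2))  # 0.5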
versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/confusion_matrix/normalize", + "name": "normalize", + "qname": "sklearn.metrics._classification.confusion_matrix.normalize", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'true', 'pred', 'all'}", + "default_value": "None", + "description": "Normalizes confusion matrix over the true (rows), predicted (columns)\nconditions or all the population. If None, confusion matrix will not be\nnormalized." + }, + "type": { + "kind": "EnumType", + "values": ["all", "pred", "true"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute confusion matrix to evaluate the accuracy of a classification.\n\nBy definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`\nis equal to the number of observations known to be in group :math:`i` and\npredicted to be in group :math:`j`.\n\nThus in binary classification, the count of true negatives is\n:math:`C_{0,0}`, false negatives is :math:`C_{1,0}`, true positives is\n:math:`C_{1,1}` and false positives is :math:`C_{0,1}`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute confusion matrix to evaluate the accuracy of a classification.\n\nBy definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`\nis equal to the number of observations known to be in group :math:`i` and\npredicted to be in group :math:`j`.\n\nThus in binary classification, the count of true negatives is\n:math:`C_{0,0}`, false negatives is :math:`C_{1,0}`, true positives is\n:math:`C_{1,1}` and false positives is :math:`C_{0,1}`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,)\n Estimated targets as returned by a classifier.\n\nlabels : array-like of shape (n_classes), default=None\n List of labels to index the matrix. This may be used to reorder\n or select a subset of labels.\n If ``None`` is given, those that appear at least once\n in ``y_true`` or ``y_pred`` are used in sorted order.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.18\n\nnormalize : {'true', 'pred', 'all'}, default=None\n Normalizes confusion matrix over the true (rows), predicted (columns)\n conditions or all the population. If None, confusion matrix will not be\n normalized.\n\nReturns\n-------\nC : ndarray of shape (n_classes, n_classes)\n Confusion matrix whose i-th row and j-th\n column entry indicates the number of\n samples with true label being i-th class\n and predicted label being j-th class.\n\nSee Also\n--------\nConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n given an estimator, the data, and the label.\nConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n given the true and predicted labels.\nConfusionMatrixDisplay : Confusion Matrix visualization.\n\nReferences\n----------\n.. 
[1] `Wikipedia entry for the Confusion matrix\n `_\n (Wikipedia and other references may use a different\n convention for axes).\n\nExamples\n--------\n>>> from sklearn.metrics import confusion_matrix\n>>> y_true = [2, 0, 2, 2, 0, 1]\n>>> y_pred = [0, 0, 2, 2, 0, 2]\n>>> confusion_matrix(y_true, y_pred)\narray([[2, 0, 0],\n [0, 0, 1],\n [1, 0, 2]])\n\n>>> y_true = [\"cat\", \"ant\", \"cat\", \"cat\", \"ant\", \"bird\"]\n>>> y_pred = [\"ant\", \"ant\", \"cat\", \"cat\", \"ant\", \"cat\"]\n>>> confusion_matrix(y_true, y_pred, labels=[\"ant\", \"bird\", \"cat\"])\narray([[2, 0, 0],\n [0, 0, 1],\n [1, 0, 2]])\n\nIn the binary case, we can extract true positives, etc as follows:\n\n>>> tn, fp, fn, tp = confusion_matrix([0, 1, 0, 1], [1, 1, 1, 0]).ravel()\n>>> (tn, fp, fn, tp)\n(0, 2, 1, 1)", + "code": "@_deprecate_positional_args\ndef confusion_matrix(y_true, y_pred, *, labels=None, sample_weight=None,\n normalize=None):\n \"\"\"Compute confusion matrix to evaluate the accuracy of a classification.\n\n By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`\n is equal to the number of observations known to be in group :math:`i` and\n predicted to be in group :math:`j`.\n\n Thus in binary classification, the count of true negatives is\n :math:`C_{0,0}`, false negatives is :math:`C_{1,0}`, true positives is\n :math:`C_{1,1}` and false positives is :math:`C_{0,1}`.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,)\n Ground truth (correct) target values.\n\n y_pred : array-like of shape (n_samples,)\n Estimated targets as returned by a classifier.\n\n labels : array-like of shape (n_classes), default=None\n List of labels to index the matrix. This may be used to reorder\n or select a subset of labels.\n If ``None`` is given, those that appear at least once\n in ``y_true`` or ``y_pred`` are used in sorted order.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.18\n\n normalize : {'true', 'pred', 'all'}, default=None\n Normalizes confusion matrix over the true (rows), predicted (columns)\n conditions or all the population. If None, confusion matrix will not be\n normalized.\n\n Returns\n -------\n C : ndarray of shape (n_classes, n_classes)\n Confusion matrix whose i-th row and j-th\n column entry indicates the number of\n samples with true label being i-th class\n and predicted label being j-th class.\n\n See Also\n --------\n ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix\n given an estimator, the data, and the label.\n ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix\n given the true and predicted labels.\n ConfusionMatrixDisplay : Confusion Matrix visualization.\n\n References\n ----------\n .. 
[1] `Wikipedia entry for the Confusion matrix\n `_\n (Wikipedia and other references may use a different\n convention for axes).\n\n Examples\n --------\n >>> from sklearn.metrics import confusion_matrix\n >>> y_true = [2, 0, 2, 2, 0, 1]\n >>> y_pred = [0, 0, 2, 2, 0, 2]\n >>> confusion_matrix(y_true, y_pred)\n array([[2, 0, 0],\n [0, 0, 1],\n [1, 0, 2]])\n\n >>> y_true = [\"cat\", \"ant\", \"cat\", \"cat\", \"ant\", \"bird\"]\n >>> y_pred = [\"ant\", \"ant\", \"cat\", \"cat\", \"ant\", \"cat\"]\n >>> confusion_matrix(y_true, y_pred, labels=[\"ant\", \"bird\", \"cat\"])\n array([[2, 0, 0],\n [0, 0, 1],\n [1, 0, 2]])\n\n In the binary case, we can extract true positives, etc as follows:\n\n >>> tn, fp, fn, tp = confusion_matrix([0, 1, 0, 1], [1, 1, 1, 0]).ravel()\n >>> (tn, fp, fn, tp)\n (0, 2, 1, 1)\n\n \"\"\"\n y_type, y_true, y_pred = _check_targets(y_true, y_pred)\n if y_type not in (\"binary\", \"multiclass\"):\n raise ValueError(\"%s is not supported\" % y_type)\n\n if labels is None:\n labels = unique_labels(y_true, y_pred)\n else:\n labels = np.asarray(labels)\n n_labels = labels.size\n if n_labels == 0:\n raise ValueError(\"'labels' should contain at least one label.\")\n elif y_true.size == 0:\n return np.zeros((n_labels, n_labels), dtype=int)\n elif np.all([l not in y_true for l in labels]):\n raise ValueError(\"At least one label specified must be in y_true\")\n\n if sample_weight is None:\n sample_weight = np.ones(y_true.shape[0], dtype=np.int64)\n else:\n sample_weight = np.asarray(sample_weight)\n\n check_consistent_length(y_true, y_pred, sample_weight)\n\n if normalize not in ['true', 'pred', 'all', None]:\n raise ValueError(\"normalize must be one of {'true', 'pred', \"\n \"'all', None}\")\n\n n_labels = labels.size\n label_to_ind = {y: x for x, y in enumerate(labels)}\n # convert yt, yp into index\n y_pred = np.array([label_to_ind.get(x, n_labels + 1) for x in y_pred])\n y_true = np.array([label_to_ind.get(x, n_labels + 1) for x in y_true])\n\n # intersect y_pred, y_true with labels, eliminate items not in labels\n ind = np.logical_and(y_pred < n_labels, y_true < n_labels)\n y_pred = y_pred[ind]\n y_true = y_true[ind]\n # also eliminate weights of eliminated items\n sample_weight = sample_weight[ind]\n\n # Choose the accumulator dtype to always have high precision\n if sample_weight.dtype.kind in {'i', 'u', 'b'}:\n dtype = np.int64\n else:\n dtype = np.float64\n\n cm = coo_matrix((sample_weight, (y_true, y_pred)),\n shape=(n_labels, n_labels), dtype=dtype,\n ).toarray()\n\n with np.errstate(all='ignore'):\n if normalize == 'true':\n cm = cm / cm.sum(axis=1, keepdims=True)\n elif normalize == 'pred':\n cm = cm / cm.sum(axis=0, keepdims=True)\n elif normalize == 'all':\n cm = cm / cm.sum()\n cm = np.nan_to_num(cm)\n\n return cm" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/f1_score", + "name": "f1_score", + "qname": "sklearn.metrics._classification.f1_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/f1_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.f1_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Ground truth (correct) target values." 
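The `normalize` branch at the end of the recorded implementation divides the matrix by its row sums, column sums, or grand total; a short sketch of what that means in practice (toy data, illustrative only):

import numpy as np
from sklearn.metrics import confusion_matrix

y_true = [0, 0, 1, 1, 2, 2]
y_pred = [0, 1, 1, 1, 2, 0]

C = confusion_matrix(y_true, y_pred)
# normalize='true' divides each row by its sum, so row i holds the
# recall profile of true class i:
assert np.allclose(confusion_matrix(y_true, y_pred, normalize="true"),
                   C / C.sum(axis=1, keepdims=True))
# labels=... reorders (or subsets) both axes at once:
print(confusion_matrix(y_true, y_pred, labels=[2, 1, 0]))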
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/f1_score/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.f1_score.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Estimated targets as returned by a classifier." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/f1_score/labels", + "name": "labels", + "qname": "sklearn.metrics._classification.f1_score.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order.\n\n.. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/f1_score/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._classification.f1_score.pos_label", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or int", + "default_value": "1", + "description": "The class to report if ``average='binary'`` and the data is binary.\nIf the data are multiclass or multilabel, this will be ignored;\nsetting ``labels=[pos_label]`` and ``average != 'binary'`` will report\nscores for that label only." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/f1_score/average", + "name": "average", + "qname": "sklearn.metrics._classification.f1_score.average", + "default_value": "'binary'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'micro', 'macro', 'samples','weighted', 'binary'} or None", + "default_value": "'binary'", + "description": "This parameter is required for multiclass/multilabel targets.\nIf ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. 
This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["micro", "weighted", "macro", "binary", "samples"] + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/f1_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.f1_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/f1_score/zero_division", + "name": "zero_division", + "qname": "sklearn.metrics._classification.f1_score.zero_division", + "default_value": "'warn'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "\"warn\", 0 or 1", + "default_value": "\"warn\"", + "description": "Sets the value to return when there is a zero division, i.e. when all\npredictions and labels are negative. If set to \"warn\", this acts as 0,\nbut warnings are also raised." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "\"warn\"" + }, + { + "kind": "NamedType", + "name": "0" + }, + { + "kind": "NamedType", + "name": "1" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the F1 score, also known as balanced F-score or F-measure.\n\nThe F1 score can be interpreted as a weighted average of the precision and\nrecall, where an F1 score reaches its best value at 1 and worst score at 0.\nThe relative contribution of precision and recall to the F1 score are\nequal. The formula for the F1 score is::\n\n F1 = 2 * (precision * recall) / (precision + recall)\n\nIn the multi-class and multi-label case, this is the average of\nthe F1 score of each class with weighting depending on the ``average``\nparameter.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the F1 score, also known as balanced F-score or F-measure.\n\nThe F1 score can be interpreted as a weighted average of the precision and\nrecall, where an F1 score reaches its best value at 1 and worst score at 0.\nThe relative contribution of precision and recall to the F1 score are\nequal. 
The formula for the F1 score is::\n\n F1 = 2 * (precision * recall) / (precision + recall)\n\nIn the multi-class and multi-label case, this is the average of\nthe F1 score of each class with weighting depending on the ``average``\nparameter.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'micro', 'macro', 'samples','weighted', 'binary'} or None, default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division, i.e. when all\n predictions and labels are negative. If set to \"warn\", this acts as 0,\n but warnings are also raised.\n\nReturns\n-------\nf1_score : float or array of float, shape = [n_unique_labels]\n F1 score of the positive class in binary classification or weighted\n average of the F1 scores of each class for the multiclass task.\n\nSee Also\n--------\nfbeta_score, precision_recall_fscore_support, jaccard_score,\nmultilabel_confusion_matrix\n\nReferences\n----------\n.. 
[1] `Wikipedia entry for the F1-score\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import f1_score\n>>> y_true = [0, 1, 2, 0, 1, 2]\n>>> y_pred = [0, 2, 1, 0, 0, 1]\n>>> f1_score(y_true, y_pred, average='macro')\n0.26...\n>>> f1_score(y_true, y_pred, average='micro')\n0.33...\n>>> f1_score(y_true, y_pred, average='weighted')\n0.26...\n>>> f1_score(y_true, y_pred, average=None)\narray([0.8, 0. , 0. ])\n>>> y_true = [0, 0, 0, 0, 0, 0]\n>>> y_pred = [0, 0, 0, 0, 0, 0]\n>>> f1_score(y_true, y_pred, zero_division=1)\n1.0...\n\nNotes\n-----\nWhen ``true positive + false positive == 0``, precision is undefined.\nWhen ``true positive + false negative == 0``, recall is undefined.\nIn such cases, by default the metric will be set to 0, as will f-score,\nand ``UndefinedMetricWarning`` will be raised. This behavior can be\nmodified with ``zero_division``.", + "code": "@_deprecate_positional_args\ndef f1_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary',\n sample_weight=None, zero_division=\"warn\"):\n \"\"\"Compute the F1 score, also known as balanced F-score or F-measure.\n\n The F1 score can be interpreted as a weighted average of the precision and\n recall, where an F1 score reaches its best value at 1 and worst score at 0.\n The relative contribution of precision and recall to the F1 score are\n equal. The formula for the F1 score is::\n\n F1 = 2 * (precision * recall) / (precision + recall)\n\n In the multi-class and multi-label case, this is the average of\n the F1 score of each class with weighting depending on the ``average``\n parameter.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\n y_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\n labels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\n pos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\n average : {'micro', 'macro', 'samples','weighted', 'binary'} or None, \\\n default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. 
This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n zero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division, i.e. when all\n predictions and labels are negative. If set to \"warn\", this acts as 0,\n but warnings are also raised.\n\n Returns\n -------\n f1_score : float or array of float, shape = [n_unique_labels]\n F1 score of the positive class in binary classification or weighted\n average of the F1 scores of each class for the multiclass task.\n\n See Also\n --------\n fbeta_score, precision_recall_fscore_support, jaccard_score,\n multilabel_confusion_matrix\n\n References\n ----------\n .. [1] `Wikipedia entry for the F1-score\n `_.\n\n Examples\n --------\n >>> from sklearn.metrics import f1_score\n >>> y_true = [0, 1, 2, 0, 1, 2]\n >>> y_pred = [0, 2, 1, 0, 0, 1]\n >>> f1_score(y_true, y_pred, average='macro')\n 0.26...\n >>> f1_score(y_true, y_pred, average='micro')\n 0.33...\n >>> f1_score(y_true, y_pred, average='weighted')\n 0.26...\n >>> f1_score(y_true, y_pred, average=None)\n array([0.8, 0. , 0. ])\n >>> y_true = [0, 0, 0, 0, 0, 0]\n >>> y_pred = [0, 0, 0, 0, 0, 0]\n >>> f1_score(y_true, y_pred, zero_division=1)\n 1.0...\n\n Notes\n -----\n When ``true positive + false positive == 0``, precision is undefined.\n When ``true positive + false negative == 0``, recall is undefined.\n In such cases, by default the metric will be set to 0, as will f-score,\n and ``UndefinedMetricWarning`` will be raised. This behavior can be\n modified with ``zero_division``.\n \"\"\"\n return fbeta_score(y_true, y_pred, beta=1, labels=labels,\n pos_label=pos_label, average=average,\n sample_weight=sample_weight,\n zero_division=zero_division)" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/fbeta_score", + "name": "fbeta_score", + "qname": "sklearn.metrics._classification.fbeta_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/fbeta_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.fbeta_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/fbeta_score/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.fbeta_score.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Estimated targets as returned by a classifier." 
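The `average` options documented above differ only in how the per-class scores are combined; 'macro', for instance, is their plain mean, and `zero_division` fills in the 0/0 cases. A small sketch (toy labels, illustrative only):

import numpy as np
from sklearn.metrics import f1_score

y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]

per_class = f1_score(y_true, y_pred, average=None)  # array([0.8, 0., 0.])
# 'macro' is the unweighted mean of the per-class scores:
assert np.isclose(f1_score(y_true, y_pred, average="macro"),
                  per_class.mean())
# With no true or predicted positives, F1 is 0/0; zero_division picks the
# value to return and suppresses the UndefinedMetricWarning:
print(f1_score([0, 0, 0], [0, 0, 0], zero_division=0))  # 0.0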
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/fbeta_score/beta", + "name": "beta", + "qname": "sklearn.metrics._classification.fbeta_score.beta", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Determines the weight of recall in the combined score." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/fbeta_score/labels", + "name": "labels", + "qname": "sklearn.metrics._classification.fbeta_score.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order.\n\n.. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/fbeta_score/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._classification.fbeta_score.pos_label", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or int", + "default_value": "1", + "description": "The class to report if ``average='binary'`` and the data is binary.\nIf the data are multiclass or multilabel, this will be ignored;\nsetting ``labels=[pos_label]`` and ``average != 'binary'`` will report\nscores for that label only." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/fbeta_score/average", + "name": "average", + "qname": "sklearn.metrics._classification.fbeta_score.average", + "default_value": "'binary'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'micro', 'macro', 'samples', 'weighted', 'binary'} or None default='binary'", + "default_value": "", + "description": "This parameter is required for multiclass/multilabel targets.\nIf ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). 
This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["micro", "weighted", "macro", "binary", "samples"] + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/fbeta_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.fbeta_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/fbeta_score/zero_division", + "name": "zero_division", + "qname": "sklearn.metrics._classification.fbeta_score.zero_division", + "default_value": "'warn'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "\"warn\", 0 or 1", + "default_value": "\"warn\"", + "description": "Sets the value to return when there is a zero division, i.e. when all\npredictions and labels are negative. If set to \"warn\", this acts as 0,\nbut warnings are also raised." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "\"warn\"" + }, + { + "kind": "NamedType", + "name": "0" + }, + { + "kind": "NamedType", + "name": "1" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the F-beta score.\n\nThe F-beta score is the weighted harmonic mean of precision and recall,\nreaching its optimal value at 1 and its worst value at 0.\n\nThe `beta` parameter determines the weight of recall in the combined\nscore. ``beta < 1`` lends more weight to precision, while ``beta > 1``\nfavors recall (``beta -> 0`` considers only precision, ``beta -> +inf``\nonly recall).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the F-beta score.\n\nThe F-beta score is the weighted harmonic mean of precision and recall,\nreaching its optimal value at 1 and its worst value at 0.\n\nThe `beta` parameter determines the weight of recall in the combined\nscore. ``beta < 1`` lends more weight to precision, while ``beta > 1``\nfavors recall (``beta -> 0`` considers only precision, ``beta -> +inf``\nonly recall).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nbeta : float\n Determines the weight of recall in the combined score.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. 
versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division, i.e. when all\n predictions and labels are negative. If set to \"warn\", this acts as 0,\n but warnings are also raised.\n\nReturns\n-------\nfbeta_score : float (if average is not None) or array of float, shape = [n_unique_labels]\n F-beta score of the positive class in binary classification or weighted\n average of the F-beta score of each class for the multiclass task.\n\nSee Also\n--------\nprecision_recall_fscore_support, multilabel_confusion_matrix\n\nNotes\n-----\nWhen ``true positive + false positive == 0`` or\n``true positive + false negative == 0``, f-score returns 0 and raises\n``UndefinedMetricWarning``. This behavior can be\nmodified with ``zero_division``.\n\nReferences\n----------\n.. [1] R. Baeza-Yates and B. Ribeiro-Neto (2011).\n Modern Information Retrieval. Addison Wesley, pp. 327-328.\n\n.. [2] `Wikipedia entry for the F1-score\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import fbeta_score\n>>> y_true = [0, 1, 2, 0, 1, 2]\n>>> y_pred = [0, 2, 1, 0, 0, 1]\n>>> fbeta_score(y_true, y_pred, average='macro', beta=0.5)\n0.23...\n>>> fbeta_score(y_true, y_pred, average='micro', beta=0.5)\n0.33...\n>>> fbeta_score(y_true, y_pred, average='weighted', beta=0.5)\n0.23...\n>>> fbeta_score(y_true, y_pred, average=None, beta=0.5)\narray([0.71..., 0. , 0. ])", + "code": "@_deprecate_positional_args\ndef fbeta_score(y_true, y_pred, *, beta, labels=None, pos_label=1,\n average='binary', sample_weight=None, zero_division=\"warn\"):\n \"\"\"Compute the F-beta score.\n\n The F-beta score is the weighted harmonic mean of precision and recall,\n reaching its optimal value at 1 and its worst value at 0.\n\n The `beta` parameter determines the weight of recall in the combined\n score. 
``beta < 1`` lends more weight to precision, while ``beta > 1``\n favors recall (``beta -> 0`` considers only precision, ``beta -> +inf``\n only recall).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\n y_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\n beta : float\n Determines the weight of recall in the combined score.\n\n labels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\n pos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\n average : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None \\\n default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n zero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division, i.e. when all\n predictions and labels are negative. If set to \"warn\", this acts as 0,\n but warnings are also raised.\n\n Returns\n -------\n fbeta_score : float (if average is not None) or array of float, shape =\\\n [n_unique_labels]\n F-beta score of the positive class in binary classification or weighted\n average of the F-beta score of each class for the multiclass task.\n\n See Also\n --------\n precision_recall_fscore_support, multilabel_confusion_matrix\n\n Notes\n -----\n When ``true positive + false positive == 0`` or\n ``true positive + false negative == 0``, f-score returns 0 and raises\n ``UndefinedMetricWarning``. This behavior can be\n modified with ``zero_division``.\n\n References\n ----------\n .. [1] R. Baeza-Yates and B. Ribeiro-Neto (2011).\n Modern Information Retrieval. Addison Wesley, pp. 
327-328.\n\n .. [2] `Wikipedia entry for the F1-score\n `_.\n\n Examples\n --------\n >>> from sklearn.metrics import fbeta_score\n >>> y_true = [0, 1, 2, 0, 1, 2]\n >>> y_pred = [0, 2, 1, 0, 0, 1]\n >>> fbeta_score(y_true, y_pred, average='macro', beta=0.5)\n 0.23...\n >>> fbeta_score(y_true, y_pred, average='micro', beta=0.5)\n 0.33...\n >>> fbeta_score(y_true, y_pred, average='weighted', beta=0.5)\n 0.23...\n >>> fbeta_score(y_true, y_pred, average=None, beta=0.5)\n array([0.71..., 0. , 0. ])\n \"\"\"\n\n _, _, f, _ = precision_recall_fscore_support(y_true, y_pred,\n beta=beta,\n labels=labels,\n pos_label=pos_label,\n average=average,\n warn_for=('f-score',),\n sample_weight=sample_weight,\n zero_division=zero_division)\n return f" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/hamming_loss", + "name": "hamming_loss", + "qname": "sklearn.metrics._classification.hamming_loss", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/hamming_loss/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.hamming_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Ground truth (correct) labels." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/hamming_loss/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.hamming_loss.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Predicted labels, as returned by a classifier." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/hamming_loss/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.hamming_loss.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the average Hamming loss.\n\nThe Hamming loss is the fraction of labels that are incorrectly predicted.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the average Hamming loss.\n\nThe Hamming loss is the fraction of labels that are incorrectly predicted.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. 
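The beta weighting described above corresponds to the usual closed form F_beta = (1 + beta^2) * p * r / (beta^2 * p + r); a quick check against precision and recall (toy data, illustrative only):

import numpy as np
from sklearn.metrics import fbeta_score, precision_score, recall_score

y_true = [0, 1, 1, 1, 0, 1]
y_pred = [0, 1, 0, 1, 1, 1]

p = precision_score(y_true, y_pred)
r = recall_score(y_true, y_pred)
beta = 0.5  # beta < 1 weighs precision more heavily than recall
manual = (1 + beta ** 2) * p * r / (beta ** 2 * p + r)
assert np.isclose(manual, fbeta_score(y_true, y_pred, beta=beta))
# beta=1 recovers f1_score exactly, which is how the recorded f1_score
# implementation delegates to fbeta_score.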
versionadded:: 0.18\n\nReturns\n-------\nloss : float or int\n Return the average Hamming loss between elements of ``y_true`` and\n ``y_pred``.\n\nSee Also\n--------\naccuracy_score, jaccard_score, zero_one_loss\n\nNotes\n-----\nIn multiclass classification, the Hamming loss corresponds to the Hamming\ndistance between ``y_true`` and ``y_pred`` which is equivalent to the\nsubset ``zero_one_loss`` function, when `normalize` parameter is set to\nTrue.\n\nIn multilabel classification, the Hamming loss is different from the\nsubset zero-one loss. The zero-one loss considers the entire set of labels\nfor a given sample incorrect if it does not entirely match the true set of\nlabels. Hamming loss is more forgiving in that it penalizes only the\nindividual labels.\n\nThe Hamming loss is upper bounded by the subset zero-one loss, when\n`normalize` parameter is set to True. It is always between 0 and 1,\nlower being better.\n\nReferences\n----------\n.. [1] Grigorios Tsoumakas, Ioannis Katakis. Multi-Label Classification:\n An Overview. International Journal of Data Warehousing & Mining,\n 3(3), 1-13, July-September 2007.\n\n.. [2] `Wikipedia entry on the Hamming distance\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import hamming_loss\n>>> y_pred = [1, 2, 3, 4]\n>>> y_true = [2, 2, 3, 4]\n>>> hamming_loss(y_true, y_pred)\n0.25\n\nIn the multilabel case with binary label indicators:\n\n>>> import numpy as np\n>>> hamming_loss(np.array([[0, 1], [1, 1]]), np.zeros((2, 2)))\n0.75", + "code": "@_deprecate_positional_args\ndef hamming_loss(y_true, y_pred, *, sample_weight=None):\n \"\"\"Compute the average Hamming loss.\n\n The Hamming loss is the fraction of labels that are incorrectly predicted.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\n y_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.18\n\n Returns\n -------\n loss : float or int\n Return the average Hamming loss between elements of ``y_true`` and\n ``y_pred``.\n\n See Also\n --------\n accuracy_score, jaccard_score, zero_one_loss\n\n Notes\n -----\n In multiclass classification, the Hamming loss corresponds to the Hamming\n distance between ``y_true`` and ``y_pred`` which is equivalent to the\n subset ``zero_one_loss`` function, when `normalize` parameter is set to\n True.\n\n In multilabel classification, the Hamming loss is different from the\n subset zero-one loss. The zero-one loss considers the entire set of labels\n for a given sample incorrect if it does not entirely match the true set of\n labels. Hamming loss is more forgiving in that it penalizes only the\n individual labels.\n\n The Hamming loss is upper bounded by the subset zero-one loss, when\n `normalize` parameter is set to True. It is always between 0 and 1,\n lower being better.\n\n References\n ----------\n .. [1] Grigorios Tsoumakas, Ioannis Katakis. Multi-Label Classification:\n An Overview. International Journal of Data Warehousing & Mining,\n 3(3), 1-13, July-September 2007.\n\n .. 
[2] `Wikipedia entry on the Hamming distance\n `_.\n\n Examples\n --------\n >>> from sklearn.metrics import hamming_loss\n >>> y_pred = [1, 2, 3, 4]\n >>> y_true = [2, 2, 3, 4]\n >>> hamming_loss(y_true, y_pred)\n 0.25\n\n In the multilabel case with binary label indicators:\n\n >>> import numpy as np\n >>> hamming_loss(np.array([[0, 1], [1, 1]]), np.zeros((2, 2)))\n 0.75\n \"\"\"\n\n y_type, y_true, y_pred = _check_targets(y_true, y_pred)\n check_consistent_length(y_true, y_pred, sample_weight)\n\n if sample_weight is None:\n weight_average = 1.\n else:\n weight_average = np.mean(sample_weight)\n\n if y_type.startswith('multilabel'):\n n_differences = count_nonzero(y_true - y_pred,\n sample_weight=sample_weight)\n return (n_differences /\n (y_true.shape[0] * y_true.shape[1] * weight_average))\n\n elif y_type in [\"binary\", \"multiclass\"]:\n return _weighted_sum(y_true != y_pred, sample_weight, normalize=True)\n else:\n raise ValueError(\"{0} is not supported\".format(y_type))" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/hinge_loss", + "name": "hinge_loss", + "qname": "sklearn.metrics._classification.hinge_loss", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/hinge_loss/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.hinge_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,)", + "default_value": "", + "description": "True target, consisting of integers of two values. The positive label\nmust be greater than the negative label." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/hinge_loss/pred_decision", + "name": "pred_decision", + "qname": "sklearn.metrics._classification.hinge_loss.pred_decision", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples,) or (n_samples, n_classes)", + "default_value": "", + "description": "Predicted decisions, as output by decision_function (floats)." + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples,) or (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/hinge_loss/labels", + "name": "labels", + "qname": "sklearn.metrics._classification.hinge_loss.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "Contains all the labels for the problem. Used in multiclass hinge loss." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/hinge_loss/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.hinge_loss.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
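The note above that the Hamming loss is upper bounded by the subset zero-one loss is easy to see numerically: zero-one scores each row all-or-nothing, Hamming scores each label slot. A small sketch (toy indicators, illustrative only):

import numpy as np
from sklearn.metrics import hamming_loss, zero_one_loss

y_true = np.array([[0, 1, 1],
                   [1, 0, 1]])
y_pred = np.array([[0, 1, 0],
                   [1, 1, 1]])

# 2 of the 6 individual label slots disagree:
assert np.isclose(hamming_loss(y_true, y_pred), 2 / 6)
# Each row has at least one wrong label, so the subset zero-one loss is 1,
# which upper-bounds the Hamming loss:
assert zero_one_loss(y_true, y_pred) == 1.0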
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Average hinge loss (non-regularized).\n\nIn binary class case, assuming labels in y_true are encoded with +1 and -1,\nwhen a prediction mistake is made, ``margin = y_true * pred_decision`` is\nalways negative (since the signs disagree), implying ``1 - margin`` is\nalways greater than 1. The cumulated hinge loss is therefore an upper\nbound of the number of mistakes made by the classifier.\n\nIn multiclass case, the function expects that either all the labels are\nincluded in y_true or an optional labels argument is provided which\ncontains all the labels. The multilabel margin is calculated according\nto Crammer-Singer's method. As in the binary case, the cumulated hinge loss\nis an upper bound of the number of mistakes made by the classifier.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Average hinge loss (non-regularized).\n\nIn binary class case, assuming labels in y_true are encoded with +1 and -1,\nwhen a prediction mistake is made, ``margin = y_true * pred_decision`` is\nalways negative (since the signs disagree), implying ``1 - margin`` is\nalways greater than 1. The cumulated hinge loss is therefore an upper\nbound of the number of mistakes made by the classifier.\n\nIn multiclass case, the function expects that either all the labels are\nincluded in y_true or an optional labels argument is provided which\ncontains all the labels. The multilabel margin is calculated according\nto Crammer-Singer's method. As in the binary case, the cumulated hinge loss\nis an upper bound of the number of mistakes made by the classifier.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array of shape (n_samples,)\n True target, consisting of integers of two values. The positive label\n must be greater than the negative label.\n\npred_decision : array of shape (n_samples,) or (n_samples, n_classes)\n Predicted decisions, as output by decision_function (floats).\n\nlabels : array-like, default=None\n Contains all the labels for the problem. Used in multiclass hinge loss.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float\n\nReferences\n----------\n.. [1] `Wikipedia entry on the Hinge loss\n `_.\n\n.. [2] Koby Crammer, Yoram Singer. On the Algorithmic\n Implementation of Multiclass Kernel-based Vector\n Machines. Journal of Machine Learning Research 2,\n (2001), 265-292.\n\n.. [3] `L1 AND L2 Regularization for Multiclass Hinge Loss Models\n by Robert C. 
Moore, John DeNero\n `_.\n\nExamples\n--------\n>>> from sklearn import svm\n>>> from sklearn.metrics import hinge_loss\n>>> X = [[0], [1]]\n>>> y = [-1, 1]\n>>> est = svm.LinearSVC(random_state=0)\n>>> est.fit(X, y)\nLinearSVC(random_state=0)\n>>> pred_decision = est.decision_function([[-2], [3], [0.5]])\n>>> pred_decision\narray([-2.18..., 2.36..., 0.09...])\n>>> hinge_loss([-1, 1, 1], pred_decision)\n0.30...\n\nIn the multiclass case:\n\n>>> import numpy as np\n>>> X = np.array([[0], [1], [2], [3]])\n>>> Y = np.array([0, 1, 2, 3])\n>>> labels = np.array([0, 1, 2, 3])\n>>> est = svm.LinearSVC()\n>>> est.fit(X, Y)\nLinearSVC()\n>>> pred_decision = est.decision_function([[-1], [2], [3]])\n>>> y_true = [0, 2, 3]\n>>> hinge_loss(y_true, pred_decision, labels=labels)\n0.56...", + "code": "@_deprecate_positional_args\ndef hinge_loss(y_true, pred_decision, *, labels=None, sample_weight=None):\n \"\"\"Average hinge loss (non-regularized).\n\n In binary class case, assuming labels in y_true are encoded with +1 and -1,\n when a prediction mistake is made, ``margin = y_true * pred_decision`` is\n always negative (since the signs disagree), implying ``1 - margin`` is\n always greater than 1. The cumulated hinge loss is therefore an upper\n bound of the number of mistakes made by the classifier.\n\n In multiclass case, the function expects that either all the labels are\n included in y_true or an optional labels argument is provided which\n contains all the labels. The multilabel margin is calculated according\n to Crammer-Singer's method. As in the binary case, the cumulated hinge loss\n is an upper bound of the number of mistakes made by the classifier.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array of shape (n_samples,)\n True target, consisting of integers of two values. The positive label\n must be greater than the negative label.\n\n pred_decision : array of shape (n_samples,) or (n_samples, n_classes)\n Predicted decisions, as output by decision_function (floats).\n\n labels : array-like, default=None\n Contains all the labels for the problem. Used in multiclass hinge loss.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n loss : float\n\n References\n ----------\n .. [1] `Wikipedia entry on the Hinge loss\n `_.\n\n .. [2] Koby Crammer, Yoram Singer. On the Algorithmic\n Implementation of Multiclass Kernel-based Vector\n Machines. Journal of Machine Learning Research 2,\n (2001), 265-292.\n\n .. [3] `L1 AND L2 Regularization for Multiclass Hinge Loss Models\n by Robert C. 
Moore, John DeNero\n `_.\n\n Examples\n --------\n >>> from sklearn import svm\n >>> from sklearn.metrics import hinge_loss\n >>> X = [[0], [1]]\n >>> y = [-1, 1]\n >>> est = svm.LinearSVC(random_state=0)\n >>> est.fit(X, y)\n LinearSVC(random_state=0)\n >>> pred_decision = est.decision_function([[-2], [3], [0.5]])\n >>> pred_decision\n array([-2.18..., 2.36..., 0.09...])\n >>> hinge_loss([-1, 1, 1], pred_decision)\n 0.30...\n\n In the multiclass case:\n\n >>> import numpy as np\n >>> X = np.array([[0], [1], [2], [3]])\n >>> Y = np.array([0, 1, 2, 3])\n >>> labels = np.array([0, 1, 2, 3])\n >>> est = svm.LinearSVC()\n >>> est.fit(X, Y)\n LinearSVC()\n >>> pred_decision = est.decision_function([[-1], [2], [3]])\n >>> y_true = [0, 2, 3]\n >>> hinge_loss(y_true, pred_decision, labels=labels)\n 0.56...\n \"\"\"\n check_consistent_length(y_true, pred_decision, sample_weight)\n pred_decision = check_array(pred_decision, ensure_2d=False)\n y_true = column_or_1d(y_true)\n y_true_unique = np.unique(labels if labels is not None else y_true)\n if y_true_unique.size > 2:\n if (labels is None and pred_decision.ndim > 1 and\n (np.size(y_true_unique) != pred_decision.shape[1])):\n raise ValueError(\"Please include all labels in y_true \"\n \"or pass labels as third argument\")\n if labels is None:\n labels = y_true_unique\n le = LabelEncoder()\n le.fit(labels)\n y_true = le.transform(y_true)\n mask = np.ones_like(pred_decision, dtype=bool)\n mask[np.arange(y_true.shape[0]), y_true] = False\n margin = pred_decision[~mask]\n margin -= np.max(pred_decision[mask].reshape(y_true.shape[0], -1),\n axis=1)\n\n else:\n # Handles binary class case\n # this code assumes that positive and negative labels\n # are encoded as +1 and -1 respectively\n pred_decision = column_or_1d(pred_decision)\n pred_decision = np.ravel(pred_decision)\n\n lbin = LabelBinarizer(neg_label=-1)\n y_true = lbin.fit_transform(y_true)[:, 0]\n\n try:\n margin = y_true * pred_decision\n except TypeError:\n raise TypeError(\"pred_decision should be an array of floats.\")\n\n losses = 1 - margin\n # The hinge_loss doesn't penalize good enough predictions.\n np.clip(losses, 0, None, out=losses)\n return np.average(losses, weights=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/jaccard_score", + "name": "jaccard_score", + "qname": "sklearn.metrics._classification.jaccard_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/jaccard_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.jaccard_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Ground truth (correct) labels." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/jaccard_score/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.jaccard_score.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Predicted labels, as returned by a classifier." 
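In the binary branch of the implementation above, the loss is literally mean(max(0, 1 - y * f(x))) over -1/+1 encoded targets; a direct re-computation (the decision values here are made up for illustration):

import numpy as np
from sklearn.metrics import hinge_loss

y_true = np.array([-1, 1, 1])                # already -1/+1 encoded
pred_decision = np.array([-2.2, 2.4, 0.1])   # e.g. decision_function output

margins = y_true * pred_decision             # positive when the sign is right
losses = np.clip(1 - margins, 0, None)       # hinge: max(0, 1 - margin)
assert np.isclose(hinge_loss(y_true, pred_decision), losses.mean())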
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/jaccard_score/labels", + "name": "labels", + "qname": "sklearn.metrics._classification.jaccard_score.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/jaccard_score/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._classification.jaccard_score.pos_label", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or int", + "default_value": "1", + "description": "The class to report if ``average='binary'`` and the data is binary.\nIf the data are multiclass or multilabel, this will be ignored;\nsetting ``labels=[pos_label]`` and ``average != 'binary'`` will report\nscores for that label only." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/jaccard_score/average", + "name": "average", + "qname": "sklearn.metrics._classification.jaccard_score.average", + "default_value": "'binary'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{None, 'micro', 'macro', 'samples', 'weighted', 'binary'}", + "default_value": "'binary'", + "description": "If ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance.\n``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification)." 
+ }, + "type": { + "kind": "EnumType", + "values": ["micro", "weighted", "macro", "binary", "samples"] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/jaccard_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.jaccard_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/jaccard_score/zero_division", + "name": "zero_division", + "qname": "sklearn.metrics._classification.jaccard_score.zero_division", + "default_value": "'warn'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "\"warn\", {0.0, 1.0}", + "default_value": "\"warn\"", + "description": "Sets the value to return when there is a zero division, i.e. when there\nthere are no negative values in predictions and labels. If set to\n\"warn\", this acts like 0, but a warning is also raised." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "\"warn\"" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Jaccard similarity coefficient score.\n\nThe Jaccard index [1], or Jaccard similarity coefficient, defined as\nthe size of the intersection divided by the size of the union of two label\nsets, is used to compare set of predicted labels for a sample to the\ncorresponding set of labels in ``y_true``.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Jaccard similarity coefficient score.\n\nThe Jaccard index [1], or Jaccard similarity coefficient, defined as\nthe size of the intersection divided by the size of the union of two label\nsets, is used to compare set of predicted labels for a sample to the\ncorresponding set of labels in ``y_true``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\nlabels : array-like of shape (n_classes,), default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {None, 'micro', 'macro', 'samples', 'weighted', 'binary'}, default='binary'\n If ``None``, the scores for each class are returned. 
Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", {0.0, 1.0}, default=\"warn\"\n Sets the value to return when there is a zero division, i.e. when there\n are no negative values in predictions and labels. If set to\n \"warn\", this acts like 0, but a warning is also raised.\n\nReturns\n-------\nscore : float (if average is not None) or array of floats, shape = [n_unique_labels]\n\nSee Also\n--------\naccuracy_score, f1_score, multilabel_confusion_matrix\n\nNotes\n-----\n:func:`jaccard_score` may be a poor metric if there are no\npositives for some samples or classes. Jaccard is undefined if there are\nno true or predicted labels, and our implementation will return a score\nof 0 with a warning.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Jaccard index\n `_.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import jaccard_score\n>>> y_true = np.array([[0, 1, 1],\n... [1, 1, 0]])\n>>> y_pred = np.array([[1, 1, 1],\n... [1, 0, 0]])\n\nIn the binary case:\n\n>>> jaccard_score(y_true[0], y_pred[0])\n0.6666...\n\nIn the multilabel case:\n\n>>> jaccard_score(y_true, y_pred, average='samples')\n0.5833...\n>>> jaccard_score(y_true, y_pred, average='macro')\n0.6666...\n>>> jaccard_score(y_true, y_pred, average=None)\narray([0.5, 0.5, 1. ])\n\nIn the multiclass case:\n\n>>> y_pred = [0, 2, 1, 2]\n>>> y_true = [0, 1, 2, 2]\n>>> jaccard_score(y_true, y_pred, average=None)\narray([1. , 0. , 0.33...])", + "code": "@_deprecate_positional_args\ndef jaccard_score(y_true, y_pred, *, labels=None, pos_label=1,\n average='binary', sample_weight=None, zero_division=\"warn\"):\n \"\"\"Jaccard similarity coefficient score.\n\n The Jaccard index [1], or Jaccard similarity coefficient, defined as\n the size of the intersection divided by the size of the union of two label\n sets, is used to compare the set of predicted labels for a sample to the\n corresponding set of labels in ``y_true``.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\n y_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\n labels : array-like of shape (n_classes,), default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. 
By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n pos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\n average : {None, 'micro', 'macro', 'samples', 'weighted', \\\n 'binary'}, default='binary'\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n zero_division : \"warn\", {0.0, 1.0}, default=\"warn\"\n Sets the value to return when there is a zero division, i.e. when there\n are no negative values in predictions and labels. If set to\n \"warn\", this acts like 0, but a warning is also raised.\n\n Returns\n -------\n score : float (if average is not None) or array of floats, shape =\\\n [n_unique_labels]\n\n See Also\n --------\n accuracy_score, f1_score, multilabel_confusion_matrix\n\n Notes\n -----\n :func:`jaccard_score` may be a poor metric if there are no\n positives for some samples or classes. Jaccard is undefined if there are\n no true or predicted labels, and our implementation will return a score\n of 0 with a warning.\n\n References\n ----------\n .. [1] `Wikipedia entry for the Jaccard index\n `_.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.metrics import jaccard_score\n >>> y_true = np.array([[0, 1, 1],\n ... [1, 1, 0]])\n >>> y_pred = np.array([[1, 1, 1],\n ... [1, 0, 0]])\n\n In the binary case:\n\n >>> jaccard_score(y_true[0], y_pred[0])\n 0.6666...\n\n In the multilabel case:\n\n >>> jaccard_score(y_true, y_pred, average='samples')\n 0.5833...\n >>> jaccard_score(y_true, y_pred, average='macro')\n 0.6666...\n >>> jaccard_score(y_true, y_pred, average=None)\n array([0.5, 0.5, 1. ])\n\n In the multiclass case:\n\n >>> y_pred = [0, 2, 1, 2]\n >>> y_true = [0, 1, 2, 2]\n >>> jaccard_score(y_true, y_pred, average=None)\n array([1. , 0. 
, 0.33...])\n \"\"\"\n labels = _check_set_wise_labels(y_true, y_pred, average, labels,\n pos_label)\n samplewise = average == 'samples'\n MCM = multilabel_confusion_matrix(y_true, y_pred,\n sample_weight=sample_weight,\n labels=labels, samplewise=samplewise)\n numerator = MCM[:, 1, 1]\n denominator = MCM[:, 1, 1] + MCM[:, 0, 1] + MCM[:, 1, 0]\n\n if average == 'micro':\n numerator = np.array([numerator.sum()])\n denominator = np.array([denominator.sum()])\n\n jaccard = _prf_divide(numerator, denominator, 'jaccard',\n 'true or predicted', average, ('jaccard',),\n zero_division=zero_division)\n if average is None:\n return jaccard\n if average == 'weighted':\n weights = MCM[:, 1, 0] + MCM[:, 1, 1]\n if not np.any(weights):\n # numerator is 0, and warning should have already been issued\n weights = None\n elif average == 'samples' and sample_weight is not None:\n weights = sample_weight\n else:\n weights = None\n return np.average(jaccard, weights=weights)" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/log_loss", + "name": "log_loss", + "qname": "sklearn.metrics._classification.log_loss", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/log_loss/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.log_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or label indicator matrix", + "default_value": "", + "description": "Ground truth (correct) labels for n_samples samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "label indicator matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/log_loss/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.log_loss.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of float, shape = (n_samples, n_classes) or (n_samples,)", + "default_value": "", + "description": "Predicted probabilities, as returned by a classifier's\npredict_proba method. If ``y_pred.shape = (n_samples,)``\nthe probabilities provided are assumed to be that of the\npositive class. The labels in ``y_pred`` are assumed to be\nordered alphabetically, as done by\n:class:`preprocessing.LabelBinarizer`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of float" + }, + { + "kind": "NamedType", + "name": "shape = (n_samples, n_classes) or (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/log_loss/eps", + "name": "eps", + "qname": "sklearn.metrics._classification.log_loss.eps", + "default_value": "1e-15", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-15", + "description": "Log loss is undefined for p=0 or p=1, so probabilities are\nclipped to max(eps, min(1 - eps, p))." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/log_loss/normalize", + "name": "normalize", + "qname": "sklearn.metrics._classification.log_loss.normalize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If true, return the mean loss per sample.\nOtherwise, return the sum of the per-sample losses." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/log_loss/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.log_loss.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/log_loss/labels", + "name": "labels", + "qname": "sklearn.metrics._classification.log_loss.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "If not provided, labels will be inferred from y_true. If ``labels``\nis ``None`` and ``y_pred`` has shape (n_samples,) the labels are\nassumed to be binary and are inferred from ``y_true``.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Log loss, aka logistic loss or cross-entropy loss.\n\nThis is the loss function used in (multinomial) logistic regression\nand extensions of it such as neural networks, defined as the negative\nlog-likelihood of a logistic model that returns ``y_pred`` probabilities\nfor its training data ``y_true``.\nThe log loss is only defined for two or more labels.\nFor a single sample with true label :math:`y \\in \\{0,1\\}` and\nand a probability estimate :math:`p = \\operatorname{Pr}(y = 1)`, the log\nloss is:\n\n.. math::\n L_{\\log}(y, p) = -(y \\log (p) + (1 - y) \\log (1 - p))\n\nRead more in the :ref:`User Guide `.", + "docstring": "Log loss, aka logistic loss or cross-entropy loss.\n\nThis is the loss function used in (multinomial) logistic regression\nand extensions of it such as neural networks, defined as the negative\nlog-likelihood of a logistic model that returns ``y_pred`` probabilities\nfor its training data ``y_true``.\nThe log loss is only defined for two or more labels.\nFor a single sample with true label :math:`y \\in \\{0,1\\}` and\nand a probability estimate :math:`p = \\operatorname{Pr}(y = 1)`, the log\nloss is:\n\n.. math::\n L_{\\log}(y, p) = -(y \\log (p) + (1 - y) \\log (1 - p))\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n Ground truth (correct) labels for n_samples samples.\n\ny_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)\n Predicted probabilities, as returned by a classifier's\n predict_proba method. If ``y_pred.shape = (n_samples,)``\n the probabilities provided are assumed to be that of the\n positive class. 
The labels in ``y_pred`` are assumed to be\n ordered alphabetically, as done by\n :class:`preprocessing.LabelBinarizer`.\n\neps : float, default=1e-15\n Log loss is undefined for p=0 or p=1, so probabilities are\n clipped to max(eps, min(1 - eps, p)).\n\nnormalize : bool, default=True\n If true, return the mean loss per sample.\n Otherwise, return the sum of the per-sample losses.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nlabels : array-like, default=None\n If not provided, labels will be inferred from y_true. If ``labels``\n is ``None`` and ``y_pred`` has shape (n_samples,) the labels are\n assumed to be binary and are inferred from ``y_true``.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nloss : float\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e).\n\nExamples\n--------\n>>> from sklearn.metrics import log_loss\n>>> log_loss([\"spam\", \"ham\", \"ham\", \"spam\"],\n... [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])\n0.21616...\n\nReferences\n----------\nC.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,\np. 209.", + "code": "@_deprecate_positional_args\ndef log_loss(y_true, y_pred, *, eps=1e-15, normalize=True, sample_weight=None,\n labels=None):\n r\"\"\"Log loss, aka logistic loss or cross-entropy loss.\n\n This is the loss function used in (multinomial) logistic regression\n and extensions of it such as neural networks, defined as the negative\n log-likelihood of a logistic model that returns ``y_pred`` probabilities\n for its training data ``y_true``.\n The log loss is only defined for two or more labels.\n For a single sample with true label :math:`y \\in \\{0,1\\}` and\n a probability estimate :math:`p = \\operatorname{Pr}(y = 1)`, the log\n loss is:\n\n .. math::\n L_{\\log}(y, p) = -(y \\log (p) + (1 - y) \\log (1 - p))\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like or label indicator matrix\n Ground truth (correct) labels for n_samples samples.\n\n y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)\n Predicted probabilities, as returned by a classifier's\n predict_proba method. If ``y_pred.shape = (n_samples,)``\n the probabilities provided are assumed to be that of the\n positive class. The labels in ``y_pred`` are assumed to be\n ordered alphabetically, as done by\n :class:`preprocessing.LabelBinarizer`.\n\n eps : float, default=1e-15\n Log loss is undefined for p=0 or p=1, so probabilities are\n clipped to max(eps, min(1 - eps, p)).\n\n normalize : bool, default=True\n If true, return the mean loss per sample.\n Otherwise, return the sum of the per-sample losses.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n labels : array-like, default=None\n If not provided, labels will be inferred from y_true. If ``labels``\n is ``None`` and ``y_pred`` has shape (n_samples,) the labels are\n assumed to be binary and are inferred from ``y_true``.\n\n .. versionadded:: 0.18\n\n Returns\n -------\n loss : float\n\n Notes\n -----\n The logarithm used is the natural logarithm (base-e).\n\n Examples\n --------\n >>> from sklearn.metrics import log_loss\n >>> log_loss([\"spam\", \"ham\", \"ham\", \"spam\"],\n ... [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])\n 0.21616...\n\n References\n ----------\n C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,\n p. 
209.\n \"\"\"\n y_pred = check_array(y_pred, ensure_2d=False)\n check_consistent_length(y_pred, y_true, sample_weight)\n\n lb = LabelBinarizer()\n\n if labels is not None:\n lb.fit(labels)\n else:\n lb.fit(y_true)\n\n if len(lb.classes_) == 1:\n if labels is None:\n raise ValueError('y_true contains only one label ({0}). Please '\n 'provide the true labels explicitly through the '\n 'labels argument.'.format(lb.classes_[0]))\n else:\n raise ValueError('The labels array needs to contain at least two '\n 'labels for log_loss, '\n 'got {0}.'.format(lb.classes_))\n\n transformed_labels = lb.transform(y_true)\n\n if transformed_labels.shape[1] == 1:\n transformed_labels = np.append(1 - transformed_labels,\n transformed_labels, axis=1)\n\n # Clipping\n y_pred = np.clip(y_pred, eps, 1 - eps)\n\n # If y_pred is of single dimension, assume y_true to be binary\n # and then check.\n if y_pred.ndim == 1:\n y_pred = y_pred[:, np.newaxis]\n if y_pred.shape[1] == 1:\n y_pred = np.append(1 - y_pred, y_pred, axis=1)\n\n # Check if dimensions are consistent.\n transformed_labels = check_array(transformed_labels)\n if len(lb.classes_) != y_pred.shape[1]:\n if labels is None:\n raise ValueError(\"y_true and y_pred contain different number of \"\n \"classes {0}, {1}. Please provide the true \"\n \"labels explicitly through the labels argument. \"\n \"Classes found in \"\n \"y_true: {2}\".format(transformed_labels.shape[1],\n y_pred.shape[1],\n lb.classes_))\n else:\n raise ValueError('The number of classes in labels is different '\n 'from that in y_pred. Classes found in '\n 'labels: {0}'.format(lb.classes_))\n\n # Renormalize\n y_pred /= y_pred.sum(axis=1)[:, np.newaxis]\n loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)\n\n return _weighted_sum(loss, sample_weight, normalize)" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/matthews_corrcoef", + "name": "matthews_corrcoef", + "qname": "sklearn.metrics._classification.matthews_corrcoef", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/matthews_corrcoef/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.matthews_corrcoef.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape = [n_samples]", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/matthews_corrcoef/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.matthews_corrcoef.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape = [n_samples]", + "default_value": "", + "description": "Estimated targets as returned by a classifier." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/matthews_corrcoef/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.matthews_corrcoef.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the Matthews correlation coefficient (MCC).\n\nThe Matthews correlation coefficient is used in machine learning as a\nmeasure of the quality of binary and multiclass classifications. It takes\ninto account true and false positives and negatives and is generally\nregarded as a balanced measure which can be used even if the classes are of\nvery different sizes. The MCC is in essence a correlation coefficient value\nbetween -1 and +1. A coefficient of +1 represents a perfect prediction, 0\nan average random prediction and -1 an inverse prediction. The statistic\nis also known as the phi coefficient. [source: Wikipedia]\n\nBinary and multiclass labels are supported. Only in the binary case does\nthis relate to information about true and false positives and negatives.\nSee references below.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the Matthews correlation coefficient (MCC).\n\nThe Matthews correlation coefficient is used in machine learning as a\nmeasure of the quality of binary and multiclass classifications. It takes\ninto account true and false positives and negatives and is generally\nregarded as a balanced measure which can be used even if the classes are of\nvery different sizes. The MCC is in essence a correlation coefficient value\nbetween -1 and +1. A coefficient of +1 represents a perfect prediction, 0\nan average random prediction and -1 an inverse prediction. The statistic\nis also known as the phi coefficient. [source: Wikipedia]\n\nBinary and multiclass labels are supported. Only in the binary case does\nthis relate to information about true and false positives and negatives.\nSee references below.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array, shape = [n_samples]\n Ground truth (correct) target values.\n\ny_pred : array, shape = [n_samples]\n Estimated targets as returned by a classifier.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nmcc : float\n The Matthews correlation coefficient (+1 represents a perfect\n prediction, 0 an average random prediction and -1 and inverse\n prediction).\n\nReferences\n----------\n.. [1] `Baldi, Brunak, Chauvin, Andersen and Nielsen, (2000). Assessing the\n accuracy of prediction algorithms for classification: an overview\n `_.\n\n.. [2] `Wikipedia entry for the Matthews Correlation Coefficient\n `_.\n\n.. [3] `Gorodkin, (2004). Comparing two K-category assignments by a\n K-category correlation coefficient\n `_.\n\n.. [4] `Jurman, Riccadonna, Furlanello, (2012). 
A Comparison of MCC and CEN\n Error Measures in MultiClass Prediction\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import matthews_corrcoef\n>>> y_true = [+1, +1, +1, -1]\n>>> y_pred = [+1, -1, +1, +1]\n>>> matthews_corrcoef(y_true, y_pred)\n-0.33...", + "code": "@_deprecate_positional_args\ndef matthews_corrcoef(y_true, y_pred, *, sample_weight=None):\n \"\"\"Compute the Matthews correlation coefficient (MCC).\n\n The Matthews correlation coefficient is used in machine learning as a\n measure of the quality of binary and multiclass classifications. It takes\n into account true and false positives and negatives and is generally\n regarded as a balanced measure which can be used even if the classes are of\n very different sizes. The MCC is in essence a correlation coefficient value\n between -1 and +1. A coefficient of +1 represents a perfect prediction, 0\n an average random prediction and -1 an inverse prediction. The statistic\n is also known as the phi coefficient. [source: Wikipedia]\n\n Binary and multiclass labels are supported. Only in the binary case does\n this relate to information about true and false positives and negatives.\n See references below.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array, shape = [n_samples]\n Ground truth (correct) target values.\n\n y_pred : array, shape = [n_samples]\n Estimated targets as returned by a classifier.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.18\n\n Returns\n -------\n mcc : float\n The Matthews correlation coefficient (+1 represents a perfect\n prediction, 0 an average random prediction and -1 an inverse\n prediction).\n\n References\n ----------\n .. [1] `Baldi, Brunak, Chauvin, Andersen and Nielsen, (2000). Assessing the\n accuracy of prediction algorithms for classification: an overview\n `_.\n\n .. [2] `Wikipedia entry for the Matthews Correlation Coefficient\n `_.\n\n .. [3] `Gorodkin, (2004). Comparing two K-category assignments by a\n K-category correlation coefficient\n `_.\n\n .. [4] `Jurman, Riccadonna, Furlanello, (2012). 
A Comparison of MCC and CEN\n Error Measures in MultiClass Prediction\n `_.\n\n Examples\n --------\n >>> from sklearn.metrics import matthews_corrcoef\n >>> y_true = [+1, +1, +1, -1]\n >>> y_pred = [+1, -1, +1, +1]\n >>> matthews_corrcoef(y_true, y_pred)\n -0.33...\n \"\"\"\n y_type, y_true, y_pred = _check_targets(y_true, y_pred)\n check_consistent_length(y_true, y_pred, sample_weight)\n if y_type not in {\"binary\", \"multiclass\"}:\n raise ValueError(\"%s is not supported\" % y_type)\n\n lb = LabelEncoder()\n lb.fit(np.hstack([y_true, y_pred]))\n y_true = lb.transform(y_true)\n y_pred = lb.transform(y_pred)\n\n C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)\n t_sum = C.sum(axis=1, dtype=np.float64)\n p_sum = C.sum(axis=0, dtype=np.float64)\n n_correct = np.trace(C, dtype=np.float64)\n n_samples = p_sum.sum()\n cov_ytyp = n_correct * n_samples - np.dot(t_sum, p_sum)\n cov_ypyp = n_samples ** 2 - np.dot(p_sum, p_sum)\n cov_ytyt = n_samples ** 2 - np.dot(t_sum, t_sum)\n mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)\n\n if np.isnan(mcc):\n return 0.\n else:\n return mcc" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/multilabel_confusion_matrix", + "name": "multilabel_confusion_matrix", + "qname": "sklearn.metrics._classification.multilabel_confusion_matrix", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/multilabel_confusion_matrix/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.multilabel_confusion_matrix.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_outputs) or (n_samples,)", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_outputs) or (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/multilabel_confusion_matrix/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.multilabel_confusion_matrix.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_outputs) or (n_samples,)", + "default_value": "", + "description": "Estimated targets as returned by a classifier." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_outputs) or (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/multilabel_confusion_matrix/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.multilabel_confusion_matrix.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/multilabel_confusion_matrix/labels", + "name": "labels", + "qname": "sklearn.metrics._classification.multilabel_confusion_matrix.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "A list of classes or column indices to select some (or to force\ninclusion of classes absent from the data)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/multilabel_confusion_matrix/samplewise", + "name": "samplewise", + "qname": "sklearn.metrics._classification.multilabel_confusion_matrix.samplewise", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "In the multilabel case, this calculates a confusion matrix per sample." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute a confusion matrix for each class or sample.\n\n.. versionadded:: 0.21\n\nCompute class-wise (default) or sample-wise (samplewise=True) multilabel\nconfusion matrix to evaluate the accuracy of a classification, and output\nconfusion matrices for each class or sample.\n\nIn multilabel confusion matrix :math:`MCM`, the count of true negatives\nis :math:`MCM_{:,0,0}`, false negatives is :math:`MCM_{:,1,0}`,\ntrue positives is :math:`MCM_{:,1,1}` and false positives is\n:math:`MCM_{:,0,1}`.\n\nMulticlass data will be treated as if binarized under a one-vs-rest\ntransformation. Returned confusion matrices will be in the order of\nsorted unique labels in the union of (y_true, y_pred).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute a confusion matrix for each class or sample.\n\n.. versionadded:: 0.21\n\nCompute class-wise (default) or sample-wise (samplewise=True) multilabel\nconfusion matrix to evaluate the accuracy of a classification, and output\nconfusion matrices for each class or sample.\n\nIn multilabel confusion matrix :math:`MCM`, the count of true negatives\nis :math:`MCM_{:,0,0}`, false negatives is :math:`MCM_{:,1,0}`,\ntrue positives is :math:`MCM_{:,1,1}` and false positives is\n:math:`MCM_{:,0,1}`.\n\nMulticlass data will be treated as if binarized under a one-vs-rest\ntransformation. 
Returned confusion matrices will be in the order of\nsorted unique labels in the union of (y_true, y_pred).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : {array-like, sparse matrix} of shape (n_samples, n_outputs) or (n_samples,)\n Ground truth (correct) target values.\n\ny_pred : {array-like, sparse matrix} of shape (n_samples, n_outputs) or (n_samples,)\n Estimated targets as returned by a classifier.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nlabels : array-like of shape (n_classes,), default=None\n A list of classes or column indices to select some (or to force\n inclusion of classes absent from the data).\n\nsamplewise : bool, default=False\n In the multilabel case, this calculates a confusion matrix per sample.\n\nReturns\n-------\nmulti_confusion : ndarray of shape (n_outputs, 2, 2)\n A 2x2 confusion matrix corresponding to each output in the input.\n When calculating class-wise multi_confusion (default), then\n n_outputs = n_labels; when calculating sample-wise multi_confusion\n (samplewise=True), n_outputs = n_samples. If ``labels`` is defined,\n the results will be returned in the order specified in ``labels``,\n otherwise the results will be returned in sorted order by default.\n\nSee Also\n--------\nconfusion_matrix\n\nNotes\n-----\nThe multilabel_confusion_matrix calculates class-wise or sample-wise\nmultilabel confusion matrices, and in multiclass tasks, labels are\nbinarized in a one-vs-rest way; while confusion_matrix calculates\none confusion matrix for confusion between every two classes.\n\nExamples\n--------\nMultilabel-indicator case:\n\n>>> import numpy as np\n>>> from sklearn.metrics import multilabel_confusion_matrix\n>>> y_true = np.array([[1, 0, 1],\n... [0, 1, 0]])\n>>> y_pred = np.array([[1, 0, 0],\n... [0, 1, 1]])\n>>> multilabel_confusion_matrix(y_true, y_pred)\narray([[[1, 0],\n [0, 1]],\n\n [[1, 0],\n [0, 1]],\n\n [[0, 1],\n [1, 0]]])\n\nMulticlass case:\n\n>>> y_true = [\"cat\", \"ant\", \"cat\", \"cat\", \"ant\", \"bird\"]\n>>> y_pred = [\"ant\", \"ant\", \"cat\", \"cat\", \"ant\", \"cat\"]\n>>> multilabel_confusion_matrix(y_true, y_pred,\n... labels=[\"ant\", \"bird\", \"cat\"])\narray([[[3, 1],\n [0, 2]],\n\n [[5, 0],\n [1, 0]],\n\n [[2, 1],\n [1, 2]]])", + "code": "@_deprecate_positional_args\ndef multilabel_confusion_matrix(y_true, y_pred, *, sample_weight=None,\n labels=None, samplewise=False):\n \"\"\"Compute a confusion matrix for each class or sample.\n\n .. versionadded:: 0.21\n\n Compute class-wise (default) or sample-wise (samplewise=True) multilabel\n confusion matrix to evaluate the accuracy of a classification, and output\n confusion matrices for each class or sample.\n\n In multilabel confusion matrix :math:`MCM`, the count of true negatives\n is :math:`MCM_{:,0,0}`, false negatives is :math:`MCM_{:,1,0}`,\n true positives is :math:`MCM_{:,1,1}` and false positives is\n :math:`MCM_{:,0,1}`.\n\n Multiclass data will be treated as if binarized under a one-vs-rest\n transformation. 
Returned confusion matrices will be in the order of\n sorted unique labels in the union of (y_true, y_pred).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : {array-like, sparse matrix} of shape (n_samples, n_outputs) or \\\n (n_samples,)\n Ground truth (correct) target values.\n\n y_pred : {array-like, sparse matrix} of shape (n_samples, n_outputs) or \\\n (n_samples,)\n Estimated targets as returned by a classifier.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n labels : array-like of shape (n_classes,), default=None\n A list of classes or column indices to select some (or to force\n inclusion of classes absent from the data).\n\n samplewise : bool, default=False\n In the multilabel case, this calculates a confusion matrix per sample.\n\n Returns\n -------\n multi_confusion : ndarray of shape (n_outputs, 2, 2)\n A 2x2 confusion matrix corresponding to each output in the input.\n When calculating class-wise multi_confusion (default), then\n n_outputs = n_labels; when calculating sample-wise multi_confusion\n (samplewise=True), n_outputs = n_samples. If ``labels`` is defined,\n the results will be returned in the order specified in ``labels``,\n otherwise the results will be returned in sorted order by default.\n\n See Also\n --------\n confusion_matrix\n\n Notes\n -----\n The multilabel_confusion_matrix calculates class-wise or sample-wise\n multilabel confusion matrices, and in multiclass tasks, labels are\n binarized in a one-vs-rest way; while confusion_matrix calculates\n one confusion matrix for confusion between every two classes.\n\n Examples\n --------\n Multilabel-indicator case:\n\n >>> import numpy as np\n >>> from sklearn.metrics import multilabel_confusion_matrix\n >>> y_true = np.array([[1, 0, 1],\n ... [0, 1, 0]])\n >>> y_pred = np.array([[1, 0, 0],\n ... [0, 1, 1]])\n >>> multilabel_confusion_matrix(y_true, y_pred)\n array([[[1, 0],\n [0, 1]],\n \n [[1, 0],\n [0, 1]],\n \n [[0, 1],\n [1, 0]]])\n\n Multiclass case:\n\n >>> y_true = [\"cat\", \"ant\", \"cat\", \"cat\", \"ant\", \"bird\"]\n >>> y_pred = [\"ant\", \"ant\", \"cat\", \"cat\", \"ant\", \"cat\"]\n >>> multilabel_confusion_matrix(y_true, y_pred,\n ... 
labels=[\"ant\", \"bird\", \"cat\"])\n array([[[3, 1],\n [0, 2]],\n \n [[5, 0],\n [1, 0]],\n \n [[2, 1],\n [1, 2]]])\n \"\"\"\n y_type, y_true, y_pred = _check_targets(y_true, y_pred)\n if sample_weight is not None:\n sample_weight = column_or_1d(sample_weight)\n check_consistent_length(y_true, y_pred, sample_weight)\n\n if y_type not in (\"binary\", \"multiclass\", \"multilabel-indicator\"):\n raise ValueError(\"%s is not supported\" % y_type)\n\n present_labels = unique_labels(y_true, y_pred)\n if labels is None:\n labels = present_labels\n n_labels = None\n else:\n n_labels = len(labels)\n labels = np.hstack([labels, np.setdiff1d(present_labels, labels,\n assume_unique=True)])\n\n if y_true.ndim == 1:\n if samplewise:\n raise ValueError(\"Samplewise metrics are not available outside of \"\n \"multilabel classification.\")\n\n le = LabelEncoder()\n le.fit(labels)\n y_true = le.transform(y_true)\n y_pred = le.transform(y_pred)\n sorted_labels = le.classes_\n\n # labels are now from 0 to len(labels) - 1 -> use bincount\n tp = y_true == y_pred\n tp_bins = y_true[tp]\n if sample_weight is not None:\n tp_bins_weights = np.asarray(sample_weight)[tp]\n else:\n tp_bins_weights = None\n\n if len(tp_bins):\n tp_sum = np.bincount(tp_bins, weights=tp_bins_weights,\n minlength=len(labels))\n else:\n # Pathological case\n true_sum = pred_sum = tp_sum = np.zeros(len(labels))\n if len(y_pred):\n pred_sum = np.bincount(y_pred, weights=sample_weight,\n minlength=len(labels))\n if len(y_true):\n true_sum = np.bincount(y_true, weights=sample_weight,\n minlength=len(labels))\n\n # Retain only selected labels\n indices = np.searchsorted(sorted_labels, labels[:n_labels])\n tp_sum = tp_sum[indices]\n true_sum = true_sum[indices]\n pred_sum = pred_sum[indices]\n\n else:\n sum_axis = 1 if samplewise else 0\n\n # All labels are index integers for multilabel.\n # Select labels:\n if not np.array_equal(labels, present_labels):\n if np.max(labels) > np.max(present_labels):\n raise ValueError('All labels must be in [0, n labels) for '\n 'multilabel targets. '\n 'Got %d > %d' %\n (np.max(labels), np.max(present_labels)))\n if np.min(labels) < 0:\n raise ValueError('All labels must be in [0, n labels) for '\n 'multilabel targets. 
'\n 'Got %d < 0' % np.min(labels))\n\n if n_labels is not None:\n y_true = y_true[:, labels[:n_labels]]\n y_pred = y_pred[:, labels[:n_labels]]\n\n # calculate weighted counts\n true_and_pred = y_true.multiply(y_pred)\n tp_sum = count_nonzero(true_and_pred, axis=sum_axis,\n sample_weight=sample_weight)\n pred_sum = count_nonzero(y_pred, axis=sum_axis,\n sample_weight=sample_weight)\n true_sum = count_nonzero(y_true, axis=sum_axis,\n sample_weight=sample_weight)\n\n fp = pred_sum - tp_sum\n fn = true_sum - tp_sum\n tp = tp_sum\n\n if sample_weight is not None and samplewise:\n sample_weight = np.array(sample_weight)\n tp = np.array(tp)\n fp = np.array(fp)\n fn = np.array(fn)\n tn = sample_weight * y_true.shape[1] - tp - fp - fn\n elif sample_weight is not None:\n tn = sum(sample_weight) - tp - fp - fn\n elif samplewise:\n tn = y_true.shape[1] - tp - fp - fn\n else:\n tn = y_true.shape[0] - tp - fp - fn\n\n return np.array([tn, fp, fn, tp]).T.reshape(-1, 2, 2)" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_recall_fscore_support", + "name": "precision_recall_fscore_support", + "qname": "sklearn.metrics._classification.precision_recall_fscore_support", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/precision_recall_fscore_support/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.precision_recall_fscore_support.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_recall_fscore_support/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.precision_recall_fscore_support.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Estimated targets as returned by a classifier." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_recall_fscore_support/beta", + "name": "beta", + "qname": "sklearn.metrics._classification.precision_recall_fscore_support.beta", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "The strength of recall versus precision in the F-score." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_recall_fscore_support/labels", + "name": "labels", + "qname": "sklearn.metrics._classification.precision_recall_fscore_support.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. 
Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_recall_fscore_support/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._classification.precision_recall_fscore_support.pos_label", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or int", + "default_value": "1", + "description": "The class to report if ``average='binary'`` and the data is binary.\nIf the data are multiclass or multilabel, this will be ignored;\nsetting ``labels=[pos_label]`` and ``average != 'binary'`` will report\nscores for that label only." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_recall_fscore_support/average", + "name": "average", + "qname": "sklearn.metrics._classification.precision_recall_fscore_support.average", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'binary', 'micro', 'macro', 'samples','weighted'}", + "default_value": "None", + "description": "If ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`)." + }, + "type": { + "kind": "EnumType", + "values": ["micro", "weighted", "macro", "binary", "samples"] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_recall_fscore_support/warn_for", + "name": "warn_for", + "qname": "sklearn.metrics._classification.precision_recall_fscore_support.warn_for", + "default_value": "('precision', 'recall', 'f-score')", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "tuple or set, for internal use", + "default_value": "", + "description": "This determines which warnings will be made in the case that this\nfunction is being used to return only one of its metrics." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "tuple" + }, + { + "kind": "NamedType", + "name": "set" + }, + { + "kind": "NamedType", + "name": "for internal use" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_recall_fscore_support/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.precision_recall_fscore_support.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_recall_fscore_support/zero_division", + "name": "zero_division", + "qname": "sklearn.metrics._classification.precision_recall_fscore_support.zero_division", + "default_value": "'warn'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "\"warn\", 0 or 1", + "default_value": "\"warn\"", + "description": "Sets the value to return when there is a zero division:\n - recall: when there are no positive labels\n - precision: when there are no positive predictions\n - f-score: both\n\nIf set to \"warn\", this acts as 0, but warnings are also raised." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "\"warn\"" + }, + { + "kind": "NamedType", + "name": "0" + }, + { + "kind": "NamedType", + "name": "1" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute precision, recall, F-measure and support for each class.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe F-beta score can be interpreted as a weighted harmonic mean of\nthe precision and recall, where an F-beta score reaches its best\nvalue at 1 and worst score at 0.\n\nThe F-beta score weights recall more than precision by a factor of\n``beta``. ``beta == 1.0`` means recall and precision are equally important.\n\nThe support is the number of occurrences of each class in ``y_true``.\n\nIf ``pos_label is None`` and in binary classification, this function\nreturns the average precision, recall and F-measure if ``average``\nis one of ``'micro'``, ``'macro'``, ``'weighted'`` or ``'samples'``.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute precision, recall, F-measure and support for each class.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. 
The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe F-beta score can be interpreted as a weighted harmonic mean of\nthe precision and recall, where an F-beta score reaches its best\nvalue at 1 and worst score at 0.\n\nThe F-beta score weights recall more than precision by a factor of\n``beta``. ``beta == 1.0`` means recall and precision are equally important.\n\nThe support is the number of occurrences of each class in ``y_true``.\n\nIf ``pos_label is None`` and in binary classification, this function\nreturns the average precision, recall and F-measure if ``average``\nis one of ``'micro'``, ``'macro'``, ``'weighted'`` or ``'samples'``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nbeta : float, default=1.0\n The strength of recall versus precision in the F-score.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'binary', 'micro', 'macro', 'samples','weighted'}, default=None\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). 
This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nwarn_for : tuple or set, for internal use\n This determines which warnings will be made in the case that this\n function is being used to return only one of its metrics.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division:\n - recall: when there are no positive labels\n - precision: when there are no positive predictions\n - f-score: both\n\n If set to \"warn\", this acts as 0, but warnings are also raised.\n\nReturns\n-------\nprecision : float (if average is not None) or array of float, shape = [n_unique_labels]\n\nrecall : float (if average is not None) or array of float, shape = [n_unique_labels]\n\nfbeta_score : float (if average is not None) or array of float, shape = [n_unique_labels]\n\nsupport : None (if average is not None) or array of int, shape = [n_unique_labels]\n The number of occurrences of each label in ``y_true``.\n\nNotes\n-----\nWhen ``true positive + false positive == 0``, precision is undefined.\nWhen ``true positive + false negative == 0``, recall is undefined.\nIn such cases, by default the metric will be set to 0, as will f-score,\nand ``UndefinedMetricWarning`` will be raised. This behavior can be\nmodified with ``zero_division``.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Precision and recall\n <https://en.wikipedia.org/wiki/Precision_and_recall>`_.\n\n.. [2] `Wikipedia entry for the F1-score\n <https://en.wikipedia.org/wiki/F1_score>`_.\n\n.. [3] `Discriminative Methods for Multi-labeled Classification Advances\n in Knowledge Discovery and Data Mining (2004), pp. 22-30 by Shantanu\n Godbole, Sunita Sarawagi\n <http://www.godbole.net/shantanu/pubs/multilabelsvm-pakdd04.pdf>`_.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import precision_recall_fscore_support\n>>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])\n>>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])\n>>> precision_recall_fscore_support(y_true, y_pred, average='macro')\n(0.22..., 0.33..., 0.26..., None)\n>>> precision_recall_fscore_support(y_true, y_pred, average='micro')\n(0.33..., 0.33..., 0.33..., None)\n>>> precision_recall_fscore_support(y_true, y_pred, average='weighted')\n(0.22..., 0.33..., 0.26..., None)\n\nIt is possible to compute per-label precisions, recalls, F1-scores and\nsupports instead of averaging:\n\n>>> precision_recall_fscore_support(y_true, y_pred, average=None,\n... labels=['pig', 'dog', 'cat'])\n(array([0. , 0. , 0.66...]),\n array([0., 0., 1.]), array([0. , 0. , 0.8]),\n array([2, 2, 2]))", + "code": "@_deprecate_positional_args\ndef precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None,\n pos_label=1, average=None,\n warn_for=('precision', 'recall',\n 'f-score'),\n sample_weight=None,\n zero_division=\"warn\"):\n \"\"\"Compute precision, recall, F-measure and support for each class.\n\n The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\n true positives and ``fp`` the number of false positives. The precision is\n intuitively the ability of the classifier not to label as positive a sample\n that is negative.\n\n The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\n true positives and ``fn`` the number of false negatives. 
The recall is\n intuitively the ability of the classifier to find all the positive samples.\n\n The F-beta score can be interpreted as a weighted harmonic mean of\n the precision and recall, where an F-beta score reaches its best\n value at 1 and worst score at 0.\n\n The F-beta score weights recall more than precision by a factor of\n ``beta``. ``beta == 1.0`` means recall and precision are equally important.\n\n The support is the number of occurrences of each class in ``y_true``.\n\n If ``pos_label is None`` and in binary classification, this function\n returns the average precision, recall and F-measure if ``average``\n is one of ``'micro'``, ``'macro'``, ``'weighted'`` or ``'samples'``.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\n y_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\n beta : float, default=1.0\n The strength of recall versus precision in the F-score.\n\n labels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n pos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\n average : {'binary', 'micro', 'macro', 'samples','weighted'}, \\\n default=None\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). 
This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\n warn_for : tuple or set, for internal use\n This determines which warnings will be made in the case that this\n function is being used to return only one of its metrics.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n zero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division:\n - recall: when there are no positive labels\n - precision: when there are no positive predictions\n - f-score: both\n\n If set to \"warn\", this acts as 0, but warnings are also raised.\n\n Returns\n -------\n precision : float (if average is not None) or array of float, shape =\\\n [n_unique_labels]\n\n recall : float (if average is not None) or array of float, , shape =\\\n [n_unique_labels]\n\n fbeta_score : float (if average is not None) or array of float, shape =\\\n [n_unique_labels]\n\n support : None (if average is not None) or array of int, shape =\\\n [n_unique_labels]\n The number of occurrences of each label in ``y_true``.\n\n Notes\n -----\n When ``true positive + false positive == 0``, precision is undefined.\n When ``true positive + false negative == 0``, recall is undefined.\n In such cases, by default the metric will be set to 0, as will f-score,\n and ``UndefinedMetricWarning`` will be raised. This behavior can be\n modified with ``zero_division``.\n\n References\n ----------\n .. [1] `Wikipedia entry for the Precision and recall\n `_.\n\n .. [2] `Wikipedia entry for the F1-score\n `_.\n\n .. [3] `Discriminative Methods for Multi-labeled Classification Advances\n in Knowledge Discovery and Data Mining (2004), pp. 22-30 by Shantanu\n Godbole, Sunita Sarawagi\n `_.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.metrics import precision_recall_fscore_support\n >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])\n >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])\n >>> precision_recall_fscore_support(y_true, y_pred, average='macro')\n (0.22..., 0.33..., 0.26..., None)\n >>> precision_recall_fscore_support(y_true, y_pred, average='micro')\n (0.33..., 0.33..., 0.33..., None)\n >>> precision_recall_fscore_support(y_true, y_pred, average='weighted')\n (0.22..., 0.33..., 0.26..., None)\n\n It is possible to compute per-label precisions, recalls, F1-scores and\n supports instead of averaging:\n\n >>> precision_recall_fscore_support(y_true, y_pred, average=None,\n ... labels=['pig', 'dog', 'cat'])\n (array([0. , 0. , 0.66...]),\n array([0., 0., 1.]), array([0. , 0. 
, 0.8]),\n array([2, 2, 2]))\n \"\"\"\n _check_zero_division(zero_division)\n if beta < 0:\n raise ValueError(\"beta should be >=0 in the F-beta score\")\n labels = _check_set_wise_labels(y_true, y_pred, average, labels,\n pos_label)\n\n # Calculate tp_sum, pred_sum, true_sum ###\n samplewise = average == 'samples'\n MCM = multilabel_confusion_matrix(y_true, y_pred,\n sample_weight=sample_weight,\n labels=labels, samplewise=samplewise)\n tp_sum = MCM[:, 1, 1]\n pred_sum = tp_sum + MCM[:, 0, 1]\n true_sum = tp_sum + MCM[:, 1, 0]\n\n if average == 'micro':\n tp_sum = np.array([tp_sum.sum()])\n pred_sum = np.array([pred_sum.sum()])\n true_sum = np.array([true_sum.sum()])\n\n # Finally, we have all our sufficient statistics. Divide! #\n beta2 = beta ** 2\n\n # Divide, and on zero-division, set scores and/or warn according to\n # zero_division:\n precision = _prf_divide(tp_sum, pred_sum, 'precision',\n 'predicted', average, warn_for, zero_division)\n recall = _prf_divide(tp_sum, true_sum, 'recall',\n 'true', average, warn_for, zero_division)\n\n # warn for f-score only if zero_division is warn, it is in warn_for\n # and BOTH prec and rec are ill-defined\n if zero_division == \"warn\" and (\"f-score\",) == warn_for:\n if (pred_sum[true_sum == 0] == 0).any():\n _warn_prf(\n average, \"true nor predicted\", 'F-score is', len(true_sum)\n )\n\n # if tp == 0 F will be 1 only if all predictions are zero, all labels are\n # zero, and zero_division=1. In all other case, 0\n if np.isposinf(beta):\n f_score = recall\n else:\n denom = beta2 * precision + recall\n\n denom[denom == 0.] = 1 # avoid division by 0\n f_score = (1 + beta2) * precision * recall / denom\n\n # Average the results\n if average == 'weighted':\n weights = true_sum\n if weights.sum() == 0:\n zero_division_value = np.float64(1.0)\n if zero_division in [\"warn\", 0]:\n zero_division_value = np.float64(0.0)\n # precision is zero_division if there are no positive predictions\n # recall is zero_division if there are no positive labels\n # fscore is zero_division if all labels AND predictions are\n # negative\n if pred_sum.sum() == 0:\n return (zero_division_value,\n zero_division_value,\n zero_division_value,\n None)\n else:\n return (np.float64(0.0),\n zero_division_value,\n np.float64(0.0),\n None)\n\n elif average == 'samples':\n weights = sample_weight\n else:\n weights = None\n\n if average is not None:\n assert average != 'binary' or len(precision) == 1\n precision = np.average(precision, weights=weights)\n recall = np.average(recall, weights=weights)\n f_score = np.average(f_score, weights=weights)\n true_sum = None # return no support\n\n return precision, recall, f_score, true_sum" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_score", + "name": "precision_score", + "qname": "sklearn.metrics._classification.precision_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/precision_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.precision_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Ground truth (correct) target values." 
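As a minimal sketch (not part of the extracted API data), the formulas documented in the ``precision_recall_fscore_support`` entry above — ``precision = tp / (tp + fp)``, ``recall = tp / (tp + fn)``, and the guarded F-beta division — can be reproduced from the public ``multilabel_confusion_matrix``, mirroring the ``tp_sum``/``pred_sum``/``true_sum`` bookkeeping in the ``code`` field:

import numpy as np
from sklearn.metrics import (multilabel_confusion_matrix,
                             precision_recall_fscore_support)

y_true = ['cat', 'dog', 'pig', 'cat', 'dog', 'pig']
y_pred = ['cat', 'pig', 'dog', 'cat', 'cat', 'dog']

# One 2x2 confusion matrix per label, in sorted label order (cat, dog, pig).
MCM = multilabel_confusion_matrix(y_true, y_pred)
tp = MCM[:, 1, 1]
pred_sum = tp + MCM[:, 0, 1]    # tp + fp
true_sum = tp + MCM[:, 1, 0]    # tp + fn

precision = tp / pred_sum
recall = tp / true_sum
denom = precision + recall      # beta == 1, so beta2 * precision + recall
denom[denom == 0.] = 1          # same zero-division guard as the code above
f1 = 2 * precision * recall / denom

p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, average=None)
assert np.allclose(p, precision) and np.allclose(r, recall) and np.allclose(f, f1)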
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_score/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.precision_score.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Estimated targets as returned by a classifier." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_score/labels", + "name": "labels", + "qname": "sklearn.metrics._classification.precision_score.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order.\n\n.. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_score/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._classification.precision_score.pos_label", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or int", + "default_value": "1", + "description": "The class to report if ``average='binary'`` and the data is binary.\nIf the data are multiclass or multilabel, this will be ignored;\nsetting ``labels=[pos_label]`` and ``average != 'binary'`` will report\nscores for that label only." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_score/average", + "name": "average", + "qname": "sklearn.metrics._classification.precision_score.average", + "default_value": "'binary'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'micro', 'macro', 'samples', 'weighted', 'binary'} default='binary'", + "default_value": "", + "description": "This parameter is required for multiclass/multilabel targets.\nIf ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. 
This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`)." + }, + "type": { + "kind": "EnumType", + "values": ["micro", "weighted", "macro", "binary", "samples"] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.precision_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/precision_score/zero_division", + "name": "zero_division", + "qname": "sklearn.metrics._classification.precision_score.zero_division", + "default_value": "'warn'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "\"warn\", 0 or 1", + "default_value": "\"warn\"", + "description": "Sets the value to return when there is a zero division. If set to\n\"warn\", this acts as 0, but warnings are also raised." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "\"warn\"" + }, + { + "kind": "NamedType", + "name": "0" + }, + { + "kind": "NamedType", + "name": "1" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the precision.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe best value is 1 and the worst value is 0.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the precision.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe best value is 1 and the worst value is 0.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. 
versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'micro', 'macro', 'samples', 'weighted', 'binary'} default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division. If set to\n \"warn\", this acts as 0, but warnings are also raised.\n\nReturns\n-------\nprecision : float (if average is not None) or array of float of shape\n (n_unique_labels,)\n Precision of the positive class in binary classification or weighted\n average of the precision of each class for the multiclass task.\n\nSee Also\n--------\nprecision_recall_fscore_support, multilabel_confusion_matrix\n\nNotes\n-----\nWhen ``true positive + false positive == 0``, precision returns 0 and\nraises ``UndefinedMetricWarning``. This behavior can be\nmodified with ``zero_division``.\n\nExamples\n--------\n>>> from sklearn.metrics import precision_score\n>>> y_true = [0, 1, 2, 0, 1, 2]\n>>> y_pred = [0, 2, 1, 0, 0, 1]\n>>> precision_score(y_true, y_pred, average='macro')\n0.22...\n>>> precision_score(y_true, y_pred, average='micro')\n0.33...\n>>> precision_score(y_true, y_pred, average='weighted')\n0.22...\n>>> precision_score(y_true, y_pred, average=None)\narray([0.66..., 0. , 0. ])\n>>> y_pred = [0, 0, 0, 0, 0, 0]\n>>> precision_score(y_true, y_pred, average=None)\narray([0.33..., 0. , 0. ])\n>>> precision_score(y_true, y_pred, average=None, zero_division=1)\narray([0.33..., 1. , 1. ])", + "code": "@_deprecate_positional_args\ndef precision_score(y_true, y_pred, *, labels=None, pos_label=1,\n average='binary', sample_weight=None,\n zero_division=\"warn\"):\n \"\"\"Compute the precision.\n\n The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\n true positives and ``fp`` the number of false positives. 
The precision is\n intuitively the ability of the classifier not to label as positive a sample\n that is negative.\n\n The best value is 1 and the worst value is 0.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\n y_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\n labels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\n pos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\n average : {'micro', 'macro', 'samples', 'weighted', 'binary'} \\\n default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n zero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division. If set to\n \"warn\", this acts as 0, but warnings are also raised.\n\n Returns\n -------\n precision : float (if average is not None) or array of float of shape\n (n_unique_labels,)\n Precision of the positive class in binary classification or weighted\n average of the precision of each class for the multiclass task.\n\n See Also\n --------\n precision_recall_fscore_support, multilabel_confusion_matrix\n\n Notes\n -----\n When ``true positive + false positive == 0``, precision returns 0 and\n raises ``UndefinedMetricWarning``. 
This behavior can be\n modified with ``zero_division``.\n\n Examples\n --------\n >>> from sklearn.metrics import precision_score\n >>> y_true = [0, 1, 2, 0, 1, 2]\n >>> y_pred = [0, 2, 1, 0, 0, 1]\n >>> precision_score(y_true, y_pred, average='macro')\n 0.22...\n >>> precision_score(y_true, y_pred, average='micro')\n 0.33...\n >>> precision_score(y_true, y_pred, average='weighted')\n 0.22...\n >>> precision_score(y_true, y_pred, average=None)\n array([0.66..., 0. , 0. ])\n >>> y_pred = [0, 0, 0, 0, 0, 0]\n >>> precision_score(y_true, y_pred, average=None)\n array([0.33..., 0. , 0. ])\n >>> precision_score(y_true, y_pred, average=None, zero_division=1)\n array([0.33..., 1. , 1. ])\n\n \"\"\"\n p, _, _, _ = precision_recall_fscore_support(y_true, y_pred,\n labels=labels,\n pos_label=pos_label,\n average=average,\n warn_for=('precision',),\n sample_weight=sample_weight,\n zero_division=zero_division)\n return p" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/recall_score", + "name": "recall_score", + "qname": "sklearn.metrics._classification.recall_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/recall_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.recall_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/recall_score/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.recall_score.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Estimated targets as returned by a classifier." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/recall_score/labels", + "name": "labels", + "qname": "sklearn.metrics._classification.recall_score.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order.\n\n.. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem." 
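A short sketch of the ``zero_division`` behaviour documented for ``precision_score`` above, reusing the docstring's own data:

from sklearn.metrics import precision_score

y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 0, 0, 0, 0, 0]   # classes 1 and 2 are never predicted
# zero_division=0 keeps the undefined precisions at 0.0 without warning;
# zero_division=1 substitutes 1.0 instead.
print(precision_score(y_true, y_pred, average=None, zero_division=0))
# [0.33... 0.   0.  ]
print(precision_score(y_true, y_pred, average=None, zero_division=1))
# [0.33... 1.   1.  ]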
+ }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/recall_score/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._classification.recall_score.pos_label", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or int", + "default_value": "1", + "description": "The class to report if ``average='binary'`` and the data is binary.\nIf the data are multiclass or multilabel, this will be ignored;\nsetting ``labels=[pos_label]`` and ``average != 'binary'`` will report\nscores for that label only." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/recall_score/average", + "name": "average", + "qname": "sklearn.metrics._classification.recall_score.average", + "default_value": "'binary'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'micro', 'macro', 'samples', 'weighted', 'binary'} default='binary'", + "default_value": "", + "description": "This parameter is required for multiclass/multilabel targets.\nIf ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`)." + }, + "type": { + "kind": "EnumType", + "values": ["micro", "weighted", "macro", "binary", "samples"] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/recall_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.recall_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/recall_score/zero_division", + "name": "zero_division", + "qname": "sklearn.metrics._classification.recall_score.zero_division", + "default_value": "'warn'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "\"warn\", 0 or 1", + "default_value": "\"warn\"", + "description": "Sets the value to return when there is a zero division. If set to\n\"warn\", this acts as 0, but warnings are also raised." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "\"warn\"" + }, + { + "kind": "NamedType", + "name": "0" + }, + { + "kind": "NamedType", + "name": "1" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the recall.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe best value is 1 and the worst value is 0.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the recall.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe best value is 1 and the worst value is 0.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'micro', 'macro', 'samples', 'weighted', 'binary'} default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division. 
If set to\n \"warn\", this acts as 0, but warnings are also raised.\n\nReturns\n-------\nrecall : float (if average is not None) or array of float of shape\n (n_unique_labels,)\n Recall of the positive class in binary classification or weighted\n average of the recall of each class for the multiclass task.\n\nSee Also\n--------\nprecision_recall_fscore_support, balanced_accuracy_score,\nmultilabel_confusion_matrix\n\nNotes\n-----\nWhen ``true positive + false negative == 0``, recall returns 0 and raises\n``UndefinedMetricWarning``. This behavior can be modified with\n``zero_division``.\n\nExamples\n--------\n>>> from sklearn.metrics import recall_score\n>>> y_true = [0, 1, 2, 0, 1, 2]\n>>> y_pred = [0, 2, 1, 0, 0, 1]\n>>> recall_score(y_true, y_pred, average='macro')\n0.33...\n>>> recall_score(y_true, y_pred, average='micro')\n0.33...\n>>> recall_score(y_true, y_pred, average='weighted')\n0.33...\n>>> recall_score(y_true, y_pred, average=None)\narray([1., 0., 0.])\n>>> y_true = [0, 0, 0, 0, 0, 0]\n>>> recall_score(y_true, y_pred, average=None)\narray([0.5, 0. , 0. ])\n>>> recall_score(y_true, y_pred, average=None, zero_division=1)\narray([0.5, 1. , 1. ])", + "code": "@_deprecate_positional_args\ndef recall_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary',\n sample_weight=None, zero_division=\"warn\"):\n \"\"\"Compute the recall.\n\n The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\n true positives and ``fn`` the number of false negatives. The recall is\n intuitively the ability of the classifier to find all the positive samples.\n\n The best value is 1 and the worst value is 0.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\n y_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\n labels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\n pos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\n average : {'micro', 'macro', 'samples', 'weighted', 'binary'} \\\n default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. 
This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n zero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division. If set to\n \"warn\", this acts as 0, but warnings are also raised.\n\n Returns\n -------\n recall : float (if average is not None) or array of float of shape\n (n_unique_labels,)\n Recall of the positive class in binary classification or weighted\n average of the recall of each class for the multiclass task.\n\n See Also\n --------\n precision_recall_fscore_support, balanced_accuracy_score,\n multilabel_confusion_matrix\n\n Notes\n -----\n When ``true positive + false negative == 0``, recall returns 0 and raises\n ``UndefinedMetricWarning``. This behavior can be modified with\n ``zero_division``.\n\n Examples\n --------\n >>> from sklearn.metrics import recall_score\n >>> y_true = [0, 1, 2, 0, 1, 2]\n >>> y_pred = [0, 2, 1, 0, 0, 1]\n >>> recall_score(y_true, y_pred, average='macro')\n 0.33...\n >>> recall_score(y_true, y_pred, average='micro')\n 0.33...\n >>> recall_score(y_true, y_pred, average='weighted')\n 0.33...\n >>> recall_score(y_true, y_pred, average=None)\n array([1., 0., 0.])\n >>> y_true = [0, 0, 0, 0, 0, 0]\n >>> recall_score(y_true, y_pred, average=None)\n array([0.5, 0. , 0. ])\n >>> recall_score(y_true, y_pred, average=None, zero_division=1)\n array([0.5, 1. , 1. ])\n \"\"\"\n _, r, _, _ = precision_recall_fscore_support(y_true, y_pred,\n labels=labels,\n pos_label=pos_label,\n average=average,\n warn_for=('recall',),\n sample_weight=sample_weight,\n zero_division=zero_division)\n return r" + }, + { + "id": "scikit-learn/sklearn.metrics._classification/zero_one_loss", + "name": "zero_one_loss", + "qname": "sklearn.metrics._classification.zero_one_loss", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._classification/zero_one_loss/y_true", + "name": "y_true", + "qname": "sklearn.metrics._classification.zero_one_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Ground truth (correct) labels." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/zero_one_loss/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._classification.zero_one_loss.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1d array-like, or label indicator array / sparse matrix", + "default_value": "", + "description": "Predicted labels, as returned by a classifier." 
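Per the ``code`` field above, ``recall_score`` is a thin wrapper that selects one component of ``precision_recall_fscore_support``; a minimal sketch:

from sklearn.metrics import precision_recall_fscore_support, recall_score

y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]
# recall_score returns the recall component of the full tuple.
_, r, _, _ = precision_recall_fscore_support(y_true, y_pred, average='macro')
assert recall_score(y_true, y_pred, average='macro') == r   # 0.33...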
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1d array-like" + }, + { + "kind": "NamedType", + "name": "label indicator array / sparse matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/zero_one_loss/normalize", + "name": "normalize", + "qname": "sklearn.metrics._classification.zero_one_loss.normalize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If ``False``, return the number of misclassifications.\nOtherwise, return the fraction of misclassifications." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics._classification/zero_one_loss/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._classification.zero_one_loss.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Zero-one classification loss.\n\nIf normalize is ``True``, return the fraction of misclassifications\n(float), else it returns the number of misclassifications (int). The best\nperformance is 0.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Zero-one classification loss.\n\nIf normalize is ``True``, return the fraction of misclassifications\n(float), else it returns the number of misclassifications (int). The best\nperformance is 0.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\nnormalize : bool, default=True\n If ``False``, return the number of misclassifications.\n Otherwise, return the fraction of misclassifications.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float or int,\n If ``normalize == True``, return the fraction of misclassifications\n (float), else it returns the number of misclassifications (int).\n\nNotes\n-----\nIn multilabel classification, the zero_one_loss function corresponds to\nthe subset zero-one loss: for each sample, the entire set of labels must be\ncorrectly predicted, otherwise the loss for that sample is equal to one.\n\nSee Also\n--------\naccuracy_score, hamming_loss, jaccard_score\n\nExamples\n--------\n>>> from sklearn.metrics import zero_one_loss\n>>> y_pred = [1, 2, 3, 4]\n>>> y_true = [2, 2, 3, 4]\n>>> zero_one_loss(y_true, y_pred)\n0.25\n>>> zero_one_loss(y_true, y_pred, normalize=False)\n1\n\nIn the multilabel case with binary label indicators:\n\n>>> import numpy as np\n>>> zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))\n0.5", + "code": "@_deprecate_positional_args\ndef zero_one_loss(y_true, y_pred, *, normalize=True, sample_weight=None):\n \"\"\"Zero-one classification loss.\n\n If normalize is ``True``, return the fraction of misclassifications\n (float), else it returns the number of misclassifications (int). 
The best\n performance is 0.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) labels.\n\n y_pred : 1d array-like, or label indicator array / sparse matrix\n Predicted labels, as returned by a classifier.\n\n normalize : bool, default=True\n If ``False``, return the number of misclassifications.\n Otherwise, return the fraction of misclassifications.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n loss : float or int,\n If ``normalize == True``, return the fraction of misclassifications\n (float), else it returns the number of misclassifications (int).\n\n Notes\n -----\n In multilabel classification, the zero_one_loss function corresponds to\n the subset zero-one loss: for each sample, the entire set of labels must be\n correctly predicted, otherwise the loss for that sample is equal to one.\n\n See Also\n --------\n accuracy_score, hamming_loss, jaccard_score\n\n Examples\n --------\n >>> from sklearn.metrics import zero_one_loss\n >>> y_pred = [1, 2, 3, 4]\n >>> y_true = [2, 2, 3, 4]\n >>> zero_one_loss(y_true, y_pred)\n 0.25\n >>> zero_one_loss(y_true, y_pred, normalize=False)\n 1\n\n In the multilabel case with binary label indicators:\n\n >>> import numpy as np\n >>> zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))\n 0.5\n \"\"\"\n score = accuracy_score(y_true, y_pred,\n normalize=normalize,\n sample_weight=sample_weight)\n\n if normalize:\n return 1 - score\n else:\n if sample_weight is not None:\n n_samples = np.sum(sample_weight)\n else:\n n_samples = _num_samples(y_true)\n return n_samples - score" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.base/_check_classifier_response_method", + "name": "_check_classifier_response_method", + "qname": "sklearn.metrics._plot.base._check_classifier_response_method", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.base/_check_classifier_response_method/estimator", + "name": "estimator", + "qname": "sklearn.metrics._plot.base._check_classifier_response_method.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._plot.base/_check_classifier_response_method/response_method", + "name": "response_method", + "qname": "sklearn.metrics._plot.base._check_classifier_response_method.response_method", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return prediction method from the response_method", + "docstring": "Return prediction method from the response_method\n\nParameters\n----------\nestimator: object\n Classifier to check\n\nresponse_method: {'auto', 'predict_proba', 'decision_function'}\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. 
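A small sketch of the ``normalize`` switch and the accuracy relationship implemented in the ``zero_one_loss`` ``code`` field above (the normalized loss is ``1 - accuracy_score``):

import numpy as np
from sklearn.metrics import accuracy_score, zero_one_loss

y_true = [2, 2, 3, 4]
y_pred = [1, 2, 3, 4]
# normalize=True (default): fraction misclassified, i.e. 1 - accuracy.
assert zero_one_loss(y_true, y_pred) == 1 - accuracy_score(y_true, y_pred)  # 0.25
# normalize=False: raw count of misclassified samples.
assert zero_one_loss(y_true, y_pred, normalize=False) == 1
# Multilabel: the whole label set must match, so one wrong row costs 1 of 2.
assert zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2))) == 0.5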
If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nReturns\n-------\nprediction_method: callable\n prediction method of estimator", + "code": "def _check_classifier_response_method(estimator, response_method):\n \"\"\"Return prediction method from the response_method\n\n Parameters\n ----------\n estimator: object\n Classifier to check\n\n response_method: {'auto', 'predict_proba', 'decision_function'}\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\n Returns\n -------\n prediction_method: callable\n prediction method of estimator\n \"\"\"\n\n if response_method not in (\"predict_proba\", \"decision_function\", \"auto\"):\n raise ValueError(\"response_method must be 'predict_proba', \"\n \"'decision_function' or 'auto'\")\n\n error_msg = \"response method {} is not defined in {}\"\n if response_method != \"auto\":\n prediction_method = getattr(estimator, response_method, None)\n if prediction_method is None:\n raise ValueError(error_msg.format(response_method,\n estimator.__class__.__name__))\n else:\n predict_proba = getattr(estimator, 'predict_proba', None)\n decision_function = getattr(estimator, 'decision_function', None)\n prediction_method = predict_proba or decision_function\n if prediction_method is None:\n raise ValueError(error_msg.format(\n \"decision_function or predict_proba\",\n estimator.__class__.__name__))\n\n return prediction_method" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.base/_get_response", + "name": "_get_response", + "qname": "sklearn.metrics._plot.base._get_response", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.base/_get_response/X", + "name": "X", + "qname": "sklearn.metrics._plot.base._get_response.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.base/_get_response/estimator", + "name": "estimator", + "qname": "sklearn.metrics._plot.base._get_response.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator instance", + "default_value": "", + "description": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\nin which the last estimator is a classifier." 
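``_check_classifier_response_method`` is private, so the import path below is an implementation detail of this 0.24 layout; the sketch only illustrates the ``'auto'`` resolution order shown in the ``code`` field (``predict_proba`` first, then ``decision_function``):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics._plot.base import _check_classifier_response_method
from sklearn.svm import LinearSVC

X, y = make_classification(random_state=0)
has_proba = LogisticRegression().fit(X, y)   # exposes predict_proba
no_proba = LinearSVC().fit(X, y)             # only decision_function
assert _check_classifier_response_method(has_proba, 'auto').__name__ == 'predict_proba'
assert _check_classifier_response_method(no_proba, 'auto').__name__ == 'decision_function'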
+ }, + "type": { + "kind": "NamedType", + "name": "estimator instance" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.base/_get_response/response_method", + "name": "response_method", + "qname": "sklearn.metrics._plot.base._get_response.response_method", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._plot.base/_get_response/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._plot.base._get_response.pos_label", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or int", + "default_value": "None", + "description": "The class considered as the positive class when computing\nthe metrics. By default, `estimators.classes_[1]` is\nconsidered as the positive class." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return response and positive label.", + "docstring": "Return response and positive label.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nresponse_method: {'auto', 'predict_proba', 'decision_function'}\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\npos_label : str or int, default=None\n The class considered as the positive class when computing\n the metrics. By default, `estimators.classes_[1]` is\n considered as the positive class.\n\nReturns\n-------\ny_pred: ndarray of shape (n_samples,)\n Target scores calculated from the provided response_method\n and pos_label.\n\npos_label: str or int\n The class considered as the positive class when computing\n the metrics.", + "code": "def _get_response(X, estimator, response_method, pos_label=None):\n \"\"\"Return response and positive label.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\n estimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\n response_method: {'auto', 'predict_proba', 'decision_function'}\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\n pos_label : str or int, default=None\n The class considered as the positive class when computing\n the metrics. 
By default, `estimators.classes_[1]` is\n considered as the positive class.\n\n Returns\n -------\n y_pred: ndarray of shape (n_samples,)\n Target scores calculated from the provided response_method\n and pos_label.\n\n pos_label: str or int\n The class considered as the positive class when computing\n the metrics.\n \"\"\"\n classification_error = (\n \"{} should be a binary classifier\".format(estimator.__class__.__name__)\n )\n\n if not is_classifier(estimator):\n raise ValueError(classification_error)\n\n prediction_method = _check_classifier_response_method(\n estimator, response_method)\n\n y_pred = prediction_method(X)\n\n if pos_label is not None and pos_label not in estimator.classes_:\n raise ValueError(\n f\"The class provided by 'pos_label' is unknown. Got \"\n f\"{pos_label} instead of one of {estimator.classes_}\"\n )\n\n if y_pred.ndim != 1: # `predict_proba`\n if y_pred.shape[1] != 2:\n raise ValueError(classification_error)\n if pos_label is None:\n pos_label = estimator.classes_[1]\n y_pred = y_pred[:, 1]\n else:\n class_idx = np.flatnonzero(estimator.classes_ == pos_label)\n y_pred = y_pred[:, class_idx]\n else:\n if pos_label is None:\n pos_label = estimator.classes_[1]\n elif pos_label == estimator.classes_[0]:\n y_pred *= -1\n\n return y_pred, pos_label" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/__init__", + "name": "__init__", + "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/__init__/self", + "name": "self", + "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/__init__/confusion_matrix", + "name": "confusion_matrix", + "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.__init__.confusion_matrix", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_classes, n_classes)", + "default_value": "", + "description": "Confusion matrix." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_classes, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/__init__/display_labels", + "name": "display_labels", + "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.__init__.display_labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_classes,)", + "default_value": "None", + "description": "Display labels for plot. If None, display labels are set from 0 to\n`n_classes - 1`." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_classes,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Confusion Matrix visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.plot_confusion_matrix` to\ncreate a :class:`ConfusionMatrixDisplay`. 
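Likewise for ``_get_response`` (private; path is version-specific): with a ``predict_proba`` output it keeps only the positive-class column and defaults ``pos_label`` to ``classes_[1]``, per the ``code`` field above:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics._plot.base import _get_response

X, y = make_classification(random_state=0)
clf = LogisticRegression().fit(X, y)
y_score, pos_label = _get_response(X, clf, 'predict_proba')
assert y_score.shape == (X.shape[0],)   # one score per sample
assert pos_label == clf.classes_[1]     # default positive class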
All parameters are stored as\nattributes.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, confusion_matrix, *, display_labels=None):\n self.confusion_matrix = confusion_matrix\n self.display_labels = display_labels" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/plot", + "name": "plot", + "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.plot", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/plot/self", + "name": "self", + "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.plot.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/plot/include_values", + "name": "include_values", + "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.plot.include_values", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Includes values in confusion matrix." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/plot/cmap", + "name": "cmap", + "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.plot.cmap", + "default_value": "'viridis'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or matplotlib Colormap", + "default_value": "'viridis'", + "description": "Colormap recognized by matplotlib." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "matplotlib Colormap" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/plot/xticks_rotation", + "name": "xticks_rotation", + "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.plot.xticks_rotation", + "default_value": "'horizontal'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'vertical', 'horizontal'} or float", + "default_value": "'horizontal'", + "description": "Rotation of xtick labels." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["horizontal", "vertical"] + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/plot/values_format", + "name": "values_format", + "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.plot.values_format", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Format specification for values in confusion matrix. If `None`,\nthe format specification is 'd' or '.2g' whichever is shorter." 
+ }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/plot/ax", + "name": "ax", + "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.plot.ax", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "matplotlib axes", + "default_value": "None", + "description": "Axes object to plot on. If `None`, a new figure and axes is\ncreated." + }, + "type": { + "kind": "NamedType", + "name": "matplotlib axes" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/ConfusionMatrixDisplay/plot/colorbar", + "name": "colorbar", + "qname": "sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay.plot.colorbar", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not to add a colorbar to the plot." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot visualization.", + "docstring": "Plot visualization.\n\nParameters\n----------\ninclude_values : bool, default=True\n Includes values in confusion matrix.\n\ncmap : str or matplotlib Colormap, default='viridis'\n Colormap recognized by matplotlib.\n\nxticks_rotation : {'vertical', 'horizontal'} or float, default='horizontal'\n Rotation of xtick labels.\n\nvalues_format : str, default=None\n Format specification for values in confusion matrix. If `None`,\n the format specification is 'd' or '.2g' whichever is shorter.\n\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\ncolorbar : bool, default=True\n Whether or not to add a colorbar to the plot.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`", + "code": " @_deprecate_positional_args\n def plot(self, *, include_values=True, cmap='viridis',\n xticks_rotation='horizontal', values_format=None,\n ax=None, colorbar=True):\n \"\"\"Plot visualization.\n\n Parameters\n ----------\n include_values : bool, default=True\n Includes values in confusion matrix.\n\n cmap : str or matplotlib Colormap, default='viridis'\n Colormap recognized by matplotlib.\n\n xticks_rotation : {'vertical', 'horizontal'} or float, \\\n default='horizontal'\n Rotation of xtick labels.\n\n values_format : str, default=None\n Format specification for values in confusion matrix. If `None`,\n the format specification is 'd' or '.2g' whichever is shorter.\n\n ax : matplotlib axes, default=None\n Axes object to plot on. 
If `None`, a new figure and axes is\n created.\n\n colorbar : bool, default=True\n Whether or not to add a colorbar to the plot.\n\n Returns\n -------\n display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n \"\"\"\n check_matplotlib_support(\"ConfusionMatrixDisplay.plot\")\n import matplotlib.pyplot as plt\n\n if ax is None:\n fig, ax = plt.subplots()\n else:\n fig = ax.figure\n\n cm = self.confusion_matrix\n n_classes = cm.shape[0]\n self.im_ = ax.imshow(cm, interpolation='nearest', cmap=cmap)\n self.text_ = None\n cmap_min, cmap_max = self.im_.cmap(0), self.im_.cmap(256)\n\n if include_values:\n self.text_ = np.empty_like(cm, dtype=object)\n\n # print text with appropriate color depending on background\n thresh = (cm.max() + cm.min()) / 2.0\n\n for i, j in product(range(n_classes), range(n_classes)):\n color = cmap_max if cm[i, j] < thresh else cmap_min\n\n if values_format is None:\n text_cm = format(cm[i, j], '.2g')\n if cm.dtype.kind != 'f':\n text_d = format(cm[i, j], 'd')\n if len(text_d) < len(text_cm):\n text_cm = text_d\n else:\n text_cm = format(cm[i, j], values_format)\n\n self.text_[i, j] = ax.text(\n j, i, text_cm,\n ha=\"center\", va=\"center\",\n color=color)\n\n if self.display_labels is None:\n display_labels = np.arange(n_classes)\n else:\n display_labels = self.display_labels\n if colorbar:\n fig.colorbar(self.im_, ax=ax)\n ax.set(xticks=np.arange(n_classes),\n yticks=np.arange(n_classes),\n xticklabels=display_labels,\n yticklabels=display_labels,\n ylabel=\"True label\",\n xlabel=\"Predicted label\")\n\n ax.set_ylim((n_classes - 0.5, -0.5))\n plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)\n\n self.figure_ = fig\n self.ax_ = ax\n return self" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix", + "name": "plot_confusion_matrix", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/estimator", + "name": "estimator", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator instance", + "default_value": "", + "description": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\nin which the last estimator is a classifier." + }, + "type": { + "kind": "NamedType", + "name": "estimator instance" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/X", + "name": "X", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input values." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/y_true", + "name": "y_true", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/labels", + "name": "labels", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "List of labels to index the matrix. This may be used to reorder or\nselect a subset of labels. If `None` is given, those that appear at\nleast once in `y_true` or `y_pred` are used in sorted order." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/normalize", + "name": "normalize", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.normalize", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'true', 'pred', 'all'}", + "default_value": "None", + "description": "Normalizes confusion matrix over the true (rows), predicted (columns)\nconditions or all the population. If None, confusion matrix will not be\nnormalized." + }, + "type": { + "kind": "EnumType", + "values": ["all", "pred", "true"] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/display_labels", + "name": "display_labels", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.display_labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "Target names used for plotting. By default, `labels` will be used if\nit is defined, otherwise the unique labels of `y_true` and `y_pred`\nwill be used." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/include_values", + "name": "include_values", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.include_values", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Includes values in confusion matrix." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/xticks_rotation", + "name": "xticks_rotation", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.xticks_rotation", + "default_value": "'horizontal'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'vertical', 'horizontal'} or float", + "default_value": "'horizontal'", + "description": "Rotation of xtick labels." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["horizontal", "vertical"] + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/values_format", + "name": "values_format", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.values_format", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Format specification for values in confusion matrix. If `None`,\nthe format specification is 'd' or '.2g' whichever is shorter." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/cmap", + "name": "cmap", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.cmap", + "default_value": "'viridis'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or matplotlib Colormap", + "default_value": "'viridis'", + "description": "Colormap recognized by matplotlib." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "matplotlib Colormap" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/ax", + "name": "ax", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.ax", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "matplotlib Axes", + "default_value": "None", + "description": "Axes object to plot on. If `None`, a new figure and axes is\ncreated." + }, + "type": { + "kind": "NamedType", + "name": "matplotlib Axes" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.confusion_matrix/plot_confusion_matrix/colorbar", + "name": "colorbar", + "qname": "sklearn.metrics._plot.confusion_matrix.plot_confusion_matrix.colorbar", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not to add a colorbar to the plot.\n\n.. 
versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot Confusion Matrix.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Plot Confusion Matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny_true : array-like of shape (n_samples,)\n Target values.\n\nlabels : array-like of shape (n_classes,), default=None\n List of labels to index the matrix. This may be used to reorder or\n select a subset of labels. If `None` is given, those that appear at\n least once in `y_true` or `y_pred` are used in sorted order.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nnormalize : {'true', 'pred', 'all'}, default=None\n Normalizes confusion matrix over the true (rows), predicted (columns)\n conditions or all the population. If None, confusion matrix will not be\n normalized.\n\ndisplay_labels : array-like of shape (n_classes,), default=None\n Target names used for plotting. By default, `labels` will be used if\n it is defined, otherwise the unique labels of `y_true` and `y_pred`\n will be used.\n\ninclude_values : bool, default=True\n Includes values in confusion matrix.\n\nxticks_rotation : {'vertical', 'horizontal'} or float, default='horizontal'\n Rotation of xtick labels.\n\nvalues_format : str, default=None\n Format specification for values in confusion matrix. If `None`,\n the format specification is 'd' or '.2g' whichever is shorter.\n\ncmap : str or matplotlib Colormap, default='viridis'\n Colormap recognized by matplotlib.\n\nax : matplotlib Axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\ncolorbar : bool, default=True\n Whether or not to add a colorbar to the plot.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\nSee Also\n--------\nconfusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n classification.\nConfusionMatrixDisplay : Confusion Matrix visualization.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import plot_confusion_matrix\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... 
X, y, random_state=0)\n>>> clf = SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> plot_confusion_matrix(clf, X_test, y_test) # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP", + "code": "@_deprecate_positional_args\ndef plot_confusion_matrix(estimator, X, y_true, *, labels=None,\n sample_weight=None, normalize=None,\n display_labels=None, include_values=True,\n xticks_rotation='horizontal',\n values_format=None,\n cmap='viridis', ax=None, colorbar=True):\n \"\"\"Plot Confusion Matrix.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\n y_true : array-like of shape (n_samples,)\n Target values.\n\n labels : array-like of shape (n_classes,), default=None\n List of labels to index the matrix. This may be used to reorder or\n select a subset of labels. If `None` is given, those that appear at\n least once in `y_true` or `y_pred` are used in sorted order.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n normalize : {'true', 'pred', 'all'}, default=None\n Normalizes confusion matrix over the true (rows), predicted (columns)\n conditions or all the population. If None, confusion matrix will not be\n normalized.\n\n display_labels : array-like of shape (n_classes,), default=None\n Target names used for plotting. By default, `labels` will be used if\n it is defined, otherwise the unique labels of `y_true` and `y_pred`\n will be used.\n\n include_values : bool, default=True\n Includes values in confusion matrix.\n\n xticks_rotation : {'vertical', 'horizontal'} or float, \\\n default='horizontal'\n Rotation of xtick labels.\n\n values_format : str, default=None\n Format specification for values in confusion matrix. If `None`,\n the format specification is 'd' or '.2g' whichever is shorter.\n\n cmap : str or matplotlib Colormap, default='viridis'\n Colormap recognized by matplotlib.\n\n ax : matplotlib Axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\n colorbar : bool, default=True\n Whether or not to add a colorbar to the plot.\n\n .. versionadded:: 0.24\n\n Returns\n -------\n display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`\n\n See Also\n --------\n confusion_matrix : Compute Confusion Matrix to evaluate the accuracy of a\n classification.\n ConfusionMatrixDisplay : Confusion Matrix visualization.\n\n Examples\n --------\n >>> import matplotlib.pyplot as plt # doctest: +SKIP\n >>> from sklearn.datasets import make_classification\n >>> from sklearn.metrics import plot_confusion_matrix\n >>> from sklearn.model_selection import train_test_split\n >>> from sklearn.svm import SVC\n >>> X, y = make_classification(random_state=0)\n >>> X_train, X_test, y_train, y_test = train_test_split(\n ... 
X, y, random_state=0)\n >>> clf = SVC(random_state=0)\n >>> clf.fit(X_train, y_train)\n SVC(random_state=0)\n >>> plot_confusion_matrix(clf, X_test, y_test) # doctest: +SKIP\n >>> plt.show() # doctest: +SKIP\n \"\"\"\n check_matplotlib_support(\"plot_confusion_matrix\")\n\n if not is_classifier(estimator):\n raise ValueError(\"plot_confusion_matrix only supports classifiers\")\n\n y_pred = estimator.predict(X)\n cm = confusion_matrix(y_true, y_pred, sample_weight=sample_weight,\n labels=labels, normalize=normalize)\n\n if display_labels is None:\n if labels is None:\n display_labels = unique_labels(y_true, y_pred)\n else:\n display_labels = labels\n\n disp = ConfusionMatrixDisplay(confusion_matrix=cm,\n display_labels=display_labels)\n return disp.plot(include_values=include_values,\n cmap=cmap, ax=ax, xticks_rotation=xticks_rotation,\n values_format=values_format, colorbar=colorbar)" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay/__init__", + "name": "__init__", + "qname": "sklearn.metrics._plot.det_curve.DetCurveDisplay.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay/__init__/self", + "name": "self", + "qname": "sklearn.metrics._plot.det_curve.DetCurveDisplay.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay/__init__/fpr", + "name": "fpr", + "qname": "sklearn.metrics._plot.det_curve.DetCurveDisplay.__init__.fpr", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "False positive rate." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay/__init__/fnr", + "name": "fnr", + "qname": "sklearn.metrics._plot.det_curve.DetCurveDisplay.__init__.fnr", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "False negative rate." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay/__init__/estimator_name", + "name": "estimator_name", + "qname": "sklearn.metrics._plot.det_curve.DetCurveDisplay.__init__.estimator_name", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Name of estimator. If None, the estimator name is not shown." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay/__init__/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._plot.det_curve.DetCurveDisplay.__init__.pos_label", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or int", + "default_value": "None", + "description": "The label of the positive class." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "DET curve visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.plot_det_curve` to create a\nvisualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24", + "docstring": "", + "code": " def __init__(self, *, fpr, fnr, estimator_name=None, pos_label=None):\n self.fpr = fpr\n self.fnr = fnr\n self.estimator_name = estimator_name\n self.pos_label = pos_label" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay/plot", + "name": "plot", + "qname": "sklearn.metrics._plot.det_curve.DetCurveDisplay.plot", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay/plot/self", + "name": "self", + "qname": "sklearn.metrics._plot.det_curve.DetCurveDisplay.plot.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay/plot/ax", + "name": "ax", + "qname": "sklearn.metrics._plot.det_curve.DetCurveDisplay.plot.ax", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "matplotlib axes", + "default_value": "None", + "description": "Axes object to plot on. If `None`, a new figure and axes is\ncreated." + }, + "type": { + "kind": "NamedType", + "name": "matplotlib axes" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay/plot/name", + "name": "name", + "qname": "sklearn.metrics._plot.det_curve.DetCurveDisplay.plot.name", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Name of DET curve for labeling. If `None`, use the name of the\nestimator." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/DetCurveDisplay/plot/kwargs", + "name": "kwargs", + "qname": "sklearn.metrics._plot.det_curve.DetCurveDisplay.plot.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot visualization.", + "docstring": "Plot visualization.\n\nParameters\n----------\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\nname : str, default=None\n Name of DET curve for labeling. If `None`, use the name of the\n estimator.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.plot.DetCurveDisplay`\n Object that stores computed values.", + "code": " def plot(self, ax=None, *, name=None, **kwargs):\n \"\"\"Plot visualization.\n\n Parameters\n ----------\n ax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\n name : str, default=None\n Name of DET curve for labeling. 
If `None`, use the name of the\n estimator.\n\n Returns\n -------\n display : :class:`~sklearn.metrics.plot.DetCurveDisplay`\n Object that stores computed values.\n \"\"\"\n check_matplotlib_support('DetCurveDisplay.plot')\n\n name = self.estimator_name if name is None else name\n line_kwargs = {} if name is None else {\"label\": name}\n line_kwargs.update(**kwargs)\n\n import matplotlib.pyplot as plt\n\n if ax is None:\n _, ax = plt.subplots()\n\n self.line_, = ax.plot(\n sp.stats.norm.ppf(self.fpr),\n sp.stats.norm.ppf(self.fnr),\n **line_kwargs,\n )\n info_pos_label = (f\" (Positive label: {self.pos_label})\"\n if self.pos_label is not None else \"\")\n\n xlabel = \"False Positive Rate\" + info_pos_label\n ylabel = \"False Negative Rate\" + info_pos_label\n ax.set(xlabel=xlabel, ylabel=ylabel)\n\n if \"label\" in line_kwargs:\n ax.legend(loc=\"lower right\")\n\n ticks = [0.001, 0.01, 0.05, 0.20, 0.5, 0.80, 0.95, 0.99, 0.999]\n tick_locations = sp.stats.norm.ppf(ticks)\n tick_labels = [\n '{:.0%}'.format(s) if (100*s).is_integer() else '{:.1%}'.format(s)\n for s in ticks\n ]\n ax.set_xticks(tick_locations)\n ax.set_xticklabels(tick_labels)\n ax.set_xlim(-3, 3)\n ax.set_yticks(tick_locations)\n ax.set_yticklabels(tick_labels)\n ax.set_ylim(-3, 3)\n\n self.ax_ = ax\n self.figure_ = ax.figure\n return self" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/plot_det_curve", + "name": "plot_det_curve", + "qname": "sklearn.metrics._plot.det_curve.plot_det_curve", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/plot_det_curve/estimator", + "name": "estimator", + "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator instance", + "default_value": "", + "description": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\nin which the last estimator is a classifier." + }, + "type": { + "kind": "NamedType", + "name": "estimator instance" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/plot_det_curve/X", + "name": "X", + "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/plot_det_curve/y", + "name": "y", + "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/plot_det_curve/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/plot_det_curve/response_method", + "name": "response_method", + "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.response_method", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'predict_proba', 'decision_function', 'auto'} default='auto'", + "default_value": "", + "description": "Specifies whether to use :term:`predict_proba` or\n:term:`decision_function` as the predicted target response. If set to\n'auto', :term:`predict_proba` is tried first and if it does not exist\n:term:`decision_function` is tried next." + }, + "type": { + "kind": "EnumType", + "values": ["predict_proba", "auto", "decision_function"] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/plot_det_curve/name", + "name": "name", + "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.name", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Name of DET curve for labeling. If `None`, use the name of the\nestimator." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/plot_det_curve/ax", + "name": "ax", + "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.ax", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "matplotlib axes", + "default_value": "None", + "description": "Axes object to plot on. If `None`, a new figure and axes is created." + }, + "type": { + "kind": "NamedType", + "name": "matplotlib axes" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/plot_det_curve/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.pos_label", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or int", + "default_value": "None", + "description": "The label of the positive class.\nWhen `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1},\n`pos_label` is set to 1, otherwise an error will be raised." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.det_curve/plot_det_curve/kwargs", + "name": "kwargs", + "qname": "sklearn.metrics._plot.det_curve.plot_det_curve.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot detection error tradeoff (DET) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24", + "docstring": "Plot detection error tradeoff (DET) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.24\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'} default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the predicted target response. If set to\n 'auto', :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nname : str, default=None\n Name of DET curve for labeling. If `None`, use the name of the\n estimator.\n\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n The label of the positive class.\n When `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1},\n `pos_label` is set to 1, otherwise an error will be raised.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.DetCurveDisplay`\n Object that stores computed values.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nDetCurveDisplay : DET curve visualization.\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> from sklearn import datasets, metrics, model_selection, svm\n>>> X, y = datasets.make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n... X, y, random_state=0)\n>>> clf = svm.SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> metrics.plot_det_curve(clf, X_test, y_test) # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP", + "code": "def plot_det_curve(\n estimator,\n X,\n y,\n *,\n sample_weight=None,\n response_method=\"auto\",\n name=None,\n ax=None,\n pos_label=None,\n **kwargs\n):\n \"\"\"Plot detection error tradeoff (DET) curve.\n\n Extra keyword arguments will be passed to matplotlib's `plot`.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n estimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n response_method : {'predict_proba', 'decision_function', 'auto'} \\\n default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the predicted target response. If set to\n 'auto', :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\n name : str, default=None\n Name of DET curve for labeling. If `None`, use the name of the\n estimator.\n\n ax : matplotlib axes, default=None\n Axes object to plot on. 
If `None`, a new figure and axes is created.\n\n pos_label : str or int, default=None\n The label of the positive class.\n When `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1},\n `pos_label` is set to 1, otherwise an error will be raised.\n\n Returns\n -------\n display : :class:`~sklearn.metrics.DetCurveDisplay`\n Object that stores computed values.\n\n See Also\n --------\n det_curve : Compute error rates for different probability thresholds.\n DetCurveDisplay : DET curve visualization.\n plot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\n Examples\n --------\n >>> import matplotlib.pyplot as plt # doctest: +SKIP\n >>> from sklearn import datasets, metrics, model_selection, svm\n >>> X, y = datasets.make_classification(random_state=0)\n >>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n ... X, y, random_state=0)\n >>> clf = svm.SVC(random_state=0)\n >>> clf.fit(X_train, y_train)\n SVC(random_state=0)\n >>> metrics.plot_det_curve(clf, X_test, y_test) # doctest: +SKIP\n >>> plt.show() # doctest: +SKIP\n \"\"\"\n check_matplotlib_support('plot_det_curve')\n\n y_pred, pos_label = _get_response(\n X, estimator, response_method, pos_label=pos_label\n )\n\n fpr, fnr, _ = det_curve(\n y, y_pred, pos_label=pos_label, sample_weight=sample_weight,\n )\n\n name = estimator.__class__.__name__ if name is None else name\n\n viz = DetCurveDisplay(\n fpr=fpr,\n fnr=fnr,\n estimator_name=name,\n pos_label=pos_label\n )\n\n return viz.plot(ax=ax, name=name, **kwargs)" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__", + "name": "__init__", + "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/self", + "name": "self", + "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/precision", + "name": "precision", + "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.precision", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "Precision values." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/recall", + "name": "recall", + "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.recall", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "Recall values." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/average_precision", + "name": "average_precision", + "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.average_precision", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Average precision. If None, the average precision is not shown." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/estimator_name", + "name": "estimator_name", + "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.estimator_name", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Name of estimator. If None, then the estimator name is not shown." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/__init__/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.__init__.pos_label", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or int", + "default_value": "None", + "description": "The class considered as the positive class. If None, the class will not\nbe shown in the legend.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Precision Recall visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.plot_precision_recall_curve`\nto create a visualizer. 
All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, precision, recall, *,\n average_precision=None, estimator_name=None, pos_label=None):\n self.estimator_name = estimator_name\n self.precision = precision\n self.recall = recall\n self.average_precision = average_precision\n self.pos_label = pos_label" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/plot", + "name": "plot", + "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.plot", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/plot/self", + "name": "self", + "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.plot.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/plot/ax", + "name": "ax", + "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.plot.ax", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Matplotlib Axes", + "default_value": "None", + "description": "Axes object to plot on. If `None`, a new figure and axes is\ncreated." + }, + "type": { + "kind": "NamedType", + "name": "Matplotlib Axes" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/plot/name", + "name": "name", + "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.plot.name", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Name of precision recall curve for labeling. If `None`, use the\nname of the estimator." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/PrecisionRecallDisplay/plot/kwargs", + "name": "kwargs", + "qname": "sklearn.metrics._plot.precision_recall_curve.PrecisionRecallDisplay.plot.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Keyword arguments to be passed to matplotlib's `plot`." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot visualization.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.", + "docstring": "Plot visualization.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nParameters\n----------\nax : Matplotlib Axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\nname : str, default=None\n Name of precision recall curve for labeling. 
If `None`, use the\n name of the estimator.\n\n**kwargs : dict\n Keyword arguments to be passed to matplotlib's `plot`.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n Object that stores computed values.", + "code": " @_deprecate_positional_args\n def plot(self, ax=None, *, name=None, **kwargs):\n \"\"\"Plot visualization.\n\n Extra keyword arguments will be passed to matplotlib's `plot`.\n\n Parameters\n ----------\n ax : Matplotlib Axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\n name : str, default=None\n Name of precision recall curve for labeling. If `None`, use the\n name of the estimator.\n\n **kwargs : dict\n Keyword arguments to be passed to matplotlib's `plot`.\n\n Returns\n -------\n display : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n Object that stores computed values.\n \"\"\"\n check_matplotlib_support(\"PrecisionRecallDisplay.plot\")\n\n name = self.estimator_name if name is None else name\n\n line_kwargs = {\"drawstyle\": \"steps-post\"}\n if self.average_precision is not None and name is not None:\n line_kwargs[\"label\"] = (f\"{name} (AP = \"\n f\"{self.average_precision:0.2f})\")\n elif self.average_precision is not None:\n line_kwargs[\"label\"] = (f\"AP = \"\n f\"{self.average_precision:0.2f}\")\n elif name is not None:\n line_kwargs[\"label\"] = name\n line_kwargs.update(**kwargs)\n\n import matplotlib.pyplot as plt\n\n if ax is None:\n fig, ax = plt.subplots()\n\n self.line_, = ax.plot(self.recall, self.precision, **line_kwargs)\n info_pos_label = (f\" (Positive label: {self.pos_label})\"\n if self.pos_label is not None else \"\")\n\n xlabel = \"Recall\" + info_pos_label\n ylabel = \"Precision\" + info_pos_label\n ax.set(xlabel=xlabel, ylabel=ylabel)\n\n if \"label\" in line_kwargs:\n ax.legend(loc=\"lower left\")\n\n self.ax_ = ax\n self.figure_ = ax.figure\n return self" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve", + "name": "plot_precision_recall_curve", + "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/estimator", + "name": "estimator", + "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator instance", + "default_value": "", + "description": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\nin which the last estimator is a classifier." + }, + "type": { + "kind": "NamedType", + "name": "estimator instance" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/X", + "name": "X", + "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input values." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/y", + "name": "y", + "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Binary target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/response_method", + "name": "response_method", + "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.response_method", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'predict_proba', 'decision_function', 'auto'}", + "default_value": "'auto'", + "description": "Specifies whether to use :term:`predict_proba` or\n:term:`decision_function` as the target response. If set to 'auto',\n:term:`predict_proba` is tried first and if it does not exist\n:term:`decision_function` is tried next." + }, + "type": { + "kind": "EnumType", + "values": ["predict_proba", "auto", "decision_function"] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/name", + "name": "name", + "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.name", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Name for labeling curve. If `None`, the name of the\nestimator is used." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/ax", + "name": "ax", + "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.ax", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "matplotlib axes", + "default_value": "None", + "description": "Axes object to plot on. If `None`, a new figure and axes is created." + }, + "type": { + "kind": "NamedType", + "name": "matplotlib axes" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.pos_label", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or int", + "default_value": "None", + "description": "The class considered as the positive class when computing the precision\nand recall metrics. 
By default, `estimators.classes_[1]` is considered\nas the positive class.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.precision_recall_curve/plot_precision_recall_curve/kwargs", + "name": "kwargs", + "qname": "sklearn.metrics._plot.precision_recall_curve.plot_precision_recall_curve.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Keyword arguments to be passed to matplotlib's `plot`." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot Precision Recall Curve for binary classifiers.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Plot Precision Recall Curve for binary classifiers.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny : array-like of shape (n_samples,)\n Binary target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'}, default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nname : str, default=None\n Name for labeling curve. If `None`, the name of the\n estimator is used.\n\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n The class considered as the positive class when computing the precision\n and recall metrics. By default, `estimators.classes_[1]` is considered\n as the positive class.\n\n .. 
versionadded:: 0.24\n\n**kwargs : dict\n Keyword arguments to be passed to matplotlib's `plot`.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n Object that stores computed values.\n\nSee Also\n--------\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\nPrecisionRecallDisplay : Precision Recall visualization.", + "code": "@_deprecate_positional_args\ndef plot_precision_recall_curve(estimator, X, y, *,\n sample_weight=None, response_method=\"auto\",\n name=None, ax=None, pos_label=None, **kwargs):\n \"\"\"Plot Precision Recall Curve for binary classifiers.\n\n Extra keyword arguments will be passed to matplotlib's `plot`.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\n y : array-like of shape (n_samples,)\n Binary target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n response_method : {'predict_proba', 'decision_function', 'auto'}, \\\n default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\n name : str, default=None\n Name for labeling curve. If `None`, the name of the\n estimator is used.\n\n ax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is created.\n\n pos_label : str or int, default=None\n The class considered as the positive class when computing the precision\n and recall metrics. By default, `estimators.classes_[1]` is considered\n as the positive class.\n\n .. 
versionadded:: 0.24\n\n **kwargs : dict\n Keyword arguments to be passed to matplotlib's `plot`.\n\n Returns\n -------\n display : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n Object that stores computed values.\n\n See Also\n --------\n precision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\n PrecisionRecallDisplay : Precision Recall visualization.\n \"\"\"\n check_matplotlib_support(\"plot_precision_recall_curve\")\n\n y_pred, pos_label = _get_response(\n X, estimator, response_method, pos_label=pos_label)\n\n precision, recall, _ = precision_recall_curve(y, y_pred,\n pos_label=pos_label,\n sample_weight=sample_weight)\n average_precision = average_precision_score(y, y_pred,\n pos_label=pos_label,\n sample_weight=sample_weight)\n\n name = name if name is not None else estimator.__class__.__name__\n\n viz = PrecisionRecallDisplay(\n precision=precision,\n recall=recall,\n average_precision=average_precision,\n estimator_name=name,\n pos_label=pos_label,\n )\n\n return viz.plot(ax=ax, name=name, **kwargs)" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/__init__", + "name": "__init__", + "qname": "sklearn.metrics._plot.roc_curve.RocCurveDisplay.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/__init__/self", + "name": "self", + "qname": "sklearn.metrics._plot.roc_curve.RocCurveDisplay.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/__init__/fpr", + "name": "fpr", + "qname": "sklearn.metrics._plot.roc_curve.RocCurveDisplay.__init__.fpr", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "False positive rate." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/__init__/tpr", + "name": "tpr", + "qname": "sklearn.metrics._plot.roc_curve.RocCurveDisplay.__init__.tpr", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "True positive rate." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/__init__/roc_auc", + "name": "roc_auc", + "qname": "sklearn.metrics._plot.roc_curve.RocCurveDisplay.__init__.roc_auc", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Area under ROC curve. If None, the roc_auc score is not shown." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/__init__/estimator_name", + "name": "estimator_name", + "qname": "sklearn.metrics._plot.roc_curve.RocCurveDisplay.__init__.estimator_name", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Name of estimator. If None, the estimator name is not shown." 
+ }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/__init__/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._plot.roc_curve.RocCurveDisplay.__init__.pos_label", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or int", + "default_value": "None", + "description": "The class considered as the positive class when computing the roc auc\nmetrics. By default, `estimators.classes_[1]` is considered\nas the positive class.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "ROC Curve visualization.\n\nIt is recommend to use :func:`~sklearn.metrics.plot_roc_curve` to create a\nvisualizer. All parameters are stored as attributes.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, fpr, tpr,\n roc_auc=None, estimator_name=None, pos_label=None):\n self.estimator_name = estimator_name\n self.fpr = fpr\n self.tpr = tpr\n self.roc_auc = roc_auc\n self.pos_label = pos_label" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/plot", + "name": "plot", + "qname": "sklearn.metrics._plot.roc_curve.RocCurveDisplay.plot", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/plot/self", + "name": "self", + "qname": "sklearn.metrics._plot.roc_curve.RocCurveDisplay.plot.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/plot/ax", + "name": "ax", + "qname": "sklearn.metrics._plot.roc_curve.RocCurveDisplay.plot.ax", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "matplotlib axes", + "default_value": "None", + "description": "Axes object to plot on. If `None`, a new figure and axes is\ncreated." + }, + "type": { + "kind": "NamedType", + "name": "matplotlib axes" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/plot/name", + "name": "name", + "qname": "sklearn.metrics._plot.roc_curve.RocCurveDisplay.plot.name", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Name of ROC Curve for labeling. If `None`, use the name of the\nestimator." 
+ }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/RocCurveDisplay/plot/kwargs", + "name": "kwargs", + "qname": "sklearn.metrics._plot.roc_curve.RocCurveDisplay.plot.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot visualization\n\nExtra keyword arguments will be passed to matplotlib's ``plot``.", + "docstring": "Plot visualization\n\nExtra keyword arguments will be passed to matplotlib's ``plot``.\n\nParameters\n----------\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\nname : str, default=None\n Name of ROC Curve for labeling. If `None`, use the name of the\n estimator.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.plot.RocCurveDisplay`\n Object that stores computed values.", + "code": " @_deprecate_positional_args\n def plot(self, ax=None, *, name=None, **kwargs):\n \"\"\"Plot visualization\n\n Extra keyword arguments will be passed to matplotlib's ``plot``.\n\n Parameters\n ----------\n ax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is\n created.\n\n name : str, default=None\n Name of ROC Curve for labeling. If `None`, use the name of the\n estimator.\n\n Returns\n -------\n display : :class:`~sklearn.metrics.plot.RocCurveDisplay`\n Object that stores computed values.\n \"\"\"\n check_matplotlib_support('RocCurveDisplay.plot')\n\n name = self.estimator_name if name is None else name\n\n line_kwargs = {}\n if self.roc_auc is not None and name is not None:\n line_kwargs[\"label\"] = f\"{name} (AUC = {self.roc_auc:0.2f})\"\n elif self.roc_auc is not None:\n line_kwargs[\"label\"] = f\"AUC = {self.roc_auc:0.2f}\"\n elif name is not None:\n line_kwargs[\"label\"] = name\n\n line_kwargs.update(**kwargs)\n\n import matplotlib.pyplot as plt\n\n if ax is None:\n fig, ax = plt.subplots()\n\n self.line_, = ax.plot(self.fpr, self.tpr, **line_kwargs)\n info_pos_label = (f\" (Positive label: {self.pos_label})\"\n if self.pos_label is not None else \"\")\n\n xlabel = \"False Positive Rate\" + info_pos_label\n ylabel = \"True Positive Rate\" + info_pos_label\n ax.set(xlabel=xlabel, ylabel=ylabel)\n\n if \"label\" in line_kwargs:\n ax.legend(loc=\"lower right\")\n\n self.ax_ = ax\n self.figure_ = ax.figure\n return self" + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/plot_roc_curve", + "name": "plot_roc_curve", + "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/plot_roc_curve/estimator", + "name": "estimator", + "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator instance", + "default_value": "", + "description": "Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\nin which the last estimator is a classifier." 
+ }, + "type": { + "kind": "NamedType", + "name": "estimator instance" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/plot_roc_curve/X", + "name": "X", + "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/plot_roc_curve/y", + "name": "y", + "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/plot_roc_curve/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/plot_roc_curve/drop_intermediate", + "name": "drop_intermediate", + "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.drop_intermediate", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "boolean", + "default_value": "True", + "description": "Whether to drop some suboptimal thresholds which would not appear\non a plotted ROC curve. This is useful in order to create lighter\nROC curves." + }, + "type": { + "kind": "NamedType", + "name": "boolean" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/plot_roc_curve/response_method", + "name": "response_method", + "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.response_method", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'predict_proba', 'decision_function', 'auto'} default='auto'", + "default_value": "", + "description": "Specifies whether to use :term:`predict_proba` or\n:term:`decision_function` as the target response. If set to 'auto',\n:term:`predict_proba` is tried first and if it does not exist\n:term:`decision_function` is tried next." + }, + "type": { + "kind": "EnumType", + "values": ["predict_proba", "auto", "decision_function"] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/plot_roc_curve/name", + "name": "name", + "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.name", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Name of ROC Curve for labeling. If `None`, use the name of the\nestimator." 
+ }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/plot_roc_curve/ax", + "name": "ax", + "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.ax", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "matplotlib axes", + "default_value": "None", + "description": "Axes object to plot on. If `None`, a new figure and axes is created." + }, + "type": { + "kind": "NamedType", + "name": "matplotlib axes" + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/plot_roc_curve/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.pos_label", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or int", + "default_value": "None", + "description": "The class considered as the positive class when computing the roc auc\nmetrics. By default, `estimators.classes_[1]` is considered\nas the positive class.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._plot.roc_curve/plot_roc_curve/kwargs", + "name": "kwargs", + "qname": "sklearn.metrics._plot.roc_curve.plot_roc_curve.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot Receiver operating characteristic (ROC) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Plot Receiver operating characteristic (ROC) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\ndrop_intermediate : boolean, default=True\n Whether to drop some suboptimal thresholds which would not appear\n on a plotted ROC curve. This is useful in order to create lighter\n ROC curves.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'} default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nname : str, default=None\n Name of ROC Curve for labeling. If `None`, use the name of the\n estimator.\n\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n The class considered as the positive class when computing the roc auc\n metrics. By default, `estimators.classes_[1]` is considered\n as the positive class.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.RocCurveDisplay`\n Object that stores computed values.\n\nSee Also\n--------\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nRocCurveDisplay : ROC Curve visualization.\nroc_auc_score : Compute the area under the ROC curve.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt # doctest: +SKIP\n>>> from sklearn import datasets, metrics, model_selection, svm\n>>> X, y = datasets.make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n... X, y, random_state=0)\n>>> clf = svm.SVC(random_state=0)\n>>> clf.fit(X_train, y_train)\nSVC(random_state=0)\n>>> metrics.plot_roc_curve(clf, X_test, y_test) # doctest: +SKIP\n>>> plt.show() # doctest: +SKIP", + "code": "@_deprecate_positional_args\ndef plot_roc_curve(estimator, X, y, *, sample_weight=None,\n drop_intermediate=True, response_method=\"auto\",\n name=None, ax=None, pos_label=None, **kwargs):\n \"\"\"Plot Receiver operating characteristic (ROC) curve.\n\n Extra keyword arguments will be passed to matplotlib's `plot`.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n drop_intermediate : boolean, default=True\n Whether to drop some suboptimal thresholds which would not appear\n on a plotted ROC curve. This is useful in order to create lighter\n ROC curves.\n\n response_method : {'predict_proba', 'decision_function', 'auto'} \\\n default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\n name : str, default=None\n Name of ROC Curve for labeling. If `None`, use the name of the\n estimator.\n\n ax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is created.\n\n pos_label : str or int, default=None\n The class considered as the positive class when computing the roc auc\n metrics. By default, `estimators.classes_[1]` is considered\n as the positive class.\n\n .. versionadded:: 0.24\n\n Returns\n -------\n display : :class:`~sklearn.metrics.RocCurveDisplay`\n Object that stores computed values.\n\n See Also\n --------\n roc_curve : Compute Receiver operating characteristic (ROC) curve.\n RocCurveDisplay : ROC Curve visualization.\n roc_auc_score : Compute the area under the ROC curve.\n\n Examples\n --------\n >>> import matplotlib.pyplot as plt # doctest: +SKIP\n >>> from sklearn import datasets, metrics, model_selection, svm\n >>> X, y = datasets.make_classification(random_state=0)\n >>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\n ... 
X, y, random_state=0)\n >>> clf = svm.SVC(random_state=0)\n >>> clf.fit(X_train, y_train)\n SVC(random_state=0)\n >>> metrics.plot_roc_curve(clf, X_test, y_test) # doctest: +SKIP\n >>> plt.show() # doctest: +SKIP\n \"\"\"\n check_matplotlib_support('plot_roc_curve')\n\n y_pred, pos_label = _get_response(\n X, estimator, response_method, pos_label=pos_label)\n\n fpr, tpr, _ = roc_curve(y, y_pred, pos_label=pos_label,\n sample_weight=sample_weight,\n drop_intermediate=drop_intermediate)\n roc_auc = auc(fpr, tpr)\n\n name = estimator.__class__.__name__ if name is None else name\n\n viz = RocCurveDisplay(\n fpr=fpr,\n tpr=tpr,\n roc_auc=roc_auc,\n estimator_name=name,\n pos_label=pos_label\n )\n\n return viz.plot(ax=ax, name=name, **kwargs)" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_binary_clf_curve", + "name": "_binary_clf_curve", + "qname": "sklearn.metrics._ranking._binary_clf_curve", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/_binary_clf_curve/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking._binary_clf_curve.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True targets of binary classification." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_binary_clf_curve/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking._binary_clf_curve.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Estimated probabilities or output of a decision function." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_binary_clf_curve/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._ranking._binary_clf_curve.pos_label", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or str", + "default_value": "None", + "description": "The label of the positive class." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_binary_clf_curve/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking._binary_clf_curve.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate true and false positives per binary classification threshold.", + "docstring": "Calculate true and false positives per binary classification threshold.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n True targets of binary classification.\n\ny_score : ndarray of shape (n_samples,)\n Estimated probabilities or output of a decision function.\n\npos_label : int or str, default=None\n The label of the positive class.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nfps : ndarray of shape (n_thresholds,)\n A count of false positives, at index i being the number of negative\n samples assigned a score >= thresholds[i]. The total number of\n negative samples is equal to fps[-1] (thus true negatives are given by\n fps[-1] - fps).\n\ntps : ndarray of shape (n_thresholds,)\n An increasing count of true positives, at index i being the number\n of positive samples assigned a score >= thresholds[i]. The total\n number of positive samples is equal to tps[-1] (thus false negatives\n are given by tps[-1] - tps).\n\nthresholds : ndarray of shape (n_thresholds,)\n Decreasing score values.", + "code": "def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None):\n \"\"\"Calculate true and false positives per binary classification threshold.\n\n Parameters\n ----------\n y_true : ndarray of shape (n_samples,)\n True targets of binary classification.\n\n y_score : ndarray of shape (n_samples,)\n Estimated probabilities or output of a decision function.\n\n pos_label : int or str, default=None\n The label of the positive class.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n fps : ndarray of shape (n_thresholds,)\n A count of false positives, at index i being the number of negative\n samples assigned a score >= thresholds[i]. The total number of\n negative samples is equal to fps[-1] (thus true negatives are given by\n fps[-1] - fps).\n\n tps : ndarray of shape (n_thresholds,)\n An increasing count of true positives, at index i being the number\n of positive samples assigned a score >= thresholds[i]. The total\n number of positive samples is equal to tps[-1] (thus false negatives\n are given by tps[-1] - tps).\n\n thresholds : ndarray of shape (n_thresholds,)\n Decreasing score values.\n \"\"\"\n # Check to make sure y_true is valid\n y_type = type_of_target(y_true)\n if not (y_type == \"binary\" or\n (y_type == \"multiclass\" and pos_label is not None)):\n raise ValueError(\"{0} format is not supported\".format(y_type))\n\n check_consistent_length(y_true, y_score, sample_weight)\n y_true = column_or_1d(y_true)\n y_score = column_or_1d(y_score)\n assert_all_finite(y_true)\n assert_all_finite(y_score)\n\n if sample_weight is not None:\n sample_weight = column_or_1d(sample_weight)\n\n pos_label = _check_pos_label_consistency(pos_label, y_true)\n\n # make y_true a boolean vector\n y_true = (y_true == pos_label)\n\n # sort scores and corresponding truth values\n desc_score_indices = np.argsort(y_score, kind=\"mergesort\")[::-1]\n y_score = y_score[desc_score_indices]\n y_true = y_true[desc_score_indices]\n if sample_weight is not None:\n weight = sample_weight[desc_score_indices]\n else:\n weight = 1.\n\n # y_score typically has many tied values. 
Here we extract\n # the indices associated with the distinct values. We also\n # concatenate a value for the end of the curve.\n distinct_value_indices = np.where(np.diff(y_score))[0]\n threshold_idxs = np.r_[distinct_value_indices, y_true.size - 1]\n\n # accumulate the true positives with decreasing threshold\n tps = stable_cumsum(y_true * weight)[threshold_idxs]\n if sample_weight is not None:\n # express fps as a cumsum to ensure fps is increasing even in\n # the presence of floating point errors\n fps = stable_cumsum((1 - y_true) * weight)[threshold_idxs]\n else:\n fps = 1 + threshold_idxs - tps\n return fps, tps, y_score[threshold_idxs]" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_binary_roc_auc_score", + "name": "_binary_roc_auc_score", + "qname": "sklearn.metrics._ranking._binary_roc_auc_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/_binary_roc_auc_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking._binary_roc_auc_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_binary_roc_auc_score/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking._binary_roc_auc_score.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_binary_roc_auc_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking._binary_roc_auc_score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_binary_roc_auc_score/max_fpr", + "name": "max_fpr", + "qname": "sklearn.metrics._ranking._binary_roc_auc_score.max_fpr", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Binary roc auc score.", + "docstring": "Binary roc auc score.", + "code": "def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None):\n \"\"\"Binary roc auc score.\"\"\"\n if len(np.unique(y_true)) != 2:\n raise ValueError(\"Only one class present in y_true. 
ROC AUC score \"\n \"is not defined in that case.\")\n\n fpr, tpr, _ = roc_curve(y_true, y_score,\n sample_weight=sample_weight)\n if max_fpr is None or max_fpr == 1:\n return auc(fpr, tpr)\n if max_fpr <= 0 or max_fpr > 1:\n raise ValueError(\"Expected max_fpr in range (0, 1], got: %r\" % max_fpr)\n\n # Add a single point at max_fpr by linear interpolation\n stop = np.searchsorted(fpr, max_fpr, 'right')\n x_interp = [fpr[stop - 1], fpr[stop]]\n y_interp = [tpr[stop - 1], tpr[stop]]\n tpr = np.append(tpr[:stop], np.interp(max_fpr, x_interp, y_interp))\n fpr = np.append(fpr[:stop], max_fpr)\n partial_auc = auc(fpr, tpr)\n\n # McClish correction: standardize result to be 0.5 if non-discriminant\n # and 1 if maximal\n min_area = 0.5 * max_fpr**2\n max_area = max_fpr\n return 0.5 * (1 + (partial_auc - min_area) / (max_area - min_area))" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_check_dcg_target_type", + "name": "_check_dcg_target_type", + "qname": "sklearn.metrics._ranking._check_dcg_target_type", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/_check_dcg_target_type/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking._check_dcg_target_type.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_dcg_target_type(y_true):\n y_type = type_of_target(y_true)\n supported_fmt = (\"multilabel-indicator\", \"continuous-multioutput\",\n \"multiclass-multioutput\")\n if y_type not in supported_fmt:\n raise ValueError(\n \"Only {} formats are supported. Got {} instead\".format(\n supported_fmt, y_type))" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_dcg_sample_scores", + "name": "_dcg_sample_scores", + "qname": "sklearn.metrics._ranking._dcg_sample_scores", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/_dcg_sample_scores/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking._dcg_sample_scores.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_labels)", + "default_value": "", + "description": "True targets of multilabel classification, or true scores of entities\nto be ranked." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_dcg_sample_scores/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking._dcg_sample_scores.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_labels)", + "default_value": "", + "description": "Target scores, can either be probability estimates, confidence values,\nor non-thresholded measure of decisions (as returned by\n\"decision_function\" on some classifiers)." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_dcg_sample_scores/k", + "name": "k", + "qname": "sklearn.metrics._ranking._dcg_sample_scores.k", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Only consider the highest k scores in the ranking. If None, use all\noutputs." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_dcg_sample_scores/log_base", + "name": "log_base", + "qname": "sklearn.metrics._ranking._dcg_sample_scores.log_base", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "2", + "description": "Base of the logarithm used for the discount. A low value means a\nsharper discount (top results are more important)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_dcg_sample_scores/ignore_ties", + "name": "ignore_ties", + "qname": "sklearn.metrics._ranking._dcg_sample_scores.ignore_ties", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Assume that there are no ties in y_score (which is likely to be the\ncase if y_score is continuous) for efficiency gains." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.", + "docstring": "Compute Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\nk : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\nlog_base : float, default=2\n Base of the logarithm used for the discount. 
A low value means a\n sharper discount (top results are more important).\n\nignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\ndiscounted_cumulative_gain : ndarray of shape (n_samples,)\n The DCG score for each sample.\n\nSee Also\n--------\nndcg_score : The Discounted Cumulative Gain divided by the Ideal Discounted\n Cumulative Gain (the DCG obtained for a perfect ranking), in order to\n have a score between 0 and 1.", + "code": "def _dcg_sample_scores(y_true, y_score, k=None,\n log_base=2, ignore_ties=False):\n \"\"\"Compute Discounted Cumulative Gain.\n\n Sum the true scores ranked in the order induced by the predicted scores,\n after applying a logarithmic discount.\n\n This ranking metric yields a high value if true labels are ranked high by\n ``y_score``.\n\n Parameters\n ----------\n y_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\n y_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\n k : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\n log_base : float, default=2\n Base of the logarithm used for the discount. A low value means a\n sharper discount (top results are more important).\n\n ignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\n Returns\n -------\n discounted_cumulative_gain : ndarray of shape (n_samples,)\n The DCG score for each sample.\n\n See Also\n --------\n ndcg_score : The Discounted Cumulative Gain divided by the Ideal Discounted\n Cumulative Gain (the DCG obtained for a perfect ranking), in order to\n have a score between 0 and 1.\n \"\"\"\n discount = 1 / (np.log(np.arange(y_true.shape[1]) + 2) / np.log(log_base))\n if k is not None:\n discount[k:] = 0\n if ignore_ties:\n ranking = np.argsort(y_score)[:, ::-1]\n ranked = y_true[np.arange(ranking.shape[0])[:, np.newaxis], ranking]\n cumulative_gains = discount.dot(ranked.T)\n else:\n discount_cumsum = np.cumsum(discount)\n cumulative_gains = [_tie_averaged_dcg(y_t, y_s, discount_cumsum)\n for y_t, y_s in zip(y_true, y_score)]\n cumulative_gains = np.asarray(cumulative_gains)\n return cumulative_gains" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_multiclass_roc_auc_score", + "name": "_multiclass_roc_auc_score", + "qname": "sklearn.metrics._ranking._multiclass_roc_auc_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/_multiclass_roc_auc_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking._multiclass_roc_auc_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "True multiclass labels." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_multiclass_roc_auc_score/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking._multiclass_roc_auc_score.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_classes)", + "default_value": "", + "description": "Target scores corresponding to probability estimates of a sample\nbelonging to a particular class" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_multiclass_roc_auc_score/labels", + "name": "labels", + "qname": "sklearn.metrics._ranking._multiclass_roc_auc_score.labels", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,) or None", + "default_value": "", + "description": "List of labels to index ``y_score`` used for multiclass. If ``None``,\nthe lexical order of ``y_true`` is used to index ``y_score``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_multiclass_roc_auc_score/multi_class", + "name": "multi_class", + "qname": "sklearn.metrics._ranking._multiclass_roc_auc_score.multi_class", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'ovr', 'ovo'}", + "default_value": "", + "description": "Determines the type of multiclass configuration to use.\n``'ovr'``:\n Calculate metrics for the multiclass case using the one-vs-rest\n approach.\n``'ovo'``:\n Calculate metrics for the multiclass case using the one-vs-one\n approach." + }, + "type": { + "kind": "EnumType", + "values": ["ovo", "ovr"] + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_multiclass_roc_auc_score/average", + "name": "average", + "qname": "sklearn.metrics._ranking._multiclass_roc_auc_score.average", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'macro', 'weighted'}", + "default_value": "", + "description": "Determines the type of averaging performed on the pairwise binary\nmetric scores\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account. Classes\n are assumed to be uniformly distributed.\n``'weighted'``:\n Calculate metrics for each label, taking into account the\n prevalence of the classes." + }, + "type": { + "kind": "EnumType", + "values": ["macro", "weighted"] + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_multiclass_roc_auc_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking._multiclass_roc_auc_score.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or None", + "default_value": "", + "description": "Sample weights." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Multiclass roc auc score.", + "docstring": "Multiclass roc auc score.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n True multiclass labels.\n\ny_score : array-like of shape (n_samples, n_classes)\n Target scores corresponding to probability estimates of a sample\n belonging to a particular class\n\nlabels : array-like of shape (n_classes,) or None\n List of labels to index ``y_score`` used for multiclass. If ``None``,\n the lexical order of ``y_true`` is used to index ``y_score``.\n\nmulti_class : {'ovr', 'ovo'}\n Determines the type of multiclass configuration to use.\n ``'ovr'``:\n Calculate metrics for the multiclass case using the one-vs-rest\n approach.\n ``'ovo'``:\n Calculate metrics for the multiclass case using the one-vs-one\n approach.\n\naverage : {'macro', 'weighted'}\n Determines the type of averaging performed on the pairwise binary\n metric scores\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account. Classes\n are assumed to be uniformly distributed.\n ``'weighted'``:\n Calculate metrics for each label, taking into account the\n prevalence of the classes.\n\nsample_weight : array-like of shape (n_samples,) or None\n Sample weights.", + "code": "def _multiclass_roc_auc_score(y_true, y_score, labels,\n multi_class, average, sample_weight):\n \"\"\"Multiclass roc auc score.\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,)\n True multiclass labels.\n\n y_score : array-like of shape (n_samples, n_classes)\n Target scores corresponding to probability estimates of a sample\n belonging to a particular class\n\n labels : array-like of shape (n_classes,) or None\n List of labels to index ``y_score`` used for multiclass. If ``None``,\n the lexical order of ``y_true`` is used to index ``y_score``.\n\n multi_class : {'ovr', 'ovo'}\n Determines the type of multiclass configuration to use.\n ``'ovr'``:\n Calculate metrics for the multiclass case using the one-vs-rest\n approach.\n ``'ovo'``:\n Calculate metrics for the multiclass case using the one-vs-one\n approach.\n\n average : {'macro', 'weighted'}\n Determines the type of averaging performed on the pairwise binary\n metric scores\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account. Classes\n are assumed to be uniformly distributed.\n ``'weighted'``:\n Calculate metrics for each label, taking into account the\n prevalence of the classes.\n\n sample_weight : array-like of shape (n_samples,) or None\n Sample weights.\n\n \"\"\"\n # validation of the input y_score\n if not np.allclose(1, y_score.sum(axis=1)):\n raise ValueError(\n \"Target scores need to be probabilities for multiclass \"\n \"roc_auc, i.e. 
they should sum up to 1.0 over classes\")\n\n # validation for multiclass parameter specifications\n average_options = (\"macro\", \"weighted\")\n if average not in average_options:\n raise ValueError(\"average must be one of {0} for \"\n \"multiclass problems\".format(average_options))\n\n multiclass_options = (\"ovo\", \"ovr\")\n if multi_class not in multiclass_options:\n raise ValueError(\"multi_class='{0}' is not supported \"\n \"for multiclass ROC AUC, multi_class must be \"\n \"in {1}\".format(\n multi_class, multiclass_options))\n\n if labels is not None:\n labels = column_or_1d(labels)\n classes = _unique(labels)\n if len(classes) != len(labels):\n raise ValueError(\"Parameter 'labels' must be unique\")\n if not np.array_equal(classes, labels):\n raise ValueError(\"Parameter 'labels' must be ordered\")\n if len(classes) != y_score.shape[1]:\n raise ValueError(\n \"Number of given labels, {0}, not equal to the number \"\n \"of columns in 'y_score', {1}\".format(\n len(classes), y_score.shape[1]))\n if len(np.setdiff1d(y_true, classes)):\n raise ValueError(\n \"'y_true' contains labels not in parameter 'labels'\")\n else:\n classes = _unique(y_true)\n if len(classes) != y_score.shape[1]:\n raise ValueError(\n \"Number of classes in y_true not equal to the number of \"\n \"columns in 'y_score'\")\n\n if multi_class == \"ovo\":\n if sample_weight is not None:\n raise ValueError(\"sample_weight is not supported \"\n \"for multiclass one-vs-one ROC AUC, \"\n \"'sample_weight' must be None in this case.\")\n y_true_encoded = _encode(y_true, uniques=classes)\n # Hand & Till (2001) implementation (ovo)\n return _average_multiclass_ovo_score(_binary_roc_auc_score,\n y_true_encoded,\n y_score, average=average)\n else:\n # ovr is same as multi-label\n y_true_multilabel = label_binarize(y_true, classes=classes)\n return _average_binary_score(_binary_roc_auc_score, y_true_multilabel,\n y_score, average,\n sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_ndcg_sample_scores", + "name": "_ndcg_sample_scores", + "qname": "sklearn.metrics._ranking._ndcg_sample_scores", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/_ndcg_sample_scores/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking._ndcg_sample_scores.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_labels)", + "default_value": "", + "description": "True targets of multilabel classification, or true scores of entities\nto be ranked." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_ndcg_sample_scores/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking._ndcg_sample_scores.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_labels)", + "default_value": "", + "description": "Target scores, can either be probability estimates, confidence values,\nor non-thresholded measure of decisions (as returned by\n\"decision_function\" on some classifiers)." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_ndcg_sample_scores/k", + "name": "k", + "qname": "sklearn.metrics._ranking._ndcg_sample_scores.k", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Only consider the highest k scores in the ranking. If None, use all\noutputs." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_ndcg_sample_scores/ignore_ties", + "name": "ignore_ties", + "qname": "sklearn.metrics._ranking._ndcg_sample_scores.ignore_ties", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Assume that there are no ties in y_score (which is likely to be the\ncase if y_score is continuous) for efficiency gains." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Normalized Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount. Then divide by the best possible\nscore (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n0 and 1.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.", + "docstring": "Compute Normalized Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount. Then divide by the best possible\nscore (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n0 and 1.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\nk : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\nignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\nnormalized_discounted_cumulative_gain : ndarray of shape (n_samples,)\n The NDCG score for each sample (float in [0., 1.]).\n\nSee Also\n--------\ndcg_score : Discounted Cumulative Gain (not normalized).", + "code": "def _ndcg_sample_scores(y_true, y_score, k=None, ignore_ties=False):\n \"\"\"Compute Normalized Discounted Cumulative Gain.\n\n Sum the true scores ranked in the order induced by the predicted scores,\n after applying a logarithmic discount. 
Then divide by the best possible\n score (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n 0 and 1.\n\n This ranking metric yields a high value if true labels are ranked high by\n ``y_score``.\n\n Parameters\n ----------\n y_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\n y_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\n k : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\n ignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\n Returns\n -------\n normalized_discounted_cumulative_gain : ndarray of shape (n_samples,)\n The NDCG score for each sample (float in [0., 1.]).\n\n See Also\n --------\n dcg_score : Discounted Cumulative Gain (not normalized).\n\n \"\"\"\n gain = _dcg_sample_scores(y_true, y_score, k, ignore_ties=ignore_ties)\n # Here we use the order induced by y_true so we can ignore ties since\n # the gain associated to tied indices is the same (permuting ties doesn't\n # change the value of the re-ordered y_true)\n normalizing_gain = _dcg_sample_scores(y_true, y_true, k, ignore_ties=True)\n all_irrelevant = normalizing_gain == 0\n gain[all_irrelevant] = 0\n gain[~all_irrelevant] /= normalizing_gain[~all_irrelevant]\n return gain" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_tie_averaged_dcg", + "name": "_tie_averaged_dcg", + "qname": "sklearn.metrics._ranking._tie_averaged_dcg", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/_tie_averaged_dcg/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking._tie_averaged_dcg.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "The true relevance scores." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_tie_averaged_dcg/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking._tie_averaged_dcg.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "Predicted scores." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/_tie_averaged_dcg/discount_cumsum", + "name": "discount_cumsum", + "qname": "sklearn.metrics._ranking._tie_averaged_dcg.discount_cumsum", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "Precomputed cumulative sum of the discounts." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute DCG by averaging over possible permutations of ties.\n\nThe gain (`y_true`) of an index falling inside a tied group (in the order\ninduced by `y_score`) is replaced by the average gain within this group.\nThe discounted gain for a tied group is then the average `y_true` within\nthis group times the sum of discounts of the corresponding ranks.\n\nThis amounts to averaging scores for all possible orderings of the tied\ngroups.\n\n(note in the case of dcg@k the discount is 0 after index k)", + "docstring": "Compute DCG by averaging over possible permutations of ties.\n\nThe gain (`y_true`) of an index falling inside a tied group (in the order\ninduced by `y_score`) is replaced by the average gain within this group.\nThe discounted gain for a tied group is then the average `y_true` within\nthis group times the sum of discounts of the corresponding ranks.\n\nThis amounts to averaging scores for all possible orderings of the tied\ngroups.\n\n(note in the case of dcg@k the discount is 0 after index k)\n\nParameters\n----------\ny_true : ndarray\n The true relevance scores.\n\ny_score : ndarray\n Predicted scores.\n\ndiscount_cumsum : ndarray\n Precomputed cumulative sum of the discounts.\n\nReturns\n-------\ndiscounted_cumulative_gain : float\n The discounted cumulative gain.\n\nReferences\n----------\nMcSherry, F., & Najork, M. (2008, March). Computing information retrieval\nperformance measures efficiently in the presence of tied scores. In\nEuropean conference on information retrieval (pp. 414-421). Springer,\nBerlin, Heidelberg.", + "code": "def _tie_averaged_dcg(y_true, y_score, discount_cumsum):\n \"\"\"\n Compute DCG by averaging over possible permutations of ties.\n\n The gain (`y_true`) of an index falling inside a tied group (in the order\n induced by `y_score`) is replaced by the average gain within this group.\n The discounted gain for a tied group is then the average `y_true` within\n this group times the sum of discounts of the corresponding ranks.\n\n This amounts to averaging scores for all possible orderings of the tied\n groups.\n\n (note in the case of dcg@k the discount is 0 after index k)\n\n Parameters\n ----------\n y_true : ndarray\n The true relevance scores.\n\n y_score : ndarray\n Predicted scores.\n\n discount_cumsum : ndarray\n Precomputed cumulative sum of the discounts.\n\n Returns\n -------\n discounted_cumulative_gain : float\n The discounted cumulative gain.\n\n References\n ----------\n McSherry, F., & Najork, M. (2008, March). Computing information retrieval\n performance measures efficiently in the presence of tied scores. In\n European conference on information retrieval (pp. 414-421). 
Springer,\n Berlin, Heidelberg.\n \"\"\"\n _, inv, counts = np.unique(\n - y_score, return_inverse=True, return_counts=True)\n ranked = np.zeros(len(counts))\n np.add.at(ranked, inv, y_true)\n ranked /= counts\n groups = np.cumsum(counts) - 1\n discount_sums = np.empty(len(counts))\n discount_sums[0] = discount_cumsum[groups[0]]\n discount_sums[1:] = np.diff(discount_cumsum[groups])\n return (ranked * discount_sums).sum()" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/auc", + "name": "auc", + "qname": "sklearn.metrics._ranking.auc", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/auc/x", + "name": "x", + "qname": "sklearn.metrics._ranking.auc.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n,)", + "default_value": "", + "description": "x coordinates. These must be either monotonic increasing or monotonic\ndecreasing." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/auc/y", + "name": "y", + "qname": "sklearn.metrics._ranking.auc.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape, (n,)", + "default_value": "", + "description": "y coordinates." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape" + }, + { + "kind": "NamedType", + "name": "(n,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Area Under the Curve (AUC) using the trapezoidal rule.\n\nThis is a general function, given points on a curve. For computing the\narea under the ROC-curve, see :func:`roc_auc_score`. For an alternative\nway to summarize a precision-recall curve, see\n:func:`average_precision_score`.", + "docstring": "Compute Area Under the Curve (AUC) using the trapezoidal rule.\n\nThis is a general function, given points on a curve. For computing the\narea under the ROC-curve, see :func:`roc_auc_score`. For an alternative\nway to summarize a precision-recall curve, see\n:func:`average_precision_score`.\n\nParameters\n----------\nx : ndarray of shape (n,)\n x coordinates. These must be either monotonic increasing or monotonic\n decreasing.\ny : ndarray of shape, (n,)\n y coordinates.\n\nReturns\n-------\nauc : float\n\nSee Also\n--------\nroc_auc_score : Compute the area under the ROC curve.\naverage_precision_score : Compute average precision from prediction scores.\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([1, 1, 2, 2])\n>>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, tpr, thresholds = metrics.roc_curve(y, pred, pos_label=2)\n>>> metrics.auc(fpr, tpr)\n0.75", + "code": "def auc(x, y):\n \"\"\"Compute Area Under the Curve (AUC) using the trapezoidal rule.\n\n This is a general function, given points on a curve. For computing the\n area under the ROC-curve, see :func:`roc_auc_score`. For an alternative\n way to summarize a precision-recall curve, see\n :func:`average_precision_score`.\n\n Parameters\n ----------\n x : ndarray of shape (n,)\n x coordinates. 
These must be either monotonic increasing or monotonic\n decreasing.\n y : ndarray of shape (n,)\n y coordinates.\n\n Returns\n -------\n auc : float\n\n See Also\n --------\n roc_auc_score : Compute the area under the ROC curve.\n average_precision_score : Compute average precision from prediction scores.\n precision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn import metrics\n >>> y = np.array([1, 1, 2, 2])\n >>> pred = np.array([0.1, 0.4, 0.35, 0.8])\n >>> fpr, tpr, thresholds = metrics.roc_curve(y, pred, pos_label=2)\n >>> metrics.auc(fpr, tpr)\n 0.75\n \"\"\"\n check_consistent_length(x, y)\n x = column_or_1d(x)\n y = column_or_1d(y)\n\n if x.shape[0] < 2:\n raise ValueError('At least 2 points are needed to compute'\n ' area under curve, but x.shape = %s' % x.shape)\n\n direction = 1\n dx = np.diff(x)\n if np.any(dx < 0):\n if np.all(dx <= 0):\n direction = -1\n else:\n raise ValueError(\"x is neither increasing nor decreasing \"\n \": {}.\".format(x))\n\n area = direction * np.trapz(y, x)\n if isinstance(area, np.memmap):\n # Reductions such as .sum used internally in np.trapz do not return a\n # scalar by default for numpy.memmap instances contrary to\n # regular numpy.ndarray instances.\n area = area.dtype.type(area)\n return area" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/average_precision_score", + "name": "average_precision_score", + "qname": "sklearn.metrics._ranking.average_precision_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/average_precision_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking.average_precision_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,) or (n_samples, n_classes)", + "default_value": "", + "description": "True binary labels or binary label indicators." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,) or (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/average_precision_score/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking.average_precision_score.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,) or (n_samples, n_classes)", + "default_value": "", + "description": "Target scores, can either be probability estimates of the positive\nclass, confidence values, or non-thresholded measure of decisions\n(as returned by :term:`decision_function` on some classifiers)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,) or (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/average_precision_score/average", + "name": "average", + "qname": "sklearn.metrics._ranking.average_precision_score.average", + "default_value": "'macro'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'micro', 'samples', 'weighted', 'macro'} or None", + "default_value": "'macro'", + "description": "If ``None``, the scores for each class are returned.
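A quick cross-check of the `auc` entry above: the function wraps `np.trapz` with input validation and a sign flip for monotonically decreasing `x`. A minimal sketch, assuming only `numpy` and the public `sklearn.metrics.auc`:

import numpy as np
from sklearn.metrics import auc

x = np.array([0.0, 0.5, 1.0])  # monotonically increasing coordinates
y = np.array([0.0, 0.8, 1.0])

# auc adds validation on top of the piecewise-linear (trapezoidal) integral.
assert np.isclose(auc(x, y), np.trapz(y, x))

# For decreasing x the (negative) integral is negated, so reversing
# both arrays yields the same area.
assert np.isclose(auc(x[::-1], y[::-1]), auc(x, y))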
Otherwise,\nthis determines the type of averaging performed on the data:\n\n``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n``'samples'``:\n Calculate metrics for each instance, and find their average.\n\nWill be ignored when ``y_true`` is binary." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["samples", "macro", "micro", "weighted"] + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/average_precision_score/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._ranking.average_precision_score.pos_label", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or str", + "default_value": "1", + "description": "The label of the positive class. Only applied to binary ``y_true``.\nFor multilabel-indicator ``y_true``, ``pos_label`` is fixed to 1." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/average_precision_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking.average_precision_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute average precision (AP) from prediction scores.\n\nAP summarizes a precision-recall curve as the weighted mean of precisions\nachieved at each threshold, with the increase in recall from the previous\nthreshold used as the weight:\n\n.. math::\n \\text{AP} = \\sum_n (R_n - R_{n-1}) P_n\n\nwhere :math:`P_n` and :math:`R_n` are the precision and recall at the nth\nthreshold [1]_. This implementation is not interpolated and is different\nfrom computing the area under the precision-recall curve with the\ntrapezoidal rule, which uses linear interpolation and can be too\noptimistic.\n\nNote: this implementation is restricted to the binary classification task\nor multilabel classification task.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute average precision (AP) from prediction scores.\n\nAP summarizes a precision-recall curve as the weighted mean of precisions\nachieved at each threshold, with the increase in recall from the previous\nthreshold used as the weight:\n\n.. math::\n \\text{AP} = \\sum_n (R_n - R_{n-1}) P_n\n\nwhere :math:`P_n` and :math:`R_n` are the precision and recall at the nth\nthreshold [1]_. 
This implementation is not interpolated and is different\nfrom computing the area under the precision-recall curve with the\ntrapezoidal rule, which uses linear interpolation and can be too\noptimistic.\n\nNote: this implementation is restricted to the binary classification task\nor multilabel classification task.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,) or (n_samples, n_classes)\n True binary labels or binary label indicators.\n\ny_score : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by :term:`decision_function` on some classifiers).\n\naverage : {'micro', 'samples', 'weighted', 'macro'} or None, default='macro'\n If ``None``, the scores for each class are returned. Otherwise,\n this determines the type of averaging performed on the data:\n\n ``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n ``'samples'``:\n Calculate metrics for each instance, and find their average.\n\n Will be ignored when ``y_true`` is binary.\n\npos_label : int or str, default=1\n The label of the positive class. Only applied to binary ``y_true``.\n For multilabel-indicator ``y_true``, ``pos_label`` is fixed to 1.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\naverage_precision : float\n\nSee Also\n--------\nroc_auc_score : Compute the area under the ROC curve.\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\n\nNotes\n-----\n.. versionchanged:: 0.19\n Instead of linearly interpolating between operating points, precisions\n are weighted by the change in recall since the last operating point.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Average precision\n `_\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import average_precision_score\n>>> y_true = np.array([0, 0, 1, 1])\n>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> average_precision_score(y_true, y_scores)\n0.83...", + "code": "@_deprecate_positional_args\ndef average_precision_score(y_true, y_score, *, average=\"macro\", pos_label=1,\n sample_weight=None):\n \"\"\"Compute average precision (AP) from prediction scores.\n\n AP summarizes a precision-recall curve as the weighted mean of precisions\n achieved at each threshold, with the increase in recall from the previous\n threshold used as the weight:\n\n .. math::\n \\\\text{AP} = \\\\sum_n (R_n - R_{n-1}) P_n\n\n where :math:`P_n` and :math:`R_n` are the precision and recall at the nth\n threshold [1]_. 
This implementation is not interpolated and is different\n from computing the area under the precision-recall curve with the\n trapezoidal rule, which uses linear interpolation and can be too\n optimistic.\n\n Note: this implementation is restricted to the binary classification task\n or multilabel classification task.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : ndarray of shape (n_samples,) or (n_samples, n_classes)\n True binary labels or binary label indicators.\n\n y_score : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by :term:`decision_function` on some classifiers).\n\n average : {'micro', 'samples', 'weighted', 'macro'} or None, \\\n default='macro'\n If ``None``, the scores for each class are returned. Otherwise,\n this determines the type of averaging performed on the data:\n\n ``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n ``'samples'``:\n Calculate metrics for each instance, and find their average.\n\n Will be ignored when ``y_true`` is binary.\n\n pos_label : int or str, default=1\n The label of the positive class. Only applied to binary ``y_true``.\n For multilabel-indicator ``y_true``, ``pos_label`` is fixed to 1.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n average_precision : float\n\n See Also\n --------\n roc_auc_score : Compute the area under the ROC curve.\n precision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\n\n Notes\n -----\n .. versionchanged:: 0.19\n Instead of linearly interpolating between operating points, precisions\n are weighted by the change in recall since the last operating point.\n\n References\n ----------\n .. [1] `Wikipedia entry for the Average precision\n `_\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.metrics import average_precision_score\n >>> y_true = np.array([0, 0, 1, 1])\n >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n >>> average_precision_score(y_true, y_scores)\n 0.83...\n \"\"\"\n def _binary_uninterpolated_average_precision(\n y_true, y_score, pos_label=1, sample_weight=None):\n precision, recall, _ = precision_recall_curve(\n y_true, y_score, pos_label=pos_label, sample_weight=sample_weight)\n # Return the step function integral\n # The following works because the last entry of precision is\n # guaranteed to be 1, as returned by precision_recall_curve\n return -np.sum(np.diff(recall) * np.array(precision)[:-1])\n\n y_type = type_of_target(y_true)\n if y_type == \"multilabel-indicator\" and pos_label != 1:\n raise ValueError(\"Parameter pos_label is fixed to 1 for \"\n \"multilabel-indicator y_true. Do not set \"\n \"pos_label or set pos_label to 1.\")\n elif y_type == \"binary\":\n # Convert to Python primitive type to avoid NumPy type / Python str\n # comparison. 
See https://github.com/numpy/numpy/issues/6784\n present_labels = np.unique(y_true).tolist()\n if len(present_labels) == 2 and pos_label not in present_labels:\n raise ValueError(\n f\"pos_label={pos_label} is not a valid label. It should be \"\n f\"one of {present_labels}\"\n )\n average_precision = partial(_binary_uninterpolated_average_precision,\n pos_label=pos_label)\n return _average_binary_score(average_precision, y_true, y_score,\n average, sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/coverage_error", + "name": "coverage_error", + "qname": "sklearn.metrics._ranking.coverage_error", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/coverage_error/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking.coverage_error.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_labels)", + "default_value": "", + "description": "True binary labels in binary indicator format." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/coverage_error/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking.coverage_error.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_labels)", + "default_value": "", + "description": "Target scores, can either be probability estimates of the positive\nclass, confidence values, or non-thresholded measure of decisions\n(as returned by \"decision_function\" on some classifiers)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/coverage_error/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking.coverage_error.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Coverage error measure.\n\nCompute how far we need to go through the ranked scores to cover all\ntrue labels. The best value is equal to the average number\nof labels in ``y_true`` per sample.\n\nTies in ``y_scores`` are broken by giving maximal rank that would have\nbeen assigned to all tied values.\n\nNote: Our implementation's score is 1 greater than the one given in\nTsoumakas et al., 2010. This extends it to handle the degenerate case\nin which an instance has 0 true labels.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Coverage error measure.\n\nCompute how far we need to go through the ranked scores to cover all\ntrue labels. The best value is equal to the average number\nof labels in ``y_true`` per sample.\n\nTies in ``y_scores`` are broken by giving maximal rank that would have\nbeen assigned to all tied values.\n\nNote: Our implementation's score is 1 greater than the one given in\nTsoumakas et al., 2010. 
This extends it to handle the degenerate case\nin which an instance has 0 true labels.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True binary labels in binary indicator format.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\ncoverage_error : float\n\nReferences\n----------\n.. [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010).\n Mining multi-label data. In Data mining and knowledge discovery\n handbook (pp. 667-685). Springer US.", + "code": "@_deprecate_positional_args\ndef coverage_error(y_true, y_score, *, sample_weight=None):\n \"\"\"Coverage error measure.\n\n Compute how far we need to go through the ranked scores to cover all\n true labels. The best value is equal to the average number\n of labels in ``y_true`` per sample.\n\n Ties in ``y_scores`` are broken by giving maximal rank that would have\n been assigned to all tied values.\n\n Note: Our implementation's score is 1 greater than the one given in\n Tsoumakas et al., 2010. This extends it to handle the degenerate case\n in which an instance has 0 true labels.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : ndarray of shape (n_samples, n_labels)\n True binary labels in binary indicator format.\n\n y_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n coverage_error : float\n\n References\n ----------\n .. [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010).\n Mining multi-label data. In Data mining and knowledge discovery\n handbook (pp. 667-685). Springer US.\n\n \"\"\"\n y_true = check_array(y_true, ensure_2d=False)\n y_score = check_array(y_score, ensure_2d=False)\n check_consistent_length(y_true, y_score, sample_weight)\n\n y_type = type_of_target(y_true)\n if y_type != \"multilabel-indicator\":\n raise ValueError(\"{0} format is not supported\".format(y_type))\n\n if y_true.shape != y_score.shape:\n raise ValueError(\"y_true and y_score have different shape\")\n\n y_score_mask = np.ma.masked_array(y_score, mask=np.logical_not(y_true))\n y_min_relevant = y_score_mask.min(axis=1).reshape((-1, 1))\n coverage = (y_score >= y_min_relevant).sum(axis=1)\n coverage = coverage.filled(0)\n\n return np.average(coverage, weights=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/dcg_score", + "name": "dcg_score", + "qname": "sklearn.metrics._ranking.dcg_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/dcg_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking.dcg_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_labels)", + "default_value": "", + "description": "True targets of multilabel classification, or true scores of entities\nto be ranked." 
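The `coverage_error` entry above carries no Examples section, so here is a hedged usage sketch; the inputs are illustrative, not taken from the source:

import numpy as np
from sklearn.metrics import coverage_error

y_true = np.array([[1, 0, 0], [0, 1, 1]])
y_score = np.array([[0.75, 0.5, 1.0], [1.0, 0.2, 0.1]])

# Sample 1: the only true label scores 0.75 and is outranked by 1.0,
# so 2 labels must be covered. Sample 2: the lowest-scored true label
# scores 0.1, so all 3 labels must be covered.
print(coverage_error(y_true, y_score))  # (2 + 3) / 2 = 2.5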
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/dcg_score/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking.dcg_score.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_labels)", + "default_value": "", + "description": "Target scores, can either be probability estimates, confidence values,\nor non-thresholded measure of decisions (as returned by\n\"decision_function\" on some classifiers)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/dcg_score/k", + "name": "k", + "qname": "sklearn.metrics._ranking.dcg_score.k", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Only consider the highest k scores in the ranking. If None, use all\noutputs." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/dcg_score/log_base", + "name": "log_base", + "qname": "sklearn.metrics._ranking.dcg_score.log_base", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "2", + "description": "Base of the logarithm used for the discount. A low value means a\nsharper discount (top results are more important)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/dcg_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking.dcg_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, all samples are given the same weight." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/dcg_score/ignore_ties", + "name": "ignore_ties", + "qname": "sklearn.metrics._ranking.dcg_score.ignore_ties", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Assume that there are no ties in y_score (which is likely to be the\ncase if y_score is continuous) for efficiency gains." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nUsually the Normalized Discounted Cumulative Gain (NDCG, computed by\nndcg_score) is preferred.", + "docstring": "Compute Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nUsually the Normalized Discounted Cumulative Gain (NDCG, computed by\nndcg_score) is preferred.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\nk : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\nlog_base : float, default=2\n Base of the logarithm used for the discount. A low value means a\n sharper discount (top results are more important).\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights. If None, all samples are given the same weight.\n\nignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\ndiscounted_cumulative_gain : float\n The averaged sample DCG scores.\n\nSee Also\n--------\nndcg_score : The Discounted Cumulative Gain divided by the Ideal Discounted\n Cumulative Gain (the DCG obtained for a perfect ranking), in order to\n have a score between 0 and 1.\n\nReferences\n----------\n`Wikipedia entry for Discounted Cumulative Gain\n`_.\n\nJarvelin, K., & Kekalainen, J. (2002).\nCumulated gain-based evaluation of IR techniques. ACM Transactions on\nInformation Systems (TOIS), 20(4), 422-446.\n\nWang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).\nA theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\nAnnual Conference on Learning Theory (COLT 2013).\n\nMcSherry, F., & Najork, M. (2008, March). Computing information retrieval\nperformance measures efficiently in the presence of tied scores. In\nEuropean conference on information retrieval (pp. 414-421). 
Springer,\nBerlin, Heidelberg.\n\nExamples\n--------\n>>> from sklearn.metrics import dcg_score\n>>> # we have ground-truth relevance of some answers to a query:\n>>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n>>> # we predict scores for the answers\n>>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n>>> dcg_score(true_relevance, scores)\n9.49...\n>>> # we can set k to truncate the sum; only top k answers contribute\n>>> dcg_score(true_relevance, scores, k=2)\n5.63...\n>>> # now we have some ties in our prediction\n>>> scores = np.asarray([[1, 0, 0, 0, 1]])\n>>> # by default ties are averaged, so here we get the average true\n>>> # relevance of our top predictions: (10 + 5) / 2 = 7.5\n>>> dcg_score(true_relevance, scores, k=1)\n7.5\n>>> # we can choose to ignore ties for faster results, but only\n>>> # if we know there aren't ties in our scores, otherwise we get\n>>> # wrong results:\n>>> dcg_score(true_relevance,\n... scores, k=1, ignore_ties=True)\n5.0", + "code": "@_deprecate_positional_args\ndef dcg_score(y_true, y_score, *, k=None,\n log_base=2, sample_weight=None, ignore_ties=False):\n \"\"\"Compute Discounted Cumulative Gain.\n\n Sum the true scores ranked in the order induced by the predicted scores,\n after applying a logarithmic discount.\n\n This ranking metric yields a high value if true labels are ranked high by\n ``y_score``.\n\n Usually the Normalized Discounted Cumulative Gain (NDCG, computed by\n ndcg_score) is preferred.\n\n Parameters\n ----------\n y_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\n y_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\n k : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\n log_base : float, default=2\n Base of the logarithm used for the discount. A low value means a\n sharper discount (top results are more important).\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights. If None, all samples are given the same weight.\n\n ignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\n Returns\n -------\n discounted_cumulative_gain : float\n The averaged sample DCG scores.\n\n See Also\n --------\n ndcg_score : The Discounted Cumulative Gain divided by the Ideal Discounted\n Cumulative Gain (the DCG obtained for a perfect ranking), in order to\n have a score between 0 and 1.\n\n References\n ----------\n `Wikipedia entry for Discounted Cumulative Gain\n `_.\n\n Jarvelin, K., & Kekalainen, J. (2002).\n Cumulated gain-based evaluation of IR techniques. ACM Transactions on\n Information Systems (TOIS), 20(4), 422-446.\n\n Wang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).\n A theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\n Annual Conference on Learning Theory (COLT 2013).\n\n McSherry, F., & Najork, M. (2008, March). Computing information retrieval\n performance measures efficiently in the presence of tied scores. In\n European conference on information retrieval (pp. 414-421).
Springer,\n Berlin, Heidelberg.\n\n Examples\n --------\n >>> from sklearn.metrics import dcg_score\n >>> # we have ground-truth relevance of some answers to a query:\n >>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n >>> # we predict scores for the answers\n >>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n >>> dcg_score(true_relevance, scores)\n 9.49...\n >>> # we can set k to truncate the sum; only top k answers contribute\n >>> dcg_score(true_relevance, scores, k=2)\n 5.63...\n >>> # now we have some ties in our prediction\n >>> scores = np.asarray([[1, 0, 0, 0, 1]])\n >>> # by default ties are averaged, so here we get the average true\n >>> # relevance of our top predictions: (10 + 5) / 2 = 7.5\n >>> dcg_score(true_relevance, scores, k=1)\n 7.5\n >>> # we can choose to ignore ties for faster results, but only\n >>> # if we know there aren't ties in our scores, otherwise we get\n >>> # wrong results:\n >>> dcg_score(true_relevance,\n ... scores, k=1, ignore_ties=True)\n 5.0\n\n \"\"\"\n y_true = check_array(y_true, ensure_2d=False)\n y_score = check_array(y_score, ensure_2d=False)\n check_consistent_length(y_true, y_score, sample_weight)\n _check_dcg_target_type(y_true)\n return np.average(\n _dcg_sample_scores(\n y_true, y_score, k=k, log_base=log_base,\n ignore_ties=ignore_ties),\n weights=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/det_curve", + "name": "det_curve", + "qname": "sklearn.metrics._ranking.det_curve", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/det_curve/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking.det_curve.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True binary labels. If labels are not either {-1, 1} or {0, 1}, then\npos_label should be explicitly given." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/det_curve/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking.det_curve.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target scores, can either be probability estimates of the positive\nclass, confidence values, or non-thresholded measure of decisions\n(as returned by \"decision_function\" on some classifiers)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/det_curve/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._ranking.det_curve.pos_label", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or str", + "default_value": "None", + "description": "The label of the positive class.\nWhen ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n``pos_label`` is set to 1, otherwise an error will be raised."
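The 9.49... value in the dcg_score docstring above can be checked by hand; a sketch assuming the documented discount 1 / log2(rank + 1) for the default log_base=2:

import numpy as np
from sklearn.metrics import dcg_score

true_relevance = np.asarray([[10, 0, 0, 1, 5]])
scores = np.asarray([[.1, .2, .3, 4, 70]])

# Sorting by score (descending) orders the gains as 5, 1, 0, 0, 10.
gains = np.array([5, 1, 0, 0, 10])
discounts = 1 / np.log2(np.arange(2, 7))  # ranks 1..5 -> log2(2)..log2(6)
print(gains @ discounts)                  # ~9.4995
print(dcg_score(true_relevance, scores))  # 9.49...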
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/det_curve/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking.det_curve.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute error rates for different probability thresholds.\n\n.. note::\n This metric is used for evaluation of ranking and error tradeoffs of\n a binary classification task.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24", + "docstring": "Compute error rates for different probability thresholds.\n\n.. note::\n This metric is used for evaluation of ranking and error tradeoffs of\n a binary classification task.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n pos_label should be explicitly given.\n\ny_score : ndarray of shape (n_samples,)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\npos_label : int or str, default=None\n The label of the positive class.\n When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n ``pos_label`` is set to 1, otherwise an error will be raised.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nfpr : ndarray of shape (n_thresholds,)\n False positive rate (FPR) such that element i is the false positive\n rate of predictions with score >= thresholds[i]. This is occasionally\n referred to as false acceptance probability or fall-out.\n\nfnr : ndarray of shape (n_thresholds,)\n False negative rate (FNR) such that element i is the false negative\n rate of predictions with score >= thresholds[i]. This is occasionally\n referred to as false rejection or miss rate.\n\nthresholds : ndarray of shape (n_thresholds,)\n Decreasing score values.\n\nSee Also\n--------\nplot_det_curve : Plot detection error tradeoff (DET) curve.\nDetCurveDisplay : DET curve visualization.\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nprecision_recall_curve : Compute precision-recall curve.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import det_curve\n>>> y_true = np.array([0, 0, 1, 1])\n>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, fnr, thresholds = det_curve(y_true, y_scores)\n>>> fpr\narray([0.5, 0.5, 0. ])\n>>> fnr\narray([0. , 0.5, 0.5])\n>>> thresholds\narray([0.35, 0.4 , 0.8 ])", + "code": "def det_curve(y_true, y_score, pos_label=None, sample_weight=None):\n \"\"\"Compute error rates for different probability thresholds.\n\n .. note::\n This metric is used for evaluation of ranking and error tradeoffs of\n a binary classification task.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n y_true : ndarray of shape (n_samples,)\n True binary labels.
If labels are not either {-1, 1} or {0, 1}, then\n pos_label should be explicitly given.\n\n y_score : ndarray of shape (n_samples,)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\n pos_label : int or str, default=None\n The label of the positive class.\n When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n ``pos_label`` is set to 1, otherwise an error will be raised.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n fpr : ndarray of shape (n_thresholds,)\n False positive rate (FPR) such that element i is the false positive\n rate of predictions with score >= thresholds[i]. This is occasionally\n referred to as false acceptance probability or fall-out.\n\n fnr : ndarray of shape (n_thresholds,)\n False negative rate (FNR) such that element i is the false negative\n rate of predictions with score >= thresholds[i]. This is occasionally\n referred to as false rejection or miss rate.\n\n thresholds : ndarray of shape (n_thresholds,)\n Decreasing score values.\n\n See Also\n --------\n plot_det_curve : Plot detection error tradeoff (DET) curve.\n DetCurveDisplay : DET curve visualization.\n roc_curve : Compute Receiver operating characteristic (ROC) curve.\n precision_recall_curve : Compute precision-recall curve.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.metrics import det_curve\n >>> y_true = np.array([0, 0, 1, 1])\n >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n >>> fpr, fnr, thresholds = det_curve(y_true, y_scores)\n >>> fpr\n array([0.5, 0.5, 0. ])\n >>> fnr\n array([0. , 0.5, 0.5])\n >>> thresholds\n array([0.35, 0.4 , 0.8 ])\n \"\"\"\n if len(np.unique(y_true)) != 2:\n raise ValueError(\"Only one class present in y_true. Detection error \"\n \"tradeoff curve is not defined in that case.\")\n\n fps, tps, thresholds = _binary_clf_curve(\n y_true, y_score, pos_label=pos_label, sample_weight=sample_weight\n )\n\n fns = tps[-1] - tps\n p_count = tps[-1]\n n_count = fps[-1]\n\n # start with false positives zero\n first_ind = (\n fps.searchsorted(fps[0], side='right') - 1\n if fps.searchsorted(fps[0], side='right') > 0\n else None\n )\n # stop with false negatives zero\n last_ind = tps.searchsorted(tps[-1]) + 1\n sl = slice(first_ind, last_ind)\n\n # reverse the output such that list of false positives is decreasing\n return (\n fps[sl][::-1] / n_count,\n fns[sl][::-1] / p_count,\n thresholds[sl][::-1]\n )" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/label_ranking_average_precision_score", + "name": "label_ranking_average_precision_score", + "qname": "sklearn.metrics._ranking.label_ranking_average_precision_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/label_ranking_average_precision_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking.label_ranking_average_precision_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_labels)", + "default_value": "", + "description": "True binary labels in binary indicator format."
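One common use of the (fpr, fnr, thresholds) arrays documented for det_curve above is locating an operating point where the two error rates meet; a hedged sketch (the equal-error-rate logic below is illustrative, not part of scikit-learn's API):

import numpy as np
from sklearn.metrics import det_curve

y_true = np.array([0, 0, 1, 1])
y_scores = np.array([0.1, 0.4, 0.35, 0.8])

fpr, fnr, thresholds = det_curve(y_true, y_scores)
i = np.argmin(np.abs(fpr - fnr))      # index where FPR and FNR are closest
print(thresholds[i], fpr[i], fnr[i])  # 0.4 0.5 0.5 for these inputs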
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_labels)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/label_ranking_average_precision_score/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking.label_ranking_average_precision_score.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_labels)", + "default_value": "", + "description": "Target scores, can either be probability estimates of the positive\nclass, confidence values, or non-thresholded measure of decisions\n(as returned by \"decision_function\" on some classifiers)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/label_ranking_average_precision_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking.label_ranking_average_precision_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute ranking-based average precision.\n\nLabel ranking average precision (LRAP) is the average over each ground\ntruth label assigned to each sample, of the ratio of true vs. total\nlabels with lower score.\n\nThis metric is used in multilabel ranking problem, where the goal\nis to give better rank to the labels associated to each sample.\n\nThe obtained score is always strictly greater than 0 and\nthe best value is 1.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute ranking-based average precision.\n\nLabel ranking average precision (LRAP) is the average over each ground\ntruth label assigned to each sample, of the ratio of true vs. total\nlabels with lower score.\n\nThis metric is used in multilabel ranking problem, where the goal\nis to give better rank to the labels associated to each sample.\n\nThe obtained score is always strictly greater than 0 and\nthe best value is 1.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : {ndarray, sparse matrix} of shape (n_samples, n_labels)\n True binary labels in binary indicator format.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\nscore : float\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import label_ranking_average_precision_score\n>>> y_true = np.array([[1, 0, 0], [0, 0, 1]])\n>>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])\n>>> label_ranking_average_precision_score(y_true, y_score)\n0.416...", + "code": "@_deprecate_positional_args\ndef label_ranking_average_precision_score(y_true, y_score, *,\n sample_weight=None):\n \"\"\"Compute ranking-based average precision.\n\n Label ranking average precision (LRAP) is the average over each ground\n truth label assigned to each sample, of the ratio of true vs. total\n labels with lower score.\n\n This metric is used in multilabel ranking problem, where the goal\n is to give better rank to the labels associated to each sample.\n\n The obtained score is always strictly greater than 0 and\n the best value is 1.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : {ndarray, sparse matrix} of shape (n_samples, n_labels)\n True binary labels in binary indicator format.\n\n y_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.20\n\n Returns\n -------\n score : float\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.metrics import label_ranking_average_precision_score\n >>> y_true = np.array([[1, 0, 0], [0, 0, 1]])\n >>> y_score = np.array([[0.75, 0.5, 1], [1, 0.2, 0.1]])\n >>> label_ranking_average_precision_score(y_true, y_score)\n 0.416...\n\n \"\"\"\n check_consistent_length(y_true, y_score, sample_weight)\n y_true = check_array(y_true, ensure_2d=False)\n y_score = check_array(y_score, ensure_2d=False)\n\n if y_true.shape != y_score.shape:\n raise ValueError(\"y_true and y_score have different shape\")\n\n # Handle badly formatted array and the degenerate case with one label\n y_type = type_of_target(y_true)\n if (y_type != \"multilabel-indicator\" and\n not (y_type == \"binary\" and y_true.ndim == 2)):\n raise ValueError(\"{0} format is not supported\".format(y_type))\n\n y_true = csr_matrix(y_true)\n y_score = -y_score\n\n n_samples, n_labels = y_true.shape\n\n out = 0.\n for i, (start, stop) in enumerate(zip(y_true.indptr, y_true.indptr[1:])):\n relevant = y_true.indices[start:stop]\n\n if (relevant.size == 0 or relevant.size == n_labels):\n # If all labels are relevant or irrelevant, the score is also\n # equal to 1.
The label ranking has no meaning.\n aux = 1.\n else:\n scores_i = y_score[i]\n rank = rankdata(scores_i, 'max')[relevant]\n L = rankdata(scores_i[relevant], 'max')\n aux = (L / rank).mean()\n\n if sample_weight is not None:\n aux = aux * sample_weight[i]\n out += aux\n\n if sample_weight is None:\n out /= n_samples\n else:\n out /= np.sum(sample_weight)\n\n return out" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/label_ranking_loss", + "name": "label_ranking_loss", + "qname": "sklearn.metrics._ranking.label_ranking_loss", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/label_ranking_loss/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking.label_ranking_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_labels)", + "default_value": "", + "description": "True binary labels in binary indicator format." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_labels)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/label_ranking_loss/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking.label_ranking_loss.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_labels)", + "default_value": "", + "description": "Target scores, can either be probability estimates of the positive\nclass, confidence values, or non-thresholded measure of decisions\n(as returned by \"decision_function\" on some classifiers)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/label_ranking_loss/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking.label_ranking_loss.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Ranking loss measure.\n\nCompute the average number of label pairs that are incorrectly ordered\ngiven y_score weighted by the size of the label set and the number of\nlabels not in the label set.\n\nThis is similar to the error set size, but weighted by the number of\nrelevant and irrelevant labels. The best performance is achieved with\na ranking loss of zero.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n A function *label_ranking_loss*", + "docstring": "Compute Ranking loss measure.\n\nCompute the average number of label pairs that are incorrectly ordered\ngiven y_score weighted by the size of the label set and the number of\nlabels not in the label set.\n\nThis is similar to the error set size, but weighted by the number of\nrelevant and irrelevant labels. The best performance is achieved with\na ranking loss of zero.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.17\n A function *label_ranking_loss*\n\nParameters\n----------\ny_true : {ndarray, sparse matrix} of shape (n_samples, n_labels)\n True binary labels in binary indicator format.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float\n\nReferences\n----------\n.. [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010).\n Mining multi-label data. In Data mining and knowledge discovery\n handbook (pp. 667-685). Springer US.", + "code": "@_deprecate_positional_args\ndef label_ranking_loss(y_true, y_score, *, sample_weight=None):\n \"\"\"Compute Ranking loss measure.\n\n Compute the average number of label pairs that are incorrectly ordered\n given y_score weighted by the size of the label set and the number of\n labels not in the label set.\n\n This is similar to the error set size, but weighted by the number of\n relevant and irrelevant labels. The best performance is achieved with\n a ranking loss of zero.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.17\n A function *label_ranking_loss*\n\n Parameters\n ----------\n y_true : {ndarray, sparse matrix} of shape (n_samples, n_labels)\n True binary labels in binary indicator format.\n\n y_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n loss : float\n\n References\n ----------\n .. [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010).\n Mining multi-label data. In Data mining and knowledge discovery\n handbook (pp. 667-685). 
Springer US.\n \"\"\"\n y_true = check_array(y_true, ensure_2d=False, accept_sparse='csr')\n y_score = check_array(y_score, ensure_2d=False)\n check_consistent_length(y_true, y_score, sample_weight)\n\n y_type = type_of_target(y_true)\n if y_type not in (\"multilabel-indicator\",):\n raise ValueError(\"{0} format is not supported\".format(y_type))\n\n if y_true.shape != y_score.shape:\n raise ValueError(\"y_true and y_score have different shape\")\n\n n_samples, n_labels = y_true.shape\n\n y_true = csr_matrix(y_true)\n\n loss = np.zeros(n_samples)\n for i, (start, stop) in enumerate(zip(y_true.indptr, y_true.indptr[1:])):\n # Sort and bin the label scores\n unique_scores, unique_inverse = np.unique(y_score[i],\n return_inverse=True)\n true_at_reversed_rank = np.bincount(\n unique_inverse[y_true.indices[start:stop]],\n minlength=len(unique_scores))\n all_at_reversed_rank = np.bincount(unique_inverse,\n minlength=len(unique_scores))\n false_at_reversed_rank = all_at_reversed_rank - true_at_reversed_rank\n\n # if the scores are ordered, it's possible to count the number of\n # incorrectly ordered pairs in linear time by cumulatively counting\n # how many false labels of a given score have a score higher than the\n # accumulated true labels with lower score.\n loss[i] = np.dot(true_at_reversed_rank.cumsum(),\n false_at_reversed_rank)\n\n n_positives = count_nonzero(y_true, axis=1)\n with np.errstate(divide=\"ignore\", invalid=\"ignore\"):\n loss /= ((n_labels - n_positives) * n_positives)\n\n # When there are no positive or no negative labels, those values should\n # be considered as correct, i.e. the ranking doesn't matter.\n loss[np.logical_or(n_positives == 0, n_positives == n_labels)] = 0.\n\n return np.average(loss, weights=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/ndcg_score", + "name": "ndcg_score", + "qname": "sklearn.metrics._ranking.ndcg_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/ndcg_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking.ndcg_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_labels)", + "default_value": "", + "description": "True targets of multilabel classification, or true scores of entities\nto be ranked." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/ndcg_score/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking.ndcg_score.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_labels)", + "default_value": "", + "description": "Target scores, can either be probability estimates, confidence values,\nor non-thresholded measure of decisions (as returned by\n\"decision_function\" on some classifiers)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/ndcg_score/k", + "name": "k", + "qname": "sklearn.metrics._ranking.ndcg_score.k", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Only consider the highest k scores in the ranking. If None, use all\noutputs."
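The label_ranking_loss entry above also lacks an Examples section; a hedged worked example with illustrative inputs:

import numpy as np
from sklearn.metrics import label_ranking_loss

y_true = np.array([[1, 0, 0], [0, 0, 1]])
y_score = np.array([[0.75, 0.5, 1.0], [1.0, 0.2, 0.1]])

# Sample 1: of the two (true, false) pairs only (0.75 vs 1.0) is
# mis-ordered -> 1/2. Sample 2: both false labels outscore the true
# one -> 2/2.
print(label_ranking_loss(y_true, y_score))  # (0.5 + 1.0) / 2 = 0.75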
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/ndcg_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking.ndcg_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, all samples are given the same weight." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/ndcg_score/ignore_ties", + "name": "ignore_ties", + "qname": "sklearn.metrics._ranking.ndcg_score.ignore_ties", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Assume that there are no ties in y_score (which is likely to be the\ncase if y_score is continuous) for efficiency gains." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Normalized Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount. Then divide by the best possible\nscore (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n0 and 1.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.", + "docstring": "Compute Normalized Discounted Cumulative Gain.\n\nSum the true scores ranked in the order induced by the predicted scores,\nafter applying a logarithmic discount. Then divide by the best possible\nscore (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n0 and 1.\n\nThis ranking metric yields a high value if true labels are ranked high by\n``y_score``.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\nk : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\nsample_weight : ndarray of shape (n_samples,), default=None\n Sample weights. If None, all samples are given the same weight.\n\nignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\nReturns\n-------\nnormalized_discounted_cumulative_gain : float in [0., 1.]\n The averaged NDCG scores for all samples.\n\nSee Also\n--------\ndcg_score : Discounted Cumulative Gain (not normalized).\n\nReferences\n----------\n`Wikipedia entry for Discounted Cumulative Gain\n`_\n\nJarvelin, K., & Kekalainen, J. (2002).\nCumulated gain-based evaluation of IR techniques. ACM Transactions on\nInformation Systems (TOIS), 20(4), 422-446.\n\nWang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).\nA theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\nAnnual Conference on Learning Theory (COLT 2013)\n\nMcSherry, F., & Najork, M. (2008, March). Computing information retrieval\nperformance measures efficiently in the presence of tied scores. 
In\nEuropean conference on information retrieval (pp. 414-421). Springer,\nBerlin, Heidelberg.\n\nExamples\n--------\n>>> from sklearn.metrics import ndcg_score\n>>> # we have ground-truth relevance of some answers to a query:\n>>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n>>> # we predict some scores (relevance) for the answers\n>>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n>>> ndcg_score(true_relevance, scores)\n0.69...\n>>> scores = np.asarray([[.05, 1.1, 1., .5, .0]])\n>>> ndcg_score(true_relevance, scores)\n0.49...\n>>> # we can set k to truncate the sum; only top k answers contribute.\n>>> ndcg_score(true_relevance, scores, k=4)\n0.35...\n>>> # the normalization takes k into account so a perfect answer\n>>> # would still get 1.0\n>>> ndcg_score(true_relevance, true_relevance, k=4)\n1.0\n>>> # now we have some ties in our prediction\n>>> scores = np.asarray([[1, 0, 0, 0, 1]])\n>>> # by default ties are averaged, so here we get the average (normalized)\n>>> # true relevance of our top predictions: (10 / 10 + 5 / 10) / 2 = .75\n>>> ndcg_score(true_relevance, scores, k=1)\n0.75\n>>> # we can choose to ignore ties for faster results, but only\n>>> # if we know there aren't ties in our scores, otherwise we get\n>>> # wrong results:\n>>> ndcg_score(true_relevance,\n... scores, k=1, ignore_ties=True)\n0.5", + "code": "@_deprecate_positional_args\ndef ndcg_score(y_true, y_score, *, k=None, sample_weight=None,\n ignore_ties=False):\n \"\"\"Compute Normalized Discounted Cumulative Gain.\n\n Sum the true scores ranked in the order induced by the predicted scores,\n after applying a logarithmic discount. Then divide by the best possible\n score (Ideal DCG, obtained for a perfect ranking) to obtain a score between\n 0 and 1.\n\n This ranking metric yields a high value if true labels are ranked high by\n ``y_score``.\n\n Parameters\n ----------\n y_true : ndarray of shape (n_samples, n_labels)\n True targets of multilabel classification, or true scores of entities\n to be ranked.\n\n y_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates, confidence values,\n or non-thresholded measure of decisions (as returned by\n \"decision_function\" on some classifiers).\n\n k : int, default=None\n Only consider the highest k scores in the ranking. If None, use all\n outputs.\n\n sample_weight : ndarray of shape (n_samples,), default=None\n Sample weights. If None, all samples are given the same weight.\n\n ignore_ties : bool, default=False\n Assume that there are no ties in y_score (which is likely to be the\n case if y_score is continuous) for efficiency gains.\n\n Returns\n -------\n normalized_discounted_cumulative_gain : float in [0., 1.]\n The averaged NDCG scores for all samples.\n\n See Also\n --------\n dcg_score : Discounted Cumulative Gain (not normalized).\n\n References\n ----------\n `Wikipedia entry for Discounted Cumulative Gain\n `_\n\n Jarvelin, K., & Kekalainen, J. (2002).\n Cumulated gain-based evaluation of IR techniques. ACM Transactions on\n Information Systems (TOIS), 20(4), 422-446.\n\n Wang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).\n A theoretical analysis of NDCG ranking measures. In Proceedings of the 26th\n Annual Conference on Learning Theory (COLT 2013)\n\n McSherry, F., & Najork, M. (2008, March). Computing information retrieval\n performance measures efficiently in the presence of tied scores. In\n European conference on information retrieval (pp. 414-421).
Springer,\n Berlin, Heidelberg.\n\n Examples\n --------\n >>> from sklearn.metrics import ndcg_score\n >>> # we have groud-truth relevance of some answers to a query:\n >>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])\n >>> # we predict some scores (relevance) for the answers\n >>> scores = np.asarray([[.1, .2, .3, 4, 70]])\n >>> ndcg_score(true_relevance, scores)\n 0.69...\n >>> scores = np.asarray([[.05, 1.1, 1., .5, .0]])\n >>> ndcg_score(true_relevance, scores)\n 0.49...\n >>> # we can set k to truncate the sum; only top k answers contribute.\n >>> ndcg_score(true_relevance, scores, k=4)\n 0.35...\n >>> # the normalization takes k into account so a perfect answer\n >>> # would still get 1.0\n >>> ndcg_score(true_relevance, true_relevance, k=4)\n 1.0\n >>> # now we have some ties in our prediction\n >>> scores = np.asarray([[1, 0, 0, 0, 1]])\n >>> # by default ties are averaged, so here we get the average (normalized)\n >>> # true relevance of our top predictions: (10 / 10 + 5 / 10) / 2 = .75\n >>> ndcg_score(true_relevance, scores, k=1)\n 0.75\n >>> # we can choose to ignore ties for faster results, but only\n >>> # if we know there aren't ties in our scores, otherwise we get\n >>> # wrong results:\n >>> ndcg_score(true_relevance,\n ... scores, k=1, ignore_ties=True)\n 0.5\n\n \"\"\"\n y_true = check_array(y_true, ensure_2d=False)\n y_score = check_array(y_score, ensure_2d=False)\n check_consistent_length(y_true, y_score, sample_weight)\n _check_dcg_target_type(y_true)\n gain = _ndcg_sample_scores(y_true, y_score, k=k, ignore_ties=ignore_ties)\n return np.average(gain, weights=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/precision_recall_curve", + "name": "precision_recall_curve", + "qname": "sklearn.metrics._ranking.precision_recall_curve", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/precision_recall_curve/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking.precision_recall_curve.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True binary labels. If labels are not either {-1, 1} or {0, 1}, then\npos_label should be explicitly given." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/precision_recall_curve/probas_pred", + "name": "probas_pred", + "qname": "sklearn.metrics._ranking.precision_recall_curve.probas_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Estimated probabilities or output of a decision function." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/precision_recall_curve/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._ranking.precision_recall_curve.pos_label", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or str", + "default_value": "None", + "description": "The label of the positive class.\nWhen ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},\n``pos_label`` is set to 1, otherwise an error will be raised." 
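A minimal editorial sketch (not emitted by the dump generator) checking the definition given in the `ndcg_score` entry above: log-discounted gains summed over the ranking induced by `y_score`, normalized by the ideal DCG. It reuses the docstring's own example data and doctest conventions:

>>> import numpy as np
>>> from sklearn.metrics import ndcg_score
>>> y_true = np.asarray([[10, 0, 0, 1, 5]])
>>> y_score = np.asarray([[.1, .2, .3, 4, 70]])
>>> order = np.argsort(y_score[0])[::-1]        # ranking induced by y_score
>>> discount = 1.0 / np.log2(np.arange(2, 7))   # 1 / log2(rank + 1), ranks 1..5
>>> dcg = float(np.sum(y_true[0][order] * discount))
>>> idcg = float(np.sum(np.sort(y_true[0])[::-1] * discount))   # ideal ordering
>>> bool(np.isclose(dcg / idcg, ndcg_score(y_true, y_score)))
True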
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/precision_recall_curve/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking.precision_recall_curve.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute precision-recall pairs for different probability thresholds.\n\nNote: this implementation is restricted to the binary classification task.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe last precision and recall values are 1. and 0. respectively and do not\nhave a corresponding threshold. This ensures that the graph starts on the\ny axis.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute precision-recall pairs for different probability thresholds.\n\nNote: this implementation is restricted to the binary classification task.\n\nThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\ntrue positives and ``fp`` the number of false positives. The precision is\nintuitively the ability of the classifier not to label as positive a sample\nthat is negative.\n\nThe recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\ntrue positives and ``fn`` the number of false negatives. The recall is\nintuitively the ability of the classifier to find all the positive samples.\n\nThe last precision and recall values are 1. and 0. respectively and do not\nhave a corresponding threshold. This ensures that the graph starts on the\ny axis.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n pos_label should be explicitly given.\n\nprobas_pred : ndarray of shape (n_samples,)\n Estimated probabilities or output of a decision function.\n\npos_label : int or str, default=None\n The label of the positive class.\n When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},\n ``pos_label`` is set to 1, otherwise an error will be raised.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nprecision : ndarray of shape (n_thresholds + 1,)\n Precision values such that element i is the precision of\n predictions with score >= thresholds[i] and the last element is 1.\n\nrecall : ndarray of shape (n_thresholds + 1,)\n Decreasing recall values such that element i is the recall of\n predictions with score >= thresholds[i] and the last element is 0.\n\nthresholds : ndarray of shape (n_thresholds,)\n Increasing thresholds on the decision function used to compute\n precision and recall. 
n_thresholds <= len(np.unique(probas_pred)).\n\nSee Also\n--------\nplot_precision_recall_curve : Plot Precision Recall Curve for binary\n classifiers.\nPrecisionRecallDisplay : Precision Recall visualization.\naverage_precision_score : Compute average precision from prediction scores.\ndet_curve: Compute error rates for different probability thresholds.\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import precision_recall_curve\n>>> y_true = np.array([0, 0, 1, 1])\n>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> precision, recall, thresholds = precision_recall_curve(\n... y_true, y_scores)\n>>> precision\narray([0.66666667, 0.5 , 1. , 1. ])\n>>> recall\narray([1. , 0.5, 0.5, 0. ])\n>>> thresholds\narray([0.35, 0.4 , 0.8 ])", + "code": "@_deprecate_positional_args\ndef precision_recall_curve(y_true, probas_pred, *, pos_label=None,\n sample_weight=None):\n \"\"\"Compute precision-recall pairs for different probability thresholds.\n\n Note: this implementation is restricted to the binary classification task.\n\n The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\n true positives and ``fp`` the number of false positives. The precision is\n intuitively the ability of the classifier not to label as positive a sample\n that is negative.\n\n The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of\n true positives and ``fn`` the number of false negatives. The recall is\n intuitively the ability of the classifier to find all the positive samples.\n\n The last precision and recall values are 1. and 0. respectively and do not\n have a corresponding threshold. This ensures that the graph starts on the\n y axis.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : ndarray of shape (n_samples,)\n True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n pos_label should be explicitly given.\n\n probas_pred : ndarray of shape (n_samples,)\n Estimated probabilities or output of a decision function.\n\n pos_label : int or str, default=None\n The label of the positive class.\n When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},\n ``pos_label`` is set to 1, otherwise an error will be raised.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n precision : ndarray of shape (n_thresholds + 1,)\n Precision values such that element i is the precision of\n predictions with score >= thresholds[i] and the last element is 1.\n\n recall : ndarray of shape (n_thresholds + 1,)\n Decreasing recall values such that element i is the recall of\n predictions with score >= thresholds[i] and the last element is 0.\n\n thresholds : ndarray of shape (n_thresholds,)\n Increasing thresholds on the decision function used to compute\n precision and recall. 
n_thresholds <= len(np.unique(probas_pred)).\n\n See Also\n --------\n plot_precision_recall_curve : Plot Precision Recall Curve for binary\n classifiers.\n PrecisionRecallDisplay : Precision Recall visualization.\n average_precision_score : Compute average precision from prediction scores.\n det_curve: Compute error rates for different probability thresholds.\n roc_curve : Compute Receiver operating characteristic (ROC) curve.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.metrics import precision_recall_curve\n >>> y_true = np.array([0, 0, 1, 1])\n >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n >>> precision, recall, thresholds = precision_recall_curve(\n ... y_true, y_scores)\n >>> precision\n array([0.66666667, 0.5 , 1. , 1. ])\n >>> recall\n array([1. , 0.5, 0.5, 0. ])\n >>> thresholds\n array([0.35, 0.4 , 0.8 ])\n\n \"\"\"\n fps, tps, thresholds = _binary_clf_curve(y_true, probas_pred,\n pos_label=pos_label,\n sample_weight=sample_weight)\n\n precision = tps / (tps + fps)\n precision[np.isnan(precision)] = 0\n recall = tps / tps[-1]\n\n # stop when full recall attained\n # and reverse the outputs so recall is decreasing\n last_ind = tps.searchsorted(tps[-1])\n sl = slice(last_ind, None, -1)\n return np.r_[precision[sl], 1], np.r_[recall[sl], 0], thresholds[sl]" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_auc_score", + "name": "roc_auc_score", + "qname": "sklearn.metrics._ranking.roc_auc_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_auc_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking.roc_auc_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_classes)", + "default_value": "", + "description": "True labels or binary label indicators. The binary and multiclass cases\nexpect labels with shape (n_samples,) while the multilabel case expects\nbinary label indicators with shape (n_samples, n_classes)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_auc_score/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking.roc_auc_score.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_classes)", + "default_value": "", + "description": "Target scores.\n\n* In the binary case, it corresponds to an array of shape\n `(n_samples,)`. Both probability estimates and non-thresholded\n decision values can be provided. The probability estimates correspond\n to the **probability of the class with the greater label**,\n i.e. `estimator.classes_[1]` and thus\n `estimator.predict_proba(X, y)[:, 1]`. The decision values\n corresponds to the output of `estimator.decision_function(X, y)`.\n See more information in the :ref:`User guide `;\n* In the multiclass case, it corresponds to an array of shape\n `(n_samples, n_classes)` of probability estimates provided by the\n `predict_proba` method. The probability estimates **must**\n sum to 1 across the possible classes. In addition, the order of the\n class scores must correspond to the order of ``labels``,\n if provided, or else to the numerical or lexicographical order of\n the labels in ``y_true``. 
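An editorial sketch of the endpoint convention documented in the `precision_recall_curve` entry above: the final (precision, recall) pair is (1.0, 0.0) and has no corresponding threshold, so `thresholds` is one element shorter than the other two arrays:

>>> import numpy as np
>>> from sklearn.metrics import precision_recall_curve
>>> y_true = np.array([0, 0, 1, 1])
>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
>>> precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
>>> (float(precision[-1]), float(recall[-1]), len(precision) - len(thresholds))
(1.0, 0.0, 1)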
See more information in the\n :ref:`User guide `;\n* In the multilabel case, it corresponds to an array of shape\n `(n_samples, n_classes)`. Probability estimates are provided by the\n `predict_proba` method and the non-thresholded decision values by\n the `decision_function` method. The probability estimates correspond\n to the **probability of the class with the greater label for each\n output** of the classifier. See more information in the\n :ref:`User guide `." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_auc_score/average", + "name": "average", + "qname": "sklearn.metrics._ranking.roc_auc_score.average", + "default_value": "'macro'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'micro', 'macro', 'samples', 'weighted'} or None", + "default_value": "'macro'", + "description": "If ``None``, the scores for each class are returned. Otherwise,\nthis determines the type of averaging performed on the data:\nNote: multiclass ROC AUC currently only handles the 'macro' and\n'weighted' averages.\n\n``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n``'samples'``:\n Calculate metrics for each instance, and find their average.\n\nWill be ignored when ``y_true`` is binary." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["samples", "macro", "micro", "weighted"] + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_auc_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking.roc_auc_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_auc_score/max_fpr", + "name": "max_fpr", + "qname": "sklearn.metrics._ranking.roc_auc_score.max_fpr", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float > 0 and <= 1", + "default_value": "None", + "description": "If not ``None``, the standardized partial AUC [2]_ over the range\n[0, max_fpr] is returned. For the multiclass case, ``max_fpr``,\nshould be either equal to ``None`` or ``1.0`` as AUC ROC partial\ncomputation currently is not supported for multiclass." + }, + "type": { + "kind": "NamedType", + "name": "float > 0 and <= 1" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_auc_score/multi_class", + "name": "multi_class", + "qname": "sklearn.metrics._ranking.roc_auc_score.multi_class", + "default_value": "'raise'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'raise', 'ovr', 'ovo'}", + "default_value": "'raise'", + "description": "Only used for multiclass targets. Determines the type of configuration\nto use. 
The default value raises an error, so either\n``'ovr'`` or ``'ovo'`` must be passed explicitly.\n\n``'ovr'``:\n Stands for One-vs-rest. Computes the AUC of each class\n against the rest [3]_ [4]_. This\n treats the multiclass case in the same way as the multilabel case.\n Sensitive to class imbalance even when ``average == 'macro'``,\n because class imbalance affects the composition of each of the\n 'rest' groupings.\n``'ovo'``:\n Stands for One-vs-one. Computes the average AUC of all\n possible pairwise combinations of classes [5]_.\n Insensitive to class imbalance when\n ``average == 'macro'``." + }, + "type": { + "kind": "EnumType", + "values": ["ovo", "ovr", "raise"] + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_auc_score/labels", + "name": "labels", + "qname": "sklearn.metrics._ranking.roc_auc_score.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "Only used for multiclass targets. List of labels that index the\nclasses in ``y_score``. If ``None``, the numerical or lexicographical\norder of the labels in ``y_true`` is used." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)\nfrom prediction scores.\n\nNote: this implementation can be used with binary, multiclass and\nmultilabel classification, but some restrictions apply (see Parameters).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)\nfrom prediction scores.\n\nNote: this implementation can be used with binary, multiclass and\nmultilabel classification, but some restrictions apply (see Parameters).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_classes)\n True labels or binary label indicators. The binary and multiclass cases\n expect labels with shape (n_samples,) while the multilabel case expects\n binary label indicators with shape (n_samples, n_classes).\n\ny_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target scores.\n\n * In the binary case, it corresponds to an array of shape\n `(n_samples,)`. Both probability estimates and non-thresholded\n decision values can be provided. The probability estimates correspond\n to the **probability of the class with the greater label**,\n i.e. `estimator.classes_[1]` and thus\n `estimator.predict_proba(X, y)[:, 1]`. The decision values\n corresponds to the output of `estimator.decision_function(X, y)`.\n See more information in the :ref:`User guide `;\n * In the multiclass case, it corresponds to an array of shape\n `(n_samples, n_classes)` of probability estimates provided by the\n `predict_proba` method. The probability estimates **must**\n sum to 1 across the possible classes. In addition, the order of the\n class scores must correspond to the order of ``labels``,\n if provided, or else to the numerical or lexicographical order of\n the labels in ``y_true``. See more information in the\n :ref:`User guide `;\n * In the multilabel case, it corresponds to an array of shape\n `(n_samples, n_classes)`. Probability estimates are provided by the\n `predict_proba` method and the non-thresholded decision values by\n the `decision_function` method. 
The probability estimates correspond\n to the **probability of the class with the greater label for each\n output** of the classifier. See more information in the\n :ref:`User guide `.\n\naverage : {'micro', 'macro', 'samples', 'weighted'} or None, default='macro'\n If ``None``, the scores for each class are returned. Otherwise,\n this determines the type of averaging performed on the data:\n Note: multiclass ROC AUC currently only handles the 'macro' and\n 'weighted' averages.\n\n ``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n ``'samples'``:\n Calculate metrics for each instance, and find their average.\n\n Will be ignored when ``y_true`` is binary.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmax_fpr : float > 0 and <= 1, default=None\n If not ``None``, the standardized partial AUC [2]_ over the range\n [0, max_fpr] is returned. For the multiclass case, ``max_fpr``,\n should be either equal to ``None`` or ``1.0`` as AUC ROC partial\n computation currently is not supported for multiclass.\n\nmulti_class : {'raise', 'ovr', 'ovo'}, default='raise'\n Only used for multiclass targets. Determines the type of configuration\n to use. The default value raises an error, so either\n ``'ovr'`` or ``'ovo'`` must be passed explicitly.\n\n ``'ovr'``:\n Stands for One-vs-rest. Computes the AUC of each class\n against the rest [3]_ [4]_. This\n treats the multiclass case in the same way as the multilabel case.\n Sensitive to class imbalance even when ``average == 'macro'``,\n because class imbalance affects the composition of each of the\n 'rest' groupings.\n ``'ovo'``:\n Stands for One-vs-one. Computes the average AUC of all\n possible pairwise combinations of classes [5]_.\n Insensitive to class imbalance when\n ``average == 'macro'``.\n\nlabels : array-like of shape (n_classes,), default=None\n Only used for multiclass targets. List of labels that index the\n classes in ``y_score``. If ``None``, the numerical or lexicographical\n order of the labels in ``y_true`` is used.\n\nReturns\n-------\nauc : float\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Receiver operating characteristic\n `_\n\n.. [2] `Analyzing a portion of the ROC curve. McClish, 1989\n `_\n\n.. [3] Provost, F., Domingos, P. (2000). Well-trained PETs: Improving\n probability estimation trees (Section 6.2), CeDER Working Paper\n #IS-00-04, Stern School of Business, New York University.\n\n.. [4] `Fawcett, T. (2006). An introduction to ROC analysis. Pattern\n Recognition Letters, 27(8), 861-874.\n `_\n\n.. [5] `Hand, D.J., Till, R.J. (2001). 
A Simple Generalisation of the Area\n Under the ROC Curve for Multiple Class Classification Problems.\n Machine Learning, 45(2), 171-186.\n `_\n\nSee Also\n--------\naverage_precision_score : Area under the precision-recall curve.\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\nExamples\n--------\nBinary case:\n\n>>> from sklearn.datasets import load_breast_cancer\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.metrics import roc_auc_score\n>>> X, y = load_breast_cancer(return_X_y=True)\n>>> clf = LogisticRegression(solver=\"liblinear\", random_state=0).fit(X, y)\n>>> roc_auc_score(y, clf.predict_proba(X)[:, 1])\n0.99...\n>>> roc_auc_score(y, clf.decision_function(X))\n0.99...\n\nMulticlass case:\n\n>>> from sklearn.datasets import load_iris\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = LogisticRegression(solver=\"liblinear\").fit(X, y)\n>>> roc_auc_score(y, clf.predict_proba(X), multi_class='ovr')\n0.99...\n\nMultilabel case:\n\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.multioutput import MultiOutputClassifier\n>>> X, y = make_multilabel_classification(random_state=0)\n>>> clf = MultiOutputClassifier(clf).fit(X, y)\n>>> # get a list of n_output containing probability arrays of shape\n>>> # (n_samples, n_classes)\n>>> y_pred = clf.predict_proba(X)\n>>> # extract the positive columns for each output\n>>> y_pred = np.transpose([pred[:, 1] for pred in y_pred])\n>>> roc_auc_score(y, y_pred, average=None)\narray([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])\n>>> from sklearn.linear_model import RidgeClassifierCV\n>>> clf = RidgeClassifierCV().fit(X, y)\n>>> roc_auc_score(y, clf.decision_function(X), average=None)\narray([0.81..., 0.84... , 0.93..., 0.87..., 0.94...])", + "code": "@_deprecate_positional_args\ndef roc_auc_score(y_true, y_score, *, average=\"macro\", sample_weight=None,\n max_fpr=None, multi_class=\"raise\", labels=None):\n \"\"\"Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)\n from prediction scores.\n\n Note: this implementation can be used with binary, multiclass and\n multilabel classification, but some restrictions apply (see Parameters).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,) or (n_samples, n_classes)\n True labels or binary label indicators. The binary and multiclass cases\n expect labels with shape (n_samples,) while the multilabel case expects\n binary label indicators with shape (n_samples, n_classes).\n\n y_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target scores.\n\n * In the binary case, it corresponds to an array of shape\n `(n_samples,)`. Both probability estimates and non-thresholded\n decision values can be provided. The probability estimates correspond\n to the **probability of the class with the greater label**,\n i.e. `estimator.classes_[1]` and thus\n `estimator.predict_proba(X, y)[:, 1]`. The decision values\n corresponds to the output of `estimator.decision_function(X, y)`.\n See more information in the :ref:`User guide `;\n * In the multiclass case, it corresponds to an array of shape\n `(n_samples, n_classes)` of probability estimates provided by the\n `predict_proba` method. The probability estimates **must**\n sum to 1 across the possible classes. 
In addition, the order of the\n class scores must correspond to the order of ``labels``,\n if provided, or else to the numerical or lexicographical order of\n the labels in ``y_true``. See more information in the\n :ref:`User guide `;\n * In the multilabel case, it corresponds to an array of shape\n `(n_samples, n_classes)`. Probability estimates are provided by the\n `predict_proba` method and the non-thresholded decision values by\n the `decision_function` method. The probability estimates correspond\n to the **probability of the class with the greater label for each\n output** of the classifier. See more information in the\n :ref:`User guide `.\n\n average : {'micro', 'macro', 'samples', 'weighted'} or None, \\\n default='macro'\n If ``None``, the scores for each class are returned. Otherwise,\n this determines the type of averaging performed on the data:\n Note: multiclass ROC AUC currently only handles the 'macro' and\n 'weighted' averages.\n\n ``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n ``'samples'``:\n Calculate metrics for each instance, and find their average.\n\n Will be ignored when ``y_true`` is binary.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n max_fpr : float > 0 and <= 1, default=None\n If not ``None``, the standardized partial AUC [2]_ over the range\n [0, max_fpr] is returned. For the multiclass case, ``max_fpr``,\n should be either equal to ``None`` or ``1.0`` as AUC ROC partial\n computation currently is not supported for multiclass.\n\n multi_class : {'raise', 'ovr', 'ovo'}, default='raise'\n Only used for multiclass targets. Determines the type of configuration\n to use. The default value raises an error, so either\n ``'ovr'`` or ``'ovo'`` must be passed explicitly.\n\n ``'ovr'``:\n Stands for One-vs-rest. Computes the AUC of each class\n against the rest [3]_ [4]_. This\n treats the multiclass case in the same way as the multilabel case.\n Sensitive to class imbalance even when ``average == 'macro'``,\n because class imbalance affects the composition of each of the\n 'rest' groupings.\n ``'ovo'``:\n Stands for One-vs-one. Computes the average AUC of all\n possible pairwise combinations of classes [5]_.\n Insensitive to class imbalance when\n ``average == 'macro'``.\n\n labels : array-like of shape (n_classes,), default=None\n Only used for multiclass targets. List of labels that index the\n classes in ``y_score``. If ``None``, the numerical or lexicographical\n order of the labels in ``y_true`` is used.\n\n Returns\n -------\n auc : float\n\n References\n ----------\n .. [1] `Wikipedia entry for the Receiver operating characteristic\n `_\n\n .. [2] `Analyzing a portion of the ROC curve. McClish, 1989\n `_\n\n .. [3] Provost, F., Domingos, P. (2000). Well-trained PETs: Improving\n probability estimation trees (Section 6.2), CeDER Working Paper\n #IS-00-04, Stern School of Business, New York University.\n\n .. [4] `Fawcett, T. (2006). An introduction to ROC analysis. Pattern\n Recognition Letters, 27(8), 861-874.\n `_\n\n .. [5] `Hand, D.J., Till, R.J. (2001). 
A Simple Generalisation of the Area\n Under the ROC Curve for Multiple Class Classification Problems.\n Machine Learning, 45(2), 171-186.\n `_\n\n See Also\n --------\n average_precision_score : Area under the precision-recall curve.\n roc_curve : Compute Receiver operating characteristic (ROC) curve.\n plot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n\n Examples\n --------\n Binary case:\n\n >>> from sklearn.datasets import load_breast_cancer\n >>> from sklearn.linear_model import LogisticRegression\n >>> from sklearn.metrics import roc_auc_score\n >>> X, y = load_breast_cancer(return_X_y=True)\n >>> clf = LogisticRegression(solver=\"liblinear\", random_state=0).fit(X, y)\n >>> roc_auc_score(y, clf.predict_proba(X)[:, 1])\n 0.99...\n >>> roc_auc_score(y, clf.decision_function(X))\n 0.99...\n\n Multiclass case:\n\n >>> from sklearn.datasets import load_iris\n >>> X, y = load_iris(return_X_y=True)\n >>> clf = LogisticRegression(solver=\"liblinear\").fit(X, y)\n >>> roc_auc_score(y, clf.predict_proba(X), multi_class='ovr')\n 0.99...\n\n Multilabel case:\n\n >>> from sklearn.datasets import make_multilabel_classification\n >>> from sklearn.multioutput import MultiOutputClassifier\n >>> X, y = make_multilabel_classification(random_state=0)\n >>> clf = MultiOutputClassifier(clf).fit(X, y)\n >>> # get a list of n_output containing probability arrays of shape\n >>> # (n_samples, n_classes)\n >>> y_pred = clf.predict_proba(X)\n >>> # extract the positive columns for each output\n >>> y_pred = np.transpose([pred[:, 1] for pred in y_pred])\n >>> roc_auc_score(y, y_pred, average=None)\n array([0.82..., 0.86..., 0.94..., 0.85... , 0.94...])\n >>> from sklearn.linear_model import RidgeClassifierCV\n >>> clf = RidgeClassifierCV().fit(X, y)\n >>> roc_auc_score(y, clf.decision_function(X), average=None)\n array([0.81..., 0.84... 
, 0.93..., 0.87..., 0.94...])\n \"\"\"\n\n y_type = type_of_target(y_true)\n y_true = check_array(y_true, ensure_2d=False, dtype=None)\n y_score = check_array(y_score, ensure_2d=False)\n\n if y_type == \"multiclass\" or (y_type == \"binary\" and\n y_score.ndim == 2 and\n y_score.shape[1] > 2):\n # do not support partial ROC computation for multiclass\n if max_fpr is not None and max_fpr != 1.:\n raise ValueError(\"Partial AUC computation not available in \"\n \"multiclass setting, 'max_fpr' must be\"\n \" set to `None`, received `max_fpr={0}` \"\n \"instead\".format(max_fpr))\n if multi_class == 'raise':\n raise ValueError(\"multi_class must be in ('ovo', 'ovr')\")\n return _multiclass_roc_auc_score(y_true, y_score, labels,\n multi_class, average, sample_weight)\n elif y_type == \"binary\":\n labels = np.unique(y_true)\n y_true = label_binarize(y_true, classes=labels)[:, 0]\n return _average_binary_score(partial(_binary_roc_auc_score,\n max_fpr=max_fpr),\n y_true, y_score, average,\n sample_weight=sample_weight)\n else: # multilabel-indicator\n return _average_binary_score(partial(_binary_roc_auc_score,\n max_fpr=max_fpr),\n y_true, y_score, average,\n sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_curve", + "name": "roc_curve", + "qname": "sklearn.metrics._ranking.roc_curve", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_curve/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking.roc_curve.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "True binary labels. If labels are not either {-1, 1} or {0, 1}, then\npos_label should be explicitly given." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_curve/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking.roc_curve.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target scores, can either be probability estimates of the positive\nclass, confidence values, or non-thresholded measure of decisions\n(as returned by \"decision_function\" on some classifiers)." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_curve/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._ranking.roc_curve.pos_label", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or str", + "default_value": "None", + "description": "The label of the positive class.\nWhen ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n``pos_label`` is set to 1, otherwise an error will be raised." 
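Two properties stated in the `roc_auc_score` parameter descriptions above, checked in a short editorial sketch: the score depends only on the ranking of `y_score` (any strictly increasing transform leaves it unchanged), and `max_fpr` returns the McClish-standardized partial AUC:

>>> import numpy as np
>>> from sklearn.metrics import roc_auc_score
>>> y = np.array([0, 0, 1, 1])
>>> scores = np.array([0.1, 0.4, 0.35, 0.8])
>>> roc_auc_score(y, scores)
0.75
>>> bool(roc_auc_score(y, 2 * scores + 1) == roc_auc_score(y, scores))
True
>>> roc_auc_score(y, scores, max_fpr=0.5)   # standardized partial AUC
0.66...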
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_curve/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking.roc_curve.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/roc_curve/drop_intermediate", + "name": "drop_intermediate", + "qname": "sklearn.metrics._ranking.roc_curve.drop_intermediate", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to drop some suboptimal thresholds which would not appear\non a plotted ROC curve. This is useful in order to create lighter\nROC curves.\n\n.. versionadded:: 0.17\n parameter *drop_intermediate*." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Receiver operating characteristic (ROC).\n\nNote: this implementation is restricted to the binary classification task.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute Receiver operating characteristic (ROC).\n\nNote: this implementation is restricted to the binary classification task.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n pos_label should be explicitly given.\n\ny_score : ndarray of shape (n_samples,)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\npos_label : int or str, default=None\n The label of the positive class.\n When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n ``pos_label`` is set to 1, otherwise an error will be raised.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\ndrop_intermediate : bool, default=True\n Whether to drop some suboptimal thresholds which would not appear\n on a plotted ROC curve. This is useful in order to create lighter\n ROC curves.\n\n .. versionadded:: 0.17\n parameter *drop_intermediate*.\n\nReturns\n-------\nfpr : ndarray of shape (>2,)\n Increasing false positive rates such that element i is the false\n positive rate of predictions with score >= `thresholds[i]`.\n\ntpr : ndarray of shape (>2,)\n Increasing true positive rates such that element `i` is the true\n positive rate of predictions with score >= `thresholds[i]`.\n\nthresholds : ndarray of shape = (n_thresholds,)\n Decreasing thresholds on the decision function used to compute\n fpr and tpr. 
`thresholds[0]` represents no instances being predicted\n and is arbitrarily set to `max(y_score) + 1`.\n\nSee Also\n--------\nplot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\nRocCurveDisplay : ROC Curve visualization.\ndet_curve: Compute error rates for different probability thresholds.\nroc_auc_score : Compute the area under the ROC curve.\n\nNotes\n-----\nSince the thresholds are sorted from low to high values, they\nare reversed upon returning them to ensure they correspond to both ``fpr``\nand ``tpr``, which are sorted in reversed order during their calculation.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Receiver operating characteristic\n `_\n\n.. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition\n Letters, 2006, 27(8):861-874.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import metrics\n>>> y = np.array([1, 1, 2, 2])\n>>> scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2)\n>>> fpr\narray([0. , 0. , 0.5, 0.5, 1. ])\n>>> tpr\narray([0. , 0.5, 0.5, 1. , 1. ])\n>>> thresholds\narray([1.8 , 0.8 , 0.4 , 0.35, 0.1 ])", + "code": "@_deprecate_positional_args\ndef roc_curve(y_true, y_score, *, pos_label=None, sample_weight=None,\n drop_intermediate=True):\n \"\"\"Compute Receiver operating characteristic (ROC).\n\n Note: this implementation is restricted to the binary classification task.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : ndarray of shape (n_samples,)\n True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n pos_label should be explicitly given.\n\n y_score : ndarray of shape (n_samples,)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\n pos_label : int or str, default=None\n The label of the positive class.\n When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n ``pos_label`` is set to 1, otherwise an error will be raised.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n drop_intermediate : bool, default=True\n Whether to drop some suboptimal thresholds which would not appear\n on a plotted ROC curve. This is useful in order to create lighter\n ROC curves.\n\n .. versionadded:: 0.17\n parameter *drop_intermediate*.\n\n Returns\n -------\n fpr : ndarray of shape (>2,)\n Increasing false positive rates such that element i is the false\n positive rate of predictions with score >= `thresholds[i]`.\n\n tpr : ndarray of shape (>2,)\n Increasing true positive rates such that element `i` is the true\n positive rate of predictions with score >= `thresholds[i]`.\n\n thresholds : ndarray of shape = (n_thresholds,)\n Decreasing thresholds on the decision function used to compute\n fpr and tpr. 
`thresholds[0]` represents no instances being predicted\n and is arbitrarily set to `max(y_score) + 1`.\n\n See Also\n --------\n plot_roc_curve : Plot Receiver operating characteristic (ROC) curve.\n RocCurveDisplay : ROC Curve visualization.\n det_curve: Compute error rates for different probability thresholds.\n roc_auc_score : Compute the area under the ROC curve.\n\n Notes\n -----\n Since the thresholds are sorted from low to high values, they\n are reversed upon returning them to ensure they correspond to both ``fpr``\n and ``tpr``, which are sorted in reversed order during their calculation.\n\n References\n ----------\n .. [1] `Wikipedia entry for the Receiver operating characteristic\n `_\n\n .. [2] Fawcett T. An introduction to ROC analysis[J]. Pattern Recognition\n Letters, 2006, 27(8):861-874.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn import metrics\n >>> y = np.array([1, 1, 2, 2])\n >>> scores = np.array([0.1, 0.4, 0.35, 0.8])\n >>> fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2)\n >>> fpr\n array([0. , 0. , 0.5, 0.5, 1. ])\n >>> tpr\n array([0. , 0.5, 0.5, 1. , 1. ])\n >>> thresholds\n array([1.8 , 0.8 , 0.4 , 0.35, 0.1 ])\n\n \"\"\"\n fps, tps, thresholds = _binary_clf_curve(\n y_true, y_score, pos_label=pos_label, sample_weight=sample_weight)\n\n # Attempt to drop thresholds corresponding to points in between and\n # collinear with other points. These are always suboptimal and do not\n # appear on a plotted ROC curve (and thus do not affect the AUC).\n # Here np.diff(_, 2) is used as a \"second derivative\" to tell if there\n # is a corner at the point. Both fps and tps must be tested to handle\n # thresholds with multiple data points (which are combined in\n # _binary_clf_curve). This keeps all cases where the point should be kept,\n # but does not drop more complicated cases like fps = [1, 3, 7],\n # tps = [1, 2, 4]; there is no harm in keeping too many thresholds.\n if drop_intermediate and len(fps) > 2:\n optimal_idxs = np.where(np.r_[True,\n np.logical_or(np.diff(fps, 2),\n np.diff(tps, 2)),\n True])[0]\n fps = fps[optimal_idxs]\n tps = tps[optimal_idxs]\n thresholds = thresholds[optimal_idxs]\n\n # Add an extra threshold position\n # to make sure that the curve starts at (0, 0)\n tps = np.r_[0, tps]\n fps = np.r_[0, fps]\n thresholds = np.r_[thresholds[0] + 1, thresholds]\n\n if fps[-1] <= 0:\n warnings.warn(\"No negative samples in y_true, \"\n \"false positive value should be meaningless\",\n UndefinedMetricWarning)\n fpr = np.repeat(np.nan, fps.shape)\n else:\n fpr = fps / fps[-1]\n\n if tps[-1] <= 0:\n warnings.warn(\"No positive samples in y_true, \"\n \"true positive value should be meaningless\",\n UndefinedMetricWarning)\n tpr = np.repeat(np.nan, tps.shape)\n else:\n tpr = tps / tps[-1]\n\n return fpr, tpr, thresholds" + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/top_k_accuracy_score", + "name": "top_k_accuracy_score", + "qname": "sklearn.metrics._ranking.top_k_accuracy_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._ranking/top_k_accuracy_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._ranking.top_k_accuracy_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "True labels." 
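An editorial sketch of the `roc_curve` threshold convention above: `thresholds[0]` is set to `max(y_score) + 1` so the curve starts with no instance predicted positive, and `drop_intermediate=False` keeps the collinear points that the default drops:

>>> import numpy as np
>>> from sklearn.metrics import roc_curve
>>> y = np.array([0, 0, 0, 1, 1, 1])
>>> scores = np.array([0.1, 0.2, 0.3, 0.4, 0.7, 0.8])
>>> fpr, tpr, thr = roc_curve(y, scores)
>>> bool(thr[0] == scores.max() + 1)
True
>>> _, _, thr_all = roc_curve(y, scores, drop_intermediate=False)
>>> len(thr_all) > len(thr)   # suboptimal collinear thresholds retained
True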
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/top_k_accuracy_score/y_score", + "name": "y_score", + "qname": "sklearn.metrics._ranking.top_k_accuracy_score.y_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_classes)", + "default_value": "", + "description": "Target scores. These can be either probability estimates or\nnon-thresholded decision values (as returned by\n:term:`decision_function` on some classifiers). The binary case expects\nscores with shape (n_samples,) while the multiclass case expects scores\nwith shape (n_samples, n_classes). In the multiclass case, the order of\nthe class scores must correspond to the order of ``labels``, if\nprovided, or else to the numerical or lexicographical order of the\nlabels in ``y_true``." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/top_k_accuracy_score/k", + "name": "k", + "qname": "sklearn.metrics._ranking.top_k_accuracy_score.k", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Number of most likely outcomes considered to find the correct label." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/top_k_accuracy_score/normalize", + "name": "normalize", + "qname": "sklearn.metrics._ranking.top_k_accuracy_score.normalize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If `True`, return the fraction of correctly classified samples.\nOtherwise, return the number of correctly classified samples." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/top_k_accuracy_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._ranking.top_k_accuracy_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If `None`, all samples are given the same weight." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._ranking/top_k_accuracy_score/labels", + "name": "labels", + "qname": "sklearn.metrics._ranking.top_k_accuracy_score.labels", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "Multiclass only. List of labels that index the classes in ``y_score``.\nIf ``None``, the numerical or lexicographical order of the labels in\n``y_true`` is used." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Top-k Accuracy classification score.\n\nThis metric computes the number of times where the correct label is among\nthe top `k` labels predicted (ranked by predicted scores). 
Note that the\nmultilabel case isn't covered here.\n\nRead more in the :ref:`User Guide `", + "docstring": "Top-k Accuracy classification score.\n\nThis metric computes the number of times where the correct label is among\nthe top `k` labels predicted (ranked by predicted scores). Note that the\nmultilabel case isn't covered here.\n\nRead more in the :ref:`User Guide `\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n True labels.\n\ny_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target scores. These can be either probability estimates or\n non-thresholded decision values (as returned by\n :term:`decision_function` on some classifiers). The binary case expects\n scores with shape (n_samples,) while the multiclass case expects scores\n with shape (n_samples, n_classes). In the multiclass case, the order of\n the class scores must correspond to the order of ``labels``, if\n provided, or else to the numerical or lexicographical order of the\n labels in ``y_true``.\n\nk : int, default=2\n Number of most likely outcomes considered to find the correct label.\n\nnormalize : bool, default=True\n If `True`, return the fraction of correctly classified samples.\n Otherwise, return the number of correctly classified samples.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If `None`, all samples are given the same weight.\n\nlabels : array-like of shape (n_classes,), default=None\n Multiclass only. List of labels that index the classes in ``y_score``.\n If ``None``, the numerical or lexicographical order of the labels in\n ``y_true`` is used.\n\nReturns\n-------\nscore : float\n The top-k accuracy score. The best performance is 1 with\n `normalize == True` and the number of samples with\n `normalize == False`.\n\nSee also\n--------\naccuracy_score\n\nNotes\n-----\nIn cases where two or more labels are assigned equal predicted scores,\nthe labels with the highest indices will be chosen first. This might\nimpact the result if the correct label falls after the threshold because\nof that.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import top_k_accuracy_score\n>>> y_true = np.array([0, 1, 2, 2])\n>>> y_score = np.array([[0.5, 0.2, 0.2], # 0 is in top 2\n... [0.3, 0.4, 0.2], # 1 is in top 2\n... [0.2, 0.4, 0.3], # 2 is in top 2\n... [0.7, 0.2, 0.1]]) # 2 isn't in top 2\n>>> top_k_accuracy_score(y_true, y_score, k=2)\n0.75\n>>> # Not normalizing gives the number of \"correctly\" classified samples\n>>> top_k_accuracy_score(y_true, y_score, k=2, normalize=False)\n3", + "code": "def top_k_accuracy_score(y_true, y_score, *, k=2, normalize=True,\n sample_weight=None, labels=None):\n \"\"\"Top-k Accuracy classification score.\n\n This metric computes the number of times where the correct label is among\n the top `k` labels predicted (ranked by predicted scores). Note that the\n multilabel case isn't covered here.\n\n Read more in the :ref:`User Guide `\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,)\n True labels.\n\n y_score : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target scores. These can be either probability estimates or\n non-thresholded decision values (as returned by\n :term:`decision_function` on some classifiers). The binary case expects\n scores with shape (n_samples,) while the multiclass case expects scores\n with shape (n_samples, n_classes). 
In the multiclass case, the order of\n the class scores must correspond to the order of ``labels``, if\n provided, or else to the numerical or lexicographical order of the\n labels in ``y_true``.\n\n k : int, default=2\n Number of most likely outcomes considered to find the correct label.\n\n normalize : bool, default=True\n If `True`, return the fraction of correctly classified samples.\n Otherwise, return the number of correctly classified samples.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If `None`, all samples are given the same weight.\n\n labels : array-like of shape (n_classes,), default=None\n Multiclass only. List of labels that index the classes in ``y_score``.\n If ``None``, the numerical or lexicographical order of the labels in\n ``y_true`` is used.\n\n Returns\n -------\n score : float\n The top-k accuracy score. The best performance is 1 with\n `normalize == True` and the number of samples with\n `normalize == False`.\n\n See also\n --------\n accuracy_score\n\n Notes\n -----\n In cases where two or more labels are assigned equal predicted scores,\n the labels with the highest indices will be chosen first. This might\n impact the result if the correct label falls after the threshold because\n of that.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.metrics import top_k_accuracy_score\n >>> y_true = np.array([0, 1, 2, 2])\n >>> y_score = np.array([[0.5, 0.2, 0.2], # 0 is in top 2\n ... [0.3, 0.4, 0.2], # 1 is in top 2\n ... [0.2, 0.4, 0.3], # 2 is in top 2\n ... [0.7, 0.2, 0.1]]) # 2 isn't in top 2\n >>> top_k_accuracy_score(y_true, y_score, k=2)\n 0.75\n >>> # Not normalizing gives the number of \"correctly\" classified samples\n >>> top_k_accuracy_score(y_true, y_score, k=2, normalize=False)\n 3\n\n \"\"\"\n y_true = check_array(y_true, ensure_2d=False, dtype=None)\n y_true = column_or_1d(y_true)\n y_type = type_of_target(y_true)\n if y_type == \"binary\" and labels is not None and len(labels) > 2:\n y_type = \"multiclass\"\n y_score = check_array(y_score, ensure_2d=False)\n y_score = column_or_1d(y_score) if y_type == 'binary' else y_score\n check_consistent_length(y_true, y_score, sample_weight)\n\n if y_type not in {'binary', 'multiclass'}:\n raise ValueError(\n f\"y type must be 'binary' or 'multiclass', got '{y_type}' instead.\"\n )\n\n y_score_n_classes = y_score.shape[1] if y_score.ndim == 2 else 2\n\n if labels is None:\n classes = _unique(y_true)\n n_classes = len(classes)\n\n if n_classes != y_score_n_classes:\n raise ValueError(\n f\"Number of classes in 'y_true' ({n_classes}) not equal \"\n f\"to the number of classes in 'y_score' ({y_score_n_classes}).\"\n )\n else:\n labels = column_or_1d(labels)\n classes = _unique(labels)\n n_labels = len(labels)\n n_classes = len(classes)\n\n if n_classes != n_labels:\n raise ValueError(\"Parameter 'labels' must be unique.\")\n\n if not np.array_equal(classes, labels):\n raise ValueError(\"Parameter 'labels' must be ordered.\")\n\n if n_classes != y_score_n_classes:\n raise ValueError(\n f\"Number of given labels ({n_classes}) not equal to the \"\n f\"number of classes in 'y_score' ({y_score_n_classes}).\"\n )\n\n if len(np.setdiff1d(y_true, classes)):\n raise ValueError(\n \"'y_true' contains labels not in parameter 'labels'.\"\n )\n\n if k >= n_classes:\n warnings.warn(\n f\"'k' ({k}) greater than or equal to 'n_classes' ({n_classes}) \"\n \"will result in a perfect score and is therefore meaningless.\",\n UndefinedMetricWarning\n )\n\n y_true_encoded = 
_encode(y_true, uniques=classes)\n\n if y_type == 'binary':\n if k == 1:\n threshold = .5 if y_score.min() >= 0 and y_score.max() <= 1 else 0\n y_pred = (y_score > threshold).astype(np.int64)\n hits = y_pred == y_true_encoded\n else:\n hits = np.ones_like(y_score, dtype=np.bool_)\n elif y_type == 'multiclass':\n sorted_pred = np.argsort(y_score, axis=1, kind='mergesort')[:, ::-1]\n hits = (y_true_encoded == sorted_pred[:, :k].T).any(axis=0)\n\n if normalize:\n return np.average(hits, weights=sample_weight)\n elif sample_weight is None:\n return np.sum(hits)\n else:\n return np.dot(hits, sample_weight)" + }, + { + "id": "scikit-learn/sklearn.metrics._regression/_check_reg_targets", + "name": "_check_reg_targets", + "qname": "sklearn.metrics._regression._check_reg_targets", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._regression/_check_reg_targets/y_true", + "name": "y_true", + "qname": "sklearn.metrics._regression._check_reg_targets.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/_check_reg_targets/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._regression._check_reg_targets.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/_check_reg_targets/multioutput", + "name": "multioutput", + "qname": "sklearn.metrics._regression._check_reg_targets.multioutput", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or string in ['raw_values', uniform_average',", + "default_value": "", + "description": "'variance_weighted'] or None\nNone is accepted due to backward compatibility of r2_score()." 
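An editorial sketch relating `top_k_accuracy_score` to plain accuracy: for multiclass scores without ties, `k=1` coincides with `accuracy_score` on the argmax prediction:

>>> import numpy as np
>>> from sklearn.metrics import accuracy_score, top_k_accuracy_score
>>> y_true = np.array([0, 1, 2, 2])
>>> y_score = np.array([[0.5, 0.2, 0.2],
...                     [0.3, 0.4, 0.2],
...                     [0.2, 0.4, 0.3],
...                     [0.7, 0.2, 0.1]])
>>> top1 = top_k_accuracy_score(y_true, y_score, k=1)
>>> top1 == accuracy_score(y_true, y_score.argmax(axis=1))
True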
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "string in ['raw_values'" + }, + { + "kind": "NamedType", + "name": "uniform_average'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/_check_reg_targets/dtype", + "name": "dtype", + "qname": "sklearn.metrics._regression._check_reg_targets.dtype", + "default_value": "'numeric'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check that y_true and y_pred belong to the same regression task.", + "docstring": "Check that y_true and y_pred belong to the same regression task.\n\nParameters\n----------\ny_true : array-like\n\ny_pred : array-like\n\nmultioutput : array-like or string in ['raw_values', 'uniform_average',\n 'variance_weighted'] or None\n None is accepted due to backward compatibility of r2_score().\n\nReturns\n-------\ntype_true : one of {'continuous', 'continuous-multioutput'}\n The type of the true target data, as output by\n 'utils.multiclass.type_of_target'.\n\ny_true : array-like of shape (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples, n_outputs)\n Estimated target values.\n\nmultioutput : array-like of shape (n_outputs) or string in ['raw_values',\n 'uniform_average', 'variance_weighted'] or None\n Custom output weights if ``multioutput`` is array-like or\n just the corresponding argument if ``multioutput`` is a\n correct keyword.\n\ndtype : str or list, default=\"numeric\"\n The dtype argument passed to check_array.", + "code": "def _check_reg_targets(y_true, y_pred, multioutput, dtype=\"numeric\"):\n \"\"\"Check that y_true and y_pred belong to the same regression task.\n\n Parameters\n ----------\n y_true : array-like\n\n y_pred : array-like\n\n multioutput : array-like or string in ['raw_values', 'uniform_average',\n 'variance_weighted'] or None\n None is accepted due to backward compatibility of r2_score().\n\n Returns\n -------\n type_true : one of {'continuous', 'continuous-multioutput'}\n The type of the true target data, as output by\n 'utils.multiclass.type_of_target'.\n\n y_true : array-like of shape (n_samples, n_outputs)\n Ground truth (correct) target values.\n\n y_pred : array-like of shape (n_samples, n_outputs)\n Estimated target values.\n\n multioutput : array-like of shape (n_outputs) or string in ['raw_values',\n 'uniform_average', 'variance_weighted'] or None\n Custom output weights if ``multioutput`` is array-like or\n just the corresponding argument if ``multioutput`` is a\n correct keyword.\n\n dtype : str or list, default=\"numeric\"\n The dtype argument passed to check_array.\n \"\"\"\n check_consistent_length(y_true, y_pred)\n y_true = check_array(y_true, ensure_2d=False, dtype=dtype)\n y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype)\n\n if y_true.ndim == 1:\n y_true = y_true.reshape((-1, 1))\n\n if y_pred.ndim == 1:\n y_pred = y_pred.reshape((-1, 1))\n\n if y_true.shape[1] != y_pred.shape[1]:\n raise ValueError(\"y_true and y_pred have different number of outputs \"\n \"({0}!={1})\".format(y_true.shape[1], y_pred.shape[1]))\n\n n_outputs = y_true.shape[1]\n allowed_multioutput_str = ('raw_values', 'uniform_average',\n 'variance_weighted')\n if isinstance(multioutput, str):\n if multioutput not in allowed_multioutput_str:\n raise
ValueError(\"Allowed 'multioutput' string values are {}. \"\n \"You provided multioutput={!r}\".format(\n allowed_multioutput_str,\n multioutput))\n elif multioutput is not None:\n multioutput = check_array(multioutput, ensure_2d=False)\n if n_outputs == 1:\n raise ValueError(\"Custom weights are useful only in \"\n \"multi-output cases.\")\n elif n_outputs != len(multioutput):\n raise ValueError((\"There must be equally many custom weights \"\n \"(%d) as outputs (%d).\") %\n (len(multioutput), n_outputs))\n y_type = 'continuous' if n_outputs == 1 else 'continuous-multioutput'\n\n return y_type, y_true, y_pred, multioutput" + }, + { + "id": "scikit-learn/sklearn.metrics._regression/explained_variance_score", + "name": "explained_variance_score", + "qname": "sklearn.metrics._regression.explained_variance_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._regression/explained_variance_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._regression.explained_variance_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/explained_variance_score/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._regression.explained_variance_score.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Estimated target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/explained_variance_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._regression.explained_variance_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/explained_variance_score/multioutput", + "name": "multioutput", + "qname": "sklearn.metrics._regression.explained_variance_score.multioutput", + "default_value": "'uniform_average'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'raw_values', 'uniform_average', 'variance_weighted'} or array-like of shape (n_outputs,)", + "default_value": "'uniform_average'", + "description": "Defines aggregating of multiple output scores.\nArray-like value defines weights used to average scores.\n\n'raw_values' :\n Returns a full set of scores in case of multioutput input.\n\n'uniform_average' :\n Scores of all outputs are averaged with uniform weight.\n\n'variance_weighted' :\n Scores of all outputs are averaged, weighted by the variances\n of each individual output." 
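A minimal sketch of what `_check_reg_targets` returns; note that `sklearn.metrics._regression` is a private module, so this import is for illustration only and not a supported API:

```python
from sklearn.metrics._regression import _check_reg_targets  # private helper

y_type, y_true, y_pred, multioutput = _check_reg_targets(
    [3, -0.5, 2, 7], [2.5, 0.0, 2, 8], 'uniform_average')
print(y_type)        # 'continuous' (a single output)
print(y_true.shape)  # (4, 1): 1-d inputs are reshaped to column vectors
print(multioutput)   # the 'uniform_average' keyword is passed through
```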
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["raw_values", "uniform_average", "variance_weighted"] + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_outputs,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Explained variance regression score function.\n\nBest possible score is 1.0, lower values are worse.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Explained variance regression score function.\n\nBest possible score is 1.0, lower values are worse.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average', 'variance_weighted'} or array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output scores.\n Array-like value defines weights used to average scores.\n\n 'raw_values' :\n Returns a full set of scores in case of multioutput input.\n\n 'uniform_average' :\n Scores of all outputs are averaged with uniform weight.\n\n 'variance_weighted' :\n Scores of all outputs are averaged, weighted by the variances\n of each individual output.\n\nReturns\n-------\nscore : float or ndarray of floats\n The explained variance or ndarray if 'multioutput' is 'raw_values'.\n\nNotes\n-----\nThis is not a symmetric function.\n\nExamples\n--------\n>>> from sklearn.metrics import explained_variance_score\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> explained_variance_score(y_true, y_pred)\n0.957...\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> explained_variance_score(y_true, y_pred, multioutput='uniform_average')\n0.983...", + "code": "@_deprecate_positional_args\ndef explained_variance_score(y_true, y_pred, *,\n sample_weight=None,\n multioutput='uniform_average'):\n \"\"\"Explained variance regression score function.\n\n Best possible score is 1.0, lower values are worse.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\n y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n multioutput : {'raw_values', 'uniform_average', 'variance_weighted'} or \\\n array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output scores.\n Array-like value defines weights used to average scores.\n\n 'raw_values' :\n Returns a full set of scores in case of multioutput input.\n\n 'uniform_average' :\n Scores of all outputs are averaged with uniform weight.\n\n 'variance_weighted' :\n Scores of all outputs are averaged, weighted by the variances\n of each individual output.\n\n Returns\n -------\n score : float or ndarray of floats\n The explained variance or ndarray if 'multioutput' is 'raw_values'.\n\n Notes\n -----\n This is not a symmetric function.\n\n Examples\n --------\n >>> from sklearn.metrics import explained_variance_score\n >>> y_true = [3, -0.5, 2, 7]\n >>> y_pred = [2.5, 0.0, 2, 8]\n >>> explained_variance_score(y_true, y_pred)\n 0.957...\n >>> 
y_true = [[0.5, 1], [-1, 1], [7, -6]]\n >>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n >>> explained_variance_score(y_true, y_pred, multioutput='uniform_average')\n 0.983...\n \"\"\"\n y_type, y_true, y_pred, multioutput = _check_reg_targets(\n y_true, y_pred, multioutput)\n check_consistent_length(y_true, y_pred, sample_weight)\n\n y_diff_avg = np.average(y_true - y_pred, weights=sample_weight, axis=0)\n numerator = np.average((y_true - y_pred - y_diff_avg) ** 2,\n weights=sample_weight, axis=0)\n\n y_true_avg = np.average(y_true, weights=sample_weight, axis=0)\n denominator = np.average((y_true - y_true_avg) ** 2,\n weights=sample_weight, axis=0)\n\n nonzero_numerator = numerator != 0\n nonzero_denominator = denominator != 0\n valid_score = nonzero_numerator & nonzero_denominator\n output_scores = np.ones(y_true.shape[1])\n\n output_scores[valid_score] = 1 - (numerator[valid_score] /\n denominator[valid_score])\n output_scores[nonzero_numerator & ~nonzero_denominator] = 0.\n if isinstance(multioutput, str):\n if multioutput == 'raw_values':\n # return scores individually\n return output_scores\n elif multioutput == 'uniform_average':\n # passing None as weights to np.average() results in a uniform mean\n avg_weights = None\n elif multioutput == 'variance_weighted':\n avg_weights = denominator\n else:\n avg_weights = multioutput\n\n return np.average(output_scores, weights=avg_weights)" + }, + { + "id": "scikit-learn/sklearn.metrics._regression/max_error", + "name": "max_error", + "qname": "sklearn.metrics._regression.max_error", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._regression/max_error/y_true", + "name": "y_true", + "qname": "sklearn.metrics._regression.max_error.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/max_error/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._regression.max_error.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Estimated target values."
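A small sketch of the aggregation modes of `explained_variance_score` on the docstring's own data; the printed values in the comments are approximate:

```python
from sklearn.metrics import explained_variance_score

y_true = [[0.5, 1], [-1, 1], [7, -6]]
y_pred = [[0, 2], [-1, 2], [8, -5]]
# Per-output scores; the second output is off by a constant, so its
# residuals have zero variance and it scores a perfect 1.0.
print(explained_variance_score(y_true, y_pred,
                               multioutput='raw_values'))         # ~[0.968, 1.0]
# 'variance_weighted' reuses each output's target variance as its weight.
print(explained_variance_score(y_true, y_pred,
                               multioutput='variance_weighted'))  # ~0.983
```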
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "max_error metric calculates the maximum residual error.\n\nRead more in the :ref:`User Guide `.", + "docstring": "max_error metric calculates the maximum residual error.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,)\n Estimated target values.\n\nReturns\n-------\nmax_error : float\n A positive floating point value (the best value is 0.0).\n\nExamples\n--------\n>>> from sklearn.metrics import max_error\n>>> y_true = [3, 2, 7, 1]\n>>> y_pred = [4, 2, 7, 1]\n>>> max_error(y_true, y_pred)\n1", + "code": "def max_error(y_true, y_pred):\n \"\"\"\n max_error metric calculates the maximum residual error.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,)\n Ground truth (correct) target values.\n\n y_pred : array-like of shape (n_samples,)\n Estimated target values.\n\n Returns\n -------\n max_error : float\n A positive floating point value (the best value is 0.0).\n\n Examples\n --------\n >>> from sklearn.metrics import max_error\n >>> y_true = [3, 2, 7, 1]\n >>> y_pred = [4, 2, 7, 1]\n >>> max_error(y_true, y_pred)\n 1\n \"\"\"\n y_type, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, None)\n if y_type == 'continuous-multioutput':\n raise ValueError(\"Multioutput not supported in max_error\")\n return np.max(np.abs(y_true - y_pred))" + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_absolute_error", + "name": "mean_absolute_error", + "qname": "sklearn.metrics._regression.mean_absolute_error", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._regression/mean_absolute_error/y_true", + "name": "y_true", + "qname": "sklearn.metrics._regression.mean_absolute_error.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_absolute_error/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._regression.mean_absolute_error.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Estimated target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_absolute_error/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._regression.mean_absolute_error.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
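Since `max_error` is simply the largest absolute residual, the definition is easy to check by hand; a minimal sketch with made-up targets:

```python
import numpy as np
from sklearn.metrics import max_error

y_true = [3, 2, 7, 1]
y_pred = [9, 2, 7, 1]
print(max_error(y_true, y_pred))                             # 6
print(np.max(np.abs(np.array(y_true) - np.array(y_pred))))  # 6, by definition
```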
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_absolute_error/multioutput", + "name": "multioutput", + "qname": "sklearn.metrics._regression.mean_absolute_error.multioutput", + "default_value": "'uniform_average'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'raw_values', 'uniform_average'} or array-like of shape (n_outputs,)", + "default_value": "'uniform_average'", + "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\n\n'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n'uniform_average' :\n Errors of all outputs are averaged with uniform weight." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["raw_values", "uniform_average"] + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_outputs,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mean absolute error regression loss.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Mean absolute error regression loss.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\n\nReturns\n-------\nloss : float or ndarray of floats\n If multioutput is 'raw_values', then mean absolute error is returned\n for each output separately.\n If multioutput is 'uniform_average' or an ndarray of weights, then the\n weighted average of all output errors is returned.\n\n MAE output is non-negative floating point. The best value is 0.0.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_absolute_error\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> mean_absolute_error(y_true, y_pred)\n0.5\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> mean_absolute_error(y_true, y_pred)\n0.75\n>>> mean_absolute_error(y_true, y_pred, multioutput='raw_values')\narray([0.5, 1. 
])\n>>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.85...", + "code": "@_deprecate_positional_args\ndef mean_absolute_error(y_true, y_pred, *,\n sample_weight=None,\n multioutput='uniform_average'):\n \"\"\"Mean absolute error regression loss.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\n y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n multioutput : {'raw_values', 'uniform_average'} or array-like of shape \\\n (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\n\n Returns\n -------\n loss : float or ndarray of floats\n If multioutput is 'raw_values', then mean absolute error is returned\n for each output separately.\n If multioutput is 'uniform_average' or an ndarray of weights, then the\n weighted average of all output errors is returned.\n\n MAE output is non-negative floating point. The best value is 0.0.\n\n Examples\n --------\n >>> from sklearn.metrics import mean_absolute_error\n >>> y_true = [3, -0.5, 2, 7]\n >>> y_pred = [2.5, 0.0, 2, 8]\n >>> mean_absolute_error(y_true, y_pred)\n 0.5\n >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n >>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n >>> mean_absolute_error(y_true, y_pred)\n 0.75\n >>> mean_absolute_error(y_true, y_pred, multioutput='raw_values')\n array([0.5, 1. ])\n >>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])\n 0.85...\n \"\"\"\n y_type, y_true, y_pred, multioutput = _check_reg_targets(\n y_true, y_pred, multioutput)\n check_consistent_length(y_true, y_pred, sample_weight)\n output_errors = np.average(np.abs(y_pred - y_true),\n weights=sample_weight, axis=0)\n if isinstance(multioutput, str):\n if multioutput == 'raw_values':\n return output_errors\n elif multioutput == 'uniform_average':\n # pass None as weights to np.average: uniform mean\n multioutput = None\n\n return np.average(output_errors, weights=multioutput)" + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_absolute_percentage_error", + "name": "mean_absolute_percentage_error", + "qname": "sklearn.metrics._regression.mean_absolute_percentage_error", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._regression/mean_absolute_percentage_error/y_true", + "name": "y_true", + "qname": "sklearn.metrics._regression.mean_absolute_percentage_error.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Ground truth (correct) target values." 
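The array-like `multioutput` option of `mean_absolute_error` is just a weighted average of the `'raw_values'` output, which the docstring's 0.85 example confirms; a short sketch of that equivalence:

```python
import numpy as np
from sklearn.metrics import mean_absolute_error

y_true = [[0.5, 1], [-1, 1], [7, -6]]
y_pred = [[0, 2], [-1, 2], [8, -5]]
raw = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
print(raw)                                                          # [0.5 1. ]
# Passing weights is the same as averaging the raw errors yourself:
print(np.average(raw, weights=[0.3, 0.7]))                          # 0.85
print(mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]))  # 0.85
```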
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_absolute_percentage_error/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._regression.mean_absolute_percentage_error.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Estimated target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_absolute_percentage_error/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._regression.mean_absolute_percentage_error.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_absolute_percentage_error/multioutput", + "name": "multioutput", + "qname": "sklearn.metrics._regression.mean_absolute_percentage_error.multioutput", + "default_value": "'uniform_average'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'raw_values', 'uniform_average'} or array-like", + "default_value": "", + "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\nIf input is list then the shape must be (n_outputs,).\n\n'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n'uniform_average' :\n Errors of all outputs are averaged with uniform weight." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["raw_values", "uniform_average"] + }, + { + "kind": "NamedType", + "name": "array-like" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mean absolute percentage error regression loss.\n\nNote here that we do not represent the output as a percentage in range\n[0, 100]. Instead, we represent it in range [0, 1/eps]. Read more in the\n:ref:`User Guide `.\n\n.. versionadded:: 0.24", + "docstring": "Mean absolute percentage error regression loss.\n\nNote here that we do not represent the output as a percentage in range\n[0, 100]. Instead, we represent it in range [0, 1/eps]. Read more in the\n:ref:`User Guide `.\n\n.. 
versionadded:: 0.24\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n If input is list then the shape must be (n_outputs,).\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\nReturns\n-------\nloss : float or ndarray of floats in the range [0, 1/eps]\n If multioutput is 'raw_values', then mean absolute percentage error\n is returned for each output separately.\n If multioutput is 'uniform_average' or an ndarray of weights, then the\n weighted average of all output errors is returned.\n\n MAPE output is non-negative floating point. The best value is 0.0.\n But note the fact that bad predictions can lead to arbitrarily large\n MAPE values, especially if some y_true values are very close to zero.\n Note that we return a large value instead of `inf` when y_true is zero.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_absolute_percentage_error\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> mean_absolute_percentage_error(y_true, y_pred)\n0.3273...\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> mean_absolute_percentage_error(y_true, y_pred)\n0.5515...\n>>> mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.6198...", + "code": "def mean_absolute_percentage_error(y_true, y_pred,\n sample_weight=None,\n multioutput='uniform_average'):\n \"\"\"Mean absolute percentage error regression loss.\n\n Note here that we do not represent the output as a percentage in range\n [0, 100]. Instead, we represent it in range [0, 1/eps]. Read more in the\n :ref:`User Guide `.\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\n y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n multioutput : {'raw_values', 'uniform_average'} or array-like\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n If input is list then the shape must be (n_outputs,).\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\n Returns\n -------\n loss : float or ndarray of floats in the range [0, 1/eps]\n If multioutput is 'raw_values', then mean absolute percentage error\n is returned for each output separately.\n If multioutput is 'uniform_average' or an ndarray of weights, then the\n weighted average of all output errors is returned.\n\n MAPE output is non-negative floating point.
The best value is 0.0.\n But note the fact that bad predictions can lead to arbitrarily large\n MAPE values, especially if some y_true values are very close to zero.\n Note that we return a large value instead of `inf` when y_true is zero.\n\n Examples\n --------\n >>> from sklearn.metrics import mean_absolute_percentage_error\n >>> y_true = [3, -0.5, 2, 7]\n >>> y_pred = [2.5, 0.0, 2, 8]\n >>> mean_absolute_percentage_error(y_true, y_pred)\n 0.3273...\n >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n >>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n >>> mean_absolute_percentage_error(y_true, y_pred)\n 0.5515...\n >>> mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.3, 0.7])\n 0.6198...\n \"\"\"\n y_type, y_true, y_pred, multioutput = _check_reg_targets(\n y_true, y_pred, multioutput)\n check_consistent_length(y_true, y_pred, sample_weight)\n epsilon = np.finfo(np.float64).eps\n mape = np.abs(y_pred - y_true) / np.maximum(np.abs(y_true), epsilon)\n output_errors = np.average(mape,\n weights=sample_weight, axis=0)\n if isinstance(multioutput, str):\n if multioutput == 'raw_values':\n return output_errors\n elif multioutput == 'uniform_average':\n # pass None as weights to np.average: uniform mean\n multioutput = None\n\n return np.average(output_errors, weights=multioutput)" + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_gamma_deviance", + "name": "mean_gamma_deviance", + "qname": "sklearn.metrics._regression.mean_gamma_deviance", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._regression/mean_gamma_deviance/y_true", + "name": "y_true", + "qname": "sklearn.metrics._regression.mean_gamma_deviance.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Ground truth (correct) target values. Requires y_true > 0." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_gamma_deviance/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._regression.mean_gamma_deviance.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Estimated target values. Requires y_pred > 0." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_gamma_deviance/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._regression.mean_gamma_deviance.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mean Gamma deviance regression loss.\n\nGamma deviance is equivalent to the Tweedie deviance with\nthe power parameter `power=2`. It is invariant to scaling of\nthe target variable, and measures relative errors.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Mean Gamma deviance regression loss.\n\nGamma deviance is equivalent to the Tweedie deviance with\nthe power parameter `power=2`.
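The epsilon clamp in the `mean_absolute_percentage_error` code above is easy to observe empirically; a small sketch with made-up data (the second score is on the order of 1/eps, roughly 1e14):

```python
from sklearn.metrics import mean_absolute_percentage_error

print(mean_absolute_percentage_error([1.0, 2.0], [1.1, 2.2]))  # ~0.1
# A zero in y_true is divided by machine epsilon instead of raising or
# returning inf, so the score stays finite but becomes astronomically large.
print(mean_absolute_percentage_error([0.0, 2.0], [0.1, 2.0]))  # ~2.25e14
```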
It is invariant to scaling of\nthe target variable, and measures relative errors.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values. Requires y_true > 0.\n\ny_pred : array-like of shape (n_samples,)\n Estimated target values. Requires y_pred > 0.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float\n A non-negative floating point value (the best value is 0.0).\n\nExamples\n--------\n>>> from sklearn.metrics import mean_gamma_deviance\n>>> y_true = [2, 0.5, 1, 4]\n>>> y_pred = [0.5, 0.5, 2., 2.]\n>>> mean_gamma_deviance(y_true, y_pred)\n1.0568...", + "code": "@_deprecate_positional_args\ndef mean_gamma_deviance(y_true, y_pred, *, sample_weight=None):\n \"\"\"Mean Gamma deviance regression loss.\n\n Gamma deviance is equivalent to the Tweedie deviance with\n the power parameter `power=2`. It is invariant to scaling of\n the target variable, and measures relative errors.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,)\n Ground truth (correct) target values. Requires y_true > 0.\n\n y_pred : array-like of shape (n_samples,)\n Estimated target values. Requires y_pred > 0.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n loss : float\n A non-negative floating point value (the best value is 0.0).\n\n Examples\n --------\n >>> from sklearn.metrics import mean_gamma_deviance\n >>> y_true = [2, 0.5, 1, 4]\n >>> y_pred = [0.5, 0.5, 2., 2.]\n >>> mean_gamma_deviance(y_true, y_pred)\n 1.0568...\n \"\"\"\n return mean_tweedie_deviance(\n y_true, y_pred, sample_weight=sample_weight, power=2\n )" + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_poisson_deviance", + "name": "mean_poisson_deviance", + "qname": "sklearn.metrics._regression.mean_poisson_deviance", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._regression/mean_poisson_deviance/y_true", + "name": "y_true", + "qname": "sklearn.metrics._regression.mean_poisson_deviance.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Ground truth (correct) target values. Requires y_true >= 0." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_poisson_deviance/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._regression.mean_poisson_deviance.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Estimated target values. Requires y_pred > 0." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_poisson_deviance/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._regression.mean_poisson_deviance.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
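As the code above shows, `mean_gamma_deviance` is a thin wrapper around `mean_tweedie_deviance`; a quick sketch of that equivalence on the docstring's data:

```python
from sklearn.metrics import mean_gamma_deviance, mean_tweedie_deviance

y_true = [2, 0.5, 1, 4]
y_pred = [0.5, 0.5, 2., 2.]
print(mean_gamma_deviance(y_true, y_pred))             # 1.0568...
print(mean_tweedie_deviance(y_true, y_pred, power=2))  # identical by definition
```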
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mean Poisson deviance regression loss.\n\nPoisson deviance is equivalent to the Tweedie deviance with\nthe power parameter `power=1`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Mean Poisson deviance regression loss.\n\nPoisson deviance is equivalent to the Tweedie deviance with\nthe power parameter `power=1`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values. Requires y_true >= 0.\n\ny_pred : array-like of shape (n_samples,)\n Estimated target values. Requires y_pred > 0.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float\n A non-negative floating point value (the best value is 0.0).\n\nExamples\n--------\n>>> from sklearn.metrics import mean_poisson_deviance\n>>> y_true = [2, 0, 1, 4]\n>>> y_pred = [0.5, 0.5, 2., 2.]\n>>> mean_poisson_deviance(y_true, y_pred)\n1.4260...", + "code": "@_deprecate_positional_args\ndef mean_poisson_deviance(y_true, y_pred, *, sample_weight=None):\n \"\"\"Mean Poisson deviance regression loss.\n\n Poisson deviance is equivalent to the Tweedie deviance with\n the power parameter `power=1`.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,)\n Ground truth (correct) target values. Requires y_true >= 0.\n\n y_pred : array-like of shape (n_samples,)\n Estimated target values. Requires y_pred > 0.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n loss : float\n A non-negative floating point value (the best value is 0.0).\n\n Examples\n --------\n >>> from sklearn.metrics import mean_poisson_deviance\n >>> y_true = [2, 0, 1, 4]\n >>> y_pred = [0.5, 0.5, 2., 2.]\n >>> mean_poisson_deviance(y_true, y_pred)\n 1.4260...\n \"\"\"\n return mean_tweedie_deviance(\n y_true, y_pred, sample_weight=sample_weight, power=1\n )" + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_squared_error", + "name": "mean_squared_error", + "qname": "sklearn.metrics._regression.mean_squared_error", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._regression/mean_squared_error/y_true", + "name": "y_true", + "qname": "sklearn.metrics._regression.mean_squared_error.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_squared_error/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._regression.mean_squared_error.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Estimated target values." 
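Likewise, `mean_poisson_deviance` simply delegates to `mean_tweedie_deviance` with `power=1`; note that, unlike the Gamma case, zero values in `y_true` are allowed here:

```python
from sklearn.metrics import mean_poisson_deviance, mean_tweedie_deviance

y_true = [2, 0, 1, 4]        # zero counts are valid for the Poisson case
y_pred = [0.5, 0.5, 2., 2.]  # predictions must stay strictly positive
print(mean_poisson_deviance(y_true, y_pred))           # 1.4260...
print(mean_tweedie_deviance(y_true, y_pred, power=1))  # same value
```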
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_squared_error/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._regression.mean_squared_error.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_squared_error/multioutput", + "name": "multioutput", + "qname": "sklearn.metrics._regression.mean_squared_error.multioutput", + "default_value": "'uniform_average'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'raw_values', 'uniform_average'} or array-like of shape (n_outputs,)", + "default_value": "'uniform_average'", + "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\n\n'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n'uniform_average' :\n Errors of all outputs are averaged with uniform weight." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["raw_values", "uniform_average"] + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_outputs,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_squared_error/squared", + "name": "squared", + "qname": "sklearn.metrics._regression.mean_squared_error.squared", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True returns MSE value, if False returns RMSE value." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mean squared error regression loss.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Mean squared error regression loss.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\nsquared : bool, default=True\n If True returns MSE value, if False returns RMSE value.\n\nReturns\n-------\nloss : float or ndarray of floats\n A non-negative floating point value (the best value is 0.0), or an\n array of floating point values, one for each individual target.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_squared_error\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> mean_squared_error(y_true, y_pred)\n0.375\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> mean_squared_error(y_true, y_pred, squared=False)\n0.612...\n>>> y_true = [[0.5, 1],[-1, 1],[7, -6]]\n>>> y_pred = [[0, 2],[-1, 2],[8, -5]]\n>>> mean_squared_error(y_true, y_pred)\n0.708...\n>>> mean_squared_error(y_true, y_pred, squared=False)\n0.822...\n>>> mean_squared_error(y_true, y_pred, multioutput='raw_values')\narray([0.41666667, 1. 
])\n>>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.825...", + "code": "@_deprecate_positional_args\ndef mean_squared_error(y_true, y_pred, *,\n sample_weight=None,\n multioutput='uniform_average', squared=True):\n \"\"\"Mean squared error regression loss.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\n y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n multioutput : {'raw_values', 'uniform_average'} or array-like of shape \\\n (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\n squared : bool, default=True\n If True returns MSE value, if False returns RMSE value.\n\n Returns\n -------\n loss : float or ndarray of floats\n A non-negative floating point value (the best value is 0.0), or an\n array of floating point values, one for each individual target.\n\n Examples\n --------\n >>> from sklearn.metrics import mean_squared_error\n >>> y_true = [3, -0.5, 2, 7]\n >>> y_pred = [2.5, 0.0, 2, 8]\n >>> mean_squared_error(y_true, y_pred)\n 0.375\n >>> y_true = [3, -0.5, 2, 7]\n >>> y_pred = [2.5, 0.0, 2, 8]\n >>> mean_squared_error(y_true, y_pred, squared=False)\n 0.612...\n >>> y_true = [[0.5, 1],[-1, 1],[7, -6]]\n >>> y_pred = [[0, 2],[-1, 2],[8, -5]]\n >>> mean_squared_error(y_true, y_pred)\n 0.708...\n >>> mean_squared_error(y_true, y_pred, squared=False)\n 0.822...\n >>> mean_squared_error(y_true, y_pred, multioutput='raw_values')\n array([0.41666667, 1. ])\n >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7])\n 0.825...\n \"\"\"\n y_type, y_true, y_pred, multioutput = _check_reg_targets(\n y_true, y_pred, multioutput)\n check_consistent_length(y_true, y_pred, sample_weight)\n output_errors = np.average((y_true - y_pred) ** 2, axis=0,\n weights=sample_weight)\n\n if not squared:\n output_errors = np.sqrt(output_errors)\n\n if isinstance(multioutput, str):\n if multioutput == 'raw_values':\n return output_errors\n elif multioutput == 'uniform_average':\n # pass None as weights to np.average: uniform mean\n multioutput = None\n\n return np.average(output_errors, weights=multioutput)" + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_squared_log_error", + "name": "mean_squared_log_error", + "qname": "sklearn.metrics._regression.mean_squared_log_error", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._regression/mean_squared_log_error/y_true", + "name": "y_true", + "qname": "sklearn.metrics._regression.mean_squared_log_error.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Ground truth (correct) target values." 
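The `squared` flag documented above is applied per output before aggregation; for a single output it is exactly the square root of the MSE, as this short sketch on the docstring's data confirms:

```python
import numpy as np
from sklearn.metrics import mean_squared_error

y_true = [3, -0.5, 2, 7]
y_pred = [2.5, 0.0, 2, 8]
mse = mean_squared_error(y_true, y_pred)                  # 0.375
rmse = mean_squared_error(y_true, y_pred, squared=False)  # 0.612...
print(np.isclose(rmse, np.sqrt(mse)))                     # True: RMSE = sqrt(MSE)
```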
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_squared_log_error/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._regression.mean_squared_log_error.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Estimated target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_squared_log_error/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._regression.mean_squared_log_error.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_squared_log_error/multioutput", + "name": "multioutput", + "qname": "sklearn.metrics._regression.mean_squared_log_error.multioutput", + "default_value": "'uniform_average'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'raw_values', 'uniform_average'} or array-like of shape (n_outputs,)", + "default_value": "'uniform_average'", + "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\n\n'raw_values' :\n Returns a full set of errors when the input is of multioutput\n format.\n\n'uniform_average' :\n Errors of all outputs are averaged with uniform weight." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["raw_values", "uniform_average"] + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_outputs,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mean squared logarithmic error regression loss.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Mean squared logarithmic error regression loss.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like of shape (n_outputs,), default='uniform_average'\n\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors when the input is of multioutput\n format.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\nReturns\n-------\nloss : float or ndarray of floats\n A non-negative floating point value (the best value is 0.0), or an\n array of floating point values, one for each individual target.\n\nExamples\n--------\n>>> from sklearn.metrics import mean_squared_log_error\n>>> y_true = [3, 5, 2.5, 7]\n>>> y_pred = [2.5, 5, 4, 8]\n>>> mean_squared_log_error(y_true, y_pred)\n0.039...\n>>> y_true = [[0.5, 1], [1, 2], [7, 6]]\n>>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]]\n>>> mean_squared_log_error(y_true, y_pred)\n0.044...\n>>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values')\narray([0.00462428, 0.08377444])\n>>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.060...", + "code": "@_deprecate_positional_args\ndef mean_squared_log_error(y_true, y_pred, *,\n sample_weight=None,\n multioutput='uniform_average'):\n \"\"\"Mean squared logarithmic error regression loss.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\n y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n multioutput : {'raw_values', 'uniform_average'} or array-like of shape \\\n (n_outputs,), default='uniform_average'\n\n Defines aggregating of multiple output values.\n Array-like value defines weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors when the input is of multioutput\n format.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\n Returns\n -------\n loss : float or ndarray of floats\n A non-negative floating point value (the best value is 0.0), or an\n array of floating point values, one for each individual target.\n\n Examples\n --------\n >>> from sklearn.metrics import mean_squared_log_error\n >>> y_true = [3, 5, 2.5, 7]\n >>> y_pred = [2.5, 5, 4, 8]\n >>> mean_squared_log_error(y_true, y_pred)\n 0.039...\n >>> y_true = [[0.5, 1], [1, 2], [7, 6]]\n >>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]]\n >>> mean_squared_log_error(y_true, y_pred)\n 0.044...\n >>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values')\n array([0.00462428, 0.08377444])\n >>> mean_squared_log_error(y_true, 
y_pred, multioutput=[0.3, 0.7])\n 0.060...\n \"\"\"\n y_type, y_true, y_pred, multioutput = _check_reg_targets(\n y_true, y_pred, multioutput)\n check_consistent_length(y_true, y_pred, sample_weight)\n\n if (y_true < 0).any() or (y_pred < 0).any():\n raise ValueError(\"Mean Squared Logarithmic Error cannot be used when \"\n \"targets contain negative values.\")\n\n return mean_squared_error(np.log1p(y_true), np.log1p(y_pred),\n sample_weight=sample_weight,\n multioutput=multioutput)" + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_tweedie_deviance", + "name": "mean_tweedie_deviance", + "qname": "sklearn.metrics._regression.mean_tweedie_deviance", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._regression/mean_tweedie_deviance/y_true", + "name": "y_true", + "qname": "sklearn.metrics._regression.mean_tweedie_deviance.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_tweedie_deviance/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._regression.mean_tweedie_deviance.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Estimated target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_tweedie_deviance/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._regression.mean_tweedie_deviance.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/mean_tweedie_deviance/power", + "name": "power", + "qname": "sklearn.metrics._regression.mean_tweedie_deviance.power", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "Tweedie power parameter. Either power <= 0 or power >= 1.\n\nThe higher `p` the less weight is given to extreme\ndeviations between true and predicted targets.\n\n- power < 0: Extreme stable distribution. Requires: y_pred > 0.\n- power = 0 : Normal distribution, output corresponds to\n mean_squared_error. y_true and y_pred can be any real numbers.\n- power = 1 : Poisson distribution. Requires: y_true >= 0 and\n y_pred > 0.\n- 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0\n and y_pred > 0.\n- power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.\n- power = 3 : Inverse Gaussian distribution. Requires: y_true > 0\n and y_pred > 0.\n- otherwise : Positive stable distribution. Requires: y_true > 0\n and y_pred > 0." 
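Two properties of `mean_squared_log_error` visible in the code above, sketched briefly: it is MSE computed on log1p-transformed targets, and negative targets are rejected up front:

```python
import numpy as np
from sklearn.metrics import mean_squared_error, mean_squared_log_error

y_true = [3, 5, 2.5, 7]
y_pred = [2.5, 5, 4, 8]
msle = mean_squared_log_error(y_true, y_pred)
# MSLE equals MSE on log1p-transformed inputs:
print(np.isclose(msle, mean_squared_error(np.log1p(y_true), np.log1p(y_pred))))
# Negative targets raise a ValueError rather than silently producing NaN:
try:
    mean_squared_log_error([1, -1], [1, 1])
except ValueError as exc:
    print(exc)
```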
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mean Tweedie deviance regression loss.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Mean Tweedie deviance regression loss.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\npower : float, default=0\n Tweedie power parameter. Either power <= 0 or power >= 1.\n\n The higher `p` the less weight is given to extreme\n deviations between true and predicted targets.\n\n - power < 0: Extreme stable distribution. Requires: y_pred > 0.\n - power = 0 : Normal distribution, output corresponds to\n mean_squared_error. y_true and y_pred can be any real numbers.\n - power = 1 : Poisson distribution. Requires: y_true >= 0 and\n y_pred > 0.\n - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0\n and y_pred > 0.\n - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.\n - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0\n and y_pred > 0.\n - otherwise : Positive stable distribution. Requires: y_true > 0\n and y_pred > 0.\n\nReturns\n-------\nloss : float\n A non-negative floating point value (the best value is 0.0).\n\nExamples\n--------\n>>> from sklearn.metrics import mean_tweedie_deviance\n>>> y_true = [2, 0, 1, 4]\n>>> y_pred = [0.5, 0.5, 2., 2.]\n>>> mean_tweedie_deviance(y_true, y_pred, power=1)\n1.4260...", + "code": "@_deprecate_positional_args\ndef mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0):\n \"\"\"Mean Tweedie deviance regression loss.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,)\n Ground truth (correct) target values.\n\n y_pred : array-like of shape (n_samples,)\n Estimated target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n power : float, default=0\n Tweedie power parameter. Either power <= 0 or power >= 1.\n\n The higher `p` the less weight is given to extreme\n deviations between true and predicted targets.\n\n - power < 0: Extreme stable distribution. Requires: y_pred > 0.\n - power = 0 : Normal distribution, output corresponds to\n mean_squared_error. y_true and y_pred can be any real numbers.\n - power = 1 : Poisson distribution. Requires: y_true >= 0 and\n y_pred > 0.\n - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0\n and y_pred > 0.\n - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.\n - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0\n and y_pred > 0.\n - otherwise : Positive stable distribution. 
Requires: y_true > 0\n and y_pred > 0.\n\n Returns\n -------\n loss : float\n A non-negative floating point value (the best value is 0.0).\n\n Examples\n --------\n >>> from sklearn.metrics import mean_tweedie_deviance\n >>> y_true = [2, 0, 1, 4]\n >>> y_pred = [0.5, 0.5, 2., 2.]\n >>> mean_tweedie_deviance(y_true, y_pred, power=1)\n 1.4260...\n \"\"\"\n y_type, y_true, y_pred, _ = _check_reg_targets(\n y_true, y_pred, None, dtype=[np.float64, np.float32])\n if y_type == 'continuous-multioutput':\n raise ValueError(\"Multioutput not supported in mean_tweedie_deviance\")\n check_consistent_length(y_true, y_pred, sample_weight)\n\n if sample_weight is not None:\n sample_weight = column_or_1d(sample_weight)\n sample_weight = sample_weight[:, np.newaxis]\n\n dist = TweedieDistribution(power=power)\n dev = dist.unit_deviance(y_true, y_pred, check_input=True)\n\n return np.average(dev, weights=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.metrics._regression/median_absolute_error", + "name": "median_absolute_error", + "qname": "sklearn.metrics._regression.median_absolute_error", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._regression/median_absolute_error/y_true", + "name": "y_true", + "qname": "sklearn.metrics._regression.median_absolute_error.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape = (n_samples) or (n_samples, n_outputs)", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape = (n_samples) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/median_absolute_error/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._regression.median_absolute_error.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape = (n_samples) or (n_samples, n_outputs)", + "default_value": "", + "description": "Estimated target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape = (n_samples) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/median_absolute_error/multioutput", + "name": "multioutput", + "qname": "sklearn.metrics._regression.median_absolute_error.multioutput", + "default_value": "'uniform_average'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'raw_values', 'uniform_average'} or array-like of shape (n_outputs,)", + "default_value": "'uniform_average'", + "description": "Defines aggregating of multiple output values. Array-like value defines\nweights used to average errors.\n\n'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n'uniform_average' :\n Errors of all outputs are averaged with uniform weight." 
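A usage sketch for the mean_tweedie_deviance entry above (assuming scikit-learn 0.24.x): the power parameter selects the distribution family, so power=0 reproduces the plain mean squared error while power=1 gives the Poisson deviance from the docstring example.

import numpy as np
from sklearn.metrics import mean_squared_error, mean_tweedie_deviance

y_true = [2, 0, 1, 4]
y_pred = [0.5, 0.5, 2., 2.]

# power=0 selects the normal distribution: the deviance equals the MSE.
assert np.isclose(mean_tweedie_deviance(y_true, y_pred, power=0),
                  mean_squared_error(y_true, y_pred))

# power=1 selects the Poisson deviance (requires y_true >= 0, y_pred > 0).
print(mean_tweedie_deviance(y_true, y_pred, power=1))  # 1.4260...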
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["raw_values", "uniform_average"] + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_outputs,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/median_absolute_error/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._regression.median_absolute_error.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Median absolute error regression loss.\n\nMedian absolute error output is non-negative floating point. The best value\nis 0.0. Read more in the :ref:`User Guide `.", + "docstring": "Median absolute error regression loss.\n\nMedian absolute error output is non-negative floating point. The best value\nis 0.0. Read more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape = (n_samples) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)\n Estimated target values.\n\nmultioutput : {'raw_values', 'uniform_average'} or array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output values. Array-like value defines\n weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nloss : float or ndarray of floats\n If multioutput is 'raw_values', then mean absolute error is returned\n for each output separately.\n If multioutput is 'uniform_average' or an ndarray of weights, then the\n weighted average of all output errors is returned.\n\nExamples\n--------\n>>> from sklearn.metrics import median_absolute_error\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> median_absolute_error(y_true, y_pred)\n0.5\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> median_absolute_error(y_true, y_pred)\n0.75\n>>> median_absolute_error(y_true, y_pred, multioutput='raw_values')\narray([0.5, 1. ])\n>>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])\n0.85", + "code": "@_deprecate_positional_args\ndef median_absolute_error(y_true, y_pred, *, multioutput='uniform_average',\n sample_weight=None):\n \"\"\"Median absolute error regression loss.\n\n Median absolute error output is non-negative floating point. The best value\n is 0.0. Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\n y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)\n Estimated target values.\n\n multioutput : {'raw_values', 'uniform_average'} or array-like of shape \\\n (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output values. 
Array-like value defines\n weights used to average errors.\n\n 'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n 'uniform_average' :\n Errors of all outputs are averaged with uniform weight.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n .. versionadded:: 0.24\n\n Returns\n -------\n loss : float or ndarray of floats\n If multioutput is 'raw_values', then mean absolute error is returned\n for each output separately.\n If multioutput is 'uniform_average' or an ndarray of weights, then the\n weighted average of all output errors is returned.\n\n Examples\n --------\n >>> from sklearn.metrics import median_absolute_error\n >>> y_true = [3, -0.5, 2, 7]\n >>> y_pred = [2.5, 0.0, 2, 8]\n >>> median_absolute_error(y_true, y_pred)\n 0.5\n >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n >>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n >>> median_absolute_error(y_true, y_pred)\n 0.75\n >>> median_absolute_error(y_true, y_pred, multioutput='raw_values')\n array([0.5, 1. ])\n >>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])\n 0.85\n \"\"\"\n y_type, y_true, y_pred, multioutput = _check_reg_targets(\n y_true, y_pred, multioutput)\n if sample_weight is None:\n output_errors = np.median(np.abs(y_pred - y_true), axis=0)\n else:\n sample_weight = _check_sample_weight(sample_weight, y_pred)\n output_errors = _weighted_percentile(np.abs(y_pred - y_true),\n sample_weight=sample_weight)\n if isinstance(multioutput, str):\n if multioutput == 'raw_values':\n return output_errors\n elif multioutput == 'uniform_average':\n # pass None as weights to np.average: uniform mean\n multioutput = None\n\n return np.average(output_errors, weights=multioutput)" + }, + { + "id": "scikit-learn/sklearn.metrics._regression/r2_score", + "name": "r2_score", + "qname": "sklearn.metrics._regression.r2_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._regression/r2_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._regression.r2_score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Ground truth (correct) target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/r2_score/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._regression.r2_score.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Estimated target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/r2_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._regression.r2_score.sample_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." 
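A usage sketch for the median_absolute_error entry above, walking through the three multioutput modes its docstring documents (assuming scikit-learn 0.24.x):

from sklearn.metrics import median_absolute_error

y_true = [[0.5, 1], [-1, 1], [7, -6]]
y_pred = [[0, 2], [-1, 2], [8, -5]]

print(median_absolute_error(y_true, y_pred))                            # 0.75: uniform average
print(median_absolute_error(y_true, y_pred, multioutput='raw_values'))  # [0.5 1. ]: per-output errors
print(median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]))    # 0.85: weighted average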
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._regression/r2_score/multioutput", + "name": "multioutput", + "qname": "sklearn.metrics._regression.r2_score.multioutput", + "default_value": "'uniform_average'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'raw_values', 'uniform_average', 'variance_weighted'}, array-like of shape (n_outputs,) or None", + "default_value": "'uniform_average'", + "description": "Defines aggregating of multiple output scores.\nArray-like value defines weights used to average scores.\nDefault is \"uniform_average\".\n\n'raw_values' :\n Returns a full set of scores in case of multioutput input.\n\n'uniform_average' :\n Scores of all outputs are averaged with uniform weight.\n\n'variance_weighted' :\n Scores of all outputs are averaged, weighted by the variances\n of each individual output.\n\n.. versionchanged:: 0.19\n Default value of multioutput is 'uniform_average'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["raw_values", "uniform_average", "variance_weighted"] + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_outputs,)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": ":math:`R^2` (coefficient of determination) regression score function.\n\nBest possible score is 1.0 and it can be negative (because the\nmodel can be arbitrarily worse). A constant model that always\npredicts the expected value of y, disregarding the input features,\nwould get a :math:`R^2` score of 0.0.\n\nRead more in the :ref:`User Guide `.", + "docstring": ":math:`R^2` (coefficient of determination) regression score function.\n\nBest possible score is 1.0 and it can be negative (because the\nmodel can be arbitrarily worse). A constant model that always\npredicts the expected value of y, disregarding the input features,\nwould get a :math:`R^2` score of 0.0.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, array-like of shape (n_outputs,) or None, default='uniform_average'\n\n Defines aggregating of multiple output scores.\n Array-like value defines weights used to average scores.\n Default is \"uniform_average\".\n\n 'raw_values' :\n Returns a full set of scores in case of multioutput input.\n\n 'uniform_average' :\n Scores of all outputs are averaged with uniform weight.\n\n 'variance_weighted' :\n Scores of all outputs are averaged, weighted by the variances\n of each individual output.\n\n .. versionchanged:: 0.19\n Default value of multioutput is 'uniform_average'.\n\nReturns\n-------\nz : float or ndarray of floats\n The :math:`R^2` score or ndarray of scores if 'multioutput' is\n 'raw_values'.\n\nNotes\n-----\nThis is not a symmetric function.\n\nUnlike most other scores, :math:`R^2` score may be negative (it need not\nactually be the square of a quantity R).\n\nThis metric is not well-defined for single samples and will return a NaN\nvalue if n_samples is less than two.\n\nReferences\n----------\n.. 
[1] `Wikipedia entry on the Coefficient of determination\n `_\n\nExamples\n--------\n>>> from sklearn.metrics import r2_score\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> r2_score(y_true, y_pred)\n0.948...\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> r2_score(y_true, y_pred,\n... multioutput='variance_weighted')\n0.938...\n>>> y_true = [1, 2, 3]\n>>> y_pred = [1, 2, 3]\n>>> r2_score(y_true, y_pred)\n1.0\n>>> y_true = [1, 2, 3]\n>>> y_pred = [2, 2, 2]\n>>> r2_score(y_true, y_pred)\n0.0\n>>> y_true = [1, 2, 3]\n>>> y_pred = [3, 2, 1]\n>>> r2_score(y_true, y_pred)\n-3.0", + "code": "@_deprecate_positional_args\ndef r2_score(y_true, y_pred, *, sample_weight=None,\n multioutput=\"uniform_average\"):\n \"\"\":math:`R^2` (coefficient of determination) regression score function.\n\n Best possible score is 1.0 and it can be negative (because the\n model can be arbitrarily worse). A constant model that always\n predicts the expected value of y, disregarding the input features,\n would get a :math:`R^2` score of 0.0.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\n y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n multioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, \\\n array-like of shape (n_outputs,) or None, default='uniform_average'\n\n Defines aggregating of multiple output scores.\n Array-like value defines weights used to average scores.\n Default is \"uniform_average\".\n\n 'raw_values' :\n Returns a full set of scores in case of multioutput input.\n\n 'uniform_average' :\n Scores of all outputs are averaged with uniform weight.\n\n 'variance_weighted' :\n Scores of all outputs are averaged, weighted by the variances\n of each individual output.\n\n .. versionchanged:: 0.19\n Default value of multioutput is 'uniform_average'.\n\n Returns\n -------\n z : float or ndarray of floats\n The :math:`R^2` score or ndarray of scores if 'multioutput' is\n 'raw_values'.\n\n Notes\n -----\n This is not a symmetric function.\n\n Unlike most other scores, :math:`R^2` score may be negative (it need not\n actually be the square of a quantity R).\n\n This metric is not well-defined for single samples and will return a NaN\n value if n_samples is less than two.\n\n References\n ----------\n .. [1] `Wikipedia entry on the Coefficient of determination\n `_\n\n Examples\n --------\n >>> from sklearn.metrics import r2_score\n >>> y_true = [3, -0.5, 2, 7]\n >>> y_pred = [2.5, 0.0, 2, 8]\n >>> r2_score(y_true, y_pred)\n 0.948...\n >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n >>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n >>> r2_score(y_true, y_pred,\n ... 
multioutput='variance_weighted')\n 0.938...\n >>> y_true = [1, 2, 3]\n >>> y_pred = [1, 2, 3]\n >>> r2_score(y_true, y_pred)\n 1.0\n >>> y_true = [1, 2, 3]\n >>> y_pred = [2, 2, 2]\n >>> r2_score(y_true, y_pred)\n 0.0\n >>> y_true = [1, 2, 3]\n >>> y_pred = [3, 2, 1]\n >>> r2_score(y_true, y_pred)\n -3.0\n \"\"\"\n y_type, y_true, y_pred, multioutput = _check_reg_targets(\n y_true, y_pred, multioutput)\n check_consistent_length(y_true, y_pred, sample_weight)\n\n if _num_samples(y_pred) < 2:\n msg = \"R^2 score is not well-defined with less than two samples.\"\n warnings.warn(msg, UndefinedMetricWarning)\n return float('nan')\n\n if sample_weight is not None:\n sample_weight = column_or_1d(sample_weight)\n weight = sample_weight[:, np.newaxis]\n else:\n weight = 1.\n\n numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0,\n dtype=np.float64)\n denominator = (weight * (y_true - np.average(\n y_true, axis=0, weights=sample_weight)) ** 2).sum(axis=0,\n dtype=np.float64)\n nonzero_denominator = denominator != 0\n nonzero_numerator = numerator != 0\n valid_score = nonzero_denominator & nonzero_numerator\n output_scores = np.ones([y_true.shape[1]])\n output_scores[valid_score] = 1 - (numerator[valid_score] /\n denominator[valid_score])\n # arbitrarily set to zero to avoid -inf scores; having a constant\n # y_true is not interesting for scoring a regression anyway\n output_scores[nonzero_numerator & ~nonzero_denominator] = 0.\n if isinstance(multioutput, str):\n if multioutput == 'raw_values':\n # return scores individually\n return output_scores\n elif multioutput == 'uniform_average':\n # passing None as weights results in uniform mean\n avg_weights = None\n elif multioutput == 'variance_weighted':\n avg_weights = denominator\n # avoid failing on constant y or one-element arrays\n if not np.any(nonzero_denominator):\n if not np.any(nonzero_numerator):\n return 1.0\n else:\n return 0.0\n else:\n avg_weights = multioutput\n\n return np.average(output_scores, weights=avg_weights)" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__call__", + "name": "__call__", + "qname": "sklearn.metrics._scorer._BaseScorer.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__call__/self", + "name": "self", + "qname": "sklearn.metrics._scorer._BaseScorer.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__call__/estimator", + "name": "estimator", + "qname": "sklearn.metrics._scorer._BaseScorer.__call__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Trained estimator to use for scoring. Must have a predict_proba\nmethod; the output of that is used to compute the score." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__call__/X", + "name": "X", + "qname": "sklearn.metrics._scorer._BaseScorer.__call__.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}", + "default_value": "", + "description": "Test data that will be fed to estimator.predict." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__call__/y_true", + "name": "y_true", + "qname": "sklearn.metrics._scorer._BaseScorer.__call__.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "Gold standard target values for X."
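A sketch of the edge cases handled in the r2_score code captured above (assuming scikit-learn 0.24.x): fewer than two samples yields NaN with an UndefinedMetricWarning, and a constant y_true is scored 0.0 or 1.0 rather than -inf.

import warnings
from sklearn.metrics import r2_score

with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # silence UndefinedMetricWarning
    print(r2_score([1], [1]))  # nan: undefined for n_samples < 2

print(r2_score([2, 2, 2], [1, 2, 3]))  # 0.0: constant y_true, nonzero error
print(r2_score([2, 2, 2], [2, 2, 2]))  # 1.0: constant y_true, perfect fit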
+ }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__call__/y_true", + "name": "y_true", + "qname": "sklearn.metrics._scorer._BaseScorer.__call__.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "Gold standard target values for X." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__call__/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._scorer._BaseScorer.__call__.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Evaluate predicted target values for X relative to y_true.", + "docstring": "Evaluate predicted target values for X relative to y_true.\n\nParameters\n----------\nestimator : object\n Trained estimator to use for scoring. Must have a predict_proba\n method; the output of that is used to compute the score.\n\nX : {array-like, sparse matrix}\n Test data that will be fed to estimator.predict.\n\ny_true : array-like\n Gold standard target values for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score function applied to prediction of estimator on X.", + "code": " def __call__(self, estimator, X, y_true, sample_weight=None):\n \"\"\"Evaluate predicted target values for X relative to y_true.\n\n Parameters\n ----------\n estimator : object\n Trained estimator to use for scoring. 
Must have a predict_proba\n method; the output of that is used to compute the score.\n\n X : {array-like, sparse matrix}\n Test data that will be fed to estimator.predict.\n\n y_true : array-like\n Gold standard target values for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Score function applied to prediction of estimator on X.\n \"\"\"\n return self._score(partial(_cached_call, None), estimator, X, y_true,\n sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__init__", + "name": "__init__", + "qname": "sklearn.metrics._scorer._BaseScorer.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__init__/self", + "name": "self", + "qname": "sklearn.metrics._scorer._BaseScorer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__init__/score_func", + "name": "score_func", + "qname": "sklearn.metrics._scorer._BaseScorer.__init__.score_func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__init__/sign", + "name": "sign", + "qname": "sklearn.metrics._scorer._BaseScorer.__init__.sign", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__init__/kwargs", + "name": "kwargs", + "qname": "sklearn.metrics._scorer._BaseScorer.__init__.kwargs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __init__(self, score_func, sign, kwargs):\n self._kwargs = kwargs\n self._score_func = score_func\n self._sign = sign" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__repr__", + "name": "__repr__", + "qname": "sklearn.metrics._scorer._BaseScorer.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/__repr__/self", + "name": "self", + "qname": "sklearn.metrics._scorer._BaseScorer.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n kwargs_string = \"\".join([\", %s=%s\" % (str(k), str(v))\n for k, v in self._kwargs.items()])\n return (\"make_scorer(%s%s%s%s)\"\n % (self._score_func.__name__,\n \"\" if self._sign > 0 else \", greater_is_better=False\",\n self._factory_args(), kwargs_string))" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/_check_pos_label", + "name": "_check_pos_label", + "qname": "sklearn.metrics._scorer._BaseScorer._check_pos_label", + "decorators": ["staticmethod"], + "parameters": [ + { + "id": 
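A sketch of how _BaseScorer instances are normally obtained, via the public make_scorer factory (assuming scikit-learn 0.24.x): sign flips error metrics so that greater is always better, and the __repr__ captured above reports the non-default flags.

from sklearn.dummy import DummyRegressor
from sklearn.metrics import make_scorer, mean_squared_error

scorer = make_scorer(mean_squared_error, greater_is_better=False)
print(scorer)  # make_scorer(mean_squared_error, greater_is_better=False)

est = DummyRegressor(strategy="mean").fit([[0], [1]], [0, 1])
print(scorer(est, [[0], [1]], [0, 1]))  # -0.25: sign * MSE of the mean model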
"scikit-learn/sklearn.metrics._scorer/_BaseScorer/_check_pos_label/pos_label", + "name": "pos_label", + "qname": "sklearn.metrics._scorer._BaseScorer._check_pos_label.pos_label", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/_check_pos_label/classes", + "name": "classes", + "qname": "sklearn.metrics._scorer._BaseScorer._check_pos_label.classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @staticmethod\n def _check_pos_label(pos_label, classes):\n if pos_label not in list(classes):\n raise ValueError(\n f\"pos_label={pos_label} is not a valid label: {classes}\"\n )" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/_factory_args", + "name": "_factory_args", + "qname": "sklearn.metrics._scorer._BaseScorer._factory_args", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/_factory_args/self", + "name": "self", + "qname": "sklearn.metrics._scorer._BaseScorer._factory_args.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return non-default make_scorer arguments for repr.", + "docstring": "Return non-default make_scorer arguments for repr.", + "code": " def _factory_args(self):\n \"\"\"Return non-default make_scorer arguments for repr.\"\"\"\n return \"\"" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/_select_proba_binary", + "name": "_select_proba_binary", + "qname": "sklearn.metrics._scorer._BaseScorer._select_proba_binary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/_select_proba_binary/self", + "name": "self", + "qname": "sklearn.metrics._scorer._BaseScorer._select_proba_binary.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/_select_proba_binary/y_pred", + "name": "y_pred", + "qname": "sklearn.metrics._scorer._BaseScorer._select_proba_binary.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_classes)", + "default_value": "", + "description": "The prediction given by `predict_proba`." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_BaseScorer/_select_proba_binary/classes", + "name": "classes", + "qname": "sklearn.metrics._scorer._BaseScorer._select_proba_binary.classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_classes,)", + "default_value": "", + "description": "The class labels for the estimator." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_classes,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Select the column of the positive label in `y_pred` when\nprobabilities are provided.", + "docstring": "Select the column of the positive label in `y_pred` when\nprobabilities are provided.\n\nParameters\n----------\ny_pred : ndarray of shape (n_samples, n_classes)\n The prediction given by `predict_proba`.\n\nclasses : ndarray of shape (n_classes,)\n The class labels for the estimator.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n Probability predictions of the positive class.", + "code": " def _select_proba_binary(self, y_pred, classes):\n \"\"\"Select the column of the positive label in `y_pred` when\n probabilities are provided.\n\n Parameters\n ----------\n y_pred : ndarray of shape (n_samples, n_classes)\n The prediction given by `predict_proba`.\n\n classes : ndarray of shape (n_classes,)\n The class labels for the estimator.\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,)\n Probability predictions of the positive class.\n \"\"\"\n if y_pred.shape[1] == 2:\n pos_label = self._kwargs.get(\"pos_label\", classes[1])\n self._check_pos_label(pos_label, classes)\n col_idx = np.flatnonzero(classes == pos_label)[0]\n return y_pred[:, col_idx]\n\n err_msg = (\n f\"Got predict_proba of shape {y_pred.shape}, but need \"\n f\"classifier with two classes for {self._score_func.__name__} \"\n f\"scoring\"\n )\n raise ValueError(err_msg)" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/__call__", + "name": "__call__", + "qname": "sklearn.metrics._scorer._MultimetricScorer.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/__call__/self", + "name": "self", + "qname": "sklearn.metrics._scorer._MultimetricScorer.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/__call__/estimator", + "name": "estimator", + "qname": "sklearn.metrics._scorer._MultimetricScorer.__call__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/__call__/args", + "name": "args", + "qname": "sklearn.metrics._scorer._MultimetricScorer.__call__.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/__call__/kwargs", + "name": "kwargs", + "qname": "sklearn.metrics._scorer._MultimetricScorer.__call__.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Evaluate predicted target values.", + "docstring": "Evaluate predicted target values.", + "code": " def __call__(self, estimator, *args, **kwargs):\n \"\"\"Evaluate predicted target values.\"\"\"\n scores = {}\n cache = {} if self._use_cache(estimator) else None\n 
cached_call = partial(_cached_call, cache)\n\n for name, scorer in self._scorers.items():\n if isinstance(scorer, _BaseScorer):\n score = scorer._score(cached_call, estimator,\n *args, **kwargs)\n else:\n score = scorer(estimator, *args, **kwargs)\n scores[name] = score\n return scores" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/__init__", + "name": "__init__", + "qname": "sklearn.metrics._scorer._MultimetricScorer.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/__init__/self", + "name": "self", + "qname": "sklearn.metrics._scorer._MultimetricScorer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/__init__/scorers", + "name": "scorers", + "qname": "sklearn.metrics._scorer._MultimetricScorer.__init__.scorers", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Callable for multimetric scoring used to avoid repeated calls\nto `predict_proba`, `predict`, and `decision_function`.\n\n`_MultimetricScorer` will return a dictionary of scores corresponding to\nthe scorers in the dictionary. Note that `_MultimetricScorer` can be\ncreated with a dictionary with one key (i.e. only one actual scorer).", + "docstring": "", + "code": " def __init__(self, **scorers):\n self._scorers = scorers" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/_use_cache", + "name": "_use_cache", + "qname": "sklearn.metrics._scorer._MultimetricScorer._use_cache", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/_use_cache/self", + "name": "self", + "qname": "sklearn.metrics._scorer._MultimetricScorer._use_cache.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_MultimetricScorer/_use_cache/estimator", + "name": "estimator", + "qname": "sklearn.metrics._scorer._MultimetricScorer._use_cache.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return True if using a cache is beneficial.\n\nCaching may be beneficial when one of these conditions holds:\n - `_ProbaScorer` will be called twice.\n - `_PredictScorer` will be called twice.\n - `_ThresholdScorer` will be called twice.\n - `_ThresholdScorer` and `_PredictScorer` are called and\n estimator is a regressor.\n - `_ThresholdScorer` and `_ProbaScorer` are called and\n estimator does not have a `decision_function` attribute.", + "docstring": "Return True if using a cache is beneficial.\n\nCaching may be beneficial when one of these conditions holds:\n - `_ProbaScorer` will be called twice.\n - `_PredictScorer` will be called twice.\n - `_ThresholdScorer` will be called twice.\n - `_ThresholdScorer` and `_PredictScorer` are called and\n estimator is a regressor.\n - `_ThresholdScorer` and 
`_ProbaScorer` are called and\n estimator does not have a `decision_function` attribute.", + "code": " def _use_cache(self, estimator):\n \"\"\"Return True if using a cache is beneficial.\n\n Caching may be beneficial when one of these conditions holds:\n - `_ProbaScorer` will be called twice.\n - `_PredictScorer` will be called twice.\n - `_ThresholdScorer` will be called twice.\n - `_ThresholdScorer` and `_PredictScorer` are called and\n estimator is a regressor.\n - `_ThresholdScorer` and `_ProbaScorer` are called and\n estimator does not have a `decision_function` attribute.\n\n \"\"\"\n if len(self._scorers) == 1: # Only one scorer\n return False\n\n counter = Counter([type(v) for v in self._scorers.values()])\n\n if any(counter[known_type] > 1 for known_type in\n [_PredictScorer, _ProbaScorer, _ThresholdScorer]):\n return True\n\n if counter[_ThresholdScorer]:\n if is_regressor(estimator) and counter[_PredictScorer]:\n return True\n elif (counter[_ProbaScorer] and\n not hasattr(estimator, \"decision_function\")):\n return True\n return False" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_PredictScorer/_score", + "name": "_score", + "qname": "sklearn.metrics._scorer._PredictScorer._score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_PredictScorer/_score/self", + "name": "self", + "qname": "sklearn.metrics._scorer._PredictScorer._score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_PredictScorer/_score/method_caller", + "name": "method_caller", + "qname": "sklearn.metrics._scorer._PredictScorer._score.method_caller", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Returns predictions given an estimator, method name, and other\narguments, potentially caching results." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_PredictScorer/_score/estimator", + "name": "estimator", + "qname": "sklearn.metrics._scorer._PredictScorer._score.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Trained estimator to use for scoring. Must have a `predict`\nmethod; the output of that is used to compute the score." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_PredictScorer/_score/X", + "name": "X", + "qname": "sklearn.metrics._scorer._PredictScorer._score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}", + "default_value": "", + "description": "Test data that will be fed to estimator.predict." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_PredictScorer/_score/y_true", + "name": "y_true", + "qname": "sklearn.metrics._scorer._PredictScorer._score.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "Gold standard target values for X." 
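A sketch of the usual entry point for _MultimetricScorer (assuming scikit-learn 0.24.x): a dict of scorers passed to cross_validate, where predictions are shared across metrics whenever the _use_cache heuristic above says caching helps.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate

X, y = make_classification(random_state=0)
scores = cross_validate(LogisticRegression(max_iter=1000), X, y, cv=3,
                        scoring={"acc": "accuracy", "auc": "roc_auc"})
print(sorted(scores))  # ['fit_time', 'score_time', 'test_acc', 'test_auc']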
+ }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_PredictScorer/_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._scorer._PredictScorer._score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Evaluate predicted target values for X relative to y_true.", + "docstring": "Evaluate predicted target values for X relative to y_true.\n\nParameters\n----------\nmethod_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\nestimator : object\n Trained estimator to use for scoring. Must have a `predict`\n method; the output of that is used to compute the score.\n\nX : {array-like, sparse matrix}\n Test data that will be fed to estimator.predict.\n\ny_true : array-like\n Gold standard target values for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score function applied to prediction of estimator on X.", + "code": " def _score(self, method_caller, estimator, X, y_true, sample_weight=None):\n \"\"\"Evaluate predicted target values for X relative to y_true.\n\n Parameters\n ----------\n method_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\n estimator : object\n Trained estimator to use for scoring. 
Must have a `predict`\n method; the output of that is used to compute the score.\n\n X : {array-like, sparse matrix}\n Test data that will be fed to estimator.predict.\n\n y_true : array-like\n Gold standard target values for X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Score function applied to prediction of estimator on X.\n \"\"\"\n\n y_pred = method_caller(estimator, \"predict\", X)\n if sample_weight is not None:\n return self._sign * self._score_func(y_true, y_pred,\n sample_weight=sample_weight,\n **self._kwargs)\n else:\n return self._sign * self._score_func(y_true, y_pred,\n **self._kwargs)" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ProbaScorer/_factory_args", + "name": "_factory_args", + "qname": "sklearn.metrics._scorer._ProbaScorer._factory_args", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_ProbaScorer/_factory_args/self", + "name": "self", + "qname": "sklearn.metrics._scorer._ProbaScorer._factory_args.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _factory_args(self):\n return \", needs_proba=True\"" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ProbaScorer/_score", + "name": "_score", + "qname": "sklearn.metrics._scorer._ProbaScorer._score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_ProbaScorer/_score/self", + "name": "self", + "qname": "sklearn.metrics._scorer._ProbaScorer._score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ProbaScorer/_score/method_caller", + "name": "method_caller", + "qname": "sklearn.metrics._scorer._ProbaScorer._score.method_caller", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Returns predictions given an estimator, method name, and other\narguments, potentially caching results." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ProbaScorer/_score/clf", + "name": "clf", + "qname": "sklearn.metrics._scorer._ProbaScorer._score.clf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Trained classifier to use for scoring. Must have a `predict_proba`\nmethod; the output of that is used to compute the score." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ProbaScorer/_score/X", + "name": "X", + "qname": "sklearn.metrics._scorer._ProbaScorer._score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}", + "default_value": "", + "description": "Test data that will be fed to clf.predict_proba." 
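A sketch of a _PredictScorer in use (assuming scikit-learn 0.24.x): a plain make_scorer call with no needs_proba or needs_threshold flag yields this class, and sample_weight is forwarded to the wrapped metric exactly as in the _score method above.

import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, make_scorer

X, y = [[0], [1], [2]], [0, 0, 1]
clf = DummyClassifier(strategy="most_frequent").fit(X, y)

scorer = make_scorer(accuracy_score)  # plain predict-based scorer
print(scorer(clf, X, y))                                        # 0.666...: two of three correct
print(scorer(clf, X, y, sample_weight=np.array([0., 0., 1.])))  # 0.0: only the misclassified sample counts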
+ }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ProbaScorer/_score/y", + "name": "y", + "qname": "sklearn.metrics._scorer._ProbaScorer._score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "Gold standard target values for X. These must be class labels,\nnot probabilities." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ProbaScorer/_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._scorer._ProbaScorer._score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Evaluate predicted probabilities for X relative to y_true.", + "docstring": "Evaluate predicted probabilities for X relative to y_true.\n\nParameters\n----------\nmethod_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\nclf : object\n Trained classifier to use for scoring. Must have a `predict_proba`\n method; the output of that is used to compute the score.\n\nX : {array-like, sparse matrix}\n Test data that will be fed to clf.predict_proba.\n\ny : array-like\n Gold standard target values for X. These must be class labels,\n not probabilities.\n\nsample_weight : array-like, default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score function applied to prediction of estimator on X.", + "code": " def _score(self, method_caller, clf, X, y, sample_weight=None):\n \"\"\"Evaluate predicted probabilities for X relative to y_true.\n\n Parameters\n ----------\n method_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\n clf : object\n Trained classifier to use for scoring. Must have a `predict_proba`\n method; the output of that is used to compute the score.\n\n X : {array-like, sparse matrix}\n Test data that will be fed to clf.predict_proba.\n\n y : array-like\n Gold standard target values for X. 
These must be class labels,\n not probabilities.\n\n sample_weight : array-like, default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Score function applied to prediction of estimator on X.\n \"\"\"\n\n y_type = type_of_target(y)\n y_pred = method_caller(clf, \"predict_proba\", X)\n if y_type == \"binary\" and y_pred.shape[1] <= 2:\n # `y_type` could be equal to \"binary\" even in a multi-class\n # problem: (when only 2 class are given to `y_true` during scoring)\n # Thus, we need to check for the shape of `y_pred`.\n y_pred = self._select_proba_binary(y_pred, clf.classes_)\n if sample_weight is not None:\n return self._sign * self._score_func(y, y_pred,\n sample_weight=sample_weight,\n **self._kwargs)\n else:\n return self._sign * self._score_func(y, y_pred, **self._kwargs)" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ThresholdScorer/_factory_args", + "name": "_factory_args", + "qname": "sklearn.metrics._scorer._ThresholdScorer._factory_args", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_ThresholdScorer/_factory_args/self", + "name": "self", + "qname": "sklearn.metrics._scorer._ThresholdScorer._factory_args.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _factory_args(self):\n return \", needs_threshold=True\"" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ThresholdScorer/_score", + "name": "_score", + "qname": "sklearn.metrics._scorer._ThresholdScorer._score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_ThresholdScorer/_score/self", + "name": "self", + "qname": "sklearn.metrics._scorer._ThresholdScorer._score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ThresholdScorer/_score/method_caller", + "name": "method_caller", + "qname": "sklearn.metrics._scorer._ThresholdScorer._score.method_caller", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Returns predictions given an estimator, method name, and other\narguments, potentially caching results." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ThresholdScorer/_score/clf", + "name": "clf", + "qname": "sklearn.metrics._scorer._ThresholdScorer._score.clf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Trained classifier to use for scoring. Must have either a\ndecision_function method or a predict_proba method; the output of\nthat is used to compute the score." 
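A sketch of a _ProbaScorer, created with needs_proba=True (assuming scikit-learn 0.24.x): predict_proba output, reduced to the positive-class column for binary problems as in the _score method above, is fed to the metric.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, make_scorer

X, y = make_classification(random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X, y)

neg_log_loss = make_scorer(log_loss, greater_is_better=False, needs_proba=True)
print(neg_log_loss(clf, X, y))  # negated log-loss, so greater is better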
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ThresholdScorer/_score/X", + "name": "X", + "qname": "sklearn.metrics._scorer._ThresholdScorer._score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}", + "default_value": "", + "description": "Test data that will be fed to clf.decision_function or\nclf.predict_proba." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ThresholdScorer/_score/y", + "name": "y", + "qname": "sklearn.metrics._scorer._ThresholdScorer._score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "Gold standard target values for X. These must be class labels,\nnot decision function values." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_ThresholdScorer/_score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.metrics._scorer._ThresholdScorer._score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Evaluate decision function output for X relative to y_true.", + "docstring": "Evaluate decision function output for X relative to y_true.\n\nParameters\n----------\nmethod_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\nclf : object\n Trained classifier to use for scoring. Must have either a\n decision_function method or a predict_proba method; the output of\n that is used to compute the score.\n\nX : {array-like, sparse matrix}\n Test data that will be fed to clf.decision_function or\n clf.predict_proba.\n\ny : array-like\n Gold standard target values for X. These must be class labels,\n not decision function values.\n\nsample_weight : array-like, default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Score function applied to prediction of estimator on X.", + "code": " def _score(self, method_caller, clf, X, y, sample_weight=None):\n \"\"\"Evaluate decision function output for X relative to y_true.\n\n Parameters\n ----------\n method_caller : callable\n Returns predictions given an estimator, method name, and other\n arguments, potentially caching results.\n\n clf : object\n Trained classifier to use for scoring. Must have either a\n decision_function method or a predict_proba method; the output of\n that is used to compute the score.\n\n X : {array-like, sparse matrix}\n Test data that will be fed to clf.decision_function or\n clf.predict_proba.\n\n y : array-like\n Gold standard target values for X. 
These must be class labels,\n not decision function values.\n\n sample_weight : array-like, default=None\n Sample weights.\n\n Returns\n -------\n score : float\n Score function applied to prediction of estimator on X.\n \"\"\"\n\n y_type = type_of_target(y)\n if y_type not in (\"binary\", \"multilabel-indicator\"):\n raise ValueError(\"{0} format is not supported\".format(y_type))\n\n if is_regressor(clf):\n y_pred = method_caller(clf, \"predict\", X)\n else:\n try:\n y_pred = method_caller(clf, \"decision_function\", X)\n\n if isinstance(y_pred, list):\n # For multi-output multi-class estimator\n y_pred = np.vstack([p for p in y_pred]).T\n elif y_type == \"binary\" and \"pos_label\" in self._kwargs:\n self._check_pos_label(\n self._kwargs[\"pos_label\"], clf.classes_\n )\n if self._kwargs[\"pos_label\"] == clf.classes_[0]:\n # The implicit positive class of the binary classifier\n # does not match `pos_label`: we need to invert the\n # predictions\n y_pred *= -1\n\n except (NotImplementedError, AttributeError):\n y_pred = method_caller(clf, \"predict_proba\", X)\n\n if y_type == \"binary\":\n y_pred = self._select_proba_binary(y_pred, clf.classes_)\n elif isinstance(y_pred, list):\n y_pred = np.vstack([p[:, -1] for p in y_pred]).T\n\n if sample_weight is not None:\n return self._sign * self._score_func(y, y_pred,\n sample_weight=sample_weight,\n **self._kwargs)\n else:\n return self._sign * self._score_func(y, y_pred, **self._kwargs)" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_cached_call", + "name": "_cached_call", + "qname": "sklearn.metrics._scorer._cached_call", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_cached_call/cache", + "name": "cache", + "qname": "sklearn.metrics._scorer._cached_call.cache", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_cached_call/estimator", + "name": "estimator", + "qname": "sklearn.metrics._scorer._cached_call.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_cached_call/method", + "name": "method", + "qname": "sklearn.metrics._scorer._cached_call.method", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_cached_call/args", + "name": "args", + "qname": "sklearn.metrics._scorer._cached_call.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_cached_call/kwargs", + "name": "kwargs", + "qname": "sklearn.metrics._scorer._cached_call.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Call estimator with method and args and kwargs.", + "docstring": "Call estimator with method and args and kwargs.", + "code": "def _cached_call(cache, estimator, method, 
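A sketch of a _ThresholdScorer, created with needs_threshold=True (assuming scikit-learn 0.24.x): decision_function output is preferred, with the predict_proba fallback shown in the _score method above.

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, roc_auc_score

X, y = make_classification(random_state=0)
auc = make_scorer(roc_auc_score, needs_threshold=True)

print(auc(LogisticRegression(max_iter=1000).fit(X, y), X, y))       # uses decision_function
print(auc(RandomForestClassifier(random_state=0).fit(X, y), X, y))  # falls back to predict_proba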
*args, **kwargs):\n \"\"\"Call estimator with method and args and kwargs.\"\"\"\n if cache is None:\n return getattr(estimator, method)(*args, **kwargs)\n\n try:\n return cache[method]\n except KeyError:\n result = getattr(estimator, method)(*args, **kwargs)\n cache[method] = result\n return result" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_check_multimetric_scoring", + "name": "_check_multimetric_scoring", + "qname": "sklearn.metrics._scorer._check_multimetric_scoring", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_check_multimetric_scoring/estimator", + "name": "estimator", + "qname": "sklearn.metrics._scorer._check_multimetric_scoring.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "sklearn estimator instance", + "default_value": "", + "description": "The estimator for which the scoring will be applied." + }, + "type": { + "kind": "NamedType", + "name": "sklearn estimator instance" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_check_multimetric_scoring/scoring", + "name": "scoring", + "qname": "sklearn.metrics._scorer._check_multimetric_scoring.scoring", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, tuple or dict", + "default_value": "", + "description": "Strategy to evaluate the performance of the cross-validated model on\nthe test set.\n\nThe possibilities are:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where the keys are the metric\n names and the values are the metric scores;\n- a dictionary with metric names as keys and callables as values.\n\nSee :ref:`multimetric_grid_search` for an example."
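A sketch of the _cached_call contract captured above; the helper is private, so its logic is restated locally rather than imported, and CountingEstimator is a hypothetical stand-in: with a cache dict, the estimator method runs once per method name and later calls hit the cache.

from functools import partial

class CountingEstimator:
    # hypothetical estimator that counts how often predict runs
    calls = 0
    def predict(self, X):
        CountingEstimator.calls += 1
        return [0] * len(X)

def cached_call(cache, estimator, method, *args, **kwargs):
    # local restatement of the _cached_call logic; note the cache is
    # keyed by method name only, so identical args are assumed
    if cache is None:
        return getattr(estimator, method)(*args, **kwargs)
    try:
        return cache[method]
    except KeyError:
        result = getattr(estimator, method)(*args, **kwargs)
        cache[method] = result
        return result

cache, est = {}, CountingEstimator()
call = partial(cached_call, cache)  # mirrors partial(_cached_call, cache)
call(est, "predict", [[0], [1]])
call(est, "predict", [[0], [1]])
print(CountingEstimator.calls)  # 1: the second call was served from the cache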
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "tuple" + }, + { + "kind": "NamedType", + "name": "dict" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the scoring parameter in cases when multiple metrics are allowed.", + "docstring": "Check the scoring parameter in cases when multiple metrics are allowed.\n\nParameters\n----------\nestimator : sklearn estimator instance\n The estimator for which the scoring will be applied.\n\nscoring : list, tuple or dict\n Strategy to evaluate the performance of the cross-validated model on\n the test set.\n\n The possibilities are:\n\n - a list or tuple of unique strings;\n - a callable returning a dictionary where they keys are the metric\n names and the values are the metric scores;\n - a dictionary with metric names as keys and callables a values.\n\n See :ref:`multimetric_grid_search` for an example.\n\nReturns\n-------\nscorers_dict : dict\n A dict mapping each scorer name to its validated scorer.", + "code": "def _check_multimetric_scoring(estimator, scoring):\n \"\"\"Check the scoring parameter in cases when multiple metrics are allowed.\n\n Parameters\n ----------\n estimator : sklearn estimator instance\n The estimator for which the scoring will be applied.\n\n scoring : list, tuple or dict\n Strategy to evaluate the performance of the cross-validated model on\n the test set.\n\n The possibilities are:\n\n - a list or tuple of unique strings;\n - a callable returning a dictionary where they keys are the metric\n names and the values are the metric scores;\n - a dictionary with metric names as keys and callables a values.\n\n See :ref:`multimetric_grid_search` for an example.\n\n Returns\n -------\n scorers_dict : dict\n A dict mapping each scorer name to its validated scorer.\n \"\"\"\n err_msg_generic = (\n f\"scoring is invalid (got {scoring!r}). Refer to the \"\n \"scoring glossary for details: \"\n \"https://scikit-learn.org/stable/glossary.html#term-scoring\")\n\n if isinstance(scoring, (list, tuple, set)):\n err_msg = (\"The list/tuple elements must be unique \"\n \"strings of predefined scorers. \")\n invalid = False\n try:\n keys = set(scoring)\n except TypeError:\n invalid = True\n if invalid:\n raise ValueError(err_msg)\n\n if len(keys) != len(scoring):\n raise ValueError(f\"{err_msg} Duplicate elements were found in\"\n f\" the given list. {scoring!r}\")\n elif len(keys) > 0:\n if not all(isinstance(k, str) for k in keys):\n if any(callable(k) for k in keys):\n raise ValueError(f\"{err_msg} One or more of the elements \"\n \"were callables. Use a dict of score \"\n \"name mapped to the scorer callable. \"\n f\"Got {scoring!r}\")\n else:\n raise ValueError(f\"{err_msg} Non-string types were found \"\n f\"in the given list. Got {scoring!r}\")\n scorers = {scorer: check_scoring(estimator, scoring=scorer)\n for scorer in scoring}\n else:\n raise ValueError(f\"{err_msg} Empty list was given. {scoring!r}\")\n\n elif isinstance(scoring, dict):\n keys = set(scoring)\n if not all(isinstance(k, str) for k in keys):\n raise ValueError(\"Non-string types were found in the keys of \"\n f\"the given dict. scoring={scoring!r}\")\n if len(keys) == 0:\n raise ValueError(f\"An empty dict was passed. 
{scoring!r}\")\n scorers = {key: check_scoring(estimator, scoring=scorer)\n for key, scorer in scoring.items()}\n else:\n raise ValueError(err_msg_generic)\n return scorers" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_passthrough_scorer", + "name": "_passthrough_scorer", + "qname": "sklearn.metrics._scorer._passthrough_scorer", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/_passthrough_scorer/estimator", + "name": "estimator", + "qname": "sklearn.metrics._scorer._passthrough_scorer.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_passthrough_scorer/args", + "name": "args", + "qname": "sklearn.metrics._scorer._passthrough_scorer.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/_passthrough_scorer/kwargs", + "name": "kwargs", + "qname": "sklearn.metrics._scorer._passthrough_scorer.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Function that wraps estimator.score", + "docstring": "Function that wraps estimator.score", + "code": "def _passthrough_scorer(estimator, *args, **kwargs):\n \"\"\"Function that wraps estimator.score\"\"\"\n return estimator.score(*args, **kwargs)" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/check_scoring", + "name": "check_scoring", + "qname": "sklearn.metrics._scorer.check_scoring", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/check_scoring/estimator", + "name": "estimator", + "qname": "sklearn.metrics._scorer.check_scoring.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object implementing 'fit'", + "default_value": "", + "description": "The object to use to fit the data." + }, + "type": { + "kind": "NamedType", + "name": "estimator object implementing 'fit'" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/check_scoring/scoring", + "name": "scoring", + "qname": "sklearn.metrics._scorer.check_scoring.scoring", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "None", + "description": "A string (see model evaluation documentation) or\na scorer callable object / function with signature\n``scorer(estimator, X, y)``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/check_scoring/allow_none", + "name": "allow_none", + "qname": "sklearn.metrics._scorer.check_scoring.allow_none", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If no scoring is specified and the estimator has no score function, we\ncan either return None or raise an exception." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Determine scorer from user options.\n\nA TypeError will be thrown if the estimator cannot be scored.", + "docstring": "Determine scorer from user options.\n\nA TypeError will be thrown if the estimator cannot be scored.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nscoring : str or callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\nallow_none : bool, default=False\n If no scoring is specified and the estimator has no score function, we\n can either return None or raise an exception.\n\nReturns\n-------\nscoring : callable\n A scorer callable object / function with signature\n ``scorer(estimator, X, y)``.", + "code": "@_deprecate_positional_args\ndef check_scoring(estimator, scoring=None, *, allow_none=False):\n \"\"\"Determine scorer from user options.\n\n A TypeError will be thrown if the estimator cannot be scored.\n\n Parameters\n ----------\n estimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\n scoring : str or callable, default=None\n A string (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\n allow_none : bool, default=False\n If no scoring is specified and the estimator has no score function, we\n can either return None or raise an exception.\n\n Returns\n -------\n scoring : callable\n A scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n \"\"\"\n if not hasattr(estimator, 'fit'):\n raise TypeError(\"estimator should be an estimator implementing \"\n \"'fit' method, %r was passed\" % estimator)\n if isinstance(scoring, str):\n return get_scorer(scoring)\n elif callable(scoring):\n # Heuristic to ensure user has not passed a metric\n module = getattr(scoring, '__module__', None)\n if hasattr(module, 'startswith') and \\\n module.startswith('sklearn.metrics.') and \\\n not module.startswith('sklearn.metrics._scorer') and \\\n not module.startswith('sklearn.metrics.tests.'):\n raise ValueError('scoring value %r looks like it is a metric '\n 'function rather than a scorer. A scorer should '\n 'require an estimator as its first parameter. '\n 'Please use `make_scorer` to convert a metric '\n 'to a scorer.' % scoring)\n return get_scorer(scoring)\n elif scoring is None:\n if hasattr(estimator, 'score'):\n return _passthrough_scorer\n elif allow_none:\n return None\n else:\n raise TypeError(\n \"If no scoring is specified, the estimator passed should \"\n \"have a 'score' method. The estimator %r does not.\"\n % estimator)\n elif isinstance(scoring, Iterable):\n raise ValueError(\"For evaluating multiple scores, use \"\n \"sklearn.model_selection.cross_validate instead. \"\n \"{0} was passed.\".format(scoring))\n else:\n raise ValueError(\"scoring value should either be a callable, string or\"\n \" None. 
%r was passed\" % scoring)" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/get_scorer", + "name": "get_scorer", + "qname": "sklearn.metrics._scorer.get_scorer", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/get_scorer/scoring", + "name": "scoring", + "qname": "sklearn.metrics._scorer.get_scorer.scoring", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "", + "description": "Scoring method as string. If callable it is returned as is." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get a scorer from string.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Get a scorer from string.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscoring : str or callable\n Scoring method as string. If callable it is returned as is.\n\nReturns\n-------\nscorer : callable\n The scorer.", + "code": "def get_scorer(scoring):\n \"\"\"Get a scorer from string.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n scoring : str or callable\n Scoring method as string. If callable it is returned as is.\n\n Returns\n -------\n scorer : callable\n The scorer.\n \"\"\"\n if isinstance(scoring, str):\n try:\n scorer = SCORERS[scoring]\n except KeyError:\n raise ValueError('%r is not a valid scoring value. '\n 'Use sorted(sklearn.metrics.SCORERS.keys()) '\n 'to get valid options.' % scoring)\n else:\n scorer = scoring\n return scorer" + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/make_scorer", + "name": "make_scorer", + "qname": "sklearn.metrics._scorer.make_scorer", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics._scorer/make_scorer/score_func", + "name": "score_func", + "qname": "sklearn.metrics._scorer.make_scorer.score_func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Score function (or loss function) with signature\n``score_func(y, y_pred, **kwargs)``." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/make_scorer/greater_is_better", + "name": "greater_is_better", + "qname": "sklearn.metrics._scorer.make_scorer.greater_is_better", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether score_func is a score function (default), meaning high is good,\nor a loss function, meaning low is good. In the latter case, the\nscorer object will sign-flip the outcome of the score_func." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/make_scorer/needs_proba", + "name": "needs_proba", + "qname": "sklearn.metrics._scorer.make_scorer.needs_proba", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether score_func requires predict_proba to get probability estimates\nout of a classifier.\n\nIf True, for binary `y_true`, the score function is supposed to accept\na 1D `y_pred` (i.e., probability of the positive class, shape\n`(n_samples,)`)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/make_scorer/needs_threshold", + "name": "needs_threshold", + "qname": "sklearn.metrics._scorer.make_scorer.needs_threshold", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether score_func takes a continuous decision certainty.\nThis only works for binary classification using estimators that\nhave either a decision_function or predict_proba method.\n\nIf True, for binary `y_true`, the score function is supposed to accept\na 1D `y_pred` (i.e., probability of the positive class or the decision\nfunction, shape `(n_samples,)`).\n\nFor example ``average_precision`` or the area under the roc curve\ncan not be computed using discrete predictions alone." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics._scorer/make_scorer/kwargs", + "name": "kwargs", + "qname": "sklearn.metrics._scorer.make_scorer.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "additional arguments", + "default_value": "", + "description": "Additional parameters to be passed to score_func." 
+ }, + "type": { + "kind": "NamedType", + "name": "additional arguments" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Make a scorer from a performance metric or loss function.\n\nThis factory function wraps scoring functions for use in\n:class:`~sklearn.model_selection.GridSearchCV` and\n:func:`~sklearn.model_selection.cross_val_score`.\nIt takes a score function, such as :func:`~sklearn.metrics.accuracy_score`,\n:func:`~sklearn.metrics.mean_squared_error`,\n:func:`~sklearn.metrics.adjusted_rand_index` or\n:func:`~sklearn.metrics.average_precision`\nand returns a callable that scores an estimator's output.\nThe signature of the call is `(estimator, X, y)` where `estimator`\nis the model to be evaluated, `X` is the data and `y` is the\nground truth labeling (or `None` in the case of unsupervised models).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Make a scorer from a performance metric or loss function.\n\nThis factory function wraps scoring functions for use in\n:class:`~sklearn.model_selection.GridSearchCV` and\n:func:`~sklearn.model_selection.cross_val_score`.\nIt takes a score function, such as :func:`~sklearn.metrics.accuracy_score`,\n:func:`~sklearn.metrics.mean_squared_error`,\n:func:`~sklearn.metrics.adjusted_rand_index` or\n:func:`~sklearn.metrics.average_precision`\nand returns a callable that scores an estimator's output.\nThe signature of the call is `(estimator, X, y)` where `estimator`\nis the model to be evaluated, `X` is the data and `y` is the\nground truth labeling (or `None` in the case of unsupervised models).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nscore_func : callable\n Score function (or loss function) with signature\n ``score_func(y, y_pred, **kwargs)``.\n\ngreater_is_better : bool, default=True\n Whether score_func is a score function (default), meaning high is good,\n or a loss function, meaning low is good. In the latter case, the\n scorer object will sign-flip the outcome of the score_func.\n\nneeds_proba : bool, default=False\n Whether score_func requires predict_proba to get probability estimates\n out of a classifier.\n\n If True, for binary `y_true`, the score function is supposed to accept\n a 1D `y_pred` (i.e., probability of the positive class, shape\n `(n_samples,)`).\n\nneeds_threshold : bool, default=False\n Whether score_func takes a continuous decision certainty.\n This only works for binary classification using estimators that\n have either a decision_function or predict_proba method.\n\n If True, for binary `y_true`, the score function is supposed to accept\n a 1D `y_pred` (i.e., probability of the positive class or the decision\n function, shape `(n_samples,)`).\n\n For example ``average_precision`` or the area under the roc curve\n can not be computed using discrete predictions alone.\n\n**kwargs : additional arguments\n Additional parameters to be passed to score_func.\n\nReturns\n-------\nscorer : callable\n Callable object that returns a scalar score; greater is better.\n\nExamples\n--------\n>>> from sklearn.metrics import fbeta_score, make_scorer\n>>> ftwo_scorer = make_scorer(fbeta_score, beta=2)\n>>> ftwo_scorer\nmake_scorer(fbeta_score, beta=2)\n>>> from sklearn.model_selection import GridSearchCV\n>>> from sklearn.svm import LinearSVC\n>>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]},\n... 
scoring=ftwo_scorer)\n\nNotes\n-----\nIf `needs_proba=False` and `needs_threshold=False`, the score\nfunction is supposed to accept the output of :term:`predict`. If\n`needs_proba=True`, the score function is supposed to accept the\noutput of :term:`predict_proba` (For binary `y_true`, the score function is\nsupposed to accept probability of the positive class). If\n`needs_threshold=True`, the score function is supposed to accept the\noutput of :term:`decision_function`.", + "code": "@_deprecate_positional_args\ndef make_scorer(score_func, *, greater_is_better=True, needs_proba=False,\n needs_threshold=False, **kwargs):\n \"\"\"Make a scorer from a performance metric or loss function.\n\n This factory function wraps scoring functions for use in\n :class:`~sklearn.model_selection.GridSearchCV` and\n :func:`~sklearn.model_selection.cross_val_score`.\n It takes a score function, such as :func:`~sklearn.metrics.accuracy_score`,\n :func:`~sklearn.metrics.mean_squared_error`,\n :func:`~sklearn.metrics.adjusted_rand_score` or\n :func:`~sklearn.metrics.average_precision_score`\n and returns a callable that scores an estimator's output.\n The signature of the call is `(estimator, X, y)` where `estimator`\n is the model to be evaluated, `X` is the data and `y` is the\n ground truth labeling (or `None` in the case of unsupervised models).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n score_func : callable\n Score function (or loss function) with signature\n ``score_func(y, y_pred, **kwargs)``.\n\n greater_is_better : bool, default=True\n Whether score_func is a score function (default), meaning high is good,\n or a loss function, meaning low is good. In the latter case, the\n scorer object will sign-flip the outcome of the score_func.\n\n needs_proba : bool, default=False\n Whether score_func requires predict_proba to get probability estimates\n out of a classifier.\n\n If True, for binary `y_true`, the score function is supposed to accept\n a 1D `y_pred` (i.e., probability of the positive class, shape\n `(n_samples,)`).\n\n needs_threshold : bool, default=False\n Whether score_func takes a continuous decision certainty.\n This only works for binary classification using estimators that\n have either a decision_function or predict_proba method.\n\n If True, for binary `y_true`, the score function is supposed to accept\n a 1D `y_pred` (i.e., probability of the positive class or the decision\n function, shape `(n_samples,)`).\n\n For example ``average_precision`` or the area under the ROC curve\n cannot be computed using discrete predictions alone.\n\n **kwargs : additional arguments\n Additional parameters to be passed to score_func.\n\n Returns\n -------\n scorer : callable\n Callable object that returns a scalar score; greater is better.\n\n Examples\n --------\n >>> from sklearn.metrics import fbeta_score, make_scorer\n >>> ftwo_scorer = make_scorer(fbeta_score, beta=2)\n >>> ftwo_scorer\n make_scorer(fbeta_score, beta=2)\n >>> from sklearn.model_selection import GridSearchCV\n >>> from sklearn.svm import LinearSVC\n >>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]},\n ... scoring=ftwo_scorer)\n\n Notes\n -----\n If `needs_proba=False` and `needs_threshold=False`, the score\n function is supposed to accept the output of :term:`predict`. If\n `needs_proba=True`, the score function is supposed to accept the\n output of :term:`predict_proba` (For binary `y_true`, the score function is\n supposed to accept probability of the positive class). 
If\n `needs_threshold=True`, the score function is supposed to accept the\n output of :term:`decision_function`.\n \"\"\"\n sign = 1 if greater_is_better else -1\n if needs_proba and needs_threshold:\n raise ValueError(\"Set either needs_proba or needs_threshold to True,\"\n \" but not both.\")\n if needs_proba:\n cls = _ProbaScorer\n elif needs_threshold:\n cls = _ThresholdScorer\n else:\n cls = _PredictScorer\n return cls(score_func, sign, kwargs)" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/_check_rows_and_columns", + "name": "_check_rows_and_columns", + "qname": "sklearn.metrics.cluster._bicluster._check_rows_and_columns", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/_check_rows_and_columns/a", + "name": "a", + "qname": "sklearn.metrics.cluster._bicluster._check_rows_and_columns.a", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/_check_rows_and_columns/b", + "name": "b", + "qname": "sklearn.metrics.cluster._bicluster._check_rows_and_columns.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Unpacks the row and column arrays and checks their shape.", + "docstring": "Unpacks the row and column arrays and checks their shape.", + "code": "def _check_rows_and_columns(a, b):\n \"\"\"Unpacks the row and column arrays and checks their shape.\"\"\"\n check_consistent_length(*a)\n check_consistent_length(*b)\n checks = lambda x: check_array(x, ensure_2d=False)\n a_rows, a_cols = map(checks, a)\n b_rows, b_cols = map(checks, b)\n return a_rows, a_cols, b_rows, b_cols" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/_jaccard", + "name": "_jaccard", + "qname": "sklearn.metrics.cluster._bicluster._jaccard", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/_jaccard/a_rows", + "name": "a_rows", + "qname": "sklearn.metrics.cluster._bicluster._jaccard.a_rows", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/_jaccard/a_cols", + "name": "a_cols", + "qname": "sklearn.metrics.cluster._bicluster._jaccard.a_cols", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/_jaccard/b_rows", + "name": "b_rows", + "qname": "sklearn.metrics.cluster._bicluster._jaccard.b_rows", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/_jaccard/b_cols", + "name": "b_cols", + "qname": "sklearn.metrics.cluster._bicluster._jaccard.b_cols", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], 
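Beyond the `fbeta_score` example in the docstring above, the sign flip for loss functions is the part most often missed: with `greater_is_better=False` the returned scorer negates the loss so that "greater is better" holds uniformly during model selection. A small sketch using the public API:

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import make_scorer, mean_squared_error

X, y = make_regression(n_samples=50, random_state=0)
reg = LinearRegression().fit(X, y)

# mean_squared_error is a loss (lower is better), so flip the sign.
neg_mse = make_scorer(mean_squared_error, greater_is_better=False)

# The scorer returns -MSE, so a grid search can always maximize it.
assert neg_mse(reg, X, y) == -mean_squared_error(y, reg.predict(X))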
+ "results": [], + "is_public": false, + "reexported_by": [], + "description": "Jaccard coefficient on the elements of the two biclusters.", + "docstring": "Jaccard coefficient on the elements of the two biclusters.", + "code": "def _jaccard(a_rows, a_cols, b_rows, b_cols):\n \"\"\"Jaccard coefficient on the elements of the two biclusters.\"\"\"\n intersection = ((a_rows * b_rows).sum() *\n (a_cols * b_cols).sum())\n\n a_size = a_rows.sum() * a_cols.sum()\n b_size = b_rows.sum() * b_cols.sum()\n\n return intersection / (a_size + b_size - intersection)" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/_pairwise_similarity", + "name": "_pairwise_similarity", + "qname": "sklearn.metrics.cluster._bicluster._pairwise_similarity", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/_pairwise_similarity/a", + "name": "a", + "qname": "sklearn.metrics.cluster._bicluster._pairwise_similarity.a", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/_pairwise_similarity/b", + "name": "b", + "qname": "sklearn.metrics.cluster._bicluster._pairwise_similarity.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/_pairwise_similarity/similarity", + "name": "similarity", + "qname": "sklearn.metrics.cluster._bicluster._pairwise_similarity.similarity", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes pairwise similarity matrix.\n\nresult[i, j] is the Jaccard coefficient of a's bicluster i and b's\nbicluster j.", + "docstring": "Computes pairwise similarity matrix.\n\nresult[i, j] is the Jaccard coefficient of a's bicluster i and b's\nbicluster j.", + "code": "def _pairwise_similarity(a, b, similarity):\n \"\"\"Computes pairwise similarity matrix.\n\n result[i, j] is the Jaccard coefficient of a's bicluster i and b's\n bicluster j.\n\n \"\"\"\n a_rows, a_cols, b_rows, b_cols = _check_rows_and_columns(a, b)\n n_a = a_rows.shape[0]\n n_b = b_rows.shape[0]\n result = np.array(list(list(similarity(a_rows[i], a_cols[i],\n b_rows[j], b_cols[j])\n for j in range(n_b))\n for i in range(n_a)))\n return result" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/consensus_score", + "name": "consensus_score", + "qname": "sklearn.metrics.cluster._bicluster.consensus_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/consensus_score/a", + "name": "a", + "qname": "sklearn.metrics.cluster._bicluster.consensus_score.a", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "(rows, columns)", + "default_value": "", + "description": "Tuple of row and column indicators for a set of biclusters." 
+ }, + "type": { + "kind": "NamedType", + "name": "(rows, columns)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/consensus_score/b", + "name": "b", + "qname": "sklearn.metrics.cluster._bicluster.consensus_score.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "(rows, columns)", + "default_value": "", + "description": "Another set of biclusters like ``a``." + }, + "type": { + "kind": "NamedType", + "name": "(rows, columns)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._bicluster/consensus_score/similarity", + "name": "similarity", + "qname": "sklearn.metrics.cluster._bicluster.consensus_score.similarity", + "default_value": "'jaccard'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'jaccard' or callable", + "default_value": "'jaccard'", + "description": "May be the string \"jaccard\" to use the Jaccard coefficient, or\nany function that takes four arguments, each of which is a 1d\nindicator vector: (a_rows, a_columns, b_rows, b_columns)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'jaccard'" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "The similarity of two sets of biclusters.\n\nSimilarity between individual biclusters is computed. Then the\nbest matching between sets is found using the Hungarian algorithm.\nThe final score is the sum of similarities divided by the size of\nthe larger set.\n\nRead more in the :ref:`User Guide `.", + "docstring": "The similarity of two sets of biclusters.\n\nSimilarity between individual biclusters is computed. Then the\nbest matching between sets is found using the Hungarian algorithm.\nThe final score is the sum of similarities divided by the size of\nthe larger set.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\na : (rows, columns)\n Tuple of row and column indicators for a set of biclusters.\n\nb : (rows, columns)\n Another set of biclusters like ``a``.\n\nsimilarity : 'jaccard' or callable, default='jaccard'\n May be the string \"jaccard\" to use the Jaccard coefficient, or\n any function that takes four arguments, each of which is a 1d\n indicator vector: (a_rows, a_columns, b_rows, b_columns).\n\nReferences\n----------\n\n* Hochreiter, Bodenhofer, et. al., 2010. `FABIA: factor analysis\n for bicluster acquisition\n `__.", + "code": "@_deprecate_positional_args\ndef consensus_score(a, b, *, similarity=\"jaccard\"):\n \"\"\"The similarity of two sets of biclusters.\n\n Similarity between individual biclusters is computed. Then the\n best matching between sets is found using the Hungarian algorithm.\n The final score is the sum of similarities divided by the size of\n the larger set.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n a : (rows, columns)\n Tuple of row and column indicators for a set of biclusters.\n\n b : (rows, columns)\n Another set of biclusters like ``a``.\n\n similarity : 'jaccard' or callable, default='jaccard'\n May be the string \"jaccard\" to use the Jaccard coefficient, or\n any function that takes four arguments, each of which is a 1d\n indicator vector: (a_rows, a_columns, b_rows, b_columns).\n\n References\n ----------\n\n * Hochreiter, Bodenhofer, et. al., 2010. 
`FABIA: factor analysis\n for bicluster acquisition\n `__.\n\n \"\"\"\n if similarity == \"jaccard\":\n similarity = _jaccard\n matrix = _pairwise_similarity(a, b, similarity)\n row_indices, col_indices = linear_sum_assignment(1. - matrix)\n n_a = len(a[0])\n n_b = len(b[0])\n return matrix[row_indices, col_indices].sum() / max(n_a, n_b)" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/_generalized_average", + "name": "_generalized_average", + "qname": "sklearn.metrics.cluster._supervised._generalized_average", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/_generalized_average/U", + "name": "U", + "qname": "sklearn.metrics.cluster._supervised._generalized_average.U", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/_generalized_average/V", + "name": "V", + "qname": "sklearn.metrics.cluster._supervised._generalized_average.V", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/_generalized_average/average_method", + "name": "average_method", + "qname": "sklearn.metrics.cluster._supervised._generalized_average.average_method", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return a particular mean of two numbers.", + "docstring": "Return a particular mean of two numbers.", + "code": "def _generalized_average(U, V, average_method):\n \"\"\"Return a particular mean of two numbers.\"\"\"\n if average_method == \"min\":\n return min(U, V)\n elif average_method == \"geometric\":\n return np.sqrt(U * V)\n elif average_method == \"arithmetic\":\n return np.mean([U, V])\n elif average_method == \"max\":\n return max(U, V)\n else:\n raise ValueError(\"'average_method' must be 'min', 'geometric', \"\n \"'arithmetic', or 'max'\")" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/adjusted_mutual_info_score", + "name": "adjusted_mutual_info_score", + "qname": "sklearn.metrics.cluster._supervised.adjusted_mutual_info_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/adjusted_mutual_info_score/labels_true", + "name": "labels_true", + "qname": "sklearn.metrics.cluster._supervised.adjusted_mutual_info_score.labels_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array, shape = [n_samples]", + "default_value": "", + "description": "A clustering of the data into disjoint subsets." 
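`consensus_score` then matches biclusters across the two sets with the Hungarian algorithm and divides by the size of the larger set. A usage sketch in the `(rows, columns)` indicator format the docstring describes, with two identical one-bicluster sets (so the score is 1.0); the toy arrays are assumptions for illustration:

import numpy as np
from sklearn.metrics import consensus_score

# Each set is (rows, columns): arrays of shape (n_biclusters, n_rows) and
# (n_biclusters, n_cols) marking which rows/columns each bicluster covers.
a = (np.array([[1, 1, 0]]), np.array([[1, 0, 1]]))
b = (np.array([[1, 1, 0]]), np.array([[1, 0, 1]]))
print(consensus_score(a, b))  # 1.0 -- identical biclusters match perfectly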
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/adjusted_mutual_info_score/labels_pred", + "name": "labels_pred", + "qname": "sklearn.metrics.cluster._supervised.adjusted_mutual_info_score.labels_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array-like of shape (n_samples,)", + "default_value": "", + "description": "A clustering of the data into disjoint subsets." + }, + "type": { + "kind": "NamedType", + "name": "int array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/adjusted_mutual_info_score/average_method", + "name": "average_method", + "qname": "sklearn.metrics.cluster._supervised.adjusted_mutual_info_score.average_method", + "default_value": "'arithmetic'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'arithmetic'", + "description": "How to compute the normalizer in the denominator. Possible options\nare 'min', 'geometric', 'arithmetic', and 'max'.\n\n.. versionadded:: 0.20\n\n.. versionchanged:: 0.22\n The default value of ``average_method`` changed from 'max' to\n 'arithmetic'." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Adjusted Mutual Information between two clusterings.\n\nAdjusted Mutual Information (AMI) is an adjustment of the Mutual\nInformation (MI) score to account for chance. It accounts for the fact that\nthe MI is generally higher for two clusterings with a larger number of\nclusters, regardless of whether there is actually more information shared.\nFor two clusterings :math:`U` and :math:`V`, the AMI is given as::\n\n AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nBe mindful that this function is an order of magnitude slower than other\nmetrics, such as the Adjusted Rand Index.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Adjusted Mutual Information between two clusterings.\n\nAdjusted Mutual Information (AMI) is an adjustment of the Mutual\nInformation (MI) score to account for chance. It accounts for the fact that\nthe MI is generally higher for two clusterings with a larger number of\nclusters, regardless of whether there is actually more information shared.\nFor two clusterings :math:`U` and :math:`V`, the AMI is given as::\n\n AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. 
This can be useful to\nmeasure the agreement of two independent label assignment strategies\non the same dataset when the real ground truth is not known.\n\nBe mindful that this function is an order of magnitude slower than other\nmetrics, such as the Adjusted Rand Index.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets.\n\nlabels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets.\n\naverage_method : str, default='arithmetic'\n How to compute the normalizer in the denominator. Possible options\n are 'min', 'geometric', 'arithmetic', and 'max'.\n\n .. versionadded:: 0.20\n\n .. versionchanged:: 0.22\n The default value of ``average_method`` changed from 'max' to\n 'arithmetic'.\n\nReturns\n-------\nami : float (upper-limited by 1.0)\n The AMI returns a value of 1 when the two partitions are identical\n (i.e., perfectly matched). Random partitions (independent labellings) have\n an expected AMI around 0 on average, hence can be negative.\n\nSee Also\n--------\nadjusted_rand_score : Adjusted Rand Index.\nmutual_info_score : Mutual Information (not adjusted for chance).\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n >>> from sklearn.metrics.cluster import adjusted_mutual_info_score\n >>> adjusted_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])\n ... # doctest: +SKIP\n 1.0\n >>> adjusted_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])\n ... # doctest: +SKIP\n 1.0\n\nIf class members are completely split across different clusters,\nthe assignment is totally incomplete, hence the AMI is null::\n\n >>> adjusted_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])\n ... # doctest: +SKIP\n 0.0\n\nReferences\n----------\n.. [1] `Vinh, Epps, and Bailey, (2010). Information Theoretic Measures for\n Clusterings Comparison: Variants, Properties, Normalization and\n Correction for Chance, JMLR\n `_\n\n.. [2] `Wikipedia entry for the Adjusted Mutual Information\n `_", + "code": "@_deprecate_positional_args\ndef adjusted_mutual_info_score(labels_true, labels_pred, *,\n average_method='arithmetic'):\n \"\"\"Adjusted Mutual Information between two clusterings.\n\n Adjusted Mutual Information (AMI) is an adjustment of the Mutual\n Information (MI) score to account for chance. It accounts for the fact that\n the MI is generally higher for two clusterings with a larger number of\n clusters, regardless of whether there is actually more information shared.\n For two clusterings :math:`U` and :math:`V`, the AMI is given as::\n\n AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]\n\n This metric is independent of the absolute values of the labels:\n a permutation of the class or cluster label values won't change the\n score value in any way.\n\n This metric is furthermore symmetric: switching ``label_true`` with\n ``label_pred`` will return the same score value. 
This can be useful to\n measure the agreement of two independent label assignment strategies\n on the same dataset when the real ground truth is not known.\n\n Be mindful that this function is an order of magnitude slower than other\n metrics, such as the Adjusted Rand Index.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n labels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets.\n\n labels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets.\n\n average_method : str, default='arithmetic'\n How to compute the normalizer in the denominator. Possible options\n are 'min', 'geometric', 'arithmetic', and 'max'.\n\n .. versionadded:: 0.20\n\n .. versionchanged:: 0.22\n The default value of ``average_method`` changed from 'max' to\n 'arithmetic'.\n\n Returns\n -------\n ami : float (upper-limited by 1.0)\n The AMI returns a value of 1 when the two partitions are identical\n (i.e., perfectly matched). Random partitions (independent labellings) have\n an expected AMI around 0 on average, hence can be negative.\n\n See Also\n --------\n adjusted_rand_score : Adjusted Rand Index.\n mutual_info_score : Mutual Information (not adjusted for chance).\n\n Examples\n --------\n\n Perfect labelings are both homogeneous and complete, hence have\n score 1.0::\n\n >>> from sklearn.metrics.cluster import adjusted_mutual_info_score\n >>> adjusted_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])\n ... # doctest: +SKIP\n 1.0\n >>> adjusted_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])\n ... # doctest: +SKIP\n 1.0\n\n If class members are completely split across different clusters,\n the assignment is totally incomplete, hence the AMI is null::\n\n >>> adjusted_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])\n ... # doctest: +SKIP\n 0.0\n\n References\n ----------\n .. [1] `Vinh, Epps, and Bailey, (2010). Information Theoretic Measures for\n Clusterings Comparison: Variants, Properties, Normalization and\n Correction for Chance, JMLR\n `_\n\n .. [2] `Wikipedia entry for the Adjusted Mutual Information\n `_\n \"\"\"\n labels_true, labels_pred = check_clusterings(labels_true, labels_pred)\n n_samples = labels_true.shape[0]\n classes = np.unique(labels_true)\n clusters = np.unique(labels_pred)\n # Special limit cases: no clustering since the data is not split.\n # This is a perfect match hence return 1.0.\n if (classes.shape[0] == clusters.shape[0] == 1 or\n classes.shape[0] == clusters.shape[0] == 0):\n return 1.0\n contingency = contingency_matrix(labels_true, labels_pred, sparse=True)\n contingency = contingency.astype(np.float64,\n **_astype_copy_false(contingency))\n # Calculate the MI for the two clusterings\n mi = mutual_info_score(labels_true, labels_pred,\n contingency=contingency)\n # Calculate the expected value for the mutual information\n emi = expected_mutual_information(contingency, n_samples)\n # Calculate entropy for each labeling\n h_true, h_pred = entropy(labels_true), entropy(labels_pred)\n normalizer = _generalized_average(h_true, h_pred, average_method)\n denominator = normalizer - emi\n # Avoid 0.0 / 0.0 when expectation equals maximum, i.e. a perfect match.\n # normalizer should always be >= emi, but because of floating-point\n # representation, sometimes emi is slightly larger. 
Correct this\n # by preserving the sign.\n if denominator < 0:\n denominator = min(denominator, -np.finfo('float64').eps)\n else:\n denominator = max(denominator, np.finfo('float64').eps)\n ami = (mi - emi) / denominator\n return ami" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/adjusted_rand_score", + "name": "adjusted_rand_score", + "qname": "sklearn.metrics.cluster._supervised.adjusted_rand_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/adjusted_rand_score/labels_true", + "name": "labels_true", + "qname": "sklearn.metrics.cluster._supervised.adjusted_rand_score.labels_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array, shape = [n_samples]", + "default_value": "", + "description": "Ground truth class labels to be used as a reference" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/adjusted_rand_score/labels_pred", + "name": "labels_pred", + "qname": "sklearn.metrics.cluster._supervised.adjusted_rand_score.labels_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Cluster labels to evaluate" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Rand index adjusted for chance.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is then \"adjusted for chance\" into the ARI score\nusing the following scheme::\n\n ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\nThe adjusted Rand index is thus ensured to have a value close to\n0.0 for random labeling independently of the number of clusters and\nsamples and exactly 1.0 when the clusterings are identical (up to\na permutation).\n\nARI is a symmetric measure::\n\n adjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\nRead more in the :ref:`User Guide `.", + "docstring": "Rand index adjusted for chance.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is then \"adjusted for chance\" into the ARI score\nusing the following scheme::\n\n ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\nThe adjusted Rand index is thus ensured to have a value close to\n0.0 for random labeling independently of the number of clusters and\nsamples and exactly 1.0 when the clusterings are identical (up to\na permutation).\n\nARI is a symmetric measure::\n\n adjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n Ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n Cluster labels to evaluate\n\nReturns\n-------\nARI : float\n Similarity score between -1.0 and 1.0. Random labelings have an ARI\n close to 0.0. 
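The denominator of AMI is `avg(H(U), H(V)) - E[MI]`, where `avg` is selected by `average_method` via `_generalized_average`. A sketch of how the four options order for fixed entropies (min <= geometric <= arithmetic <= max, so a larger normalizer yields a smaller AMI); the standalone function below restates the helper shown above:

import numpy as np

def generalized_average(u, v, method):
    # Mirrors _generalized_average above for the four supported options.
    return {"min": min(u, v),
            "geometric": np.sqrt(u * v),
            "arithmetic": np.mean([u, v]),
            "max": max(u, v)}[method]

u, v = 0.5, 2.0  # toy entropy values
means = [generalized_average(u, v, m)
         for m in ("min", "geometric", "arithmetic", "max")]
print(means)                   # [0.5, 1.0, 1.25, 2.0]
assert means == sorted(means)  # the four means are always ordered this way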
1.0 stands for perfect match.\n\nExamples\n--------\nPerfectly matching labelings have a score of 1 even when permuted::\n\n >>> from sklearn.metrics.cluster import adjusted_rand_score\n >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nLabelings that assign all class members to the same clusters\nare complete but may not always be pure, hence penalized::\n\n >>> adjusted_rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n 0.57...\n\nARI is symmetric, so labelings that have pure clusters with members\ncoming from the same classes but unnecessary splits are penalized::\n\n >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2])\n 0.57...\n\nIf class members are completely split across different clusters, the\nassignment is totally incomplete, hence the ARI is very low::\n\n >>> adjusted_rand_score([0, 0, 0, 0], [0, 1, 2, 3])\n 0.0\n\nReferences\n----------\n.. [Hubert1985] L. Hubert and P. Arabie, Comparing Partitions,\n Journal of Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075\n\n.. [Steinley2004] D. Steinley, Properties of the Hubert-Arabie\n adjusted Rand index, Psychological Methods 2004\n\n.. [wk] https://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index\n\nSee Also\n--------\nadjusted_mutual_info_score : Adjusted Mutual Information.", + "code": "def adjusted_rand_score(labels_true, labels_pred):\n \"\"\"Rand index adjusted for chance.\n\n The Rand Index computes a similarity measure between two clusterings\n by considering all pairs of samples and counting pairs that are\n assigned in the same or different clusters in the predicted and\n true clusterings.\n\n The raw RI score is then \"adjusted for chance\" into the ARI score\n using the following scheme::\n\n ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\n The adjusted Rand index is thus ensured to have a value close to\n 0.0 for random labeling independently of the number of clusters and\n samples and exactly 1.0 when the clusterings are identical (up to\n a permutation).\n\n ARI is a symmetric measure::\n\n adjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n labels_true : int array, shape = [n_samples]\n Ground truth class labels to be used as a reference\n\n labels_pred : array-like of shape (n_samples,)\n Cluster labels to evaluate\n\n Returns\n -------\n ARI : float\n Similarity score between -1.0 and 1.0. Random labelings have an ARI\n close to 0.0. 1.0 stands for perfect match.\n\n Examples\n --------\n Perfectly matching labelings have a score of 1 even when permuted::\n\n >>> from sklearn.metrics.cluster import adjusted_rand_score\n >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\n Labelings that assign all class members to the same clusters\n are complete but may not always be pure, hence penalized::\n\n >>> adjusted_rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n 0.57...\n\n ARI is symmetric, so labelings that have pure clusters with members\n coming from the same classes but unnecessary splits are penalized::\n\n >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2])\n 0.57...\n\n If class members are completely split across different clusters, the\n assignment is totally incomplete, hence the ARI is very low::\n\n >>> adjusted_rand_score([0, 0, 0, 0], [0, 1, 2, 3])\n 0.0\n\n References\n ----------\n .. [Hubert1985] L. Hubert and P. 
Arabie, Comparing Partitions,\n Journal of Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075\n\n .. [Steinley2004] D. Steinley, Properties of the Hubert-Arabie\n adjusted Rand index, Psychological Methods 2004\n\n .. [wk] https://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index\n\n See Also\n --------\n adjusted_mutual_info_score : Adjusted Mutual Information.\n \"\"\"\n (tn, fp), (fn, tp) = pair_confusion_matrix(labels_true, labels_pred)\n\n # Special cases: empty data or full agreement\n if fn == 0 and fp == 0:\n return 1.0\n\n return 2. * (tp * tn - fn * fp) / ((tp + fn) * (fn + tn) +\n (tp + fp) * (fp + tn))" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/check_clusterings", + "name": "check_clusterings", + "qname": "sklearn.metrics.cluster._supervised.check_clusterings", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/check_clusterings/labels_true", + "name": "labels_true", + "qname": "sklearn.metrics.cluster._supervised.check_clusterings.labels_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The true labels." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/check_clusterings/labels_pred", + "name": "labels_pred", + "qname": "sklearn.metrics.cluster._supervised.check_clusterings.labels_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The predicted labels." 
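The implementation above reduces ARI to the pair confusion matrix: with pair counts `(tn, fp), (fn, tp)`, it computes `ARI = 2*(tp*tn - fn*fp) / ((tp+fn)*(fn+tn) + (tp+fp)*(fp+tn))`. A sketch checking that closed form against `adjusted_rand_score` on the small labeling from the docstring (this assumes `pair_confusion_matrix`, which this same 0.24 API dump records in `sklearn.metrics.cluster`):

from sklearn.metrics import adjusted_rand_score
from sklearn.metrics.cluster import pair_confusion_matrix

labels_true = [0, 0, 1, 2]
labels_pred = [0, 0, 1, 1]

# Counts of sample pairs that are together/apart in each clustering.
(tn, fp), (fn, tp) = pair_confusion_matrix(labels_true, labels_pred)
ari = 2.0 * (tp * tn - fn * fp) / ((tp + fn) * (fn + tn) +
                                   (tp + fp) * (fp + tn))
print(ari)                                          # 0.57...
print(adjusted_rand_score(labels_true, labels_pred))  # same value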
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check that the labels arrays are 1D and of same dimension.", + "docstring": "Check that the labels arrays are 1D and of same dimension.\n\nParameters\n----------\nlabels_true : array-like of shape (n_samples,)\n The true labels.\n\nlabels_pred : array-like of shape (n_samples,)\n The predicted labels.", + "code": "def check_clusterings(labels_true, labels_pred):\n \"\"\"Check that the labels arrays are 1D and of same dimension.\n\n Parameters\n ----------\n labels_true : array-like of shape (n_samples,)\n The true labels.\n\n labels_pred : array-like of shape (n_samples,)\n The predicted labels.\n \"\"\"\n labels_true = check_array(\n labels_true, ensure_2d=False, ensure_min_samples=0, dtype=None,\n )\n\n labels_pred = check_array(\n labels_pred, ensure_2d=False, ensure_min_samples=0, dtype=None,\n )\n\n type_label = type_of_target(labels_true)\n type_pred = type_of_target(labels_pred)\n\n if 'continuous' in (type_pred, type_label):\n msg = f'Clustering metrics expects discrete values but received' \\\n f' {type_label} values for label, and {type_pred} values ' \\\n f'for target'\n warnings.warn(msg, UserWarning)\n\n # input checks\n if labels_true.ndim != 1:\n raise ValueError(\n \"labels_true must be 1D: shape is %r\" % (labels_true.shape,))\n if labels_pred.ndim != 1:\n raise ValueError(\n \"labels_pred must be 1D: shape is %r\" % (labels_pred.shape,))\n check_consistent_length(labels_true, labels_pred)\n\n return labels_true, labels_pred" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/completeness_score", + "name": "completeness_score", + "qname": "sklearn.metrics.cluster._supervised.completeness_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/completeness_score/labels_true", + "name": "labels_true", + "qname": "sklearn.metrics.cluster._supervised.completeness_score.labels_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array, shape = [n_samples]", + "default_value": "", + "description": "ground truth class labels to be used as a reference" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/completeness_score/labels_pred", + "name": "labels_pred", + "qname": "sklearn.metrics.cluster._supervised.completeness_score.labels_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "cluster labels to evaluate" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Completeness metric of a cluster labeling given a ground truth.\n\nA clustering result satisfies completeness if all the data points\nthat are members of a given class are elements of the same cluster.\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is not symmetric: switching ``label_true`` with ``label_pred``\nwill return the 
:func:`homogeneity_score` which will be different in\ngeneral.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Completeness metric of a cluster labeling given a ground truth.\n\nA clustering result satisfies completeness if all the data points\nthat are members of a given class are elements of the same cluster.\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is not symmetric: switching ``label_true`` with ``label_pred``\nwill return the :func:`homogeneity_score` which will be different in\ngeneral.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\nReturns\n-------\ncompleteness : float\n score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\nReferences\n----------\n\n.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A\n conditional entropy-based external cluster evaluation measure\n `_\n\nSee Also\n--------\nhomogeneity_score\nv_measure_score\n\nExamples\n--------\n\nPerfect labelings are complete::\n\n >>> from sklearn.metrics.cluster import completeness_score\n >>> completeness_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nNon-perfect labelings that assign all classes members to the same clusters\nare still complete::\n\n >>> print(completeness_score([0, 0, 1, 1], [0, 0, 0, 0]))\n 1.0\n >>> print(completeness_score([0, 1, 2, 3], [0, 0, 1, 1]))\n 0.999...\n\nIf classes members are split across different clusters, the\nassignment cannot be complete::\n\n >>> print(completeness_score([0, 0, 1, 1], [0, 1, 0, 1]))\n 0.0\n >>> print(completeness_score([0, 0, 0, 0], [0, 1, 2, 3]))\n 0.0", + "code": "def completeness_score(labels_true, labels_pred):\n \"\"\"Completeness metric of a cluster labeling given a ground truth.\n\n A clustering result satisfies completeness if all the data points\n that are members of a given class are elements of the same cluster.\n\n This metric is independent of the absolute values of the labels:\n a permutation of the class or cluster label values won't change the\n score value in any way.\n\n This metric is not symmetric: switching ``label_true`` with ``label_pred``\n will return the :func:`homogeneity_score` which will be different in\n general.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n labels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\n labels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\n Returns\n -------\n completeness : float\n score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\n References\n ----------\n\n .. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. 
V-Measure: A\n conditional entropy-based external cluster evaluation measure\n `_\n\n See Also\n --------\n homogeneity_score\n v_measure_score\n\n Examples\n --------\n\n Perfect labelings are complete::\n\n >>> from sklearn.metrics.cluster import completeness_score\n >>> completeness_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\n Non-perfect labelings that assign all classes members to the same clusters\n are still complete::\n\n >>> print(completeness_score([0, 0, 1, 1], [0, 0, 0, 0]))\n 1.0\n >>> print(completeness_score([0, 1, 2, 3], [0, 0, 1, 1]))\n 0.999...\n\n If classes members are split across different clusters, the\n assignment cannot be complete::\n\n >>> print(completeness_score([0, 0, 1, 1], [0, 1, 0, 1]))\n 0.0\n >>> print(completeness_score([0, 0, 0, 0], [0, 1, 2, 3]))\n 0.0\n \"\"\"\n return homogeneity_completeness_v_measure(labels_true, labels_pred)[1]" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/contingency_matrix", + "name": "contingency_matrix", + "qname": "sklearn.metrics.cluster._supervised.contingency_matrix", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/contingency_matrix/labels_true", + "name": "labels_true", + "qname": "sklearn.metrics.cluster._supervised.contingency_matrix.labels_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array, shape = [n_samples]", + "default_value": "", + "description": "Ground truth class labels to be used as a reference." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/contingency_matrix/labels_pred", + "name": "labels_pred", + "qname": "sklearn.metrics.cluster._supervised.contingency_matrix.labels_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Cluster labels to evaluate." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/contingency_matrix/eps", + "name": "eps", + "qname": "sklearn.metrics.cluster._supervised.contingency_matrix.eps", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "If a float, that value is added to all values in the contingency\nmatrix. This helps to stop NaN propagation.\nIf ``None``, nothing is adjusted." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/contingency_matrix/sparse", + "name": "sparse", + "qname": "sklearn.metrics.cluster._supervised.contingency_matrix.sparse", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If `True`, return a sparse CSR contingency matrix. If `eps` is not\n`None` and `sparse` is `True`, a ValueError will be raised.\n\n.. 
versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/contingency_matrix/dtype", + "name": "dtype", + "qname": "sklearn.metrics.cluster._supervised.contingency_matrix.dtype", + "default_value": "np.int64", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "numeric type", + "default_value": "np.int64", + "description": "Output dtype. Ignored if `eps` is not `None`.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "numeric type" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build a contingency matrix describing the relationship between labels.", + "docstring": "Build a contingency matrix describing the relationship between labels.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,)\n Cluster labels to evaluate.\n\neps : float, default=None\n If a float, that value is added to all values in the contingency\n matrix. This helps to stop NaN propagation.\n If ``None``, nothing is adjusted.\n\nsparse : bool, default=False\n If `True`, return a sparse CSR contingency matrix. If `eps` is not\n `None` and `sparse` is `True`, a ValueError will be raised.\n\n .. versionadded:: 0.18\n\ndtype : numeric type, default=np.int64\n Output dtype. Ignored if `eps` is not `None`.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ncontingency : {array-like, sparse}, shape=[n_classes_true, n_classes_pred]\n Matrix :math:`C` such that :math:`C_{i, j}` is the number of samples in\n true class :math:`i` and in predicted class :math:`j`. If\n ``eps is None``, the dtype of this array will be integer unless set\n otherwise with the ``dtype`` argument. If ``eps`` is given, the dtype\n will be float.\n Will be a ``scipy.sparse.csr_matrix`` if ``sparse=True``.", + "code": "@_deprecate_positional_args\ndef contingency_matrix(labels_true, labels_pred, *, eps=None, sparse=False,\n dtype=np.int64):\n \"\"\"Build a contingency matrix describing the relationship between labels.\n\n Parameters\n ----------\n labels_true : int array, shape = [n_samples]\n Ground truth class labels to be used as a reference.\n\n labels_pred : array-like of shape (n_samples,)\n Cluster labels to evaluate.\n\n eps : float, default=None\n If a float, that value is added to all values in the contingency\n matrix. This helps to stop NaN propagation.\n If ``None``, nothing is adjusted.\n\n sparse : bool, default=False\n If `True`, return a sparse CSR contingency matrix. If `eps` is not\n `None` and `sparse` is `True`, a ValueError will be raised.\n\n .. versionadded:: 0.18\n\n dtype : numeric type, default=np.int64\n Output dtype. Ignored if `eps` is not `None`.\n\n .. versionadded:: 0.24\n\n Returns\n -------\n contingency : {array-like, sparse}, shape=[n_classes_true, n_classes_pred]\n Matrix :math:`C` such that :math:`C_{i, j}` is the number of samples in\n true class :math:`i` and in predicted class :math:`j`. If\n ``eps is None``, the dtype of this array will be integer unless set\n otherwise with the ``dtype`` argument. 
If ``eps`` is given, the dtype\n will be float.\n Will be a ``scipy.sparse.csr_matrix`` if ``sparse=True``.\n \"\"\"\n\n if eps is not None and sparse:\n raise ValueError(\"Cannot set 'eps' when sparse=True\")\n\n classes, class_idx = np.unique(labels_true, return_inverse=True)\n clusters, cluster_idx = np.unique(labels_pred, return_inverse=True)\n n_classes = classes.shape[0]\n n_clusters = clusters.shape[0]\n # Using coo_matrix to accelerate simple histogram calculation,\n # i.e. bins are consecutive integers\n # Currently, coo_matrix is faster than histogram2d for simple cases\n contingency = sp.coo_matrix((np.ones(class_idx.shape[0]),\n (class_idx, cluster_idx)),\n shape=(n_classes, n_clusters),\n dtype=dtype)\n if sparse:\n contingency = contingency.tocsr()\n contingency.sum_duplicates()\n else:\n contingency = contingency.toarray()\n if eps is not None:\n # don't use += as contingency is integer\n contingency = contingency + eps\n return contingency" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/entropy", + "name": "entropy", + "qname": "sklearn.metrics.cluster._supervised.entropy", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/entropy/labels", + "name": "labels", + "qname": "sklearn.metrics.cluster._supervised.entropy.labels", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array, shape = [n_samples]", + "default_value": "", + "description": "The labels" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate the entropy for a labeling.", + "docstring": "Calculate the entropy for a labeling.\n\nParameters\n----------\nlabels : int array, shape = [n_samples]\n The labels\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e).", + "code": "def entropy(labels):\n \"\"\"Calculate the entropy for a labeling.\n\n Parameters\n ----------\n labels : int array, shape = [n_samples]\n The labels\n\n Notes\n -----\n The logarithm used is the natural logarithm (base-e).\n \"\"\"\n if len(labels) == 0:\n return 1.0\n label_idx = np.unique(labels, return_inverse=True)[1]\n pi = np.bincount(label_idx).astype(np.float64)\n pi = pi[pi > 0]\n pi_sum = np.sum(pi)\n # log(a / b) should be calculated as log(a) - log(b) to avoid\n # possible loss of precision\n return -np.sum((pi / pi_sum) * (np.log(pi) - log(pi_sum)))" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/fowlkes_mallows_score", + "name": "fowlkes_mallows_score", + "qname": "sklearn.metrics.cluster._supervised.fowlkes_mallows_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/fowlkes_mallows_score/labels_true", + "name": "labels_true", + "qname": "sklearn.metrics.cluster._supervised.fowlkes_mallows_score.labels_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array, shape = (``n_samples``,)", + "default_value": "", + "description": "A clustering of the data into disjoint subsets."
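[Note, not part of the captured API data: a hedged usage sketch for the contingency_matrix entry above, showing the dense, sparse and eps-smoothed variants on invented labels.]

from sklearn.metrics.cluster import contingency_matrix

labels_true = [0, 0, 1, 1]
labels_pred = [0, 1, 1, 1]

dense = contingency_matrix(labels_true, labels_pred)   # int64 ndarray: [[1, 1], [0, 2]]
sparse = contingency_matrix(labels_true, labels_pred, sparse=True)   # CSR matrix
smoothed = contingency_matrix(labels_true, labels_pred, eps=1e-10)   # float ndarray, NaN-safe
# The two options are mutually exclusive: passing eps=... together with
# sparse=True raises ValueError, per the parameter docs above.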
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int array" + }, + { + "kind": "NamedType", + "name": "shape = (``n_samples``,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/fowlkes_mallows_score/labels_pred", + "name": "labels_pred", + "qname": "sklearn.metrics.cluster._supervised.fowlkes_mallows_score.labels_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape = (``n_samples``, )", + "default_value": "", + "description": "A clustering of the data into disjoint subsets." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape = (``n_samples``, )" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/fowlkes_mallows_score/sparse", + "name": "sparse", + "qname": "sklearn.metrics.cluster._supervised.fowlkes_mallows_score.sparse", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Compute contingency matrix internally with sparse matrix." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Measure the similarity of two clusterings of a set of points.\n\n.. versionadded:: 0.18\n\nThe Fowlkes-Mallows index (FMI) is defined as the geometric mean between of\nthe precision and recall::\n\n FMI = TP / sqrt((TP + FP) * (TP + FN))\n\nWhere ``TP`` is the number of **True Positive** (i.e. the number of pair of\npoints that belongs in the same clusters in both ``labels_true`` and\n``labels_pred``), ``FP`` is the number of **False Positive** (i.e. the\nnumber of pair of points that belongs in the same clusters in\n``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of\n**False Negative** (i.e the number of pair of points that belongs in the\nsame clusters in ``labels_pred`` and not in ``labels_True``).\n\nThe score ranges from 0 to 1. A high value indicates a good similarity\nbetween two clusters.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Measure the similarity of two clusterings of a set of points.\n\n.. versionadded:: 0.18\n\nThe Fowlkes-Mallows index (FMI) is defined as the geometric mean between of\nthe precision and recall::\n\n FMI = TP / sqrt((TP + FP) * (TP + FN))\n\nWhere ``TP`` is the number of **True Positive** (i.e. the number of pair of\npoints that belongs in the same clusters in both ``labels_true`` and\n``labels_pred``), ``FP`` is the number of **False Positive** (i.e. the\nnumber of pair of points that belongs in the same clusters in\n``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of\n**False Negative** (i.e the number of pair of points that belongs in the\nsame clusters in ``labels_pred`` and not in ``labels_True``).\n\nThe score ranges from 0 to 1. 
A high value indicates a good similarity\nbetween two clusterings.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = (``n_samples``,)\n A clustering of the data into disjoint subsets.\n\nlabels_pred : array, shape = (``n_samples``, )\n A clustering of the data into disjoint subsets.\n\nsparse : bool, default=False\n Compute contingency matrix internally with sparse matrix.\n\nReturns\n-------\nscore : float\n The resulting Fowlkes-Mallows score.\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n >>> from sklearn.metrics.cluster import fowlkes_mallows_score\n >>> fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> fowlkes_mallows_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally random, hence the FMI is null::\n\n >>> fowlkes_mallows_score([0, 0, 0, 0], [0, 1, 2, 3])\n 0.0\n\nReferences\n----------\n.. [1] `E. B. Fowlkes and C. L. Mallows, 1983. \"A method for comparing two\n hierarchical clusterings\". Journal of the American Statistical\n Association\n `_\n\n.. [2] `Wikipedia entry for the Fowlkes-Mallows Index\n `_", + "code": "@_deprecate_positional_args\ndef fowlkes_mallows_score(labels_true, labels_pred, *, sparse=False):\n \"\"\"Measure the similarity of two clusterings of a set of points.\n\n .. versionadded:: 0.18\n\n The Fowlkes-Mallows index (FMI) is defined as the geometric mean of\n the precision and recall::\n\n FMI = TP / sqrt((TP + FP) * (TP + FN))\n\n Where ``TP`` is the number of **True Positive** (i.e. the number of pairs of\n points that belong in the same clusters in both ``labels_true`` and\n ``labels_pred``), ``FP`` is the number of **False Positive** (i.e. the\n number of pairs of points that belong in the same clusters in\n ``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of\n **False Negative** (i.e. the number of pairs of points that belong in the\n same clusters in ``labels_pred`` and not in ``labels_true``).\n\n The score ranges from 0 to 1. A high value indicates a good similarity\n between two clusterings.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n labels_true : int array, shape = (``n_samples``,)\n A clustering of the data into disjoint subsets.\n\n labels_pred : array, shape = (``n_samples``, )\n A clustering of the data into disjoint subsets.\n\n sparse : bool, default=False\n Compute contingency matrix internally with sparse matrix.\n\n Returns\n -------\n score : float\n The resulting Fowlkes-Mallows score.\n\n Examples\n --------\n\n Perfect labelings are both homogeneous and complete, hence have\n score 1.0::\n\n >>> from sklearn.metrics.cluster import fowlkes_mallows_score\n >>> fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> fowlkes_mallows_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\n If classes members are completely split across different clusters,\n the assignment is totally random, hence the FMI is null::\n\n >>> fowlkes_mallows_score([0, 0, 0, 0], [0, 1, 2, 3])\n 0.0\n\n References\n ----------\n .. [1] `E. B. Fowlkes and C. L. Mallows, 1983. \"A method for comparing two\n hierarchical clusterings\". Journal of the American Statistical\n Association\n `_\n\n .. 
[2] `Wikipedia entry for the Fowlkes-Mallows Index\n `_\n \"\"\"\n labels_true, labels_pred = check_clusterings(labels_true, labels_pred)\n n_samples, = labels_true.shape\n\n c = contingency_matrix(labels_true, labels_pred,\n sparse=True)\n c = c.astype(np.int64, **_astype_copy_false(c))\n tk = np.dot(c.data, c.data) - n_samples\n pk = np.sum(np.asarray(c.sum(axis=0)).ravel() ** 2) - n_samples\n qk = np.sum(np.asarray(c.sum(axis=1)).ravel() ** 2) - n_samples\n return np.sqrt(tk / pk) * np.sqrt(tk / qk) if tk != 0. else 0." + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/homogeneity_completeness_v_measure", + "name": "homogeneity_completeness_v_measure", + "qname": "sklearn.metrics.cluster._supervised.homogeneity_completeness_v_measure", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/homogeneity_completeness_v_measure/labels_true", + "name": "labels_true", + "qname": "sklearn.metrics.cluster._supervised.homogeneity_completeness_v_measure.labels_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array, shape = [n_samples]", + "default_value": "", + "description": "ground truth class labels to be used as a reference" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/homogeneity_completeness_v_measure/labels_pred", + "name": "labels_pred", + "qname": "sklearn.metrics.cluster._supervised.homogeneity_completeness_v_measure.labels_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "cluster labels to evaluate" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/homogeneity_completeness_v_measure/beta", + "name": "beta", + "qname": "sklearn.metrics.cluster._supervised.homogeneity_completeness_v_measure.beta", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\nIf ``beta`` is greater than 1, ``completeness`` is weighted more\nstrongly in the calculation. If ``beta`` is less than 1,\n``homogeneity`` is weighted more strongly." 
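[Note, not part of the captured API data: to make the TP/FP/FN wording in the fowlkes_mallows_score entry concrete, this sketch recomputes FMI from pairwise co-membership counts via pair_confusion_matrix (which reports ordered pairs); labels are invented for illustration.]

import numpy as np
from sklearn.metrics.cluster import fowlkes_mallows_score, pair_confusion_matrix

labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]

# Halve to get unordered pair counts; the ratio below is scale-invariant anyway.
(tn, fp), (fn, tp) = pair_confusion_matrix(labels_true, labels_pred) // 2
fmi = tp / np.sqrt((tp + fp) * (tp + fn))
assert np.isclose(fmi, fowlkes_mallows_score(labels_true, labels_pred))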
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the homogeneity and completeness and V-Measure scores at once.\n\nThose metrics are based on normalized conditional entropy measures of\nthe clustering labeling to evaluate given the knowledge of a Ground\nTruth class labels of the same samples.\n\nA clustering result satisfies homogeneity if all of its clusters\ncontain only data points which are members of a single class.\n\nA clustering result satisfies completeness if all the data points\nthat are members of a given class are elements of the same cluster.\n\nBoth scores have positive values between 0.0 and 1.0, larger values\nbeing desirable.\n\nThose 3 metrics are independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore values in any way.\n\nV-Measure is furthermore symmetric: swapping ``labels_true`` and\n``label_pred`` will give the same score. This does not hold for\nhomogeneity and completeness. V-Measure is identical to\n:func:`normalized_mutual_info_score` with the arithmetic averaging\nmethod.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the homogeneity and completeness and V-Measure scores at once.\n\nThose metrics are based on normalized conditional entropy measures of\nthe clustering labeling to evaluate given the knowledge of a Ground\nTruth class labels of the same samples.\n\nA clustering result satisfies homogeneity if all of its clusters\ncontain only data points which are members of a single class.\n\nA clustering result satisfies completeness if all the data points\nthat are members of a given class are elements of the same cluster.\n\nBoth scores have positive values between 0.0 and 1.0, larger values\nbeing desirable.\n\nThose 3 metrics are independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore values in any way.\n\nV-Measure is furthermore symmetric: swapping ``labels_true`` and\n``label_pred`` will give the same score. This does not hold for\nhomogeneity and completeness. V-Measure is identical to\n:func:`normalized_mutual_info_score` with the arithmetic averaging\nmethod.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\nbeta : float, default=1.0\n Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\n If ``beta`` is greater than 1, ``completeness`` is weighted more\n strongly in the calculation. If ``beta`` is less than 1,\n ``homogeneity`` is weighted more strongly.\n\nReturns\n-------\nhomogeneity : float\n score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling\n\ncompleteness : float\n score between 0.0 and 1.0. 
1.0 stands for perfectly complete labeling\n\nv_measure : float\n harmonic mean of the first two\n\nSee Also\n--------\nhomogeneity_score\ncompleteness_score\nv_measure_score", + "code": "@_deprecate_positional_args\ndef homogeneity_completeness_v_measure(labels_true, labels_pred, *, beta=1.0):\n \"\"\"Compute the homogeneity and completeness and V-Measure scores at once.\n\n Those metrics are based on normalized conditional entropy measures of\n the clustering labeling to evaluate given the knowledge of a Ground\n Truth class labels of the same samples.\n\n A clustering result satisfies homogeneity if all of its clusters\n contain only data points which are members of a single class.\n\n A clustering result satisfies completeness if all the data points\n that are members of a given class are elements of the same cluster.\n\n Both scores have positive values between 0.0 and 1.0, larger values\n being desirable.\n\n Those 3 metrics are independent of the absolute values of the labels:\n a permutation of the class or cluster label values won't change the\n score values in any way.\n\n V-Measure is furthermore symmetric: swapping ``labels_true`` and\n ``label_pred`` will give the same score. This does not hold for\n homogeneity and completeness. V-Measure is identical to\n :func:`normalized_mutual_info_score` with the arithmetic averaging\n method.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n labels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\n labels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\n beta : float, default=1.0\n Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\n If ``beta`` is greater than 1, ``completeness`` is weighted more\n strongly in the calculation. If ``beta`` is less than 1,\n ``homogeneity`` is weighted more strongly.\n\n Returns\n -------\n homogeneity : float\n score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling\n\n completeness : float\n score between 0.0 and 1.0. 
1.0 stands for perfectly complete labeling\n\n v_measure : float\n harmonic mean of the first two\n\n See Also\n --------\n homogeneity_score\n completeness_score\n v_measure_score\n \"\"\"\n labels_true, labels_pred = check_clusterings(labels_true, labels_pred)\n\n if len(labels_true) == 0:\n return 1.0, 1.0, 1.0\n\n entropy_C = entropy(labels_true)\n entropy_K = entropy(labels_pred)\n\n contingency = contingency_matrix(labels_true, labels_pred, sparse=True)\n MI = mutual_info_score(None, None, contingency=contingency)\n\n homogeneity = MI / (entropy_C) if entropy_C else 1.0\n completeness = MI / (entropy_K) if entropy_K else 1.0\n\n if homogeneity + completeness == 0.0:\n v_measure_score = 0.0\n else:\n v_measure_score = ((1 + beta) * homogeneity * completeness\n / (beta * homogeneity + completeness))\n\n return homogeneity, completeness, v_measure_score" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/homogeneity_score", + "name": "homogeneity_score", + "qname": "sklearn.metrics.cluster._supervised.homogeneity_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/homogeneity_score/labels_true", + "name": "labels_true", + "qname": "sklearn.metrics.cluster._supervised.homogeneity_score.labels_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array, shape = [n_samples]", + "default_value": "", + "description": "ground truth class labels to be used as a reference" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/homogeneity_score/labels_pred", + "name": "labels_pred", + "qname": "sklearn.metrics.cluster._supervised.homogeneity_score.labels_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "cluster labels to evaluate" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Homogeneity metric of a cluster labeling given a ground truth.\n\nA clustering result satisfies homogeneity if all of its clusters\ncontain only data points which are members of a single class.\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is not symmetric: switching ``label_true`` with ``label_pred``\nwill return the :func:`completeness_score` which will be different in\ngeneral.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Homogeneity metric of a cluster labeling given a ground truth.\n\nA clustering result satisfies homogeneity if all of its clusters\ncontain only data points which are members of a single class.\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is not symmetric: switching ``label_true`` with ``label_pred``\nwill return the :func:`completeness_score` which will be different in\ngeneral.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a 
reference\n\nlabels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\nReturns\n-------\nhomogeneity : float\n score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling\n\nReferences\n----------\n\n.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A\n conditional entropy-based external cluster evaluation measure\n `_\n\nSee Also\n--------\ncompleteness_score\nv_measure_score\n\nExamples\n--------\n\nPerfect labelings are homogeneous::\n\n >>> from sklearn.metrics.cluster import homogeneity_score\n >>> homogeneity_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nNon-perfect labelings that further split classes into more clusters can be\nperfectly homogeneous::\n\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 0, 1, 2]))\n 1.000000\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 1, 2, 3]))\n 1.000000\n\nClusters that include samples from different classes do not make for an\nhomogeneous labeling::\n\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 1, 0, 1]))\n 0.0...\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 0, 0, 0]))\n 0.0...", + "code": "def homogeneity_score(labels_true, labels_pred):\n \"\"\"Homogeneity metric of a cluster labeling given a ground truth.\n\n A clustering result satisfies homogeneity if all of its clusters\n contain only data points which are members of a single class.\n\n This metric is independent of the absolute values of the labels:\n a permutation of the class or cluster label values won't change the\n score value in any way.\n\n This metric is not symmetric: switching ``label_true`` with ``label_pred``\n will return the :func:`completeness_score` which will be different in\n general.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n labels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\n labels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\n Returns\n -------\n homogeneity : float\n score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling\n\n References\n ----------\n\n .. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. 
V-Measure: A\n conditional entropy-based external cluster evaluation measure\n `_\n\n See Also\n --------\n completeness_score\n v_measure_score\n\n Examples\n --------\n\n Perfect labelings are homogeneous::\n\n >>> from sklearn.metrics.cluster import homogeneity_score\n >>> homogeneity_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\n Non-perfect labelings that further split classes into more clusters can be\n perfectly homogeneous::\n\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 0, 1, 2]))\n 1.000000\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 1, 2, 3]))\n 1.000000\n\n Clusters that include samples from different classes do not make for an\n homogeneous labeling::\n\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 1, 0, 1]))\n 0.0...\n >>> print(\"%.6f\" % homogeneity_score([0, 0, 1, 1], [0, 0, 0, 0]))\n 0.0...\n \"\"\"\n return homogeneity_completeness_v_measure(labels_true, labels_pred)[0]" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/mutual_info_score", + "name": "mutual_info_score", + "qname": "sklearn.metrics.cluster._supervised.mutual_info_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/mutual_info_score/labels_true", + "name": "labels_true", + "qname": "sklearn.metrics.cluster._supervised.mutual_info_score.labels_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array, shape = [n_samples]", + "default_value": "", + "description": "A clustering of the data into disjoint subsets." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/mutual_info_score/labels_pred", + "name": "labels_pred", + "qname": "sklearn.metrics.cluster._supervised.mutual_info_score.labels_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array-like of shape (n_samples,)", + "default_value": "", + "description": "A clustering of the data into disjoint subsets." + }, + "type": { + "kind": "NamedType", + "name": "int array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/mutual_info_score/contingency", + "name": "contingency", + "qname": "sklearn.metrics.cluster._supervised.mutual_info_score.contingency", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_classes_true, n_classes_pred)", + "default_value": "None", + "description": "A contingency matrix given by the :func:`contingency_matrix` function.\nIf value is ``None``, it will be computed, otherwise the given value is\nused, with ``labels_true`` and ``labels_pred`` ignored." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_classes_true, n_classes_pred)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Mutual Information between two clusterings.\n\nThe Mutual Information is a measure of the similarity between two labels of\nthe same data. 
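[Note, not part of the captured API data: the homogeneity_completeness_v_measure code above reduces to three ratios of mutual information and entropies. A small cross-check on invented labels; the local helper H mirrors the natural-log entropy() captured earlier in this module.]

import numpy as np
from sklearn.metrics.cluster import (completeness_score, homogeneity_score,
                                     mutual_info_score, v_measure_score)

def H(labels):
    # Natural-log entropy of a labeling (illustrative helper).
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return -(p * np.log(p)).sum()

labels_true = [0, 0, 1, 1]
labels_pred = [0, 0, 1, 2]

mi = mutual_info_score(labels_true, labels_pred)
h = mi / H(labels_true)    # homogeneity
c = mi / H(labels_pred)    # completeness
v = 2 * h * c / (h + c)    # V-measure with beta=1.0 (the default)
assert np.isclose(h, homogeneity_score(labels_true, labels_pred))
assert np.isclose(c, completeness_score(labels_true, labels_pred))
assert np.isclose(v, v_measure_score(labels_true, labels_pred))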
Where :math:`|U_i|` is the number of the samples\nin cluster :math:`U_i` and :math:`|V_j|` is the number of the\nsamples in cluster :math:`V_j`, the Mutual Information\nbetween clusterings :math:`U` and :math:`V` is given as:\n\n.. math::\n\n MI(U,V)=\\sum_{i=1}^{|U|} \\sum_{j=1}^{|V|} \\frac{|U_i\\cap V_j|}{N}\n \\log\\frac{N|U_i \\cap V_j|}{|U_i||V_j|}\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Mutual Information between two clusterings.\n\nThe Mutual Information is a measure of the similarity between two labels of\nthe same data. Where :math:`|U_i|` is the number of the samples\nin cluster :math:`U_i` and :math:`|V_j|` is the number of the\nsamples in cluster :math:`V_j`, the Mutual Information\nbetween clusterings :math:`U` and :math:`V` is given as:\n\n.. math::\n\n MI(U,V)=\\sum_{i=1}^{|U|} \\sum_{j=1}^{|V|} \\frac{|U_i\\cap V_j|}{N}\n \\log\\frac{N|U_i \\cap V_j|}{|U_i||V_j|}\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets.\n\nlabels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets.\n\ncontingency : {ndarray, sparse matrix} of shape (n_classes_true, n_classes_pred), default=None\n A contingency matrix given by the :func:`contingency_matrix` function.\n If value is ``None``, it will be computed, otherwise the given value is\n used, with ``labels_true`` and ``labels_pred`` ignored.\n\nReturns\n-------\nmi : float\n Mutual information, a non-negative value\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e).\n\nSee Also\n--------\nadjusted_mutual_info_score : Adjusted against chance Mutual Information.\nnormalized_mutual_info_score : Normalized Mutual Information.", + "code": "@_deprecate_positional_args\ndef mutual_info_score(labels_true, labels_pred, *, contingency=None):\n \"\"\"Mutual Information between two clusterings.\n\n The Mutual Information is a measure of the similarity between two labels of\n the same data. Where :math:`|U_i|` is the number of the samples\n in cluster :math:`U_i` and :math:`|V_j|` is the number of the\n samples in cluster :math:`V_j`, the Mutual Information\n between clusterings :math:`U` and :math:`V` is given as:\n\n .. 
math::\n\n MI(U,V)=\\\\sum_{i=1}^{|U|} \\\\sum_{j=1}^{|V|} \\\\frac{|U_i\\\\cap V_j|}{N}\n \\\\log\\\\frac{N|U_i \\\\cap V_j|}{|U_i||V_j|}\n\n This metric is independent of the absolute values of the labels:\n a permutation of the class or cluster label values won't change the\n score value in any way.\n\n This metric is furthermore symmetric: switching ``label_true`` with\n ``label_pred`` will return the same score value. This can be useful to\n measure the agreement of two independent label assignments strategies\n on the same dataset when the real ground truth is not known.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n labels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets.\n\n labels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets.\n\n contingency : {ndarray, sparse matrix} of shape \\\n (n_classes_true, n_classes_pred), default=None\n A contingency matrix given by the :func:`contingency_matrix` function.\n If value is ``None``, it will be computed, otherwise the given value is\n used, with ``labels_true`` and ``labels_pred`` ignored.\n\n Returns\n -------\n mi : float\n Mutual information, a non-negative value\n\n Notes\n -----\n The logarithm used is the natural logarithm (base-e).\n\n See Also\n --------\n adjusted_mutual_info_score : Adjusted against chance Mutual Information.\n normalized_mutual_info_score : Normalized Mutual Information.\n \"\"\"\n if contingency is None:\n labels_true, labels_pred = check_clusterings(labels_true, labels_pred)\n contingency = contingency_matrix(labels_true, labels_pred, sparse=True)\n else:\n contingency = check_array(contingency,\n accept_sparse=['csr', 'csc', 'coo'],\n dtype=[int, np.int32, np.int64])\n\n if isinstance(contingency, np.ndarray):\n # For an array\n nzx, nzy = np.nonzero(contingency)\n nz_val = contingency[nzx, nzy]\n elif sp.issparse(contingency):\n # For a sparse matrix\n nzx, nzy, nz_val = sp.find(contingency)\n else:\n raise ValueError(\"Unsupported type for 'contingency': %s\" %\n type(contingency))\n\n contingency_sum = contingency.sum()\n pi = np.ravel(contingency.sum(axis=1))\n pj = np.ravel(contingency.sum(axis=0))\n log_contingency_nm = np.log(nz_val)\n contingency_nm = nz_val / contingency_sum\n # Don't need to calculate the full outer product, just for non-zeroes\n outer = (pi.take(nzx).astype(np.int64, copy=False)\n * pj.take(nzy).astype(np.int64, copy=False))\n log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum())\n mi = (contingency_nm * (log_contingency_nm - log(contingency_sum)) +\n contingency_nm * log_outer)\n mi = np.where(np.abs(mi) < np.finfo(mi.dtype).eps, 0.0, mi)\n return np.clip(mi.sum(), 0.0, None)" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/normalized_mutual_info_score", + "name": "normalized_mutual_info_score", + "qname": "sklearn.metrics.cluster._supervised.normalized_mutual_info_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/normalized_mutual_info_score/labels_true", + "name": "labels_true", + "qname": "sklearn.metrics.cluster._supervised.normalized_mutual_info_score.labels_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array, shape = [n_samples]", + "default_value": "", + "description": "A clustering of the data into disjoint subsets." 
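[Note, not part of the captured API data: the MI sum in the formula above can be evaluated directly from a dense contingency matrix. An illustrative sketch on invented labels, mirroring (not reusing) the captured implementation.]

import numpy as np
from sklearn.metrics.cluster import contingency_matrix, mutual_info_score

labels_true = [0, 0, 1, 1, 2, 2]
labels_pred = [0, 1, 1, 1, 2, 2]

c = contingency_matrix(labels_true, labels_pred).astype(float)
n = c.sum()
p = c / n                                                  # joint P(U_i, V_j)
outer = np.outer(c.sum(axis=1), c.sum(axis=0)) / n ** 2    # P(U_i) * P(V_j)
mask = p > 0                                               # sum over non-empty cells only
mi = (p[mask] * np.log(p[mask] / outer[mask])).sum()       # natural log, as documented
assert np.isclose(mi, mutual_info_score(labels_true, labels_pred))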
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/normalized_mutual_info_score/labels_pred", + "name": "labels_pred", + "qname": "sklearn.metrics.cluster._supervised.normalized_mutual_info_score.labels_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array-like of shape (n_samples,)", + "default_value": "", + "description": "A clustering of the data into disjoint subsets." + }, + "type": { + "kind": "NamedType", + "name": "int array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/normalized_mutual_info_score/average_method", + "name": "average_method", + "qname": "sklearn.metrics.cluster._supervised.normalized_mutual_info_score.average_method", + "default_value": "'arithmetic'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'arithmetic'", + "description": "How to compute the normalizer in the denominator. Possible options\nare 'min', 'geometric', 'arithmetic', and 'max'.\n\n.. versionadded:: 0.20\n\n.. versionchanged:: 0.22\n The default value of ``average_method`` changed from 'geometric' to\n 'arithmetic'." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Normalized Mutual Information between two clusterings.\n\nNormalized Mutual Information (NMI) is a normalization of the Mutual\nInformation (MI) score to scale the results between 0 (no mutual\ninformation) and 1 (perfect correlation). In this function, mutual\ninformation is normalized by some generalized mean of ``H(labels_true)``\nand ``H(labels_pred))``, defined by the `average_method`.\n\nThis measure is not adjusted for chance. Therefore\n:func:`adjusted_mutual_info_score` might be preferred.\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Normalized Mutual Information between two clusterings.\n\nNormalized Mutual Information (NMI) is a normalization of the Mutual\nInformation (MI) score to scale the results between 0 (no mutual\ninformation) and 1 (perfect correlation). In this function, mutual\ninformation is normalized by some generalized mean of ``H(labels_true)``\nand ``H(labels_pred))``, defined by the `average_method`.\n\nThis measure is not adjusted for chance. Therefore\n:func:`adjusted_mutual_info_score` might be preferred.\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. 
This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets.\n\nlabels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets.\n\naverage_method : str, default='arithmetic'\n How to compute the normalizer in the denominator. Possible options\n are 'min', 'geometric', 'arithmetic', and 'max'.\n\n .. versionadded:: 0.20\n\n .. versionchanged:: 0.22\n The default value of ``average_method`` changed from 'geometric' to\n 'arithmetic'.\n\nReturns\n-------\nnmi : float\n score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\nSee Also\n--------\nv_measure_score : V-Measure (NMI with arithmetic mean option).\nadjusted_rand_score : Adjusted Rand Index.\nadjusted_mutual_info_score : Adjusted Mutual Information (adjusted\n against chance).\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n >>> from sklearn.metrics.cluster import normalized_mutual_info_score\n >>> normalized_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])\n ... # doctest: +SKIP\n 1.0\n >>> normalized_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])\n ... # doctest: +SKIP\n 1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally in-complete, hence the NMI is null::\n\n >>> normalized_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])\n ... # doctest: +SKIP\n 0.0", + "code": "@_deprecate_positional_args\ndef normalized_mutual_info_score(labels_true, labels_pred, *,\n average_method='arithmetic'):\n \"\"\"Normalized Mutual Information between two clusterings.\n\n Normalized Mutual Information (NMI) is a normalization of the Mutual\n Information (MI) score to scale the results between 0 (no mutual\n information) and 1 (perfect correlation). In this function, mutual\n information is normalized by some generalized mean of ``H(labels_true)``\n and ``H(labels_pred))``, defined by the `average_method`.\n\n This measure is not adjusted for chance. Therefore\n :func:`adjusted_mutual_info_score` might be preferred.\n\n This metric is independent of the absolute values of the labels:\n a permutation of the class or cluster label values won't change the\n score value in any way.\n\n This metric is furthermore symmetric: switching ``label_true`` with\n ``label_pred`` will return the same score value. This can be useful to\n measure the agreement of two independent label assignments strategies\n on the same dataset when the real ground truth is not known.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n labels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets.\n\n labels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets.\n\n average_method : str, default='arithmetic'\n How to compute the normalizer in the denominator. Possible options\n are 'min', 'geometric', 'arithmetic', and 'max'.\n\n .. versionadded:: 0.20\n\n .. versionchanged:: 0.22\n The default value of ``average_method`` changed from 'geometric' to\n 'arithmetic'.\n\n Returns\n -------\n nmi : float\n score between 0.0 and 1.0. 
1.0 stands for perfectly complete labeling\n\n See Also\n --------\n v_measure_score : V-Measure (NMI with arithmetic mean option).\n adjusted_rand_score : Adjusted Rand Index.\n adjusted_mutual_info_score : Adjusted Mutual Information (adjusted\n against chance).\n\n Examples\n --------\n\n Perfect labelings are both homogeneous and complete, hence have\n score 1.0::\n\n >>> from sklearn.metrics.cluster import normalized_mutual_info_score\n >>> normalized_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])\n ... # doctest: +SKIP\n 1.0\n >>> normalized_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])\n ... # doctest: +SKIP\n 1.0\n\n If classes members are completely split across different clusters,\n the assignment is totally in-complete, hence the NMI is null::\n\n >>> normalized_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])\n ... # doctest: +SKIP\n 0.0\n \"\"\"\n labels_true, labels_pred = check_clusterings(labels_true, labels_pred)\n classes = np.unique(labels_true)\n clusters = np.unique(labels_pred)\n\n # Special limit cases: no clustering since the data is not split.\n # This is a perfect match hence return 1.0.\n if (classes.shape[0] == clusters.shape[0] == 1 or\n classes.shape[0] == clusters.shape[0] == 0):\n return 1.0\n contingency = contingency_matrix(labels_true, labels_pred, sparse=True)\n contingency = contingency.astype(np.float64,\n **_astype_copy_false(contingency))\n # Calculate the MI for the two clusterings\n mi = mutual_info_score(labels_true, labels_pred,\n contingency=contingency)\n # Calculate the expected value for the mutual information\n # Calculate entropy for each labeling\n h_true, h_pred = entropy(labels_true), entropy(labels_pred)\n normalizer = _generalized_average(h_true, h_pred, average_method)\n # Avoid 0.0 / 0.0 when either entropy is zero.\n normalizer = max(normalizer, np.finfo('float64').eps)\n nmi = mi / normalizer\n return nmi" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/pair_confusion_matrix", + "name": "pair_confusion_matrix", + "qname": "sklearn.metrics.cluster._supervised.pair_confusion_matrix", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/pair_confusion_matrix/labels_true", + "name": "labels_true", + "qname": "sklearn.metrics.cluster._supervised.pair_confusion_matrix.labels_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,), dtype=integral", + "default_value": "", + "description": "Ground truth class labels to be used as a reference." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + }, + { + "kind": "NamedType", + "name": "dtype=integral" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/pair_confusion_matrix/labels_pred", + "name": "labels_pred", + "qname": "sklearn.metrics.cluster._supervised.pair_confusion_matrix.labels_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,), dtype=integral", + "default_value": "", + "description": "Cluster labels to evaluate." 
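[Note, not part of the captured API data: what average_method selects, spelled out under the reading that the normalizer is a generalized mean of the two label entropies; the four equivalences below are assumptions checked against the library on invented labels.]

import numpy as np
from sklearn.metrics.cluster import mutual_info_score, normalized_mutual_info_score

def H(labels):
    # Natural-log entropy of a labeling (illustrative helper).
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return -(p * np.log(p)).sum()

labels_true = [0, 0, 1, 1, 2, 2]
labels_pred = [0, 0, 0, 1, 1, 1]

mi = mutual_info_score(labels_true, labels_pred)
h_true, h_pred = H(labels_true), H(labels_pred)
normalizers = {'min': min(h_true, h_pred),
               'geometric': np.sqrt(h_true * h_pred),
               'arithmetic': (h_true + h_pred) / 2,   # the default since 0.22
               'max': max(h_true, h_pred)}
for method, norm in normalizers.items():
    assert np.isclose(mi / norm, normalized_mutual_info_score(
        labels_true, labels_pred, average_method=method))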
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + }, + { + "kind": "NamedType", + "name": "dtype=integral" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Pair confusion matrix arising from two clusterings.\n\nThe pair confusion matrix :math:`C` computes a 2 by 2 similarity matrix\nbetween two clusterings by considering all pairs of samples and counting\npairs that are assigned into the same or into different clusters under\nthe true and predicted clusterings.\n\nConsidering a pair of samples that is clustered together a positive pair,\nthen as in binary classification the count of true negatives is\n:math:`C_{00}`, false negatives is :math:`C_{10}`, true positives is\n:math:`C_{11}` and false positives is :math:`C_{01}`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Pair confusion matrix arising from two clusterings.\n\nThe pair confusion matrix :math:`C` computes a 2 by 2 similarity matrix\nbetween two clusterings by considering all pairs of samples and counting\npairs that are assigned into the same or into different clusters under\nthe true and predicted clusterings.\n\nConsidering a pair of samples that is clustered together a positive pair,\nthen as in binary classification the count of true negatives is\n:math:`C_{00}`, false negatives is :math:`C_{10}`, true positives is\n:math:`C_{11}` and false positives is :math:`C_{01}`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : array-like of shape (n_samples,), dtype=integral\n Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,), dtype=integral\n Cluster labels to evaluate.\n\nReturns\n-------\nC : ndarray of shape (2, 2), dtype=np.int64\n The contingency matrix.\n\nSee Also\n--------\nrand_score: Rand Score\nadjusted_rand_score: Adjusted Rand Score\nadjusted_mutual_info_score: Adjusted Mutual Information\n\nExamples\n--------\nPerfectly matching labelings have all non-zero entries on the\ndiagonal regardless of actual label values:\n\n >>> from sklearn.metrics.cluster import pair_confusion_matrix\n >>> pair_confusion_matrix([0, 0, 1, 1], [1, 1, 0, 0])\n array([[8, 0],\n [0, 4]]...\n\nLabelings that assign all classes members to the same clusters\nare complete but may be not always pure, hence penalized, and\nhave some off-diagonal non-zero entries:\n\n >>> pair_confusion_matrix([0, 0, 1, 2], [0, 0, 1, 1])\n array([[8, 2],\n [0, 2]]...\n\nNote that the matrix is not symmetric.\n\nReferences\n----------\n.. L. Hubert and P. 
Arabie, Comparing Partitions, Journal of\n Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075", + "code": "def pair_confusion_matrix(labels_true, labels_pred):\n \"\"\"Pair confusion matrix arising from two clusterings.\n\n The pair confusion matrix :math:`C` computes a 2 by 2 similarity matrix\n between two clusterings by considering all pairs of samples and counting\n pairs that are assigned into the same or into different clusters under\n the true and predicted clusterings.\n\n Considering a pair of samples that is clustered together a positive pair,\n then as in binary classification the count of true negatives is\n :math:`C_{00}`, false negatives is :math:`C_{10}`, true positives is\n :math:`C_{11}` and false positives is :math:`C_{01}`.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n labels_true : array-like of shape (n_samples,), dtype=integral\n Ground truth class labels to be used as a reference.\n\n labels_pred : array-like of shape (n_samples,), dtype=integral\n Cluster labels to evaluate.\n\n Returns\n -------\n C : ndarray of shape (2, 2), dtype=np.int64\n The contingency matrix.\n\n See Also\n --------\n rand_score: Rand Score\n adjusted_rand_score: Adjusted Rand Score\n adjusted_mutual_info_score: Adjusted Mutual Information\n\n Examples\n --------\n Perfectly matching labelings have all non-zero entries on the\n diagonal regardless of actual label values:\n\n >>> from sklearn.metrics.cluster import pair_confusion_matrix\n >>> pair_confusion_matrix([0, 0, 1, 1], [1, 1, 0, 0])\n array([[8, 0],\n [0, 4]]...\n\n Labelings that assign all classes members to the same clusters\n are complete but may be not always pure, hence penalized, and\n have some off-diagonal non-zero entries:\n\n >>> pair_confusion_matrix([0, 0, 1, 2], [0, 0, 1, 1])\n array([[8, 2],\n [0, 2]]...\n\n Note that the matrix is not symmetric.\n\n References\n ----------\n .. L. Hubert and P. Arabie, Comparing Partitions, Journal of\n Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075\n \"\"\"\n labels_true, labels_pred = check_clusterings(labels_true, labels_pred)\n n_samples = np.int64(labels_true.shape[0])\n\n # Computation using the contingency data\n contingency = contingency_matrix(\n labels_true, labels_pred, sparse=True, dtype=np.int64\n )\n n_c = np.ravel(contingency.sum(axis=1))\n n_k = np.ravel(contingency.sum(axis=0))\n sum_squares = (contingency.data ** 2).sum()\n C = np.empty((2, 2), dtype=np.int64)\n C[1, 1] = sum_squares - n_samples\n C[0, 1] = contingency.dot(n_k).sum() - sum_squares\n C[1, 0] = contingency.transpose().dot(n_c).sum() - sum_squares\n C[0, 0] = n_samples ** 2 - C[0, 1] - C[1, 0] - sum_squares\n return C" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/rand_score", + "name": "rand_score", + "qname": "sklearn.metrics.cluster._supervised.rand_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/rand_score/labels_true", + "name": "labels_true", + "qname": "sklearn.metrics.cluster._supervised.rand_score.labels_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,), dtype=integral", + "default_value": "", + "description": "Ground truth class labels to be used as a reference." 
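[Note, not part of the captured API data: a brute-force cross-check of the pair counting described in the pair_confusion_matrix entry, reproducing the documented example; O(n^2), toy data only.]

import numpy as np
from itertools import combinations
from sklearn.metrics.cluster import pair_confusion_matrix

labels_true = np.array([0, 0, 1, 2])
labels_pred = np.array([0, 0, 1, 1])

C = np.zeros((2, 2), dtype=np.int64)
for i, j in combinations(range(len(labels_true)), 2):
    same_true = int(labels_true[i] == labels_true[j])
    same_pred = int(labels_pred[i] == labels_pred[j])
    C[same_true, same_pred] += 1
C *= 2  # the library counts ordered pairs
assert np.array_equal(C, pair_confusion_matrix(labels_true, labels_pred))
# C == [[8, 2], [0, 2]], matching the docstring example above.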
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + }, + { + "kind": "NamedType", + "name": "dtype=integral" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/rand_score/labels_pred", + "name": "labels_pred", + "qname": "sklearn.metrics.cluster._supervised.rand_score.labels_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,), dtype=integral", + "default_value": "", + "description": "Cluster labels to evaluate." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + }, + { + "kind": "NamedType", + "name": "dtype=integral" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Rand index.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is:\n\n RI = (number of agreeing pairs) / (number of pairs)\n\nRead more in the :ref:`User Guide `.", + "docstring": "Rand index.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is:\n\n RI = (number of agreeing pairs) / (number of pairs)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : array-like of shape (n_samples,), dtype=integral\n Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,), dtype=integral\n Cluster labels to evaluate.\n\nReturns\n-------\nRI : float\n Similarity score between 0.0 and 1.0, inclusive, 1.0 stands for\n perfect match.\n\nSee Also\n--------\nadjusted_rand_score: Adjusted Rand Score\nadjusted_mutual_info_score: Adjusted Mutual Information\n\nExamples\n--------\nPerfectly matching labelings have a score of 1 even\n\n >>> from sklearn.metrics.cluster import rand_score\n >>> rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but may not always be pure, hence penalized:\n\n >>> rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n 0.83...\n\nReferences\n----------\n.. L. Hubert and P. Arabie, Comparing Partitions, Journal of\n Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075\n\n.. https://en.wikipedia.org/wiki/Simple_matching_coefficient\n\n.. 
https://en.wikipedia.org/wiki/Rand_index", + "code": "def rand_score(labels_true, labels_pred):\n \"\"\"Rand index.\n\n The Rand Index computes a similarity measure between two clusterings\n by considering all pairs of samples and counting pairs that are\n assigned in the same or different clusters in the predicted and\n true clusterings.\n\n The raw RI score is:\n\n RI = (number of agreeing pairs) / (number of pairs)\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n labels_true : array-like of shape (n_samples,), dtype=integral\n Ground truth class labels to be used as a reference.\n\n labels_pred : array-like of shape (n_samples,), dtype=integral\n Cluster labels to evaluate.\n\n Returns\n -------\n RI : float\n Similarity score between 0.0 and 1.0, inclusive, 1.0 stands for\n perfect match.\n\n See Also\n --------\n adjusted_rand_score: Adjusted Rand Score\n adjusted_mutual_info_score: Adjusted Mutual Information\n\n Examples\n --------\n Perfectly matching labelings have a score of 1 even\n\n >>> from sklearn.metrics.cluster import rand_score\n >>> rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\n Labelings that assign all classes members to the same clusters\n are complete but may not always be pure, hence penalized:\n\n >>> rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n 0.83...\n\n References\n ----------\n .. L. Hubert and P. Arabie, Comparing Partitions, Journal of\n Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075\n\n .. https://en.wikipedia.org/wiki/Simple_matching_coefficient\n\n .. https://en.wikipedia.org/wiki/Rand_index\n \"\"\"\n contingency = pair_confusion_matrix(labels_true, labels_pred)\n numerator = contingency.diagonal().sum()\n denominator = contingency.sum()\n\n if numerator == denominator or denominator == 0:\n # Special limit cases: no clustering since the data is not split;\n # or trivial clustering where each document is assigned a unique\n # cluster. 
These are perfect matches hence return 1.0.\n return 1.0\n\n return numerator / denominator" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/v_measure_score", + "name": "v_measure_score", + "qname": "sklearn.metrics.cluster._supervised.v_measure_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/v_measure_score/labels_true", + "name": "labels_true", + "qname": "sklearn.metrics.cluster._supervised.v_measure_score.labels_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int array, shape = [n_samples]", + "default_value": "", + "description": "ground truth class labels to be used as a reference" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int array" + }, + { + "kind": "NamedType", + "name": "shape = [n_samples]" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/v_measure_score/labels_pred", + "name": "labels_pred", + "qname": "sklearn.metrics.cluster._supervised.v_measure_score.labels_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "cluster labels to evaluate" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._supervised/v_measure_score/beta", + "name": "beta", + "qname": "sklearn.metrics.cluster._supervised.v_measure_score.beta", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\nIf ``beta`` is greater than 1, ``completeness`` is weighted more\nstrongly in the calculation. If ``beta`` is less than 1,\n``homogeneity`` is weighted more strongly." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "V-measure cluster labeling given a ground truth.\n\nThis score is identical to :func:`normalized_mutual_info_score` with\nthe ``'arithmetic'`` option for averaging.\n\nThe V-measure is the harmonic mean between homogeneity and completeness::\n\n v = (1 + beta) * homogeneity * completeness\n / (beta * homogeneity + completeness)\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. 
This can be useful to\nmeasure the agreement of two independent label assignment strategies\non the same dataset when the real ground truth is not known.\n\nRead more in the :ref:`User Guide `.", + "docstring": "V-measure cluster labeling given a ground truth.\n\nThis score is identical to :func:`normalized_mutual_info_score` with\nthe ``'arithmetic'`` option for averaging.\n\nThe V-measure is the harmonic mean between homogeneity and completeness::\n\n v = (1 + beta) * homogeneity * completeness\n / (beta * homogeneity + completeness)\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignment strategies\non the same dataset when the real ground truth is not known.\n\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\nbeta : float, default=1.0\n Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\n If ``beta`` is greater than 1, ``completeness`` is weighted more\n strongly in the calculation. If ``beta`` is less than 1,\n ``homogeneity`` is weighted more strongly.\n\nReturns\n-------\nv_measure : float\n score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\nReferences\n----------\n\n.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A\n conditional entropy-based external cluster evaluation measure\n `_\n\nSee Also\n--------\nhomogeneity_score\ncompleteness_score\nnormalized_mutual_info_score\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have score 1.0::\n\n >>> from sklearn.metrics.cluster import v_measure_score\n >>> v_measure_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> v_measure_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but not homogeneous, hence penalized::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 2], [0, 0, 1, 1]))\n 0.8...\n >>> print(\"%.6f\" % v_measure_score([0, 1, 2, 3], [0, 0, 1, 1]))\n 0.66...\n\nLabelings that have pure clusters with members coming from the same\nclasses are homogeneous but unnecessary splits harm completeness\nand thus penalize the V-measure as well::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 1, 2]))\n 0.8...\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 1, 2, 3]))\n 0.66...\n\nIf classes members are completely split across different clusters,\nthe assignment is totally incomplete, hence the V-Measure is null::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 0, 0], [0, 1, 2, 3]))\n 0.0...\n\nClusters that include samples from totally different classes totally\ndestroy the homogeneity of the labeling, hence::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 0, 0]))\n 0.0...", + "code": "@_deprecate_positional_args\ndef v_measure_score(labels_true, labels_pred, *, beta=1.0):\n \"\"\"V-measure cluster labeling given a ground truth.\n\n This score is identical to :func:`normalized_mutual_info_score` with\n the ``'arithmetic'`` option for averaging.\n\n The V-measure is the harmonic mean between homogeneity and completeness::\n\n v = (1 + beta) * homogeneity * completeness\n / (beta * homogeneity + completeness)\n\n This metric is independent of the absolute values of the labels:\n a permutation of the class or cluster label values won't change the\n score value in any way.\n\n This metric is furthermore symmetric: switching ``label_true`` with\n ``label_pred`` will return the same score value. This can be useful to\n measure the agreement of two independent label assignment strategies\n on the same dataset when the real ground truth is not known.\n\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n labels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\n labels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\n beta : float, default=1.0\n Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\n If ``beta`` is greater than 1, ``completeness`` is weighted more\n strongly in the calculation. If ``beta`` is less than 1,\n ``homogeneity`` is weighted more strongly.\n\n Returns\n -------\n v_measure : float\n score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\n References\n ----------\n\n .. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A\n conditional entropy-based external cluster evaluation measure\n `_\n\n See Also\n --------\n homogeneity_score\n completeness_score\n normalized_mutual_info_score\n\n Examples\n --------\n\n Perfect labelings are both homogeneous and complete, hence have score 1.0::\n\n >>> from sklearn.metrics.cluster import v_measure_score\n >>> v_measure_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> v_measure_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\n Labelings that assign all classes members to the same clusters\n are complete but not homogeneous, hence penalized::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 2], [0, 0, 1, 1]))\n 0.8...\n >>> print(\"%.6f\" % v_measure_score([0, 1, 2, 3], [0, 0, 1, 1]))\n 0.66...\n\n Labelings that have pure clusters with members coming from the same\n classes are homogeneous but unnecessary splits harm completeness\n and thus penalize the V-measure as well::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 1, 2]))\n 0.8...\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 1, 2, 3]))\n 0.66...\n\n If classes members are completely split across different clusters,\n the assignment is totally incomplete, hence the V-Measure is null::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 0, 0], [0, 1, 2, 3]))\n 0.0...\n\n Clusters that include samples from totally different classes totally\n destroy the homogeneity of the labeling, hence::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 0, 0]))\n 0.0...\n \"\"\"\n return homogeneity_completeness_v_measure(labels_true, labels_pred,\n beta=beta)[2]" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/_silhouette_reduce", + "name": "_silhouette_reduce", + "qname": "sklearn.metrics.cluster._unsupervised._silhouette_reduce", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/_silhouette_reduce/D_chunk", + "name": "D_chunk", + "qname": "sklearn.metrics.cluster._unsupervised._silhouette_reduce.D_chunk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_chunk_samples, n_samples)", + "default_value": "", + "description": "Precomputed distances for a chunk."
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_chunk_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/_silhouette_reduce/start", + "name": "start", + "qname": "sklearn.metrics.cluster._unsupervised._silhouette_reduce.start", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "First index in the chunk." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/_silhouette_reduce/labels", + "name": "labels", + "qname": "sklearn.metrics.cluster._unsupervised._silhouette_reduce.labels", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Corresponding cluster labels, encoded as {0, ..., n_clusters-1}." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/_silhouette_reduce/label_freqs", + "name": "label_freqs", + "qname": "sklearn.metrics.cluster._unsupervised._silhouette_reduce.label_freqs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "Distribution of cluster labels in ``labels``." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Accumulate silhouette statistics for vertical chunk of X.", + "docstring": "Accumulate silhouette statistics for vertical chunk of X.\n\nParameters\n----------\nD_chunk : array-like of shape (n_chunk_samples, n_samples)\n Precomputed distances for a chunk.\nstart : int\n First index in the chunk.\nlabels : array-like of shape (n_samples,)\n Corresponding cluster labels, encoded as {0, ..., n_clusters-1}.\nlabel_freqs : array-like\n Distribution of cluster labels in ``labels``.", + "code": "def _silhouette_reduce(D_chunk, start, labels, label_freqs):\n \"\"\"Accumulate silhouette statistics for vertical chunk of X.\n\n Parameters\n ----------\n D_chunk : array-like of shape (n_chunk_samples, n_samples)\n Precomputed distances for a chunk.\n start : int\n First index in the chunk.\n labels : array-like of shape (n_samples,)\n Corresponding cluster labels, encoded as {0, ..., n_clusters-1}.\n label_freqs : array-like\n Distribution of cluster labels in ``labels``.\n \"\"\"\n # accumulate distances from each sample to each cluster\n clust_dists = np.zeros((len(D_chunk), len(label_freqs)),\n dtype=D_chunk.dtype)\n for i in range(len(D_chunk)):\n clust_dists[i] += np.bincount(labels, weights=D_chunk[i],\n minlength=len(label_freqs))\n\n # intra_index selects intra-cluster distances within clust_dists\n intra_index = (np.arange(len(D_chunk)), labels[start:start + len(D_chunk)])\n # intra_clust_dists are averaged over cluster size outside this function\n intra_clust_dists = clust_dists[intra_index]\n # of the remaining distances we normalise and extract the minimum\n clust_dists[intra_index] = np.inf\n clust_dists /= label_freqs\n inter_clust_dists = clust_dists.min(axis=1)\n return intra_clust_dists, inter_clust_dists" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/calinski_harabasz_score", + "name": "calinski_harabasz_score", + "qname": 
"sklearn.metrics.cluster._unsupervised.calinski_harabasz_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/calinski_harabasz_score/X", + "name": "X", + "qname": "sklearn.metrics.cluster._unsupervised.calinski_harabasz_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "A list of ``n_features``-dimensional data points. Each row corresponds\nto a single data point." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/calinski_harabasz_score/labels", + "name": "labels", + "qname": "sklearn.metrics.cluster._unsupervised.calinski_harabasz_score.labels", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Predicted labels for each sample." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the Calinski and Harabasz score.\n\nIt is also known as the Variance Ratio Criterion.\n\nThe score is defined as ratio between the within-cluster dispersion and\nthe between-cluster dispersion.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the Calinski and Harabasz score.\n\nIt is also known as the Variance Ratio Criterion.\n\nThe score is defined as ratio between the within-cluster dispersion and\nthe between-cluster dispersion.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n A list of ``n_features``-dimensional data points. Each row corresponds\n to a single data point.\n\nlabels : array-like of shape (n_samples,)\n Predicted labels for each sample.\n\nReturns\n-------\nscore : float\n The resulting Calinski-Harabasz score.\n\nReferences\n----------\n.. [1] `T. Calinski and J. Harabasz, 1974. \"A dendrite method for cluster\n analysis\". Communications in Statistics\n `_", + "code": "def calinski_harabasz_score(X, labels):\n \"\"\"Compute the Calinski and Harabasz score.\n\n It is also known as the Variance Ratio Criterion.\n\n The score is defined as ratio between the within-cluster dispersion and\n the between-cluster dispersion.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n A list of ``n_features``-dimensional data points. Each row corresponds\n to a single data point.\n\n labels : array-like of shape (n_samples,)\n Predicted labels for each sample.\n\n Returns\n -------\n score : float\n The resulting Calinski-Harabasz score.\n\n References\n ----------\n .. [1] `T. Calinski and J. Harabasz, 1974. \"A dendrite method for cluster\n analysis\". 
Communications in Statistics\n `_\n \"\"\"\n X, labels = check_X_y(X, labels)\n le = LabelEncoder()\n labels = le.fit_transform(labels)\n\n n_samples, _ = X.shape\n n_labels = len(le.classes_)\n\n check_number_of_labels(n_labels, n_samples)\n\n extra_disp, intra_disp = 0., 0.\n mean = np.mean(X, axis=0)\n for k in range(n_labels):\n cluster_k = X[labels == k]\n mean_k = np.mean(cluster_k, axis=0)\n extra_disp += len(cluster_k) * np.sum((mean_k - mean) ** 2)\n intra_disp += np.sum((cluster_k - mean_k) ** 2)\n\n return (1. if intra_disp == 0. else\n extra_disp * (n_samples - n_labels) /\n (intra_disp * (n_labels - 1.)))" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/check_number_of_labels", + "name": "check_number_of_labels", + "qname": "sklearn.metrics.cluster._unsupervised.check_number_of_labels", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/check_number_of_labels/n_labels", + "name": "n_labels", + "qname": "sklearn.metrics.cluster._unsupervised.check_number_of_labels.n_labels", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of labels." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/check_number_of_labels/n_samples", + "name": "n_samples", + "qname": "sklearn.metrics.cluster._unsupervised.check_number_of_labels.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check that the number of labels is valid.", + "docstring": "Check that the number of labels is valid.\n\nParameters\n----------\nn_labels : int\n Number of labels.\n\nn_samples : int\n Number of samples.", + "code": "def check_number_of_labels(n_labels, n_samples):\n \"\"\"Check that the number of labels is valid.\n\n Parameters\n ----------\n n_labels : int\n Number of labels.\n\n n_samples : int\n Number of samples.\n \"\"\"\n if not 1 < n_labels < n_samples:\n raise ValueError(\"Number of labels is %d. Valid values are 2 \"\n \"to n_samples - 1 (inclusive)\" % n_labels)" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/davies_bouldin_score", + "name": "davies_bouldin_score", + "qname": "sklearn.metrics.cluster._unsupervised.davies_bouldin_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/davies_bouldin_score/X", + "name": "X", + "qname": "sklearn.metrics.cluster._unsupervised.davies_bouldin_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "A list of ``n_features``-dimensional data points. Each row corresponds\nto a single data point."
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/davies_bouldin_score/labels", + "name": "labels", + "qname": "sklearn.metrics.cluster._unsupervised.davies_bouldin_score.labels", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Predicted labels for each sample." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the Davies-Bouldin score.\n\nThe score is defined as the average similarity measure of each cluster with\nits most similar cluster, where similarity is the ratio of within-cluster\ndistances to between-cluster distances. Thus, clusters which are farther\napart and less dispersed will result in a better score.\n\nThe minimum score is zero, with lower values indicating better clustering.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "docstring": "Computes the Davies-Bouldin score.\n\nThe score is defined as the average similarity measure of each cluster with\nits most similar cluster, where similarity is the ratio of within-cluster\ndistances to between-cluster distances. Thus, clusters which are farther\napart and less dispersed will result in a better score.\n\nThe minimum score is zero, with lower values indicating better clustering.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n A list of ``n_features``-dimensional data points. Each row corresponds\n to a single data point.\n\nlabels : array-like of shape (n_samples,)\n Predicted labels for each sample.\n\nReturns\n-------\nscore: float\n The resulting Davies-Bouldin score.\n\nReferences\n----------\n.. [1] Davies, David L.; Bouldin, Donald W. (1979).\n `\"A Cluster Separation Measure\"\n `__.\n IEEE Transactions on Pattern Analysis and Machine Intelligence.\n PAMI-1 (2): 224-227", + "code": "def davies_bouldin_score(X, labels):\n \"\"\"Computes the Davies-Bouldin score.\n\n The score is defined as the average similarity measure of each cluster with\n its most similar cluster, where similarity is the ratio of within-cluster\n distances to between-cluster distances. Thus, clusters which are farther\n apart and less dispersed will result in a better score.\n\n The minimum score is zero, with lower values indicating better clustering.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.20\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n A list of ``n_features``-dimensional data points. Each row corresponds\n to a single data point.\n\n labels : array-like of shape (n_samples,)\n Predicted labels for each sample.\n\n Returns\n -------\n score: float\n The resulting Davies-Bouldin score.\n\n References\n ----------\n .. [1] Davies, David L.; Bouldin, Donald W. 
(1979).\n `\"A Cluster Separation Measure\"\n `__.\n IEEE Transactions on Pattern Analysis and Machine Intelligence.\n PAMI-1 (2): 224-227\n \"\"\"\n X, labels = check_X_y(X, labels)\n le = LabelEncoder()\n labels = le.fit_transform(labels)\n n_samples, _ = X.shape\n n_labels = len(le.classes_)\n check_number_of_labels(n_labels, n_samples)\n\n intra_dists = np.zeros(n_labels)\n centroids = np.zeros((n_labels, len(X[0])), dtype=float)\n for k in range(n_labels):\n cluster_k = _safe_indexing(X, labels == k)\n centroid = cluster_k.mean(axis=0)\n centroids[k] = centroid\n intra_dists[k] = np.average(pairwise_distances(\n cluster_k, [centroid]))\n\n centroid_distances = pairwise_distances(centroids)\n\n if np.allclose(intra_dists, 0) or np.allclose(centroid_distances, 0):\n return 0.0\n\n centroid_distances[centroid_distances == 0] = np.inf\n combined_intra_dists = intra_dists[:, None] + intra_dists\n scores = np.max(combined_intra_dists / centroid_distances, axis=1)\n return np.mean(scores)" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_samples", + "name": "silhouette_samples", + "qname": "sklearn.metrics.cluster._unsupervised.silhouette_samples", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_samples/X", + "name": "X", + "qname": "sklearn.metrics.cluster._unsupervised.silhouette_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples_a, n_samples_a) if metric == \"precomputed\" or (n_samples_a, n_features) otherwise", + "default_value": "", + "description": "An array of pairwise distances between samples, or a feature array." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_a, n_samples_a) if metric == \"precomputed\"" + }, + { + "kind": "NamedType", + "name": "(n_samples_a, n_features) otherwise" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_samples/labels", + "name": "labels", + "qname": "sklearn.metrics.cluster._unsupervised.silhouette_samples.labels", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Label values for each sample." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_samples/metric", + "name": "metric", + "qname": "sklearn.metrics.cluster._unsupervised.silhouette_samples.metric", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'euclidean'", + "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string, it must be one of the options\nallowed by :func:`sklearn.metrics.pairwise.pairwise_distances`.\nIf ``X`` is the distance array itself, use \"precomputed\" as the metric.\nPrecomputed distance matrices must have 0 along the diagonal." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_samples/kwds", + "name": "kwds", + "qname": "sklearn.metrics.cluster._unsupervised.silhouette_samples.kwds", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the Silhouette Coefficient for each sample.\n\nThe Silhouette Coefficient is a measure of how well samples are clustered\nwith samples that are similar to themselves. Clustering models with a high\nSilhouette Coefficient are said to be dense, where samples in the same\ncluster are similar to each other, and well separated, where samples in\ndifferent clusters are not very similar to each other.\n\nThe Silhouette Coefficient is calculated using the mean intra-cluster\ndistance (``a``) and the mean nearest-cluster distance (``b``) for each\nsample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,\nb)``.\nNote that Silhouette Coefficient is only defined if number of labels\nis 2 ``<= n_labels <= n_samples - 1``.\n\nThis function returns the Silhouette Coefficient for each sample.\n\nThe best value is 1 and the worst value is -1. Values near 0 indicate\noverlapping clusters.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the Silhouette Coefficient for each sample.\n\nThe Silhouette Coefficient is a measure of how well samples are clustered\nwith samples that are similar to themselves. Clustering models with a high\nSilhouette Coefficient are said to be dense, where samples in the same\ncluster are similar to each other, and well separated, where samples in\ndifferent clusters are not very similar to each other.\n\nThe Silhouette Coefficient is calculated using the mean intra-cluster\ndistance (``a``) and the mean nearest-cluster distance (``b``) for each\nsample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,\nb)``.\nNote that Silhouette Coefficient is only defined if number of labels\nis 2 ``<= n_labels <= n_samples - 1``.\n\nThis function returns the Silhouette Coefficient for each sample.\n\nThe best value is 1 and the worst value is -1. Values near 0 indicate\noverlapping clusters.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples_a, n_samples_a) if metric == \"precomputed\" or (n_samples_a, n_features) otherwise\n An array of pairwise distances between samples, or a feature array.\n\nlabels : array-like of shape (n_samples,)\n Label values for each sample.\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`.\n If ``X`` is the distance array itself, use \"precomputed\" as the metric.\n Precomputed distance matrices must have 0 along the diagonal.\n\n`**kwds` : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a ``scipy.spatial.distance`` metric, the parameters are still\n metric dependent. 
See the scipy docs for usage examples.\n\nReturns\n-------\nsilhouette : array-like of shape (n_samples,)\n Silhouette Coefficients for each sample.\n\nReferences\n----------\n\n.. [1] `Peter J. Rousseeuw (1987). \"Silhouettes: a Graphical Aid to the\n Interpretation and Validation of Cluster Analysis\". Computational\n and Applied Mathematics 20: 53-65.\n `_\n\n.. [2] `Wikipedia entry on the Silhouette Coefficient\n `_", + "code": "@_deprecate_positional_args\ndef silhouette_samples(X, labels, *, metric='euclidean', **kwds):\n \"\"\"Compute the Silhouette Coefficient for each sample.\n\n The Silhouette Coefficient is a measure of how well samples are clustered\n with samples that are similar to themselves. Clustering models with a high\n Silhouette Coefficient are said to be dense, where samples in the same\n cluster are similar to each other, and well separated, where samples in\n different clusters are not very similar to each other.\n\n The Silhouette Coefficient is calculated using the mean intra-cluster\n distance (``a``) and the mean nearest-cluster distance (``b``) for each\n sample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,\n b)``.\n Note that Silhouette Coefficient is only defined if number of labels\n is 2 ``<= n_labels <= n_samples - 1``.\n\n This function returns the Silhouette Coefficient for each sample.\n\n The best value is 1 and the worst value is -1. Values near 0 indicate\n overlapping clusters.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_a, n_samples_a) if metric == \\\n \"precomputed\" or (n_samples_a, n_features) otherwise\n An array of pairwise distances between samples, or a feature array.\n\n labels : array-like of shape (n_samples,)\n Label values for each sample.\n\n metric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`.\n If ``X`` is the distance array itself, use \"precomputed\" as the metric.\n Precomputed distance matrices must have 0 along the diagonal.\n\n `**kwds` : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a ``scipy.spatial.distance`` metric, the parameters are still\n metric dependent. See the scipy docs for usage examples.\n\n Returns\n -------\n silhouette : array-like of shape (n_samples,)\n Silhouette Coefficients for each sample.\n\n References\n ----------\n\n .. [1] `Peter J. Rousseeuw (1987). \"Silhouettes: a Graphical Aid to the\n Interpretation and Validation of Cluster Analysis\". Computational\n and Applied Mathematics 20: 53-65.\n `_\n\n .. [2] `Wikipedia entry on the Silhouette Coefficient\n `_\n\n \"\"\"\n X, labels = check_X_y(X, labels, accept_sparse=['csc', 'csr'])\n\n # Check for non-zero diagonal entries in precomputed distance matrix\n if metric == 'precomputed':\n atol = np.finfo(X.dtype).eps * 100\n if np.any(np.abs(np.diagonal(X)) > atol):\n raise ValueError(\n 'The precomputed distance matrix contains non-zero '\n 'elements on the diagonal. 
Use np.fill_diagonal(X, 0).'\n )\n\n le = LabelEncoder()\n labels = le.fit_transform(labels)\n n_samples = len(labels)\n label_freqs = np.bincount(labels)\n check_number_of_labels(len(le.classes_), n_samples)\n\n kwds['metric'] = metric\n reduce_func = functools.partial(_silhouette_reduce,\n labels=labels, label_freqs=label_freqs)\n results = zip(*pairwise_distances_chunked(X, reduce_func=reduce_func,\n **kwds))\n intra_clust_dists, inter_clust_dists = results\n intra_clust_dists = np.concatenate(intra_clust_dists)\n inter_clust_dists = np.concatenate(inter_clust_dists)\n\n denom = (label_freqs - 1).take(labels, mode='clip')\n with np.errstate(divide=\"ignore\", invalid=\"ignore\"):\n intra_clust_dists /= denom\n\n sil_samples = inter_clust_dists - intra_clust_dists\n with np.errstate(divide=\"ignore\", invalid=\"ignore\"):\n sil_samples /= np.maximum(intra_clust_dists, inter_clust_dists)\n # nan values are for clusters of size 1, and should be 0\n return np.nan_to_num(sil_samples)" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_score", + "name": "silhouette_score", + "qname": "sklearn.metrics.cluster._unsupervised.silhouette_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_score/X", + "name": "X", + "qname": "sklearn.metrics.cluster._unsupervised.silhouette_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples_a, n_samples_a) if metric == \"precomputed\" or (n_samples_a, n_features) otherwise", + "default_value": "", + "description": "An array of pairwise distances between samples, or a feature array." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples_a, n_samples_a) if metric == \"precomputed\"" + }, + { + "kind": "NamedType", + "name": "(n_samples_a, n_features) otherwise" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_score/labels", + "name": "labels", + "qname": "sklearn.metrics.cluster._unsupervised.silhouette_score.labels", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Predicted labels for each sample." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_score/metric", + "name": "metric", + "qname": "sklearn.metrics.cluster._unsupervised.silhouette_score.metric", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'euclidean'", + "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string, it must be one of the options\nallowed by :func:`metrics.pairwise.pairwise_distances\n`. If ``X`` is\nthe distance array itself, use ``metric=\"precomputed\"``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_score/sample_size", + "name": "sample_size", + "qname": "sklearn.metrics.cluster._unsupervised.silhouette_score.sample_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The size of the sample to use when computing the Silhouette Coefficient\non a random subset of the data.\nIf ``sample_size is None``, no sampling is used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_score/random_state", + "name": "random_state", + "qname": "sklearn.metrics.cluster._unsupervised.silhouette_score.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for selecting a subset of samples.\nUsed when ``sample_size is not None``.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.cluster._unsupervised/silhouette_score/kwds", + "name": "kwds", + "qname": "sklearn.metrics.cluster._unsupervised.silhouette_score.kwds", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "optional keyword parameters", + "default_value": "", + "description": "Any further parameters are passed directly to the distance function.\nIf using a scipy.spatial.distance metric, the parameters are still\nmetric dependent. See the scipy docs for usage examples." + }, + "type": { + "kind": "NamedType", + "name": "optional keyword parameters" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the mean Silhouette Coefficient of all samples.\n\nThe Silhouette Coefficient is calculated using the mean intra-cluster\ndistance (``a``) and the mean nearest-cluster distance (``b``) for each\nsample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,\nb)``. To clarify, ``b`` is the distance between a sample and the nearest\ncluster that the sample is not a part of.\nNote that Silhouette Coefficient is only defined if number of labels\nis ``2 <= n_labels <= n_samples - 1``.\n\nThis function returns the mean Silhouette Coefficient over all samples.\nTo obtain the values for each sample, use :func:`silhouette_samples`.\n\nThe best value is 1 and the worst value is -1. Values near 0 indicate\noverlapping clusters. Negative values generally indicate that a sample has\nbeen assigned to the wrong cluster, as a different cluster is more similar.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the mean Silhouette Coefficient of all samples.\n\nThe Silhouette Coefficient is calculated using the mean intra-cluster\ndistance (``a``) and the mean nearest-cluster distance (``b``) for each\nsample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,\nb)``. 
To clarify, ``b`` is the distance between a sample and the nearest\ncluster that the sample is not a part of.\nNote that Silhouette Coefficient is only defined if number of labels\nis ``2 <= n_labels <= n_samples - 1``.\n\nThis function returns the mean Silhouette Coefficient over all samples.\nTo obtain the values for each sample, use :func:`silhouette_samples`.\n\nThe best value is 1 and the worst value is -1. Values near 0 indicate\noverlapping clusters. Negative values generally indicate that a sample has\nbeen assigned to the wrong cluster, as a different cluster is more similar.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples_a, n_samples_a) if metric == \"precomputed\" or (n_samples_a, n_features) otherwise\n An array of pairwise distances between samples, or a feature array.\n\nlabels : array-like of shape (n_samples,)\n Predicted labels for each sample.\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by :func:`metrics.pairwise.pairwise_distances\n `. If ``X`` is\n the distance array itself, use ``metric=\"precomputed\"``.\n\nsample_size : int, default=None\n The size of the sample to use when computing the Silhouette Coefficient\n on a random subset of the data.\n If ``sample_size is None``, no sampling is used.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for selecting a subset of samples.\n Used when ``sample_size is not None``.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n**kwds : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a scipy.spatial.distance metric, the parameters are still\n metric dependent. See the scipy docs for usage examples.\n\nReturns\n-------\nsilhouette : float\n Mean Silhouette Coefficient for all samples.\n\nReferences\n----------\n\n.. [1] `Peter J. Rousseeuw (1987). \"Silhouettes: a Graphical Aid to the\n Interpretation and Validation of Cluster Analysis\". Computational\n and Applied Mathematics 20: 53-65.\n `_\n\n.. [2] `Wikipedia entry on the Silhouette Coefficient\n `_", + "code": "@_deprecate_positional_args\ndef silhouette_score(X, labels, *, metric='euclidean', sample_size=None,\n random_state=None, **kwds):\n \"\"\"Compute the mean Silhouette Coefficient of all samples.\n\n The Silhouette Coefficient is calculated using the mean intra-cluster\n distance (``a``) and the mean nearest-cluster distance (``b``) for each\n sample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,\n b)``. To clarify, ``b`` is the distance between a sample and the nearest\n cluster that the sample is not a part of.\n Note that Silhouette Coefficient is only defined if number of labels\n is ``2 <= n_labels <= n_samples - 1``.\n\n This function returns the mean Silhouette Coefficient over all samples.\n To obtain the values for each sample, use :func:`silhouette_samples`.\n\n The best value is 1 and the worst value is -1. Values near 0 indicate\n overlapping clusters. 
Negative values generally indicate that a sample has\n been assigned to the wrong cluster, as a different cluster is more similar.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_a, n_samples_a) if metric == \\\n \"precomputed\" or (n_samples_a, n_features) otherwise\n An array of pairwise distances between samples, or a feature array.\n\n labels : array-like of shape (n_samples,)\n Predicted labels for each sample.\n\n metric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by :func:`metrics.pairwise.pairwise_distances\n `. If ``X`` is\n the distance array itself, use ``metric=\"precomputed\"``.\n\n sample_size : int, default=None\n The size of the sample to use when computing the Silhouette Coefficient\n on a random subset of the data.\n If ``sample_size is None``, no sampling is used.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for selecting a subset of samples.\n Used when ``sample_size is not None``.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n **kwds : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a scipy.spatial.distance metric, the parameters are still\n metric dependent. See the scipy docs for usage examples.\n\n Returns\n -------\n silhouette : float\n Mean Silhouette Coefficient for all samples.\n\n References\n ----------\n\n .. [1] `Peter J. Rousseeuw (1987). \"Silhouettes: a Graphical Aid to the\n Interpretation and Validation of Cluster Analysis\". Computational\n and Applied Mathematics 20: 53-65.\n `_\n\n .. 
[2] `Wikipedia entry on the Silhouette Coefficient\n `_\n\n \"\"\"\n if sample_size is not None:\n X, labels = check_X_y(X, labels, accept_sparse=['csc', 'csr'])\n random_state = check_random_state(random_state)\n indices = random_state.permutation(X.shape[0])[:sample_size]\n if metric == \"precomputed\":\n X, labels = X[indices].T[indices].T, labels[indices]\n else:\n X, labels = X[indices], labels[indices]\n return np.mean(silhouette_samples(X, labels, metric=metric, **kwds))" + }, + { + "id": "scikit-learn/sklearn.metrics.cluster.setup/configuration", + "name": "configuration", + "qname": "sklearn.metrics.cluster.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.cluster.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.metrics.cluster.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.cluster.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.metrics.cluster.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package=\"\", top_path=None):\n config = Configuration(\"cluster\", parent_package, top_path)\n libraries = []\n if os.name == 'posix':\n libraries.append('m')\n config.add_extension(\"_expected_mutual_info_fast\",\n sources=[\"_expected_mutual_info_fast.pyx\"],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_subpackage(\"tests\")\n\n return config" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_argmin_min_reduce", + "name": "_argmin_min_reduce", + "qname": "sklearn.metrics.pairwise._argmin_min_reduce", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/_argmin_min_reduce/dist", + "name": "dist", + "qname": "sklearn.metrics.pairwise._argmin_min_reduce.dist", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_argmin_min_reduce/start", + "name": "start", + "qname": "sklearn.metrics.pairwise._argmin_min_reduce.start", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _argmin_min_reduce(dist, start):\n indices = dist.argmin(axis=1)\n values = dist[np.arange(dist.shape[0]), indices]\n return indices, values" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_check_chunk_size", + "name": "_check_chunk_size", + "qname": "sklearn.metrics.pairwise._check_chunk_size", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/_check_chunk_size/reduced", + "name": "reduced", + "qname": "sklearn.metrics.pairwise._check_chunk_size.reduced", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": 
"", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_check_chunk_size/chunk_size", + "name": "chunk_size", + "qname": "sklearn.metrics.pairwise._check_chunk_size.chunk_size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Checks chunk is a sequence of expected size or a tuple of same.", + "docstring": "Checks chunk is a sequence of expected size or a tuple of same.\n ", + "code": "def _check_chunk_size(reduced, chunk_size):\n \"\"\"Checks chunk is a sequence of expected size or a tuple of same.\n \"\"\"\n if reduced is None:\n return\n is_tuple = isinstance(reduced, tuple)\n if not is_tuple:\n reduced = (reduced,)\n if any(isinstance(r, tuple) or not hasattr(r, '__iter__')\n for r in reduced):\n raise TypeError('reduce_func returned %r. '\n 'Expected sequence(s) of length %d.' %\n (reduced if is_tuple else reduced[0], chunk_size))\n if any(_num_samples(r) != chunk_size for r in reduced):\n actual_size = tuple(_num_samples(r) for r in reduced)\n raise ValueError('reduce_func returned object of length %s. '\n 'Expected same length as input: %d.' %\n (actual_size if is_tuple else actual_size[0],\n chunk_size))" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_dist_wrapper", + "name": "_dist_wrapper", + "qname": "sklearn.metrics.pairwise._dist_wrapper", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/_dist_wrapper/dist_func", + "name": "dist_func", + "qname": "sklearn.metrics.pairwise._dist_wrapper.dist_func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_dist_wrapper/dist_matrix", + "name": "dist_matrix", + "qname": "sklearn.metrics.pairwise._dist_wrapper.dist_matrix", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_dist_wrapper/slice_", + "name": "slice_", + "qname": "sklearn.metrics.pairwise._dist_wrapper.slice_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_dist_wrapper/args", + "name": "args", + "qname": "sklearn.metrics.pairwise._dist_wrapper.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_dist_wrapper/kwargs", + "name": "kwargs", + "qname": "sklearn.metrics.pairwise._dist_wrapper.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Write in-place to a slice of a distance matrix.", + "docstring": "Write in-place to a slice of a distance matrix.", + "code": "def _dist_wrapper(dist_func, dist_matrix, slice_, 
*args, **kwargs):\n \"\"\"Write in-place to a slice of a distance matrix.\"\"\"\n dist_matrix[:, slice_] = dist_func(*args, **kwargs)" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_euclidean_distances_upcast", + "name": "_euclidean_distances_upcast", + "qname": "sklearn.metrics.pairwise._euclidean_distances_upcast", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/_euclidean_distances_upcast/X", + "name": "X", + "qname": "sklearn.metrics.pairwise._euclidean_distances_upcast.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_euclidean_distances_upcast/XX", + "name": "XX", + "qname": "sklearn.metrics.pairwise._euclidean_distances_upcast.XX", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_euclidean_distances_upcast/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise._euclidean_distances_upcast.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_euclidean_distances_upcast/YY", + "name": "YY", + "qname": "sklearn.metrics.pairwise._euclidean_distances_upcast.YY", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_euclidean_distances_upcast/batch_size", + "name": "batch_size", + "qname": "sklearn.metrics.pairwise._euclidean_distances_upcast.batch_size", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Euclidean distances between X and Y.\n\nAssumes X and Y have float32 dtype.\nAssumes XX and YY have float64 dtype or are None.\n\nX and Y are upcast to float64 by chunks, whose size is chosen to limit\nmemory increase by approximately 10% (at least 10MiB).", + "docstring": "Euclidean distances between X and Y.\n\nAssumes X and Y have float32 dtype.\nAssumes XX and YY have float64 dtype or are None.\n\nX and Y are upcast to float64 by chunks, whose size is chosen to limit\nmemory increase by approximately 10% (at least 10MiB).", + "code": "def _euclidean_distances_upcast(X, XX=None, Y=None, YY=None, batch_size=None):\n \"\"\"Euclidean distances between X and Y.\n\n Assumes X and Y have float32 dtype.\n Assumes XX and YY have float64 dtype or are None.\n\n X and Y are upcast to float64 by chunks, whose size is chosen to limit\n memory increase by approximately 10% (at least 10MiB).\n \"\"\"\n n_samples_X = X.shape[0]\n n_samples_Y = Y.shape[0]\n n_features = X.shape[1]\n\n distances = np.empty((n_samples_X, n_samples_Y), dtype=np.float32)\n\n if batch_size is None:\n x_density = X.nnz / np.prod(X.shape) if issparse(X) else 1\n y_density = Y.nnz / np.prod(Y.shape) if issparse(Y) else 1\n\n # Allow 10% more memory than X, Y and the distance matrix take (at\n # least 10MiB)\n maxmem = max(\n ((x_density * 
n_samples_X + y_density * n_samples_Y) * n_features\n + (x_density * n_samples_X * y_density * n_samples_Y)) / 10,\n 10 * 2 ** 17)\n\n # The increase amount of memory in 8-byte blocks is:\n # - x_density * batch_size * n_features (copy of chunk of X)\n # - y_density * batch_size * n_features (copy of chunk of Y)\n # - batch_size * batch_size (chunk of distance matrix)\n # Hence x\u00b2 + (xd+yd)kx = M, where x=batch_size, k=n_features, M=maxmem\n # xd=x_density and yd=y_density\n tmp = (x_density + y_density) * n_features\n batch_size = (-tmp + np.sqrt(tmp ** 2 + 4 * maxmem)) / 2\n batch_size = max(int(batch_size), 1)\n\n x_batches = gen_batches(n_samples_X, batch_size)\n\n for i, x_slice in enumerate(x_batches):\n X_chunk = X[x_slice].astype(np.float64)\n if XX is None:\n XX_chunk = row_norms(X_chunk, squared=True)[:, np.newaxis]\n else:\n XX_chunk = XX[x_slice]\n\n y_batches = gen_batches(n_samples_Y, batch_size)\n\n for j, y_slice in enumerate(y_batches):\n if X is Y and j < i:\n # when X is Y the distance matrix is symmetric so we only need\n # to compute half of it.\n d = distances[y_slice, x_slice].T\n\n else:\n Y_chunk = Y[y_slice].astype(np.float64)\n if YY is None:\n YY_chunk = row_norms(Y_chunk, squared=True)[np.newaxis, :]\n else:\n YY_chunk = YY[:, y_slice]\n\n d = -2 * safe_sparse_dot(X_chunk, Y_chunk.T, dense_output=True)\n d += XX_chunk\n d += YY_chunk\n\n distances[x_slice, y_slice] = d.astype(np.float32, copy=False)\n\n return distances" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_pairwise_callable", + "name": "_pairwise_callable", + "qname": "sklearn.metrics.pairwise._pairwise_callable", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/_pairwise_callable/X", + "name": "X", + "qname": "sklearn.metrics.pairwise._pairwise_callable.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_pairwise_callable/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise._pairwise_callable.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_pairwise_callable/metric", + "name": "metric", + "qname": "sklearn.metrics.pairwise._pairwise_callable.metric", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_pairwise_callable/force_all_finite", + "name": "force_all_finite", + "qname": "sklearn.metrics.pairwise._pairwise_callable.force_all_finite", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_pairwise_callable/kwds", + "name": "kwds", + "qname": "sklearn.metrics.pairwise._pairwise_callable.kwds", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Handle the callable case for pairwise_{distances,kernels}.", + 
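Aside on the _euclidean_distances_upcast record above: the recorded code picks its chunk size as the positive root of the memory-budget quadratic batch_size**2 + (x_density + y_density) * n_features * batch_size = maxmem described in its comments. A doctest-style sketch of that arithmetic, with made-up sizes (dense inputs, 100 features, the 10 MiB floor used above):

>>> import numpy as np
>>> x_density = y_density = 1.0              # dense X and Y
>>> n_features, maxmem = 100, 10 * 2 ** 17   # 10 MiB in 8-byte blocks
>>> tmp = (x_density + y_density) * n_features
>>> # positive root of batch_size**2 + tmp * batch_size - maxmem = 0
>>> batch_size = max(int((-tmp + np.sqrt(tmp ** 2 + 4 * maxmem)) / 2), 1)
>>> batch_size
1049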
"docstring": "Handle the callable case for pairwise_{distances,kernels}.\n ", + "code": "def _pairwise_callable(X, Y, metric, force_all_finite=True, **kwds):\n \"\"\"Handle the callable case for pairwise_{distances,kernels}.\n \"\"\"\n X, Y = check_pairwise_arrays(X, Y, force_all_finite=force_all_finite)\n\n if X is Y:\n # Only calculate metric for upper triangle\n out = np.zeros((X.shape[0], Y.shape[0]), dtype='float')\n iterator = itertools.combinations(range(X.shape[0]), 2)\n for i, j in iterator:\n out[i, j] = metric(X[i], Y[j], **kwds)\n\n # Make symmetric\n # NB: out += out.T will produce incorrect results\n out = out + out.T\n\n # Calculate diagonal\n # NB: nonzero diagonals are allowed for both metrics and kernels\n for i in range(X.shape[0]):\n x = X[i]\n out[i, i] = metric(x, x, **kwds)\n\n else:\n # Calculate all cells\n out = np.empty((X.shape[0], Y.shape[0]), dtype='float')\n iterator = itertools.product(range(X.shape[0]), range(Y.shape[0]))\n for i, j in iterator:\n out[i, j] = metric(X[i], Y[j], **kwds)\n\n return out" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_parallel_pairwise", + "name": "_parallel_pairwise", + "qname": "sklearn.metrics.pairwise._parallel_pairwise", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/_parallel_pairwise/X", + "name": "X", + "qname": "sklearn.metrics.pairwise._parallel_pairwise.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_parallel_pairwise/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise._parallel_pairwise.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_parallel_pairwise/func", + "name": "func", + "qname": "sklearn.metrics.pairwise._parallel_pairwise.func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_parallel_pairwise/n_jobs", + "name": "n_jobs", + "qname": "sklearn.metrics.pairwise._parallel_pairwise.n_jobs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_parallel_pairwise/kwds", + "name": "kwds", + "qname": "sklearn.metrics.pairwise._parallel_pairwise.kwds", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Break the pairwise matrix in n_jobs even slices\nand compute them in parallel.", + "docstring": "Break the pairwise matrix in n_jobs even slices\nand compute them in parallel.", + "code": "def _parallel_pairwise(X, Y, func, n_jobs, **kwds):\n \"\"\"Break the pairwise matrix in n_jobs even slices\n and compute them in parallel.\"\"\"\n\n if Y is None:\n Y = X\n X, Y, dtype = _return_float_dtype(X, Y)\n\n if effective_n_jobs(n_jobs) == 1:\n return func(X, Y, **kwds)\n\n # enforce a threading backend to prevent data communication 
overhead\n fd = delayed(_dist_wrapper)\n ret = np.empty((X.shape[0], Y.shape[0]), dtype=dtype, order='F')\n Parallel(backend=\"threading\", n_jobs=n_jobs)(\n fd(func, ret, s, X, Y[s], **kwds)\n for s in gen_even_slices(_num_samples(Y), effective_n_jobs(n_jobs)))\n\n if (X is Y or Y is None) and func is euclidean_distances:\n # zeroing diagonal for euclidean norm.\n # TODO: do it also for other norms.\n np.fill_diagonal(ret, 0)\n\n return ret" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_precompute_metric_params", + "name": "_precompute_metric_params", + "qname": "sklearn.metrics.pairwise._precompute_metric_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/_precompute_metric_params/X", + "name": "X", + "qname": "sklearn.metrics.pairwise._precompute_metric_params.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_precompute_metric_params/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise._precompute_metric_params.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_precompute_metric_params/metric", + "name": "metric", + "qname": "sklearn.metrics.pairwise._precompute_metric_params.metric", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_precompute_metric_params/kwds", + "name": "kwds", + "qname": "sklearn.metrics.pairwise._precompute_metric_params.kwds", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Precompute data-derived metric parameters if not provided.", + "docstring": "Precompute data-derived metric parameters if not provided.\n ", + "code": "def _precompute_metric_params(X, Y, metric=None, **kwds):\n \"\"\"Precompute data-derived metric parameters if not provided.\n \"\"\"\n if metric == \"seuclidean\" and 'V' not in kwds:\n # There is a bug in scipy < 1.5 that will cause a crash if\n # X.dtype != np.double (float64). 
See PR #15730\n dtype = np.float64 if sp_version < parse_version('1.5') else None\n if X is Y:\n V = np.var(X, axis=0, ddof=1, dtype=dtype)\n else:\n warnings.warn(\n \"from version 1.0 (renaming of 0.25), pairwise_distances for \"\n \"metric='seuclidean' will require V to be specified if Y is \"\n \"passed.\",\n FutureWarning\n )\n V = np.var(np.vstack([X, Y]), axis=0, ddof=1, dtype=dtype)\n return {'V': V}\n if metric == \"mahalanobis\" and 'VI' not in kwds:\n if X is Y:\n VI = np.linalg.inv(np.cov(X.T)).T\n else:\n warnings.warn(\n \"from version 1.0 (renaming of 0.25), pairwise_distances for \"\n \"metric='mahalanobis' will require VI to be specified if Y \"\n \"is passed.\",\n FutureWarning\n )\n VI = np.linalg.inv(np.cov(np.vstack([X, Y]).T)).T\n return {'VI': VI}\n return {}" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_return_float_dtype", + "name": "_return_float_dtype", + "qname": "sklearn.metrics.pairwise._return_float_dtype", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/_return_float_dtype/X", + "name": "X", + "qname": "sklearn.metrics.pairwise._return_float_dtype.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/_return_float_dtype/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise._return_float_dtype.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "1. If dtype of X and Y is float32, then dtype float32 is returned.\n2. Else dtype float is returned.", + "docstring": "1. If dtype of X and Y is float32, then dtype float32 is returned.\n2. Else dtype float is returned.", + "code": "def _return_float_dtype(X, Y):\n \"\"\"\n 1. If dtype of X and Y is float32, then dtype float32 is returned.\n 2. 
Else dtype float is returned.\n \"\"\"\n if not issparse(X) and not isinstance(X, np.ndarray):\n X = np.asarray(X)\n\n if Y is None:\n Y_dtype = X.dtype\n elif not issparse(Y) and not isinstance(Y, np.ndarray):\n Y = np.asarray(Y)\n Y_dtype = Y.dtype\n else:\n Y_dtype = Y.dtype\n\n if X.dtype == Y_dtype == np.float32:\n dtype = np.float32\n else:\n dtype = float\n\n return X, Y, dtype" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/additive_chi2_kernel", + "name": "additive_chi2_kernel", + "qname": "sklearn.metrics.pairwise.additive_chi2_kernel", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/additive_chi2_kernel/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.additive_chi2_kernel.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/additive_chi2_kernel/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.additive_chi2_kernel.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Computes the additive chi-squared kernel between observations in X and\nY.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\n k(x, y) = -Sum [(x - y)^2 / (x + y)]\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Computes the additive chi-squared kernel between observations in X and\nY.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\n k(x, y) = -Sum [(x - y)^2 / (x + y)]\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.\n\nNotes\n-----\nAs the negative of a distance, this kernel is only conditionally positive\ndefinite.\n\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\nchi2_kernel : The exponentiated version of the kernel, which is usually\n preferable.\nsklearn.kernel_approximation.AdditiveChi2Sampler : A Fourier approximation\n to this kernel.\n\nReferences\n----------\n* Zhang, J. and Marszalek, M. and Lazebnik, S. and Schmid, C.\n Local features and kernels for classification of texture and object\n categories: A comprehensive study\n International Journal of Computer Vision 2007\n https://research.microsoft.com/en-us/um/people/manik/projects/trade-off/papers/ZhangIJCV06.pdf", + "code": "def additive_chi2_kernel(X, Y=None):\n \"\"\"Computes the additive chi-squared kernel between observations in X and\n Y.\n\n The chi-squared kernel is computed between each pair of rows in X and Y. 
X\n and Y have to be non-negative. This kernel is most commonly applied to\n histograms.\n\n The chi-squared kernel is given by::\n\n k(x, y) = -Sum [(x - y)^2 / (x + y)]\n\n It can be interpreted as a weighted difference per entry.\n\n Read more in the :ref:`User Guide `.\n\n Notes\n -----\n As the negative of a distance, this kernel is only conditionally positive\n definite.\n\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n\n Returns\n -------\n kernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\n See Also\n --------\n chi2_kernel : The exponentiated version of the kernel, which is usually\n preferable.\n sklearn.kernel_approximation.AdditiveChi2Sampler : A Fourier approximation\n to this kernel.\n\n References\n ----------\n * Zhang, J. and Marszalek, M. and Lazebnik, S. and Schmid, C.\n Local features and kernels for classification of texture and object\n categories: A comprehensive study\n International Journal of Computer Vision 2007\n https://research.microsoft.com/en-us/um/people/manik/projects/trade-off/papers/ZhangIJCV06.pdf\n \"\"\"\n if issparse(X) or issparse(Y):\n raise ValueError(\"additive_chi2 does not support sparse matrices.\")\n X, Y = check_pairwise_arrays(X, Y)\n if (X < 0).any():\n raise ValueError(\"X contains negative values.\")\n if Y is not X and (Y < 0).any():\n raise ValueError(\"Y contains negative values.\")\n\n result = np.zeros((X.shape[0], Y.shape[0]), dtype=X.dtype)\n _chi2_kernel_fast(X, Y, result)\n return result" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/check_paired_arrays", + "name": "check_paired_arrays", + "qname": "sklearn.metrics.pairwise.check_paired_arrays", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/check_paired_arrays/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.check_paired_arrays.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples_X, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples_X, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/check_paired_arrays/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.check_paired_arrays.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples_Y, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples_Y, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Set X and Y appropriately and checks inputs for paired distances.\n\nAll paired distance metrics should use this function first to assert that\nthe given parameters are correct and safe to use.\n\nSpecifically, this function first ensures that both X and Y are arrays,\nthen checks that they are at least two dimensional while ensuring that\ntheir elements are floats. 
Finally, the function checks that the size\nof the dimensions of the two arrays are equal.", + "docstring": "Set X and Y appropriately and checks inputs for paired distances.\n\nAll paired distance metrics should use this function first to assert that\nthe given parameters are correct and safe to use.\n\nSpecifically, this function first ensures that both X and Y are arrays,\nthen checks that they are at least two dimensional while ensuring that\ntheir elements are floats. Finally, the function checks that the size\nof the dimensions of the two arrays are equal.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n\nReturns\n-------\nsafe_X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n An array equal to X, guaranteed to be a numpy array.\n\nsafe_Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n An array equal to Y if Y was not None, guaranteed to be a numpy array.\n If Y was None, safe_Y will be a pointer to X.", + "code": "def check_paired_arrays(X, Y):\n \"\"\"Set X and Y appropriately and checks inputs for paired distances.\n\n All paired distance metrics should use this function first to assert that\n the given parameters are correct and safe to use.\n\n Specifically, this function first ensures that both X and Y are arrays,\n then checks that they are at least two dimensional while ensuring that\n their elements are floats. Finally, the function checks that the size\n of the dimensions of the two arrays are equal.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n\n Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n\n Returns\n -------\n safe_X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n An array equal to X, guaranteed to be a numpy array.\n\n safe_Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n An array equal to Y if Y was not None, guaranteed to be a numpy array.\n If Y was None, safe_Y will be a pointer to X.\n\n \"\"\"\n X, Y = check_pairwise_arrays(X, Y)\n if X.shape != Y.shape:\n raise ValueError(\"X and Y should be of same shape. 
They were \"\n \"respectively %r and %r long.\" % (X.shape, Y.shape))\n return X, Y" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/check_pairwise_arrays", + "name": "check_pairwise_arrays", + "qname": "sklearn.metrics.pairwise.check_pairwise_arrays", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/check_pairwise_arrays/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.check_pairwise_arrays.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples_X, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples_X, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/check_pairwise_arrays/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.check_pairwise_arrays.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples_Y, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples_Y, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/check_pairwise_arrays/precomputed", + "name": "precomputed", + "qname": "sklearn.metrics.pairwise.check_pairwise_arrays.precomputed", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "True if X is to be treated as precomputed distances to the samples in\nY." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/check_pairwise_arrays/dtype", + "name": "dtype", + "qname": "sklearn.metrics.pairwise.check_pairwise_arrays.dtype", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str, type, list of type", + "default_value": "None", + "description": "Data type required for X and Y. If None, the dtype will be an\nappropriate float type selected by _return_float_dtype.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "type" + }, + { + "kind": "NamedType", + "name": "list of type" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/check_pairwise_arrays/accept_sparse", + "name": "accept_sparse", + "qname": "sklearn.metrics.pairwise.check_pairwise_arrays.accept_sparse", + "default_value": "'csr'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str, bool or list/tuple of str", + "default_value": "'csr'", + "description": "String[s] representing allowed sparse matrix formats, such as 'csc',\n'csr', etc. If the input is sparse but not in the allowed format,\nit will be converted to the first listed format. True allows the input\nto be any format. False means that a sparse matrix input will\nraise an error." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "list/tuple of str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/check_pairwise_arrays/force_all_finite", + "name": "force_all_finite", + "qname": "sklearn.metrics.pairwise.check_pairwise_arrays.force_all_finite", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool or 'allow-nan'", + "default_value": "True", + "description": "Whether to raise an error on np.inf, np.nan, pd.NA in array. The\npossibilities are:\n\n- True: Force all values of array to be finite.\n- False: accepts np.inf, np.nan, pd.NA in array.\n- 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n.. versionadded:: 0.22\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'allow-nan'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/check_pairwise_arrays/copy", + "name": "copy", + "qname": "sklearn.metrics.pairwise.check_pairwise_arrays.copy", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether a forced copy will be triggered. If copy=False, a copy might\nbe triggered by a conversion.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Set X and Y appropriately and checks inputs.\n\nIf Y is None, it is set as a pointer to X (i.e. not a copy).\nIf Y is given, this does not happen.\nAll distance metrics should use this function first to assert that the\ngiven parameters are correct and safe to use.\n\nSpecifically, this function first ensures that both X and Y are arrays,\nthen checks that they are at least two dimensional while ensuring that\ntheir elements are floats (or dtype if provided). Finally, the function\nchecks that the size of the second dimension of the two arrays is equal, or\nthe equivalent check for a precomputed distance matrix.", + "docstring": "Set X and Y appropriately and checks inputs.\n\nIf Y is None, it is set as a pointer to X (i.e. not a copy).\nIf Y is given, this does not happen.\nAll distance metrics should use this function first to assert that the\ngiven parameters are correct and safe to use.\n\nSpecifically, this function first ensures that both X and Y are arrays,\nthen checks that they are at least two dimensional while ensuring that\ntheir elements are floats (or dtype if provided). Finally, the function\nchecks that the size of the second dimension of the two arrays is equal, or\nthe equivalent check for a precomputed distance matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n\nprecomputed : bool, default=False\n True if X is to be treated as precomputed distances to the samples in\n Y.\n\ndtype : str, type, list of type, default=None\n Data type required for X and Y. If None, the dtype will be an\n appropriate float type selected by _return_float_dtype.\n\n .. 
versionadded:: 0.18\n\naccept_sparse : str, bool or list/tuple of str, default='csr'\n String[s] representing allowed sparse matrix formats, such as 'csc',\n 'csr', etc. If the input is sparse but not in the allowed format,\n it will be converted to the first listed format. True allows the input\n to be any format. False means that a sparse matrix input will\n raise an error.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in array. The\n possibilities are:\n\n - True: Force all values of array to be finite.\n - False: accepts np.inf, np.nan, pd.NA in array.\n - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n .. versionadded:: 0.22\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`.\n\ncopy : bool, default=False\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\n .. versionadded:: 0.22\n\nReturns\n-------\nsafe_X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n An array equal to X, guaranteed to be a numpy array.\n\nsafe_Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n An array equal to Y if Y was not None, guaranteed to be a numpy array.\n If Y was None, safe_Y will be a pointer to X.", + "code": "@_deprecate_positional_args\ndef check_pairwise_arrays(X, Y, *, precomputed=False, dtype=None,\n accept_sparse='csr', force_all_finite=True,\n copy=False):\n \"\"\"Set X and Y appropriately and checks inputs.\n\n If Y is None, it is set as a pointer to X (i.e. not a copy).\n If Y is given, this does not happen.\n All distance metrics should use this function first to assert that the\n given parameters are correct and safe to use.\n\n Specifically, this function first ensures that both X and Y are arrays,\n then checks that they are at least two dimensional while ensuring that\n their elements are floats (or dtype if provided). Finally, the function\n checks that the size of the second dimension of the two arrays is equal, or\n the equivalent check for a precomputed distance matrix.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n\n Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n\n precomputed : bool, default=False\n True if X is to be treated as precomputed distances to the samples in\n Y.\n\n dtype : str, type, list of type, default=None\n Data type required for X and Y. If None, the dtype will be an\n appropriate float type selected by _return_float_dtype.\n\n .. versionadded:: 0.18\n\n accept_sparse : str, bool or list/tuple of str, default='csr'\n String[s] representing allowed sparse matrix formats, such as 'csc',\n 'csr', etc. If the input is sparse but not in the allowed format,\n it will be converted to the first listed format. True allows the input\n to be any format. False means that a sparse matrix input will\n raise an error.\n\n force_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in array. The\n possibilities are:\n\n - True: Force all values of array to be finite.\n - False: accepts np.inf, np.nan, pd.NA in array.\n - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n .. versionadded:: 0.22\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. 
versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`.\n\n copy : bool, default=False\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\n .. versionadded:: 0.22\n\n Returns\n -------\n safe_X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n An array equal to X, guaranteed to be a numpy array.\n\n safe_Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n An array equal to Y if Y was not None, guaranteed to be a numpy array.\n If Y was None, safe_Y will be a pointer to X.\n\n \"\"\"\n X, Y, dtype_float = _return_float_dtype(X, Y)\n\n estimator = 'check_pairwise_arrays'\n if dtype is None:\n dtype = dtype_float\n\n if Y is X or Y is None:\n X = Y = check_array(X, accept_sparse=accept_sparse, dtype=dtype,\n copy=copy, force_all_finite=force_all_finite,\n estimator=estimator)\n else:\n X = check_array(X, accept_sparse=accept_sparse, dtype=dtype,\n copy=copy, force_all_finite=force_all_finite,\n estimator=estimator)\n Y = check_array(Y, accept_sparse=accept_sparse, dtype=dtype,\n copy=copy, force_all_finite=force_all_finite,\n estimator=estimator)\n\n if precomputed:\n if X.shape[1] != Y.shape[0]:\n raise ValueError(\"Precomputed metric requires shape \"\n \"(n_queries, n_indexed). Got (%d, %d) \"\n \"for %d indexed.\" %\n (X.shape[0], X.shape[1], Y.shape[0]))\n elif X.shape[1] != Y.shape[1]:\n raise ValueError(\"Incompatible dimension for X and Y matrices: \"\n \"X.shape[1] == %d while Y.shape[1] == %d\" % (\n X.shape[1], Y.shape[1]))\n\n return X, Y" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/chi2_kernel", + "name": "chi2_kernel", + "qname": "sklearn.metrics.pairwise.chi2_kernel", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/chi2_kernel/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.chi2_kernel.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/chi2_kernel/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.chi2_kernel.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/chi2_kernel/gamma", + "name": "gamma", + "qname": "sklearn.metrics.pairwise.chi2_kernel.gamma", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.", + "description": "Scaling parameter of the chi2 kernel." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Computes the exponential chi-squared kernel X and Y.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. 
This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\n k(x, y) = exp(-gamma Sum [(x - y)^2 / (x + y)])\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Computes the exponential chi-squared kernel X and Y.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\n k(x, y) = exp(-gamma Sum [(x - y)^2 / (x + y)])\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ngamma : float, default=1.\n Scaling parameter of the chi2 kernel.\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\nadditive_chi2_kernel : The additive version of this kernel.\nsklearn.kernel_approximation.AdditiveChi2Sampler : A Fourier approximation\n to the additive version of this kernel.\n\nReferences\n----------\n* Zhang, J. and Marszalek, M. and Lazebnik, S. and Schmid, C.\n Local features and kernels for classification of texture and object\n categories: A comprehensive study\n International Journal of Computer Vision 2007\n https://research.microsoft.com/en-us/um/people/manik/projects/trade-off/papers/ZhangIJCV06.pdf", + "code": "def chi2_kernel(X, Y=None, gamma=1.):\n \"\"\"Computes the exponential chi-squared kernel X and Y.\n\n The chi-squared kernel is computed between each pair of rows in X and Y. X\n and Y have to be non-negative. This kernel is most commonly applied to\n histograms.\n\n The chi-squared kernel is given by::\n\n k(x, y) = exp(-gamma Sum [(x - y)^2 / (x + y)])\n\n It can be interpreted as a weighted difference per entry.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n\n gamma : float, default=1.\n Scaling parameter of the chi2 kernel.\n\n Returns\n -------\n kernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\n See Also\n --------\n additive_chi2_kernel : The additive version of this kernel.\n sklearn.kernel_approximation.AdditiveChi2Sampler : A Fourier approximation\n to the additive version of this kernel.\n\n References\n ----------\n * Zhang, J. and Marszalek, M. and Lazebnik, S. and Schmid, C.\n Local features and kernels for classification of texture and object\n categories: A comprehensive study\n International Journal of Computer Vision 2007\n https://research.microsoft.com/en-us/um/people/manik/projects/trade-off/papers/ZhangIJCV06.pdf\n \"\"\"\n K = additive_chi2_kernel(X, Y)\n K *= gamma\n return np.exp(K, K)" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/cosine_distances", + "name": "cosine_distances", + "qname": "sklearn.metrics.pairwise.cosine_distances", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/cosine_distances/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.cosine_distances.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples_X, n_features)", + "default_value": "", + "description": "Matrix `X`." 
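Aside on the two chi-squared records above: per the recorded code, chi2_kernel is exp(gamma * additive_chi2_kernel). A doctest-style check on a made-up pair of non-negative rows (histograms):

>>> import numpy as np
>>> from sklearn.metrics.pairwise import additive_chi2_kernel, chi2_kernel
>>> X = np.array([[0.3, 0.7], [0.5, 0.5]])  # non-negative rows
>>> np.allclose(chi2_kernel(X, gamma=1.0), np.exp(additive_chi2_kernel(X)))
True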
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples_X, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/cosine_distances/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.cosine_distances.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "Matrix `Y`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples_Y, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute cosine distance between samples in X and Y.\n\nCosine distance is defined as 1.0 minus the cosine similarity.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute cosine distance between samples in X and Y.\n\nCosine distance is defined as 1.0 minus the cosine similarity.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n Matrix `X`.\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features), default=None\n Matrix `Y`.\n\nReturns\n-------\ndistance matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\ncosine_similarity\nscipy.spatial.distance.cosine : Dense matrices only.", + "code": "def cosine_distances(X, Y=None):\n \"\"\"Compute cosine distance between samples in X and Y.\n\n Cosine distance is defined as 1.0 minus the cosine similarity.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n Matrix `X`.\n\n Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features), \\\n default=None\n Matrix `Y`.\n\n Returns\n -------\n distance matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\n See Also\n --------\n cosine_similarity\n scipy.spatial.distance.cosine : Dense matrices only.\n \"\"\"\n # 1.0 - cosine_similarity(X, Y) without copy\n S = cosine_similarity(X, Y)\n S *= -1\n S += 1\n np.clip(S, 0, 2, out=S)\n if X is Y or Y is None:\n # Ensure that distances between vectors and themselves are set to 0.0.\n # This may not be the case due to floating point rounding errors.\n S[np.diag_indices_from(S)] = 0.0\n return S" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/cosine_similarity", + "name": "cosine_similarity", + "qname": "sklearn.metrics.pairwise.cosine_similarity", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/cosine_similarity/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.cosine_similarity.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples_X, n_features)", + "default_value": "", + "description": "Input data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples_X, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/cosine_similarity/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.cosine_similarity.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "Input data. If ``None``, the output will be the pairwise\nsimilarities between all samples in ``X``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples_Y, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/cosine_similarity/dense_output", + "name": "dense_output", + "qname": "sklearn.metrics.pairwise.cosine_similarity.dense_output", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to return dense output even when the input is sparse. If\n``False``, the output is sparse if both input arrays are sparse.\n\n.. versionadded:: 0.17\n parameter ``dense_output`` for dense output." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute cosine similarity between samples in X and Y.\n\nCosine similarity, or the cosine kernel, computes similarity as the\nnormalized dot product of X and Y:\n\n K(X, Y) = / (||X||*||Y||)\n\nOn L2-normalized data, this function is equivalent to linear_kernel.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute cosine similarity between samples in X and Y.\n\nCosine similarity, or the cosine kernel, computes similarity as the\nnormalized dot product of X and Y:\n\n K(X, Y) = / (||X||*||Y||)\n\nOn L2-normalized data, this function is equivalent to linear_kernel.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples_X, n_features)\n Input data.\n\nY : {ndarray, sparse matrix} of shape (n_samples_Y, n_features), default=None\n Input data. If ``None``, the output will be the pairwise\n similarities between all samples in ``X``.\n\ndense_output : bool, default=True\n Whether to return dense output even when the input is sparse. If\n ``False``, the output is sparse if both input arrays are sparse.\n\n .. versionadded:: 0.17\n parameter ``dense_output`` for dense output.\n\nReturns\n-------\nkernel matrix : ndarray of shape (n_samples_X, n_samples_Y)", + "code": "def cosine_similarity(X, Y=None, dense_output=True):\n \"\"\"Compute cosine similarity between samples in X and Y.\n\n Cosine similarity, or the cosine kernel, computes similarity as the\n normalized dot product of X and Y:\n\n K(X, Y) = / (||X||*||Y||)\n\n On L2-normalized data, this function is equivalent to linear_kernel.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples_X, n_features)\n Input data.\n\n Y : {ndarray, sparse matrix} of shape (n_samples_Y, n_features), \\\n default=None\n Input data. 
If ``None``, the output will be the pairwise\n similarities between all samples in ``X``.\n\n dense_output : bool, default=True\n Whether to return dense output even when the input is sparse. If\n ``False``, the output is sparse if both input arrays are sparse.\n\n .. versionadded:: 0.17\n parameter ``dense_output`` for dense output.\n\n Returns\n -------\n kernel matrix : ndarray of shape (n_samples_X, n_samples_Y)\n \"\"\"\n # to avoid recursive import\n\n X, Y = check_pairwise_arrays(X, Y)\n\n X_normalized = normalize(X, copy=True)\n if X is Y:\n Y_normalized = X_normalized\n else:\n Y_normalized = normalize(Y, copy=True)\n\n K = safe_sparse_dot(X_normalized, Y_normalized.T,\n dense_output=dense_output)\n\n return K" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/distance_metrics", + "name": "distance_metrics", + "qname": "sklearn.metrics.pairwise.distance_metrics", + "decorators": [], + "parameters": [], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Valid metrics for pairwise_distances.\n\nThis function simply returns the valid pairwise distance metrics.\nIt exists to allow for a description of the mapping for\neach of the valid strings.\n\nThe valid distance metrics, and the function they map to, are:\n\n=============== ========================================\nmetric Function\n=============== ========================================\n'cityblock' metrics.pairwise.manhattan_distances\n'cosine' metrics.pairwise.cosine_distances\n'euclidean' metrics.pairwise.euclidean_distances\n'haversine' metrics.pairwise.haversine_distances\n'l1' metrics.pairwise.manhattan_distances\n'l2' metrics.pairwise.euclidean_distances\n'manhattan' metrics.pairwise.manhattan_distances\n'nan_euclidean' metrics.pairwise.nan_euclidean_distances\n=============== ========================================\n\nRead more in the :ref:`User Guide `.", + "docstring": "Valid metrics for pairwise_distances.\n\nThis function simply returns the valid pairwise distance metrics.\nIt exists to allow for a description of the mapping for\neach of the valid strings.\n\nThe valid distance metrics, and the function they map to, are:\n\n=============== ========================================\nmetric Function\n=============== ========================================\n'cityblock' metrics.pairwise.manhattan_distances\n'cosine' metrics.pairwise.cosine_distances\n'euclidean' metrics.pairwise.euclidean_distances\n'haversine' metrics.pairwise.haversine_distances\n'l1' metrics.pairwise.manhattan_distances\n'l2' metrics.pairwise.euclidean_distances\n'manhattan' metrics.pairwise.manhattan_distances\n'nan_euclidean' metrics.pairwise.nan_euclidean_distances\n=============== ========================================\n\nRead more in the :ref:`User Guide `.", + "code": "def distance_metrics():\n \"\"\"Valid metrics for pairwise_distances.\n\n This function simply returns the valid pairwise distance metrics.\n It exists to allow for a description of the mapping for\n each of the valid strings.\n\n The valid distance metrics, and the function they map to, are:\n\n =============== ========================================\n metric Function\n =============== ========================================\n 'cityblock' metrics.pairwise.manhattan_distances\n 'cosine' metrics.pairwise.cosine_distances\n 'euclidean' metrics.pairwise.euclidean_distances\n 'haversine' metrics.pairwise.haversine_distances\n 'l1' metrics.pairwise.manhattan_distances\n 'l2' metrics.pairwise.euclidean_distances\n 'manhattan' 
metrics.pairwise.manhattan_distances\n 'nan_euclidean' metrics.pairwise.nan_euclidean_distances\n =============== ========================================\n\n Read more in the :ref:`User Guide `.\n\n \"\"\"\n return PAIRWISE_DISTANCE_FUNCTIONS" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/euclidean_distances", + "name": "euclidean_distances", + "qname": "sklearn.metrics.pairwise.euclidean_distances", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/euclidean_distances/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.euclidean_distances.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples_X, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples_X, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/euclidean_distances/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.euclidean_distances.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples_Y, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/euclidean_distances/Y_norm_squared", + "name": "Y_norm_squared", + "qname": "sklearn.metrics.pairwise.euclidean_distances.Y_norm_squared", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_Y,)", + "default_value": "None", + "description": "Pre-computed dot-products of vectors in Y (e.g.,\n``(Y**2).sum(axis=1)``)\nMay be ignored in some cases, see the note below." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples_Y,)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/euclidean_distances/squared", + "name": "squared", + "qname": "sklearn.metrics.pairwise.euclidean_distances.squared", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Return squared Euclidean distances." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/euclidean_distances/X_norm_squared", + "name": "X_norm_squared", + "qname": "sklearn.metrics.pairwise.euclidean_distances.X_norm_squared", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Pre-computed dot-products of vectors in X (e.g.,\n``(X**2).sum(axis=1)``)\nMay be ignored in some cases, see the note below." 
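Aside: per the mapping table recorded for distance_metrics, the returned dict resolves metric names to these module functions, for example:

>>> from sklearn.metrics.pairwise import distance_metrics, manhattan_distances
>>> distance_metrics()['cityblock'] is manhattan_distances
True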
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Considering the rows of X (and Y=X) as vectors, compute the\ndistance matrix between each pair of vectors.\n\nFor efficiency reasons, the euclidean distance between a pair of row\nvector x and y is computed as::\n\n dist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))\n\nThis formulation has two advantages over other ways of computing distances.\nFirst, it is computationally efficient when dealing with sparse data.\nSecond, if one argument varies but the other remains unchanged, then\n`dot(x, x)` and/or `dot(y, y)` can be pre-computed.\n\nHowever, this is not the most precise way of doing this computation,\nbecause this equation potentially suffers from \"catastrophic cancellation\".\nAlso, the distance matrix returned by this function may not be exactly\nsymmetric as required by, e.g., ``scipy.spatial.distance`` functions.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Considering the rows of X (and Y=X) as vectors, compute the\ndistance matrix between each pair of vectors.\n\nFor efficiency reasons, the euclidean distance between a pair of row\nvector x and y is computed as::\n\n dist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))\n\nThis formulation has two advantages over other ways of computing distances.\nFirst, it is computationally efficient when dealing with sparse data.\nSecond, if one argument varies but the other remains unchanged, then\n`dot(x, x)` and/or `dot(y, y)` can be pre-computed.\n\nHowever, this is not the most precise way of doing this computation,\nbecause this equation potentially suffers from \"catastrophic cancellation\".\nAlso, the distance matrix returned by this function may not be exactly\nsymmetric as required by, e.g., ``scipy.spatial.distance`` functions.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features), default=None\n\nY_norm_squared : array-like of shape (n_samples_Y,), default=None\n Pre-computed dot-products of vectors in Y (e.g.,\n ``(Y**2).sum(axis=1)``)\n May be ignored in some cases, see the note below.\n\nsquared : bool, default=False\n Return squared Euclidean distances.\n\nX_norm_squared : array-like of shape (n_samples,), default=None\n Pre-computed dot-products of vectors in X (e.g.,\n ``(X**2).sum(axis=1)``)\n May be ignored in some cases, see the note below.\n\nNotes\n-----\nTo achieve better accuracy, `X_norm_squared`\u00a0and `Y_norm_squared` may be\nunused if they are passed as ``float32``.\n\nReturns\n-------\ndistances : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\npaired_distances : Distances betweens pairs of elements of X and Y.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import euclidean_distances\n>>> X = [[0, 1], [1, 1]]\n>>> # distance between rows of X\n>>> euclidean_distances(X, X)\narray([[0., 1.],\n [1., 0.]])\n>>> # get distance to origin\n>>> euclidean_distances(X, [[0, 0]])\narray([[1. 
],\n [1.41421356]])", + "code": "@_deprecate_positional_args\ndef euclidean_distances(X, Y=None, *, Y_norm_squared=None, squared=False,\n X_norm_squared=None):\n \"\"\"\n Considering the rows of X (and Y=X) as vectors, compute the\n distance matrix between each pair of vectors.\n\n For efficiency reasons, the euclidean distance between a pair of row\n vector x and y is computed as::\n\n dist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))\n\n This formulation has two advantages over other ways of computing distances.\n First, it is computationally efficient when dealing with sparse data.\n Second, if one argument varies but the other remains unchanged, then\n `dot(x, x)` and/or `dot(y, y)` can be pre-computed.\n\n However, this is not the most precise way of doing this computation,\n because this equation potentially suffers from \"catastrophic cancellation\".\n Also, the distance matrix returned by this function may not be exactly\n symmetric as required by, e.g., ``scipy.spatial.distance`` functions.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n\n Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features), \\\n default=None\n\n Y_norm_squared : array-like of shape (n_samples_Y,), default=None\n Pre-computed dot-products of vectors in Y (e.g.,\n ``(Y**2).sum(axis=1)``)\n May be ignored in some cases, see the note below.\n\n squared : bool, default=False\n Return squared Euclidean distances.\n\n X_norm_squared : array-like of shape (n_samples,), default=None\n Pre-computed dot-products of vectors in X (e.g.,\n ``(X**2).sum(axis=1)``)\n May be ignored in some cases, see the note below.\n\n Notes\n -----\n To achieve better accuracy, `X_norm_squared`\u00a0and `Y_norm_squared` may be\n unused if they are passed as ``float32``.\n\n Returns\n -------\n distances : ndarray of shape (n_samples_X, n_samples_Y)\n\n See Also\n --------\n paired_distances : Distances betweens pairs of elements of X and Y.\n\n Examples\n --------\n >>> from sklearn.metrics.pairwise import euclidean_distances\n >>> X = [[0, 1], [1, 1]]\n >>> # distance between rows of X\n >>> euclidean_distances(X, X)\n array([[0., 1.],\n [1., 0.]])\n >>> # get distance to origin\n >>> euclidean_distances(X, [[0, 0]])\n array([[1. ],\n [1.41421356]])\n \"\"\"\n X, Y = check_pairwise_arrays(X, Y)\n\n # If norms are passed as float32, they are unused. 
If arrays are passed as\n # float32, norms needs to be recomputed on upcast chunks.\n # TODO: use a float64 accumulator in row_norms to avoid the latter.\n if X_norm_squared is not None:\n XX = check_array(X_norm_squared)\n if XX.shape == (1, X.shape[0]):\n XX = XX.T\n elif XX.shape != (X.shape[0], 1):\n raise ValueError(\n \"Incompatible dimensions for X and X_norm_squared\")\n if XX.dtype == np.float32:\n XX = None\n elif X.dtype == np.float32:\n XX = None\n else:\n XX = row_norms(X, squared=True)[:, np.newaxis]\n\n if X is Y and XX is not None:\n # shortcut in the common case euclidean_distances(X, X)\n YY = XX.T\n elif Y_norm_squared is not None:\n YY = np.atleast_2d(Y_norm_squared)\n\n if YY.shape != (1, Y.shape[0]):\n raise ValueError(\n \"Incompatible dimensions for Y and Y_norm_squared\")\n if YY.dtype == np.float32:\n YY = None\n elif Y.dtype == np.float32:\n YY = None\n else:\n YY = row_norms(Y, squared=True)[np.newaxis, :]\n\n if X.dtype == np.float32:\n # To minimize precision issues with float32, we compute the distance\n # matrix on chunks of X and Y upcast to float64\n distances = _euclidean_distances_upcast(X, XX, Y, YY)\n else:\n # if dtype is already float64, no need to chunk and upcast\n distances = - 2 * safe_sparse_dot(X, Y.T, dense_output=True)\n distances += XX\n distances += YY\n np.maximum(distances, 0, out=distances)\n\n # Ensure that distances between vectors and themselves are set to 0.0.\n # This may not be the case due to floating point rounding errors.\n if X is Y:\n np.fill_diagonal(distances, 0)\n\n return distances if squared else np.sqrt(distances, out=distances)" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/haversine_distances", + "name": "haversine_distances", + "qname": "sklearn.metrics.pairwise.haversine_distances", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/haversine_distances/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.haversine_distances.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, 2)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, 2)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/haversine_distances/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.haversine_distances.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_Y, 2)", + "default_value": "None", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples_Y, 2)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute the Haversine distance between samples in X and Y.\n\nThe Haversine (or great circle) distance is the angular distance between\ntwo points on the surface of a sphere. The first coordinate of each point\nis assumed to be the latitude, the second is the longitude, given\nin radians. The dimension of the data must be 2.\n\n.. math::\n D(x, y) = 2\\arcsin[\\sqrt{\\sin^2((x1 - y1) / 2)\n + \\cos(x1)\\cos(y1)\\sin^2((x2 - y2) / 2)}]", + "docstring": "Compute the Haversine distance between samples in X and Y.\n\nThe Haversine (or great circle) distance is the angular distance between\ntwo points on the surface of a sphere. The first coordinate of each point\nis assumed to be the latitude, the second is the longitude, given\nin radians. 
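Aside on the euclidean_distances record above: the dot(x, x) - 2 * dot(x, y) + dot(y, y) formulation it documents can be checked by hand on made-up arrays:

>>> import numpy as np
>>> from sklearn.metrics.pairwise import euclidean_distances
>>> X = np.array([[0.0, 1.0], [1.0, 1.0]])
>>> Y = np.array([[0.0, 0.0]])
>>> XX = (X ** 2).sum(axis=1)[:, np.newaxis]  # dot(x, x) per row
>>> YY = (Y ** 2).sum(axis=1)[np.newaxis, :]  # dot(y, y) per row
>>> np.allclose(np.sqrt(XX - 2 * (X @ Y.T) + YY), euclidean_distances(X, Y))
True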
The dimension of the data must be 2.\n\n.. math::\n D(x, y) = 2\\arcsin[\\sqrt{\\sin^2((x1 - y1) / 2)\n + \\cos(x1)\\cos(y1)\\sin^2((x2 - y2) / 2)}]\n\nParameters\n----------\nX : array-like of shape (n_samples_X, 2)\n\nY : array-like of shape (n_samples_Y, 2), default=None\n\nReturns\n-------\ndistance : ndarray of shape (n_samples_X, n_samples_Y)\n\nNotes\n-----\nAs the Earth is nearly spherical, the haversine formula provides a good\napproximation of the distance between two points of the Earth surface, with\na less than 1% error on average.\n\nExamples\n--------\nWe want to calculate the distance between the Ezeiza Airport\n(Buenos Aires, Argentina) and the Charles de Gaulle Airport (Paris,\nFrance).\n\n>>> from sklearn.metrics.pairwise import haversine_distances\n>>> from math import radians\n>>> bsas = [-34.83333, -58.5166646]\n>>> paris = [49.0083899664, 2.53844117956]\n>>> bsas_in_radians = [radians(_) for _ in bsas]\n>>> paris_in_radians = [radians(_) for _ in paris]\n>>> result = haversine_distances([bsas_in_radians, paris_in_radians])\n>>> result * 6371000/1000 # multiply by Earth radius to get kilometers\narray([[ 0. , 11099.54035582],\n [11099.54035582, 0. ]])", + "code": "def haversine_distances(X, Y=None):\n \"\"\"Compute the Haversine distance between samples in X and Y.\n\n The Haversine (or great circle) distance is the angular distance between\n two points on the surface of a sphere. The first coordinate of each point\n is assumed to be the latitude, the second is the longitude, given\n in radians. The dimension of the data must be 2.\n\n .. math::\n D(x, y) = 2\\\\arcsin[\\\\sqrt{\\\\sin^2((x1 - y1) / 2)\n + \\\\cos(x1)\\\\cos(y1)\\\\sin^2((x2 - y2) / 2)}]\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, 2)\n\n Y : array-like of shape (n_samples_Y, 2), default=None\n\n Returns\n -------\n distance : ndarray of shape (n_samples_X, n_samples_Y)\n\n Notes\n -----\n As the Earth is nearly spherical, the haversine formula provides a good\n approximation of the distance between two points of the Earth surface, with\n a less than 1% error on average.\n\n Examples\n --------\n We want to calculate the distance between the Ezeiza Airport\n (Buenos Aires, Argentina) and the Charles de Gaulle Airport (Paris,\n France).\n\n >>> from sklearn.metrics.pairwise import haversine_distances\n >>> from math import radians\n >>> bsas = [-34.83333, -58.5166646]\n >>> paris = [49.0083899664, 2.53844117956]\n >>> bsas_in_radians = [radians(_) for _ in bsas]\n >>> paris_in_radians = [radians(_) for _ in paris]\n >>> result = haversine_distances([bsas_in_radians, paris_in_radians])\n >>> result * 6371000/1000 # multiply by Earth radius to get kilometers\n array([[ 0. , 11099.54035582],\n [11099.54035582, 0. 
]])\n \"\"\"\n from sklearn.neighbors import DistanceMetric\n return DistanceMetric.get_metric('haversine').pairwise(X, Y)" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/kernel_metrics", + "name": "kernel_metrics", + "qname": "sklearn.metrics.pairwise.kernel_metrics", + "decorators": [], + "parameters": [], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Valid metrics for pairwise_kernels.\n\nThis function simply returns the valid pairwise distance metrics.\nIt exists, however, to allow for a verbose description of the mapping for\neach of the valid strings.\n\nThe valid distance metrics, and the function they map to, are:\n =============== ========================================\n metric Function\n =============== ========================================\n 'additive_chi2' sklearn.pairwise.additive_chi2_kernel\n 'chi2' sklearn.pairwise.chi2_kernel\n 'linear' sklearn.pairwise.linear_kernel\n 'poly' sklearn.pairwise.polynomial_kernel\n 'polynomial' sklearn.pairwise.polynomial_kernel\n 'rbf' sklearn.pairwise.rbf_kernel\n 'laplacian' sklearn.pairwise.laplacian_kernel\n 'sigmoid' sklearn.pairwise.sigmoid_kernel\n 'cosine' sklearn.pairwise.cosine_similarity\n =============== ========================================\n\nRead more in the :ref:`User Guide `.", + "docstring": "Valid metrics for pairwise_kernels.\n\nThis function simply returns the valid pairwise distance metrics.\nIt exists, however, to allow for a verbose description of the mapping for\neach of the valid strings.\n\nThe valid distance metrics, and the function they map to, are:\n =============== ========================================\n metric Function\n =============== ========================================\n 'additive_chi2' sklearn.pairwise.additive_chi2_kernel\n 'chi2' sklearn.pairwise.chi2_kernel\n 'linear' sklearn.pairwise.linear_kernel\n 'poly' sklearn.pairwise.polynomial_kernel\n 'polynomial' sklearn.pairwise.polynomial_kernel\n 'rbf' sklearn.pairwise.rbf_kernel\n 'laplacian' sklearn.pairwise.laplacian_kernel\n 'sigmoid' sklearn.pairwise.sigmoid_kernel\n 'cosine' sklearn.pairwise.cosine_similarity\n =============== ========================================\n\nRead more in the :ref:`User Guide `.", + "code": "def kernel_metrics():\n \"\"\"Valid metrics for pairwise_kernels.\n\n This function simply returns the valid pairwise distance metrics.\n It exists, however, to allow for a verbose description of the mapping for\n each of the valid strings.\n\n The valid distance metrics, and the function they map to, are:\n =============== ========================================\n metric Function\n =============== ========================================\n 'additive_chi2' sklearn.pairwise.additive_chi2_kernel\n 'chi2' sklearn.pairwise.chi2_kernel\n 'linear' sklearn.pairwise.linear_kernel\n 'poly' sklearn.pairwise.polynomial_kernel\n 'polynomial' sklearn.pairwise.polynomial_kernel\n 'rbf' sklearn.pairwise.rbf_kernel\n 'laplacian' sklearn.pairwise.laplacian_kernel\n 'sigmoid' sklearn.pairwise.sigmoid_kernel\n 'cosine' sklearn.pairwise.cosine_similarity\n =============== ========================================\n\n Read more in the :ref:`User Guide `.\n \"\"\"\n return PAIRWISE_KERNEL_FUNCTIONS" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/laplacian_kernel", + "name": "laplacian_kernel", + "qname": "sklearn.metrics.pairwise.laplacian_kernel", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/laplacian_kernel/X", + "name": "X", + "qname": 
"sklearn.metrics.pairwise.laplacian_kernel.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/laplacian_kernel/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.laplacian_kernel.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/laplacian_kernel/gamma", + "name": "gamma", + "qname": "sklearn.metrics.pairwise.laplacian_kernel.gamma", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "None", + "description": "If None, defaults to 1.0 / n_features." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute the laplacian kernel between X and Y.\n\nThe laplacian kernel is defined as::\n\n K(x, y) = exp(-gamma ||x-y||_1)\n\nfor each pair of rows x in X and y in Y.\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17", + "docstring": "Compute the laplacian kernel between X and Y.\n\nThe laplacian kernel is defined as::\n\n K(x, y) = exp(-gamma ||x-y||_1)\n\nfor each pair of rows x in X and y in Y.\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)", + "code": "def laplacian_kernel(X, Y=None, gamma=None):\n \"\"\"Compute the laplacian kernel between X and Y.\n\n The laplacian kernel is defined as::\n\n K(x, y) = exp(-gamma ||x-y||_1)\n\n for each pair of rows x in X and y in Y.\n Read more in the :ref:`User Guide `.\n\n .. 
versionadded:: 0.17\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n\n gamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\n Returns\n -------\n kernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)\n \"\"\"\n X, Y = check_pairwise_arrays(X, Y)\n if gamma is None:\n gamma = 1.0 / X.shape[1]\n\n K = -gamma * manhattan_distances(X, Y)\n np.exp(K, K) # exponentiate K in-place\n return K" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/linear_kernel", + "name": "linear_kernel", + "qname": "sklearn.metrics.pairwise.linear_kernel", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/linear_kernel/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.linear_kernel.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/linear_kernel/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.linear_kernel.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/linear_kernel/dense_output", + "name": "dense_output", + "qname": "sklearn.metrics.pairwise.linear_kernel.dense_output", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to return dense output even when the input is sparse. If\n``False``, the output is sparse if both input arrays are sparse.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute the linear kernel between X and Y.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the linear kernel between X and Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ndense_output : bool, default=True\n Whether to return dense output even when the input is sparse. If\n ``False``, the output is sparse if both input arrays are sparse.\n\n .. versionadded:: 0.20\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)", + "code": "def linear_kernel(X, Y=None, dense_output=True):\n \"\"\"\n Compute the linear kernel between X and Y.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n\n dense_output : bool, default=True\n Whether to return dense output even when the input is sparse. If\n ``False``, the output is sparse if both input arrays are sparse.\n\n .. 
versionadded:: 0.20\n\n Returns\n -------\n Gram matrix : ndarray of shape (n_samples_X, n_samples_Y)\n \"\"\"\n X, Y = check_pairwise_arrays(X, Y)\n return safe_sparse_dot(X, Y.T, dense_output=dense_output)" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/manhattan_distances", + "name": "manhattan_distances", + "qname": "sklearn.metrics.pairwise.manhattan_distances", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/manhattan_distances/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.manhattan_distances.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/manhattan_distances/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.manhattan_distances.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/manhattan_distances/sum_over_features", + "name": "sum_over_features", + "qname": "sklearn.metrics.pairwise.manhattan_distances.sum_over_features", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True the function returns the pairwise distance matrix\nelse it returns the componentwise L1 pairwise-distances.\nNot supported for sparse matrix inputs." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute the L1 distances between the vectors in X and Y.\n\nWith sum_over_features equal to False it returns the componentwise\ndistances.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the L1 distances between the vectors in X and Y.\n\nWith sum_over_features equal to False it returns the componentwise\ndistances.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n\nY : array-like of shape (n_samples_Y, n_features), default=None\n\nsum_over_features : bool, default=True\n If True the function returns the pairwise distance matrix\n else it returns the componentwise L1 pairwise-distances.\n Not supported for sparse matrix inputs.\n\nReturns\n-------\nD : ndarray of shape (n_samples_X * n_samples_Y, n_features) or (n_samples_X, n_samples_Y)\n If sum_over_features is False shape is\n (n_samples_X * n_samples_Y, n_features) and D contains the\n componentwise L1 pairwise-distances (ie. 
absolute difference),\n else shape is (n_samples_X, n_samples_Y) and D contains\n the pairwise L1 distances.\n\nNotes\n--------\nWhen X and/or Y are CSR sparse matrices and they are not already\nin canonical format, this function modifies them in-place to\nmake them canonical.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import manhattan_distances\n>>> manhattan_distances([[3]], [[3]])\narray([[0.]])\n>>> manhattan_distances([[3]], [[2]])\narray([[1.]])\n>>> manhattan_distances([[2]], [[3]])\narray([[1.]])\n>>> manhattan_distances([[1, 2], [3, 4]], [[1, 2], [0, 3]])\narray([[0., 2.],\n [4., 4.]])\n>>> import numpy as np\n>>> X = np.ones((1, 2))\n>>> y = np.full((2, 2), 2.)\n>>> manhattan_distances(X, y, sum_over_features=False)\narray([[1., 1.],\n [1., 1.]])", + "code": "@_deprecate_positional_args\ndef manhattan_distances(X, Y=None, *, sum_over_features=True):\n \"\"\"Compute the L1 distances between the vectors in X and Y.\n\n With sum_over_features equal to False it returns the componentwise\n distances.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features)\n\n Y : array-like of shape (n_samples_Y, n_features), default=None\n\n sum_over_features : bool, default=True\n If True the function returns the pairwise distance matrix\n else it returns the componentwise L1 pairwise-distances.\n Not supported for sparse matrix inputs.\n\n Returns\n -------\n D : ndarray of shape (n_samples_X * n_samples_Y, n_features) or \\\n (n_samples_X, n_samples_Y)\n If sum_over_features is False shape is\n (n_samples_X * n_samples_Y, n_features) and D contains the\n componentwise L1 pairwise-distances (ie. absolute difference),\n else shape is (n_samples_X, n_samples_Y) and D contains\n the pairwise L1 distances.\n\n Notes\n --------\n When X and/or Y are CSR sparse matrices and they are not already\n in canonical format, this function modifies them in-place to\n make them canonical.\n\n Examples\n --------\n >>> from sklearn.metrics.pairwise import manhattan_distances\n >>> manhattan_distances([[3]], [[3]])\n array([[0.]])\n >>> manhattan_distances([[3]], [[2]])\n array([[1.]])\n >>> manhattan_distances([[2]], [[3]])\n array([[1.]])\n >>> manhattan_distances([[1, 2], [3, 4]],\\\n [[1, 2], [0, 3]])\n array([[0., 2.],\n [4., 4.]])\n >>> import numpy as np\n >>> X = np.ones((1, 2))\n >>> y = np.full((2, 2), 2.)\n >>> manhattan_distances(X, y, sum_over_features=False)\n array([[1., 1.],\n [1., 1.]])\n \"\"\"\n X, Y = check_pairwise_arrays(X, Y)\n\n if issparse(X) or issparse(Y):\n if not sum_over_features:\n raise TypeError(\"sum_over_features=%r not supported\"\n \" for sparse matrices\" % sum_over_features)\n\n X = csr_matrix(X, copy=False)\n Y = csr_matrix(Y, copy=False)\n X.sum_duplicates() # this also sorts indices in-place\n Y.sum_duplicates()\n D = np.zeros((X.shape[0], Y.shape[0]))\n _sparse_manhattan(X.data, X.indices, X.indptr,\n Y.data, Y.indices, Y.indptr,\n D)\n return D\n\n if sum_over_features:\n return distance.cdist(X, Y, 'cityblock')\n\n D = X[:, np.newaxis, :] - Y[np.newaxis, :, :]\n D = np.abs(D, D)\n return D.reshape((-1, X.shape[1]))" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/nan_euclidean_distances", + "name": "nan_euclidean_distances", + "qname": "sklearn.metrics.pairwise.nan_euclidean_distances", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/nan_euclidean_distances/X", + "name": "X", + "qname": 
"sklearn.metrics.pairwise.nan_euclidean_distances.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape=(n_samples_X, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape=(n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/nan_euclidean_distances/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.nan_euclidean_distances.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape=(n_samples_Y, n_features)", + "default_value": "None", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape=(n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/nan_euclidean_distances/squared", + "name": "squared", + "qname": "sklearn.metrics.pairwise.nan_euclidean_distances.squared", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Return squared Euclidean distances." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/nan_euclidean_distances/missing_values", + "name": "missing_values", + "qname": "sklearn.metrics.pairwise.nan_euclidean_distances.missing_values", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "np.nan or int", + "default_value": "np.nan", + "description": "Representation of missing value." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "np.nan" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/nan_euclidean_distances/copy", + "name": "copy", + "qname": "sklearn.metrics.pairwise.nan_euclidean_distances.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Make and use a deep copy of X and Y (if Y exists)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Calculate the euclidean distances in the presence of missing values.\n\nCompute the euclidean distance between each pair of samples in X and Y,\nwhere Y=X is assumed if Y=None. When calculating the distance between a\npair of samples, this formulation ignores feature coordinates with a\nmissing value in either sample and scales up the weight of the remaining\ncoordinates:\n\n dist(x,y) = sqrt(weight * sq. distance from present coordinates)\n where,\n weight = Total # of coordinates / # of present coordinates\n\nFor example, the distance between ``[3, na, na, 6]`` and ``[1, na, 4, 5]``\nis:\n\n .. math::\n \\sqrt{\\frac{4}{2}((3-1)^2 + (6-5)^2)}\n\nIf all the coordinates are missing or if there are no common present\ncoordinates then NaN is returned for that pair.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22", + "docstring": "Calculate the euclidean distances in the presence of missing values.\n\nCompute the euclidean distance between each pair of samples in X and Y,\nwhere Y=X is assumed if Y=None. 
When calculating the distance between a\npair of samples, this formulation ignores feature coordinates with a\nmissing value in either sample and scales up the weight of the remaining\ncoordinates:\n\n dist(x,y) = sqrt(weight * sq. distance from present coordinates)\n where,\n weight = Total # of coordinates / # of present coordinates\n\nFor example, the distance between ``[3, na, na, 6]`` and ``[1, na, 4, 5]``\nis:\n\n .. math::\n \\sqrt{\\frac{4}{2}((3-1)^2 + (6-5)^2)}\n\nIf all the coordinates are missing or if there are no common present\ncoordinates then NaN is returned for that pair.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22\n\nParameters\n----------\nX : array-like of shape=(n_samples_X, n_features)\n\nY : array-like of shape=(n_samples_Y, n_features), default=None\n\nsquared : bool, default=False\n Return squared Euclidean distances.\n\nmissing_values : np.nan or int, default=np.nan\n Representation of missing value.\n\ncopy : bool, default=True\n Make and use a deep copy of X and Y (if Y exists).\n\nReturns\n-------\ndistances : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\npaired_distances : Distances between pairs of elements of X and Y.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import nan_euclidean_distances\n>>> nan = float(\"NaN\")\n>>> X = [[0, 1], [1, nan]]\n>>> nan_euclidean_distances(X, X) # distance between rows of X\narray([[0. , 1.41421356],\n [1.41421356, 0. ]])\n\n>>> # get distance to origin\n>>> nan_euclidean_distances(X, [[0, 0]])\narray([[1. ],\n [1.41421356]])\n\nReferences\n----------\n* John K. Dixon, \"Pattern Recognition with Partly Missing Data\",\n IEEE Transactions on Systems, Man, and Cybernetics, Volume: 9, Issue:\n 10, pp. 617 - 621, Oct. 1979.\n http://ieeexplore.ieee.org/abstract/document/4310090/", + "code": "@_deprecate_positional_args\ndef nan_euclidean_distances(X, Y=None, *, squared=False,\n missing_values=np.nan, copy=True):\n \"\"\"Calculate the euclidean distances in the presence of missing values.\n\n Compute the euclidean distance between each pair of samples in X and Y,\n where Y=X is assumed if Y=None. When calculating the distance between a\n pair of samples, this formulation ignores feature coordinates with a\n missing value in either sample and scales up the weight of the remaining\n coordinates:\n\n dist(x,y) = sqrt(weight * sq. distance from present coordinates)\n where,\n weight = Total # of coordinates / # of present coordinates\n\n For example, the distance between ``[3, na, na, 6]`` and ``[1, na, 4, 5]``\n is:\n\n .. math::\n \\\\sqrt{\\\\frac{4}{2}((3-1)^2 + (6-5)^2)}\n\n If all the coordinates are missing or if there are no common present\n coordinates then NaN is returned for that pair.\n\n Read more in the :ref:`User Guide `.\n\n .. 
versionadded:: 0.22\n\n Parameters\n ----------\n X : array-like of shape=(n_samples_X, n_features)\n\n Y : array-like of shape=(n_samples_Y, n_features), default=None\n\n squared : bool, default=False\n Return squared Euclidean distances.\n\n missing_values : np.nan or int, default=np.nan\n Representation of missing value.\n\n copy : bool, default=True\n Make and use a deep copy of X and Y (if Y exists).\n\n Returns\n -------\n distances : ndarray of shape (n_samples_X, n_samples_Y)\n\n See Also\n --------\n paired_distances : Distances between pairs of elements of X and Y.\n\n Examples\n --------\n >>> from sklearn.metrics.pairwise import nan_euclidean_distances\n >>> nan = float(\"NaN\")\n >>> X = [[0, 1], [1, nan]]\n >>> nan_euclidean_distances(X, X) # distance between rows of X\n array([[0. , 1.41421356],\n [1.41421356, 0. ]])\n\n >>> # get distance to origin\n >>> nan_euclidean_distances(X, [[0, 0]])\n array([[1. ],\n [1.41421356]])\n\n References\n ----------\n * John K. Dixon, \"Pattern Recognition with Partly Missing Data\",\n IEEE Transactions on Systems, Man, and Cybernetics, Volume: 9, Issue:\n 10, pp. 617 - 621, Oct. 1979.\n http://ieeexplore.ieee.org/abstract/document/4310090/\n \"\"\"\n\n force_all_finite = 'allow-nan' if is_scalar_nan(missing_values) else True\n X, Y = check_pairwise_arrays(X, Y, accept_sparse=False,\n force_all_finite=force_all_finite, copy=copy)\n # Get missing mask for X\n missing_X = _get_mask(X, missing_values)\n\n # Get missing mask for Y\n missing_Y = missing_X if Y is X else _get_mask(Y, missing_values)\n\n # set missing values to zero\n X[missing_X] = 0\n Y[missing_Y] = 0\n\n distances = euclidean_distances(X, Y, squared=True)\n\n # Adjust distances for missing values\n XX = X * X\n YY = Y * Y\n distances -= np.dot(XX, missing_Y.T)\n distances -= np.dot(missing_X, YY.T)\n\n np.clip(distances, 0, None, out=distances)\n\n if X is Y:\n # Ensure that distances between vectors and themselves are set to 0.0.\n # This may not be the case due to floating point rounding errors.\n np.fill_diagonal(distances, 0.0)\n\n present_X = 1 - missing_X\n present_Y = present_X if Y is X else ~missing_Y\n present_count = np.dot(present_X, present_Y.T)\n distances[present_count == 0] = np.nan\n # avoid divide by zero\n np.maximum(1, present_count, out=present_count)\n distances /= present_count\n distances *= X.shape[1]\n\n if not squared:\n np.sqrt(distances, out=distances)\n\n return distances" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_cosine_distances", + "name": "paired_cosine_distances", + "qname": "sklearn.metrics.pairwise.paired_cosine_distances", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_cosine_distances/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.paired_cosine_distances.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_cosine_distances/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.paired_cosine_distances.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": 
"array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Computes the paired cosine distances between X and Y.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Computes the paired cosine distances between X and Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nY : array-like of shape (n_samples, n_features)\n\nReturns\n-------\ndistances : ndarray of shape (n_samples,)\n\nNotes\n-----\nThe cosine distance is equivalent to the half the squared\neuclidean distance if each sample is normalized to unit norm.", + "code": "def paired_cosine_distances(X, Y):\n \"\"\"\n Computes the paired cosine distances between X and Y.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Y : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n distances : ndarray of shape (n_samples,)\n\n Notes\n -----\n The cosine distance is equivalent to the half the squared\n euclidean distance if each sample is normalized to unit norm.\n \"\"\"\n X, Y = check_paired_arrays(X, Y)\n return .5 * row_norms(normalize(X) - normalize(Y), squared=True)" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_distances", + "name": "paired_distances", + "qname": "sklearn.metrics.pairwise.paired_distances", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_distances/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.paired_distances.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Array 1 for distance computation." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_distances/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.paired_distances.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Array 2 for distance computation." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_distances/metric", + "name": "metric", + "qname": "sklearn.metrics.pairwise.paired_distances.metric", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str or callable", + "default_value": "\"euclidean\"", + "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string, it must be one of the options\nspecified in PAIRED_DISTANCES, including \"euclidean\",\n\"manhattan\", or \"cosine\".\nAlternatively, if metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays from X as input and return a value indicating\nthe distance between them." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_distances/kwds", + "name": "kwds", + "qname": "sklearn.metrics.pairwise.paired_distances.kwds", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Computes the paired distances between X and Y.\n\nComputes the distances between (X[0], Y[0]), (X[1], Y[1]), etc...\n\nRead more in the :ref:`User Guide `.", + "docstring": "Computes the paired distances between X and Y.\n\nComputes the distances between (X[0], Y[0]), (X[1], Y[1]), etc...\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Array 1 for distance computation.\n\nY : ndarray of shape (n_samples, n_features)\n Array 2 for distance computation.\n\nmetric : str or callable, default=\"euclidean\"\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n specified in PAIRED_DISTANCES, including \"euclidean\",\n \"manhattan\", or \"cosine\".\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\nReturns\n-------\ndistances : ndarray of shape (n_samples,)\n\nSee Also\n--------\npairwise_distances : Computes the distance between every pair of samples.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import paired_distances\n>>> X = [[0, 1], [1, 1]]\n>>> Y = [[0, 1], [2, 1]]\n>>> paired_distances(X, Y)\narray([0., 1.])", + "code": "@_deprecate_positional_args\ndef paired_distances(X, Y, *, metric=\"euclidean\", **kwds):\n \"\"\"\n Computes the paired distances between X and Y.\n\n Computes the distances between (X[0], Y[0]), (X[1], Y[1]), etc...\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Array 1 for distance computation.\n\n Y : ndarray of shape (n_samples, n_features)\n Array 2 for distance computation.\n\n metric : str or callable, default=\"euclidean\"\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n specified in PAIRED_DISTANCES, including \"euclidean\",\n \"manhattan\", or \"cosine\".\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. 
The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\n Returns\n -------\n distances : ndarray of shape (n_samples,)\n\n See Also\n --------\n pairwise_distances : Computes the distance between every pair of samples.\n\n Examples\n --------\n >>> from sklearn.metrics.pairwise import paired_distances\n >>> X = [[0, 1], [1, 1]]\n >>> Y = [[0, 1], [2, 1]]\n >>> paired_distances(X, Y)\n array([0., 1.])\n \"\"\"\n\n if metric in PAIRED_DISTANCES:\n func = PAIRED_DISTANCES[metric]\n return func(X, Y)\n elif callable(metric):\n # Check the matrix first (it is usually done by the metric)\n X, Y = check_paired_arrays(X, Y)\n distances = np.zeros(len(X))\n for i in range(len(X)):\n distances[i] = metric(X[i], Y[i])\n return distances\n else:\n raise ValueError('Unknown distance %s' % metric)" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_euclidean_distances", + "name": "paired_euclidean_distances", + "qname": "sklearn.metrics.pairwise.paired_euclidean_distances", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_euclidean_distances/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.paired_euclidean_distances.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_euclidean_distances/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.paired_euclidean_distances.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Computes the paired euclidean distances between X and Y.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Computes the paired euclidean distances between X and Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nY : array-like of shape (n_samples, n_features)\n\nReturns\n-------\ndistances : ndarray of shape (n_samples,)", + "code": "def paired_euclidean_distances(X, Y):\n \"\"\"\n Computes the paired euclidean distances between X and Y.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Y : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n distances : ndarray of shape (n_samples,)\n \"\"\"\n X, Y = check_paired_arrays(X, Y)\n return row_norms(X - Y)" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_manhattan_distances", + "name": "paired_manhattan_distances", + "qname": "sklearn.metrics.pairwise.paired_manhattan_distances", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_manhattan_distances/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.paired_manhattan_distances.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + 
"kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/paired_manhattan_distances/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.paired_manhattan_distances.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute the L1 distances between the vectors in X and Y.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the L1 distances between the vectors in X and Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nY : array-like of shape (n_samples, n_features)\n\nReturns\n-------\ndistances : ndarray of shape (n_samples,)", + "code": "def paired_manhattan_distances(X, Y):\n \"\"\"Compute the L1 distances between the vectors in X and Y.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Y : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n distances : ndarray of shape (n_samples,)\n \"\"\"\n X, Y = check_paired_arrays(X, Y)\n diff = X - Y\n if issparse(diff):\n diff.data = np.abs(diff.data)\n return np.squeeze(np.array(diff.sum(axis=1)))\n else:\n return np.abs(diff).sum(axis=-1)" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances", + "name": "pairwise_distances", + "qname": "sklearn.metrics.pairwise.pairwise_distances", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.pairwise_distances.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)", + "default_value": "", + "description": "Array of pairwise distances between samples, or a feature array.\nThe shape of the array should be (n_samples_X, n_samples_X) if\nmetric == \"precomputed\" and (n_samples_X, n_features) otherwise." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.pairwise_distances.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "An optional second feature array. Only allowed if\nmetric != \"precomputed\"." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances/metric", + "name": "metric", + "qname": "sklearn.metrics.pairwise.pairwise_distances.metric", + "default_value": "'euclidean'", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "str or callable", + "default_value": "'euclidean'", + "description": "The metric to use when calculating distance between instances in a\nfeature array. 
If metric is a string, it must be one of the options\nallowed by scipy.spatial.distance.pdist for its metric parameter, or\na metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``.\nIf metric is \"precomputed\", X is assumed to be a distance matrix.\nAlternatively, if metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays from X as input and return a value indicating\nthe distance between them." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances/n_jobs", + "name": "n_jobs", + "qname": "sklearn.metrics.pairwise.pairwise_distances.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation. This works by breaking\ndown the pairwise matrix into n_jobs even slices and computing them in\nparallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances/force_all_finite", + "name": "force_all_finite", + "qname": "sklearn.metrics.pairwise.pairwise_distances.force_all_finite", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool or 'allow-nan'", + "default_value": "True", + "description": "Whether to raise an error on np.inf, np.nan, pd.NA in array. Ignored\nfor a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``. The\npossibilities are:\n\n- True: Force all values of array to be finite.\n- False: accepts np.inf, np.nan, pd.NA in array.\n- 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n.. versionadded:: 0.22\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'allow-nan'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances/kwds", + "name": "kwds", + "qname": "sklearn.metrics.pairwise.pairwise_distances.kwds", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "optional keyword parameters", + "default_value": "", + "description": "Any further parameters are passed directly to the distance function.\nIf using a scipy.spatial.distance metric, the parameters are still\nmetric dependent. See the scipy docs for usage examples." + }, + "type": { + "kind": "NamedType", + "name": "optional keyword parameters" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute the distance matrix from a vector array X and optional Y.\n\nThis method takes either a vector array or a distance matrix, and returns\na distance matrix. If the input is a vector array, the distances are\ncomputed. 
If the input is a distances matrix, it is returned instead.\n\nThis method provides a safe way to take a distance matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\ndistance between the arrays from both X and Y.\n\nValid values for metric are:\n\n- From scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']. These metrics support sparse matrix\n inputs.\n ['nan_euclidean'] but it does not yet support sparse matrices.\n\n- From scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',\n 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',\n 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']\n See the documentation for scipy.spatial.distance for details on these\n metrics. These metrics do not support sparse matrix inputs.\n\nNote that in the case of 'cityblock', 'cosine' and 'euclidean' (which are\nvalid scipy.spatial.distance metrics), the scikit-learn implementation\nwill be used, which is faster and has support for sparse matrices (except\nfor 'cityblock'). For a verbose description of the metrics from\nscikit-learn, see the __doc__ of the sklearn.pairwise.distance_metrics\nfunction.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the distance matrix from a vector array X and optional Y.\n\nThis method takes either a vector array or a distance matrix, and returns\na distance matrix. If the input is a vector array, the distances are\ncomputed. If the input is a distances matrix, it is returned instead.\n\nThis method provides a safe way to take a distance matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\ndistance between the arrays from both X and Y.\n\nValid values for metric are:\n\n- From scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']. These metrics support sparse matrix\n inputs.\n ['nan_euclidean'] but it does not yet support sparse matrices.\n\n- From scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',\n 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',\n 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']\n See the documentation for scipy.spatial.distance for details on these\n metrics. These metrics do not support sparse matrix inputs.\n\nNote that in the case of 'cityblock', 'cosine' and 'euclidean' (which are\nvalid scipy.spatial.distance metrics), the scikit-learn implementation\nwill be used, which is faster and has support for sparse matrices (except\nfor 'cityblock'). For a verbose description of the metrics from\nscikit-learn, see the __doc__ of the sklearn.pairwise.distance_metrics\nfunction.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)\n Array of pairwise distances between samples, or a feature array.\n The shape of the array should be (n_samples_X, n_samples_X) if\n metric == \"precomputed\" and (n_samples_X, n_features) otherwise.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n An optional second feature array. 
Only allowed if\n metric != \"precomputed\".\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by scipy.spatial.distance.pdist for its metric parameter, or\n a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``.\n If metric is \"precomputed\", X is assumed to be a distance matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in array. Ignored\n for a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``. The\n possibilities are:\n\n - True: Force all values of array to be finite.\n - False: accepts np.inf, np.nan, pd.NA in array.\n - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n .. versionadded:: 0.22\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`.\n\n**kwds : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a scipy.spatial.distance metric, the parameters are still\n metric dependent. See the scipy docs for usage examples.\n\nReturns\n-------\nD : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_samples_Y)\n A distance matrix D such that D_{i, j} is the distance between the\n ith and jth vectors of the given matrix X, if Y is None.\n If Y is not None, then D_{i, j} is the distance between the ith array\n from X and the jth array from Y.\n\nSee Also\n--------\npairwise_distances_chunked : Performs the same calculation as this\n function, but returns a generator of chunks of the distance matrix, in\n order to limit memory usage.\npaired_distances : Computes the distances between corresponding elements\n of two arrays.", + "code": "@_deprecate_positional_args\ndef pairwise_distances(X, Y=None, metric=\"euclidean\", *, n_jobs=None,\n force_all_finite=True, **kwds):\n \"\"\"Compute the distance matrix from a vector array X and optional Y.\n\n This method takes either a vector array or a distance matrix, and returns\n a distance matrix. If the input is a vector array, the distances are\n computed. If the input is a distances matrix, it is returned instead.\n\n This method provides a safe way to take a distance matrix as input, while\n preserving compatibility with many other algorithms that take a vector\n array.\n\n If Y is given (default is None), then the returned matrix is the pairwise\n distance between the arrays from both X and Y.\n\n Valid values for metric are:\n\n - From scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']. 
These metrics support sparse matrix\n inputs.\n ['nan_euclidean'] but it does not yet support sparse matrices.\n\n - From scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',\n 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',\n 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']\n See the documentation for scipy.spatial.distance for details on these\n metrics. These metrics do not support sparse matrix inputs.\n\n Note that in the case of 'cityblock', 'cosine' and 'euclidean' (which are\n valid scipy.spatial.distance metrics), the scikit-learn implementation\n will be used, which is faster and has support for sparse matrices (except\n for 'cityblock'). For a verbose description of the metrics from\n scikit-learn, see the __doc__ of the sklearn.pairwise.distance_metrics\n function.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_samples_X) or \\\n (n_samples_X, n_features)\n Array of pairwise distances between samples, or a feature array.\n The shape of the array should be (n_samples_X, n_samples_X) if\n metric == \"precomputed\" and (n_samples_X, n_features) otherwise.\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n An optional second feature array. Only allowed if\n metric != \"precomputed\".\n\n metric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by scipy.spatial.distance.pdist for its metric parameter, or\n a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``.\n If metric is \"precomputed\", X is assumed to be a distance matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n force_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in array. Ignored\n for a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``. The\n possibilities are:\n\n - True: Force all values of array to be finite.\n - False: accepts np.inf, np.nan, pd.NA in array.\n - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n .. versionadded:: 0.22\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`.\n\n **kwds : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a scipy.spatial.distance metric, the parameters are still\n metric dependent. 
See the scipy docs for usage examples.\n\n Returns\n -------\n D : ndarray of shape (n_samples_X, n_samples_X) or \\\n (n_samples_X, n_samples_Y)\n A distance matrix D such that D_{i, j} is the distance between the\n ith and jth vectors of the given matrix X, if Y is None.\n If Y is not None, then D_{i, j} is the distance between the ith array\n from X and the jth array from Y.\n\n See Also\n --------\n pairwise_distances_chunked : Performs the same calculation as this\n function, but returns a generator of chunks of the distance matrix, in\n order to limit memory usage.\n paired_distances : Computes the distances between corresponding elements\n of two arrays.\n \"\"\"\n if (metric not in _VALID_METRICS and\n not callable(metric) and metric != \"precomputed\"):\n raise ValueError(\"Unknown metric %s. \"\n \"Valid metrics are %s, or 'precomputed', or a \"\n \"callable\" % (metric, _VALID_METRICS))\n\n if metric == \"precomputed\":\n X, _ = check_pairwise_arrays(X, Y, precomputed=True,\n force_all_finite=force_all_finite)\n\n whom = (\"`pairwise_distances`. Precomputed distance \"\n \" need to have non-negative values.\")\n check_non_negative(X, whom=whom)\n return X\n elif metric in PAIRWISE_DISTANCE_FUNCTIONS:\n func = PAIRWISE_DISTANCE_FUNCTIONS[metric]\n elif callable(metric):\n func = partial(_pairwise_callable, metric=metric,\n force_all_finite=force_all_finite, **kwds)\n else:\n if issparse(X) or issparse(Y):\n raise TypeError(\"scipy distance metrics do not\"\n \" support sparse matrices.\")\n\n dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else None\n\n if (dtype == bool and\n (X.dtype != bool or (Y is not None and Y.dtype != bool))):\n msg = \"Data was converted to boolean for metric %s\" % metric\n warnings.warn(msg, DataConversionWarning)\n\n X, Y = check_pairwise_arrays(X, Y, dtype=dtype,\n force_all_finite=force_all_finite)\n\n # precompute data-derived metric params\n params = _precompute_metric_params(X, Y, metric=metric, **kwds)\n kwds.update(**params)\n\n if effective_n_jobs(n_jobs) == 1 and X is Y:\n return distance.squareform(distance.pdist(X, metric=metric,\n **kwds))\n func = partial(distance.cdist, metric=metric, **kwds)\n\n return _parallel_pairwise(X, Y, func, n_jobs, **kwds)" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin", + "name": "pairwise_distances_argmin", + "qname": "sklearn.metrics.pairwise.pairwise_distances_argmin", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.pairwise_distances_argmin.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_X, n_features)", + "default_value": "", + "description": "Array containing points." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.pairwise_distances_argmin.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples_Y, n_features)", + "default_value": "", + "description": "Arrays containing points." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin/axis", + "name": "axis", + "qname": "sklearn.metrics.pairwise.pairwise_distances_argmin.axis", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Axis along which the argmin and distances are to be computed." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin/metric", + "name": "metric", + "qname": "sklearn.metrics.pairwise.pairwise_distances_argmin.metric", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str or callable", + "default_value": "\"euclidean\"", + "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nDistance matrices are not supported.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin/metric_kwargs", + "name": "metric_kwargs", + "qname": "sklearn.metrics.pairwise.pairwise_distances_argmin.metric_kwargs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Keyword arguments to pass to specified metric function." 
+ }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance).\n\nThis is mostly equivalent to calling:\n\n pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis)\n\nbut uses much less memory, and is faster for large arrays.\n\nThis function works with dense 2D arrays only.", + "docstring": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance).\n\nThis is mostly equivalent to calling:\n\n pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis)\n\nbut uses much less memory, and is faster for large arrays.\n\nThis function works with dense 2D arrays only.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n Array containing points.\n\nY : array-like of shape (n_samples_Y, n_features)\n Arrays containing points.\n\naxis : int, default=1\n Axis along which the argmin and distances are to be computed.\n\nmetric : str or callable, default=\"euclidean\"\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\nmetric_kwargs : dict, default=None\n Keyword arguments to pass to specified metric function.\n\nReturns\n-------\nargmin : numpy.ndarray\n Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\nSee Also\n--------\nsklearn.metrics.pairwise_distances\nsklearn.metrics.pairwise_distances_argmin_min", + "code": "@_deprecate_positional_args\ndef pairwise_distances_argmin(X, Y, *, axis=1, metric=\"euclidean\",\n metric_kwargs=None):\n \"\"\"Compute minimum distances between one point and a set of points.\n\n This function computes for each row in X, the index of the row of Y which\n is closest (according to the specified distance).\n\n This is mostly equivalent to calling:\n\n pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis)\n\n but uses much less memory, and is faster for large arrays.\n\n This function works with dense 2D arrays only.\n\n Parameters\n ----------\n X : array-like of shape (n_samples_X, n_features)\n Array containing points.\n\n Y : array-like of shape (n_samples_Y, n_features)\n Arrays containing points.\n\n axis : int, default=1\n Axis along which the argmin and distances are to be computed.\n\n metric : str or callable, default=\"euclidean\"\n Metric to use for distance computation. 
Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\n metric_kwargs : dict, default=None\n Keyword arguments to pass to specified metric function.\n\n Returns\n -------\n argmin : numpy.ndarray\n Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\n See Also\n --------\n sklearn.metrics.pairwise_distances\n sklearn.metrics.pairwise_distances_argmin_min\n \"\"\"\n if metric_kwargs is None:\n metric_kwargs = {}\n\n return pairwise_distances_argmin_min(X, Y, axis=axis, metric=metric,\n metric_kwargs=metric_kwargs)[0]" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin_min", + "name": "pairwise_distances_argmin_min", + "qname": "sklearn.metrics.pairwise.pairwise_distances_argmin_min", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin_min/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.pairwise_distances_argmin_min.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples_X, n_features)", + "default_value": "", + "description": "Array containing points." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples_X, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin_min/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.pairwise_distances_argmin_min.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples_Y, n_features)", + "default_value": "", + "description": "Array containing points." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples_Y, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin_min/axis", + "name": "axis", + "qname": "sklearn.metrics.pairwise.pairwise_distances_argmin_min.axis", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Axis along which the argmin and distances are to be computed." 
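A hand-checked doctest sketch for the `pairwise_distances_argmin` entry above (the toy points are assumptions chosen so the nearest indices are easy to verify):

>>> import numpy as np
>>> from sklearn.metrics import pairwise_distances_argmin
>>> X = np.array([[0., 0.], [3., 3.]])
>>> Y = np.array([[0., 1.], [2., 2.]])
>>> pairwise_distances_argmin(X, Y)  # index of the closest row of Y for each row of X
array([0, 1])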
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin_min/metric", + "name": "metric", + "qname": "sklearn.metrics.pairwise.pairwise_distances_argmin_min.metric", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str or callable", + "default_value": "'euclidean'", + "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nDistance matrices are not supported.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_argmin_min/metric_kwargs", + "name": "metric_kwargs", + "qname": "sklearn.metrics.pairwise.pairwise_distances_argmin_min.metric_kwargs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Keyword arguments to pass to specified metric function." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance). The minimal distances are\nalso returned.\n\nThis is mostly equivalent to calling:\n\n (pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis),\n pairwise_distances(X, Y=Y, metric=metric).min(axis=axis))\n\nbut uses much less memory, and is faster for large arrays.", + "docstring": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance). The minimal distances are\nalso returned.\n\nThis is mostly equivalent to calling:\n\n (pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis),\n pairwise_distances(X, Y=Y, metric=metric).min(axis=axis))\n\nbut uses much less memory, and is faster for large arrays.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n Array containing points.\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n Array containing points.\n\naxis : int, default=1\n Axis along which the argmin and distances are to be computed.\n\nmetric : str or callable, default='euclidean'\n Metric to use for distance computation. 
Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\nmetric_kwargs : dict, default=None\n Keyword arguments to pass to specified metric function.\n\nReturns\n-------\nargmin : ndarray\n Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\ndistances : ndarray\n distances[i] is the distance between the i-th row in X and the\n argmin[i]-th row in Y.\n\nSee Also\n--------\nsklearn.metrics.pairwise_distances\nsklearn.metrics.pairwise_distances_argmin", + "code": "@_deprecate_positional_args\ndef pairwise_distances_argmin_min(X, Y, *, axis=1, metric=\"euclidean\",\n metric_kwargs=None):\n \"\"\"Compute minimum distances between one point and a set of points.\n\n This function computes for each row in X, the index of the row of Y which\n is closest (according to the specified distance). The minimal distances are\n also returned.\n\n This is mostly equivalent to calling:\n\n (pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis),\n pairwise_distances(X, Y=Y, metric=metric).min(axis=axis))\n\n but uses much less memory, and is faster for large arrays.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n Array containing points.\n\n Y : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n Array containing points.\n\n axis : int, default=1\n Axis along which the argmin and distances are to be computed.\n\n metric : str or callable, default='euclidean'\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. 
This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\n metric_kwargs : dict, default=None\n Keyword arguments to pass to specified metric function.\n\n Returns\n -------\n argmin : ndarray\n Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\n distances : ndarray\n distances[i] is the distance between the i-th row in X and the\n argmin[i]-th row in Y.\n\n See Also\n --------\n sklearn.metrics.pairwise_distances\n sklearn.metrics.pairwise_distances_argmin\n \"\"\"\n X, Y = check_pairwise_arrays(X, Y)\n\n if metric_kwargs is None:\n metric_kwargs = {}\n\n if axis == 0:\n X, Y = Y, X\n\n indices, values = zip(*pairwise_distances_chunked(\n X, Y, reduce_func=_argmin_min_reduce, metric=metric,\n **metric_kwargs))\n indices = np.concatenate(indices)\n values = np.concatenate(values)\n\n return indices, values" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_chunked", + "name": "pairwise_distances_chunked", + "qname": "sklearn.metrics.pairwise.pairwise_distances_chunked", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_chunked/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.pairwise_distances_chunked.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)", + "default_value": "", + "description": "Array of pairwise distances between samples, or a feature array.\nThe shape the array should be (n_samples_X, n_samples_X) if\nmetric='precomputed' and (n_samples_X, n_features) otherwise." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_chunked/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.pairwise_distances_chunked.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "An optional second feature array. Only allowed if\nmetric != \"precomputed\"." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_chunked/reduce_func", + "name": "reduce_func", + "qname": "sklearn.metrics.pairwise.pairwise_distances_chunked.reduce_func", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "The function which is applied on each chunk of the distance matrix,\nreducing it to needed values. 
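The same toy points illustrate `pairwise_distances_argmin_min`, which additionally returns the minimal distances themselves (values verified by hand: 1.0 and sqrt(2)):

>>> import numpy as np
>>> from sklearn.metrics import pairwise_distances_argmin_min
>>> X = np.array([[0., 0.], [3., 3.]])
>>> Y = np.array([[0., 1.], [2., 2.]])
>>> argmin, dist = pairwise_distances_argmin_min(X, Y)
>>> argmin  # same indices as pairwise_distances_argmin
array([0, 1])
>>> dist    # euclidean distance to the selected row of Y
array([1.        , 1.41421356])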
``reduce_func(D_chunk, start)``\nis called repeatedly, where ``D_chunk`` is a contiguous vertical\nslice of the pairwise distance matrix, starting at row ``start``.\nIt should return one of: None; an array, a list, or a sparse matrix\nof length ``D_chunk.shape[0]``; or a tuple of such objects. Returning\nNone is useful for in-place operations, rather than reductions.\n\nIf None, pairwise_distances_chunked returns a generator of vertical\nchunks of the distance matrix." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_chunked/metric", + "name": "metric", + "qname": "sklearn.metrics.pairwise.pairwise_distances_chunked.metric", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str or callable", + "default_value": "'euclidean'", + "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string, it must be one of the options\nallowed by scipy.spatial.distance.pdist for its metric parameter, or\na metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\nIf metric is \"precomputed\", X is assumed to be a distance matrix.\nAlternatively, if metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays from X as input and return a value indicating\nthe distance between them." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_chunked/n_jobs", + "name": "n_jobs", + "qname": "sklearn.metrics.pairwise.pairwise_distances_chunked.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation. This works by breaking\ndown the pairwise matrix into n_jobs even slices and computing them in\nparallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_chunked/working_memory", + "name": "working_memory", + "qname": "sklearn.metrics.pairwise.pairwise_distances_chunked.working_memory", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The sought maximum memory for temporary distance matrix chunks.\nWhen None (default), the value of\n``sklearn.get_config()['working_memory']`` is used." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_distances_chunked/kwds", + "name": "kwds", + "qname": "sklearn.metrics.pairwise.pairwise_distances_chunked.kwds", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Generate a distance matrix chunk by chunk with optional reduction.\n\nIn cases where not all of a pairwise distance matrix needs to be stored at\nonce, this is used to calculate pairwise distances in\n``working_memory``-sized chunks. If ``reduce_func`` is given, it is run\non each chunk and its return values are concatenated into lists, arrays\nor sparse matrices.", + "docstring": "Generate a distance matrix chunk by chunk with optional reduction.\n\nIn cases where not all of a pairwise distance matrix needs to be stored at\nonce, this is used to calculate pairwise distances in\n``working_memory``-sized chunks. If ``reduce_func`` is given, it is run\non each chunk and its return values are concatenated into lists, arrays\nor sparse matrices.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)\n Array of pairwise distances between samples, or a feature array.\n The shape the array should be (n_samples_X, n_samples_X) if\n metric='precomputed' and (n_samples_X, n_features) otherwise.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n An optional second feature array. Only allowed if\n metric != \"precomputed\".\n\nreduce_func : callable, default=None\n The function which is applied on each chunk of the distance matrix,\n reducing it to needed values. ``reduce_func(D_chunk, start)``\n is called repeatedly, where ``D_chunk`` is a contiguous vertical\n slice of the pairwise distance matrix, starting at row ``start``.\n It should return one of: None; an array, a list, or a sparse matrix\n of length ``D_chunk.shape[0]``; or a tuple of such objects. Returning\n None is useful for in-place operations, rather than reductions.\n\n If None, pairwise_distances_chunked returns a generator of vertical\n chunks of the distance matrix.\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by scipy.spatial.distance.pdist for its metric parameter, or\n a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a distance matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nworking_memory : int, default=None\n The sought maximum memory for temporary distance matrix chunks.\n When None (default), the value of\n ``sklearn.get_config()['working_memory']`` is used.\n\n`**kwds` : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a scipy.spatial.distance metric, the parameters are still\n metric dependent. See the scipy docs for usage examples.\n\nYields\n------\nD_chunk : {ndarray, sparse matrix}\n A contiguous slice of distance matrix, optionally processed by\n ``reduce_func``.\n\nExamples\n--------\nWithout reduce_func:\n\n>>> import numpy as np\n>>> from sklearn.metrics import pairwise_distances_chunked\n>>> X = np.random.RandomState(0).rand(5, 3)\n>>> D_chunk = next(pairwise_distances_chunked(X))\n>>> D_chunk\narray([[0. ..., 0.29..., 0.41..., 0.19..., 0.57...],\n [0.29..., 0. ..., 0.57..., 0.41..., 0.76...],\n [0.41..., 0.57..., 0. ..., 0.44..., 0.90...],\n [0.19..., 0.41..., 0.44..., 0. ..., 0.51...],\n [0.57..., 0.76..., 0.90..., 0.51..., 0. ...]])\n\nRetrieve all neighbors and average distance within radius r:\n\n>>> r = .2\n>>> def reduce_func(D_chunk, start):\n... neigh = [np.flatnonzero(d < r) for d in D_chunk]\n... avg_dist = (D_chunk * (D_chunk < r)).mean(axis=1)\n... return neigh, avg_dist\n>>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func)\n>>> neigh, avg_dist = next(gen)\n>>> neigh\n[array([0, 3]), array([1]), array([2]), array([0, 3]), array([4])]\n>>> avg_dist\narray([0.039..., 0. , 0. , 0.039..., 0. ])\n\nWhere r is defined per sample, we need to make use of ``start``:\n\n>>> r = [.2, .4, .4, .3, .1]\n>>> def reduce_func(D_chunk, start):\n... neigh = [np.flatnonzero(d < r[i])\n... for i, d in enumerate(D_chunk, start)]\n... return neigh\n>>> neigh = next(pairwise_distances_chunked(X, reduce_func=reduce_func))\n>>> neigh\n[array([0, 3]), array([0, 1]), array([2]), array([0, 3]), array([4])]\n\nForce row-by-row generation by reducing ``working_memory``:\n\n>>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func,\n... working_memory=0)\n>>> next(gen)\n[array([0, 3])]\n>>> next(gen)\n[array([0, 1])]", + "code": "@_deprecate_positional_args\ndef pairwise_distances_chunked(X, Y=None, *, reduce_func=None,\n metric='euclidean', n_jobs=None,\n working_memory=None, **kwds):\n \"\"\"Generate a distance matrix chunk by chunk with optional reduction.\n\n In cases where not all of a pairwise distance matrix needs to be stored at\n once, this is used to calculate pairwise distances in\n ``working_memory``-sized chunks. If ``reduce_func`` is given, it is run\n on each chunk and its return values are concatenated into lists, arrays\n or sparse matrices.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_samples_X) or \\\n (n_samples_X, n_features)\n Array of pairwise distances between samples, or a feature array.\n The shape the array should be (n_samples_X, n_samples_X) if\n metric='precomputed' and (n_samples_X, n_features) otherwise.\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n An optional second feature array. Only allowed if\n metric != \"precomputed\".\n\n reduce_func : callable, default=None\n The function which is applied on each chunk of the distance matrix,\n reducing it to needed values. 
``reduce_func(D_chunk, start)``\n is called repeatedly, where ``D_chunk`` is a contiguous vertical\n slice of the pairwise distance matrix, starting at row ``start``.\n It should return one of: None; an array, a list, or a sparse matrix\n of length ``D_chunk.shape[0]``; or a tuple of such objects. Returning\n None is useful for in-place operations, rather than reductions.\n\n If None, pairwise_distances_chunked returns a generator of vertical\n chunks of the distance matrix.\n\n metric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by scipy.spatial.distance.pdist for its metric parameter, or\n a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a distance matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n working_memory : int, default=None\n The sought maximum memory for temporary distance matrix chunks.\n When None (default), the value of\n ``sklearn.get_config()['working_memory']`` is used.\n\n `**kwds` : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a scipy.spatial.distance metric, the parameters are still\n metric dependent. See the scipy docs for usage examples.\n\n Yields\n ------\n D_chunk : {ndarray, sparse matrix}\n A contiguous slice of distance matrix, optionally processed by\n ``reduce_func``.\n\n Examples\n --------\n Without reduce_func:\n\n >>> import numpy as np\n >>> from sklearn.metrics import pairwise_distances_chunked\n >>> X = np.random.RandomState(0).rand(5, 3)\n >>> D_chunk = next(pairwise_distances_chunked(X))\n >>> D_chunk\n array([[0. ..., 0.29..., 0.41..., 0.19..., 0.57...],\n [0.29..., 0. ..., 0.57..., 0.41..., 0.76...],\n [0.41..., 0.57..., 0. ..., 0.44..., 0.90...],\n [0.19..., 0.41..., 0.44..., 0. ..., 0.51...],\n [0.57..., 0.76..., 0.90..., 0.51..., 0. ...]])\n\n Retrieve all neighbors and average distance within radius r:\n\n >>> r = .2\n >>> def reduce_func(D_chunk, start):\n ... neigh = [np.flatnonzero(d < r) for d in D_chunk]\n ... avg_dist = (D_chunk * (D_chunk < r)).mean(axis=1)\n ... return neigh, avg_dist\n >>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func)\n >>> neigh, avg_dist = next(gen)\n >>> neigh\n [array([0, 3]), array([1]), array([2]), array([0, 3]), array([4])]\n >>> avg_dist\n array([0.039..., 0. , 0. , 0.039..., 0. ])\n\n Where r is defined per sample, we need to make use of ``start``:\n\n >>> r = [.2, .4, .4, .3, .1]\n >>> def reduce_func(D_chunk, start):\n ... neigh = [np.flatnonzero(d < r[i])\n ... for i, d in enumerate(D_chunk, start)]\n ... 
return neigh\n >>> neigh = next(pairwise_distances_chunked(X, reduce_func=reduce_func))\n >>> neigh\n [array([0, 3]), array([0, 1]), array([2]), array([0, 3]), array([4])]\n\n Force row-by-row generation by reducing ``working_memory``:\n\n >>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func,\n ... working_memory=0)\n >>> next(gen)\n [array([0, 3])]\n >>> next(gen)\n [array([0, 1])]\n \"\"\"\n n_samples_X = _num_samples(X)\n if metric == 'precomputed':\n slices = (slice(0, n_samples_X),)\n else:\n if Y is None:\n Y = X\n # We get as many rows as possible within our working_memory budget to\n # store len(Y) distances in each row of output.\n #\n # Note:\n # - this will get at least 1 row, even if 1 row of distances will\n # exceed working_memory.\n # - this does not account for any temporary memory usage while\n # calculating distances (e.g. difference of vectors in manhattan\n # distance.\n chunk_n_rows = get_chunk_n_rows(row_bytes=8 * _num_samples(Y),\n max_n_rows=n_samples_X,\n working_memory=working_memory)\n slices = gen_batches(n_samples_X, chunk_n_rows)\n\n # precompute data-derived metric params\n params = _precompute_metric_params(X, Y, metric=metric, **kwds)\n kwds.update(**params)\n\n for sl in slices:\n if sl.start == 0 and sl.stop == n_samples_X:\n X_chunk = X # enable optimised paths for X is Y\n else:\n X_chunk = X[sl]\n D_chunk = pairwise_distances(X_chunk, Y, metric=metric,\n n_jobs=n_jobs, **kwds)\n if ((X is Y or Y is None)\n and PAIRWISE_DISTANCE_FUNCTIONS.get(metric, None)\n is euclidean_distances):\n # zeroing diagonal, taking care of aliases of \"euclidean\",\n # i.e. \"l2\"\n D_chunk.flat[sl.start::_num_samples(X) + 1] = 0\n if reduce_func is not None:\n chunk_size = D_chunk.shape[0]\n D_chunk = reduce_func(D_chunk, sl.start)\n _check_chunk_size(D_chunk, chunk_size)\n yield D_chunk" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_kernels", + "name": "pairwise_kernels", + "qname": "sklearn.metrics.pairwise.pairwise_kernels", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_kernels/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.pairwise_kernels.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)", + "default_value": "", + "description": "Array of pairwise kernels between samples, or a feature array.\nThe shape of the array should be (n_samples_X, n_samples_X) if\nmetric == \"precomputed\" and (n_samples_X, n_features) otherwise." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_kernels/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.pairwise_kernels.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "A second feature array only if X has shape (n_samples_X, n_features)." 
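Complementing the doctests embedded in the `pairwise_distances_chunked` entry above, a sketch of a whole-matrix reduction that never materializes the full 1000 x 1000 distance matrix; the 1 MiB `working_memory` budget is an arbitrary assumption picked to force several chunks:

>>> import numpy as np
>>> from sklearn.metrics import pairwise_distances_chunked
>>> X = np.random.RandomState(0).rand(1000, 3)
>>> def reduce_func(D_chunk, start):
...     return D_chunk.sum(axis=1)  # one scalar per row of the chunk
>>> gen = pairwise_distances_chunked(X, reduce_func=reduce_func, working_memory=1)
>>> row_sums = np.concatenate(list(gen))
>>> row_sums.shape
(1000,)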
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_kernels/metric", + "name": "metric", + "qname": "sklearn.metrics.pairwise.pairwise_kernels.metric", + "default_value": "'linear'", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "str or callable", + "default_value": "\"linear\"", + "description": "The metric to use when calculating kernel between instances in a\nfeature array. If metric is a string, it must be one of the metrics\nin pairwise.PAIRWISE_KERNEL_FUNCTIONS.\nIf metric is \"precomputed\", X is assumed to be a kernel matrix.\nAlternatively, if metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two rows from X as input and return the corresponding\nkernel value as a single number. This means that callables from\n:mod:`sklearn.metrics.pairwise` are not allowed, as they operate on\nmatrices, not single samples. Use the string identifying the kernel\ninstead." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_kernels/filter_params", + "name": "filter_params", + "qname": "sklearn.metrics.pairwise.pairwise_kernels.filter_params", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to filter invalid parameters or not." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_kernels/n_jobs", + "name": "n_jobs", + "qname": "sklearn.metrics.pairwise.pairwise_kernels.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation. This works by breaking\ndown the pairwise matrix into n_jobs even slices and computing them in\nparallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/pairwise_kernels/kwds", + "name": "kwds", + "qname": "sklearn.metrics.pairwise.pairwise_kernels.kwds", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "optional keyword parameters", + "default_value": "", + "description": "Any further parameters are passed directly to the kernel function." + }, + "type": { + "kind": "NamedType", + "name": "optional keyword parameters" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute the kernel between arrays X and optional array Y.\n\nThis method takes either a vector array or a kernel matrix, and returns\na kernel matrix. If the input is a vector array, the kernels are\ncomputed. 
If the input is a kernel matrix, it is returned instead.\n\nThis method provides a safe way to take a kernel matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\nkernel between the arrays from both X and Y.\n\nValid values for metric are:\n ['additive_chi2', 'chi2', 'linear', 'poly', 'polynomial', 'rbf',\n 'laplacian', 'sigmoid', 'cosine']\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the kernel between arrays X and optional array Y.\n\nThis method takes either a vector array or a kernel matrix, and returns\na kernel matrix. If the input is a vector array, the kernels are\ncomputed. If the input is a kernel matrix, it is returned instead.\n\nThis method provides a safe way to take a kernel matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\nkernel between the arrays from both X and Y.\n\nValid values for metric are:\n ['additive_chi2', 'chi2', 'linear', 'poly', 'polynomial', 'rbf',\n 'laplacian', 'sigmoid', 'cosine']\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)\n Array of pairwise kernels between samples, or a feature array.\n The shape of the array should be (n_samples_X, n_samples_X) if\n metric == \"precomputed\" and (n_samples_X, n_features) otherwise.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n A second feature array only if X has shape (n_samples_X, n_features).\n\nmetric : str or callable, default=\"linear\"\n The metric to use when calculating kernel between instances in a\n feature array. If metric is a string, it must be one of the metrics\n in pairwise.PAIRWISE_KERNEL_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two rows from X as input and return the corresponding\n kernel value as a single number. This means that callables from\n :mod:`sklearn.metrics.pairwise` are not allowed, as they operate on\n matrices, not single samples. Use the string identifying the kernel\n instead.\n\nfilter_params : bool, default=False\n Whether to filter invalid parameters or not.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\n**kwds : optional keyword parameters\n Any further parameters are passed directly to the kernel function.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_samples_Y)\n A kernel matrix K such that K_{i, j} is the kernel between the\n ith and jth vectors of the given matrix X, if Y is None.\n If Y is not None, then K_{i, j} is the kernel between the ith array\n from X and the jth array from Y.\n\nNotes\n-----\nIf metric is 'precomputed', Y is ignored and X is returned.", + "code": "@_deprecate_positional_args\ndef pairwise_kernels(X, Y=None, metric=\"linear\", *, filter_params=False,\n n_jobs=None, **kwds):\n \"\"\"Compute the kernel between arrays X and optional array Y.\n\n This method takes either a vector array or a kernel matrix, and returns\n a kernel matrix. If the input is a vector array, the kernels are\n computed. If the input is a kernel matrix, it is returned instead.\n\n This method provides a safe way to take a kernel matrix as input, while\n preserving compatibility with many other algorithms that take a vector\n array.\n\n If Y is given (default is None), then the returned matrix is the pairwise\n kernel between the arrays from both X and Y.\n\n Valid values for metric are:\n ['additive_chi2', 'chi2', 'linear', 'poly', 'polynomial', 'rbf',\n 'laplacian', 'sigmoid', 'cosine']\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_samples_X) or \\\n (n_samples_X, n_features)\n Array of pairwise kernels between samples, or a feature array.\n The shape of the array should be (n_samples_X, n_samples_X) if\n metric == \"precomputed\" and (n_samples_X, n_features) otherwise.\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n A second feature array only if X has shape (n_samples_X, n_features).\n\n metric : str or callable, default=\"linear\"\n The metric to use when calculating kernel between instances in a\n feature array. If metric is a string, it must be one of the metrics\n in pairwise.PAIRWISE_KERNEL_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two rows from X as input and return the corresponding\n kernel value as a single number. This means that callables from\n :mod:`sklearn.metrics.pairwise` are not allowed, as they operate on\n matrices, not single samples. Use the string identifying the kernel\n instead.\n\n filter_params : bool, default=False\n Whether to filter invalid parameters or not.\n\n n_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\n **kwds : optional keyword parameters\n Any further parameters are passed directly to the kernel function.\n\n Returns\n -------\n K : ndarray of shape (n_samples_X, n_samples_X) or \\\n (n_samples_X, n_samples_Y)\n A kernel matrix K such that K_{i, j} is the kernel between the\n ith and jth vectors of the given matrix X, if Y is None.\n If Y is not None, then K_{i, j} is the kernel between the ith array\n from X and the jth array from Y.\n\n Notes\n -----\n If metric is 'precomputed', Y is ignored and X is returned.\n\n \"\"\"\n # import GPKernel locally to prevent circular imports\n from ..gaussian_process.kernels import Kernel as GPKernel\n\n if metric == \"precomputed\":\n X, _ = check_pairwise_arrays(X, Y, precomputed=True)\n return X\n elif isinstance(metric, GPKernel):\n func = metric.__call__\n elif metric in PAIRWISE_KERNEL_FUNCTIONS:\n if filter_params:\n kwds = {k: kwds[k] for k in kwds\n if k in KERNEL_PARAMS[metric]}\n func = PAIRWISE_KERNEL_FUNCTIONS[metric]\n elif callable(metric):\n func = partial(_pairwise_callable, metric=metric, **kwds)\n else:\n raise ValueError(\"Unknown kernel %r\" % metric)\n\n return _parallel_pairwise(X, Y, func, n_jobs, **kwds)" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/polynomial_kernel", + "name": "polynomial_kernel", + "qname": "sklearn.metrics.pairwise.polynomial_kernel", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/polynomial_kernel/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.polynomial_kernel.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/polynomial_kernel/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.polynomial_kernel.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/polynomial_kernel/degree", + "name": "degree", + "qname": "sklearn.metrics.pairwise.polynomial_kernel.degree", + "default_value": "3", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "3", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/polynomial_kernel/gamma", + "name": "gamma", + "qname": "sklearn.metrics.pairwise.polynomial_kernel.gamma", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "None", + "description": "If None, defaults to 1.0 / n_features." 
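A doctest-style sketch for the `pairwise_kernels` entry above; gamma=0.5 and the two toy points are assumptions picked so the off-diagonal value is exp(-1), worked by hand:

>>> import numpy as np
>>> from sklearn.metrics.pairwise import pairwise_kernels
>>> X = np.array([[0., 1.], [1., 0.]])
>>> K = pairwise_kernels(X, metric='rbf', gamma=0.5)
>>> np.round(K, 3)  # off-diagonal: exp(-0.5 * ||x0 - x1||^2) = exp(-1)
array([[1.   , 0.368],
       [0.368, 1.   ]])

Per the code in the entry, `filter_params=True` prunes keyword arguments not listed in `KERNEL_PARAMS[metric]` before calling the kernel, so stray parameters are dropped instead of raising a TypeError.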
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/polynomial_kernel/coef0", + "name": "coef0", + "qname": "sklearn.metrics.pairwise.polynomial_kernel.coef0", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute the polynomial kernel between X and Y::\n\n K(X, Y) = (gamma + coef0)^degree\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the polynomial kernel between X and Y::\n\n K(X, Y) = (gamma + coef0)^degree\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ndegree : int, default=3\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\ncoef0 : float, default=1\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)", + "code": "def polynomial_kernel(X, Y=None, degree=3, gamma=None, coef0=1):\n \"\"\"\n Compute the polynomial kernel between X and Y::\n\n K(X, Y) = (gamma + coef0)^degree\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n\n degree : int, default=3\n\n gamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\n coef0 : float, default=1\n\n Returns\n -------\n Gram matrix : ndarray of shape (n_samples_X, n_samples_Y)\n \"\"\"\n X, Y = check_pairwise_arrays(X, Y)\n if gamma is None:\n gamma = 1.0 / X.shape[1]\n\n K = safe_sparse_dot(X, Y.T, dense_output=True)\n K *= gamma\n K += coef0\n K **= degree\n return K" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/rbf_kernel", + "name": "rbf_kernel", + "qname": "sklearn.metrics.pairwise.rbf_kernel", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/rbf_kernel/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.rbf_kernel.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/rbf_kernel/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.rbf_kernel.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/rbf_kernel/gamma", + "name": "gamma", + "qname": "sklearn.metrics.pairwise.rbf_kernel.gamma", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "None", + "description": "If None, defaults to 1.0 / n_features." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute the rbf (gaussian) kernel between X and Y::\n\n K(x, y) = exp(-gamma ||x-y||^2)\n\nfor each pair of rows x in X and y in Y.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the rbf (gaussian) kernel between X and Y::\n\n K(x, y) = exp(-gamma ||x-y||^2)\n\nfor each pair of rows x in X and y in Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)", + "code": "def rbf_kernel(X, Y=None, gamma=None):\n \"\"\"\n Compute the rbf (gaussian) kernel between X and Y::\n\n K(x, y) = exp(-gamma ||x-y||^2)\n\n for each pair of rows x in X and y in Y.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n\n gamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\n Returns\n -------\n kernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)\n \"\"\"\n X, Y = check_pairwise_arrays(X, Y)\n if gamma is None:\n gamma = 1.0 / X.shape[1]\n\n K = euclidean_distances(X, Y, squared=True)\n K *= -gamma\n np.exp(K, K) # exponentiate K in-place\n return K" + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/sigmoid_kernel", + "name": "sigmoid_kernel", + "qname": "sklearn.metrics.pairwise.sigmoid_kernel", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.pairwise/sigmoid_kernel/X", + "name": "X", + "qname": "sklearn.metrics.pairwise.sigmoid_kernel.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_X, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_X, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/sigmoid_kernel/Y", + "name": "Y", + "qname": "sklearn.metrics.pairwise.sigmoid_kernel.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples_Y, n_features)", + "default_value": "None", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_Y, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/sigmoid_kernel/gamma", + "name": "gamma", + "qname": "sklearn.metrics.pairwise.sigmoid_kernel.gamma", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "None", + "description": "If None, defaults to 1.0 / n_features." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.metrics.pairwise/sigmoid_kernel/coef0", + "name": "coef0", + "qname": "sklearn.metrics.pairwise.sigmoid_kernel.coef0", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute the sigmoid kernel between X and Y::\n\n K(X, Y) = tanh(gamma + coef0)\n\nRead more in the :ref:`User Guide `.", + "docstring": "Compute the sigmoid kernel between X and Y::\n\n K(X, Y) = tanh(gamma + coef0)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\ncoef0 : float, default=1\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)", + "code": "def sigmoid_kernel(X, Y=None, gamma=None, coef0=1):\n \"\"\"\n Compute the sigmoid kernel between X and Y::\n\n K(X, Y) = tanh(gamma + coef0)\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples_X, n_features)\n\n Y : ndarray of shape (n_samples_Y, n_features), default=None\n\n gamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\n coef0 : float, default=1\n\n Returns\n -------\n Gram matrix : ndarray of shape (n_samples_X, n_samples_Y)\n \"\"\"\n X, Y = check_pairwise_arrays(X, Y)\n if gamma is None:\n gamma = 1.0 / X.shape[1]\n\n K = safe_sparse_dot(X, Y.T, dense_output=True)\n K *= gamma\n K += coef0\n np.tanh(K, K) # compute tanh in-place\n return K" + }, + { + "id": "scikit-learn/sklearn.metrics.setup/configuration", + "name": "configuration", + "qname": "sklearn.metrics.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.metrics.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.metrics.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.metrics.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.metrics.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package=\"\", top_path=None):\n config = Configuration(\"metrics\", parent_package, top_path)\n\n libraries = []\n if os.name == 'posix':\n libraries.append('m')\n\n config.add_subpackage('_plot')\n config.add_subpackage('_plot.tests')\n config.add_subpackage('cluster')\n\n config.add_extension(\"_pairwise_fast\",\n sources=[\"_pairwise_fast.pyx\"],\n libraries=libraries)\n\n config.add_subpackage('tests')\n\n return config" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/__init__", + "name": "__init__", + "qname": "sklearn.mixture._base.BaseMixture.__init__", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.mixture._base/BaseMixture/__init__/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/__init__/n_components", + "name": "n_components", + "qname": "sklearn.mixture._base.BaseMixture.__init__.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/__init__/tol", + "name": "tol", + "qname": "sklearn.mixture._base.BaseMixture.__init__.tol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/__init__/reg_covar", + "name": "reg_covar", + "qname": "sklearn.mixture._base.BaseMixture.__init__.reg_covar", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.mixture._base.BaseMixture.__init__.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/__init__/n_init", + "name": "n_init", + "qname": "sklearn.mixture._base.BaseMixture.__init__.n_init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/__init__/init_params", + "name": "init_params", + "qname": "sklearn.mixture._base.BaseMixture.__init__.init_params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/__init__/random_state", + "name": "random_state", + "qname": "sklearn.mixture._base.BaseMixture.__init__.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.mixture._base.BaseMixture.__init__.warm_start", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/__init__/verbose", + "name": "verbose", + "qname": "sklearn.mixture._base.BaseMixture.__init__.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.mixture._base/BaseMixture/__init__/verbose_interval", + "name": "verbose_interval", + "qname": "sklearn.mixture._base.BaseMixture.__init__.verbose_interval", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for mixture models.\n\nThis abstract class specifies an interface for all mixture classes and\nprovides basic common methods for mixture models.", + "docstring": "", + "code": " def __init__(self, n_components, tol, reg_covar,\n max_iter, n_init, init_params, random_state, warm_start,\n verbose, verbose_interval):\n self.n_components = n_components\n self.tol = tol\n self.reg_covar = reg_covar\n self.max_iter = max_iter\n self.n_init = n_init\n self.init_params = init_params\n self.random_state = random_state\n self.warm_start = warm_start\n self.verbose = verbose\n self.verbose_interval = verbose_interval" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_check_initial_parameters", + "name": "_check_initial_parameters", + "qname": "sklearn.mixture._base.BaseMixture._check_initial_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_check_initial_parameters/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._check_initial_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_check_initial_parameters/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture._check_initial_parameters.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check values of the basic parameters.", + "docstring": "Check values of the basic parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)", + "code": " def _check_initial_parameters(self, X):\n \"\"\"Check values of the basic parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n \"\"\"\n if self.n_components < 1:\n raise ValueError(\"Invalid value for 'n_components': %d \"\n \"Estimation requires at least one component\"\n % self.n_components)\n\n if self.tol < 0.:\n raise ValueError(\"Invalid value for 'tol': %.5f \"\n \"Tolerance used by the EM must be non-negative\"\n % self.tol)\n\n if self.n_init < 1:\n raise ValueError(\"Invalid value for 'n_init': %d \"\n \"Estimation requires at least one run\"\n % self.n_init)\n\n if self.max_iter < 1:\n raise ValueError(\"Invalid value for 'max_iter': %d \"\n \"Estimation requires at least one iteration\"\n % self.max_iter)\n\n if self.reg_covar < 0.:\n raise ValueError(\"Invalid value for 'reg_covar': %.5f \"\n \"regularization on covariance must be \"\n \"non-negative\"\n % self.reg_covar)\n\n # Check all the parameters values of the derived class\n self._check_parameters(X)" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_check_parameters", + "name": 
"_check_parameters", + "qname": "sklearn.mixture._base.BaseMixture._check_parameters", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_check_parameters/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._check_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_check_parameters/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture._check_parameters.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check initial parameters of the derived class.", + "docstring": "Check initial parameters of the derived class.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)", + "code": " @abstractmethod\n def _check_parameters(self, X):\n \"\"\"Check initial parameters of the derived class.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n \"\"\"\n pass" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_e_step", + "name": "_e_step", + "qname": "sklearn.mixture._base.BaseMixture._e_step", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_e_step/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._e_step.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_e_step/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture._e_step.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "E step.", + "docstring": "E step.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nlog_prob_norm : float\n Mean of the logarithms of the probabilities of each sample in X\n\nlog_responsibility : array, shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.", + "code": " def _e_step(self, X):\n \"\"\"E step.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n log_prob_norm : float\n Mean of the logarithms of the probabilities of each sample in X\n\n log_responsibility : array, shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.\n \"\"\"\n log_prob_norm, log_resp = self._estimate_log_prob_resp(X)\n return np.mean(log_prob_norm), log_resp" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_log_prob", + "name": "_estimate_log_prob", + "qname": 
"sklearn.mixture._base.BaseMixture._estimate_log_prob", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_log_prob/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._estimate_log_prob.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_log_prob/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture._estimate_log_prob.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the log-probabilities log P(X | Z).\n\nCompute the log-probabilities per each component for each sample.", + "docstring": "Estimate the log-probabilities log P(X | Z).\n\nCompute the log-probabilities per each component for each sample.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nlog_prob : array, shape (n_samples, n_component)", + "code": " @abstractmethod\n def _estimate_log_prob(self, X):\n \"\"\"Estimate the log-probabilities log P(X | Z).\n\n Compute the log-probabilities per each component for each sample.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n log_prob : array, shape (n_samples, n_component)\n \"\"\"\n pass" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_log_prob_resp", + "name": "_estimate_log_prob_resp", + "qname": "sklearn.mixture._base.BaseMixture._estimate_log_prob_resp", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_log_prob_resp/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._estimate_log_prob_resp.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_log_prob_resp/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture._estimate_log_prob_resp.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate log probabilities and responsibilities for each sample.\n\nCompute the log probabilities, weighted log probabilities per\ncomponent and responsibilities for each sample in X with respect to\nthe current state of the model.", + "docstring": "Estimate log probabilities and responsibilities for each sample.\n\nCompute the log probabilities, weighted log probabilities per\ncomponent and responsibilities for each sample in X with respect to\nthe current state of the model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nlog_prob_norm : array, shape (n_samples,)\n log 
p(X)\n\nlog_responsibilities : array, shape (n_samples, n_components)\n logarithm of the responsibilities", + "code": " def _estimate_log_prob_resp(self, X):\n \"\"\"Estimate log probabilities and responsibilities for each sample.\n\n Compute the log probabilities, weighted log probabilities per\n component and responsibilities for each sample in X with respect to\n the current state of the model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n log_prob_norm : array, shape (n_samples,)\n log p(X)\n\n log_responsibilities : array, shape (n_samples, n_components)\n logarithm of the responsibilities\n \"\"\"\n weighted_log_prob = self._estimate_weighted_log_prob(X)\n log_prob_norm = logsumexp(weighted_log_prob, axis=1)\n with np.errstate(under='ignore'):\n # ignore underflow\n log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis]\n return log_prob_norm, log_resp" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_log_weights", + "name": "_estimate_log_weights", + "qname": "sklearn.mixture._base.BaseMixture._estimate_log_weights", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_log_weights/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._estimate_log_weights.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate log-weights in EM algorithm, E[ log pi ] in VB algorithm.", + "docstring": "Estimate log-weights in EM algorithm, E[ log pi ] in VB algorithm.\n\nReturns\n-------\nlog_weight : array, shape (n_components, )", + "code": " @abstractmethod\n def _estimate_log_weights(self):\n \"\"\"Estimate log-weights in EM algorithm, E[ log pi ] in VB algorithm.\n\n Returns\n -------\n log_weight : array, shape (n_components, )\n \"\"\"\n pass" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_weighted_log_prob", + "name": "_estimate_weighted_log_prob", + "qname": "sklearn.mixture._base.BaseMixture._estimate_weighted_log_prob", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_weighted_log_prob/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._estimate_weighted_log_prob.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_estimate_weighted_log_prob/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture._estimate_weighted_log_prob.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the weighted log-probabilities, log P(X | Z) + log weights.", + "docstring": "Estimate the weighted log-probabilities, log P(X | Z) + log weights.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nweighted_log_prob : array, shape (n_samples, 
n_component)", + "code": " def _estimate_weighted_log_prob(self, X):\n \"\"\"Estimate the weighted log-probabilities, log P(X | Z) + log weights.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n weighted_log_prob : array, shape (n_samples, n_component)\n \"\"\"\n return self._estimate_log_prob(X) + self._estimate_log_weights()" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_get_parameters", + "name": "_get_parameters", + "qname": "sklearn.mixture._base.BaseMixture._get_parameters", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_get_parameters/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._get_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @abstractmethod\n def _get_parameters(self):\n pass" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_initialize", + "name": "_initialize", + "qname": "sklearn.mixture._base.BaseMixture._initialize", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_initialize/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._initialize.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_initialize/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture._initialize.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_initialize/resp", + "name": "resp", + "qname": "sklearn.mixture._base.BaseMixture._initialize.resp", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Initialize the model parameters of the derived class.", + "docstring": "Initialize the model parameters of the derived class.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nresp : array-like of shape (n_samples, n_components)", + "code": " @abstractmethod\n def _initialize(self, X, resp):\n \"\"\"Initialize the model parameters of the derived class.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n resp : array-like of shape (n_samples, n_components)\n \"\"\"\n pass" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_initialize_parameters", + "name": "_initialize_parameters", + "qname": "sklearn.mixture._base.BaseMixture._initialize_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_initialize_parameters/self", 
+ "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._initialize_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_initialize_parameters/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture._initialize_parameters.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_initialize_parameters/random_state", + "name": "random_state", + "qname": "sklearn.mixture._base.BaseMixture._initialize_parameters.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "RandomState", + "default_value": "", + "description": "A random number generator instance that controls the random seed\nused for the method chosen to initialize the parameters." + }, + "type": { + "kind": "NamedType", + "name": "RandomState" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Initialize the model parameters.", + "docstring": "Initialize the model parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nrandom_state : RandomState\n A random number generator instance that controls the random seed\n used for the method chosen to initialize the parameters.", + "code": " def _initialize_parameters(self, X, random_state):\n \"\"\"Initialize the model parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n random_state : RandomState\n A random number generator instance that controls the random seed\n used for the method chosen to initialize the parameters.\n \"\"\"\n n_samples, _ = X.shape\n\n if self.init_params == 'kmeans':\n resp = np.zeros((n_samples, self.n_components))\n label = cluster.KMeans(n_clusters=self.n_components, n_init=1,\n random_state=random_state).fit(X).labels_\n resp[np.arange(n_samples), label] = 1\n elif self.init_params == 'random':\n resp = random_state.rand(n_samples, self.n_components)\n resp /= resp.sum(axis=1)[:, np.newaxis]\n else:\n raise ValueError(\"Unimplemented initialization method '%s'\"\n % self.init_params)\n\n self._initialize(X, resp)" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_m_step", + "name": "_m_step", + "qname": "sklearn.mixture._base.BaseMixture._m_step", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_m_step/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._m_step.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_m_step/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture._m_step.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, 
n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_m_step/log_resp", + "name": "log_resp", + "qname": "sklearn.mixture._base.BaseMixture._m_step.log_resp", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "Logarithm of the posterior probabilities (or responsibilities) of\nthe point of each sample in X." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "M step.", + "docstring": "M step.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nlog_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.", + "code": " @abstractmethod\n def _m_step(self, X, log_resp):\n \"\"\"M step.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n log_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.\n \"\"\"\n pass" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_print_verbose_msg_init_beg", + "name": "_print_verbose_msg_init_beg", + "qname": "sklearn.mixture._base.BaseMixture._print_verbose_msg_init_beg", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_print_verbose_msg_init_beg/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._print_verbose_msg_init_beg.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_print_verbose_msg_init_beg/n_init", + "name": "n_init", + "qname": "sklearn.mixture._base.BaseMixture._print_verbose_msg_init_beg.n_init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Print verbose message on initialization.", + "docstring": "Print verbose message on initialization.", + "code": " def _print_verbose_msg_init_beg(self, n_init):\n \"\"\"Print verbose message on initialization.\"\"\"\n if self.verbose == 1:\n print(\"Initialization %d\" % n_init)\n elif self.verbose >= 2:\n print(\"Initialization %d\" % n_init)\n self._init_prev_time = time()\n self._iter_prev_time = self._init_prev_time" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_print_verbose_msg_init_end", + "name": "_print_verbose_msg_init_end", + "qname": "sklearn.mixture._base.BaseMixture._print_verbose_msg_init_end", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_print_verbose_msg_init_end/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._print_verbose_msg_init_end.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_print_verbose_msg_init_end/ll", + "name": "ll", + "qname": 
"sklearn.mixture._base.BaseMixture._print_verbose_msg_init_end.ll", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Print verbose message on the end of iteration.", + "docstring": "Print verbose message on the end of iteration.", + "code": " def _print_verbose_msg_init_end(self, ll):\n \"\"\"Print verbose message on the end of iteration.\"\"\"\n if self.verbose == 1:\n print(\"Initialization converged: %s\" % self.converged_)\n elif self.verbose >= 2:\n print(\"Initialization converged: %s\\t time lapse %.5fs\\t ll %.5f\" %\n (self.converged_, time() - self._init_prev_time, ll))" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_print_verbose_msg_iter_end", + "name": "_print_verbose_msg_iter_end", + "qname": "sklearn.mixture._base.BaseMixture._print_verbose_msg_iter_end", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_print_verbose_msg_iter_end/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._print_verbose_msg_iter_end.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_print_verbose_msg_iter_end/n_iter", + "name": "n_iter", + "qname": "sklearn.mixture._base.BaseMixture._print_verbose_msg_iter_end.n_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_print_verbose_msg_iter_end/diff_ll", + "name": "diff_ll", + "qname": "sklearn.mixture._base.BaseMixture._print_verbose_msg_iter_end.diff_ll", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Print verbose message on initialization.", + "docstring": "Print verbose message on initialization.", + "code": " def _print_verbose_msg_iter_end(self, n_iter, diff_ll):\n \"\"\"Print verbose message on initialization.\"\"\"\n if n_iter % self.verbose_interval == 0:\n if self.verbose == 1:\n print(\" Iteration %d\" % n_iter)\n elif self.verbose >= 2:\n cur_time = time()\n print(\" Iteration %d\\t time lapse %.5fs\\t ll change %.5f\" % (\n n_iter, cur_time - self._iter_prev_time, diff_ll))\n self._iter_prev_time = cur_time" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_set_parameters", + "name": "_set_parameters", + "qname": "sklearn.mixture._base.BaseMixture._set_parameters", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_set_parameters/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture._set_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/_set_parameters/params", + "name": "params", + "qname": "sklearn.mixture._base.BaseMixture._set_parameters.params", + 
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @abstractmethod\n def _set_parameters(self, params):\n pass" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/fit", + "name": "fit", + "qname": "sklearn.mixture._base.BaseMixture.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/fit/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/fit/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "List of n_features-dimensional data points. Each row\ncorresponds to a single data point." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/fit/y", + "name": "y", + "qname": "sklearn.mixture._base.BaseMixture.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate model parameters with the EM algorithm.\n\nThe method fits the model ``n_init`` times and sets the parameters with\nwhich the model has the largest likelihood or lower bound. Within each\ntrial, the method iterates between E-step and M-step for ``max_iter``\ntimes until the change of likelihood or lower bound is less than\n``tol``, otherwise, a ``ConvergenceWarning`` is raised.\nIf ``warm_start`` is ``True``, then ``n_init`` is ignored and a single\ninitialization is performed upon the first call. Upon consecutive\ncalls, training starts where it left off.", + "docstring": "Estimate model parameters with the EM algorithm.\n\nThe method fits the model ``n_init`` times and sets the parameters with\nwhich the model has the largest likelihood or lower bound. Within each\ntrial, the method iterates between E-step and M-step for ``max_iter``\ntimes until the change of likelihood or lower bound is less than\n``tol``, otherwise, a ``ConvergenceWarning`` is raised.\nIf ``warm_start`` is ``True``, then ``n_init`` is ignored and a single\ninitialization is performed upon the first call. Upon consecutive\ncalls, training starts where it left off.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"Estimate model parameters with the EM algorithm.\n\n The method fits the model ``n_init`` times and sets the parameters with\n which the model has the largest likelihood or lower bound. 
Within each\n trial, the method iterates between E-step and M-step for ``max_iter``\n times until the change of likelihood or lower bound is less than\n ``tol``, otherwise, a ``ConvergenceWarning`` is raised.\n If ``warm_start`` is ``True``, then ``n_init`` is ignored and a single\n initialization is performed upon the first call. Upon consecutive\n calls, training starts where it left off.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\n Returns\n -------\n self\n \"\"\"\n self.fit_predict(X, y)\n return self" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/fit_predict", + "name": "fit_predict", + "qname": "sklearn.mixture._base.BaseMixture.fit_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/fit_predict/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture.fit_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/fit_predict/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture.fit_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "List of n_features-dimensional data points. Each row\ncorresponds to a single data point." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/fit_predict/y", + "name": "y", + "qname": "sklearn.mixture._base.BaseMixture.fit_predict.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate model parameters using X and predict the labels for X.\n\nThe method fits the model n_init times and sets the parameters with\nwhich the model has the largest likelihood or lower bound. Within each\ntrial, the method iterates between E-step and M-step for `max_iter`\ntimes until the change of likelihood or lower bound is less than\n`tol`, otherwise, a :class:`~sklearn.exceptions.ConvergenceWarning` is\nraised. After fitting, it predicts the most probable label for the\ninput data points.\n\n.. versionadded:: 0.20", + "docstring": "Estimate model parameters using X and predict the labels for X.\n\nThe method fits the model n_init times and sets the parameters with\nwhich the model has the largest likelihood or lower bound. Within each\ntrial, the method iterates between E-step and M-step for `max_iter`\ntimes until the change of likelihood or lower bound is less than\n`tol`, otherwise, a :class:`~sklearn.exceptions.ConvergenceWarning` is\nraised. After fitting, it predicts the most probable label for the\ninput data points.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. 
Each row\n corresponds to a single data point.\n\nReturns\n-------\nlabels : array, shape (n_samples,)\n Component labels.", + "code": " def fit_predict(self, X, y=None):\n \"\"\"Estimate model parameters using X and predict the labels for X.\n\n The method fits the model n_init times and sets the parameters with\n which the model has the largest likelihood or lower bound. Within each\n trial, the method iterates between E-step and M-step for `max_iter`\n times until the change of likelihood or lower bound is less than\n `tol`, otherwise, a :class:`~sklearn.exceptions.ConvergenceWarning` is\n raised. After fitting, it predicts the most probable label for the\n input data points.\n\n .. versionadded:: 0.20\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\n Returns\n -------\n labels : array, shape (n_samples,)\n Component labels.\n \"\"\"\n X = _check_X(X, self.n_components, ensure_min_samples=2)\n self._check_n_features(X, reset=True)\n self._check_initial_parameters(X)\n\n # if we enable warm_start, we will have a unique initialisation\n do_init = not(self.warm_start and hasattr(self, 'converged_'))\n n_init = self.n_init if do_init else 1\n\n max_lower_bound = -np.infty\n self.converged_ = False\n\n random_state = check_random_state(self.random_state)\n\n n_samples, _ = X.shape\n for init in range(n_init):\n self._print_verbose_msg_init_beg(init)\n\n if do_init:\n self._initialize_parameters(X, random_state)\n\n lower_bound = (-np.infty if do_init else self.lower_bound_)\n\n for n_iter in range(1, self.max_iter + 1):\n prev_lower_bound = lower_bound\n\n log_prob_norm, log_resp = self._e_step(X)\n self._m_step(X, log_resp)\n lower_bound = self._compute_lower_bound(\n log_resp, log_prob_norm)\n\n change = lower_bound - prev_lower_bound\n self._print_verbose_msg_iter_end(n_iter, change)\n\n if abs(change) < self.tol:\n self.converged_ = True\n break\n\n self._print_verbose_msg_init_end(lower_bound)\n\n if lower_bound > max_lower_bound:\n max_lower_bound = lower_bound\n best_params = self._get_parameters()\n best_n_iter = n_iter\n\n if not self.converged_:\n warnings.warn('Initialization %d did not converge. 
'\n 'Try different init parameters, '\n 'or increase max_iter, tol '\n 'or check for degenerate data.'\n % (init + 1), ConvergenceWarning)\n\n self._set_parameters(best_params)\n self.n_iter_ = best_n_iter\n self.lower_bound_ = max_lower_bound\n\n # Always do a final e-step to guarantee that the labels returned by\n # fit_predict(X) are always consistent with fit(X).predict(X)\n # for any value of max_iter and tol (and any random_state).\n _, log_resp = self._e_step(X)\n\n return log_resp.argmax(axis=1)" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/predict", + "name": "predict", + "qname": "sklearn.mixture._base.BaseMixture.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/predict/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/predict/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "List of n_features-dimensional data points. Each row\ncorresponds to a single data point." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the labels for the data samples in X using trained model.", + "docstring": "Predict the labels for the data samples in X using trained model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\nReturns\n-------\nlabels : array, shape (n_samples,)\n Component labels.", + "code": " def predict(self, X):\n \"\"\"Predict the labels for the data samples in X using trained model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\n Returns\n -------\n labels : array, shape (n_samples,)\n Component labels.\n \"\"\"\n check_is_fitted(self)\n X = _check_X(X, None, self.means_.shape[1])\n return self._estimate_weighted_log_prob(X).argmax(axis=1)" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/predict_proba", + "name": "predict_proba", + "qname": "sklearn.mixture._base.BaseMixture.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/predict_proba/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/predict_proba/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "List of n_features-dimensional data points. Each row\ncorresponds to a single data point." 
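Aside (not part of the recorded JSON): `BaseMixture` is abstract, so the `fit` / `fit_predict` / `predict` contract recorded above is exercised here through the public `GaussianMixture` subclass; data and hyperparameters are illustrative.

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(50, 2) - 3, rng.randn(50, 2) + 3])

gm = GaussianMixture(n_components=2, random_state=0)
labels = gm.fit_predict(X)  # EM fit plus a final E-step in one call

# The recorded source performs a final E-step precisely so that
# fit_predict(X) agrees with fit(X).predict(X) for the same random_state.
gm2 = GaussianMixture(n_components=2, random_state=0).fit(X)
assert np.array_equal(labels, gm2.predict(X))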
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict posterior probability of each component given the data.", + "docstring": "Predict posterior probability of each component given the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\nReturns\n-------\nresp : array, shape (n_samples, n_components)\n Returns the probability each Gaussian (state) in\n the model given each sample.", + "code": " def predict_proba(self, X):\n \"\"\"Predict posterior probability of each component given the data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\n Returns\n -------\n resp : array, shape (n_samples, n_components)\n Returns the probability each Gaussian (state) in\n the model given each sample.\n \"\"\"\n check_is_fitted(self)\n X = _check_X(X, None, self.means_.shape[1])\n _, log_resp = self._estimate_log_prob_resp(X)\n return np.exp(log_resp)" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/sample", + "name": "sample", + "qname": "sklearn.mixture._base.BaseMixture.sample", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/sample/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture.sample.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/sample/n_samples", + "name": "n_samples", + "qname": "sklearn.mixture._base.BaseMixture.sample.n_samples", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Number of samples to generate." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate random samples from the fitted Gaussian distribution.", + "docstring": "Generate random samples from the fitted Gaussian distribution.\n\nParameters\n----------\nn_samples : int, default=1\n Number of samples to generate.\n\nReturns\n-------\nX : array, shape (n_samples, n_features)\n Randomly generated sample\n\ny : array, shape (nsamples,)\n Component labels", + "code": " def sample(self, n_samples=1):\n \"\"\"Generate random samples from the fitted Gaussian distribution.\n\n Parameters\n ----------\n n_samples : int, default=1\n Number of samples to generate.\n\n Returns\n -------\n X : array, shape (n_samples, n_features)\n Randomly generated sample\n\n y : array, shape (nsamples,)\n Component labels\n\n \"\"\"\n check_is_fitted(self)\n\n if n_samples < 1:\n raise ValueError(\n \"Invalid value for 'n_samples': %d . 
The sampling requires at \"\n \"least one sample.\" % (self.n_components))\n\n _, n_features = self.means_.shape\n rng = check_random_state(self.random_state)\n n_samples_comp = rng.multinomial(n_samples, self.weights_)\n\n if self.covariance_type == 'full':\n X = np.vstack([\n rng.multivariate_normal(mean, covariance, int(sample))\n for (mean, covariance, sample) in zip(\n self.means_, self.covariances_, n_samples_comp)])\n elif self.covariance_type == \"tied\":\n X = np.vstack([\n rng.multivariate_normal(mean, self.covariances_, int(sample))\n for (mean, sample) in zip(\n self.means_, n_samples_comp)])\n else:\n X = np.vstack([\n mean + rng.randn(sample, n_features) * np.sqrt(covariance)\n for (mean, covariance, sample) in zip(\n self.means_, self.covariances_, n_samples_comp)])\n\n y = np.concatenate([np.full(sample, j, dtype=int)\n for j, sample in enumerate(n_samples_comp)])\n\n return (X, y)" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/score", + "name": "score", + "qname": "sklearn.mixture._base.BaseMixture.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/score/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/score/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_dimensions)", + "default_value": "", + "description": "List of n_features-dimensional data points. Each row\ncorresponds to a single data point." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_dimensions)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/score/y", + "name": "y", + "qname": "sklearn.mixture._base.BaseMixture.score.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the per-sample average log-likelihood of the given data X.", + "docstring": "Compute the per-sample average log-likelihood of the given data X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_dimensions)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\nReturns\n-------\nlog_likelihood : float\n Log likelihood of the Gaussian mixture given X.", + "code": " def score(self, X, y=None):\n \"\"\"Compute the per-sample average log-likelihood of the given data X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_dimensions)\n List of n_features-dimensional data points. 
Each row\n corresponds to a single data point.\n\n Returns\n -------\n log_likelihood : float\n Log likelihood of the Gaussian mixture given X.\n \"\"\"\n return self.score_samples(X).mean()" + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/score_samples", + "name": "score_samples", + "qname": "sklearn.mixture._base.BaseMixture.score_samples", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/score_samples/self", + "name": "self", + "qname": "sklearn.mixture._base.BaseMixture.score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/BaseMixture/score_samples/X", + "name": "X", + "qname": "sklearn.mixture._base.BaseMixture.score_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "List of n_features-dimensional data points. Each row\ncorresponds to a single data point." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the weighted log probabilities for each sample.", + "docstring": "Compute the weighted log probabilities for each sample.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\nReturns\n-------\nlog_prob : array, shape (n_samples,)\n Log probabilities of each data point in X.", + "code": " def score_samples(self, X):\n \"\"\"Compute the weighted log probabilities for each sample.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. 
Each row\n corresponds to a single data point.\n\n Returns\n -------\n log_prob : array, shape (n_samples,)\n Log probabilities of each data point in X.\n \"\"\"\n check_is_fitted(self)\n X = _check_X(X, None, self.means_.shape[1])\n\n return logsumexp(self._estimate_weighted_log_prob(X), axis=1)" + }, + { + "id": "scikit-learn/sklearn.mixture._base/_check_X", + "name": "_check_X", + "qname": "sklearn.mixture._base._check_X", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/_check_X/X", + "name": "X", + "qname": "sklearn.mixture._base._check_X.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._base/_check_X/n_components", + "name": "n_components", + "qname": "sklearn.mixture._base._check_X.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.mixture._base/_check_X/n_features", + "name": "n_features", + "qname": "sklearn.mixture._base._check_X.n_features", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._base/_check_X/ensure_min_samples", + "name": "ensure_min_samples", + "qname": "sklearn.mixture._base._check_X.ensure_min_samples", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the input data X.", + "docstring": "Check the input data X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nn_components : int\n\nReturns\n-------\nX : array, shape (n_samples, n_features)", + "code": "def _check_X(X, n_components=None, n_features=None, ensure_min_samples=1):\n \"\"\"Check the input data X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n n_components : int\n\n Returns\n -------\n X : array, shape (n_samples, n_features)\n \"\"\"\n X = check_array(X, dtype=[np.float64, np.float32],\n ensure_min_samples=ensure_min_samples)\n if n_components is not None and X.shape[0] < n_components:\n raise ValueError('Expected n_samples >= n_components '\n 'but got n_components = %d, n_samples = %d'\n % (n_components, X.shape[0]))\n if n_features is not None and X.shape[1] != n_features:\n raise ValueError(\"Expected the input data X have %d features, \"\n \"but got %d features\"\n % (n_features, X.shape[1]))\n return X" + }, + { + "id": "scikit-learn/sklearn.mixture._base/_check_shape", + "name": "_check_shape", + "qname": "sklearn.mixture._base._check_shape", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._base/_check_shape/param", + "name": "param", + "qname": "sklearn.mixture._base._check_shape.param", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array", + "default_value": "", + "description": "" + }, + 
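Aside (not part of the recorded JSON): a sketch tying together `sample`, `score_samples`, and `score` as recorded above, again via the public `GaussianMixture` subclass; the data are illustrative. Note also that `_check_X` above enforces `n_samples >= n_components` at fit time.

import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(1)
X = np.vstack([rng.randn(50, 2), rng.randn(50, 2) + 4])
gm = GaussianMixture(n_components=2, random_state=1).fit(X)

X_new, y_new = gm.sample(10)        # 10 generated points + component labels
log_prob = gm.score_samples(X_new)  # per-sample weighted log-likelihood

# As in the recorded source, score is simply the mean of score_samples.
assert np.isclose(gm.score(X_new), log_prob.mean())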
"type": { + "kind": "NamedType", + "name": "array" + } + }, + { + "id": "scikit-learn/sklearn.mixture._base/_check_shape/param_shape", + "name": "param_shape", + "qname": "sklearn.mixture._base._check_shape.param_shape", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tuple", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "id": "scikit-learn/sklearn.mixture._base/_check_shape/name", + "name": "name", + "qname": "sklearn.mixture._base._check_shape.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "string", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "string" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate the shape of the input parameter 'param'.", + "docstring": "Validate the shape of the input parameter 'param'.\n\nParameters\n----------\nparam : array\n\nparam_shape : tuple\n\nname : string", + "code": "def _check_shape(param, param_shape, name):\n \"\"\"Validate the shape of the input parameter 'param'.\n\n Parameters\n ----------\n param : array\n\n param_shape : tuple\n\n name : string\n \"\"\"\n param = np.array(param)\n if param.shape != param_shape:\n raise ValueError(\"The parameter '%s' should have the shape of %s, \"\n \"but got %s\" % (name, param_shape, param.shape))" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__", + "name": "__init__", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/n_components", + "name": "n_components", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.n_components", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "The number of mixture components. Depending on the data and the value\nof the `weight_concentration_prior` the model can decide to not use\nall the components by setting some component `weights_` to values very\nclose to zero. The number of effective components is therefore smaller\nthan n_components." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/covariance_type", + "name": "covariance_type", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.covariance_type", + "default_value": "'full'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'full', 'tied', 'diag', 'spherical'}", + "default_value": "'full'", + "description": "String describing the type of covariance parameters to use.\nMust be one of::\n\n 'full' (each component has its own general covariance matrix),\n 'tied' (all components share the same general covariance matrix),\n 'diag' (each component has its own diagonal covariance matrix),\n 'spherical' (each component has its own single variance)." + }, + "type": { + "kind": "EnumType", + "values": ["full", "spherical", "diag", "tied"] + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/tol", + "name": "tol", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "The convergence threshold. EM iterations will stop when the\nlower bound average gain on the likelihood (of the training data with\nrespect to the model) is below this threshold." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/reg_covar", + "name": "reg_covar", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.reg_covar", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Non-negative regularization added to the diagonal of covariance.\nAllows to assure that the covariance matrices are all positive." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of EM iterations to perform." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/n_init", + "name": "n_init", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.n_init", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "The number of initializations to perform. The result with the highest\nlower bound value on the likelihood is kept." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/init_params", + "name": "init_params", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.init_params", + "default_value": "'kmeans'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'kmeans', 'random'}", + "default_value": "'kmeans'", + "description": "The method used to initialize the weights, the means and the\ncovariances.\nMust be one of::\n\n 'kmeans' : responsibilities are initialized using kmeans.\n 'random' : responsibilities are initialized randomly." + }, + "type": { + "kind": "EnumType", + "values": ["kmeans", "random"] + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/weight_concentration_prior_type", + "name": "weight_concentration_prior_type", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.weight_concentration_prior_type", + "default_value": "'dirichlet_process'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'dirichlet_process'", + "description": "String describing the type of the weight concentration prior.\nMust be one of::\n\n 'dirichlet_process' (using the Stick-breaking representation),\n 'dirichlet_distribution' (can favor more uniform weights)." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/weight_concentration_prior", + "name": "weight_concentration_prior", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.weight_concentration_prior", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float | None", + "default_value": "None.", + "description": "The dirichlet concentration of each component on the weight\ndistribution (Dirichlet). This is commonly called gamma in the\nliterature. The higher concentration puts more mass in\nthe center and will lead to more components being active, while a lower\nconcentration parameter will lead to more mass at the edge of the\nmixture weights simplex. The value of the parameter must be greater\nthan 0. If it is None, it's set to ``1. / n_components``." + }, + "type": { + "kind": "NamedType", + "name": "float | None" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/mean_precision_prior", + "name": "mean_precision_prior", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.mean_precision_prior", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float | None", + "default_value": "None.", + "description": "The precision prior on the mean distribution (Gaussian).\nControls the extent of where means can be placed. Larger\nvalues concentrate the cluster means around `mean_prior`.\nThe value of the parameter must be greater than 0.\nIf it is None, it is set to 1." 
+ }, + "type": { + "kind": "NamedType", + "name": "float | None" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/mean_prior", + "name": "mean_prior", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.mean_prior", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_features,)", + "default_value": "None.", + "description": "The prior on the mean distribution (Gaussian).\nIf it is None, it is set to the mean of X." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_features,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/degrees_of_freedom_prior", + "name": "degrees_of_freedom_prior", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.degrees_of_freedom_prior", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float | None", + "default_value": "None.", + "description": "The prior of the number of degrees of freedom on the covariance\ndistributions (Wishart). If it is None, it's set to `n_features`." + }, + "type": { + "kind": "NamedType", + "name": "float | None" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/covariance_prior", + "name": "covariance_prior", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.covariance_prior", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or array-like", + "default_value": "None.", + "description": "The prior on the covariance distribution (Wishart).\nIf it is None, the emiprical covariance prior is initialized using the\ncovariance of X. The shape depends on `covariance_type`::\n\n (n_features, n_features) if 'full',\n (n_features, n_features) if 'tied',\n (n_features) if 'diag',\n float if 'spherical'" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "array-like" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/random_state", + "name": "random_state", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the random seed given to the method chosen to initialize the\nparameters (see `init_params`).\nIn addition, it controls the generation of random samples from the\nfitted distribution (see the method `sample`).\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If 'warm_start' is True, the solution of the last fitting is used as\ninitialization for the next call of fit(). This can speed up\nconvergence when fit is called several times on similar problems.\nSee :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/verbose", + "name": "verbose", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Enable verbose output. If 1 then it prints the current\ninitialization and each iteration step. If greater than 1 then\nit prints also the log probability and the time needed\nfor each step." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/__init__/verbose_interval", + "name": "verbose_interval", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture.__init__.verbose_interval", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of iteration done before the next print." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Variational Bayesian estimation of a Gaussian mixture.\n\nThis class allows to infer an approximate posterior distribution over the\nparameters of a Gaussian mixture distribution. The effective number of\ncomponents can be inferred from the data.\n\nThis class implements two types of prior for the weights distribution: a\nfinite mixture model with Dirichlet distribution and an infinite mixture\nmodel with the Dirichlet Process. In practice Dirichlet Process inference\nalgorithm is approximated and uses a truncated distribution with a fixed\nmaximum number of components (called the Stick-breaking representation).\nThe number of components actually used almost always depends on the data.\n\n.. 
versionadded:: 0.18\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, n_components=1, covariance_type='full', tol=1e-3,\n reg_covar=1e-6, max_iter=100, n_init=1, init_params='kmeans',\n weight_concentration_prior_type='dirichlet_process',\n weight_concentration_prior=None,\n mean_precision_prior=None, mean_prior=None,\n degrees_of_freedom_prior=None, covariance_prior=None,\n random_state=None, warm_start=False, verbose=0,\n verbose_interval=10):\n super().__init__(\n n_components=n_components, tol=tol, reg_covar=reg_covar,\n max_iter=max_iter, n_init=n_init, init_params=init_params,\n random_state=random_state, warm_start=warm_start,\n verbose=verbose, verbose_interval=verbose_interval)\n\n self.covariance_type = covariance_type\n self.weight_concentration_prior_type = weight_concentration_prior_type\n self.weight_concentration_prior = weight_concentration_prior\n self.mean_precision_prior = mean_precision_prior\n self.mean_prior = mean_prior\n self.degrees_of_freedom_prior = degrees_of_freedom_prior\n self.covariance_prior = covariance_prior" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_means_parameters", + "name": "_check_means_parameters", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._check_means_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_means_parameters/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._check_means_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_means_parameters/X", + "name": "X", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._check_means_parameters.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the parameters of the Gaussian distribution.", + "docstring": "Check the parameters of the Gaussian distribution.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)", + "code": " def _check_means_parameters(self, X):\n \"\"\"Check the parameters of the Gaussian distribution.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n \"\"\"\n _, n_features = X.shape\n\n if self.mean_precision_prior is None:\n self.mean_precision_prior_ = 1.\n elif self.mean_precision_prior > 0.:\n self.mean_precision_prior_ = self.mean_precision_prior\n else:\n raise ValueError(\"The parameter 'mean_precision_prior' should be \"\n \"greater than 0., but got %.3f.\"\n % self.mean_precision_prior)\n\n if self.mean_prior is None:\n self.mean_prior_ = X.mean(axis=0)\n else:\n self.mean_prior_ = check_array(self.mean_prior,\n dtype=[np.float64, np.float32],\n ensure_2d=False)\n _check_shape(self.mean_prior_, (n_features, ), 'means')" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_parameters", + "name": 
"_check_parameters", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._check_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_parameters/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._check_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_parameters/X", + "name": "X", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._check_parameters.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check that the parameters are well defined.", + "docstring": "Check that the parameters are well defined.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)", + "code": " def _check_parameters(self, X):\n \"\"\"Check that the parameters are well defined.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n \"\"\"\n if self.covariance_type not in ['spherical', 'tied', 'diag', 'full']:\n raise ValueError(\"Invalid value for 'covariance_type': %s \"\n \"'covariance_type' should be in \"\n \"['spherical', 'tied', 'diag', 'full']\"\n % self.covariance_type)\n\n if (self.weight_concentration_prior_type not in\n ['dirichlet_process', 'dirichlet_distribution']):\n raise ValueError(\n \"Invalid value for 'weight_concentration_prior_type': %s \"\n \"'weight_concentration_prior_type' should be in \"\n \"['dirichlet_process', 'dirichlet_distribution']\"\n % self.weight_concentration_prior_type)\n\n self._check_weights_parameters()\n self._check_means_parameters(X)\n self._check_precision_parameters(X)\n self._checkcovariance_prior_parameter(X)" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_precision_parameters", + "name": "_check_precision_parameters", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._check_precision_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_precision_parameters/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._check_precision_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_precision_parameters/X", + "name": "X", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._check_precision_parameters.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + 
"reexported_by": [], + "description": "Check the prior parameters of the precision distribution.", + "docstring": "Check the prior parameters of the precision distribution.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)", + "code": " def _check_precision_parameters(self, X):\n \"\"\"Check the prior parameters of the precision distribution.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n \"\"\"\n _, n_features = X.shape\n\n if self.degrees_of_freedom_prior is None:\n self.degrees_of_freedom_prior_ = n_features\n elif self.degrees_of_freedom_prior > n_features - 1.:\n self.degrees_of_freedom_prior_ = self.degrees_of_freedom_prior\n else:\n raise ValueError(\"The parameter 'degrees_of_freedom_prior' \"\n \"should be greater than %d, but got %.3f.\"\n % (n_features - 1, self.degrees_of_freedom_prior))" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_weights_parameters", + "name": "_check_weights_parameters", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._check_weights_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_check_weights_parameters/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._check_weights_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the parameter of the Dirichlet distribution.", + "docstring": "Check the parameter of the Dirichlet distribution.", + "code": " def _check_weights_parameters(self):\n \"\"\"Check the parameter of the Dirichlet distribution.\"\"\"\n if self.weight_concentration_prior is None:\n self.weight_concentration_prior_ = 1. 
/ self.n_components\n elif self.weight_concentration_prior > 0.:\n self.weight_concentration_prior_ = (\n self.weight_concentration_prior)\n else:\n raise ValueError(\"The parameter 'weight_concentration_prior' \"\n \"should be greater than 0., but got %.3f.\"\n % self.weight_concentration_prior)" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_checkcovariance_prior_parameter", + "name": "_checkcovariance_prior_parameter", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._checkcovariance_prior_parameter", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_checkcovariance_prior_parameter/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._checkcovariance_prior_parameter.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_checkcovariance_prior_parameter/X", + "name": "X", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._checkcovariance_prior_parameter.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the `covariance_prior_`.", + "docstring": "Check the `covariance_prior_`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)", + "code": " def _checkcovariance_prior_parameter(self, X):\n \"\"\"Check the `covariance_prior_`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n \"\"\"\n _, n_features = X.shape\n\n if self.covariance_prior is None:\n self.covariance_prior_ = {\n 'full': np.atleast_2d(np.cov(X.T)),\n 'tied': np.atleast_2d(np.cov(X.T)),\n 'diag': np.var(X, axis=0, ddof=1),\n 'spherical': np.var(X, axis=0, ddof=1).mean()\n }[self.covariance_type]\n\n elif self.covariance_type in ['full', 'tied']:\n self.covariance_prior_ = check_array(\n self.covariance_prior, dtype=[np.float64, np.float32],\n ensure_2d=False)\n _check_shape(self.covariance_prior_, (n_features, n_features),\n '%s covariance_prior' % self.covariance_type)\n _check_precision_matrix(self.covariance_prior_,\n self.covariance_type)\n elif self.covariance_type == 'diag':\n self.covariance_prior_ = check_array(\n self.covariance_prior, dtype=[np.float64, np.float32],\n ensure_2d=False)\n _check_shape(self.covariance_prior_, (n_features,),\n '%s covariance_prior' % self.covariance_type)\n _check_precision_positivity(self.covariance_prior_,\n self.covariance_type)\n # spherical case\n elif self.covariance_prior > 0.:\n self.covariance_prior_ = self.covariance_prior\n else:\n raise ValueError(\"The parameter 'spherical covariance_prior' \"\n \"should be greater than 0., but got %.3f.\"\n % self.covariance_prior)" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_compute_lower_bound", + "name": "_compute_lower_bound", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._compute_lower_bound", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_compute_lower_bound/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._compute_lower_bound.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_compute_lower_bound/log_resp", + "name": "log_resp", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._compute_lower_bound.log_resp", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array, shape (n_samples, n_components)", + "default_value": "", + "description": "Logarithm of the posterior probabilities (or responsibilities) of\nthe point of each sample in X." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_components)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_compute_lower_bound/log_prob_norm", + "name": "log_prob_norm", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._compute_lower_bound.log_prob_norm", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Logarithm of the probability of each sample in X." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the lower bound of the model.\n\nThe lower bound on the likelihood (of the training data with respect to\nthe model) is used to detect the convergence and has to increase at\neach iteration.", + "docstring": "Estimate the lower bound of the model.\n\nThe lower bound on the likelihood (of the training data with respect to\nthe model) is used to detect the convergence and has to increase at\neach iteration.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nlog_resp : array, shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.\n\nlog_prob_norm : float\n Logarithm of the probability of each sample in X.\n\nReturns\n-------\nlower_bound : float", + "code": " def _compute_lower_bound(self, log_resp, log_prob_norm):\n \"\"\"Estimate the lower bound of the model.\n\n The lower bound on the likelihood (of the training data with respect to\n the model) is used to detect the convergence and has to increase at\n each iteration.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n log_resp : array, shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.\n\n log_prob_norm : float\n Logarithm of the probability of each sample in X.\n\n Returns\n -------\n lower_bound : float\n \"\"\"\n # Contrary to the original formula, we have done some simplification\n # and removed all the constant terms.\n n_features, = self.mean_prior_.shape\n\n # We removed `.5 * n_features * np.log(self.degrees_of_freedom_)`\n # because the precision matrix is normalized.\n log_det_precisions_chol = (_compute_log_det_cholesky(\n self.precisions_cholesky_, self.covariance_type, n_features) -\n .5 * n_features * 
np.log(self.degrees_of_freedom_))\n\n if self.covariance_type == 'tied':\n log_wishart = self.n_components * np.float64(_log_wishart_norm(\n self.degrees_of_freedom_, log_det_precisions_chol, n_features))\n else:\n log_wishart = np.sum(_log_wishart_norm(\n self.degrees_of_freedom_, log_det_precisions_chol, n_features))\n\n if self.weight_concentration_prior_type == 'dirichlet_process':\n log_norm_weight = -np.sum(betaln(self.weight_concentration_[0],\n self.weight_concentration_[1]))\n else:\n log_norm_weight = _log_dirichlet_norm(self.weight_concentration_)\n\n return (-np.sum(np.exp(log_resp) * log_resp) -\n log_wishart - log_norm_weight -\n 0.5 * n_features * np.sum(np.log(self.mean_precision_)))" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_log_prob", + "name": "_estimate_log_prob", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_log_prob", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_log_prob/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_log_prob.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_log_prob/X", + "name": "X", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_log_prob.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _estimate_log_prob(self, X):\n _, n_features = X.shape\n # We remove `n_features * np.log(self.degrees_of_freedom_)` because\n # the precision matrix is normalized\n log_gauss = (_estimate_log_gaussian_prob(\n X, self.means_, self.precisions_cholesky_, self.covariance_type) -\n .5 * n_features * np.log(self.degrees_of_freedom_))\n\n log_lambda = n_features * np.log(2.) 
+ np.sum(digamma(\n .5 * (self.degrees_of_freedom_ -\n np.arange(0, n_features)[:, np.newaxis])), 0)\n\n return log_gauss + .5 * (log_lambda -\n n_features / self.mean_precision_)" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_log_weights", + "name": "_estimate_log_weights", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_log_weights", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_log_weights/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_log_weights.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _estimate_log_weights(self):\n if self.weight_concentration_prior_type == 'dirichlet_process':\n digamma_sum = digamma(self.weight_concentration_[0] +\n self.weight_concentration_[1])\n digamma_a = digamma(self.weight_concentration_[0])\n digamma_b = digamma(self.weight_concentration_[1])\n return (digamma_a - digamma_sum +\n np.hstack((0, np.cumsum(digamma_b - digamma_sum)[:-1])))\n else:\n # case Variational Gaussian mixture with Dirichlet distribution\n return (digamma(self.weight_concentration_) -\n digamma(np.sum(self.weight_concentration_)))" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_means", + "name": "_estimate_means", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_means", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_means/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_means.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_means/nk", + "name": "nk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_means.nk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_means/xk", + "name": "xk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_means.xk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the parameters of the Gaussian distribution.", + "docstring": "Estimate the parameters of the Gaussian distribution.\n\nParameters\n----------\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, 
n_features)", + "code": " def _estimate_means(self, nk, xk):\n \"\"\"Estimate the parameters of the Gaussian distribution.\n\n Parameters\n ----------\n nk : array-like of shape (n_components,)\n\n xk : array-like of shape (n_components, n_features)\n \"\"\"\n self.mean_precision_ = self.mean_precision_prior_ + nk\n self.means_ = ((self.mean_precision_prior_ * self.mean_prior_ +\n nk[:, np.newaxis] * xk) /\n self.mean_precision_[:, np.newaxis])" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_precisions", + "name": "_estimate_precisions", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_precisions", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_precisions/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_precisions.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_precisions/nk", + "name": "nk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_precisions.nk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_precisions/xk", + "name": "xk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_precisions.xk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_precisions/sk", + "name": "sk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_precisions.sk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "The shape depends of `covariance_type`:\n'full' : (n_components, n_features, n_features)\n'tied' : (n_features, n_features)\n'diag' : (n_components, n_features)\n'spherical' : (n_components,)" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the precisions parameters of the precision distribution.", + "docstring": "Estimate the precisions parameters of the precision distribution.\n\nParameters\n----------\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like\n The shape depends of `covariance_type`:\n 'full' : (n_components, n_features, n_features)\n 'tied' : (n_features, n_features)\n 'diag' : (n_components, n_features)\n 'spherical' : (n_components,)", + "code": " def _estimate_precisions(self, nk, xk, sk):\n \"\"\"Estimate the precisions parameters of the precision distribution.\n\n Parameters\n 
----------\n nk : array-like of shape (n_components,)\n\n xk : array-like of shape (n_components, n_features)\n\n sk : array-like\n The shape depends on `covariance_type`:\n 'full' : (n_components, n_features, n_features)\n 'tied' : (n_features, n_features)\n 'diag' : (n_components, n_features)\n 'spherical' : (n_components,)\n \"\"\"\n {\"full\": self._estimate_wishart_full,\n \"tied\": self._estimate_wishart_tied,\n \"diag\": self._estimate_wishart_diag,\n \"spherical\": self._estimate_wishart_spherical\n }[self.covariance_type](nk, xk, sk)\n\n self.precisions_cholesky_ = _compute_precision_cholesky(\n self.covariances_, self.covariance_type)" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_weights", + "name": "_estimate_weights", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_weights", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_weights/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_weights.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_weights/nk", + "name": "nk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_weights.nk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the parameters of the Dirichlet distribution.", + "docstring": "Estimate the parameters of the Dirichlet distribution.\n\nParameters\n----------\nnk : array-like of shape (n_components,)", + "code": " def _estimate_weights(self, nk):\n \"\"\"Estimate the parameters of the Dirichlet distribution.\n\n Parameters\n ----------\n nk : array-like of shape (n_components,)\n \"\"\"\n if self.weight_concentration_prior_type == 'dirichlet_process':\n # For dirichlet process weight_concentration will be a tuple\n # containing the two parameters of the beta distribution\n self.weight_concentration_ = (\n 1. 
+ nk,\n (self.weight_concentration_prior_ +\n np.hstack((np.cumsum(nk[::-1])[-2::-1], 0))))\n else:\n # case Variational Gaussian mixture with Dirichlet distribution\n self.weight_concentration_ = self.weight_concentration_prior_ + nk" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_diag", + "name": "_estimate_wishart_diag", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_diag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_diag/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_diag.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_diag/nk", + "name": "nk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_diag.nk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_diag/xk", + "name": "xk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_diag.xk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_diag/sk", + "name": "sk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_diag.sk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the diag Wishart distribution parameters.", + "docstring": "Estimate the diag Wishart distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like of shape (n_components, n_features)", + "code": " def _estimate_wishart_diag(self, nk, xk, sk):\n \"\"\"Estimate the diag Wishart distribution parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n nk : array-like of shape (n_components,)\n\n xk : array-like of shape (n_components, n_features)\n\n sk : array-like of shape (n_components, n_features)\n \"\"\"\n _, n_features = xk.shape\n\n # Warning : in some Bishop book, there is a typo on the formula 10.63\n # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`\n # is the correct formula\n self.degrees_of_freedom_ = 
self.degrees_of_freedom_prior_ + nk\n\n diff = xk - self.mean_prior_\n self.covariances_ = (\n self.covariance_prior_ + nk[:, np.newaxis] * (\n sk + (self.mean_precision_prior_ /\n self.mean_precision_)[:, np.newaxis] * np.square(diff)))\n\n # Contrary to the original bishop book, we normalize the covariances\n self.covariances_ /= self.degrees_of_freedom_[:, np.newaxis]" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_full", + "name": "_estimate_wishart_full", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_full", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_full/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_full.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_full/nk", + "name": "nk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_full.nk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_full/xk", + "name": "xk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_full.xk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_full/sk", + "name": "sk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_full.sk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the full Wishart distribution parameters.", + "docstring": "Estimate the full Wishart distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like of shape (n_components, n_features, n_features)", + "code": " def _estimate_wishart_full(self, nk, xk, sk):\n \"\"\"Estimate the full Wishart distribution parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n nk : array-like of shape (n_components,)\n\n xk : array-like of shape (n_components, n_features)\n\n sk : array-like of shape (n_components, n_features, n_features)\n \"\"\"\n _, n_features = xk.shape\n\n # Warning : in some 
Bishop book, there is a typo on the formula 10.63\n # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk` is\n # the correct formula\n self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk\n\n self.covariances_ = np.empty((self.n_components, n_features,\n n_features))\n\n for k in range(self.n_components):\n diff = xk[k] - self.mean_prior_\n self.covariances_[k] = (self.covariance_prior_ + nk[k] * sk[k] +\n nk[k] * self.mean_precision_prior_ /\n self.mean_precision_[k] * np.outer(diff,\n diff))\n\n # Contrary to the original bishop book, we normalize the covariances\n self.covariances_ /= (\n self.degrees_of_freedom_[:, np.newaxis, np.newaxis])" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_spherical", + "name": "_estimate_wishart_spherical", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_spherical", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_spherical/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_spherical.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_spherical/nk", + "name": "nk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_spherical.nk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_spherical/xk", + "name": "xk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_spherical.xk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_spherical/sk", + "name": "sk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_spherical.sk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the spherical Wishart distribution parameters.", + "docstring": "Estimate the spherical Wishart distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like of shape (n_components,)", + "code": " def _estimate_wishart_spherical(self, nk, xk, sk):\n \"\"\"Estimate the spherical Wishart distribution parameters.\n\n Parameters\n 
----------\n X : array-like of shape (n_samples, n_features)\n\n nk : array-like of shape (n_components,)\n\n xk : array-like of shape (n_components, n_features)\n\n sk : array-like of shape (n_components,)\n \"\"\"\n _, n_features = xk.shape\n\n # Warning : in some Bishop book, there is a typo on the formula 10.63\n # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`\n # is the correct formula\n self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk\n\n diff = xk - self.mean_prior_\n self.covariances_ = (\n self.covariance_prior_ + nk * (\n sk + self.mean_precision_prior_ / self.mean_precision_ *\n np.mean(np.square(diff), 1)))\n\n # Contrary to the original bishop book, we normalize the covariances\n self.covariances_ /= self.degrees_of_freedom_" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_tied", + "name": "_estimate_wishart_tied", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_tied", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_tied/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_tied.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_tied/nk", + "name": "nk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_tied.nk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_tied/xk", + "name": "xk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_tied.xk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_estimate_wishart_tied/sk", + "name": "sk", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._estimate_wishart_tied.sk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the tied Wishart distribution parameters.", + "docstring": "Estimate the tied Wishart distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nxk : array-like of shape (n_components, n_features)\n\nsk : array-like of shape (n_features, n_features)", + "code": " def _estimate_wishart_tied(self, nk, xk, sk):\n \"\"\"Estimate 
the tied Wishart distribution parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n nk : array-like of shape (n_components,)\n\n xk : array-like of shape (n_components, n_features)\n\n sk : array-like of shape (n_features, n_features)\n \"\"\"\n _, n_features = xk.shape\n\n # Warning : in some Bishop book, there is a typo on the formula 10.63\n # `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`\n # is the correct formula\n self.degrees_of_freedom_ = (\n self.degrees_of_freedom_prior_ + nk.sum() / self.n_components)\n\n diff = xk - self.mean_prior_\n self.covariances_ = (\n self.covariance_prior_ + sk * nk.sum() / self.n_components +\n self.mean_precision_prior_ / self.n_components * np.dot(\n (nk / self.mean_precision_) * diff.T, diff))\n\n # Contrary to the original bishop book, we normalize the covariances\n self.covariances_ /= self.degrees_of_freedom_" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_get_parameters", + "name": "_get_parameters", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._get_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_get_parameters/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._get_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_parameters(self):\n return (self.weight_concentration_,\n self.mean_precision_, self.means_,\n self.degrees_of_freedom_, self.covariances_,\n self.precisions_cholesky_)" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_initialize", + "name": "_initialize", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._initialize", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_initialize/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._initialize.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_initialize/X", + "name": "X", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._initialize.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_initialize/resp", + "name": "resp", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._initialize.resp", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + } + ], + "results": [], + 
"is_public": false, + "reexported_by": [], + "description": "Initialization of the mixture parameters.", + "docstring": "Initialization of the mixture parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nresp : array-like of shape (n_samples, n_components)", + "code": " def _initialize(self, X, resp):\n \"\"\"Initialization of the mixture parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n resp : array-like of shape (n_samples, n_components)\n \"\"\"\n nk, xk, sk = _estimate_gaussian_parameters(X, resp, self.reg_covar,\n self.covariance_type)\n\n self._estimate_weights(nk)\n self._estimate_means(nk, xk)\n self._estimate_precisions(nk, xk, sk)" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_m_step", + "name": "_m_step", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._m_step", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_m_step/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._m_step.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_m_step/X", + "name": "X", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._m_step.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_m_step/log_resp", + "name": "log_resp", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._m_step.log_resp", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "Logarithm of the posterior probabilities (or responsibilities) of\nthe point of each sample in X." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "M step.", + "docstring": "M step.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nlog_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.", + "code": " def _m_step(self, X, log_resp):\n \"\"\"M step.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n log_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.\n \"\"\"\n n_samples, _ = X.shape\n\n nk, xk, sk = _estimate_gaussian_parameters(\n X, np.exp(log_resp), self.reg_covar, self.covariance_type)\n self._estimate_weights(nk)\n self._estimate_means(nk, xk)\n self._estimate_precisions(nk, xk, sk)" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_set_parameters", + "name": "_set_parameters", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._set_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_set_parameters/self", + "name": "self", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._set_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/BayesianGaussianMixture/_set_parameters/params", + "name": "params", + "qname": "sklearn.mixture._bayesian_mixture.BayesianGaussianMixture._set_parameters.params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _set_parameters(self, params):\n (self.weight_concentration_, self.mean_precision_, self.means_,\n self.degrees_of_freedom_, self.covariances_,\n self.precisions_cholesky_) = params\n\n # Weights computation\n if self.weight_concentration_prior_type == \"dirichlet_process\":\n weight_dirichlet_sum = (self.weight_concentration_[0] +\n self.weight_concentration_[1])\n tmp = self.weight_concentration_[1] / weight_dirichlet_sum\n self.weights_ = (\n self.weight_concentration_[0] / weight_dirichlet_sum *\n np.hstack((1, np.cumprod(tmp[:-1]))))\n self.weights_ /= np.sum(self.weights_)\n else:\n self. 
weights_ = (self.weight_concentration_ /\n np.sum(self.weight_concentration_))\n\n # Precisions matrices computation\n if self.covariance_type == 'full':\n self.precisions_ = np.array([\n np.dot(prec_chol, prec_chol.T)\n for prec_chol in self.precisions_cholesky_])\n\n elif self.covariance_type == 'tied':\n self.precisions_ = np.dot(self.precisions_cholesky_,\n self.precisions_cholesky_.T)\n else:\n self.precisions_ = self.precisions_cholesky_ ** 2" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/_log_dirichlet_norm", + "name": "_log_dirichlet_norm", + "qname": "sklearn.mixture._bayesian_mixture._log_dirichlet_norm", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/_log_dirichlet_norm/dirichlet_concentration", + "name": "dirichlet_concentration", + "qname": "sklearn.mixture._bayesian_mixture._log_dirichlet_norm.dirichlet_concentration", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The parameters values of the Dirichlet distribution." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the log of the Dirichlet distribution normalization term.", + "docstring": "Compute the log of the Dirichlet distribution normalization term.\n\nParameters\n----------\ndirichlet_concentration : array-like of shape (n_samples,)\n The parameters values of the Dirichlet distribution.\n\nReturns\n-------\nlog_dirichlet_norm : float\n The log normalization of the Dirichlet distribution.", + "code": "def _log_dirichlet_norm(dirichlet_concentration):\n \"\"\"Compute the log of the Dirichlet distribution normalization term.\n\n Parameters\n ----------\n dirichlet_concentration : array-like of shape (n_samples,)\n The parameters values of the Dirichlet distribution.\n\n Returns\n -------\n log_dirichlet_norm : float\n The log normalization of the Dirichlet distribution.\n \"\"\"\n return (gammaln(np.sum(dirichlet_concentration)) -\n np.sum(gammaln(dirichlet_concentration)))" + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/_log_wishart_norm", + "name": "_log_wishart_norm", + "qname": "sklearn.mixture._bayesian_mixture._log_wishart_norm", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/_log_wishart_norm/degrees_of_freedom", + "name": "degrees_of_freedom", + "qname": "sklearn.mixture._bayesian_mixture._log_wishart_norm.degrees_of_freedom", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "The number of degrees of freedom on the covariance Wishart\ndistributions." 
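The `_set_parameters` record above derives mixture weights from a two-row weight concentration when the prior type is `dirichlet_process`. A sketch of that stick-breaking computation, with `alpha`/`beta` standing in for the two Beta-distribution parameter arrays:

```python
import numpy as np

def stick_breaking_weights(alpha, beta):
    """Sketch of the dirichlet_process branch of _set_parameters."""
    total = alpha + beta
    stick = alpha / total          # expected length of each stick break
    remainder = beta / total       # expected leftover after each break
    # weight_k = E[v_k] * prod_{j<k} (1 - E[v_j]), then renormalize
    weights = stick * np.hstack((1.0, np.cumprod(remainder[:-1])))
    return weights / weights.sum()

print(stick_breaking_weights(np.array([1.0, 1.0, 1.0]),
                             np.array([2.0, 2.0, 2.0])))
```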
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/_log_wishart_norm/log_det_precisions_chol", + "name": "log_det_precisions_chol", + "qname": "sklearn.mixture._bayesian_mixture._log_wishart_norm.log_det_precisions_chol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._bayesian_mixture/_log_wishart_norm/n_features", + "name": "n_features", + "qname": "sklearn.mixture._bayesian_mixture._log_wishart_norm.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The number of features." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the log of the Wishart distribution normalization term.", + "docstring": "Compute the log of the Wishart distribution normalization term.\n\nParameters\n----------\ndegrees_of_freedom : array-like of shape (n_components,)\n The number of degrees of freedom on the covariance Wishart\n distributions.\n\nlog_det_precision_chol : array-like of shape (n_components,)\n The determinant of the precision matrix for each component.\n\nn_features : int\n The number of features.\n\nReturn\n------\nlog_wishart_norm : array-like of shape (n_components,)\n The log normalization of the Wishart distribution.", + "code": "def _log_wishart_norm(degrees_of_freedom, log_det_precisions_chol, n_features):\n \"\"\"Compute the log of the Wishart distribution normalization term.\n\n Parameters\n ----------\n degrees_of_freedom : array-like of shape (n_components,)\n The number of degrees of freedom on the covariance Wishart\n distributions.\n\n log_det_precision_chol : array-like of shape (n_components,)\n The determinant of the precision matrix for each component.\n\n n_features : int\n The number of features.\n\n Return\n ------\n log_wishart_norm : array-like of shape (n_components,)\n The log normalization of the Wishart distribution.\n \"\"\"\n # To simplify the computation we have removed the np.log(np.pi) term\n return -(degrees_of_freedom * log_det_precisions_chol +\n degrees_of_freedom * n_features * .5 * math.log(2.) 
+\n np.sum(gammaln(.5 * (degrees_of_freedom -\n np.arange(n_features)[:, np.newaxis])), 0))" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__", + "name": "__init__", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/self", + "name": "self", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/n_components", + "name": "n_components", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.n_components", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "The number of mixture components." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/covariance_type", + "name": "covariance_type", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.covariance_type", + "default_value": "'full'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'full', 'tied', 'diag', 'spherical'}", + "default_value": "'full'", + "description": "String describing the type of covariance parameters to use.\nMust be one of:\n\n'full'\n each component has its own general covariance matrix\n'tied'\n all components share the same general covariance matrix\n'diag'\n each component has its own diagonal covariance matrix\n'spherical'\n each component has its own single variance" + }, + "type": { + "kind": "EnumType", + "values": ["full", "spherical", "diag", "tied"] + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/tol", + "name": "tol", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "The convergence threshold. EM iterations will stop when the\nlower bound average gain is below this threshold." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/reg_covar", + "name": "reg_covar", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.reg_covar", + "default_value": "1e-06", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-6", + "description": "Non-negative regularization added to the diagonal of covariance.\nAllows to assure that the covariance matrices are all positive." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.max_iter", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "The number of EM iterations to perform." 
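The `_log_wishart_norm` record closing above drops a constant `log(pi)` contribution, as its source comment notes. A self-contained sketch of the same computation:

```python
import math
import numpy as np
from scipy.special import gammaln

def log_wishart_norm(degrees_of_freedom, log_det_precisions_chol, n_features):
    """Sketch of the Wishart log-normalizer, constant pi term omitted."""
    return -(degrees_of_freedom * log_det_precisions_chol
             + degrees_of_freedom * n_features * 0.5 * math.log(2.0)
             # sum of log multivariate-gamma factors over the d dimensions
             + np.sum(gammaln(0.5 * (degrees_of_freedom
                                     - np.arange(n_features)[:, np.newaxis])),
                      axis=0))
```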
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/n_init", + "name": "n_init", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.n_init", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "The number of initializations to perform. The best results are kept." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/init_params", + "name": "init_params", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.init_params", + "default_value": "'kmeans'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'kmeans', 'random'}", + "default_value": "'kmeans'", + "description": "The method used to initialize the weights, the means and the\nprecisions.\nMust be one of::\n\n 'kmeans' : responsibilities are initialized using kmeans.\n 'random' : responsibilities are initialized randomly." + }, + "type": { + "kind": "EnumType", + "values": ["kmeans", "random"] + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/weights_init", + "name": "weights_init", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.weights_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, )", + "default_value": "None", + "description": "The user-provided initial weights.\nIf it is None, weights are initialized using the `init_params` method." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, )" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/means_init", + "name": "means_init", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.means_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "None", + "description": "The user-provided initial means,\nIf it is None, means are initialized using the `init_params` method." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/precisions_init", + "name": "precisions_init", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.precisions_init", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "The user-provided initial precisions (inverse of the covariance\nmatrices).\nIf it is None, precisions are initialized using the 'init_params'\nmethod.\nThe shape depends on 'covariance_type'::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/random_state", + "name": "random_state", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the random seed given to the method chosen to initialize the\nparameters (see `init_params`).\nIn addition, it controls the generation of random samples from the\nfitted distribution (see the method `sample`).\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If 'warm_start' is True, the solution of the last fitting is used as\ninitialization for the next call of fit(). This can speed up\nconvergence when fit is called several times on similar problems.\nIn that case, 'n_init' is ignored and only a single initialization\noccurs upon the first call.\nSee :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/verbose", + "name": "verbose", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Enable verbose output. If 1 then it prints the current\ninitialization and each iteration step. If greater than 1 then\nit prints also the log probability and the time needed\nfor each step." 
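The `precisions_init` entry above enumerates one expected array shape per covariance type. A hypothetical construction of a valid initializer for the `'full'` case (values are illustrative only):

```python
import numpy as np

n_components, n_features = 3, 2

# Identity precisions for covariance_type='full'; per the docstring the
# shape must be (n_components, n_features, n_features).
precisions_init = np.tile(np.eye(n_features), (n_components, 1, 1))

# The docstring expects, for the other types:
#   'tied'      -> (n_features, n_features)
#   'diag'      -> (n_components, n_features)
#   'spherical' -> (n_components,)
```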
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/__init__/verbose_interval", + "name": "verbose_interval", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.__init__.verbose_interval", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of iteration done before the next print." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Gaussian Mixture.\n\nRepresentation of a Gaussian mixture model probability distribution.\nThis class allows to estimate the parameters of a Gaussian mixture\ndistribution.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=1, *, covariance_type='full', tol=1e-3,\n reg_covar=1e-6, max_iter=100, n_init=1, init_params='kmeans',\n weights_init=None, means_init=None, precisions_init=None,\n random_state=None, warm_start=False,\n verbose=0, verbose_interval=10):\n super().__init__(\n n_components=n_components, tol=tol, reg_covar=reg_covar,\n max_iter=max_iter, n_init=n_init, init_params=init_params,\n random_state=random_state, warm_start=warm_start,\n verbose=verbose, verbose_interval=verbose_interval)\n\n self.covariance_type = covariance_type\n self.weights_init = weights_init\n self.means_init = means_init\n self.precisions_init = precisions_init" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_check_parameters", + "name": "_check_parameters", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._check_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_check_parameters/self", + "name": "self", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._check_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_check_parameters/X", + "name": "X", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._check_parameters.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the Gaussian mixture parameters are well defined.", + "docstring": "Check the Gaussian mixture parameters are well defined.", + "code": " def _check_parameters(self, X):\n \"\"\"Check the Gaussian mixture parameters are well defined.\"\"\"\n _, n_features = X.shape\n if self.covariance_type not in ['spherical', 'tied', 'diag', 'full']:\n raise ValueError(\"Invalid value for 'covariance_type': %s \"\n \"'covariance_type' should be in \"\n \"['spherical', 'tied', 'diag', 'full']\"\n % self.covariance_type)\n\n if self.weights_init is not None:\n self.weights_init = _check_weights(self.weights_init,\n self.n_components)\n\n if self.means_init is not None:\n self.means_init = _check_means(self.means_init,\n self.n_components, n_features)\n\n if self.precisions_init is not None:\n self.precisions_init = 
_check_precisions(self.precisions_init,\n self.covariance_type,\n self.n_components,\n n_features)" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_compute_lower_bound", + "name": "_compute_lower_bound", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._compute_lower_bound", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_compute_lower_bound/self", + "name": "self", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._compute_lower_bound.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_compute_lower_bound/_", + "name": "_", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._compute_lower_bound._", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_compute_lower_bound/log_prob_norm", + "name": "log_prob_norm", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._compute_lower_bound.log_prob_norm", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _compute_lower_bound(self, _, log_prob_norm):\n return log_prob_norm" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_estimate_log_prob", + "name": "_estimate_log_prob", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._estimate_log_prob", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_estimate_log_prob/self", + "name": "self", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._estimate_log_prob.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_estimate_log_prob/X", + "name": "X", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._estimate_log_prob.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _estimate_log_prob(self, X):\n return _estimate_log_gaussian_prob(\n X, self.means_, self.precisions_cholesky_, self.covariance_type)" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_estimate_log_weights", + "name": "_estimate_log_weights", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._estimate_log_weights", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_estimate_log_weights/self", + "name": "self", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._estimate_log_weights.self", + "default_value": null, + "assigned_by": "IMPLICIT", + 
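Taken together, the constructor and `_check_parameters` records above describe the public fitting surface of `GaussianMixture`. A small usage sketch on toy data (data and values illustrative only):

```python
import numpy as np
from sklearn.mixture import GaussianMixture

rng = np.random.RandomState(0)
# two well-separated toy clusters
X = np.vstack([rng.randn(100, 2), rng.randn(100, 2) + 5.0])

gm = GaussianMixture(n_components=2, covariance_type='full',
                     tol=1e-3, max_iter=100, random_state=0).fit(X)
labels = gm.predict(X)        # hard component assignments
resp = gm.predict_proba(X)    # posterior responsibilities per sample
print(gm.means_.shape, gm.converged_)
```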
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _estimate_log_weights(self):\n return np.log(self.weights_)" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_get_parameters", + "name": "_get_parameters", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._get_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_get_parameters/self", + "name": "self", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._get_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_parameters(self):\n return (self.weights_, self.means_, self.covariances_,\n self.precisions_cholesky_)" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_initialize", + "name": "_initialize", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._initialize", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_initialize/self", + "name": "self", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._initialize.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_initialize/X", + "name": "X", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._initialize.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_initialize/resp", + "name": "resp", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._initialize.resp", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Initialization of the Gaussian mixture parameters.", + "docstring": "Initialization of the Gaussian mixture parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nresp : array-like of shape (n_samples, n_components)", + "code": " def _initialize(self, X, resp):\n \"\"\"Initialization of the Gaussian mixture parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n resp : array-like of shape (n_samples, n_components)\n \"\"\"\n n_samples, _ = X.shape\n\n weights, means, covariances = _estimate_gaussian_parameters(\n X, resp, self.reg_covar, self.covariance_type)\n weights /= n_samples\n\n self.weights_ = (weights if self.weights_init is None\n 
else self.weights_init)\n self.means_ = means if self.means_init is None else self.means_init\n\n if self.precisions_init is None:\n self.covariances_ = covariances\n self.precisions_cholesky_ = _compute_precision_cholesky(\n covariances, self.covariance_type)\n elif self.covariance_type == 'full':\n self.precisions_cholesky_ = np.array(\n [linalg.cholesky(prec_init, lower=True)\n for prec_init in self.precisions_init])\n elif self.covariance_type == 'tied':\n self.precisions_cholesky_ = linalg.cholesky(self.precisions_init,\n lower=True)\n else:\n self.precisions_cholesky_ = self.precisions_init" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_m_step", + "name": "_m_step", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._m_step", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_m_step/self", + "name": "self", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._m_step.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_m_step/X", + "name": "X", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._m_step.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_m_step/log_resp", + "name": "log_resp", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._m_step.log_resp", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "Logarithm of the posterior probabilities (or responsibilities) of\nthe point of each sample in X." 
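The `_initialize` record above converts user-supplied precisions into cached Cholesky factors. A sketch of that branch (the function name is illustrative, not the sklearn API):

```python
import numpy as np
from scipy import linalg

def precisions_cholesky_from_init(precisions_init, covariance_type):
    """Sketch of the precisions_init handling in GaussianMixture._initialize."""
    if covariance_type == 'full':
        return np.array([linalg.cholesky(prec, lower=True)
                         for prec in precisions_init])
    if covariance_type == 'tied':
        return linalg.cholesky(precisions_init, lower=True)
    # 'diag'/'spherical': the recorded 0.24.2 code stores the values as
    # given (mathematically, the Cholesky factor of a diagonal precision
    # is its element-wise square root).
    return precisions_init
```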
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "M step.", + "docstring": "M step.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nlog_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.", + "code": " def _m_step(self, X, log_resp):\n \"\"\"M step.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n log_resp : array-like of shape (n_samples, n_components)\n Logarithm of the posterior probabilities (or responsibilities) of\n the point of each sample in X.\n \"\"\"\n n_samples, _ = X.shape\n self.weights_, self.means_, self.covariances_ = (\n _estimate_gaussian_parameters(X, np.exp(log_resp), self.reg_covar,\n self.covariance_type))\n self.weights_ /= n_samples\n self.precisions_cholesky_ = _compute_precision_cholesky(\n self.covariances_, self.covariance_type)" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_n_parameters", + "name": "_n_parameters", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._n_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_n_parameters/self", + "name": "self", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._n_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the number of free parameters in the model.", + "docstring": "Return the number of free parameters in the model.", + "code": " def _n_parameters(self):\n \"\"\"Return the number of free parameters in the model.\"\"\"\n _, n_features = self.means_.shape\n if self.covariance_type == 'full':\n cov_params = self.n_components * n_features * (n_features + 1) / 2.\n elif self.covariance_type == 'diag':\n cov_params = self.n_components * n_features\n elif self.covariance_type == 'tied':\n cov_params = n_features * (n_features + 1) / 2.\n elif self.covariance_type == 'spherical':\n cov_params = self.n_components\n mean_params = n_features * self.n_components\n return int(cov_params + mean_params + self.n_components - 1)" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_set_parameters", + "name": "_set_parameters", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._set_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_set_parameters/self", + "name": "self", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._set_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/_set_parameters/params", + "name": "params", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture._set_parameters.params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + 
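The `_n_parameters` record above counts free parameters for the information criteria. The arithmetic, extracted into a standalone sketch:

```python
def n_parameters(n_components, n_features, covariance_type):
    """Sketch of GaussianMixture._n_parameters: free parameters by type."""
    if covariance_type == 'full':
        cov = n_components * n_features * (n_features + 1) / 2.0
    elif covariance_type == 'diag':
        cov = n_components * n_features
    elif covariance_type == 'tied':
        cov = n_features * (n_features + 1) / 2.0
    else:  # 'spherical'
        cov = n_components
    means = n_components * n_features
    weights = n_components - 1      # weights are constrained to sum to one
    return int(cov + means + weights)
```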
"reexported_by": [], + "description": "", + "docstring": "", + "code": " def _set_parameters(self, params):\n (self.weights_, self.means_, self.covariances_,\n self.precisions_cholesky_) = params\n\n # Attributes computation\n _, n_features = self.means_.shape\n\n if self.covariance_type == 'full':\n self.precisions_ = np.empty(self.precisions_cholesky_.shape)\n for k, prec_chol in enumerate(self.precisions_cholesky_):\n self.precisions_[k] = np.dot(prec_chol, prec_chol.T)\n\n elif self.covariance_type == 'tied':\n self.precisions_ = np.dot(self.precisions_cholesky_,\n self.precisions_cholesky_.T)\n else:\n self.precisions_ = self.precisions_cholesky_ ** 2" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/aic", + "name": "aic", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.aic", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/aic/self", + "name": "self", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.aic.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/aic/X", + "name": "X", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.aic.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples, n_dimensions)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples, n_dimensions)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Akaike information criterion for the current model on the input X.", + "docstring": "Akaike information criterion for the current model on the input X.\n\nParameters\n----------\nX : array of shape (n_samples, n_dimensions)\n\nReturns\n-------\naic : float\n The lower the better.", + "code": " def aic(self, X):\n \"\"\"Akaike information criterion for the current model on the input X.\n\n Parameters\n ----------\n X : array of shape (n_samples, n_dimensions)\n\n Returns\n -------\n aic : float\n The lower the better.\n \"\"\"\n return -2 * self.score(X) * X.shape[0] + 2 * self._n_parameters()" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/bic", + "name": "bic", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.bic", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/bic/self", + "name": "self", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.bic.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/GaussianMixture/bic/X", + "name": "X", + "qname": "sklearn.mixture._gaussian_mixture.GaussianMixture.bic.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of shape (n_samples, n_dimensions)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array of shape (n_samples, n_dimensions)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Bayesian information criterion for the current model on the 
input X.", + "docstring": "Bayesian information criterion for the current model on the input X.\n\nParameters\n----------\nX : array of shape (n_samples, n_dimensions)\n\nReturns\n-------\nbic : float\n The lower the better.", + "code": " def bic(self, X):\n \"\"\"Bayesian information criterion for the current model on the input X.\n\n Parameters\n ----------\n X : array of shape (n_samples, n_dimensions)\n\n Returns\n -------\n bic : float\n The lower the better.\n \"\"\"\n return (-2 * self.score(X) * X.shape[0] +\n self._n_parameters() * np.log(X.shape[0]))" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_means", + "name": "_check_means", + "qname": "sklearn.mixture._gaussian_mixture._check_means", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_means/means", + "name": "means", + "qname": "sklearn.mixture._gaussian_mixture._check_means.means", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "The centers of the current components." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_means/n_components", + "name": "n_components", + "qname": "sklearn.mixture._gaussian_mixture._check_means.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of components." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_means/n_features", + "name": "n_features", + "qname": "sklearn.mixture._gaussian_mixture._check_means.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate the provided 'means'.", + "docstring": "Validate the provided 'means'.\n\nParameters\n----------\nmeans : array-like of shape (n_components, n_features)\n The centers of the current components.\n\nn_components : int\n Number of components.\n\nn_features : int\n Number of features.\n\nReturns\n-------\nmeans : array, (n_components, n_features)", + "code": "def _check_means(means, n_components, n_features):\n \"\"\"Validate the provided 'means'.\n\n Parameters\n ----------\n means : array-like of shape (n_components, n_features)\n The centers of the current components.\n\n n_components : int\n Number of components.\n\n n_features : int\n Number of features.\n\n Returns\n -------\n means : array, (n_components, n_features)\n \"\"\"\n means = check_array(means, dtype=[np.float64, np.float32], ensure_2d=False)\n _check_shape(means, (n_components, n_features), 'means')\n return means" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precision_matrix", + "name": "_check_precision_matrix", + "qname": "sklearn.mixture._gaussian_mixture._check_precision_matrix", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precision_matrix/precision", + "name": "precision", + "qname": "sklearn.mixture._gaussian_mixture._check_precision_matrix.precision", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precision_matrix/covariance_type", + "name": "covariance_type", + "qname": "sklearn.mixture._gaussian_mixture._check_precision_matrix.covariance_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check a precision matrix is symmetric and positive-definite.", + "docstring": "Check a precision matrix is symmetric and positive-definite.", + "code": "def _check_precision_matrix(precision, covariance_type):\n \"\"\"Check a precision matrix is symmetric and positive-definite.\"\"\"\n if not (np.allclose(precision, precision.T) and\n np.all(linalg.eigvalsh(precision) > 0.)):\n raise ValueError(\"'%s precision' should be symmetric, \"\n \"positive-definite\" % covariance_type)" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precision_positivity", + "name": "_check_precision_positivity", + "qname": "sklearn.mixture._gaussian_mixture._check_precision_positivity", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precision_positivity/precision", + "name": "precision", + "qname": "sklearn.mixture._gaussian_mixture._check_precision_positivity.precision", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precision_positivity/covariance_type", + "name": "covariance_type", + "qname": "sklearn.mixture._gaussian_mixture._check_precision_positivity.covariance_type", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check a precision vector is positive-definite.", + "docstring": "Check a precision vector is positive-definite.", + "code": "def _check_precision_positivity(precision, covariance_type):\n \"\"\"Check a precision vector is positive-definite.\"\"\"\n if np.any(np.less_equal(precision, 0.0)):\n raise ValueError(\"'%s precision' should be \"\n \"positive\" % covariance_type)" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precisions", + "name": "_check_precisions", + "qname": "sklearn.mixture._gaussian_mixture._check_precisions", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precisions/precisions", + "name": "precisions", + "qname": "sklearn.mixture._gaussian_mixture._check_precisions.precisions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "'full' : shape of (n_components, n_features, n_features)\n'tied' : shape of (n_features, n_features)\n'diag' : shape of (n_components, n_features)\n'spherical' : shape of (n_components,)" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precisions/covariance_type", + "name": "covariance_type", + "qname": "sklearn.mixture._gaussian_mixture._check_precisions.covariance_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "string", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "string" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precisions/n_components", + "name": "n_components", + "qname": "sklearn.mixture._gaussian_mixture._check_precisions.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of components." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precisions/n_features", + "name": "n_features", + "qname": "sklearn.mixture._gaussian_mixture._check_precisions.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate user provided precisions.", + "docstring": "Validate user provided precisions.\n\nParameters\n----------\nprecisions : array-like\n 'full' : shape of (n_components, n_features, n_features)\n 'tied' : shape of (n_features, n_features)\n 'diag' : shape of (n_components, n_features)\n 'spherical' : shape of (n_components,)\n\ncovariance_type : string\n\nn_components : int\n Number of components.\n\nn_features : int\n Number of features.\n\nReturns\n-------\nprecisions : array", + "code": "def _check_precisions(precisions, covariance_type, n_components, n_features):\n \"\"\"Validate user provided precisions.\n\n Parameters\n ----------\n precisions : array-like\n 'full' : shape of (n_components, n_features, n_features)\n 'tied' : shape of (n_features, n_features)\n 'diag' : shape of (n_components, n_features)\n 'spherical' : shape of (n_components,)\n\n covariance_type : string\n\n n_components : int\n Number of components.\n\n n_features : int\n Number of features.\n\n Returns\n -------\n precisions : array\n \"\"\"\n precisions = check_array(precisions, dtype=[np.float64, np.float32],\n ensure_2d=False,\n allow_nd=covariance_type == 'full')\n\n precisions_shape = {'full': (n_components, n_features, n_features),\n 'tied': (n_features, n_features),\n 'diag': (n_components, n_features),\n 'spherical': (n_components,)}\n _check_shape(precisions, precisions_shape[covariance_type],\n '%s precision' % covariance_type)\n\n _check_precisions = {'full': _check_precisions_full,\n 'tied': _check_precision_matrix,\n 'diag': _check_precision_positivity,\n 'spherical': _check_precision_positivity}\n _check_precisions[covariance_type](precisions, covariance_type)\n return precisions" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precisions_full", + "name": "_check_precisions_full", + "qname": "sklearn.mixture._gaussian_mixture._check_precisions_full", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precisions_full/precisions", + "name": "precisions", + "qname": "sklearn.mixture._gaussian_mixture._check_precisions_full.precisions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_precisions_full/covariance_type", + "name": "covariance_type", + "qname": "sklearn.mixture._gaussian_mixture._check_precisions_full.covariance_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the precision matrices are symmetric and positive-definite.", + "docstring": "Check the precision matrices are symmetric and positive-definite.", + "code": "def _check_precisions_full(precisions, covariance_type):\n \"\"\"Check the precision matrices are symmetric and positive-definite.\"\"\"\n for prec in precisions:\n _check_precision_matrix(prec, covariance_type)" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_weights", + "name": "_check_weights", + "qname": "sklearn.mixture._gaussian_mixture._check_weights", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.mixture._gaussian_mixture/_check_weights/weights", + "name": "weights", + "qname": "sklearn.mixture._gaussian_mixture._check_weights.weights", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "The proportions of components of each mixture." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_check_weights/n_components", + "name": "n_components", + "qname": "sklearn.mixture._gaussian_mixture._check_weights.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of components." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the user provided 'weights'.", + "docstring": "Check the user provided 'weights'.\n\nParameters\n----------\nweights : array-like of shape (n_components,)\n The proportions of components of each mixture.\n\nn_components : int\n Number of components.\n\nReturns\n-------\nweights : array, shape (n_components,)", + "code": "def _check_weights(weights, n_components):\n \"\"\"Check the user provided 'weights'.\n\n Parameters\n ----------\n weights : array-like of shape (n_components,)\n The proportions of components of each mixture.\n\n n_components : int\n Number of components.\n\n Returns\n -------\n weights : array, shape (n_components,)\n \"\"\"\n weights = check_array(weights, dtype=[np.float64, np.float32],\n ensure_2d=False)\n _check_shape(weights, (n_components,), 'weights')\n\n # check range\n if (any(np.less(weights, 0.)) or\n any(np.greater(weights, 1.))):\n raise ValueError(\"The parameter 'weights' should be in the range \"\n \"[0, 1], but got max value %.5f, min value %.5f\"\n % (np.min(weights), np.max(weights)))\n\n # check normalization\n if not np.allclose(np.abs(1. 
- np.sum(weights)), 0.):\n raise ValueError(\"The parameter 'weights' should be normalized, \"\n \"but got sum(weights) = %.5f\" % np.sum(weights))\n return weights" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_compute_log_det_cholesky", + "name": "_compute_log_det_cholesky", + "qname": "sklearn.mixture._gaussian_mixture._compute_log_det_cholesky", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_compute_log_det_cholesky/matrix_chol", + "name": "matrix_chol", + "qname": "sklearn.mixture._gaussian_mixture._compute_log_det_cholesky.matrix_chol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "Cholesky decompositions of the matrices.\n'full' : shape of (n_components, n_features, n_features)\n'tied' : shape of (n_features, n_features)\n'diag' : shape of (n_components, n_features)\n'spherical' : shape of (n_components,)" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_compute_log_det_cholesky/covariance_type", + "name": "covariance_type", + "qname": "sklearn.mixture._gaussian_mixture._compute_log_det_cholesky.covariance_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'full', 'tied', 'diag', 'spherical'}", + "default_value": "", + "description": "" + }, + "type": { + "kind": "EnumType", + "values": ["full", "spherical", "diag", "tied"] + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_compute_log_det_cholesky/n_features", + "name": "n_features", + "qname": "sklearn.mixture._gaussian_mixture._compute_log_det_cholesky.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of features." 
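The `_check_weights` record that closes above enforces the range and normalization constraints on user-supplied weights. A compact sketch of the same checks (helper name illustrative):

```python
import numpy as np

def check_weights(weights, n_components):
    """Sketch of _check_weights: entries in [0, 1] and summing to one."""
    weights = np.asarray(weights, dtype=float)
    if weights.shape != (n_components,):
        raise ValueError("weights must have shape (n_components,)")
    if np.any(weights < 0.0) or np.any(weights > 1.0):
        raise ValueError("weights must lie in the range [0, 1]")
    if not np.allclose(weights.sum(), 1.0):
        raise ValueError("weights must sum to 1, got %.5f" % weights.sum())
    return weights
```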
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the log-det of the cholesky decomposition of matrices.", + "docstring": "Compute the log-det of the cholesky decomposition of matrices.\n\nParameters\n----------\nmatrix_chol : array-like\n Cholesky decompositions of the matrices.\n 'full' : shape of (n_components, n_features, n_features)\n 'tied' : shape of (n_features, n_features)\n 'diag' : shape of (n_components, n_features)\n 'spherical' : shape of (n_components,)\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}\n\nn_features : int\n Number of features.\n\nReturns\n-------\nlog_det_precision_chol : array-like of shape (n_components,)\n The determinant of the precision matrix for each component.", + "code": "def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features):\n \"\"\"Compute the log-det of the cholesky decomposition of matrices.\n\n Parameters\n ----------\n matrix_chol : array-like\n Cholesky decompositions of the matrices.\n 'full' : shape of (n_components, n_features, n_features)\n 'tied' : shape of (n_features, n_features)\n 'diag' : shape of (n_components, n_features)\n 'spherical' : shape of (n_components,)\n\n covariance_type : {'full', 'tied', 'diag', 'spherical'}\n\n n_features : int\n Number of features.\n\n Returns\n -------\n log_det_precision_chol : array-like of shape (n_components,)\n The determinant of the precision matrix for each component.\n \"\"\"\n if covariance_type == 'full':\n n_components, _, _ = matrix_chol.shape\n log_det_chol = (np.sum(np.log(\n matrix_chol.reshape(\n n_components, -1)[:, ::n_features + 1]), 1))\n\n elif covariance_type == 'tied':\n log_det_chol = (np.sum(np.log(np.diag(matrix_chol))))\n\n elif covariance_type == 'diag':\n log_det_chol = (np.sum(np.log(matrix_chol), axis=1))\n\n else:\n log_det_chol = n_features * (np.log(matrix_chol))\n\n return log_det_chol" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_compute_precision_cholesky", + "name": "_compute_precision_cholesky", + "qname": "sklearn.mixture._gaussian_mixture._compute_precision_cholesky", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_compute_precision_cholesky/covariances", + "name": "covariances", + "qname": "sklearn.mixture._gaussian_mixture._compute_precision_cholesky.covariances", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "The covariance matrix of the current components.\nThe shape depends of the covariance_type." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_compute_precision_cholesky/covariance_type", + "name": "covariance_type", + "qname": "sklearn.mixture._gaussian_mixture._compute_precision_cholesky.covariance_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'full', 'tied', 'diag', 'spherical'}", + "default_value": "", + "description": "The type of precision matrices." 
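The `'full'` branch of the `_compute_log_det_cholesky` record above extracts each factor's main diagonal with a reshape-and-stride trick: flattening a `(d, d)` matrix and stepping by `d + 1` walks its diagonal. A sketch verifying that equivalence:

```python
import numpy as np

n_components, n_features = 3, 4
# lower-triangular stack with a strictly positive diagonal
chol = np.tril(np.random.RandomState(0).rand(
    n_components, n_features, n_features)) + np.eye(n_features)

# log|L_k| = sum(log diag(L_k)) for every component k
log_det = np.sum(
    np.log(chol.reshape(n_components, -1)[:, ::n_features + 1]), axis=1)
expected = np.array([np.sum(np.log(np.diag(L))) for L in chol])
assert np.allclose(log_det, expected)
```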
+ }, + "type": { + "kind": "EnumType", + "values": ["full", "spherical", "diag", "tied"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the Cholesky decomposition of the precisions.", + "docstring": "Compute the Cholesky decomposition of the precisions.\n\nParameters\n----------\ncovariances : array-like\n The covariance matrix of the current components.\n The shape depends of the covariance_type.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}\n The type of precision matrices.\n\nReturns\n-------\nprecisions_cholesky : array-like\n The cholesky decomposition of sample precisions of the current\n components. The shape depends of the covariance_type.", + "code": "def _compute_precision_cholesky(covariances, covariance_type):\n \"\"\"Compute the Cholesky decomposition of the precisions.\n\n Parameters\n ----------\n covariances : array-like\n The covariance matrix of the current components.\n The shape depends of the covariance_type.\n\n covariance_type : {'full', 'tied', 'diag', 'spherical'}\n The type of precision matrices.\n\n Returns\n -------\n precisions_cholesky : array-like\n The cholesky decomposition of sample precisions of the current\n components. The shape depends of the covariance_type.\n \"\"\"\n estimate_precision_error_message = (\n \"Fitting the mixture model failed because some components have \"\n \"ill-defined empirical covariance (for instance caused by singleton \"\n \"or collapsed samples). Try to decrease the number of components, \"\n \"or increase reg_covar.\")\n\n if covariance_type == 'full':\n n_components, n_features, _ = covariances.shape\n precisions_chol = np.empty((n_components, n_features, n_features))\n for k, covariance in enumerate(covariances):\n try:\n cov_chol = linalg.cholesky(covariance, lower=True)\n except linalg.LinAlgError:\n raise ValueError(estimate_precision_error_message)\n precisions_chol[k] = linalg.solve_triangular(cov_chol,\n np.eye(n_features),\n lower=True).T\n elif covariance_type == 'tied':\n _, n_features = covariances.shape\n try:\n cov_chol = linalg.cholesky(covariances, lower=True)\n except linalg.LinAlgError:\n raise ValueError(estimate_precision_error_message)\n precisions_chol = linalg.solve_triangular(cov_chol, np.eye(n_features),\n lower=True).T\n else:\n if np.any(np.less_equal(covariances, 0.0)):\n raise ValueError(estimate_precision_error_message)\n precisions_chol = 1. 
/ np.sqrt(covariances)\n return precisions_chol" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_diag", + "name": "_estimate_gaussian_covariances_diag", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_diag", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_diag/resp", + "name": "resp", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_diag.resp", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_diag/X", + "name": "X", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_diag.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_diag/nk", + "name": "nk", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_diag.nk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_diag/means", + "name": "means", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_diag.means", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_diag/reg_covar", + "name": "reg_covar", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_diag.reg_covar", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the diagonal covariance vectors.", + "docstring": "Estimate the diagonal covariance vectors.\n\nParameters\n----------\nresp : array-like of shape (n_samples, n_components)\n\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nmeans : array-like of shape (n_components, n_features)\n\nreg_covar : float\n\nReturns\n-------\ncovariances : array, shape (n_components, n_features)\n The covariance vector of the current components.", + "code": "def _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar):\n \"\"\"Estimate the diagonal covariance vectors.\n\n Parameters\n ----------\n resp : array-like of shape (n_samples, n_components)\n\n X : array-like of shape (n_samples, 
n_features)\n\n nk : array-like of shape (n_components,)\n\n means : array-like of shape (n_components, n_features)\n\n reg_covar : float\n\n Returns\n -------\n covariances : array, shape (n_components, n_features)\n The covariance vector of the current components.\n \"\"\"\n avg_X2 = np.dot(resp.T, X * X) / nk[:, np.newaxis]\n avg_means2 = means ** 2\n avg_X_means = means * np.dot(resp.T, X) / nk[:, np.newaxis]\n return avg_X2 - 2 * avg_X_means + avg_means2 + reg_covar" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_full", + "name": "_estimate_gaussian_covariances_full", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_full", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_full/resp", + "name": "resp", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_full.resp", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_full/X", + "name": "X", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_full.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_full/nk", + "name": "nk", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_full.nk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_full/means", + "name": "means", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_full.means", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_full/reg_covar", + "name": "reg_covar", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_full.reg_covar", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the full covariance matrices.", + "docstring": "Estimate the full covariance matrices.\n\nParameters\n----------\nresp : array-like of shape (n_samples, n_components)\n\nX : array-like of shape (n_samples, 
n_features)\n\nnk : array-like of shape (n_components,)\n\nmeans : array-like of shape (n_components, n_features)\n\nreg_covar : float\n\nReturns\n-------\ncovariances : array, shape (n_components, n_features, n_features)\n The covariance matrix of the current components.", + "code": "def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar):\n \"\"\"Estimate the full covariance matrices.\n\n Parameters\n ----------\n resp : array-like of shape (n_samples, n_components)\n\n X : array-like of shape (n_samples, n_features)\n\n nk : array-like of shape (n_components,)\n\n means : array-like of shape (n_components, n_features)\n\n reg_covar : float\n\n Returns\n -------\n covariances : array, shape (n_components, n_features, n_features)\n The covariance matrix of the current components.\n \"\"\"\n n_components, n_features = means.shape\n covariances = np.empty((n_components, n_features, n_features))\n for k in range(n_components):\n diff = X - means[k]\n covariances[k] = np.dot(resp[:, k] * diff.T, diff) / nk[k]\n covariances[k].flat[::n_features + 1] += reg_covar\n return covariances" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_spherical", + "name": "_estimate_gaussian_covariances_spherical", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_spherical", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_spherical/resp", + "name": "resp", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_spherical.resp", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_spherical/X", + "name": "X", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_spherical.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_spherical/nk", + "name": "nk", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_spherical.nk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_spherical/means", + "name": "means", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_spherical.means", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_spherical/reg_covar", + "name": "reg_covar", + "qname": 
"sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_spherical.reg_covar", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the spherical variance values.", + "docstring": "Estimate the spherical variance values.\n\nParameters\n----------\nresponsibilities : array-like of shape (n_samples, n_components)\n\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nmeans : array-like of shape (n_components, n_features)\n\nreg_covar : float\n\nReturns\n-------\nvariances : array, shape (n_components,)\n The variance values of each components.", + "code": "def _estimate_gaussian_covariances_spherical(resp, X, nk, means, reg_covar):\n \"\"\"Estimate the spherical variance values.\n\n Parameters\n ----------\n responsibilities : array-like of shape (n_samples, n_components)\n\n X : array-like of shape (n_samples, n_features)\n\n nk : array-like of shape (n_components,)\n\n means : array-like of shape (n_components, n_features)\n\n reg_covar : float\n\n Returns\n -------\n variances : array, shape (n_components,)\n The variance values of each components.\n \"\"\"\n return _estimate_gaussian_covariances_diag(resp, X, nk,\n means, reg_covar).mean(1)" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_tied", + "name": "_estimate_gaussian_covariances_tied", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_tied", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_tied/resp", + "name": "resp", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_tied.resp", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_tied/X", + "name": "X", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_tied.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_tied/nk", + "name": "nk", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_tied.nk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components,)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_tied/means", + "name": "means", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_tied.means", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", 
+ "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_covariances_tied/reg_covar", + "name": "reg_covar", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_covariances_tied.reg_covar", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the tied covariance matrix.", + "docstring": "Estimate the tied covariance matrix.\n\nParameters\n----------\nresp : array-like of shape (n_samples, n_components)\n\nX : array-like of shape (n_samples, n_features)\n\nnk : array-like of shape (n_components,)\n\nmeans : array-like of shape (n_components, n_features)\n\nreg_covar : float\n\nReturns\n-------\ncovariance : array, shape (n_features, n_features)\n The tied covariance matrix of the components.", + "code": "def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar):\n \"\"\"Estimate the tied covariance matrix.\n\n Parameters\n ----------\n resp : array-like of shape (n_samples, n_components)\n\n X : array-like of shape (n_samples, n_features)\n\n nk : array-like of shape (n_components,)\n\n means : array-like of shape (n_components, n_features)\n\n reg_covar : float\n\n Returns\n -------\n covariance : array, shape (n_features, n_features)\n The tied covariance matrix of the components.\n \"\"\"\n avg_X2 = np.dot(X.T, X)\n avg_means2 = np.dot(nk * means.T, means)\n covariance = avg_X2 - avg_means2\n covariance /= nk.sum()\n covariance.flat[::len(covariance) + 1] += reg_covar\n return covariance" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_parameters", + "name": "_estimate_gaussian_parameters", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_parameters/X", + "name": "X", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_parameters.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data array." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_parameters/resp", + "name": "resp", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_parameters.resp", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_components)", + "default_value": "", + "description": "The responsibilities for each data sample in X." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_parameters/reg_covar", + "name": "reg_covar", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_parameters.reg_covar", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "The regularization added to the diagonal of the covariance matrices." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_gaussian_parameters/covariance_type", + "name": "covariance_type", + "qname": "sklearn.mixture._gaussian_mixture._estimate_gaussian_parameters.covariance_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'full', 'tied', 'diag', 'spherical'}", + "default_value": "", + "description": "The type of precision matrices." + }, + "type": { + "kind": "EnumType", + "values": ["full", "spherical", "diag", "tied"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the Gaussian distribution parameters.", + "docstring": "Estimate the Gaussian distribution parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data array.\n\nresp : array-like of shape (n_samples, n_components)\n The responsibilities for each data sample in X.\n\nreg_covar : float\n The regularization added to the diagonal of the covariance matrices.\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}\n The type of precision matrices.\n\nReturns\n-------\nnk : array-like of shape (n_components,)\n The numbers of data samples in the current components.\n\nmeans : array-like of shape (n_components, n_features)\n The centers of the current components.\n\ncovariances : array-like\n The covariance matrix of the current components.\n The shape depends of the covariance_type.", + "code": "def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type):\n \"\"\"Estimate the Gaussian distribution parameters.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input data array.\n\n resp : array-like of shape (n_samples, n_components)\n The responsibilities for each data sample in X.\n\n reg_covar : float\n The regularization added to the diagonal of the covariance matrices.\n\n covariance_type : {'full', 'tied', 'diag', 'spherical'}\n The type of precision matrices.\n\n Returns\n -------\n nk : array-like of shape (n_components,)\n The numbers of data samples in the current components.\n\n means : array-like of shape (n_components, n_features)\n The centers of the current components.\n\n covariances : array-like\n The covariance matrix of the current components.\n The shape depends of the covariance_type.\n \"\"\"\n nk = resp.sum(axis=0) + 10 * np.finfo(resp.dtype).eps\n means = np.dot(resp.T, X) / nk[:, np.newaxis]\n covariances = {\"full\": _estimate_gaussian_covariances_full,\n \"tied\": _estimate_gaussian_covariances_tied,\n \"diag\": _estimate_gaussian_covariances_diag,\n \"spherical\": _estimate_gaussian_covariances_spherical\n }[covariance_type](resp, X, nk, means, reg_covar)\n return nk, means, covariances" + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_log_gaussian_prob", + "name": "_estimate_log_gaussian_prob", + "qname": 
"sklearn.mixture._gaussian_mixture._estimate_log_gaussian_prob", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_log_gaussian_prob/X", + "name": "X", + "qname": "sklearn.mixture._gaussian_mixture._estimate_log_gaussian_prob.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_log_gaussian_prob/means", + "name": "means", + "qname": "sklearn.mixture._gaussian_mixture._estimate_log_gaussian_prob.means", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_components, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_components, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_log_gaussian_prob/precisions_chol", + "name": "precisions_chol", + "qname": "sklearn.mixture._gaussian_mixture._estimate_log_gaussian_prob.precisions_chol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "Cholesky decompositions of the precision matrices.\n'full' : shape of (n_components, n_features, n_features)\n'tied' : shape of (n_features, n_features)\n'diag' : shape of (n_components, n_features)\n'spherical' : shape of (n_components,)" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.mixture._gaussian_mixture/_estimate_log_gaussian_prob/covariance_type", + "name": "covariance_type", + "qname": "sklearn.mixture._gaussian_mixture._estimate_log_gaussian_prob.covariance_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'full', 'tied', 'diag', 'spherical'}", + "default_value": "", + "description": "" + }, + "type": { + "kind": "EnumType", + "values": ["full", "spherical", "diag", "tied"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the log Gaussian probability.", + "docstring": "Estimate the log Gaussian probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nmeans : array-like of shape (n_components, n_features)\n\nprecisions_chol : array-like\n Cholesky decompositions of the precision matrices.\n 'full' : shape of (n_components, n_features, n_features)\n 'tied' : shape of (n_features, n_features)\n 'diag' : shape of (n_components, n_features)\n 'spherical' : shape of (n_components,)\n\ncovariance_type : {'full', 'tied', 'diag', 'spherical'}\n\nReturns\n-------\nlog_prob : array, shape (n_samples, n_components)", + "code": "def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type):\n \"\"\"Estimate the log Gaussian probability.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n means : array-like of shape (n_components, n_features)\n\n precisions_chol : array-like\n Cholesky decompositions of the precision matrices.\n 'full' : shape of (n_components, n_features, n_features)\n 'tied' : shape of (n_features, n_features)\n 'diag' : shape of (n_components, 
n_features)\n 'spherical' : shape of (n_components,)\n\n covariance_type : {'full', 'tied', 'diag', 'spherical'}\n\n Returns\n -------\n log_prob : array, shape (n_samples, n_components)\n \"\"\"\n n_samples, n_features = X.shape\n n_components, _ = means.shape\n # det(precision_chol) is half of det(precision)\n log_det = _compute_log_det_cholesky(\n precisions_chol, covariance_type, n_features)\n\n if covariance_type == 'full':\n log_prob = np.empty((n_samples, n_components))\n for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)):\n y = np.dot(X, prec_chol) - np.dot(mu, prec_chol)\n log_prob[:, k] = np.sum(np.square(y), axis=1)\n\n elif covariance_type == 'tied':\n log_prob = np.empty((n_samples, n_components))\n for k, mu in enumerate(means):\n y = np.dot(X, precisions_chol) - np.dot(mu, precisions_chol)\n log_prob[:, k] = np.sum(np.square(y), axis=1)\n\n elif covariance_type == 'diag':\n precisions = precisions_chol ** 2\n log_prob = (np.sum((means ** 2 * precisions), 1) -\n 2. * np.dot(X, (means * precisions).T) +\n np.dot(X ** 2, precisions.T))\n\n elif covariance_type == 'spherical':\n precisions = precisions_chol ** 2\n log_prob = (np.sum(means ** 2, 1) * precisions -\n 2 * np.dot(X, means.T * precisions) +\n np.outer(row_norms(X, squared=True), precisions))\n return -.5 * (n_features * np.log(2 * np.pi) + log_prob) + log_det" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._search.BaseSearchCV.__init__", + "decorators": ["abstractmethod", "_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/__init__/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._search.BaseSearchCV.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/__init__/scoring", + "name": "scoring", + "qname": "sklearn.model_selection._search.BaseSearchCV.__init__.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.model_selection._search.BaseSearchCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/__init__/refit", + "name": "refit", + "qname": "sklearn.model_selection._search.BaseSearchCV.__init__.refit", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/__init__/cv", + "name": "cv", + "qname": 
"sklearn.model_selection._search.BaseSearchCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._search.BaseSearchCV.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/__init__/pre_dispatch", + "name": "pre_dispatch", + "qname": "sklearn.model_selection._search.BaseSearchCV.__init__.pre_dispatch", + "default_value": "'2*n_jobs'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/__init__/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._search.BaseSearchCV.__init__.error_score", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/__init__/return_train_score", + "name": "return_train_score", + "qname": "sklearn.model_selection._search.BaseSearchCV.__init__.return_train_score", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Abstract base class for hyper parameter search with cross-validation.", + "docstring": "", + "code": " @abstractmethod\n @_deprecate_positional_args\n def __init__(self, estimator, *, scoring=None, n_jobs=None,\n refit=True, cv=None, verbose=0,\n pre_dispatch='2*n_jobs', error_score=np.nan,\n return_train_score=True):\n\n self.scoring = scoring\n self.estimator = estimator\n self.n_jobs = n_jobs\n self.refit = refit\n self.cv = cv\n self.verbose = verbose\n self.pre_dispatch = pre_dispatch\n self.error_score = error_score\n self.return_train_score = return_train_score" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_check_is_fitted", + "name": "_check_is_fitted", + "qname": "sklearn.model_selection._search.BaseSearchCV._check_is_fitted", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_check_is_fitted/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV._check_is_fitted.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_check_is_fitted/method_name", + "name": "method_name", + "qname": "sklearn.model_selection._search.BaseSearchCV._check_is_fitted.method_name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def 
_check_is_fitted(self, method_name):\n if not self.refit:\n raise NotFittedError('This %s instance was initialized '\n 'with refit=False. %s is '\n 'available only after refitting on the best '\n 'parameters. You can refit an estimator '\n 'manually using the ``best_params_`` '\n 'attribute'\n % (type(self).__name__, method_name))\n else:\n check_is_fitted(self)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_check_refit_for_multimetric", + "name": "_check_refit_for_multimetric", + "qname": "sklearn.model_selection._search.BaseSearchCV._check_refit_for_multimetric", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_check_refit_for_multimetric/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV._check_refit_for_multimetric.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_check_refit_for_multimetric/scores", + "name": "scores", + "qname": "sklearn.model_selection._search.BaseSearchCV._check_refit_for_multimetric.scores", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check that `refit` is compatible with `scores`", + "docstring": "Check that `refit` is compatible with `scores`", + "code": " def _check_refit_for_multimetric(self, scores):\n \"\"\"Check that `refit` is compatible with `scores`\"\"\"\n multimetric_refit_msg = (\n \"For multi-metric scoring, the parameter refit must be set to a \"\n \"scorer key or a callable to refit an estimator with the best \"\n \"parameter setting on the whole data and make the best_* \"\n \"attributes available for that metric. If this is not needed, \"\n f\"refit should be set to False explicitly. 
{self.refit!r} was \"\n \"passed.\")\n\n valid_refit_dict = (isinstance(self.refit, str) and\n self.refit in scores)\n\n if (self.refit is not False and not valid_refit_dict\n and not callable(self.refit)):\n raise ValueError(multimetric_refit_msg)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_estimator_type@getter", + "name": "_estimator_type", + "qname": "sklearn.model_selection._search.BaseSearchCV._estimator_type", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_estimator_type/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV._estimator_type.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def _estimator_type(self):\n return self.estimator._estimator_type" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_format_results", + "name": "_format_results", + "qname": "sklearn.model_selection._search.BaseSearchCV._format_results", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_format_results/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV._format_results.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_format_results/candidate_params", + "name": "candidate_params", + "qname": "sklearn.model_selection._search.BaseSearchCV._format_results.candidate_params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_format_results/n_splits", + "name": "n_splits", + "qname": "sklearn.model_selection._search.BaseSearchCV._format_results.n_splits", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_format_results/out", + "name": "out", + "qname": "sklearn.model_selection._search.BaseSearchCV._format_results.out", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_format_results/more_results", + "name": "more_results", + "qname": "sklearn.model_selection._search.BaseSearchCV._format_results.more_results", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _format_results(self, candidate_params, n_splits, out,\n more_results=None):\n n_candidates = len(candidate_params)\n out = _aggregate_score_dicts(out)\n\n results = dict(more_results or {})\n for key, 
val in results.items():\n # each value is a list (as per evaluate_candidate's convention)\n # we convert it to an array for consistency with the other keys\n results[key] = np.asarray(val)\n\n def _store(key_name, array, weights=None, splits=False, rank=False):\n \"\"\"A small helper to store the scores/times to the cv_results_\"\"\"\n # When iterated first by splits, then by parameters\n # We want `array` to have `n_candidates` rows and `n_splits` cols.\n array = np.array(array, dtype=np.float64).reshape(n_candidates,\n n_splits)\n if splits:\n for split_idx in range(n_splits):\n # Uses closure to alter the results\n results[\"split%d_%s\"\n % (split_idx, key_name)] = array[:, split_idx]\n\n array_means = np.average(array, axis=1, weights=weights)\n results['mean_%s' % key_name] = array_means\n\n if (key_name.startswith((\"train_\", \"test_\")) and\n np.any(~np.isfinite(array_means))):\n warnings.warn(\n f\"One or more of the {key_name.split('_')[0]} scores \"\n f\"are non-finite: {array_means}\",\n category=UserWarning\n )\n\n # Weighted std is not directly available in numpy\n array_stds = np.sqrt(np.average((array -\n array_means[:, np.newaxis]) ** 2,\n axis=1, weights=weights))\n results['std_%s' % key_name] = array_stds\n\n if rank:\n results[\"rank_%s\" % key_name] = np.asarray(\n rankdata(-array_means, method='min'), dtype=np.int32)\n\n _store('fit_time', out[\"fit_time\"])\n _store('score_time', out[\"score_time\"])\n # Use one MaskedArray and mask all the places where the param is not\n # applicable for that candidate. Use defaultdict as each candidate may\n # not contain all the params\n param_results = defaultdict(partial(MaskedArray,\n np.empty(n_candidates,),\n mask=True,\n dtype=object))\n for cand_idx, params in enumerate(candidate_params):\n for name, value in params.items():\n # An all masked empty array gets created for the key\n # `\"param_%s\" % name` at the first occurrence of `name`.\n # Setting the value at an index also unmasks that index\n param_results[\"param_%s\" % name][cand_idx] = value\n\n results.update(param_results)\n # Store a list of param dicts at the key 'params'\n results['params'] = candidate_params\n\n test_scores_dict = _normalize_score_results(out[\"test_scores\"])\n if self.return_train_score:\n train_scores_dict = _normalize_score_results(out[\"train_scores\"])\n\n for scorer_name in test_scores_dict:\n # Computed the (weighted) mean and std for test scores alone\n _store('test_%s' % scorer_name, test_scores_dict[scorer_name],\n splits=True, rank=True,\n weights=None)\n if self.return_train_score:\n _store('train_%s' % scorer_name,\n train_scores_dict[scorer_name],\n splits=True)\n\n return results" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_more_tags", + "name": "_more_tags", + "qname": "sklearn.model_selection._search.BaseSearchCV._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_more_tags/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n # allows cross-validation to see 'precomputed' metrics\n return {\n 'pairwise': _safe_tags(self.estimator, \"pairwise\"),\n \"_xfail_checks\": 
{\"check_supervised_y_2d\":\n \"DataConversionWarning not caught\"},\n }" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.model_selection._search.BaseSearchCV._pairwise", + "decorators": [ + "deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_pairwise/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n # allows cross-validation to see 'precomputed' metrics\n return getattr(self.estimator, '_pairwise', False)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_run_search", + "name": "_run_search", + "qname": "sklearn.model_selection._search.BaseSearchCV._run_search", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_run_search/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV._run_search.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/_run_search/evaluate_candidates", + "name": "evaluate_candidates", + "qname": "sklearn.model_selection._search.BaseSearchCV._run_search.evaluate_candidates", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "This callback accepts:\n - a list of candidates, where each candidate is a dict of\n parameter settings.\n - an optional `cv` parameter which can be used to e.g.\n evaluate candidates on different dataset splits, or\n evaluate candidates on subsampled data (as done in the\n SucessiveHaling estimators). By default, the original `cv`\n parameter is used, and it is available as a private\n `_checked_cv_orig` attribute.\n - an optional `more_results` dict. Each key will be added to\n the `cv_results_` attribute. Values should be lists of\n length `n_candidates`\n\nIt returns a dict of all results so far, formatted like\n``cv_results_``.\n\nImportant note (relevant whether the default cv is used or not):\nin randomized splitters, and unless the random_state parameter of\ncv was set to an int, calling cv.split() multiple times will\nyield different splits. Since cv.split() is called in\nevaluate_candidates, this means that candidates will be evaluated\non different splits each time evaluate_candidates is called. This\nmight be a methodological issue depending on the search strategy\nthat you're implementing. 
To prevent randomized splitters from\nbeing used, you may use _split._yields_constant_splits()" + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Repeatedly calls `evaluate_candidates` to conduct a search.\n\nThis method, implemented in sub-classes, makes it possible to\ncustomize the scheduling of evaluations: GridSearchCV and\nRandomizedSearchCV schedule evaluations for their whole parameter\nsearch space at once but other more sequential approaches are also\npossible: for instance it is possible to iteratively schedule evaluations\nfor new regions of the parameter search space based on previously\ncollected evaluation results. This makes it possible to implement\nBayesian optimization or more generally sequential model-based\noptimization by deriving from the BaseSearchCV abstract base class.\nFor example, Successive Halving is implemented by calling\n`evaluate_candidates` multiple times (once per iteration of the SH\nprocess), each time passing a different set of candidates with `X`\nand `y` of increasing sizes.", + "docstring": "Repeatedly calls `evaluate_candidates` to conduct a search.\n\nThis method, implemented in sub-classes, makes it possible to\ncustomize the scheduling of evaluations: GridSearchCV and\nRandomizedSearchCV schedule evaluations for their whole parameter\nsearch space at once but other more sequential approaches are also\npossible: for instance it is possible to iteratively schedule evaluations\nfor new regions of the parameter search space based on previously\ncollected evaluation results. This makes it possible to implement\nBayesian optimization or more generally sequential model-based\noptimization by deriving from the BaseSearchCV abstract base class.\nFor example, Successive Halving is implemented by calling\n`evaluate_candidates` multiple times (once per iteration of the SH\nprocess), each time passing a different set of candidates with `X`\nand `y` of increasing sizes.\n\nParameters\n----------\nevaluate_candidates : callable\n This callback accepts:\n - a list of candidates, where each candidate is a dict of\n parameter settings.\n - an optional `cv` parameter which can be used to e.g.\n evaluate candidates on different dataset splits, or\n evaluate candidates on subsampled data (as done in the\n SuccessiveHalving estimators). By default, the original `cv`\n parameter is used, and it is available as a private\n `_checked_cv_orig` attribute.\n - an optional `more_results` dict. Each key will be added to\n the `cv_results_` attribute. Values should be lists of\n length `n_candidates`\n\nIt returns a dict of all results so far, formatted like\n``cv_results_``.\n\nImportant note (relevant whether the default cv is used or not):\nin randomized splitters, and unless the random_state parameter of\ncv was set to an int, calling cv.split() multiple times will\nyield different splits. Since cv.split() is called in\nevaluate_candidates, this means that candidates will be evaluated\non different splits each time evaluate_candidates is called. This\nmight be a methodological issue depending on the search strategy\nthat you're implementing. 
To prevent randomized splitters from\n being used, you may use _split._yields_constant_splits()\n\nExamples\n--------\n\n::\n\n def _run_search(self, evaluate_candidates):\n 'Try C=0.1 only if C=1 is better than C=10'\n all_results = evaluate_candidates([{'C': 1}, {'C': 10}])\n score = all_results['mean_test_score']\n if score[0] < score[1]:\n evaluate_candidates([{'C': 0.1}])", + "code": " def _run_search(self, evaluate_candidates):\n \"\"\"Repeatedly calls `evaluate_candidates` to conduct a search.\n\n This method, implemented in sub-classes, makes it possible to\n customize the scheduling of evaluations: GridSearchCV and\n RandomizedSearchCV schedule evaluations for their whole parameter\n search space at once but other more sequential approaches are also\n possible: for instance it is possible to iteratively schedule evaluations\n for new regions of the parameter search space based on previously\n collected evaluation results. This makes it possible to implement\n Bayesian optimization or more generally sequential model-based\n optimization by deriving from the BaseSearchCV abstract base class.\n For example, Successive Halving is implemented by calling\n `evaluate_candidates` multiple times (once per iteration of the SH\n process), each time passing a different set of candidates with `X`\n and `y` of increasing sizes.\n\n Parameters\n ----------\n evaluate_candidates : callable\n This callback accepts:\n - a list of candidates, where each candidate is a dict of\n parameter settings.\n - an optional `cv` parameter which can be used to e.g.\n evaluate candidates on different dataset splits, or\n evaluate candidates on subsampled data (as done in the\n SuccessiveHalving estimators). By default, the original `cv`\n parameter is used, and it is available as a private\n `_checked_cv_orig` attribute.\n - an optional `more_results` dict. Each key will be added to\n the `cv_results_` attribute. Values should be lists of\n length `n_candidates`\n\n It returns a dict of all results so far, formatted like\n ``cv_results_``.\n\n Important note (relevant whether the default cv is used or not):\n in randomized splitters, and unless the random_state parameter of\n cv was set to an int, calling cv.split() multiple times will\n yield different splits. Since cv.split() is called in\n evaluate_candidates, this means that candidates will be evaluated\n on different splits each time evaluate_candidates is called. This\n might be a methodological issue depending on the search strategy\n that you're implementing. 
To prevent randomized splitters from\n being used, you may use _split._yields_constant_splits()\n\n Examples\n --------\n\n ::\n\n def _run_search(self, evaluate_candidates):\n 'Try C=0.1 only if C=1 is better than C=10'\n all_results = evaluate_candidates([{'C': 1}, {'C': 10}])\n score = all_results['mean_test_score']\n if score[0] < score[1]:\n evaluate_candidates([{'C': 0.1}])\n \"\"\"\n raise NotImplementedError(\"_run_search not implemented.\")" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/classes_@getter", + "name": "classes_", + "qname": "sklearn.model_selection._search.BaseSearchCV.classes_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/classes_/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV.classes_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def classes_(self):\n self._check_is_fitted(\"classes_\")\n return self.best_estimator_.classes_" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/decision_function", + "name": "decision_function", + "qname": "sklearn.model_selection._search.BaseSearchCV.decision_function", + "decorators": ["if_delegate_has_method(delegate=('best_estimator_', 'estimator'))"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/decision_function/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/decision_function/X", + "name": "X", + "qname": "sklearn.model_selection._search.BaseSearchCV.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "indexable, length n_samples", + "default_value": "", + "description": "Must fulfill the input assumptions of the\nunderlying estimator." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "indexable" + }, + { + "kind": "NamedType", + "name": "length n_samples" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Call decision_function on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``decision_function``.", + "docstring": "Call decision_function on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``decision_function``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.", + "code": " @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def decision_function(self, X):\n \"\"\"Call decision_function on the estimator with the best found parameters.\n\n Only available if ``refit=True`` and the underlying estimator supports\n ``decision_function``.\n\n Parameters\n ----------\n X : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.\n\n \"\"\"\n self._check_is_fitted('decision_function')\n return self.best_estimator_.decision_function(X)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/fit", + "name": "fit", + "qname": "sklearn.model_selection._search.BaseSearchCV.fit", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/fit/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/fit/X", + "name": "X", + "qname": "sklearn.model_selection._search.BaseSearchCV.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/fit/y", + "name": "y", + "qname": "sklearn.model_selection._search.BaseSearchCV.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_output) or (n_samples,)", + "default_value": "None", + "description": "Target relative to X for classification or regression;\nNone for unsupervised learning." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_output) or (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/fit/groups", + "name": "groups", + "qname": "sklearn.model_selection._search.BaseSearchCV.fit.groups", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\ninstance (e.g., :class:`~sklearn.model_selection.GroupKFold`)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/fit/fit_params", + "name": "fit_params", + "qname": "sklearn.model_selection._search.BaseSearchCV.fit.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "dict of str -> object", + "default_value": "", + "description": "Parameters passed to the ``fit`` method of the estimator" + }, + "type": { + "kind": "NamedType", + "name": "dict of str -> object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Run fit with all sets of parameters.", + "docstring": "Run fit with all sets of parameters.\n\nParameters\n----------\n\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples, n_output) or (n_samples,), default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n**fit_params : dict of str -> object\n Parameters passed to the ``fit`` method of the estimator", + "code": " @_deprecate_positional_args\n def fit(self, X, y=None, *, groups=None, **fit_params):\n \"\"\"Run fit with all sets of parameters.\n\n Parameters\n ----------\n\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples, n_output) \\\n or (n_samples,), default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n **fit_params : dict of str -> object\n Parameters passed to the ``fit`` method of the estimator\n \"\"\"\n estimator = self.estimator\n refit_metric = \"score\"\n\n if callable(self.scoring):\n scorers = self.scoring\n elif self.scoring is None or isinstance(self.scoring, str):\n scorers = check_scoring(self.estimator, self.scoring)\n else:\n scorers = _check_multimetric_scoring(self.estimator, self.scoring)\n self._check_refit_for_multimetric(scorers)\n refit_metric = self.refit\n\n X, y, groups = indexable(X, y, groups)\n fit_params = _check_fit_params(X, fit_params)\n\n cv_orig = check_cv(self.cv, y, classifier=is_classifier(estimator))\n n_splits = cv_orig.get_n_splits(X, y, groups)\n\n base_estimator = clone(self.estimator)\n\n parallel = Parallel(n_jobs=self.n_jobs,\n pre_dispatch=self.pre_dispatch)\n\n fit_and_score_kwargs = dict(scorer=scorers,\n fit_params=fit_params,\n return_train_score=self.return_train_score,\n return_n_test_samples=True,\n return_times=True,\n return_parameters=False,\n error_score=self.error_score,\n verbose=self.verbose)\n results = {}\n with parallel:\n all_candidate_params = []\n all_out = []\n all_more_results = defaultdict(list)\n\n def evaluate_candidates(candidate_params, cv=None,\n more_results=None):\n cv = cv or cv_orig\n candidate_params = list(candidate_params)\n n_candidates = len(candidate_params)\n\n if self.verbose > 0:\n print(\"Fitting {0} folds for each of {1} candidates,\"\n \" totalling {2} fits\".format(\n n_splits, n_candidates, n_candidates * n_splits))\n\n out = parallel(delayed(_fit_and_score)(clone(base_estimator),\n X, y,\n train=train, test=test,\n parameters=parameters,\n split_progress=(\n split_idx,\n n_splits),\n candidate_progress=(\n cand_idx,\n n_candidates),\n **fit_and_score_kwargs)\n for (cand_idx, parameters),\n (split_idx, (train, test)) in product(\n enumerate(candidate_params),\n enumerate(cv.split(X, y, groups))))\n\n if len(out) < 1:\n raise ValueError('No fits were performed. '\n 'Was the CV iterator empty? '\n 'Were there no candidates?')\n elif len(out) != n_candidates * n_splits:\n raise ValueError('cv.split and cv.get_n_splits returned '\n 'inconsistent results. Expected {} '\n 'splits, got {}'\n .format(n_splits,\n len(out) // n_candidates))\n\n # For callable self.scoring, the return type is only know after\n # calling. If the return type is a dictionary, the error scores\n # can now be inserted with the correct key. 
The type checking\n # of out will be done in `_insert_error_scores`.\n if callable(self.scoring):\n _insert_error_scores(out, self.error_score)\n all_candidate_params.extend(candidate_params)\n all_out.extend(out)\n if more_results is not None:\n for key, value in more_results.items():\n all_more_results[key].extend(value)\n\n nonlocal results\n results = self._format_results(\n all_candidate_params, n_splits, all_out,\n all_more_results)\n\n return results\n\n self._run_search(evaluate_candidates)\n\n # multimetric is determined here because in the case of a callable\n # self.scoring the return type is only known after calling\n first_test_score = all_out[0]['test_scores']\n self.multimetric_ = isinstance(first_test_score, dict)\n\n # check refit_metric now for a callabe scorer that is multimetric\n if callable(self.scoring) and self.multimetric_:\n self._check_refit_for_multimetric(first_test_score)\n refit_metric = self.refit\n\n # For multi-metric evaluation, store the best_index_, best_params_ and\n # best_score_ iff refit is one of the scorer names\n # In single metric evaluation, refit_metric is \"score\"\n if self.refit or not self.multimetric_:\n # If callable, refit is expected to return the index of the best\n # parameter set.\n if callable(self.refit):\n self.best_index_ = self.refit(results)\n if not isinstance(self.best_index_, numbers.Integral):\n raise TypeError('best_index_ returned is not an integer')\n if (self.best_index_ < 0 or\n self.best_index_ >= len(results[\"params\"])):\n raise IndexError('best_index_ index out of range')\n else:\n self.best_index_ = results[\"rank_test_%s\"\n % refit_metric].argmin()\n self.best_score_ = results[\"mean_test_%s\" % refit_metric][\n self.best_index_]\n self.best_params_ = results[\"params\"][self.best_index_]\n\n if self.refit:\n # we clone again after setting params in case some\n # of the params are estimators as well.\n self.best_estimator_ = clone(clone(base_estimator).set_params(\n **self.best_params_))\n refit_start_time = time.time()\n if y is not None:\n self.best_estimator_.fit(X, y, **fit_params)\n else:\n self.best_estimator_.fit(X, **fit_params)\n refit_end_time = time.time()\n self.refit_time_ = refit_end_time - refit_start_time\n\n # Store the only scorer not as a dict for single metric evaluation\n self.scorer_ = scorers\n\n self.cv_results_ = results\n self.n_splits_ = n_splits\n\n return self" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.model_selection._search.BaseSearchCV.inverse_transform", + "decorators": ["if_delegate_has_method(delegate=('best_estimator_', 'estimator'))"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/inverse_transform/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/inverse_transform/Xt", + "name": "Xt", + "qname": "sklearn.model_selection._search.BaseSearchCV.inverse_transform.Xt", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "indexable, length n_samples", + "default_value": "", + "description": "Must fulfill the input assumptions of the\nunderlying estimator." 
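The `fit` entry recorded above is the core of the search API: it evaluates every candidate over the CV splits and, when `refit` is enabled, refits `best_estimator_` on the full data. A minimal usage sketch under assumed toy data, showing `groups` being forwarded to a group-aware splitter as the docstring describes (the dataset, grid values, and splitter choice are illustrative, not part of the dump):

```python
# Hedged sketch of BaseSearchCV.fit: `groups` is keyword-only per the recorded
# signature and is only consumed by a group-aware CV splitter.
import numpy as np
from sklearn.model_selection import GridSearchCV, GroupKFold
from sklearn.svm import SVC

X = np.random.RandomState(0).rand(12, 3)          # assumed toy data
y = np.array([0, 1] * 6)
groups = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3])

search = GridSearchCV(SVC(), {"C": [0.1, 1.0]}, cv=GroupKFold(n_splits=3))
search.fit(X, y, groups=groups)                   # groups routed to cv.split
print(search.best_params_, search.n_splits_)
```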
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "indexable" + }, + { + "kind": "NamedType", + "name": "length n_samples" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Call inverse_transform on the estimator with the best found params.\n\nOnly available if the underlying estimator implements\n``inverse_transform`` and ``refit=True``.", + "docstring": "Call inverse_transform on the estimator with the best found params.\n\nOnly available if the underlying estimator implements\n``inverse_transform`` and ``refit=True``.\n\nParameters\n----------\nXt : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.", + "code": " @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def inverse_transform(self, Xt):\n \"\"\"Call inverse_transform on the estimator with the best found params.\n\n Only available if the underlying estimator implements\n ``inverse_transform`` and ``refit=True``.\n\n Parameters\n ----------\n Xt : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.\n\n \"\"\"\n self._check_is_fitted('inverse_transform')\n return self.best_estimator_.inverse_transform(Xt)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/n_features_in_@getter", + "name": "n_features_in_", + "qname": "sklearn.model_selection._search.BaseSearchCV.n_features_in_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/n_features_in_/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV.n_features_in_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def n_features_in_(self):\n # For consistency with other estimators we raise a AttributeError so\n # that hasattr() fails if the search estimator isn't fitted.\n try:\n check_is_fitted(self)\n except NotFittedError as nfe:\n raise AttributeError(\n \"{} object has no n_features_in_ attribute.\"\n .format(self.__class__.__name__)\n ) from nfe\n\n return self.best_estimator_.n_features_in_" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/predict", + "name": "predict", + "qname": "sklearn.model_selection._search.BaseSearchCV.predict", + "decorators": ["if_delegate_has_method(delegate=('best_estimator_', 'estimator'))"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/predict/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/predict/X", + "name": "X", + "qname": "sklearn.model_selection._search.BaseSearchCV.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "indexable, length n_samples", + "default_value": "", + "description": "Must fulfill the input assumptions of the\nunderlying estimator." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "indexable" + }, + { + "kind": "NamedType", + "name": "length n_samples" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Call predict on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``predict``.", + "docstring": "Call predict on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``predict``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.", + "code": " @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def predict(self, X):\n \"\"\"Call predict on the estimator with the best found parameters.\n\n Only available if ``refit=True`` and the underlying estimator supports\n ``predict``.\n\n Parameters\n ----------\n X : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.\n\n \"\"\"\n self._check_is_fitted('predict')\n return self.best_estimator_.predict(X)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.model_selection._search.BaseSearchCV.predict_log_proba", + "decorators": ["if_delegate_has_method(delegate=('best_estimator_', 'estimator'))"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/predict_log_proba/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/predict_log_proba/X", + "name": "X", + "qname": "sklearn.model_selection._search.BaseSearchCV.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "indexable, length n_samples", + "default_value": "", + "description": "Must fulfill the input assumptions of the\nunderlying estimator." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "indexable" + }, + { + "kind": "NamedType", + "name": "length n_samples" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Call predict_log_proba on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``predict_log_proba``.", + "docstring": "Call predict_log_proba on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``predict_log_proba``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.", + "code": " @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def predict_log_proba(self, X):\n \"\"\"Call predict_log_proba on the estimator with the best found parameters.\n\n Only available if ``refit=True`` and the underlying estimator supports\n ``predict_log_proba``.\n\n Parameters\n ----------\n X : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.\n\n \"\"\"\n self._check_is_fitted('predict_log_proba')\n return self.best_estimator_.predict_log_proba(X)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/predict_proba", + "name": "predict_proba", + "qname": "sklearn.model_selection._search.BaseSearchCV.predict_proba", + "decorators": ["if_delegate_has_method(delegate=('best_estimator_', 'estimator'))"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/predict_proba/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/predict_proba/X", + "name": "X", + "qname": "sklearn.model_selection._search.BaseSearchCV.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "indexable, length n_samples", + "default_value": "", + "description": "Must fulfill the input assumptions of the\nunderlying estimator." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "indexable" + }, + { + "kind": "NamedType", + "name": "length n_samples" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Call predict_proba on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``predict_proba``.", + "docstring": "Call predict_proba on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``predict_proba``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.", + "code": " @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def predict_proba(self, X):\n \"\"\"Call predict_proba on the estimator with the best found parameters.\n\n Only available if ``refit=True`` and the underlying estimator supports\n ``predict_proba``.\n\n Parameters\n ----------\n X : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.\n\n \"\"\"\n self._check_is_fitted('predict_proba')\n return self.best_estimator_.predict_proba(X)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/score", + "name": "score", + "qname": "sklearn.model_selection._search.BaseSearchCV.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/score/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/score/X", + "name": "X", + "qname": "sklearn.model_selection._search.BaseSearchCV.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/score/y", + "name": "y", + "qname": "sklearn.model_selection._search.BaseSearchCV.score.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_output) or (n_samples,)", + "default_value": "None", + "description": "Target relative to X for classification or regression;\nNone for unsupervised learning." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_output) or (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the score on the given data, if the estimator has been refit.\n\nThis uses the score defined by ``scoring`` where provided, and the\n``best_estimator_.score`` method otherwise.", + "docstring": "Returns the score on the given data, if the estimator has been refit.\n\nThis uses the score defined by ``scoring`` where provided, and the\n``best_estimator_.score`` method otherwise.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples, n_output) or (n_samples,), default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\nReturns\n-------\nscore : float", + "code": " def score(self, X, y=None):\n \"\"\"Returns the score on the given data, if the estimator has been refit.\n\n This uses the score defined by ``scoring`` where provided, and the\n ``best_estimator_.score`` method otherwise.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples, n_output) \\\n or (n_samples,), default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n Returns\n -------\n score : float\n \"\"\"\n self._check_is_fitted('score')\n if self.scorer_ is None:\n raise ValueError(\"No score function explicitly defined, \"\n \"and the estimator doesn't provide one %s\"\n % self.best_estimator_)\n if isinstance(self.scorer_, dict):\n if self.multimetric_:\n scorer = self.scorer_[self.refit]\n else:\n scorer = self.scorer_\n return scorer(self.best_estimator_, X, y)\n\n # callable\n score = self.scorer_(self.best_estimator_, X, y)\n if self.multimetric_:\n score = score[self.refit]\n return score" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/score_samples", + "name": "score_samples", + "qname": "sklearn.model_selection._search.BaseSearchCV.score_samples", + "decorators": ["if_delegate_has_method(delegate=('best_estimator_', 'estimator'))"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/score_samples/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV.score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/score_samples/X", + "name": "X", + "qname": "sklearn.model_selection._search.BaseSearchCV.score_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "Data to predict on. Must fulfill input requirements\nof the underlying estimator." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Call score_samples on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``score_samples``.\n\n.. 
versionadded:: 0.24", + "docstring": "Call score_samples on the estimator with the best found parameters.\n\nOnly available if ``refit=True`` and the underlying estimator supports\n``score_samples``.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements\n of the underlying estimator.\n\nReturns\n-------\ny_score : ndarray of shape (n_samples,)", + "code": " @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def score_samples(self, X):\n \"\"\"Call score_samples on the estimator with the best found parameters.\n\n Only available if ``refit=True`` and the underlying estimator supports\n ``score_samples``.\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n X : iterable\n Data to predict on. Must fulfill input requirements\n of the underlying estimator.\n\n Returns\n -------\n y_score : ndarray of shape (n_samples,)\n \"\"\"\n self._check_is_fitted('score_samples')\n return self.best_estimator_.score_samples(X)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/transform", + "name": "transform", + "qname": "sklearn.model_selection._search.BaseSearchCV.transform", + "decorators": ["if_delegate_has_method(delegate=('best_estimator_', 'estimator'))"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/transform/self", + "name": "self", + "qname": "sklearn.model_selection._search.BaseSearchCV.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/BaseSearchCV/transform/X", + "name": "X", + "qname": "sklearn.model_selection._search.BaseSearchCV.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "indexable, length n_samples", + "default_value": "", + "description": "Must fulfill the input assumptions of the\nunderlying estimator." 
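The `score` entry above dispatches on `scorer_`: with multimetric `scoring`, `refit` must name one metric, and `score` then uses that scorer. A hedged sketch (dataset and metric names are assumptions):

```python
# Sketch of the multimetric score dispatch: refit="acc" selects which scorer
# backs best_index_/best_score_ and search.score().
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=60, random_state=0)
search = GridSearchCV(
    DecisionTreeClassifier(random_state=0),
    {"max_depth": [2, 3]},
    scoring={"acc": "accuracy", "bacc": "balanced_accuracy"},
    refit="acc",            # required: names the metric used for refitting
    cv=3,
).fit(X, y)
print(search.score(X, y))   # accuracy of best_estimator_, via scorer_["acc"]
```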
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "indexable" + }, + { + "kind": "NamedType", + "name": "length n_samples" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Call transform on the estimator with the best found parameters.\n\nOnly available if the underlying estimator supports ``transform`` and\n``refit=True``.", + "docstring": "Call transform on the estimator with the best found parameters.\n\nOnly available if the underlying estimator supports ``transform`` and\n``refit=True``.\n\nParameters\n----------\nX : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.", + "code": " @if_delegate_has_method(delegate=('best_estimator_', 'estimator'))\n def transform(self, X):\n \"\"\"Call transform on the estimator with the best found parameters.\n\n Only available if the underlying estimator supports ``transform`` and\n ``refit=True``.\n\n Parameters\n ----------\n X : indexable, length n_samples\n Must fulfill the input assumptions of the\n underlying estimator.\n\n \"\"\"\n self._check_is_fitted('transform')\n return self.best_estimator_.transform(X)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._search.GridSearchCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._search.GridSearchCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/__init__/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._search.GridSearchCV.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object.", + "default_value": "", + "description": "This is assumed to implement the scikit-learn estimator interface.\nEither estimator needs to provide a ``score`` function,\nor ``scoring`` must be passed." + }, + "type": { + "kind": "NamedType", + "name": "estimator object." + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/__init__/param_grid", + "name": "param_grid", + "qname": "sklearn.model_selection._search.GridSearchCV.__init__.param_grid", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict or list of dictionaries", + "default_value": "", + "description": "Dictionary with parameters names (`str`) as keys and lists of\nparameter settings to try as values, or a list of such\ndictionaries, in which case the grids spanned by each dictionary\nin the list are explored. This enables searching over any sequence\nof parameter settings." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "list of dictionaries" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/__init__/scoring", + "name": "scoring", + "qname": "sklearn.model_selection._search.GridSearchCV.__init__.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str, callable, list, tuple or dict", + "default_value": "None", + "description": "Strategy to evaluate the performance of the cross-validated model on\nthe test set.\n\nIf `scoring` represents a single score, one can use:\n\n- a single string (see :ref:`scoring_parameter`);\n- a callable (see :ref:`scoring`) that returns a single value.\n\nIf `scoring` represents multiple scores, one can use:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where the keys are the metric\n names and the values are the metric scores;\n- a dictionary with metric names as keys and callables a values.\n\nSee :ref:`multimetric_grid_search` for an example." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "tuple" + }, + { + "kind": "NamedType", + "name": "dict" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.model_selection._search.GridSearchCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/__init__/refit", + "name": "refit", + "qname": "sklearn.model_selection._search.GridSearchCV.__init__.refit", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool, str, or callable", + "default_value": "True", + "description": "Refit an estimator using the best found parameters on the whole\ndataset.\n\nFor multiple metric evaluation, this needs to be a `str` denoting the\nscorer that would be used to find the best parameters for refitting\nthe estimator at the end.\n\nWhere there are considerations other than maximum score in\nchoosing a best estimator, ``refit`` can be set to a function which\nreturns the selected ``best_index_`` given ``cv_results_``. In that\ncase, the ``best_estimator_`` and ``best_params_`` will be set\naccording to the returned ``best_index_`` while the ``best_score_``\nattribute will not be available.\n\nThe refitted estimator is made available at the ``best_estimator_``\nattribute and permits using ``predict`` directly on this\n``GridSearchCV`` instance.\n\nAlso for multiple metric evaluation, the attributes ``best_index_``,\n``best_score_`` and ``best_params_`` will only be available if\n``refit`` is set and all of them will be determined w.r.t this specific\nscorer.\n\nSee ``scoring`` parameter to know more about multiple metric\nevaluation.\n\n.. 
versionchanged:: 0.20\n Support for callable added." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/__init__/cv", + "name": "cv", + "qname": "sklearn.model_selection._search.GridSearchCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._search.GridSearchCV.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Controls the verbosity: the higher, the more messages.\n\n- >1 : the computation time for each fold and parameter candidate is\n displayed;\n- >2 : the score is also displayed;\n- >3 : the fold and candidate parameter indexes are also displayed\n together with the starting time of the computation." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/__init__/pre_dispatch", + "name": "pre_dispatch", + "qname": "sklearn.model_selection._search.GridSearchCV.__init__.pre_dispatch", + "default_value": "'2*n_jobs'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, or str", + "default_value": "n_jobs", + "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. 
Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/__init__/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._search.GridSearchCV.__init__.error_score", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'raise' or numeric", + "default_value": "np.nan", + "description": "Value to assign to the score if an error occurs in estimator fitting.\nIf set to 'raise', the error is raised. If a numeric value is given,\nFitFailedWarning is raised. This parameter does not affect the refit\nstep, which will always raise the error." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'raise'" + }, + { + "kind": "NamedType", + "name": "numeric" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/__init__/return_train_score", + "name": "return_train_score", + "qname": "sklearn.model_selection._search.GridSearchCV.__init__.return_train_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If ``False``, the ``cv_results_`` attribute will not include training\nscores.\nComputing training scores is used to get insights on how different\nparameter settings impact the overfitting/underfitting trade-off.\nHowever computing the scores on the training set can be computationally\nexpensive and is not strictly required to select the parameters that\nyield the best generalization performance.\n\n.. versionadded:: 0.19\n\n.. 
versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Exhaustive search over specified parameter values for an estimator.\n\nImportant members are fit, predict.\n\nGridSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated grid-search over a parameter grid.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimator, param_grid, *, scoring=None,\n n_jobs=None, refit=True, cv=None,\n verbose=0, pre_dispatch='2*n_jobs',\n error_score=np.nan, return_train_score=False):\n super().__init__(\n estimator=estimator, scoring=scoring,\n n_jobs=n_jobs, refit=refit, cv=cv, verbose=verbose,\n pre_dispatch=pre_dispatch, error_score=error_score,\n return_train_score=return_train_score)\n self.param_grid = param_grid\n _check_param_grid(param_grid)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/_run_search", + "name": "_run_search", + "qname": "sklearn.model_selection._search.GridSearchCV._run_search", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/_run_search/self", + "name": "self", + "qname": "sklearn.model_selection._search.GridSearchCV._run_search.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/GridSearchCV/_run_search/evaluate_candidates", + "name": "evaluate_candidates", + "qname": "sklearn.model_selection._search.GridSearchCV._run_search.evaluate_candidates", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Search all candidates in param_grid", + "docstring": "Search all candidates in param_grid", + "code": " def _run_search(self, evaluate_candidates):\n \"\"\"Search all candidates in param_grid\"\"\"\n evaluate_candidates(ParameterGrid(self.param_grid))" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterGrid/__getitem__", + "name": "__getitem__", + "qname": "sklearn.model_selection._search.ParameterGrid.__getitem__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterGrid/__getitem__/self", + "name": "self", + "qname": "sklearn.model_selection._search.ParameterGrid.__getitem__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterGrid/__getitem__/ind", + "name": "ind", + "qname": "sklearn.model_selection._search.ParameterGrid.__getitem__.ind", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The iteration index" + }, + 
"type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get the parameters that would be ``ind``th in iteration", + "docstring": "Get the parameters that would be ``ind``th in iteration\n\nParameters\n----------\nind : int\n The iteration index\n\nReturns\n-------\nparams : dict of str to any\n Equal to list(self)[ind]", + "code": " def __getitem__(self, ind):\n \"\"\"Get the parameters that would be ``ind``th in iteration\n\n Parameters\n ----------\n ind : int\n The iteration index\n\n Returns\n -------\n params : dict of str to any\n Equal to list(self)[ind]\n \"\"\"\n # This is used to make discrete sampling without replacement memory\n # efficient.\n for sub_grid in self.param_grid:\n # XXX: could memoize information used here\n if not sub_grid:\n if ind == 0:\n return {}\n else:\n ind -= 1\n continue\n\n # Reverse so most frequent cycling parameter comes first\n keys, values_lists = zip(*sorted(sub_grid.items())[::-1])\n sizes = [len(v_list) for v_list in values_lists]\n total = np.product(sizes)\n\n if ind >= total:\n # Try the next grid\n ind -= total\n else:\n out = {}\n for key, v_list, n in zip(keys, values_lists, sizes):\n ind, offset = divmod(ind, n)\n out[key] = v_list[offset]\n return out\n\n raise IndexError('ParameterGrid index out of range')" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterGrid/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._search.ParameterGrid.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterGrid/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._search.ParameterGrid.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterGrid/__init__/param_grid", + "name": "param_grid", + "qname": "sklearn.model_selection._search.ParameterGrid.__init__.param_grid", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict of str to sequence, or sequence of such", + "default_value": "", + "description": "The parameter grid to explore, as a dictionary mapping estimator\nparameters to sequences of allowed values.\n\nAn empty dict signifies default parameters.\n\nA sequence of dicts signifies a sequence of grids to search, and is\nuseful to avoid exploring parameter combinations that make no sense\nor have no effect. See the examples below." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict of str to sequence" + }, + { + "kind": "NamedType", + "name": "sequence of such" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Grid of parameters with a discrete number of values for each.\n\nCan be used to iterate over parameter value combinations with the\nPython built-in function iter.\nThe order of the generated parameter combinations is deterministic.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " def __init__(self, param_grid):\n if not isinstance(param_grid, (Mapping, Iterable)):\n raise TypeError('Parameter grid is not a dict or '\n 'a list ({!r})'.format(param_grid))\n\n if isinstance(param_grid, Mapping):\n # wrap dictionary in a singleton list to support either dict\n # or list of dicts\n param_grid = [param_grid]\n\n # check if all entries are dictionaries of lists\n for grid in param_grid:\n if not isinstance(grid, dict):\n raise TypeError('Parameter grid is not a '\n 'dict ({!r})'.format(grid))\n for key in grid:\n if not isinstance(grid[key], Iterable):\n raise TypeError('Parameter grid value is not iterable '\n '(key={!r}, value={!r})'\n .format(key, grid[key]))\n\n self.param_grid = param_grid" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterGrid/__iter__", + "name": "__iter__", + "qname": "sklearn.model_selection._search.ParameterGrid.__iter__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterGrid/__iter__/self", + "name": "self", + "qname": "sklearn.model_selection._search.ParameterGrid.__iter__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Iterate over the points in the grid.", + "docstring": "Iterate over the points in the grid.\n\nReturns\n-------\nparams : iterator over dict of str to any\n Yields dictionaries mapping each estimator parameter to one of its\n allowed values.", + "code": " def __iter__(self):\n \"\"\"Iterate over the points in the grid.\n\n Returns\n -------\n params : iterator over dict of str to any\n Yields dictionaries mapping each estimator parameter to one of its\n allowed values.\n \"\"\"\n for p in self.param_grid:\n # Always sort the keys of a dictionary, for reproducibility\n items = sorted(p.items())\n if not items:\n yield {}\n else:\n keys, values = zip(*items)\n for v in product(*values):\n params = dict(zip(keys, v))\n yield params" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterGrid/__len__", + "name": "__len__", + "qname": "sklearn.model_selection._search.ParameterGrid.__len__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterGrid/__len__/self", + "name": "self", + "qname": "sklearn.model_selection._search.ParameterGrid.__len__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Number of points on the grid.", + "docstring": "Number of points on the grid.", + "code": " def __len__(self):\n \"\"\"Number of points on the grid.\"\"\"\n # Product function that can handle iterables 
(np.product can't).\n product = partial(reduce, operator.mul)\n return sum(product(len(v) for v in p.values()) if p else 1\n for p in self.param_grid)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterSampler/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._search.ParameterSampler.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterSampler/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._search.ParameterSampler.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterSampler/__init__/param_distributions", + "name": "param_distributions", + "qname": "sklearn.model_selection._search.ParameterSampler.__init__.param_distributions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Dictionary with parameters names (`str`) as keys and distributions\nor lists of parameters to try. Distributions must provide a ``rvs``\nmethod for sampling (such as those from scipy.stats.distributions).\nIf a list is given, it is sampled uniformly.\nIf a list of dicts is given, first a dict is sampled uniformly, and\nthen a parameter is sampled using that dict as above." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterSampler/__init__/n_iter", + "name": "n_iter", + "qname": "sklearn.model_selection._search.ParameterSampler.__init__.n_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of parameter settings that are produced." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterSampler/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._search.ParameterSampler.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Pseudo random number generator state used for random uniform sampling\nfrom lists of possible values instead of scipy.stats distributions.\nPass an int for reproducible output across multiple\nfunction calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generator on parameters sampled from given distributions.\n\nNon-deterministic iterable over random candidate combinations for hyper-\nparameter search. If all parameters are presented as a list,\nsampling without replacement is performed. 
If at least one parameter\nis given as a distribution, sampling with replacement is used.\nIt is highly recommended to use continuous distributions for continuous\nparameters.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, param_distributions, n_iter, *, random_state=None):\n if not isinstance(param_distributions, (Mapping, Iterable)):\n raise TypeError('Parameter distribution is not a dict or '\n 'a list ({!r})'.format(param_distributions))\n\n if isinstance(param_distributions, Mapping):\n # wrap dictionary in a singleton list to support either dict\n # or list of dicts\n param_distributions = [param_distributions]\n\n for dist in param_distributions:\n if not isinstance(dist, dict):\n raise TypeError('Parameter distribution is not a '\n 'dict ({!r})'.format(dist))\n for key in dist:\n if (not isinstance(dist[key], Iterable)\n and not hasattr(dist[key], 'rvs')):\n raise TypeError('Parameter value is not iterable '\n 'or distribution (key={!r}, value={!r})'\n .format(key, dist[key]))\n self.n_iter = n_iter\n self.random_state = random_state\n self.param_distributions = param_distributions" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterSampler/__iter__", + "name": "__iter__", + "qname": "sklearn.model_selection._search.ParameterSampler.__iter__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterSampler/__iter__/self", + "name": "self", + "qname": "sklearn.model_selection._search.ParameterSampler.__iter__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __iter__(self):\n rng = check_random_state(self.random_state)\n\n # if all distributions are given as lists, we want to sample without\n # replacement\n if self._is_all_lists():\n # look up sampled parameter settings in parameter grid\n param_grid = ParameterGrid(self.param_distributions)\n grid_size = len(param_grid)\n n_iter = self.n_iter\n\n if grid_size < n_iter:\n warnings.warn(\n 'The total space of parameters %d is smaller '\n 'than n_iter=%d. Running %d iterations. 
For exhaustive '\n 'searches, use GridSearchCV.'\n % (grid_size, self.n_iter, grid_size), UserWarning)\n n_iter = grid_size\n for i in sample_without_replacement(grid_size, n_iter,\n random_state=rng):\n yield param_grid[i]\n\n else:\n for _ in range(self.n_iter):\n dist = rng.choice(self.param_distributions)\n # Always sort the keys of a dictionary, for reproducibility\n items = sorted(dist.items())\n params = dict()\n for k, v in items:\n if hasattr(v, \"rvs\"):\n params[k] = v.rvs(random_state=rng)\n else:\n params[k] = v[rng.randint(len(v))]\n yield params" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterSampler/__len__", + "name": "__len__", + "qname": "sklearn.model_selection._search.ParameterSampler.__len__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterSampler/__len__/self", + "name": "self", + "qname": "sklearn.model_selection._search.ParameterSampler.__len__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Number of points that will be sampled.", + "docstring": "Number of points that will be sampled.", + "code": " def __len__(self):\n \"\"\"Number of points that will be sampled.\"\"\"\n if self._is_all_lists():\n grid_size = len(ParameterGrid(self.param_distributions))\n return min(self.n_iter, grid_size)\n else:\n return self.n_iter" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterSampler/_is_all_lists", + "name": "_is_all_lists", + "qname": "sklearn.model_selection._search.ParameterSampler._is_all_lists", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/ParameterSampler/_is_all_lists/self", + "name": "self", + "qname": "sklearn.model_selection._search.ParameterSampler._is_all_lists.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _is_all_lists(self):\n return all(\n all(not hasattr(v, \"rvs\") for v in dist.values())\n for dist in self.param_distributions\n )" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object.", + "default_value": "", + "description": "A object of that type is instantiated for each grid point.\nThis is assumed to implement the scikit-learn estimator 
interface.\nEither estimator needs to provide a ``score`` function,\nor ``scoring`` must be passed." + }, + "type": { + "kind": "NamedType", + "name": "estimator object." + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__/param_distributions", + "name": "param_distributions", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__.param_distributions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict or list of dicts", + "default_value": "", + "description": "Dictionary with parameters names (`str`) as keys and distributions\nor lists of parameters to try. Distributions must provide a ``rvs``\nmethod for sampling (such as those from scipy.stats.distributions).\nIf a list is given, it is sampled uniformly.\nIf a list of dicts is given, first a dict is sampled uniformly, and\nthen a parameter is sampled using that dict as above." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "list of dicts" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__/n_iter", + "name": "n_iter", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__.n_iter", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of parameter settings that are sampled. n_iter trades\noff runtime vs quality of the solution." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__/scoring", + "name": "scoring", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str, callable, list, tuple or dict", + "default_value": "None", + "description": "Strategy to evaluate the performance of the cross-validated model on\nthe test set.\n\nIf `scoring` represents a single score, one can use:\n\n- a single string (see :ref:`scoring_parameter`);\n- a callable (see :ref:`scoring`) that returns a single value.\n\nIf `scoring` represents multiple scores, one can use:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where the keys are the metric\n names and the values are the metric scores;\n- a dictionary with metric names as keys and callables a values.\n\nSee :ref:`multimetric_grid_search` for an example.\n\nIf None, the estimator's score method is used." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "tuple" + }, + { + "kind": "NamedType", + "name": "dict" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. 
versionchanged:: v0.20\n `n_jobs` default changed from 1 to None" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__/refit", + "name": "refit", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__.refit", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool, str, or callable", + "default_value": "True", + "description": "Refit an estimator using the best found parameters on the whole\ndataset.\n\nFor multiple metric evaluation, this needs to be a `str` denoting the\nscorer that would be used to find the best parameters for refitting\nthe estimator at the end.\n\nWhere there are considerations other than maximum score in\nchoosing a best estimator, ``refit`` can be set to a function which\nreturns the selected ``best_index_`` given the ``cv_results``. In that\ncase, the ``best_estimator_`` and ``best_params_`` will be set\naccording to the returned ``best_index_`` while the ``best_score_``\nattribute will not be available.\n\nThe refitted estimator is made available at the ``best_estimator_``\nattribute and permits using ``predict`` directly on this\n``RandomizedSearchCV`` instance.\n\nAlso for multiple metric evaluation, the attributes ``best_index_``,\n``best_score_`` and ``best_params_`` will only be available if\n``refit`` is set and all of them will be determined w.r.t this specific\nscorer.\n\nSee ``scoring`` parameter to know more about multiple metric\nevaluation.\n\n.. versionchanged:: 0.20\n Support for callable added." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__/cv", + "name": "cv", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." 
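The `ParameterSampler` entries above distinguish list values (sampled uniformly, and without replacement when every value is a list) from objects exposing `rvs` (sampled with replacement). A minimal sketch with an assumed mixed specification:

```python
# Mixed specification: "C" is a scipy.stats distribution (drawn via rvs),
# "kernel" is a plain list (drawn uniformly). Values here are assumptions.
from scipy.stats import uniform
from sklearn.model_selection import ParameterSampler

sampler = ParameterSampler(
    {"C": uniform(loc=0, scale=4), "kernel": ["linear", "rbf"]},
    n_iter=3,
    random_state=0,
)
for params in sampler:
    print(params)   # e.g. {'C': 2.19..., 'kernel': 'rbf'}
```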
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Controls the verbosity: the higher, the more messages." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__/pre_dispatch", + "name": "pre_dispatch", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__.pre_dispatch", + "default_value": "'2*n_jobs'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, or str", + "default_value": "None", + "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Pseudo random number generator state used for random uniform sampling\nfrom lists of possible values instead of scipy.stats distributions.\nPass an int for reproducible output across multiple\nfunction calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__.error_score", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'raise' or numeric", + "default_value": "np.nan", + "description": "Value to assign to the score if an error occurs in estimator fitting.\nIf set to 'raise', the error is raised. If a numeric value is given,\nFitFailedWarning is raised. This parameter does not affect the refit\nstep, which will always raise the error." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'raise'" + }, + { + "kind": "NamedType", + "name": "numeric" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/__init__/return_train_score", + "name": "return_train_score", + "qname": "sklearn.model_selection._search.RandomizedSearchCV.__init__.return_train_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If ``False``, the ``cv_results_`` attribute will not include training\nscores.\nComputing training scores is used to get insights on how different\nparameter settings impact the overfitting/underfitting trade-off.\nHowever computing the scores on the training set can be computationally\nexpensive and is not strictly required to select the parameters that\nyield the best generalization performance.\n\n.. versionadded:: 0.19\n\n.. versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Randomized search on hyper parameters.\n\nRandomizedSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated search over parameter settings.\n\nIn contrast to GridSearchCV, not all parameter values are tried out, but\nrather a fixed number of parameter settings is sampled from the specified\ndistributions. The number of parameter settings that are tried is\ngiven by n_iter.\n\nIf all parameters are presented as a list,\nsampling without replacement is performed. If at least one parameter\nis given as a distribution, sampling with replacement is used.\nIt is highly recommended to use continuous distributions for continuous\nparameters.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.14", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimator, param_distributions, *, n_iter=10,\n scoring=None, n_jobs=None, refit=True,\n cv=None, verbose=0, pre_dispatch='2*n_jobs',\n random_state=None, error_score=np.nan,\n return_train_score=False):\n self.param_distributions = param_distributions\n self.n_iter = n_iter\n self.random_state = random_state\n super().__init__(\n estimator=estimator, scoring=scoring,\n n_jobs=n_jobs, refit=refit, cv=cv, verbose=verbose,\n pre_dispatch=pre_dispatch, error_score=error_score,\n return_train_score=return_train_score)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/_run_search", + "name": "_run_search", + "qname": "sklearn.model_selection._search.RandomizedSearchCV._run_search", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/_run_search/self", + "name": "self", + "qname": "sklearn.model_selection._search.RandomizedSearchCV._run_search.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search/RandomizedSearchCV/_run_search/evaluate_candidates", + "name": "evaluate_candidates", + "qname": "sklearn.model_selection._search.RandomizedSearchCV._run_search.evaluate_candidates", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Search n_iter candidates from param_distributions", + "docstring": "Search n_iter candidates from param_distributions", + "code": " def _run_search(self, evaluate_candidates):\n \"\"\"Search n_iter candidates from param_distributions\"\"\"\n evaluate_candidates(ParameterSampler(\n self.param_distributions, self.n_iter,\n random_state=self.random_state))" + }, + { + "id": "scikit-learn/sklearn.model_selection._search/_check_param_grid", + "name": "_check_param_grid", + "qname": "sklearn.model_selection._search._check_param_grid", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/_check_param_grid/param_grid", + "name": "param_grid", + "qname": "sklearn.model_selection._search._check_param_grid.param_grid", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_param_grid(param_grid):\n if hasattr(param_grid, 'items'):\n param_grid = [param_grid]\n\n for p in param_grid:\n for name, v in p.items():\n if isinstance(v, np.ndarray) and v.ndim > 1:\n raise ValueError(\"Parameter array should be one-dimensional.\")\n\n if (isinstance(v, str) or\n not isinstance(v, (np.ndarray, Sequence))):\n raise ValueError(\"Parameter grid for parameter ({0}) needs to\"\n \" be a list or numpy array, but got ({1}).\"\n \" Single values need to be wrapped in a list\"\n \" with one element.\".format(name, type(v)))\n\n if len(v) == 0:\n raise ValueError(\"Parameter values for parameter ({0}) need \"\n \"to be a non-empty sequence.\".format(name))" + }, + { + "id": 
"scikit-learn/sklearn.model_selection._search/fit_grid_point", + "name": "fit_grid_point", + "qname": "sklearn.model_selection._search.fit_grid_point", + "decorators": [ + "deprecated('fit_grid_point is deprecated in version 0.23 and will be removed in version 1.0 (renaming of 0.25)')" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search/fit_grid_point/X", + "name": "X", + "qname": "sklearn.model_selection._search.fit_grid_point.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, sparse matrix or list", + "default_value": "", + "description": "Input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "sparse matrix" + }, + { + "kind": "NamedType", + "name": "list" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/fit_grid_point/y", + "name": "y", + "qname": "sklearn.model_selection._search.fit_grid_point.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or None", + "default_value": "", + "description": "Targets for input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/fit_grid_point/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._search.fit_grid_point.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object", + "default_value": "", + "description": "A object of that type is instantiated for each grid point.\nThis is assumed to implement the scikit-learn estimator interface.\nEither estimator needs to provide a ``score`` function,\nor ``scoring`` must be passed." + }, + "type": { + "kind": "NamedType", + "name": "estimator object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/fit_grid_point/parameters", + "name": "parameters", + "qname": "sklearn.model_selection._search.fit_grid_point.parameters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Parameters to be set on estimator for this grid point." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/fit_grid_point/train", + "name": "train", + "qname": "sklearn.model_selection._search.fit_grid_point.train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, dtype int or bool", + "default_value": "", + "description": "Boolean mask or indices for training set." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "dtype int" + }, + { + "kind": "NamedType", + "name": "bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/fit_grid_point/test", + "name": "test", + "qname": "sklearn.model_selection._search.fit_grid_point.test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray, dtype int or bool", + "default_value": "", + "description": "Boolean mask or indices for test set." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "dtype int" + }, + { + "kind": "NamedType", + "name": "bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/fit_grid_point/scorer", + "name": "scorer", + "qname": "sklearn.model_selection._search.fit_grid_point.scorer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable or None", + "default_value": "", + "description": "The scorer callable object / function must have its signature as\n``scorer(estimator, X, y)``.\n\nIf ``None`` the estimator's score method is used." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/fit_grid_point/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._search.fit_grid_point.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/fit_grid_point/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._search.fit_grid_point.error_score", + "default_value": "np.nan", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "'raise' or numeric", + "default_value": "np.nan", + "description": "Value to assign to the score if an error occurs in estimator fitting.\nIf set to 'raise', the error is raised. If a numeric value is given,\nFitFailedWarning is raised. This parameter does not affect the refit\nstep, which will always raise the error." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'raise'" + }, + { + "kind": "NamedType", + "name": "numeric" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search/fit_grid_point/fit_params", + "name": "fit_params", + "qname": "sklearn.model_selection._search.fit_grid_point.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "kwargs", + "default_value": "", + "description": "Additional parameter passed to the fit function of the estimator." 
+ }, + "type": { + "kind": "NamedType", + "name": "kwargs" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Run fit on one set of parameters.", + "docstring": "Run fit on one set of parameters.\n\nParameters\n----------\nX : array-like, sparse matrix or list\n Input data.\n\ny : array-like or None\n Targets for input data.\n\nestimator : estimator object\n A object of that type is instantiated for each grid point.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparameters : dict\n Parameters to be set on estimator for this grid point.\n\ntrain : ndarray, dtype int or bool\n Boolean mask or indices for training set.\n\ntest : ndarray, dtype int or bool\n Boolean mask or indices for test set.\n\nscorer : callable or None\n The scorer callable object / function must have its signature as\n ``scorer(estimator, X, y)``.\n\n If ``None`` the estimator's score method is used.\n\nverbose : int\n Verbosity level.\n\n**fit_params : kwargs\n Additional parameter passed to the fit function of the estimator.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error.\n\nReturns\n-------\nscore : float\n Score of this parameter setting on given test split.\n\nparameters : dict\n The parameters that have been evaluated.\n\nn_samples_test : int\n Number of test samples in this split.", + "code": "@deprecated(\n \"fit_grid_point is deprecated in version 0.23 \"\n \"and will be removed in version 1.0 (renaming of 0.25)\"\n)\ndef fit_grid_point(X, y, estimator, parameters, train, test, scorer,\n verbose, error_score=np.nan, **fit_params):\n \"\"\"Run fit on one set of parameters.\n\n Parameters\n ----------\n X : array-like, sparse matrix or list\n Input data.\n\n y : array-like or None\n Targets for input data.\n\n estimator : estimator object\n A object of that type is instantiated for each grid point.\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\n parameters : dict\n Parameters to be set on estimator for this grid point.\n\n train : ndarray, dtype int or bool\n Boolean mask or indices for training set.\n\n test : ndarray, dtype int or bool\n Boolean mask or indices for test set.\n\n scorer : callable or None\n The scorer callable object / function must have its signature as\n ``scorer(estimator, X, y)``.\n\n If ``None`` the estimator's score method is used.\n\n verbose : int\n Verbosity level.\n\n **fit_params : kwargs\n Additional parameter passed to the fit function of the estimator.\n\n error_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. 
This parameter does not affect the refit\n step, which will always raise the error.\n\n Returns\n -------\n score : float\n Score of this parameter setting on given test split.\n\n parameters : dict\n The parameters that have been evaluated.\n\n n_samples_test : int\n Number of test samples in this split.\n \"\"\"\n # NOTE we are not using the return value as the scorer by itself should be\n # validated before. We use check_scoring only to reject multimetric scorer\n check_scoring(estimator, scorer)\n results = _fit_and_score(estimator, X, y, scorer, train,\n test, verbose, parameters,\n fit_params=fit_params,\n return_n_test_samples=True,\n error_score=error_score)\n return results[\"test_scores\"], parameters, results[\"n_test_samples\"]" + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/scoring", + "name": "scoring", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/refit", + "name": "refit", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.refit", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/cv", + "name": "cv", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.cv", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.error_score", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/return_train_score", + "name": "return_train_score", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.return_train_score", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/max_resources", + "name": "max_resources", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.max_resources", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/min_resources", + "name": "min_resources", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.min_resources", + "default_value": "'exhaust'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/resource", + "name": "resource", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.resource", + "default_value": "'n_samples'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/factor", + "name": "factor", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.factor", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/__init__/aggressive_elimination", + "name": "aggressive_elimination", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.__init__.aggressive_elimination", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Implements successive halving.\n\nRef:\nAlmost optimal exploration in multi-armed bandits, ICML 13\nZohar Karnin, Tomer Koren, Oren Somekh", + "docstring": "", + "code": " def __init__(self, estimator, *, scoring=None,\n n_jobs=None, refit=True, cv=5, verbose=0, random_state=None,\n error_score=np.nan, return_train_score=True,\n max_resources='auto', min_resources='exhaust',\n resource='n_samples', factor=3, aggressive_elimination=False):\n\n refit = _refit_callable if refit else False\n super().__init__(estimator, scoring=scoring,\n n_jobs=n_jobs, refit=refit, cv=cv,\n verbose=verbose,\n error_score=error_score,\n return_train_score=return_train_score)\n\n self.random_state = random_state\n self.max_resources = max_resources\n self.resource = resource\n self.factor = factor\n self.min_resources = min_resources\n self.aggressive_elimination = aggressive_elimination" + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_check_input_parameters", + "name": "_check_input_parameters", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving._check_input_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_check_input_parameters/self", + "name": "self", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving._check_input_parameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_check_input_parameters/X", + "name": "X", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving._check_input_parameters.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_check_input_parameters/y", + "name": "y", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving._check_input_parameters.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_check_input_parameters/groups", + "name": "groups", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving._check_input_parameters.groups", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": 
[], + "description": "", + "docstring": "", + "code": " def _check_input_parameters(self, X, y, groups):\n\n if self.scoring is not None and not (isinstance(self.scoring, str)\n or callable(self.scoring)):\n raise ValueError('scoring parameter must be a string, '\n 'a callable or None. Multimetric scoring is not '\n 'supported.')\n\n # We need to enforce that successive calls to cv.split() yield the same\n # splits: see https://github.com/scikit-learn/scikit-learn/issues/15149\n if not _yields_constant_splits(self._checked_cv_orig):\n raise ValueError(\n \"The cv parameter must yield consistent folds across \"\n \"calls to split(). Set its random_state to an int, or set \"\n \"shuffle=False.\"\n )\n\n if (self.resource != 'n_samples'\n and self.resource not in self.estimator.get_params()):\n raise ValueError(\n f'Cannot use resource={self.resource} which is not supported '\n f'by estimator {self.estimator.__class__.__name__}'\n )\n\n if (isinstance(self.max_resources, str) and\n self.max_resources != 'auto'):\n raise ValueError(\n \"max_resources must be either 'auto' or a positive integer\"\n )\n if self.max_resources != 'auto' and (\n not isinstance(self.max_resources, Integral) or\n self.max_resources <= 0):\n raise ValueError(\n \"max_resources must be either 'auto' or a positive integer\"\n )\n\n if self.min_resources not in ('smallest', 'exhaust') and (\n not isinstance(self.min_resources, Integral) or\n self.min_resources <= 0):\n raise ValueError(\n \"min_resources must be either 'smallest', 'exhaust', \"\n \"or a positive integer \"\n \"no greater than max_resources.\"\n )\n\n if isinstance(self, HalvingRandomSearchCV):\n if self.min_resources == self.n_candidates == 'exhaust':\n # for n_candidates=exhaust to work, we need to know what\n # min_resources is. 
Similarly min_resources=exhaust needs to\n # know the actual number of candidates.\n raise ValueError(\n \"n_candidates and min_resources cannot be both set to \"\n \"'exhaust'.\"\n )\n if self.n_candidates != 'exhaust' and (\n not isinstance(self.n_candidates, Integral) or\n self.n_candidates <= 0):\n raise ValueError(\n \"n_candidates must be either 'exhaust' \"\n \"or a positive integer\"\n )\n\n self.min_resources_ = self.min_resources\n if self.min_resources_ in ('smallest', 'exhaust'):\n if self.resource == 'n_samples':\n n_splits = self._checked_cv_orig.get_n_splits(X, y, groups)\n # please see https://gph.is/1KjihQe for a justification\n magic_factor = 2\n self.min_resources_ = n_splits * magic_factor\n if is_classifier(self.estimator):\n n_classes = np.unique(y).shape[0]\n self.min_resources_ *= n_classes\n else:\n self.min_resources_ = 1\n # if 'exhaust', min_resources_ might be set to a higher value later\n # in _run_search\n\n self.max_resources_ = self.max_resources\n if self.max_resources_ == 'auto':\n if not self.resource == 'n_samples':\n raise ValueError(\n \"max_resources can only be 'auto' if resource='n_samples'\")\n self.max_resources_ = _num_samples(X)\n\n if self.min_resources_ > self.max_resources_:\n raise ValueError(\n f'min_resources_={self.min_resources_} is greater '\n f'than max_resources_={self.max_resources_}.'\n )" + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_generate_candidate_params", + "name": "_generate_candidate_params", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving._generate_candidate_params", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_generate_candidate_params/self", + "name": "self", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving._generate_candidate_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @abstractmethod\n def _generate_candidate_params(self):\n pass" + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_run_search", + "name": "_run_search", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving._run_search", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_run_search/self", + "name": "self", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving._run_search.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/_run_search/evaluate_candidates", + "name": "evaluate_candidates", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving._run_search.evaluate_candidates", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + 
"reexported_by": [], + "description": "", + "docstring": "", + "code": " def _run_search(self, evaluate_candidates):\n candidate_params = self._generate_candidate_params()\n\n if self.resource != 'n_samples' and any(\n self.resource in candidate for candidate in candidate_params):\n # Can only check this now since we need the candidates list\n raise ValueError(\n f\"Cannot use parameter {self.resource} as the resource since \"\n \"it is part of the searched parameters.\"\n )\n\n # n_required_iterations is the number of iterations needed so that the\n # last iterations evaluates less than `factor` candidates.\n n_required_iterations = 1 + floor(log(len(candidate_params),\n self.factor))\n\n if self.min_resources == 'exhaust':\n # To exhaust the resources, we want to start with the biggest\n # min_resources possible so that the last (required) iteration\n # uses as many resources as possible\n last_iteration = n_required_iterations - 1\n self.min_resources_ = max(\n self.min_resources_,\n self.max_resources_ // self.factor**last_iteration\n )\n\n # n_possible_iterations is the number of iterations that we can\n # actually do starting from min_resources and without exceeding\n # max_resources. Depending on max_resources and the number of\n # candidates, this may be higher or smaller than\n # n_required_iterations.\n n_possible_iterations = 1 + floor(log(\n self.max_resources_ // self.min_resources_, self.factor))\n\n if self.aggressive_elimination:\n n_iterations = n_required_iterations\n else:\n n_iterations = min(n_possible_iterations, n_required_iterations)\n\n if self.verbose:\n print(f'n_iterations: {n_iterations}')\n print(f'n_required_iterations: {n_required_iterations}')\n print(f'n_possible_iterations: {n_possible_iterations}')\n print(f'min_resources_: {self.min_resources_}')\n print(f'max_resources_: {self.max_resources_}')\n print(f'aggressive_elimination: {self.aggressive_elimination}')\n print(f'factor: {self.factor}')\n\n self.n_resources_ = []\n self.n_candidates_ = []\n\n for itr in range(n_iterations):\n\n power = itr # default\n if self.aggressive_elimination:\n # this will set n_resources to the initial value (i.e. 
the\n # value of n_resources at the first iteration) for as many\n # iterations as needed (while candidates are being\n # eliminated), and then go on as usual.\n power = max(\n 0,\n itr - n_required_iterations + n_possible_iterations\n )\n\n n_resources = int(self.factor**power * self.min_resources_)\n # guard, probably not needed\n n_resources = min(n_resources, self.max_resources_)\n self.n_resources_.append(n_resources)\n\n n_candidates = len(candidate_params)\n self.n_candidates_.append(n_candidates)\n\n if self.verbose:\n print('-' * 10)\n print(f'iter: {itr}')\n print(f'n_candidates: {n_candidates}')\n print(f'n_resources: {n_resources}')\n\n if self.resource == 'n_samples':\n # subsampling will be done in cv.split()\n cv = _SubsampleMetaSplitter(\n base_cv=self._checked_cv_orig,\n fraction=n_resources / self._n_samples_orig,\n subsample_test=True,\n random_state=self.random_state\n )\n\n else:\n # Need copy so that the n_resources of next iteration does\n # not overwrite\n candidate_params = [c.copy() for c in candidate_params]\n for candidate in candidate_params:\n candidate[self.resource] = n_resources\n cv = self._checked_cv_orig\n\n more_results = {'iter': [itr] * n_candidates,\n 'n_resources': [n_resources] * n_candidates}\n\n results = evaluate_candidates(candidate_params, cv,\n more_results=more_results)\n\n n_candidates_to_keep = ceil(n_candidates / self.factor)\n candidate_params = _top_k(results, n_candidates_to_keep, itr)\n\n self.n_remaining_candidates_ = len(candidate_params)\n self.n_required_iterations_ = n_required_iterations\n self.n_possible_iterations_ = n_possible_iterations\n self.n_iterations_ = n_iterations" + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/fit", + "name": "fit", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/fit/self", + "name": "self", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/fit/X", + "name": "X", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/fit/y", + "name": "y", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples,) or (n_samples, n_output)", + "default_value": "", + "description": "Target relative to X for classification or regression;\nNone for unsupervised learning." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples,) or (n_samples, n_output)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/fit/groups", + "name": "groups", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.fit.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set. Only used in conjunction with a \"Group\" :term:`cv`\ninstance (e.g., :class:`~sklearn.model_selection.GroupKFold`)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/BaseSuccessiveHalving/fit/fit_params", + "name": "fit_params", + "qname": "sklearn.model_selection._search_successive_halving.BaseSuccessiveHalving.fit.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "dict of string -> object", + "default_value": "", + "description": "Parameters passed to the ``fit`` method of the estimator" + }, + "type": { + "kind": "NamedType", + "name": "dict of string -> object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Run fit with all sets of parameters.", + "docstring": "Run fit with all sets of parameters.\n\nParameters\n----------\n\nX : array-like, shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like, shape (n_samples,) or (n_samples, n_output), optional\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of the estimator", + "code": " def fit(self, X, y=None, groups=None, **fit_params):\n \"\"\"Run fit with all sets of parameters.\n\n Parameters\n ----------\n\n X : array-like, shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like, shape (n_samples,) or (n_samples, n_output), optional\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).\n\n **fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of the estimator\n \"\"\"\n self._checked_cv_orig = check_cv(\n self.cv, y, classifier=is_classifier(self.estimator))\n\n self._check_input_parameters(\n X=X,\n y=y,\n groups=groups,\n )\n\n self._n_samples_orig = _num_samples(X)\n\n super().fit(X, y=y, groups=groups, **fit_params)\n\n # Set best_score_: BaseSearchCV does not set it, as refit is a callable\n self.best_score_ = (\n self.cv_results_['mean_test_score'][self.best_index_])\n\n return self" + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object.", + "default_value": "", + "description": "This is assumed to implement the scikit-learn estimator interface.\nEither estimator needs to provide a ``score`` function,\nor ``scoring`` must be passed." + }, + "type": { + "kind": "NamedType", + "name": "estimator object." + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/param_grid", + "name": "param_grid", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.param_grid", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict or list of dictionaries", + "default_value": "", + "description": "Dictionary with parameter names (string) as keys and lists of\nparameter settings to try as values, or a list of such\ndictionaries, in which case the grids spanned by each dictionary\nin the list are explored. This enables searching over any sequence\nof parameter settings." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "list of dictionaries" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/factor", + "name": "factor", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.factor", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "3", + "description": "The 'halving' parameter, which determines the proportion of candidates\nthat are selected for each subsequent iteration. For example,\n``factor=3`` means that only one third of the candidates are selected." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/resource", + "name": "resource", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.resource", + "default_value": "'n_samples'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "``'n_samples'`` or str", + "default_value": "'n_samples'", + "description": "Defines the resource that increases with each iteration. By default,\nthe resource is the number of samples. It can also be set to any\nparameter of the base estimator that accepts positive integer\nvalues, e.g. 'n_iterations' or 'n_estimators' for a gradient\nboosting estimator. In this case ``max_resources`` cannot be 'auto'\nand must be set explicitly." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "``'n_samples'``" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/max_resources", + "name": "max_resources", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.max_resources", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "'auto'", + "description": "The maximum amount of resource that any candidate is allowed to use\nfor a given iteration. By default, this is set to ``n_samples`` when\n``resource='n_samples'`` (default), else an error is raised." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/min_resources", + "name": "min_resources", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.min_resources", + "default_value": "'exhaust'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'exhaust', 'smallest'} or int", + "default_value": "'exhaust'", + "description": "The minimum amount of resource that any candidate is allowed to use\nfor a given iteration. Equivalently, this defines the amount of\nresources `r0` that are allocated for each candidate at the first\niteration.\n\n- 'smallest' is a heuristic that sets `r0` to a small value:\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n- 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming.\n\nNote that the amount of resources used at each iteration is always a\nmultiple of ``min_resources``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["exhaust", "smallest"] + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/aggressive_elimination", + "name": "aggressive_elimination", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.aggressive_elimination", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This is only relevant in cases where there isn't enough resources to\nreduce the remaining candidates to at most `factor` after the last\niteration. If ``True``, then the search process will 'replay' the\nfirst iteration for as long as needed until the number of candidates\nis small enough. This is ``False`` by default, which means that the\nlast iteration may evaluate more than ``factor`` candidates. See\n:ref:`aggressive_elimination` for more details." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/cv", + "name": "cv", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.cv", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or iterable", + "default_value": "5", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/scoring", + "name": "scoring", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string, callable, or None", + "default_value": "None", + "description": "A single string (see :ref:`scoring_parameter`) or a callable\n(see :ref:`scoring`) to evaluate the predictions on the test set.\nIf None, the estimator's score method is used." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/refit", + "name": "refit", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.refit", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, refit an estimator using the best found parameters on the\nwhole dataset.\n\nThe refitted estimator is made available at the ``best_estimator_``\nattribute and permits using ``predict`` directly on this\n``HalvingGridSearchCV`` instance." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.error_score", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'raise' or numeric", + "default_value": "", + "description": "Value to assign to the score if an error occurs in estimator fitting.\nIf set to 'raise', the error is raised. If a numeric value is given,\nFitFailedWarning is raised. This parameter does not affect the refit\nstep, which will always raise the error. Default is ``np.nan``" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'raise'" + }, + { + "kind": "NamedType", + "name": "numeric" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/return_train_score", + "name": "return_train_score", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.return_train_score", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If ``False``, the ``cv_results_`` attribute will not include training\nscores.\nComputing training scores is used to get insights on how different\nparameter settings impact the overfitting/underfitting trade-off.\nHowever computing the scores on the training set can be computationally\nexpensive and is not strictly required to select the parameters that\nyield the best generalization performance." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Pseudo random number generator state used for subsampling the dataset\nwhen `resources != 'n_samples'`. Ignored otherwise.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Controls the verbosity: the higher, the more messages." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Search over specified parameter values with successive halving.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using\nmore and more resources.\n\nRead more in the :ref:`User guide `.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. 
To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingGridSearchCV", + "docstring": "", + "code": " def __init__(self, estimator, param_grid, *,\n factor=3, resource='n_samples', max_resources='auto',\n min_resources='exhaust', aggressive_elimination=False,\n cv=5, scoring=None, refit=True, error_score=np.nan,\n return_train_score=True, random_state=None, n_jobs=None,\n verbose=0):\n super().__init__(estimator, scoring=scoring,\n n_jobs=n_jobs, refit=refit, verbose=verbose, cv=cv,\n random_state=random_state, error_score=error_score,\n return_train_score=return_train_score,\n max_resources=max_resources, resource=resource,\n factor=factor, min_resources=min_resources,\n aggressive_elimination=aggressive_elimination)\n self.param_grid = param_grid\n _check_param_grid(self.param_grid)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/_generate_candidate_params", + "name": "_generate_candidate_params", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV._generate_candidate_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingGridSearchCV/_generate_candidate_params/self", + "name": "self", + "qname": "sklearn.model_selection._search_successive_halving.HalvingGridSearchCV._generate_candidate_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _generate_candidate_params(self):\n return ParameterGrid(self.param_grid)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object.", + "default_value": "", + "description": "This is assumed to implement the scikit-learn estimator interface.\nEither estimator needs to provide a ``score`` function,\nor ``scoring`` must be passed." + }, + "type": { + "kind": "NamedType", + "name": "estimator object." 
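The parameters documented above compose into an ordinary fit/predict workflow. A minimal usage sketch of HalvingGridSearchCV (the dataset, estimator, and grid below are illustrative choices, not taken from the source; the experimental import is the one the docstring itself requires):

>>> from sklearn.experimental import enable_halving_search_cv  # noqa
>>> from sklearn.model_selection import HalvingGridSearchCV
>>> from sklearn.ensemble import RandomForestClassifier
>>> from sklearn.datasets import make_classification
>>> X, y = make_classification(n_samples=400, random_state=0)
>>> # factor=3 keeps roughly a third of the candidates at each iteration
>>> search = HalvingGridSearchCV(
...     RandomForestClassifier(random_state=0),
...     param_grid={'max_depth': [3, None], 'min_samples_split': [2, 5, 10]},
...     factor=3, resource='n_samples', random_state=0).fit(X, y)
>>> preds = search.predict(X[:5])  # available because refit=True by default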
+ } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/param_distributions", + "name": "param_distributions", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.param_distributions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Dictionary with parameters names (string) as keys and distributions\nor lists of parameters to try. Distributions must provide a ``rvs``\nmethod for sampling (such as those from scipy.stats.distributions).\nIf a list is given, it is sampled uniformly." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/n_candidates", + "name": "n_candidates", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.n_candidates", + "default_value": "'exhaust'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "'exhaust'", + "description": "The number of candidate parameters to sample, at the first\niteration. Using 'exhaust' will sample enough candidates so that the\nlast iteration uses as many resources as possible, based on\n`min_resources`, `max_resources` and `factor`. In this case,\n`min_resources` cannot be 'exhaust'." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/factor", + "name": "factor", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.factor", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "3", + "description": "The 'halving' parameter, which determines the proportion of candidates\nthat are selected for each subsequent iteration. For example,\n``factor=3`` means that only one third of the candidates are selected." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/resource", + "name": "resource", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.resource", + "default_value": "'n_samples'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "``'n_samples'`` or str", + "default_value": "'n_samples'", + "description": "Defines the resource that increases with each iteration. By default,\nthe resource is the number of samples. It can also be set to any\nparameter of the base estimator that accepts positive integer\nvalues, e.g. 'n_iterations' or 'n_estimators' for a gradient\nboosting estimator. In this case ``max_resources`` cannot be 'auto'\nand must be set explicitly." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "``'n_samples'``" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/max_resources", + "name": "max_resources", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.max_resources", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "'auto'", + "description": "The maximum number of resources that any candidate is allowed to use\nfor a given iteration. By default, this is set ``n_samples`` when\n``resource='n_samples'`` (default), else an error is raised." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/min_resources", + "name": "min_resources", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.min_resources", + "default_value": "'smallest'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'exhaust', 'smallest'} or int", + "default_value": "'smallest'", + "description": "The minimum amount of resource that any candidate is allowed to use\nfor a given iteration. Equivalently, this defines the amount of\nresources `r0` that are allocated for each candidate at the first\niteration.\n\n- 'smallest' is a heuristic that sets `r0` to a small value:\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n- 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming. 'exhaust' isn't available when `n_candidates='exhaust'`.\n\nNote that the amount of resources used at each iteration is always a\nmultiple of ``min_resources``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["exhaust", "smallest"] + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/aggressive_elimination", + "name": "aggressive_elimination", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.aggressive_elimination", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This is only relevant in cases where there isn't enough resources to\nreduce the remaining candidates to at most `factor` after the last\niteration. If ``True``, then the search process will 'replay' the\nfirst iteration for as long as needed until the number of candidates\nis small enough. This is ``False`` by default, which means that the\nlast iteration may evaluate more than ``factor`` candidates. See\n:ref:`aggressive_elimination` for more details." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/cv", + "name": "cv", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.cv", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "5", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/scoring", + "name": "scoring", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "string, callable, or None", + "default_value": "None", + "description": "A single string (see :ref:`scoring_parameter`) or a callable\n(see :ref:`scoring`) to evaluate the predictions on the test set.\nIf None, the estimator's score method is used." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/refit", + "name": "refit", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.refit", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, refit an estimator using the best found parameters on the\nwhole dataset.\n\nThe refitted estimator is made available at the ``best_estimator_``\nattribute and permits using ``predict`` directly on this\n``HalvingRandomSearchCV`` instance." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.error_score", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'raise' or numeric", + "default_value": "", + "description": "Value to assign to the score if an error occurs in estimator fitting.\nIf set to 'raise', the error is raised. If a numeric value is given,\nFitFailedWarning is raised. This parameter does not affect the refit\nstep, which will always raise the error. Default is ``np.nan``" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'raise'" + }, + { + "kind": "NamedType", + "name": "numeric" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/return_train_score", + "name": "return_train_score", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.return_train_score", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If ``False``, the ``cv_results_`` attribute will not include training\nscores.\nComputing training scores is used to get insights on how different\nparameter settings impact the overfitting/underfitting trade-off.\nHowever computing the scores on the training set can be computationally\nexpensive and is not strictly required to select the parameters that\nyield the best generalization performance." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Pseudo random number generator state used for subsampling the dataset\nwhen `resources != 'n_samples'`. Also used for random uniform\nsampling from lists of possible values instead of scipy.stats\ndistributions.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "None", + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/__init__/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Controls the verbosity: the higher, the more messages." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Randomized search on hyper parameters.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using more\nand more resources.\n\nThe candidates are sampled at random from the parameter space and the\nnumber of sampled candidates is determined by ``n_candidates``.\n\nRead more in the :ref:`User guide`.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingRandomSearchCV", + "docstring": "", + "code": " def __init__(self, estimator, param_distributions, *,\n n_candidates='exhaust', factor=3, resource='n_samples',\n max_resources='auto', min_resources='smallest',\n aggressive_elimination=False, cv=5, scoring=None,\n refit=True, error_score=np.nan, return_train_score=True,\n random_state=None, n_jobs=None, verbose=0):\n super().__init__(estimator, scoring=scoring,\n n_jobs=n_jobs, refit=refit, verbose=verbose, cv=cv,\n random_state=random_state, error_score=error_score,\n return_train_score=return_train_score,\n max_resources=max_resources, resource=resource,\n factor=factor, min_resources=min_resources,\n aggressive_elimination=aggressive_elimination)\n self.param_distributions = param_distributions\n self.n_candidates = n_candidates" + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/_generate_candidate_params", + "name": "_generate_candidate_params", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV._generate_candidate_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/HalvingRandomSearchCV/_generate_candidate_params/self", + "name": "self", + "qname": "sklearn.model_selection._search_successive_halving.HalvingRandomSearchCV._generate_candidate_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _generate_candidate_params(self):\n n_candidates_first_iter = self.n_candidates\n if n_candidates_first_iter == 'exhaust':\n # This will generate enough candidate so that the last iteration\n # uses as much resources as possible\n n_candidates_first_iter = (\n 
self.max_resources_ // self.min_resources_)\n return ParameterSampler(self.param_distributions,\n n_candidates_first_iter,\n random_state=self.random_state)" + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._search_successive_halving._SubsampleMetaSplitter.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._search_successive_halving._SubsampleMetaSplitter.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter/__init__/base_cv", + "name": "base_cv", + "qname": "sklearn.model_selection._search_successive_halving._SubsampleMetaSplitter.__init__.base_cv", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter/__init__/fraction", + "name": "fraction", + "qname": "sklearn.model_selection._search_successive_halving._SubsampleMetaSplitter.__init__.fraction", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter/__init__/subsample_test", + "name": "subsample_test", + "qname": "sklearn.model_selection._search_successive_halving._SubsampleMetaSplitter.__init__.subsample_test", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._search_successive_halving._SubsampleMetaSplitter.__init__.random_state", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Splitter that subsamples a given fraction of the dataset", + "docstring": "", + "code": " def __init__(self, *, base_cv, fraction, subsample_test, random_state):\n self.base_cv = base_cv\n self.fraction = fraction\n self.subsample_test = subsample_test\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter/split", + "name": "split", + "qname": "sklearn.model_selection._search_successive_halving._SubsampleMetaSplitter.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter/split/self", + "name": "self", + "qname": "sklearn.model_selection._search_successive_halving._SubsampleMetaSplitter.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
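A minimal usage sketch for the randomized variant documented above, using an rvs-capable scipy distribution as the ``param_distributions`` docstring requires (the dataset, estimator, and distribution are illustrative choices, not taken from the source):

>>> from sklearn.experimental import enable_halving_search_cv  # noqa
>>> from sklearn.model_selection import HalvingRandomSearchCV
>>> from sklearn.ensemble import RandomForestClassifier
>>> from sklearn.datasets import make_classification
>>> from scipy.stats import randint
>>> X, y = make_classification(n_samples=400, random_state=0)
>>> search = HalvingRandomSearchCV(
...     RandomForestClassifier(random_state=0),
...     param_distributions={'max_depth': randint(2, 10)},
...     n_candidates='exhaust',  # sample enough to exhaust max_resources
...     random_state=0).fit(X, y)
>>> best = search.best_params_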
"description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter/split/X", + "name": "X", + "qname": "sklearn.model_selection._search_successive_halving._SubsampleMetaSplitter.split.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter/split/y", + "name": "y", + "qname": "sklearn.model_selection._search_successive_halving._SubsampleMetaSplitter.split.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_SubsampleMetaSplitter/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._search_successive_halving._SubsampleMetaSplitter.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def split(self, X, y, groups=None):\n for train_idx, test_idx in self.base_cv.split(X, y, groups):\n train_idx = resample(\n train_idx, replace=False, random_state=self.random_state,\n n_samples=int(self.fraction * train_idx.shape[0])\n )\n if self.subsample_test:\n test_idx = resample(\n test_idx, replace=False, random_state=self.random_state,\n n_samples=int(self.fraction * test_idx.shape[0])\n )\n yield train_idx, test_idx" + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_refit_callable", + "name": "_refit_callable", + "qname": "sklearn.model_selection._search_successive_halving._refit_callable", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_refit_callable/results", + "name": "results", + "qname": "sklearn.model_selection._search_successive_halving._refit_callable.results", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _refit_callable(results):\n # Custom refit callable to return the index of the best candidate. We want\n # the best candidate out of the last iteration. 
By default BaseSearchCV\n # would return the best candidate out of all iterations.\n\n last_iter = np.max(results['iter'])\n last_iter_indices = np.flatnonzero(results['iter'] == last_iter)\n best_idx = np.argmax(results['mean_test_score'][last_iter_indices])\n return last_iter_indices[best_idx]" + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_top_k", + "name": "_top_k", + "qname": "sklearn.model_selection._search_successive_halving._top_k", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_top_k/results", + "name": "results", + "qname": "sklearn.model_selection._search_successive_halving._top_k.results", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_top_k/k", + "name": "k", + "qname": "sklearn.model_selection._search_successive_halving._top_k.k", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._search_successive_halving/_top_k/itr", + "name": "itr", + "qname": "sklearn.model_selection._search_successive_halving._top_k.itr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _top_k(results, k, itr):\n # Return the best candidates of a given iteration\n iteration, mean_test_score, params = (\n np.asarray(a) for a in (results['iter'],\n results['mean_test_score'],\n results['params'])\n )\n iter_indices = np.flatnonzero(iteration == itr)\n sorted_indices = np.argsort(mean_test_score[iter_indices])\n return np.array(params[iter_indices][sorted_indices[-k:]])" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/__repr__", + "name": "__repr__", + "qname": "sklearn.model_selection._split.BaseCrossValidator.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/__repr__/self", + "name": "self", + "qname": "sklearn.model_selection._split.BaseCrossValidator.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return _build_repr(self)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/_iter_test_indices", + "name": "_iter_test_indices", + "qname": "sklearn.model_selection._split.BaseCrossValidator._iter_test_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/_iter_test_indices/self", + "name": "self", + "qname": "sklearn.model_selection._split.BaseCrossValidator._iter_test_indices.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
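Both private helpers above index into a `cv_results_`-style mapping. A toy trace of `_refit_callable`'s selection logic (the scores are invented) shows why it can differ from a global argmax: the winner must come from the last iteration, even if an earlier candidate scored higher on fewer resources:

>>> import numpy as np
>>> results = {'iter': np.array([0, 0, 1, 1]),
...            'mean_test_score': np.array([0.90, 0.80, 0.70, 0.85])}
>>> last_iter = np.max(results['iter'])
>>> idx = np.flatnonzero(results['iter'] == last_iter)
>>> int(idx[np.argmax(results['mean_test_score'][idx])])  # not 0, the global best
3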
"scikit-learn/sklearn.model_selection._split/BaseCrossValidator/_iter_test_indices/X", + "name": "X", + "qname": "sklearn.model_selection._split.BaseCrossValidator._iter_test_indices.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/_iter_test_indices/y", + "name": "y", + "qname": "sklearn.model_selection._split.BaseCrossValidator._iter_test_indices.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/_iter_test_indices/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.BaseCrossValidator._iter_test_indices.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generates integer indices corresponding to test sets.", + "docstring": "Generates integer indices corresponding to test sets.", + "code": " def _iter_test_indices(self, X=None, y=None, groups=None):\n \"\"\"Generates integer indices corresponding to test sets.\"\"\"\n raise NotImplementedError" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/_iter_test_masks", + "name": "_iter_test_masks", + "qname": "sklearn.model_selection._split.BaseCrossValidator._iter_test_masks", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/_iter_test_masks/self", + "name": "self", + "qname": "sklearn.model_selection._split.BaseCrossValidator._iter_test_masks.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/_iter_test_masks/X", + "name": "X", + "qname": "sklearn.model_selection._split.BaseCrossValidator._iter_test_masks.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/_iter_test_masks/y", + "name": "y", + "qname": "sklearn.model_selection._split.BaseCrossValidator._iter_test_masks.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/_iter_test_masks/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.BaseCrossValidator._iter_test_masks.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generates boolean masks corresponding to test sets.\n\nBy default, delegates to _iter_test_indices(X, y, groups)", + "docstring": "Generates boolean masks 
corresponding to test sets.\n\nBy default, delegates to _iter_test_indices(X, y, groups)", + "code": " def _iter_test_masks(self, X=None, y=None, groups=None):\n \"\"\"Generates boolean masks corresponding to test sets.\n\n By default, delegates to _iter_test_indices(X, y, groups)\n \"\"\"\n for test_index in self._iter_test_indices(X, y, groups):\n test_mask = np.zeros(_num_samples(X), dtype=bool)\n test_mask[test_index] = True\n yield test_mask" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/get_n_splits", + "name": "get_n_splits", + "qname": "sklearn.model_selection._split.BaseCrossValidator.get_n_splits", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/get_n_splits/self", + "name": "self", + "qname": "sklearn.model_selection._split.BaseCrossValidator.get_n_splits.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/get_n_splits/X", + "name": "X", + "qname": "sklearn.model_selection._split.BaseCrossValidator.get_n_splits.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/get_n_splits/y", + "name": "y", + "qname": "sklearn.model_selection._split.BaseCrossValidator.get_n_splits.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/get_n_splits/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.BaseCrossValidator.get_n_splits.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the number of splitting iterations in the cross-validator", + "docstring": "Returns the number of splitting iterations in the cross-validator", + "code": " @abstractmethod\n def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\"\"\"" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/split", + "name": "split", + "qname": "sklearn.model_selection._split.BaseCrossValidator.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/split/self", + "name": "self", + "qname": "sklearn.model_selection._split.BaseCrossValidator.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/split/X", + "name": "X", + "qname": "sklearn.model_selection._split.BaseCrossValidator.split.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training 
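The index-to-mask conversion performed by `_iter_test_masks` above, isolated on six samples (sizes chosen for illustration):

>>> import numpy as np
>>> test_index = np.array([0, 2, 4])
>>> test_mask = np.zeros(6, dtype=bool)
>>> test_mask[test_index] = True
>>> test_mask
array([ True, False,  True, False,  True, False])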
data, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/split/y", + "name": "y", + "qname": "sklearn.model_selection._split.BaseCrossValidator.split.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target variable for supervised learning problems." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseCrossValidator/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.BaseCrossValidator.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate indices to split data into training and test set.", + "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.", + "code": " def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n X, y, groups = indexable(X, y, groups)\n indices = np.arange(_num_samples(X))\n for test_index in self._iter_test_masks(X, y, groups):\n train_index = indices[np.logical_not(test_index)]\n test_index = indices[test_index]\n yield train_index, test_index" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + 
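As the `split` code above shows, subclasses only have to supply `_iter_test_indices` and `get_n_splits`; the training set is derived as the complement of each test mask. A minimal custom-splitter sketch (the class and its fixed test indices are hypothetical, for illustration only):

>>> import numpy as np
>>> from sklearn.model_selection import BaseCrossValidator
>>> class EvenIndicesHoldout(BaseCrossValidator):  # hypothetical splitter
...     def _iter_test_indices(self, X=None, y=None, groups=None):
...         yield np.array([0, 2, 4])  # single split: fixed test indices
...     def get_n_splits(self, X=None, y=None, groups=None):
...         return 1
>>> train, test = next(EvenIndicesHoldout().split(np.zeros((6, 1))))
>>> train, test  # train is the complement of test
(array([1, 3, 5]), array([0, 2, 4]))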
"default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/__init__/n_splits", + "name": "n_splits", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.__init__.n_splits", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/__init__/test_size", + "name": "test_size", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.__init__.test_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/__init__/train_size", + "name": "train_size", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.__init__.train_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for ShuffleSplit and StratifiedShuffleSplit", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_splits=10, *, test_size=None, train_size=None,\n random_state=None):\n self.n_splits = n_splits\n self.test_size = test_size\n self.train_size = train_size\n self.random_state = random_state\n self._default_test_size = 0.1" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/__repr__", + "name": "__repr__", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/__repr__/self", + "name": "self", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return _build_repr(self)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/_iter_indices", + "name": "_iter_indices", + "qname": "sklearn.model_selection._split.BaseShuffleSplit._iter_indices", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/_iter_indices/self", + "name": "self", + "qname": "sklearn.model_selection._split.BaseShuffleSplit._iter_indices.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/_iter_indices/X", + "name": "X", + "qname": 
"sklearn.model_selection._split.BaseShuffleSplit._iter_indices.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/_iter_indices/y", + "name": "y", + "qname": "sklearn.model_selection._split.BaseShuffleSplit._iter_indices.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/_iter_indices/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.BaseShuffleSplit._iter_indices.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate (train, test) indices", + "docstring": "Generate (train, test) indices", + "code": " @abstractmethod\n def _iter_indices(self, X, y=None, groups=None):\n \"\"\"Generate (train, test) indices\"\"\"" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/get_n_splits", + "name": "get_n_splits", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.get_n_splits", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/get_n_splits/self", + "name": "self", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.get_n_splits.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/get_n_splits/X", + "name": "X", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.get_n_splits.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/get_n_splits/y", + "name": "y", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.get_n_splits.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/get_n_splits/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.get_n_splits.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." 
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the number of splitting iterations in the cross-validator", + "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator.", + "code": " def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n return self.n_splits" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/split", + "name": "split", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/split/self", + "name": "self", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/split/X", + "name": "X", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.split.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/split/y", + "name": "y", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.split.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target variable for supervised learning problems." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/BaseShuffleSplit/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.BaseShuffleSplit.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate indices to split data into training and test set.", + "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.", + "code": " def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n\n Notes\n -----\n Randomized CV splitters may return different results for each call of\n split. You can make the results identical by setting `random_state`\n to an integer.\n \"\"\"\n X, y, groups = indexable(X, y, groups)\n for train, test in self._iter_indices(X, y, groups):\n yield train, test" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split.GroupKFold.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split.GroupKFold.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold/__init__/n_splits", + "name": "n_splits", + "qname": "sklearn.model_selection._split.GroupKFold.__init__.n_splits", + "default_value": "5", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of folds. Must be at least 2.\n\n.. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "K-fold iterator variant with non-overlapping groups.\n\nThe same group will not appear in two different folds (the number of\ndistinct groups has to be at least equal to the number of folds).\n\nThe folds are approximately balanced in the sense that the number of\ndistinct groups is approximately the same in each fold.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " def __init__(self, n_splits=5):\n super().__init__(n_splits, shuffle=False, random_state=None)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold/_iter_test_indices", + "name": "_iter_test_indices", + "qname": "sklearn.model_selection._split.GroupKFold._iter_test_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold/_iter_test_indices/self", + "name": "self", + "qname": "sklearn.model_selection._split.GroupKFold._iter_test_indices.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold/_iter_test_indices/X", + "name": "X", + "qname": "sklearn.model_selection._split.GroupKFold._iter_test_indices.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold/_iter_test_indices/y", + "name": "y", + "qname": "sklearn.model_selection._split.GroupKFold._iter_test_indices.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold/_iter_test_indices/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.GroupKFold._iter_test_indices.groups", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _iter_test_indices(self, X, y, groups):\n if groups is None:\n raise ValueError(\"The 'groups' parameter should not be None.\")\n groups = check_array(groups, ensure_2d=False, dtype=None)\n\n unique_groups, groups = np.unique(groups, return_inverse=True)\n n_groups = len(unique_groups)\n\n if self.n_splits > n_groups:\n raise ValueError(\"Cannot have number of splits n_splits=%d greater\"\n \" than the number of groups: %d.\"\n % (self.n_splits, n_groups))\n\n # Weight groups by their number of occurrences\n n_samples_per_group = np.bincount(groups)\n\n # Distribute the most frequent groups first\n indices = np.argsort(n_samples_per_group)[::-1]\n n_samples_per_group = n_samples_per_group[indices]\n\n # Total weight of each fold\n n_samples_per_fold = np.zeros(self.n_splits)\n\n # Mapping from group index to fold index\n group_to_fold = np.zeros(len(unique_groups))\n\n # Distribute samples by adding the largest weight to the lightest fold\n for group_index, weight in enumerate(n_samples_per_group):\n lightest_fold = np.argmin(n_samples_per_fold)\n 
n_samples_per_fold[lightest_fold] += weight\n group_to_fold[indices[group_index]] = lightest_fold\n\n indices = group_to_fold[groups]\n\n for f in range(self.n_splits):\n yield np.where(indices == f)[0]" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold/split", + "name": "split", + "qname": "sklearn.model_selection._split.GroupKFold.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold/split/self", + "name": "self", + "qname": "sklearn.model_selection._split.GroupKFold.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold/split/X", + "name": "X", + "qname": "sklearn.model_selection._split.GroupKFold.split.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold/split/y", + "name": "y", + "qname": "sklearn.model_selection._split.GroupKFold.split.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The target variable for supervised learning problems." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupKFold/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.GroupKFold.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate indices to split data into training and test set.", + "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.", + "code": " def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n return super().split(X, y, groups)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split.GroupShuffleSplit.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split.GroupShuffleSplit.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/__init__/n_splits", + "name": "n_splits", + "qname": "sklearn.model_selection._split.GroupShuffleSplit.__init__.n_splits", + "default_value": "5", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of re-shuffling & splitting iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/__init__/test_size", + "name": "test_size", + "qname": "sklearn.model_selection._split.GroupShuffleSplit.__init__.test_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float, int", + "default_value": "0.2", + "description": "If float, should be between 0.0 and 1.0 and represent the proportion\nof groups to include in the test split (rounded up). If int,\nrepresents the absolute number of test groups. If None, the value is\nset to the complement of the train size.\nThe default will change in version 0.21. It will remain 0.2 only\nif ``train_size`` is unspecified, otherwise it will complement\nthe specified ``train_size``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/__init__/train_size", + "name": "train_size", + "qname": "sklearn.model_selection._split.GroupShuffleSplit.__init__.train_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or int", + "default_value": "None", + "description": "If float, should be between 0.0 and 1.0 and represent the\nproportion of the groups to include in the train split. If\nint, represents the absolute number of train groups. If None,\nthe value is automatically set to the complement of the test size." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._split.GroupShuffleSplit.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the randomness of the training and testing indices produced.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Shuffle-Group(s)-Out cross-validation iterator\n\nProvides randomized train/test indices to split data according to a\nthird-party provided group. 
This group information can be used to encode\narbitrary domain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and GroupShuffleSplit is that\nthe former generates splits using all subsets of size ``p`` unique groups,\nwhereas GroupShuffleSplit generates a user-determined number of random\ntest splits, each with a user-determined fraction of unique groups.\n\nFor example, a less computationally intensive alternative to\n``LeavePGroupsOut(p=10)`` would be\n``GroupShuffleSplit(test_size=10, n_splits=100)``.\n\nNote: The parameters ``test_size`` and ``train_size`` refer to groups, and\nnot to samples, as in ShuffleSplit.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_splits=5, *, test_size=None, train_size=None,\n random_state=None):\n super().__init__(\n n_splits=n_splits,\n test_size=test_size,\n train_size=train_size,\n random_state=random_state)\n self._default_test_size = 0.2" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/_iter_indices", + "name": "_iter_indices", + "qname": "sklearn.model_selection._split.GroupShuffleSplit._iter_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/_iter_indices/self", + "name": "self", + "qname": "sklearn.model_selection._split.GroupShuffleSplit._iter_indices.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/_iter_indices/X", + "name": "X", + "qname": "sklearn.model_selection._split.GroupShuffleSplit._iter_indices.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/_iter_indices/y", + "name": "y", + "qname": "sklearn.model_selection._split.GroupShuffleSplit._iter_indices.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/_iter_indices/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.GroupShuffleSplit._iter_indices.groups", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _iter_indices(self, X, y, groups):\n if groups is None:\n raise ValueError(\"The 'groups' parameter should not be None.\")\n groups = check_array(groups, ensure_2d=False, dtype=None)\n classes, group_indices = np.unique(groups, return_inverse=True)\n for group_train, group_test in super()._iter_indices(X=classes):\n # these are the indices of classes in the partition\n # invert them into data indices\n\n train = np.flatnonzero(np.in1d(group_indices, group_train))\n test = np.flatnonzero(np.in1d(group_indices, group_test))\n\n yield train, test" + 
}, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/split", + "name": "split", + "qname": "sklearn.model_selection._split.GroupShuffleSplit.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/split/self", + "name": "self", + "qname": "sklearn.model_selection._split.GroupShuffleSplit.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/split/X", + "name": "X", + "qname": "sklearn.model_selection._split.GroupShuffleSplit.split.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/split/y", + "name": "y", + "qname": "sklearn.model_selection._split.GroupShuffleSplit.split.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The target variable for supervised learning problems." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/GroupShuffleSplit/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.GroupShuffleSplit.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate indices to split data into training and test set.", + "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. 
You can make the results identical by setting `random_state`\nto an integer.", + "code": " def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n\n Notes\n -----\n Randomized CV splitters may return different results for each call of\n split. You can make the results identical by setting `random_state`\n to an integer.\n \"\"\"\n return super().split(X, y, groups)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/KFold/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split.KFold.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/KFold/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split.KFold.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/KFold/__init__/n_splits", + "name": "n_splits", + "qname": "sklearn.model_selection._split.KFold.__init__.n_splits", + "default_value": "5", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of folds. Must be at least 2.\n\n.. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/KFold/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.model_selection._split.KFold.__init__.shuffle", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to shuffle the data before splitting into batches.\nNote that the samples within each split will not be shuffled." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/KFold/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._split.KFold.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "When `shuffle` is True, `random_state` affects the ordering of the\nindices, which controls the randomness of each fold. Otherwise, this\nparameter has no effect.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
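As the GroupShuffleSplit description above stresses, test_size and train_size count groups rather than samples, and test_size is rounded up when given as a float. A short sketch under that reading; the 8-sample, 4-group dataset is an illustrative assumption:

import numpy as np
from sklearn.model_selection import GroupShuffleSplit

X = np.arange(16).reshape(8, 2)
groups = np.array([1, 1, 2, 2, 3, 3, 4, 4])

# test_size=0.25 holds out 25% of the 4 groups, i.e. one whole group per split
gss = GroupShuffleSplit(n_splits=4, test_size=0.25, random_state=0)
for train_idx, test_idx in gss.split(X, groups=groups):
    print(sorted(set(groups[test_idx])))  # exactly one group label each time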
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "K-Folds cross-validator\n\nProvides train/test indices to split data in train/test sets. Split\ndataset into k consecutive folds (without shuffling by default).\n\nEach fold is then used once as a validation while the k - 1 remaining\nfolds form the training set.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_splits=5, *, shuffle=False,\n random_state=None):\n super().__init__(n_splits=n_splits, shuffle=shuffle,\n random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/KFold/_iter_test_indices", + "name": "_iter_test_indices", + "qname": "sklearn.model_selection._split.KFold._iter_test_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/KFold/_iter_test_indices/self", + "name": "self", + "qname": "sklearn.model_selection._split.KFold._iter_test_indices.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/KFold/_iter_test_indices/X", + "name": "X", + "qname": "sklearn.model_selection._split.KFold._iter_test_indices.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/KFold/_iter_test_indices/y", + "name": "y", + "qname": "sklearn.model_selection._split.KFold._iter_test_indices.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/KFold/_iter_test_indices/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.KFold._iter_test_indices.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _iter_test_indices(self, X, y=None, groups=None):\n n_samples = _num_samples(X)\n indices = np.arange(n_samples)\n if self.shuffle:\n check_random_state(self.random_state).shuffle(indices)\n\n n_splits = self.n_splits\n fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=int)\n fold_sizes[:n_samples % n_splits] += 1\n current = 0\n for fold_size in fold_sizes:\n start, stop = current, current + fold_size\n yield indices[start:stop]\n current = stop" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/_iter_test_masks", + "name": "_iter_test_masks", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut._iter_test_masks", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/_iter_test_masks/self", + "name": "self", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut._iter_test_masks.self", + "default_value": null, + 
"assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/_iter_test_masks/X", + "name": "X", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut._iter_test_masks.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/_iter_test_masks/y", + "name": "y", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut._iter_test_masks.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/_iter_test_masks/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut._iter_test_masks.groups", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _iter_test_masks(self, X, y, groups):\n if groups is None:\n raise ValueError(\"The 'groups' parameter should not be None.\")\n # We make a copy of groups to avoid side-effects during iteration\n groups = check_array(groups, copy=True, ensure_2d=False, dtype=None)\n unique_groups = np.unique(groups)\n if len(unique_groups) <= 1:\n raise ValueError(\n \"The groups parameter contains fewer than 2 unique groups \"\n \"(%s). LeaveOneGroupOut expects at least 2.\" % unique_groups)\n for i in unique_groups:\n yield groups == i" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/get_n_splits", + "name": "get_n_splits", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut.get_n_splits", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/get_n_splits/self", + "name": "self", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut.get_n_splits.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/get_n_splits/X", + "name": "X", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut.get_n_splits.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/get_n_splits/y", + "name": "y", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut.get_n_splits.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." 
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/get_n_splits/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut.get_n_splits.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set. This 'groups' parameter must always be specified to\ncalculate the number of splits, though the other parameters can be\nomitted." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the number of splitting iterations in the cross-validator", + "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set. This 'groups' parameter must always be specified to\n calculate the number of splits, though the other parameters can be\n omitted.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator.", + "code": " def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set. This 'groups' parameter must always be specified to\n calculate the number of splits, though the other parameters can be\n omitted.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n if groups is None:\n raise ValueError(\"The 'groups' parameter should not be None.\")\n groups = check_array(groups, ensure_2d=False, dtype=None)\n return len(np.unique(groups))" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/split", + "name": "split", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/split/self", + "name": "self", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/split/X", + "name": "X", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut.split.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/split/y", + "name": "y", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut.split.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The target variable for supervised learning problems." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneGroupOut/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.LeaveOneGroupOut.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate indices to split data into training and test set.", + "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.", + "code": " def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n return super().split(X, y, groups)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneOut/_iter_test_indices", + "name": "_iter_test_indices", + "qname": "sklearn.model_selection._split.LeaveOneOut._iter_test_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneOut/_iter_test_indices/self", + "name": "self", + "qname": "sklearn.model_selection._split.LeaveOneOut._iter_test_indices.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneOut/_iter_test_indices/X", + "name": "X", + "qname": "sklearn.model_selection._split.LeaveOneOut._iter_test_indices.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": 
false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneOut/_iter_test_indices/y", + "name": "y", + "qname": "sklearn.model_selection._split.LeaveOneOut._iter_test_indices.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneOut/_iter_test_indices/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.LeaveOneOut._iter_test_indices.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _iter_test_indices(self, X, y=None, groups=None):\n n_samples = _num_samples(X)\n if n_samples <= 1:\n raise ValueError(\n 'Cannot perform LeaveOneOut with n_samples={}.'.format(\n n_samples)\n )\n return range(n_samples)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneOut/get_n_splits", + "name": "get_n_splits", + "qname": "sklearn.model_selection._split.LeaveOneOut.get_n_splits", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneOut/get_n_splits/self", + "name": "self", + "qname": "sklearn.model_selection._split.LeaveOneOut.get_n_splits.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneOut/get_n_splits/X", + "name": "X", + "qname": "sklearn.model_selection._split.LeaveOneOut.get_n_splits.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneOut/get_n_splits/y", + "name": "y", + "qname": "sklearn.model_selection._split.LeaveOneOut.get_n_splits.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeaveOneOut/get_n_splits/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.LeaveOneOut.get_n_splits.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." 
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the number of splitting iterations in the cross-validator", + "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator.", + "code": " def get_n_splits(self, X, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n if X is None:\n raise ValueError(\"The 'X' parameter should not be None.\")\n return _num_samples(X)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split.LeavePGroupsOut.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split.LeavePGroupsOut.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/__init__/n_groups", + "name": "n_groups", + "qname": "sklearn.model_selection._split.LeavePGroupsOut.__init__.n_groups", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of groups (``p``) to leave out in the test split." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Leave P Group(s) Out cross-validator\n\nProvides train/test indices to split data according to a third-party\nprovided group. 
This group information can be used to encode arbitrary\ndomain specific stratifications of the samples as integers.\n\nFor instance the groups could be the year of collection of the samples\nand thus allow for cross-validation against time-based splits.\n\nThe difference between LeavePGroupsOut and LeaveOneGroupOut is that\nthe former builds the test sets with all the samples assigned to\n``p`` different values of the groups while the latter uses samples\nall assigned the same groups.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " def __init__(self, n_groups):\n self.n_groups = n_groups" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/_iter_test_masks", + "name": "_iter_test_masks", + "qname": "sklearn.model_selection._split.LeavePGroupsOut._iter_test_masks", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/_iter_test_masks/self", + "name": "self", + "qname": "sklearn.model_selection._split.LeavePGroupsOut._iter_test_masks.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/_iter_test_masks/X", + "name": "X", + "qname": "sklearn.model_selection._split.LeavePGroupsOut._iter_test_masks.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/_iter_test_masks/y", + "name": "y", + "qname": "sklearn.model_selection._split.LeavePGroupsOut._iter_test_masks.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/_iter_test_masks/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.LeavePGroupsOut._iter_test_masks.groups", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _iter_test_masks(self, X, y, groups):\n if groups is None:\n raise ValueError(\"The 'groups' parameter should not be None.\")\n groups = check_array(groups, copy=True, ensure_2d=False, dtype=None)\n unique_groups = np.unique(groups)\n if self.n_groups >= len(unique_groups):\n raise ValueError(\n \"The groups parameter contains fewer than (or equal to) \"\n \"n_groups (%d) numbers of unique groups (%s). 
LeavePGroupsOut \"\n \"expects that at least n_groups + 1 (%d) unique groups be \"\n \"present\" % (self.n_groups, unique_groups, self.n_groups + 1))\n combi = combinations(range(len(unique_groups)), self.n_groups)\n for indices in combi:\n test_index = np.zeros(_num_samples(X), dtype=bool)\n for l in unique_groups[np.array(indices)]:\n test_index[groups == l] = True\n yield test_index" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/get_n_splits", + "name": "get_n_splits", + "qname": "sklearn.model_selection._split.LeavePGroupsOut.get_n_splits", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/get_n_splits/self", + "name": "self", + "qname": "sklearn.model_selection._split.LeavePGroupsOut.get_n_splits.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/get_n_splits/X", + "name": "X", + "qname": "sklearn.model_selection._split.LeavePGroupsOut.get_n_splits.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/get_n_splits/y", + "name": "y", + "qname": "sklearn.model_selection._split.LeavePGroupsOut.get_n_splits.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/get_n_splits/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.LeavePGroupsOut.get_n_splits.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set. This 'groups' parameter must always be specified to\ncalculate the number of splits, though the other parameters can be\nomitted." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the number of splitting iterations in the cross-validator", + "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set. 
This 'groups' parameter must always be specified to\n calculate the number of splits, though the other parameters can be\n omitted.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator.", + "code": " def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set. This 'groups' parameter must always be specified to\n calculate the number of splits, though the other parameters can be\n omitted.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n if groups is None:\n raise ValueError(\"The 'groups' parameter should not be None.\")\n groups = check_array(groups, ensure_2d=False, dtype=None)\n return int(comb(len(np.unique(groups)), self.n_groups, exact=True))" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/split", + "name": "split", + "qname": "sklearn.model_selection._split.LeavePGroupsOut.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/split/self", + "name": "self", + "qname": "sklearn.model_selection._split.LeavePGroupsOut.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/split/X", + "name": "X", + "qname": "sklearn.model_selection._split.LeavePGroupsOut.split.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/split/y", + "name": "y", + "qname": "sklearn.model_selection._split.LeavePGroupsOut.split.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The target variable for supervised learning problems." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePGroupsOut/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.LeavePGroupsOut.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set." 
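LeavePGroupsOut.get_n_splits above computes comb(n_unique_groups, n_groups) exactly, and _iter_test_masks enumerates every such combination of group labels. A sketch with illustrative data; the 4 samples over 3 groups are an assumption:

import numpy as np
from sklearn.model_selection import LeavePGroupsOut

X = np.arange(8).reshape(4, 2)
groups = np.array([1, 1, 2, 3])

lpgo = LeavePGroupsOut(n_groups=2)
print(lpgo.get_n_splits(groups=groups))  # comb(3, 2) == 3 splits
for train_idx, test_idx in lpgo.split(X, groups=groups):
    print(sorted(set(groups[test_idx])))  # every pair of groups held out once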
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate indices to split data into training and test set.", + "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.", + "code": " def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,)\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n return super().split(X, y, groups)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split.LeavePOut.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split.LeavePOut.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut/__init__/p", + "name": "p", + "qname": "sklearn.model_selection._split.LeavePOut.__init__.p", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Size of the test sets. Must be strictly less than the number of\nsamples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Leave-P-Out cross-validator\n\nProvides train/test indices to split data in train/test sets. This results\nin testing on all distinct samples of size p, while the remaining n - p\nsamples form the training set in each iteration.\n\nNote: ``LeavePOut(p)`` is NOT equivalent to\n``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.\n\nDue to the high number of iterations which grows combinatorically with the\nnumber of samples this cross-validation method can be very costly. 
For\nlarge datasets one should favor :class:`KFold`, :class:`StratifiedKFold`\nor :class:`ShuffleSplit`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " def __init__(self, p):\n self.p = p" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut/_iter_test_indices", + "name": "_iter_test_indices", + "qname": "sklearn.model_selection._split.LeavePOut._iter_test_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut/_iter_test_indices/self", + "name": "self", + "qname": "sklearn.model_selection._split.LeavePOut._iter_test_indices.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut/_iter_test_indices/X", + "name": "X", + "qname": "sklearn.model_selection._split.LeavePOut._iter_test_indices.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut/_iter_test_indices/y", + "name": "y", + "qname": "sklearn.model_selection._split.LeavePOut._iter_test_indices.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut/_iter_test_indices/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.LeavePOut._iter_test_indices.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _iter_test_indices(self, X, y=None, groups=None):\n n_samples = _num_samples(X)\n if n_samples <= self.p:\n raise ValueError(\n 'p={} must be strictly less than the number of '\n 'samples={}'.format(self.p, n_samples)\n )\n for combination in combinations(range(n_samples), self.p):\n yield np.array(combination)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut/get_n_splits", + "name": "get_n_splits", + "qname": "sklearn.model_selection._split.LeavePOut.get_n_splits", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut/get_n_splits/self", + "name": "self", + "qname": "sklearn.model_selection._split.LeavePOut.get_n_splits.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut/get_n_splits/X", + "name": "X", + "qname": "sklearn.model_selection._split.LeavePOut.get_n_splits.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut/get_n_splits/y", + "name": "y", + "qname": "sklearn.model_selection._split.LeavePOut.get_n_splits.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/LeavePOut/get_n_splits/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.LeavePOut.get_n_splits.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the number of splitting iterations in the cross-validator", + "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.", + "code": " def get_n_splits(self, X, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n \"\"\"\n if X is None:\n raise ValueError(\"The 'X' parameter should not be None.\")\n return int(comb(_num_samples(X), self.p, exact=True))" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split.PredefinedSplit.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split.PredefinedSplit.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/__init__/test_fold", + "name": "test_fold", + "qname": "sklearn.model_selection._split.PredefinedSplit.__init__.test_fold", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The entry ``test_fold[i]`` represents the index of the test set that\nsample ``i`` belongs to. It is possible to exclude sample ``i`` from\nany test set (i.e. include sample ``i`` in every training set) by\nsetting ``test_fold[i]`` equal to -1." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predefined split cross-validator\n\nProvides train/test indices to split data into train/test sets using a\npredefined scheme specified by the user with the ``test_fold`` parameter.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16", + "docstring": "", + "code": " def __init__(self, test_fold):\n self.test_fold = np.array(test_fold, dtype=int)\n self.test_fold = column_or_1d(self.test_fold)\n self.unique_folds = np.unique(self.test_fold)\n self.unique_folds = self.unique_folds[self.unique_folds != -1]" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/_iter_test_masks", + "name": "_iter_test_masks", + "qname": "sklearn.model_selection._split.PredefinedSplit._iter_test_masks", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/_iter_test_masks/self", + "name": "self", + "qname": "sklearn.model_selection._split.PredefinedSplit._iter_test_masks.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generates boolean masks corresponding to test sets.", + "docstring": "Generates boolean masks corresponding to test sets.", + "code": " def _iter_test_masks(self):\n \"\"\"Generates boolean masks corresponding to test sets.\"\"\"\n for f in self.unique_folds:\n test_index = np.where(self.test_fold == f)[0]\n test_mask = np.zeros(len(self.test_fold), dtype=bool)\n test_mask[test_index] = True\n yield test_mask" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/get_n_splits", + "name": "get_n_splits", + "qname": "sklearn.model_selection._split.PredefinedSplit.get_n_splits", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/get_n_splits/self", + "name": "self", + "qname": "sklearn.model_selection._split.PredefinedSplit.get_n_splits.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/get_n_splits/X", + "name": "X", + "qname": "sklearn.model_selection._split.PredefinedSplit.get_n_splits.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/get_n_splits/y", + "name": "y", + "qname": "sklearn.model_selection._split.PredefinedSplit.get_n_splits.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." 
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/get_n_splits/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.PredefinedSplit.get_n_splits.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the number of splitting iterations in the cross-validator", + "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator.", + "code": " def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n return len(self.unique_folds)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/split", + "name": "split", + "qname": "sklearn.model_selection._split.PredefinedSplit.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/split/self", + "name": "self", + "qname": "sklearn.model_selection._split.PredefinedSplit.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/split/X", + "name": "X", + "qname": "sklearn.model_selection._split.PredefinedSplit.split.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/split/y", + "name": "y", + "qname": "sklearn.model_selection._split.PredefinedSplit.split.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/PredefinedSplit/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.PredefinedSplit.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." 
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate indices to split data into training and test set.", + "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.", + "code": " def split(self, X=None, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n ind = np.arange(len(self.test_fold))\n for test_index in self._iter_test_masks():\n train_index = ind[np.logical_not(test_index)]\n test_index = ind[test_index]\n yield train_index, test_index" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/RepeatedKFold/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split.RepeatedKFold.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/RepeatedKFold/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split.RepeatedKFold.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/RepeatedKFold/__init__/n_splits", + "name": "n_splits", + "qname": "sklearn.model_selection._split.RepeatedKFold.__init__.n_splits", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of folds. Must be at least 2." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/RepeatedKFold/__init__/n_repeats", + "name": "n_repeats", + "qname": "sklearn.model_selection._split.RepeatedKFold.__init__.n_repeats", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of times cross-validator needs to be repeated." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/RepeatedKFold/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._split.RepeatedKFold.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the randomness of each repeated cross-validation instance.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Repeated K-Fold cross validator.\n\nRepeats K-Fold n times with different randomization in each repetition.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, n_splits=5, n_repeats=10, random_state=None):\n super().__init__(\n KFold, n_repeats=n_repeats,\n random_state=random_state, n_splits=n_splits)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/RepeatedStratifiedKFold/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split.RepeatedStratifiedKFold.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/RepeatedStratifiedKFold/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split.RepeatedStratifiedKFold.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/RepeatedStratifiedKFold/__init__/n_splits", + "name": "n_splits", + "qname": "sklearn.model_selection._split.RepeatedStratifiedKFold.__init__.n_splits", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of folds. Must be at least 2." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/RepeatedStratifiedKFold/__init__/n_repeats", + "name": "n_repeats", + "qname": "sklearn.model_selection._split.RepeatedStratifiedKFold.__init__.n_repeats", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of times cross-validator needs to be repeated." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/RepeatedStratifiedKFold/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._split.RepeatedStratifiedKFold.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the generation of the random states for each repetition.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Repeated Stratified K-Fold cross validator.\n\nRepeats Stratified K-Fold n times with different randomization in each\nrepetition.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, n_splits=5, n_repeats=10, random_state=None):\n super().__init__(\n StratifiedKFold, n_repeats=n_repeats, random_state=random_state,\n n_splits=n_splits)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/ShuffleSplit/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split.ShuffleSplit.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/ShuffleSplit/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split.ShuffleSplit.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/ShuffleSplit/__init__/n_splits", + "name": "n_splits", + "qname": "sklearn.model_selection._split.ShuffleSplit.__init__.n_splits", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of re-shuffling & splitting iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/ShuffleSplit/__init__/test_size", + "name": "test_size", + "qname": "sklearn.model_selection._split.ShuffleSplit.__init__.test_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or int", + "default_value": "None", + "description": "If float, should be between 0.0 and 1.0 and represent the proportion\nof the dataset to include in the test split. If int, represents the\nabsolute number of test samples. If None, the value is set to the\ncomplement of the train size. If ``train_size`` is also None, it will\nbe set to 0.1." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/ShuffleSplit/__init__/train_size", + "name": "train_size", + "qname": "sklearn.model_selection._split.ShuffleSplit.__init__.train_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or int", + "default_value": "None", + "description": "If float, should be between 0.0 and 1.0 and represent the\nproportion of the dataset to include in the train split. If\nint, represents the absolute number of train samples. If None,\nthe value is automatically set to the complement of the test size." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/ShuffleSplit/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._split.ShuffleSplit.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the randomness of the training and testing indices produced.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Random permutation cross-validator\n\nYields indices to split data into training and test sets.\n\nNote: contrary to other cross-validation strategies, random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_splits=10, *, test_size=None, train_size=None,\n random_state=None):\n super().__init__(\n n_splits=n_splits,\n test_size=test_size,\n train_size=train_size,\n random_state=random_state)\n self._default_test_size = 0.1" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/ShuffleSplit/_iter_indices", + "name": "_iter_indices", + "qname": "sklearn.model_selection._split.ShuffleSplit._iter_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/ShuffleSplit/_iter_indices/self", + "name": "self", + "qname": "sklearn.model_selection._split.ShuffleSplit._iter_indices.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/ShuffleSplit/_iter_indices/X", + "name": "X", + "qname": "sklearn.model_selection._split.ShuffleSplit._iter_indices.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/ShuffleSplit/_iter_indices/y", + "name": "y", + "qname": "sklearn.model_selection._split.ShuffleSplit._iter_indices.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/ShuffleSplit/_iter_indices/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.ShuffleSplit._iter_indices.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _iter_indices(self, X, y=None, groups=None):\n n_samples = _num_samples(X)\n n_train, n_test = 
_validate_shuffle_split(\n n_samples, self.test_size, self.train_size,\n default_test_size=self._default_test_size)\n\n rng = check_random_state(self.random_state)\n for i in range(self.n_splits):\n # random partition\n permutation = rng.permutation(n_samples)\n ind_test = permutation[:n_test]\n ind_train = permutation[n_test:(n_test + n_train)]\n yield ind_train, ind_test" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split.StratifiedKFold.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split.StratifiedKFold.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/__init__/n_splits", + "name": "n_splits", + "qname": "sklearn.model_selection._split.StratifiedKFold.__init__.n_splits", + "default_value": "5", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of folds. Must be at least 2.\n\n.. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.model_selection._split.StratifiedKFold.__init__.shuffle", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to shuffle each class's samples before splitting into batches.\nNote that the samples within each split will not be shuffled." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._split.StratifiedKFold.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "When `shuffle` is True, `random_state` affects the ordering of the\nindices, which controls the randomness of each fold for each class.\nOtherwise, leave `random_state` as `None`.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Stratified K-Folds cross-validator.\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a variation of KFold that returns\nstratified folds. 
The folds are made by preserving the percentage of\nsamples for each class.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_splits=5, *, shuffle=False, random_state=None):\n super().__init__(n_splits=n_splits, shuffle=shuffle,\n random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/_iter_test_masks", + "name": "_iter_test_masks", + "qname": "sklearn.model_selection._split.StratifiedKFold._iter_test_masks", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/_iter_test_masks/self", + "name": "self", + "qname": "sklearn.model_selection._split.StratifiedKFold._iter_test_masks.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/_iter_test_masks/X", + "name": "X", + "qname": "sklearn.model_selection._split.StratifiedKFold._iter_test_masks.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/_iter_test_masks/y", + "name": "y", + "qname": "sklearn.model_selection._split.StratifiedKFold._iter_test_masks.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/_iter_test_masks/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.StratifiedKFold._iter_test_masks.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _iter_test_masks(self, X, y=None, groups=None):\n test_folds = self._make_test_folds(X, y)\n for i in range(self.n_splits):\n yield test_folds == i" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/_make_test_folds", + "name": "_make_test_folds", + "qname": "sklearn.model_selection._split.StratifiedKFold._make_test_folds", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/_make_test_folds/self", + "name": "self", + "qname": "sklearn.model_selection._split.StratifiedKFold._make_test_folds.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/_make_test_folds/X", + "name": "X", + "qname": "sklearn.model_selection._split.StratifiedKFold._make_test_folds.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/_make_test_folds/y", + "name": "y", + "qname": "sklearn.model_selection._split.StratifiedKFold._make_test_folds.y", + "default_value": "None", + 
"assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _make_test_folds(self, X, y=None):\n rng = check_random_state(self.random_state)\n y = np.asarray(y)\n type_of_target_y = type_of_target(y)\n allowed_target_types = ('binary', 'multiclass')\n if type_of_target_y not in allowed_target_types:\n raise ValueError(\n 'Supported target types are: {}. Got {!r} instead.'.format(\n allowed_target_types, type_of_target_y))\n\n y = column_or_1d(y)\n\n _, y_idx, y_inv = np.unique(y, return_index=True, return_inverse=True)\n # y_inv encodes y according to lexicographic order. We invert y_idx to\n # map the classes so that they are encoded by order of appearance:\n # 0 represents the first label appearing in y, 1 the second, etc.\n _, class_perm = np.unique(y_idx, return_inverse=True)\n y_encoded = class_perm[y_inv]\n\n n_classes = len(y_idx)\n y_counts = np.bincount(y_encoded)\n min_groups = np.min(y_counts)\n if np.all(self.n_splits > y_counts):\n raise ValueError(\"n_splits=%d cannot be greater than the\"\n \" number of members in each class.\"\n % (self.n_splits))\n if self.n_splits > min_groups:\n warnings.warn((\"The least populated class in y has only %d\"\n \" members, which is less than n_splits=%d.\"\n % (min_groups, self.n_splits)), UserWarning)\n\n # Determine the optimal number of samples from each class in each fold,\n # using round robin over the sorted y. (This can be done direct from\n # counts, but that code is unreadable.)\n y_order = np.sort(y_encoded)\n allocation = np.asarray(\n [np.bincount(y_order[i::self.n_splits], minlength=n_classes)\n for i in range(self.n_splits)])\n\n # To maintain the data order dependencies as best as possible within\n # the stratification constraint, we assign samples from each class in\n # blocks (and then mess that up when shuffle=True).\n test_folds = np.empty(len(y), dtype='i')\n for k in range(n_classes):\n # since the kth column of allocation stores the number of samples\n # of class k in each test set, this generates blocks of fold\n # indices corresponding to the allocation for class k.\n folds_for_class = np.arange(self.n_splits).repeat(allocation[:, k])\n if self.shuffle:\n rng.shuffle(folds_for_class)\n test_folds[y_encoded == k] = folds_for_class\n return test_folds" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/split", + "name": "split", + "qname": "sklearn.model_selection._split.StratifiedKFold.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/split/self", + "name": "self", + "qname": "sklearn.model_selection._split.StratifiedKFold.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/split/X", + "name": "X", + "qname": "sklearn.model_selection._split.StratifiedKFold.split.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features.\n\nNote that providing ``y`` is sufficient to 
generate the splits and\nhence ``np.zeros(n_samples)`` may be used as a placeholder for\n``X`` instead of actual training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/split/y", + "name": "y", + "qname": "sklearn.model_selection._split.StratifiedKFold.split.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target variable for supervised learning problems.\nStratification is done based on the y labels." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedKFold/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.StratifiedKFold.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate indices to split data into training and test set.", + "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Note that providing ``y`` is sufficient to generate the splits and\n hence ``np.zeros(n_samples)`` may be used as a placeholder for\n ``X`` instead of actual training data.\n\ny : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n Stratification is done based on the y labels.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.", + "code": " def split(self, X, y, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Note that providing ``y`` is sufficient to generate the splits and\n hence ``np.zeros(n_samples)`` may be used as a placeholder for\n ``X`` instead of actual training data.\n\n y : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n Stratification is done based on the y labels.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n\n Notes\n -----\n Randomized CV splitters may return different results for each call of\n split. 
You can make the results identical by setting `random_state`\n to an integer.\n \"\"\"\n y = check_array(y, ensure_2d=False, dtype=None)\n return super().split(X, y, groups)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/__init__/n_splits", + "name": "n_splits", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit.__init__.n_splits", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of re-shuffling & splitting iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/__init__/test_size", + "name": "test_size", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit.__init__.test_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or int", + "default_value": "None", + "description": "If float, should be between 0.0 and 1.0 and represent the proportion\nof the dataset to include in the test split. If int, represents the\nabsolute number of test samples. If None, the value is set to the\ncomplement of the train size. If ``train_size`` is also None, it will\nbe set to 0.1." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/__init__/train_size", + "name": "train_size", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit.__init__.train_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or int", + "default_value": "None", + "description": "If float, should be between 0.0 and 1.0 and represent the\nproportion of the dataset to include in the train split. If\nint, represents the absolute number of train samples. If None,\nthe value is automatically set to the complement of the test size." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the randomness of the training and testing indices produced.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Stratified ShuffleSplit cross-validator\n\nProvides train/test indices to split data in train/test sets.\n\nThis cross-validation object is a merge of StratifiedKFold and\nShuffleSplit, which returns stratified randomized folds. The folds\nare made by preserving the percentage of samples for each class.\n\nNote: like the ShuffleSplit strategy, stratified random splits\ndo not guarantee that all folds will be different, although this is\nstill very likely for sizeable datasets.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_splits=10, *, test_size=None, train_size=None,\n random_state=None):\n super().__init__(\n n_splits=n_splits,\n test_size=test_size,\n train_size=train_size,\n random_state=random_state)\n self._default_test_size = 0.1" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/_iter_indices", + "name": "_iter_indices", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit._iter_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/_iter_indices/self", + "name": "self", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit._iter_indices.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/_iter_indices/X", + "name": "X", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit._iter_indices.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/_iter_indices/y", + "name": "y", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit._iter_indices.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/_iter_indices/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit._iter_indices.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _iter_indices(self, X, y, groups=None):\n n_samples = _num_samples(X)\n y = check_array(y, ensure_2d=False, dtype=None)\n n_train, n_test = _validate_shuffle_split(\n n_samples, self.test_size, self.train_size,\n default_test_size=self._default_test_size)\n\n if y.ndim == 2:\n # for multi-label y, map each distinct row to a string repr\n # using join because str(row) uses an ellipsis if len(row) > 1000\n y = np.array([' '.join(row.astype('str')) for row in y])\n\n classes, y_indices = np.unique(y, 
return_inverse=True)\n n_classes = classes.shape[0]\n\n class_counts = np.bincount(y_indices)\n if np.min(class_counts) < 2:\n raise ValueError(\"The least populated class in y has only 1\"\n \" member, which is too few. The minimum\"\n \" number of groups for any class cannot\"\n \" be less than 2.\")\n\n if n_train < n_classes:\n raise ValueError('The train_size = %d should be greater or '\n 'equal to the number of classes = %d' %\n (n_train, n_classes))\n if n_test < n_classes:\n raise ValueError('The test_size = %d should be greater or '\n 'equal to the number of classes = %d' %\n (n_test, n_classes))\n\n # Find the sorted list of instances for each class:\n # (np.unique above performs a sort, so code is O(n logn) already)\n class_indices = np.split(np.argsort(y_indices, kind='mergesort'),\n np.cumsum(class_counts)[:-1])\n\n rng = check_random_state(self.random_state)\n\n for _ in range(self.n_splits):\n # if there are ties in the class-counts, we want\n # to make sure to break them anew in each iteration\n n_i = _approximate_mode(class_counts, n_train, rng)\n class_counts_remaining = class_counts - n_i\n t_i = _approximate_mode(class_counts_remaining, n_test, rng)\n\n train = []\n test = []\n\n for i in range(n_classes):\n permutation = rng.permutation(class_counts[i])\n perm_indices_class_i = class_indices[i].take(permutation,\n mode='clip')\n\n train.extend(perm_indices_class_i[:n_i[i]])\n test.extend(perm_indices_class_i[n_i[i]:n_i[i] + t_i[i]])\n\n train = rng.permutation(train)\n test = rng.permutation(test)\n\n yield train, test" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/split", + "name": "split", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/split/self", + "name": "self", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/split/X", + "name": "X", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit.split.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features.\n\nNote that providing ``y`` is sufficient to generate the splits and\nhence ``np.zeros(n_samples)`` may be used as a placeholder for\n``X`` instead of actual training data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/split/y", + "name": "y", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit.split.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_labels)", + "default_value": "", + "description": "The target variable for supervised learning problems.\nStratification is done based on the y labels." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/StratifiedShuffleSplit/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.StratifiedShuffleSplit.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate indices to split data into training and test set.", + "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Note that providing ``y`` is sufficient to generate the splits and\n hence ``np.zeros(n_samples)`` may be used as a placeholder for\n ``X`` instead of actual training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_labels)\n The target variable for supervised learning problems.\n Stratification is done based on the y labels.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.\n\nNotes\n-----\nRandomized CV splitters may return different results for each call of\nsplit. You can make the results identical by setting `random_state`\nto an integer.", + "code": " def split(self, X, y, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n Note that providing ``y`` is sufficient to generate the splits and\n hence ``np.zeros(n_samples)`` may be used as a placeholder for\n ``X`` instead of actual training data.\n\n y : array-like of shape (n_samples,) or (n_samples, n_labels)\n The target variable for supervised learning problems.\n Stratification is done based on the y labels.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n\n Notes\n -----\n Randomized CV splitters may return different results for each call of\n split. 
You can make the results identical by setting `random_state`\n to an integer.\n \"\"\"\n y = check_array(y, ensure_2d=False, dtype=None)\n return super().split(X, y, groups)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split.TimeSeriesSplit.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split.TimeSeriesSplit.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit/__init__/n_splits", + "name": "n_splits", + "qname": "sklearn.model_selection._split.TimeSeriesSplit.__init__.n_splits", + "default_value": "5", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of splits. Must be at least 2.\n\n.. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit/__init__/max_train_size", + "name": "max_train_size", + "qname": "sklearn.model_selection._split.TimeSeriesSplit.__init__.max_train_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Maximum size for a single training set." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit/__init__/test_size", + "name": "test_size", + "qname": "sklearn.model_selection._split.TimeSeriesSplit.__init__.test_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Used to limit the size of the test set. Defaults to\n``n_samples // (n_splits + 1)``, which is the maximum allowed value\nwith ``gap=0``.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit/__init__/gap", + "name": "gap", + "qname": "sklearn.model_selection._split.TimeSeriesSplit.__init__.gap", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Number of samples to exclude from the end of each train set before\nthe test set.\n\n.. 
versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Time Series cross-validator\n\nProvides train/test indices to split time series data samples\nthat are observed at fixed time intervals, in train/test sets.\nIn each split, test indices must be higher than before, and thus shuffling\nin cross validator is inappropriate.\n\nThis cross-validation object is a variation of :class:`KFold`.\nIn the kth split, it returns first k folds as train set and the\n(k+1)th fold as test set.\n\nNote that unlike standard cross-validation methods, successive\ntraining sets are supersets of those that come before them.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self,\n n_splits=5,\n *,\n max_train_size=None,\n test_size=None,\n gap=0):\n super().__init__(n_splits, shuffle=False, random_state=None)\n self.max_train_size = max_train_size\n self.test_size = test_size\n self.gap = gap" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit/split", + "name": "split", + "qname": "sklearn.model_selection._split.TimeSeriesSplit.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit/split/self", + "name": "self", + "qname": "sklearn.model_selection._split.TimeSeriesSplit.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit/split/X", + "name": "X", + "qname": "sklearn.model_selection._split.TimeSeriesSplit.split.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit/split/y", + "name": "y", + "qname": "sklearn.model_selection._split.TimeSeriesSplit.split.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/TimeSeriesSplit/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._split.TimeSeriesSplit.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Always ignored, exists for compatibility." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate indices to split data into training and test set.", + "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Always ignored, exists for compatibility.\n\ngroups : array-like of shape (n_samples,)\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.", + "code": " def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Always ignored, exists for compatibility.\n\n groups : array-like of shape (n_samples,)\n Always ignored, exists for compatibility.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n X, y, groups = indexable(X, y, groups)\n n_samples = _num_samples(X)\n n_splits = self.n_splits\n n_folds = n_splits + 1\n gap = self.gap\n test_size = self.test_size if self.test_size is not None \\\n else n_samples // n_folds\n\n # Make sure we have enough samples for the given split parameters\n if n_folds > n_samples:\n raise ValueError(\n (f\"Cannot have number of folds={n_folds} greater\"\n f\" than the number of samples={n_samples}.\"))\n if n_samples - gap - (test_size * n_splits) <= 0:\n raise ValueError(\n (f\"Too many splits={n_splits} for number of samples\"\n f\"={n_samples} with test_size={test_size} and gap={gap}.\"))\n\n indices = np.arange(n_samples)\n test_starts = range(n_samples - n_splits * test_size,\n n_samples, test_size)\n\n for test_start in test_starts:\n train_end = test_start - gap\n if self.max_train_size and self.max_train_size < train_end:\n yield (indices[train_end - self.max_train_size:train_end],\n indices[test_start:test_start + test_size])\n else:\n yield (indices[:train_end],\n indices[test_start:test_start + test_size])" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split._BaseKFold.__init__", + "decorators": ["abstractmethod", "_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split._BaseKFold.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/__init__/n_splits", + "name": "n_splits", + "qname": "sklearn.model_selection._split._BaseKFold.__init__.n_splits", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.model_selection._split/_BaseKFold/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.model_selection._split._BaseKFold.__init__.shuffle", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._split._BaseKFold.__init__.random_state", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for KFold, GroupKFold, and StratifiedKFold", + "docstring": "", + "code": " @abstractmethod\n @_deprecate_positional_args\n def __init__(self, n_splits, *, shuffle, random_state):\n if not isinstance(n_splits, numbers.Integral):\n raise ValueError('The number of folds must be of Integral type. '\n '%s of type %s was passed.'\n % (n_splits, type(n_splits)))\n n_splits = int(n_splits)\n\n if n_splits <= 1:\n raise ValueError(\n \"k-fold cross-validation requires at least one\"\n \" train/test split by setting n_splits=2 or more,\"\n \" got n_splits={0}.\".format(n_splits))\n\n if not isinstance(shuffle, bool):\n raise TypeError(\"shuffle must be True or False;\"\n \" got {0}\".format(shuffle))\n\n if not shuffle and random_state is not None: # None is the default\n raise ValueError(\n 'Setting a random_state has no effect since shuffle is '\n 'False. You should leave '\n 'random_state to its default (None), or set shuffle=True.',\n )\n\n self.n_splits = n_splits\n self.shuffle = shuffle\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/get_n_splits", + "name": "get_n_splits", + "qname": "sklearn.model_selection._split._BaseKFold.get_n_splits", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/get_n_splits/self", + "name": "self", + "qname": "sklearn.model_selection._split._BaseKFold.get_n_splits.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/get_n_splits/X", + "name": "X", + "qname": "sklearn.model_selection._split._BaseKFold.get_n_splits.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/get_n_splits/y", + "name": "y", + "qname": "sklearn.model_selection._split._BaseKFold.get_n_splits.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." 
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/get_n_splits/groups", + "name": "groups", + "qname": "sklearn.model_selection._split._BaseKFold.get_n_splits.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the number of splitting iterations in the cross-validator", + "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator.", + "code": " def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n return self.n_splits" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/split", + "name": "split", + "qname": "sklearn.model_selection._split._BaseKFold.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/split/self", + "name": "self", + "qname": "sklearn.model_selection._split._BaseKFold.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/split/X", + "name": "X", + "qname": "sklearn.model_selection._split._BaseKFold.split.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/split/y", + "name": "y", + "qname": "sklearn.model_selection._split._BaseKFold.split.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "The target variable for supervised learning problems." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_BaseKFold/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._split._BaseKFold.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate indices to split data into training and test set.", + "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.", + "code": " def split(self, X, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,), default=None\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n X, y, groups = indexable(X, y, groups)\n n_samples = _num_samples(X)\n if self.n_splits > n_samples:\n raise ValueError(\n (\"Cannot have number of splits n_splits={0} greater\"\n \" than the number of samples: n_samples={1}.\")\n .format(self.n_splits, n_samples))\n\n for train, test in super().split(X, y, groups):\n yield train, test" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split._CVIterableWrapper.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split._CVIterableWrapper.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/__init__/cv", + "name": "cv", + "qname": "sklearn.model_selection._split._CVIterableWrapper.__init__.cv", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Wrapper class for old style cv objects 
and iterables.", + "docstring": "", + "code": " def __init__(self, cv):\n self.cv = list(cv)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/get_n_splits", + "name": "get_n_splits", + "qname": "sklearn.model_selection._split._CVIterableWrapper.get_n_splits", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/get_n_splits/self", + "name": "self", + "qname": "sklearn.model_selection._split._CVIterableWrapper.get_n_splits.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/get_n_splits/X", + "name": "X", + "qname": "sklearn.model_selection._split._CVIterableWrapper.get_n_splits.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/get_n_splits/y", + "name": "y", + "qname": "sklearn.model_selection._split._CVIterableWrapper.get_n_splits.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/get_n_splits/groups", + "name": "groups", + "qname": "sklearn.model_selection._split._CVIterableWrapper.get_n_splits.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." 
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the number of splitting iterations in the cross-validator", + "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator.", + "code": " def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n return len(self.cv)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/split", + "name": "split", + "qname": "sklearn.model_selection._split._CVIterableWrapper.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/split/self", + "name": "self", + "qname": "sklearn.model_selection._split._CVIterableWrapper.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/split/X", + "name": "X", + "qname": "sklearn.model_selection._split._CVIterableWrapper.split.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/split/y", + "name": "y", + "qname": "sklearn.model_selection._split._CVIterableWrapper.split.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_CVIterableWrapper/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._split._CVIterableWrapper.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility." 
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate indices to split data into training and test set.", + "docstring": "Generate indices to split data into training and test set.\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n\ny : object\n Always ignored, exists for compatibility.\n\ngroups : object\n Always ignored, exists for compatibility.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.", + "code": " def split(self, X=None, y=None, groups=None):\n \"\"\"Generate indices to split data into training and test set.\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n\n y : object\n Always ignored, exists for compatibility.\n\n groups : object\n Always ignored, exists for compatibility.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n for train, test in self.cv:\n yield train, test" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/__init__", + "name": "__init__", + "qname": "sklearn.model_selection._split._RepeatedSplits.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/__init__/self", + "name": "self", + "qname": "sklearn.model_selection._split._RepeatedSplits.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/__init__/cv", + "name": "cv", + "qname": "sklearn.model_selection._split._RepeatedSplits.__init__.cv", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Cross-validator class." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/__init__/n_repeats", + "name": "n_repeats", + "qname": "sklearn.model_selection._split._RepeatedSplits.__init__.n_repeats", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of times cross-validator needs to be repeated." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/__init__/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._split._RepeatedSplits.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Passes `random_state` to the arbitrary repeating cross validator.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/__init__/cvargs", + "name": "cvargs", + "qname": "sklearn.model_selection._split._RepeatedSplits.__init__.cvargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "additional params", + "default_value": "", + "description": "Constructor parameters for cv. Must not contain random_state\nand shuffle." + }, + "type": { + "kind": "NamedType", + "name": "additional params" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Repeated splits for an arbitrary randomized CV splitter.\n\nRepeats splits for cross-validators n times with different randomization\nin each repetition.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, cv, *, n_repeats=10, random_state=None, **cvargs):\n if not isinstance(n_repeats, numbers.Integral):\n raise ValueError(\"Number of repetitions must be of Integral type.\")\n\n if n_repeats <= 0:\n raise ValueError(\"Number of repetitions must be greater than 0.\")\n\n if any(key in cvargs for key in ('random_state', 'shuffle')):\n raise ValueError(\n \"cvargs must not contain random_state or shuffle.\")\n\n self.cv = cv\n self.n_repeats = n_repeats\n self.random_state = random_state\n self.cvargs = cvargs" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/__repr__", + "name": "__repr__", + "qname": "sklearn.model_selection._split._RepeatedSplits.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/__repr__/self", + "name": "self", + "qname": "sklearn.model_selection._split._RepeatedSplits.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return _build_repr(self)" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/get_n_splits", + "name": "get_n_splits", + "qname": "sklearn.model_selection._split._RepeatedSplits.get_n_splits", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/get_n_splits/self", + "name": "self", + "qname": "sklearn.model_selection._split._RepeatedSplits.get_n_splits.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/get_n_splits/X", + "name": "X", + "qname": "sklearn.model_selection._split._RepeatedSplits.get_n_splits.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility.\n``np.zeros(n_samples)`` may be used as a placeholder." 
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/get_n_splits/y", + "name": "y", + "qname": "sklearn.model_selection._split._RepeatedSplits.get_n_splits.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Always ignored, exists for compatibility.\n``np.zeros(n_samples)`` may be used as a placeholder." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/get_n_splits/groups", + "name": "groups", + "qname": "sklearn.model_selection._split._RepeatedSplits.get_n_splits.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the number of splitting iterations in the cross-validator", + "docstring": "Returns the number of splitting iterations in the cross-validator\n\nParameters\n----------\nX : object\n Always ignored, exists for compatibility.\n ``np.zeros(n_samples)`` may be used as a placeholder.\n\ny : object\n Always ignored, exists for compatibility.\n ``np.zeros(n_samples)`` may be used as a placeholder.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nReturns\n-------\nn_splits : int\n Returns the number of splitting iterations in the cross-validator.", + "code": " def get_n_splits(self, X=None, y=None, groups=None):\n \"\"\"Returns the number of splitting iterations in the cross-validator\n\n Parameters\n ----------\n X : object\n Always ignored, exists for compatibility.\n ``np.zeros(n_samples)`` may be used as a placeholder.\n\n y : object\n Always ignored, exists for compatibility.\n ``np.zeros(n_samples)`` may be used as a placeholder.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Returns\n -------\n n_splits : int\n Returns the number of splitting iterations in the cross-validator.\n \"\"\"\n rng = check_random_state(self.random_state)\n cv = self.cv(random_state=rng, shuffle=True,\n **self.cvargs)\n return cv.get_n_splits(X, y, groups) * self.n_repeats" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/split", + "name": "split", + "qname": "sklearn.model_selection._split._RepeatedSplits.split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/split/self", + "name": "self", + "qname": "sklearn.model_selection._split._RepeatedSplits.split.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/split/X", + "name": "X", + "qname": "sklearn.model_selection._split._RepeatedSplits.split.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": 
"array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/split/y", + "name": "y", + "qname": "sklearn.model_selection._split._RepeatedSplits.split.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The target variable for supervised learning problems." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_RepeatedSplits/split/groups", + "name": "groups", + "qname": "sklearn.model_selection._split._RepeatedSplits.split.groups", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generates indices to split data into training and test set.", + "docstring": "Generates indices to split data into training and test set.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\nYields\n------\ntrain : ndarray\n The training set indices for that split.\n\ntest : ndarray\n The testing set indices for that split.", + "code": " def split(self, X, y=None, groups=None):\n \"\"\"Generates indices to split data into training and test set.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n The target variable for supervised learning problems.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set.\n\n Yields\n ------\n train : ndarray\n The training set indices for that split.\n\n test : ndarray\n The testing set indices for that split.\n \"\"\"\n n_repeats = self.n_repeats\n rng = check_random_state(self.random_state)\n\n for idx in range(n_repeats):\n cv = self.cv(random_state=rng, shuffle=True,\n **self.cvargs)\n for train_index, test_index in cv.split(X, y, groups):\n yield train_index, test_index" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_build_repr", + "name": "_build_repr", + "qname": "sklearn.model_selection._split._build_repr", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/_build_repr/self", + "name": "self", + "qname": "sklearn.model_selection._split._build_repr.self", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + 
"type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _build_repr(self):\n # XXX This is copied from BaseEstimator's get_params\n cls = self.__class__\n init = getattr(cls.__init__, 'deprecated_original', cls.__init__)\n # Ignore varargs, kw and default values and pop self\n init_signature = signature(init)\n # Consider the constructor parameters excluding 'self'\n if init is object.__init__:\n args = []\n else:\n args = sorted([p.name for p in init_signature.parameters.values()\n if p.name != 'self' and p.kind != p.VAR_KEYWORD])\n class_name = self.__class__.__name__\n params = dict()\n for key in args:\n # We need deprecation warnings to always be on in order to\n # catch deprecated param values.\n # This is set in utils/__init__.py but it gets overwritten\n # when running under python3 somehow.\n warnings.simplefilter(\"always\", FutureWarning)\n try:\n with warnings.catch_warnings(record=True) as w:\n value = getattr(self, key, None)\n if value is None and hasattr(self, 'cvargs'):\n value = self.cvargs.get(key, None)\n if len(w) and w[0].category == FutureWarning:\n # if the parameter is deprecated, don't show it\n continue\n finally:\n warnings.filters.pop(0)\n params[key] = value\n\n return '%s(%s)' % (class_name, _pprint(params, offset=len(class_name)))" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_validate_shuffle_split", + "name": "_validate_shuffle_split", + "qname": "sklearn.model_selection._split._validate_shuffle_split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/_validate_shuffle_split/n_samples", + "name": "n_samples", + "qname": "sklearn.model_selection._split._validate_shuffle_split.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_validate_shuffle_split/test_size", + "name": "test_size", + "qname": "sklearn.model_selection._split._validate_shuffle_split.test_size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_validate_shuffle_split/train_size", + "name": "train_size", + "qname": "sklearn.model_selection._split._validate_shuffle_split.train_size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_validate_shuffle_split/default_test_size", + "name": "default_test_size", + "qname": "sklearn.model_selection._split._validate_shuffle_split.default_test_size", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validation helper to check if the test/test sizes are meaningful wrt to the\nsize of the data (n_samples)", + "docstring": "Validation helper to check if the test/test sizes are meaningful wrt to the\nsize of the data (n_samples)", + "code": "def 
_validate_shuffle_split(n_samples, test_size, train_size,\n default_test_size=None):\n \"\"\"\n Validation helper to check if the test/test sizes are meaningful wrt to the\n size of the data (n_samples)\n \"\"\"\n if test_size is None and train_size is None:\n test_size = default_test_size\n\n test_size_type = np.asarray(test_size).dtype.kind\n train_size_type = np.asarray(train_size).dtype.kind\n\n if (test_size_type == 'i' and (test_size >= n_samples or test_size <= 0)\n or test_size_type == 'f' and (test_size <= 0 or test_size >= 1)):\n raise ValueError('test_size={0} should be either positive and smaller'\n ' than the number of samples {1} or a float in the '\n '(0, 1) range'.format(test_size, n_samples))\n\n if (train_size_type == 'i' and (train_size >= n_samples or train_size <= 0)\n or train_size_type == 'f' and (train_size <= 0 or train_size >= 1)):\n raise ValueError('train_size={0} should be either positive and smaller'\n ' than the number of samples {1} or a float in the '\n '(0, 1) range'.format(train_size, n_samples))\n\n if train_size is not None and train_size_type not in ('i', 'f'):\n raise ValueError(\"Invalid value for train_size: {}\".format(train_size))\n if test_size is not None and test_size_type not in ('i', 'f'):\n raise ValueError(\"Invalid value for test_size: {}\".format(test_size))\n\n if (train_size_type == 'f' and test_size_type == 'f' and\n train_size + test_size > 1):\n raise ValueError(\n 'The sum of test_size and train_size = {}, should be in the (0, 1)'\n ' range. Reduce test_size and/or train_size.'\n .format(train_size + test_size))\n\n if test_size_type == 'f':\n n_test = ceil(test_size * n_samples)\n elif test_size_type == 'i':\n n_test = float(test_size)\n\n if train_size_type == 'f':\n n_train = floor(train_size * n_samples)\n elif train_size_type == 'i':\n n_train = float(train_size)\n\n if train_size is None:\n n_train = n_samples - n_test\n elif test_size is None:\n n_test = n_samples - n_train\n\n if n_train + n_test > n_samples:\n raise ValueError('The sum of train_size and test_size = %d, '\n 'should be smaller than the number of '\n 'samples %d. Reduce test_size and/or '\n 'train_size.' % (n_train + n_test, n_samples))\n\n n_train, n_test = int(n_train), int(n_test)\n\n if n_train == 0:\n raise ValueError(\n 'With n_samples={}, test_size={} and train_size={}, the '\n 'resulting train set will be empty. Adjust any of the '\n 'aforementioned parameters.'.format(n_samples, test_size,\n train_size)\n )\n\n return n_train, n_test" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/_yields_constant_splits", + "name": "_yields_constant_splits", + "qname": "sklearn.model_selection._split._yields_constant_splits", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/_yields_constant_splits/cv", + "name": "cv", + "qname": "sklearn.model_selection._split._yields_constant_splits.cv", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _yields_constant_splits(cv):\n # Return True if calling cv.split() always returns the same splits\n # We assume that if a cv doesn't have a shuffle parameter, it shuffles by\n # default (e.g. ShuffleSplit). 
If it actually doesn't shuffle (e.g.\n # LeaveOneOut), then it won't have a random_state parameter anyway, in\n # which case it will default to 0, leading to output=True\n shuffle = getattr(cv, 'shuffle', True)\n random_state = getattr(cv, 'random_state', 0)\n return isinstance(random_state, numbers.Integral) or not shuffle" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/check_cv", + "name": "check_cv", + "qname": "sklearn.model_selection._split.check_cv", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/check_cv/cv", + "name": "cv", + "qname": "sklearn.model_selection._split.check_cv.cv", + "default_value": "5", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n- None, to use the default 5-fold cross validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if classifier is True and ``y`` is either\nbinary or multiclass, :class:`StratifiedKFold` is used. In all other\ncases, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/check_cv/y", + "name": "y", + "qname": "sklearn.model_selection._split.check_cv.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "The target variable for supervised learning problems." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/check_cv/classifier", + "name": "classifier", + "qname": "sklearn.model_selection._split.check_cv.classifier", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether the task is a classification task, in which case\nstratified KFold will be used." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Input checker utility for building a cross-validator", + "docstring": "Input checker utility for building a cross-validator\n\nParameters\n----------\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if classifier is True and ``y`` is either\n binary or multiclass, :class:`StratifiedKFold` is used. In all other\n cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value changed from 3-fold to 5-fold.\n\ny : array-like, default=None\n The target variable for supervised learning problems.\n\nclassifier : bool, default=False\n Whether the task is a classification task, in which case\n stratified KFold will be used.\n\nReturns\n-------\nchecked_cv : a cross-validator instance.\n The return value is a cross-validator which generates the train/test\n splits via the ``split`` method.", + "code": "@_deprecate_positional_args\ndef check_cv(cv=5, y=None, *, classifier=False):\n \"\"\"Input checker utility for building a cross-validator\n\n Parameters\n ----------\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n - None, to use the default 5-fold cross validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if classifier is True and ``y`` is either\n binary or multiclass, :class:`StratifiedKFold` is used. In all other\n cases, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value changed from 3-fold to 5-fold.\n\n y : array-like, default=None\n The target variable for supervised learning problems.\n\n classifier : bool, default=False\n Whether the task is a classification task, in which case\n stratified KFold will be used.\n\n Returns\n -------\n checked_cv : a cross-validator instance.\n The return value is a cross-validator which generates the train/test\n splits via the ``split`` method.\n \"\"\"\n cv = 5 if cv is None else cv\n if isinstance(cv, numbers.Integral):\n if (classifier and (y is not None) and\n (type_of_target(y) in ('binary', 'multiclass'))):\n return StratifiedKFold(cv)\n else:\n return KFold(cv)\n\n if not hasattr(cv, 'split') or isinstance(cv, str):\n if not isinstance(cv, Iterable) or isinstance(cv, str):\n raise ValueError(\"Expected cv as an integer, cross-validation \"\n \"object (from sklearn.model_selection) \"\n \"or an iterable. Got %s.\" % cv)\n return _CVIterableWrapper(cv)\n\n return cv # New style cv objects are passed without any modification" + }, + { + "id": "scikit-learn/sklearn.model_selection._split/train_test_split", + "name": "train_test_split", + "qname": "sklearn.model_selection._split.train_test_split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._split/train_test_split/arrays", + "name": "arrays", + "qname": "sklearn.model_selection._split.train_test_split.arrays", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "sequence of indexables with same length / shape[0]", + "default_value": "", + "description": "Allowed inputs are lists, numpy arrays, scipy-sparse\nmatrices or pandas dataframes." 
+ }, + "type": { + "kind": "NamedType", + "name": "sequence of indexables with same length / shape[0]" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/train_test_split/test_size", + "name": "test_size", + "qname": "sklearn.model_selection._split.train_test_split.test_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or int", + "default_value": "None", + "description": "If float, should be between 0.0 and 1.0 and represent the proportion\nof the dataset to include in the test split. If int, represents the\nabsolute number of test samples. If None, the value is set to the\ncomplement of the train size. If ``train_size`` is also None, it will\nbe set to 0.25." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/train_test_split/train_size", + "name": "train_size", + "qname": "sklearn.model_selection._split.train_test_split.train_size", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float or int", + "default_value": "None", + "description": "If float, should be between 0.0 and 1.0 and represent the\nproportion of the dataset to include in the train split. If\nint, represents the absolute number of train samples. If None,\nthe value is automatically set to the complement of the test size." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/train_test_split/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._split.train_test_split.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the shuffling applied to the data before applying the split.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/train_test_split/shuffle", + "name": "shuffle", + "qname": "sklearn.model_selection._split.train_test_split.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not to shuffle the data before splitting. If shuffle=False\nthen stratify must be None." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._split/train_test_split/stratify", + "name": "stratify", + "qname": "sklearn.model_selection._split.train_test_split.stratify", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "If not None, data is split in a stratified fashion, using this as\nthe class labels.\nRead more in the :ref:`User Guide `." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Split arrays or matrices into random train and test subsets\n\nQuick utility that wraps input validation and\n``next(ShuffleSplit().split(X, y))`` and application to input data\ninto a single call for splitting (and optionally subsampling) data in a\noneliner.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Split arrays or matrices into random train and test subsets\n\nQuick utility that wraps input validation and\n``next(ShuffleSplit().split(X, y))`` and application to input data\ninto a single call for splitting (and optionally subsampling) data in a\noneliner.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n*arrays : sequence of indexables with same length / shape[0]\n Allowed inputs are lists, numpy arrays, scipy-sparse\n matrices or pandas dataframes.\n\ntest_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to include in the test split. If int, represents the\n absolute number of test samples. If None, the value is set to the\n complement of the train size. If ``train_size`` is also None, it will\n be set to 0.25.\n\ntrain_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the dataset to include in the train split. If\n int, represents the absolute number of train samples. If None,\n the value is automatically set to the complement of the test size.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the shuffling applied to the data before applying the split.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n\nshuffle : bool, default=True\n Whether or not to shuffle the data before splitting. If shuffle=False\n then stratify must be None.\n\nstratify : array-like, default=None\n If not None, data is split in a stratified fashion, using this as\n the class labels.\n Read more in the :ref:`User Guide `.\n\nReturns\n-------\nsplitting : list, length=2 * len(arrays)\n List containing train-test split of inputs.\n\n .. versionadded:: 0.16\n If the input is sparse, the output will be a\n ``scipy.sparse.csr_matrix``. Else, output type is the same as the\n input type.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = np.arange(10).reshape((5, 2)), range(5)\n>>> X\narray([[0, 1],\n [2, 3],\n [4, 5],\n [6, 7],\n [8, 9]])\n>>> list(y)\n[0, 1, 2, 3, 4]\n\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... 
X, y, test_size=0.33, random_state=42)\n...\n>>> X_train\narray([[4, 5],\n [0, 1],\n [6, 7]])\n>>> y_train\n[2, 0, 3]\n>>> X_test\narray([[2, 3],\n [8, 9]])\n>>> y_test\n[1, 4]\n\n>>> train_test_split(y, shuffle=False)\n[[0, 1, 2], [3, 4]]", + "code": "def train_test_split(*arrays,\n test_size=None,\n train_size=None,\n random_state=None,\n shuffle=True,\n stratify=None):\n \"\"\"Split arrays or matrices into random train and test subsets\n\n Quick utility that wraps input validation and\n ``next(ShuffleSplit().split(X, y))`` and application to input data\n into a single call for splitting (and optionally subsampling) data in a\n oneliner.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n *arrays : sequence of indexables with same length / shape[0]\n Allowed inputs are lists, numpy arrays, scipy-sparse\n matrices or pandas dataframes.\n\n test_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to include in the test split. If int, represents the\n absolute number of test samples. If None, the value is set to the\n complement of the train size. If ``train_size`` is also None, it will\n be set to 0.25.\n\n train_size : float or int, default=None\n If float, should be between 0.0 and 1.0 and represent the\n proportion of the dataset to include in the train split. If\n int, represents the absolute number of train samples. If None,\n the value is automatically set to the complement of the test size.\n\n random_state : int, RandomState instance or None, default=None\n Controls the shuffling applied to the data before applying the split.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n\n shuffle : bool, default=True\n Whether or not to shuffle the data before splitting. If shuffle=False\n then stratify must be None.\n\n stratify : array-like, default=None\n If not None, data is split in a stratified fashion, using this as\n the class labels.\n Read more in the :ref:`User Guide `.\n\n Returns\n -------\n splitting : list, length=2 * len(arrays)\n List containing train-test split of inputs.\n\n .. versionadded:: 0.16\n If the input is sparse, the output will be a\n ``scipy.sparse.csr_matrix``. Else, output type is the same as the\n input type.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.model_selection import train_test_split\n >>> X, y = np.arange(10).reshape((5, 2)), range(5)\n >>> X\n array([[0, 1],\n [2, 3],\n [4, 5],\n [6, 7],\n [8, 9]])\n >>> list(y)\n [0, 1, 2, 3, 4]\n\n >>> X_train, X_test, y_train, y_test = train_test_split(\n ... 
X, y, test_size=0.33, random_state=42)\n ...\n >>> X_train\n array([[4, 5],\n [0, 1],\n [6, 7]])\n >>> y_train\n [2, 0, 3]\n >>> X_test\n array([[2, 3],\n [8, 9]])\n >>> y_test\n [1, 4]\n\n >>> train_test_split(y, shuffle=False)\n [[0, 1, 2], [3, 4]]\n\n \"\"\"\n n_arrays = len(arrays)\n if n_arrays == 0:\n raise ValueError(\"At least one array required as input\")\n\n arrays = indexable(*arrays)\n\n n_samples = _num_samples(arrays[0])\n n_train, n_test = _validate_shuffle_split(n_samples, test_size, train_size,\n default_test_size=0.25)\n\n if shuffle is False:\n if stratify is not None:\n raise ValueError(\n \"Stratified train/test split is not implemented for \"\n \"shuffle=False\")\n\n train = np.arange(n_train)\n test = np.arange(n_train, n_train + n_test)\n\n else:\n if stratify is not None:\n CVClass = StratifiedShuffleSplit\n else:\n CVClass = ShuffleSplit\n\n cv = CVClass(test_size=n_test,\n train_size=n_train,\n random_state=random_state)\n\n train, test = next(cv.split(X=arrays[0], y=stratify))\n\n return list(chain.from_iterable((_safe_indexing(a, train),\n _safe_indexing(a, test)) for a in arrays))" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_aggregate_score_dicts", + "name": "_aggregate_score_dicts", + "qname": "sklearn.model_selection._validation._aggregate_score_dicts", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/_aggregate_score_dicts/scores", + "name": "scores", + "qname": "sklearn.model_selection._validation._aggregate_score_dicts.scores", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of dict", + "default_value": "", + "description": "List of dicts of the scores for all scorers. This is a flat list,\nassumed originally to be of row major order." + }, + "type": { + "kind": "NamedType", + "name": "list of dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Aggregate the list of dict to dict of np ndarray\n\nThe aggregated output of _aggregate_score_dicts will be a list of dict\nof form [{'prec': 0.1, 'acc':1.0}, {'prec': 0.1, 'acc':1.0}, ...]\nConvert it to a dict of array {'prec': np.array([0.1 ...]), ...}", + "docstring": "Aggregate the list of dict to dict of np ndarray\n\nThe aggregated output of _aggregate_score_dicts will be a list of dict\nof form [{'prec': 0.1, 'acc':1.0}, {'prec': 0.1, 'acc':1.0}, ...]\nConvert it to a dict of array {'prec': np.array([0.1 ...]), ...}\n\nParameters\n----------\n\nscores : list of dict\n List of dicts of the scores for all scorers. This is a flat list,\n assumed originally to be of row major order.\n\nExample\n-------\n\n>>> scores = [{'a': 1, 'b':10}, {'a': 2, 'b':2}, {'a': 3, 'b':3},\n... {'a': 10, 'b': 10}] # doctest: +SKIP\n>>> _aggregate_score_dicts(scores) # doctest: +SKIP\n{'a': array([1, 2, 3, 10]),\n 'b': array([10, 2, 3, 10])}", + "code": "def _aggregate_score_dicts(scores):\n \"\"\"Aggregate the list of dict to dict of np ndarray\n\n The aggregated output of _aggregate_score_dicts will be a list of dict\n of form [{'prec': 0.1, 'acc':1.0}, {'prec': 0.1, 'acc':1.0}, ...]\n Convert it to a dict of array {'prec': np.array([0.1 ...]), ...}\n\n Parameters\n ----------\n\n scores : list of dict\n List of dicts of the scores for all scorers. This is a flat list,\n assumed originally to be of row major order.\n\n Example\n -------\n\n >>> scores = [{'a': 1, 'b':10}, {'a': 2, 'b':2}, {'a': 3, 'b':3},\n ... 
{'a': 10, 'b': 10}] # doctest: +SKIP\n >>> _aggregate_score_dicts(scores) # doctest: +SKIP\n {'a': array([1, 2, 3, 10]),\n 'b': array([10, 2, 3, 10])}\n \"\"\"\n return {\n key: np.asarray([score[key] for score in scores])\n if isinstance(scores[0][key], numbers.Number)\n else [score[key] for score in scores]\n for key in scores[0]\n }" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_check_is_permutation", + "name": "_check_is_permutation", + "qname": "sklearn.model_selection._validation._check_is_permutation", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/_check_is_permutation/indices", + "name": "indices", + "qname": "sklearn.model_selection._validation._check_is_permutation.indices", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "int array to test" + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_check_is_permutation/n_samples", + "name": "n_samples", + "qname": "sklearn.model_selection._validation._check_is_permutation.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "number of expected elements" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check whether indices is a reordering of the array np.arange(n_samples)", + "docstring": "Check whether indices is a reordering of the array np.arange(n_samples)\n\nParameters\n----------\nindices : ndarray\n int array to test\nn_samples : int\n number of expected elements\n\nReturns\n-------\nis_partition : bool\n True iff sorted(indices) is np.arange(n)", + "code": "def _check_is_permutation(indices, n_samples):\n \"\"\"Check whether indices is a reordering of the array np.arange(n_samples)\n\n Parameters\n ----------\n indices : ndarray\n int array to test\n n_samples : int\n number of expected elements\n\n Returns\n -------\n is_partition : bool\n True iff sorted(indices) is np.arange(n)\n \"\"\"\n if len(indices) != n_samples:\n return False\n hit = np.zeros(n_samples, dtype=bool)\n hit[indices] = True\n if not np.all(hit):\n return False\n return True" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_enforce_prediction_order", + "name": "_enforce_prediction_order", + "qname": "sklearn.model_selection._validation._enforce_prediction_order", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/_enforce_prediction_order/classes", + "name": "classes", + "qname": "sklearn.model_selection._validation._enforce_prediction_order.classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_enforce_prediction_order/predictions", + "name": "predictions", + "qname": "sklearn.model_selection._validation._enforce_prediction_order.predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.model_selection._validation/_enforce_prediction_order/n_classes", + "name": "n_classes", + "qname": "sklearn.model_selection._validation._enforce_prediction_order.n_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_enforce_prediction_order/method", + "name": "method", + "qname": "sklearn.model_selection._validation._enforce_prediction_order.method", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Ensure that prediction arrays have correct column order\n\nWhen doing cross-validation, if one or more classes are\nnot present in the subset of data used for training,\nthen the output prediction array might not have the same\ncolumns as other folds. Use the list of class names\n(assumed to be ints) to enforce the correct column order.\n\nNote that `classes` is the list of classes in this fold\n(a subset of the classes in the full training set)\nand `n_classes` is the number of classes in the full training set.", + "docstring": "Ensure that prediction arrays have correct column order\n\nWhen doing cross-validation, if one or more classes are\nnot present in the subset of data used for training,\nthen the output prediction array might not have the same\ncolumns as other folds. Use the list of class names\n(assumed to be ints) to enforce the correct column order.\n\nNote that `classes` is the list of classes in this fold\n(a subset of the classes in the full training set)\nand `n_classes` is the number of classes in the full training set.", + "code": "def _enforce_prediction_order(classes, predictions, n_classes, method):\n \"\"\"Ensure that prediction arrays have correct column order\n\n When doing cross-validation, if one or more classes are\n not present in the subset of data used for training,\n then the output prediction array might not have the same\n columns as other folds. Use the list of class names\n (assumed to be ints) to enforce the correct column order.\n\n Note that `classes` is the list of classes in this fold\n (a subset of the classes in the full training set)\n and `n_classes` is the number of classes in the full training set.\n \"\"\"\n if n_classes != len(classes):\n recommendation = (\n 'To fix this, use a cross-validation '\n 'technique resulting in properly '\n 'stratified folds')\n warnings.warn('Number of classes in training fold ({}) does '\n 'not match total number of classes ({}). '\n 'Results may not be appropriate for your use case. '\n '{}'.format(len(classes), n_classes, recommendation),\n RuntimeWarning)\n if method == 'decision_function':\n if (predictions.ndim == 2 and\n predictions.shape[1] != len(classes)):\n # This handles the case when the shape of predictions\n # does not match the number of classes used to train\n # it with. This case is found when sklearn.svm.SVC is\n # set to `decision_function_shape='ovo'`.\n raise ValueError('Output shape {} of {} does not match '\n 'number of classes ({}) in fold. 
'\n 'Irregular decision_function outputs '\n 'are not currently supported by '\n 'cross_val_predict'.format(\n predictions.shape, method, len(classes)))\n if len(classes) <= 2:\n # In this special case, `predictions` contains a 1D array.\n raise ValueError('Only {} class/es in training fold, but {} '\n 'in overall dataset. This '\n 'is not supported for decision_function '\n 'with imbalanced folds. {}'.format(\n len(classes), n_classes, recommendation))\n\n float_min = np.finfo(predictions.dtype).min\n default_values = {'decision_function': float_min,\n 'predict_log_proba': float_min,\n 'predict_proba': 0}\n predictions_for_all_classes = np.full((_num_samples(predictions),\n n_classes),\n default_values[method],\n dtype=predictions.dtype)\n predictions_for_all_classes[:, classes] = predictions\n predictions = predictions_for_all_classes\n return predictions" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_predict", + "name": "_fit_and_predict", + "qname": "sklearn.model_selection._validation._fit_and_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_predict/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._validation._fit_and_predict.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object implementing 'fit' and 'predict'", + "default_value": "", + "description": "The object to use to fit the data." + }, + "type": { + "kind": "NamedType", + "name": "estimator object implementing 'fit' and 'predict'" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_predict/X", + "name": "X", + "qname": "sklearn.model_selection._validation._fit_and_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to fit.\n\n.. versionchanged:: 0.20\n X is only required to be an object with finite length or shape now" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_predict/y", + "name": "y", + "qname": "sklearn.model_selection._validation._fit_and_predict.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs) or None", + "default_value": "", + "description": "The target variable to try to predict in the case of\nsupervised learning." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_predict/train", + "name": "train", + "qname": "sklearn.model_selection._validation._fit_and_predict.train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_train_samples,)", + "default_value": "", + "description": "Indices of training samples." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_train_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_predict/test", + "name": "test", + "qname": "sklearn.model_selection._validation._fit_and_predict.test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_test_samples,)", + "default_value": "", + "description": "Indices of test samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_test_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_predict/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._validation._fit_and_predict.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_predict/fit_params", + "name": "fit_params", + "qname": "sklearn.model_selection._validation._fit_and_predict.fit_params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict or None", + "default_value": "", + "description": "Parameters that will be passed to ``estimator.fit``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_predict/method", + "name": "method", + "qname": "sklearn.model_selection._validation._fit_and_predict.method", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "Invokes the passed method name of the passed estimator." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit estimator and predict values for a given dataset split.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Fit estimator and predict values for a given dataset split.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit' and 'predict'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit.\n\n .. 
versionchanged:: 0.20\n X is only required to be an object with finite length or shape now\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n The target variable to try to predict in the case of\n supervised learning.\n\ntrain : array-like of shape (n_train_samples,)\n Indices of training samples.\n\ntest : array-like of shape (n_test_samples,)\n Indices of test samples.\n\nverbose : int\n The verbosity level.\n\nfit_params : dict or None\n Parameters that will be passed to ``estimator.fit``.\n\nmethod : str\n Invokes the passed method name of the passed estimator.\n\nReturns\n-------\npredictions : sequence\n Result of calling 'estimator.method'", + "code": "def _fit_and_predict(estimator, X, y, train, test, verbose, fit_params,\n method):\n \"\"\"Fit estimator and predict values for a given dataset split.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : estimator object implementing 'fit' and 'predict'\n The object to use to fit the data.\n\n X : array-like of shape (n_samples, n_features)\n The data to fit.\n\n .. versionchanged:: 0.20\n X is only required to be an object with finite length or shape now\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n The target variable to try to predict in the case of\n supervised learning.\n\n train : array-like of shape (n_train_samples,)\n Indices of training samples.\n\n test : array-like of shape (n_test_samples,)\n Indices of test samples.\n\n verbose : int\n The verbosity level.\n\n fit_params : dict or None\n Parameters that will be passed to ``estimator.fit``.\n\n method : str\n Invokes the passed method name of the passed estimator.\n\n Returns\n -------\n predictions : sequence\n Result of calling 'estimator.method'\n \"\"\"\n # Adjust length of sample weights\n fit_params = fit_params if fit_params is not None else {}\n fit_params = _check_fit_params(X, fit_params, train)\n\n X_train, y_train = _safe_split(estimator, X, y, train)\n X_test, _ = _safe_split(estimator, X, y, test, train)\n\n if y_train is None:\n estimator.fit(X_train, **fit_params)\n else:\n estimator.fit(X_train, y_train, **fit_params)\n func = getattr(estimator, method)\n predictions = func(X_test)\n\n encode = method in ['decision_function', 'predict_proba',\n 'predict_log_proba'] and y is not None\n\n if encode:\n if isinstance(predictions, list):\n predictions = [_enforce_prediction_order(\n estimator.classes_[i_label], predictions[i_label],\n n_classes=len(set(y[:, i_label])), method=method)\n for i_label in range(len(predictions))]\n else:\n # A 2D y array should be a binary label indicator matrix\n n_classes = len(set(y)) if y.ndim == 1 else y.shape[1]\n predictions = _enforce_prediction_order(\n estimator.classes_, predictions, n_classes, method)\n return predictions" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score", + "name": "_fit_and_score", + "qname": "sklearn.model_selection._validation._fit_and_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._validation._fit_and_score.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object implementing 'fit'", + "default_value": "", + "description": "The object to use to fit the data." 
+ }, + "type": { + "kind": "NamedType", + "name": "estimator object implementing 'fit'" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/X", + "name": "X", + "qname": "sklearn.model_selection._validation._fit_and_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to fit." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/y", + "name": "y", + "qname": "sklearn.model_selection._validation._fit_and_score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs) or None", + "default_value": "", + "description": "The target variable to try to predict in the case of\nsupervised learning." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/scorer", + "name": "scorer", + "qname": "sklearn.model_selection._validation._fit_and_score.scorer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "A single callable or dict mapping scorer name to the callable", + "default_value": "", + "description": "If it is a single callable, the return value for ``train_scores`` and\n``test_scores`` is a single float.\n\nFor a dict, it should be one mapping the scorer name to the scorer\ncallable object / function.\n\nThe callable object / fn should have signature\n``scorer(estimator, X, y)``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "A single callable" + }, + { + "kind": "NamedType", + "name": "dict mapping scorer name to the callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/train", + "name": "train", + "qname": "sklearn.model_selection._validation._fit_and_score.train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_train_samples,)", + "default_value": "", + "description": "Indices of training samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_train_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/test", + "name": "test", + "qname": "sklearn.model_selection._validation._fit_and_score.test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_test_samples,)", + "default_value": "", + "description": "Indices of test samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_test_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._validation._fit_and_score.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The verbosity level." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/parameters", + "name": "parameters", + "qname": "sklearn.model_selection._validation._fit_and_score.parameters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict or None", + "default_value": "", + "description": "Parameters to be set on the estimator." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/fit_params", + "name": "fit_params", + "qname": "sklearn.model_selection._validation._fit_and_score.fit_params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict or None", + "default_value": "", + "description": "Parameters that will be passed to ``estimator.fit``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/return_train_score", + "name": "return_train_score", + "qname": "sklearn.model_selection._validation._fit_and_score.return_train_score", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Compute and return score on training set." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/return_parameters", + "name": "return_parameters", + "qname": "sklearn.model_selection._validation._fit_and_score.return_parameters", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Return parameters that has been used for the estimator." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/return_n_test_samples", + "name": "return_n_test_samples", + "qname": "sklearn.model_selection._validation._fit_and_score.return_n_test_samples", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return the ``n_test_samples``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/return_times", + "name": "return_times", + "qname": "sklearn.model_selection._validation._fit_and_score.return_times", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return the fit/score times." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/return_estimator", + "name": "return_estimator", + "qname": "sklearn.model_selection._validation._fit_and_score.return_estimator", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return the fitted estimator." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/split_progress", + "name": "split_progress", + "qname": "sklearn.model_selection._validation._fit_and_score.split_progress", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{list, tuple} of int", + "default_value": "None", + "description": "A list or tuple of format (, )." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/candidate_progress", + "name": "candidate_progress", + "qname": "sklearn.model_selection._validation._fit_and_score.candidate_progress", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{list, tuple} of int", + "default_value": "None", + "description": "A list or tuple of format\n(, )." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_fit_and_score/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._validation._fit_and_score.error_score", + "default_value": "np.nan", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "'raise' or numeric", + "default_value": "np.nan", + "description": "Value to assign to the score if an error occurs in estimator fitting.\nIf set to 'raise', the error is raised.\nIf a numeric value is given, FitFailedWarning is raised." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'raise'" + }, + { + "kind": "NamedType", + "name": "numeric" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit estimator and compute scores for a given dataset split.", + "docstring": "Fit estimator and compute scores for a given dataset split.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n The target variable to try to predict in the case of\n supervised learning.\n\nscorer : A single callable or dict mapping scorer name to the callable\n If it is a single callable, the return value for ``train_scores`` and\n ``test_scores`` is a single float.\n\n For a dict, it should be one mapping the scorer name to the scorer\n callable object / function.\n\n The callable object / fn should have signature\n ``scorer(estimator, X, y)``.\n\ntrain : array-like of shape (n_train_samples,)\n Indices of training samples.\n\ntest : array-like of shape (n_test_samples,)\n Indices of test samples.\n\nverbose : int\n The verbosity level.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\nparameters : dict or None\n Parameters to be set on the estimator.\n\nfit_params : dict or None\n Parameters that will be passed to ``estimator.fit``.\n\nreturn_train_score : bool, default=False\n Compute and return score on training set.\n\nreturn_parameters : bool, default=False\n Return parameters that has been used for the estimator.\n\nsplit_progress : {list, tuple} of int, default=None\n A list or tuple of format (, ).\n\ncandidate_progress : {list, tuple} of int, default=None\n A list or tuple of format\n (, ).\n\nreturn_n_test_samples : bool, default=False\n Whether to return the ``n_test_samples``.\n\nreturn_times : bool, default=False\n Whether to return the fit/score times.\n\nreturn_estimator : bool, default=False\n Whether to return the fitted estimator.\n\nReturns\n-------\nresult : dict with the following attributes\n train_scores : dict of scorer name -> float\n Score on training set (for all the scorers),\n returned only if `return_train_score` is `True`.\n test_scores : dict of scorer name -> float\n Score on testing set (for all the scorers).\n n_test_samples : int\n Number of test samples.\n fit_time : float\n Time spent for fitting in seconds.\n score_time : float\n Time spent for scoring in seconds.\n parameters : dict or None\n The parameters that have been evaluated.\n estimator : estimator object\n The fitted estimator.\n fit_failed : bool\n The estimator failed to fit.", + "code": "def _fit_and_score(estimator, X, y, scorer, train, test, verbose,\n parameters, fit_params, return_train_score=False,\n return_parameters=False, return_n_test_samples=False,\n return_times=False, return_estimator=False,\n split_progress=None, candidate_progress=None,\n error_score=np.nan):\n\n \"\"\"Fit estimator and compute scores for a given dataset split.\n\n Parameters\n ----------\n estimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\n X : array-like of shape (n_samples, n_features)\n The data to fit.\n\n y : array-like of shape (n_samples,) or (n_samples, 
n_outputs) or None\n The target variable to try to predict in the case of\n supervised learning.\n\n scorer : A single callable or dict mapping scorer name to the callable\n If it is a single callable, the return value for ``train_scores`` and\n ``test_scores`` is a single float.\n\n For a dict, it should be one mapping the scorer name to the scorer\n callable object / function.\n\n The callable object / fn should have signature\n ``scorer(estimator, X, y)``.\n\n train : array-like of shape (n_train_samples,)\n Indices of training samples.\n\n test : array-like of shape (n_test_samples,)\n Indices of test samples.\n\n verbose : int\n The verbosity level.\n\n error_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n parameters : dict or None\n Parameters to be set on the estimator.\n\n fit_params : dict or None\n Parameters that will be passed to ``estimator.fit``.\n\n return_train_score : bool, default=False\n Compute and return score on training set.\n\n return_parameters : bool, default=False\n Return parameters that has been used for the estimator.\n\n split_progress : {list, tuple} of int, default=None\n A list or tuple of format (, ).\n\n candidate_progress : {list, tuple} of int, default=None\n A list or tuple of format\n (, ).\n\n return_n_test_samples : bool, default=False\n Whether to return the ``n_test_samples``.\n\n return_times : bool, default=False\n Whether to return the fit/score times.\n\n return_estimator : bool, default=False\n Whether to return the fitted estimator.\n\n Returns\n -------\n result : dict with the following attributes\n train_scores : dict of scorer name -> float\n Score on training set (for all the scorers),\n returned only if `return_train_score` is `True`.\n test_scores : dict of scorer name -> float\n Score on testing set (for all the scorers).\n n_test_samples : int\n Number of test samples.\n fit_time : float\n Time spent for fitting in seconds.\n score_time : float\n Time spent for scoring in seconds.\n parameters : dict or None\n The parameters that have been evaluated.\n estimator : estimator object\n The fitted estimator.\n fit_failed : bool\n The estimator failed to fit.\n \"\"\"\n if not isinstance(error_score, numbers.Number) and error_score != 'raise':\n raise ValueError(\n \"error_score must be the string 'raise' or a numeric value. 
\"\n \"(Hint: if using 'raise', please make sure that it has been \"\n \"spelled correctly.)\"\n )\n\n progress_msg = \"\"\n if verbose > 2:\n if split_progress is not None:\n progress_msg = f\" {split_progress[0]+1}/{split_progress[1]}\"\n if candidate_progress and verbose > 9:\n progress_msg += (f\"; {candidate_progress[0]+1}/\"\n f\"{candidate_progress[1]}\")\n\n if verbose > 1:\n if parameters is None:\n params_msg = ''\n else:\n sorted_keys = sorted(parameters) # Ensure deterministic o/p\n params_msg = (', '.join(f'{k}={parameters[k]}'\n for k in sorted_keys))\n if verbose > 9:\n start_msg = f\"[CV{progress_msg}] START {params_msg}\"\n print(f\"{start_msg}{(80 - len(start_msg)) * '.'}\")\n\n # Adjust length of sample weights\n fit_params = fit_params if fit_params is not None else {}\n fit_params = _check_fit_params(X, fit_params, train)\n\n if parameters is not None:\n # clone after setting parameters in case any parameters\n # are estimators (like pipeline steps)\n # because pipeline doesn't clone steps in fit\n cloned_parameters = {}\n for k, v in parameters.items():\n cloned_parameters[k] = clone(v, safe=False)\n\n estimator = estimator.set_params(**cloned_parameters)\n\n start_time = time.time()\n\n X_train, y_train = _safe_split(estimator, X, y, train)\n X_test, y_test = _safe_split(estimator, X, y, test, train)\n\n result = {}\n try:\n if y_train is None:\n estimator.fit(X_train, **fit_params)\n else:\n estimator.fit(X_train, y_train, **fit_params)\n\n except Exception as e:\n # Note fit time as time until error\n fit_time = time.time() - start_time\n score_time = 0.0\n if error_score == 'raise':\n raise\n elif isinstance(error_score, numbers.Number):\n if isinstance(scorer, dict):\n test_scores = {name: error_score for name in scorer}\n if return_train_score:\n train_scores = test_scores.copy()\n else:\n test_scores = error_score\n if return_train_score:\n train_scores = error_score\n warnings.warn(\"Estimator fit failed. The score on this train-test\"\n \" partition for these parameters will be set to %f. 
\"\n \"Details: \\n%s\" %\n (error_score, format_exc()),\n FitFailedWarning)\n result[\"fit_failed\"] = True\n else:\n result[\"fit_failed\"] = False\n\n fit_time = time.time() - start_time\n test_scores = _score(estimator, X_test, y_test, scorer, error_score)\n score_time = time.time() - start_time - fit_time\n if return_train_score:\n train_scores = _score(\n estimator, X_train, y_train, scorer, error_score\n )\n\n if verbose > 1:\n total_time = score_time + fit_time\n end_msg = f\"[CV{progress_msg}] END \"\n result_msg = params_msg + (\";\" if params_msg else \"\")\n if verbose > 2:\n if isinstance(test_scores, dict):\n for scorer_name in sorted(test_scores):\n result_msg += f\" {scorer_name}: (\"\n if return_train_score:\n scorer_scores = train_scores[scorer_name]\n result_msg += f\"train={scorer_scores:.3f}, \"\n result_msg += f\"test={test_scores[scorer_name]:.3f})\"\n else:\n result_msg += \", score=\"\n if return_train_score:\n result_msg += (f\"(train={train_scores:.3f}, \"\n f\"test={test_scores:.3f})\")\n else:\n result_msg += f\"{test_scores:.3f}\"\n result_msg += f\" total time={logger.short_format_time(total_time)}\"\n\n # Right align the result_msg\n end_msg += \".\" * (80 - len(end_msg) - len(result_msg))\n end_msg += result_msg\n print(end_msg)\n\n result[\"test_scores\"] = test_scores\n if return_train_score:\n result[\"train_scores\"] = train_scores\n if return_n_test_samples:\n result[\"n_test_samples\"] = _num_samples(X_test)\n if return_times:\n result[\"fit_time\"] = fit_time\n result[\"score_time\"] = score_time\n if return_parameters:\n result[\"parameters\"] = parameters\n if return_estimator:\n result[\"estimator\"] = estimator\n return result" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator", + "name": "_incremental_fit_estimator", + "qname": "sklearn.model_selection._validation._incremental_fit_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._validation._incremental_fit_estimator.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator/X", + "name": "X", + "qname": "sklearn.model_selection._validation._incremental_fit_estimator.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator/y", + "name": "y", + "qname": "sklearn.model_selection._validation._incremental_fit_estimator.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator/classes", + "name": "classes", + "qname": "sklearn.model_selection._validation._incremental_fit_estimator.classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator/train", + "name": "train", + "qname": "sklearn.model_selection._validation._incremental_fit_estimator.train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator/test", + "name": "test", + "qname": "sklearn.model_selection._validation._incremental_fit_estimator.test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator/train_sizes", + "name": "train_sizes", + "qname": "sklearn.model_selection._validation._incremental_fit_estimator.train_sizes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator/scorer", + "name": "scorer", + "qname": "sklearn.model_selection._validation._incremental_fit_estimator.scorer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._validation._incremental_fit_estimator.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator/return_times", + "name": "return_times", + "qname": "sklearn.model_selection._validation._incremental_fit_estimator.return_times", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._validation._incremental_fit_estimator.error_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_incremental_fit_estimator/fit_params", + "name": "fit_params", + "qname": "sklearn.model_selection._validation._incremental_fit_estimator.fit_params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Train estimator on training subsets incrementally and compute scores.", + "docstring": "Train estimator on training subsets incrementally and compute scores.", + "code": "def _incremental_fit_estimator(estimator, X, y, classes, train, test,\n train_sizes, scorer, verbose,\n return_times, error_score, fit_params):\n \"\"\"Train estimator on training subsets 
incrementally and compute scores.\"\"\"\n train_scores, test_scores, fit_times, score_times = [], [], [], []\n partitions = zip(train_sizes, np.split(train, train_sizes)[:-1])\n if fit_params is None:\n fit_params = {}\n for n_train_samples, partial_train in partitions:\n train_subset = train[:n_train_samples]\n X_train, y_train = _safe_split(estimator, X, y, train_subset)\n X_partial_train, y_partial_train = _safe_split(estimator, X, y,\n partial_train)\n X_test, y_test = _safe_split(estimator, X, y, test, train_subset)\n start_fit = time.time()\n if y_partial_train is None:\n estimator.partial_fit(X_partial_train, classes=classes,\n **fit_params)\n else:\n estimator.partial_fit(X_partial_train, y_partial_train,\n classes=classes, **fit_params)\n fit_time = time.time() - start_fit\n fit_times.append(fit_time)\n\n start_score = time.time()\n\n test_scores.append(\n _score(estimator, X_test, y_test, scorer, error_score)\n )\n train_scores.append(\n _score(estimator, X_train, y_train, scorer, error_score)\n )\n\n score_time = time.time() - start_score\n score_times.append(score_time)\n\n ret = ((train_scores, test_scores, fit_times, score_times)\n if return_times else (train_scores, test_scores))\n\n return np.array(ret).T" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_insert_error_scores", + "name": "_insert_error_scores", + "qname": "sklearn.model_selection._validation._insert_error_scores", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/_insert_error_scores/results", + "name": "results", + "qname": "sklearn.model_selection._validation._insert_error_scores.results", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_insert_error_scores/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._validation._insert_error_scores.error_score", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Insert error in `results` by replacing them inplace with `error_score`.\n\nThis only applies to multimetric scores because `_fit_and_score` will\nhandle the single metric case.", + "docstring": "Insert error in `results` by replacing them inplace with `error_score`.\n\nThis only applies to multimetric scores because `_fit_and_score` will\nhandle the single metric case.", + "code": "def _insert_error_scores(results, error_score):\n \"\"\"Insert error in `results` by replacing them inplace with `error_score`.\n\n This only applies to multimetric scores because `_fit_and_score` will\n handle the single metric case.\n \"\"\"\n successful_score = None\n failed_indices = []\n for i, result in enumerate(results):\n if result[\"fit_failed\"]:\n failed_indices.append(i)\n elif successful_score is None:\n successful_score = result[\"test_scores\"]\n\n if successful_score is None:\n raise NotFittedError(\"All estimators failed to fit\")\n\n if isinstance(successful_score, dict):\n formatted_error = {name: error_score for name in successful_score}\n for i in failed_indices:\n results[i][\"test_scores\"] = formatted_error.copy()\n if \"train_scores\" in results[i]:\n results[i][\"train_scores\"] = formatted_error.copy()" + }, + 
{ + "id": "scikit-learn/sklearn.model_selection._validation/_normalize_score_results", + "name": "_normalize_score_results", + "qname": "sklearn.model_selection._validation._normalize_score_results", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/_normalize_score_results/scores", + "name": "scores", + "qname": "sklearn.model_selection._validation._normalize_score_results.scores", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_normalize_score_results/scaler_score_key", + "name": "scaler_score_key", + "qname": "sklearn.model_selection._validation._normalize_score_results.scaler_score_key", + "default_value": "'score'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Creates a scoring dictionary based on the type of `scores`", + "docstring": "Creates a scoring dictionary based on the type of `scores`", + "code": "def _normalize_score_results(scores, scaler_score_key='score'):\n \"\"\"Creates a scoring dictionary based on the type of `scores`\"\"\"\n if isinstance(scores[0], dict):\n # multimetric scoring\n return _aggregate_score_dicts(scores)\n # scaler\n return {scaler_score_key: scores}" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_permutation_test_score", + "name": "_permutation_test_score", + "qname": "sklearn.model_selection._validation._permutation_test_score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/_permutation_test_score/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._validation._permutation_test_score.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_permutation_test_score/X", + "name": "X", + "qname": "sklearn.model_selection._validation._permutation_test_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_permutation_test_score/y", + "name": "y", + "qname": "sklearn.model_selection._validation._permutation_test_score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_permutation_test_score/groups", + "name": "groups", + "qname": "sklearn.model_selection._validation._permutation_test_score.groups", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_permutation_test_score/cv", + "name": "cv", + "qname": "sklearn.model_selection._validation._permutation_test_score.cv", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + 
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_permutation_test_score/scorer", + "name": "scorer", + "qname": "sklearn.model_selection._validation._permutation_test_score.scorer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_permutation_test_score/fit_params", + "name": "fit_params", + "qname": "sklearn.model_selection._validation._permutation_test_score.fit_params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Auxiliary function for permutation_test_score", + "docstring": "Auxiliary function for permutation_test_score", + "code": "def _permutation_test_score(estimator, X, y, groups, cv, scorer,\n fit_params):\n \"\"\"Auxiliary function for permutation_test_score\"\"\"\n # Adjust length of sample weights\n fit_params = fit_params if fit_params is not None else {}\n avg_score = []\n for train, test in cv.split(X, y, groups):\n X_train, y_train = _safe_split(estimator, X, y, train)\n X_test, y_test = _safe_split(estimator, X, y, test, train)\n fit_params = _check_fit_params(X, fit_params, train)\n estimator.fit(X_train, y_train, **fit_params)\n avg_score.append(scorer(estimator, X_test, y_test))\n return np.mean(avg_score)" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_score", + "name": "_score", + "qname": "sklearn.model_selection._validation._score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/_score/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._validation._score.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_score/X_test", + "name": "X_test", + "qname": "sklearn.model_selection._validation._score.X_test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_score/y_test", + "name": "y_test", + "qname": "sklearn.model_selection._validation._score.y_test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_score/scorer", + "name": "scorer", + "qname": "sklearn.model_selection._validation._score.scorer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_score/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._validation._score.error_score", + "default_value": "'raise'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + 
"default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the score(s) of an estimator on a given test set.\n\nWill return a dict of floats if `scorer` is a dict, otherwise a single\nfloat is returned.", + "docstring": "Compute the score(s) of an estimator on a given test set.\n\nWill return a dict of floats if `scorer` is a dict, otherwise a single\nfloat is returned.", + "code": "def _score(estimator, X_test, y_test, scorer, error_score=\"raise\"):\n \"\"\"Compute the score(s) of an estimator on a given test set.\n\n Will return a dict of floats if `scorer` is a dict, otherwise a single\n float is returned.\n \"\"\"\n if isinstance(scorer, dict):\n # will cache method calls if needed. scorer() returns a dict\n scorer = _MultimetricScorer(**scorer)\n\n try:\n if y_test is None:\n scores = scorer(estimator, X_test)\n else:\n scores = scorer(estimator, X_test, y_test)\n except Exception:\n if error_score == 'raise':\n raise\n else:\n if isinstance(scorer, _MultimetricScorer):\n scores = {name: error_score for name in scorer._scorers}\n else:\n scores = error_score\n warnings.warn(\n f\"Scoring failed. The score on this train-test partition for \"\n f\"these parameters will be set to {error_score}. Details: \\n\"\n f\"{format_exc()}\",\n UserWarning,\n )\n\n error_msg = (\n \"scoring must return a number, got %s (%s) instead. (scorer=%s)\"\n )\n if isinstance(scores, dict):\n for name, score in scores.items():\n if hasattr(score, 'item'):\n with suppress(ValueError):\n # e.g. unwrap memmapped scalars\n score = score.item()\n if not isinstance(score, numbers.Number):\n raise ValueError(error_msg % (score, type(score), name))\n scores[name] = score\n else: # scalar\n if hasattr(scores, 'item'):\n with suppress(ValueError):\n # e.g. 
unwrap memmapped scalars\n scores = scores.item()\n if not isinstance(scores, numbers.Number):\n raise ValueError(error_msg % (scores, type(scores), scorer))\n return scores" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_shuffle", + "name": "_shuffle", + "qname": "sklearn.model_selection._validation._shuffle", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/_shuffle/y", + "name": "y", + "qname": "sklearn.model_selection._validation._shuffle.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_shuffle/groups", + "name": "groups", + "qname": "sklearn.model_selection._validation._shuffle.groups", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_shuffle/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._validation._shuffle.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return a shuffled copy of y eventually shuffle among same groups.", + "docstring": "Return a shuffled copy of y eventually shuffle among same groups.", + "code": "def _shuffle(y, groups, random_state):\n \"\"\"Return a shuffled copy of y eventually shuffle among same groups.\"\"\"\n if groups is None:\n indices = random_state.permutation(len(y))\n else:\n indices = np.arange(len(groups))\n for group in np.unique(groups):\n this_mask = (groups == group)\n indices[this_mask] = random_state.permutation(indices[this_mask])\n return _safe_indexing(y, indices)" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_translate_train_sizes", + "name": "_translate_train_sizes", + "qname": "sklearn.model_selection._validation._translate_train_sizes", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/_translate_train_sizes/train_sizes", + "name": "train_sizes", + "qname": "sklearn.model_selection._validation._translate_train_sizes.train_sizes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_ticks,)", + "default_value": "", + "description": "Numbers of training examples that will be used to generate the\nlearning curve. If the dtype is float, it is regarded as a\nfraction of 'n_max_training_samples', i.e. it has to be within (0, 1]." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_ticks,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/_translate_train_sizes/n_max_training_samples", + "name": "n_max_training_samples", + "qname": "sklearn.model_selection._validation._translate_train_sizes.n_max_training_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Maximum number of training samples (upper bound of 'train_sizes')." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Determine absolute sizes of training subsets and validate 'train_sizes'.\n\nExamples:\n _translate_train_sizes([0.5, 1.0], 10) -> [5, 10]\n _translate_train_sizes([5, 10], 10) -> [5, 10]", + "docstring": "Determine absolute sizes of training subsets and validate 'train_sizes'.\n\nExamples:\n _translate_train_sizes([0.5, 1.0], 10) -> [5, 10]\n _translate_train_sizes([5, 10], 10) -> [5, 10]\n\nParameters\n----------\ntrain_sizes : array-like of shape (n_ticks,)\n Numbers of training examples that will be used to generate the\n learning curve. If the dtype is float, it is regarded as a\n fraction of 'n_max_training_samples', i.e. it has to be within (0, 1].\n\nn_max_training_samples : int\n Maximum number of training samples (upper bound of 'train_sizes').\n\nReturns\n-------\ntrain_sizes_abs : array of shape (n_unique_ticks,)\n Numbers of training examples that will be used to generate the\n learning curve. Note that the number of ticks might be less\n than n_ticks because duplicate entries will be removed.", + "code": "def _translate_train_sizes(train_sizes, n_max_training_samples):\n \"\"\"Determine absolute sizes of training subsets and validate 'train_sizes'.\n\n Examples:\n _translate_train_sizes([0.5, 1.0], 10) -> [5, 10]\n _translate_train_sizes([5, 10], 10) -> [5, 10]\n\n Parameters\n ----------\n train_sizes : array-like of shape (n_ticks,)\n Numbers of training examples that will be used to generate the\n learning curve. If the dtype is float, it is regarded as a\n fraction of 'n_max_training_samples', i.e. it has to be within (0, 1].\n\n n_max_training_samples : int\n Maximum number of training samples (upper bound of 'train_sizes').\n\n Returns\n -------\n train_sizes_abs : array of shape (n_unique_ticks,)\n Numbers of training examples that will be used to generate the\n learning curve. Note that the number of ticks might be less\n than n_ticks because duplicate entries will be removed.\n \"\"\"\n train_sizes_abs = np.asarray(train_sizes)\n n_ticks = train_sizes_abs.shape[0]\n n_min_required_samples = np.min(train_sizes_abs)\n n_max_required_samples = np.max(train_sizes_abs)\n if np.issubdtype(train_sizes_abs.dtype, np.floating):\n if n_min_required_samples <= 0.0 or n_max_required_samples > 1.0:\n raise ValueError(\"train_sizes has been interpreted as fractions \"\n \"of the maximum number of training samples and \"\n \"must be within (0, 1], but is within [%f, %f].\"\n % (n_min_required_samples,\n n_max_required_samples))\n train_sizes_abs = (train_sizes_abs * n_max_training_samples).astype(\n dtype=int, copy=False)\n train_sizes_abs = np.clip(train_sizes_abs, 1,\n n_max_training_samples)\n else:\n if (n_min_required_samples <= 0 or\n n_max_required_samples > n_max_training_samples):\n raise ValueError(\"train_sizes has been interpreted as absolute \"\n \"numbers of training samples and must be within \"\n \"(0, %d], but is within [%d, %d].\"\n % (n_max_training_samples,\n n_min_required_samples,\n n_max_required_samples))\n\n train_sizes_abs = np.unique(train_sizes_abs)\n if n_ticks > train_sizes_abs.shape[0]:\n warnings.warn(\"Removed duplicate entries from 'train_sizes'. 
Number \"\n \"of ticks will be less than the size of \"\n \"'train_sizes' %d instead of %d).\"\n % (train_sizes_abs.shape[0], n_ticks), RuntimeWarning)\n\n return train_sizes_abs" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_predict", + "name": "cross_val_predict", + "qname": "sklearn.model_selection._validation.cross_val_predict", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_predict/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._validation.cross_val_predict.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object implementing 'fit' and 'predict'", + "default_value": "", + "description": "The object to use to fit the data." + }, + "type": { + "kind": "NamedType", + "name": "estimator object implementing 'fit' and 'predict'" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_predict/X", + "name": "X", + "qname": "sklearn.model_selection._validation.cross_val_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to fit. Can be, for example a list, or an array at least 2d." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_predict/y", + "name": "y", + "qname": "sklearn.model_selection._validation.cross_val_predict.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "None", + "description": "The target variable to try to predict in the case of\nsupervised learning." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_predict/groups", + "name": "groups", + "qname": "sklearn.model_selection._validation.cross_val_predict.groups", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set. Only used in conjunction with a \"Group\" :term:`cv`\ninstance (e.g., :class:`GroupKFold`)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_predict/cv", + "name": "cv", + "qname": "sklearn.model_selection._validation.cross_val_predict.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. 
In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_predict/n_jobs", + "name": "n_jobs", + "qname": "sklearn.model_selection._validation.cross_val_predict.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel. Training the estimator and\npredicting are parallelized over the cross-validation splits.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_predict/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._validation.cross_val_predict.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_predict/fit_params", + "name": "fit_params", + "qname": "sklearn.model_selection._validation.cross_val_predict.fit_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Parameters to pass to the fit method of the estimator." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_predict/pre_dispatch", + "name": "pre_dispatch", + "qname": "sklearn.model_selection._validation.cross_val_predict.pre_dispatch", + "default_value": "'2*n_jobs'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or str", + "default_value": "'2*n_jobs'", + "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. 
Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_predict/method", + "name": "method", + "qname": "sklearn.model_selection._validation.cross_val_predict.method", + "default_value": "'predict'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'predict', 'predict_proba', 'predict_log_proba', 'decision_function'}", + "default_value": "'predict'", + "description": "The method to be invoked by `estimator`." + }, + "type": { + "kind": "EnumType", + "values": ["predict_log_proba", "predict_proba", "decision_function", "predict"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate cross-validated estimates for each input data point\n\nThe data is split according to the cv parameter. Each sample belongs\nto exactly one test set, and its prediction is computed with an\nestimator fitted on the corresponding training set.\n\nPassing these predictions into an evaluation metric may not be a valid\nway to measure generalization performance. Results can differ from\n:func:`cross_validate` and :func:`cross_val_score` unless all tests sets\nhave equal size and the metric decomposes over samples.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate cross-validated estimates for each input data point\n\nThe data is split according to the cv parameter. Each sample belongs\nto exactly one test set, and its prediction is computed with an\nestimator fitted on the corresponding training set.\n\nPassing these predictions into an evaluation metric may not be a valid\nway to measure generalization performance. Results can differ from\n:func:`cross_validate` and :func:`cross_val_score` unless all tests sets\nhave equal size and the metric decomposes over samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit' and 'predict'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit. Can be, for example a list, or an array at least 2d.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n The target variable to try to predict in the case of\n supervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. 
These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and\n predicting are parallelized over the cross-validation splits.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n The verbosity level.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\npre_dispatch : int or str, default='2*n_jobs'\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\nmethod : {'predict', 'predict_proba', 'predict_log_proba', 'decision_function'}, default='predict'\n The method to be invoked by `estimator`.\n\nReturns\n-------\npredictions : ndarray\n This is the result of calling `method`. Shape:\n\n - When `method` is 'predict' and in special case where `method` is\n 'decision_function' and the target is binary: (n_samples,)\n - When `method` is one of {'predict_proba', 'predict_log_proba',\n 'decision_function'} (unless special case above):\n (n_samples, n_classes)\n - If `estimator` is :term:`multioutput`, an extra dimension\n 'n_outputs' is added to the end of each shape above.\n\nSee Also\n--------\ncross_val_score : Calculate score for each CV split.\ncross_validate : Calculate one or more scores and timings for each CV\n split.\n\nNotes\n-----\nIn the case that one or more classes are absent in a training portion, a\ndefault score needs to be assigned to all instances for that class if\n``method`` produces columns per class, as in {'decision_function',\n'predict_proba', 'predict_log_proba'}. For ``predict_proba`` this value is\n0. In order to ensure finite output, we approximate negative infinity by\nthe minimum finite float value for the dtype in other cases.\n\nExamples\n--------\n>>> from sklearn import datasets, linear_model\n>>> from sklearn.model_selection import cross_val_predict\n>>> diabetes = datasets.load_diabetes()\n>>> X = diabetes.data[:150]\n>>> y = diabetes.target[:150]\n>>> lasso = linear_model.Lasso()\n>>> y_pred = cross_val_predict(lasso, X, y, cv=3)", + "code": "@_deprecate_positional_args\ndef cross_val_predict(estimator, X, y=None, *, groups=None, cv=None,\n n_jobs=None, verbose=0, fit_params=None,\n pre_dispatch='2*n_jobs', method='predict'):\n \"\"\"Generate cross-validated estimates for each input data point\n\n The data is split according to the cv parameter. Each sample belongs\n to exactly one test set, and its prediction is computed with an\n estimator fitted on the corresponding training set.\n\n Passing these predictions into an evaluation metric may not be a valid\n way to measure generalization performance. 
Results can differ from\n :func:`cross_validate` and :func:`cross_val_score` unless all test sets\n have equal size and the metric decomposes over samples.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : estimator object implementing 'fit' and 'predict'\n The object to use to fit the data.\n\n X : array-like of shape (n_samples, n_features)\n The data to fit. Can be, for example a list, or an array at least 2d.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs), \\\n default=None\n The target variable to try to predict in the case of\n supervised learning.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n n_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and\n predicting are parallelized over the cross-validation splits.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n verbose : int, default=0\n The verbosity level.\n\n fit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n pre_dispatch : int or str, default='2*n_jobs'\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\n method : {'predict', 'predict_proba', 'predict_log_proba', \\\n 'decision_function'}, default='predict'\n The method to be invoked by `estimator`.\n\n Returns\n -------\n predictions : ndarray\n This is the result of calling `method`. 
Shape:\n\n - When `method` is 'predict' and in special case where `method` is\n 'decision_function' and the target is binary: (n_samples,)\n - When `method` is one of {'predict_proba', 'predict_log_proba',\n 'decision_function'} (unless special case above):\n (n_samples, n_classes)\n - If `estimator` is :term:`multioutput`, an extra dimension\n 'n_outputs' is added to the end of each shape above.\n\n See Also\n --------\n cross_val_score : Calculate score for each CV split.\n cross_validate : Calculate one or more scores and timings for each CV\n split.\n\n Notes\n -----\n In the case that one or more classes are absent in a training portion, a\n default score needs to be assigned to all instances for that class if\n ``method`` produces columns per class, as in {'decision_function',\n 'predict_proba', 'predict_log_proba'}. For ``predict_proba`` this value is\n 0. In order to ensure finite output, we approximate negative infinity by\n the minimum finite float value for the dtype in other cases.\n\n Examples\n --------\n >>> from sklearn import datasets, linear_model\n >>> from sklearn.model_selection import cross_val_predict\n >>> diabetes = datasets.load_diabetes()\n >>> X = diabetes.data[:150]\n >>> y = diabetes.target[:150]\n >>> lasso = linear_model.Lasso()\n >>> y_pred = cross_val_predict(lasso, X, y, cv=3)\n \"\"\"\n X, y, groups = indexable(X, y, groups)\n\n cv = check_cv(cv, y, classifier=is_classifier(estimator))\n splits = list(cv.split(X, y, groups))\n\n test_indices = np.concatenate([test for _, test in splits])\n if not _check_is_permutation(test_indices, _num_samples(X)):\n raise ValueError('cross_val_predict only works for partitions')\n\n # If classification methods produce multiple columns of output,\n # we need to manually encode classes to ensure consistent column ordering.\n encode = method in ['decision_function', 'predict_proba',\n 'predict_log_proba'] and y is not None\n if encode:\n y = np.asarray(y)\n if y.ndim == 1:\n le = LabelEncoder()\n y = le.fit_transform(y)\n elif y.ndim == 2:\n y_enc = np.zeros_like(y, dtype=int)\n for i_label in range(y.shape[1]):\n y_enc[:, i_label] = LabelEncoder().fit_transform(y[:, i_label])\n y = y_enc\n\n # We clone the estimator to make sure that all the folds are\n # independent, and that it is pickle-able.\n parallel = Parallel(n_jobs=n_jobs, verbose=verbose,\n pre_dispatch=pre_dispatch)\n predictions = parallel(delayed(_fit_and_predict)(\n clone(estimator), X, y, train, test, verbose, fit_params, method)\n for train, test in splits)\n\n inv_test_indices = np.empty(len(test_indices), dtype=int)\n inv_test_indices[test_indices] = np.arange(len(test_indices))\n\n if sp.issparse(predictions[0]):\n predictions = sp.vstack(predictions, format=predictions[0].format)\n elif encode and isinstance(predictions[0], list):\n # `predictions` is a list of method outputs from each fold.\n # If each of those is also a list, then treat this as a\n # multioutput-multiclass task. 
We need to separately concatenate\n # the method outputs for each label into an `n_labels` long list.\n n_labels = y.shape[1]\n concat_pred = []\n for i_label in range(n_labels):\n label_preds = np.concatenate([p[i_label] for p in predictions])\n concat_pred.append(label_preds)\n predictions = concat_pred\n else:\n predictions = np.concatenate(predictions)\n\n if isinstance(predictions, list):\n return [p[inv_test_indices] for p in predictions]\n else:\n return predictions[inv_test_indices]" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_score", + "name": "cross_val_score", + "qname": "sklearn.model_selection._validation.cross_val_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_score/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._validation.cross_val_score.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object implementing 'fit'", + "default_value": "", + "description": "The object to use to fit the data." + }, + "type": { + "kind": "NamedType", + "name": "estimator object implementing 'fit'" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_score/X", + "name": "X", + "qname": "sklearn.model_selection._validation.cross_val_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to fit. Can be for example a list, or an array." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_score/y", + "name": "y", + "qname": "sklearn.model_selection._validation.cross_val_score.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "None", + "description": "The target variable to try to predict in the case of\nsupervised learning." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_score/groups", + "name": "groups", + "qname": "sklearn.model_selection._validation.cross_val_score.groups", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set. Only used in conjunction with a \"Group\" :term:`cv`\ninstance (e.g., :class:`GroupKFold`)." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_score/scoring", + "name": "scoring", + "qname": "sklearn.model_selection._validation.cross_val_score.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "None", + "description": "A str (see model evaluation documentation) or\na scorer callable object / function with signature\n``scorer(estimator, X, y)`` which should return only\na single value.\n\nSimilar to :func:`cross_validate`\nbut only a single metric is permitted.\n\nIf None, the estimator's default scorer (if available) is used." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_score/cv", + "name": "cv", + "qname": "sklearn.model_selection._validation.cross_val_score.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_score/n_jobs", + "name": "n_jobs", + "qname": "sklearn.model_selection._validation.cross_val_score.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel. Training the estimator and computing\nthe score are parallelized over the cross-validation splits.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_score/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._validation.cross_val_score.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_score/fit_params", + "name": "fit_params", + "qname": "sklearn.model_selection._validation.cross_val_score.fit_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Parameters to pass to the fit method of the estimator." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_score/pre_dispatch", + "name": "pre_dispatch", + "qname": "sklearn.model_selection._validation.cross_val_score.pre_dispatch", + "default_value": "'2*n_jobs'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or str", + "default_value": "'2*n_jobs'", + "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_val_score/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._validation.cross_val_score.error_score", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'raise' or numeric", + "default_value": "np.nan", + "description": "Value to assign to the score if an error occurs in estimator fitting.\nIf set to 'raise', the error is raised.\nIf a numeric value is given, FitFailedWarning is raised.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'raise'" + }, + { + "kind": "NamedType", + "name": "numeric" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Evaluate a score by cross-validation\n\nRead more in the :ref:`User Guide `.", + "docstring": "Evaluate a score by cross-validation\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit. Can be for example a list, or an array.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n The target variable to try to predict in the case of\n supervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\nscoring : str or callable, default=None\n A str (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)`` which should return only\n a single value.\n\n Similar to :func:`cross_validate`\n but only a single metric is permitted.\n\n If None, the estimator's default scorer (if available) is used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the score are parallelized over the cross-validation splits.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n The verbosity level.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\npre_dispatch : int or str, default='2*n_jobs'\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\nscores : ndarray of float of shape=(len(list(cv)),)\n Array of scores of the estimator for each run of the cross validation.\n\nExamples\n--------\n>>> from sklearn import datasets, linear_model\n>>> from sklearn.model_selection import cross_val_score\n>>> diabetes = datasets.load_diabetes()\n>>> X = diabetes.data[:150]\n>>> y = diabetes.target[:150]\n>>> lasso = linear_model.Lasso()\n>>> print(cross_val_score(lasso, X, y, cv=3))\n[0.33150734 0.08022311 0.03531764]\n\nSee Also\n---------\ncross_validate : To run cross-validation on multiple metrics and also to\n return train scores, fit times and score times.\n\ncross_val_predict : Get predictions from each split of cross-validation for\n diagnostic purposes.\n\nsklearn.metrics.make_scorer : Make a scorer from a performance metric or\n loss function.", + "code": "@_deprecate_positional_args\ndef cross_val_score(estimator, X, y=None, *, groups=None, scoring=None,\n cv=None, n_jobs=None, verbose=0, fit_params=None,\n pre_dispatch='2*n_jobs', error_score=np.nan):\n \"\"\"Evaluate a score by cross-validation\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\n X : array-like of shape (n_samples, n_features)\n The data to fit. Can be for example a list, or an array.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs), \\\n default=None\n The target variable to try to predict in the case of\n supervised learning.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\n scoring : str or callable, default=None\n A str (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)`` which should return only\n a single value.\n\n Similar to :func:`cross_validate`\n but only a single metric is permitted.\n\n If None, the estimator's default scorer (if available) is used.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n n_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the score are parallelized over the cross-validation splits.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\n verbose : int, default=0\n The verbosity level.\n\n fit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n pre_dispatch : int or str, default='2*n_jobs'\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\n error_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. versionadded:: 0.20\n\n Returns\n -------\n scores : ndarray of float of shape=(len(list(cv)),)\n Array of scores of the estimator for each run of the cross validation.\n\n Examples\n --------\n >>> from sklearn import datasets, linear_model\n >>> from sklearn.model_selection import cross_val_score\n >>> diabetes = datasets.load_diabetes()\n >>> X = diabetes.data[:150]\n >>> y = diabetes.target[:150]\n >>> lasso = linear_model.Lasso()\n >>> print(cross_val_score(lasso, X, y, cv=3))\n [0.33150734 0.08022311 0.03531764]\n\n See Also\n ---------\n cross_validate : To run cross-validation on multiple metrics and also to\n return train scores, fit times and score times.\n\n cross_val_predict : Get predictions from each split of cross-validation for\n diagnostic purposes.\n\n sklearn.metrics.make_scorer : Make a scorer from a performance metric or\n loss function.\n\n \"\"\"\n # To ensure multimetric format is not supported\n scorer = check_scoring(estimator, scoring=scoring)\n\n cv_results = cross_validate(estimator=estimator, X=X, y=y, groups=groups,\n scoring={'score': scorer}, cv=cv,\n n_jobs=n_jobs, verbose=verbose,\n fit_params=fit_params,\n pre_dispatch=pre_dispatch,\n error_score=error_score)\n return cv_results['test_score']" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate", + "name": "cross_validate", + "qname": "sklearn.model_selection._validation.cross_validate", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._validation.cross_validate.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object implementing 'fit'", + "default_value": "", + "description": "The object to use to fit the data." + }, + "type": { + "kind": "NamedType", + "name": "estimator object implementing 'fit'" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate/X", + "name": "X", + "qname": "sklearn.model_selection._validation.cross_validate.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to fit. Can be for example a list, or an array." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate/y", + "name": "y", + "qname": "sklearn.model_selection._validation.cross_validate.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "None", + "description": "The target variable to try to predict in the case of\nsupervised learning." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate/groups", + "name": "groups", + "qname": "sklearn.model_selection._validation.cross_validate.groups", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set. Only used in conjunction with a \"Group\" :term:`cv`\ninstance (e.g., :class:`GroupKFold`)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate/scoring", + "name": "scoring", + "qname": "sklearn.model_selection._validation.cross_validate.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str, callable, list, tuple, or dict", + "default_value": "None", + "description": "Strategy to evaluate the performance of the cross-validated model on\nthe test set.\n\nIf `scoring` represents a single score, one can use:\n\n- a single string (see :ref:`scoring_parameter`);\n- a callable (see :ref:`scoring`) that returns a single value.\n\nIf `scoring` represents multiple scores, one can use:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where the keys are the metric\n names and the values are the metric scores;\n- a dictionary with metric names as keys and callables a values.\n\nSee :ref:`multimetric_grid_search` for an example." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + }, + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "tuple" + }, + { + "kind": "NamedType", + "name": "dict" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate/cv", + "name": "cv", + "qname": "sklearn.model_selection._validation.cross_validate.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`.Fold` is used. 
These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate/n_jobs", + "name": "n_jobs", + "qname": "sklearn.model_selection._validation.cross_validate.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel. Training the estimator and computing\nthe score are parallelized over the cross-validation splits.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._validation.cross_validate.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate/fit_params", + "name": "fit_params", + "qname": "sklearn.model_selection._validation.cross_validate.fit_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Parameters to pass to the fit method of the estimator." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate/pre_dispatch", + "name": "pre_dispatch", + "qname": "sklearn.model_selection._validation.cross_validate.pre_dispatch", + "default_value": "'2*n_jobs'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or str", + "default_value": "'2*n_jobs'", + "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. 
Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate/return_train_score", + "name": "return_train_score", + "qname": "sklearn.model_selection._validation.cross_validate.return_train_score", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to include train scores.\nComputing training scores is used to get insights on how different\nparameter settings impact the overfitting/underfitting trade-off.\nHowever computing the scores on the training set can be computationally\nexpensive and is not strictly required to select the parameters that\nyield the best generalization performance.\n\n.. versionadded:: 0.19\n\n.. versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate/return_estimator", + "name": "return_estimator", + "qname": "sklearn.model_selection._validation.cross_validate.return_estimator", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return the estimators fitted on each split.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/cross_validate/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._validation.cross_validate.error_score", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'raise' or numeric", + "default_value": "np.nan", + "description": "Value to assign to the score if an error occurs in estimator fitting.\nIf set to 'raise', the error is raised.\nIf a numeric value is given, FitFailedWarning is raised.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'raise'" + }, + { + "kind": "NamedType", + "name": "numeric" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Evaluate metric(s) by cross-validation and also record fit/score times.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Evaluate metric(s) by cross-validation and also record fit/score times.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nX : array-like of shape (n_samples, n_features)\n The data to fit. Can be for example a list, or an array.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n The target variable to try to predict in the case of\n supervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\nscoring : str, callable, list, tuple, or dict, default=None\n Strategy to evaluate the performance of the cross-validated model on\n the test set.\n\n If `scoring` represents a single score, one can use:\n\n - a single string (see :ref:`scoring_parameter`);\n - a callable (see :ref:`scoring`) that returns a single value.\n\n If `scoring` represents multiple scores, one can use:\n\n - a list or tuple of unique strings;\n - a callable returning a dictionary where the keys are the metric\n names and the values are the metric scores;\n - a dictionary with metric names as keys and callables as values.\n\n See :ref:`multimetric_grid_search` for an example.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the score are parallelized over the cross-validation splits.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n The verbosity level.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\npre_dispatch : int or str, default='2*n_jobs'\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\nreturn_train_score : bool, default=False\n Whether to include train scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n .. versionadded:: 0.19\n\n .. versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``\n\nreturn_estimator : bool, default=False\n Whether to return the estimators fitted on each split.\n\n .. versionadded:: 0.20\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\nscores : dict of float arrays of shape (n_splits,)\n Array of scores of the estimator for each run of the cross validation.\n\n A dict of arrays containing the score/time arrays for each scorer is\n returned. The possible keys for this ``dict`` are:\n\n ``test_score``\n The score array for test scores on each cv split.\n Suffix ``_score`` in ``test_score`` changes to a specific\n metric like ``test_r2`` or ``test_auc`` if there are\n multiple scoring metrics in the scoring parameter.\n ``train_score``\n The score array for train scores on each cv split.\n Suffix ``_score`` in ``train_score`` changes to a specific\n metric like ``train_r2`` or ``train_auc`` if there are\n multiple scoring metrics in the scoring parameter.\n This is available only if ``return_train_score`` parameter\n is ``True``.\n ``fit_time``\n The time for fitting the estimator on the train\n set for each cv split.\n ``score_time``\n The time for scoring the estimator on the test set for each\n cv split. (Note time for scoring on the train set is not\n included even if ``return_train_score`` is set to ``True``.)\n ``estimator``\n The estimator objects for each cv split.\n This is available only if ``return_estimator`` parameter\n is set to ``True``.\n\nExamples\n--------\n>>> from sklearn import datasets, linear_model\n>>> from sklearn.model_selection import cross_validate\n>>> from sklearn.metrics import make_scorer\n>>> from sklearn.metrics import confusion_matrix\n>>> from sklearn.svm import LinearSVC\n>>> diabetes = datasets.load_diabetes()\n>>> X = diabetes.data[:150]\n>>> y = diabetes.target[:150]\n>>> lasso = linear_model.Lasso()\n\nSingle metric evaluation using ``cross_validate``\n\n>>> cv_results = cross_validate(lasso, X, y, cv=3)\n>>> sorted(cv_results.keys())\n['fit_time', 'score_time', 'test_score']\n>>> cv_results['test_score']\narray([0.33150734, 0.08022311, 0.03531764])\n\nMultiple metric evaluation using ``cross_validate``\n(please refer to the ``scoring`` parameter doc for more information)\n\n>>> scores = cross_validate(lasso, X, y, cv=3,\n... scoring=('r2', 'neg_mean_squared_error'),\n... return_train_score=True)\n>>> print(scores['test_neg_mean_squared_error'])\n[-3635.5... -3573.3... -6114.7...]\n>>> print(scores['train_r2'])\n[0.28010158 0.39088426 0.22784852]\n\nSee Also\n---------\ncross_val_score : Run cross-validation for single metric evaluation.\n\ncross_val_predict : Get predictions from each split of cross-validation for\n diagnostic purposes.\n\nsklearn.metrics.make_scorer : Make a scorer from a performance metric or\n loss function.", + "code": "@_deprecate_positional_args\ndef cross_validate(estimator, X, y=None, *, groups=None, scoring=None, cv=None,\n n_jobs=None, verbose=0, fit_params=None,\n pre_dispatch='2*n_jobs', return_train_score=False,\n return_estimator=False, error_score=np.nan):\n \"\"\"Evaluate metric(s) by cross-validation and also record fit/score times.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\n X : array-like of shape (n_samples, n_features)\n The data to fit. Can be for example a list, or an array.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs), \\\n default=None\n The target variable to try to predict in the case of\n supervised learning.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\n scoring : str, callable, list, tuple, or dict, default=None\n Strategy to evaluate the performance of the cross-validated model on\n the test set.\n\n If `scoring` represents a single score, one can use:\n\n - a single string (see :ref:`scoring_parameter`);\n - a callable (see :ref:`scoring`) that returns a single value.\n\n If `scoring` represents multiple scores, one can use:\n\n - a list or tuple of unique strings;\n - a callable returning a dictionary where the keys are the metric\n names and the values are the metric scores;\n - a dictionary with metric names as keys and callables as values.\n\n See :ref:`multimetric_grid_search` for an example.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n n_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the score are parallelized over the cross-validation splits.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n verbose : int, default=0\n The verbosity level.\n\n fit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n pre_dispatch : int or str, default='2*n_jobs'\n Controls the number of jobs that get dispatched during parallel\n execution. Reducing this number can be useful to avoid an\n explosion of memory consumption when more jobs get dispatched\n than CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'\n\n return_train_score : bool, default=False\n Whether to include train scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\n .. versionadded:: 0.19\n\n .. versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``\n\n return_estimator : bool, default=False\n Whether to return the estimators fitted on each split.\n\n .. 
versionadded:: 0.20\n\n error_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. versionadded:: 0.20\n\n Returns\n -------\n scores : dict of float arrays of shape (n_splits,)\n Array of scores of the estimator for each run of the cross validation.\n\n A dict of arrays containing the score/time arrays for each scorer is\n returned. The possible keys for this ``dict`` are:\n\n ``test_score``\n The score array for test scores on each cv split.\n Suffix ``_score`` in ``test_score`` changes to a specific\n metric like ``test_r2`` or ``test_auc`` if there are\n multiple scoring metrics in the scoring parameter.\n ``train_score``\n The score array for train scores on each cv split.\n Suffix ``_score`` in ``train_score`` changes to a specific\n metric like ``train_r2`` or ``train_auc`` if there are\n multiple scoring metrics in the scoring parameter.\n This is available only if ``return_train_score`` parameter\n is ``True``.\n ``fit_time``\n The time for fitting the estimator on the train\n set for each cv split.\n ``score_time``\n The time for scoring the estimator on the test set for each\n cv split. (Note time for scoring on the train set is not\n included even if ``return_train_score`` is set to ``True``.)\n ``estimator``\n The estimator objects for each cv split.\n This is available only if ``return_estimator`` parameter\n is set to ``True``.\n\n Examples\n --------\n >>> from sklearn import datasets, linear_model\n >>> from sklearn.model_selection import cross_validate\n >>> from sklearn.metrics import make_scorer\n >>> from sklearn.metrics import confusion_matrix\n >>> from sklearn.svm import LinearSVC\n >>> diabetes = datasets.load_diabetes()\n >>> X = diabetes.data[:150]\n >>> y = diabetes.target[:150]\n >>> lasso = linear_model.Lasso()\n\n Single metric evaluation using ``cross_validate``\n\n >>> cv_results = cross_validate(lasso, X, y, cv=3)\n >>> sorted(cv_results.keys())\n ['fit_time', 'score_time', 'test_score']\n >>> cv_results['test_score']\n array([0.33150734, 0.08022311, 0.03531764])\n\n Multiple metric evaluation using ``cross_validate``\n (please refer to the ``scoring`` parameter doc for more information)\n\n >>> scores = cross_validate(lasso, X, y, cv=3,\n ... scoring=('r2', 'neg_mean_squared_error'),\n ... return_train_score=True)\n >>> print(scores['test_neg_mean_squared_error'])\n [-3635.5... -3573.3... 
-6114.7...]\n >>> print(scores['train_r2'])\n [0.28010158 0.39088426 0.22784852]\n\n See Also\n ---------\n cross_val_score : Run cross-validation for single metric evaluation.\n\n cross_val_predict : Get predictions from each split of cross-validation for\n diagnostic purposes.\n\n sklearn.metrics.make_scorer : Make a scorer from a performance metric or\n loss function.\n\n \"\"\"\n X, y, groups = indexable(X, y, groups)\n\n cv = check_cv(cv, y, classifier=is_classifier(estimator))\n\n if callable(scoring):\n scorers = scoring\n elif scoring is None or isinstance(scoring, str):\n scorers = check_scoring(estimator, scoring)\n else:\n scorers = _check_multimetric_scoring(estimator, scoring)\n\n # We clone the estimator to make sure that all the folds are\n # independent, and that it is pickle-able.\n parallel = Parallel(n_jobs=n_jobs, verbose=verbose,\n pre_dispatch=pre_dispatch)\n results = parallel(\n delayed(_fit_and_score)(\n clone(estimator), X, y, scorers, train, test, verbose, None,\n fit_params, return_train_score=return_train_score,\n return_times=True, return_estimator=return_estimator,\n error_score=error_score)\n for train, test in cv.split(X, y, groups))\n\n # For callable scoring, the return type is only known after calling. If the\n # return type is a dictionary, the error scores can now be inserted with\n # the correct key.\n if callable(scoring):\n _insert_error_scores(results, error_score)\n\n results = _aggregate_score_dicts(results)\n\n ret = {}\n ret['fit_time'] = results[\"fit_time\"]\n ret['score_time'] = results[\"score_time\"]\n\n if return_estimator:\n ret['estimator'] = results[\"estimator\"]\n\n test_scores_dict = _normalize_score_results(results[\"test_scores\"])\n if return_train_score:\n train_scores_dict = _normalize_score_results(results[\"train_scores\"])\n\n for name in test_scores_dict:\n ret['test_%s' % name] = test_scores_dict[name]\n if return_train_score:\n key = 'train_%s' % name\n ret[key] = train_scores_dict[name]\n\n return ret" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve", + "name": "learning_curve", + "qname": "sklearn.model_selection._validation.learning_curve", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._validation.learning_curve.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object type that implements the \"fit\" and \"predict\" methods", + "default_value": "", + "description": "An object of that type which is cloned for each validation." + }, + "type": { + "kind": "NamedType", + "name": "object type that implements the \"fit\" and \"predict\" methods" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/X", + "name": "X", + "qname": "sklearn.model_selection._validation.learning_curve.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/y", + "name": "y", + "qname": "sklearn.model_selection._validation.learning_curve.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Target relative to X for classification or regression;\nNone for unsupervised learning." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/groups", + "name": "groups", + "qname": "sklearn.model_selection._validation.learning_curve.groups", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set. Only used in conjunction with a \"Group\" :term:`cv`\ninstance (e.g., :class:`GroupKFold`)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/train_sizes", + "name": "train_sizes", + "qname": "sklearn.model_selection._validation.learning_curve.train_sizes", + "default_value": "np.linspace(0.1, 1.0, 5)", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_ticks,)", + "default_value": "np.linspace(0.1, 1.0, 5)", + "description": "Relative or absolute numbers of training examples that will be used to\ngenerate the learning curve. If the dtype is float, it is regarded as a\nfraction of the maximum size of the training set (that is determined\nby the selected validation method), i.e. it has to be within (0, 1].\nOtherwise it is interpreted as absolute sizes of the training sets.\nNote that for classification the number of samples usually have to\nbe big enough to contain at least one sample from each class." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_ticks,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/cv", + "name": "cv", + "qname": "sklearn.model_selection._validation.learning_curve.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/scoring", + "name": "scoring", + "qname": "sklearn.model_selection._validation.learning_curve.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "None", + "description": "A str (see model evaluation documentation) or\na scorer callable object / function with signature\n``scorer(estimator, X, y)``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/exploit_incremental_learning", + "name": "exploit_incremental_learning", + "qname": "sklearn.model_selection._validation.learning_curve.exploit_incremental_learning", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If the estimator supports incremental learning, this will be\nused to speed up fitting for different training set sizes." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/n_jobs", + "name": "n_jobs", + "qname": "sklearn.model_selection._validation.learning_curve.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel. Training the estimator and computing\nthe score are parallelized over the different training and test sets.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/pre_dispatch", + "name": "pre_dispatch", + "qname": "sklearn.model_selection._validation.learning_curve.pre_dispatch", + "default_value": "'all'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or str", + "default_value": "'all'", + "description": "Number of predispatched jobs for parallel execution (default is\nall). The option can reduce the allocated memory. The str can\nbe an expression like '2*n_jobs'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._validation.learning_curve.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls the verbosity: the higher, the more messages." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/shuffle", + "name": "shuffle", + "qname": "sklearn.model_selection._validation.learning_curve.shuffle", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to shuffle training data before taking prefixes of it\nbased on``train_sizes``." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._validation.learning_curve.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used when ``shuffle`` is True. Pass an int for reproducible\noutput across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._validation.learning_curve.error_score", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'raise' or numeric", + "default_value": "np.nan", + "description": "Value to assign to the score if an error occurs in estimator fitting.\nIf set to 'raise', the error is raised.\nIf a numeric value is given, FitFailedWarning is raised.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'raise'" + }, + { + "kind": "NamedType", + "name": "numeric" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/return_times", + "name": "return_times", + "qname": "sklearn.model_selection._validation.learning_curve.return_times", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to return the fit and score times." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/learning_curve/fit_params", + "name": "fit_params", + "qname": "sklearn.model_selection._validation.learning_curve.fit_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Parameters to pass to the fit method of the estimator.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Learning curve.\n\nDetermines cross-validated training and test scores for different training\nset sizes.\n\nA cross-validation generator splits the whole dataset k times in training\nand test data. Subsets of the training set with varying sizes will be used\nto train the estimator and a score for each training subset size and the\ntest set will be computed. 
Afterwards, the scores will be averaged over\nall k runs for each training subset size.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Learning curve.\n\nDetermines cross-validated training and test scores for different training\nset sizes.\n\nA cross-validation generator splits the whole dataset k times in training\nand test data. Subsets of the training set with varying sizes will be used\nto train the estimator and a score for each training subset size and the\ntest set will be computed. Afterwards, the scores will be averaged over\nall k runs for each training subset size.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : object type that implements the \"fit\" and \"predict\" methods\n An object of that type which is cloned for each validation.\n\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\ntrain_sizes : array-like of shape (n_ticks,), default=np.linspace(0.1, 1.0, 5)\n Relative or absolute numbers of training examples that will be used to\n generate the learning curve. If the dtype is float, it is regarded as a\n fraction of the maximum size of the training set (that is determined\n by the selected validation method), i.e. it has to be within (0, 1].\n Otherwise it is interpreted as absolute sizes of the training sets.\n Note that for classification the number of samples usually have to\n be big enough to contain at least one sample from each class.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nscoring : str or callable, default=None\n A str (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\nexploit_incremental_learning : bool, default=False\n If the estimator supports incremental learning, this will be\n used to speed up fitting for different training set sizes.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the score are parallelized over the different training and test sets.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\npre_dispatch : int or str, default='all'\n Number of predispatched jobs for parallel execution (default is\n all). The option can reduce the allocated memory. The str can\n be an expression like '2*n_jobs'.\n\nverbose : int, default=0\n Controls the verbosity: the higher, the more messages.\n\nshuffle : bool, default=False\n Whether to shuffle training data before taking prefixes of it\n based on``train_sizes``.\n\nrandom_state : int, RandomState instance or None, default=None\n Used when ``shuffle`` is True. Pass an int for reproducible\n output across multiple function calls.\n See :term:`Glossary `.\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. versionadded:: 0.20\n\nreturn_times : bool, default=False\n Whether to return the fit and score times.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ntrain_sizes_abs : array of shape (n_unique_ticks,)\n Numbers of training examples that has been used to generate the\n learning curve. Note that the number of ticks might be less\n than n_ticks because duplicate entries will be removed.\n\ntrain_scores : array of shape (n_ticks, n_cv_folds)\n Scores on training sets.\n\ntest_scores : array of shape (n_ticks, n_cv_folds)\n Scores on test set.\n\nfit_times : array of shape (n_ticks, n_cv_folds)\n Times spent for fitting in seconds. Only present if ``return_times``\n is True.\n\nscore_times : array of shape (n_ticks, n_cv_folds)\n Times spent for scoring in seconds. Only present if ``return_times``\n is True.\n\nNotes\n-----\nSee :ref:`examples/model_selection/plot_learning_curve.py\n`", + "code": "@_deprecate_positional_args\ndef learning_curve(estimator, X, y, *, groups=None,\n train_sizes=np.linspace(0.1, 1.0, 5), cv=None,\n scoring=None, exploit_incremental_learning=False,\n n_jobs=None, pre_dispatch=\"all\", verbose=0, shuffle=False,\n random_state=None, error_score=np.nan, return_times=False,\n fit_params=None):\n \"\"\"Learning curve.\n\n Determines cross-validated training and test scores for different training\n set sizes.\n\n A cross-validation generator splits the whole dataset k times in training\n and test data. Subsets of the training set with varying sizes will be used\n to train the estimator and a score for each training subset size and the\n test set will be computed. Afterwards, the scores will be averaged over\n all k runs for each training subset size.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : object type that implements the \"fit\" and \"predict\" methods\n An object of that type which is cloned for each validation.\n\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. 
Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\n train_sizes : array-like of shape (n_ticks,), \\\n default=np.linspace(0.1, 1.0, 5)\n Relative or absolute numbers of training examples that will be used to\n generate the learning curve. If the dtype is float, it is regarded as a\n fraction of the maximum size of the training set (that is determined\n by the selected validation method), i.e. it has to be within (0, 1].\n Otherwise it is interpreted as absolute sizes of the training sets.\n Note that for classification the number of samples usually have to\n be big enough to contain at least one sample from each class.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n scoring : str or callable, default=None\n A str (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\n exploit_incremental_learning : bool, default=False\n If the estimator supports incremental learning, this will be\n used to speed up fitting for different training set sizes.\n\n n_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the score are parallelized over the different training and test sets.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n pre_dispatch : int or str, default='all'\n Number of predispatched jobs for parallel execution (default is\n all). The option can reduce the allocated memory. The str can\n be an expression like '2*n_jobs'.\n\n verbose : int, default=0\n Controls the verbosity: the higher, the more messages.\n\n shuffle : bool, default=False\n Whether to shuffle training data before taking prefixes of it\n based on``train_sizes``.\n\n random_state : int, RandomState instance or None, default=None\n Used when ``shuffle`` is True. Pass an int for reproducible\n output across multiple function calls.\n See :term:`Glossary `.\n\n error_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. versionadded:: 0.20\n\n return_times : bool, default=False\n Whether to return the fit and score times.\n\n fit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n .. versionadded:: 0.24\n\n Returns\n -------\n train_sizes_abs : array of shape (n_unique_ticks,)\n Numbers of training examples that has been used to generate the\n learning curve. 
Note that the number of ticks might be less\n than n_ticks because duplicate entries will be removed.\n\n train_scores : array of shape (n_ticks, n_cv_folds)\n Scores on training sets.\n\n test_scores : array of shape (n_ticks, n_cv_folds)\n Scores on test set.\n\n fit_times : array of shape (n_ticks, n_cv_folds)\n Times spent for fitting in seconds. Only present if ``return_times``\n is True.\n\n score_times : array of shape (n_ticks, n_cv_folds)\n Times spent for scoring in seconds. Only present if ``return_times``\n is True.\n\n Notes\n -----\n See :ref:`examples/model_selection/plot_learning_curve.py\n `\n \"\"\"\n if exploit_incremental_learning and not hasattr(estimator, \"partial_fit\"):\n raise ValueError(\"An estimator must support the partial_fit interface \"\n \"to exploit incremental learning\")\n X, y, groups = indexable(X, y, groups)\n\n cv = check_cv(cv, y, classifier=is_classifier(estimator))\n # Store it as list as we will be iterating over the list multiple times\n cv_iter = list(cv.split(X, y, groups))\n\n scorer = check_scoring(estimator, scoring=scoring)\n\n n_max_training_samples = len(cv_iter[0][0])\n # Because the lengths of folds can be significantly different, it is\n # not guaranteed that we use all of the available training data when we\n # use the first 'n_max_training_samples' samples.\n train_sizes_abs = _translate_train_sizes(train_sizes,\n n_max_training_samples)\n n_unique_ticks = train_sizes_abs.shape[0]\n if verbose > 0:\n print(\"[learning_curve] Training set sizes: \" + str(train_sizes_abs))\n\n parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch,\n verbose=verbose)\n\n if shuffle:\n rng = check_random_state(random_state)\n cv_iter = ((rng.permutation(train), test) for train, test in cv_iter)\n\n if exploit_incremental_learning:\n classes = np.unique(y) if is_classifier(estimator) else None\n out = parallel(delayed(_incremental_fit_estimator)(\n clone(estimator), X, y, classes, train, test, train_sizes_abs,\n scorer, verbose, return_times, error_score=error_score,\n fit_params=fit_params)\n for train, test in cv_iter\n )\n out = np.asarray(out).transpose((2, 1, 0))\n else:\n train_test_proportions = []\n for train, test in cv_iter:\n for n_train_samples in train_sizes_abs:\n train_test_proportions.append((train[:n_train_samples], test))\n\n results = parallel(delayed(_fit_and_score)(\n clone(estimator), X, y, scorer, train, test, verbose,\n parameters=None, fit_params=fit_params, return_train_score=True,\n error_score=error_score, return_times=return_times)\n for train, test in train_test_proportions\n )\n results = _aggregate_score_dicts(results)\n train_scores = results[\"train_scores\"].reshape(-1, n_unique_ticks).T\n test_scores = results[\"test_scores\"].reshape(-1, n_unique_ticks).T\n out = [train_scores, test_scores]\n\n if return_times:\n fit_times = results[\"fit_time\"].reshape(-1, n_unique_ticks).T\n score_times = results[\"score_time\"].reshape(-1, n_unique_ticks).T\n out.extend([fit_times, score_times])\n\n ret = train_sizes_abs, out[0], out[1]\n\n if return_times:\n ret = ret + (out[2], out[3])\n\n return ret" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/permutation_test_score", + "name": "permutation_test_score", + "qname": "sklearn.model_selection._validation.permutation_test_score", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/permutation_test_score/estimator", + "name": "estimator", + "qname": 
"sklearn.model_selection._validation.permutation_test_score.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object implementing 'fit'", + "default_value": "", + "description": "The object to use to fit the data." + }, + "type": { + "kind": "NamedType", + "name": "estimator object implementing 'fit'" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/permutation_test_score/X", + "name": "X", + "qname": "sklearn.model_selection._validation.permutation_test_score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape at least 2D", + "default_value": "", + "description": "The data to fit." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape at least 2D" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/permutation_test_score/y", + "name": "y", + "qname": "sklearn.model_selection._validation.permutation_test_score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs) or None", + "default_value": "", + "description": "The target variable to try to predict in the case of\nsupervised learning." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/permutation_test_score/groups", + "name": "groups", + "qname": "sklearn.model_selection._validation.permutation_test_score.groups", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Labels to constrain permutation within groups, i.e. ``y`` values\nare permuted among samples with the same group identifier.\nWhen not specified, ``y`` values are permuted among all samples.\n\nWhen a grouped cross-validator is used, the group labels are\nalso passed on to the ``split`` method of the cross-validator. The\ncross-validator uses them for grouping the samples while splitting\nthe dataset into train/test set." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/permutation_test_score/cv", + "name": "cv", + "qname": "sklearn.model_selection._validation.permutation_test_score.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. 
versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/permutation_test_score/n_permutations", + "name": "n_permutations", + "qname": "sklearn.model_selection._validation.permutation_test_score.n_permutations", + "default_value": "100", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Number of times to permute ``y``." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/permutation_test_score/n_jobs", + "name": "n_jobs", + "qname": "sklearn.model_selection._validation.permutation_test_score.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel. Training the estimator and computing\nthe cross-validated score are parallelized over the permutations.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/permutation_test_score/random_state", + "name": "random_state", + "qname": "sklearn.model_selection._validation.permutation_test_score.random_state", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "0", + "description": "Pass an int for reproducible output for permutation of\n``y`` values among samples. See :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/permutation_test_score/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._validation.permutation_test_score.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/permutation_test_score/scoring", + "name": "scoring", + "qname": "sklearn.model_selection._validation.permutation_test_score.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "None", + "description": "A single str (see :ref:`scoring_parameter`) or a callable\n(see :ref:`scoring`) to evaluate the predictions on the test set.\n\nIf None the estimator's score method is used." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/permutation_test_score/fit_params", + "name": "fit_params", + "qname": "sklearn.model_selection._validation.permutation_test_score.fit_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Parameters to pass to the fit method of the estimator.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Evaluate the significance of a cross-validated score with permutations\n\nPermutes targets to generate 'randomized data' and compute the empirical\np-value against the null hypothesis that features and targets are\nindependent.\n\nThe p-value represents the fraction of randomized data sets where the\nestimator performed as well or better than in the original data. A small\np-value suggests that there is a real dependency between features and\ntargets which has been used by the estimator to give good predictions.\nA large p-value may be due to lack of real dependency between features\nand targets or the estimator was not able to use the dependency to\ngive good predictions.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Evaluate the significance of a cross-validated score with permutations\n\nPermutes targets to generate 'randomized data' and compute the empirical\np-value against the null hypothesis that features and targets are\nindependent.\n\nThe p-value represents the fraction of randomized data sets where the\nestimator performed as well or better than in the original data. A small\np-value suggests that there is a real dependency between features and\ntargets which has been used by the estimator to give good predictions.\nA large p-value may be due to lack of real dependency between features\nand targets or the estimator was not able to use the dependency to\ngive good predictions.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\nX : array-like of shape at least 2D\n The data to fit.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n The target variable to try to predict in the case of\n supervised learning.\n\ngroups : array-like of shape (n_samples,), default=None\n Labels to constrain permutation within groups, i.e. ``y`` values\n are permuted among samples with the same group identifier.\n When not specified, ``y`` values are permuted among all samples.\n\n When a grouped cross-validator is used, the group labels are\n also passed on to the ``split`` method of the cross-validator. 
The\n cross-validator uses them for grouping the samples while splitting\n the dataset into train/test set.\n\nscoring : str or callable, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n If None the estimator's score method is used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_permutations : int, default=100\n Number of times to permute ``y``.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the cross-validated score are parallelized over the permutations.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance or None, default=0\n Pass an int for reproducible output for permutation of\n ``y`` values among samples. See :term:`Glossary `.\n\nverbose : int, default=0\n The verbosity level.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nscore : float\n The true score without permuting targets.\n\npermutation_scores : array of shape (n_permutations,)\n The scores obtained for each permutations.\n\npvalue : float\n The p-value, which approximates the probability that the score would\n be obtained by chance. This is calculated as:\n\n `(C + 1) / (n_permutations + 1)`\n\n Where C is the number of permutations whose score >= the true score.\n\n The best possible p-value is 1/(n_permutations + 1), the worst is 1.0.\n\nNotes\n-----\nThis function implements Test 1 in:\n\n Ojala and Garriga. `Permutation Tests for Studying Classifier\n Performance\n `_. The\n Journal of Machine Learning Research (2010) vol. 11", + "code": "@_deprecate_positional_args\ndef permutation_test_score(estimator, X, y, *, groups=None, cv=None,\n n_permutations=100, n_jobs=None, random_state=0,\n verbose=0, scoring=None, fit_params=None):\n \"\"\"Evaluate the significance of a cross-validated score with permutations\n\n Permutes targets to generate 'randomized data' and compute the empirical\n p-value against the null hypothesis that features and targets are\n independent.\n\n The p-value represents the fraction of randomized data sets where the\n estimator performed as well or better than in the original data. 
A small\n p-value suggests that there is a real dependency between features and\n targets which has been used by the estimator to give good predictions.\n A large p-value may be due to lack of real dependency between features\n and targets or the estimator was not able to use the dependency to\n give good predictions.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : estimator object implementing 'fit'\n The object to use to fit the data.\n\n X : array-like of shape at least 2D\n The data to fit.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n The target variable to try to predict in the case of\n supervised learning.\n\n groups : array-like of shape (n_samples,), default=None\n Labels to constrain permutation within groups, i.e. ``y`` values\n are permuted among samples with the same group identifier.\n When not specified, ``y`` values are permuted among all samples.\n\n When a grouped cross-validator is used, the group labels are\n also passed on to the ``split`` method of the cross-validator. The\n cross-validator uses them for grouping the samples while splitting\n the dataset into train/test set.\n\n scoring : str or callable, default=None\n A single str (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n\n If None the estimator's score method is used.\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n n_permutations : int, default=100\n Number of times to permute ``y``.\n\n n_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the cross-validated score are parallelized over the permutations.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n random_state : int, RandomState instance or None, default=0\n Pass an int for reproducible output for permutation of\n ``y`` values among samples. See :term:`Glossary `.\n\n verbose : int, default=0\n The verbosity level.\n\n fit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n .. versionadded:: 0.24\n\n Returns\n -------\n score : float\n The true score without permuting targets.\n\n permutation_scores : array of shape (n_permutations,)\n The scores obtained for each permutations.\n\n pvalue : float\n The p-value, which approximates the probability that the score would\n be obtained by chance. 
This is calculated as:\n\n `(C + 1) / (n_permutations + 1)`\n\n Where C is the number of permutations whose score >= the true score.\n\n The best possible p-value is 1/(n_permutations + 1), the worst is 1.0.\n\n Notes\n -----\n This function implements Test 1 in:\n\n Ojala and Garriga. `Permutation Tests for Studying Classifier\n Performance\n `_. The\n Journal of Machine Learning Research (2010) vol. 11\n\n \"\"\"\n X, y, groups = indexable(X, y, groups)\n\n cv = check_cv(cv, y, classifier=is_classifier(estimator))\n scorer = check_scoring(estimator, scoring=scoring)\n random_state = check_random_state(random_state)\n\n # We clone the estimator to make sure that all the folds are\n # independent, and that it is pickle-able.\n score = _permutation_test_score(clone(estimator), X, y, groups, cv, scorer,\n fit_params=fit_params)\n permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(\n delayed(_permutation_test_score)(\n clone(estimator), X, _shuffle(y, groups, random_state),\n groups, cv, scorer, fit_params=fit_params)\n for _ in range(n_permutations))\n permutation_scores = np.array(permutation_scores)\n pvalue = (np.sum(permutation_scores >= score) + 1.0) / (n_permutations + 1)\n return score, permutation_scores, pvalue" + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve", + "name": "validation_curve", + "qname": "sklearn.model_selection._validation.validation_curve", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve/estimator", + "name": "estimator", + "qname": "sklearn.model_selection._validation.validation_curve.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object type that implements the \"fit\" and \"predict\" methods", + "default_value": "", + "description": "An object of that type which is cloned for each validation." + }, + "type": { + "kind": "NamedType", + "name": "object type that implements the \"fit\" and \"predict\" methods" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve/X", + "name": "X", + "qname": "sklearn.model_selection._validation.validation_curve.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve/y", + "name": "y", + "qname": "sklearn.model_selection._validation.validation_curve.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs) or None", + "default_value": "", + "description": "Target relative to X for classification or regression;\nNone for unsupervised learning." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve/param_name", + "name": "param_name", + "qname": "sklearn.model_selection._validation.validation_curve.param_name", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "Name of the parameter that will be varied." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve/param_range", + "name": "param_range", + "qname": "sklearn.model_selection._validation.validation_curve.param_range", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_values,)", + "default_value": "", + "description": "The values of the parameter that will be evaluated." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_values,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve/groups", + "name": "groups", + "qname": "sklearn.model_selection._validation.validation_curve.groups", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Group labels for the samples used while splitting the dataset into\ntrain/test set. Only used in conjunction with a \"Group\" :term:`cv`\ninstance (e.g., :class:`GroupKFold`)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve/cv", + "name": "cv", + "qname": "sklearn.model_selection._validation.validation_curve.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, cross-validation generator or an iterable", + "default_value": "None", + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "cross-validation generator" + }, + { + "kind": "NamedType", + "name": "an iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve/scoring", + "name": "scoring", + "qname": "sklearn.model_selection._validation.validation_curve.scoring", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "None", + "description": "A str (see model evaluation documentation) or\na scorer callable object / function with signature\n``scorer(estimator, X, y)``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve/n_jobs", + "name": "n_jobs", + "qname": "sklearn.model_selection._validation.validation_curve.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel. Training the estimator and computing\nthe score are parallelized over the combinations of each parameter\nvalue and each cross-validation split.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve/pre_dispatch", + "name": "pre_dispatch", + "qname": "sklearn.model_selection._validation.validation_curve.pre_dispatch", + "default_value": "'all'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or str", + "default_value": "'all'", + "description": "Number of predispatched jobs for parallel execution (default is\nall). The option can reduce the allocated memory. The str can\nbe an expression like '2*n_jobs'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve/verbose", + "name": "verbose", + "qname": "sklearn.model_selection._validation.validation_curve.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Controls the verbosity: the higher, the more messages." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve/error_score", + "name": "error_score", + "qname": "sklearn.model_selection._validation.validation_curve.error_score", + "default_value": "np.nan", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'raise' or numeric", + "default_value": "np.nan", + "description": "Value to assign to the score if an error occurs in estimator fitting.\nIf set to 'raise', the error is raised.\nIf a numeric value is given, FitFailedWarning is raised.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'raise'" + }, + { + "kind": "NamedType", + "name": "numeric" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.model_selection._validation/validation_curve/fit_params", + "name": "fit_params", + "qname": "sklearn.model_selection._validation.validation_curve.fit_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Parameters to pass to the fit method of the estimator.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validation curve.\n\nDetermine training and test scores for varying parameter values.\n\nCompute scores for an estimator with different values of a specified\nparameter. This is similar to grid search with one parameter. However, this\nwill also compute training scores and is merely a utility for plotting the\nresults.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Validation curve.\n\nDetermine training and test scores for varying parameter values.\n\nCompute scores for an estimator with different values of a specified\nparameter. This is similar to grid search with one parameter. However, this\nwill also compute training scores and is merely a utility for plotting the\nresults.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : object type that implements the \"fit\" and \"predict\" methods\n An object of that type which is cloned for each validation.\n\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\nparam_name : str\n Name of the parameter that will be varied.\n\nparam_range : array-like of shape (n_values,)\n The values of the parameter that will be evaluated.\n\ngroups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nscoring : str or callable, default=None\n A str (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel. 
Training the estimator and computing\n the score are parallelized over the combinations of each parameter\n value and each cross-validation split.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npre_dispatch : int or str, default='all'\n Number of predispatched jobs for parallel execution (default is\n all). The option can reduce the allocated memory. The str can\n be an expression like '2*n_jobs'.\n\nverbose : int, default=0\n Controls the verbosity: the higher, the more messages.\n\nfit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n .. versionadded:: 0.24\n\nerror_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ntrain_scores : array of shape (n_ticks, n_cv_folds)\n Scores on training sets.\n\ntest_scores : array of shape (n_ticks, n_cv_folds)\n Scores on test set.\n\nNotes\n-----\nSee :ref:`sphx_glr_auto_examples_model_selection_plot_validation_curve.py`", + "code": "@_deprecate_positional_args\ndef validation_curve(estimator, X, y, *, param_name, param_range, groups=None,\n cv=None, scoring=None, n_jobs=None, pre_dispatch=\"all\",\n verbose=0, error_score=np.nan, fit_params=None):\n \"\"\"Validation curve.\n\n Determine training and test scores for varying parameter values.\n\n Compute scores for an estimator with different values of a specified\n parameter. This is similar to grid search with one parameter. However, this\n will also compute training scores and is merely a utility for plotting the\n results.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n estimator : object type that implements the \"fit\" and \"predict\" methods\n An object of that type which is cloned for each validation.\n\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n param_name : str\n Name of the parameter that will be varied.\n\n param_range : array-like of shape (n_values,)\n The values of the parameter that will be evaluated.\n\n groups : array-like of shape (n_samples,), default=None\n Group labels for the samples used while splitting the dataset into\n train/test set. Only used in conjunction with a \"Group\" :term:`cv`\n instance (e.g., :class:`GroupKFold`).\n\n cv : int, cross-validation generator or an iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross validation,\n - int, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\n scoring : str or callable, default=None\n A str (see model evaluation documentation) or\n a scorer callable object / function with signature\n ``scorer(estimator, X, y)``.\n\n n_jobs : int, default=None\n Number of jobs to run in parallel. Training the estimator and computing\n the score are parallelized over the combinations of each parameter\n value and each cross-validation split.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n pre_dispatch : int or str, default='all'\n Number of predispatched jobs for parallel execution (default is\n all). The option can reduce the allocated memory. The str can\n be an expression like '2*n_jobs'.\n\n verbose : int, default=0\n Controls the verbosity: the higher, the more messages.\n\n fit_params : dict, default=None\n Parameters to pass to the fit method of the estimator.\n\n .. versionadded:: 0.24\n\n error_score : 'raise' or numeric, default=np.nan\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised.\n If a numeric value is given, FitFailedWarning is raised.\n\n .. versionadded:: 0.20\n\n Returns\n -------\n train_scores : array of shape (n_ticks, n_cv_folds)\n Scores on training sets.\n\n test_scores : array of shape (n_ticks, n_cv_folds)\n Scores on test set.\n\n Notes\n -----\n See :ref:`sphx_glr_auto_examples_model_selection_plot_validation_curve.py`\n\n \"\"\"\n X, y, groups = indexable(X, y, groups)\n\n cv = check_cv(cv, y, classifier=is_classifier(estimator))\n scorer = check_scoring(estimator, scoring=scoring)\n\n parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch,\n verbose=verbose)\n results = parallel(delayed(_fit_and_score)(\n clone(estimator), X, y, scorer, train, test, verbose,\n parameters={param_name: v}, fit_params=fit_params,\n return_train_score=True, error_score=error_score)\n\n # NOTE do not change order of iteration to allow one time cv splitters\n for train, test in cv.split(X, y, groups) for v in param_range)\n n_params = len(param_range)\n\n results = _aggregate_score_dicts(results)\n train_scores = results[\"train_scores\"].reshape(-1, n_params).T\n test_scores = results[\"test_scores\"].reshape(-1, n_params).T\n\n return train_scores, test_scores" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/__init__", + "name": "__init__", + "qname": "sklearn.multiclass.OneVsOneClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/__init__/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsOneClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/__init__/estimator", + "name": "estimator", + "qname": "sklearn.multiclass.OneVsOneClassifier.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "estimator object", + "default_value": "", + "description": "An estimator object implementing :term:`fit` and one of\n:term:`decision_function` or :term:`predict_proba`." 
+ }, + "type": { + "kind": "NamedType", + "name": "estimator object" + } + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.multiclass.OneVsOneClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation: the `n_classes * (\nn_classes - 1) / 2` OVO problems are computed in parallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "One-vs-one multiclass strategy\n\nThis strategy consists in fitting one classifier per class pair.\nAt prediction time, the class which received the most votes is selected.\nSince it requires to fit `n_classes * (n_classes - 1) / 2` classifiers,\nthis method is usually slower than one-vs-the-rest, due to its\nO(n_classes^2) complexity. However, this method may be advantageous for\nalgorithms such as kernel algorithms which don't scale well with\n`n_samples`. This is because each individual learning problem only involves\na small subset of the data whereas, with one-vs-the-rest, the complete\ndataset is used `n_classes` times.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimator, *, n_jobs=None):\n self.estimator = estimator\n self.n_jobs = n_jobs" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/_more_tags", + "name": "_more_tags", + "qname": "sklearn.multiclass.OneVsOneClassifier._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/_more_tags/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsOneClassifier._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Indicate if wrapped estimator is using a precomputed Gram matrix", + "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix", + "code": " def _more_tags(self):\n \"\"\"Indicate if wrapped estimator is using a precomputed Gram matrix\"\"\"\n return {\n 'pairwise': _safe_tags(self.estimator, key=\"pairwise\")\n }" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.multiclass.OneVsOneClassifier._pairwise", + "decorators": [ + "deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/_pairwise/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsOneClassifier._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Indicate if wrapped estimator is using a precomputed Gram matrix", + "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix", + 
"code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n \"\"\"Indicate if wrapped estimator is using a precomputed Gram matrix\"\"\"\n return getattr(self.estimator, \"_pairwise\", False)" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/decision_function", + "name": "decision_function", + "qname": "sklearn.multiclass.OneVsOneClassifier.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/decision_function/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsOneClassifier.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/decision_function/X", + "name": "X", + "qname": "sklearn.multiclass.OneVsOneClassifier.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Decision function for the OneVsOneClassifier.\n\nThe decision values for the samples are computed by adding the\nnormalized sum of pair-wise classification confidence levels to the\nvotes in order to disambiguate between the decision values when the\nvotes for all the classes are equal leading to a tie.", + "docstring": "Decision function for the OneVsOneClassifier.\n\nThe decision values for the samples are computed by adding the\nnormalized sum of pair-wise classification confidence levels to the\nvotes in order to disambiguate between the decision values when the\nvotes for all the classes are equal leading to a tie.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nY : array-like of shape (n_samples, n_classes) or (n_samples,) for binary classification.\n\n .. versionchanged:: 0.19\n output shape changed to ``(n_samples,)`` to conform to\n scikit-learn conventions for binary classification.", + "code": " def decision_function(self, X):\n \"\"\"Decision function for the OneVsOneClassifier.\n\n The decision values for the samples are computed by adding the\n normalized sum of pair-wise classification confidence levels to the\n votes in order to disambiguate between the decision values when the\n votes for all the classes are equal leading to a tie.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n Y : array-like of shape (n_samples, n_classes) or (n_samples,) for \\\n binary classification.\n\n .. 
versionchanged:: 0.19\n output shape changed to ``(n_samples,)`` to conform to\n scikit-learn conventions for binary classification.\n \"\"\"\n check_is_fitted(self)\n\n indices = self.pairwise_indices_\n if indices is None:\n Xs = [X] * len(self.estimators_)\n else:\n Xs = [X[:, idx] for idx in indices]\n\n predictions = np.vstack([est.predict(Xi)\n for est, Xi in zip(self.estimators_, Xs)]).T\n confidences = np.vstack([_predict_binary(est, Xi)\n for est, Xi in zip(self.estimators_, Xs)]).T\n Y = _ovr_decision_function(predictions,\n confidences, len(self.classes_))\n if self.n_classes_ == 2:\n return Y[:, 1]\n return Y" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/fit", + "name": "fit", + "qname": "sklearn.multiclass.OneVsOneClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/fit/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsOneClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/fit/X", + "name": "X", + "qname": "sklearn.multiclass.OneVsOneClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "(sparse) array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data." + }, + "type": { + "kind": "NamedType", + "name": "(sparse) array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/fit/y", + "name": "y", + "qname": "sklearn.multiclass.OneVsOneClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Multi-class targets." 
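The decision_function contract recorded above (vote counts disambiguated by normalized pairwise confidences, one column per class, collapsing to (n_samples,) for binary problems) is easy to observe. A small sketch assuming the iris dataset and LinearSVC, neither of which is mandated by the API:

# predict() is the argmax over these per-class decision values, per the
# source captured in this entry.
from sklearn.datasets import load_iris
from sklearn.multiclass import OneVsOneClassifier
from sklearn.svm import LinearSVC

X, y = load_iris(return_X_y=True)
ovo = OneVsOneClassifier(LinearSVC(random_state=0)).fit(X, y)
scores = ovo.decision_function(X[:5])
print(scores.shape)   # (5, 3): one column per class
# True here, since iris labels are 0, 1, 2 and coincide with column indices:
print((scores.argmax(axis=1) == ovo.predict(X[:5])).all())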
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit underlying estimators.", + "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : array-like of shape (n_samples,)\n Multi-class targets.\n\nReturns\n-------\nself", + "code": " def fit(self, X, y):\n \"\"\"Fit underlying estimators.\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n y : array-like of shape (n_samples,)\n Multi-class targets.\n\n Returns\n -------\n self\n \"\"\"\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'],\n force_all_finite=False)\n check_classification_targets(y)\n\n self.classes_ = np.unique(y)\n if len(self.classes_) == 1:\n raise ValueError(\"OneVsOneClassifier can not be fit when only one\"\n \" class is present.\")\n n_classes = self.classes_.shape[0]\n estimators_indices = list(zip(*(Parallel(n_jobs=self.n_jobs)(\n delayed(_fit_ovo_binary)\n (self.estimator, X, y, self.classes_[i], self.classes_[j])\n for i in range(n_classes) for j in range(i + 1, n_classes)))))\n\n self.estimators_ = estimators_indices[0]\n\n pairwise = _is_pairwise(self)\n self.pairwise_indices_ = (\n estimators_indices[1] if pairwise else None)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/n_classes_@getter", + "name": "n_classes_", + "qname": "sklearn.multiclass.OneVsOneClassifier.n_classes_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/n_classes_/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsOneClassifier.n_classes_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def n_classes_(self):\n return len(self.classes_)" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/partial_fit", + "name": "partial_fit", + "qname": "sklearn.multiclass.OneVsOneClassifier.partial_fit", + "decorators": ["if_delegate_has_method(delegate='estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/partial_fit/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsOneClassifier.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/partial_fit/X", + "name": "X", + "qname": "sklearn.multiclass.OneVsOneClassifier.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "(sparse) array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data." 
+ }, + "type": { + "kind": "NamedType", + "name": "(sparse) array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/partial_fit/y", + "name": "y", + "qname": "sklearn.multiclass.OneVsOneClassifier.partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Multi-class targets." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/partial_fit/classes", + "name": "classes", + "qname": "sklearn.multiclass.OneVsOneClassifier.partial_fit.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array, shape (n_classes, )", + "default_value": "", + "description": "Classes across all calls to partial_fit.\nCan be obtained via `np.unique(y_all)`, where y_all is the\ntarget vector of the entire dataset.\nThis argument is only required in the first call of partial_fit\nand can be omitted in the subsequent calls." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape (n_classes, )" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Partially fit underlying estimators\n\nShould be used when memory is inefficient to train all data. Chunks\nof data can be passed in several iteration, where the first call\nshould have an array of all target variables.", + "docstring": "Partially fit underlying estimators\n\nShould be used when memory is inefficient to train all data. Chunks\nof data can be passed in several iteration, where the first call\nshould have an array of all target variables.\n\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : array-like of shape (n_samples,)\n Multi-class targets.\n\nclasses : array, shape (n_classes, )\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is only required in the first call of partial_fit\n and can be omitted in the subsequent calls.\n\nReturns\n-------\nself", + "code": " @if_delegate_has_method(delegate='estimator')\n def partial_fit(self, X, y, classes=None):\n \"\"\"Partially fit underlying estimators\n\n Should be used when memory is inefficient to train all data. 
Chunks\n of data can be passed in several iteration, where the first call\n should have an array of all target variables.\n\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n y : array-like of shape (n_samples,)\n Multi-class targets.\n\n classes : array, shape (n_classes, )\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is only required in the first call of partial_fit\n and can be omitted in the subsequent calls.\n\n Returns\n -------\n self\n \"\"\"\n if _check_partial_fit_first_call(self, classes):\n self.estimators_ = [clone(self.estimator) for _ in\n range(self.n_classes_ *\n (self.n_classes_ - 1) // 2)]\n\n if len(np.setdiff1d(y, self.classes_)):\n raise ValueError(\"Mini-batch contains {0} while it \"\n \"must be subset of {1}\".format(np.unique(y),\n self.classes_))\n\n X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'],\n force_all_finite=False)\n check_classification_targets(y)\n combinations = itertools.combinations(range(self.n_classes_), 2)\n self.estimators_ = Parallel(\n n_jobs=self.n_jobs)(\n delayed(_partial_fit_ovo_binary)(\n estimator, X, y, self.classes_[i], self.classes_[j])\n for estimator, (i, j) in zip(self.estimators_,\n (combinations)))\n\n self.pairwise_indices_ = None\n\n return self" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/predict", + "name": "predict", + "qname": "sklearn.multiclass.OneVsOneClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/predict/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsOneClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsOneClassifier/predict/X", + "name": "X", + "qname": "sklearn.multiclass.OneVsOneClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "(sparse) array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data." 
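Since the partial_fit above updates every pairwise estimator on each call, each mini-batch should contain samples for the relevant class pairs. A sketch assuming iris, SGDClassifier as an incremental base estimator, and an upfront shuffle so every chunk covers all classes (all three are assumptions for illustration):

# `classes` is required on the first call; later calls may repeat or omit it.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import SGDClassifier
from sklearn.multiclass import OneVsOneClassifier

X, y = load_iris(return_X_y=True)
perm = np.random.RandomState(0).permutation(len(X))
X, y = X[perm], y[perm]          # shuffle so each chunk sees every class pair

ovo = OneVsOneClassifier(SGDClassifier(random_state=0))
for start in range(0, len(X), 50):
    ovo.partial_fit(X[start:start + 50], y[start:start + 50],
                    classes=np.unique(y))
print(len(ovo.estimators_))      # 3 pairwise estimators, each updated 3 times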
+ }, + "type": { + "kind": "NamedType", + "name": "(sparse) array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Estimate the best class label for each sample in X.\n\nThis is implemented as ``argmax(decision_function(X), axis=1)`` which\nwill return the label of the class with most votes by estimators\npredicting the outcome of a decision for each possible class pair.", + "docstring": "Estimate the best class label for each sample in X.\n\nThis is implemented as ``argmax(decision_function(X), axis=1)`` which\nwill return the label of the class with most votes by estimators\npredicting the outcome of a decision for each possible class pair.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : numpy array of shape [n_samples]\n Predicted multi-class targets.", + "code": " def predict(self, X):\n \"\"\"Estimate the best class label for each sample in X.\n\n This is implemented as ``argmax(decision_function(X), axis=1)`` which\n will return the label of the class with most votes by estimators\n predicting the outcome of a decision for each possible class pair.\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n Returns\n -------\n y : numpy array of shape [n_samples]\n Predicted multi-class targets.\n \"\"\"\n Y = self.decision_function(X)\n if self.n_classes_ == 2:\n return self.classes_[(Y > 0).astype(int)]\n return self.classes_[Y.argmax(axis=1)]" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/__init__", + "name": "__init__", + "qname": "sklearn.multiclass.OneVsRestClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/__init__/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/__init__/estimator", + "name": "estimator", + "qname": "sklearn.multiclass.OneVsRestClassifier.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "estimator object", + "default_value": "", + "description": "An estimator object implementing :term:`fit` and one of\n:term:`decision_function` or :term:`predict_proba`." + }, + "type": { + "kind": "NamedType", + "name": "estimator object" + } + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.multiclass.OneVsRestClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation: the `n_classes`\none-vs-rest problems are computed in parallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. 
versionchanged:: v0.20\n `n_jobs` default changed from 1 to None" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "One-vs-the-rest (OvR) multiclass strategy.\n\nAlso known as one-vs-all, this strategy consists in fitting one classifier\nper class. For each classifier, the class is fitted against all the other\nclasses. In addition to its computational efficiency (only `n_classes`\nclassifiers are needed), one advantage of this approach is its\ninterpretability. Since each class is represented by one and one classifier\nonly, it is possible to gain knowledge about the class by inspecting its\ncorresponding classifier. This is the most commonly used strategy for\nmulticlass classification and is a fair default choice.\n\nOneVsRestClassifier can also be used for multilabel classification. To use\nthis feature, provide an indicator matrix for the target `y` when calling\n`.fit`. In other words, the target labels should be formatted as a 2D\nbinary (0/1) matrix, where [i, j] == 1 indicates the presence of label j\nin sample i. This estimator uses the binary relevance method to perform\nmultilabel classification, which involves training one binary classifier\nindependently for each label.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimator, *, n_jobs=None):\n self.estimator = estimator\n self.n_jobs = n_jobs" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/_first_estimator@getter", + "name": "_first_estimator", + "qname": "sklearn.multiclass.OneVsRestClassifier._first_estimator", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/_first_estimator/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier._first_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def _first_estimator(self):\n return self.estimators_[0]" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/_more_tags", + "name": "_more_tags", + "qname": "sklearn.multiclass.OneVsRestClassifier._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/_more_tags/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Indicate if wrapped estimator is using a precomputed Gram matrix", + "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix", + "code": " def _more_tags(self):\n \"\"\"Indicate if wrapped estimator is using a precomputed Gram matrix\"\"\"\n return {'pairwise': _safe_tags(self.estimator, key=\"pairwise\")}" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.multiclass.OneVsRestClassifier._pairwise", + "decorators": [ + "deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1 (renaming 
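To make the indicator-matrix convention described above concrete, a minimal sketch, assuming make_multilabel_classification and LogisticRegression purely for illustration:

# One-vs-rest: one binary classifier per class. A 2-D 0/1 indicator target
# switches the wrapper into multilabel (binary relevance) mode.
from sklearn.datasets import make_multilabel_classification
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

X, Y = make_multilabel_classification(n_samples=100, n_classes=4,
                                      random_state=0)
ovr = OneVsRestClassifier(LogisticRegression(max_iter=1000)).fit(X, Y)
print(len(ovr.estimators_))        # 4: one binary classifier per label
print(ovr.multilabel_)             # True: the target was an indicator matrix
print(ovr.predict(X[:2]).shape)    # (2, 4) indicator output (may be sparse)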
of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/_pairwise/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Indicate if wrapped estimator is using a precomputed Gram matrix", + "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix", + "code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n \"\"\"Indicate if wrapped estimator is using a precomputed Gram matrix\"\"\"\n return getattr(self.estimator, \"_pairwise\", False)" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/coef_@getter", + "name": "coef_", + "qname": "sklearn.multiclass.OneVsRestClassifier.coef_", + "decorators": [ + "deprecated('Attribute coef_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26). If you observe this warning while using RFE or SelectFromModel, use the importance_getter parameter instead.')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/coef_/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier.coef_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute coef_ was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26). 
\"\n \"If you observe this warning while using RFE \"\n \"or SelectFromModel, use the importance_getter \"\n \"parameter instead.\")\n @property\n def coef_(self):\n check_is_fitted(self)\n if not hasattr(self.estimators_[0], \"coef_\"):\n raise AttributeError(\n \"Base estimator doesn't have a coef_ attribute.\")\n coefs = [e.coef_ for e in self.estimators_]\n if sp.issparse(coefs[0]):\n return sp.vstack(coefs)\n return np.vstack(coefs)" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/decision_function", + "name": "decision_function", + "qname": "sklearn.multiclass.OneVsRestClassifier.decision_function", + "decorators": ["if_delegate_has_method(['_first_estimator', 'estimator'])"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/decision_function/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/decision_function/X", + "name": "X", + "qname": "sklearn.multiclass.OneVsRestClassifier.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the distance of each sample from the decision boundary for\neach class. This can only be used with estimators which implement the\ndecision_function method.", + "docstring": "Returns the distance of each sample from the decision boundary for\neach class. This can only be used with estimators which implement the\ndecision_function method.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nT : array-like of shape (n_samples, n_classes) or (n_samples,) for binary classification.\n\n .. versionchanged:: 0.19\n output shape changed to ``(n_samples,)`` to conform to\n scikit-learn conventions for binary classification.", + "code": " @if_delegate_has_method(['_first_estimator', 'estimator'])\n def decision_function(self, X):\n \"\"\"Returns the distance of each sample from the decision boundary for\n each class. This can only be used with estimators which implement the\n decision_function method.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n T : array-like of shape (n_samples, n_classes) or (n_samples,) for \\\n binary classification.\n\n .. 
versionchanged:: 0.19\n output shape changed to ``(n_samples,)`` to conform to\n scikit-learn conventions for binary classification.\n \"\"\"\n check_is_fitted(self)\n if len(self.estimators_) == 1:\n return self.estimators_[0].decision_function(X)\n return np.array([est.decision_function(X).ravel()\n for est in self.estimators_]).T" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/fit", + "name": "fit", + "qname": "sklearn.multiclass.OneVsRestClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/fit/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/fit/X", + "name": "X", + "qname": "sklearn.multiclass.OneVsRestClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "(sparse) array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data." + }, + "type": { + "kind": "NamedType", + "name": "(sparse) array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/fit/y", + "name": "y", + "qname": "sklearn.multiclass.OneVsRestClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "(sparse) array-like of shape (n_samples,) or (n_samples, n_classes)", + "default_value": "", + "description": "Multi-class targets. An indicator matrix turns on multilabel\nclassification." + }, + "type": { + "kind": "NamedType", + "name": "(sparse) array-like of shape (n_samples,) or (n_samples, n_classes)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit underlying estimators.", + "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Multi-class targets. An indicator matrix turns on multilabel\n classification.\n\nReturns\n-------\nself", + "code": " def fit(self, X, y):\n \"\"\"Fit underlying estimators.\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Multi-class targets. An indicator matrix turns on multilabel\n classification.\n\n Returns\n -------\n self\n \"\"\"\n # A sparse LabelBinarizer, with sparse_output=True, has been shown to\n # outperform or match a dense label binarizer in all cases and has also\n # resulted in less or equal memory consumption in the fit_ovr function\n # overall.\n self.label_binarizer_ = LabelBinarizer(sparse_output=True)\n Y = self.label_binarizer_.fit_transform(y)\n Y = Y.tocsc()\n self.classes_ = self.label_binarizer_.classes_\n columns = (col.toarray().ravel() for col in Y.T)\n # In cases where individual estimators are very fast to train setting\n # n_jobs > 1 in can results in slower performance due to the overhead\n # of spawning threads. 
See joblib issue #112.\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(delayed(_fit_binary)(\n self.estimator, X, column, classes=[\n \"not %s\" % self.label_binarizer_.classes_[i],\n self.label_binarizer_.classes_[i]])\n for i, column in enumerate(columns))\n\n return self" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/intercept_@getter", + "name": "intercept_", + "qname": "sklearn.multiclass.OneVsRestClassifier.intercept_", + "decorators": [ + "deprecated('Attribute intercept_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26). If you observe this warning while using RFE or SelectFromModel, use the importance_getter parameter instead.')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/intercept_/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier.intercept_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute intercept_ was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26). \"\n \"If you observe this warning while using RFE \"\n \"or SelectFromModel, use the importance_getter \"\n \"parameter instead.\")\n @property\n def intercept_(self):\n check_is_fitted(self)\n if not hasattr(self.estimators_[0], \"intercept_\"):\n raise AttributeError(\n \"Base estimator doesn't have an intercept_ attribute.\")\n return np.array([e.intercept_.ravel() for e in self.estimators_])" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/multilabel_@getter", + "name": "multilabel_", + "qname": "sklearn.multiclass.OneVsRestClassifier.multilabel_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/multilabel_/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier.multilabel_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Whether this is a multilabel classifier", + "docstring": "Whether this is a multilabel classifier", + "code": " @property\n def multilabel_(self):\n \"\"\"Whether this is a multilabel classifier\"\"\"\n return self.label_binarizer_.y_type_.startswith('multilabel')" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/n_classes_@getter", + "name": "n_classes_", + "qname": "sklearn.multiclass.OneVsRestClassifier.n_classes_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/n_classes_/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier.n_classes_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def n_classes_(self):\n return len(self.classes_)" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/n_features_in_@getter", + "name": "n_features_in_", + "qname": 
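The fit body above reduces to a binary-relevance loop over binarized label columns. A standalone sketch of that decomposition, assuming iris and LogisticRegression (the real implementation additionally parallelizes via joblib and labels the two classes "not <label>" / "<label>"):

# Sketch of OneVsRestClassifier.fit: binarize y into one sparse 0/1 column
# per class, then fit one clone of the base estimator per column.
from sklearn.base import clone
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelBinarizer

X, y = load_iris(return_X_y=True)
lb = LabelBinarizer(sparse_output=True)
Y = lb.fit_transform(y).tocsc()
base = LogisticRegression(max_iter=1000)
estimators = [clone(base).fit(X, col.toarray().ravel()) for col in Y.T]
print(lb.classes_, len(estimators))   # [0 1 2] 3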
"sklearn.multiclass.OneVsRestClassifier.n_features_in_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/n_features_in_/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier.n_features_in_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def n_features_in_(self):\n # For consistency with other estimators we raise a AttributeError so\n # that hasattr() fails if the OVR estimator isn't fitted.\n try:\n check_is_fitted(self)\n except NotFittedError as nfe:\n raise AttributeError(\n \"{} object has no n_features_in_ attribute.\"\n .format(self.__class__.__name__)\n ) from nfe\n return self.estimators_[0].n_features_in_" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/partial_fit", + "name": "partial_fit", + "qname": "sklearn.multiclass.OneVsRestClassifier.partial_fit", + "decorators": ["if_delegate_has_method('estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/partial_fit/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/partial_fit/X", + "name": "X", + "qname": "sklearn.multiclass.OneVsRestClassifier.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "(sparse) array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data." + }, + "type": { + "kind": "NamedType", + "name": "(sparse) array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/partial_fit/y", + "name": "y", + "qname": "sklearn.multiclass.OneVsRestClassifier.partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "(sparse) array-like of shape (n_samples,) or (n_samples, n_classes)", + "default_value": "", + "description": "Multi-class targets. An indicator matrix turns on multilabel\nclassification." + }, + "type": { + "kind": "NamedType", + "name": "(sparse) array-like of shape (n_samples,) or (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/partial_fit/classes", + "name": "classes", + "qname": "sklearn.multiclass.OneVsRestClassifier.partial_fit.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array, shape (n_classes, )", + "default_value": "", + "description": "Classes across all calls to partial_fit.\nCan be obtained via `np.unique(y_all)`, where y_all is the\ntarget vector of the entire dataset.\nThis argument is only required in the first call of partial_fit\nand can be omitted in the subsequent calls." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array" + }, + { + "kind": "NamedType", + "name": "shape (n_classes, )" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Partially fit underlying estimators\n\nShould be used when memory is inefficient to train all data.\nChunks of data can be passed in several iteration.", + "docstring": "Partially fit underlying estimators\n\nShould be used when memory is inefficient to train all data.\nChunks of data can be passed in several iteration.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Multi-class targets. An indicator matrix turns on multilabel\n classification.\n\nclasses : array, shape (n_classes, )\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is only required in the first call of partial_fit\n and can be omitted in the subsequent calls.\n\nReturns\n-------\nself", + "code": " @if_delegate_has_method('estimator')\n def partial_fit(self, X, y, classes=None):\n \"\"\"Partially fit underlying estimators\n\n Should be used when memory is inefficient to train all data.\n Chunks of data can be passed in several iteration.\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Multi-class targets. An indicator matrix turns on multilabel\n classification.\n\n classes : array, shape (n_classes, )\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is only required in the first call of partial_fit\n and can be omitted in the subsequent calls.\n\n Returns\n -------\n self\n \"\"\"\n if _check_partial_fit_first_call(self, classes):\n if not hasattr(self.estimator, \"partial_fit\"):\n raise ValueError((\"Base estimator {0}, doesn't have \"\n \"partial_fit method\").format(self.estimator))\n self.estimators_ = [clone(self.estimator) for _ in range\n (self.n_classes_)]\n\n # A sparse LabelBinarizer, with sparse_output=True, has been\n # shown to outperform or match a dense label binarizer in all\n # cases and has also resulted in less or equal memory consumption\n # in the fit_ovr function overall.\n self.label_binarizer_ = LabelBinarizer(sparse_output=True)\n self.label_binarizer_.fit(self.classes_)\n\n if len(np.setdiff1d(y, self.classes_)):\n raise ValueError((\"Mini-batch contains {0} while classes \" +\n \"must be subset of {1}\").format(np.unique(y),\n self.classes_))\n\n Y = self.label_binarizer_.transform(y)\n Y = Y.tocsc()\n columns = (col.toarray().ravel() for col in Y.T)\n\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n delayed(_partial_fit_binary)(estimator, X, column)\n for estimator, column in zip(self.estimators_, columns))\n\n return self" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/predict", + "name": "predict", + "qname": "sklearn.multiclass.OneVsRestClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/predict/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + 
"type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/predict/X", + "name": "X", + "qname": "sklearn.multiclass.OneVsRestClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "(sparse) array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data." + }, + "type": { + "kind": "NamedType", + "name": "(sparse) array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Predict multi-class targets using underlying estimators.", + "docstring": "Predict multi-class targets using underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Predicted multi-class targets.", + "code": " def predict(self, X):\n \"\"\"Predict multi-class targets using underlying estimators.\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n Returns\n -------\n y : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Predicted multi-class targets.\n \"\"\"\n check_is_fitted(self)\n\n n_samples = _num_samples(X)\n if self.label_binarizer_.y_type_ == \"multiclass\":\n maxima = np.empty(n_samples, dtype=float)\n maxima.fill(-np.inf)\n argmaxima = np.zeros(n_samples, dtype=int)\n for i, e in enumerate(self.estimators_):\n pred = _predict_binary(e, X)\n np.maximum(maxima, pred, out=maxima)\n argmaxima[maxima == pred] = i\n return self.classes_[argmaxima]\n else:\n if (hasattr(self.estimators_[0], \"decision_function\") and\n is_classifier(self.estimators_[0])):\n thresh = 0\n else:\n thresh = .5\n indices = array.array('i')\n indptr = array.array('i', [0])\n for e in self.estimators_:\n indices.extend(np.where(_predict_binary(e, X) > thresh)[0])\n indptr.append(len(indices))\n data = np.ones(len(indices), dtype=int)\n indicator = sp.csc_matrix((data, indices, indptr),\n shape=(n_samples, len(self.estimators_)))\n return self.label_binarizer_.inverse_transform(indicator)" + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.multiclass.OneVsRestClassifier.predict_proba", + "decorators": ["if_delegate_has_method(['_first_estimator', 'estimator'])"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.multiclass.OneVsRestClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OneVsRestClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.multiclass.OneVsRestClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Probability estimates.\n\nThe returned estimates for all classes are ordered by label of classes.\n\nNote that in the 
multilabel case, each sample can have any number of\nlabels. This returns the marginal probability that the given sample has\nthe label in question. For example, it is entirely consistent that two\nlabels both have a 90% probability of applying to a given sample.\n\nIn the single label multiclass case, the rows of the returned matrix\nsum to 1.", + "docstring": "Probability estimates.\n\nThe returned estimates for all classes are ordered by label of classes.\n\nNote that in the multilabel case, each sample can have any number of\nlabels. This returns the marginal probability that the given sample has\nthe label in question. For example, it is entirely consistent that two\nlabels both have a 90% probability of applying to a given sample.\n\nIn the single label multiclass case, the rows of the returned matrix\nsum to 1.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nT : (sparse) array-like of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in `self.classes_`.", + "code": " @if_delegate_has_method(['_first_estimator', 'estimator'])\n def predict_proba(self, X):\n \"\"\"Probability estimates.\n\n The returned estimates for all classes are ordered by label of classes.\n\n Note that in the multilabel case, each sample can have any number of\n labels. This returns the marginal probability that the given sample has\n the label in question. For example, it is entirely consistent that two\n labels both have a 90% probability of applying to a given sample.\n\n In the single label multiclass case, the rows of the returned matrix\n sum to 1.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n T : (sparse) array-like of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in `self.classes_`.\n \"\"\"\n check_is_fitted(self)\n # Y[i, j] gives the probability that sample i has the label j.\n # In the multi-label case, these are not disjoint.\n Y = np.array([e.predict_proba(X)[:, 1] for e in self.estimators_]).T\n\n if len(self.estimators_) == 1:\n # Only one estimator, but we still want to return probabilities\n # for two classes.\n Y = np.concatenate(((1 - Y), Y), axis=1)\n\n if not self.multilabel_:\n # Then, probabilities should be normalized to 1.\n Y /= np.sum(Y, axis=1)[:, np.newaxis]\n return Y" + }, + { + "id": "scikit-learn/sklearn.multiclass/OutputCodeClassifier/__init__", + "name": "__init__", + "qname": "sklearn.multiclass.OutputCodeClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OutputCodeClassifier/__init__/self", + "name": "self", + "qname": "sklearn.multiclass.OutputCodeClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OutputCodeClassifier/__init__/estimator", + "name": "estimator", + "qname": "sklearn.multiclass.OutputCodeClassifier.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "estimator object", + "default_value": "", + "description": "An estimator object implementing :term:`fit` and one of\n:term:`decision_function` or :term:`predict_proba`." 
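The marginal-probability caveat above is worth seeing once: multilabel rows need not sum to 1, while single-label rows are renormalized. A sketch assuming make_multilabel_classification and LogisticRegression:

# Multilabel predict_proba: one independent marginal per label.
from sklearn.datasets import make_multilabel_classification
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

X, Y = make_multilabel_classification(n_samples=100, n_classes=3,
                                      random_state=0)
ovr = OneVsRestClassifier(LogisticRegression(max_iter=1000)).fit(X, Y)
proba = ovr.predict_proba(X[:3])
print(proba.shape)         # (3, 3): one marginal per label
print(proba.sum(axis=1))   # generally not equal to 1 for multilabel targets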
+ }, + "type": { + "kind": "NamedType", + "name": "estimator object" + } + }, + { + "id": "scikit-learn/sklearn.multiclass/OutputCodeClassifier/__init__/code_size", + "name": "code_size", + "qname": "sklearn.multiclass.OutputCodeClassifier.__init__.code_size", + "default_value": "1.5", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "", + "description": "Percentage of the number of classes to be used to create the code book.\nA number between 0 and 1 will require fewer classifiers than\none-vs-the-rest. A number greater than 1 will require more classifiers\nthan one-vs-the-rest." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.multiclass/OutputCodeClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.multiclass.OutputCodeClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "The generator used to initialize the codebook.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.multiclass/OutputCodeClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.multiclass.OutputCodeClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of jobs to use for the computation: the multiclass problems\nare computed in parallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "(Error-Correcting) Output-Code multiclass strategy\n\nOutput-code based strategies consist in representing each class with a\nbinary code (an array of 0s and 1s). At fitting time, one binary\nclassifier per bit in the code book is fitted. At prediction time, the\nclassifiers are used to project new points in the class space and the class\nclosest to the points is chosen. The main advantage of these strategies is\nthat the number of classifiers used can be controlled by the user, either\nfor compressing the model (0 < code_size < 1) or for making the model more\nrobust to errors (code_size > 1). 
See the documentation for more details.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimator, *, code_size=1.5, random_state=None,\n n_jobs=None):\n self.estimator = estimator\n self.code_size = code_size\n self.random_state = random_state\n self.n_jobs = n_jobs" + }, + { + "id": "scikit-learn/sklearn.multiclass/OutputCodeClassifier/fit", + "name": "fit", + "qname": "sklearn.multiclass.OutputCodeClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OutputCodeClassifier/fit/self", + "name": "self", + "qname": "sklearn.multiclass.OutputCodeClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OutputCodeClassifier/fit/X", + "name": "X", + "qname": "sklearn.multiclass.OutputCodeClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "(sparse) array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data." + }, + "type": { + "kind": "NamedType", + "name": "(sparse) array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.multiclass/OutputCodeClassifier/fit/y", + "name": "y", + "qname": "sklearn.multiclass.OutputCodeClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "numpy array of shape [n_samples]", + "default_value": "", + "description": "Multi-class targets." + }, + "type": { + "kind": "NamedType", + "name": "numpy array of shape [n_samples]" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit underlying estimators.", + "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : numpy array of shape [n_samples]\n Multi-class targets.\n\nReturns\n-------\nself", + "code": " def fit(self, X, y):\n \"\"\"Fit underlying estimators.\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n y : numpy array of shape [n_samples]\n Multi-class targets.\n\n Returns\n -------\n self\n \"\"\"\n X, y = self._validate_data(X, y, accept_sparse=True)\n if self.code_size <= 0:\n raise ValueError(\"code_size should be greater than 0, got {0}\"\n \"\".format(self.code_size))\n\n _check_estimator(self.estimator)\n random_state = check_random_state(self.random_state)\n check_classification_targets(y)\n\n self.classes_ = np.unique(y)\n n_classes = self.classes_.shape[0]\n code_size_ = int(n_classes * self.code_size)\n\n # FIXME: there are more elaborate methods than generating the codebook\n # randomly.\n self.code_book_ = random_state.random_sample((n_classes, code_size_))\n self.code_book_[self.code_book_ > 0.5] = 1\n\n if hasattr(self.estimator, \"decision_function\"):\n self.code_book_[self.code_book_ != 1] = -1\n else:\n self.code_book_[self.code_book_ != 1] = 0\n\n classes_index = {c: i for i, c in enumerate(self.classes_)}\n\n Y = np.array([self.code_book_[classes_index[y[i]]]\n for i in range(X.shape[0])], dtype=int)\n\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n delayed(_fit_binary)(self.estimator, X, Y[:, i])\n for i in range(Y.shape[1]))\n\n return self" + }, + { + "id": 
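The code_size knob described above directly scales the number of binary estimators. A minimal sketch assuming iris and LinearSVC:

# code_size=2.0 with 3 classes yields int(3 * 2.0) = 6 binary classifiers,
# i.e. a (3, 6) codebook with one codeword per class.
from sklearn.datasets import load_iris
from sklearn.multiclass import OutputCodeClassifier
from sklearn.svm import LinearSVC

X, y = load_iris(return_X_y=True)
ecoc = OutputCodeClassifier(LinearSVC(random_state=0),
                            code_size=2.0, random_state=0).fit(X, y)
print(ecoc.code_book_.shape)   # (3, 6)
print(len(ecoc.estimators_))   # 6
print(ecoc.predict(X[:5]))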
"scikit-learn/sklearn.multiclass/OutputCodeClassifier/predict", + "name": "predict", + "qname": "sklearn.multiclass.OutputCodeClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/OutputCodeClassifier/predict/self", + "name": "self", + "qname": "sklearn.multiclass.OutputCodeClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/OutputCodeClassifier/predict/X", + "name": "X", + "qname": "sklearn.multiclass.OutputCodeClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "(sparse) array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data." + }, + "type": { + "kind": "NamedType", + "name": "(sparse) array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Predict multi-class targets using underlying estimators.", + "docstring": "Predict multi-class targets using underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : numpy array of shape [n_samples]\n Predicted multi-class targets.", + "code": " def predict(self, X):\n \"\"\"Predict multi-class targets using underlying estimators.\n\n Parameters\n ----------\n X : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\n Returns\n -------\n y : numpy array of shape [n_samples]\n Predicted multi-class targets.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse=True)\n Y = np.array([_predict_binary(e, X) for e in self.estimators_]).T\n pred = euclidean_distances(Y, self.code_book_).argmin(axis=1)\n return self.classes_[pred]" + }, + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor/decision_function", + "name": "decision_function", + "qname": "sklearn.multiclass._ConstantPredictor.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor/decision_function/self", + "name": "self", + "qname": "sklearn.multiclass._ConstantPredictor.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor/decision_function/X", + "name": "X", + "qname": "sklearn.multiclass._ConstantPredictor.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def decision_function(self, X):\n check_is_fitted(self)\n\n return np.repeat(self.y_, X.shape[0])" + }, + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor/fit", + "name": "fit", + "qname": "sklearn.multiclass._ConstantPredictor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor/fit/self", + "name": "self", + "qname": "sklearn.multiclass._ConstantPredictor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": 
"", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor/fit/X", + "name": "X", + "qname": "sklearn.multiclass._ConstantPredictor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor/fit/y", + "name": "y", + "qname": "sklearn.multiclass._ConstantPredictor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def fit(self, X, y):\n self.y_ = y\n return self" + }, + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor/predict", + "name": "predict", + "qname": "sklearn.multiclass._ConstantPredictor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor/predict/self", + "name": "self", + "qname": "sklearn.multiclass._ConstantPredictor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor/predict/X", + "name": "X", + "qname": "sklearn.multiclass._ConstantPredictor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def predict(self, X):\n check_is_fitted(self)\n\n return np.repeat(self.y_, X.shape[0])" + }, + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor/predict_proba", + "name": "predict_proba", + "qname": "sklearn.multiclass._ConstantPredictor.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor/predict_proba/self", + "name": "self", + "qname": "sklearn.multiclass._ConstantPredictor.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_ConstantPredictor/predict_proba/X", + "name": "X", + "qname": "sklearn.multiclass._ConstantPredictor.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def predict_proba(self, X):\n check_is_fitted(self)\n\n return np.repeat([np.hstack([1 - self.y_, self.y_])],\n X.shape[0], axis=0)" + }, + { + "id": "scikit-learn/sklearn.multiclass/_check_estimator", + "name": "_check_estimator", + "qname": "sklearn.multiclass._check_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/_check_estimator/estimator", + "name": "estimator", + "qname": "sklearn.multiclass._check_estimator.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + 
"type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Make sure that an estimator implements the necessary methods.", + "docstring": "Make sure that an estimator implements the necessary methods.", + "code": "def _check_estimator(estimator):\n \"\"\"Make sure that an estimator implements the necessary methods.\"\"\"\n if (not hasattr(estimator, \"decision_function\") and\n not hasattr(estimator, \"predict_proba\")):\n raise ValueError(\"The base estimator should implement \"\n \"decision_function or predict_proba!\")" + }, + { + "id": "scikit-learn/sklearn.multiclass/_fit_binary", + "name": "_fit_binary", + "qname": "sklearn.multiclass._fit_binary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/_fit_binary/estimator", + "name": "estimator", + "qname": "sklearn.multiclass._fit_binary.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_fit_binary/X", + "name": "X", + "qname": "sklearn.multiclass._fit_binary.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_fit_binary/y", + "name": "y", + "qname": "sklearn.multiclass._fit_binary.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_fit_binary/classes", + "name": "classes", + "qname": "sklearn.multiclass._fit_binary.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit a single binary estimator.", + "docstring": "Fit a single binary estimator.", + "code": "def _fit_binary(estimator, X, y, classes=None):\n \"\"\"Fit a single binary estimator.\"\"\"\n unique_y = np.unique(y)\n if len(unique_y) == 1:\n if classes is not None:\n if y[0] == -1:\n c = 0\n else:\n c = y[0]\n warnings.warn(\"Label %s is present in all training examples.\" %\n str(classes[c]))\n estimator = _ConstantPredictor().fit(X, unique_y)\n else:\n estimator = clone(estimator)\n estimator.fit(X, y)\n return estimator" + }, + { + "id": "scikit-learn/sklearn.multiclass/_fit_ovo_binary", + "name": "_fit_ovo_binary", + "qname": "sklearn.multiclass._fit_ovo_binary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/_fit_ovo_binary/estimator", + "name": "estimator", + "qname": "sklearn.multiclass._fit_ovo_binary.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_fit_ovo_binary/X", + "name": "X", + "qname": "sklearn.multiclass._fit_ovo_binary.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.multiclass/_fit_ovo_binary/y", + "name": "y", + "qname": "sklearn.multiclass._fit_ovo_binary.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_fit_ovo_binary/i", + "name": "i", + "qname": "sklearn.multiclass._fit_ovo_binary.i", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_fit_ovo_binary/j", + "name": "j", + "qname": "sklearn.multiclass._fit_ovo_binary.j", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit a single binary estimator (one-vs-one).", + "docstring": "Fit a single binary estimator (one-vs-one).", + "code": "def _fit_ovo_binary(estimator, X, y, i, j):\n \"\"\"Fit a single binary estimator (one-vs-one).\"\"\"\n cond = np.logical_or(y == i, y == j)\n y = y[cond]\n y_binary = np.empty(y.shape, int)\n y_binary[y == i] = 0\n y_binary[y == j] = 1\n indcond = np.arange(X.shape[0])[cond]\n return _fit_binary(estimator,\n _safe_split(estimator, X, None, indices=indcond)[0],\n y_binary, classes=[i, j]), indcond" + }, + { + "id": "scikit-learn/sklearn.multiclass/_partial_fit_binary", + "name": "_partial_fit_binary", + "qname": "sklearn.multiclass._partial_fit_binary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/_partial_fit_binary/estimator", + "name": "estimator", + "qname": "sklearn.multiclass._partial_fit_binary.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_partial_fit_binary/X", + "name": "X", + "qname": "sklearn.multiclass._partial_fit_binary.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_partial_fit_binary/y", + "name": "y", + "qname": "sklearn.multiclass._partial_fit_binary.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Partially fit a single binary estimator.", + "docstring": "Partially fit a single binary estimator.", + "code": "def _partial_fit_binary(estimator, X, y):\n \"\"\"Partially fit a single binary estimator.\"\"\"\n estimator.partial_fit(X, y, np.array((0, 1)))\n return estimator" + }, + { + "id": "scikit-learn/sklearn.multiclass/_partial_fit_ovo_binary", + "name": "_partial_fit_ovo_binary", + "qname": "sklearn.multiclass._partial_fit_ovo_binary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/_partial_fit_ovo_binary/estimator", + "name": "estimator", + "qname": "sklearn.multiclass._partial_fit_ovo_binary.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + 
"type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_partial_fit_ovo_binary/X", + "name": "X", + "qname": "sklearn.multiclass._partial_fit_ovo_binary.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_partial_fit_ovo_binary/y", + "name": "y", + "qname": "sklearn.multiclass._partial_fit_ovo_binary.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_partial_fit_ovo_binary/i", + "name": "i", + "qname": "sklearn.multiclass._partial_fit_ovo_binary.i", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_partial_fit_ovo_binary/j", + "name": "j", + "qname": "sklearn.multiclass._partial_fit_ovo_binary.j", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Partially fit a single binary estimator(one-vs-one).", + "docstring": "Partially fit a single binary estimator(one-vs-one).", + "code": "def _partial_fit_ovo_binary(estimator, X, y, i, j):\n \"\"\"Partially fit a single binary estimator(one-vs-one).\"\"\"\n\n cond = np.logical_or(y == i, y == j)\n y = y[cond]\n if len(y) != 0:\n y_binary = np.zeros_like(y)\n y_binary[y == j] = 1\n return _partial_fit_binary(estimator, X[cond], y_binary)\n return estimator" + }, + { + "id": "scikit-learn/sklearn.multiclass/_predict_binary", + "name": "_predict_binary", + "qname": "sklearn.multiclass._predict_binary", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multiclass/_predict_binary/estimator", + "name": "estimator", + "qname": "sklearn.multiclass._predict_binary.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multiclass/_predict_binary/X", + "name": "X", + "qname": "sklearn.multiclass._predict_binary.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Make predictions using a single binary estimator.", + "docstring": "Make predictions using a single binary estimator.", + "code": "def _predict_binary(estimator, X):\n \"\"\"Make predictions using a single binary estimator.\"\"\"\n if is_regressor(estimator):\n return estimator.predict(X)\n try:\n score = np.ravel(estimator.decision_function(X))\n except (AttributeError, NotImplementedError):\n # probabilities of the positive class\n score = estimator.predict_proba(X)[:, 1]\n return score" + }, + { + "id": "scikit-learn/sklearn.multioutput/ClassifierChain/_more_tags", + "name": "_more_tags", + "qname": "sklearn.multioutput.ClassifierChain._more_tags", + "decorators": [], + "parameters": [ + { + 
"id": "scikit-learn/sklearn.multioutput/ClassifierChain/_more_tags/self", + "name": "self", + "qname": "sklearn.multioutput.ClassifierChain._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'_skip_test': True,\n 'multioutput_only': True}" + }, + { + "id": "scikit-learn/sklearn.multioutput/ClassifierChain/decision_function", + "name": "decision_function", + "qname": "sklearn.multioutput.ClassifierChain.decision_function", + "decorators": ["if_delegate_has_method('base_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/ClassifierChain/decision_function/self", + "name": "self", + "qname": "sklearn.multioutput.ClassifierChain.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/ClassifierChain/decision_function/X", + "name": "X", + "qname": "sklearn.multioutput.ClassifierChain.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Evaluate the decision_function of the models in the chain.", + "docstring": "Evaluate the decision_function of the models in the chain.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nY_decision : array-like of shape (n_samples, n_classes)\n Returns the decision function of the sample for each model\n in the chain.", + "code": " @if_delegate_has_method('base_estimator')\n def decision_function(self, X):\n \"\"\"Evaluate the decision_function of the models in the chain.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n Y_decision : array-like of shape (n_samples, n_classes)\n Returns the decision function of the sample for each model\n in the chain.\n \"\"\"\n Y_decision_chain = np.zeros((X.shape[0], len(self.estimators_)))\n Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))\n for chain_idx, estimator in enumerate(self.estimators_):\n previous_predictions = Y_pred_chain[:, :chain_idx]\n if sp.issparse(X):\n X_aug = sp.hstack((X, previous_predictions))\n else:\n X_aug = np.hstack((X, previous_predictions))\n Y_decision_chain[:, chain_idx] = estimator.decision_function(X_aug)\n Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)\n\n inv_order = np.empty_like(self.order_)\n inv_order[self.order_] = np.arange(len(self.order_))\n Y_decision = Y_decision_chain[:, inv_order]\n\n return Y_decision" + }, + { + "id": "scikit-learn/sklearn.multioutput/ClassifierChain/fit", + "name": "fit", + "qname": "sklearn.multioutput.ClassifierChain.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/ClassifierChain/fit/self", + "name": "self", + "qname": "sklearn.multioutput.ClassifierChain.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + 
"type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/ClassifierChain/fit/X", + "name": "X", + "qname": "sklearn.multioutput.ClassifierChain.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.multioutput/ClassifierChain/fit/Y", + "name": "Y", + "qname": "sklearn.multioutput.ClassifierChain.fit.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_classes)", + "default_value": "", + "description": "The target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_classes)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit the model to data matrix X and targets Y.", + "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, Y):\n \"\"\"Fit the model to data matrix X and targets Y.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n Y : array-like of shape (n_samples, n_classes)\n The target values.\n\n Returns\n -------\n self : object\n \"\"\"\n super().fit(X, Y)\n self.classes_ = [estimator.classes_\n for chain_idx, estimator\n in enumerate(self.estimators_)]\n return self" + }, + { + "id": "scikit-learn/sklearn.multioutput/ClassifierChain/predict_proba", + "name": "predict_proba", + "qname": "sklearn.multioutput.ClassifierChain.predict_proba", + "decorators": ["if_delegate_has_method('base_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/ClassifierChain/predict_proba/self", + "name": "self", + "qname": "sklearn.multioutput.ClassifierChain.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/ClassifierChain/predict_proba/X", + "name": "X", + "qname": "sklearn.multioutput.ClassifierChain.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Predict probability estimates.", + "docstring": "Predict probability estimates.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n\nReturns\n-------\nY_prob : array-like of shape (n_samples, n_classes)", + "code": " @if_delegate_has_method('base_estimator')\n def predict_proba(self, X):\n \"\"\"Predict 
probability estimates.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n\n Returns\n -------\n Y_prob : array-like of shape (n_samples, n_classes)\n \"\"\"\n X = check_array(X, accept_sparse=True)\n Y_prob_chain = np.zeros((X.shape[0], len(self.estimators_)))\n Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))\n for chain_idx, estimator in enumerate(self.estimators_):\n previous_predictions = Y_pred_chain[:, :chain_idx]\n if sp.issparse(X):\n X_aug = sp.hstack((X, previous_predictions))\n else:\n X_aug = np.hstack((X, previous_predictions))\n Y_prob_chain[:, chain_idx] = estimator.predict_proba(X_aug)[:, 1]\n Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)\n inv_order = np.empty_like(self.order_)\n inv_order[self.order_] = np.arange(len(self.order_))\n Y_prob = Y_prob_chain[:, inv_order]\n\n return Y_prob" + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/__init__", + "name": "__init__", + "qname": "sklearn.multioutput.MultiOutputClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/__init__/self", + "name": "self", + "qname": "sklearn.multioutput.MultiOutputClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/__init__/estimator", + "name": "estimator", + "qname": "sklearn.multioutput.MultiOutputClassifier.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "estimator object", + "default_value": "", + "description": "An estimator object implementing :term:`fit`, :term:`score` and\n:term:`predict_proba`." + }, + "type": { + "kind": "NamedType", + "name": "estimator object" + } + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.multioutput.MultiOutputClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int or None", + "default_value": "(default=None)", + "description": "The number of jobs to run in parallel.\n:meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\nby the passed estimator) will be parallelized for each target.\n\nWhen individual estimators are fast to train or predict,\nusing ``n_jobs > 1`` can result in slower performance due\nto the parallelism overhead.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all available processes / threads.\nSee :term:`Glossary <n_jobs>` for more details.\n\n.. versionchanged:: 0.20\n `n_jobs` default changed from 1 to None" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Multi target classification\n\nThis strategy consists of fitting one classifier per target. 
This is a\nsimple strategy for extending classifiers that do not natively support\nmulti-target classification", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimator, *, n_jobs=None):\n super().__init__(estimator, n_jobs=n_jobs)" + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/_more_tags", + "name": "_more_tags", + "qname": "sklearn.multioutput.MultiOutputClassifier._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/_more_tags/self", + "name": "self", + "qname": "sklearn.multioutput.MultiOutputClassifier._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n # FIXME\n return {'_skip_test': True}" + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/_predict_proba", + "name": "_predict_proba", + "qname": "sklearn.multioutput.MultiOutputClassifier._predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/_predict_proba/self", + "name": "self", + "qname": "sklearn.multioutput.MultiOutputClassifier._predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/_predict_proba/X", + "name": "X", + "qname": "sklearn.multioutput.MultiOutputClassifier._predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _predict_proba(self, X):\n results = [estimator.predict_proba(X) for estimator in\n self.estimators_]\n return results" + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/fit", + "name": "fit", + "qname": "sklearn.multioutput.MultiOutputClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/fit/self", + "name": "self", + "qname": "sklearn.multioutput.MultiOutputClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/fit/X", + "name": "X", + "qname": "sklearn.multioutput.MultiOutputClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/fit/Y", + "name": "Y", + "qname": "sklearn.multioutput.MultiOutputClassifier.fit.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_classes)", + "default_value": "", + "description": "The target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.multioutput.MultiOutputClassifier.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted.\nOnly supported if the underlying classifier supports sample\nweights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/fit/fit_params", + "name": "fit_params", + "qname": "sklearn.multioutput.MultiOutputClassifier.fit.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "dict of string -> object", + "default_value": "", + "description": "Parameters passed to the ``estimator.fit`` method of each step.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "dict of string -> object" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit the model to data matrix X and targets Y.", + "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying classifier supports sample\n weights.\n**fit_params : dict of string -> object\n Parameters passed to the ``estimator.fit`` method of each step.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, Y, sample_weight=None, **fit_params):\n \"\"\"Fit the model to data matrix X and targets Y.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n Y : array-like of shape (n_samples, n_classes)\n The target values.\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying classifier supports sample\n weights.\n **fit_params : dict of string -> object\n Parameters passed to the ``estimator.fit`` method of each step.\n\n .. 
versionadded:: 0.23\n\n Returns\n -------\n self : object\n \"\"\"\n super().fit(X, Y, sample_weight, **fit_params)\n self.classes_ = [estimator.classes_ for estimator in self.estimators_]\n return self" + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/predict_proba@getter", + "name": "predict_proba", + "qname": "sklearn.multioutput.MultiOutputClassifier.predict_proba", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.multioutput.MultiOutputClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Probability estimates.\nReturns prediction probabilities for each class of each output.\n\nThis method will raise a ``ValueError`` if any of the\nestimators do not have ``predict_proba``.", + "docstring": "Probability estimates.\nReturns prediction probabilities for each class of each output.\n\nThis method will raise a ``ValueError`` if any of the\nestimators do not have ``predict_proba``.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data\n\nReturns\n-------\np : array of shape (n_samples, n_classes), or a list of n_outputs such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n\n .. versionchanged:: 0.19\n This function now returns a list of arrays where the length of\n the list is ``n_outputs``, and each array is (``n_samples``,\n ``n_classes``) for that particular output.", + "code": " @property\n def predict_proba(self):\n \"\"\"Probability estimates.\n Returns prediction probabilities for each class of each output.\n\n This method will raise a ``ValueError`` if any of the\n estimators do not have ``predict_proba``.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data\n\n Returns\n -------\n p : array of shape (n_samples, n_classes), or a list of n_outputs \\\n such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n\n .. 
versionchanged:: 0.19\n This function now returns a list of arrays where the length of\n the list is ``n_outputs``, and each array is (``n_samples``,\n ``n_classes``) for that particular output.\n \"\"\"\n check_is_fitted(self)\n if not all([hasattr(estimator, \"predict_proba\")\n for estimator in self.estimators_]):\n raise AttributeError(\"The base estimator should \"\n \"implement predict_proba method\")\n return self._predict_proba" + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/score", + "name": "score", + "qname": "sklearn.multioutput.MultiOutputClassifier.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/score/self", + "name": "self", + "qname": "sklearn.multioutput.MultiOutputClassifier.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/score/X", + "name": "X", + "qname": "sklearn.multioutput.MultiOutputClassifier.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Test samples" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputClassifier/score/y", + "name": "y", + "qname": "sklearn.multioutput.MultiOutputClassifier.score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_outputs)", + "default_value": "", + "description": "True values for X" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_outputs)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the mean accuracy on the given test data and labels.", + "docstring": "Returns the mean accuracy on the given test data and labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples\n\ny : array-like of shape (n_samples, n_outputs)\n True values for X\n\nReturns\n-------\nscores : float\n accuracy_score of self.predict(X) versus y", + "code": " def score(self, X, y):\n \"\"\"Returns the mean accuracy on the given test data and labels.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Test samples\n\n y : array-like of shape (n_samples, n_outputs)\n True values for X\n\n Returns\n -------\n scores : float\n accuracy_score of self.predict(X) versus y\n \"\"\"\n check_is_fitted(self)\n n_outputs_ = len(self.estimators_)\n if y.ndim == 1:\n raise ValueError(\"y must have at least two dimensions for \"\n \"multi target classification but has only one\")\n if y.shape[1] != n_outputs_:\n raise ValueError(\"The number of outputs of Y for fit {0} and\"\n \" score {1} should be same\".\n format(n_outputs_, y.shape[1]))\n y_pred = self.predict(X)\n return np.mean(np.all(y == y_pred, axis=1))" + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputRegressor/__init__", + "name": "__init__", + "qname": "sklearn.multioutput.MultiOutputRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputRegressor/__init__/self", + "name": "self", + "qname": 
"sklearn.multioutput.MultiOutputRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputRegressor/__init__/estimator", + "name": "estimator", + "qname": "sklearn.multioutput.MultiOutputRegressor.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "estimator object", + "default_value": "", + "description": "An estimator object implementing :term:`fit` and :term:`predict`." + }, + "type": { + "kind": "NamedType", + "name": "estimator object" + } + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputRegressor/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.multioutput.MultiOutputRegressor.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int or None", + "default_value": "(default=None)", + "description": "The number of jobs to run in parallel.\n:meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\nby the passed estimator) will be parallelized for each target.\n\nWhen individual estimators are fast to train or predict,\nusing ``n_jobs > 1`` can result in slower performance due\nto the parallelism overhead.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all available processes / threads.\nSee :term:`Glossary ` for more details.\n\n.. versionchanged:: 0.20\n `n_jobs` default changed from 1 to None" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Multi target regression\n\nThis strategy consists of fitting one regressor per target. This is a\nsimple strategy for extending regressors that do not natively support\nmulti-target regression.\n\n.. versionadded:: 0.18", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, estimator, *, n_jobs=None):\n super().__init__(estimator, n_jobs=n_jobs)" + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputRegressor/partial_fit", + "name": "partial_fit", + "qname": "sklearn.multioutput.MultiOutputRegressor.partial_fit", + "decorators": ["if_delegate_has_method('estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputRegressor/partial_fit/self", + "name": "self", + "qname": "sklearn.multioutput.MultiOutputRegressor.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputRegressor/partial_fit/X", + "name": "X", + "qname": "sklearn.multioutput.MultiOutputRegressor.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputRegressor/partial_fit/y", + "name": "y", + "qname": "sklearn.multioutput.MultiOutputRegressor.partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_outputs)", + "default_value": "", + "description": "Multi-output targets." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_outputs)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.multioutput/MultiOutputRegressor/partial_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.multioutput.MultiOutputRegressor.partial_fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted.\nOnly supported if the underlying regressor supports sample\nweights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Incrementally fit the model to data.\nFit a separate model for each output variable.", + "docstring": "Incrementally fit the model to data.\nFit a separate model for each output variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\nReturns\n-------\nself : object", + "code": " @if_delegate_has_method('estimator')\n def partial_fit(self, X, y, sample_weight=None):\n \"\"\"Incrementally fit the model to data.\n Fit a separate model for each output variable.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\n y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\n Returns\n -------\n self : object\n \"\"\"\n super().partial_fit(\n X, y, sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.multioutput/RegressorChain/_more_tags", + "name": "_more_tags", + "qname": "sklearn.multioutput.RegressorChain._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/RegressorChain/_more_tags/self", + "name": "self", + "qname": "sklearn.multioutput.RegressorChain._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'multioutput_only': True}" + }, + { + "id": "scikit-learn/sklearn.multioutput/RegressorChain/fit", + "name": "fit", + "qname": "sklearn.multioutput.RegressorChain.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/RegressorChain/fit/self", + "name": "self", + "qname": "sklearn.multioutput.RegressorChain.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/RegressorChain/fit/X", + "name": "X", + "qname": "sklearn.multioutput.RegressorChain.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.multioutput/RegressorChain/fit/Y", + "name": "Y", + "qname": "sklearn.multioutput.RegressorChain.fit.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_classes)", + "default_value": "", + "description": "The target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.multioutput/RegressorChain/fit/fit_params", + "name": "fit_params", + "qname": "sklearn.multioutput.RegressorChain.fit.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "dict of string -> object", + "default_value": "", + "description": "Parameters passed to the `fit` method at each step\nof the regressor chain.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "dict of string -> object" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit the model to data matrix X and targets Y.", + "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\n\n**fit_params : dict of string -> object\n Parameters passed to the `fit` method at each step\n of the regressor chain.\n\n .. 
versionadded:: 0.23\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, Y, **fit_params):\n \"\"\"Fit the model to data matrix X and targets Y.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n Y : array-like of shape (n_samples, n_classes)\n The target values.\n\n **fit_params : dict of string -> object\n Parameters passed to the `fit` method at each step\n of the regressor chain.\n\n .. versionadded:: 0.23\n\n Returns\n -------\n self : object\n \"\"\"\n super().fit(X, Y, **fit_params)\n return self" + }, + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/__init__", + "name": "__init__", + "qname": "sklearn.multioutput._BaseChain.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/__init__/self", + "name": "self", + "qname": "sklearn.multioutput._BaseChain.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.multioutput._BaseChain.__init__.base_estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/__init__/order", + "name": "order", + "qname": "sklearn.multioutput._BaseChain.__init__.order", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/__init__/cv", + "name": "cv", + "qname": "sklearn.multioutput._BaseChain.__init__.cv", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/__init__/random_state", + "name": "random_state", + "qname": "sklearn.multioutput._BaseChain.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, base_estimator, *, order=None, cv=None,\n random_state=None):\n self.base_estimator = base_estimator\n self.order = order\n self.cv = cv\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/fit", + "name": "fit", + "qname": "sklearn.multioutput._BaseChain.fit", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/fit/self", + "name": "self", + "qname": "sklearn.multioutput._BaseChain.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/fit/X", + "name": "X", + "qname": "sklearn.multioutput._BaseChain.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + 
"docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/fit/Y", + "name": "Y", + "qname": "sklearn.multioutput._BaseChain.fit.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_classes)", + "default_value": "", + "description": "The target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/fit/fit_params", + "name": "fit_params", + "qname": "sklearn.multioutput._BaseChain.fit.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "dict of string -> object", + "default_value": "", + "description": "Parameters passed to the `fit` method of each step.\n\n.. versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "dict of string -> object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model to data matrix X and targets Y.", + "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\n**fit_params : dict of string -> object\n Parameters passed to the `fit` method of each step.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object", + "code": " @abstractmethod\n def fit(self, X, Y, **fit_params):\n \"\"\"Fit the model to data matrix X and targets Y.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n Y : array-like of shape (n_samples, n_classes)\n The target values.\n **fit_params : dict of string -> object\n Parameters passed to the `fit` method of each step.\n\n .. 
versionadded:: 0.23\n\n Returns\n -------\n self : object\n \"\"\"\n X, Y = self._validate_data(X, Y, multi_output=True, accept_sparse=True)\n\n random_state = check_random_state(self.random_state)\n check_array(X, accept_sparse=True)\n self.order_ = self.order\n if isinstance(self.order_, tuple):\n self.order_ = np.array(self.order_)\n\n if self.order_ is None:\n self.order_ = np.array(range(Y.shape[1]))\n elif isinstance(self.order_, str):\n if self.order_ == 'random':\n self.order_ = random_state.permutation(Y.shape[1])\n elif sorted(self.order_) != list(range(Y.shape[1])):\n raise ValueError(\"invalid order\")\n\n self.estimators_ = [clone(self.base_estimator)\n for _ in range(Y.shape[1])]\n\n if self.cv is None:\n Y_pred_chain = Y[:, self.order_]\n if sp.issparse(X):\n X_aug = sp.hstack((X, Y_pred_chain), format='lil')\n X_aug = X_aug.tocsr()\n else:\n X_aug = np.hstack((X, Y_pred_chain))\n\n elif sp.issparse(X):\n Y_pred_chain = sp.lil_matrix((X.shape[0], Y.shape[1]))\n X_aug = sp.hstack((X, Y_pred_chain), format='lil')\n\n else:\n Y_pred_chain = np.zeros((X.shape[0], Y.shape[1]))\n X_aug = np.hstack((X, Y_pred_chain))\n\n del Y_pred_chain\n\n for chain_idx, estimator in enumerate(self.estimators_):\n y = Y[:, self.order_[chain_idx]]\n estimator.fit(X_aug[:, :(X.shape[1] + chain_idx)], y,\n **fit_params)\n if self.cv is not None and chain_idx < len(self.estimators_) - 1:\n col_idx = X.shape[1] + chain_idx\n cv_result = cross_val_predict(\n self.base_estimator, X_aug[:, :col_idx],\n y=y, cv=self.cv)\n if sp.issparse(X_aug):\n X_aug[:, col_idx] = np.expand_dims(cv_result, 1)\n else:\n X_aug[:, col_idx] = cv_result\n\n return self" + }, + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/predict", + "name": "predict", + "qname": "sklearn.multioutput._BaseChain.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/predict/self", + "name": "self", + "qname": "sklearn.multioutput._BaseChain.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_BaseChain/predict/X", + "name": "X", + "qname": "sklearn.multioutput._BaseChain.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict on the data matrix X using the ClassifierChain model.", + "docstring": "Predict on the data matrix X using the ClassifierChain model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\nY_pred : array-like of shape (n_samples, n_classes)\n The predicted values.", + "code": " def predict(self, X):\n \"\"\"Predict on the data matrix X using the ClassifierChain model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n Y_pred : array-like of shape (n_samples, n_classes)\n The predicted values.\n\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse=True)\n Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_)))\n for chain_idx, estimator in enumerate(self.estimators_):\n previous_predictions = Y_pred_chain[:, :chain_idx]\n if sp.issparse(X):\n if chain_idx == 0:\n X_aug = X\n else:\n X_aug = sp.hstack((X, previous_predictions))\n else:\n X_aug = np.hstack((X, previous_predictions))\n Y_pred_chain[:, chain_idx] = estimator.predict(X_aug)\n\n inv_order = np.empty_like(self.order_)\n inv_order[self.order_] = np.arange(len(self.order_))\n Y_pred = Y_pred_chain[:, inv_order]\n\n return Y_pred" + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/__init__", + "name": "__init__", + "qname": "sklearn.multioutput._MultiOutputEstimator.__init__", + "decorators": ["abstractmethod", "_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/__init__/self", + "name": "self", + "qname": "sklearn.multioutput._MultiOutputEstimator.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/__init__/estimator", + "name": "estimator", + "qname": "sklearn.multioutput._MultiOutputEstimator.__init__.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.multioutput._MultiOutputEstimator.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @abstractmethod\n @_deprecate_positional_args\n def __init__(self, estimator, *, n_jobs=None):\n self.estimator = estimator\n self.n_jobs = n_jobs" + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/_more_tags", + "name": "_more_tags", + "qname": "sklearn.multioutput._MultiOutputEstimator._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/_more_tags/self", + "name": "self", + "qname": "sklearn.multioutput._MultiOutputEstimator._more_tags.self", + 
"default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'multioutput_only': True}" + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/fit", + "name": "fit", + "qname": "sklearn.multioutput._MultiOutputEstimator.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/fit/self", + "name": "self", + "qname": "sklearn.multioutput._MultiOutputEstimator.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/fit/X", + "name": "X", + "qname": "sklearn.multioutput._MultiOutputEstimator.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/fit/y", + "name": "y", + "qname": "sklearn.multioutput._MultiOutputEstimator.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_outputs)", + "default_value": "", + "description": "Multi-output targets. An indicator matrix turns on multilabel\nestimation." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_outputs)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.multioutput._MultiOutputEstimator.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted.\nOnly supported if the underlying regressor supports sample\nweights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/fit/fit_params", + "name": "fit_params", + "qname": "sklearn.multioutput._MultiOutputEstimator.fit.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "dict of string -> object", + "default_value": "", + "description": "Parameters passed to the ``estimator.fit`` method of each step.\n\n.. 
versionadded:: 0.23" + }, + "type": { + "kind": "NamedType", + "name": "dict of string -> object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model to data.\nFit a separate model for each output variable.", + "docstring": "Fit the model to data.\nFit a separate model for each output variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets. An indicator matrix turns on multilabel\n estimation.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``estimator.fit`` method of each step.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None, **fit_params):\n \"\"\" Fit the model to data.\n Fit a separate model for each output variable.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\n y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets. An indicator matrix turns on multilabel\n estimation.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\n **fit_params : dict of string -> object\n Parameters passed to the ``estimator.fit`` method of each step.\n\n .. versionadded:: 0.23\n\n Returns\n -------\n self : object\n \"\"\"\n\n if not hasattr(self.estimator, \"fit\"):\n raise ValueError(\"The base estimator should implement\"\n \" a fit method\")\n\n X, y = self._validate_data(X, y,\n force_all_finite=False,\n multi_output=True, accept_sparse=True)\n\n if is_classifier(self):\n check_classification_targets(y)\n\n if y.ndim == 1:\n raise ValueError(\"y must have at least two dimensions for \"\n \"multi-output regression but has only one.\")\n\n if (sample_weight is not None and\n not has_fit_parameter(self.estimator, 'sample_weight')):\n raise ValueError(\"Underlying estimator does not support\"\n \" sample weights.\")\n\n fit_params_validated = _check_fit_params(X, fit_params)\n\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n delayed(_fit_estimator)(\n self.estimator, X, y[:, i], sample_weight,\n **fit_params_validated)\n for i in range(y.shape[1]))\n return self" + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/partial_fit", + "name": "partial_fit", + "qname": "sklearn.multioutput._MultiOutputEstimator.partial_fit", + "decorators": ["if_delegate_has_method('estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/partial_fit/self", + "name": "self", + "qname": "sklearn.multioutput._MultiOutputEstimator.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/partial_fit/X", + "name": "X", + "qname": "sklearn.multioutput._MultiOutputEstimator.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, 
n_features)", + "default_value": "", + "description": "Data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/partial_fit/y", + "name": "y", + "qname": "sklearn.multioutput._MultiOutputEstimator.partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_outputs)", + "default_value": "", + "description": "Multi-output targets." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_outputs)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/partial_fit/classes", + "name": "classes", + "qname": "sklearn.multioutput._MultiOutputEstimator.partial_fit.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of ndarray of shape (n_outputs,)", + "default_value": "", + "description": "Each array is unique classes for one output in str/int\nCan be obtained by via\n``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where y is the\ntarget matrix of the entire dataset.\nThis argument is required for the first call to partial_fit\nand can be omitted in the subsequent calls.\nNote that y doesn't need to contain all labels in `classes`." + }, + "type": { + "kind": "NamedType", + "name": "list of ndarray of shape (n_outputs,)" + } + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/partial_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.multioutput._MultiOutputEstimator.partial_fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted.\nOnly supported if the underlying regressor supports sample\nweights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Incrementally fit the model to data.\nFit a separate model for each output variable.", + "docstring": "Incrementally fit the model to data.\nFit a separate model for each output variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets.\n\nclasses : list of ndarray of shape (n_outputs,)\n Each array is unique classes for one output in str/int\n Can be obtained by via\n ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where y is the\n target matrix of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. 
If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\nReturns\n-------\nself : object", + "code": " @if_delegate_has_method('estimator')\n def partial_fit(self, X, y, classes=None, sample_weight=None):\n \"\"\"Incrementally fit the model to data.\n Fit a separate model for each output variable.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\n y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets.\n\n classes : list of ndarray of shape (n_outputs,)\n Each array is unique classes for one output in str/int\n Can be obtained by via\n ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where y is the\n target matrix of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = check_X_y(X, y,\n force_all_finite=False,\n multi_output=True,\n accept_sparse=True)\n\n if y.ndim == 1:\n raise ValueError(\"y must have at least two dimensions for \"\n \"multi-output regression but has only one.\")\n\n if (sample_weight is not None and\n not has_fit_parameter(self.estimator, 'sample_weight')):\n raise ValueError(\"Underlying estimator does not support\"\n \" sample weights.\")\n\n first_time = not hasattr(self, 'estimators_')\n\n self.estimators_ = Parallel(n_jobs=self.n_jobs)(\n delayed(_partial_fit_estimator)(\n self.estimators_[i] if not first_time else self.estimator,\n X, y[:, i],\n classes[i] if classes is not None else None,\n sample_weight, first_time) for i in range(y.shape[1]))\n return self" + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/predict", + "name": "predict", + "qname": "sklearn.multioutput._MultiOutputEstimator.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/predict/self", + "name": "self", + "qname": "sklearn.multioutput._MultiOutputEstimator.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_MultiOutputEstimator/predict/X", + "name": "X", + "qname": "sklearn.multioutput._MultiOutputEstimator.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict multi-output variable using a model\n trained for each target variable.", + "docstring": "Predict multi-output variable using a model\n trained for each target variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets predicted across multiple predictors.\n Note: Separate models are generated for each predictor.", + "code": " def predict(self, X):\n \"\"\"Predict multi-output variable using a model\n trained for each target variable.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\n Returns\n -------\n y : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets predicted across multiple predictors.\n Note: Separate models are generated for each predictor.\n \"\"\"\n check_is_fitted(self)\n if not hasattr(self.estimators_[0], \"predict\"):\n raise ValueError(\"The base estimator should implement\"\n \" a predict method\")\n\n X = check_array(X, force_all_finite=False, accept_sparse=True)\n\n y = Parallel(n_jobs=self.n_jobs)(\n delayed(e.predict)(X)\n for e in self.estimators_)\n\n return np.asarray(y).T" + }, + { + "id": "scikit-learn/sklearn.multioutput/_fit_estimator", + "name": "_fit_estimator", + "qname": "sklearn.multioutput._fit_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/_fit_estimator/estimator", + "name": "estimator", + "qname": "sklearn.multioutput._fit_estimator.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_fit_estimator/X", + "name": "X", + "qname": "sklearn.multioutput._fit_estimator.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_fit_estimator/y", + "name": "y", + "qname": "sklearn.multioutput._fit_estimator.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_fit_estimator/sample_weight", + "name": "sample_weight", + "qname": "sklearn.multioutput._fit_estimator.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_fit_estimator/fit_params", + "name": "fit_params", + "qname": "sklearn.multioutput._fit_estimator.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _fit_estimator(estimator, X, y, sample_weight=None, 
**fit_params):\n estimator = clone(estimator)\n if sample_weight is not None:\n estimator.fit(X, y, sample_weight=sample_weight, **fit_params)\n else:\n estimator.fit(X, y, **fit_params)\n return estimator" + }, + { + "id": "scikit-learn/sklearn.multioutput/_partial_fit_estimator", + "name": "_partial_fit_estimator", + "qname": "sklearn.multioutput._partial_fit_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.multioutput/_partial_fit_estimator/estimator", + "name": "estimator", + "qname": "sklearn.multioutput._partial_fit_estimator.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_partial_fit_estimator/X", + "name": "X", + "qname": "sklearn.multioutput._partial_fit_estimator.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_partial_fit_estimator/y", + "name": "y", + "qname": "sklearn.multioutput._partial_fit_estimator.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_partial_fit_estimator/classes", + "name": "classes", + "qname": "sklearn.multioutput._partial_fit_estimator.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_partial_fit_estimator/sample_weight", + "name": "sample_weight", + "qname": "sklearn.multioutput._partial_fit_estimator.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.multioutput/_partial_fit_estimator/first_time", + "name": "first_time", + "qname": "sklearn.multioutput._partial_fit_estimator.first_time", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _partial_fit_estimator(estimator, X, y, classes=None, sample_weight=None,\n first_time=True):\n if first_time:\n estimator = clone(estimator)\n\n if sample_weight is not None:\n if classes is not None:\n estimator.partial_fit(X, y, classes=classes,\n sample_weight=sample_weight)\n else:\n estimator.partial_fit(X, y, sample_weight=sample_weight)\n else:\n if classes is not None:\n estimator.partial_fit(X, y, classes=classes)\n else:\n estimator.partial_fit(X, y)\n return estimator" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/__init__", + "name": "__init__", + "qname": "sklearn.naive_bayes.BernoulliNB.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/__init__/self", + "name": "self", + "qname": "sklearn.naive_bayes.BernoulliNB.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + 
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/__init__/alpha", + "name": "alpha", + "qname": "sklearn.naive_bayes.BernoulliNB.__init__.alpha", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Additive (Laplace/Lidstone) smoothing parameter\n(0 for no smoothing)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/__init__/binarize", + "name": "binarize", + "qname": "sklearn.naive_bayes.BernoulliNB.__init__.binarize", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float or None", + "default_value": "0.0", + "description": "Threshold for binarizing (mapping to booleans) of sample features.\nIf None, input is presumed to already consist of binary vectors." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/__init__/fit_prior", + "name": "fit_prior", + "qname": "sklearn.naive_bayes.BernoulliNB.__init__.fit_prior", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to learn class prior probabilities or not.\nIf false, a uniform prior will be used." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/__init__/class_prior", + "name": "class_prior", + "qname": "sklearn.naive_bayes.BernoulliNB.__init__.class_prior", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "Prior probabilities of the classes. If specified the priors are not\nadjusted according to the data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Naive Bayes classifier for multivariate Bernoulli models.\n\nLike MultinomialNB, this classifier is suitable for discrete data. 
The\ndifference is that while MultinomialNB works with occurrence counts,\nBernoulliNB is designed for binary/boolean features.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, alpha=1.0, binarize=.0, fit_prior=True,\n class_prior=None):\n self.alpha = alpha\n self.binarize = binarize\n self.fit_prior = fit_prior\n self.class_prior = class_prior" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_check_X", + "name": "_check_X", + "qname": "sklearn.naive_bayes.BernoulliNB._check_X", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_check_X/self", + "name": "self", + "qname": "sklearn.naive_bayes.BernoulliNB._check_X.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_check_X/X", + "name": "X", + "qname": "sklearn.naive_bayes.BernoulliNB._check_X.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_X(self, X):\n X = super()._check_X(X)\n if self.binarize is not None:\n X = binarize(X, threshold=self.binarize)\n return X" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_check_X_y", + "name": "_check_X_y", + "qname": "sklearn.naive_bayes.BernoulliNB._check_X_y", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_check_X_y/self", + "name": "self", + "qname": "sklearn.naive_bayes.BernoulliNB._check_X_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_check_X_y/X", + "name": "X", + "qname": "sklearn.naive_bayes.BernoulliNB._check_X_y.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_check_X_y/y", + "name": "y", + "qname": "sklearn.naive_bayes.BernoulliNB._check_X_y.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_X_y(self, X, y):\n X, y = super()._check_X_y(X, y)\n if self.binarize is not None:\n X = binarize(X, threshold=self.binarize)\n return X, y" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_count", + "name": "_count", + "qname": "sklearn.naive_bayes.BernoulliNB._count", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_count/self", + "name": "self", + "qname": "sklearn.naive_bayes.BernoulliNB._count.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_count/X", + "name": "X", 
+ "qname": "sklearn.naive_bayes.BernoulliNB._count.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_count/Y", + "name": "Y", + "qname": "sklearn.naive_bayes.BernoulliNB._count.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Count and smooth feature occurrences.", + "docstring": "Count and smooth feature occurrences.", + "code": " def _count(self, X, Y):\n \"\"\"Count and smooth feature occurrences.\"\"\"\n self.feature_count_ += safe_sparse_dot(Y.T, X)\n self.class_count_ += Y.sum(axis=0)" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_joint_log_likelihood", + "name": "_joint_log_likelihood", + "qname": "sklearn.naive_bayes.BernoulliNB._joint_log_likelihood", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_joint_log_likelihood/self", + "name": "self", + "qname": "sklearn.naive_bayes.BernoulliNB._joint_log_likelihood.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_joint_log_likelihood/X", + "name": "X", + "qname": "sklearn.naive_bayes.BernoulliNB._joint_log_likelihood.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate the posterior log probability of the samples X", + "docstring": "Calculate the posterior log probability of the samples X", + "code": " def _joint_log_likelihood(self, X):\n \"\"\"Calculate the posterior log probability of the samples X\"\"\"\n n_classes, n_features = self.feature_log_prob_.shape\n n_samples, n_features_X = X.shape\n\n if n_features_X != n_features:\n raise ValueError(\"Expected input with %d features, got %d instead\"\n % (n_features, n_features_X))\n\n neg_prob = np.log(1 - np.exp(self.feature_log_prob_))\n # Compute neg_prob \u00b7 (1 - X).T as \u2211neg_prob - X \u00b7 neg_prob\n jll = safe_sparse_dot(X, (self.feature_log_prob_ - neg_prob).T)\n jll += self.class_log_prior_ + neg_prob.sum(axis=1)\n\n return jll" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_update_feature_log_prob", + "name": "_update_feature_log_prob", + "qname": "sklearn.naive_bayes.BernoulliNB._update_feature_log_prob", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_update_feature_log_prob/self", + "name": "self", + "qname": "sklearn.naive_bayes.BernoulliNB._update_feature_log_prob.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/BernoulliNB/_update_feature_log_prob/alpha", + "name": "alpha", + "qname": "sklearn.naive_bayes.BernoulliNB._update_feature_log_prob.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + 
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply smoothing to raw counts and recompute log probabilities", + "docstring": "Apply smoothing to raw counts and recompute log probabilities", + "code": " def _update_feature_log_prob(self, alpha):\n \"\"\"Apply smoothing to raw counts and recompute log probabilities\"\"\"\n smoothed_fc = self.feature_count_ + alpha\n smoothed_cc = self.class_count_ + alpha * 2\n\n self.feature_log_prob_ = (np.log(smoothed_fc) -\n np.log(smoothed_cc.reshape(-1, 1)))" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/__init__", + "name": "__init__", + "qname": "sklearn.naive_bayes.CategoricalNB.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/__init__/self", + "name": "self", + "qname": "sklearn.naive_bayes.CategoricalNB.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/__init__/alpha", + "name": "alpha", + "qname": "sklearn.naive_bayes.CategoricalNB.__init__.alpha", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Additive (Laplace/Lidstone) smoothing parameter\n(0 for no smoothing)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/__init__/fit_prior", + "name": "fit_prior", + "qname": "sklearn.naive_bayes.CategoricalNB.__init__.fit_prior", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to learn class prior probabilities or not.\nIf false, a uniform prior will be used." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/__init__/class_prior", + "name": "class_prior", + "qname": "sklearn.naive_bayes.CategoricalNB.__init__.class_prior", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "Prior probabilities of the classes. If specified the priors are not\nadjusted according to the data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/__init__/min_categories", + "name": "min_categories", + "qname": "sklearn.naive_bayes.CategoricalNB.__init__.min_categories", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int or array-like of shape (n_features,)", + "default_value": "None", + "description": "Minimum number of categories per feature.\n\n- integer: Sets the minimum number of categories per feature to\n `n_categories` for each features.\n- array-like: shape (n_features,) where `n_categories[i]` holds the\n minimum number of categories for the ith column of the input.\n- None (default): Determines the number of categories automatically\n from the training data.\n\n.. 
versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Naive Bayes classifier for categorical features\n\nThe categorical Naive Bayes classifier is suitable for classification with\ndiscrete features that are categorically distributed. The categories of\neach feature are drawn from a categorical distribution.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None,\n min_categories=None):\n self.alpha = alpha\n self.fit_prior = fit_prior\n self.class_prior = class_prior\n self.min_categories = min_categories" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_check_X", + "name": "_check_X", + "qname": "sklearn.naive_bayes.CategoricalNB._check_X", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_check_X/self", + "name": "self", + "qname": "sklearn.naive_bayes.CategoricalNB._check_X.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_check_X/X", + "name": "X", + "qname": "sklearn.naive_bayes.CategoricalNB._check_X.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_X(self, X):\n X = check_array(X, dtype='int', accept_sparse=False,\n force_all_finite=True)\n check_non_negative(X, \"CategoricalNB (input X)\")\n return X" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_check_X_y", + "name": "_check_X_y", + "qname": "sklearn.naive_bayes.CategoricalNB._check_X_y", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_check_X_y/self", + "name": "self", + "qname": "sklearn.naive_bayes.CategoricalNB._check_X_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_check_X_y/X", + "name": "X", + "qname": "sklearn.naive_bayes.CategoricalNB._check_X_y.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_check_X_y/y", + "name": "y", + "qname": "sklearn.naive_bayes.CategoricalNB._check_X_y.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_X_y(self, X, y):\n X, y = self._validate_data(X, y, dtype='int', accept_sparse=False,\n force_all_finite=True)\n check_non_negative(X, \"CategoricalNB (input X)\")\n return X, y" + }, + { + "id": 
"scikit-learn/sklearn.naive_bayes/CategoricalNB/_count", + "name": "_count", + "qname": "sklearn.naive_bayes.CategoricalNB._count", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_count/self", + "name": "self", + "qname": "sklearn.naive_bayes.CategoricalNB._count.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_count/X", + "name": "X", + "qname": "sklearn.naive_bayes.CategoricalNB._count.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_count/Y", + "name": "Y", + "qname": "sklearn.naive_bayes.CategoricalNB._count.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _count(self, X, Y):\n def _update_cat_count_dims(cat_count, highest_feature):\n diff = highest_feature + 1 - cat_count.shape[1]\n if diff > 0:\n # we append a column full of zeros for each new category\n return np.pad(cat_count, [(0, 0), (0, diff)], 'constant')\n return cat_count\n\n def _update_cat_count(X_feature, Y, cat_count, n_classes):\n for j in range(n_classes):\n mask = Y[:, j].astype(bool)\n if Y.dtype.type == np.int64:\n weights = None\n else:\n weights = Y[mask, j]\n counts = np.bincount(X_feature[mask], weights=weights)\n indices = np.nonzero(counts)[0]\n cat_count[j, indices] += counts[indices]\n\n self.class_count_ += Y.sum(axis=0)\n self.n_categories_ = self._validate_n_categories(\n X, self.min_categories)\n for i in range(self.n_features_):\n X_feature = X[:, i]\n self.category_count_[i] = _update_cat_count_dims(\n self.category_count_[i], self.n_categories_[i] - 1)\n _update_cat_count(X_feature, Y,\n self.category_count_[i],\n self.class_count_.shape[0])" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_init_counters", + "name": "_init_counters", + "qname": "sklearn.naive_bayes.CategoricalNB._init_counters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_init_counters/self", + "name": "self", + "qname": "sklearn.naive_bayes.CategoricalNB._init_counters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_init_counters/n_effective_classes", + "name": "n_effective_classes", + "qname": "sklearn.naive_bayes.CategoricalNB._init_counters.n_effective_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_init_counters/n_features", + "name": "n_features", + "qname": "sklearn.naive_bayes.CategoricalNB._init_counters.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + 
"type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _init_counters(self, n_effective_classes, n_features):\n self.class_count_ = np.zeros(n_effective_classes, dtype=np.float64)\n self.category_count_ = [np.zeros((n_effective_classes, 0))\n for _ in range(n_features)]" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_joint_log_likelihood", + "name": "_joint_log_likelihood", + "qname": "sklearn.naive_bayes.CategoricalNB._joint_log_likelihood", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_joint_log_likelihood/self", + "name": "self", + "qname": "sklearn.naive_bayes.CategoricalNB._joint_log_likelihood.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_joint_log_likelihood/X", + "name": "X", + "qname": "sklearn.naive_bayes.CategoricalNB._joint_log_likelihood.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _joint_log_likelihood(self, X):\n if not X.shape[1] == self.n_features_:\n raise ValueError(\"Expected input with %d features, got %d instead\"\n % (self.n_features_, X.shape[1]))\n jll = np.zeros((X.shape[0], self.class_count_.shape[0]))\n for i in range(self.n_features_):\n indices = X[:, i]\n jll += self.feature_log_prob_[i][:, indices].T\n total_ll = jll + self.class_log_prior_\n return total_ll" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_more_tags", + "name": "_more_tags", + "qname": "sklearn.naive_bayes.CategoricalNB._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_more_tags/self", + "name": "self", + "qname": "sklearn.naive_bayes.CategoricalNB._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'requires_positive_X': True}" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_update_feature_log_prob", + "name": "_update_feature_log_prob", + "qname": "sklearn.naive_bayes.CategoricalNB._update_feature_log_prob", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_update_feature_log_prob/self", + "name": "self", + "qname": "sklearn.naive_bayes.CategoricalNB._update_feature_log_prob.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_update_feature_log_prob/alpha", + "name": "alpha", + "qname": "sklearn.naive_bayes.CategoricalNB._update_feature_log_prob.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + 
"reexported_by": [], + "description": "", + "docstring": "", + "code": " def _update_feature_log_prob(self, alpha):\n feature_log_prob = []\n for i in range(self.n_features_):\n smoothed_cat_count = self.category_count_[i] + alpha\n smoothed_class_count = smoothed_cat_count.sum(axis=1)\n feature_log_prob.append(\n np.log(smoothed_cat_count) -\n np.log(smoothed_class_count.reshape(-1, 1)))\n self.feature_log_prob_ = feature_log_prob" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_validate_n_categories", + "name": "_validate_n_categories", + "qname": "sklearn.naive_bayes.CategoricalNB._validate_n_categories", + "decorators": ["staticmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_validate_n_categories/X", + "name": "X", + "qname": "sklearn.naive_bayes.CategoricalNB._validate_n_categories.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/_validate_n_categories/min_categories", + "name": "min_categories", + "qname": "sklearn.naive_bayes.CategoricalNB._validate_n_categories.min_categories", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @staticmethod\n def _validate_n_categories(X, min_categories):\n # rely on max for n_categories categories are encoded between 0...n-1\n n_categories_X = X.max(axis=0) + 1\n min_categories_ = np.array(min_categories)\n if min_categories is not None:\n if not np.issubdtype(min_categories_.dtype, np.signedinteger):\n raise ValueError(\n f\"'min_categories' should have integral type. Got \"\n f\"{min_categories_.dtype} instead.\"\n )\n n_categories_ = np.maximum(n_categories_X,\n min_categories_,\n dtype=np.int64)\n if n_categories_.shape != n_categories_X.shape:\n raise ValueError(\n f\"'min_categories' should have shape ({X.shape[1]},\"\n f\") when an array-like is provided. Got\"\n f\" {min_categories_.shape} instead.\"\n )\n return n_categories_\n else:\n return n_categories_X" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/fit", + "name": "fit", + "qname": "sklearn.naive_bayes.CategoricalNB.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/fit/self", + "name": "self", + "qname": "sklearn.naive_bayes.CategoricalNB.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/fit/X", + "name": "X", + "qname": "sklearn.naive_bayes.CategoricalNB.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features. 
Here, each feature of X is\nassumed to be from a different categorical distribution.\nIt is further assumed that all categories of each feature are\nrepresented by the numbers 0, ..., n - 1, where n refers to the\ntotal number of categories for the given feature. This can, for\ninstance, be achieved with the help of OrdinalEncoder." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/fit/y", + "name": "y", + "qname": "sklearn.naive_bayes.CategoricalNB.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.naive_bayes.CategoricalNB.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples)", + "default_value": "None", + "description": "Weights applied to individual samples (1. for unweighted)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit Naive Bayes classifier according to X, y", + "docstring": "Fit Naive Bayes classifier according to X, y\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features. Here, each feature of X is\n assumed to be from a different categorical distribution.\n It is further assumed that all categories of each feature are\n represented by the numbers 0, ..., n - 1, where n refers to the\n total number of categories for the given feature. This can, for\n instance, be achieved with the help of OrdinalEncoder.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Naive Bayes classifier according to X, y\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features. Here, each feature of X is\n assumed to be from a different categorical distribution.\n It is further assumed that all categories of each feature are\n represented by the numbers 0, ..., n - 1, where n refers to the\n total number of categories for the given feature. This can, for\n instance, be achieved with the help of OrdinalEncoder.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\n Returns\n -------\n self : object\n \"\"\"\n return super().fit(X, y, sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/partial_fit", + "name": "partial_fit", + "qname": "sklearn.naive_bayes.CategoricalNB.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/partial_fit/self", + "name": "self", + "qname": "sklearn.naive_bayes.CategoricalNB.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/partial_fit/X", + "name": "X", + "qname": "sklearn.naive_bayes.CategoricalNB.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features. Here, each feature of X is\nassumed to be from a different categorical distribution.\nIt is further assumed that all categories of each feature are\nrepresented by the numbers 0, ..., n - 1, where n refers to the\ntotal number of categories for the given feature. This can, for\ninstance, be achieved with the help of OrdinalEncoder." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/partial_fit/y", + "name": "y", + "qname": "sklearn.naive_bayes.CategoricalNB.partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/partial_fit/classes", + "name": "classes", + "qname": "sklearn.naive_bayes.CategoricalNB.partial_fit.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_classes)", + "default_value": "None", + "description": "List of all the classes that can possibly appear in the y vector.\n\nMust be provided at the first call to partial_fit, can be omitted\nin subsequent calls." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/CategoricalNB/partial_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.naive_bayes.CategoricalNB.partial_fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples)", + "default_value": "None", + "description": "Weights applied to individual samples (1. for unweighted)." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance overhead hence it is better to call\npartial_fit on chunks of data that are as large as possible\n(as long as fitting in the memory budget) to hide the overhead.", + "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance overhead hence it is better to call\npartial_fit on chunks of data that are as large as possible\n(as long as fitting in the memory budget) to hide the overhead.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features. Here, each feature of X is\n assumed to be from a different categorical distribution.\n It is further assumed that all categories of each feature are\n represented by the numbers 0, ..., n - 1, where n refers to the\n total number of categories for the given feature. This can, for\n instance, be achieved with the help of OrdinalEncoder.\n\ny : array-like of shape (n_samples)\n Target values.\n\nclasses : array-like of shape (n_classes), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\nsample_weight : array-like of shape (n_samples), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object", + "code": " def partial_fit(self, X, y, classes=None, sample_weight=None):\n \"\"\"Incremental fit on a batch of samples.\n\n This method is expected to be called several times consecutively\n on different chunks of a dataset so as to implement out-of-core\n or online learning.\n\n This is especially useful when the whole dataset is too big to fit in\n memory at once.\n\n This method has some performance overhead hence it is better to call\n partial_fit on chunks of data that are as large as possible\n (as long as fitting in the memory budget) to hide the overhead.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features. Here, each feature of X is\n assumed to be from a different categorical distribution.\n It is further assumed that all categories of each feature are\n represented by the numbers 0, ..., n - 1, where n refers to the\n total number of categories for the given feature. 
This can, for\n instance, be achieved with the help of OrdinalEncoder.\n\n y : array-like of shape (n_samples)\n Target values.\n\n classes : array-like of shape (n_classes), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\n sample_weight : array-like of shape (n_samples), default=None\n Weights applied to individual samples (1. for unweighted).\n\n Returns\n -------\n self : object\n \"\"\"\n return super().partial_fit(X, y, classes,\n sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/__init__", + "name": "__init__", + "qname": "sklearn.naive_bayes.ComplementNB.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/__init__/self", + "name": "self", + "qname": "sklearn.naive_bayes.ComplementNB.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/__init__/alpha", + "name": "alpha", + "qname": "sklearn.naive_bayes.ComplementNB.__init__.alpha", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/__init__/fit_prior", + "name": "fit_prior", + "qname": "sklearn.naive_bayes.ComplementNB.__init__.fit_prior", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Only used in edge case with a single class in the training set." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/__init__/class_prior", + "name": "class_prior", + "qname": "sklearn.naive_bayes.ComplementNB.__init__.class_prior", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "Prior probabilities of the classes. Not used." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/__init__/norm", + "name": "norm", + "qname": "sklearn.naive_bayes.ComplementNB.__init__.norm", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether or not a second normalization of the weights is performed. The\ndefault behavior mirrors the implementations found in Mahout and Weka,\nwhich do not follow the full algorithm described in Table 9 of the\npaper." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\nThe Complement Naive Bayes classifier was designed to correct the \"severe\nassumptions\" made by the standard Multinomial Naive Bayes classifier. 
It is\nparticularly suited for imbalanced data sets.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None,\n norm=False):\n self.alpha = alpha\n self.fit_prior = fit_prior\n self.class_prior = class_prior\n self.norm = norm" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/_count", + "name": "_count", + "qname": "sklearn.naive_bayes.ComplementNB._count", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/_count/self", + "name": "self", + "qname": "sklearn.naive_bayes.ComplementNB._count.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/_count/X", + "name": "X", + "qname": "sklearn.naive_bayes.ComplementNB._count.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/_count/Y", + "name": "Y", + "qname": "sklearn.naive_bayes.ComplementNB._count.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Count feature occurrences.", + "docstring": "Count feature occurrences.", + "code": " def _count(self, X, Y):\n \"\"\"Count feature occurrences.\"\"\"\n check_non_negative(X, \"ComplementNB (input X)\")\n self.feature_count_ += safe_sparse_dot(Y.T, X)\n self.class_count_ += Y.sum(axis=0)\n self.feature_all_ = self.feature_count_.sum(axis=0)" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/_joint_log_likelihood", + "name": "_joint_log_likelihood", + "qname": "sklearn.naive_bayes.ComplementNB._joint_log_likelihood", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/_joint_log_likelihood/self", + "name": "self", + "qname": "sklearn.naive_bayes.ComplementNB._joint_log_likelihood.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/_joint_log_likelihood/X", + "name": "X", + "qname": "sklearn.naive_bayes.ComplementNB._joint_log_likelihood.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate the class scores for the samples in X.", + "docstring": "Calculate the class scores for the samples in X.", + "code": " def _joint_log_likelihood(self, X):\n \"\"\"Calculate the class scores for the samples in X.\"\"\"\n jll = safe_sparse_dot(X, self.feature_log_prob_.T)\n if len(self.classes_) == 1:\n jll += self.class_log_prior_\n return jll" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/_more_tags", + "name": "_more_tags", + "qname": "sklearn.naive_bayes.ComplementNB._more_tags", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.naive_bayes/ComplementNB/_more_tags/self", + "name": "self", + "qname": "sklearn.naive_bayes.ComplementNB._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'requires_positive_X': True}" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/_update_feature_log_prob", + "name": "_update_feature_log_prob", + "qname": "sklearn.naive_bayes.ComplementNB._update_feature_log_prob", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/_update_feature_log_prob/self", + "name": "self", + "qname": "sklearn.naive_bayes.ComplementNB._update_feature_log_prob.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/ComplementNB/_update_feature_log_prob/alpha", + "name": "alpha", + "qname": "sklearn.naive_bayes.ComplementNB._update_feature_log_prob.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply smoothing to raw counts and compute the weights.", + "docstring": "Apply smoothing to raw counts and compute the weights.", + "code": " def _update_feature_log_prob(self, alpha):\n \"\"\"Apply smoothing to raw counts and compute the weights.\"\"\"\n comp_count = self.feature_all_ + alpha - self.feature_count_\n logged = np.log(comp_count / comp_count.sum(axis=1, keepdims=True))\n # _BaseNB.predict uses argmax, but ComplementNB operates with argmin.\n if self.norm:\n summed = logged.sum(axis=1, keepdims=True)\n feature_log_prob = logged / summed\n else:\n feature_log_prob = -logged\n self.feature_log_prob_ = feature_log_prob" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/__init__", + "name": "__init__", + "qname": "sklearn.naive_bayes.GaussianNB.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/__init__/self", + "name": "self", + "qname": "sklearn.naive_bayes.GaussianNB.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/__init__/priors", + "name": "priors", + "qname": "sklearn.naive_bayes.GaussianNB.__init__.priors", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "", + "description": "Prior probabilities of the classes. If specified the priors are not\nadjusted according to the data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/__init__/var_smoothing", + "name": "var_smoothing", + "qname": "sklearn.naive_bayes.GaussianNB.__init__.var_smoothing", + "default_value": "1e-09", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1e-9", + "description": "Portion of the largest variance of all features that is added to\nvariances for calculation stability.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Gaussian Naive Bayes (GaussianNB)\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, priors=None, var_smoothing=1e-9):\n self.priors = priors\n self.var_smoothing = var_smoothing" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_check_X", + "name": "_check_X", + "qname": "sklearn.naive_bayes.GaussianNB._check_X", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_check_X/self", + "name": "self", + "qname": "sklearn.naive_bayes.GaussianNB._check_X.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_check_X/X", + "name": "X", + "qname": "sklearn.naive_bayes.GaussianNB._check_X.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_X(self, X):\n return check_array(X)" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_joint_log_likelihood", + "name": "_joint_log_likelihood", + "qname": "sklearn.naive_bayes.GaussianNB._joint_log_likelihood", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_joint_log_likelihood/self", + "name": "self", + "qname": "sklearn.naive_bayes.GaussianNB._joint_log_likelihood.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_joint_log_likelihood/X", + "name": "X", + "qname": "sklearn.naive_bayes.GaussianNB._joint_log_likelihood.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _joint_log_likelihood(self, X):\n joint_log_likelihood = []\n for i in range(np.size(self.classes_)):\n jointi = np.log(self.class_prior_[i])\n n_ij = - 0.5 * np.sum(np.log(2. 
* np.pi * self.sigma_[i, :]))\n n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) /\n (self.sigma_[i, :]), 1)\n joint_log_likelihood.append(jointi + n_ij)\n\n joint_log_likelihood = np.array(joint_log_likelihood).T\n return joint_log_likelihood" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_partial_fit", + "name": "_partial_fit", + "qname": "sklearn.naive_bayes.GaussianNB._partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_partial_fit/self", + "name": "self", + "qname": "sklearn.naive_bayes.GaussianNB._partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_partial_fit/X", + "name": "X", + "qname": "sklearn.naive_bayes.GaussianNB._partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_partial_fit/y", + "name": "y", + "qname": "sklearn.naive_bayes.GaussianNB._partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_partial_fit/classes", + "name": "classes", + "qname": "sklearn.naive_bayes.GaussianNB._partial_fit.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "List of all the classes that can possibly appear in the y vector.\n\nMust be provided at the first call to partial_fit, can be omitted\nin subsequent calls." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_partial_fit/_refit", + "name": "_refit", + "qname": "sklearn.naive_bayes.GaussianNB._partial_fit._refit", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If true, act as though this were the first time we called\n_partial_fit (ie, throw away any past fitting and start over)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_partial_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.naive_bayes.GaussianNB._partial_fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weights applied to individual samples (1. for unweighted)." 
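
The `GaussianNB._joint_log_likelihood` loop above is just the per-class Gaussian log-density plus the log prior. A hedged, vectorized re-derivation (iris is my choice of dataset; attribute names follow 0.24, where the per-class variances are stored in `sigma_` and later renamed `var_`) confirms it drives the public predictions:

```python
# Hedged sketch: recompute the joint log-likelihood without the Python loop.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB

X, y = load_iris(return_X_y=True)
clf = GaussianNB().fit(X, y)

log_prior = np.log(clf.class_prior_)                          # log P(c)
norm_term = -0.5 * np.sum(np.log(2.0 * np.pi * clf.sigma_), axis=1)
sq_term = -0.5 * (((X[:, None, :] - clf.theta_) ** 2) / clf.sigma_).sum(axis=2)
jll = log_prior + norm_term + sq_term                         # (n_samples, n_classes)

# predict() is argmax over these unnormalized class scores.
assert np.array_equal(clf.classes_[jll.argmax(axis=1)], clf.predict(X))
```
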
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Actual implementation of Gaussian NB fitting.", + "docstring": "Actual implementation of Gaussian NB fitting.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nclasses : array-like of shape (n_classes,), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\n_refit : bool, default=False\n If true, act as though this were the first time we called\n _partial_fit (ie, throw away any past fitting and start over).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object", + "code": " def _partial_fit(self, X, y, classes=None, _refit=False,\n sample_weight=None):\n \"\"\"Actual implementation of Gaussian NB fitting.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n classes : array-like of shape (n_classes,), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\n _refit : bool, default=False\n If true, act as though this were the first time we called\n _partial_fit (ie, throw away any past fitting and start over).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = check_X_y(X, y)\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n\n # If the ratio of data variance between dimensions is too small, it\n # will cause numerical errors. 
To address this, we artificially\n # boost the variance by epsilon, a small fraction of the standard\n # deviation of the largest dimension.\n self.epsilon_ = self.var_smoothing * np.var(X, axis=0).max()\n\n if _refit:\n self.classes_ = None\n\n if _check_partial_fit_first_call(self, classes):\n # This is the first call to partial_fit:\n # initialize various cumulative counters\n n_features = X.shape[1]\n n_classes = len(self.classes_)\n self.theta_ = np.zeros((n_classes, n_features))\n self.sigma_ = np.zeros((n_classes, n_features))\n\n self.class_count_ = np.zeros(n_classes, dtype=np.float64)\n\n # Initialise the class prior\n # Take into account the priors\n if self.priors is not None:\n priors = np.asarray(self.priors)\n # Check that the provide prior match the number of classes\n if len(priors) != n_classes:\n raise ValueError('Number of priors must match number of'\n ' classes.')\n # Check that the sum is 1\n if not np.isclose(priors.sum(), 1.0):\n raise ValueError('The sum of the priors should be 1.')\n # Check that the prior are non-negative\n if (priors < 0).any():\n raise ValueError('Priors must be non-negative.')\n self.class_prior_ = priors\n else:\n # Initialize the priors to zeros for each class\n self.class_prior_ = np.zeros(len(self.classes_),\n dtype=np.float64)\n else:\n if X.shape[1] != self.theta_.shape[1]:\n msg = \"Number of features %d does not match previous data %d.\"\n raise ValueError(msg % (X.shape[1], self.theta_.shape[1]))\n # Put epsilon back in each time\n self.sigma_[:, :] -= self.epsilon_\n\n classes = self.classes_\n\n unique_y = np.unique(y)\n unique_y_in_classes = np.in1d(unique_y, classes)\n\n if not np.all(unique_y_in_classes):\n raise ValueError(\"The target label(s) %s in y do not exist in the \"\n \"initial classes %s\" %\n (unique_y[~unique_y_in_classes], classes))\n\n for y_i in unique_y:\n i = classes.searchsorted(y_i)\n X_i = X[y == y_i, :]\n\n if sample_weight is not None:\n sw_i = sample_weight[y == y_i]\n N_i = sw_i.sum()\n else:\n sw_i = None\n N_i = X_i.shape[0]\n\n new_theta, new_sigma = self._update_mean_variance(\n self.class_count_[i], self.theta_[i, :], self.sigma_[i, :],\n X_i, sw_i)\n\n self.theta_[i, :] = new_theta\n self.sigma_[i, :] = new_sigma\n self.class_count_[i] += N_i\n\n self.sigma_[:, :] += self.epsilon_\n\n # Update if only no priors is provided\n if self.priors is None:\n # Empirical prior, with sample_weight taken into account\n self.class_prior_ = self.class_count_ / self.class_count_.sum()\n\n return self" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_update_mean_variance", + "name": "_update_mean_variance", + "qname": "sklearn.naive_bayes.GaussianNB._update_mean_variance", + "decorators": ["staticmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_update_mean_variance/n_past", + "name": "n_past", + "qname": "sklearn.naive_bayes.GaussianNB._update_mean_variance.n_past", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of samples represented in old mean and variance. If sample\nweights were given, this should contain the sum of sample\nweights represented in old mean and variance." 
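
Because the `_partial_fit` implementation above combines batches with an exact mean/variance update (delegated to `_update_mean_variance`, documented next), streaming the data in chunks reproduces a single full fit up to the tiny `epsilon_` jitter, which is recomputed from each batch. A minimal usage sketch (the 50/50 split and shuffle are my assumptions):

```python
# Hedged sketch: two partial_fit calls vs. one full fit on the same data.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB

X, y = load_iris(return_X_y=True)
rng = np.random.RandomState(0)
idx = rng.permutation(len(X))
X, y = X[idx], y[idx]

full = GaussianNB().fit(X, y)

streamed = GaussianNB()
streamed.partial_fit(X[:75], y[:75], classes=np.unique(y))  # classes required on first call
streamed.partial_fit(X[75:], y[75:])                        # omitted afterwards

assert np.allclose(streamed.theta_, full.theta_)
# sigma_ agrees up to the per-batch epsilon_ (~var_smoothing * max variance)
assert np.allclose(streamed.sigma_, full.sigma_)
assert (streamed.predict(X) == full.predict(X)).all()
```
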
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_update_mean_variance/mu", + "name": "mu", + "qname": "sklearn.naive_bayes.GaussianNB._update_mean_variance.mu", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (number of Gaussians,)", + "default_value": "", + "description": "Means for Gaussians in original set." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (number of Gaussians,)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_update_mean_variance/var", + "name": "var", + "qname": "sklearn.naive_bayes.GaussianNB._update_mean_variance.var", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (number of Gaussians,)", + "default_value": "", + "description": "Variances for Gaussians in original set." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (number of Gaussians,)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_update_mean_variance/X", + "name": "X", + "qname": "sklearn.naive_bayes.GaussianNB._update_mean_variance.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/_update_mean_variance/sample_weight", + "name": "sample_weight", + "qname": "sklearn.naive_bayes.GaussianNB._update_mean_variance.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weights applied to individual samples (1. for unweighted)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute online update of Gaussian mean and variance.\n\nGiven starting sample count, mean, and variance, a new set of\npoints X, and optionally sample weights, return the updated mean and\nvariance. (NB - each dimension (column) in X is treated as independent\n-- you get variance, not covariance).\n\nCan take scalar mean and variance, or vector mean and variance to\nsimultaneously update a number of independent Gaussians.\n\nSee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\nhttp://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf", + "docstring": "Compute online update of Gaussian mean and variance.\n\nGiven starting sample count, mean, and variance, a new set of\npoints X, and optionally sample weights, return the updated mean and\nvariance. (NB - each dimension (column) in X is treated as independent\n-- you get variance, not covariance).\n\nCan take scalar mean and variance, or vector mean and variance to\nsimultaneously update a number of independent Gaussians.\n\nSee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\nhttp://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nParameters\n----------\nn_past : int\n Number of samples represented in old mean and variance. 
If sample\n weights were given, this should contain the sum of sample\n weights represented in old mean and variance.\n\nmu : array-like of shape (number of Gaussians,)\n Means for Gaussians in original set.\n\nvar : array-like of shape (number of Gaussians,)\n Variances for Gaussians in original set.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\ntotal_mu : array-like of shape (number of Gaussians,)\n Updated mean for each Gaussian over the combined set.\n\ntotal_var : array-like of shape (number of Gaussians,)\n Updated variance for each Gaussian over the combined set.", + "code": " @staticmethod\n def _update_mean_variance(n_past, mu, var, X, sample_weight=None):\n \"\"\"Compute online update of Gaussian mean and variance.\n\n Given starting sample count, mean, and variance, a new set of\n points X, and optionally sample weights, return the updated mean and\n variance. (NB - each dimension (column) in X is treated as independent\n -- you get variance, not covariance).\n\n Can take scalar mean and variance, or vector mean and variance to\n simultaneously update a number of independent Gaussians.\n\n See Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\n Parameters\n ----------\n n_past : int\n Number of samples represented in old mean and variance. If sample\n weights were given, this should contain the sum of sample\n weights represented in old mean and variance.\n\n mu : array-like of shape (number of Gaussians,)\n Means for Gaussians in original set.\n\n var : array-like of shape (number of Gaussians,)\n Variances for Gaussians in original set.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n Returns\n -------\n total_mu : array-like of shape (number of Gaussians,)\n Updated mean for each Gaussian over the combined set.\n\n total_var : array-like of shape (number of Gaussians,)\n Updated variance for each Gaussian over the combined set.\n \"\"\"\n if X.shape[0] == 0:\n return mu, var\n\n # Compute (potentially weighted) mean and variance of new datapoints\n if sample_weight is not None:\n n_new = float(sample_weight.sum())\n new_mu = np.average(X, axis=0, weights=sample_weight)\n new_var = np.average((X - new_mu) ** 2, axis=0,\n weights=sample_weight)\n else:\n n_new = X.shape[0]\n new_var = np.var(X, axis=0)\n new_mu = np.mean(X, axis=0)\n\n if n_past == 0:\n return new_mu, new_var\n\n n_total = float(n_past + n_new)\n\n # Combine mean of old and new data, taking into consideration\n # (weighted) number of observations\n total_mu = (n_new * new_mu + n_past * mu) / n_total\n\n # Combine variance of old and new data, taking into consideration\n # (weighted) number of observations. 
This is achieved by combining\n # the sum-of-squared-differences (ssd)\n old_ssd = n_past * var\n new_ssd = n_new * new_var\n total_ssd = (old_ssd + new_ssd +\n (n_new * n_past / n_total) * (mu - new_mu) ** 2)\n total_var = total_ssd / n_total\n\n return total_mu, total_var" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/fit", + "name": "fit", + "qname": "sklearn.naive_bayes.GaussianNB.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/fit/self", + "name": "self", + "qname": "sklearn.naive_bayes.GaussianNB.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/fit/X", + "name": "X", + "qname": "sklearn.naive_bayes.GaussianNB.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples\nand n_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/fit/y", + "name": "y", + "qname": "sklearn.naive_bayes.GaussianNB.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.naive_bayes.GaussianNB.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weights applied to individual samples (1. for unweighted).\n\n.. versionadded:: 0.17\n Gaussian Naive Bayes supports fitting with *sample_weight*." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit Gaussian Naive Bayes according to X, y", + "docstring": "Fit Gaussian Naive Bayes according to X, y\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n .. versionadded:: 0.17\n Gaussian Naive Bayes supports fitting with *sample_weight*.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Gaussian Naive Bayes according to X, y\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n .. 
versionadded:: 0.17\n Gaussian Naive Bayes supports fitting with *sample_weight*.\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._validate_data(X, y)\n y = column_or_1d(y, warn=True)\n return self._partial_fit(X, y, np.unique(y), _refit=True,\n sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/partial_fit", + "name": "partial_fit", + "qname": "sklearn.naive_bayes.GaussianNB.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/partial_fit/self", + "name": "self", + "qname": "sklearn.naive_bayes.GaussianNB.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/partial_fit/X", + "name": "X", + "qname": "sklearn.naive_bayes.GaussianNB.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/partial_fit/y", + "name": "y", + "qname": "sklearn.naive_bayes.GaussianNB.partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/partial_fit/classes", + "name": "classes", + "qname": "sklearn.naive_bayes.GaussianNB.partial_fit.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "List of all the classes that can possibly appear in the y vector.\n\nMust be provided at the first call to partial_fit, can be omitted\nin subsequent calls." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/GaussianNB/partial_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.naive_bayes.GaussianNB.partial_fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weights applied to individual samples (1. for unweighted).\n\n.. 
versionadded:: 0.17" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance and numerical stability overhead,\nhence it is better to call partial_fit on chunks of data that are\nas large as possible (as long as fitting in the memory budget) to\nhide the overhead.", + "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance and numerical stability overhead,\nhence it is better to call partial_fit on chunks of data that are\nas large as possible (as long as fitting in the memory budget) to\nhide the overhead.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nclasses : array-like of shape (n_classes,), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n .. versionadded:: 0.17\n\nReturns\n-------\nself : object", + "code": " def partial_fit(self, X, y, classes=None, sample_weight=None):\n \"\"\"Incremental fit on a batch of samples.\n\n This method is expected to be called several times consecutively\n on different chunks of a dataset so as to implement out-of-core\n or online learning.\n\n This is especially useful when the whole dataset is too big to fit in\n memory at once.\n\n This method has some performance and numerical stability overhead,\n hence it is better to call partial_fit on chunks of data that are\n as large as possible (as long as fitting in the memory budget) to\n hide the overhead.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n classes : array-like of shape (n_classes,), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n .. 
versionadded:: 0.17\n\n Returns\n -------\n self : object\n \"\"\"\n return self._partial_fit(X, y, classes, _refit=False,\n sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/__init__", + "name": "__init__", + "qname": "sklearn.naive_bayes.MultinomialNB.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/__init__/self", + "name": "self", + "qname": "sklearn.naive_bayes.MultinomialNB.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/__init__/alpha", + "name": "alpha", + "qname": "sklearn.naive_bayes.MultinomialNB.__init__.alpha", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Additive (Laplace/Lidstone) smoothing parameter\n(0 for no smoothing)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/__init__/fit_prior", + "name": "fit_prior", + "qname": "sklearn.naive_bayes.MultinomialNB.__init__.fit_prior", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to learn class prior probabilities or not.\nIf false, a uniform prior will be used." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/__init__/class_prior", + "name": "class_prior", + "qname": "sklearn.naive_bayes.MultinomialNB.__init__.class_prior", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "Prior probabilities of the classes. If specified the priors are not\nadjusted according to the data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Naive Bayes classifier for multinomial models\n\nThe multinomial Naive Bayes classifier is suitable for classification with\ndiscrete features (e.g., word counts for text classification). The\nmultinomial distribution normally requires integer feature counts. 
However,\nin practice, fractional counts such as tf-idf may also work.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None):\n self.alpha = alpha\n self.fit_prior = fit_prior\n self.class_prior = class_prior" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/_count", + "name": "_count", + "qname": "sklearn.naive_bayes.MultinomialNB._count", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/_count/self", + "name": "self", + "qname": "sklearn.naive_bayes.MultinomialNB._count.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/_count/X", + "name": "X", + "qname": "sklearn.naive_bayes.MultinomialNB._count.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/_count/Y", + "name": "Y", + "qname": "sklearn.naive_bayes.MultinomialNB._count.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Count and smooth feature occurrences.", + "docstring": "Count and smooth feature occurrences.", + "code": " def _count(self, X, Y):\n \"\"\"Count and smooth feature occurrences.\"\"\"\n check_non_negative(X, \"MultinomialNB (input X)\")\n self.feature_count_ += safe_sparse_dot(Y.T, X)\n self.class_count_ += Y.sum(axis=0)" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/_joint_log_likelihood", + "name": "_joint_log_likelihood", + "qname": "sklearn.naive_bayes.MultinomialNB._joint_log_likelihood", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/_joint_log_likelihood/self", + "name": "self", + "qname": "sklearn.naive_bayes.MultinomialNB._joint_log_likelihood.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/_joint_log_likelihood/X", + "name": "X", + "qname": "sklearn.naive_bayes.MultinomialNB._joint_log_likelihood.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate the posterior log probability of the samples X", + "docstring": "Calculate the posterior log probability of the samples X", + "code": " def _joint_log_likelihood(self, X):\n \"\"\"Calculate the posterior log probability of the samples X\"\"\"\n return (safe_sparse_dot(X, self.feature_log_prob_.T) +\n self.class_log_prior_)" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/_more_tags", + "name": "_more_tags", + "qname": "sklearn.naive_bayes.MultinomialNB._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/_more_tags/self", + "name": "self", + 
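
The MultinomialNB pieces shown here, `_count`, `_joint_log_likelihood`, and the Laplace smoothing applied in `_update_feature_log_prob` just below, compose into two short formulas. A hedged sketch with assumed toy counts re-derives both and checks them against the fitted attributes:

```python
# Hedged sketch: MultinomialNB's smoothed log-probabilities and class scores.
import numpy as np
from sklearn.naive_bayes import MultinomialNB

X = np.array([[3, 0, 1],
              [2, 0, 0],
              [0, 4, 2],
              [1, 2, 3]], dtype=float)
y = np.array([0, 0, 1, 1])
alpha = 1.0

clf = MultinomialNB(alpha=alpha).fit(X, y)

Y = np.eye(2)[y]
smoothed_fc = Y.T @ X + alpha                                 # smoothed per-class counts
flp = np.log(smoothed_fc) - np.log(smoothed_fc.sum(axis=1, keepdims=True))
jll = X @ flp.T + clf.class_log_prior_                        # linear in the counts

assert np.allclose(flp, clf.feature_log_prob_)
assert np.array_equal(clf.classes_[jll.argmax(axis=1)], clf.predict(X))
```
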
"qname": "sklearn.naive_bayes.MultinomialNB._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'requires_positive_X': True}" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/_update_feature_log_prob", + "name": "_update_feature_log_prob", + "qname": "sklearn.naive_bayes.MultinomialNB._update_feature_log_prob", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/_update_feature_log_prob/self", + "name": "self", + "qname": "sklearn.naive_bayes.MultinomialNB._update_feature_log_prob.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/MultinomialNB/_update_feature_log_prob/alpha", + "name": "alpha", + "qname": "sklearn.naive_bayes.MultinomialNB._update_feature_log_prob.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply smoothing to raw counts and recompute log probabilities", + "docstring": "Apply smoothing to raw counts and recompute log probabilities", + "code": " def _update_feature_log_prob(self, alpha):\n \"\"\"Apply smoothing to raw counts and recompute log probabilities\"\"\"\n smoothed_fc = self.feature_count_ + alpha\n smoothed_cc = smoothed_fc.sum(axis=1)\n\n self.feature_log_prob_ = (np.log(smoothed_fc) -\n np.log(smoothed_cc.reshape(-1, 1)))" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_check_X", + "name": "_check_X", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._check_X", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_check_X/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._check_X.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_check_X/X", + "name": "X", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._check_X.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_X(self, X):\n return check_array(X, accept_sparse='csr')" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_check_X_y", + "name": "_check_X_y", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._check_X_y", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_check_X_y/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._check_X_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_check_X_y/X", + "name": "X", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._check_X_y.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_check_X_y/y", + "name": "y", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._check_X_y.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_X_y(self, X, y):\n return self._validate_data(X, y, accept_sparse='csr')" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_check_alpha", + "name": "_check_alpha", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._check_alpha", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_check_alpha/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._check_alpha.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_alpha(self):\n if np.min(self.alpha) < 0:\n raise ValueError('Smoothing parameter alpha = %.1e. '\n 'alpha should be > 0.' % np.min(self.alpha))\n if isinstance(self.alpha, np.ndarray):\n if not self.alpha.shape[0] == self.n_features_:\n raise ValueError(\"alpha should be a scalar or a numpy array \"\n \"with shape [n_features]\")\n if np.min(self.alpha) < _ALPHA_MIN:\n warnings.warn('alpha too small will result in numeric errors, '\n 'setting alpha = %.1e' % _ALPHA_MIN)\n return np.maximum(self.alpha, _ALPHA_MIN)\n return self.alpha" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_init_counters", + "name": "_init_counters", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._init_counters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_init_counters/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._init_counters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_init_counters/n_effective_classes", + "name": "n_effective_classes", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._init_counters.n_effective_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_init_counters/n_features", + "name": "n_features", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._init_counters.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _init_counters(self, 
n_effective_classes, n_features):\n self.class_count_ = np.zeros(n_effective_classes, dtype=np.float64)\n self.feature_count_ = np.zeros((n_effective_classes, n_features),\n dtype=np.float64)" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_more_tags", + "name": "_more_tags", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_more_tags/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'poor_score': True}" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_update_class_log_prior", + "name": "_update_class_log_prior", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._update_class_log_prior", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_update_class_log_prior/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._update_class_log_prior.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/_update_class_log_prior/class_prior", + "name": "class_prior", + "qname": "sklearn.naive_bayes._BaseDiscreteNB._update_class_log_prior.class_prior", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _update_class_log_prior(self, class_prior=None):\n n_classes = len(self.classes_)\n if class_prior is not None:\n if len(class_prior) != n_classes:\n raise ValueError(\"Number of priors must match number of\"\n \" classes.\")\n self.class_log_prior_ = np.log(class_prior)\n elif self.fit_prior:\n with warnings.catch_warnings():\n # silence the warning when count is 0 because class was not yet\n # observed\n warnings.simplefilter(\"ignore\", RuntimeWarning)\n log_class_count = np.log(self.class_count_)\n\n # empirical prior, with sample_weight taken into account\n self.class_log_prior_ = (log_class_count -\n np.log(self.class_count_.sum()))\n else:\n self.class_log_prior_ = np.full(n_classes, -np.log(n_classes))" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/coef_@getter", + "name": "coef_", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.coef_", + "decorators": [ + "deprecated('Attribute coef_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/coef_/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.coef_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute coef_ was 
deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def coef_(self):\n return (self.feature_log_prob_[1:]\n if len(self.classes_) == 2 else self.feature_log_prob_)" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/fit", + "name": "fit", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/fit/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/fit/X", + "name": "X", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/fit/y", + "name": "y", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weights applied to individual samples (1. for unweighted)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit Naive Bayes classifier according to X, y", + "docstring": "Fit Naive Bayes classifier according to X, y\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit Naive Bayes classifier according to X, y\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._check_X_y(X, y)\n _, n_features = X.shape\n self.n_features_ = n_features\n\n labelbin = LabelBinarizer()\n Y = labelbin.fit_transform(y)\n self.classes_ = labelbin.classes_\n if Y.shape[1] == 1:\n Y = np.concatenate((1 - Y, Y), axis=1)\n\n # LabelBinarizer().fit_transform() returns arrays with dtype=np.int64.\n # We convert it to np.float64 to support sample_weight consistently;\n # this means we also don't have to cast X to floating point\n if sample_weight is not None:\n Y = Y.astype(np.float64, copy=False)\n sample_weight = _check_sample_weight(sample_weight, X)\n sample_weight = np.atleast_2d(sample_weight)\n Y *= sample_weight.T\n\n class_prior = self.class_prior\n\n # Count raw events from data before updating the class log prior\n # and feature log probas\n n_effective_classes = Y.shape[1]\n\n self._init_counters(n_effective_classes, n_features)\n self._count(X, Y)\n alpha = self._check_alpha()\n self._update_feature_log_prob(alpha)\n self._update_class_log_prior(class_prior=class_prior)\n return self" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/intercept_@getter", + "name": "intercept_", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.intercept_", + "decorators": [ + "deprecated('Attribute intercept_ was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/intercept_/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.intercept_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute intercept_ was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def intercept_(self):\n return (self.class_log_prior_[1:]\n if len(self.classes_) == 2 else self.class_log_prior_)" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/partial_fit", + "name": "partial_fit", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/partial_fit/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/partial_fit/X", + "name": "X", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples and\nn_features is the number of features." 
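
The `_BaseDiscreteNB.fit` code above hinges on one label-handling detail: binary targets binarize to a single indicator column, which is expanded to two columns before sample weights are folded in. A short sketch (string labels and weights are my own toy values) makes that flow visible:

```python
# Hedged sketch: the Y construction used inside _BaseDiscreteNB.fit.
import numpy as np
from sklearn.preprocessing import LabelBinarizer

y = np.array(["spam", "ham", "ham", "spam"])
sample_weight = np.array([1.0, 2.0, 1.0, 0.5])

lb = LabelBinarizer()
Y = lb.fit_transform(y)                 # shape (4, 1) for a binary problem
if Y.shape[1] == 1:
    Y = np.concatenate((1 - Y, Y), axis=1)  # columns follow lb.classes_ order

Y = Y.astype(np.float64, copy=False)    # float so weights apply cleanly
Y *= np.atleast_2d(sample_weight).T     # each row contributes its weight to one class

print(lb.classes_)   # ['ham' 'spam']
print(Y)
```
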
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/partial_fit/y", + "name": "y", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/partial_fit/classes", + "name": "classes", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.partial_fit.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes)", + "default_value": "None", + "description": "List of all the classes that can possibly appear in the y vector.\n\nMust be provided at the first call to partial_fit, can be omitted\nin subsequent calls." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes)" + } + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseDiscreteNB/partial_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.naive_bayes._BaseDiscreteNB.partial_fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weights applied to individual samples (1. for unweighted)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance overhead hence it is better to call\npartial_fit on chunks of data that are as large as possible\n(as long as fitting in the memory budget) to hide the overhead.", + "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance overhead hence it is better to call\npartial_fit on chunks of data that are as large as possible\n(as long as fitting in the memory budget) to hide the overhead.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nclasses : array-like of shape (n_classes), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. 
for unweighted).\n\nReturns\n-------\nself : object", + "code": " def partial_fit(self, X, y, classes=None, sample_weight=None):\n \"\"\"Incremental fit on a batch of samples.\n\n This method is expected to be called several times consecutively\n on different chunks of a dataset so as to implement out-of-core\n or online learning.\n\n This is especially useful when the whole dataset is too big to fit in\n memory at once.\n\n This method has some performance overhead hence it is better to call\n partial_fit on chunks of data that are as large as possible\n (as long as fitting in the memory budget) to hide the overhead.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target values.\n\n classes : array-like of shape (n_classes), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._check_X_y(X, y)\n _, n_features = X.shape\n\n if _check_partial_fit_first_call(self, classes):\n # This is the first call to partial_fit:\n # initialize various cumulative counters\n n_effective_classes = len(classes) if len(classes) > 1 else 2\n self._init_counters(n_effective_classes, n_features)\n self.n_features_ = n_features\n elif n_features != self.n_features_:\n msg = \"Number of features %d does not match previous data %d.\"\n raise ValueError(msg % (n_features, self.n_features_))\n\n Y = label_binarize(y, classes=self.classes_)\n if Y.shape[1] == 1:\n Y = np.concatenate((1 - Y, Y), axis=1)\n\n if X.shape[0] != Y.shape[0]:\n msg = \"X.shape[0]=%d and y.shape[0]=%d are incompatible.\"\n raise ValueError(msg % (X.shape[0], y.shape[0]))\n\n # label_binarize() returns arrays with dtype=np.int64.\n # We convert it to np.float64 to support sample_weight consistently\n Y = Y.astype(np.float64, copy=False)\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X)\n sample_weight = np.atleast_2d(sample_weight)\n Y *= sample_weight.T\n\n class_prior = self.class_prior\n\n # Count raw events from data before updating the class log prior\n # and feature log probas\n self._count(X, Y)\n\n # XXX: OPTIM: we could introduce a public finalization method to\n # be called by the user explicitly just once after several consecutive\n # calls to partial_fit and prior any call to predict[_[log_]proba]\n # to avoid computing the smooth log probas at each call to partial fit\n alpha = self._check_alpha()\n self._update_feature_log_prob(alpha)\n self._update_class_log_prior(class_prior=class_prior)\n return self" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/_check_X", + "name": "_check_X", + "qname": "sklearn.naive_bayes._BaseNB._check_X", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/_check_X/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseNB._check_X.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/_check_X/X", + "name": "X", + 
"qname": "sklearn.naive_bayes._BaseNB._check_X.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "To be overridden in subclasses with the actual checks.", + "docstring": "To be overridden in subclasses with the actual checks.", + "code": " @abstractmethod\n def _check_X(self, X):\n \"\"\"To be overridden in subclasses with the actual checks.\"\"\"" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/_joint_log_likelihood", + "name": "_joint_log_likelihood", + "qname": "sklearn.naive_bayes._BaseNB._joint_log_likelihood", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/_joint_log_likelihood/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseNB._joint_log_likelihood.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/_joint_log_likelihood/X", + "name": "X", + "qname": "sklearn.naive_bayes._BaseNB._joint_log_likelihood.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the unnormalized posterior log probability of X\n\nI.e. ``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\nshape (n_classes, n_samples).\n\nInput is passed to _joint_log_likelihood as-is by predict,\npredict_proba and predict_log_proba.", + "docstring": "Compute the unnormalized posterior log probability of X\n\nI.e. ``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\nshape (n_classes, n_samples).\n\nInput is passed to _joint_log_likelihood as-is by predict,\npredict_proba and predict_log_proba.", + "code": " @abstractmethod\n def _joint_log_likelihood(self, X):\n \"\"\"Compute the unnormalized posterior log probability of X\n\n I.e. 
``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\n shape (n_classes, n_samples).\n\n Input is passed to _joint_log_likelihood as-is by predict,\n predict_proba and predict_log_proba.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/predict", + "name": "predict", + "qname": "sklearn.naive_bayes._BaseNB.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/predict/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseNB.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/predict/X", + "name": "X", + "qname": "sklearn.naive_bayes._BaseNB.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform classification on an array of test vectors X.", + "docstring": "Perform classification on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n Predicted target values for X", + "code": " def predict(self, X):\n \"\"\"\n Perform classification on an array of test vectors X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n C : ndarray of shape (n_samples,)\n Predicted target values for X\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n jll = self._joint_log_likelihood(X)\n return self.classes_[np.argmax(jll, axis=1)]" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.naive_bayes._BaseNB.predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/predict_log_proba/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseNB.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/predict_log_proba/X", + "name": "X", + "qname": "sklearn.naive_bayes._BaseNB.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return log-probability estimates for the test vector X.", + "docstring": "Return log-probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the log-probability of the samples for each class in\n the model. 
The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.", + "code": " def predict_log_proba(self, X):\n \"\"\"\n Return log-probability estimates for the test vector X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n C : array-like of shape (n_samples, n_classes)\n Returns the log-probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n X = self._check_X(X)\n jll = self._joint_log_likelihood(X)\n # normalize by P(x) = P(f_1, ..., f_n)\n log_prob_x = logsumexp(jll, axis=1)\n return jll - np.atleast_2d(log_prob_x).T" + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/predict_proba", + "name": "predict_proba", + "qname": "sklearn.naive_bayes._BaseNB.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/predict_proba/self", + "name": "self", + "qname": "sklearn.naive_bayes._BaseNB.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.naive_bayes/_BaseNB/predict_proba/X", + "name": "X", + "qname": "sklearn.naive_bayes._BaseNB.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return probability estimates for the test vector X.", + "docstring": "Return probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.", + "code": " def predict_proba(self, X):\n \"\"\"\n Return probability estimates for the test vector X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n C : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. 
The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n \"\"\"\n return np.exp(self.predict_log_proba(X))" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/_kneighbors_reduce_func", + "name": "_kneighbors_reduce_func", + "qname": "sklearn.neighbors._base.KNeighborsMixin._kneighbors_reduce_func", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/_kneighbors_reduce_func/self", + "name": "self", + "qname": "sklearn.neighbors._base.KNeighborsMixin._kneighbors_reduce_func.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/_kneighbors_reduce_func/dist", + "name": "dist", + "qname": "sklearn.neighbors._base.KNeighborsMixin._kneighbors_reduce_func.dist", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples_chunk, n_samples)", + "default_value": "", + "description": "The distance matrix." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_chunk, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/_kneighbors_reduce_func/start", + "name": "start", + "qname": "sklearn.neighbors._base.KNeighborsMixin._kneighbors_reduce_func.start", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The index in X which the first row of dist corresponds to." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/_kneighbors_reduce_func/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.neighbors._base.KNeighborsMixin._kneighbors_reduce_func.n_neighbors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of neighbors required for each sample." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/_kneighbors_reduce_func/return_distance", + "name": "return_distance", + "qname": "sklearn.neighbors._base.KNeighborsMixin._kneighbors_reduce_func.return_distance", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Whether or not to return the distances." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Reduce a chunk of distances to the nearest neighbors\n\nCallback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`", + "docstring": "Reduce a chunk of distances to the nearest neighbors\n\nCallback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\nParameters\n----------\ndist : ndarray of shape (n_samples_chunk, n_samples)\n The distance matrix.\n\nstart : int\n The index in X which the first row of dist corresponds to.\n\nn_neighbors : int\n Number of neighbors required for each sample.\n\nreturn_distance : bool\n Whether or not to return the distances.\n\nReturns\n-------\ndist : array of shape (n_samples_chunk, n_neighbors)\n Returned only if `return_distance=True`.\n\nneigh : array of shape (n_samples_chunk, n_neighbors)\n The neighbors indices.", + "code": " def _kneighbors_reduce_func(self, dist, start,\n n_neighbors, return_distance):\n \"\"\"Reduce a chunk of distances to the nearest neighbors\n\n Callback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\n Parameters\n ----------\n dist : ndarray of shape (n_samples_chunk, n_samples)\n The distance matrix.\n\n start : int\n The index in X which the first row of dist corresponds to.\n\n n_neighbors : int\n Number of neighbors required for each sample.\n\n return_distance : bool\n Whether or not to return the distances.\n\n Returns\n -------\n dist : array of shape (n_samples_chunk, n_neighbors)\n Returned only if `return_distance=True`.\n\n neigh : array of shape (n_samples_chunk, n_neighbors)\n The neighbors indices.\n \"\"\"\n sample_range = np.arange(dist.shape[0])[:, None]\n neigh_ind = np.argpartition(dist, n_neighbors - 1, axis=1)\n neigh_ind = neigh_ind[:, :n_neighbors]\n # argpartition doesn't guarantee sorted order, so we sort again\n neigh_ind = neigh_ind[\n sample_range, np.argsort(dist[sample_range, neigh_ind])]\n if return_distance:\n if self.effective_metric_ == 'euclidean':\n result = np.sqrt(dist[sample_range, neigh_ind]), neigh_ind\n else:\n result = dist[sample_range, neigh_ind], neigh_ind\n else:\n result = neigh_ind\n return result" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/kneighbors", + "name": "kneighbors", + "qname": "sklearn.neighbors._base.KNeighborsMixin.kneighbors", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/kneighbors/self", + "name": "self", + "qname": "sklearn.neighbors._base.KNeighborsMixin.kneighbors.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/kneighbors/X", + "name": "X", + "qname": "sklearn.neighbors._base.KNeighborsMixin.kneighbors.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'", + "default_value": "None", + "description": "The query point or points.\nIf not provided, neighbors of each indexed point are returned.\nIn this case, the query point is not considered its own neighbor." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_queries, n_features)" + }, + { + "kind": "NamedType", + "name": "(n_queries, n_indexed) if metric == 'precomputed'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/kneighbors/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.neighbors._base.KNeighborsMixin.kneighbors.n_neighbors", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of neighbors required for each sample. The default is the\nvalue passed to the constructor." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/kneighbors/return_distance", + "name": "return_distance", + "qname": "sklearn.neighbors._base.KNeighborsMixin.kneighbors.return_distance", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not to return the distances." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Finds the K-neighbors of a point.\n\nReturns indices of and distances to the neighbors of each point.", + "docstring": "Finds the K-neighbors of a point.\n\nReturns indices of and distances to the neighbors of each point.\n\nParameters\n----------\nX : array-like, shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed', default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\nn_neighbors : int, default=None\n Number of neighbors required for each sample. The default is the\n value passed to the constructor.\n\nreturn_distance : bool, default=True\n Whether or not to return the distances.\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_queries, n_neighbors)\n Array representing the lengths to points, only present if\n return_distance=True\n\nneigh_ind : ndarray of shape (n_queries, n_neighbors)\n Indices of the nearest points in the population matrix.\n\nExamples\n--------\nIn the following example, we construct a NearestNeighbors\nclass from an array representing our data set and ask who's\nthe closest point to [1,1,1]\n\n>>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(n_neighbors=1)\n>>> neigh.fit(samples)\nNearestNeighbors(n_neighbors=1)\n>>> print(neigh.kneighbors([[1., 1., 1.]]))\n(array([[0.5]]), array([[2]]))\n\nAs you can see, it returns [[0.5]], and [[2]], which means that the\nelement is at distance 0.5 and is the third element of samples\n(indexes start at 0). 
You can also query for multiple points:\n\n>>> X = [[0., 1., 0.], [1., 0., 1.]]\n>>> neigh.kneighbors(X, return_distance=False)\narray([[1],\n [2]]...)", + "code": " def kneighbors(self, X=None, n_neighbors=None, return_distance=True):\n \"\"\"Finds the K-neighbors of a point.\n\n Returns indices of and distances to the neighbors of each point.\n\n Parameters\n ----------\n X : array-like, shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed', \\\n default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\n n_neighbors : int, default=None\n Number of neighbors required for each sample. The default is the\n value passed to the constructor.\n\n return_distance : bool, default=True\n Whether or not to return the distances.\n\n Returns\n -------\n neigh_dist : ndarray of shape (n_queries, n_neighbors)\n Array representing the lengths to points, only present if\n return_distance=True\n\n neigh_ind : ndarray of shape (n_queries, n_neighbors)\n Indices of the nearest points in the population matrix.\n\n Examples\n --------\n In the following example, we construct a NearestNeighbors\n class from an array representing our data set and ask who's\n the closest point to [1,1,1]\n\n >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n >>> from sklearn.neighbors import NearestNeighbors\n >>> neigh = NearestNeighbors(n_neighbors=1)\n >>> neigh.fit(samples)\n NearestNeighbors(n_neighbors=1)\n >>> print(neigh.kneighbors([[1., 1., 1.]]))\n (array([[0.5]]), array([[2]]))\n\n As you can see, it returns [[0.5]], and [[2]], which means that the\n element is at distance 0.5 and is the third element of samples\n (indexes start at 0). You can also query for multiple points:\n\n >>> X = [[0., 1., 0.], [1., 0., 1.]]\n >>> neigh.kneighbors(X, return_distance=False)\n array([[1],\n [2]]...)\n \"\"\"\n check_is_fitted(self)\n\n if n_neighbors is None:\n n_neighbors = self.n_neighbors\n elif n_neighbors <= 0:\n raise ValueError(\n \"Expected n_neighbors > 0. 
Got %d\" %\n n_neighbors\n )\n else:\n if not isinstance(n_neighbors, numbers.Integral):\n raise TypeError(\n \"n_neighbors does not take %s value, \"\n \"enter integer value\" %\n type(n_neighbors))\n\n if X is not None:\n query_is_train = False\n if self.effective_metric_ == 'precomputed':\n X = _check_precomputed(X)\n else:\n X = check_array(X, accept_sparse='csr')\n else:\n query_is_train = True\n X = self._fit_X\n # Include an extra neighbor to account for the sample itself being\n # returned, which is removed later\n n_neighbors += 1\n\n n_samples_fit = self.n_samples_fit_\n if n_neighbors > n_samples_fit:\n raise ValueError(\n \"Expected n_neighbors <= n_samples, \"\n \" but n_samples = %d, n_neighbors = %d\" %\n (n_samples_fit, n_neighbors)\n )\n\n n_jobs = effective_n_jobs(self.n_jobs)\n chunked_results = None\n if (self._fit_method == 'brute' and\n self.effective_metric_ == 'precomputed' and issparse(X)):\n results = _kneighbors_from_graph(\n X, n_neighbors=n_neighbors,\n return_distance=return_distance)\n\n elif self._fit_method == 'brute':\n reduce_func = partial(self._kneighbors_reduce_func,\n n_neighbors=n_neighbors,\n return_distance=return_distance)\n\n # for efficiency, use squared euclidean distances\n if self.effective_metric_ == 'euclidean':\n kwds = {'squared': True}\n else:\n kwds = self.effective_metric_params_\n\n chunked_results = list(pairwise_distances_chunked(\n X, self._fit_X, reduce_func=reduce_func,\n metric=self.effective_metric_, n_jobs=n_jobs,\n **kwds))\n\n elif self._fit_method in ['ball_tree', 'kd_tree']:\n if issparse(X):\n raise ValueError(\n \"%s does not work with sparse matrices. Densify the data, \"\n \"or set algorithm='brute'\" % self._fit_method)\n old_joblib = (\n parse_version(joblib.__version__) < parse_version('0.12'))\n if old_joblib:\n # Deal with change of API in joblib\n parallel_kwargs = {\"backend\": \"threading\"}\n else:\n parallel_kwargs = {\"prefer\": \"threads\"}\n chunked_results = Parallel(n_jobs, **parallel_kwargs)(\n delayed(_tree_query_parallel_helper)(\n self._tree, X[s], n_neighbors, return_distance)\n for s in gen_even_slices(X.shape[0], n_jobs)\n )\n else:\n raise ValueError(\"internal: _fit_method not recognized\")\n\n if chunked_results is not None:\n if return_distance:\n neigh_dist, neigh_ind = zip(*chunked_results)\n results = np.vstack(neigh_dist), np.vstack(neigh_ind)\n else:\n results = np.vstack(chunked_results)\n\n if not query_is_train:\n return results\n else:\n # If the query data is the same as the indexed data, we would like\n # to ignore the first nearest neighbor of every sample, i.e\n # the sample itself.\n if return_distance:\n neigh_dist, neigh_ind = results\n else:\n neigh_ind = results\n\n n_queries, _ = X.shape\n sample_range = np.arange(n_queries)[:, None]\n sample_mask = neigh_ind != sample_range\n\n # Corner case: When the number of duplicates are more\n # than the number of neighbors, the first NN will not\n # be the sample, but a duplicate.\n # In that case mask the first duplicate.\n dup_gr_nbrs = np.all(sample_mask, axis=1)\n sample_mask[:, 0][dup_gr_nbrs] = False\n neigh_ind = np.reshape(\n neigh_ind[sample_mask], (n_queries, n_neighbors - 1))\n\n if return_distance:\n neigh_dist = np.reshape(\n neigh_dist[sample_mask], (n_queries, n_neighbors - 1))\n return neigh_dist, neigh_ind\n return neigh_ind" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/kneighbors_graph", + "name": "kneighbors_graph", + "qname": "sklearn.neighbors._base.KNeighborsMixin.kneighbors_graph", + 
"decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/kneighbors_graph/self", + "name": "self", + "qname": "sklearn.neighbors._base.KNeighborsMixin.kneighbors_graph.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/kneighbors_graph/X", + "name": "X", + "qname": "sklearn.neighbors._base.KNeighborsMixin.kneighbors_graph.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'", + "default_value": "None", + "description": "The query point or points.\nIf not provided, neighbors of each indexed point are returned.\nIn this case, the query point is not considered its own neighbor.\nFor ``metric='precomputed'`` the shape should be\n(n_queries, n_indexed). Otherwise the shape should be\n(n_queries, n_features)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_queries, n_features)" + }, + { + "kind": "NamedType", + "name": "(n_queries, n_indexed) if metric == 'precomputed'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/kneighbors_graph/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.neighbors._base.KNeighborsMixin.kneighbors_graph.n_neighbors", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of neighbors for each sample. The default is the value\npassed to the constructor." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/KNeighborsMixin/kneighbors_graph/mode", + "name": "mode", + "qname": "sklearn.neighbors._base.KNeighborsMixin.kneighbors_graph.mode", + "default_value": "'connectivity'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'connectivity', 'distance'}", + "default_value": "'connectivity'", + "description": "Type of returned matrix: 'connectivity' will return the\nconnectivity matrix with ones and zeros, in 'distance' the\nedges are Euclidean distance between points." + }, + "type": { + "kind": "EnumType", + "values": ["connectivity", "distance"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the (weighted) graph of k-Neighbors for points in X", + "docstring": "Computes the (weighted) graph of k-Neighbors for points in X\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed', default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n For ``metric='precomputed'`` the shape should be\n (n_queries, n_indexed). Otherwise the shape should be\n (n_queries, n_features).\n\nn_neighbors : int, default=None\n Number of neighbors for each sample. 
The default is the value\n passed to the constructor.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the\n connectivity matrix with ones and zeros, in 'distance' the\n edges are Euclidean distance between points.\n\nReturns\n-------\nA : sparse-matrix of shape (n_queries, n_samples_fit)\n `n_samples_fit` is the number of samples in the fitted data\n `A[i, j]` is assigned the weight of edge that connects `i` to `j`.\n The matrix is of CSR format.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(n_neighbors=2)\n>>> neigh.fit(X)\nNearestNeighbors(n_neighbors=2)\n>>> A = neigh.kneighbors_graph(X)\n>>> A.toarray()\narray([[1., 0., 1.],\n [0., 1., 1.],\n [1., 0., 1.]])\n\nSee Also\n--------\nNearestNeighbors.radius_neighbors_graph", + "code": " def kneighbors_graph(self, X=None, n_neighbors=None,\n mode='connectivity'):\n \"\"\"Computes the (weighted) graph of k-Neighbors for points in X\n\n Parameters\n ----------\n X : array-like of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed', \\\n default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n For ``metric='precomputed'`` the shape should be\n (n_queries, n_indexed). Otherwise the shape should be\n (n_queries, n_features).\n\n n_neighbors : int, default=None\n Number of neighbors for each sample. The default is the value\n passed to the constructor.\n\n mode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the\n connectivity matrix with ones and zeros, in 'distance' the\n edges are Euclidean distance between points.\n\n Returns\n -------\n A : sparse-matrix of shape (n_queries, n_samples_fit)\n `n_samples_fit` is the number of samples in the fitted data\n `A[i, j]` is assigned the weight of edge that connects `i` to `j`.\n The matrix is of CSR format.\n\n Examples\n --------\n >>> X = [[0], [3], [1]]\n >>> from sklearn.neighbors import NearestNeighbors\n >>> neigh = NearestNeighbors(n_neighbors=2)\n >>> neigh.fit(X)\n NearestNeighbors(n_neighbors=2)\n >>> A = neigh.kneighbors_graph(X)\n >>> A.toarray()\n array([[1., 0., 1.],\n [0., 1., 1.],\n [1., 0., 1.]])\n\n See Also\n --------\n NearestNeighbors.radius_neighbors_graph\n \"\"\"\n check_is_fitted(self)\n if n_neighbors is None:\n n_neighbors = self.n_neighbors\n\n # check the input only in self.kneighbors\n\n # construct CSR matrix representation of the k-NN graph\n if mode == 'connectivity':\n A_ind = self.kneighbors(X, n_neighbors, return_distance=False)\n n_queries = A_ind.shape[0]\n A_data = np.ones(n_queries * n_neighbors)\n\n elif mode == 'distance':\n A_data, A_ind = self.kneighbors(\n X, n_neighbors, return_distance=True)\n A_data = np.ravel(A_data)\n\n else:\n raise ValueError(\n 'Unsupported mode, must be one of \"connectivity\" '\n 'or \"distance\" but got \"%s\" instead' % mode)\n\n n_queries = A_ind.shape[0]\n n_samples_fit = self.n_samples_fit_\n n_nonzero = n_queries * n_neighbors\n A_indptr = np.arange(0, n_nonzero + 1, n_neighbors)\n\n kneighbors_graph = csr_matrix((A_data, A_ind.ravel(), A_indptr),\n shape=(n_queries, n_samples_fit))\n\n return kneighbors_graph" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/__init__", + "name": "__init__", + "qname": 
"sklearn.neighbors._base.NeighborsBase.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/__init__/self", + "name": "self", + "qname": "sklearn.neighbors._base.NeighborsBase.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.neighbors._base.NeighborsBase.__init__.n_neighbors", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/__init__/radius", + "name": "radius", + "qname": "sklearn.neighbors._base.NeighborsBase.__init__.radius", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.neighbors._base.NeighborsBase.__init__.algorithm", + "default_value": "'auto'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/__init__/leaf_size", + "name": "leaf_size", + "qname": "sklearn.neighbors._base.NeighborsBase.__init__.leaf_size", + "default_value": "30", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/__init__/metric", + "name": "metric", + "qname": "sklearn.neighbors._base.NeighborsBase.__init__.metric", + "default_value": "'minkowski'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/__init__/p", + "name": "p", + "qname": "sklearn.neighbors._base.NeighborsBase.__init__.p", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/__init__/metric_params", + "name": "metric_params", + "qname": "sklearn.neighbors._base.NeighborsBase.__init__.metric_params", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.neighbors._base.NeighborsBase.__init__.n_jobs", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for nearest neighbors estimators.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self, n_neighbors=None, radius=None,\n 
algorithm='auto', leaf_size=30, metric='minkowski',\n p=2, metric_params=None, n_jobs=None):\n\n self.n_neighbors = n_neighbors\n self.radius = radius\n self.algorithm = algorithm\n self.leaf_size = leaf_size\n self.metric = metric\n self.metric_params = metric_params\n self.p = p\n self.n_jobs = n_jobs\n self._check_algorithm_metric()" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/_check_algorithm_metric", + "name": "_check_algorithm_metric", + "qname": "sklearn.neighbors._base.NeighborsBase._check_algorithm_metric", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/_check_algorithm_metric/self", + "name": "self", + "qname": "sklearn.neighbors._base.NeighborsBase._check_algorithm_metric.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_algorithm_metric(self):\n if self.algorithm not in ['auto', 'brute',\n 'kd_tree', 'ball_tree']:\n raise ValueError(\"unrecognized algorithm: '%s'\" % self.algorithm)\n\n if self.algorithm == 'auto':\n if self.metric == 'precomputed':\n alg_check = 'brute'\n elif (callable(self.metric) or\n self.metric in VALID_METRICS['ball_tree']):\n alg_check = 'ball_tree'\n else:\n alg_check = 'brute'\n else:\n alg_check = self.algorithm\n\n if callable(self.metric):\n if self.algorithm == 'kd_tree':\n # callable metric is only valid for brute force and ball_tree\n raise ValueError(\n \"kd_tree does not support callable metric '%s'\"\n \"Function call overhead will result\"\n \"in very poor performance.\"\n % self.metric)\n elif self.metric not in VALID_METRICS[alg_check]:\n raise ValueError(\"Metric '%s' not valid. Use \"\n \"sorted(sklearn.neighbors.VALID_METRICS['%s']) \"\n \"to get valid options. \"\n \"Metric can also be a callable function.\"\n % (self.metric, alg_check))\n\n if self.metric_params is not None and 'p' in self.metric_params:\n if self.p is not None:\n warnings.warn(\"Parameter p is found in metric_params. 
\"\n \"The corresponding parameter from __init__ \"\n \"is ignored.\", SyntaxWarning, stacklevel=3)\n effective_p = self.metric_params['p']\n else:\n effective_p = self.p\n\n if self.metric in ['wminkowski', 'minkowski'] and effective_p < 1:\n raise ValueError(\"p must be greater than one for minkowski metric\")" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/_fit", + "name": "_fit", + "qname": "sklearn.neighbors._base.NeighborsBase._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/_fit/self", + "name": "self", + "qname": "sklearn.neighbors._base.NeighborsBase._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/_fit/X", + "name": "X", + "qname": "sklearn.neighbors._base.NeighborsBase._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/_fit/y", + "name": "y", + "qname": "sklearn.neighbors._base.NeighborsBase._fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit(self, X, y=None):\n if self._get_tags()[\"requires_y\"]:\n if not isinstance(X, (KDTree, BallTree, NeighborsBase)):\n X, y = self._validate_data(X, y, accept_sparse=\"csr\",\n multi_output=True)\n\n if is_classifier(self):\n # Classification targets require a specific format\n if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:\n if y.ndim != 1:\n warnings.warn(\"A column-vector y was passed when a \"\n \"1d array was expected. 
Please change \"\n \"the shape of y to (n_samples,), for \"\n \"example using ravel().\",\n DataConversionWarning, stacklevel=2)\n\n self.outputs_2d_ = False\n y = y.reshape((-1, 1))\n else:\n self.outputs_2d_ = True\n\n check_classification_targets(y)\n self.classes_ = []\n self._y = np.empty(y.shape, dtype=int)\n for k in range(self._y.shape[1]):\n classes, self._y[:, k] = np.unique(\n y[:, k], return_inverse=True)\n self.classes_.append(classes)\n\n if not self.outputs_2d_:\n self.classes_ = self.classes_[0]\n self._y = self._y.ravel()\n else:\n self._y = y\n\n else:\n if not isinstance(X, (KDTree, BallTree, NeighborsBase)):\n X = self._validate_data(X, accept_sparse='csr')\n\n self._check_algorithm_metric()\n if self.metric_params is None:\n self.effective_metric_params_ = {}\n else:\n self.effective_metric_params_ = self.metric_params.copy()\n\n effective_p = self.effective_metric_params_.get('p', self.p)\n if self.metric in ['wminkowski', 'minkowski']:\n self.effective_metric_params_['p'] = effective_p\n\n self.effective_metric_ = self.metric\n # For minkowski distance, use more efficient methods where available\n if self.metric == 'minkowski':\n p = self.effective_metric_params_.pop('p', 2)\n if p < 1:\n raise ValueError(\"p must be greater than one \"\n \"for minkowski metric\")\n elif p == 1:\n self.effective_metric_ = 'manhattan'\n elif p == 2:\n self.effective_metric_ = 'euclidean'\n elif p == np.inf:\n self.effective_metric_ = 'chebyshev'\n else:\n self.effective_metric_params_['p'] = p\n\n if isinstance(X, NeighborsBase):\n self._fit_X = X._fit_X\n self._tree = X._tree\n self._fit_method = X._fit_method\n self.n_samples_fit_ = X.n_samples_fit_\n return self\n\n elif isinstance(X, BallTree):\n self._fit_X = X.data\n self._tree = X\n self._fit_method = 'ball_tree'\n self.n_samples_fit_ = X.data.shape[0]\n return self\n\n elif isinstance(X, KDTree):\n self._fit_X = X.data\n self._tree = X\n self._fit_method = 'kd_tree'\n self.n_samples_fit_ = X.data.shape[0]\n return self\n\n if self.effective_metric_ == 'precomputed':\n X = _check_precomputed(X)\n self.n_features_in_ = X.shape[1]\n\n n_samples = X.shape[0]\n if n_samples == 0:\n raise ValueError(\"n_samples must be greater than 0\")\n\n # Precomputed matrix X must be squared\n if self.metric == 'precomputed' and X.shape[0] != X.shape[1]:\n raise ValueError(\"Precomputed matrix must be a square matrix.\"\n \" Input is a {}x{} matrix.\"\n .format(X.shape[0], X.shape[1]))\n\n if issparse(X):\n if self.algorithm not in ('auto', 'brute'):\n warnings.warn(\"cannot use tree with sparse input: \"\n \"using brute force\")\n if self.effective_metric_ not in VALID_METRICS_SPARSE['brute'] \\\n and not callable(self.effective_metric_):\n raise ValueError(\"Metric '%s' not valid for sparse input. \"\n \"Use sorted(sklearn.neighbors.\"\n \"VALID_METRICS_SPARSE['brute']) \"\n \"to get valid options. 
\"\n \"Metric can also be a callable function.\"\n % (self.effective_metric_))\n self._fit_X = X.copy()\n self._tree = None\n self._fit_method = 'brute'\n self.n_samples_fit_ = X.shape[0]\n return self\n\n self._fit_method = self.algorithm\n self._fit_X = X\n self.n_samples_fit_ = X.shape[0]\n\n if self._fit_method == 'auto':\n # A tree approach is better for small number of neighbors or small\n # number of features, with KDTree generally faster when available\n if (self.metric == 'precomputed' or self._fit_X.shape[1] > 15 or\n (self.n_neighbors is not None and\n self.n_neighbors >= self._fit_X.shape[0] // 2)):\n self._fit_method = 'brute'\n else:\n if self.effective_metric_ in VALID_METRICS['kd_tree']:\n self._fit_method = 'kd_tree'\n elif (callable(self.effective_metric_) or\n self.effective_metric_ in VALID_METRICS['ball_tree']):\n self._fit_method = 'ball_tree'\n else:\n self._fit_method = 'brute'\n\n if self._fit_method == 'ball_tree':\n self._tree = BallTree(X, self.leaf_size,\n metric=self.effective_metric_,\n **self.effective_metric_params_)\n elif self._fit_method == 'kd_tree':\n self._tree = KDTree(X, self.leaf_size,\n metric=self.effective_metric_,\n **self.effective_metric_params_)\n elif self._fit_method == 'brute':\n self._tree = None\n else:\n raise ValueError(\"algorithm = '%s' not recognized\"\n % self.algorithm)\n\n if self.n_neighbors is not None:\n if self.n_neighbors <= 0:\n raise ValueError(\n \"Expected n_neighbors > 0. Got %d\" %\n self.n_neighbors\n )\n else:\n if not isinstance(self.n_neighbors, numbers.Integral):\n raise TypeError(\n \"n_neighbors does not take %s value, \"\n \"enter integer value\" %\n type(self.n_neighbors))\n\n return self" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/_more_tags", + "name": "_more_tags", + "qname": "sklearn.neighbors._base.NeighborsBase._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/_more_tags/self", + "name": "self", + "qname": "sklearn.neighbors._base.NeighborsBase._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n # For cross-validation routines to split data correctly\n return {'pairwise': self.metric == 'precomputed'}" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.neighbors._base.NeighborsBase._pairwise", + "decorators": [ + "deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/NeighborsBase/_pairwise/self", + "name": "self", + "qname": "sklearn.neighbors._base.NeighborsBase._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n # For cross-validation routines to split data correctly\n return self.metric == 
'precomputed'" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/_radius_neighbors_reduce_func", + "name": "_radius_neighbors_reduce_func", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin._radius_neighbors_reduce_func", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/_radius_neighbors_reduce_func/self", + "name": "self", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin._radius_neighbors_reduce_func.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/_radius_neighbors_reduce_func/dist", + "name": "dist", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin._radius_neighbors_reduce_func.dist", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples_chunk, n_samples)", + "default_value": "", + "description": "The distance matrix." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples_chunk, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/_radius_neighbors_reduce_func/start", + "name": "start", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin._radius_neighbors_reduce_func.start", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The index in X which the first row of dist corresponds to." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/_radius_neighbors_reduce_func/radius", + "name": "radius", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin._radius_neighbors_reduce_func.radius", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "The radius considered when making the nearest neighbors search." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/_radius_neighbors_reduce_func/return_distance", + "name": "return_distance", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin._radius_neighbors_reduce_func.return_distance", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Whether or not to return the distances." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Reduce a chunk of distances to the nearest neighbors\n\nCallback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`", + "docstring": "Reduce a chunk of distances to the nearest neighbors\n\nCallback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\nParameters\n----------\ndist : ndarray of shape (n_samples_chunk, n_samples)\n The distance matrix.\n\nstart : int\n The index in X which the first row of dist corresponds to.\n\nradius : float\n The radius considered when making the nearest neighbors search.\n\nreturn_distance : bool\n Whether or not to return the distances.\n\nReturns\n-------\ndist : list of ndarray of shape (n_samples_chunk,)\n Returned only if `return_distance=True`.\n\nneigh : list of ndarray of shape (n_samples_chunk,)\n The neighbors indices.", + "code": " def _radius_neighbors_reduce_func(self, dist, start,\n radius, return_distance):\n \"\"\"Reduce a chunk of distances to the nearest neighbors\n\n Callback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`\n\n Parameters\n ----------\n dist : ndarray of shape (n_samples_chunk, n_samples)\n The distance matrix.\n\n start : int\n The index in X which the first row of dist corresponds to.\n\n radius : float\n The radius considered when making the nearest neighbors search.\n\n return_distance : bool\n Whether or not to return the distances.\n\n Returns\n -------\n dist : list of ndarray of shape (n_samples_chunk,)\n Returned only if `return_distance=True`.\n\n neigh : list of ndarray of shape (n_samples_chunk,)\n The neighbors indices.\n \"\"\"\n neigh_ind = [np.where(d <= radius)[0] for d in dist]\n\n if return_distance:\n if self.effective_metric_ == 'euclidean':\n dist = [np.sqrt(d[neigh_ind[i]])\n for i, d in enumerate(dist)]\n else:\n dist = [d[neigh_ind[i]]\n for i, d in enumerate(dist)]\n results = dist, neigh_ind\n else:\n results = neigh_ind\n return results" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors", + "name": "radius_neighbors", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin.radius_neighbors", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors/self", + "name": "self", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin.radius_neighbors.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors/X", + "name": "X", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin.radius_neighbors.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of (n_samples, n_features)", + "default_value": "None", + "description": "The query point or points.\nIf not provided, neighbors of each indexed point are returned.\nIn this case, the query point is not considered its own neighbor." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors/radius", + "name": "radius", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin.radius_neighbors.radius", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Limiting distance of neighbors to return. The default is the value\npassed to the constructor." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors/return_distance", + "name": "return_distance", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin.radius_neighbors.return_distance", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not to return the distances." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors/sort_results", + "name": "sort_results", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin.radius_neighbors.sort_results", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the distances and indices will be sorted by increasing\ndistances before being returned. If False, the results may not\nbe sorted. If `return_distance=False`, setting `sort_results=True`\nwill result in an error.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Finds the neighbors within a given radius of a point or points.\n\nReturn the indices and distances of each point from the dataset\nlying in a ball with size ``radius`` around the points of the query\narray. Points lying on the boundary are included in the results.\n\nThe result points are *not* necessarily sorted by distance to their\nquery point.", + "docstring": "Finds the neighbors within a given radius of a point or points.\n\nReturn the indices and distances of each point from the dataset\nlying in a ball with size ``radius`` around the points of the query\narray. Points lying on the boundary are included in the results.\n\nThe result points are *not* necessarily sorted by distance to their\nquery point.\n\nParameters\n----------\nX : array-like of (n_samples, n_features), default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\nradius : float, default=None\n Limiting distance of neighbors to return. The default is the value\n passed to the constructor.\n\nreturn_distance : bool, default=True\n Whether or not to return the distances.\n\nsort_results : bool, default=False\n If True, the distances and indices will be sorted by increasing\n distances before being returned. If False, the results may not\n be sorted. If `return_distance=False`, setting `sort_results=True`\n will result in an error.\n\n .. versionadded:: 0.22\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_samples,) of arrays\n Array representing the distances to each point, only present if\n `return_distance=True`. 
The distance values are computed according\n to the ``metric`` constructor parameter.\n\nneigh_ind : ndarray of shape (n_samples,) of arrays\n An array of arrays of indices of the approximate nearest points\n from the population matrix that lie within a ball of size\n ``radius`` around the query points.\n\nExamples\n--------\nIn the following example, we construct a NearestNeighbors\nclass from an array representing our data set and ask who's\nthe closest point to [1, 1, 1]:\n\n>>> import numpy as np\n>>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(radius=1.6)\n>>> neigh.fit(samples)\nNearestNeighbors(radius=1.6)\n>>> rng = neigh.radius_neighbors([[1., 1., 1.]])\n>>> print(np.asarray(rng[0][0]))\n[1.5 0.5]\n>>> print(np.asarray(rng[1][0]))\n[1 2]\n\nThe first array returned contains the distances to all points which\nare closer than 1.6, while the second array returned contains their\nindices. In general, multiple points can be queried at the same time.\n\nNotes\n-----\nBecause the number of neighbors of each point is not necessarily\nequal, the results for multiple query points cannot be fit in a\nstandard data array.\nFor efficiency, `radius_neighbors` returns arrays of objects, where\neach object is a 1D array of indices or distances.", + "code": " def radius_neighbors(self, X=None, radius=None, return_distance=True,\n sort_results=False):\n \"\"\"Finds the neighbors within a given radius of a point or points.\n\n Return the indices and distances of each point from the dataset\n lying in a ball with size ``radius`` around the points of the query\n array. Points lying on the boundary are included in the results.\n\n The result points are *not* necessarily sorted by distance to their\n query point.\n\n Parameters\n ----------\n X : array-like of (n_samples, n_features), default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\n radius : float, default=None\n Limiting distance of neighbors to return. The default is the value\n passed to the constructor.\n\n return_distance : bool, default=True\n Whether or not to return the distances.\n\n sort_results : bool, default=False\n If True, the distances and indices will be sorted by increasing\n distances before being returned. If False, the results may not\n be sorted. If `return_distance=False`, setting `sort_results=True`\n will result in an error.\n\n .. versionadded:: 0.22\n\n Returns\n -------\n neigh_dist : ndarray of shape (n_samples,) of arrays\n Array representing the distances to each point, only present if\n `return_distance=True`. 
The distance values are computed according\n to the ``metric`` constructor parameter.\n\n neigh_ind : ndarray of shape (n_samples,) of arrays\n An array of arrays of indices of the approximate nearest points\n from the population matrix that lie within a ball of size\n ``radius`` around the query points.\n\n Examples\n --------\n In the following example, we construct a NearestNeighbors\n class from an array representing our data set and ask who's\n the closest point to [1, 1, 1]:\n\n >>> import numpy as np\n >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n >>> from sklearn.neighbors import NearestNeighbors\n >>> neigh = NearestNeighbors(radius=1.6)\n >>> neigh.fit(samples)\n NearestNeighbors(radius=1.6)\n >>> rng = neigh.radius_neighbors([[1., 1., 1.]])\n >>> print(np.asarray(rng[0][0]))\n [1.5 0.5]\n >>> print(np.asarray(rng[1][0]))\n [1 2]\n\n The first array returned contains the distances to all points which\n are closer than 1.6, while the second array returned contains their\n indices. In general, multiple points can be queried at the same time.\n\n Notes\n -----\n Because the number of neighbors of each point is not necessarily\n equal, the results for multiple query points cannot be fit in a\n standard data array.\n For efficiency, `radius_neighbors` returns arrays of objects, where\n each object is a 1D array of indices or distances.\n \"\"\"\n check_is_fitted(self)\n\n if X is not None:\n query_is_train = False\n if self.effective_metric_ == 'precomputed':\n X = _check_precomputed(X)\n else:\n X = check_array(X, accept_sparse='csr')\n else:\n query_is_train = True\n X = self._fit_X\n\n if radius is None:\n radius = self.radius\n\n if (self._fit_method == 'brute' and\n self.effective_metric_ == 'precomputed' and issparse(X)):\n results = _radius_neighbors_from_graph(\n X, radius=radius, return_distance=return_distance)\n\n elif self._fit_method == 'brute':\n # for efficiency, use squared euclidean distances\n if self.effective_metric_ == 'euclidean':\n radius *= radius\n kwds = {'squared': True}\n else:\n kwds = self.effective_metric_params_\n\n reduce_func = partial(self._radius_neighbors_reduce_func,\n radius=radius,\n return_distance=return_distance)\n\n chunked_results = pairwise_distances_chunked(\n X, self._fit_X, reduce_func=reduce_func,\n metric=self.effective_metric_, n_jobs=self.n_jobs,\n **kwds)\n if return_distance:\n neigh_dist_chunks, neigh_ind_chunks = zip(*chunked_results)\n neigh_dist_list = sum(neigh_dist_chunks, [])\n neigh_ind_list = sum(neigh_ind_chunks, [])\n neigh_dist = _to_object_array(neigh_dist_list)\n neigh_ind = _to_object_array(neigh_ind_list)\n results = neigh_dist, neigh_ind\n else:\n neigh_ind_list = sum(chunked_results, [])\n results = _to_object_array(neigh_ind_list)\n\n if sort_results:\n if not return_distance:\n raise ValueError(\"return_distance must be True \"\n \"if sort_results is True.\")\n for ii in range(len(neigh_dist)):\n order = np.argsort(neigh_dist[ii], kind='mergesort')\n neigh_ind[ii] = neigh_ind[ii][order]\n neigh_dist[ii] = neigh_dist[ii][order]\n results = neigh_dist, neigh_ind\n\n elif self._fit_method in ['ball_tree', 'kd_tree']:\n if issparse(X):\n raise ValueError(\n \"%s does not work with sparse matrices. 
Densify the data, \"\n \"or set algorithm='brute'\" % self._fit_method)\n\n n_jobs = effective_n_jobs(self.n_jobs)\n delayed_query = delayed(_tree_query_radius_parallel_helper)\n if parse_version(joblib.__version__) < parse_version('0.12'):\n # Deal with change of API in joblib\n parallel_kwargs = {\"backend\": \"threading\"}\n else:\n parallel_kwargs = {\"prefer\": \"threads\"}\n\n chunked_results = Parallel(n_jobs, **parallel_kwargs)(\n delayed_query(self._tree, X[s], radius, return_distance,\n sort_results=sort_results)\n\n for s in gen_even_slices(X.shape[0], n_jobs)\n )\n if return_distance:\n neigh_ind, neigh_dist = tuple(zip(*chunked_results))\n results = np.hstack(neigh_dist), np.hstack(neigh_ind)\n else:\n results = np.hstack(chunked_results)\n else:\n raise ValueError(\"internal: _fit_method not recognized\")\n\n if not query_is_train:\n return results\n else:\n # If the query data is the same as the indexed data, we would like\n # to ignore the first nearest neighbor of every sample, i.e\n # the sample itself.\n if return_distance:\n neigh_dist, neigh_ind = results\n else:\n neigh_ind = results\n\n for ind, ind_neighbor in enumerate(neigh_ind):\n mask = ind_neighbor != ind\n\n neigh_ind[ind] = ind_neighbor[mask]\n if return_distance:\n neigh_dist[ind] = neigh_dist[ind][mask]\n\n if return_distance:\n return neigh_dist, neigh_ind\n return neigh_ind" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors_graph", + "name": "radius_neighbors_graph", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin.radius_neighbors_graph", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors_graph/self", + "name": "self", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin.radius_neighbors_graph.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors_graph/X", + "name": "X", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin.radius_neighbors_graph.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "None", + "description": "The query point or points.\nIf not provided, neighbors of each indexed point are returned.\nIn this case, the query point is not considered its own neighbor." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors_graph/radius", + "name": "radius", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin.radius_neighbors_graph.radius", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Radius of neighborhoods. The default is the value passed to the\nconstructor." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors_graph/mode", + "name": "mode", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin.radius_neighbors_graph.mode", + "default_value": "'connectivity'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'connectivity', 'distance'}", + "default_value": "'connectivity'", + "description": "Type of returned matrix: 'connectivity' will return the\nconnectivity matrix with ones and zeros, in 'distance' the\nedges are Euclidean distance between points." + }, + "type": { + "kind": "EnumType", + "values": ["connectivity", "distance"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/RadiusNeighborsMixin/radius_neighbors_graph/sort_results", + "name": "sort_results", + "qname": "sklearn.neighbors._base.RadiusNeighborsMixin.radius_neighbors_graph.sort_results", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, in each row of the result, the non-zero entries will be\nsorted by increasing distances. If False, the non-zero entries may\nnot be sorted. Only used with mode='distance'.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the (weighted) graph of Neighbors for points in X\n\nNeighborhoods are restricted the points at a distance lower than\nradius.", + "docstring": "Computes the (weighted) graph of Neighbors for points in X\n\nNeighborhoods are restricted the points at a distance lower than\nradius.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\nradius : float, default=None\n Radius of neighborhoods. The default is the value passed to the\n constructor.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the\n connectivity matrix with ones and zeros, in 'distance' the\n edges are Euclidean distance between points.\n\nsort_results : bool, default=False\n If True, in each row of the result, the non-zero entries will be\n sorted by increasing distances. If False, the non-zero entries may\n not be sorted. Only used with mode='distance'.\n\n .. 
versionadded:: 0.22\n\nReturns\n-------\nA : sparse-matrix of shape (n_queries, n_samples_fit)\n `n_samples_fit` is the number of samples in the fitted data\n `A[i, j]` is assigned the weight of edge that connects `i` to `j`.\n The matrix is of format CSR.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import NearestNeighbors\n>>> neigh = NearestNeighbors(radius=1.5)\n>>> neigh.fit(X)\nNearestNeighbors(radius=1.5)\n>>> A = neigh.radius_neighbors_graph(X)\n>>> A.toarray()\narray([[1., 0., 1.],\n [0., 1., 0.],\n [1., 0., 1.]])\n\nSee Also\n--------\nkneighbors_graph", + "code": " def radius_neighbors_graph(self, X=None, radius=None, mode='connectivity',\n sort_results=False):\n \"\"\"Computes the (weighted) graph of Neighbors for points in X\n\n Neighborhoods are restricted to the points at a distance lower than\n radius.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features), default=None\n The query point or points.\n If not provided, neighbors of each indexed point are returned.\n In this case, the query point is not considered its own neighbor.\n\n radius : float, default=None\n Radius of neighborhoods. The default is the value passed to the\n constructor.\n\n mode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the\n connectivity matrix with ones and zeros, in 'distance' the\n edges are Euclidean distance between points.\n\n sort_results : bool, default=False\n If True, in each row of the result, the non-zero entries will be\n sorted by increasing distances. If False, the non-zero entries may\n not be sorted. Only used with mode='distance'.\n\n .. versionadded:: 0.22\n\n Returns\n -------\n A : sparse-matrix of shape (n_queries, n_samples_fit)\n `n_samples_fit` is the number of samples in the fitted data\n `A[i, j]` is assigned the weight of edge that connects `i` to `j`.\n The matrix is of format CSR.\n\n Examples\n --------\n >>> X = [[0], [3], [1]]\n >>> from sklearn.neighbors import NearestNeighbors\n >>> neigh = NearestNeighbors(radius=1.5)\n >>> neigh.fit(X)\n NearestNeighbors(radius=1.5)\n >>> A = neigh.radius_neighbors_graph(X)\n >>> A.toarray()\n array([[1., 0., 1.],\n [0., 1., 0.],\n [1., 0., 1.]])\n\n See Also\n --------\n kneighbors_graph\n \"\"\"\n check_is_fitted(self)\n\n # check the input only in self.radius_neighbors\n\n if radius is None:\n radius = self.radius\n\n # construct CSR matrix representation of the NN graph\n if mode == 'connectivity':\n A_ind = self.radius_neighbors(X, radius,\n return_distance=False)\n A_data = None\n elif mode == 'distance':\n dist, A_ind = self.radius_neighbors(X, radius,\n return_distance=True,\n sort_results=sort_results)\n A_data = np.concatenate(list(dist))\n else:\n raise ValueError(\n 'Unsupported mode, must be one of \"connectivity\", '\n 'or \"distance\" but got %s instead' % mode)\n\n n_queries = A_ind.shape[0]\n n_samples_fit = self.n_samples_fit_\n n_neighbors = np.array([len(a) for a in A_ind])\n A_ind = np.concatenate(list(A_ind))\n if A_data is None:\n A_data = np.ones(len(A_ind))\n A_indptr = np.concatenate((np.zeros(1, dtype=int),\n np.cumsum(n_neighbors)))\n\n return csr_matrix((A_data, A_ind, A_indptr),\n shape=(n_queries, n_samples_fit))" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_check_precomputed", + "name": "_check_precomputed", + "qname": "sklearn.neighbors._base._check_precomputed", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/_check_precomputed/X", + 
"name": "X", + "qname": "sklearn.neighbors._base._check_precomputed.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{sparse matrix, array-like}, (n_samples, n_samples)", + "default_value": "", + "description": "Distance matrix to other samples. X may be a sparse matrix, in which\ncase only non-zero elements may be considered neighbors." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "(n_samples, n_samples)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check precomputed distance matrix\n\nIf the precomputed distance matrix is sparse, it checks that the non-zero\nentries are sorted by distances. If not, the matrix is copied and sorted.", + "docstring": "Check precomputed distance matrix\n\nIf the precomputed distance matrix is sparse, it checks that the non-zero\nentries are sorted by distances. If not, the matrix is copied and sorted.\n\nParameters\n----------\nX : {sparse matrix, array-like}, (n_samples, n_samples)\n Distance matrix to other samples. X may be a sparse matrix, in which\n case only non-zero elements may be considered neighbors.\n\nReturns\n-------\nX : {sparse matrix, array-like}, (n_samples, n_samples)\n Distance matrix to other samples. X may be a sparse matrix, in which\n case only non-zero elements may be considered neighbors.", + "code": "def _check_precomputed(X):\n \"\"\"Check precomputed distance matrix\n\n If the precomputed distance matrix is sparse, it checks that the non-zero\n entries are sorted by distances. If not, the matrix is copied and sorted.\n\n Parameters\n ----------\n X : {sparse matrix, array-like}, (n_samples, n_samples)\n Distance matrix to other samples. X may be a sparse matrix, in which\n case only non-zero elements may be considered neighbors.\n\n Returns\n -------\n X : {sparse matrix, array-like}, (n_samples, n_samples)\n Distance matrix to other samples. 
X may be a sparse matrix, in which\n case only non-zero elements may be considered neighbors.\n \"\"\"\n if not issparse(X):\n X = check_array(X)\n check_non_negative(X, whom=\"precomputed distance matrix.\")\n return X\n else:\n graph = X\n\n if graph.format not in ('csr', 'csc', 'coo', 'lil'):\n raise TypeError('Sparse matrix in {!r} format is not supported due to '\n 'its handling of explicit zeros'.format(graph.format))\n copied = graph.format != 'csr'\n graph = check_array(graph, accept_sparse='csr')\n check_non_negative(graph, whom=\"precomputed distance matrix.\")\n\n if not _is_sorted_by_data(graph):\n warnings.warn('Precomputed sparse input was not sorted by data.',\n EfficiencyWarning)\n if not copied:\n graph = graph.copy()\n\n # if each sample has the same number of provided neighbors\n row_nnz = np.diff(graph.indptr)\n if row_nnz.max() == row_nnz.min():\n n_samples = graph.shape[0]\n distances = graph.data.reshape(n_samples, -1)\n\n order = np.argsort(distances, kind='mergesort')\n order += np.arange(n_samples)[:, None] * row_nnz[0]\n order = order.ravel()\n graph.data = graph.data[order]\n graph.indices = graph.indices[order]\n\n else:\n for start, stop in zip(graph.indptr, graph.indptr[1:]):\n order = np.argsort(graph.data[start:stop], kind='mergesort')\n graph.data[start:stop] = graph.data[start:stop][order]\n graph.indices[start:stop] = graph.indices[start:stop][order]\n return graph" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_check_weights", + "name": "_check_weights", + "qname": "sklearn.neighbors._base._check_weights", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/_check_weights/weights", + "name": "weights", + "qname": "sklearn.neighbors._base._check_weights.weights", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check to make sure weights are valid", + "docstring": "Check to make sure weights are valid", + "code": "def _check_weights(weights):\n \"\"\"Check to make sure weights are valid\"\"\"\n if weights in (None, 'uniform', 'distance'):\n return weights\n elif callable(weights):\n return weights\n else:\n raise ValueError(\"weights not recognized: should be 'uniform', \"\n \"'distance', or a callable function\")" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_get_weights", + "name": "_get_weights", + "qname": "sklearn.neighbors._base._get_weights", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/_get_weights/dist", + "name": "dist", + "qname": "sklearn.neighbors._base._get_weights.dist", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "The input distances." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_get_weights/weights", + "name": "weights", + "qname": "sklearn.neighbors._base._get_weights.weights", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'uniform', 'distance' or a callable}", + "default_value": "", + "description": "The kind of weighting used." 
+ }, + "type": { + "kind": "EnumType", + "values": ["distance", "uniform"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get the weights from an array of distances and a parameter ``weights``", + "docstring": "Get the weights from an array of distances and a parameter ``weights``\n\nParameters\n----------\ndist : ndarray\n The input distances.\n\nweights : {'uniform', 'distance' or a callable}\n The kind of weighting used.\n\nReturns\n-------\nweights_arr : array of the same shape as ``dist``\n If ``weights == 'uniform'``, then returns None.", + "code": "def _get_weights(dist, weights):\n \"\"\"Get the weights from an array of distances and a parameter ``weights``\n\n Parameters\n ----------\n dist : ndarray\n The input distances.\n\n weights : {'uniform', 'distance' or a callable}\n The kind of weighting used.\n\n Returns\n -------\n weights_arr : array of the same shape as ``dist``\n If ``weights == 'uniform'``, then returns None.\n \"\"\"\n if weights in (None, 'uniform'):\n return None\n elif weights == 'distance':\n # if user attempts to classify a point that was zero distance from one\n # or more training points, those training points are weighted as 1.0\n # and the other points as 0.0\n if dist.dtype is np.dtype(object):\n for point_dist_i, point_dist in enumerate(dist):\n # check if point_dist is iterable\n # (ex: RadiusNeighborClassifier.predict may set an element of\n # dist to 1e-6 to represent an 'outlier')\n if hasattr(point_dist, '__contains__') and 0. in point_dist:\n dist[point_dist_i] = point_dist == 0.\n else:\n dist[point_dist_i] = 1. / point_dist\n else:\n with np.errstate(divide='ignore'):\n dist = 1. / dist\n inf_mask = np.isinf(dist)\n inf_row = np.any(inf_mask, axis=1)\n dist[inf_row] = inf_mask[inf_row]\n return dist\n elif callable(weights):\n return weights(dist)\n else:\n raise ValueError(\"weights not recognized: should be 'uniform', \"\n \"'distance', or a callable function\")" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_is_sorted_by_data", + "name": "_is_sorted_by_data", + "qname": "sklearn.neighbors._base._is_sorted_by_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/_is_sorted_by_data/graph", + "name": "graph", + "qname": "sklearn.neighbors._base._is_sorted_by_data.graph", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_samples)", + "default_value": "", + "description": "Neighbors graph as given by `kneighbors_graph` or\n`radius_neighbors_graph`. Matrix should be of format CSR format." 
+ }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_samples)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns whether the graph's non-zero entries are sorted by data\n\nThe non-zero entries are stored in graph.data and graph.indices.\nFor each row (or sample), the non-zero entries can be either:\n - sorted by indices, as after graph.sort_indices();\n - sorted by data, as after _check_precomputed(graph);\n - not sorted.", + "docstring": "Returns whether the graph's non-zero entries are sorted by data\n\nThe non-zero entries are stored in graph.data and graph.indices.\nFor each row (or sample), the non-zero entries can be either:\n - sorted by indices, as after graph.sort_indices();\n - sorted by data, as after _check_precomputed(graph);\n - not sorted.\n\nParameters\n----------\ngraph : sparse matrix of shape (n_samples, n_samples)\n Neighbors graph as given by `kneighbors_graph` or\n `radius_neighbors_graph`. Matrix should be of format CSR format.\n\nReturns\n-------\nres : bool\n Whether input graph is sorted by data.", + "code": "def _is_sorted_by_data(graph):\n \"\"\"Returns whether the graph's non-zero entries are sorted by data\n\n The non-zero entries are stored in graph.data and graph.indices.\n For each row (or sample), the non-zero entries can be either:\n - sorted by indices, as after graph.sort_indices();\n - sorted by data, as after _check_precomputed(graph);\n - not sorted.\n\n Parameters\n ----------\n graph : sparse matrix of shape (n_samples, n_samples)\n Neighbors graph as given by `kneighbors_graph` or\n `radius_neighbors_graph`. Matrix should be of format CSR format.\n\n Returns\n -------\n res : bool\n Whether input graph is sorted by data.\n \"\"\"\n assert graph.format == 'csr'\n out_of_order = graph.data[:-1] > graph.data[1:]\n line_change = np.unique(graph.indptr[1:-1] - 1)\n line_change = line_change[line_change < out_of_order.shape[0]]\n return (out_of_order.sum() == out_of_order[line_change].sum())" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_kneighbors_from_graph", + "name": "_kneighbors_from_graph", + "qname": "sklearn.neighbors._base._kneighbors_from_graph", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/_kneighbors_from_graph/graph", + "name": "graph", + "qname": "sklearn.neighbors._base._kneighbors_from_graph.graph", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_samples)", + "default_value": "", + "description": "Neighbors graph as given by `kneighbors_graph` or\n`radius_neighbors_graph`. Matrix should be of format CSR format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_kneighbors_from_graph/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.neighbors._base._kneighbors_from_graph.n_neighbors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of neighbors required for each sample." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_kneighbors_from_graph/return_distance", + "name": "return_distance", + "qname": "sklearn.neighbors._base._kneighbors_from_graph.return_distance", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Whether or not to return the distances." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Decompose a nearest neighbors sparse graph into distances and indices", + "docstring": "Decompose a nearest neighbors sparse graph into distances and indices\n\nParameters\n----------\ngraph : sparse matrix of shape (n_samples, n_samples)\n Neighbors graph as given by `kneighbors_graph` or\n `radius_neighbors_graph`. Matrix should be of format CSR format.\n\nn_neighbors : int\n Number of neighbors required for each sample.\n\nreturn_distance : bool\n Whether or not to return the distances.\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_samples, n_neighbors)\n Distances to nearest neighbors. Only present if `return_distance=True`.\n\nneigh_ind : ndarray of shape (n_samples, n_neighbors)\n Indices of nearest neighbors.", + "code": "def _kneighbors_from_graph(graph, n_neighbors, return_distance):\n \"\"\"Decompose a nearest neighbors sparse graph into distances and indices\n\n Parameters\n ----------\n graph : sparse matrix of shape (n_samples, n_samples)\n Neighbors graph as given by `kneighbors_graph` or\n `radius_neighbors_graph`. Matrix should be of format CSR format.\n\n n_neighbors : int\n Number of neighbors required for each sample.\n\n return_distance : bool\n Whether or not to return the distances.\n\n Returns\n -------\n neigh_dist : ndarray of shape (n_samples, n_neighbors)\n Distances to nearest neighbors. Only present if `return_distance=True`.\n\n neigh_ind : ndarray of shape (n_samples, n_neighbors)\n Indices of nearest neighbors.\n \"\"\"\n n_samples = graph.shape[0]\n assert graph.format == 'csr'\n\n # number of neighbors by samples\n row_nnz = np.diff(graph.indptr)\n row_nnz_min = row_nnz.min()\n if n_neighbors is not None and row_nnz_min < n_neighbors:\n raise ValueError(\n '%d neighbors per samples are required, but some samples have only'\n ' %d neighbors in precomputed graph matrix. 
Decrease number of '\n 'neighbors used or recompute the graph with more neighbors.'\n % (n_neighbors, row_nnz_min))\n\n def extract(a):\n # if each sample has the same number of provided neighbors\n if row_nnz.max() == row_nnz_min:\n return a.reshape(n_samples, -1)[:, :n_neighbors]\n else:\n idx = np.tile(np.arange(n_neighbors), (n_samples, 1))\n idx += graph.indptr[:-1, None]\n return a.take(idx, mode='clip').reshape(n_samples, n_neighbors)\n\n if return_distance:\n return extract(graph.data), extract(graph.indices)\n else:\n return extract(graph.indices)" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_radius_neighbors_from_graph", + "name": "_radius_neighbors_from_graph", + "qname": "sklearn.neighbors._base._radius_neighbors_from_graph", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/_radius_neighbors_from_graph/graph", + "name": "graph", + "qname": "sklearn.neighbors._base._radius_neighbors_from_graph.graph", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_samples)", + "default_value": "", + "description": "Neighbors graph as given by `kneighbors_graph` or\n`radius_neighbors_graph`. Matrix should be of format CSR format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_radius_neighbors_from_graph/radius", + "name": "radius", + "qname": "sklearn.neighbors._base._radius_neighbors_from_graph.radius", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Radius of neighborhoods which should be strictly positive." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_radius_neighbors_from_graph/return_distance", + "name": "return_distance", + "qname": "sklearn.neighbors._base._radius_neighbors_from_graph.return_distance", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Whether or not to return the distances." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Decompose a nearest neighbors sparse graph into distances and indices", + "docstring": "Decompose a nearest neighbors sparse graph into distances and indices\n\nParameters\n----------\ngraph : sparse matrix of shape (n_samples, n_samples)\n Neighbors graph as given by `kneighbors_graph` or\n `radius_neighbors_graph`. Matrix should be of format CSR format.\n\nradius : float\n Radius of neighborhoods which should be strictly positive.\n\nreturn_distance : bool\n Whether or not to return the distances.\n\nReturns\n-------\nneigh_dist : ndarray of shape (n_samples,) of arrays\n Distances to nearest neighbors. Only present if `return_distance=True`.\n\nneigh_ind : ndarray of shape (n_samples,) of arrays\n Indices of nearest neighbors.", + "code": "def _radius_neighbors_from_graph(graph, radius, return_distance):\n \"\"\"Decompose a nearest neighbors sparse graph into distances and indices\n\n Parameters\n ----------\n graph : sparse matrix of shape (n_samples, n_samples)\n Neighbors graph as given by `kneighbors_graph` or\n `radius_neighbors_graph`. 
Matrix should be of format CSR format.\n\n radius : float\n Radius of neighborhoods which should be strictly positive.\n\n return_distance : bool\n Whether or not to return the distances.\n\n Returns\n -------\n neigh_dist : ndarray of shape (n_samples,) of arrays\n Distances to nearest neighbors. Only present if `return_distance=True`.\n\n neigh_ind : ndarray of shape (n_samples,) of arrays\n Indices of nearest neighbors.\n \"\"\"\n assert graph.format == 'csr'\n\n no_filter_needed = bool(graph.data.max() <= radius)\n\n if no_filter_needed:\n data, indices, indptr = graph.data, graph.indices, graph.indptr\n else:\n mask = graph.data <= radius\n if return_distance:\n data = np.compress(mask, graph.data)\n indices = np.compress(mask, graph.indices)\n indptr = np.concatenate(([0], np.cumsum(mask)))[graph.indptr]\n\n indices = indices.astype(np.intp, copy=no_filter_needed)\n\n if return_distance:\n neigh_dist = _to_object_array(np.split(data, indptr[1:-1]))\n neigh_ind = _to_object_array(np.split(indices, indptr[1:-1]))\n\n if return_distance:\n return neigh_dist, neigh_ind\n else:\n return neigh_ind" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_tree_query_parallel_helper", + "name": "_tree_query_parallel_helper", + "qname": "sklearn.neighbors._base._tree_query_parallel_helper", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/_tree_query_parallel_helper/tree", + "name": "tree", + "qname": "sklearn.neighbors._base._tree_query_parallel_helper.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_tree_query_parallel_helper/args", + "name": "args", + "qname": "sklearn.neighbors._base._tree_query_parallel_helper.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_tree_query_parallel_helper/kwargs", + "name": "kwargs", + "qname": "sklearn.neighbors._base._tree_query_parallel_helper.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper for the Parallel calls in KNeighborsMixin.kneighbors\n\nThe Cython method tree.query is not directly picklable by cloudpickle\nunder PyPy.", + "docstring": "Helper for the Parallel calls in KNeighborsMixin.kneighbors\n\nThe Cython method tree.query is not directly picklable by cloudpickle\nunder PyPy.", + "code": "def _tree_query_parallel_helper(tree, *args, **kwargs):\n \"\"\"Helper for the Parallel calls in KNeighborsMixin.kneighbors\n\n The Cython method tree.query is not directly picklable by cloudpickle\n under PyPy.\n \"\"\"\n return tree.query(*args, **kwargs)" + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_tree_query_radius_parallel_helper", + "name": "_tree_query_radius_parallel_helper", + "qname": "sklearn.neighbors._base._tree_query_radius_parallel_helper", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._base/_tree_query_radius_parallel_helper/tree", + "name": "tree", + "qname": "sklearn.neighbors._base._tree_query_radius_parallel_helper.tree", + "default_value": null, + 
"assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_tree_query_radius_parallel_helper/args", + "name": "args", + "qname": "sklearn.neighbors._base._tree_query_radius_parallel_helper.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._base/_tree_query_radius_parallel_helper/kwargs", + "name": "kwargs", + "qname": "sklearn.neighbors._base._tree_query_radius_parallel_helper.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper for the Parallel calls in RadiusNeighborsMixin.radius_neighbors\n\nThe Cython method tree.query_radius is not directly picklable by\ncloudpickle under PyPy.", + "docstring": "Helper for the Parallel calls in RadiusNeighborsMixin.radius_neighbors\n\nThe Cython method tree.query_radius is not directly picklable by\ncloudpickle under PyPy.", + "code": "def _tree_query_radius_parallel_helper(tree, *args, **kwargs):\n \"\"\"Helper for the Parallel calls in RadiusNeighborsMixin.radius_neighbors\n\n The Cython method tree.query_radius is not directly picklable by\n cloudpickle under PyPy.\n \"\"\"\n return tree.query_radius(*args, **kwargs)" + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/__init__", + "name": "__init__", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/__init__/self", + "name": "self", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.__init__.n_neighbors", + "default_value": "5", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of neighbors to use by default for :meth:`kneighbors` queries." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/__init__/weights", + "name": "weights", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.__init__.weights", + "default_value": "'uniform'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'uniform', 'distance'} or callable", + "default_value": "'uniform'", + "description": "weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. 
All points in each neighborhood\n are weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["distance", "uniform"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.__init__.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", + "default_value": "'auto'", + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/__init__/leaf_size", + "name": "leaf_size", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.__init__.leaf_size", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Leaf size passed to BallTree or KDTree. This can affect the\nspeed of the construction and query, as well as the memory\nrequired to store the tree. The optimal value depends on the\nnature of the problem." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/__init__/p", + "name": "p", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.__init__.p", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Power parameter for the Minkowski metric. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/__init__/metric", + "name": "metric", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.__init__.metric", + "default_value": "'minkowski'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'minkowski'", + "description": "the distance metric to use for the tree. The default metric is\nminkowski, and with p=2 is equivalent to the standard Euclidean\nmetric. See the documentation of :class:`DistanceMetric` for a\nlist of available metrics.\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square during fit. X may be a :term:`sparse graph`,\nin which case only \"nonzero\" elements may be considered neighbors." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/__init__/metric_params", + "name": "metric_params", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.__init__.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional keyword arguments for the metric function." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\nDoesn't affect :meth:`fit` method." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/__init__/kwargs", + "name": "kwargs", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.__init__.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Classifier implementing the k-nearest neighbors vote.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_neighbors=5, *,\n weights='uniform', algorithm='auto', leaf_size=30,\n p=2, metric='minkowski', metric_params=None, n_jobs=None,\n **kwargs):\n super().__init__(\n n_neighbors=n_neighbors,\n algorithm=algorithm,\n leaf_size=leaf_size, metric=metric, p=p,\n metric_params=metric_params,\n n_jobs=n_jobs, **kwargs)\n self.weights = _check_weights(weights)" + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/fit", + "name": "fit", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/fit/self", + "name": "self", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/fit/X", + "name": "X", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/fit/y", + "name": "y", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples,) or (n_samples, n_outputs)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the k-nearest neighbors classifier from the training dataset.", + "docstring": "Fit the k-nearest neighbors classifier from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nReturns\n-------\nself : KNeighborsClassifier\n The fitted k-nearest neighbors classifier.", + "code": " def fit(self, X, y):\n \"\"\"Fit the k-nearest neighbors classifier from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n y : {array-like, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_outputs)\n Target values.\n\n Returns\n -------\n self : KNeighborsClassifier\n The fitted k-nearest neighbors classifier.\n \"\"\"\n return self._fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/predict", + "name": "predict", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/predict/self", + "name": "self", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/predict/X", + "name": "X", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'", + "default_value": "", + "description": "Test samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_queries, n_features)" + }, + { + "kind": "NamedType", + "name": "(n_queries, n_indexed) if metric == 'precomputed'" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the class labels for the provided data.", + "docstring": "Predict the class labels for the provided data.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n Class labels for each data sample.", + "code": " def predict(self, X):\n \"\"\"Predict the class labels for the provided data.\n\n Parameters\n ----------\n X : array-like of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\n Returns\n -------\n y : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n Class labels for each data sample.\n \"\"\"\n X = check_array(X, accept_sparse='csr')\n\n neigh_dist, neigh_ind = self.kneighbors(X)\n classes_ = self.classes_\n _y = self._y\n if not self.outputs_2d_:\n _y = self._y.reshape((-1, 1))\n classes_ = [self.classes_]\n\n n_outputs = len(classes_)\n n_queries = _num_samples(X)\n weights = _get_weights(neigh_dist, self.weights)\n\n y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)\n for k, classes_k in enumerate(classes_):\n if weights is None:\n mode, _ = stats.mode(_y[neigh_ind, k], axis=1)\n else:\n mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)\n\n mode = np.asarray(mode.ravel(), dtype=np.intp)\n y_pred[:, k] = classes_k.take(mode)\n\n if not self.outputs_2d_:\n y_pred = y_pred.ravel()\n\n return y_pred" + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/KNeighborsClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.neighbors._classification.KNeighborsClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'", + "default_value": "", + "description": "Test samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_queries, n_features)" + }, + { + "kind": "NamedType", + "name": "(n_queries, n_indexed) if metric == 'precomputed'" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return probability estimates for the test data X.", + "docstring": "Return probability estimates for the test data X.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\np : ndarray of shape (n_queries, n_classes), or a list of n_outputs\n of such arrays if n_outputs > 1.\n The class probabilities of the input samples. Classes are ordered\n by lexicographic order.", + "code": " def predict_proba(self, X):\n \"\"\"Return probability estimates for the test data X.\n\n Parameters\n ----------\n X : array-like of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\n Returns\n -------\n p : ndarray of shape (n_queries, n_classes), or a list of n_outputs\n of such arrays if n_outputs > 1.\n The class probabilities of the input samples. Classes are ordered\n by lexicographic order.\n \"\"\"\n X = check_array(X, accept_sparse='csr')\n\n neigh_dist, neigh_ind = self.kneighbors(X)\n\n classes_ = self.classes_\n _y = self._y\n if not self.outputs_2d_:\n _y = self._y.reshape((-1, 1))\n classes_ = [self.classes_]\n\n n_queries = _num_samples(X)\n\n weights = _get_weights(neigh_dist, self.weights)\n if weights is None:\n weights = np.ones_like(neigh_ind)\n\n all_rows = np.arange(X.shape[0])\n probabilities = []\n for k, classes_k in enumerate(classes_):\n pred_labels = _y[:, k][neigh_ind]\n proba_k = np.zeros((n_queries, classes_k.size))\n\n # a simple ':' index doesn't work right\n for i, idx in enumerate(pred_labels.T): # loop is O(n_neighbors)\n proba_k[all_rows, idx] += weights[:, i]\n\n # normalize 'votes' into real [0,1] probabilities\n normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n normalizer[normalizer == 0.0] = 1.0\n proba_k /= normalizer\n\n probabilities.append(proba_k)\n\n if not self.outputs_2d_:\n probabilities = probabilities[0]\n\n return probabilities" + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__", + "name": "__init__", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__/self", + "name": "self", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__/radius", + "name": "radius", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.__init__.radius", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Range of parameter space to use by default for :meth:`radius_neighbors`\nqueries." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__/weights", + "name": "weights", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.__init__.weights", + "default_value": "'uniform'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'uniform', 'distance'} or callable", + "default_value": "'uniform'", + "description": "weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. All points in each neighborhood\n are weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\nUniform weights are used by default." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["distance", "uniform"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.__init__.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", + "default_value": "'auto'", + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__/leaf_size", + "name": "leaf_size", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.__init__.leaf_size", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Leaf size passed to BallTree or KDTree. This can affect the\nspeed of the construction and query, as well as the memory\nrequired to store the tree. The optimal value depends on the\nnature of the problem." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__/p", + "name": "p", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.__init__.p", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Power parameter for the Minkowski metric. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__/metric", + "name": "metric", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.__init__.metric", + "default_value": "'minkowski'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'minkowski'", + "description": "the distance metric to use for the tree. The default metric is\nminkowski, and with p=2 is equivalent to the standard Euclidean\nmetric. See the documentation of :class:`DistanceMetric` for a\nlist of available metrics.\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square during fit. X may be a :term:`sparse graph`,\nin which case only \"nonzero\" elements may be considered neighbors." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__/outlier_label", + "name": "outlier_label", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.__init__.outlier_label", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{manual label, 'most_frequent'}", + "default_value": "None", + "description": "label for outlier samples (samples with no neighbors in given radius).\n\n- manual label: str or int label (should be the same type as y)\n or list of manual labels if multi-output is used.\n- 'most_frequent' : assign the most frequent label of y to outliers.\n- None : when any outlier is detected, ValueError will be raised." + }, + "type": { + "kind": "EnumType", + "values": ["most_frequent"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__/metric_params", + "name": "metric_params", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.__init__.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional keyword arguments for the metric function." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/__init__/kwargs", + "name": "kwargs", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.__init__.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Classifier implementing a vote among neighbors within a given radius\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, radius=1.0, *, weights='uniform',\n algorithm='auto', leaf_size=30, p=2, metric='minkowski',\n outlier_label=None, metric_params=None, n_jobs=None,\n **kwargs):\n super().__init__(\n radius=radius,\n algorithm=algorithm,\n leaf_size=leaf_size,\n metric=metric, p=p, metric_params=metric_params,\n n_jobs=n_jobs, **kwargs)\n self.weights = _check_weights(weights)\n self.outlier_label = outlier_label" + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/fit", + "name": "fit", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/fit/self", + "name": "self", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/fit/X", + "name": "X", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/fit/y", + "name": "y", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Target values." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples,) or (n_samples, n_outputs)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the radius neighbors classifier from the training dataset.", + "docstring": "Fit the radius neighbors classifier from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nReturns\n-------\nself : RadiusNeighborsClassifier\n The fitted radius neighbors classifier.", + "code": " def fit(self, X, y):\n \"\"\"Fit the radius neighbors classifier from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n y : {array-like, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_outputs)\n Target values.\n\n Returns\n -------\n self : RadiusNeighborsClassifier\n The fitted radius neighbors classifier.\n \"\"\"\n self._fit(X, y)\n\n classes_ = self.classes_\n _y = self._y\n if not self.outputs_2d_:\n _y = self._y.reshape((-1, 1))\n classes_ = [self.classes_]\n\n if self.outlier_label is None:\n outlier_label_ = None\n\n elif self.outlier_label == 'most_frequent':\n outlier_label_ = []\n # iterate over multi-output, get the most frequent label for each\n # output.\n for k, classes_k in enumerate(classes_):\n label_count = np.bincount(_y[:, k])\n outlier_label_.append(classes_k[label_count.argmax()])\n\n else:\n if (_is_arraylike(self.outlier_label) and\n not isinstance(self.outlier_label, str)):\n if len(self.outlier_label) != len(classes_):\n raise ValueError(\"The length of outlier_label: {} is \"\n \"inconsistent with the output \"\n \"length: {}\".format(self.outlier_label,\n len(classes_)))\n outlier_label_ = self.outlier_label\n else:\n outlier_label_ = [self.outlier_label] * len(classes_)\n\n for classes, label in zip(classes_, outlier_label_):\n if (_is_arraylike(label) and\n not isinstance(label, str)):\n # ensure the outlier lable for each output is a scalar.\n raise TypeError(\"The outlier_label of classes {} is \"\n \"supposed to be a scalar, got \"\n \"{}.\".format(classes, label))\n if np.append(classes, label).dtype != classes.dtype:\n # ensure the dtype of outlier label is consistent with y.\n raise TypeError(\"The dtype of outlier_label {} is \"\n \"inconsistent with classes {} in \"\n \"y.\".format(label, classes))\n\n self.outlier_label_ = outlier_label_\n\n return self" + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/predict", + "name": "predict", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/predict/self", + "name": "self", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/predict/X", + "name": "X", + "qname": 
"sklearn.neighbors._classification.RadiusNeighborsClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'", + "default_value": "", + "description": "Test samples." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_queries, n_features)" + }, + { + "kind": "NamedType", + "name": "(n_queries, n_indexed) if metric == 'precomputed'" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the class labels for the provided data.", + "docstring": "Predict the class labels for the provided data.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n Class labels for each data sample.", + "code": " def predict(self, X):\n \"\"\"Predict the class labels for the provided data.\n\n Parameters\n ----------\n X : array-like of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\n Returns\n -------\n y : ndarray of shape (n_queries,) or (n_queries, n_outputs)\n Class labels for each data sample.\n \"\"\"\n\n probs = self.predict_proba(X)\n classes_ = self.classes_\n\n if not self.outputs_2d_:\n probs = [probs]\n classes_ = [self.classes_]\n\n n_outputs = len(classes_)\n n_queries = probs[0].shape[0]\n y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)\n\n for k, prob in enumerate(probs):\n # iterate over multi-output, assign labels based on probabilities\n # of each output.\n max_prob_index = prob.argmax(axis=1)\n y_pred[:, k] = classes_[k].take(max_prob_index)\n\n outlier_zero_probs = (prob == 0).all(axis=1)\n if outlier_zero_probs.any():\n zero_prob_index = np.flatnonzero(outlier_zero_probs)\n y_pred[zero_prob_index, k] = self.outlier_label_[k]\n\n if not self.outputs_2d_:\n y_pred = y_pred.ravel()\n\n return y_pred" + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._classification/RadiusNeighborsClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.neighbors._classification.RadiusNeighborsClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'", + "default_value": "", + "description": "Test samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_queries, n_features)" + }, + { + "kind": "NamedType", + "name": "(n_queries, n_indexed) if metric == 'precomputed'" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return probability estimates for the test data X.", + "docstring": "Return probability estimates for the test data X.\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\np : ndarray of shape (n_queries, n_classes), or a list of n_outputs\n of such arrays if n_outputs > 1.\n The class probabilities of the input samples. Classes are ordered\n by lexicographic order.", + "code": " def predict_proba(self, X):\n \"\"\"Return probability estimates for the test data X.\n\n Parameters\n ----------\n X : array-like of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\n Returns\n -------\n p : ndarray of shape (n_queries, n_classes), or a list of n_outputs\n of such arrays if n_outputs > 1.\n The class probabilities of the input samples. Classes are ordered\n by lexicographic order.\n \"\"\"\n\n X = check_array(X, accept_sparse='csr')\n n_queries = _num_samples(X)\n\n neigh_dist, neigh_ind = self.radius_neighbors(X)\n outlier_mask = np.zeros(n_queries, dtype=bool)\n outlier_mask[:] = [len(nind) == 0 for nind in neigh_ind]\n outliers = np.flatnonzero(outlier_mask)\n inliers = np.flatnonzero(~outlier_mask)\n\n classes_ = self.classes_\n _y = self._y\n if not self.outputs_2d_:\n _y = self._y.reshape((-1, 1))\n classes_ = [self.classes_]\n\n if self.outlier_label_ is None and outliers.size > 0:\n raise ValueError('No neighbors found for test samples %r, '\n 'you can try using larger radius, '\n 'giving a label for outliers, '\n 'or considering removing them from your dataset.'\n % outliers)\n\n weights = _get_weights(neigh_dist, self.weights)\n if weights is not None:\n weights = weights[inliers]\n\n probabilities = []\n # iterate over multi-output, measure probabilities of the k-th output.\n for k, classes_k in enumerate(classes_):\n pred_labels = np.zeros(len(neigh_ind), dtype=object)\n pred_labels[:] = [_y[ind, k] for ind in neigh_ind]\n\n proba_k = np.zeros((n_queries, classes_k.size))\n proba_inl = np.zeros((len(inliers), classes_k.size))\n\n # samples have different size of neighbors within the same radius\n if weights is None:\n for i, idx in enumerate(pred_labels[inliers]):\n proba_inl[i, :] = np.bincount(idx,\n minlength=classes_k.size)\n else:\n for i, idx in enumerate(pred_labels[inliers]):\n proba_inl[i, :] = np.bincount(idx,\n weights[i],\n minlength=classes_k.size)\n proba_k[inliers, :] = proba_inl\n\n if outliers.size > 0:\n _outlier_label = self.outlier_label_[k]\n label_index = np.flatnonzero(classes_k == _outlier_label)\n if label_index.size == 1:\n proba_k[outliers, label_index[0]] = 1.0\n else:\n warnings.warn('Outlier label {} is not in training '\n 'classes. 
All class probabilities of '\n 'outliers will be assigned with 0.'\n ''.format(self.outlier_label_[k]))\n\n # normalize 'votes' into real [0,1] probabilities\n normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n normalizer[normalizer == 0.0] = 1.0\n proba_k /= normalizer\n\n probabilities.append(proba_k)\n\n if not self.outputs_2d_:\n probabilities = probabilities[0]\n\n return probabilities" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/__init__", + "name": "__init__", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/__init__/self", + "name": "self", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/__init__/mode", + "name": "mode", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.__init__.mode", + "default_value": "'distance'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'distance', 'connectivity'}", + "default_value": "'distance'", + "description": "Type of returned matrix: 'connectivity' will return the connectivity\nmatrix with ones and zeros, and 'distance' will return the distances\nbetween neighbors according to the given metric." + }, + "type": { + "kind": "EnumType", + "values": ["connectivity", "distance"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.__init__.n_neighbors", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of neighbors for each sample in the transformed sparse graph.\nFor compatibility reasons, as each sample is considered as its own\nneighbor, one extra neighbor will be computed when mode == 'distance'.\nIn this case, the sparse graph contains (n_neighbors + 1) neighbors." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.__init__.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", + "default_value": "'auto'", + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." 
+ }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/__init__/leaf_size", + "name": "leaf_size", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.__init__.leaf_size", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Leaf size passed to BallTree or KDTree. This can affect the\nspeed of the construction and query, as well as the memory\nrequired to store the tree. The optimal value depends on the\nnature of the problem." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/__init__/metric", + "name": "metric", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.__init__.metric", + "default_value": "'minkowski'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'minkowski'", + "description": "metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nDistance matrices are not supported.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/__init__/p", + "name": "p", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.__init__.p", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Parameter for the Minkowski metric from\nsklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/__init__/metric_params", + "name": "metric_params", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.__init__.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional keyword arguments for the metric function." 
+ }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.__init__.n_jobs", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "The number of parallel jobs to run for neighbors search.\nIf ``-1``, then the number of jobs is set to the number of CPU cores." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform X into a (weighted) graph of k nearest neighbors\n\nThe transformed data is a sparse graph as returned by kneighbors_graph.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, mode='distance', n_neighbors=5, algorithm='auto',\n leaf_size=30, metric='minkowski', p=2, metric_params=None,\n n_jobs=1):\n super(KNeighborsTransformer, self).__init__(\n n_neighbors=n_neighbors, radius=None, algorithm=algorithm,\n leaf_size=leaf_size, metric=metric, p=p,\n metric_params=metric_params, n_jobs=n_jobs)\n self.mode = mode" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/_more_tags/self", + "name": "self", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_methods_sample_order_invariance':\n 'check is not applicable.'\n }\n }" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/fit", + "name": "fit", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/fit/self", + "name": "self", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/fit/X", + "name": "X", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/fit/y", + "name": "y", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the k-nearest neighbors transformer from the training dataset.", + "docstring": "Fit the k-nearest neighbors transformer from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\nReturns\n-------\nself : KNeighborsTransformer\n The fitted k-nearest neighbors transformer.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the k-nearest neighbors transformer from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n Returns\n -------\n self : KNeighborsTransformer\n The fitted k-nearest neighbors transformer.\n \"\"\"\n return self._fit(X)" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/fit_transform", + "name": "fit_transform", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/fit_transform/self", + "name": "self", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/fit_transform/X", + "name": "X", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training set." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/fit_transform/y", + "name": "y", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit to data, then transform it.\n\nFits transformer to X and y with optional parameters fit_params\nand returns a transformed version of X.", + "docstring": "Fit to data, then transform it.\n\nFits transformer to X and y with optional parameters fit_params\nand returns a transformed version of X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training set.\n\ny : ignored\n\nReturns\n-------\nXt : sparse matrix of shape (n_samples, n_samples)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format.", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Fit to data, then transform it.\n\n Fits transformer to X and y with optional parameters fit_params\n and returns a transformed version of X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training set.\n\n y : ignored\n\n Returns\n -------\n Xt : sparse matrix of shape (n_samples, n_samples)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format.\n \"\"\"\n return self.fit(X).transform(X)" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/transform", + "name": "transform", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/transform/self", + "name": "self", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/KNeighborsTransformer/transform/X", + "name": "X", + "qname": "sklearn.neighbors._graph.KNeighborsTransformer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples_transform, n_features)", + "default_value": "", + "description": "Sample data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples_transform, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the (weighted) graph of Neighbors for points in X", + "docstring": "Computes the (weighted) graph of Neighbors for points in X\n\nParameters\n----------\nX : array-like of shape (n_samples_transform, n_features)\n Sample data.\n\nReturns\n-------\nXt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format.", + "code": " def transform(self, X):\n \"\"\"Computes the (weighted) graph of Neighbors for points in X\n\n Parameters\n ----------\n X : array-like of shape (n_samples_transform, n_features)\n Sample data.\n\n Returns\n -------\n Xt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format.\n \"\"\"\n check_is_fitted(self)\n add_one = self.mode == 'distance'\n return self.kneighbors_graph(X, mode=self.mode,\n n_neighbors=self.n_neighbors + add_one)" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/__init__", + "name": "__init__", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/__init__/self", + "name": "self", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/__init__/mode", + "name": "mode", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.__init__.mode", + "default_value": "'distance'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'distance', 'connectivity'}", + "default_value": "'distance'", + "description": "Type of returned matrix: 'connectivity' will return the connectivity\nmatrix with ones and zeros, and 'distance' will return the distances\nbetween neighbors according to the given metric." + }, + "type": { + "kind": "EnumType", + "values": ["connectivity", "distance"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/__init__/radius", + "name": "radius", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.__init__.radius", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.", + "description": "Radius of neighborhood in the transformed sparse graph." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.__init__.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", + "default_value": "'auto'", + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/__init__/leaf_size", + "name": "leaf_size", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.__init__.leaf_size", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Leaf size passed to BallTree or KDTree. This can affect the\nspeed of the construction and query, as well as the memory\nrequired to store the tree. The optimal value depends on the\nnature of the problem." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/__init__/metric", + "name": "metric", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.__init__.metric", + "default_value": "'minkowski'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'minkowski'", + "description": "metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nDistance matrices are not supported.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/__init__/p", + "name": "p", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.__init__.p", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Parameter for the Minkowski metric from\nsklearn.metrics.pairwise.pairwise_distances. 
When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/__init__/metric_params", + "name": "metric_params", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.__init__.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional keyword arguments for the metric function." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.__init__.n_jobs", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "The number of parallel jobs to run for neighbors search.\nIf ``-1``, then the number of jobs is set to the number of CPU cores." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform X into a (weighted) graph of neighbors nearer than a radius\n\nThe transformed data is a sparse graph as returned by\nradius_neighbors_graph.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, mode='distance', radius=1., algorithm='auto',\n leaf_size=30, metric='minkowski', p=2, metric_params=None,\n n_jobs=1):\n super(RadiusNeighborsTransformer, self).__init__(\n n_neighbors=None, radius=radius, algorithm=algorithm,\n leaf_size=leaf_size, metric=metric, p=p,\n metric_params=metric_params, n_jobs=n_jobs)\n self.mode = mode" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/_more_tags/self", + "name": "self", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_methods_sample_order_invariance':\n 'check is not applicable.'\n }\n }" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/fit", + "name": "fit", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/fit/self", + "name": "self", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/fit/X", + "name": "X", + "qname": 
"sklearn.neighbors._graph.RadiusNeighborsTransformer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/fit/y", + "name": "y", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the radius neighbors transformer from the training dataset.", + "docstring": "Fit the radius neighbors transformer from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\nReturns\n-------\nself : RadiusNeighborsTransformer\n The fitted radius neighbors transformer.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the radius neighbors transformer from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n Returns\n -------\n self : RadiusNeighborsTransformer\n The fitted radius neighbors transformer.\n \"\"\"\n return self._fit(X)" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/fit_transform", + "name": "fit_transform", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/fit_transform/self", + "name": "self", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/fit_transform/X", + "name": "X", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training set." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/fit_transform/y", + "name": "y", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ignored", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit to data, then transform it.\n\nFits transformer to X and y with optional parameters fit_params\nand returns a transformed version of X.", + "docstring": "Fit to data, then transform it.\n\nFits transformer to X and y with optional parameters fit_params\nand returns a transformed version of X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training set.\n\ny : ignored\n\nReturns\n-------\nXt : sparse matrix of shape (n_samples, n_samples)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format.", + "code": " def fit_transform(self, X, y=None):\n \"\"\"Fit to data, then transform it.\n\n Fits transformer to X and y with optional parameters fit_params\n and returns a transformed version of X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training set.\n\n y : ignored\n\n Returns\n -------\n Xt : sparse matrix of shape (n_samples, n_samples)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format.\n \"\"\"\n return self.fit(X).transform(X)" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/transform", + "name": "transform", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/transform/self", + "name": "self", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/RadiusNeighborsTransformer/transform/X", + "name": "X", + "qname": "sklearn.neighbors._graph.RadiusNeighborsTransformer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples_transform, n_features)", + "default_value": "", + "description": "Sample data" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples_transform, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the (weighted) graph of Neighbors for points in X", + "docstring": "Computes the (weighted) graph of Neighbors for points in X\n\nParameters\n----------\nX : array-like of shape (n_samples_transform, n_features)\n Sample data\n\nReturns\n-------\nXt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The 
matrix is of CSR format.", + "code": " def transform(self, X):\n \"\"\"Computes the (weighted) graph of Neighbors for points in X\n\n Parameters\n ----------\n X : array-like of shape (n_samples_transform, n_features)\n Sample data\n\n Returns\n -------\n Xt : sparse matrix of shape (n_samples_transform, n_samples_fit)\n Xt[i, j] is assigned the weight of edge that connects i to j.\n Only the neighbors have an explicit value.\n The diagonal is always explicit.\n The matrix is of CSR format.\n \"\"\"\n check_is_fitted(self)\n return self.radius_neighbors_graph(X, mode=self.mode,\n sort_results=True)" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/_check_params", + "name": "_check_params", + "qname": "sklearn.neighbors._graph._check_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/_check_params/X", + "name": "X", + "qname": "sklearn.neighbors._graph._check_params.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/_check_params/metric", + "name": "metric", + "qname": "sklearn.neighbors._graph._check_params.metric", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/_check_params/p", + "name": "p", + "qname": "sklearn.neighbors._graph._check_params.p", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/_check_params/metric_params", + "name": "metric_params", + "qname": "sklearn.neighbors._graph._check_params.metric_params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the validity of the input parameters", + "docstring": "Check the validity of the input parameters", + "code": "def _check_params(X, metric, p, metric_params):\n \"\"\"Check the validity of the input parameters\"\"\"\n params = zip(['metric', 'p', 'metric_params'],\n [metric, p, metric_params])\n est_params = X.get_params()\n for param_name, func_param in params:\n if func_param != est_params[param_name]:\n raise ValueError(\n \"Got %s for %s, while the estimator has %s for \"\n \"the same parameter.\" % (\n func_param, param_name, est_params[param_name]))" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/_query_include_self", + "name": "_query_include_self", + "qname": "sklearn.neighbors._graph._query_include_self", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/_query_include_self/X", + "name": "X", + "qname": "sklearn.neighbors._graph._query_include_self.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/_query_include_self/include_self", + "name": "include_self", + "qname": "sklearn.neighbors._graph._query_include_self.include_self", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/_query_include_self/mode", + "name": "mode", + "qname": "sklearn.neighbors._graph._query_include_self.mode", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the query based on include_self param", + "docstring": "Return the query based on include_self param", + "code": "def _query_include_self(X, include_self, mode):\n \"\"\"Return the query based on include_self param\"\"\"\n if include_self == 'auto':\n include_self = mode == 'connectivity'\n\n # it does not include each sample as its own neighbors\n if not include_self:\n X = None\n\n return X" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/kneighbors_graph", + "name": "kneighbors_graph", + "qname": "sklearn.neighbors._graph.kneighbors_graph", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/kneighbors_graph/X", + "name": "X", + "qname": "sklearn.neighbors._graph.kneighbors_graph.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or BallTree", + "default_value": "", + "description": "Sample data, in the form of a numpy array or a precomputed\n:class:`BallTree`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "BallTree" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/kneighbors_graph/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.neighbors._graph.kneighbors_graph.n_neighbors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of neighbors for each sample." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/kneighbors_graph/mode", + "name": "mode", + "qname": "sklearn.neighbors._graph.kneighbors_graph.mode", + "default_value": "'connectivity'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'connectivity', 'distance'}", + "default_value": "'connectivity'", + "description": "Type of returned matrix: 'connectivity' will return the connectivity\nmatrix with ones and zeros, and 'distance' will return the distances\nbetween neighbors according to the given metric." + }, + "type": { + "kind": "EnumType", + "values": ["connectivity", "distance"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/kneighbors_graph/metric", + "name": "metric", + "qname": "sklearn.neighbors._graph.kneighbors_graph.metric", + "default_value": "'minkowski'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'minkowski'", + "description": "The distance metric used to calculate the k-Neighbors for each sample\npoint. 
The DistanceMetric class gives a list of available metrics.\nThe default distance is 'euclidean' ('minkowski' metric with the p\nparam equal to 2.)" + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/kneighbors_graph/p", + "name": "p", + "qname": "sklearn.neighbors._graph.kneighbors_graph.p", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Power parameter for the Minkowski metric. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/kneighbors_graph/metric_params", + "name": "metric_params", + "qname": "sklearn.neighbors._graph.kneighbors_graph.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "additional keyword arguments for the metric function." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/kneighbors_graph/include_self", + "name": "include_self", + "qname": "sklearn.neighbors._graph.kneighbors_graph.include_self", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or 'auto'", + "default_value": "False", + "description": "Whether or not to mark each sample as the first nearest neighbor to\nitself. If 'auto', then True is used for mode='connectivity' and False\nfor mode='distance'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'auto'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/kneighbors_graph/n_jobs", + "name": "n_jobs", + "qname": "sklearn.neighbors._graph.kneighbors_graph.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the (weighted) graph of k-Neighbors for points in X\n\nRead more in the :ref:`User Guide `.", + "docstring": "Computes the (weighted) graph of k-Neighbors for points in X\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or BallTree\n Sample data, in the form of a numpy array or a precomputed\n :class:`BallTree`.\n\nn_neighbors : int\n Number of neighbors for each sample.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\nmetric : str, default='minkowski'\n The distance metric used to calculate the k-Neighbors for each sample\n point. 
The DistanceMetric class gives a list of available metrics.\n The default distance is 'euclidean' ('minkowski' metric with the p\n param equal to 2.)\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n additional keyword arguments for the metric function.\n\ninclude_self : bool or 'auto', default=False\n Whether or not to mark each sample as the first nearest neighbor to\n itself. If 'auto', then True is used for mode='connectivity' and False\n for mode='distance'.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nA : sparse matrix of shape (n_samples, n_samples)\n Graph where A[i, j] is assigned the weight of edge that\n connects i to j. The matrix is of CSR format.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import kneighbors_graph\n>>> A = kneighbors_graph(X, 2, mode='connectivity', include_self=True)\n>>> A.toarray()\narray([[1., 0., 1.],\n [0., 1., 1.],\n [1., 0., 1.]])\n\nSee Also\n--------\nradius_neighbors_graph", + "code": "@_deprecate_positional_args\ndef kneighbors_graph(X, n_neighbors, *, mode='connectivity',\n metric='minkowski', p=2, metric_params=None,\n include_self=False, n_jobs=None):\n \"\"\"Computes the (weighted) graph of k-Neighbors for points in X\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or BallTree\n Sample data, in the form of a numpy array or a precomputed\n :class:`BallTree`.\n\n n_neighbors : int\n Number of neighbors for each sample.\n\n mode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\n metric : str, default='minkowski'\n The distance metric used to calculate the k-Neighbors for each sample\n point. The DistanceMetric class gives a list of available metrics.\n The default distance is 'euclidean' ('minkowski' metric with the p\n param equal to 2.)\n\n p : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n metric_params : dict, default=None\n additional keyword arguments for the metric function.\n\n include_self : bool or 'auto', default=False\n Whether or not to mark each sample as the first nearest neighbor to\n itself. If 'auto', then True is used for mode='connectivity' and False\n for mode='distance'.\n\n n_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Returns\n -------\n A : sparse matrix of shape (n_samples, n_samples)\n Graph where A[i, j] is assigned the weight of edge that\n connects i to j. 
The matrix is of CSR format.\n\n Examples\n --------\n >>> X = [[0], [3], [1]]\n >>> from sklearn.neighbors import kneighbors_graph\n >>> A = kneighbors_graph(X, 2, mode='connectivity', include_self=True)\n >>> A.toarray()\n array([[1., 0., 1.],\n [0., 1., 1.],\n [1., 0., 1.]])\n\n See Also\n --------\n radius_neighbors_graph\n \"\"\"\n if not isinstance(X, KNeighborsMixin):\n X = NearestNeighbors(n_neighbors=n_neighbors, metric=metric, p=p,\n metric_params=metric_params, n_jobs=n_jobs).fit(X)\n else:\n _check_params(X, metric, p, metric_params)\n\n query = _query_include_self(X._fit_X, include_self, mode)\n return X.kneighbors_graph(X=query, n_neighbors=n_neighbors, mode=mode)" + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/radius_neighbors_graph", + "name": "radius_neighbors_graph", + "qname": "sklearn.neighbors._graph.radius_neighbors_graph", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._graph/radius_neighbors_graph/X", + "name": "X", + "qname": "sklearn.neighbors._graph.radius_neighbors_graph.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features) or BallTree", + "default_value": "", + "description": "Sample data, in the form of a numpy array or a precomputed\n:class:`BallTree`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "BallTree" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/radius_neighbors_graph/radius", + "name": "radius", + "qname": "sklearn.neighbors._graph.radius_neighbors_graph.radius", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Radius of neighborhoods." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/radius_neighbors_graph/mode", + "name": "mode", + "qname": "sklearn.neighbors._graph.radius_neighbors_graph.mode", + "default_value": "'connectivity'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'connectivity', 'distance'}", + "default_value": "'connectivity'", + "description": "Type of returned matrix: 'connectivity' will return the connectivity\nmatrix with ones and zeros, and 'distance' will return the distances\nbetween neighbors according to the given metric." + }, + "type": { + "kind": "EnumType", + "values": ["connectivity", "distance"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/radius_neighbors_graph/metric", + "name": "metric", + "qname": "sklearn.neighbors._graph.radius_neighbors_graph.metric", + "default_value": "'minkowski'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'minkowski'", + "description": "The distance metric used to calculate the neighbors within a\ngiven radius for each sample point. The DistanceMetric class\ngives a list of available metrics. 
The default distance is\n'euclidean' ('minkowski' metric with the p param equal to 2.)" + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/radius_neighbors_graph/p", + "name": "p", + "qname": "sklearn.neighbors._graph.radius_neighbors_graph.p", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Power parameter for the Minkowski metric. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/radius_neighbors_graph/metric_params", + "name": "metric_params", + "qname": "sklearn.neighbors._graph.radius_neighbors_graph.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "additional keyword arguments for the metric function." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/radius_neighbors_graph/include_self", + "name": "include_self", + "qname": "sklearn.neighbors._graph.radius_neighbors_graph.include_self", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool or 'auto'", + "default_value": "False", + "description": "Whether or not to mark each sample as the first nearest neighbor to\nitself. If 'auto', then True is used for mode='connectivity' and False\nfor mode='distance'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'auto'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._graph/radius_neighbors_graph/n_jobs", + "name": "n_jobs", + "qname": "sklearn.neighbors._graph.radius_neighbors_graph.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the (weighted) graph of Neighbors for points in X\n\nNeighborhoods are restricted to the points at a distance lower than\nradius.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Computes the (weighted) graph of Neighbors for points in X\n\nNeighborhoods are restricted to the points at a distance lower than\nradius.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or BallTree\n Sample data, in the form of a numpy array or a precomputed\n :class:`BallTree`.\n\nradius : float\n Radius of neighborhoods.\n\nmode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\nmetric : str, default='minkowski'\n The distance metric used to calculate the neighbors within a\n given radius for each sample point. 
The DistanceMetric class\n gives a list of available metrics. The default distance is\n 'euclidean' ('minkowski' metric with the p param equal to 2.)\n\np : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n additional keyword arguments for the metric function.\n\ninclude_self : bool or 'auto', default=False\n Whether or not to mark each sample as the first nearest neighbor to\n itself. If 'auto', then True is used for mode='connectivity' and False\n for mode='distance'.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nA : sparse matrix of shape (n_samples, n_samples)\n Graph where A[i, j] is assigned the weight of edge that connects\n i to j. The matrix is of CSR format.\n\nExamples\n--------\n>>> X = [[0], [3], [1]]\n>>> from sklearn.neighbors import radius_neighbors_graph\n>>> A = radius_neighbors_graph(X, 1.5, mode='connectivity',\n... include_self=True)\n>>> A.toarray()\narray([[1., 0., 1.],\n [0., 1., 0.],\n [1., 0., 1.]])\n\nSee Also\n--------\nkneighbors_graph", + "code": "@_deprecate_positional_args\ndef radius_neighbors_graph(X, radius, *, mode='connectivity',\n metric='minkowski', p=2, metric_params=None,\n include_self=False, n_jobs=None):\n \"\"\"Computes the (weighted) graph of Neighbors for points in X\n\n Neighborhoods are restricted to the points at a distance lower than\n radius.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or BallTree\n Sample data, in the form of a numpy array or a precomputed\n :class:`BallTree`.\n\n radius : float\n Radius of neighborhoods.\n\n mode : {'connectivity', 'distance'}, default='connectivity'\n Type of returned matrix: 'connectivity' will return the connectivity\n matrix with ones and zeros, and 'distance' will return the distances\n between neighbors according to the given metric.\n\n metric : str, default='minkowski'\n The distance metric used to calculate the neighbors within a\n given radius for each sample point. The DistanceMetric class\n gives a list of available metrics. The default distance is\n 'euclidean' ('minkowski' metric with the p param equal to 2.)\n\n p : int, default=2\n Power parameter for the Minkowski metric. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\n metric_params : dict, default=None\n additional keyword arguments for the metric function.\n\n include_self : bool or 'auto', default=False\n Whether or not to mark each sample as the first nearest neighbor to\n itself. If 'auto', then True is used for mode='connectivity' and False\n for mode='distance'.\n\n n_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n Returns\n -------\n A : sparse matrix of shape (n_samples, n_samples)\n Graph where A[i, j] is assigned the weight of edge that connects\n i to j. 
The matrix is of CSR format.\n\n Examples\n --------\n >>> X = [[0], [3], [1]]\n >>> from sklearn.neighbors import radius_neighbors_graph\n >>> A = radius_neighbors_graph(X, 1.5, mode='connectivity',\n ... include_self=True)\n >>> A.toarray()\n array([[1., 0., 1.],\n [0., 1., 0.],\n [1., 0., 1.]])\n\n See Also\n --------\n kneighbors_graph\n \"\"\"\n if not isinstance(X, RadiusNeighborsMixin):\n X = NearestNeighbors(radius=radius, metric=metric, p=p,\n metric_params=metric_params, n_jobs=n_jobs).fit(X)\n else:\n _check_params(X, metric, p, metric_params)\n\n query = _query_include_self(X._fit_X, include_self, mode)\n return X.radius_neighbors_graph(query, radius, mode)" + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/__init__", + "name": "__init__", + "qname": "sklearn.neighbors._kde.KernelDensity.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/__init__/self", + "name": "self", + "qname": "sklearn.neighbors._kde.KernelDensity.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/__init__/bandwidth", + "name": "bandwidth", + "qname": "sklearn.neighbors._kde.KernelDensity.__init__.bandwidth", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "The bandwidth of the kernel." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.neighbors._kde.KernelDensity.__init__.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'kd_tree', 'ball_tree', 'auto'}", + "default_value": "'auto'", + "description": "The tree algorithm to use." + }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/__init__/kernel", + "name": "kernel", + "qname": "sklearn.neighbors._kde.KernelDensity.__init__.kernel", + "default_value": "'gaussian'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear', 'cosine'}", + "default_value": "'gaussian'", + "description": "The kernel to use." + }, + "type": { + "kind": "EnumType", + "values": ["cosine", "tophat", "linear", "gaussian", "epanechnikov", "exponential"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/__init__/metric", + "name": "metric", + "qname": "sklearn.neighbors._kde.KernelDensity.__init__.metric", + "default_value": "'euclidean'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'euclidean'", + "description": "The distance metric to use. Note that not all metrics are\nvalid with all algorithms. Refer to the documentation of\n:class:`BallTree` and :class:`KDTree` for a description of\navailable algorithms. Note that the normalization of the density\noutput is correct only for the Euclidean distance metric. Default\nis 'euclidean'." 
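As a reading aid for the two graph-builder entries above (`kneighbors_graph`, `radius_neighbors_graph`, and the `_query_include_self` helper), here is a minimal sketch of the documented `include_self='auto'` behavior. The data values follow the docstring examples; everything else is illustrative, not part of this API dump:

```python
# Sketch: effect of include_self='auto' on the k-neighbors graph diagonal,
# per the docstrings above ('auto' -> True for 'connectivity', False for
# 'distance'). Assumes scikit-learn 0.24.x is installed.
from sklearn.neighbors import kneighbors_graph

X = [[0], [3], [1]]

# 'auto' resolves to True for mode='connectivity': each sample is its own
# first neighbor, so the diagonal of the connectivity matrix is all ones.
A = kneighbors_graph(X, 2, mode='connectivity', include_self='auto')
print(A.toarray().diagonal())  # [1. 1. 1.]

# 'auto' resolves to False for mode='distance': no self-edges, so the
# diagonal of the (sparse CSR) distance graph stays zero.
D = kneighbors_graph(X, 2, mode='distance', include_self='auto')
print(D.toarray().diagonal())  # [0. 0. 0.]
```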
+ }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/__init__/atol", + "name": "atol", + "qname": "sklearn.neighbors._kde.KernelDensity.__init__.atol", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "The desired absolute tolerance of the result. A larger tolerance will\ngenerally lead to faster execution." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/__init__/rtol", + "name": "rtol", + "qname": "sklearn.neighbors._kde.KernelDensity.__init__.rtol", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "The desired relative tolerance of the result. A larger tolerance will\ngenerally lead to faster execution." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/__init__/breadth_first", + "name": "breadth_first", + "qname": "sklearn.neighbors._kde.KernelDensity.__init__.breadth_first", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If true (default), use a breadth-first approach to the problem.\nOtherwise use a depth-first approach." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/__init__/leaf_size", + "name": "leaf_size", + "qname": "sklearn.neighbors._kde.KernelDensity.__init__.leaf_size", + "default_value": "40", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "40", + "description": "Specify the leaf size of the underlying tree. See :class:`BallTree`\nor :class:`KDTree` for details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/__init__/metric_params", + "name": "metric_params", + "qname": "sklearn.neighbors._kde.KernelDensity.__init__.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional parameters to be passed to the tree for use with the\nmetric. For more information, see the documentation of\n:class:`BallTree` or :class:`KDTree`." 
+ }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Kernel Density Estimation.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, bandwidth=1.0, algorithm='auto',\n kernel='gaussian', metric=\"euclidean\", atol=0, rtol=0,\n breadth_first=True, leaf_size=40, metric_params=None):\n self.algorithm = algorithm\n self.bandwidth = bandwidth\n self.kernel = kernel\n self.metric = metric\n self.atol = atol\n self.rtol = rtol\n self.breadth_first = breadth_first\n self.leaf_size = leaf_size\n self.metric_params = metric_params\n\n # run the choose algorithm code so that exceptions will happen here\n # we're using clone() in the GenerativeBayes classifier,\n # so we can't do this kind of logic in __init__\n self._choose_algorithm(self.algorithm, self.metric)\n\n if bandwidth <= 0:\n raise ValueError(\"bandwidth must be positive\")\n if kernel not in VALID_KERNELS:\n raise ValueError(\"invalid kernel: '{0}'\".format(kernel))" + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/_choose_algorithm", + "name": "_choose_algorithm", + "qname": "sklearn.neighbors._kde.KernelDensity._choose_algorithm", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/_choose_algorithm/self", + "name": "self", + "qname": "sklearn.neighbors._kde.KernelDensity._choose_algorithm.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/_choose_algorithm/algorithm", + "name": "algorithm", + "qname": "sklearn.neighbors._kde.KernelDensity._choose_algorithm.algorithm", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/_choose_algorithm/metric", + "name": "metric", + "qname": "sklearn.neighbors._kde.KernelDensity._choose_algorithm.metric", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _choose_algorithm(self, algorithm, metric):\n # given the algorithm string + metric string, choose the optimal\n # algorithm to compute the result.\n if algorithm == 'auto':\n # use KD Tree if possible\n if metric in KDTree.valid_metrics:\n return 'kd_tree'\n elif metric in BallTree.valid_metrics:\n return 'ball_tree'\n else:\n raise ValueError(\"invalid metric: '{0}'\".format(metric))\n elif algorithm in TREE_DICT:\n if metric not in TREE_DICT[algorithm].valid_metrics:\n raise ValueError(\"invalid metric for {0}: \"\n \"'{1}'\".format(TREE_DICT[algorithm],\n metric))\n return algorithm\n else:\n raise ValueError(\"invalid algorithm: '{0}'\".format(algorithm))" + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/_more_tags", + "name": "_more_tags", + "qname": "sklearn.neighbors._kde.KernelDensity._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/_more_tags/self", + "name": "self", + "qname": 
"sklearn.neighbors._kde.KernelDensity._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'sample_weight must have positive values',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/fit", + "name": "fit", + "qname": "sklearn.neighbors._kde.KernelDensity.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/fit/self", + "name": "self", + "qname": "sklearn.neighbors._kde.KernelDensity.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/fit/X", + "name": "X", + "qname": "sklearn.neighbors._kde.KernelDensity.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "List of n_features-dimensional data points. Each row\ncorresponds to a single data point." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/fit/y", + "name": "y", + "qname": "sklearn.neighbors._kde.KernelDensity.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored. This parameter exists only for compatibility with\n:class:`~sklearn.pipeline.Pipeline`." + }, + "type": { + "kind": "NamedType", + "name": "None" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.neighbors._kde.KernelDensity.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "List of sample weights attached to the data X.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the Kernel Density model on the data.", + "docstring": "Fit the Kernel Density model on the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n List of sample weights attached to the data X.\n\n .. versionadded:: 0.20\n\nReturns\n-------\nself : object\n Returns instance of object.", + "code": " def fit(self, X, y=None, sample_weight=None):\n \"\"\"Fit the Kernel Density model on the data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\n y : None\n Ignored. 
This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\n sample_weight : array-like of shape (n_samples,), default=None\n List of sample weights attached to the data X.\n\n .. versionadded:: 0.20\n\n Returns\n -------\n self : object\n Returns instance of object.\n \"\"\"\n algorithm = self._choose_algorithm(self.algorithm, self.metric)\n X = self._validate_data(X, order='C', dtype=DTYPE)\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X, DTYPE)\n if sample_weight.min() <= 0:\n raise ValueError(\"sample_weight must have positive values\")\n\n kwargs = self.metric_params\n if kwargs is None:\n kwargs = {}\n self.tree_ = TREE_DICT[algorithm](X, metric=self.metric,\n leaf_size=self.leaf_size,\n sample_weight=sample_weight,\n **kwargs)\n return self" + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/sample", + "name": "sample", + "qname": "sklearn.neighbors._kde.KernelDensity.sample", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/sample/self", + "name": "self", + "qname": "sklearn.neighbors._kde.KernelDensity.sample.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/sample/n_samples", + "name": "n_samples", + "qname": "sklearn.neighbors._kde.KernelDensity.sample.n_samples", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Number of samples to generate." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/sample/random_state", + "name": "random_state", + "qname": "sklearn.neighbors._kde.KernelDensity.sample.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation used to generate\nrandom samples. Pass an int for reproducible results\nacross multiple function calls.\nSee :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate random samples from the model.\n\nCurrently, this is implemented only for gaussian and tophat kernels.", + "docstring": "Generate random samples from the model.\n\nCurrently, this is implemented only for gaussian and tophat kernels.\n\nParameters\n----------\nn_samples : int, default=1\n Number of samples to generate.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation used to generate\n random samples. 
Pass an int for reproducible results\n across multiple function calls.\n See :term: `Glossary `.\n\nReturns\n-------\nX : array-like of shape (n_samples, n_features)\n List of samples.", + "code": " def sample(self, n_samples=1, random_state=None):\n \"\"\"Generate random samples from the model.\n\n Currently, this is implemented only for gaussian and tophat kernels.\n\n Parameters\n ----------\n n_samples : int, default=1\n Number of samples to generate.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation used to generate\n random samples. Pass an int for reproducible results\n across multiple function calls.\n See :term: `Glossary `.\n\n Returns\n -------\n X : array-like of shape (n_samples, n_features)\n List of samples.\n \"\"\"\n check_is_fitted(self)\n # TODO: implement sampling for other valid kernel shapes\n if self.kernel not in ['gaussian', 'tophat']:\n raise NotImplementedError()\n\n data = np.asarray(self.tree_.data)\n\n rng = check_random_state(random_state)\n u = rng.uniform(0, 1, size=n_samples)\n if self.tree_.sample_weight is None:\n i = (u * data.shape[0]).astype(np.int64)\n else:\n cumsum_weight = np.cumsum(np.asarray(self.tree_.sample_weight))\n sum_weight = cumsum_weight[-1]\n i = np.searchsorted(cumsum_weight, u * sum_weight)\n if self.kernel == 'gaussian':\n return np.atleast_2d(rng.normal(data[i], self.bandwidth))\n\n elif self.kernel == 'tophat':\n # we first draw points from a d-dimensional normal distribution,\n # then use an incomplete gamma function to map them to a uniform\n # d-dimensional tophat distribution.\n dim = data.shape[1]\n X = rng.normal(size=(n_samples, dim))\n s_sq = row_norms(X, squared=True)\n correction = (gammainc(0.5 * dim, 0.5 * s_sq) ** (1. / dim)\n * self.bandwidth / np.sqrt(s_sq))\n return data[i] + X * correction[:, np.newaxis]" + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/score", + "name": "score", + "qname": "sklearn.neighbors._kde.KernelDensity.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/score/self", + "name": "self", + "qname": "sklearn.neighbors._kde.KernelDensity.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/score/X", + "name": "X", + "qname": "sklearn.neighbors._kde.KernelDensity.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "List of n_features-dimensional data points. Each row\ncorresponds to a single data point." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/score/y", + "name": "y", + "qname": "sklearn.neighbors._kde.KernelDensity.score.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored. This parameter exists only for compatibility with\n:class:`~sklearn.pipeline.Pipeline`." 
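The weighted branch of `KernelDensity.sample` above selects source points by inverting the cumulative sum of the sample weights with `np.searchsorted`. A standalone sketch of just that index-selection step; the weights and sample count are invented for illustration:

```python
# Sketch: the weighted index selection used in KernelDensity.sample above.
# Draw u ~ U(0, 1), scale by the total weight, and invert the cumulative
# sum with searchsorted; points with larger weights are picked more often.
import numpy as np

rng = np.random.default_rng(0)
sample_weight = np.array([1.0, 1.0, 8.0])  # hypothetical weights
n_samples = 10_000

cumsum_weight = np.cumsum(sample_weight)
u = rng.uniform(0, 1, size=n_samples)
i = np.searchsorted(cumsum_weight, u * cumsum_weight[-1])

# Index 2 carries 80% of the total weight, so it is chosen ~80% of the time.
print(np.bincount(i, minlength=3) / n_samples)  # approx [0.1 0.1 0.8]
```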
+ }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the total log probability density under the model.", + "docstring": "Compute the total log probability density under the model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nlogprob : float\n Total log-likelihood of the data in X. This is normalized to be a\n probability density, so the value will be low for high-dimensional\n data.", + "code": " def score(self, X, y=None):\n \"\"\"Compute the total log probability density under the model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n List of n_features-dimensional data points. Each row\n corresponds to a single data point.\n\n y : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\n Returns\n -------\n logprob : float\n Total log-likelihood of the data in X. This is normalized to be a\n probability density, so the value will be low for high-dimensional\n data.\n \"\"\"\n return np.sum(self.score_samples(X))" + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/score_samples", + "name": "score_samples", + "qname": "sklearn.neighbors._kde.KernelDensity.score_samples", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/score_samples/self", + "name": "self", + "qname": "sklearn.neighbors._kde.KernelDensity.score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._kde/KernelDensity/score_samples/X", + "name": "X", + "qname": "sklearn.neighbors._kde.KernelDensity.score_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "An array of points to query. Last dimension should match dimension\nof training data (n_features)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Evaluate the log density model on the data.", + "docstring": "Evaluate the log density model on the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n An array of points to query. Last dimension should match dimension\n of training data (n_features).\n\nReturns\n-------\ndensity : ndarray of shape (n_samples,)\n The array of log(density) evaluations. These are normalized to be\n probability densities, so values will be low for high-dimensional\n data.", + "code": " def score_samples(self, X):\n \"\"\"Evaluate the log density model on the data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n An array of points to query. Last dimension should match dimension\n of training data (n_features).\n\n Returns\n -------\n density : ndarray of shape (n_samples,)\n The array of log(density) evaluations. 
These are normalized to be\n probability densities, so values will be low for high-dimensional\n data.\n \"\"\"\n check_is_fitted(self)\n # The returned density is normalized to the number of points.\n # For it to be a probability, we must scale it. For this reason\n # we'll also scale atol.\n X = check_array(X, order='C', dtype=DTYPE)\n if self.tree_.sample_weight is None:\n N = self.tree_.data.shape[0]\n else:\n N = self.tree_.sum_weight\n atol_N = self.atol * N\n log_density = self.tree_.kernel_density(\n X, h=self.bandwidth, kernel=self.kernel, atol=atol_N,\n rtol=self.rtol, breadth_first=self.breadth_first, return_log=True)\n log_density -= np.log(N)\n return log_density" + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/__init__", + "name": "__init__", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/__init__/self", + "name": "self", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.__init__.n_neighbors", + "default_value": "20", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "20", + "description": "Number of neighbors to use by default for :meth:`kneighbors` queries.\nIf n_neighbors is larger than the number of samples provided,\nall samples will be used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.__init__.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", + "default_value": "'auto'", + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/__init__/leaf_size", + "name": "leaf_size", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.__init__.leaf_size", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\naffect the speed of the construction and query, as well as the memory\nrequired to store the tree. The optimal value depends on the\nnature of the problem." 
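Taken together, the `KernelDensity` entries above describe a `fit` / `score_samples` / `sample` workflow. A minimal usage sketch under those docstrings; the data and bandwidth are arbitrary choices, not taken from this file:

```python
# Sketch: fit / score_samples / sample flow for KernelDensity, per the
# parameter and method docstrings above. Data and bandwidth are arbitrary.
import numpy as np
from sklearn.neighbors import KernelDensity

X = np.array([[0.0], [0.5], [1.0], [5.0]])

# scikit-learn 0.24 deprecates positional arguments here, so pass keywords.
kde = KernelDensity(bandwidth=0.5, kernel='gaussian').fit(X)

# Log-densities, normalized by the number (or total weight) of points.
print(kde.score_samples(np.array([[0.5], [5.0]])))

# Sampling is implemented only for the 'gaussian' and 'tophat' kernels.
print(kde.sample(n_samples=3, random_state=0).shape)  # (3, 1)
```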
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/__init__/metric", + "name": "metric", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.__init__.metric", + "default_value": "'minkowski'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'minkowski'", + "description": "metric used for the distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square. X may be a sparse matrix, in which case only \"nonzero\"\nelements may be considered neighbors.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics:\nhttps://docs.scipy.org/doc/scipy/reference/spatial.distance.html" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/__init__/p", + "name": "p", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.__init__.p", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Parameter for the Minkowski metric from\n:func:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this\nis equivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/__init__/metric_params", + "name": "metric_params", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.__init__.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional keyword arguments for the metric function." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/__init__/contamination", + "name": "contamination", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.__init__.contamination", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto' or float", + "default_value": "'auto'", + "description": "The amount of contamination of the data set, i.e. the proportion\nof outliers in the data set. When fitting this is used to define the\nthreshold on the scores of the samples.\n\n- if 'auto', the threshold is determined as in the\n original paper,\n- if a float, the contamination should be in the range [0, 0.5].\n\n.. 
versionchanged:: 0.22\n The default value of ``contamination`` changed from 0.1\n to ``'auto'``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": 0.5, + "min_inclusive": true, + "max_inclusive": true + }, + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/__init__/novelty", + "name": "novelty", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.__init__.novelty", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "By default, LocalOutlierFactor is only meant to be used for outlier\ndetection (novelty=False). Set novelty to True if you want to use\nLocalOutlierFactor for novelty detection. In this case be aware that\nyou should only use predict, decision_function and score_samples\non new unseen data and not on the training set.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Unsupervised Outlier Detection using Local Outlier Factor (LOF)\n\nThe anomaly score of each sample is called Local Outlier Factor.\nIt measures the local deviation of density of a given sample with\nrespect to its neighbors.\nIt is local in that the anomaly score depends on how isolated the object\nis with respect to the surrounding neighborhood.\nMore precisely, locality is given by k-nearest neighbors, whose distance\nis used to estimate the local density.\nBy comparing the local density of a sample to the local densities of\nits neighbors, one can identify samples that have a substantially lower\ndensity than their neighbors. These are considered outliers.\n\n.. 
versionadded:: 0.19", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_neighbors=20, *, algorithm='auto', leaf_size=30,\n metric='minkowski', p=2, metric_params=None,\n contamination=\"auto\", novelty=False, n_jobs=None):\n super().__init__(\n n_neighbors=n_neighbors,\n algorithm=algorithm,\n leaf_size=leaf_size, metric=metric, p=p,\n metric_params=metric_params, n_jobs=n_jobs)\n self.contamination = contamination\n self.novelty = novelty" + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_decision_function", + "name": "_decision_function", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_decision_function/self", + "name": "self", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_decision_function/X", + "name": "X", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The query sample or samples to compute the Local Outlier Factor\nw.r.t. the training samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Shifted opposite of the Local Outlier Factor of X.\n\nBigger is better, i.e. large values correspond to inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe shift offset allows a zero threshold for being an outlier.\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the latter in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.", + "docstring": "Shifted opposite of the Local Outlier Factor of X.\n\nBigger is better, i.e. large values correspond to inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe shift offset allows a zero threshold for being an outlier.\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the latter in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\nReturns\n-------\nshifted_opposite_lof_scores : ndarray of shape (n_samples,)\n The shifted opposite of the Local Outlier Factor of each input\n sample. The lower, the more abnormal. Negative scores represent\n outliers, positive scores represent inliers.", + "code": " def _decision_function(self, X):\n \"\"\"Shifted opposite of the Local Outlier Factor of X.\n\n Bigger is better, i.e. 
large values correspond to inliers.\n\n **Only available for novelty detection (when novelty is set to True).**\n The shift offset allows a zero threshold for being an outlier.\n The argument X is supposed to contain *new data*: if X contains a\n point from training, it considers the latter in its own neighborhood.\n Also, the samples in X are not considered in the neighborhood of any\n point.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\n Returns\n -------\n shifted_opposite_lof_scores : ndarray of shape (n_samples,)\n The shifted opposite of the Local Outlier Factor of each input\n sample. The lower, the more abnormal. Negative scores represent\n outliers, positive scores represent inliers.\n \"\"\"\n\n return self._score_samples(X) - self.offset_" + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_fit_predict", + "name": "_fit_predict", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._fit_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_fit_predict/self", + "name": "self", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._fit_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_fit_predict/X", + "name": "X", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._fit_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "None", + "description": "The query sample or samples to compute the Local Outlier Factor\nw.r.t. the training samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_fit_predict/y", + "name": "y", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._fit_predict.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fits the model to the training set X and returns the labels.\n\nLabel is 1 for an inlier and -1 for an outlier according to the LOF\nscore and the contamination parameter.", + "docstring": "Fits the model to the training set X and returns the labels.\n\nLabel is 1 for an inlier and -1 for an outlier according to the LOF\nscore and the contamination parameter.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and 1 for inliers.", + "code": " def _fit_predict(self, X, y=None):\n \"\"\"Fits the model to the training set X and returns the labels.\n\n Label is 1 for an inlier and -1 for an outlier according to the LOF\n score and the contamination parameter.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. 
the training samples.\n\n Returns\n -------\n is_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and 1 for inliers.\n \"\"\"\n\n # As fit_predict would be different from fit.predict, fit_predict is\n # only available for outlier detection (novelty=False)\n\n return self.fit(X)._predict()" + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_local_reachability_density", + "name": "_local_reachability_density", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._local_reachability_density", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_local_reachability_density/self", + "name": "self", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._local_reachability_density.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_local_reachability_density/distances_X", + "name": "distances_X", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._local_reachability_density.distances_X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_queries, self.n_neighbors)", + "default_value": "", + "description": "Distances to the neighbors (in the training samples `self._fit_X`)\nof each query point to compute the LRD." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_queries, self.n_neighbors)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_local_reachability_density/neighbors_indices", + "name": "neighbors_indices", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._local_reachability_density.neighbors_indices", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_queries, self.n_neighbors)", + "default_value": "", + "description": "Neighbors indices (of each query point) among training samples\nself._fit_X." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_queries, self.n_neighbors)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "The local reachability density (LRD)\n\nThe LRD of a sample is the inverse of the average reachability\ndistance of its k-nearest neighbors.", + "docstring": "The local reachability density (LRD)\n\nThe LRD of a sample is the inverse of the average reachability\ndistance of its k-nearest neighbors.\n\nParameters\n----------\ndistances_X : ndarray of shape (n_queries, self.n_neighbors)\n Distances to the neighbors (in the training samples `self._fit_X`)\n of each query point to compute the LRD.\n\nneighbors_indices : ndarray of shape (n_queries, self.n_neighbors)\n Neighbors indices (of each query point) among training samples\n self._fit_X.\n\nReturns\n-------\nlocal_reachability_density : ndarray of shape (n_queries,)\n The local reachability density of each sample.", + "code": " def _local_reachability_density(self, distances_X, neighbors_indices):\n \"\"\"The local reachability density (LRD)\n\n The LRD of a sample is the inverse of the average reachability\n distance of its k-nearest neighbors.\n\n Parameters\n ----------\n distances_X : ndarray of shape (n_queries, self.n_neighbors)\n Distances to the neighbors (in the training samples `self._fit_X`)\n of each query point to compute the LRD.\n\n neighbors_indices : ndarray of shape (n_queries, self.n_neighbors)\n Neighbors indices (of each query point) among training samples\n self._fit_X.\n\n Returns\n -------\n local_reachability_density : ndarray of shape (n_queries,)\n The local reachability density of each sample.\n \"\"\"\n dist_k = self._distances_fit_X_[neighbors_indices,\n self.n_neighbors_ - 1]\n reach_dist_array = np.maximum(distances_X, dist_k)\n\n # 1e-10 to avoid `nan' when nb of duplicates > n_neighbors_:\n return 1. / (np.mean(reach_dist_array, axis=1) + 1e-10)" + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_predict", + "name": "_predict", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_predict/self", + "name": "self", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_predict/X", + "name": "X", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._predict.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "None", + "description": "The query sample or samples to compute the Local Outlier Factor\nw.r.t. to the training samples. If None, makes prediction on the\ntraining data without considering them as their own neighbors." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\nIf X is None, returns the same as fit_predict(X_train).", + "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\nIf X is None, returns the same as fit_predict(X_train).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples. If None, makes prediction on the\n training data without considering them as their own neighbors.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers.", + "code": " def _predict(self, X=None):\n \"\"\"Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n If X is None, returns the same as fit_predict(X_train).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples. If None, makes prediction on the\n training data without considering them as their own neighbors.\n\n Returns\n -------\n is_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers.\n \"\"\"\n check_is_fitted(self)\n\n if X is not None:\n X = check_array(X, accept_sparse='csr')\n is_inlier = np.ones(X.shape[0], dtype=int)\n is_inlier[self.decision_function(X) < 0] = -1\n else:\n is_inlier = np.ones(self.n_samples_fit_, dtype=int)\n is_inlier[self.negative_outlier_factor_ < self.offset_] = -1\n\n return is_inlier" + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_score_samples", + "name": "_score_samples", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._score_samples", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_score_samples/self", + "name": "self", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/_score_samples/X", + "name": "X", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor._score_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The query sample or samples to compute the Local Outlier Factor\nw.r.t. the training samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Opposite of the Local Outlier Factor of X.\n\nIt is the opposite as bigger is better, i.e. 
large values correspond\nto inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the latter in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\nThe score_samples on training data is available by considering the\n``negative_outlier_factor_`` attribute.", + "docstring": "Opposite of the Local Outlier Factor of X.\n\nIt is the opposite as bigger is better, i.e. large values correspond\nto inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the latter in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\nThe score_samples on training data is available by considering the\n``negative_outlier_factor_`` attribute.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\nReturns\n-------\nopposite_lof_scores : ndarray of shape (n_samples,)\n The opposite of the Local Outlier Factor of each input sample.\n The lower, the more abnormal.", + "code": " def _score_samples(self, X):\n \"\"\"Opposite of the Local Outlier Factor of X.\n\n It is the opposite as bigger is better, i.e. large values correspond\n to inliers.\n\n **Only available for novelty detection (when novelty is set to True).**\n The argument X is supposed to contain *new data*: if X contains a\n point from training, it considers the latter in its own neighborhood.\n Also, the samples in X are not considered in the neighborhood of any\n point.\n The score_samples on training data is available by considering the\n ``negative_outlier_factor_`` attribute.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\n Returns\n -------\n opposite_lof_scores : ndarray of shape (n_samples,)\n The opposite of the Local Outlier Factor of each input sample.\n The lower, the more abnormal.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse='csr')\n\n distances_X, neighbors_indices_X = (\n self.kneighbors(X, n_neighbors=self.n_neighbors_))\n X_lrd = self._local_reachability_density(distances_X,\n neighbors_indices_X)\n\n lrd_ratios_array = (self._lrd[neighbors_indices_X] /\n X_lrd[:, np.newaxis])\n\n # as bigger is better:\n return -np.mean(lrd_ratios_array, axis=1)" + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/decision_function@getter", + "name": "decision_function", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.decision_function", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/decision_function/self", + "name": "self", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Shifted opposite of the Local Outlier Factor of X.\n\nBigger is better, i.e. 
large values correspond to inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe shift offset allows a zero threshold for being an outlier.\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the latter in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.", + "docstring": "Shifted opposite of the Local Outlier Factor of X.\n\nBigger is better, i.e. large values correspond to inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe shift offset allows a zero threshold for being an outlier.\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the latter in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\nReturns\n-------\nshifted_opposite_lof_scores : ndarray of shape (n_samples,)\n The shifted opposite of the Local Outlier Factor of each input\n sample. The lower, the more abnormal. Negative scores represent\n outliers, positive scores represent inliers.", + "code": " @property\n def decision_function(self):\n \"\"\"Shifted opposite of the Local Outlier Factor of X.\n\n Bigger is better, i.e. large values correspond to inliers.\n\n **Only available for novelty detection (when novelty is set to True).**\n The shift offset allows a zero threshold for being an outlier.\n The argument X is supposed to contain *new data*: if X contains a\n point from training, it considers the latter in its own neighborhood.\n Also, the samples in X are not considered in the neighborhood of any\n point.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\n Returns\n -------\n shifted_opposite_lof_scores : ndarray of shape (n_samples,)\n The shifted opposite of the Local Outlier Factor of each input\n sample. The lower, the more abnormal. Negative scores represent\n outliers, positive scores represent inliers.\n \"\"\"\n if not self.novelty:\n msg = ('decision_function is not available when novelty=False. '\n 'Use novelty=True if you want to use LOF for novelty '\n 'detection and compute decision_function for new unseen '\n 'data. 
Note that the opposite LOF of the training samples '\n 'is always available by considering the '\n 'negative_outlier_factor_ attribute.')\n raise AttributeError(msg)\n\n return self._decision_function" + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/fit", + "name": "fit", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/fit/self", + "name": "self", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/fit/X", + "name": "X", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/fit/y", + "name": "y", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the local outlier factor detector from the training dataset.", + "docstring": "Fit the local outlier factor detector from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : LocalOutlierFactor\n The fitted local outlier factor detector.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the local outlier factor detector from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : LocalOutlierFactor\n The fitted local outlier factor detector.\n \"\"\"\n self._fit(X)\n\n if self.contamination != 'auto':\n if not(0. < self.contamination <= .5):\n raise ValueError(\"contamination must be in (0, 0.5], \"\n \"got: %f\" % self.contamination)\n\n n_samples = self.n_samples_fit_\n if self.n_neighbors > n_samples:\n warnings.warn(\"n_neighbors (%s) is greater than the \"\n \"total number of samples (%s). 
n_neighbors \"\n \"will be set to (n_samples - 1) for estimation.\"\n % (self.n_neighbors, n_samples))\n self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))\n\n self._distances_fit_X_, _neighbors_indices_fit_X_ = self.kneighbors(\n n_neighbors=self.n_neighbors_)\n\n self._lrd = self._local_reachability_density(\n self._distances_fit_X_, _neighbors_indices_fit_X_)\n\n # Compute lof score over training samples to define offset_:\n lrd_ratios_array = (self._lrd[_neighbors_indices_fit_X_] /\n self._lrd[:, np.newaxis])\n\n self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)\n\n if self.contamination == \"auto\":\n # inliers score around -1 (the higher, the less abnormal).\n self.offset_ = -1.5\n else:\n self.offset_ = np.percentile(self.negative_outlier_factor_,\n 100. * self.contamination)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/fit_predict@getter", + "name": "fit_predict", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.fit_predict", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/fit_predict/self", + "name": "self", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.fit_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fits the model to the training set X and returns the labels.\n\n**Not available for novelty detection (when novelty is set to True).**\nLabel is 1 for an inlier and -1 for an outlier according to the LOF\nscore and the contamination parameter.", + "docstring": "Fits the model to the training set X and returns the labels.\n\n**Not available for novelty detection (when novelty is set to True).**\nLabel is 1 for an inlier and -1 for an outlier according to the LOF\nscore and the contamination parameter.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and 1 for inliers.", + "code": " @property\n def fit_predict(self):\n \"\"\"Fits the model to the training set X and returns the labels.\n\n **Not available for novelty detection (when novelty is set to True).**\n Label is 1 for an inlier and -1 for an outlier according to the LOF\n score and the contamination parameter.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features), default=None\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n is_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and 1 for inliers.\n \"\"\"\n\n # As fit_predict would be different from fit.predict, fit_predict is\n # only available for outlier detection (novelty=False)\n\n if self.novelty:\n msg = ('fit_predict is not available when novelty=True. 
Use '\n 'novelty=False if you want to predict on the training set.')\n raise AttributeError(msg)\n\n return self._fit_predict" + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/predict@getter", + "name": "predict", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.predict", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/predict/self", + "name": "self", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n**Only available for novelty detection (when novelty is set to True).**\nThis method allows generalizing prediction to *new observations* (not\nin the training set).", + "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n**Only available for novelty detection (when novelty is set to True).**\nThis method allows generalizing prediction to *new observations* (not\nin the training set).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers.", + "code": " @property\n def predict(self):\n \"\"\"Predict the labels (1 inlier, -1 outlier) of X according to LOF.\n\n **Only available for novelty detection (when novelty is set to True).**\n This method allows generalizing prediction to *new observations* (not\n in the training set).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\n Returns\n -------\n is_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers.\n \"\"\"\n if not self.novelty:\n msg = ('predict is not available when novelty=False, use '\n 'fit_predict if you want to predict on training data. Use '\n 'novelty=True if you want to use LOF for novelty detection '\n 'and predict on new unseen data.')\n raise AttributeError(msg)\n\n return self._predict" + }, + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/score_samples@getter", + "name": "score_samples", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.score_samples", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._lof/LocalOutlierFactor/score_samples/self", + "name": "self", + "qname": "sklearn.neighbors._lof.LocalOutlierFactor.score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Opposite of the Local Outlier Factor of X.\n\nIt is the opposite as bigger is better, i.e. 
large values correspond\nto inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the latter in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\nThe score_samples on training data is available by considering the\n``negative_outlier_factor_`` attribute.", + "docstring": "Opposite of the Local Outlier Factor of X.\n\nIt is the opposite as bigger is better, i.e. large values correspond\nto inliers.\n\n**Only available for novelty detection (when novelty is set to True).**\nThe argument X is supposed to contain *new data*: if X contains a\npoint from training, it considers the latter in its own neighborhood.\nAlso, the samples in X are not considered in the neighborhood of any\npoint.\nThe score_samples on training data is available by considering the\n``negative_outlier_factor_`` attribute.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\nReturns\n-------\nopposite_lof_scores : ndarray of shape (n_samples,)\n The opposite of the Local Outlier Factor of each input sample.\n The lower, the more abnormal.", + "code": " @property\n def score_samples(self):\n \"\"\"Opposite of the Local Outlier Factor of X.\n\n It is the opposite as bigger is better, i.e. large values correspond\n to inliers.\n\n **Only available for novelty detection (when novelty is set to True).**\n The argument X is supposed to contain *new data*: if X contains a\n point from training, it considers the latter in its own neighborhood.\n Also, the samples in X are not considered in the neighborhood of any\n point.\n The score_samples on training data is available by considering the\n ``negative_outlier_factor_`` attribute.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The query sample or samples to compute the Local Outlier Factor\n w.r.t. the training samples.\n\n Returns\n -------\n opposite_lof_scores : ndarray of shape (n_samples,)\n The opposite of the Local Outlier Factor of each input sample.\n The lower, the more abnormal.\n \"\"\"\n if not self.novelty:\n msg = ('score_samples is not available when novelty=False. The '\n 'scores of the training samples are always available '\n 'through the negative_outlier_factor_ attribute. 
Use '\n 'novelty=True if you want to use LOF for novelty detection '\n 'and compute score_samples for new unseen data.')\n raise AttributeError(msg)\n\n return self._score_samples" + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/__init__", + "name": "__init__", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/__init__/self", + "name": "self", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/__init__/n_components", + "name": "n_components", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.__init__.n_components", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Preferred dimensionality of the projected space.\nIf None it will be set to ``n_features``." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/__init__/init", + "name": "init", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.__init__.init", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'pca', 'lda', 'identity', 'random'} or ndarray of shape (n_features_a, n_features_b)", + "default_value": "'auto'", + "description": "Initialization of the linear transformation. Possible options are\n'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape\n(n_features_a, n_features_b).\n\n'auto'\n Depending on ``n_components``, the most reasonable initialization\n will be chosen. If ``n_components <= n_classes`` we use 'lda', as\n it uses labels information. If not, but\n ``n_components < min(n_features, n_samples)``, we use 'pca', as\n it projects data in meaningful directions (those of higher\n variance). Otherwise, we just use 'identity'.\n\n'pca'\n ``n_components`` principal components of the inputs passed\n to :meth:`fit` will be used to initialize the transformation.\n (See :class:`~sklearn.decomposition.PCA`)\n\n'lda'\n ``min(n_components, n_classes)`` most discriminative\n components of the inputs passed to :meth:`fit` will be used to\n initialize the transformation. (If ``n_components > n_classes``,\n the rest of the components will be zero.) (See\n :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)\n\n'identity'\n If ``n_components`` is strictly smaller than the\n dimensionality of the inputs passed to :meth:`fit`, the identity\n matrix will be truncated to the first ``n_components`` rows.\n\n'random'\n The initial transformation will be a random array of shape\n `(n_components, n_features)`. Each value is sampled from the\n standard normal distribution.\n\nnumpy array\n n_features_b must match the dimensionality of the inputs passed to\n :meth:`fit` and n_features_a must be less than or equal to that.\n If ``n_components`` is not None, n_features_a must match it." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["lda", "random", "auto", "pca", "identity"] + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_features_a, n_features_b)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True and :meth:`fit` has been called before, the solution of the\nprevious call to :meth:`fit` is used as the initial linear\ntransformation (``n_components`` and ``init`` will be ignored)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.__init__.max_iter", + "default_value": "50", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "50", + "description": "Maximum number of iterations in the optimization." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/__init__/tol", + "name": "tol", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.__init__.tol", + "default_value": "1e-05", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-5", + "description": "Convergence tolerance for the optimization." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/__init__/callback", + "name": "callback", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.__init__.callback", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "If not None, this function is called after every iteration of the\noptimizer, taking as arguments the current solution (flattened\ntransformation matrix) and the number of iterations. This might be\nuseful in case one wants to examine or store the transformation\nfound after each iteration." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/__init__/verbose", + "name": "verbose", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "If 0, no progress messages will be printed.\nIf 1, progress messages will be printed to stdout.\nIf > 1, progress messages will be printed and the ``disp``\nparameter of :func:`scipy.optimize.minimize` will be set to\n``verbose - 2``." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/__init__/random_state", + "name": "random_state", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or numpy.RandomState", + "default_value": "None", + "description": "A pseudo random number generator object or a seed for it if int. If\n``init='random'``, ``random_state`` is used to initialize the random\ntransformation. If ``init='pca'``, ``random_state`` is passed as an\nargument to PCA when initializing the transformation. Pass an int\nfor reproducible results across multiple function calls.\nSee :term: `Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "numpy.RandomState" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Neighborhood Components Analysis\n\nNeighborhood Component Analysis (NCA) is a machine learning algorithm for\nmetric learning. It learns a linear transformation in a supervised fashion\nto improve the classification accuracy of a stochastic nearest neighbors\nrule in the transformed space.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=None, *, init='auto', warm_start=False,\n max_iter=50, tol=1e-5, callback=None, verbose=0,\n random_state=None):\n self.n_components = n_components\n self.init = init\n self.warm_start = warm_start\n self.max_iter = max_iter\n self.tol = tol\n self.callback = callback\n self.verbose = verbose\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_callback", + "name": "_callback", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._callback", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_callback/self", + "name": "self", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._callback.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_callback/transformation", + "name": "transformation", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._callback.transformation", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components * n_features,)", + "default_value": "", + "description": "The solution computed by the optimizer in this iteration." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components * n_features,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Called after each iteration of the optimizer.", + "docstring": "Called after each iteration of the optimizer.\n\nParameters\n----------\ntransformation : ndarray of shape (n_components * n_features,)\n The solution computed by the optimizer in this iteration.", + "code": " def _callback(self, transformation):\n \"\"\"Called after each iteration of the optimizer.\n\n Parameters\n ----------\n transformation : ndarray of shape (n_components * n_features,)\n The solution computed by the optimizer in this iteration.\n \"\"\"\n if self.callback is not None:\n self.callback(transformation, self.n_iter_)\n\n self.n_iter_ += 1" + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_initialize", + "name": "_initialize", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._initialize", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_initialize/self", + "name": "self", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._initialize.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_initialize/X", + "name": "X", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._initialize.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The training samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_initialize/y", + "name": "y", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._initialize.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The training labels." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_initialize/init", + "name": "init", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._initialize.init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or ndarray of shape (n_features_a, n_features_b)", + "default_value": "", + "description": "The validated initialization of the linear transformation." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_features_a, n_features_b)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Initialize the transformation.", + "docstring": "Initialize the transformation.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training samples.\n\ny : array-like of shape (n_samples,)\n The training labels.\n\ninit : str or ndarray of shape (n_features_a, n_features_b)\n The validated initialization of the linear transformation.\n\nReturns\n-------\ntransformation : ndarray of shape (n_components, n_features)\n The initialized linear transformation.", + "code": " def _initialize(self, X, y, init):\n \"\"\"Initialize the transformation.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The training samples.\n\n y : array-like of shape (n_samples,)\n The training labels.\n\n init : str or ndarray of shape (n_features_a, n_features_b)\n The validated initialization of the linear transformation.\n\n Returns\n -------\n transformation : ndarray of shape (n_components, n_features)\n The initialized linear transformation.\n\n \"\"\"\n\n transformation = init\n if self.warm_start and hasattr(self, 'components_'):\n transformation = self.components_\n elif isinstance(init, np.ndarray):\n pass\n else:\n n_samples, n_features = X.shape\n n_components = self.n_components or n_features\n if init == 'auto':\n n_classes = len(np.unique(y))\n if n_components <= min(n_features, n_classes - 1):\n init = 'lda'\n elif n_components < min(n_features, n_samples):\n init = 'pca'\n else:\n init = 'identity'\n if init == 'identity':\n transformation = np.eye(n_components, X.shape[1])\n elif init == 'random':\n transformation = self.random_state_.randn(n_components,\n X.shape[1])\n elif init in {'pca', 'lda'}:\n init_time = time.time()\n if init == 'pca':\n pca = PCA(n_components=n_components,\n random_state=self.random_state_)\n if self.verbose:\n print('Finding principal components... ', end='')\n sys.stdout.flush()\n pca.fit(X)\n transformation = pca.components_\n elif init == 'lda':\n from ..discriminant_analysis import (\n LinearDiscriminantAnalysis)\n lda = LinearDiscriminantAnalysis(n_components=n_components)\n if self.verbose:\n print('Finding most discriminative components... 
',\n end='')\n sys.stdout.flush()\n lda.fit(X, y)\n transformation = lda.scalings_.T[:n_components]\n if self.verbose:\n print('done in {:5.2f}s'.format(time.time() - init_time))\n return transformation" + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_loss_grad_lbfgs", + "name": "_loss_grad_lbfgs", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._loss_grad_lbfgs", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_loss_grad_lbfgs/self", + "name": "self", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._loss_grad_lbfgs.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_loss_grad_lbfgs/transformation", + "name": "transformation", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._loss_grad_lbfgs.transformation", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_components * n_features,)", + "default_value": "", + "description": "The raveled linear transformation on which to compute loss and\nevaluate gradient." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_components * n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_loss_grad_lbfgs/X", + "name": "X", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._loss_grad_lbfgs.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The training samples." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_loss_grad_lbfgs/same_class_mask", + "name": "same_class_mask", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._loss_grad_lbfgs.same_class_mask", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_samples)", + "default_value": "", + "description": "A mask where ``mask[i, j] == 1`` if ``X[i]`` and ``X[j]`` belong\nto the same class, and ``0`` otherwise." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_loss_grad_lbfgs/sign", + "name": "sign", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._loss_grad_lbfgs.sign", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the loss and the loss gradient w.r.t. ``transformation``.", + "docstring": "Compute the loss and the loss gradient w.r.t. 
``transformation``.\n\nParameters\n----------\ntransformation : ndarray of shape (n_components * n_features,)\n The raveled linear transformation on which to compute loss and\n evaluate gradient.\n\nX : ndarray of shape (n_samples, n_features)\n The training samples.\n\nsame_class_mask : ndarray of shape (n_samples, n_samples)\n A mask where ``mask[i, j] == 1`` if ``X[i]`` and ``X[j]`` belong\n to the same class, and ``0`` otherwise.\n\nReturns\n-------\nloss : float\n The loss computed for the given transformation.\n\ngradient : ndarray of shape (n_components * n_features,)\n The new (flattened) gradient of the loss.", + "code": " def _loss_grad_lbfgs(self, transformation, X, same_class_mask, sign=1.0):\n \"\"\"Compute the loss and the loss gradient w.r.t. ``transformation``.\n\n Parameters\n ----------\n transformation : ndarray of shape (n_components * n_features,)\n The raveled linear transformation on which to compute loss and\n evaluate gradient.\n\n X : ndarray of shape (n_samples, n_features)\n The training samples.\n\n same_class_mask : ndarray of shape (n_samples, n_samples)\n A mask where ``mask[i, j] == 1`` if ``X[i]`` and ``X[j]`` belong\n to the same class, and ``0`` otherwise.\n\n Returns\n -------\n loss : float\n The loss computed for the given transformation.\n\n gradient : ndarray of shape (n_components * n_features,)\n The new (flattened) gradient of the loss.\n \"\"\"\n\n if self.n_iter_ == 0:\n self.n_iter_ += 1\n if self.verbose:\n header_fields = ['Iteration', 'Objective Value', 'Time(s)']\n header_fmt = '{:>10} {:>20} {:>10}'\n header = header_fmt.format(*header_fields)\n cls_name = self.__class__.__name__\n print('[{}]'.format(cls_name))\n print('[{}] {}\\n[{}] {}'.format(cls_name, header,\n cls_name, '-' * len(header)))\n\n t_funcall = time.time()\n\n transformation = transformation.reshape(-1, X.shape[1])\n X_embedded = np.dot(X, transformation.T) # (n_samples, n_components)\n\n # Compute softmax distances\n p_ij = pairwise_distances(X_embedded, squared=True)\n np.fill_diagonal(p_ij, np.inf)\n p_ij = softmax(-p_ij) # (n_samples, n_samples)\n\n # Compute loss\n masked_p_ij = p_ij * same_class_mask\n p = np.sum(masked_p_ij, axis=1, keepdims=True) # (n_samples, 1)\n loss = np.sum(p)\n\n # Compute gradient of loss w.r.t. 
`transform`\n weighted_p_ij = masked_p_ij - p_ij * p\n weighted_p_ij_sym = weighted_p_ij + weighted_p_ij.T\n np.fill_diagonal(weighted_p_ij_sym, -weighted_p_ij.sum(axis=0))\n gradient = 2 * X_embedded.T.dot(weighted_p_ij_sym).dot(X)\n # time complexity of the gradient: O(n_components x n_samples x (\n # n_samples + n_features))\n\n if self.verbose:\n t_funcall = time.time() - t_funcall\n values_fmt = '[{}] {:>10} {:>20.6e} {:>10.2f}'\n print(values_fmt.format(self.__class__.__name__, self.n_iter_,\n loss, t_funcall))\n sys.stdout.flush()\n\n return sign * loss, sign * gradient.ravel()" + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_more_tags", + "name": "_more_tags", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_more_tags/self", + "name": "self", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'requires_y': True}" + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_validate_params", + "name": "_validate_params", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._validate_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_validate_params/self", + "name": "self", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._validate_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_validate_params/X", + "name": "X", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._validate_params.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The training samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/_validate_params/y", + "name": "y", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis._validate_params.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The corresponding training labels." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate parameters as soon as :meth:`fit` is called.", + "docstring": "Validate parameters as soon as :meth:`fit` is called.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training samples.\n\ny : array-like of shape (n_samples,)\n The corresponding training labels.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The validated training samples.\n\ny : ndarray of shape (n_samples,)\n The validated training labels, encoded to be integers in\n the range(0, n_classes).\n\ninit : str or ndarray of shape (n_features_a, n_features_b)\n The validated initialization of the linear transformation.\n\nRaises\n-------\nTypeError\n If a parameter is not an instance of the desired type.\n\nValueError\n If a parameter's value violates its legal value range or if the\n combination of two or more given parameters is incompatible.", + "code": " def _validate_params(self, X, y):\n \"\"\"Validate parameters as soon as :meth:`fit` is called.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The training samples.\n\n y : array-like of shape (n_samples,)\n The corresponding training labels.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n The validated training samples.\n\n y : ndarray of shape (n_samples,)\n The validated training labels, encoded to be integers in\n the range(0, n_classes).\n\n init : str or ndarray of shape (n_features_a, n_features_b)\n The validated initialization of the linear transformation.\n\n Raises\n -------\n TypeError\n If a parameter is not an instance of the desired type.\n\n ValueError\n If a parameter's value violates its legal value range or if the\n combination of two or more given parameters is incompatible.\n \"\"\"\n\n # Validate the inputs X and y, and converts y to numerical classes.\n X, y = self._validate_data(X, y, ensure_min_samples=2)\n check_classification_targets(y)\n y = LabelEncoder().fit_transform(y)\n\n # Check the preferred dimensionality of the projected space\n if self.n_components is not None:\n check_scalar(\n self.n_components, 'n_components', numbers.Integral, min_val=1)\n\n if self.n_components > X.shape[1]:\n raise ValueError('The preferred dimensionality of the '\n 'projected space `n_components` ({}) cannot '\n 'be greater than the given data '\n 'dimensionality ({})!'\n .format(self.n_components, X.shape[1]))\n\n # If warm_start is enabled, check that the inputs are consistent\n check_scalar(self.warm_start, 'warm_start', bool)\n if self.warm_start and hasattr(self, 'components_'):\n if self.components_.shape[1] != X.shape[1]:\n raise ValueError('The new inputs dimensionality ({}) does not '\n 'match the input dimensionality of the '\n 'previously learned transformation ({}).'\n .format(X.shape[1],\n self.components_.shape[1]))\n\n check_scalar(self.max_iter, 'max_iter', numbers.Integral, min_val=1)\n check_scalar(self.tol, 'tol', numbers.Real, min_val=0.)\n check_scalar(self.verbose, 'verbose', numbers.Integral, min_val=0)\n\n if self.callback is not None:\n if not callable(self.callback):\n raise ValueError('`callback` is not callable.')\n\n # Check how the linear transformation should be initialized\n init = self.init\n\n if isinstance(init, np.ndarray):\n init = check_array(init)\n\n # Assert that init.shape[1] = X.shape[1]\n if init.shape[1] != X.shape[1]:\n raise 
ValueError(\n 'The input dimensionality ({}) of the given '\n 'linear transformation `init` must match the '\n 'dimensionality of the given inputs `X` ({}).'\n .format(init.shape[1], X.shape[1]))\n\n # Assert that init.shape[0] <= init.shape[1]\n if init.shape[0] > init.shape[1]:\n raise ValueError(\n 'The output dimensionality ({}) of the given '\n 'linear transformation `init` cannot be '\n 'greater than its input dimensionality ({}).'\n .format(init.shape[0], init.shape[1]))\n\n if self.n_components is not None:\n # Assert that self.n_components = init.shape[0]\n if self.n_components != init.shape[0]:\n raise ValueError('The preferred dimensionality of the '\n 'projected space `n_components` ({}) does'\n ' not match the output dimensionality of '\n 'the given linear transformation '\n '`init` ({})!'\n .format(self.n_components,\n init.shape[0]))\n elif init in ['auto', 'pca', 'lda', 'identity', 'random']:\n pass\n else:\n raise ValueError(\n \"`init` must be 'auto', 'pca', 'lda', 'identity', 'random' \"\n \"or a numpy array of shape (n_components, n_features).\")\n\n return X, y, init" + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/fit", + "name": "fit", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/fit/self", + "name": "self", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/fit/X", + "name": "X", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The training samples." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/fit/y", + "name": "y", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "The corresponding training labels." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model according to the given training data.", + "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The training samples.\n\ny : array-like of shape (n_samples,)\n The corresponding training labels.\n\nReturns\n-------\nself : object\n returns a trained NeighborhoodComponentsAnalysis model.", + "code": " def fit(self, X, y):\n \"\"\"Fit the model according to the given training data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The training samples.\n\n y : array-like of shape (n_samples,)\n The corresponding training labels.\n\n Returns\n -------\n self : object\n returns a trained NeighborhoodComponentsAnalysis model.\n \"\"\"\n\n # Verify inputs X and y and NCA parameters, and transform a copy if\n # needed\n X, y, init = self._validate_params(X, y)\n\n # Initialize the random generator\n self.random_state_ = check_random_state(self.random_state)\n\n # Measure the total training time\n t_train = time.time()\n\n # Compute a mask that stays fixed during optimization:\n same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]\n # (n_samples, n_samples)\n\n # Initialize the transformation\n transformation = self._initialize(X, y, init)\n\n # Create a dictionary of parameters to be passed to the optimizer\n disp = self.verbose - 2 if self.verbose > 1 else -1\n optimizer_params = {'method': 'L-BFGS-B',\n 'fun': self._loss_grad_lbfgs,\n 'args': (X, same_class_mask, -1.0),\n 'jac': True,\n 'x0': transformation,\n 'tol': self.tol,\n 'options': dict(maxiter=self.max_iter, disp=disp),\n 'callback': self._callback\n }\n\n # Call the optimizer\n self.n_iter_ = 0\n opt_result = minimize(**optimizer_params)\n\n # Reshape the solution found by the optimizer\n self.components_ = opt_result.x.reshape(-1, X.shape[1])\n\n # Stop timer\n t_train = time.time() - t_train\n if self.verbose:\n cls_name = self.__class__.__name__\n\n # Warn the user if the algorithm did not converge\n if not opt_result.success:\n warn('[{}] NCA did not converge: {}'.format(\n cls_name, opt_result.message),\n ConvergenceWarning)\n\n print('[{}] Training took {:8.2f}s.'.format(cls_name, t_train))\n\n return self" + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/transform", + "name": "transform", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/transform/self", + "name": "self", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._nca/NeighborhoodComponentsAnalysis/transform/X", + "name": "X", + "qname": "sklearn.neighbors._nca.NeighborhoodComponentsAnalysis.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data samples." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Applies the learned transformation to the given data.", + "docstring": "Applies the learned transformation to the given data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data samples.\n\nReturns\n-------\nX_embedded: ndarray of shape (n_samples, n_components)\n The data samples transformed.\n\nRaises\n------\nNotFittedError\n If :meth:`fit` has not been called before.", + "code": " def transform(self, X):\n \"\"\"Applies the learned transformation to the given data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data samples.\n\n Returns\n -------\n X_embedded: ndarray of shape (n_samples, n_components)\n The data samples transformed.\n\n Raises\n ------\n NotFittedError\n If :meth:`fit` has not been called before.\n \"\"\"\n\n check_is_fitted(self)\n X = check_array(X)\n\n return np.dot(X, self.components_.T)" + }, + { + "id": "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/__init__", + "name": "__init__", + "qname": "sklearn.neighbors._nearest_centroid.NearestCentroid.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/__init__/self", + "name": "self", + "qname": "sklearn.neighbors._nearest_centroid.NearestCentroid.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/__init__/metric", + "name": "metric", + "qname": "sklearn.neighbors._nearest_centroid.NearestCentroid.__init__.metric", + "default_value": "'euclidean'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "", + "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string or callable, it must be one of\nthe options allowed by metrics.pairwise.pairwise_distances for its\nmetric parameter.\nThe centroid for the samples corresponding to each class is the point\nfrom which the sum of the distances (according to the metric) of all\nsamples that belong to that particular class is minimized.\nIf the \"manhattan\" metric is provided, this centroid is the median and,\nfor all other metrics, the centroid is now set to be the mean.\n\n.. versionchanged:: 0.19\n ``metric='precomputed'`` was deprecated and now raises an error" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/__init__/shrink_threshold", + "name": "shrink_threshold", + "qname": "sklearn.neighbors._nearest_centroid.NearestCentroid.__init__.shrink_threshold", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Threshold for shrinking centroids to remove features."
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Nearest centroid classifier.\n\nEach class is represented by its centroid, with test samples classified to\nthe class with the nearest centroid.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, metric='euclidean', *, shrink_threshold=None):\n self.metric = metric\n self.shrink_threshold = shrink_threshold" + }, + { + "id": "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/fit", + "name": "fit", + "qname": "sklearn.neighbors._nearest_centroid.NearestCentroid.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/fit/self", + "name": "self", + "qname": "sklearn.neighbors._nearest_centroid.NearestCentroid.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/fit/X", + "name": "X", + "qname": "sklearn.neighbors._nearest_centroid.NearestCentroid.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features.\nNote that centroid shrinking cannot be used with sparse matrices." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/fit/y", + "name": "y", + "qname": "sklearn.neighbors._nearest_centroid.NearestCentroid.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values (integers)" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the NearestCentroid model according to the given training data.", + "docstring": "Fit the NearestCentroid model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n Note that centroid shrinking cannot be used with sparse matrices.\ny : array-like of shape (n_samples,)\n Target values (integers)", + "code": " def fit(self, X, y):\n \"\"\"\n Fit the NearestCentroid model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n Note that centroid shrinking cannot be used with sparse matrices.\n y : array-like of shape (n_samples,)\n Target values (integers)\n \"\"\"\n if self.metric == 'precomputed':\n raise ValueError(\"Precomputed is not supported.\")\n # If X is sparse and the metric is \"manhattan\", store it in a csc\n # format is easier to calculate the 
median.\n if self.metric == 'manhattan':\n X, y = self._validate_data(X, y, accept_sparse=['csc'])\n else:\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'])\n is_X_sparse = sp.issparse(X)\n if is_X_sparse and self.shrink_threshold:\n raise ValueError(\"threshold shrinking not supported\"\n \" for sparse input\")\n check_classification_targets(y)\n\n n_samples, n_features = X.shape\n le = LabelEncoder()\n y_ind = le.fit_transform(y)\n self.classes_ = classes = le.classes_\n n_classes = classes.size\n if n_classes < 2:\n raise ValueError('The number of classes has to be greater than'\n ' one; got %d class' % (n_classes))\n\n # Mask mapping each class to its members.\n self.centroids_ = np.empty((n_classes, n_features), dtype=np.float64)\n # Number of clusters in each class.\n nk = np.zeros(n_classes)\n\n for cur_class in range(n_classes):\n center_mask = y_ind == cur_class\n nk[cur_class] = np.sum(center_mask)\n if is_X_sparse:\n center_mask = np.where(center_mask)[0]\n\n # XXX: Update other averaging methods according to the metrics.\n if self.metric == \"manhattan\":\n # NumPy does not calculate median of sparse matrices.\n if not is_X_sparse:\n self.centroids_[cur_class] = np.median(X[center_mask], axis=0)\n else:\n self.centroids_[cur_class] = csc_median_axis_0(X[center_mask])\n else:\n if self.metric != 'euclidean':\n warnings.warn(\"Averaging for metrics other than \"\n \"euclidean and manhattan not supported. \"\n \"The average is set to be the mean.\"\n )\n self.centroids_[cur_class] = X[center_mask].mean(axis=0)\n\n if self.shrink_threshold:\n if np.all(np.ptp(X, axis=0) == 0):\n raise ValueError(\"All features have zero variance. \"\n \"Division by zero.\")\n dataset_centroid_ = np.mean(X, axis=0)\n\n # m parameter for determining deviation\n m = np.sqrt((1. / nk) - (1. 
/ n_samples))\n # Calculate deviation using the standard deviation of centroids.\n variance = (X - self.centroids_[y_ind]) ** 2\n variance = variance.sum(axis=0)\n s = np.sqrt(variance / (n_samples - n_classes))\n s += np.median(s) # To deter outliers from affecting the results.\n mm = m.reshape(len(m), 1) # Reshape to allow broadcasting.\n ms = mm * s\n deviation = ((self.centroids_ - dataset_centroid_) / ms)\n # Soft thresholding: if the deviation crosses 0 during shrinking,\n # it becomes zero.\n signs = np.sign(deviation)\n deviation = (np.abs(deviation) - self.shrink_threshold)\n np.clip(deviation, 0, None, out=deviation)\n deviation *= signs\n # Now adjust the centroids using the deviation\n msd = ms * deviation\n self.centroids_ = dataset_centroid_[np.newaxis, :] + msd\n return self" + }, + { + "id": "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/predict", + "name": "predict", + "qname": "sklearn.neighbors._nearest_centroid.NearestCentroid.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/predict/self", + "name": "self", + "qname": "sklearn.neighbors._nearest_centroid.NearestCentroid.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._nearest_centroid/NearestCentroid/predict/X", + "name": "X", + "qname": "sklearn.neighbors._nearest_centroid.NearestCentroid.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform classification on an array of test vectors X.\n\nThe predicted class C for each sample in X is returned.", + "docstring": "Perform classification on an array of test vectors X.\n\nThe predicted class C for each sample in X is returned.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n\nNotes\n-----\nIf the metric constructor parameter is \"precomputed\", X is assumed to\nbe the distance matrix between the data to be predicted and\n``self.centroids_``.", + "code": " def predict(self, X):\n \"\"\"Perform classification on an array of test vectors X.\n\n The predicted class C for each sample in X is returned.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n C : ndarray of shape (n_samples,)\n\n Notes\n -----\n If the metric constructor parameter is \"precomputed\", X is assumed to\n be the distance matrix between the data to be predicted and\n ``self.centroids_``.\n \"\"\"\n check_is_fitted(self)\n\n X = check_array(X, accept_sparse='csr')\n return self.classes_[pairwise_distances(\n X, self.centroids_, metric=self.metric).argmin(axis=1)]" + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/__init__", + "name": "__init__", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/__init__/self", + "name": "self", + "qname": 
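The NearestCentroid entries above describe centroids as per-class means (or medians for the manhattan metric) plus an optional soft-thresholding shrinkage toward the dataset centroid. A small sketch of both modes, with toy data chosen only for illustration:

import numpy as np
from sklearn.neighbors import NearestCentroid

X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])

clf = NearestCentroid()              # metric='euclidean' by default
clf.fit(X, y)
print(clf.centroids_)                # one row per class (the class means here)
print(clf.predict([[-0.8, -1.0]]))   # -> [1], nearest centroid wins

# shrink_threshold triggers the soft-thresholding step from fit()
shrunk = NearestCentroid(shrink_threshold=0.2).fit(X, y)
print(shrunk.predict([[-0.8, -1.0]]))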
"sklearn.neighbors._regression.KNeighborsRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.__init__.n_neighbors", + "default_value": "5", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of neighbors to use by default for :meth:`kneighbors` queries." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/__init__/weights", + "name": "weights", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.__init__.weights", + "default_value": "'uniform'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'uniform', 'distance'} or callable", + "default_value": "'uniform'", + "description": "weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. All points in each neighborhood\n are weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\nUniform weights are used by default." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["distance", "uniform"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.__init__.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", + "default_value": "'auto'", + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/__init__/leaf_size", + "name": "leaf_size", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.__init__.leaf_size", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Leaf size passed to BallTree or KDTree. This can affect the\nspeed of the construction and query, as well as the memory\nrequired to store the tree. The optimal value depends on the\nnature of the problem." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/__init__/p", + "name": "p", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.__init__.p", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Power parameter for the Minkowski metric. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/__init__/metric", + "name": "metric", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.__init__.metric", + "default_value": "'minkowski'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'minkowski'", + "description": "the distance metric to use for the tree. The default metric is\nminkowski, and with p=2 is equivalent to the standard Euclidean\nmetric. See the documentation of :class:`DistanceMetric` for a\nlist of available metrics.\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square during fit. X may be a :term:`sparse graph`,\nin which case only \"nonzero\" elements may be considered neighbors." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/__init__/metric_params", + "name": "metric_params", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.__init__.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional keyword arguments for the metric function." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\nDoesn't affect :meth:`fit` method." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/__init__/kwargs", + "name": "kwargs", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.__init__.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Regression based on k-nearest neighbors.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.9", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_neighbors=5, *, weights='uniform',\n algorithm='auto', leaf_size=30,\n p=2, metric='minkowski', metric_params=None, n_jobs=None,\n **kwargs):\n super().__init__(\n n_neighbors=n_neighbors,\n algorithm=algorithm,\n leaf_size=leaf_size, metric=metric, p=p,\n metric_params=metric_params, n_jobs=n_jobs, **kwargs)\n self.weights = _check_weights(weights)" + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/_more_tags", + "name": "_more_tags", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/_more_tags/self", + "name": "self", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n # For cross-validation routines to split data correctly\n return {'pairwise': self.metric == 'precomputed'}" + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor._pairwise", + "decorators": [ + "deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/_pairwise/self", + "name": "self", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n # For cross-validation routines to split data correctly\n return self.metric == 'precomputed'" + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/fit", + "name": "fit", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/fit/self", + "name": "self", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/fit/X", + "name": "X", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/fit/y", + "name": "y", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples,) or (n_samples, n_outputs)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the k-nearest neighbors regressor from the training dataset.", + "docstring": "Fit the k-nearest neighbors regressor from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nReturns\n-------\nself : KNeighborsRegressor\n The fitted k-nearest neighbors regressor.", + "code": " def fit(self, X, y):\n \"\"\"Fit the k-nearest neighbors regressor from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n y : {array-like, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_outputs)\n Target values.\n\n Returns\n -------\n self : KNeighborsRegressor\n The fitted k-nearest neighbors regressor.\n \"\"\"\n return self._fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/predict", + "name": "predict", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/predict/self", + "name": "self", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/KNeighborsRegressor/predict/X", + "name": "X", + "qname": "sklearn.neighbors._regression.KNeighborsRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'", + "default_value": "", + "description": "Test samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_queries, n_features)" + }, + { + "kind": "NamedType", + "name": "(n_queries, n_indexed) if metric == 'precomputed'" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the target for the provided data", + "docstring": "Predict the target for the provided data\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int\n Target values.", + "code": " def predict(self, X):\n \"\"\"Predict the target for the provided data\n\n Parameters\n ----------\n X : array-like of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\n Returns\n -------\n y : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int\n Target values.\n \"\"\"\n X = check_array(X, accept_sparse='csr')\n\n neigh_dist, neigh_ind = self.kneighbors(X)\n\n weights = _get_weights(neigh_dist, self.weights)\n\n _y = self._y\n if _y.ndim == 1:\n _y = _y.reshape((-1, 1))\n\n if weights is None:\n y_pred = np.mean(_y[neigh_ind], axis=1)\n else:\n y_pred = np.empty((X.shape[0], _y.shape[1]), dtype=np.float64)\n denom = np.sum(weights, axis=1)\n\n for j in range(_y.shape[1]):\n num = np.sum(_y[neigh_ind, j] * weights, axis=1)\n y_pred[:, j] = num / denom\n\n if self._y.ndim == 1:\n y_pred = y_pred.ravel()\n\n return y_pred" + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/__init__", + "name": "__init__", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/__init__/self", + "name": "self", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/__init__/radius", + "name": "radius", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.__init__.radius", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Range of parameter space to use by default for :meth:`radius_neighbors`\nqueries." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/__init__/weights", + "name": "weights", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.__init__.weights", + "default_value": "'uniform'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'uniform', 'distance'} or callable", + "default_value": "'uniform'", + "description": "weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. 
All points in each neighborhood\n are weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\nUniform weights are used by default." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["distance", "uniform"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.__init__.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", + "default_value": "'auto'", + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/__init__/leaf_size", + "name": "leaf_size", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.__init__.leaf_size", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Leaf size passed to BallTree or KDTree. This can affect the\nspeed of the construction and query, as well as the memory\nrequired to store the tree. The optimal value depends on the\nnature of the problem." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/__init__/p", + "name": "p", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.__init__.p", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Power parameter for the Minkowski metric. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/__init__/metric", + "name": "metric", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.__init__.metric", + "default_value": "'minkowski'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'minkowski'", + "description": "the distance metric to use for the tree. The default metric is\nminkowski, and with p=2 is equivalent to the standard Euclidean\nmetric. See the documentation of :class:`DistanceMetric` for a\nlist of available metrics.\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square during fit. 
X may be a :term:`sparse graph`,\nin which case only \"nonzero\" elements may be considered neighbors." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/__init__/metric_params", + "name": "metric_params", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.__init__.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional keyword arguments for the metric function." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/__init__/kwargs", + "name": "kwargs", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.__init__.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Regression based on neighbors within a fixed radius.\n\nThe target is predicted by local interpolation of the targets\nassociated of the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.9", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, radius=1.0, *, weights='uniform',\n algorithm='auto', leaf_size=30,\n p=2, metric='minkowski', metric_params=None, n_jobs=None,\n **kwargs):\n super().__init__(\n radius=radius,\n algorithm=algorithm,\n leaf_size=leaf_size,\n p=p, metric=metric, metric_params=metric_params,\n n_jobs=n_jobs, **kwargs)\n self.weights = _check_weights(weights)" + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/fit", + "name": "fit", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/fit/self", + "name": "self", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/fit/X", + "name": "X", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/fit/y", + "name": "y", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Target values." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples,) or (n_samples, n_outputs)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the radius neighbors regressor from the training dataset.", + "docstring": "Fit the radius neighbors regressor from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nReturns\n-------\nself : RadiusNeighborsRegressor\n The fitted radius neighbors regressor.", + "code": " def fit(self, X, y):\n \"\"\"Fit the radius neighbors regressor from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n y : {array-like, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_outputs)\n Target values.\n\n Returns\n -------\n self : RadiusNeighborsRegressor\n The fitted radius neighbors regressor.\n \"\"\"\n return self._fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/predict", + "name": "predict", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/predict/self", + "name": "self", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._regression/RadiusNeighborsRegressor/predict/X", + "name": "X", + "qname": "sklearn.neighbors._regression.RadiusNeighborsRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'", + "default_value": "", + "description": "Test samples." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_queries, n_features)" + }, + { + "kind": "NamedType", + "name": "(n_queries, n_indexed) if metric == 'precomputed'" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the target for the provided data", + "docstring": "Predict the target for the provided data\n\nParameters\n----------\nX : array-like of shape (n_queries, n_features), or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\nReturns\n-------\ny : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=double\n Target values.", + "code": " def predict(self, X):\n \"\"\"Predict the target for the provided data\n\n Parameters\n ----------\n X : array-like of shape (n_queries, n_features), \\\n or (n_queries, n_indexed) if metric == 'precomputed'\n Test samples.\n\n Returns\n -------\n y : ndarray of shape (n_queries,) or (n_queries, n_outputs), \\\n dtype=double\n Target values.\n \"\"\"\n X = check_array(X, accept_sparse='csr')\n\n neigh_dist, neigh_ind = self.radius_neighbors(X)\n\n weights = _get_weights(neigh_dist, self.weights)\n\n _y = self._y\n if _y.ndim == 1:\n _y = _y.reshape((-1, 1))\n\n empty_obs = np.full_like(_y[0], np.nan)\n\n if weights is None:\n y_pred = np.array([np.mean(_y[ind, :], axis=0)\n if len(ind) else empty_obs\n for (i, ind) in enumerate(neigh_ind)])\n\n else:\n y_pred = np.array([np.average(_y[ind, :], axis=0,\n weights=weights[i])\n if len(ind) else empty_obs\n for (i, ind) in enumerate(neigh_ind)])\n\n if np.any(np.isnan(y_pred)):\n empty_warning_msg = (\"One or more samples have no neighbors \"\n \"within specified radius; predicting NaN.\")\n warnings.warn(empty_warning_msg)\n\n if self._y.ndim == 1:\n y_pred = y_pred.ravel()\n\n return y_pred" + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/__init__", + "name": "__init__", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/__init__/self", + "name": "self", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.__init__.n_neighbors", + "default_value": "5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "5", + "description": "Number of neighbors to use by default for :meth:`kneighbors` queries." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/__init__/radius", + "name": "radius", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.__init__.radius", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Range of parameter space to use by default for :meth:`radius_neighbors`\nqueries." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/__init__/algorithm", + "name": "algorithm", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.__init__.algorithm", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", + "default_value": "'auto'", + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + }, + "type": { + "kind": "EnumType", + "values": ["kd_tree", "auto", "brute", "ball_tree"] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/__init__/leaf_size", + "name": "leaf_size", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.__init__.leaf_size", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Leaf size passed to BallTree or KDTree. This can affect the\nspeed of the construction and query, as well as the memory\nrequired to store the tree. The optimal value depends on the\nnature of the problem." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/__init__/metric", + "name": "metric", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.__init__.metric", + "default_value": "'minkowski'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "'minkowski'", + "description": "the distance metric to use for the tree. The default metric is\nminkowski, and with p=2 is equivalent to the standard Euclidean\nmetric. See the documentation of :class:`DistanceMetric` for a\nlist of available metrics.\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square during fit. X may be a :term:`sparse graph`,\nin which case only \"nonzero\" elements may be considered neighbors." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/__init__/p", + "name": "p", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.__init__.p", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Parameter for the Minkowski metric from\nsklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/__init__/metric_params", + "name": "metric_params", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.__init__.metric_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Additional keyword arguments for the metric function." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Unsupervised learner for implementing neighbor searches.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.9", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, n_neighbors=5, radius=1.0,\n algorithm='auto', leaf_size=30, metric='minkowski',\n p=2, metric_params=None, n_jobs=None):\n super().__init__(\n n_neighbors=n_neighbors,\n radius=radius,\n algorithm=algorithm,\n leaf_size=leaf_size, metric=metric, p=p,\n metric_params=metric_params, n_jobs=n_jobs)" + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/fit", + "name": "fit", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/fit/self", + "name": "self", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/fit/X", + "name": "X", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'", + "default_value": "", + "description": "Training data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neighbors._unsupervised/NearestNeighbors/fit/y", + "name": "y", + "qname": "sklearn.neighbors._unsupervised.NearestNeighbors.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "Not used, present for API consistency by convention." 
+ }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the nearest neighbors estimator from the training dataset.", + "docstring": "Fit the nearest neighbors estimator from the training dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples) if metric='precomputed'\n Training data.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : NearestNeighbors\n The fitted nearest neighbors estimator.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the nearest neighbors estimator from the training dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples, n_samples) if metric='precomputed'\n Training data.\n\n y : Ignored\n Not used, present for API consistency by convention.\n\n Returns\n -------\n self : NearestNeighbors\n The fitted nearest neighbors estimator.\n \"\"\"\n return self._fit(X)" + }, + { + "id": "scikit-learn/sklearn.neighbors.setup/configuration", + "name": "configuration", + "qname": "sklearn.neighbors.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neighbors.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.neighbors.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neighbors.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.neighbors.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package='', top_path=None):\n import numpy\n from numpy.distutils.misc_util import Configuration\n\n config = Configuration('neighbors', parent_package, top_path)\n libraries = []\n if os.name == 'posix':\n libraries.append('m')\n\n config.add_extension('_ball_tree',\n sources=['_ball_tree.pyx'],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_extension('_kd_tree',\n sources=['_kd_tree.pyx'],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_extension('_dist_metrics',\n sources=['_dist_metrics.pyx'],\n include_dirs=[numpy.get_include(),\n os.path.join(numpy.get_include(),\n 'numpy')],\n libraries=libraries)\n\n config.add_extension('_typedefs',\n sources=['_typedefs.pyx'],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n config.add_extension(\"_quad_tree\",\n sources=[\"_quad_tree.pyx\"],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_subpackage('tests')\n\n return config" + }, + { + "id": "scikit-learn/sklearn.neural_network._base/binary_log_loss", + "name": "binary_log_loss", + "qname": "sklearn.neural_network._base.binary_log_loss", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._base/binary_log_loss/y_true", + "name": "y_true", + "qname": "sklearn.neural_network._base.binary_log_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + 
"docstring": { + "type": "array-like or label indicator matrix", + "default_value": "", + "description": "Ground truth (correct) labels." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "label indicator matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._base/binary_log_loss/y_prob", + "name": "y_prob", + "qname": "sklearn.neural_network._base.binary_log_loss.y_prob", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of float, shape = (n_samples, 1)", + "default_value": "", + "description": "Predicted probabilities, as returned by a classifier's\npredict_proba method." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of float" + }, + { + "kind": "NamedType", + "name": "shape = (n_samples, 1)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute binary logistic loss for classification.\n\nThis is identical to log_loss in binary classification case,\nbut is kept for its use in multilabel case.", + "docstring": "Compute binary logistic loss for classification.\n\nThis is identical to log_loss in binary classification case,\nbut is kept for its use in multilabel case.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n Ground truth (correct) labels.\n\ny_prob : array-like of float, shape = (n_samples, 1)\n Predicted probabilities, as returned by a classifier's\n predict_proba method.\n\nReturns\n-------\nloss : float\n The degree to which the samples are correctly predicted.", + "code": "def binary_log_loss(y_true, y_prob):\n \"\"\"Compute binary logistic loss for classification.\n\n This is identical to log_loss in binary classification case,\n but is kept for its use in multilabel case.\n\n Parameters\n ----------\n y_true : array-like or label indicator matrix\n Ground truth (correct) labels.\n\n y_prob : array-like of float, shape = (n_samples, 1)\n Predicted probabilities, as returned by a classifier's\n predict_proba method.\n\n Returns\n -------\n loss : float\n The degree to which the samples are correctly predicted.\n \"\"\"\n eps = np.finfo(y_prob.dtype).eps\n y_prob = np.clip(y_prob, eps, 1 - eps)\n return -(xlogy(y_true, y_prob).sum() +\n xlogy(1 - y_true, 1 - y_prob).sum()) / y_prob.shape[0]" + }, + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_identity", + "name": "inplace_identity", + "qname": "sklearn.neural_network._base.inplace_identity", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_identity/X", + "name": "X", + "qname": "sklearn.neural_network._base.inplace_identity.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "Data, where n_samples is the number of samples\nand n_features is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Simply leave the input array unchanged.", + "docstring": "Simply leave the input array unchanged.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Data, where n_samples is the number of samples\n and n_features is the number of features.", + "code": "def inplace_identity(X):\n \"\"\"Simply leave the input array unchanged.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n Data, where n_samples is the number of samples\n and n_features is the number of features.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_identity_derivative", + "name": "inplace_identity_derivative", + "qname": "sklearn.neural_network._base.inplace_identity_derivative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_identity_derivative/Z", + "name": "Z", + "qname": "sklearn.neural_network._base.inplace_identity_derivative.Z", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "The data which was output from the identity activation function during\nthe forward pass." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_identity_derivative/delta", + "name": "delta", + "qname": "sklearn.neural_network._base.inplace_identity_derivative.delta", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like}, shape (n_samples, n_features)", + "default_value": "", + "description": "The backpropagated error signal to be modified inplace." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply the derivative of the identity function: do nothing.", + "docstring": "Apply the derivative of the identity function: do nothing.\n\nParameters\n----------\nZ : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the identity activation function during\n the forward pass.\n\ndelta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace.", + "code": "def inplace_identity_derivative(Z, delta):\n \"\"\"Apply the derivative of the identity function: do nothing.\n\n Parameters\n ----------\n Z : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the identity activation function during\n the forward pass.\n\n delta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace.\n \"\"\"" + }, + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_logistic", + "name": "inplace_logistic", + "qname": "sklearn.neural_network._base.inplace_logistic", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_logistic/X", + "name": "X", + "qname": "sklearn.neural_network._base.inplace_logistic.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the logistic function inplace.", + "docstring": "Compute the logistic function inplace.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data.", + "code": "def inplace_logistic(X):\n \"\"\"Compute the logistic function inplace.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data.\n \"\"\"\n logistic_sigmoid(X, out=X)" + }, + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_logistic_derivative", + "name": "inplace_logistic_derivative", + "qname": "sklearn.neural_network._base.inplace_logistic_derivative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_logistic_derivative/Z", + "name": "Z", + "qname": "sklearn.neural_network._base.inplace_logistic_derivative.Z", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "The data which was output from the logistic activation function during\nthe forward pass." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_logistic_derivative/delta", + "name": "delta", + "qname": "sklearn.neural_network._base.inplace_logistic_derivative.delta", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like}, shape (n_samples, n_features)", + "default_value": "", + "description": "The backpropagated error signal to be modified inplace." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply the derivative of the logistic sigmoid function.\n\nIt exploits the fact that the derivative is a simple function of the output\nvalue from logistic function.", + "docstring": "Apply the derivative of the logistic sigmoid function.\n\nIt exploits the fact that the derivative is a simple function of the output\nvalue from logistic function.\n\nParameters\n----------\nZ : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the logistic activation function during\n the forward pass.\n\ndelta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace.", + "code": "def inplace_logistic_derivative(Z, delta):\n \"\"\"Apply the derivative of the logistic sigmoid function.\n\n It exploits the fact that the derivative is a simple function of the output\n value from logistic function.\n\n Parameters\n ----------\n Z : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the logistic activation function during\n the forward pass.\n\n delta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace.\n \"\"\"\n delta *= Z\n delta *= (1 - Z)" + }, + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_relu", + "name": "inplace_relu", + "qname": "sklearn.neural_network._base.inplace_relu", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_relu/X", + "name": "X", + "qname": "sklearn.neural_network._base.inplace_relu.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the rectified linear unit function inplace.", + "docstring": "Compute the rectified linear unit function inplace.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data.", + "code": "def inplace_relu(X):\n \"\"\"Compute the rectified linear unit function inplace.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data.\n \"\"\"\n np.maximum(X, 0, out=X)" + }, + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_relu_derivative", + "name": "inplace_relu_derivative", + "qname": "sklearn.neural_network._base.inplace_relu_derivative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_relu_derivative/Z", + "name": "Z", + "qname": "sklearn.neural_network._base.inplace_relu_derivative.Z", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "The data which was output from the rectified linear units activation\nfunction during the forward pass." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_relu_derivative/delta", + "name": "delta", + "qname": "sklearn.neural_network._base.inplace_relu_derivative.delta", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like}, shape (n_samples, n_features)", + "default_value": "", + "description": "The backpropagated error signal to be modified inplace." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply the derivative of the relu function.\n\nIt exploits the fact that the derivative is a simple function of the output\nvalue from rectified linear units activation function.", + "docstring": "Apply the derivative of the relu function.\n\nIt exploits the fact that the derivative is a simple function of the output\nvalue from rectified linear units activation function.\n\nParameters\n----------\nZ : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the rectified linear units activation\n function during the forward pass.\n\ndelta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace.", + "code": "def inplace_relu_derivative(Z, delta):\n \"\"\"Apply the derivative of the relu function.\n\n It exploits the fact that the derivative is a simple function of the output\n value from rectified linear units activation function.\n\n Parameters\n ----------\n Z : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the rectified linear units activation\n function during the forward pass.\n\n delta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace.\n \"\"\"\n delta[Z == 0] = 0" + }, + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_softmax", + "name": "inplace_softmax", + "qname": "sklearn.neural_network._base.inplace_softmax", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_softmax/X", + "name": "X", + "qname": "sklearn.neural_network._base.inplace_softmax.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the K-way softmax function inplace.", + "docstring": "Compute the K-way softmax function inplace.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data.", + "code": "def inplace_softmax(X):\n \"\"\"Compute the K-way softmax function inplace.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data.\n \"\"\"\n tmp = X - X.max(axis=1)[:, np.newaxis]\n np.exp(tmp, out=X)\n X /= X.sum(axis=1)[:, np.newaxis]" + }, + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_tanh", + "name": "inplace_tanh", + "qname": "sklearn.neural_network._base.inplace_tanh", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_tanh/X", + "name": "X", + "qname": "sklearn.neural_network._base.inplace_tanh.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the hyperbolic tan function inplace.", + "docstring": "Compute the hyperbolic tan function inplace.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data.", + "code": "def inplace_tanh(X):\n \"\"\"Compute the hyperbolic tan function inplace.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}, shape (n_samples, n_features)\n The input data.\n \"\"\"\n np.tanh(X, out=X)" + }, + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_tanh_derivative", + "name": "inplace_tanh_derivative", + "qname": "sklearn.neural_network._base.inplace_tanh_derivative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_tanh_derivative/Z", + "name": "Z", + "qname": "sklearn.neural_network._base.inplace_tanh_derivative.Z", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}, shape (n_samples, n_features)", + "default_value": "", + "description": "The data which was output from the hyperbolic tangent activation\nfunction during the forward pass." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._base/inplace_tanh_derivative/delta", + "name": "delta", + "qname": "sklearn.neural_network._base.inplace_tanh_derivative.delta", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like}, shape (n_samples, n_features)", + "default_value": "", + "description": "The backpropagated error signal to be modified inplace." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply the derivative of the hyperbolic tanh function.\n\nIt exploits the fact that the derivative is a simple function of the output\nvalue from hyperbolic tangent.", + "docstring": "Apply the derivative of the hyperbolic tanh function.\n\nIt exploits the fact that the derivative is a simple function of the output\nvalue from hyperbolic tangent.\n\nParameters\n----------\nZ : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the hyperbolic tangent activation\n function during the forward pass.\n\ndelta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace.", + "code": "def inplace_tanh_derivative(Z, delta):\n \"\"\"Apply the derivative of the hyperbolic tanh function.\n\n It exploits the fact that the derivative is a simple function of the output\n value from hyperbolic tangent.\n\n Parameters\n ----------\n Z : {array-like, sparse matrix}, shape (n_samples, n_features)\n The data which was output from the hyperbolic tangent activation\n function during the forward pass.\n\n delta : {array-like}, shape (n_samples, n_features)\n The backpropagated error signal to be modified inplace.\n \"\"\"\n delta *= (1 - Z ** 2)" + }, + { + "id": "scikit-learn/sklearn.neural_network._base/log_loss", + "name": "log_loss", + "qname": "sklearn.neural_network._base.log_loss", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._base/log_loss/y_true", + "name": "y_true", + "qname": "sklearn.neural_network._base.log_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or label indicator matrix", + "default_value": "", + "description": "Ground truth (correct) labels." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "label indicator matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._base/log_loss/y_prob", + "name": "y_prob", + "qname": "sklearn.neural_network._base.log_loss.y_prob", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of float, shape = (n_samples, n_classes)", + "default_value": "", + "description": "Predicted probabilities, as returned by a classifier's\npredict_proba method." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of float" + }, + { + "kind": "NamedType", + "name": "shape = (n_samples, n_classes)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute Logistic loss for classification.", + "docstring": "Compute Logistic loss for classification.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n Ground truth (correct) labels.\n\ny_prob : array-like of float, shape = (n_samples, n_classes)\n Predicted probabilities, as returned by a classifier's\n predict_proba method.\n\nReturns\n-------\nloss : float\n The degree to which the samples are correctly predicted.", + "code": "def log_loss(y_true, y_prob):\n \"\"\"Compute Logistic loss for classification.\n\n Parameters\n ----------\n y_true : array-like or label indicator matrix\n Ground truth (correct) labels.\n\n y_prob : array-like of float, shape = (n_samples, n_classes)\n Predicted probabilities, as returned by a classifier's\n predict_proba method.\n\n Returns\n -------\n loss : float\n The degree to which the samples are correctly predicted.\n \"\"\"\n eps = np.finfo(y_prob.dtype).eps\n y_prob = np.clip(y_prob, eps, 1 - eps)\n if y_prob.shape[1] == 1:\n y_prob = np.append(1 - y_prob, y_prob, axis=1)\n\n if y_true.shape[1] == 1:\n y_true = np.append(1 - y_true, y_true, axis=1)\n\n return - xlogy(y_true, y_prob).sum() / y_prob.shape[0]" + }, + { + "id": "scikit-learn/sklearn.neural_network._base/squared_loss", + "name": "squared_loss", + "qname": "sklearn.neural_network._base.squared_loss", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._base/squared_loss/y_true", + "name": "y_true", + "qname": "sklearn.neural_network._base.squared_loss.y_true", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or label indicator matrix", + "default_value": "", + "description": "Ground truth (correct) values." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "label indicator matrix" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._base/squared_loss/y_pred", + "name": "y_pred", + "qname": "sklearn.neural_network._base.squared_loss.y_pred", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like or label indicator matrix", + "default_value": "", + "description": "Predicted values, as returned by a regression estimator." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "label indicator matrix" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the squared loss for regression.", + "docstring": "Compute the squared loss for regression.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n Ground truth (correct) values.\n\ny_pred : array-like or label indicator matrix\n Predicted values, as returned by a regression estimator.\n\nReturns\n-------\nloss : float\n The degree to which the samples are correctly predicted.", + "code": "def squared_loss(y_true, y_pred):\n \"\"\"Compute the squared loss for regression.\n\n Parameters\n ----------\n y_true : array-like or label indicator matrix\n Ground truth (correct) values.\n\n y_pred : array-like or label indicator matrix\n Predicted values, as returned by a regression estimator.\n\n Returns\n -------\n loss : float\n The degree to which the samples are correctly predicted.\n \"\"\"\n return ((y_true - y_pred) ** 2).mean() / 2" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__", + "name": "__init__", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/hidden_layer_sizes", + "name": "hidden_layer_sizes", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.hidden_layer_sizes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/activation", + "name": "activation", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.activation", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/solver", + "name": "solver", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.solver", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/alpha", + "name": "alpha", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.alpha", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/batch_size", + "name": "batch_size", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.batch_size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.learning_rate", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/learning_rate_init", + "name": "learning_rate_init", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.learning_rate_init", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/power_t", + "name": "power_t", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.power_t", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/loss", + "name": "loss", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.shuffle", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/random_state", + "name": "random_state", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/tol", + "name": "tol", + 
"qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.tol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/verbose", + "name": "verbose", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.warm_start", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/momentum", + "name": "momentum", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.momentum", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/nesterovs_momentum", + "name": "nesterovs_momentum", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.nesterovs_momentum", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.early_stopping", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.validation_fraction", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/beta_1", + "name": "beta_1", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.beta_1", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/beta_2", + "name": "beta_2", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.beta_2", + 
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.epsilon", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.n_iter_no_change", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/__init__/max_fun", + "name": "max_fun", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.__init__.max_fun", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for MLP classification and regression.\n\nWarning: This class should not be used directly.\nUse derived classes instead.\n\n.. versionadded:: 0.18", + "docstring": "", + "code": " @abstractmethod\n def __init__(self, hidden_layer_sizes, activation, solver,\n alpha, batch_size, learning_rate, learning_rate_init, power_t,\n max_iter, loss, shuffle, random_state, tol, verbose,\n warm_start, momentum, nesterovs_momentum, early_stopping,\n validation_fraction, beta_1, beta_2, epsilon,\n n_iter_no_change, max_fun):\n self.activation = activation\n self.solver = solver\n self.alpha = alpha\n self.batch_size = batch_size\n self.learning_rate = learning_rate\n self.learning_rate_init = learning_rate_init\n self.power_t = power_t\n self.max_iter = max_iter\n self.loss = loss\n self.hidden_layer_sizes = hidden_layer_sizes\n self.shuffle = shuffle\n self.random_state = random_state\n self.tol = tol\n self.verbose = verbose\n self.warm_start = warm_start\n self.momentum = momentum\n self.nesterovs_momentum = nesterovs_momentum\n self.early_stopping = early_stopping\n self.validation_fraction = validation_fraction\n self.beta_1 = beta_1\n self.beta_2 = beta_2\n self.epsilon = epsilon\n self.n_iter_no_change = n_iter_no_change\n self.max_fun = max_fun" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_backprop", + "name": "_backprop", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._backprop", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_backprop/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._backprop.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_backprop/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._backprop.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_backprop/y", + "name": "y", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._backprop.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The target values." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_backprop/activations", + "name": "activations", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._backprop.activations", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = n_layers - 1", + "default_value": "", + "description": "The ith element of the list holds the values of the ith layer." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = n_layers - 1" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_backprop/deltas", + "name": "deltas", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._backprop.deltas", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = n_layers - 1", + "default_value": "", + "description": "The ith element of the list holds the difference between the\nactivations of the i + 1 layer and the backpropagated error.\nMore specifically, deltas are gradients of loss with respect to z\nin each layer, where z = wx + b is the value of a particular layer\nbefore passing through the activation function" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = n_layers - 1" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_backprop/coef_grads", + "name": "coef_grads", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._backprop.coef_grads", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = n_layers - 1", + "default_value": "", + "description": "The ith element contains the amount of change used to update the\ncoefficient parameters of the ith layer in an iteration." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = n_layers - 1" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_backprop/intercept_grads", + "name": "intercept_grads", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._backprop.intercept_grads", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = n_layers - 1", + "default_value": "", + "description": "The ith element contains the amount of change used to update the\nintercept parameters of the ith layer in an iteration." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = n_layers - 1" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the MLP loss function and its corresponding derivatives\nwith respect to each parameter: weights and bias vectors.", + "docstring": "Compute the MLP loss function and its corresponding derivatives\nwith respect to each parameter: weights and bias vectors.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\ny : ndarray of shape (n_samples,)\n The target values.\n\nactivations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer.\n\ndeltas : list, length = n_layers - 1\n The ith element of the list holds the difference between the\n activations of the i + 1 layer and the backpropagated error.\n More specifically, deltas are gradients of loss with respect to z\n in each layer, where z = wx + b is the value of a particular layer\n before passing through the activation function\n\ncoef_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n coefficient parameters of the ith layer in an iteration.\n\nintercept_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n intercept parameters of the ith layer in an iteration.\n\nReturns\n-------\nloss : float\ncoef_grads : list, length = n_layers - 1\nintercept_grads : list, length = n_layers - 1", + "code": " def _backprop(self, X, y, activations, deltas, coef_grads,\n intercept_grads):\n \"\"\"Compute the MLP loss function and its corresponding derivatives\n with respect to each parameter: weights and bias vectors.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n y : ndarray of shape (n_samples,)\n The target values.\n\n activations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer.\n\n deltas : list, length = n_layers - 1\n The ith element of the list holds the difference between the\n activations of the i + 1 layer and the backpropagated error.\n More specifically, deltas are gradients of loss with respect to z\n in each layer, where z = wx + b is the value of a particular layer\n before passing through the activation function\n\n coef_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n coefficient parameters of the ith layer in an iteration.\n\n intercept_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n intercept parameters of the 
ith layer in an iteration.\n\n Returns\n -------\n loss : float\n coef_grads : list, length = n_layers - 1\n intercept_grads : list, length = n_layers - 1\n \"\"\"\n n_samples = X.shape[0]\n\n # Forward propagate\n activations = self._forward_pass(activations)\n\n # Get loss\n loss_func_name = self.loss\n if loss_func_name == 'log_loss' and self.out_activation_ == 'logistic':\n loss_func_name = 'binary_log_loss'\n loss = LOSS_FUNCTIONS[loss_func_name](y, activations[-1])\n # Add L2 regularization term to loss\n values = 0\n for s in self.coefs_:\n s = s.ravel()\n values += np.dot(s, s)\n loss += (0.5 * self.alpha) * values / n_samples\n\n # Backward propagate\n last = self.n_layers_ - 2\n\n # The calculation of delta[last] here works with following\n # combinations of output activation and loss function:\n # sigmoid and binary cross entropy, softmax and categorical cross\n # entropy, and identity with squared loss\n deltas[last] = activations[-1] - y\n\n # Compute gradient for the last layer\n self._compute_loss_grad(\n last, n_samples, activations, deltas, coef_grads, intercept_grads)\n\n inplace_derivative = DERIVATIVES[self.activation]\n # Iterate over the hidden layers\n for i in range(self.n_layers_ - 2, 0, -1):\n deltas[i - 1] = safe_sparse_dot(deltas[i], self.coefs_[i].T)\n inplace_derivative(activations[i], deltas[i - 1])\n\n self._compute_loss_grad(\n i - 1, n_samples, activations, deltas, coef_grads,\n intercept_grads)\n\n return loss, coef_grads, intercept_grads" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_compute_loss_grad", + "name": "_compute_loss_grad", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._compute_loss_grad", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_compute_loss_grad/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._compute_loss_grad.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_compute_loss_grad/layer", + "name": "layer", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._compute_loss_grad.layer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_compute_loss_grad/n_samples", + "name": "n_samples", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._compute_loss_grad.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_compute_loss_grad/activations", + "name": "activations", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._compute_loss_grad.activations", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { 
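The _backprop entry above hinges on one simplification: for the matched pairs it names (softmax with categorical cross-entropy, sigmoid with binary cross-entropy, identity with squared loss), the gradient of the loss with respect to the output layer's pre-activation collapses to activations[-1] - y, and every earlier delta is obtained by pushing the next one back through the weights and the activation derivative. A compact standalone sketch of that recursion for a two-layer network (plain NumPy, not sklearn's internals; the regularization term is omitted):

import numpy as np

rng = np.random.default_rng(0)
n, d, h, k = 4, 3, 5, 2                       # samples, inputs, hidden units, classes
X = rng.normal(size=(n, d))
Y = np.eye(k)[rng.integers(k, size=n)]        # one-hot targets

W1, b1 = rng.normal(size=(d, h)), np.zeros(h)
W2, b2 = rng.normal(size=(h, k)), np.zeros(k)

# Forward pass: tanh hidden layer, softmax output.
A1 = np.tanh(X @ W1 + b1)
logits = A1 @ W2 + b2
P = np.exp(logits - logits.max(axis=1, keepdims=True))
P /= P.sum(axis=1, keepdims=True)

# Output delta: softmax + cross-entropy collapses to P - Y.
delta2 = P - Y
grad_W2 = A1.T @ delta2 / n
grad_b2 = delta2.mean(axis=0)

# Hidden delta: push back through W2, then apply tanh's derivative 1 - A1**2.
delta1 = (delta2 @ W2.T) * (1 - A1 ** 2)
grad_W1 = X.T @ delta1 / n
grad_b1 = delta1.mean(axis=0)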
+ "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_compute_loss_grad/deltas", + "name": "deltas", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._compute_loss_grad.deltas", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_compute_loss_grad/coef_grads", + "name": "coef_grads", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._compute_loss_grad.coef_grads", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_compute_loss_grad/intercept_grads", + "name": "intercept_grads", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._compute_loss_grad.intercept_grads", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the gradient of loss with respect to coefs and intercept for\nspecified layer.\n\nThis function does backpropagation for the specified one layer.", + "docstring": "Compute the gradient of loss with respect to coefs and intercept for\nspecified layer.\n\nThis function does backpropagation for the specified one layer.", + "code": " def _compute_loss_grad(self, layer, n_samples, activations, deltas,\n coef_grads, intercept_grads):\n \"\"\"Compute the gradient of loss with respect to coefs and intercept for\n specified layer.\n\n This function does backpropagation for the specified one layer.\n \"\"\"\n coef_grads[layer] = safe_sparse_dot(activations[layer].T,\n deltas[layer])\n coef_grads[layer] += (self.alpha * self.coefs_[layer])\n coef_grads[layer] /= n_samples\n\n intercept_grads[layer] = np.mean(deltas[layer], 0)" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit", + "name": "_fit", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit/y", + "name": "y", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit.y", + "default_value": null, + 
"assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit/incremental", + "name": "incremental", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit.incremental", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit(self, X, y, incremental=False):\n # Make sure self.hidden_layer_sizes is a list\n hidden_layer_sizes = self.hidden_layer_sizes\n if not hasattr(hidden_layer_sizes, \"__iter__\"):\n hidden_layer_sizes = [hidden_layer_sizes]\n hidden_layer_sizes = list(hidden_layer_sizes)\n\n # Validate input parameters.\n self._validate_hyperparameters()\n if np.any(np.array(hidden_layer_sizes) <= 0):\n raise ValueError(\"hidden_layer_sizes must be > 0, got %s.\" %\n hidden_layer_sizes)\n first_pass = (not hasattr(self, 'coefs_') or\n (not self.warm_start and not incremental))\n\n X, y = self._validate_input(X, y, incremental, reset=first_pass)\n\n n_samples, n_features = X.shape\n\n # Ensure y is 2D\n if y.ndim == 1:\n y = y.reshape((-1, 1))\n\n self.n_outputs_ = y.shape[1]\n\n layer_units = ([n_features] + hidden_layer_sizes +\n [self.n_outputs_])\n\n # check random state\n self._random_state = check_random_state(self.random_state)\n\n if first_pass:\n # First time training the model\n self._initialize(y, layer_units, X.dtype)\n\n # Initialize lists\n activations = [X] + [None] * (len(layer_units) - 1)\n deltas = [None] * (len(activations) - 1)\n\n coef_grads = [np.empty((n_fan_in_, n_fan_out_), dtype=X.dtype)\n for n_fan_in_,\n n_fan_out_ in zip(layer_units[:-1],\n layer_units[1:])]\n\n intercept_grads = [np.empty(n_fan_out_, dtype=X.dtype)\n for n_fan_out_ in\n layer_units[1:]]\n\n # Run the Stochastic optimization solver\n if self.solver in _STOCHASTIC_SOLVERS:\n self._fit_stochastic(X, y, activations, deltas, coef_grads,\n intercept_grads, layer_units, incremental)\n\n # Run the LBFGS solver\n elif self.solver == 'lbfgs':\n self._fit_lbfgs(X, y, activations, deltas, coef_grads,\n intercept_grads, layer_units)\n return self" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_lbfgs", + "name": "_fit_lbfgs", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_lbfgs", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_lbfgs/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_lbfgs.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_lbfgs/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_lbfgs.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_lbfgs/y", + "name": "y", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_lbfgs.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_lbfgs/activations", + "name": "activations", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_lbfgs.activations", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_lbfgs/deltas", + "name": "deltas", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_lbfgs.deltas", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_lbfgs/coef_grads", + "name": "coef_grads", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_lbfgs.coef_grads", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_lbfgs/intercept_grads", + "name": "intercept_grads", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_lbfgs.intercept_grads", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_lbfgs/layer_units", + "name": "layer_units", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_lbfgs.layer_units", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit_lbfgs(self, X, y, activations, deltas, coef_grads,\n intercept_grads, layer_units):\n # Store meta information for the parameters\n self._coef_indptr = []\n self._intercept_indptr = []\n start = 0\n\n # Save sizes and indices of coefficients for faster unpacking\n for i in range(self.n_layers_ - 1):\n n_fan_in, n_fan_out = layer_units[i], layer_units[i + 1]\n\n end = start + (n_fan_in * n_fan_out)\n self._coef_indptr.append((start, end, (n_fan_in, n_fan_out)))\n start = end\n\n # Save sizes and indices of intercepts for faster unpacking\n for i in range(self.n_layers_ - 1):\n end = start + layer_units[i + 1]\n self._intercept_indptr.append((start, end))\n start = end\n\n # Run LBFGS\n packed_coef_inter = _pack(self.coefs_,\n self.intercepts_)\n\n if self.verbose is True or self.verbose >= 1:\n iprint = 1\n else:\n iprint = -1\n\n opt_res = 
scipy.optimize.minimize(\n self._loss_grad_lbfgs, packed_coef_inter,\n method=\"L-BFGS-B\", jac=True,\n options={\n \"maxfun\": self.max_fun,\n \"maxiter\": self.max_iter,\n \"iprint\": iprint,\n \"gtol\": self.tol\n },\n args=(X, y, activations, deltas, coef_grads, intercept_grads))\n self.n_iter_ = _check_optimize_result(\"lbfgs\", opt_res, self.max_iter)\n self.loss_ = opt_res.fun\n self._unpack(opt_res.x)" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_stochastic", + "name": "_fit_stochastic", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_stochastic", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_stochastic/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_stochastic.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_stochastic/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_stochastic.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_stochastic/y", + "name": "y", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_stochastic.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_stochastic/activations", + "name": "activations", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_stochastic.activations", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_stochastic/deltas", + "name": "deltas", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_stochastic.deltas", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_stochastic/coef_grads", + "name": "coef_grads", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_stochastic.coef_grads", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_stochastic/intercept_grads", + "name": "intercept_grads", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_stochastic.intercept_grads", + 
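_fit_lbfgs above flattens all weight matrices and bias vectors into one 1-D vector before handing them to scipy.optimize.minimize, recording (start, end, shape) triples so the optimizer's result can be unpacked again. A minimal sketch of that pack/unpack bookkeeping, assuming hypothetical coefs/intercepts lists rather than a fitted estimator:

import numpy as np

# Hypothetical per-layer parameters (shapes chosen for illustration).
coefs = [np.arange(15.0).reshape(3, 5), np.arange(10.0).reshape(5, 2)]
intercepts = [np.zeros(5), np.zeros(2)]

packed = np.hstack([a.ravel() for a in coefs + intercepts])

# Record (start, end, shape) triples, mirroring _fit_lbfgs's index tables.
coef_indptr, start = [], 0
for W in coefs:
    end = start + W.size
    coef_indptr.append((start, end, W.shape))
    start = end
intercept_indptr = []
for b in intercepts:
    end = start + b.size
    intercept_indptr.append((start, end))
    start = end

# Unpack: slice the flat vector and restore each stored shape.
coefs_back = [packed[s:e].reshape(shape) for s, e, shape in coef_indptr]
intercepts_back = [packed[s:e] for s, e in intercept_indptr]
assert all((a == b).all()
           for a, b in zip(coefs + intercepts, coefs_back + intercepts_back))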
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_stochastic/layer_units", + "name": "layer_units", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_stochastic.layer_units", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_fit_stochastic/incremental", + "name": "incremental", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._fit_stochastic.incremental", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit_stochastic(self, X, y, activations, deltas, coef_grads,\n intercept_grads, layer_units, incremental):\n\n if not incremental or not hasattr(self, '_optimizer'):\n params = self.coefs_ + self.intercepts_\n\n if self.solver == 'sgd':\n self._optimizer = SGDOptimizer(\n params, self.learning_rate_init, self.learning_rate,\n self.momentum, self.nesterovs_momentum, self.power_t)\n elif self.solver == 'adam':\n self._optimizer = AdamOptimizer(\n params, self.learning_rate_init, self.beta_1, self.beta_2,\n self.epsilon)\n\n # early_stopping in partial_fit doesn't make sense\n early_stopping = self.early_stopping and not incremental\n if early_stopping:\n # don't stratify in multilabel classification\n should_stratify = is_classifier(self) and self.n_outputs_ == 1\n stratify = y if should_stratify else None\n X, X_val, y, y_val = train_test_split(\n X, y, random_state=self._random_state,\n test_size=self.validation_fraction,\n stratify=stratify)\n if is_classifier(self):\n y_val = self._label_binarizer.inverse_transform(y_val)\n else:\n X_val = None\n y_val = None\n\n n_samples = X.shape[0]\n sample_idx = np.arange(n_samples, dtype=int)\n\n if self.batch_size == 'auto':\n batch_size = min(200, n_samples)\n else:\n if self.batch_size < 1 or self.batch_size > n_samples:\n warnings.warn(\"Got `batch_size` less than 1 or larger than \"\n \"sample size. It is going to be clipped\")\n batch_size = np.clip(self.batch_size, 1, n_samples)\n\n try:\n for it in range(self.max_iter):\n if self.shuffle:\n # Only shuffle the sample indices instead of X and y to\n # reduce the memory footprint. 
These indices will be used\n # to slice the X and y.\n sample_idx = shuffle(sample_idx,\n random_state=self._random_state)\n\n accumulated_loss = 0.0\n for batch_slice in gen_batches(n_samples, batch_size):\n if self.shuffle:\n X_batch = _safe_indexing(X, sample_idx[batch_slice])\n y_batch = y[sample_idx[batch_slice]]\n else:\n X_batch = X[batch_slice]\n y_batch = y[batch_slice]\n\n activations[0] = X_batch\n batch_loss, coef_grads, intercept_grads = self._backprop(\n X_batch, y_batch, activations, deltas,\n coef_grads, intercept_grads)\n accumulated_loss += batch_loss * (batch_slice.stop -\n batch_slice.start)\n\n # update weights\n grads = coef_grads + intercept_grads\n self._optimizer.update_params(grads)\n\n self.n_iter_ += 1\n self.loss_ = accumulated_loss / X.shape[0]\n\n self.t_ += n_samples\n self.loss_curve_.append(self.loss_)\n if self.verbose:\n print(\"Iteration %d, loss = %.8f\" % (self.n_iter_,\n self.loss_))\n\n # update no_improvement_count based on training loss or\n # validation score according to early_stopping\n self._update_no_improvement_count(early_stopping, X_val, y_val)\n\n # for learning rate that needs to be updated at iteration end\n self._optimizer.iteration_ends(self.t_)\n\n if self._no_improvement_count > self.n_iter_no_change:\n # not better than last `n_iter_no_change` iterations by tol\n # stop or decrease learning rate\n if early_stopping:\n msg = (\"Validation score did not improve more than \"\n \"tol=%f for %d consecutive epochs.\" % (\n self.tol, self.n_iter_no_change))\n else:\n msg = (\"Training loss did not improve more than tol=%f\"\n \" for %d consecutive epochs.\" % (\n self.tol, self.n_iter_no_change))\n\n is_stopping = self._optimizer.trigger_stopping(\n msg, self.verbose)\n if is_stopping:\n break\n else:\n self._no_improvement_count = 0\n\n if incremental:\n break\n\n if self.n_iter_ == self.max_iter:\n warnings.warn(\n \"Stochastic Optimizer: Maximum iterations (%d) \"\n \"reached and the optimization hasn't converged yet.\"\n % self.max_iter, ConvergenceWarning)\n except KeyboardInterrupt:\n warnings.warn(\"Training interrupted by user.\")\n\n if early_stopping:\n # restore best weights\n self.coefs_ = self._best_coefs\n self.intercepts_ = self._best_intercepts" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_forward_pass", + "name": "_forward_pass", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._forward_pass", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_forward_pass/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._forward_pass.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_forward_pass/activations", + "name": "activations", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._forward_pass.activations", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = n_layers - 1", + "default_value": "", + "description": "The ith element of the list holds the values of the ith layer." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = n_layers - 1" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform a forward pass on the network by computing the values\nof the neurons in the hidden layers and the output layer.", + "docstring": "Perform a forward pass on the network by computing the values\nof the neurons in the hidden layers and the output layer.\n\nParameters\n----------\nactivations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer.", + "code": " def _forward_pass(self, activations):\n \"\"\"Perform a forward pass on the network by computing the values\n of the neurons in the hidden layers and the output layer.\n\n Parameters\n ----------\n activations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer.\n \"\"\"\n hidden_activation = ACTIVATIONS[self.activation]\n # Iterate over the hidden layers\n for i in range(self.n_layers_ - 1):\n activations[i + 1] = safe_sparse_dot(activations[i],\n self.coefs_[i])\n activations[i + 1] += self.intercepts_[i]\n\n # For the hidden layers\n if (i + 1) != (self.n_layers_ - 1):\n hidden_activation(activations[i + 1])\n\n # For the last layer\n output_activation = ACTIVATIONS[self.out_activation_]\n output_activation(activations[i + 1])\n\n return activations" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_forward_pass_fast", + "name": "_forward_pass_fast", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._forward_pass_fast", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_forward_pass_fast/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._forward_pass_fast.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_forward_pass_fast/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._forward_pass_fast.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict using the trained model\n\nThis is the same as _forward_pass but does not record the activations\nof all layers and only returns the last layer's activation.", + "docstring": "Predict using the trained model\n\nThis is the same as _forward_pass but does not record the activations\nof all layers and only returns the last layer's activation.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The decision function of the samples for each class in the model.", + "code": " def _forward_pass_fast(self, X):\n \"\"\"Predict using the trained model\n\n This is the same as _forward_pass but does not record the activations\n of all layers and only returns the last layer's activation.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The decision function of the samples for each class in the model.\n \"\"\"\n X = self._validate_data(X, accept_sparse=['csr', 'csc'], reset=False)\n\n # Initialize first layer\n activation = X\n\n # Forward propagate\n hidden_activation = ACTIVATIONS[self.activation]\n for i in range(self.n_layers_ - 1):\n activation = safe_sparse_dot(activation, self.coefs_[i])\n activation += self.intercepts_[i]\n if i != self.n_layers_ - 2:\n hidden_activation(activation)\n output_activation = ACTIVATIONS[self.out_activation_]\n output_activation(activation)\n\n return activation" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_init_coef", + "name": "_init_coef", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._init_coef", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_init_coef/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._init_coef.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_init_coef/fan_in", + "name": "fan_in", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._init_coef.fan_in", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_init_coef/fan_out", + "name": "fan_out", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._init_coef.fan_out", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_init_coef/dtype", + "name": "dtype", + 
"qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._init_coef.dtype", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _init_coef(self, fan_in, fan_out, dtype):\n # Use the initialization method recommended by\n # Glorot et al.\n factor = 6.\n if self.activation == 'logistic':\n factor = 2.\n init_bound = np.sqrt(factor / (fan_in + fan_out))\n\n # Generate weights and bias:\n coef_init = self._random_state.uniform(-init_bound, init_bound,\n (fan_in, fan_out))\n intercept_init = self._random_state.uniform(-init_bound, init_bound,\n fan_out)\n coef_init = coef_init.astype(dtype, copy=False)\n intercept_init = intercept_init.astype(dtype, copy=False)\n return coef_init, intercept_init" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_initialize", + "name": "_initialize", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._initialize", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_initialize/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._initialize.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_initialize/y", + "name": "y", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._initialize.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_initialize/layer_units", + "name": "layer_units", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._initialize.layer_units", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_initialize/dtype", + "name": "dtype", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._initialize.dtype", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _initialize(self, y, layer_units, dtype):\n # set all attributes, allocate weights etc for first call\n # Initialize parameters\n self.n_iter_ = 0\n self.t_ = 0\n self.n_outputs_ = y.shape[1]\n\n # Compute the number of layers\n self.n_layers_ = len(layer_units)\n\n # Output for regression\n if not is_classifier(self):\n self.out_activation_ = 'identity'\n # Output for multi class\n elif self._label_binarizer.y_type_ == 'multiclass':\n self.out_activation_ = 'softmax'\n # Output for binary class and 
multi-label\n else:\n self.out_activation_ = 'logistic'\n\n # Initialize coefficient and intercept layers\n self.coefs_ = []\n self.intercepts_ = []\n\n for i in range(self.n_layers_ - 1):\n coef_init, intercept_init = self._init_coef(layer_units[i],\n layer_units[i + 1],\n dtype)\n self.coefs_.append(coef_init)\n self.intercepts_.append(intercept_init)\n\n if self.solver in _STOCHASTIC_SOLVERS:\n self.loss_curve_ = []\n self._no_improvement_count = 0\n if self.early_stopping:\n self.validation_scores_ = []\n self.best_validation_score_ = -np.inf\n else:\n self.best_loss_ = np.inf" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_loss_grad_lbfgs", + "name": "_loss_grad_lbfgs", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._loss_grad_lbfgs", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_loss_grad_lbfgs/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._loss_grad_lbfgs.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_loss_grad_lbfgs/packed_coef_inter", + "name": "packed_coef_inter", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._loss_grad_lbfgs.packed_coef_inter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "A vector comprising the flattened coefficients and intercepts." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_loss_grad_lbfgs/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._loss_grad_lbfgs.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_loss_grad_lbfgs/y", + "name": "y", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._loss_grad_lbfgs.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "The target values." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_loss_grad_lbfgs/activations", + "name": "activations", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._loss_grad_lbfgs.activations", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = n_layers - 1", + "default_value": "", + "description": "The ith element of the list holds the values of the ith layer." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = n_layers - 1" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_loss_grad_lbfgs/deltas", + "name": "deltas", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._loss_grad_lbfgs.deltas", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = n_layers - 1", + "default_value": "", + "description": "The ith element of the list holds the difference between the\nactivations of the i + 1 layer and the backpropagated error.\nMore specifically, deltas are gradients of loss with respect to z\nin each layer, where z = wx + b is the value of a particular layer\nbefore passing through the activation function" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = n_layers - 1" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_loss_grad_lbfgs/coef_grads", + "name": "coef_grads", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._loss_grad_lbfgs.coef_grads", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = n_layers - 1", + "default_value": "", + "description": "The ith element contains the amount of change used to update the\ncoefficient parameters of the ith layer in an iteration." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = n_layers - 1" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_loss_grad_lbfgs/intercept_grads", + "name": "intercept_grads", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._loss_grad_lbfgs.intercept_grads", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = n_layers - 1", + "default_value": "", + "description": "The ith element contains the amount of change used to update the\nintercept parameters of the ith layer in an iteration." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = n_layers - 1" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the MLP loss function and its corresponding derivatives\nwith respect to the different parameters given in the initialization.\n\nReturned gradients are packed in a single vector so it can be used\nin lbfgs", + "docstring": "Compute the MLP loss function and its corresponding derivatives\nwith respect to the different parameters given in the initialization.\n\nReturned gradients are packed in a single vector so it can be used\nin lbfgs\n\nParameters\n----------\npacked_coef_inter : ndarray\n A vector comprising the flattened coefficients and intercepts.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\ny : ndarray of shape (n_samples,)\n The target values.\n\nactivations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer.\n\ndeltas : list, length = n_layers - 1\n The ith element of the list holds the difference between the\n activations of the i + 1 layer and the backpropagated error.\n More specifically, deltas are gradients of loss with respect to z\n in each layer, where z = wx + b is the value of a particular layer\n before passing through the activation function\n\ncoef_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n coefficient parameters of the ith layer in an iteration.\n\nintercept_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n intercept parameters of the ith layer in an iteration.\n\nReturns\n-------\nloss : float\ngrad : array-like, shape (number of nodes of all layers,)", + "code": " def _loss_grad_lbfgs(self, packed_coef_inter, X, y, activations, deltas,\n coef_grads, intercept_grads):\n \"\"\"Compute the MLP loss function and its corresponding derivatives\n with respect to the different parameters given in the initialization.\n\n Returned gradients are packed in a single vector so it can be used\n in lbfgs\n\n Parameters\n ----------\n packed_coef_inter : ndarray\n A vector comprising the flattened coefficients and intercepts.\n\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n y : ndarray of shape (n_samples,)\n The target values.\n\n activations : list, length = n_layers - 1\n The ith element of the list holds the values of the ith layer.\n\n deltas : list, length = n_layers - 1\n The ith element of the list holds the difference between the\n activations of the i + 1 layer and the backpropagated error.\n More specifically, deltas are gradients of loss with respect to z\n in each layer, where z = wx + b is the value of a particular layer\n before passing through the activation function\n\n coef_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n coefficient parameters of the ith layer in an iteration.\n\n intercept_grads : list, length = n_layers - 1\n The ith element contains the amount of change used to update the\n intercept parameters of the ith layer in an iteration.\n\n Returns\n -------\n loss : float\n grad : array-like, shape (number of nodes of all layers,)\n \"\"\"\n self._unpack(packed_coef_inter)\n loss, coef_grads, intercept_grads = self._backprop(\n X, y, activations, deltas, coef_grads, intercept_grads)\n 
grad = _pack(coef_grads, intercept_grads)\n return loss, grad" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_partial_fit", + "name": "_partial_fit", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_partial_fit/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_partial_fit/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_partial_fit/y", + "name": "y", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _partial_fit(self, X, y):\n return self._fit(X, y, incremental=True)" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_unpack", + "name": "_unpack", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._unpack", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_unpack/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._unpack.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_unpack/packed_parameters", + "name": "packed_parameters", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._unpack.packed_parameters", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Extract the coefficients and intercepts from packed_parameters.", + "docstring": "Extract the coefficients and intercepts from packed_parameters.", + "code": " def _unpack(self, packed_parameters):\n \"\"\"Extract the coefficients and intercepts from packed_parameters.\"\"\"\n for i in range(self.n_layers_ - 1):\n start, end, shape = self._coef_indptr[i]\n self.coefs_[i] = np.reshape(packed_parameters[start:end], shape)\n\n start, end = self._intercept_indptr[i]\n self.intercepts_[i] = packed_parameters[start:end]" + }, + { + "id": 
"scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_update_no_improvement_count", + "name": "_update_no_improvement_count", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._update_no_improvement_count", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_update_no_improvement_count/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._update_no_improvement_count.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_update_no_improvement_count/early_stopping", + "name": "early_stopping", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._update_no_improvement_count.early_stopping", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_update_no_improvement_count/X_val", + "name": "X_val", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._update_no_improvement_count.X_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_update_no_improvement_count/y_val", + "name": "y_val", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._update_no_improvement_count.y_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _update_no_improvement_count(self, early_stopping, X_val, y_val):\n if early_stopping:\n # compute validation score, use that for stopping\n self.validation_scores_.append(self.score(X_val, y_val))\n\n if self.verbose:\n print(\"Validation score: %f\" % self.validation_scores_[-1])\n # update best parameters\n # use validation_scores_, not loss_curve_\n # let's hope no-one overloads .score with mse\n last_valid_score = self.validation_scores_[-1]\n\n if last_valid_score < (self.best_validation_score_ +\n self.tol):\n self._no_improvement_count += 1\n else:\n self._no_improvement_count = 0\n\n if last_valid_score > self.best_validation_score_:\n self.best_validation_score_ = last_valid_score\n self._best_coefs = [c.copy() for c in self.coefs_]\n self._best_intercepts = [i.copy()\n for i in self.intercepts_]\n else:\n if self.loss_curve_[-1] > self.best_loss_ - self.tol:\n self._no_improvement_count += 1\n else:\n self._no_improvement_count = 0\n if self.loss_curve_[-1] < self.best_loss_:\n self.best_loss_ = self.loss_curve_[-1]" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_validate_hyperparameters", + "name": "_validate_hyperparameters", + "qname": 
"sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._validate_hyperparameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/_validate_hyperparameters/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron._validate_hyperparameters.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_hyperparameters(self):\n if not isinstance(self.shuffle, bool):\n raise ValueError(\"shuffle must be either True or False, got %s.\" %\n self.shuffle)\n if self.max_iter <= 0:\n raise ValueError(\"max_iter must be > 0, got %s.\" % self.max_iter)\n if self.max_fun <= 0:\n raise ValueError(\"max_fun must be > 0, got %s.\" % self.max_fun)\n if self.alpha < 0.0:\n raise ValueError(\"alpha must be >= 0, got %s.\" % self.alpha)\n if (self.learning_rate in [\"constant\", \"invscaling\", \"adaptive\"] and\n self.learning_rate_init <= 0.0):\n raise ValueError(\"learning_rate_init must be > 0, got %s.\" %\n self.learning_rate)\n if self.momentum > 1 or self.momentum < 0:\n raise ValueError(\"momentum must be >= 0 and <= 1, got %s\" %\n self.momentum)\n if not isinstance(self.nesterovs_momentum, bool):\n raise ValueError(\"nesterovs_momentum must be either True or False,\"\n \" got %s.\" % self.nesterovs_momentum)\n if not isinstance(self.early_stopping, bool):\n raise ValueError(\"early_stopping must be either True or False,\"\n \" got %s.\" % self.early_stopping)\n if self.validation_fraction < 0 or self.validation_fraction >= 1:\n raise ValueError(\"validation_fraction must be >= 0 and < 1, \"\n \"got %s\" % self.validation_fraction)\n if self.beta_1 < 0 or self.beta_1 >= 1:\n raise ValueError(\"beta_1 must be >= 0 and < 1, got %s\" %\n self.beta_1)\n if self.beta_2 < 0 or self.beta_2 >= 1:\n raise ValueError(\"beta_2 must be >= 0 and < 1, got %s\" %\n self.beta_2)\n if self.epsilon <= 0.0:\n raise ValueError(\"epsilon must be > 0, got %s.\" % self.epsilon)\n if self.n_iter_no_change <= 0:\n raise ValueError(\"n_iter_no_change must be > 0, got %s.\"\n % self.n_iter_no_change)\n\n # raise ValueError if not registered\n if self.activation not in ACTIVATIONS:\n raise ValueError(\"The activation '%s' is not supported. Supported \"\n \"activations are %s.\"\n % (self.activation, list(sorted(ACTIVATIONS))))\n if self.learning_rate not in [\"constant\", \"invscaling\", \"adaptive\"]:\n raise ValueError(\"learning rate %s is not supported. \" %\n self.learning_rate)\n supported_solvers = _STOCHASTIC_SOLVERS + [\"lbfgs\"]\n if self.solver not in supported_solvers:\n raise ValueError(\"The solver %s is not supported. 
\"\n \" Expected one of: %s\" %\n (self.solver, \", \".join(supported_solvers)))" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/fit", + "name": "fit", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/fit/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/fit/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray or sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray" + }, + { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/fit/y", + "name": "y", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "The target values (class labels in classification, real numbers in\nregression)." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,) or (n_samples, n_outputs)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model to data matrix X and target(s) y.", + "docstring": "Fit the model to data matrix X and target(s) y.\n\nParameters\n----------\nX : ndarray or sparse matrix of shape (n_samples, n_features)\n The input data.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels in classification, real numbers in\n regression).\n\nReturns\n-------\nself : returns a trained MLP model.", + "code": " def fit(self, X, y):\n \"\"\"Fit the model to data matrix X and target(s) y.\n\n Parameters\n ----------\n X : ndarray or sparse matrix of shape (n_samples, n_features)\n The input data.\n\n y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels in classification, real numbers in\n regression).\n\n Returns\n -------\n self : returns a trained MLP model.\n \"\"\"\n return self._fit(X, y, incremental=False)" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/partial_fit@getter", + "name": "partial_fit", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.partial_fit", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/BaseMultilayerPerceptron/partial_fit/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.BaseMultilayerPerceptron.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Update the model with a single iteration over the given data.", + "docstring": "Update the model with a single iteration over the given data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\ny : ndarray of shape (n_samples,)\n The target values.\n\nReturns\n-------\nself : returns a trained MLP model.", + "code": " @property\n def partial_fit(self):\n \"\"\"Update the model with a single iteration over the given data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n y : ndarray of shape (n_samples,)\n The target values.\n\n Returns\n -------\n self : returns a trained MLP model.\n \"\"\"\n if self.solver not in _STOCHASTIC_SOLVERS:\n raise AttributeError(\"partial_fit is only available for stochastic\"\n \" optimizers. 
%s is not stochastic.\"\n % self.solver)\n return self._partial_fit" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__", + "name": "__init__", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/hidden_layer_sizes", + "name": "hidden_layer_sizes", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.hidden_layer_sizes", + "default_value": "(100, )", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tuple, length = n_layers - 2", + "default_value": "(100,)", + "description": "The ith element represents the number of neurons in the ith\nhidden layer." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "tuple" + }, + { + "kind": "NamedType", + "name": "length = n_layers - 2" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/activation", + "name": "activation", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.activation", + "default_value": "'relu'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'identity', 'logistic', 'tanh', 'relu'}", + "default_value": "'relu'", + "description": "Activation function for the hidden layer.\n\n- 'identity', no-op activation, useful to implement linear bottleneck,\n returns f(x) = x\n\n- 'logistic', the logistic sigmoid function,\n returns f(x) = 1 / (1 + exp(-x)).\n\n- 'tanh', the hyperbolic tan function,\n returns f(x) = tanh(x).\n\n- 'relu', the rectified linear unit function,\n returns f(x) = max(0, x)" + }, + "type": { + "kind": "EnumType", + "values": ["relu", "identity", "tanh", "logistic"] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/solver", + "name": "solver", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.solver", + "default_value": "'adam'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lbfgs', 'sgd', 'adam'}", + "default_value": "'adam'", + "description": "The solver for weight optimization.\n\n- 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n- 'sgd' refers to stochastic gradient descent.\n\n- 'adam' refers to a stochastic gradient-based optimizer proposed\n by Kingma, Diederik, and Jimmy Ba\n\nNote: The default solver 'adam' works pretty well on relatively\nlarge datasets (with thousands of training samples or more) in terms of\nboth training time and validation score.\nFor small datasets, however, 'lbfgs' can converge faster and perform\nbetter." 
+ }, + "type": { + "kind": "EnumType", + "values": ["sgd", "adam", "lbfgs"] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/alpha", + "name": "alpha", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.alpha", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0001", + "description": "L2 penalty (regularization term) parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/batch_size", + "name": "batch_size", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.batch_size", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "'auto'", + "description": "Size of minibatches for stochastic optimizers.\nIf the solver is 'lbfgs', the classifier will not use minibatch.\nWhen set to \"auto\", `batch_size=min(200, n_samples)`" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.learning_rate", + "default_value": "'constant'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'constant', 'invscaling', 'adaptive'}", + "default_value": "'constant'", + "description": "Learning rate schedule for weight updates.\n\n- 'constant' is a constant learning rate given by\n 'learning_rate_init'.\n\n- 'invscaling' gradually decreases the learning rate at each\n time step 't' using an inverse scaling exponent of 'power_t'.\n effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n- 'adaptive' keeps the learning rate constant to\n 'learning_rate_init' as long as training loss keeps decreasing.\n Each time two consecutive epochs fail to decrease training loss by at\n least tol, or fail to increase validation score by at least tol if\n 'early_stopping' is on, the current learning rate is divided by 5.\n\nOnly used when ``solver='sgd'``." + }, + "type": { + "kind": "EnumType", + "values": ["adaptive", "invscaling", "constant"] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/learning_rate_init", + "name": "learning_rate_init", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.learning_rate_init", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "double", + "default_value": "0.001", + "description": "The initial learning rate used. It controls the step-size\nin updating the weights. Only used when solver='sgd' or 'adam'." + }, + "type": { + "kind": "NamedType", + "name": "double" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/power_t", + "name": "power_t", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.power_t", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "double", + "default_value": "0.5", + "description": "The exponent for inverse scaling learning rate.\nIt is used in updating effective learning rate when the learning_rate\nis set to 'invscaling'. 
Only used when solver='sgd'." + }, + "type": { + "kind": "NamedType", + "name": "double" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.max_iter", + "default_value": "200", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "200", + "description": "Maximum number of iterations. The solver iterates until convergence\n(determined by 'tol') or this number of iterations. For stochastic\nsolvers ('sgd', 'adam'), note that this determines the number of epochs\n(how many times each data point will be used), not the number of\ngradient steps." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to shuffle samples in each iteration. Only used when\nsolver='sgd' or 'adam'." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Determines random number generation for weights and bias\ninitialization, train-test split if early stopping is used, and batch\nsampling when solver='sgd' or 'adam'.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/tol", + "name": "tol", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance for the optimization. When the loss or score is not improving\nby at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\nunless ``learning_rate`` is set to 'adaptive', convergence is\nconsidered to be reached and training stops." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/verbose", + "name": "verbose", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to print progress messages to stdout." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to True, reuse the solution of the previous\ncall to fit as initialization, otherwise, just erase the\nprevious solution. See :term:`the Glossary `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/momentum", + "name": "momentum", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.momentum", + "default_value": "0.9", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.9", + "description": "Momentum for gradient descent update. Should be between 0 and 1. Only\nused when solver='sgd'." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/nesterovs_momentum", + "name": "nesterovs_momentum", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.nesterovs_momentum", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to use Nesterov's momentum. Only used when solver='sgd' and\nmomentum > 0." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.early_stopping", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use early stopping to terminate training when validation\nscore is not improving. If set to true, it will automatically set\naside 10% of training data as validation and terminate training when\nvalidation score is not improving by at least tol for\n``n_iter_no_change`` consecutive epochs. The split is stratified,\nexcept in a multilabel setting.\nIf early stopping is False, then the training stops when the training\nloss does not improve by more than tol for n_iter_no_change consecutive\npasses over the training set.\nOnly effective when solver='sgd' or 'adam'" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The proportion of training data to set aside as validation set for\nearly stopping. 
Must be between 0 and 1.\nOnly used if early_stopping is True" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/beta_1", + "name": "beta_1", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.beta_1", + "default_value": "0.9", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.9", + "description": "Exponential decay rate for estimates of first moment vector in adam,\nshould be in [0, 1). Only used when solver='adam'" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/beta_2", + "name": "beta_2", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.beta_2", + "default_value": "0.999", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.999", + "description": "Exponential decay rate for estimates of second moment vector in adam,\nshould be in [0, 1). Only used when solver='adam'" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.epsilon", + "default_value": "1e-08", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-8", + "description": "Value for numerical stability in adam. Only used when solver='adam'" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.n_iter_no_change", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Maximum number of epochs to not meet ``tol`` improvement.\nOnly effective when solver='sgd' or 'adam'\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/__init__/max_fun", + "name": "max_fun", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.__init__.max_fun", + "default_value": "15000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "15000", + "description": "Only used when solver='lbfgs'. Maximum number of loss function calls.\nThe solver iterates until convergence (determined by 'tol'), number\nof iterations reaches max_iter, or this number of loss function calls.\nNote that number of loss function calls will be greater than or equal\nto the number of iterations for the `MLPClassifier`.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Multi-layer Perceptron classifier.\n\nThis model optimizes the log-loss function using LBFGS or stochastic\ngradient descent.\n\n.. 
versionadded:: 0.18", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, hidden_layer_sizes=(100,), activation=\"relu\", *,\n solver='adam', alpha=0.0001,\n batch_size='auto', learning_rate=\"constant\",\n learning_rate_init=0.001, power_t=0.5, max_iter=200,\n shuffle=True, random_state=None, tol=1e-4,\n verbose=False, warm_start=False, momentum=0.9,\n nesterovs_momentum=True, early_stopping=False,\n validation_fraction=0.1, beta_1=0.9, beta_2=0.999,\n epsilon=1e-8, n_iter_no_change=10, max_fun=15000):\n super().__init__(\n hidden_layer_sizes=hidden_layer_sizes,\n activation=activation, solver=solver, alpha=alpha,\n batch_size=batch_size, learning_rate=learning_rate,\n learning_rate_init=learning_rate_init, power_t=power_t,\n max_iter=max_iter, loss='log_loss', shuffle=shuffle,\n random_state=random_state, tol=tol, verbose=verbose,\n warm_start=warm_start, momentum=momentum,\n nesterovs_momentum=nesterovs_momentum,\n early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n beta_1=beta_1, beta_2=beta_2, epsilon=epsilon,\n n_iter_no_change=n_iter_no_change, max_fun=max_fun)" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/_partial_fit", + "name": "_partial_fit", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier._partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/_partial_fit/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier._partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/_partial_fit/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier._partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/_partial_fit/y", + "name": "y", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier._partial_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/_partial_fit/classes", + "name": "classes", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier._partial_fit.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _partial_fit(self, X, y, classes=None):\n if _check_partial_fit_first_call(self, classes):\n self._label_binarizer = LabelBinarizer()\n if type_of_target(y).startswith('multilabel'):\n self._label_binarizer.fit(y)\n else:\n self._label_binarizer.fit(classes)\n\n super()._partial_fit(X, y)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/_validate_input", + "name": "_validate_input", + "qname": 
"sklearn.neural_network._multilayer_perceptron.MLPClassifier._validate_input", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/_validate_input/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier._validate_input.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/_validate_input/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier._validate_input.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/_validate_input/y", + "name": "y", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier._validate_input.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/_validate_input/incremental", + "name": "incremental", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier._validate_input.incremental", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/_validate_input/reset", + "name": "reset", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier._validate_input.reset", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_input(self, X, y, incremental, reset):\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'],\n multi_output=True,\n dtype=(np.float64, np.float32),\n reset=reset)\n if y.ndim == 2 and y.shape[1] == 1:\n y = column_or_1d(y, warn=True)\n\n # Matrix of actions to be taken under the possible combinations:\n # The case that incremental == True and classes_ not defined is\n # already checked by _check_partial_fit_first_call that is called\n # in _partial_fit below.\n # The cases are already grouped into the respective if blocks below.\n #\n # incremental warm_start classes_ def action\n # 0 0 0 define classes_\n # 0 1 0 define classes_\n # 0 0 1 redefine classes_\n #\n # 0 1 1 check compat warm_start\n # 1 1 1 check compat warm_start\n #\n # 1 0 1 check compat last fit\n #\n # Note the reliance on short-circuiting here, so that the second\n # or part implies that classes_ is defined.\n if (\n (not hasattr(self, \"classes_\")) or\n (not self.warm_start and not incremental)\n ):\n self._label_binarizer = LabelBinarizer()\n self._label_binarizer.fit(y)\n self.classes_ = self._label_binarizer.classes_\n else:\n classes = unique_labels(y)\n if self.warm_start:\n if set(classes) != set(self.classes_):\n raise ValueError(\n f\"warm_start can only be used where `y` has the 
same \"\n f\"classes as in the previous call to fit. Previously \"\n f\"got {self.classes_}, `y` has {classes}\"\n )\n elif len(np.setdiff1d(classes, self.classes_, assume_unique=True)):\n raise ValueError(\n f\"`y` has classes not in `self.classes_`. \"\n f\"`self.classes_` has {self.classes_}. 'y' has {classes}.\"\n )\n\n # This downcast to bool is to prevent upcasting when working with\n # float32 data\n y = self._label_binarizer.transform(y).astype(bool)\n return X, y" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/partial_fit@getter", + "name": "partial_fit", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.partial_fit", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/partial_fit/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Update the model with a single iteration over the given data.", + "docstring": "Update the model with a single iteration over the given data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\ny : array-like of shape (n_samples,)\n The target values.\n\nclasses : array of shape (n_classes,), default=None\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\nReturns\n-------\nself : returns a trained MLP model.", + "code": " @property\n def partial_fit(self):\n \"\"\"Update the model with a single iteration over the given data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n y : array-like of shape (n_samples,)\n The target values.\n\n classes : array of shape (n_classes,), default=None\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\n Returns\n -------\n self : returns a trained MLP model.\n \"\"\"\n if self.solver not in _STOCHASTIC_SOLVERS:\n raise AttributeError(\"partial_fit is only available for stochastic\"\n \" optimizer. 
%s is not stochastic\"\n % self.solver)\n return self._partial_fit" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/predict", + "name": "predict", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/predict/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/predict/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict using the multi-layer perceptron classifier", + "docstring": "Predict using the multi-layer perceptron classifier\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ny : ndarray, shape (n_samples,) or (n_samples, n_classes)\n The predicted classes.", + "code": " def predict(self, X):\n \"\"\"Predict using the multi-layer perceptron classifier\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n y : ndarray, shape (n_samples,) or (n_samples, n_classes)\n The predicted classes.\n \"\"\"\n check_is_fitted(self)\n y_pred = self._forward_pass_fast(X)\n\n if self.n_outputs_ == 1:\n y_pred = y_pred.ravel()\n\n return self._label_binarizer.inverse_transform(y_pred)" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/predict_log_proba/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/predict_log_proba/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." 
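A minimal sketch of the `partial_fit` contract recorded above — `classes` is required on the first call and may be omitted afterwards, and the method only exists for the stochastic solvers; the toy arrays are invented for illustration:

import numpy as np
from sklearn.neural_network import MLPClassifier

# Toy arrays, invented for illustration.
X = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
y = np.array([0, 1, 1, 0])

clf = MLPClassifier(hidden_layer_sizes=(10,), solver='adam', random_state=0)

# The first call must name every class; later calls may omit `classes`.
clf.partial_fit(X[:2], y[:2], classes=np.array([0, 1]))
clf.partial_fit(X[2:], y[2:])

print(clf.predict(X))  # ndarray of shape (n_samples,)
# With solver='lbfgs', accessing clf.partial_fit raises AttributeError,
# since the property is only defined for the stochastic solvers.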
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the log of probability estimates.", + "docstring": "Return the log of probability estimates.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\nlog_y_prob : ndarray of shape (n_samples, n_classes)\n The predicted log-probability of the sample for each class\n in the model, where classes are ordered as they are in\n `self.classes_`. Equivalent to log(predict_proba(X))", + "code": " def predict_log_proba(self, X):\n \"\"\"Return the log of probability estimates.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n log_y_prob : ndarray of shape (n_samples, n_classes)\n The predicted log-probability of the sample for each class\n in the model, where classes are ordered as they are in\n `self.classes_`. Equivalent to log(predict_proba(X))\n \"\"\"\n y_prob = self.predict_proba(X)\n return np.log(y_prob, out=y_prob)" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Probability estimates.", + "docstring": "Probability estimates.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ny_prob : ndarray of shape (n_samples, n_classes)\n The predicted probability of the sample for each class in the\n model, where classes are ordered as they are in `self.classes_`.", + "code": " def predict_proba(self, X):\n \"\"\"Probability estimates.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n y_prob : ndarray of shape (n_samples, n_classes)\n The predicted probability of the sample for each class in the\n model, where classes are ordered as they are in `self.classes_`.\n \"\"\"\n check_is_fitted(self)\n y_pred = self._forward_pass_fast(X)\n\n if self.n_outputs_ == 1:\n y_pred = y_pred.ravel()\n\n if y_pred.ndim == 1:\n return np.vstack([1 - y_pred, y_pred]).T\n else:\n return y_pred" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__", + "name": "__init__", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/hidden_layer_sizes", + "name": "hidden_layer_sizes", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.hidden_layer_sizes", + "default_value": "(100, )", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tuple, length = n_layers - 2", + "default_value": "(100,)", + "description": "The ith element represents the number of neurons in the ith\nhidden layer." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "tuple" + }, + { + "kind": "NamedType", + "name": "length = n_layers - 2" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/activation", + "name": "activation", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.activation", + "default_value": "'relu'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'identity', 'logistic', 'tanh', 'relu'}", + "default_value": "'relu'", + "description": "Activation function for the hidden layer.\n\n- 'identity', no-op activation, useful to implement linear bottleneck,\n returns f(x) = x\n\n- 'logistic', the logistic sigmoid function,\n returns f(x) = 1 / (1 + exp(-x)).\n\n- 'tanh', the hyperbolic tan function,\n returns f(x) = tanh(x).\n\n- 'relu', the rectified linear unit function,\n returns f(x) = max(0, x)" + }, + "type": { + "kind": "EnumType", + "values": ["relu", "identity", "tanh", "logistic"] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/solver", + "name": "solver", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.solver", + "default_value": "'adam'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'lbfgs', 'sgd', 'adam'}", + "default_value": "'adam'", + "description": "The solver for weight optimization.\n\n- 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n- 'sgd' refers to stochastic gradient descent.\n\n- 'adam' refers to a stochastic gradient-based optimizer proposed by\n Kingma, Diederik, and Jimmy Ba\n\nNote: The default solver 'adam' works pretty well on relatively\nlarge datasets (with thousands of training samples or more) in terms of\nboth training time and validation score.\nFor small datasets, however, 'lbfgs' can converge faster and perform\nbetter." + }, + "type": { + "kind": "EnumType", + "values": ["sgd", "adam", "lbfgs"] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/alpha", + "name": "alpha", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.alpha", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0001", + "description": "L2 penalty (regularization term) parameter." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/batch_size", + "name": "batch_size", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.batch_size", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "'auto'", + "description": "Size of minibatches for stochastic optimizers.\nIf the solver is 'lbfgs', the classifier will not use minibatch.\nWhen set to \"auto\", `batch_size=min(200, n_samples)`" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.learning_rate", + "default_value": "'constant'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'constant', 'invscaling', 'adaptive'}", + "default_value": "'constant'", + "description": "Learning rate schedule for weight updates.\n\n- 'constant' is a constant learning rate given by\n 'learning_rate_init'.\n\n- 'invscaling' gradually decreases the learning rate ``learning_rate_``\n at each time step 't' using an inverse scaling exponent of 'power_t'.\n effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n- 'adaptive' keeps the learning rate constant to\n 'learning_rate_init' as long as training loss keeps decreasing.\n Each time two consecutive epochs fail to decrease training loss by at\n least tol, or fail to increase validation score by at least tol if\n 'early_stopping' is on, the current learning rate is divided by 5.\n\nOnly used when solver='sgd'." + }, + "type": { + "kind": "EnumType", + "values": ["adaptive", "invscaling", "constant"] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/learning_rate_init", + "name": "learning_rate_init", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.learning_rate_init", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "double", + "default_value": "0.001", + "description": "The initial learning rate used. It controls the step-size\nin updating the weights. Only used when solver='sgd' or 'adam'." + }, + "type": { + "kind": "NamedType", + "name": "double" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/power_t", + "name": "power_t", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.power_t", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "double", + "default_value": "0.5", + "description": "The exponent for inverse scaling learning rate.\nIt is used in updating effective learning rate when the learning_rate\nis set to 'invscaling'. Only used when solver='sgd'." + }, + "type": { + "kind": "NamedType", + "name": "double" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.max_iter", + "default_value": "200", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "200", + "description": "Maximum number of iterations. 
The solver iterates until convergence\n(determined by 'tol') or this number of iterations. For stochastic\nsolvers ('sgd', 'adam'), note that this determines the number of epochs\n(how many times each data point will be used), not the number of\ngradient steps." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/shuffle", + "name": "shuffle", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.shuffle", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to shuffle samples in each iteration. Only used when\nsolver='sgd' or 'adam'." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance", + "default_value": "None", + "description": "Determines random number generation for weights and bias\ninitialization, train-test split if early stopping is used, and batch\nsampling when solver='sgd' or 'adam'.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/tol", + "name": "tol", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance for the optimization. When the loss or score is not improving\nby at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\nunless ``learning_rate`` is set to 'adaptive', convergence is\nconsidered to be reached and training stops." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/verbose", + "name": "verbose", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to print progress messages to stdout." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/warm_start", + "name": "warm_start", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.warm_start", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to True, reuse the solution of the previous\ncall to fit as initialization, otherwise, just erase the\nprevious solution. See :term:`the Glossary `." 
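The 'invscaling' schedule documented above is effective_learning_rate = learning_rate_init / pow(t, power_t). A one-liner evaluating it at the documented defaults (learning_rate_init=0.001, power_t=0.5):

# 'invscaling' schedule from the learning_rate docstring:
#   effective_learning_rate = learning_rate_init / pow(t, power_t)
learning_rate_init, power_t = 0.001, 0.5  # the documented defaults

for t in (1, 100, 10000):
    print(t, learning_rate_init / pow(t, power_t))
# 1     -> 0.001
# 100   -> 0.0001   (decayed by sqrt(100) = 10)
# 10000 -> 1e-05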
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/momentum", + "name": "momentum", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.momentum", + "default_value": "0.9", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.9", + "description": "Momentum for gradient descent update. Should be between 0 and 1. Only\nused when solver='sgd'." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/nesterovs_momentum", + "name": "nesterovs_momentum", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.nesterovs_momentum", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to use Nesterov's momentum. Only used when solver='sgd' and\nmomentum > 0." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/early_stopping", + "name": "early_stopping", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.early_stopping", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to use early stopping to terminate training when validation\nscore is not improving. If set to true, it will automatically set\naside 10% of training data as validation and terminate training when\nvalidation score is not improving by at least ``tol`` for\n``n_iter_no_change`` consecutive epochs.\nOnly effective when solver='sgd' or 'adam'" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/validation_fraction", + "name": "validation_fraction", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.validation_fraction", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The proportion of training data to set aside as validation set for\nearly stopping. Must be between 0 and 1.\nOnly used if early_stopping is True" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/beta_1", + "name": "beta_1", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.beta_1", + "default_value": "0.9", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.9", + "description": "Exponential decay rate for estimates of first moment vector in adam,\nshould be in [0, 1). 
Only used when solver='adam'" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/beta_2", + "name": "beta_2", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.beta_2", + "default_value": "0.999", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.999", + "description": "Exponential decay rate for estimates of second moment vector in adam,\nshould be in [0, 1). Only used when solver='adam'" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.epsilon", + "default_value": "1e-08", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-8", + "description": "Value for numerical stability in adam. Only used when solver='adam'" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/n_iter_no_change", + "name": "n_iter_no_change", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.n_iter_no_change", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Maximum number of epochs to not meet ``tol`` improvement.\nOnly effective when solver='sgd' or 'adam'\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/__init__/max_fun", + "name": "max_fun", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.__init__.max_fun", + "default_value": "15000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "15000", + "description": "Only used when solver='lbfgs'. Maximum number of function calls.\nThe solver iterates until convergence (determined by 'tol'), number\nof iterations reaches max_iter, or this number of function calls.\nNote that number of function calls will be greater than or equal to\nthe number of iterations for the MLPRegressor.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Multi-layer Perceptron regressor.\n\nThis model optimizes the squared-loss using LBFGS or stochastic gradient\ndescent.\n\n.. 
versionadded:: 0.18", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, hidden_layer_sizes=(100,), activation=\"relu\", *,\n solver='adam', alpha=0.0001,\n batch_size='auto', learning_rate=\"constant\",\n learning_rate_init=0.001,\n power_t=0.5, max_iter=200, shuffle=True,\n random_state=None, tol=1e-4,\n verbose=False, warm_start=False, momentum=0.9,\n nesterovs_momentum=True, early_stopping=False,\n validation_fraction=0.1, beta_1=0.9, beta_2=0.999,\n epsilon=1e-8, n_iter_no_change=10, max_fun=15000):\n super().__init__(\n hidden_layer_sizes=hidden_layer_sizes,\n activation=activation, solver=solver, alpha=alpha,\n batch_size=batch_size, learning_rate=learning_rate,\n learning_rate_init=learning_rate_init, power_t=power_t,\n max_iter=max_iter, loss='squared_loss', shuffle=shuffle,\n random_state=random_state, tol=tol, verbose=verbose,\n warm_start=warm_start, momentum=momentum,\n nesterovs_momentum=nesterovs_momentum,\n early_stopping=early_stopping,\n validation_fraction=validation_fraction,\n beta_1=beta_1, beta_2=beta_2, epsilon=epsilon,\n n_iter_no_change=n_iter_no_change, max_fun=max_fun)" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/_validate_input", + "name": "_validate_input", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor._validate_input", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/_validate_input/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor._validate_input.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/_validate_input/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor._validate_input.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/_validate_input/y", + "name": "y", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor._validate_input.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/_validate_input/incremental", + "name": "incremental", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor._validate_input.incremental", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/_validate_input/reset", + "name": "reset", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor._validate_input.reset", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_input(self, 
X, y, incremental, reset):\n X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'],\n multi_output=True, y_numeric=True,\n dtype=(np.float64, np.float32),\n reset=reset)\n if y.ndim == 2 and y.shape[1] == 1:\n y = column_or_1d(y, warn=True)\n return X, y" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/predict", + "name": "predict", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/predict/self", + "name": "self", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/MLPRegressor/predict/X", + "name": "X", + "qname": "sklearn.neural_network._multilayer_perceptron.MLPRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict using the multi-layer perceptron model.", + "docstring": "Predict using the multi-layer perceptron model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ny : ndarray of shape (n_samples, n_outputs)\n The predicted values.", + "code": " def predict(self, X):\n \"\"\"Predict using the multi-layer perceptron model.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n y : ndarray of shape (n_samples, n_outputs)\n The predicted values.\n \"\"\"\n check_is_fitted(self)\n y_pred = self._forward_pass_fast(X)\n if y_pred.shape[1] == 1:\n return y_pred.ravel()\n return y_pred" + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/_pack", + "name": "_pack", + "qname": "sklearn.neural_network._multilayer_perceptron._pack", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/_pack/coefs_", + "name": "coefs_", + "qname": "sklearn.neural_network._multilayer_perceptron._pack.coefs_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._multilayer_perceptron/_pack/intercepts_", + "name": "intercepts_", + "qname": "sklearn.neural_network._multilayer_perceptron._pack.intercepts_", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Pack the parameters into a single vector.", + "docstring": "Pack the parameters into a single vector.", + "code": "def _pack(coefs_, intercepts_):\n \"\"\"Pack the parameters into a single 
vector.\"\"\"\n return np.hstack([l.ravel() for l in coefs_ + intercepts_])" + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/__init__", + "name": "__init__", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/__init__/self", + "name": "self", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/__init__/n_components", + "name": "n_components", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.__init__.n_components", + "default_value": "256", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "256", + "description": "Number of binary hidden units." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/__init__/learning_rate", + "name": "learning_rate", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.__init__.learning_rate", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The learning rate for weight updates. It is *highly* recommended\nto tune this hyper-parameter. Reasonable values are in the\n10**[0., -3.] range." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/__init__/batch_size", + "name": "batch_size", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.__init__.batch_size", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of examples per minibatch." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/__init__/n_iter", + "name": "n_iter", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.__init__.n_iter", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Number of iterations/sweeps over the training dataset to perform\nduring training." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/__init__/verbose", + "name": "verbose", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The verbosity level. The default, zero, means silent mode." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/__init__/random_state", + "name": "random_state", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for:\n\n- Gibbs sampling from visible and hidden layers.\n\n- Initializing components, sampling from layers during fit.\n\n- Corrupting the data when scoring samples.\n\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Bernoulli Restricted Boltzmann Machine (RBM).\n\nA Restricted Boltzmann Machine with binary visible units and\nbinary hidden units. Parameters are estimated using Stochastic Maximum\nLikelihood (SML), also known as Persistent Contrastive Divergence (PCD)\n[2].\n\nThe time complexity of this implementation is ``O(d ** 2)`` assuming\nd ~ n_features ~ n_components.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components=256, *, learning_rate=0.1, batch_size=10,\n n_iter=10, verbose=0, random_state=None):\n self.n_components = n_components\n self.learning_rate = learning_rate\n self.batch_size = batch_size\n self.n_iter = n_iter\n self.verbose = verbose\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_fit", + "name": "_fit", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_fit/self", + "name": "self", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_fit/v_pos", + "name": "v_pos", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._fit.v_pos", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to use for training." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_fit/rng", + "name": "rng", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._fit.rng", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "RandomState instance", + "default_value": "", + "description": "Random number generator to use for sampling." 
+ }, + "type": { + "kind": "NamedType", + "name": "RandomState instance" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Inner fit for one mini-batch.\n\nAdjust the parameters to maximize the likelihood of v using\nStochastic Maximum Likelihood (SML).", + "docstring": "Inner fit for one mini-batch.\n\nAdjust the parameters to maximize the likelihood of v using\nStochastic Maximum Likelihood (SML).\n\nParameters\n----------\nv_pos : ndarray of shape (n_samples, n_features)\n The data to use for training.\n\nrng : RandomState instance\n Random number generator to use for sampling.", + "code": " def _fit(self, v_pos, rng):\n \"\"\"Inner fit for one mini-batch.\n\n Adjust the parameters to maximize the likelihood of v using\n Stochastic Maximum Likelihood (SML).\n\n Parameters\n ----------\n v_pos : ndarray of shape (n_samples, n_features)\n The data to use for training.\n\n rng : RandomState instance\n Random number generator to use for sampling.\n \"\"\"\n h_pos = self._mean_hiddens(v_pos)\n v_neg = self._sample_visibles(self.h_samples_, rng)\n h_neg = self._mean_hiddens(v_neg)\n\n lr = float(self.learning_rate) / v_pos.shape[0]\n update = safe_sparse_dot(v_pos.T, h_pos, dense_output=True).T\n update -= np.dot(h_neg.T, v_neg)\n self.components_ += lr * update\n self.intercept_hidden_ += lr * (h_pos.sum(axis=0) - h_neg.sum(axis=0))\n self.intercept_visible_ += lr * (np.asarray(\n v_pos.sum(axis=0)).squeeze() -\n v_neg.sum(axis=0))\n\n h_neg[rng.uniform(size=h_neg.shape) < h_neg] = 1.0 # sample binomial\n self.h_samples_ = np.floor(h_neg, h_neg)" + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_free_energy", + "name": "_free_energy", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._free_energy", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_free_energy/self", + "name": "self", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._free_energy.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_free_energy/v", + "name": "v", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._free_energy.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Values of the visible layer." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the free energy F(v) = - log sum_h exp(-E(v,h)).", + "docstring": "Computes the free energy F(v) = - log sum_h exp(-E(v,h)).\n\nParameters\n----------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer.\n\nReturns\n-------\nfree_energy : ndarray of shape (n_samples,)\n The value of the free energy.", + "code": " def _free_energy(self, v):\n \"\"\"Computes the free energy F(v) = - log sum_h exp(-E(v,h)).\n\n Parameters\n ----------\n v : ndarray of shape (n_samples, n_features)\n Values of the visible layer.\n\n Returns\n -------\n free_energy : ndarray of shape (n_samples,)\n The value of the free energy.\n \"\"\"\n return (- safe_sparse_dot(v, self.intercept_visible_)\n - np.logaddexp(0, safe_sparse_dot(v, self.components_.T)\n + self.intercept_hidden_).sum(axis=1))" + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_mean_hiddens", + "name": "_mean_hiddens", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._mean_hiddens", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_mean_hiddens/self", + "name": "self", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._mean_hiddens.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_mean_hiddens/v", + "name": "v", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._mean_hiddens.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Values of the visible layer." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes the probabilities P(h=1|v).", + "docstring": "Computes the probabilities P(h=1|v).\n\nParameters\n----------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer.\n\nReturns\n-------\nh : ndarray of shape (n_samples, n_components)\n Corresponding mean field values for the hidden layer.", + "code": " def _mean_hiddens(self, v):\n \"\"\"Computes the probabilities P(h=1|v).\n\n Parameters\n ----------\n v : ndarray of shape (n_samples, n_features)\n Values of the visible layer.\n\n Returns\n -------\n h : ndarray of shape (n_samples, n_components)\n Corresponding mean field values for the hidden layer.\n \"\"\"\n p = safe_sparse_dot(v, self.components_.T)\n p += self.intercept_hidden_\n return expit(p, out=p)" + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_more_tags", + "name": "_more_tags", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_more_tags/self", + "name": "self", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_methods_subset_invariance':\n 'fails for the decision_function method',\n 'check_methods_sample_order_invariance':\n 'fails for the score_samples method',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_sample_hiddens", + "name": "_sample_hiddens", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._sample_hiddens", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_sample_hiddens/self", + "name": "self", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._sample_hiddens.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_sample_hiddens/v", + "name": "v", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._sample_hiddens.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Values of the visible layer to sample from." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_sample_hiddens/rng", + "name": "rng", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._sample_hiddens.rng", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "RandomState instance", + "default_value": "", + "description": "Random number generator to use." 
+ }, + "type": { + "kind": "NamedType", + "name": "RandomState instance" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Sample from the distribution P(h|v).", + "docstring": "Sample from the distribution P(h|v).\n\nParameters\n----------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer to sample from.\n\nrng : RandomState instance\n Random number generator to use.\n\nReturns\n-------\nh : ndarray of shape (n_samples, n_components)\n Values of the hidden layer.", + "code": " def _sample_hiddens(self, v, rng):\n \"\"\"Sample from the distribution P(h|v).\n\n Parameters\n ----------\n v : ndarray of shape (n_samples, n_features)\n Values of the visible layer to sample from.\n\n rng : RandomState instance\n Random number generator to use.\n\n Returns\n -------\n h : ndarray of shape (n_samples, n_components)\n Values of the hidden layer.\n \"\"\"\n p = self._mean_hiddens(v)\n return (rng.random_sample(size=p.shape) < p)" + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_sample_visibles", + "name": "_sample_visibles", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._sample_visibles", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_sample_visibles/self", + "name": "self", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._sample_visibles.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_sample_visibles/h", + "name": "h", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._sample_visibles.h", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_components)", + "default_value": "", + "description": "Values of the hidden layer to sample from." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_components)" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/_sample_visibles/rng", + "name": "rng", + "qname": "sklearn.neural_network._rbm.BernoulliRBM._sample_visibles.rng", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "RandomState instance", + "default_value": "", + "description": "Random number generator to use." 
+ }, + "type": { + "kind": "NamedType", + "name": "RandomState instance" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Sample from the distribution P(v|h).", + "docstring": "Sample from the distribution P(v|h).\n\nParameters\n----------\nh : ndarray of shape (n_samples, n_components)\n Values of the hidden layer to sample from.\n\nrng : RandomState instance\n Random number generator to use.\n\nReturns\n-------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer.", + "code": " def _sample_visibles(self, h, rng):\n \"\"\"Sample from the distribution P(v|h).\n\n Parameters\n ----------\n h : ndarray of shape (n_samples, n_components)\n Values of the hidden layer to sample from.\n\n rng : RandomState instance\n Random number generator to use.\n\n Returns\n -------\n v : ndarray of shape (n_samples, n_features)\n Values of the visible layer.\n \"\"\"\n p = np.dot(h, self.components_)\n p += self.intercept_visible_\n expit(p, out=p)\n return (rng.random_sample(size=p.shape) < p)" + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/fit", + "name": "fit", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/fit/self", + "name": "self", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/fit/X", + "name": "X", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/fit/y", + "name": "y", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model to the data X.", + "docstring": "Fit the model to the data X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\nReturns\n-------\nself : BernoulliRBM\n The fitted model.", + "code": " def fit(self, X, y=None):\n \"\"\"Fit the model to the data X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\n Returns\n -------\n self : BernoulliRBM\n The fitted model.\n \"\"\"\n X = self._validate_data(\n X, accept_sparse='csr', dtype=(np.float64, np.float32)\n )\n n_samples = X.shape[0]\n rng = check_random_state(self.random_state)\n\n self.components_ = np.asarray(\n rng.normal(0, 0.01, (self.n_components, X.shape[1])),\n order='F',\n dtype=X.dtype)\n self.intercept_hidden_ = np.zeros(self.n_components, dtype=X.dtype)\n self.intercept_visible_ = np.zeros(X.shape[1], dtype=X.dtype)\n self.h_samples_ = np.zeros((self.batch_size, self.n_components),\n dtype=X.dtype)\n\n n_batches = int(np.ceil(float(n_samples) / self.batch_size))\n batch_slices = list(gen_even_slices(n_batches * self.batch_size,\n n_batches, n_samples=n_samples))\n verbose = self.verbose\n begin = time.time()\n for iteration in range(1, self.n_iter + 1):\n for batch_slice in batch_slices:\n self._fit(X[batch_slice], rng)\n\n if verbose:\n end = time.time()\n print(\"[%s] Iteration %d, pseudo-likelihood = %.2f,\"\n \" time = %.2fs\"\n % (type(self).__name__, iteration,\n self.score_samples(X).mean(), end - begin))\n begin = end\n\n return self" + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/gibbs", + "name": "gibbs", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.gibbs", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/gibbs/self", + "name": "self", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.gibbs.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/gibbs/v", + "name": "v", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.gibbs.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Values of the visible layer to start from." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform one Gibbs sampling step.", + "docstring": "Perform one Gibbs sampling step.\n\nParameters\n----------\nv : ndarray of shape (n_samples, n_features)\n Values of the visible layer to start from.\n\nReturns\n-------\nv_new : ndarray of shape (n_samples, n_features)\n Values of the visible layer after one Gibbs step.", + "code": " def gibbs(self, v):\n \"\"\"Perform one Gibbs sampling step.\n\n Parameters\n ----------\n v : ndarray of shape (n_samples, n_features)\n Values of the visible layer to start from.\n\n Returns\n -------\n v_new : ndarray of shape (n_samples, n_features)\n Values of the visible layer after one Gibbs step.\n \"\"\"\n check_is_fitted(self)\n if not hasattr(self, \"random_state_\"):\n self.random_state_ = check_random_state(self.random_state)\n h_ = self._sample_hiddens(v, self.random_state_)\n v_ = self._sample_visibles(h_, self.random_state_)\n\n return v_" + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/partial_fit", + "name": "partial_fit", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/partial_fit/self", + "name": "self", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/partial_fit/X", + "name": "X", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "Training data." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/partial_fit/y", + "name": "y", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.partial_fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model to the data X which should contain a partial\nsegment of the data.", + "docstring": "Fit the model to the data X which should contain a partial\nsegment of the data.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Training data.\n\nReturns\n-------\nself : BernoulliRBM\n The fitted model.", + "code": " def partial_fit(self, X, y=None):\n \"\"\"Fit the model to the data X which should contain a partial\n segment of the data.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n Training data.\n\n Returns\n -------\n self : BernoulliRBM\n The fitted model.\n \"\"\"\n first_pass = not hasattr(self, 'components_')\n X = self._validate_data(X, accept_sparse='csr', dtype=np.float64,\n reset=first_pass)\n if not hasattr(self, 'random_state_'):\n self.random_state_ = check_random_state(self.random_state)\n if not hasattr(self, 'components_'):\n self.components_ = np.asarray(\n self.random_state_.normal(\n 0,\n 0.01,\n (self.n_components, X.shape[1])\n ),\n order='F')\n if not hasattr(self, 'intercept_hidden_'):\n self.intercept_hidden_ = np.zeros(self.n_components, )\n if not hasattr(self, 'intercept_visible_'):\n self.intercept_visible_ = np.zeros(X.shape[1], )\n if not hasattr(self, 'h_samples_'):\n self.h_samples_ = np.zeros((self.batch_size, self.n_components))\n\n self._fit(X, self.random_state_)" + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/score_samples", + "name": "score_samples", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.score_samples", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/score_samples/self", + "name": "self", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/score_samples/X", + "name": "X", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.score_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Values of the visible layer. Must be all-boolean (not checked)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the pseudo-likelihood of X.", + "docstring": "Compute the pseudo-likelihood of X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Values of the visible layer. 
Must be all-boolean (not checked).\n\nReturns\n-------\npseudo_likelihood : ndarray of shape (n_samples,)\n Value of the pseudo-likelihood (proxy for likelihood).\n\nNotes\n-----\nThis method is not deterministic: it computes a quantity called the\nfree energy on X, then on a randomly corrupted version of X, and\nreturns the log of the logistic function of the difference.", + "code": " def score_samples(self, X):\n \"\"\"Compute the pseudo-likelihood of X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Values of the visible layer. Must be all-boolean (not checked).\n\n Returns\n -------\n pseudo_likelihood : ndarray of shape (n_samples,)\n Value of the pseudo-likelihood (proxy for likelihood).\n\n Notes\n -----\n This method is not deterministic: it computes a quantity called the\n free energy on X, then on a randomly corrupted version of X, and\n returns the log of the logistic function of the difference.\n \"\"\"\n check_is_fitted(self)\n\n v = check_array(X, accept_sparse='csr')\n rng = check_random_state(self.random_state)\n\n # Randomly corrupt one feature in each sample in v.\n ind = (np.arange(v.shape[0]),\n rng.randint(0, v.shape[1], v.shape[0]))\n if sp.issparse(v):\n data = -2 * v[ind] + 1\n v_ = v + sp.csr_matrix((data.A.ravel(), ind), shape=v.shape)\n else:\n v_ = v.copy()\n v_[ind] = 1 - v_[ind]\n\n fe = self._free_energy(v)\n fe_ = self._free_energy(v_)\n return v.shape[1] * log_logistic(fe_ - fe)" + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/transform", + "name": "transform", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/transform/self", + "name": "self", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._rbm/BernoulliRBM/transform/X", + "name": "X", + "qname": "sklearn.neural_network._rbm.BernoulliRBM.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to be transformed." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the hidden layer activation probabilities, P(h=1|v=X).", + "docstring": "Compute the hidden layer activation probabilities, P(h=1|v=X).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to be transformed.\n\nReturns\n-------\nh : ndarray of shape (n_samples, n_components)\n Latent representations of the data.", + "code": " def transform(self, X):\n \"\"\"Compute the hidden layer activation probabilities, P(h=1|v=X).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to be transformed.\n\n Returns\n -------\n h : ndarray of shape (n_samples, n_components)\n Latent representations of the data.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, accept_sparse='csr', reset=False,\n dtype=(np.float64, np.float32))\n return self._mean_hiddens(X)" + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer/__init__", + "name": "__init__", + "qname": "sklearn.neural_network._stochastic_optimizers.AdamOptimizer.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer/__init__/self", + "name": "self", + "qname": "sklearn.neural_network._stochastic_optimizers.AdamOptimizer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer/__init__/params", + "name": "params", + "qname": "sklearn.neural_network._stochastic_optimizers.AdamOptimizer.__init__.params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = len(coefs_) + len(intercepts_)", + "default_value": "", + "description": "The concatenated list containing coefs_ and intercepts_ in MLP model.\nUsed for initializing velocities and updating params" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = len(coefs_) + len(intercepts_)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer/__init__/learning_rate_init", + "name": "learning_rate_init", + "qname": "sklearn.neural_network._stochastic_optimizers.AdamOptimizer.__init__.learning_rate_init", + "default_value": "0.001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.001", + "description": "The initial learning rate used. 
It controls the step-size in updating\nthe weights" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer/__init__/beta_1", + "name": "beta_1", + "qname": "sklearn.neural_network._stochastic_optimizers.AdamOptimizer.__init__.beta_1", + "default_value": "0.9", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.9", + "description": "Exponential decay rate for estimates of first moment vector, should be\nin [0, 1)" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer/__init__/beta_2", + "name": "beta_2", + "qname": "sklearn.neural_network._stochastic_optimizers.AdamOptimizer.__init__.beta_2", + "default_value": "0.999", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.999", + "description": "Exponential decay rate for estimates of second moment vector, should be\nin [0, 1)" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.neural_network._stochastic_optimizers.AdamOptimizer.__init__.epsilon", + "default_value": "1e-08", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-8", + "description": "Value for numerical stability" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Stochastic gradient descent optimizer with Adam\n\nNote: All default values are from the original Adam paper", + "docstring": "", + "code": " def __init__(self, params, learning_rate_init=0.001, beta_1=0.9,\n beta_2=0.999, epsilon=1e-8):\n super().__init__(params, learning_rate_init)\n\n self.beta_1 = beta_1\n self.beta_2 = beta_2\n self.epsilon = epsilon\n self.t = 0\n self.ms = [np.zeros_like(param) for param in params]\n self.vs = [np.zeros_like(param) for param in params]" + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer/_get_updates", + "name": "_get_updates", + "qname": "sklearn.neural_network._stochastic_optimizers.AdamOptimizer._get_updates", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer/_get_updates/self", + "name": "self", + "qname": "sklearn.neural_network._stochastic_optimizers.AdamOptimizer._get_updates.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/AdamOptimizer/_get_updates/grads", + "name": "grads", + "qname": "sklearn.neural_network._stochastic_optimizers.AdamOptimizer._get_updates.grads", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = len(coefs_) + len(intercepts_)", + "default_value": "", + "description": "Containing gradients with respect to coefs_ and intercepts_ in MLP\nmodel. 
So length should be aligned with params" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = len(coefs_) + len(intercepts_)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get the values used to update params with given gradients", + "docstring": "Get the values used to update params with given gradients\n\nParameters\n----------\ngrads : list, length = len(coefs_) + len(intercepts_)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. So length should be aligned with params\n\nReturns\n-------\nupdates : list, length = len(grads)\n The values to add to params", + "code": " def _get_updates(self, grads):\n \"\"\"Get the values used to update params with given gradients\n\n Parameters\n ----------\n grads : list, length = len(coefs_) + len(intercepts_)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. So length should be aligned with params\n\n Returns\n -------\n updates : list, length = len(grads)\n The values to add to params\n \"\"\"\n self.t += 1\n self.ms = [self.beta_1 * m + (1 - self.beta_1) * grad\n for m, grad in zip(self.ms, grads)]\n self.vs = [self.beta_2 * v + (1 - self.beta_2) * (grad ** 2)\n for v, grad in zip(self.vs, grads)]\n self.learning_rate = (self.learning_rate_init *\n np.sqrt(1 - self.beta_2 ** self.t) /\n (1 - self.beta_1 ** self.t))\n updates = [-self.learning_rate * m / (np.sqrt(v) + self.epsilon)\n for m, v in zip(self.ms, self.vs)]\n return updates" + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/__init__", + "name": "__init__", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/__init__/self", + "name": "self", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/__init__/params", + "name": "params", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.__init__.params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = len(coefs_) + len(intercepts_)", + "default_value": "", + "description": "The concatenated list containing coefs_ and intercepts_ in MLP model.\nUsed for initializing velocities and updating params" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = len(coefs_) + len(intercepts_)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/__init__/learning_rate_init", + "name": "learning_rate_init", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.__init__.learning_rate_init", + "default_value": "0.1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The initial learning rate used. 
It controls the step-size in updating\nthe weights" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base (Stochastic) gradient descent optimizer", + "docstring": "", + "code": " def __init__(self, params, learning_rate_init=0.1):\n self.params = [param for param in params]\n self.learning_rate_init = learning_rate_init\n self.learning_rate = float(learning_rate_init)" + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/iteration_ends", + "name": "iteration_ends", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.iteration_ends", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/iteration_ends/self", + "name": "self", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.iteration_ends.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/iteration_ends/time_step", + "name": "time_step", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.iteration_ends.time_step", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform update to learning rate and potentially other states at the\nend of an iteration", + "docstring": "Perform update to learning rate and potentially other states at the\nend of an iteration", + "code": " def iteration_ends(self, time_step):\n \"\"\"Perform update to learning rate and potentially other states at the\n end of an iteration\n \"\"\"\n pass" + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/trigger_stopping", + "name": "trigger_stopping", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.trigger_stopping", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/trigger_stopping/self", + "name": "self", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.trigger_stopping.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/trigger_stopping/msg", + "name": "msg", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.trigger_stopping.msg", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "Message passed in for verbose output" + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/trigger_stopping/verbose", + "name": "verbose", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.trigger_stopping.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Print message to 
stdout if True" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Decides whether it is time to stop training", + "docstring": "Decides whether it is time to stop training\n\nParameters\n----------\nmsg : str\n Message passed in for verbose output\n\nverbose : bool\n Print message to stdout if True\n\nReturns\n-------\nis_stopping : bool\n True if training needs to stop", + "code": " def trigger_stopping(self, msg, verbose):\n \"\"\"Decides whether it is time to stop training\n\n Parameters\n ----------\n msg : str\n Message passed in for verbose output\n\n verbose : bool\n Print message to stdout if True\n\n Returns\n -------\n is_stopping : bool\n True if training needs to stop\n \"\"\"\n if verbose:\n print(msg + \" Stopping.\")\n return True" + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/update_params", + "name": "update_params", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.update_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/update_params/self", + "name": "self", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.update_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/BaseOptimizer/update_params/grads", + "name": "grads", + "qname": "sklearn.neural_network._stochastic_optimizers.BaseOptimizer.update_params.grads", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = len(params)", + "default_value": "", + "description": "Containing gradients with respect to coefs_ and intercepts_ in MLP\nmodel. So length should be aligned with params" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = len(params)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Update parameters with given gradients", + "docstring": "Update parameters with given gradients\n\nParameters\n----------\ngrads : list, length = len(params)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. So length should be aligned with params", + "code": " def update_params(self, grads):\n \"\"\"Update parameters with given gradients\n\n Parameters\n ----------\n grads : list, length = len(params)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. 
So length should be aligned with params\n \"\"\"\n updates = self._get_updates(grads)\n for param, update in zip(self.params, updates):\n param += update" + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/__init__", + "name": "__init__", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/__init__/self", + "name": "self", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/__init__/params", + "name": "params", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.__init__.params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = len(coefs_) + len(intercepts_)", + "default_value": "", + "description": "The concatenated list containing coefs_ and intercepts_ in MLP model.\nUsed for initializing velocities and updating params" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = len(coefs_) + len(intercepts_)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/__init__/learning_rate_init", + "name": "learning_rate_init", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.__init__.learning_rate_init", + "default_value": "0.1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "The initial learning rate used. It controls the step-size in updating\nthe weights" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/__init__/lr_schedule", + "name": "lr_schedule", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.__init__.lr_schedule", + "default_value": "'constant'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'constant', 'adaptive', 'invscaling'}", + "default_value": "'constant'", + "description": "Learning rate schedule for weight updates.\n\n-'constant', is a constant learning rate given by\n 'learning_rate_init'.\n\n-'invscaling' gradually decreases the learning rate 'learning_rate_' at\n each time step 't' using an inverse scaling exponent of 'power_t'.\n learning_rate_ = learning_rate_init / pow(t, power_t)\n\n-'adaptive', keeps the learning rate constant to\n 'learning_rate_init' as long as the training keeps decreasing.\n Each time 2 consecutive epochs fail to decrease the training loss by\n tol, or fail to increase validation score by tol if 'early_stopping'\n is on, the current learning rate is divided by 5." 
+ }, + "type": { + "kind": "EnumType", + "values": ["adaptive", "constant", "invscaling"] + } + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/__init__/momentum", + "name": "momentum", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.__init__.momentum", + "default_value": "0.9", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.9", + "description": "Value of momentum used, must be larger than or equal to 0" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/__init__/nesterov", + "name": "nesterov", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.__init__.nesterov", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to use nesterov's momentum or not. Use nesterov's if True" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/__init__/power_t", + "name": "power_t", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.__init__.power_t", + "default_value": "0.5", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "Power of time step 't' in inverse scaling. See `lr_schedule` for\nmore details." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Stochastic gradient descent optimizer with momentum", + "docstring": "", + "code": " def __init__(self, params, learning_rate_init=0.1, lr_schedule='constant',\n momentum=0.9, nesterov=True, power_t=0.5):\n super().__init__(params, learning_rate_init)\n\n self.lr_schedule = lr_schedule\n self.momentum = momentum\n self.nesterov = nesterov\n self.power_t = power_t\n self.velocities = [np.zeros_like(param) for param in params]" + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/_get_updates", + "name": "_get_updates", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer._get_updates", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/_get_updates/self", + "name": "self", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer._get_updates.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/_get_updates/grads", + "name": "grads", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer._get_updates.grads", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list, length = len(coefs_) + len(intercepts_)", + "default_value": "", + "description": "Containing gradients with respect to coefs_ and intercepts_ in MLP\nmodel. 
So length should be aligned with params" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "length = len(coefs_) + len(intercepts_)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get the values used to update params with given gradients", + "docstring": "Get the values used to update params with given gradients\n\nParameters\n----------\ngrads : list, length = len(coefs_) + len(intercepts_)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. So length should be aligned with params\n\nReturns\n-------\nupdates : list, length = len(grads)\n The values to add to params", + "code": " def _get_updates(self, grads):\n \"\"\"Get the values used to update params with given gradients\n\n Parameters\n ----------\n grads : list, length = len(coefs_) + len(intercepts_)\n Containing gradients with respect to coefs_ and intercepts_ in MLP\n model. So length should be aligned with params\n\n Returns\n -------\n updates : list, length = len(grads)\n The values to add to params\n \"\"\"\n updates = [self.momentum * velocity - self.learning_rate * grad\n for velocity, grad in zip(self.velocities, grads)]\n self.velocities = updates\n\n if self.nesterov:\n updates = [self.momentum * velocity - self.learning_rate * grad\n for velocity, grad in zip(self.velocities, grads)]\n\n return updates" + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/iteration_ends", + "name": "iteration_ends", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.iteration_ends", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/iteration_ends/self", + "name": "self", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.iteration_ends.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/iteration_ends/time_step", + "name": "time_step", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.iteration_ends.time_step", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "number of training samples trained on so far, used to update\nlearning rate for 'invscaling'" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform updates to learning rate and potential other states at the\nend of an iteration", + "docstring": "Perform updates to learning rate and potential other states at the\nend of an iteration\n\nParameters\n----------\ntime_step : int\n number of training samples trained on so far, used to update\n learning rate for 'invscaling'", + "code": " def iteration_ends(self, time_step):\n \"\"\"Perform updates to learning rate and potential other states at the\n end of an iteration\n\n Parameters\n ----------\n time_step : int\n number of training samples trained on so far, used to update\n learning rate for 'invscaling'\n \"\"\"\n if self.lr_schedule == 'invscaling':\n self.learning_rate = (float(self.learning_rate_init) /\n (time_step + 1) ** self.power_t)" + }, + { + "id": 
"scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/trigger_stopping", + "name": "trigger_stopping", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.trigger_stopping", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/trigger_stopping/self", + "name": "self", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.trigger_stopping.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/trigger_stopping/msg", + "name": "msg", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.trigger_stopping.msg", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.neural_network._stochastic_optimizers/SGDOptimizer/trigger_stopping/verbose", + "name": "verbose", + "qname": "sklearn.neural_network._stochastic_optimizers.SGDOptimizer.trigger_stopping.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def trigger_stopping(self, msg, verbose):\n if self.lr_schedule != 'adaptive':\n if verbose:\n print(msg + \" Stopping.\")\n return True\n\n if self.learning_rate <= 1e-6:\n if verbose:\n print(msg + \" Learning rate too small. Stopping.\")\n return True\n\n self.learning_rate /= 5.\n if verbose:\n print(msg + \" Setting learning rate to %f\" %\n self.learning_rate)\n return False" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/__init__", + "name": "__init__", + "qname": "sklearn.pipeline.FeatureUnion.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/__init__/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/__init__/transformer_list", + "name": "transformer_list", + "qname": "sklearn.pipeline.FeatureUnion.__init__.transformer_list", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "list of (string, transformer) tuples", + "default_value": "", + "description": "List of transformer objects to be applied to the data. The first\nhalf of each tuple is the name of the transformer. The tranformer can\nbe 'drop' for it to be ignored.\n\n.. versionchanged:: 0.22\n Deprecated `None` as a transformer in favor of 'drop'." 
+ }, + "type": { + "kind": "NamedType", + "name": "list of (string, transformer) tuples" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.pipeline.FeatureUnion.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/__init__/transformer_weights", + "name": "transformer_weights", + "qname": "sklearn.pipeline.FeatureUnion.__init__.transformer_weights", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Multiplicative weights for features per transformer.\nKeys are transformer names, values the weights.\nRaises ValueError if key not present in ``transformer_list``." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/__init__/verbose", + "name": "verbose", + "qname": "sklearn.pipeline.FeatureUnion.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the time elapsed while fitting each transformer will be\nprinted as it is completed." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Concatenates results of multiple transformer objects.\n\nThis estimator applies a list of transformer objects in parallel to the\ninput data, then concatenates the results. This is useful to combine\nseveral feature extraction mechanisms into a single transformer.\n\nParameters of the transformers may be set using its name and the parameter\nname separated by a '__'. A transformer may be replaced entirely by\nsetting the parameter with its name to another transformer,\nor removed by setting to 'drop'.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.13", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, transformer_list, *, n_jobs=None,\n transformer_weights=None, verbose=False):\n self.transformer_list = transformer_list\n self.n_jobs = n_jobs\n self.transformer_weights = transformer_weights\n self.verbose = verbose\n self._validate_transformers()" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_hstack", + "name": "_hstack", + "qname": "sklearn.pipeline.FeatureUnion._hstack", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_hstack/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion._hstack.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_hstack/Xs", + "name": "Xs", + "qname": "sklearn.pipeline.FeatureUnion._hstack.Xs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _hstack(self, Xs):\n if any(sparse.issparse(f) for f in Xs):\n Xs = sparse.hstack(Xs).tocsr()\n else:\n Xs = np.hstack(Xs)\n return Xs" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_iter", + "name": "_iter", + "qname": "sklearn.pipeline.FeatureUnion._iter", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_iter/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion._iter.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate (name, trans, weight) tuples excluding None and\n'drop' transformers.", + "docstring": "Generate (name, trans, weight) tuples excluding None and\n'drop' transformers.", + "code": " def _iter(self):\n \"\"\"\n Generate (name, trans, weight) tuples excluding None and\n 'drop' transformers.\n \"\"\"\n get_weight = (self.transformer_weights or {}).get\n return ((name, trans, get_weight(name))\n for name, trans in self.transformer_list\n if trans != 'drop')" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_log_message", + "name": "_log_message", + "qname": "sklearn.pipeline.FeatureUnion._log_message", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_log_message/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion._log_message.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_log_message/name", + "name": "name", + "qname": "sklearn.pipeline.FeatureUnion._log_message.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_log_message/idx", + "name": "idx", + "qname": "sklearn.pipeline.FeatureUnion._log_message.idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + 
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_log_message/total", + "name": "total", + "qname": "sklearn.pipeline.FeatureUnion._log_message.total", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _log_message(self, name, idx, total):\n if not self.verbose:\n return None\n return '(step %d of %d) Processing %s' % (idx, total, name)" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_parallel_func", + "name": "_parallel_func", + "qname": "sklearn.pipeline.FeatureUnion._parallel_func", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_parallel_func/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion._parallel_func.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_parallel_func/X", + "name": "X", + "qname": "sklearn.pipeline.FeatureUnion._parallel_func.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_parallel_func/y", + "name": "y", + "qname": "sklearn.pipeline.FeatureUnion._parallel_func.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_parallel_func/fit_params", + "name": "fit_params", + "qname": "sklearn.pipeline.FeatureUnion._parallel_func.fit_params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_parallel_func/func", + "name": "func", + "qname": "sklearn.pipeline.FeatureUnion._parallel_func.func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Runs func in parallel on X and y", + "docstring": "Runs func in parallel on X and y", + "code": " def _parallel_func(self, X, y, fit_params, func):\n \"\"\"Runs func in parallel on X and y\"\"\"\n self.transformer_list = list(self.transformer_list)\n self._validate_transformers()\n self._validate_transformer_weights()\n transformers = list(self._iter())\n\n return Parallel(n_jobs=self.n_jobs)(delayed(func)(\n transformer, X, y, weight,\n message_clsname='FeatureUnion',\n message=self._log_message(name, idx, len(transformers)),\n **fit_params) for idx, (name, transformer,\n weight) in enumerate(transformers, 1))" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_sk_visual_block_", + "name": "_sk_visual_block_", + "qname": "sklearn.pipeline.FeatureUnion._sk_visual_block_", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.pipeline/FeatureUnion/_sk_visual_block_/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion._sk_visual_block_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _sk_visual_block_(self):\n names, transformers = zip(*self.transformer_list)\n return _VisualBlock('parallel', transformers, names=names)" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_update_transformer_list", + "name": "_update_transformer_list", + "qname": "sklearn.pipeline.FeatureUnion._update_transformer_list", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_update_transformer_list/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion._update_transformer_list.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_update_transformer_list/transformers", + "name": "transformers", + "qname": "sklearn.pipeline.FeatureUnion._update_transformer_list.transformers", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _update_transformer_list(self, transformers):\n transformers = iter(transformers)\n self.transformer_list[:] = [(name, old if old == 'drop'\n else next(transformers))\n for name, old in self.transformer_list]" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_validate_transformer_weights", + "name": "_validate_transformer_weights", + "qname": "sklearn.pipeline.FeatureUnion._validate_transformer_weights", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_validate_transformer_weights/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion._validate_transformer_weights.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_transformer_weights(self):\n if not self.transformer_weights:\n return\n\n transformer_names = set(name for name, _ in self.transformer_list)\n for name in self.transformer_weights:\n if name not in transformer_names:\n raise ValueError(\n f'Attempting to weight transformer \"{name}\", '\n 'but it is not present in transformer_list.'\n )" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_validate_transformers", + "name": "_validate_transformers", + "qname": "sklearn.pipeline.FeatureUnion._validate_transformers", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/_validate_transformers/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion._validate_transformers.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], 
+ "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_transformers(self):\n names, transformers = zip(*self.transformer_list)\n\n # validate names\n self._validate_names(names)\n\n # validate estimators\n for t in transformers:\n if t == 'drop':\n continue\n if (not (hasattr(t, \"fit\") or hasattr(t, \"fit_transform\")) or not\n hasattr(t, \"transform\")):\n raise TypeError(\"All estimators should implement fit and \"\n \"transform. '%s' (type %s) doesn't\" %\n (t, type(t)))" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/fit", + "name": "fit", + "qname": "sklearn.pipeline.FeatureUnion.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/fit/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/fit/X", + "name": "X", + "qname": "sklearn.pipeline.FeatureUnion.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable or array-like, depending on transformers", + "default_value": "", + "description": "Input data, used to fit transformers." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "iterable" + }, + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "depending on transformers" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/fit/y", + "name": "y", + "qname": "sklearn.pipeline.FeatureUnion.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_outputs)", + "default_value": "None", + "description": "Targets for supervised learning." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/fit/fit_params", + "name": "fit_params", + "qname": "sklearn.pipeline.FeatureUnion.fit.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit all transformers using X.", + "docstring": "Fit all transformers using X.\n\nParameters\n----------\nX : iterable or array-like, depending on transformers\n Input data, used to fit transformers.\n\ny : array-like of shape (n_samples, n_outputs), default=None\n Targets for supervised learning.\n\nReturns\n-------\nself : FeatureUnion\n This estimator", + "code": " def fit(self, X, y=None, **fit_params):\n \"\"\"Fit all transformers using X.\n\n Parameters\n ----------\n X : iterable or array-like, depending on transformers\n Input data, used to fit transformers.\n\n y : array-like of shape (n_samples, n_outputs), default=None\n Targets for supervised learning.\n\n Returns\n -------\n self : FeatureUnion\n This estimator\n \"\"\"\n transformers = self._parallel_func(X, y, fit_params, _fit_one)\n if not transformers:\n # All transformers are None\n return self\n\n self._update_transformer_list(transformers)\n return self" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/fit_transform", + "name": "fit_transform", + "qname": "sklearn.pipeline.FeatureUnion.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/fit_transform/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/fit_transform/X", + "name": "X", + "qname": "sklearn.pipeline.FeatureUnion.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable or array-like, depending on transformers", + "default_value": "", + "description": "Input data to be transformed." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "iterable" + }, + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "depending on transformers" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/fit_transform/y", + "name": "y", + "qname": "sklearn.pipeline.FeatureUnion.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples, n_outputs)", + "default_value": "None", + "description": "Targets for supervised learning." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/fit_transform/fit_params", + "name": "fit_params", + "qname": "sklearn.pipeline.FeatureUnion.fit_transform.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit all transformers, transform the data and concatenate results.", + "docstring": "Fit all transformers, transform the data and concatenate results.\n\nParameters\n----------\nX : iterable or array-like, depending on transformers\n Input data to be transformed.\n\ny : array-like of shape (n_samples, n_outputs), default=None\n Targets for supervised learning.\n\nReturns\n-------\nX_t : array-like or sparse matrix of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers.", + "code": " def fit_transform(self, X, y=None, **fit_params):\n \"\"\"Fit all transformers, transform the data and concatenate results.\n\n Parameters\n ----------\n X : iterable or array-like, depending on transformers\n Input data to be transformed.\n\n y : array-like of shape (n_samples, n_outputs), default=None\n Targets for supervised learning.\n\n Returns\n -------\n X_t : array-like or sparse matrix of \\\n shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers.\n \"\"\"\n results = self._parallel_func(X, y, fit_params, _fit_transform_one)\n if not results:\n # All transformers are None\n return np.zeros((X.shape[0], 0))\n\n Xs, transformers = zip(*results)\n self._update_transformer_list(transformers)\n\n return self._hstack(Xs)" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/get_feature_names", + "name": "get_feature_names", + "qname": "sklearn.pipeline.FeatureUnion.get_feature_names", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/get_feature_names/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion.get_feature_names.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Get feature names from all transformers.", + "docstring": "Get feature names from all transformers.\n\nReturns\n-------\nfeature_names : list of strings\n Names of the features produced by transform.", + "code": " def get_feature_names(self):\n \"\"\"Get feature names from all transformers.\n\n Returns\n -------\n feature_names : list of strings\n Names of the features produced by transform.\n \"\"\"\n feature_names = []\n for name, trans, weight in self._iter():\n if not hasattr(trans, 'get_feature_names'):\n raise AttributeError(\"Transformer %s (type %s) does not \"\n \"provide get_feature_names.\"\n % (str(name), type(trans).__name__))\n feature_names.extend([name + \"__\" + f for f in\n trans.get_feature_names()])\n return feature_names" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/get_params", + "name": "get_params", + "qname": "sklearn.pipeline.FeatureUnion.get_params", + "decorators": [], + "parameters": [ + { + 
"id": "scikit-learn/sklearn.pipeline/FeatureUnion/get_params/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion.get_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/get_params/deep", + "name": "deep", + "qname": "sklearn.pipeline.FeatureUnion.get_params.deep", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, will return the parameters for this estimator and\ncontained subobjects that are estimators." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `transformer_list` of the\n`FeatureUnion`.", + "docstring": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `transformer_list` of the\n`FeatureUnion`.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : mapping of string to any\n Parameter names mapped to their values.", + "code": " def get_params(self, deep=True):\n \"\"\"Get parameters for this estimator.\n\n Returns the parameters given in the constructor as well as the\n estimators contained within the `transformer_list` of the\n `FeatureUnion`.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : mapping of string to any\n Parameter names mapped to their values.\n \"\"\"\n return self._get_params('transformer_list', deep=deep)" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/n_features_in_@getter", + "name": "n_features_in_", + "qname": "sklearn.pipeline.FeatureUnion.n_features_in_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/n_features_in_/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion.n_features_in_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def n_features_in_(self):\n # X is passed to all transformers so we just delegate to the first one\n return self.transformer_list[0][1].n_features_in_" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/set_params", + "name": "set_params", + "qname": "sklearn.pipeline.FeatureUnion.set_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/set_params/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion.set_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/set_params/kwargs", + "name": "kwargs", + "qname": 
"sklearn.pipeline.FeatureUnion.set_params.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that\nyou can directly set the parameters of the estimators contained in\n`tranformer_list`.", + "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that\nyou can directly set the parameters of the estimators contained in\n`tranformer_list`.\n\nReturns\n-------\nself", + "code": " def set_params(self, **kwargs):\n \"\"\"Set the parameters of this estimator.\n\n Valid parameter keys can be listed with ``get_params()``. Note that\n you can directly set the parameters of the estimators contained in\n `tranformer_list`.\n\n Returns\n -------\n self\n \"\"\"\n self._set_params('transformer_list', **kwargs)\n return self" + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/transform", + "name": "transform", + "qname": "sklearn.pipeline.FeatureUnion.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/transform/self", + "name": "self", + "qname": "sklearn.pipeline.FeatureUnion.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/FeatureUnion/transform/X", + "name": "X", + "qname": "sklearn.pipeline.FeatureUnion.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable or array-like, depending on transformers", + "default_value": "", + "description": "Input data to be transformed." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "iterable" + }, + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "depending on transformers" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Transform X separately by each transformer, concatenate results.", + "docstring": "Transform X separately by each transformer, concatenate results.\n\nParameters\n----------\nX : iterable or array-like, depending on transformers\n Input data to be transformed.\n\nReturns\n-------\nX_t : array-like or sparse matrix of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers.", + "code": " def transform(self, X):\n \"\"\"Transform X separately by each transformer, concatenate results.\n\n Parameters\n ----------\n X : iterable or array-like, depending on transformers\n Input data to be transformed.\n\n Returns\n -------\n X_t : array-like or sparse matrix of \\\n shape (n_samples, sum_n_components)\n hstack of results of transformers. 
sum_n_components is the\n sum of n_components (output dimension) over transformers.\n \"\"\"\n Xs = Parallel(n_jobs=self.n_jobs)(\n delayed(_transform_one)(trans, X, None, weight)\n for name, trans, weight in self._iter())\n if not Xs:\n # All transformers are None\n return np.zeros((X.shape[0], 0))\n\n return self._hstack(Xs)" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/__getitem__", + "name": "__getitem__", + "qname": "sklearn.pipeline.Pipeline.__getitem__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/__getitem__/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.__getitem__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/__getitem__/ind", + "name": "ind", + "qname": "sklearn.pipeline.Pipeline.__getitem__.ind", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns a sub-pipeline or a single estimator in the pipeline\n\nIndexing with an integer will return an estimator; using a slice\nreturns another Pipeline instance which copies a slice of this\nPipeline. This copy is shallow: modifying (or fitting) estimators in\nthe sub-pipeline will affect the larger pipeline and vice-versa.\nHowever, replacing a value in `step` will not affect a copy.", + "docstring": "Returns a sub-pipeline or a single estimator in the pipeline\n\nIndexing with an integer will return an estimator; using a slice\nreturns another Pipeline instance which copies a slice of this\nPipeline. This copy is shallow: modifying (or fitting) estimators in\nthe sub-pipeline will affect the larger pipeline and vice-versa.\nHowever, replacing a value in `step` will not affect a copy.", + "code": " def __getitem__(self, ind):\n \"\"\"Returns a sub-pipeline or a single estimator in the pipeline\n\n Indexing with an integer will return an estimator; using a slice\n returns another Pipeline instance which copies a slice of this\n Pipeline. 
This copy is shallow: modifying (or fitting) estimators in\n the sub-pipeline will affect the larger pipeline and vice-versa.\n However, replacing a value in `step` will not affect a copy.\n \"\"\"\n if isinstance(ind, slice):\n if ind.step not in (1, None):\n raise ValueError(\"Pipeline slicing only supports a step of 1\")\n return self.__class__(\n self.steps[ind], memory=self.memory, verbose=self.verbose\n )\n try:\n name, est = self.steps[ind]\n except TypeError:\n # Not an int, try get step by name\n return self.named_steps[ind]\n return est" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/__init__", + "name": "__init__", + "qname": "sklearn.pipeline.Pipeline.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/__init__/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/__init__/steps", + "name": "steps", + "qname": "sklearn.pipeline.Pipeline.__init__.steps", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "list", + "default_value": "", + "description": "List of (name, transform) tuples (implementing fit/transform) that are\nchained, in the order in which they are chained, with the last object\nan estimator." + }, + "type": { + "kind": "NamedType", + "name": "list" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/__init__/memory", + "name": "memory", + "qname": "sklearn.pipeline.Pipeline.__init__.memory", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str or object with the joblib.Memory interface", + "default_value": "None", + "description": "Used to cache the fitted transformers of the pipeline. By default,\nno caching is performed. If a string is given, it is the path to\nthe caching directory. Enabling caching triggers a clone of\nthe transformers before fitting. Therefore, the transformer\ninstance given to the pipeline cannot be inspected\ndirectly. Use the attribute ``named_steps`` or ``steps`` to\ninspect estimators within the pipeline. Caching the\ntransformers is advantageous when fitting is time consuming." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "object with the joblib.Memory interface" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/__init__/verbose", + "name": "verbose", + "qname": "sklearn.pipeline.Pipeline.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the time elapsed while fitting each step will be printed as it\nis completed." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Pipeline of transforms with a final estimator.\n\nSequentially apply a list of transforms and a final estimator.\nIntermediate steps of the pipeline must be 'transforms', that is, they\nmust implement fit and transform methods.\nThe final estimator only needs to implement fit.\nThe transformers in the pipeline can be cached using ``memory`` argument.\n\nThe purpose of the pipeline is to assemble several steps that can be\ncross-validated together while setting different parameters.\nFor this, it enables setting parameters of the various steps using their\nnames and the parameter name separated by a '__', as in the example below.\nA step's estimator may be replaced entirely by setting the parameter\nwith its name to another estimator, or a transformer removed by setting\nit to 'passthrough' or ``None``.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.5", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, steps, *, memory=None, verbose=False):\n self.steps = steps\n self.memory = memory\n self.verbose = verbose\n self._validate_steps()" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/__len__", + "name": "__len__", + "qname": "sklearn.pipeline.Pipeline.__len__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/__len__/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.__len__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns the length of the Pipeline", + "docstring": "Returns the length of the Pipeline", + "code": " def __len__(self):\n \"\"\"\n Returns the length of the Pipeline\n \"\"\"\n return len(self.steps)" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_check_fit_params", + "name": "_check_fit_params", + "qname": "sklearn.pipeline.Pipeline._check_fit_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_check_fit_params/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline._check_fit_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_check_fit_params/fit_params", + "name": "fit_params", + "qname": "sklearn.pipeline.Pipeline._check_fit_params.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_fit_params(self, **fit_params):\n fit_params_steps = {name: {} for name, step in self.steps\n if step is not None}\n for pname, pval in fit_params.items():\n if '__' not in pname:\n raise ValueError(\n \"Pipeline.fit does not accept the {} parameter. \"\n \"You can pass parameters to specific steps of your \"\n \"pipeline using the stepname__parameter format, e.g. 
\"\n \"`Pipeline.fit(X, y, logisticregression__sample_weight\"\n \"=sample_weight)`.\".format(pname))\n step, param = pname.split('__', 1)\n fit_params_steps[step][param] = pval\n return fit_params_steps" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_estimator_type@getter", + "name": "_estimator_type", + "qname": "sklearn.pipeline.Pipeline._estimator_type", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_estimator_type/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline._estimator_type.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def _estimator_type(self):\n return self.steps[-1][1]._estimator_type" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_final_estimator@getter", + "name": "_final_estimator", + "qname": "sklearn.pipeline.Pipeline._final_estimator", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_final_estimator/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline._final_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def _final_estimator(self):\n estimator = self.steps[-1][1]\n return 'passthrough' if estimator is None else estimator" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_fit", + "name": "_fit", + "qname": "sklearn.pipeline.Pipeline._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_fit/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_fit/X", + "name": "X", + "qname": "sklearn.pipeline.Pipeline._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_fit/y", + "name": "y", + "qname": "sklearn.pipeline.Pipeline._fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_fit/fit_params_steps", + "name": "fit_params_steps", + "qname": "sklearn.pipeline.Pipeline._fit.fit_params_steps", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit(self, X, y=None, **fit_params_steps):\n # shallow copy of steps - this should really be steps_\n self.steps = list(self.steps)\n self._validate_steps()\n # Setup the memory\n memory = check_memory(self.memory)\n\n fit_transform_one_cached = 
memory.cache(_fit_transform_one)\n\n for (step_idx,\n name,\n transformer) in self._iter(with_final=False,\n filter_passthrough=False):\n if (transformer is None or transformer == 'passthrough'):\n with _print_elapsed_time('Pipeline',\n self._log_message(step_idx)):\n continue\n\n if hasattr(memory, 'location'):\n # joblib >= 0.12\n if memory.location is None:\n # we do not clone when caching is disabled to\n # preserve backward compatibility\n cloned_transformer = transformer\n else:\n cloned_transformer = clone(transformer)\n elif hasattr(memory, 'cachedir'):\n # joblib < 0.11\n if memory.cachedir is None:\n # we do not clone when caching is disabled to\n # preserve backward compatibility\n cloned_transformer = transformer\n else:\n cloned_transformer = clone(transformer)\n else:\n cloned_transformer = clone(transformer)\n # Fit or load from cache the current transformer\n X, fitted_transformer = fit_transform_one_cached(\n cloned_transformer, X, y, None,\n message_clsname='Pipeline',\n message=self._log_message(step_idx),\n **fit_params_steps[name])\n # Replace the transformer of the step with the fitted\n # transformer. This is necessary when loading the transformer\n # from the cache.\n self.steps[step_idx] = (name, fitted_transformer)\n return X" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_inverse_transform", + "name": "_inverse_transform", + "qname": "sklearn.pipeline.Pipeline._inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_inverse_transform/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline._inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_inverse_transform/X", + "name": "X", + "qname": "sklearn.pipeline.Pipeline._inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _inverse_transform(self, X):\n Xt = X\n reverse_iter = reversed(list(self._iter()))\n for _, _, transform in reverse_iter:\n Xt = transform.inverse_transform(Xt)\n return Xt" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_iter", + "name": "_iter", + "qname": "sklearn.pipeline.Pipeline._iter", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_iter/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline._iter.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_iter/with_final", + "name": "with_final", + "qname": "sklearn.pipeline.Pipeline._iter.with_final", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_iter/filter_passthrough", + "name": "filter_passthrough", + "qname": "sklearn.pipeline.Pipeline._iter.filter_passthrough", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + 
"default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate (idx, (name, trans)) tuples from self.steps\n\nWhen filter_passthrough is True, 'passthrough' and None transformers\nare filtered out.", + "docstring": "Generate (idx, (name, trans)) tuples from self.steps\n\nWhen filter_passthrough is True, 'passthrough' and None transformers\nare filtered out.", + "code": " def _iter(self, with_final=True, filter_passthrough=True):\n \"\"\"\n Generate (idx, (name, trans)) tuples from self.steps\n\n When filter_passthrough is True, 'passthrough' and None transformers\n are filtered out.\n \"\"\"\n stop = len(self.steps)\n if not with_final:\n stop -= 1\n\n for idx, (name, trans) in enumerate(islice(self.steps, 0, stop)):\n if not filter_passthrough:\n yield idx, name, trans\n elif trans is not None and trans != 'passthrough':\n yield idx, name, trans" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_log_message", + "name": "_log_message", + "qname": "sklearn.pipeline.Pipeline._log_message", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_log_message/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline._log_message.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_log_message/step_idx", + "name": "step_idx", + "qname": "sklearn.pipeline.Pipeline._log_message.step_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _log_message(self, step_idx):\n if not self.verbose:\n return None\n name, step = self.steps[step_idx]\n\n return '(step %d of %d) Processing %s' % (step_idx + 1,\n len(self.steps),\n name)" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_more_tags", + "name": "_more_tags", + "qname": "sklearn.pipeline.Pipeline._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_more_tags/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n # check if first estimator expects pairwise input\n return {'pairwise': _safe_tags(self.steps[0][1], \"pairwise\")}" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.pipeline.Pipeline._pairwise", + "decorators": [ + "deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_pairwise/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, 
+ "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n # check if first estimator expects pairwise input\n return getattr(self.steps[0][1], '_pairwise', False)" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_sk_visual_block_", + "name": "_sk_visual_block_", + "qname": "sklearn.pipeline.Pipeline._sk_visual_block_", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_sk_visual_block_/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline._sk_visual_block_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _sk_visual_block_(self):\n _, estimators = zip(*self.steps)\n\n def _get_name(name, est):\n if est is None or est == 'passthrough':\n return f'{name}: passthrough'\n # Is an estimator\n return f'{name}: {est.__class__.__name__}'\n names = [_get_name(name, est) for name, est in self.steps]\n name_details = [str(est) for est in estimators]\n return _VisualBlock('serial', estimators,\n names=names,\n name_details=name_details,\n dash_wrapped=False)" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_transform", + "name": "_transform", + "qname": "sklearn.pipeline.Pipeline._transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_transform/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline._transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_transform/X", + "name": "X", + "qname": "sklearn.pipeline.Pipeline._transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _transform(self, X):\n Xt = X\n for _, _, transform in self._iter():\n Xt = transform.transform(Xt)\n return Xt" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_validate_steps", + "name": "_validate_steps", + "qname": "sklearn.pipeline.Pipeline._validate_steps", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/_validate_steps/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline._validate_steps.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_steps(self):\n names, estimators = zip(*self.steps)\n\n # validate names\n self._validate_names(names)\n\n # validate estimators\n transformers = estimators[:-1]\n estimator = estimators[-1]\n\n for t in transformers:\n if t is None or t == 'passthrough':\n continue\n if (not (hasattr(t, \"fit\") or hasattr(t, \"fit_transform\")) or not\n hasattr(t, \"transform\")):\n raise 
TypeError(\"All intermediate steps should be \"\n \"transformers and implement fit and transform \"\n \"or be the string 'passthrough' \"\n \"'%s' (type %s) doesn't\" % (t, type(t)))\n\n # We allow last estimator to be None as an identity transformation\n if (estimator is not None and estimator != 'passthrough'\n and not hasattr(estimator, \"fit\")):\n raise TypeError(\n \"Last step of Pipeline should implement fit \"\n \"or be the string 'passthrough'. \"\n \"'%s' (type %s) doesn't\" % (estimator, type(estimator)))" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/classes_@getter", + "name": "classes_", + "qname": "sklearn.pipeline.Pipeline.classes_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/classes_/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.classes_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def classes_(self):\n return self.steps[-1][-1].classes_" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/decision_function", + "name": "decision_function", + "qname": "sklearn.pipeline.Pipeline.decision_function", + "decorators": ["if_delegate_has_method(delegate='_final_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/decision_function/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/decision_function/X", + "name": "X", + "qname": "sklearn.pipeline.Pipeline.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "Data to predict on. Must fulfill input requirements of first step\nof the pipeline." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply transforms, and decision_function of the final estimator", + "docstring": "Apply transforms, and decision_function of the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_score : array-like of shape (n_samples, n_classes)", + "code": " @if_delegate_has_method(delegate='_final_estimator')\n def decision_function(self, X):\n \"\"\"Apply transforms, and decision_function of the final estimator\n\n Parameters\n ----------\n X : iterable\n Data to predict on. 
Must fulfill input requirements of first step\n of the pipeline.\n\n Returns\n -------\n y_score : array-like of shape (n_samples, n_classes)\n \"\"\"\n Xt = X\n for _, name, transform in self._iter(with_final=False):\n Xt = transform.transform(Xt)\n return self.steps[-1][-1].decision_function(Xt)" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit", + "name": "fit", + "qname": "sklearn.pipeline.Pipeline.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit/X", + "name": "X", + "qname": "sklearn.pipeline.Pipeline.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "Training data. Must fulfill input requirements of first step of the\npipeline." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit/y", + "name": "y", + "qname": "sklearn.pipeline.Pipeline.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "None", + "description": "Training targets. Must fulfill label requirements for all steps of\nthe pipeline." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit/fit_params", + "name": "fit_params", + "qname": "sklearn.pipeline.Pipeline.fit.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "dict of string -> object", + "default_value": "", + "description": "Parameters passed to the ``fit`` method of each step, where\neach parameter name is prefixed such that parameter ``p`` for step\n``s`` has key ``s__p``." + }, + "type": { + "kind": "NamedType", + "name": "dict of string -> object" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit the model\n\nFit all the transforms one after the other and transform the\ndata, then fit the transformed data using the final estimator.", + "docstring": "Fit the model\n\nFit all the transforms one after the other and transform the\ndata, then fit the transformed data using the final estimator.\n\nParameters\n----------\nX : iterable\n Training data. Must fulfill input requirements of first step of the\n pipeline.\n\ny : iterable, default=None\n Training targets. Must fulfill label requirements for all steps of\n the pipeline.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\nReturns\n-------\nself : Pipeline\n This estimator", + "code": " def fit(self, X, y=None, **fit_params):\n \"\"\"Fit the model\n\n Fit all the transforms one after the other and transform the\n data, then fit the transformed data using the final estimator.\n\n Parameters\n ----------\n X : iterable\n Training data. Must fulfill input requirements of first step of the\n pipeline.\n\n y : iterable, default=None\n Training targets. 
Must fulfill label requirements for all steps of\n the pipeline.\n\n **fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\n Returns\n -------\n self : Pipeline\n This estimator\n \"\"\"\n fit_params_steps = self._check_fit_params(**fit_params)\n Xt = self._fit(X, y, **fit_params_steps)\n with _print_elapsed_time('Pipeline',\n self._log_message(len(self.steps) - 1)):\n if self._final_estimator != 'passthrough':\n fit_params_last_step = fit_params_steps[self.steps[-1][0]]\n self._final_estimator.fit(Xt, y, **fit_params_last_step)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit_predict", + "name": "fit_predict", + "qname": "sklearn.pipeline.Pipeline.fit_predict", + "decorators": ["if_delegate_has_method(delegate='_final_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit_predict/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.fit_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit_predict/X", + "name": "X", + "qname": "sklearn.pipeline.Pipeline.fit_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "Training data. Must fulfill input requirements of first step of\nthe pipeline." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit_predict/y", + "name": "y", + "qname": "sklearn.pipeline.Pipeline.fit_predict.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "None", + "description": "Training targets. Must fulfill label requirements for all steps\nof the pipeline." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit_predict/fit_params", + "name": "fit_params", + "qname": "sklearn.pipeline.Pipeline.fit_predict.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "dict of string -> object", + "default_value": "", + "description": "Parameters passed to the ``fit`` method of each step, where\neach parameter name is prefixed such that parameter ``p`` for step\n``s`` has key ``s__p``." + }, + "type": { + "kind": "NamedType", + "name": "dict of string -> object" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Applies fit_predict of last step in pipeline after transforms.\n\nApplies fit_transforms of a pipeline to the data, followed by the\nfit_predict method of the final estimator in the pipeline. Valid\nonly if the final estimator implements fit_predict.", + "docstring": "Applies fit_predict of last step in pipeline after transforms.\n\nApplies fit_transforms of a pipeline to the data, followed by the\nfit_predict method of the final estimator in the pipeline. Valid\nonly if the final estimator implements fit_predict.\n\nParameters\n----------\nX : iterable\n Training data. Must fulfill input requirements of first step of\n the pipeline.\n\ny : iterable, default=None\n Training targets. 
Must fulfill label requirements for all steps\n of the pipeline.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\nReturns\n-------\ny_pred : array-like", + "code": " @if_delegate_has_method(delegate='_final_estimator')\n def fit_predict(self, X, y=None, **fit_params):\n \"\"\"Applies fit_predict of last step in pipeline after transforms.\n\n Applies fit_transforms of a pipeline to the data, followed by the\n fit_predict method of the final estimator in the pipeline. Valid\n only if the final estimator implements fit_predict.\n\n Parameters\n ----------\n X : iterable\n Training data. Must fulfill input requirements of first step of\n the pipeline.\n\n y : iterable, default=None\n Training targets. Must fulfill label requirements for all steps\n of the pipeline.\n\n **fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\n Returns\n -------\n y_pred : array-like\n \"\"\"\n fit_params_steps = self._check_fit_params(**fit_params)\n Xt = self._fit(X, y, **fit_params_steps)\n\n fit_params_last_step = fit_params_steps[self.steps[-1][0]]\n with _print_elapsed_time('Pipeline',\n self._log_message(len(self.steps) - 1)):\n y_pred = self.steps[-1][-1].fit_predict(Xt, y,\n **fit_params_last_step)\n return y_pred" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit_transform", + "name": "fit_transform", + "qname": "sklearn.pipeline.Pipeline.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit_transform/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit_transform/X", + "name": "X", + "qname": "sklearn.pipeline.Pipeline.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "Training data. Must fulfill input requirements of first step of the\npipeline." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit_transform/y", + "name": "y", + "qname": "sklearn.pipeline.Pipeline.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "None", + "description": "Training targets. Must fulfill label requirements for all steps of\nthe pipeline." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/fit_transform/fit_params", + "name": "fit_params", + "qname": "sklearn.pipeline.Pipeline.fit_transform.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "dict of string -> object", + "default_value": "", + "description": "Parameters passed to the ``fit`` method of each step, where\neach parameter name is prefixed such that parameter ``p`` for step\n``s`` has key ``s__p``." 
+ }, + "type": { + "kind": "NamedType", + "name": "dict of string -> object" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fit the model and transform with the final estimator\n\nFits all the transforms one after the other and transforms the\ndata, then uses fit_transform on transformed data with the final\nestimator.", + "docstring": "Fit the model and transform with the final estimator\n\nFits all the transforms one after the other and transforms the\ndata, then uses fit_transform on transformed data with the final\nestimator.\n\nParameters\n----------\nX : iterable\n Training data. Must fulfill input requirements of first step of the\n pipeline.\n\ny : iterable, default=None\n Training targets. Must fulfill label requirements for all steps of\n the pipeline.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\nReturns\n-------\nXt : array-like of shape (n_samples, n_transformed_features)\n Transformed samples", + "code": " def fit_transform(self, X, y=None, **fit_params):\n \"\"\"Fit the model and transform with the final estimator\n\n Fits all the transforms one after the other and transforms the\n data, then uses fit_transform on transformed data with the final\n estimator.\n\n Parameters\n ----------\n X : iterable\n Training data. Must fulfill input requirements of first step of the\n pipeline.\n\n y : iterable, default=None\n Training targets. Must fulfill label requirements for all steps of\n the pipeline.\n\n **fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\n Returns\n -------\n Xt : array-like of shape (n_samples, n_transformed_features)\n Transformed samples\n \"\"\"\n fit_params_steps = self._check_fit_params(**fit_params)\n Xt = self._fit(X, y, **fit_params_steps)\n\n last_step = self._final_estimator\n with _print_elapsed_time('Pipeline',\n self._log_message(len(self.steps) - 1)):\n if last_step == 'passthrough':\n return Xt\n fit_params_last_step = fit_params_steps[self.steps[-1][0]]\n if hasattr(last_step, 'fit_transform'):\n return last_step.fit_transform(Xt, y, **fit_params_last_step)\n else:\n return last_step.fit(Xt, y,\n **fit_params_last_step).transform(Xt)" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/get_params", + "name": "get_params", + "qname": "sklearn.pipeline.Pipeline.get_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/get_params/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.get_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/get_params/deep", + "name": "deep", + "qname": "sklearn.pipeline.Pipeline.get_params.deep", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, will return the parameters for this estimator and\ncontained subobjects that are estimators." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `steps` of the `Pipeline`.", + "docstring": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `steps` of the `Pipeline`.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : mapping of string to any\n Parameter names mapped to their values.", + "code": " def get_params(self, deep=True):\n \"\"\"Get parameters for this estimator.\n\n Returns the parameters given in the constructor as well as the\n estimators contained within the `steps` of the `Pipeline`.\n\n Parameters\n ----------\n deep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\n Returns\n -------\n params : mapping of string to any\n Parameter names mapped to their values.\n \"\"\"\n return self._get_params('steps', deep=deep)" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/inverse_transform@getter", + "name": "inverse_transform", + "qname": "sklearn.pipeline.Pipeline.inverse_transform", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/inverse_transform/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply inverse transformations in reverse order\n\nAll estimators in the pipeline must support ``inverse_transform``.", + "docstring": "Apply inverse transformations in reverse order\n\nAll estimators in the pipeline must support ``inverse_transform``.\n\nParameters\n----------\nXt : array-like of shape (n_samples, n_transformed_features)\n Data samples, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features. Must fulfill\n input requirements of last step of pipeline's\n ``inverse_transform`` method.\n\nReturns\n-------\nXt : array-like of shape (n_samples, n_features)", + "code": " @property\n def inverse_transform(self):\n \"\"\"Apply inverse transformations in reverse order\n\n All estimators in the pipeline must support ``inverse_transform``.\n\n Parameters\n ----------\n Xt : array-like of shape (n_samples, n_transformed_features)\n Data samples, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features. 
Must fulfill\n input requirements of last step of pipeline's\n ``inverse_transform`` method.\n\n Returns\n -------\n Xt : array-like of shape (n_samples, n_features)\n \"\"\"\n # raise AttributeError if necessary for hasattr behaviour\n # XXX: Handling the None case means we can't use if_delegate_has_method\n for _, _, transform in self._iter():\n transform.inverse_transform\n return self._inverse_transform" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/n_features_in_@getter", + "name": "n_features_in_", + "qname": "sklearn.pipeline.Pipeline.n_features_in_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/n_features_in_/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.n_features_in_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def n_features_in_(self):\n # delegate to first step (which will call _check_is_fitted)\n return self.steps[0][1].n_features_in_" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/named_steps@getter", + "name": "named_steps", + "qname": "sklearn.pipeline.Pipeline.named_steps", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/named_steps/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.named_steps.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def named_steps(self):\n # Use Bunch object to improve autocomplete\n return Bunch(**dict(self.steps))" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/predict", + "name": "predict", + "qname": "sklearn.pipeline.Pipeline.predict", + "decorators": ["if_delegate_has_method(delegate='_final_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/predict/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/predict/X", + "name": "X", + "qname": "sklearn.pipeline.Pipeline.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "Data to predict on. Must fulfill input requirements of first step\nof the pipeline." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/predict/predict_params", + "name": "predict_params", + "qname": "sklearn.pipeline.Pipeline.predict.predict_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "dict of string -> object", + "default_value": "", + "description": "Parameters to the ``predict`` called at the end of all\ntransformations in the pipeline. 
Note that while this may be\nused to return uncertainties from some models with return_std\nor return_cov, uncertainties that are generated by the\ntransformations in the pipeline are not propagated to the\nfinal estimator.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "dict of string -> object" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply transforms to the data, and predict with the final estimator", + "docstring": "Apply transforms to the data, and predict with the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n**predict_params : dict of string -> object\n Parameters to the ``predict`` called at the end of all\n transformations in the pipeline. Note that while this may be\n used to return uncertainties from some models with return_std\n or return_cov, uncertainties that are generated by the\n transformations in the pipeline are not propagated to the\n final estimator.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ny_pred : array-like", + "code": " @if_delegate_has_method(delegate='_final_estimator')\n def predict(self, X, **predict_params):\n \"\"\"Apply transforms to the data, and predict with the final estimator\n\n Parameters\n ----------\n X : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n **predict_params : dict of string -> object\n Parameters to the ``predict`` called at the end of all\n transformations in the pipeline. Note that while this may be\n used to return uncertainties from some models with return_std\n or return_cov, uncertainties that are generated by the\n transformations in the pipeline are not propagated to the\n final estimator.\n\n .. versionadded:: 0.20\n\n Returns\n -------\n y_pred : array-like\n \"\"\"\n Xt = X\n for _, name, transform in self._iter(with_final=False):\n Xt = transform.transform(Xt)\n return self.steps[-1][-1].predict(Xt, **predict_params)" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.pipeline.Pipeline.predict_log_proba", + "decorators": ["if_delegate_has_method(delegate='_final_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/predict_log_proba/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/predict_log_proba/X", + "name": "X", + "qname": "sklearn.pipeline.Pipeline.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "Data to predict on. Must fulfill input requirements of first step\nof the pipeline." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply transforms, and predict_log_proba of the final estimator", + "docstring": "Apply transforms, and predict_log_proba of the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. 
Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_score : array-like of shape (n_samples, n_classes)", + "code": " @if_delegate_has_method(delegate='_final_estimator')\n def predict_log_proba(self, X):\n \"\"\"Apply transforms, and predict_log_proba of the final estimator\n\n Parameters\n ----------\n X : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n Returns\n -------\n y_score : array-like of shape (n_samples, n_classes)\n \"\"\"\n Xt = X\n for _, name, transform in self._iter(with_final=False):\n Xt = transform.transform(Xt)\n return self.steps[-1][-1].predict_log_proba(Xt)" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/predict_proba", + "name": "predict_proba", + "qname": "sklearn.pipeline.Pipeline.predict_proba", + "decorators": ["if_delegate_has_method(delegate='_final_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/predict_proba/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/predict_proba/X", + "name": "X", + "qname": "sklearn.pipeline.Pipeline.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "Data to predict on. Must fulfill input requirements of first step\nof the pipeline." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply transforms, and predict_proba of the final estimator", + "docstring": "Apply transforms, and predict_proba of the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_proba : array-like of shape (n_samples, n_classes)", + "code": " @if_delegate_has_method(delegate='_final_estimator')\n def predict_proba(self, X):\n \"\"\"Apply transforms, and predict_proba of the final estimator\n\n Parameters\n ----------\n X : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n Returns\n -------\n y_proba : array-like of shape (n_samples, n_classes)\n \"\"\"\n Xt = X\n for _, name, transform in self._iter(with_final=False):\n Xt = transform.transform(Xt)\n return self.steps[-1][-1].predict_proba(Xt)" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/score", + "name": "score", + "qname": "sklearn.pipeline.Pipeline.score", + "decorators": ["if_delegate_has_method(delegate='_final_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/score/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/score/X", + "name": "X", + "qname": "sklearn.pipeline.Pipeline.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "Data to predict on. Must fulfill input requirements of first step\nof the pipeline." 
+ }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/score/y", + "name": "y", + "qname": "sklearn.pipeline.Pipeline.score.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "None", + "description": "Targets used for scoring. Must fulfill label requirements for all\nsteps of the pipeline." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/score/sample_weight", + "name": "sample_weight", + "qname": "sklearn.pipeline.Pipeline.score.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like", + "default_value": "None", + "description": "If not None, this argument is passed as ``sample_weight`` keyword\nargument to the ``score`` method of the final estimator." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply transforms, and score with the final estimator", + "docstring": "Apply transforms, and score with the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\ny : iterable, default=None\n Targets used for scoring. Must fulfill label requirements for all\n steps of the pipeline.\n\nsample_weight : array-like, default=None\n If not None, this argument is passed as ``sample_weight`` keyword\n argument to the ``score`` method of the final estimator.\n\nReturns\n-------\nscore : float", + "code": " @if_delegate_has_method(delegate='_final_estimator')\n def score(self, X, y=None, sample_weight=None):\n \"\"\"Apply transforms, and score with the final estimator\n\n Parameters\n ----------\n X : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n y : iterable, default=None\n Targets used for scoring. Must fulfill label requirements for all\n steps of the pipeline.\n\n sample_weight : array-like, default=None\n If not None, this argument is passed as ``sample_weight`` keyword\n argument to the ``score`` method of the final estimator.\n\n Returns\n -------\n score : float\n \"\"\"\n Xt = X\n for _, name, transform in self._iter(with_final=False):\n Xt = transform.transform(Xt)\n score_params = {}\n if sample_weight is not None:\n score_params['sample_weight'] = sample_weight\n return self.steps[-1][-1].score(Xt, y, **score_params)" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/score_samples", + "name": "score_samples", + "qname": "sklearn.pipeline.Pipeline.score_samples", + "decorators": ["if_delegate_has_method(delegate='_final_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/score_samples/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/score_samples/X", + "name": "X", + "qname": "sklearn.pipeline.Pipeline.score_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "Data to predict on. 
Must fulfill input requirements of first step\nof the pipeline." + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply transforms, and score_samples of the final estimator.", + "docstring": "Apply transforms, and score_samples of the final estimator.\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_score : ndarray of shape (n_samples,)", + "code": " @if_delegate_has_method(delegate='_final_estimator')\n def score_samples(self, X):\n \"\"\"Apply transforms, and score_samples of the final estimator.\n\n Parameters\n ----------\n X : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n Returns\n -------\n y_score : ndarray of shape (n_samples,)\n \"\"\"\n Xt = X\n for _, _, transformer in self._iter(with_final=False):\n Xt = transformer.transform(Xt)\n return self.steps[-1][-1].score_samples(Xt)" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/set_params", + "name": "set_params", + "qname": "sklearn.pipeline.Pipeline.set_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/set_params/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.set_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/set_params/kwargs", + "name": "kwargs", + "qname": "sklearn.pipeline.Pipeline.set_params.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that\nyou can directly set the parameters of the estimators contained in\n`steps`.", + "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that\nyou can directly set the parameters of the estimators contained in\n`steps`.\n\nReturns\n-------\nself", + "code": " def set_params(self, **kwargs):\n \"\"\"Set the parameters of this estimator.\n\n Valid parameter keys can be listed with ``get_params()``. 
Note that\n you can directly set the parameters of the estimators contained in\n `steps`.\n\n Returns\n -------\n self\n \"\"\"\n self._set_params('steps', **kwargs)\n return self" + }, + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/transform@getter", + "name": "transform", + "qname": "sklearn.pipeline.Pipeline.transform", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/Pipeline/transform/self", + "name": "self", + "qname": "sklearn.pipeline.Pipeline.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Apply transforms, and transform with the final estimator\n\nThis also works where final estimator is ``None``: all prior\ntransformations are applied.", + "docstring": "Apply transforms, and transform with the final estimator\n\nThis also works where final estimator is ``None``: all prior\ntransformations are applied.\n\nParameters\n----------\nX : iterable\n Data to transform. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\nXt : array-like of shape (n_samples, n_transformed_features)", + "code": " @property\n def transform(self):\n \"\"\"Apply transforms, and transform with the final estimator\n\n This also works where final estimator is ``None``: all prior\n transformations are applied.\n\n Parameters\n ----------\n X : iterable\n Data to transform. Must fulfill input requirements of first step\n of the pipeline.\n\n Returns\n -------\n Xt : array-like of shape (n_samples, n_transformed_features)\n \"\"\"\n # _final_estimator is None or has transform, otherwise attribute error\n # XXX: Handling the None case means we can't use if_delegate_has_method\n if self._final_estimator != 'passthrough':\n self._final_estimator.transform\n return self._transform" + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_one", + "name": "_fit_one", + "qname": "sklearn.pipeline._fit_one", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/_fit_one/transformer", + "name": "transformer", + "qname": "sklearn.pipeline._fit_one.transformer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_one/X", + "name": "X", + "qname": "sklearn.pipeline._fit_one.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_one/y", + "name": "y", + "qname": "sklearn.pipeline._fit_one.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_one/weight", + "name": "weight", + "qname": "sklearn.pipeline._fit_one.weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_one/message_clsname", + "name": "message_clsname", + "qname": "sklearn.pipeline._fit_one.message_clsname", + "default_value": "''", + 
"assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_one/message", + "name": "message", + "qname": "sklearn.pipeline._fit_one.message", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_one/fit_params", + "name": "fit_params", + "qname": "sklearn.pipeline._fit_one.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fits ``transformer`` to ``X`` and ``y``.", + "docstring": "Fits ``transformer`` to ``X`` and ``y``.", + "code": "def _fit_one(transformer,\n X,\n y,\n weight,\n message_clsname='',\n message=None,\n **fit_params):\n \"\"\"\n Fits ``transformer`` to ``X`` and ``y``.\n \"\"\"\n with _print_elapsed_time(message_clsname, message):\n return transformer.fit(X, y, **fit_params)" + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_transform_one", + "name": "_fit_transform_one", + "qname": "sklearn.pipeline._fit_transform_one", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/_fit_transform_one/transformer", + "name": "transformer", + "qname": "sklearn.pipeline._fit_transform_one.transformer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_transform_one/X", + "name": "X", + "qname": "sklearn.pipeline._fit_transform_one.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_transform_one/y", + "name": "y", + "qname": "sklearn.pipeline._fit_transform_one.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_transform_one/weight", + "name": "weight", + "qname": "sklearn.pipeline._fit_transform_one.weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_transform_one/message_clsname", + "name": "message_clsname", + "qname": "sklearn.pipeline._fit_transform_one.message_clsname", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_transform_one/message", + "name": "message", + "qname": "sklearn.pipeline._fit_transform_one.message", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_fit_transform_one/fit_params", + "name": "fit_params", + "qname": 
"sklearn.pipeline._fit_transform_one.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fits ``transformer`` to ``X`` and ``y``. The transformed result is returned\nwith the fitted transformer. If ``weight`` is not ``None``, the result will\nbe multiplied by ``weight``.", + "docstring": "Fits ``transformer`` to ``X`` and ``y``. The transformed result is returned\nwith the fitted transformer. If ``weight`` is not ``None``, the result will\nbe multiplied by ``weight``.", + "code": "def _fit_transform_one(transformer,\n X,\n y,\n weight,\n message_clsname='',\n message=None,\n **fit_params):\n \"\"\"\n Fits ``transformer`` to ``X`` and ``y``. The transformed result is returned\n with the fitted transformer. If ``weight`` is not ``None``, the result will\n be multiplied by ``weight``.\n \"\"\"\n with _print_elapsed_time(message_clsname, message):\n if hasattr(transformer, 'fit_transform'):\n res = transformer.fit_transform(X, y, **fit_params)\n else:\n res = transformer.fit(X, y, **fit_params).transform(X)\n\n if weight is None:\n return res, transformer\n return res * weight, transformer" + }, + { + "id": "scikit-learn/sklearn.pipeline/_name_estimators", + "name": "_name_estimators", + "qname": "sklearn.pipeline._name_estimators", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/_name_estimators/estimators", + "name": "estimators", + "qname": "sklearn.pipeline._name_estimators.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate names for estimators.", + "docstring": "Generate names for estimators.", + "code": "def _name_estimators(estimators):\n \"\"\"Generate names for estimators.\"\"\"\n\n names = [\n estimator\n if isinstance(estimator, str) else type(estimator).__name__.lower()\n for estimator in estimators\n ]\n namecount = defaultdict(int)\n for est, name in zip(estimators, names):\n namecount[name] += 1\n\n for k, v in list(namecount.items()):\n if v == 1:\n del namecount[k]\n\n for i in reversed(range(len(estimators))):\n name = names[i]\n if name in namecount:\n names[i] += \"-%d\" % namecount[name]\n namecount[name] -= 1\n\n return list(zip(names, estimators))" + }, + { + "id": "scikit-learn/sklearn.pipeline/_transform_one", + "name": "_transform_one", + "qname": "sklearn.pipeline._transform_one", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/_transform_one/transformer", + "name": "transformer", + "qname": "sklearn.pipeline._transform_one.transformer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_transform_one/X", + "name": "X", + "qname": "sklearn.pipeline._transform_one.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_transform_one/y", + "name": "y", + "qname": "sklearn.pipeline._transform_one.y", + 
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_transform_one/weight", + "name": "weight", + "qname": "sklearn.pipeline._transform_one.weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.pipeline/_transform_one/fit_params", + "name": "fit_params", + "qname": "sklearn.pipeline._transform_one.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _transform_one(transformer, X, y, weight, **fit_params):\n res = transformer.transform(X)\n # if we have a weight for this transformer, multiply output\n if weight is None:\n return res\n return res * weight" + }, + { + "id": "scikit-learn/sklearn.pipeline/make_pipeline", + "name": "make_pipeline", + "qname": "sklearn.pipeline.make_pipeline", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/make_pipeline/steps", + "name": "steps", + "qname": "sklearn.pipeline.make_pipeline.steps", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": true, + "docstring": { + "type": "list of estimators.", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "list of estimators." + } + }, + { + "id": "scikit-learn/sklearn.pipeline/make_pipeline/memory", + "name": "memory", + "qname": "sklearn.pipeline.make_pipeline.memory", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str or object with the joblib.Memory interface", + "default_value": "None", + "description": "Used to cache the fitted transformers of the pipeline. By default,\nno caching is performed. If a string is given, it is the path to\nthe caching directory. Enabling caching triggers a clone of\nthe transformers before fitting. Therefore, the transformer\ninstance given to the pipeline cannot be inspected\ndirectly. Use the attribute ``named_steps`` or ``steps`` to\ninspect estimators within the pipeline. Caching the\ntransformers is advantageous when fitting is time consuming." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "object with the joblib.Memory interface" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.pipeline/make_pipeline/verbose", + "name": "verbose", + "qname": "sklearn.pipeline.make_pipeline.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the time elapsed while fitting each step will be printed as it\nis completed." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Construct a Pipeline from the given estimators.\n\nThis is a shorthand for the Pipeline constructor; it does not require, and\ndoes not permit, naming the estimators. 
Instead, their names will be set\nto the lowercase of their types automatically.", + "docstring": "Construct a Pipeline from the given estimators.\n\nThis is a shorthand for the Pipeline constructor; it does not require, and\ndoes not permit, naming the estimators. Instead, their names will be set\nto the lowercase of their types automatically.\n\nParameters\n----------\n*steps : list of estimators.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the fitted transformers of the pipeline. By default,\n no caching is performed. If a string is given, it is the path to\n the caching directory. Enabling caching triggers a clone of\n the transformers before fitting. Therefore, the transformer\n instance given to the pipeline cannot be inspected\n directly. Use the attribute ``named_steps`` or ``steps`` to\n inspect estimators within the pipeline. Caching the\n transformers is advantageous when fitting is time consuming.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each step will be printed as it\n is completed.\n\nSee Also\n--------\nPipeline : Class for creating a pipeline of transforms with a final\n estimator.\n\nExamples\n--------\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.preprocessing import StandardScaler\n>>> make_pipeline(StandardScaler(), GaussianNB(priors=None))\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('gaussiannb', GaussianNB())])\n\nReturns\n-------\np : Pipeline", + "code": "def make_pipeline(*steps, memory=None, verbose=False):\n \"\"\"Construct a Pipeline from the given estimators.\n\n This is a shorthand for the Pipeline constructor; it does not require, and\n does not permit, naming the estimators. Instead, their names will be set\n to the lowercase of their types automatically.\n\n Parameters\n ----------\n *steps : list of estimators.\n\n memory : str or object with the joblib.Memory interface, default=None\n Used to cache the fitted transformers of the pipeline. By default,\n no caching is performed. If a string is given, it is the path to\n the caching directory. Enabling caching triggers a clone of\n the transformers before fitting. Therefore, the transformer\n instance given to the pipeline cannot be inspected\n directly. Use the attribute ``named_steps`` or ``steps`` to\n inspect estimators within the pipeline. 
Caching the\n transformers is advantageous when fitting is time consuming.\n\n verbose : bool, default=False\n If True, the time elapsed while fitting each step will be printed as it\n is completed.\n\n See Also\n --------\n Pipeline : Class for creating a pipeline of transforms with a final\n estimator.\n\n Examples\n --------\n >>> from sklearn.naive_bayes import GaussianNB\n >>> from sklearn.preprocessing import StandardScaler\n >>> make_pipeline(StandardScaler(), GaussianNB(priors=None))\n Pipeline(steps=[('standardscaler', StandardScaler()),\n ('gaussiannb', GaussianNB())])\n\n Returns\n -------\n p : Pipeline\n \"\"\"\n return Pipeline(_name_estimators(steps), memory=memory, verbose=verbose)" + }, + { + "id": "scikit-learn/sklearn.pipeline/make_union", + "name": "make_union", + "qname": "sklearn.pipeline.make_union", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.pipeline/make_union/transformers", + "name": "transformers", + "qname": "sklearn.pipeline.make_union.transformers", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": true, + "docstring": { + "type": "list of estimators", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "list of estimators" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/make_union/n_jobs", + "name": "n_jobs", + "qname": "sklearn.pipeline.make_union.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary <n_jobs>`\nfor more details.\n\n.. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.pipeline/make_union/verbose", + "name": "verbose", + "qname": "sklearn.pipeline.make_union.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the time elapsed while fitting each transformer will be\nprinted as it is completed." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Construct a FeatureUnion from the given transformers.\n\nThis is a shorthand for the FeatureUnion constructor; it does not require,\nand does not permit, naming the transformers. Instead, they will be given\nnames automatically based on their types. It also does not allow weighting.", + "docstring": "Construct a FeatureUnion from the given transformers.\n\nThis is a shorthand for the FeatureUnion constructor; it does not require,\nand does not permit, naming the transformers. Instead, they will be given\nnames automatically based on their types. It also does not allow weighting.\n\nParameters\n----------\n*transformers : list of estimators\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n for more details.\n\n .. 
versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nReturns\n-------\nf : FeatureUnion\n\nSee Also\n--------\nFeatureUnion : Class for concatenating the results of multiple transformer\n objects.\n\nExamples\n--------\n>>> from sklearn.decomposition import PCA, TruncatedSVD\n>>> from sklearn.pipeline import make_union\n>>> make_union(PCA(), TruncatedSVD())\n FeatureUnion(transformer_list=[('pca', PCA()),\n ('truncatedsvd', TruncatedSVD())])", + "code": "def make_union(*transformers, n_jobs=None, verbose=False):\n \"\"\"\n Construct a FeatureUnion from the given transformers.\n\n This is a shorthand for the FeatureUnion constructor; it does not require,\n and does not permit, naming the transformers. Instead, they will be given\n names automatically based on their types. It also does not allow weighting.\n\n Parameters\n ----------\n *transformers : list of estimators\n\n n_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\n verbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\n Returns\n -------\n f : FeatureUnion\n\n See Also\n --------\n FeatureUnion : Class for concatenating the results of multiple transformer\n objects.\n\n Examples\n --------\n >>> from sklearn.decomposition import PCA, TruncatedSVD\n >>> from sklearn.pipeline import make_union\n >>> make_union(PCA(), TruncatedSVD())\n FeatureUnion(transformer_list=[('pca', PCA()),\n ('truncatedsvd', TruncatedSVD())])\n \"\"\"\n return FeatureUnion(\n _name_estimators(transformers), n_jobs=n_jobs, verbose=verbose)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._data.Binarizer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._data.Binarizer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/__init__/threshold", + "name": "threshold", + "qname": "sklearn.preprocessing._data.Binarizer.__init__.threshold", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Feature values below or equal to this are replaced by 0, above it by 1.\nThreshold may not be less than 0 for operations on sparse matrices." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/__init__/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.Binarizer.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "set to False to perform inplace binarization and avoid a copy (if\nthe input is already a numpy array or a scipy.sparse CSR matrix)." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Binarize data (set feature values to 0 or 1) according to a threshold.\n\nValues greater than the threshold map to 1, while values less than\nor equal to the threshold map to 0. With the default threshold of 0,\nonly positive values map to 1.\n\nBinarization is a common operation on text count data where the\nanalyst can decide to only consider the presence or absence of a\nfeature rather than a quantified number of occurrences for instance.\n\nIt can also be used as a pre-processing step for estimators that\nconsider boolean random variables (e.g. modelled using the Bernoulli\ndistribution in a Bayesian setting).\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, threshold=0.0, copy=True):\n self.threshold = threshold\n self.copy = copy" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._data.Binarizer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._data.Binarizer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'stateless': True}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/fit", + "name": "fit", + "qname": "sklearn.preprocessing._data.Binarizer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.Binarizer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.Binarizer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.Binarizer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored." 
+ }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.", + "docstring": "Do nothing and return the estimator unchanged.\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer.", + "code": " def fit(self, X, y=None):\n \"\"\"Do nothing and return the estimator unchanged.\n\n This method is just there to implement the usual API and hence\n work in pipelines.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted transformer.\n \"\"\"\n self._validate_data(X, accept_sparse='csr')\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/transform", + "name": "transform", + "qname": "sklearn.preprocessing._data.Binarizer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.Binarizer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.Binarizer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to binarize, element by element.\nscipy.sparse matrices should be in CSR format to avoid an\nun-necessary copy." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Binarizer/transform/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.Binarizer.transform.copy", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Copy the input X or not." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Binarize each element of X.", + "docstring": "Binarize each element of X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to binarize, element by element.\n scipy.sparse matrices should be in CSR format to avoid an\n un-necessary copy.\n\ncopy : bool\n Copy the input X or not.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.", + "code": " def transform(self, X, copy=None):\n \"\"\"Binarize each element of X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to binarize, element by element.\n scipy.sparse matrices should be in CSR format to avoid an\n un-necessary copy.\n\n copy : bool\n Copy the input X or not.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n copy = copy if copy is not None else self.copy\n # TODO: This should be refactored because binarize also calls\n # check_array\n X = self._validate_data(X, accept_sparse=['csr', 'csc'], copy=copy,\n reset=False)\n return binarize(X, threshold=self.threshold, copy=False)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._data.KernelCenterer.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._data.KernelCenterer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Center a kernel matrix.\n\nLet K(x, z) be a kernel defined by phi(x)^T phi(z), where phi is a\nfunction mapping x to a Hilbert space. 
KernelCenterer centers (i.e.,\nnormalizes to have zero mean) the data without explicitly computing phi(x).\nIt is equivalent to centering phi(x) with\nsklearn.preprocessing.StandardScaler(with_std=False).\n\nRead more in the :ref:`User Guide <kernel_centering>`.", + "docstring": "", + "code": " def __init__(self):\n # Needed for backported inspect.signature compatibility with PyPy\n pass" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._data.KernelCenterer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._data.KernelCenterer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'pairwise': True}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.preprocessing._data.KernelCenterer._pairwise", + "decorators": [ + "deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1.')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/_pairwise/self", + "name": "self", + "qname": "sklearn.preprocessing._data.KernelCenterer._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1.\")\n @property\n def _pairwise(self):\n return True" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/fit", + "name": "fit", + "qname": "sklearn.preprocessing._data.KernelCenterer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.KernelCenterer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/fit/K", + "name": "K", + "qname": "sklearn.preprocessing._data.KernelCenterer.fit.K", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_samples)", + "default_value": "", + "description": "Kernel matrix." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.KernelCenterer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored." 
+ }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit KernelCenterer", + "docstring": "Fit KernelCenterer\n\nParameters\n----------\nK : ndarray of shape (n_samples, n_samples)\n Kernel matrix.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer.", + "code": " def fit(self, K, y=None):\n \"\"\"Fit KernelCenterer\n\n Parameters\n ----------\n K : ndarray of shape (n_samples, n_samples)\n Kernel matrix.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted transformer.\n \"\"\"\n\n K = self._validate_data(K, dtype=FLOAT_DTYPES)\n\n if K.shape[0] != K.shape[1]:\n raise ValueError(\"Kernel matrix must be a square matrix.\"\n \" Input is a {}x{} matrix.\"\n .format(K.shape[0], K.shape[1]))\n\n n_samples = K.shape[0]\n self.K_fit_rows_ = np.sum(K, axis=0) / n_samples\n self.K_fit_all_ = self.K_fit_rows_.sum() / n_samples\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/transform", + "name": "transform", + "qname": "sklearn.preprocessing._data.KernelCenterer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.KernelCenterer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/transform/K", + "name": "K", + "qname": "sklearn.preprocessing._data.KernelCenterer.transform.K", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples1, n_samples2)", + "default_value": "", + "description": "Kernel matrix." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples1, n_samples2)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/KernelCenterer/transform/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.KernelCenterer.transform.copy", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to False to perform inplace computation." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Center kernel matrix.", + "docstring": "Center kernel matrix.\n\nParameters\n----------\nK : ndarray of shape (n_samples1, n_samples2)\n Kernel matrix.\n\ncopy : bool, default=True\n Set to False to perform inplace computation.\n\nReturns\n-------\nK_new : ndarray of shape (n_samples1, n_samples2)", + "code": " def transform(self, K, copy=True):\n \"\"\"Center kernel matrix.\n\n Parameters\n ----------\n K : ndarray of shape (n_samples1, n_samples2)\n Kernel matrix.\n\n copy : bool, default=True\n Set to False to perform inplace computation.\n\n Returns\n -------\n K_new : ndarray of shape (n_samples1, n_samples2)\n \"\"\"\n check_is_fitted(self)\n\n K = self._validate_data(K, copy=copy, dtype=FLOAT_DTYPES, reset=False)\n\n K_pred_cols = (np.sum(K, axis=1) /\n self.K_fit_rows_.shape[0])[:, np.newaxis]\n\n K -= self.K_fit_rows_\n K -= K_pred_cols\n K += self.K_fit_all_\n\n return K" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/__init__/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to False to perform inplace scaling and avoid a copy (if the input\nis already a numpy array)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Scale each feature by its maximum absolute value.\n\nThis estimator scales and translates each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0. It does not shift/center the data, and\nthus does not destroy any sparsity.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.\n\n.. 
versionadded:: 0.17", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, copy=True):\n self.copy = copy" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._data.MaxAbsScaler._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MaxAbsScaler._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'allow_nan': True}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/_reset", + "name": "_reset", + "qname": "sklearn.preprocessing._data.MaxAbsScaler._reset", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/_reset/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MaxAbsScaler._reset.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Reset internal data-dependent state of the scaler, if necessary.\n\n__init__ parameters are not touched.", + "docstring": "Reset internal data-dependent state of the scaler, if necessary.\n\n__init__ parameters are not touched.", + "code": " def _reset(self):\n \"\"\"Reset internal data-dependent state of the scaler, if necessary.\n\n __init__ parameters are not touched.\n \"\"\"\n\n # Checking one attribute is enough, becase they are all set together\n # in partial_fit\n if hasattr(self, 'scale_'):\n del self.scale_\n del self.n_samples_seen_\n del self.max_abs_" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/fit", + "name": "fit", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to compute the per-feature minimum and maximum\nused for later scaling along the features axis." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored." + }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the maximum absolute value to be used for later scaling.", + "docstring": "Compute the maximum absolute value to be used for later scaling.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the per-feature minimum and maximum\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler.", + "code": " def fit(self, X, y=None):\n \"\"\"Compute the maximum absolute value to be used for later scaling.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the per-feature minimum and maximum\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n # Reset internal state before fitting\n self._reset()\n return self.partial_fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/inverse_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data that should be transformed back." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Scale back the data to the original representation", + "docstring": "Scale back the data to the original representation\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data that should be transformed back.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.", + "code": " def inverse_transform(self, X):\n \"\"\"Scale back the data to the original representation\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data that should be transformed back.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,\n estimator=self, dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n\n if sparse.issparse(X):\n inplace_column_scale(X, self.scale_)\n else:\n X *= self.scale_\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/partial_fit", + "name": "partial_fit", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/partial_fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/partial_fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to compute the mean and standard deviation\nused for later scaling along the features axis." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/partial_fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.partial_fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored." + }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Online computation of max absolute value of X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.", + "docstring": "Online computation of max absolute value of X for later scaling.\n\nAll of X is processed as a single batch. 
This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler.", + "code": " def partial_fit(self, X, y=None):\n \"\"\"\n Online computation of max absolute value of X for later scaling.\n\n All of X is processed as a single batch. This is intended for cases\n when :meth:`fit` is not feasible due to very large number of\n `n_samples` or because X is read from a continuous stream.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n first_pass = not hasattr(self, 'n_samples_seen_')\n X = self._validate_data(X, reset=first_pass,\n accept_sparse=('csr', 'csc'), estimator=self,\n dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n\n if sparse.issparse(X):\n mins, maxs = min_max_axis(X, axis=0, ignore_nan=True)\n max_abs = np.maximum(np.abs(mins), np.abs(maxs))\n else:\n max_abs = np.nanmax(np.abs(X), axis=0)\n\n if first_pass:\n self.n_samples_seen_ = X.shape[0]\n else:\n max_abs = np.maximum(self.max_abs_, max_abs)\n self.n_samples_seen_ += X.shape[0]\n\n self.max_abs_ = max_abs\n self.scale_ = _handle_zeros_in_scale(max_abs)\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/transform", + "name": "transform", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MaxAbsScaler/transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.MaxAbsScaler.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data that should be scaled." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Scale the data", + "docstring": "Scale the data\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data that should be scaled.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.", + "code": " def transform(self, X):\n \"\"\"Scale the data\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data that should be scaled.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse=('csr', 'csc'),\n copy=self.copy, reset=False,\n estimator=self, dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n\n if sparse.issparse(X):\n inplace_column_scale(X, 1.0 / self.scale_)\n else:\n X /= self.scale_\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._data.MinMaxScaler.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MinMaxScaler.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/__init__/feature_range", + "name": "feature_range", + "qname": "sklearn.preprocessing._data.MinMaxScaler.__init__.feature_range", + "default_value": "(0, 1)", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tuple (min, max)", + "default_value": "(0, 1)", + "description": "Desired range of transformed data." + }, + "type": { + "kind": "NamedType", + "name": "tuple (min, max)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/__init__/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.MinMaxScaler.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to False to perform inplace row normalization and avoid a\ncopy (if the input is already a numpy array)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/__init__/clip", + "name": "clip", + "qname": "sklearn.preprocessing._data.MinMaxScaler.__init__.clip", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Set to True to clip transformed values of held-out data to\nprovided `feature range`.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, e.g. 
between\nzero and one.\n\nThe transformation is given by::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, feature_range=(0, 1), *, copy=True, clip=False):\n self.feature_range = feature_range\n self.copy = copy\n self.clip = clip" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._data.MinMaxScaler._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MinMaxScaler._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'allow_nan': True}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/_reset", + "name": "_reset", + "qname": "sklearn.preprocessing._data.MinMaxScaler._reset", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/_reset/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MinMaxScaler._reset.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Reset internal data-dependent state of the scaler, if necessary.\n\n__init__ parameters are not touched.", + "docstring": "Reset internal data-dependent state of the scaler, if necessary.\n\n__init__ parameters are not touched.", + "code": " def _reset(self):\n \"\"\"Reset internal data-dependent state of the scaler, if necessary.\n\n __init__ parameters are not touched.\n \"\"\"\n\n # Checking one attribute is enough, becase they are all set together\n # in partial_fit\n if hasattr(self, 'scale_'):\n del self.scale_\n del self.min_\n del self.n_samples_seen_\n del self.data_min_\n del self.data_max_\n del self.data_range_" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/fit", + "name": "fit", + "qname": "sklearn.preprocessing._data.MinMaxScaler.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MinMaxScaler.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.MinMaxScaler.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to compute the per-feature minimum and maximum\nused for later scaling along the features axis." 
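The MinMaxScaler description above states the transformation as X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0)) followed by X_scaled = X_std * (max - min) + min. A small check, with toy values and the default feature_range=(0, 1), that the estimator agrees with the formula written out by hand:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

X = np.array([[1.0, 10.0], [2.0, 30.0], [4.0, 20.0]])
f_min, f_max = 0.0, 1.0  # default feature_range

# The documented formula, written out directly.
X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
X_scaled = X_std * (f_max - f_min) + f_min

est = MinMaxScaler(feature_range=(f_min, f_max)).fit(X)
assert np.allclose(est.transform(X), X_scaled)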
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.MinMaxScaler.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored." + }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the minimum and maximum to be used for later scaling.", + "docstring": "Compute the minimum and maximum to be used for later scaling.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data used to compute the per-feature minimum and maximum\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler.", + "code": " def fit(self, X, y=None):\n \"\"\"Compute the minimum and maximum to be used for later scaling.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data used to compute the per-feature minimum and maximum\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n\n # Reset internal state before fitting\n self._reset()\n return self.partial_fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.preprocessing._data.MinMaxScaler.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MinMaxScaler.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/inverse_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.MinMaxScaler.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data that will be transformed. It cannot be sparse." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Undo the scaling of X according to feature_range.", + "docstring": "Undo the scaling of X according to feature_range.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data that will be transformed. It cannot be sparse.\n\nReturns\n-------\nXt : ndarray of shape (n_samples, n_features)\n Transformed data.", + "code": " def inverse_transform(self, X):\n \"\"\"Undo the scaling of X according to feature_range.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data that will be transformed. 
It cannot be sparse.\n\n Returns\n -------\n Xt : ndarray of shape (n_samples, n_features)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n\n X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES,\n force_all_finite=\"allow-nan\")\n\n X -= self.min_\n X /= self.scale_\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/partial_fit", + "name": "partial_fit", + "qname": "sklearn.preprocessing._data.MinMaxScaler.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/partial_fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MinMaxScaler.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/partial_fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.MinMaxScaler.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to compute the mean and standard deviation\nused for later scaling along the features axis." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/partial_fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.MinMaxScaler.partial_fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored." + }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Online computation of min and max on X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.", + "docstring": "Online computation of min and max on X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler.", + "code": " def partial_fit(self, X, y=None):\n \"\"\"Online computation of min and max on X for later scaling.\n\n All of X is processed as a single batch. This is intended for cases\n when :meth:`fit` is not feasible due to very large number of\n `n_samples` or because X is read from a continuous stream.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n feature_range = self.feature_range\n if feature_range[0] >= feature_range[1]:\n raise ValueError(\"Minimum of desired feature range must be smaller\"\n \" than maximum. 
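The inverse_transform code above undoes the fitted affine map (subtract min_, divide by scale_), so transform followed by inverse_transform recovers the original array. A round-trip check on toy dense data:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

X = np.array([[1.0, -5.0], [3.0, 0.0], [2.0, 5.0]])
est = MinMaxScaler().fit(X)

# X -> X * scale_ + min_ -> (X - min_) / scale_ == X
assert np.allclose(est.inverse_transform(est.transform(X)), X)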
Got %s.\" % str(feature_range))\n\n if sparse.issparse(X):\n raise TypeError(\"MinMaxScaler does not support sparse input. \"\n \"Consider using MaxAbsScaler instead.\")\n\n first_pass = not hasattr(self, 'n_samples_seen_')\n X = self._validate_data(X, reset=first_pass,\n estimator=self, dtype=FLOAT_DTYPES,\n force_all_finite=\"allow-nan\")\n\n data_min = np.nanmin(X, axis=0)\n data_max = np.nanmax(X, axis=0)\n\n if first_pass:\n self.n_samples_seen_ = X.shape[0]\n else:\n data_min = np.minimum(self.data_min_, data_min)\n data_max = np.maximum(self.data_max_, data_max)\n self.n_samples_seen_ += X.shape[0]\n\n data_range = data_max - data_min\n self.scale_ = ((feature_range[1] - feature_range[0]) /\n _handle_zeros_in_scale(data_range))\n self.min_ = feature_range[0] - data_min * self.scale_\n self.data_min_ = data_min\n self.data_max_ = data_max\n self.data_range_ = data_range\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/transform", + "name": "transform", + "qname": "sklearn.preprocessing._data.MinMaxScaler.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.MinMaxScaler.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/MinMaxScaler/transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.MinMaxScaler.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data that will be transformed." 
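partial_fit above enforces two preconditions visible in the quoted code: feature_range must be increasing, and sparse input is rejected in favour of MaxAbsScaler. A sketch triggering both error paths with toy input:

import numpy as np
from scipy import sparse
from sklearn.preprocessing import MinMaxScaler

X = np.eye(3)

try:
    MinMaxScaler(feature_range=(1, 0)).fit(X)  # min >= max is invalid
except ValueError as e:
    print(e)  # "Minimum of desired feature range must be smaller than maximum. ..."

try:
    MinMaxScaler().fit(sparse.csr_matrix(X))   # sparse input is unsupported
except TypeError as e:
    print(e)  # "MinMaxScaler does not support sparse input. Consider using MaxAbsScaler instead."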
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Scale features of X according to feature_range.", + "docstring": "Scale features of X according to feature_range.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data that will be transformed.\n\nReturns\n-------\nXt : ndarray of shape (n_samples, n_features)\n Transformed data.", + "code": " def transform(self, X):\n \"\"\"Scale features of X according to feature_range.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data that will be transformed.\n\n Returns\n -------\n Xt : ndarray of shape (n_samples, n_features)\n Transformed data.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, copy=self.copy, dtype=FLOAT_DTYPES,\n force_all_finite=\"allow-nan\", reset=False)\n\n X *= self.scale_\n X += self.min_\n if self.clip:\n np.clip(X, self.feature_range[0], self.feature_range[1], out=X)\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._data.Normalizer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._data.Normalizer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/__init__/norm", + "name": "norm", + "qname": "sklearn.preprocessing._data.Normalizer.__init__.norm", + "default_value": "'l2'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'l1', 'l2', 'max'}", + "default_value": "'l2'", + "description": "The norm to use to normalize each non zero sample. If norm='max'\nis used, values will be rescaled by the maximum of the absolute\nvalues." + }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1", "max"] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/__init__/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.Normalizer.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "set to False to perform inplace row normalization and avoid a\ncopy (if the input is already a numpy array or a scipy.sparse\nCSR matrix)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Normalize samples individually to unit norm.\n\nEach sample (i.e. each row of the data matrix) with at least one\nnon zero component is rescaled independently of other samples so\nthat its norm (l1, l2 or inf) equals one.\n\nThis transformer is able to work both with dense numpy arrays and\nscipy.sparse matrix (use CSR format if you want to avoid the burden of\na copy / conversion).\n\nScaling inputs to unit norms is a common operation for text\nclassification or clustering for instance. 
For instance the dot\nproduct of two l2-normalized TF-IDF vectors is the cosine similarity\nof the vectors and is the base similarity metric for the Vector\nSpace Model commonly used by the Information Retrieval community.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, norm='l2', *, copy=True):\n self.norm = norm\n self.copy = copy" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._data.Normalizer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._data.Normalizer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'stateless': True}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/fit", + "name": "fit", + "qname": "sklearn.preprocessing._data.Normalizer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.Normalizer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.Normalizer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to estimate the normalization parameters." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.Normalizer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored." 
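The Normalizer description above claims that the dot product of two l2-normalized vectors is their cosine similarity. A minimal check of that claim against plain numpy, with toy vectors:

import numpy as np
from sklearn.preprocessing import Normalizer

X = np.array([[3.0, 4.0], [1.0, 0.0]])
Xn = Normalizer(norm='l2').fit_transform(X)

# Every non-zero row now has unit l2 norm ...
assert np.allclose(np.linalg.norm(Xn, axis=1), 1.0)

# ... so a row dot product equals the cosine similarity of the original rows.
cos = X[0] @ X[1] / (np.linalg.norm(X[0]) * np.linalg.norm(X[1]))
assert np.isclose(Xn[0] @ Xn[1], cos)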
+ }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Do nothing and return the estimator unchanged\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.", + "docstring": "Do nothing and return the estimator unchanged\n\nThis method is just there to implement the usual API and hence\nwork in pipelines.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to estimate the normalization parameters.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer.", + "code": " def fit(self, X, y=None):\n \"\"\"Do nothing and return the estimator unchanged\n\n This method is just there to implement the usual API and hence\n work in pipelines.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to estimate the normalization parameters.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted transformer.\n \"\"\"\n self._validate_data(X, accept_sparse='csr')\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/transform", + "name": "transform", + "qname": "sklearn.preprocessing._data.Normalizer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.Normalizer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.Normalizer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to normalize, row by row. scipy.sparse matrices should be\nin CSR format to avoid an un-necessary copy." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/Normalizer/transform/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.Normalizer.transform.copy", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "None", + "description": "Copy the input X or not." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Scale each non zero row of X to unit norm", + "docstring": "Scale each non zero row of X to unit norm\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to normalize, row by row. 
scipy.sparse matrices should be\n in CSR format to avoid an un-necessary copy.\n\ncopy : bool, default=None\n Copy the input X or not.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.", + "code": " def transform(self, X, copy=None):\n \"\"\"Scale each non zero row of X to unit norm\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to normalize, row by row. scipy.sparse matrices should be\n in CSR format to avoid an un-necessary copy.\n\n copy : bool, default=None\n Copy the input X or not.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n copy = copy if copy is not None else self.copy\n X = self._validate_data(X, accept_sparse='csr', reset=False)\n return normalize(X, norm=self.norm, axis=1, copy=copy)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/__init__/degree", + "name": "degree", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.__init__.degree", + "default_value": "2", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "The degree of the polynomial features." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/__init__/interaction_only", + "name": "interaction_only", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.__init__.interaction_only", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If true, only interaction features are produced: features that are\nproducts of at most ``degree`` *distinct* input features (so not\n``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/__init__/include_bias", + "name": "include_bias", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.__init__.include_bias", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True (default), then include a bias column, the feature in which\nall polynomial powers are zero (i.e. a column of ones - acts as an\nintercept term in a linear model)." 
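As the fit entry above notes, Normalizer learns nothing from the data (it is tagged 'stateless'), and transform delegates to the normalize function. A sketch showing that fitting on one array has no effect on how another is transformed:

import numpy as np
from sklearn.preprocessing import Normalizer, normalize

A = np.array([[2.0, 0.0]])
B = np.array([[0.0, 5.0], [3.0, 4.0]])

t = Normalizer(norm='l1').fit(A)  # fit only validates; it stores nothing about A
out = t.transform(B)

# transform delegates to normalize(), as in the quoted code
assert np.allclose(out, normalize(B, norm='l1', axis=1))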
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/__init__/order", + "name": "order", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.__init__.order", + "default_value": "'C'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'C', 'F'}", + "default_value": "'C'", + "description": "Order of output array in the dense case. 'F' order is faster to\ncompute, but may slow down subsequent estimators.\n\n.. versionadded:: 0.21" + }, + "type": { + "kind": "EnumType", + "values": ["F", "C"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate polynomial and interaction features.\n\nGenerate a new feature matrix consisting of all polynomial combinations\nof the features with degree less than or equal to the specified degree.\nFor example, if an input sample is two dimensional and of the form\n[a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, degree=2, *, interaction_only=False, include_bias=True,\n order='C'):\n self.degree = degree\n self.interaction_only = interaction_only\n self.include_bias = include_bias\n self.order = order" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/_combinations", + "name": "_combinations", + "qname": "sklearn.preprocessing._data.PolynomialFeatures._combinations", + "decorators": ["staticmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/_combinations/n_features", + "name": "n_features", + "qname": "sklearn.preprocessing._data.PolynomialFeatures._combinations.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/_combinations/degree", + "name": "degree", + "qname": "sklearn.preprocessing._data.PolynomialFeatures._combinations.degree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/_combinations/interaction_only", + "name": "interaction_only", + "qname": "sklearn.preprocessing._data.PolynomialFeatures._combinations.interaction_only", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/_combinations/include_bias", + "name": "include_bias", + "qname": "sklearn.preprocessing._data.PolynomialFeatures._combinations.include_bias", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @staticmethod\n def _combinations(n_features, degree, interaction_only, include_bias):\n comb = (combinations if interaction_only else combinations_w_r)\n start = int(not include_bias)\n return chain.from_iterable(comb(range(n_features), i)\n for i in range(start, degree + 1))" + }, + { + 
"id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/fit", + "name": "fit", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored." + }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute number of output features.", + "docstring": "Compute number of output features.\n\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer.", + "code": " def fit(self, X, y=None):\n \"\"\"\n Compute number of output features.\n\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted transformer.\n \"\"\"\n n_samples, n_features = self._validate_data(\n X, accept_sparse=True).shape\n combinations = self._combinations(n_features, self.degree,\n self.interaction_only,\n self.include_bias)\n self.n_input_features_ = n_features\n self.n_output_features_ = sum(1 for _ in combinations)\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/get_feature_names", + "name": "get_feature_names", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.get_feature_names", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/get_feature_names/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.get_feature_names.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/get_feature_names/input_features", + "name": "input_features", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.get_feature_names.input_features", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of str of shape (n_features,)", + "default_value": "None", + "description": "String names for input features if available. 
By default,\n\"x0\", \"x1\", ... \"xn_features\" is used." + }, + "type": { + "kind": "NamedType", + "name": "list of str of shape (n_features,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return feature names for output features", + "docstring": "Return feature names for output features\n\nParameters\n----------\ninput_features : list of str of shape (n_features,), default=None\n String names for input features if available. By default,\n \"x0\", \"x1\", ... \"xn_features\" is used.\n\nReturns\n-------\noutput_feature_names : list of str of shape (n_output_features,)", + "code": " def get_feature_names(self, input_features=None):\n \"\"\"\n Return feature names for output features\n\n Parameters\n ----------\n input_features : list of str of shape (n_features,), default=None\n String names for input features if available. By default,\n \"x0\", \"x1\", ... \"xn_features\" is used.\n\n Returns\n -------\n output_feature_names : list of str of shape (n_output_features,)\n \"\"\"\n powers = self.powers_\n if input_features is None:\n input_features = ['x%d' % i for i in range(powers.shape[1])]\n feature_names = []\n for row in powers:\n inds = np.where(row)[0]\n if len(inds):\n name = \" \".join(\"%s^%d\" % (input_features[ind], exp)\n if exp != 1 else input_features[ind]\n for ind, exp in zip(inds, row[inds]))\n else:\n name = \"1\"\n feature_names.append(name)\n return feature_names" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/powers_@getter", + "name": "powers_", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.powers_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/powers_/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.powers_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def powers_(self):\n check_is_fitted(self)\n\n combinations = self._combinations(self.n_input_features_, self.degree,\n self.interaction_only,\n self.include_bias)\n return np.vstack([np.bincount(c, minlength=self.n_input_features_)\n for c in combinations])" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/transform", + "name": "transform", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PolynomialFeatures/transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.PolynomialFeatures.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to transform, row by row.\n\nPrefer CSR over CSC for sparse input (for speed), but CSC is\nrequired if the degree is 4 or higher. 
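get_feature_names above renders one name per row of the powers_ matrix, joining per-feature exponents and falling back to "1" for the all-zero bias row. For example, with two named inputs:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

pf = PolynomialFeatures(degree=2).fit(np.zeros((1, 2)))
print(pf.powers_)
# rows (0,0) (1,0) (0,1) (2,0) (1,1) (0,2): one exponent vector per output column
print(pf.get_feature_names(['a', 'b']))
# ['1', 'a', 'b', 'a^2', 'a b', 'b^2']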
If the degree is less than\n4 and the input format is CSC, it will be converted to CSR, have\nits polynomial features generated, then converted back to CSC.\n\nIf the degree is 2 or 3, the method described in \"Leveraging\nSparsity to Speed Up Polynomial Feature Expansions of CSR Matrices\nUsing K-Simplex Numbers\" by Andrew Nystrom and John Hughes is\nused, which is much faster than the method used on CSC input. For\nthis reason, a CSC input will be converted to CSR, and the output\nwill be converted back to CSC prior to being returned, hence the\npreference of CSR." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform data to polynomial features", + "docstring": "Transform data to polynomial features\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to transform, row by row.\n\n Prefer CSR over CSC for sparse input (for speed), but CSC is\n required if the degree is 4 or higher. If the degree is less than\n 4 and the input format is CSC, it will be converted to CSR, have\n its polynomial features generated, then converted back to CSC.\n\n If the degree is 2 or 3, the method described in \"Leveraging\n Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices\n Using K-Simplex Numbers\" by Andrew Nystrom and John Hughes is\n used, which is much faster than the method used on CSC input. For\n this reason, a CSC input will be converted to CSR, and the output\n will be converted back to CSC prior to being returned, hence the\n preference of CSR.\n\nReturns\n-------\nXP : {ndarray, sparse matrix} of shape (n_samples, NP)\n The matrix of features, where NP is the number of polynomial\n features generated from the combination of inputs. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csr_matrix``.", + "code": " def transform(self, X):\n \"\"\"Transform data to polynomial features\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to transform, row by row.\n\n Prefer CSR over CSC for sparse input (for speed), but CSC is\n required if the degree is 4 or higher. If the degree is less than\n 4 and the input format is CSC, it will be converted to CSR, have\n its polynomial features generated, then converted back to CSC.\n\n If the degree is 2 or 3, the method described in \"Leveraging\n Sparsity to Speed Up Polynomial Feature Expansions of CSR Matrices\n Using K-Simplex Numbers\" by Andrew Nystrom and John Hughes is\n used, which is much faster than the method used on CSC input. For\n this reason, a CSC input will be converted to CSR, and the output\n will be converted back to CSC prior to being returned, hence the\n preference of CSR.\n\n Returns\n -------\n XP : {ndarray, sparse matrix} of shape (n_samples, NP)\n The matrix of features, where NP is the number of polynomial\n features generated from the combination of inputs. 
If a sparse\n matrix is provided, it will be converted into a sparse\n ``csr_matrix``.\n \"\"\"\n check_is_fitted(self)\n\n X = self._validate_data(X, order='F', dtype=FLOAT_DTYPES, reset=False,\n accept_sparse=('csr', 'csc'))\n\n n_samples, n_features = X.shape\n\n if n_features != self.n_input_features_:\n raise ValueError(\"X shape does not match training shape\")\n\n if sparse.isspmatrix_csr(X):\n if self.degree > 3:\n return self.transform(X.tocsc()).tocsr()\n to_stack = []\n if self.include_bias:\n to_stack.append(np.ones(shape=(n_samples, 1), dtype=X.dtype))\n to_stack.append(X)\n for deg in range(2, self.degree+1):\n Xp_next = _csr_polynomial_expansion(X.data, X.indices,\n X.indptr, X.shape[1],\n self.interaction_only,\n deg)\n if Xp_next is None:\n break\n to_stack.append(Xp_next)\n XP = sparse.hstack(to_stack, format='csr')\n elif sparse.isspmatrix_csc(X) and self.degree < 4:\n return self.transform(X.tocsr()).tocsc()\n else:\n if sparse.isspmatrix(X):\n combinations = self._combinations(n_features, self.degree,\n self.interaction_only,\n self.include_bias)\n columns = []\n for comb in combinations:\n if comb:\n out_col = 1\n for col_idx in comb:\n out_col = X[:, col_idx].multiply(out_col)\n columns.append(out_col)\n else:\n bias = sparse.csc_matrix(np.ones((X.shape[0], 1)))\n columns.append(bias)\n XP = sparse.hstack(columns, dtype=X.dtype).tocsc()\n else:\n XP = np.empty((n_samples, self.n_output_features_),\n dtype=X.dtype, order=self.order)\n\n # What follows is a faster implementation of:\n # for i, comb in enumerate(combinations):\n # XP[:, i] = X[:, comb].prod(1)\n # This implementation uses two optimisations.\n # First one is broadcasting,\n # multiply ([X1, ..., Xn], X1) -> [X1 X1, ..., Xn X1]\n # multiply ([X2, ..., Xn], X2) -> [X2 X2, ..., Xn X2]\n # ...\n # multiply ([X[:, start:end], X[:, start]) -> ...\n # Second optimisation happens for degrees >= 3.\n # Xi^3 is computed reusing previous computation:\n # Xi^3 = Xi^2 * Xi.\n\n if self.include_bias:\n XP[:, 0] = 1\n current_col = 1\n else:\n current_col = 0\n\n # d = 0\n XP[:, current_col:current_col + n_features] = X\n index = list(range(current_col,\n current_col + n_features))\n current_col += n_features\n index.append(current_col)\n\n # d >= 1\n for _ in range(1, self.degree):\n new_index = []\n end = index[-1]\n for feature_idx in range(n_features):\n start = index[feature_idx]\n new_index.append(current_col)\n if self.interaction_only:\n start += (index[feature_idx + 1] -\n index[feature_idx])\n next_col = current_col + end - start\n if next_col <= current_col:\n break\n # XP[:, start:end] are terms of degree d - 1\n # that exclude feature #feature_idx.\n np.multiply(XP[:, start:end],\n X[:, feature_idx:feature_idx + 1],\n out=XP[:, current_col:next_col],\n casting='no')\n current_col = next_col\n\n new_index.append(current_col)\n index = new_index\n\n return XP" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._data.PowerTransformer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PowerTransformer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
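The transform code above takes a fast CSR expansion path for degree <= 3 and a generic path for dense input, but both are expected to produce the same feature matrix. A quick equivalence check, assuming a small array with some zeros:

import numpy as np
from scipy import sparse
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[1.0, 0.0, 2.0], [0.0, 3.0, 0.0]])
pf = PolynomialFeatures(degree=3).fit(X)

dense_out = pf.transform(X)                      # dense code path
sparse_out = pf.transform(sparse.csr_matrix(X))  # CSR fast path (degree <= 3)
assert np.allclose(dense_out, sparse_out.toarray())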
"scikit-learn/sklearn.preprocessing._data/PowerTransformer/__init__/method", + "name": "method", + "qname": "sklearn.preprocessing._data.PowerTransformer.__init__.method", + "default_value": "'yeo-johnson'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'yeo-johnson', 'box-cox'}", + "default_value": "'yeo-johnson'", + "description": "The power transform method. Available methods are:\n\n- 'yeo-johnson' [1]_, works with positive and negative values\n- 'box-cox' [2]_, only works with strictly positive values" + }, + "type": { + "kind": "EnumType", + "values": ["box-cox", "yeo-johnson"] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/__init__/standardize", + "name": "standardize", + "qname": "sklearn.preprocessing._data.PowerTransformer.__init__.standardize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to True to apply zero-mean, unit-variance normalization to the\ntransformed output." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/__init__/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.PowerTransformer.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to False to perform inplace computation during transformation." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply a power transform featurewise to make data more Gaussian-like.\n\nPower transforms are a family of parametric, monotonic transformations\nthat are applied to make data more Gaussian-like. This is useful for\nmodeling issues related to heteroscedasticity (non-constant variance),\nor other situations where normality is desired.\n\nCurrently, PowerTransformer supports the Box-Cox transform and the\nYeo-Johnson transform. The optimal parameter for stabilizing variance and\nminimizing skewness is estimated through maximum likelihood.\n\nBox-Cox requires input data to be strictly positive, while Yeo-Johnson\nsupports both positive or negative data.\n\nBy default, zero-mean, unit-variance normalization is applied to the\ntransformed data.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.20", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, method='yeo-johnson', *, standardize=True, copy=True):\n self.method = method\n self.standardize = standardize\n self.copy = copy" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_box_cox_inverse_tranform", + "name": "_box_cox_inverse_tranform", + "qname": "sklearn.preprocessing._data.PowerTransformer._box_cox_inverse_tranform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_box_cox_inverse_tranform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PowerTransformer._box_cox_inverse_tranform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_box_cox_inverse_tranform/x", + "name": "x", + "qname": "sklearn.preprocessing._data.PowerTransformer._box_cox_inverse_tranform.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_box_cox_inverse_tranform/lmbda", + "name": "lmbda", + "qname": "sklearn.preprocessing._data.PowerTransformer._box_cox_inverse_tranform.lmbda", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return inverse-transformed input x following Box-Cox inverse\ntransform with parameter lambda.", + "docstring": "Return inverse-transformed input x following Box-Cox inverse\ntransform with parameter lambda.", + "code": " def _box_cox_inverse_tranform(self, x, lmbda):\n \"\"\"Return inverse-transformed input x following Box-Cox inverse\n transform with parameter lambda.\n \"\"\"\n if lmbda == 0:\n x_inv = np.exp(x)\n else:\n x_inv = (x * lmbda + 1) ** (1 / lmbda)\n\n return x_inv" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_box_cox_optimize", + "name": "_box_cox_optimize", + "qname": "sklearn.preprocessing._data.PowerTransformer._box_cox_optimize", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_box_cox_optimize/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PowerTransformer._box_cox_optimize.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_box_cox_optimize/x", + "name": "x", + "qname": "sklearn.preprocessing._data.PowerTransformer._box_cox_optimize.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Find and return optimal lambda parameter of the Box-Cox transform by\nMLE, for observed data x.\n\nWe here use scipy builtins which uses the brent optimizer.", + "docstring": "Find and return optimal lambda parameter of the Box-Cox transform by\nMLE, for 
observed data x.\n\nWe here use scipy builtins which uses the brent optimizer.", + "code": " def _box_cox_optimize(self, x):\n \"\"\"Find and return optimal lambda parameter of the Box-Cox transform by\n MLE, for observed data x.\n\n We here use scipy builtins which uses the brent optimizer.\n \"\"\"\n # the computation of lambda is influenced by NaNs so we need to\n # get rid of them\n _, lmbda = stats.boxcox(x[~np.isnan(x)], lmbda=None)\n\n return lmbda" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_check_input", + "name": "_check_input", + "qname": "sklearn.preprocessing._data.PowerTransformer._check_input", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_check_input/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PowerTransformer._check_input.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_check_input/X", + "name": "X", + "qname": "sklearn.preprocessing._data.PowerTransformer._check_input.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_check_input/in_fit", + "name": "in_fit", + "qname": "sklearn.preprocessing._data.PowerTransformer._check_input.in_fit", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Whether or not `_check_input` is called from `fit` or other\nmethods, e.g. `predict`, `transform`, etc." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_check_input/check_positive", + "name": "check_positive", + "qname": "sklearn.preprocessing._data.PowerTransformer._check_input.check_positive", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, check that all data is positive and non-zero (only if\n``self.method=='box-cox'``)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_check_input/check_shape", + "name": "check_shape", + "qname": "sklearn.preprocessing._data.PowerTransformer._check_input.check_shape", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, check that n_features matches the length of self.lambdas_" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_check_input/check_method", + "name": "check_method", + "qname": "sklearn.preprocessing._data.PowerTransformer._check_input.check_method", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, check that the transformation method is valid." 
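_box_cox_optimize above drops NaNs and then lets scipy.stats.boxcox with lmbda=None perform the Brent-based MLE. The same call written directly against SciPy (a re-derivation on toy data, not the private method itself):

import numpy as np
from scipy import stats

x = np.array([0.5, 1.0, 2.0, np.nan, 4.0, 8.0])

# NaNs would distort the likelihood, so filter them first, as the quoted code does.
_, lmbda = stats.boxcox(x[~np.isnan(x)], lmbda=None)
print(lmbda)  # MLE estimate of the Box-Cox lambda for the observed values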
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate the input before fit and transform.", + "docstring": "Validate the input before fit and transform.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nin_fit : bool\n Whether or not `_check_input` is called from `fit` or other\n methods, e.g. `predict`, `transform`, etc.\n\ncheck_positive : bool, default=False\n If True, check that all data is positive and non-zero (only if\n ``self.method=='box-cox'``).\n\ncheck_shape : bool, default=False\n If True, check that n_features matches the length of self.lambdas_\n\ncheck_method : bool, default=False\n If True, check that the transformation method is valid.", + "code": " def _check_input(self, X, in_fit, check_positive=False, check_shape=False,\n check_method=False):\n \"\"\"Validate the input before fit and transform.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n in_fit : bool\n Whether or not `_check_input` is called from `fit` or other\n methods, e.g. `predict`, `transform`, etc.\n\n check_positive : bool, default=False\n If True, check that all data is positive and non-zero (only if\n ``self.method=='box-cox'``).\n\n check_shape : bool, default=False\n If True, check that n_features matches the length of self.lambdas_\n\n check_method : bool, default=False\n If True, check that the transformation method is valid.\n \"\"\"\n X = self._validate_data(X, ensure_2d=True, dtype=FLOAT_DTYPES,\n copy=self.copy, force_all_finite='allow-nan',\n reset=in_fit)\n\n with np.warnings.catch_warnings():\n np.warnings.filterwarnings(\n 'ignore', r'All-NaN (slice|axis) encountered')\n if (check_positive and self.method == 'box-cox' and\n np.nanmin(X) <= 0):\n raise ValueError(\"The Box-Cox transformation can only be \"\n \"applied to strictly positive data\")\n\n if check_shape and not X.shape[1] == len(self.lambdas_):\n raise ValueError(\"Input data has a different number of features \"\n \"than fitting data. 
Should have {n}, data has {m}\"\n .format(n=len(self.lambdas_), m=X.shape[1]))\n\n valid_methods = ('box-cox', 'yeo-johnson')\n if check_method and self.method not in valid_methods:\n raise ValueError(\"'method' must be one of {}, \"\n \"got {} instead.\"\n .format(valid_methods, self.method))\n\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_fit", + "name": "_fit", + "qname": "sklearn.preprocessing._data.PowerTransformer._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PowerTransformer._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.PowerTransformer._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.PowerTransformer._fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_fit/force_transform", + "name": "force_transform", + "qname": "sklearn.preprocessing._data.PowerTransformer._fit.force_transform", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit(self, X, y=None, force_transform=False):\n X = self._check_input(X, in_fit=True, check_positive=True,\n check_method=True)\n\n if not self.copy and not force_transform: # if call from fit()\n X = X.copy() # force copy so that fit does not change X inplace\n\n optim_function = {'box-cox': self._box_cox_optimize,\n 'yeo-johnson': self._yeo_johnson_optimize\n }[self.method]\n with np.errstate(invalid='ignore'): # hide NaN warnings\n self.lambdas_ = np.array([optim_function(col) for col in X.T])\n\n if self.standardize or force_transform:\n transform_function = {'box-cox': boxcox,\n 'yeo-johnson': self._yeo_johnson_transform\n }[self.method]\n for i, lmbda in enumerate(self.lambdas_):\n with np.errstate(invalid='ignore'): # hide NaN warnings\n X[:, i] = transform_function(X[:, i], lmbda)\n\n if self.standardize:\n self._scaler = StandardScaler(copy=False)\n if force_transform:\n X = self._scaler.fit_transform(X)\n else:\n self._scaler.fit(X)\n\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._data.PowerTransformer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PowerTransformer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", 
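_fit above estimates one lambda per column by MLE and, when standardize=True, follows the power transform with an internal StandardScaler. A check on skewed toy data that lambdas_ is per-feature and the training output is standardized:

import numpy as np
from sklearn.preprocessing import PowerTransformer

rng = np.random.RandomState(0)
X = rng.lognormal(size=(200, 3))  # skewed, strictly positive columns

pt = PowerTransformer(method='box-cox', standardize=True).fit(X)
print(pt.lambdas_.shape)  # (3,) -- one MLE lambda per column

Xt = pt.transform(X)
assert np.allclose(Xt.mean(axis=0), 0.0, atol=1e-8)  # zero mean ...
assert np.allclose(Xt.std(axis=0), 1.0)              # ... unit variance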
+ "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'allow_nan': True}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_inverse_transform", + "name": "_yeo_johnson_inverse_transform", + "qname": "sklearn.preprocessing._data.PowerTransformer._yeo_johnson_inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PowerTransformer._yeo_johnson_inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_inverse_transform/x", + "name": "x", + "qname": "sklearn.preprocessing._data.PowerTransformer._yeo_johnson_inverse_transform.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_inverse_transform/lmbda", + "name": "lmbda", + "qname": "sklearn.preprocessing._data.PowerTransformer._yeo_johnson_inverse_transform.lmbda", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return inverse-transformed input x following Yeo-Johnson inverse\ntransform with parameter lambda.", + "docstring": "Return inverse-transformed input x following Yeo-Johnson inverse\ntransform with parameter lambda.", + "code": " def _yeo_johnson_inverse_transform(self, x, lmbda):\n \"\"\"Return inverse-transformed input x following Yeo-Johnson inverse\n transform with parameter lambda.\n \"\"\"\n x_inv = np.zeros_like(x)\n pos = x >= 0\n\n # when x >= 0\n if abs(lmbda) < np.spacing(1.):\n x_inv[pos] = np.exp(x[pos]) - 1\n else: # lmbda != 0\n x_inv[pos] = np.power(x[pos] * lmbda + 1, 1 / lmbda) - 1\n\n # when x < 0\n if abs(lmbda - 2) > np.spacing(1.):\n x_inv[~pos] = 1 - np.power(-(2 - lmbda) * x[~pos] + 1,\n 1 / (2 - lmbda))\n else: # lmbda == 2\n x_inv[~pos] = 1 - np.exp(-x[~pos])\n\n return x_inv" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_optimize", + "name": "_yeo_johnson_optimize", + "qname": "sklearn.preprocessing._data.PowerTransformer._yeo_johnson_optimize", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_optimize/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PowerTransformer._yeo_johnson_optimize.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_optimize/x", + "name": "x", + "qname": "sklearn.preprocessing._data.PowerTransformer._yeo_johnson_optimize.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
"description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Find and return optimal lambda parameter of the Yeo-Johnson\ntransform by MLE, for observed data x.\n\nLike for Box-Cox, MLE is done via the brent optimizer.", + "docstring": "Find and return optimal lambda parameter of the Yeo-Johnson\ntransform by MLE, for observed data x.\n\nLike for Box-Cox, MLE is done via the brent optimizer.", + "code": " def _yeo_johnson_optimize(self, x):\n \"\"\"Find and return optimal lambda parameter of the Yeo-Johnson\n transform by MLE, for observed data x.\n\n Like for Box-Cox, MLE is done via the brent optimizer.\n \"\"\"\n\n def _neg_log_likelihood(lmbda):\n \"\"\"Return the negative log likelihood of the observed data x as a\n function of lambda.\"\"\"\n x_trans = self._yeo_johnson_transform(x, lmbda)\n n_samples = x.shape[0]\n\n loglike = -n_samples / 2 * np.log(x_trans.var())\n loglike += (lmbda - 1) * (np.sign(x) * np.log1p(np.abs(x))).sum()\n\n return -loglike\n\n # the computation of lambda is influenced by NaNs so we need to\n # get rid of them\n x = x[~np.isnan(x)]\n # choosing bracket -2, 2 like for boxcox\n return optimize.brent(_neg_log_likelihood, brack=(-2, 2))" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_transform", + "name": "_yeo_johnson_transform", + "qname": "sklearn.preprocessing._data.PowerTransformer._yeo_johnson_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PowerTransformer._yeo_johnson_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_transform/x", + "name": "x", + "qname": "sklearn.preprocessing._data.PowerTransformer._yeo_johnson_transform.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/_yeo_johnson_transform/lmbda", + "name": "lmbda", + "qname": "sklearn.preprocessing._data.PowerTransformer._yeo_johnson_transform.lmbda", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return transformed input x following Yeo-Johnson transform with\nparameter lambda.", + "docstring": "Return transformed input x following Yeo-Johnson transform with\nparameter lambda.", + "code": " def _yeo_johnson_transform(self, x, lmbda):\n \"\"\"Return transformed input x following Yeo-Johnson transform with\n parameter lambda.\n \"\"\"\n\n out = np.zeros_like(x)\n pos = x >= 0 # binary mask\n\n # when x >= 0\n if abs(lmbda) < np.spacing(1.):\n out[pos] = np.log1p(x[pos])\n else: # lmbda != 0\n out[pos] = (np.power(x[pos] + 1, lmbda) - 1) / lmbda\n\n # when x < 0\n if abs(lmbda - 2) > np.spacing(1.):\n out[~pos] = -(np.power(-x[~pos] + 1, 2 - lmbda) - 1) / (2 - lmbda)\n else: # lmbda == 2\n out[~pos] = -np.log1p(-x[~pos])\n\n return out" + }, + { + "id": 
"scikit-learn/sklearn.preprocessing._data/PowerTransformer/fit", + "name": "fit", + "qname": "sklearn.preprocessing._data.PowerTransformer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PowerTransformer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.PowerTransformer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to estimate the optimal transformation parameters." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.PowerTransformer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored." + }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Estimate the optimal parameter lambda for each feature.\n\nThe optimal lambda parameter for minimizing skewness is estimated on\neach feature independently using maximum likelihood.", + "docstring": "Estimate the optimal parameter lambda for each feature.\n\nThe optimal lambda parameter for minimizing skewness is estimated on\neach feature independently using maximum likelihood.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data used to estimate the optimal transformation parameters.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer.", + "code": " def fit(self, X, y=None):\n \"\"\"Estimate the optimal parameter lambda for each feature.\n\n The optimal lambda parameter for minimizing skewness is estimated on\n each feature independently using maximum likelihood.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data used to estimate the optimal transformation parameters.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted transformer.\n \"\"\"\n self._fit(X, y=y, force_transform=False)\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/fit_transform", + "name": "fit_transform", + "qname": "sklearn.preprocessing._data.PowerTransformer.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/fit_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PowerTransformer.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/fit_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.PowerTransformer.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + 
"default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/fit_transform/y", + "name": "y", + "qname": "sklearn.preprocessing._data.PowerTransformer.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def fit_transform(self, X, y=None):\n return self._fit(X, y, force_transform=True)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.preprocessing._data.PowerTransformer.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PowerTransformer.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/inverse_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.PowerTransformer.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The transformed data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply the inverse power transformation using the fitted lambdas.\n\nThe inverse of the Box-Cox transformation is given by::\n\n if lambda_ == 0:\n X = exp(X_trans)\n else:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_)\n\nThe inverse of the Yeo-Johnson transformation is given by::\n\n if X >= 0 and lambda_ == 0:\n X = exp(X_trans) - 1\n elif X >= 0 and lambda_ != 0:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_) - 1\n elif X < 0 and lambda_ != 2:\n X = 1 - (-(2 - lambda_) * X_trans + 1) ** (1 / (2 - lambda_))\n elif X < 0 and lambda_ == 2:\n X = 1 - exp(-X_trans)", + "docstring": "Apply the inverse power transformation using the fitted lambdas.\n\nThe inverse of the Box-Cox transformation is given by::\n\n if lambda_ == 0:\n X = exp(X_trans)\n else:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_)\n\nThe inverse of the Yeo-Johnson transformation is given by::\n\n if X >= 0 and lambda_ == 0:\n X = exp(X_trans) - 1\n elif X >= 0 and lambda_ != 0:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_) - 1\n elif X < 0 and lambda_ != 2:\n X = 1 - (-(2 - lambda_) * X_trans + 1) ** (1 / (2 - lambda_))\n elif X < 0 and lambda_ == 2:\n X = 1 - exp(-X_trans)\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The transformed data.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The original data.", + "code": " def inverse_transform(self, X):\n \"\"\"Apply the inverse power transformation using the fitted lambdas.\n\n The inverse of the Box-Cox transformation is given by::\n\n if lambda_ == 0:\n X = exp(X_trans)\n else:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_)\n\n The inverse of the Yeo-Johnson transformation is given by::\n\n if X >= 0 and lambda_ == 0:\n X 
= exp(X_trans) - 1\n elif X >= 0 and lambda_ != 0:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_) - 1\n elif X < 0 and lambda_ != 2:\n X = 1 - (-(2 - lambda_) * X_trans + 1) ** (1 / (2 - lambda_))\n elif X < 0 and lambda_ == 2:\n X = 1 - exp(-X_trans)\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The transformed data.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n The original data.\n \"\"\"\n check_is_fitted(self)\n X = self._check_input(X, in_fit=False, check_shape=True)\n\n if self.standardize:\n X = self._scaler.inverse_transform(X)\n\n inv_fun = {'box-cox': self._box_cox_inverse_tranform,\n 'yeo-johnson': self._yeo_johnson_inverse_transform\n }[self.method]\n for i, lmbda in enumerate(self.lambdas_):\n with np.errstate(invalid='ignore'): # hide NaN warnings\n X[:, i] = inv_fun(X[:, i], lmbda)\n\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/transform", + "name": "transform", + "qname": "sklearn.preprocessing._data.PowerTransformer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.PowerTransformer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/PowerTransformer/transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.PowerTransformer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to be transformed using a power transformation." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Apply the power transform to each feature using the fitted lambdas.", + "docstring": "Apply the power transform to each feature using the fitted lambdas.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to be transformed using a power transformation.\n\nReturns\n-------\nX_trans : ndarray of shape (n_samples, n_features)\n The transformed data.", + "code": " def transform(self, X):\n \"\"\"Apply the power transform to each feature using the fitted lambdas.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to be transformed using a power transformation.\n\n Returns\n -------\n X_trans : ndarray of shape (n_samples, n_features)\n The transformed data.\n \"\"\"\n check_is_fitted(self)\n X = self._check_input(X, in_fit=False, check_positive=True,\n check_shape=True)\n\n transform_function = {'box-cox': boxcox,\n 'yeo-johnson': self._yeo_johnson_transform\n }[self.method]\n for i, lmbda in enumerate(self.lambdas_):\n with np.errstate(invalid='ignore'): # hide NaN warnings\n X[:, i] = transform_function(X[:, i], lmbda)\n\n if self.standardize:\n X = self._scaler.transform(X)\n\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._data.QuantileTransformer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._data.QuantileTransformer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/__init__/n_quantiles", + "name": "n_quantiles", + "qname": "sklearn.preprocessing._data.QuantileTransformer.__init__.n_quantiles", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000 or n_samples", + "description": "Number of quantiles to be computed. It corresponds to the number\nof landmarks used to discretize the cumulative distribution function.\nIf n_quantiles is larger than the number of samples, n_quantiles is set\nto the number of samples as a larger number of quantiles does not give\na better approximation of the cumulative distribution function\nestimator." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/__init__/output_distribution", + "name": "output_distribution", + "qname": "sklearn.preprocessing._data.QuantileTransformer.__init__.output_distribution", + "default_value": "'uniform'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'uniform', 'normal'}", + "default_value": "'uniform'", + "description": "Marginal distribution for the transformed data. The choices are\n'uniform' (default) or 'normal'." 
+ }, + "type": { + "kind": "EnumType", + "values": ["uniform", "normal"] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/__init__/ignore_implicit_zeros", + "name": "ignore_implicit_zeros", + "qname": "sklearn.preprocessing._data.QuantileTransformer.__init__.ignore_implicit_zeros", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Only applies to sparse matrices. If True, the sparse entries of the\nmatrix are discarded to compute the quantile statistics. If False,\nthese entries are treated as zeros." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/__init__/subsample", + "name": "subsample", + "qname": "sklearn.preprocessing._data.QuantileTransformer.__init__.subsample", + "default_value": "int(100000.0)", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1e5", + "description": "Maximum number of samples used to estimate the quantiles for\ncomputational efficiency. Note that the subsampling procedure may\ndiffer for value-identical sparse and dense matrices." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/__init__/random_state", + "name": "random_state", + "qname": "sklearn.preprocessing._data.QuantileTransformer.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for subsampling and smoothing\nnoise.\nPlease see ``subsample`` for more details.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary `" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/__init__/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.QuantileTransformer.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to False to perform inplace transformation and avoid a copy (if the\ninput is already a numpy array)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. 
Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.19", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, n_quantiles=1000, output_distribution='uniform',\n ignore_implicit_zeros=False, subsample=int(1e5),\n random_state=None, copy=True):\n self.n_quantiles = n_quantiles\n self.output_distribution = output_distribution\n self.ignore_implicit_zeros = ignore_implicit_zeros\n self.subsample = subsample\n self.random_state = random_state\n self.copy = copy" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_check_inputs", + "name": "_check_inputs", + "qname": "sklearn.preprocessing._data.QuantileTransformer._check_inputs", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_check_inputs/self", + "name": "self", + "qname": "sklearn.preprocessing._data.QuantileTransformer._check_inputs.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_check_inputs/X", + "name": "X", + "qname": "sklearn.preprocessing._data.QuantileTransformer._check_inputs.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_check_inputs/in_fit", + "name": "in_fit", + "qname": "sklearn.preprocessing._data.QuantileTransformer._check_inputs.in_fit", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_check_inputs/accept_sparse_negative", + "name": "accept_sparse_negative", + "qname": "sklearn.preprocessing._data.QuantileTransformer._check_inputs.accept_sparse_negative", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_check_inputs/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.QuantileTransformer._check_inputs.copy", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check inputs before fit and transform.", + "docstring": "Check inputs before fit and transform.", + "code": " def _check_inputs(self, X, in_fit, accept_sparse_negative=False,\n copy=False):\n \"\"\"Check inputs before fit and transform.\"\"\"\n X = self._validate_data(X, reset=in_fit,\n accept_sparse='csc', copy=copy,\n dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n # we only accept positive sparse matrix when ignore_implicit_zeros is\n # false and that we call fit or transform.\n with np.errstate(invalid='ignore'): # hide NaN comparison warnings\n if (not accept_sparse_negative and not 
self.ignore_implicit_zeros\n and (sparse.issparse(X) and np.any(X.data < 0))):\n raise ValueError('QuantileTransformer only accepts'\n ' non-negative sparse matrices.')\n\n # check the output distribution\n if self.output_distribution not in ('normal', 'uniform'):\n raise ValueError(\"'output_distribution' has to be either 'normal'\"\n \" or 'uniform'. Got '{}' instead.\".format(\n self.output_distribution))\n\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_dense_fit", + "name": "_dense_fit", + "qname": "sklearn.preprocessing._data.QuantileTransformer._dense_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_dense_fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.QuantileTransformer._dense_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_dense_fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.QuantileTransformer._dense_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to scale along the features axis." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_dense_fit/random_state", + "name": "random_state", + "qname": "sklearn.preprocessing._data.QuantileTransformer._dense_fit.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute percentiles for dense matrices.", + "docstring": "Compute percentiles for dense matrices.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data used to scale along the features axis.", + "code": " def _dense_fit(self, X, random_state):\n \"\"\"Compute percentiles for dense matrices.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The data used to scale along the features axis.\n \"\"\"\n if self.ignore_implicit_zeros:\n warnings.warn(\"'ignore_implicit_zeros' takes effect only with\"\n \" sparse matrix. 
This parameter has no effect.\")\n\n n_samples, n_features = X.shape\n references = self.references_ * 100\n\n self.quantiles_ = []\n for col in X.T:\n if self.subsample < n_samples:\n subsample_idx = random_state.choice(n_samples,\n size=self.subsample,\n replace=False)\n col = col.take(subsample_idx, mode='clip')\n self.quantiles_.append(np.nanpercentile(col, references))\n self.quantiles_ = np.transpose(self.quantiles_)\n # Due to floating-point precision error in `np.nanpercentile`,\n # make sure that quantiles are monotonically increasing.\n # Upstream issue in numpy:\n # https://github.com/numpy/numpy/issues/14685\n self.quantiles_ = np.maximum.accumulate(self.quantiles_)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._data.QuantileTransformer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._data.QuantileTransformer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'allow_nan': True}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_sparse_fit", + "name": "_sparse_fit", + "qname": "sklearn.preprocessing._data.QuantileTransformer._sparse_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_sparse_fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.QuantileTransformer._sparse_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_sparse_fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.QuantileTransformer._sparse_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to scale along the features axis. The sparse matrix\nneeds to be nonnegative. If a sparse matrix is provided,\nit will be converted into a sparse ``csc_matrix``." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_sparse_fit/random_state", + "name": "random_state", + "qname": "sklearn.preprocessing._data.QuantileTransformer._sparse_fit.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute percentiles for sparse matrices.", + "docstring": "Compute percentiles for sparse matrices.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n The data used to scale along the features axis. The sparse matrix\n needs to be nonnegative. 
If a sparse matrix is provided,\n it will be converted into a sparse ``csc_matrix``.", + "code": " def _sparse_fit(self, X, random_state):\n \"\"\"Compute percentiles for sparse matrices.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n The data used to scale along the features axis. The sparse matrix\n needs to be nonnegative. If a sparse matrix is provided,\n it will be converted into a sparse ``csc_matrix``.\n \"\"\"\n n_samples, n_features = X.shape\n references = self.references_ * 100\n\n self.quantiles_ = []\n for feature_idx in range(n_features):\n column_nnz_data = X.data[X.indptr[feature_idx]:\n X.indptr[feature_idx + 1]]\n if len(column_nnz_data) > self.subsample:\n column_subsample = (self.subsample * len(column_nnz_data) //\n n_samples)\n if self.ignore_implicit_zeros:\n column_data = np.zeros(shape=column_subsample,\n dtype=X.dtype)\n else:\n column_data = np.zeros(shape=self.subsample, dtype=X.dtype)\n column_data[:column_subsample] = random_state.choice(\n column_nnz_data, size=column_subsample, replace=False)\n else:\n if self.ignore_implicit_zeros:\n column_data = np.zeros(shape=len(column_nnz_data),\n dtype=X.dtype)\n else:\n column_data = np.zeros(shape=n_samples, dtype=X.dtype)\n column_data[:len(column_nnz_data)] = column_nnz_data\n\n if not column_data.size:\n # if no nnz, an error will be raised for computing the\n # quantiles. Force the quantiles to be zeros.\n self.quantiles_.append([0] * len(references))\n else:\n self.quantiles_.append(\n np.nanpercentile(column_data, references))\n self.quantiles_ = np.transpose(self.quantiles_)\n # due to floating-point precision error in `np.nanpercentile`,\n # make sure the quantiles are monotonically increasing\n # Upstream issue in numpy:\n # https://github.com/numpy/numpy/issues/14685\n self.quantiles_ = np.maximum.accumulate(self.quantiles_)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_transform", + "name": "_transform", + "qname": "sklearn.preprocessing._data.QuantileTransformer._transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.QuantileTransformer._transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.QuantileTransformer._transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to scale along the features axis." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_transform/inverse", + "name": "inverse", + "qname": "sklearn.preprocessing._data.QuantileTransformer._transform.inverse", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If False, apply forward transform. If True, apply\ninverse transform." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Forward and inverse transform.", + "docstring": "Forward and inverse transform.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The data used to scale along the features axis.\n\ninverse : bool, default=False\n If False, apply forward transform. If True, apply\n inverse transform.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n Projected data.", + "code": " def _transform(self, X, inverse=False):\n \"\"\"Forward and inverse transform.\n\n Parameters\n ----------\n X : ndarray of shape (n_samples, n_features)\n The data used to scale along the features axis.\n\n inverse : bool, default=False\n If False, apply forward transform. If True, apply\n inverse transform.\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_features)\n Projected data.\n \"\"\"\n\n if sparse.issparse(X):\n for feature_idx in range(X.shape[1]):\n column_slice = slice(X.indptr[feature_idx],\n X.indptr[feature_idx + 1])\n X.data[column_slice] = self._transform_col(\n X.data[column_slice], self.quantiles_[:, feature_idx],\n inverse)\n else:\n for feature_idx in range(X.shape[1]):\n X[:, feature_idx] = self._transform_col(\n X[:, feature_idx], self.quantiles_[:, feature_idx],\n inverse)\n\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_transform_col", + "name": "_transform_col", + "qname": "sklearn.preprocessing._data.QuantileTransformer._transform_col", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_transform_col/self", + "name": "self", + "qname": "sklearn.preprocessing._data.QuantileTransformer._transform_col.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_transform_col/X_col", + "name": "X_col", + "qname": "sklearn.preprocessing._data.QuantileTransformer._transform_col.X_col", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_transform_col/quantiles", + "name": "quantiles", + "qname": "sklearn.preprocessing._data.QuantileTransformer._transform_col.quantiles", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/_transform_col/inverse", + "name": "inverse", + "qname": "sklearn.preprocessing._data.QuantileTransformer._transform_col.inverse", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private function to transform a single feature.", + "docstring": "Private function to transform a single feature.", + "code": " def _transform_col(self, X_col, quantiles, inverse):\n \"\"\"Private function to transform a single feature.\"\"\"\n\n output_distribution = self.output_distribution\n\n if not inverse:\n 
lower_bound_x = quantiles[0]\n upper_bound_x = quantiles[-1]\n lower_bound_y = 0\n upper_bound_y = 1\n else:\n lower_bound_x = 0\n upper_bound_x = 1\n lower_bound_y = quantiles[0]\n upper_bound_y = quantiles[-1]\n # for inverse transform, match a uniform distribution\n with np.errstate(invalid='ignore'): # hide NaN comparison warnings\n if output_distribution == 'normal':\n X_col = stats.norm.cdf(X_col)\n # else output distribution is already a uniform distribution\n\n # find index for lower and higher bounds\n with np.errstate(invalid='ignore'): # hide NaN comparison warnings\n if output_distribution == 'normal':\n lower_bounds_idx = (X_col - BOUNDS_THRESHOLD <\n lower_bound_x)\n upper_bounds_idx = (X_col + BOUNDS_THRESHOLD >\n upper_bound_x)\n if output_distribution == 'uniform':\n lower_bounds_idx = (X_col == lower_bound_x)\n upper_bounds_idx = (X_col == upper_bound_x)\n\n isfinite_mask = ~np.isnan(X_col)\n X_col_finite = X_col[isfinite_mask]\n if not inverse:\n # Interpolate in one direction and in the other and take the\n # mean. This is in case of repeated values in the features\n # and hence repeated quantiles\n #\n # If we don't do this, only one extreme of the duplicated is\n # used (the upper when we do ascending, and the\n # lower for descending). We take the mean of these two\n X_col[isfinite_mask] = .5 * (\n np.interp(X_col_finite, quantiles, self.references_)\n - np.interp(-X_col_finite, -quantiles[::-1],\n -self.references_[::-1]))\n else:\n X_col[isfinite_mask] = np.interp(X_col_finite,\n self.references_, quantiles)\n\n X_col[upper_bounds_idx] = upper_bound_y\n X_col[lower_bounds_idx] = lower_bound_y\n # for forward transform, match the output distribution\n if not inverse:\n with np.errstate(invalid='ignore'): # hide NaN comparison warnings\n if output_distribution == 'normal':\n X_col = stats.norm.ppf(X_col)\n # find the value to clip the data to avoid mapping to\n # infinity. Clip such that the inverse transform will be\n # consistent\n clip_min = stats.norm.ppf(BOUNDS_THRESHOLD - np.spacing(1))\n clip_max = stats.norm.ppf(1 - (BOUNDS_THRESHOLD -\n np.spacing(1)))\n X_col = np.clip(X_col, clip_min, clip_max)\n # else output distribution is uniform and the ppf is the\n # identity function so we let X_col unchanged\n\n return X_col" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/fit", + "name": "fit", + "qname": "sklearn.preprocessing._data.QuantileTransformer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.QuantileTransformer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.QuantileTransformer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to scale along the features axis. If a sparse\nmatrix is provided, it will be converted into a sparse\n``csc_matrix``. Additionally, the sparse matrix needs to be\nnonnegative if `ignore_implicit_zeros` is False." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.QuantileTransformer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored." + }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the quantiles used for transforming.", + "docstring": "Compute the quantiles used for transforming.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted transformer.", + "code": " def fit(self, X, y=None):\n \"\"\"Compute the quantiles used for transforming.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted transformer.\n \"\"\"\n if self.n_quantiles <= 0:\n raise ValueError(\"Invalid value for 'n_quantiles': %d. \"\n \"The number of quantiles must be at least one.\"\n % self.n_quantiles)\n\n if self.subsample <= 0:\n raise ValueError(\"Invalid value for 'subsample': %d. \"\n \"The number of subsamples must be at least one.\"\n % self.subsample)\n\n if self.n_quantiles > self.subsample:\n raise ValueError(\"The number of quantiles cannot be greater than\"\n \" the number of samples used. Got {} quantiles\"\n \" and {} samples.\".format(self.n_quantiles,\n self.subsample))\n\n X = self._check_inputs(X, in_fit=True, copy=False)\n n_samples = X.shape[0]\n\n if self.n_quantiles > n_samples:\n warnings.warn(\"n_quantiles (%s) is greater than the total number \"\n \"of samples (%s). 
n_quantiles is set to \"\n \"n_samples.\"\n % (self.n_quantiles, n_samples))\n self.n_quantiles_ = max(1, min(self.n_quantiles, n_samples))\n\n rng = check_random_state(self.random_state)\n\n # Create the quantiles of reference\n self.references_ = np.linspace(0, 1, self.n_quantiles_,\n endpoint=True)\n if sparse.issparse(X):\n self._sparse_fit(X, rng)\n else:\n self._dense_fit(X, rng)\n\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.preprocessing._data.QuantileTransformer.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.QuantileTransformer.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/inverse_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.QuantileTransformer.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to scale along the features axis. If a sparse\nmatrix is provided, it will be converted into a sparse\n``csc_matrix``. Additionally, the sparse matrix needs to be\nnonnegative if `ignore_implicit_zeros` is False." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Back-projection to the original space.", + "docstring": "Back-projection to the original space.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\nReturns\n-------\nXt : {ndarray, sparse matrix} of (n_samples, n_features)\n The projected data.", + "code": " def inverse_transform(self, X):\n \"\"\"Back-projection to the original space.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. 
Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\n Returns\n -------\n Xt : {ndarray, sparse matrix} of (n_samples, n_features)\n The projected data.\n \"\"\"\n check_is_fitted(self)\n X = self._check_inputs(X, in_fit=False, accept_sparse_negative=True,\n copy=self.copy)\n\n return self._transform(X, inverse=True)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/transform", + "name": "transform", + "qname": "sklearn.preprocessing._data.QuantileTransformer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.QuantileTransformer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/QuantileTransformer/transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.QuantileTransformer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to scale along the features axis. If a sparse\nmatrix is provided, it will be converted into a sparse\n``csc_matrix``. Additionally, the sparse matrix needs to be\nnonnegative if `ignore_implicit_zeros` is False." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Feature-wise transformation of the data.", + "docstring": "Feature-wise transformation of the data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\nReturns\n-------\nXt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The projected data.", + "code": " def transform(self, X):\n \"\"\"Feature-wise transformation of the data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis. If a sparse\n matrix is provided, it will be converted into a sparse\n ``csc_matrix``. 
Additionally, the sparse matrix needs to be\n nonnegative if `ignore_implicit_zeros` is False.\n\n Returns\n -------\n Xt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The projected data.\n \"\"\"\n check_is_fitted(self)\n X = self._check_inputs(X, in_fit=False, copy=self.copy)\n\n return self._transform(X, inverse=False)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._data.RobustScaler.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._data.RobustScaler.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/__init__/with_centering", + "name": "with_centering", + "qname": "sklearn.preprocessing._data.RobustScaler.__init__.with_centering", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, center the data before scaling.\nThis will cause ``transform`` to raise an exception when attempted on\nsparse matrices, because centering them entails building a dense\nmatrix which in common use cases is likely to be too large to fit in\nmemory." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/__init__/with_scaling", + "name": "with_scaling", + "qname": "sklearn.preprocessing._data.RobustScaler.__init__.with_scaling", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, scale the data to interquartile range." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/__init__/quantile_range", + "name": "quantile_range", + "qname": "sklearn.preprocessing._data.RobustScaler.__init__.quantile_range", + "default_value": "(25.0, 75.0)", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0", + "default_value": "(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR", + "description": "Quantile range used to calculate ``scale_``.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "tuple (q_min, q_max)" + }, + { + "kind": "NamedType", + "name": "0.0 < q_min < q_max < 100.0" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/__init__/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.RobustScaler.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, try to avoid a copy and do inplace scaling instead.\nThis is not guaranteed to always work inplace; e.g. if the data is\nnot a NumPy array or scipy.sparse CSR matrix, a copy may still be\nreturned." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/__init__/unit_variance", + "name": "unit_variance", + "qname": "sklearn.preprocessing._data.RobustScaler.__init__.unit_variance", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, scale data so that normally distributed features have a\nvariance of 1. In general, if the difference between the x-values of\n``q_max`` and ``q_min`` for a standard normal distribution is greater\nthan 1, the dataset will be scaled down. If less than 1, the dataset\nwill be scaled up.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Scale features using statistics that are robust to outliers.\n\nThis Scaler removes the median and scales the data according to\nthe quantile range (defaults to IQR: Interquartile Range).\nThe IQR is the range between the 1st quartile (25th quantile)\nand the 3rd quartile (75th quantile).\n\nCentering and scaling happen independently on each feature by\ncomputing the relevant statistics on the samples in the training\nset. Median and interquartile range are then stored to be used on\nlater data using the ``transform`` method.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators. Typically this is done by removing the mean\nand scaling to unit variance. However, outliers can often influence the\nsample mean / variance in a negative way. In such cases, the median and\nthe interquartile range often give better results.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, with_centering=True, with_scaling=True,\n quantile_range=(25.0, 75.0), copy=True, unit_variance=False):\n self.with_centering = with_centering\n self.with_scaling = with_scaling\n self.quantile_range = quantile_range\n self.unit_variance = unit_variance\n self.copy = copy" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._data.RobustScaler._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._data.RobustScaler._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'allow_nan': True}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/fit", + "name": "fit", + "qname": "sklearn.preprocessing._data.RobustScaler.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.RobustScaler.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/fit/X", + "name": "X", + "qname": 
"sklearn.preprocessing._data.RobustScaler.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to compute the median and quantiles\nused for later scaling along the features axis." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.RobustScaler.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored." + }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the median and quantiles to be used for scaling.", + "docstring": "Compute the median and quantiles to be used for scaling.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the median and quantiles\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nReturns\n-------\nself : object\n Fitted scaler.", + "code": " def fit(self, X, y=None):\n \"\"\"Compute the median and quantiles to be used for scaling.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the median and quantiles\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n # at fit, convert sparse matrices to csc for optimized computation of\n # the quantiles\n X = self._validate_data(X, accept_sparse='csc', estimator=self,\n dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n\n q_min, q_max = self.quantile_range\n if not 0 <= q_min <= q_max <= 100:\n raise ValueError(\"Invalid quantile range: %s\" %\n str(self.quantile_range))\n\n if self.with_centering:\n if sparse.issparse(X):\n raise ValueError(\n \"Cannot center sparse matrices: use `with_centering=False`\"\n \" instead. 
See docstring for motivation and alternatives.\")\n self.center_ = np.nanmedian(X, axis=0)\n else:\n self.center_ = None\n\n if self.with_scaling:\n quantiles = []\n for feature_idx in range(X.shape[1]):\n if sparse.issparse(X):\n column_nnz_data = X.data[X.indptr[feature_idx]:\n X.indptr[feature_idx + 1]]\n column_data = np.zeros(shape=X.shape[0], dtype=X.dtype)\n column_data[:len(column_nnz_data)] = column_nnz_data\n else:\n column_data = X[:, feature_idx]\n\n quantiles.append(np.nanpercentile(column_data,\n self.quantile_range))\n\n quantiles = np.transpose(quantiles)\n\n self.scale_ = quantiles[1] - quantiles[0]\n self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False)\n if self.unit_variance:\n adjust = (stats.norm.ppf(q_max / 100.0) -\n stats.norm.ppf(q_min / 100.0))\n self.scale_ = self.scale_ / adjust\n else:\n self.scale_ = None\n\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.preprocessing._data.RobustScaler.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.RobustScaler.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/inverse_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.RobustScaler.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The rescaled data to be transformed back." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Scale back the data to the original representation", + "docstring": "Scale back the data to the original representation\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The rescaled data to be transformed back.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.", + "code": " def inverse_transform(self, X):\n \"\"\"Scale back the data to the original representation\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The rescaled data to be transformed back.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,\n estimator=self, dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n\n if sparse.issparse(X):\n if self.with_scaling:\n inplace_column_scale(X, self.scale_)\n else:\n if self.with_scaling:\n X *= self.scale_\n if self.with_centering:\n X += self.center_\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/transform", + "name": "transform", + "qname": "sklearn.preprocessing._data.RobustScaler.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.RobustScaler.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/RobustScaler/transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.RobustScaler.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to scale along the specified axis." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Center and scale the data.", + "docstring": "Center and scale the data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the specified axis.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.", + "code": " def transform(self, X):\n \"\"\"Center and scale the data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the specified axis.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_data(X, accept_sparse=('csr', 'csc'),\n copy=self.copy, estimator=self,\n dtype=FLOAT_DTYPES, reset=False,\n force_all_finite='allow-nan')\n\n if sparse.issparse(X):\n if self.with_scaling:\n inplace_column_scale(X, 1.0 / self.scale_)\n else:\n if self.with_centering:\n X -= self.center_\n if self.with_scaling:\n X /= self.scale_\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._data.StandardScaler.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._data.StandardScaler.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/__init__/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.StandardScaler.__init__.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If False, try to avoid a copy and do inplace scaling instead.\nThis is not guaranteed to always work inplace; e.g. if the data is\nnot a NumPy array or scipy.sparse CSR matrix, a copy may still be\nreturned." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/__init__/with_mean", + "name": "with_mean", + "qname": "sklearn.preprocessing._data.StandardScaler.__init__.with_mean", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, center the data before scaling.\nThis does not work (and will raise an exception) when attempted on\nsparse matrices, because centering them entails building a dense\nmatrix which in common use cases is likely to be too large to fit in\nmemory." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/__init__/with_std", + "name": "with_std", + "qname": "sklearn.preprocessing._data.StandardScaler.__init__.with_std", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, scale the data to unit variance (or equivalently,\nunit standard deviation)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Standardize features by removing the mean and scaling to unit variance\n\nThe standard score of a sample `x` is calculated as:\n\n z = (x - u) / s\n\nwhere `u` is the mean of the training samples or zero if `with_mean=False`,\nand `s` is the standard deviation of the training samples or one if\n`with_std=False`.\n\nCentering and scaling happen independently on each feature by computing\nthe relevant statistics on the samples in the training set. Mean and\nstandard deviation are then stored to be used on later data using\n:meth:`transform`.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators: they might behave badly if the\nindividual features do not more or less look like standard normally\ndistributed data (e.g. Gaussian with 0 mean and unit variance).\n\nFor instance many elements used in the objective function of\na learning algorithm (such as the RBF kernel of Support Vector\nMachines or the L1 and L2 regularizers of linear models) assume that\nall features are centered around 0 and have variance in the same\norder. If a feature has a variance that is orders of magnitude larger\nthat others, it might dominate the objective function and make the\nestimator unable to learn from other features correctly as expected.\n\nThis scaler can also be applied to sparse CSR or CSC matrices by passing\n`with_mean=False` to avoid breaking the sparsity structure of the data.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, copy=True, with_mean=True, with_std=True):\n self.with_mean = with_mean\n self.with_std = with_std\n self.copy = copy" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._data.StandardScaler._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._data.StandardScaler._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'allow_nan': True,\n 'preserves_dtype': [np.float64, np.float32]}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/_reset", + "name": "_reset", + "qname": "sklearn.preprocessing._data.StandardScaler._reset", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/_reset/self", + "name": "self", + "qname": "sklearn.preprocessing._data.StandardScaler._reset.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": 
false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Reset internal data-dependent state of the scaler, if necessary.\n\n__init__ parameters are not touched.", + "docstring": "Reset internal data-dependent state of the scaler, if necessary.\n\n__init__ parameters are not touched.", + "code": " def _reset(self):\n \"\"\"Reset internal data-dependent state of the scaler, if necessary.\n\n __init__ parameters are not touched.\n \"\"\"\n\n # Checking one attribute is enough, becase they are all set together\n # in partial_fit\n if hasattr(self, 'scale_'):\n del self.scale_\n del self.n_samples_seen_\n del self.mean_\n del self.var_" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/fit", + "name": "fit", + "qname": "sklearn.preprocessing._data.StandardScaler.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.StandardScaler.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.StandardScaler.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to compute the mean and standard deviation\nused for later scaling along the features axis." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.StandardScaler.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored." + }, + "type": { + "kind": "NamedType", + "name": "None" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.preprocessing._data.StandardScaler.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Individual weights for each sample.\n\n.. versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the mean and std to be used for later scaling.", + "docstring": "Compute the mean and std to be used for later scaling.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample.\n\n .. 
versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler.\n\nReturns\n-------\nself : object\n Fitted scaler.", + "code": " def fit(self, X, y=None, sample_weight=None):\n \"\"\"Compute the mean and std to be used for later scaling.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample.\n\n .. versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n\n # Reset internal state before fitting\n self._reset()\n return self.partial_fit(X, y, sample_weight)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.preprocessing._data.StandardScaler.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.StandardScaler.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/inverse_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.StandardScaler.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to scale along the features axis." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/inverse_transform/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.StandardScaler.inverse_transform.copy", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "None", + "description": "Copy the input X or not." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Scale back the data to the original representation", + "docstring": "Scale back the data to the original representation\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis.\ncopy : bool, default=None\n Copy the input X or not.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.", + "code": " def inverse_transform(self, X, copy=None):\n \"\"\"Scale back the data to the original representation\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to scale along the features axis.\n copy : bool, default=None\n Copy the input X or not.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n check_is_fitted(self)\n\n copy = copy if copy is not None else self.copy\n if sparse.issparse(X):\n if self.with_mean:\n raise ValueError(\n \"Cannot uncenter sparse matrices: pass `with_mean=False` \"\n \"instead See docstring for motivation and alternatives.\")\n if not sparse.isspmatrix_csr(X):\n X = X.tocsr()\n copy = False\n if copy:\n X = X.copy()\n if self.scale_ is not None:\n inplace_column_scale(X, self.scale_)\n else:\n X = np.asarray(X)\n if copy:\n X = X.copy()\n if self.with_std:\n X *= self.scale_\n if self.with_mean:\n X += self.mean_\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/partial_fit", + "name": "partial_fit", + "qname": "sklearn.preprocessing._data.StandardScaler.partial_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/partial_fit/self", + "name": "self", + "qname": "sklearn.preprocessing._data.StandardScaler.partial_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/partial_fit/X", + "name": "X", + "qname": "sklearn.preprocessing._data.StandardScaler.partial_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to compute the mean and standard deviation\nused for later scaling along the features axis." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/partial_fit/y", + "name": "y", + "qname": "sklearn.preprocessing._data.StandardScaler.partial_fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored." 
+ }, + "type": { + "kind": "NamedType", + "name": "None" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/partial_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.preprocessing._data.StandardScaler.partial_fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Individual weights for each sample.\n\n.. versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Online computation of mean and std on X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nThe algorithm for incremental mean and std is given in Equation 1.5a,b\nin Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. \"Algorithms\nfor computing the sample variance: Analysis and recommendations.\"\nThe American Statistician 37.3 (1983): 242-247:", + "docstring": "Online computation of mean and std on X for later scaling.\n\nAll of X is processed as a single batch. This is intended for cases\nwhen :meth:`fit` is not feasible due to very large number of\n`n_samples` or because X is read from a continuous stream.\n\nThe algorithm for incremental mean and std is given in Equation 1.5a,b\nin Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. \"Algorithms\nfor computing the sample variance: Analysis and recommendations.\"\nThe American Statistician 37.3 (1983): 242-247:\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\ny : None\n Ignored.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample.\n\n .. versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler.\n\nReturns\n-------\nself : object\n Fitted scaler.", + "code": " def partial_fit(self, X, y=None, sample_weight=None):\n \"\"\"\n Online computation of mean and std on X for later scaling.\n\n All of X is processed as a single batch. This is intended for cases\n when :meth:`fit` is not feasible due to very large number of\n `n_samples` or because X is read from a continuous stream.\n\n The algorithm for incremental mean and std is given in Equation 1.5a,b\n in Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. \"Algorithms\n for computing the sample variance: Analysis and recommendations.\"\n The American Statistician 37.3 (1983): 242-247:\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data used to compute the mean and standard deviation\n used for later scaling along the features axis.\n\n y : None\n Ignored.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Individual weights for each sample.\n\n .. 
versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler.\n\n Returns\n -------\n self : object\n Fitted scaler.\n \"\"\"\n first_call = not hasattr(self, \"n_samples_seen_\")\n X = self._validate_data(X, accept_sparse=('csr', 'csc'),\n estimator=self, dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan', reset=first_call)\n n_features = X.shape[1]\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=X.dtype)\n\n # Even in the case of `with_mean=False`, we update the mean anyway\n # This is needed for the incremental computation of the var\n # See incr_mean_variance_axis and _incremental_mean_variance_axis\n\n # if n_samples_seen_ is an integer (i.e. no missing values), we need to\n # transform it to a NumPy array of shape (n_features,) required by\n # incr_mean_variance_axis and _incremental_variance_axis\n dtype = np.int64 if sample_weight is None else X.dtype\n if not hasattr(self, 'n_samples_seen_'):\n self.n_samples_seen_ = np.zeros(n_features, dtype=dtype)\n elif np.size(self.n_samples_seen_) == 1:\n self.n_samples_seen_ = np.repeat(\n self.n_samples_seen_, X.shape[1])\n self.n_samples_seen_ = \\\n self.n_samples_seen_.astype(dtype, copy=False)\n\n if sparse.issparse(X):\n if self.with_mean:\n raise ValueError(\n \"Cannot center sparse matrices: pass `with_mean=False` \"\n \"instead. See docstring for motivation and alternatives.\")\n sparse_constructor = (sparse.csr_matrix\n if X.format == 'csr' else sparse.csc_matrix)\n\n if self.with_std:\n # First pass\n if not hasattr(self, 'scale_'):\n self.mean_, self.var_, self.n_samples_seen_ = \\\n mean_variance_axis(X, axis=0, weights=sample_weight,\n return_sum_weights=True)\n # Next passes\n else:\n self.mean_, self.var_, self.n_samples_seen_ = \\\n incr_mean_variance_axis(X, axis=0,\n last_mean=self.mean_,\n last_var=self.var_,\n last_n=self.n_samples_seen_,\n weights=sample_weight)\n # We force the mean and variance to float64 for large arrays\n # See https://github.com/scikit-learn/scikit-learn/pull/12338\n self.mean_ = self.mean_.astype(np.float64, copy=False)\n self.var_ = self.var_.astype(np.float64, copy=False)\n else:\n self.mean_ = None # as with_mean must be False for sparse\n self.var_ = None\n weights = _check_sample_weight(sample_weight, X)\n sum_weights_nan = weights @ sparse_constructor(\n (np.isnan(X.data), X.indices, X.indptr),\n shape=X.shape)\n self.n_samples_seen_ += (\n (np.sum(weights) - sum_weights_nan).astype(dtype)\n )\n else:\n # First pass\n if not hasattr(self, 'scale_'):\n self.mean_ = .0\n if self.with_std:\n self.var_ = .0\n else:\n self.var_ = None\n\n if not self.with_mean and not self.with_std:\n self.mean_ = None\n self.var_ = None\n self.n_samples_seen_ += X.shape[0] - np.isnan(X).sum(axis=0)\n\n elif sample_weight is not None:\n self.mean_, self.var_, self.n_samples_seen_ = \\\n _incremental_weighted_mean_and_var(X, sample_weight,\n self.mean_,\n self.var_,\n self.n_samples_seen_)\n else:\n self.mean_, self.var_, self.n_samples_seen_ = \\\n _incremental_mean_and_var(X, self.mean_, self.var_,\n self.n_samples_seen_)\n\n # for backward-compatibility, reduce n_samples_seen_ to an integer\n # if the number of samples is the same for each feature (i.e. 
no\n # missing values)\n if np.ptp(self.n_samples_seen_) == 0:\n self.n_samples_seen_ = self.n_samples_seen_[0]\n\n if self.with_std:\n self.scale_ = _handle_zeros_in_scale(np.sqrt(self.var_))\n else:\n self.scale_ = None\n\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/transform", + "name": "transform", + "qname": "sklearn.preprocessing._data.StandardScaler.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._data.StandardScaler.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.StandardScaler.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "The data used to scale along the features axis." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "{array-like" + }, + { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/StandardScaler/transform/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.StandardScaler.transform.copy", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "None", + "description": "Copy the input X or not." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform standardization by centering and scaling", + "docstring": "Perform standardization by centering and scaling\n\nParameters\n----------\nX : {array-like, sparse matrix of shape (n_samples, n_features)\n The data used to scale along the features axis.\ncopy : bool, default=None\n Copy the input X or not.\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.", + "code": " def transform(self, X, copy=None):\n \"\"\"Perform standardization by centering and scaling\n\n Parameters\n ----------\n X : {array-like, sparse matrix of shape (n_samples, n_features)\n The data used to scale along the features axis.\n copy : bool, default=None\n Copy the input X or not.\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Transformed array.\n \"\"\"\n check_is_fitted(self)\n\n copy = copy if copy is not None else self.copy\n X = self._validate_data(X, reset=False,\n accept_sparse='csr', copy=copy,\n estimator=self, dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n\n if sparse.issparse(X):\n if self.with_mean:\n raise ValueError(\n \"Cannot center sparse matrices: pass `with_mean=False` \"\n \"instead. 
See docstring for motivation and alternatives.\")\n if self.scale_ is not None:\n inplace_column_scale(X, 1 / self.scale_)\n else:\n if self.with_mean:\n X -= self.mean_\n if self.with_std:\n X /= self.scale_\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/_handle_zeros_in_scale", + "name": "_handle_zeros_in_scale", + "qname": "sklearn.preprocessing._data._handle_zeros_in_scale", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/_handle_zeros_in_scale/scale", + "name": "scale", + "qname": "sklearn.preprocessing._data._handle_zeros_in_scale.scale", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/_handle_zeros_in_scale/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data._handle_zeros_in_scale.copy", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Makes sure that whenever scale is zero, we handle it correctly.\n\nThis happens in most scalers when we have constant features.", + "docstring": "Makes sure that whenever scale is zero, we handle it correctly.\n\nThis happens in most scalers when we have constant features.", + "code": "def _handle_zeros_in_scale(scale, copy=True):\n \"\"\"Makes sure that whenever scale is zero, we handle it correctly.\n\n This happens in most scalers when we have constant features.\n \"\"\"\n\n # if we are fitting on 1D arrays, scale might be a scalar\n if np.isscalar(scale):\n if scale == .0:\n scale = 1.\n return scale\n elif isinstance(scale, np.ndarray):\n if copy:\n # New array to avoid side-effects\n scale = scale.copy()\n scale[scale == 0.0] = 1.0\n return scale" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/add_dummy_feature", + "name": "add_dummy_feature", + "qname": "sklearn.preprocessing._data.add_dummy_feature", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/add_dummy_feature/X", + "name": "X", + "qname": "sklearn.preprocessing._data.add_dummy_feature.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/add_dummy_feature/value", + "name": "value", + "qname": "sklearn.preprocessing._data.add_dummy_feature.value", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Value to use for the dummy feature." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Augment dataset with an additional dummy feature.\n\nThis is useful for fitting an intercept term with implementations which\ncannot otherwise fit it directly.", + "docstring": "Augment dataset with an additional dummy feature.\n\nThis is useful for fitting an intercept term with implementations which\ncannot otherwise fit it directly.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\nvalue : float\n Value to use for the dummy feature.\n\nReturns\n-------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features + 1)\n Same data with dummy feature added as first column.\n\nExamples\n--------\n>>> from sklearn.preprocessing import add_dummy_feature\n>>> add_dummy_feature([[0, 1], [1, 0]])\narray([[1., 0., 1.],\n [1., 1., 0.]])", + "code": "def add_dummy_feature(X, value=1.0):\n \"\"\"Augment dataset with an additional dummy feature.\n\n This is useful for fitting an intercept term with implementations which\n cannot otherwise fit it directly.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\n value : float\n Value to use for the dummy feature.\n\n Returns\n -------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features + 1)\n Same data with dummy feature added as first column.\n\n Examples\n --------\n >>> from sklearn.preprocessing import add_dummy_feature\n >>> add_dummy_feature([[0, 1], [1, 0]])\n array([[1., 0., 1.],\n [1., 1., 0.]])\n \"\"\"\n X = check_array(X, accept_sparse=['csc', 'csr', 'coo'], dtype=FLOAT_DTYPES)\n n_samples, n_features = X.shape\n shape = (n_samples, n_features + 1)\n if sparse.issparse(X):\n if sparse.isspmatrix_coo(X):\n # Shift columns to the right.\n col = X.col + 1\n # Column indices of dummy feature are 0 everywhere.\n col = np.concatenate((np.zeros(n_samples), col))\n # Row indices of dummy feature are 0, ..., n_samples-1.\n row = np.concatenate((np.arange(n_samples), X.row))\n # Prepend the dummy feature n_samples times.\n data = np.concatenate((np.full(n_samples, value), X.data))\n return sparse.coo_matrix((data, (row, col)), shape)\n elif sparse.isspmatrix_csc(X):\n # Shift index pointers since we need to add n_samples elements.\n indptr = X.indptr + n_samples\n # indptr[0] must be 0.\n indptr = np.concatenate((np.array([0]), indptr))\n # Row indices of dummy feature are 0, ..., n_samples-1.\n indices = np.concatenate((np.arange(n_samples), X.indices))\n # Prepend the dummy feature n_samples times.\n data = np.concatenate((np.full(n_samples, value), X.data))\n return sparse.csc_matrix((data, indices, indptr), shape)\n else:\n klass = X.__class__\n return klass(add_dummy_feature(X.tocoo(), value))\n else:\n return np.hstack((np.full((n_samples, 1), value), X))" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/binarize", + "name": "binarize", + "qname": "sklearn.preprocessing._data.binarize", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/binarize/X", + "name": "X", + "qname": "sklearn.preprocessing._data.binarize.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to binarize, element by element.\nscipy.sparse matrices should be in 
CSR or CSC format to avoid an\nun-necessary copy." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/binarize/threshold", + "name": "threshold", + "qname": "sklearn.preprocessing._data.binarize.threshold", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Feature values below or equal to this are replaced by 0, above it by 1.\nThreshold may not be less than 0 for operations on sparse matrices." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/binarize/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.binarize.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "set to False to perform inplace binarization and avoid a copy\n(if the input is already a numpy array or a scipy.sparse CSR / CSC\nmatrix and if axis is 1)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Boolean thresholding of array-like or scipy.sparse matrix.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Boolean thresholding of array-like or scipy.sparse matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to binarize, element by element.\n scipy.sparse matrices should be in CSR or CSC format to avoid an\n un-necessary copy.\n\nthreshold : float, default=0.0\n Feature values below or equal to this are replaced by 0, above it by 1.\n Threshold may not be less than 0 for operations on sparse matrices.\n\ncopy : bool, default=True\n set to False to perform inplace binarization and avoid a copy\n (if the input is already a numpy array or a scipy.sparse CSR / CSC\n matrix and if axis is 1).\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\nSee Also\n--------\nBinarizer : Performs binarization using the Transformer API\n (e.g. as part of a preprocessing :class:`~sklearn.pipeline.Pipeline`).", + "code": "@_deprecate_positional_args\ndef binarize(X, *, threshold=0.0, copy=True):\n \"\"\"Boolean thresholding of array-like or scipy.sparse matrix.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to binarize, element by element.\n scipy.sparse matrices should be in CSR or CSC format to avoid an\n un-necessary copy.\n\n threshold : float, default=0.0\n Feature values below or equal to this are replaced by 0, above it by 1.\n Threshold may not be less than 0 for operations on sparse matrices.\n\n copy : bool, default=True\n set to False to perform inplace binarization and avoid a copy\n (if the input is already a numpy array or a scipy.sparse CSR / CSC\n matrix and if axis is 1).\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\n See Also\n --------\n Binarizer : Performs binarization using the Transformer API\n (e.g. 
as part of a preprocessing :class:`~sklearn.pipeline.Pipeline`).\n \"\"\"\n X = check_array(X, accept_sparse=['csr', 'csc'], copy=copy)\n if sparse.issparse(X):\n if threshold < 0:\n raise ValueError('Cannot binarize a sparse matrix with threshold '\n '< 0')\n cond = X.data > threshold\n not_cond = np.logical_not(cond)\n X.data[cond] = 1\n X.data[not_cond] = 0\n X.eliminate_zeros()\n else:\n cond = X > threshold\n not_cond = np.logical_not(cond)\n X[cond] = 1\n X[not_cond] = 0\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/maxabs_scale", + "name": "maxabs_scale", + "qname": "sklearn.preprocessing._data.maxabs_scale", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/maxabs_scale/X", + "name": "X", + "qname": "sklearn.preprocessing._data.maxabs_scale.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/maxabs_scale/axis", + "name": "axis", + "qname": "sklearn.preprocessing._data.maxabs_scale.axis", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "axis used to scale along. If 0, independently scale each feature,\notherwise (if 1) scale each sample." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/maxabs_scale/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.maxabs_scale.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to False to perform inplace scaling and avoid a copy (if the input\nis already a numpy array)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Scale each feature to the [-1, 1] range without breaking the sparsity.\n\nThis estimator scales each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.", + "docstring": "Scale each feature to the [-1, 1] range without breaking the sparsity.\n\nThis estimator scales each feature individually such\nthat the maximal absolute value of each feature in the\ntraining set will be 1.0.\n\nThis scaler can also be applied to sparse CSR or CSC matrices.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\naxis : int, default=0\n axis used to scale along. If 0, independently scale each feature,\n otherwise (if 1) scale each sample.\n\ncopy : bool, default=True\n Set to False to perform inplace scaling and avoid a copy (if the input\n is already a numpy array).\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.maxabs_scale` unless you know what\n you are doing. 
A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.MaxAbsScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(MaxAbsScaler(), LogisticRegression())`.\n\nSee Also\n--------\nMaxAbsScaler : Performs scaling to the [-1, 1] range using\n the Transformer API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\nNotes\n-----\nNaNs are treated as missing values: disregarded to compute the statistics,\nand maintained during the data transformation.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.", + "code": "@_deprecate_positional_args\ndef maxabs_scale(X, *, axis=0, copy=True):\n \"\"\"Scale each feature to the [-1, 1] range without breaking the sparsity.\n\n This estimator scales each feature individually such\n that the maximal absolute value of each feature in the\n training set will be 1.0.\n\n This scaler can also be applied to sparse CSR or CSC matrices.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data.\n\n axis : int, default=0\n axis used to scale along. If 0, independently scale each feature,\n otherwise (if 1) scale each sample.\n\n copy : bool, default=True\n Set to False to perform inplace scaling and avoid a copy (if the input\n is already a numpy array).\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\n .. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.maxabs_scale` unless you know what\n you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.MaxAbsScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(MaxAbsScaler(), LogisticRegression())`.\n\n See Also\n --------\n MaxAbsScaler : Performs scaling to the [-1, 1] range using\n the Transformer API (e.g. 
as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\n Notes\n -----\n NaNs are treated as missing values: disregarded to compute the statistics,\n and maintained during the data transformation.\n\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n \"\"\" # noqa\n # Unlike the scaler object, this function allows 1d input.\n\n # If copy is required, it will be done inside the scaler object.\n X = check_array(X, accept_sparse=('csr', 'csc'), copy=False,\n ensure_2d=False, dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n original_ndim = X.ndim\n\n if original_ndim == 1:\n X = X.reshape(X.shape[0], 1)\n\n s = MaxAbsScaler(copy=copy)\n if axis == 0:\n X = s.fit_transform(X)\n else:\n X = s.fit_transform(X.T).T\n\n if original_ndim == 1:\n X = X.ravel()\n\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/minmax_scale", + "name": "minmax_scale", + "qname": "sklearn.preprocessing._data.minmax_scale", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/minmax_scale/X", + "name": "X", + "qname": "sklearn.preprocessing._data.minmax_scale.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/minmax_scale/feature_range", + "name": "feature_range", + "qname": "sklearn.preprocessing._data.minmax_scale.feature_range", + "default_value": "(0, 1)", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tuple (min, max)", + "default_value": "(0, 1)", + "description": "Desired range of transformed data." + }, + "type": { + "kind": "NamedType", + "name": "tuple (min, max)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/minmax_scale/axis", + "name": "axis", + "qname": "sklearn.preprocessing._data.minmax_scale.axis", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Axis used to scale along. If 0, independently scale each feature,\notherwise (if 1) scale each sample." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/minmax_scale/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.minmax_scale.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to False to perform inplace scaling and avoid a copy (if the input\nis already a numpy array)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, i.e. 
between\nzero and one.\n\nThe transformation is given by (when ``axis=0``)::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThe transformation is calculated as (when ``axis=0``)::\n\n X_scaled = scale * X + min - X.min(axis=0) * scale\n where scale = (max - min) / (X.max(axis=0) - X.min(axis=0))\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n *minmax_scale* function interface\n to :class:`~sklearn.preprocessing.MinMaxScaler`.", + "docstring": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, i.e. between\nzero and one.\n\nThe transformation is given by (when ``axis=0``)::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThe transformation is calculated as (when ``axis=0``)::\n\n X_scaled = scale * X + min - X.min(axis=0) * scale\n where scale = (max - min) / (X.max(axis=0) - X.min(axis=0))\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n *minmax_scale* function interface\n to :class:`~sklearn.preprocessing.MinMaxScaler`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\n\nfeature_range : tuple (min, max), default=(0, 1)\n Desired range of transformed data.\n\naxis : int, default=0\n Axis used to scale along. If 0, independently scale each feature,\n otherwise (if 1) scale each sample.\n\ncopy : bool, default=True\n Set to False to perform inplace scaling and avoid a copy (if the input\n is already a numpy array).\n\nReturns\n-------\nX_tr : ndarray of shape (n_samples, n_features)\n The transformed data.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.minmax_scale` unless you know\n what you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.MinMaxScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(MinMaxScaler(), LogisticRegression())`.\n\nSee Also\n--------\nMinMaxScaler : Performs scaling to a given range using the Transformer\n API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\nNotes\n-----\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.", + "code": "@_deprecate_positional_args\ndef minmax_scale(X, feature_range=(0, 1), *, axis=0, copy=True):\n \"\"\"Transform features by scaling each feature to a given range.\n\n This estimator scales and translates each feature individually such\n that it is in the given range on the training set, i.e. 
between\n zero and one.\n\n The transformation is given by (when ``axis=0``)::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\n where min, max = feature_range.\n\n The transformation is calculated as (when ``axis=0``)::\n\n X_scaled = scale * X + min - X.min(axis=0) * scale\n where scale = (max - min) / (X.max(axis=0) - X.min(axis=0))\n\n This transformation is often used as an alternative to zero mean,\n unit variance scaling.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.17\n *minmax_scale* function interface\n to :class:`~sklearn.preprocessing.MinMaxScaler`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data.\n\n feature_range : tuple (min, max), default=(0, 1)\n Desired range of transformed data.\n\n axis : int, default=0\n Axis used to scale along. If 0, independently scale each feature,\n otherwise (if 1) scale each sample.\n\n copy : bool, default=True\n Set to False to perform inplace scaling and avoid a copy (if the input\n is already a numpy array).\n\n Returns\n -------\n X_tr : ndarray of shape (n_samples, n_features)\n The transformed data.\n\n .. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.minmax_scale` unless you know\n what you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.MinMaxScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(MinMaxScaler(), LogisticRegression())`.\n\n See Also\n --------\n MinMaxScaler : Performs scaling to a given range using the Transformer\n API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\n Notes\n -----\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n \"\"\" # noqa\n # Unlike the scaler object, this function allows 1d input.\n # If copy is required, it will be done inside the scaler object.\n X = check_array(X, copy=False, ensure_2d=False,\n dtype=FLOAT_DTYPES, force_all_finite='allow-nan')\n original_ndim = X.ndim\n\n if original_ndim == 1:\n X = X.reshape(X.shape[0], 1)\n\n s = MinMaxScaler(feature_range=feature_range, copy=copy)\n if axis == 0:\n X = s.fit_transform(X)\n else:\n X = s.fit_transform(X.T).T\n\n if original_ndim == 1:\n X = X.ravel()\n\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/normalize", + "name": "normalize", + "qname": "sklearn.preprocessing._data.normalize", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/normalize/X", + "name": "X", + "qname": "sklearn.preprocessing._data.normalize.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to normalize, element by element.\nscipy.sparse matrices should be in CSR format to avoid an\nun-necessary copy." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/normalize/norm", + "name": "norm", + "qname": "sklearn.preprocessing._data.normalize.norm", + "default_value": "'l2'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'l1', 'l2', 'max'}", + "default_value": "'l2'", + "description": "The norm to use to normalize each non zero sample (or each non-zero\nfeature if axis is 0)." + }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1", "max"] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/normalize/axis", + "name": "axis", + "qname": "sklearn.preprocessing._data.normalize.axis", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{0, 1}", + "default_value": "1", + "description": "axis used to normalize the data along. If 1, independently normalize\neach sample, otherwise (if 0) normalize each feature." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/normalize/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.normalize.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "set to False to perform inplace row normalization and avoid a\ncopy (if the input is already a numpy array or a scipy.sparse\nCSR matrix and if axis is 1)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/normalize/return_norm", + "name": "return_norm", + "qname": "sklearn.preprocessing._data.normalize.return_norm", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "whether to return the computed norms" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Scale input vectors individually to unit norm (vector length).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Scale input vectors individually to unit norm (vector length).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to normalize, element by element.\n scipy.sparse matrices should be in CSR format to avoid an\n un-necessary copy.\n\nnorm : {'l1', 'l2', 'max'}, default='l2'\n The norm to use to normalize each non zero sample (or each non-zero\n feature if axis is 0).\n\naxis : {0, 1}, default=1\n axis used to normalize the data along. 
If 1, independently normalize\n each sample, otherwise (if 0) normalize each feature.\n\ncopy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSR matrix and if axis is 1).\n\nreturn_norm : bool, default=False\n whether to return the computed norms\n\nReturns\n-------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Normalized input X.\n\nnorms : ndarray of shape (n_samples, ) if axis=1 else (n_features, )\n An array of norms along given axis for X.\n When X is sparse, a NotImplementedError will be raised\n for norm 'l1' or 'l2'.\n\nSee Also\n--------\nNormalizer : Performs normalization using the Transformer API\n (e.g. as part of a preprocessing :class:`~sklearn.pipeline.Pipeline`).\n\nNotes\n-----\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.", + "code": "@_deprecate_positional_args\ndef normalize(X, norm='l2', *, axis=1, copy=True, return_norm=False):\n \"\"\"Scale input vectors individually to unit norm (vector length).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to normalize, element by element.\n scipy.sparse matrices should be in CSR format to avoid an\n un-necessary copy.\n\n norm : {'l1', 'l2', 'max'}, default='l2'\n The norm to use to normalize each non zero sample (or each non-zero\n feature if axis is 0).\n\n axis : {0, 1}, default=1\n axis used to normalize the data along. If 1, independently normalize\n each sample, otherwise (if 0) normalize each feature.\n\n copy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSR matrix and if axis is 1).\n\n return_norm : bool, default=False\n whether to return the computed norms\n\n Returns\n -------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Normalized input X.\n\n norms : ndarray of shape (n_samples, ) if axis=1 else (n_features, )\n An array of norms along given axis for X.\n When X is sparse, a NotImplementedError will be raised\n for norm 'l1' or 'l2'.\n\n See Also\n --------\n Normalizer : Performs normalization using the Transformer API\n (e.g. 
as part of a preprocessing :class:`~sklearn.pipeline.Pipeline`).\n\n Notes\n -----\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n\n \"\"\"\n if norm not in ('l1', 'l2', 'max'):\n raise ValueError(\"'%s' is not a supported norm\" % norm)\n\n if axis == 0:\n sparse_format = 'csc'\n elif axis == 1:\n sparse_format = 'csr'\n else:\n raise ValueError(\"'%d' is not a supported axis\" % axis)\n\n X = check_array(X, accept_sparse=sparse_format, copy=copy,\n estimator='the normalize function', dtype=FLOAT_DTYPES)\n if axis == 0:\n X = X.T\n\n if sparse.issparse(X):\n if return_norm and norm in ('l1', 'l2'):\n raise NotImplementedError(\"return_norm=True is not implemented \"\n \"for sparse matrices with norm 'l1' \"\n \"or norm 'l2'\")\n if norm == 'l1':\n inplace_csr_row_normalize_l1(X)\n elif norm == 'l2':\n inplace_csr_row_normalize_l2(X)\n elif norm == 'max':\n mins, maxes = min_max_axis(X, 1)\n norms = np.maximum(abs(mins), maxes)\n norms_elementwise = norms.repeat(np.diff(X.indptr))\n mask = norms_elementwise != 0\n X.data[mask] /= norms_elementwise[mask]\n else:\n if norm == 'l1':\n norms = np.abs(X).sum(axis=1)\n elif norm == 'l2':\n norms = row_norms(X)\n elif norm == 'max':\n norms = np.max(abs(X), axis=1)\n norms = _handle_zeros_in_scale(norms, copy=False)\n X /= norms[:, np.newaxis]\n\n if axis == 0:\n X = X.T\n\n if return_norm:\n return X, norms\n else:\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/power_transform", + "name": "power_transform", + "qname": "sklearn.preprocessing._data.power_transform", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/power_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.power_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to be transformed using a power transformation." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/power_transform/method", + "name": "method", + "qname": "sklearn.preprocessing._data.power_transform.method", + "default_value": "'yeo-johnson'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'yeo-johnson', 'box-cox'}", + "default_value": "'yeo-johnson'", + "description": "The power transform method. Available methods are:\n\n- 'yeo-johnson' [1]_, works with positive and negative values\n- 'box-cox' [2]_, only works with strictly positive values\n\n.. versionchanged:: 0.23\n The default value of the `method` parameter changed from\n 'box-cox' to 'yeo-johnson' in 0.23." + }, + "type": { + "kind": "EnumType", + "values": ["box-cox", "yeo-johnson"] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/power_transform/standardize", + "name": "standardize", + "qname": "sklearn.preprocessing._data.power_transform.standardize", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to True to apply zero-mean, unit-variance normalization to the\ntransformed output." 
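For the `normalize` entry, a short sketch of the row-wise default and the `return_norm` flag (illustrative data; note the docstring's caveat that `return_norm=True` raises `NotImplementedError` for sparse input with norm 'l1' or 'l2'):

    import numpy as np
    from sklearn.preprocessing import normalize

    X = np.array([[3., 4.],
                  [1., 0.]])
    # axis=1 (the default) rescales each sample/row to unit norm.
    X_l2 = normalize(X, norm='l2')
    print(X_l2[0])                         # [0.6 0.8] -- a 3-4-5 triangle
    X_l1, norms = normalize(X, norm='l1', return_norm=True)
    print(norms)                           # [7. 1.], the per-row l1 norms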
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/power_transform/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.power_transform.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to False to perform inplace computation during transformation." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Power transforms are a family of parametric, monotonic transformations\nthat are applied to make data more Gaussian-like. This is useful for\nmodeling issues related to heteroscedasticity (non-constant variance),\nor other situations where normality is desired.\n\nCurrently, power_transform supports the Box-Cox transform and the\nYeo-Johnson transform. The optimal parameter for stabilizing variance and\nminimizing skewness is estimated through maximum likelihood.\n\nBox-Cox requires input data to be strictly positive, while Yeo-Johnson\nsupports both positive or negative data.\n\nBy default, zero-mean, unit-variance normalization is applied to the\ntransformed data.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Power transforms are a family of parametric, monotonic transformations\nthat are applied to make data more Gaussian-like. This is useful for\nmodeling issues related to heteroscedasticity (non-constant variance),\nor other situations where normality is desired.\n\nCurrently, power_transform supports the Box-Cox transform and the\nYeo-Johnson transform. The optimal parameter for stabilizing variance and\nminimizing skewness is estimated through maximum likelihood.\n\nBox-Cox requires input data to be strictly positive, while Yeo-Johnson\nsupports both positive or negative data.\n\nBy default, zero-mean, unit-variance normalization is applied to the\ntransformed data.\n\nRead more in the :ref:`User Guide `.\n\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to be transformed using a power transformation.\n\nmethod : {'yeo-johnson', 'box-cox'}, default='yeo-johnson'\n The power transform method. Available methods are:\n\n - 'yeo-johnson' [1]_, works with positive and negative values\n - 'box-cox' [2]_, only works with strictly positive values\n\n .. versionchanged:: 0.23\n The default value of the `method` parameter changed from\n 'box-cox' to 'yeo-johnson' in 0.23.\n\nstandardize : bool, default=True\n Set to True to apply zero-mean, unit-variance normalization to the\n transformed output.\n\ncopy : bool, default=True\n Set to False to perform inplace computation during transformation.\n\nReturns\n-------\nX_trans : ndarray of shape (n_samples, n_features)\n The transformed data.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import power_transform\n>>> data = [[1, 2], [3, 2], [4, 5]]\n>>> print(power_transform(data, method='box-cox'))\n[[-1.332... -0.707...]\n [ 0.256... -0.707...]\n [ 1.076... 1.414...]]\n\n.. warning:: Risk of data leak.\n Do not use :func:`~sklearn.preprocessing.power_transform` unless you\n know what you are doing. A common mistake is to apply it to the entire\n data *before* splitting into training and test sets. 
This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.PowerTransformer` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking, e.g.: `pipe = make_pipeline(PowerTransformer(),\n LogisticRegression())`.\n\nSee Also\n--------\nPowerTransformer : Equivalent transformation with the\n Transformer API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\nquantile_transform : Maps data to a standard normal distribution with\n the parameter `output_distribution='normal'`.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in ``fit``, and maintained\nin ``transform``.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nReferences\n----------\n\n.. [1] I.K. Yeo and R.A. Johnson, \"A new family of power transformations to\n improve normality or symmetry.\" Biometrika, 87(4), pp.954-959,\n (2000).\n\n.. [2] G.E.P. Box and D.R. Cox, \"An Analysis of Transformations\", Journal\n of the Royal Statistical Society B, 26, 211-252 (1964).", + "code": "@_deprecate_positional_args\ndef power_transform(X, method='yeo-johnson', *, standardize=True, copy=True):\n \"\"\"\n Power transforms are a family of parametric, monotonic transformations\n that are applied to make data more Gaussian-like. This is useful for\n modeling issues related to heteroscedasticity (non-constant variance),\n or other situations where normality is desired.\n\n Currently, power_transform supports the Box-Cox transform and the\n Yeo-Johnson transform. The optimal parameter for stabilizing variance and\n minimizing skewness is estimated through maximum likelihood.\n\n Box-Cox requires input data to be strictly positive, while Yeo-Johnson\n supports both positive or negative data.\n\n By default, zero-mean, unit-variance normalization is applied to the\n transformed data.\n\n Read more in the :ref:`User Guide `.\n\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to be transformed using a power transformation.\n\n method : {'yeo-johnson', 'box-cox'}, default='yeo-johnson'\n The power transform method. Available methods are:\n\n - 'yeo-johnson' [1]_, works with positive and negative values\n - 'box-cox' [2]_, only works with strictly positive values\n\n .. versionchanged:: 0.23\n The default value of the `method` parameter changed from\n 'box-cox' to 'yeo-johnson' in 0.23.\n\n standardize : bool, default=True\n Set to True to apply zero-mean, unit-variance normalization to the\n transformed output.\n\n copy : bool, default=True\n Set to False to perform inplace computation during transformation.\n\n Returns\n -------\n X_trans : ndarray of shape (n_samples, n_features)\n The transformed data.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.preprocessing import power_transform\n >>> data = [[1, 2], [3, 2], [4, 5]]\n >>> print(power_transform(data, method='box-cox'))\n [[-1.332... -0.707...]\n [ 0.256... -0.707...]\n [ 1.076... 1.414...]]\n\n .. warning:: Risk of data leak.\n Do not use :func:`~sklearn.preprocessing.power_transform` unless you\n know what you are doing. A common mistake is to apply it to the entire\n data *before* splitting into training and test sets. 
This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.PowerTransformer` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking, e.g.: `pipe = make_pipeline(PowerTransformer(),\n LogisticRegression())`.\n\n See Also\n --------\n PowerTransformer : Equivalent transformation with the\n Transformer API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\n quantile_transform : Maps data to a standard normal distribution with\n the parameter `output_distribution='normal'`.\n\n Notes\n -----\n NaNs are treated as missing values: disregarded in ``fit``, and maintained\n in ``transform``.\n\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n\n References\n ----------\n\n .. [1] I.K. Yeo and R.A. Johnson, \"A new family of power transformations to\n improve normality or symmetry.\" Biometrika, 87(4), pp.954-959,\n (2000).\n\n .. [2] G.E.P. Box and D.R. Cox, \"An Analysis of Transformations\", Journal\n of the Royal Statistical Society B, 26, 211-252 (1964).\n \"\"\"\n pt = PowerTransformer(method=method, standardize=standardize, copy=copy)\n return pt.fit_transform(X)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/quantile_transform", + "name": "quantile_transform", + "qname": "sklearn.preprocessing._data.quantile_transform", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/quantile_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._data.quantile_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to transform." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/quantile_transform/axis", + "name": "axis", + "qname": "sklearn.preprocessing._data.quantile_transform.axis", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Axis used to compute the means and standard deviations along. If 0,\ntransform each feature, otherwise (if 1) transform each sample." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/quantile_transform/n_quantiles", + "name": "n_quantiles", + "qname": "sklearn.preprocessing._data.quantile_transform.n_quantiles", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000 or n_samples", + "description": "Number of quantiles to be computed. It corresponds to the number\nof landmarks used to discretize the cumulative distribution function.\nIf n_quantiles is larger than the number of samples, n_quantiles is set\nto the number of samples as a larger number of quantiles does not give\na better approximation of the cumulative distribution function\nestimator." 
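A sketch for `power_transform` contrasting the two documented methods (synthetic right-skewed data, chosen strictly positive so that Box-Cox is valid):

    import numpy as np
    from sklearn.preprocessing import power_transform

    rng = np.random.RandomState(0)
    X = rng.lognormal(size=(100, 1))            # skewed, strictly positive
    X_bc = power_transform(X, method='box-cox') # positive input only
    X_yj = power_transform(X)                   # 'yeo-johnson' default
    # standardize=True (the default) leaves zero mean and unit variance.
    print(X_yj.mean(), X_yj.std())              # approximately 0.0 and 1.0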
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/quantile_transform/output_distribution", + "name": "output_distribution", + "qname": "sklearn.preprocessing._data.quantile_transform.output_distribution", + "default_value": "'uniform'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'uniform', 'normal'}", + "default_value": "'uniform'", + "description": "Marginal distribution for the transformed data. The choices are\n'uniform' (default) or 'normal'." + }, + "type": { + "kind": "EnumType", + "values": ["uniform", "normal"] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/quantile_transform/ignore_implicit_zeros", + "name": "ignore_implicit_zeros", + "qname": "sklearn.preprocessing._data.quantile_transform.ignore_implicit_zeros", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Only applies to sparse matrices. If True, the sparse entries of the\nmatrix are discarded to compute the quantile statistics. If False,\nthese entries are treated as zeros." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/quantile_transform/subsample", + "name": "subsample", + "qname": "sklearn.preprocessing._data.quantile_transform.subsample", + "default_value": "int(100000.0)", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1e5", + "description": "Maximum number of samples used to estimate the quantiles for\ncomputational efficiency. Note that the subsampling procedure may\ndiffer for value-identical sparse and dense matrices." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/quantile_transform/random_state", + "name": "random_state", + "qname": "sklearn.preprocessing._data.quantile_transform.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for subsampling and smoothing\nnoise.\nPlease see ``subsample`` for more details.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary `" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/quantile_transform/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.quantile_transform.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Set to False to perform inplace transformation and avoid a copy (if the\ninput is already a numpy array). If True, a copy of `X` is transformed,\nleaving the original `X` unchanged\n\n..versionchanged:: 0.23\n The default value of `copy` changed from False to True in 0.23." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. 
Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Transform features using quantiles information.\n\nThis method transforms the features to follow a uniform or a normal\ndistribution. Therefore, for a given feature, this transformation tends\nto spread out the most frequent values. It also reduces the impact of\n(marginal) outliers: this is therefore a robust preprocessing scheme.\n\nThe transformation is applied on each feature independently. First an\nestimate of the cumulative distribution function of a feature is\nused to map the original values to a uniform distribution. The obtained\nvalues are then mapped to the desired output distribution using the\nassociated quantile function. Features values of new/unseen data that fall\nbelow or above the fitted range will be mapped to the bounds of the output\ndistribution. Note that this transform is non-linear. It may distort linear\ncorrelations between variables measured at the same scale but renders\nvariables measured at different scales more directly comparable.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to transform.\n\naxis : int, default=0\n Axis used to compute the means and standard deviations along. If 0,\n transform each feature, otherwise (if 1) transform each sample.\n\nn_quantiles : int, default=1000 or n_samples\n Number of quantiles to be computed. It corresponds to the number\n of landmarks used to discretize the cumulative distribution function.\n If n_quantiles is larger than the number of samples, n_quantiles is set\n to the number of samples as a larger number of quantiles does not give\n a better approximation of the cumulative distribution function\n estimator.\n\noutput_distribution : {'uniform', 'normal'}, default='uniform'\n Marginal distribution for the transformed data. The choices are\n 'uniform' (default) or 'normal'.\n\nignore_implicit_zeros : bool, default=False\n Only applies to sparse matrices. If True, the sparse entries of the\n matrix are discarded to compute the quantile statistics. If False,\n these entries are treated as zeros.\n\nsubsample : int, default=1e5\n Maximum number of samples used to estimate the quantiles for\n computational efficiency. 
Note that the subsampling procedure may\n differ for value-identical sparse and dense matrices.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for subsampling and smoothing\n noise.\n Please see ``subsample`` for more details.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `\n\ncopy : bool, default=True\n Set to False to perform inplace transformation and avoid a copy (if the\n input is already a numpy array). If True, a copy of `X` is transformed,\n leaving the original `X` unchanged\n\n ..versionchanged:: 0.23\n The default value of `copy` changed from False to True in 0.23.\n\nReturns\n-------\nXt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.preprocessing import quantile_transform\n>>> rng = np.random.RandomState(0)\n>>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n>>> quantile_transform(X, n_quantiles=10, random_state=0, copy=True)\narray([...])\n\nSee Also\n--------\nQuantileTransformer : Performs quantile-based scaling using the\n Transformer API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\npower_transform : Maps data to a normal distribution using a\n power transformation.\nscale : Performs standardization that is faster, but less robust\n to outliers.\nrobust_scale : Performs robust standardization that removes the influence\n of outliers but does not put outliers and inliers on the same scale.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.quantile_transform` unless\n you know what you are doing. A common mistake is to apply it\n to the entire data *before* splitting into training and\n test sets. This will bias the model evaluation because\n information would have leaked from the test set to the\n training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.QuantileTransformer` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking:`pipe = make_pipeline(QuantileTransformer(),\n LogisticRegression())`.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.", + "code": "@_deprecate_positional_args\ndef quantile_transform(X, *, axis=0, n_quantiles=1000,\n output_distribution='uniform',\n ignore_implicit_zeros=False,\n subsample=int(1e5),\n random_state=None,\n copy=True):\n \"\"\"Transform features using quantiles information.\n\n This method transforms the features to follow a uniform or a normal\n distribution. Therefore, for a given feature, this transformation tends\n to spread out the most frequent values. It also reduces the impact of\n (marginal) outliers: this is therefore a robust preprocessing scheme.\n\n The transformation is applied on each feature independently. First an\n estimate of the cumulative distribution function of a feature is\n used to map the original values to a uniform distribution. The obtained\n values are then mapped to the desired output distribution using the\n associated quantile function. Features values of new/unseen data that fall\n below or above the fitted range will be mapped to the bounds of the output\n distribution. Note that this transform is non-linear. 
It may distort linear\n correlations between variables measured at the same scale but renders\n variables measured at different scales more directly comparable.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to transform.\n\n axis : int, default=0\n Axis used to compute the means and standard deviations along. If 0,\n transform each feature, otherwise (if 1) transform each sample.\n\n n_quantiles : int, default=1000 or n_samples\n Number of quantiles to be computed. It corresponds to the number\n of landmarks used to discretize the cumulative distribution function.\n If n_quantiles is larger than the number of samples, n_quantiles is set\n to the number of samples as a larger number of quantiles does not give\n a better approximation of the cumulative distribution function\n estimator.\n\n output_distribution : {'uniform', 'normal'}, default='uniform'\n Marginal distribution for the transformed data. The choices are\n 'uniform' (default) or 'normal'.\n\n ignore_implicit_zeros : bool, default=False\n Only applies to sparse matrices. If True, the sparse entries of the\n matrix are discarded to compute the quantile statistics. If False,\n these entries are treated as zeros.\n\n subsample : int, default=1e5\n Maximum number of samples used to estimate the quantiles for\n computational efficiency. Note that the subsampling procedure may\n differ for value-identical sparse and dense matrices.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for subsampling and smoothing\n noise.\n Please see ``subsample`` for more details.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `\n\n copy : bool, default=True\n Set to False to perform inplace transformation and avoid a copy (if the\n input is already a numpy array). If True, a copy of `X` is transformed,\n leaving the original `X` unchanged\n\n ..versionchanged:: 0.23\n The default value of `copy` changed from False to True in 0.23.\n\n Returns\n -------\n Xt : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.preprocessing import quantile_transform\n >>> rng = np.random.RandomState(0)\n >>> X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)\n >>> quantile_transform(X, n_quantiles=10, random_state=0, copy=True)\n array([...])\n\n See Also\n --------\n QuantileTransformer : Performs quantile-based scaling using the\n Transformer API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n power_transform : Maps data to a normal distribution using a\n power transformation.\n scale : Performs standardization that is faster, but less robust\n to outliers.\n robust_scale : Performs robust standardization that removes the influence\n of outliers but does not put outliers and inliers on the same scale.\n\n Notes\n -----\n NaNs are treated as missing values: disregarded in fit, and maintained in\n transform.\n\n .. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.quantile_transform` unless\n you know what you are doing. A common mistake is to apply it\n to the entire data *before* splitting into training and\n test sets. 
This will bias the model evaluation because\n information would have leaked from the test set to the\n training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.QuantileTransformer` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking:`pipe = make_pipeline(QuantileTransformer(),\n LogisticRegression())`.\n\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n \"\"\"\n n = QuantileTransformer(n_quantiles=n_quantiles,\n output_distribution=output_distribution,\n subsample=subsample,\n ignore_implicit_zeros=ignore_implicit_zeros,\n random_state=random_state,\n copy=copy)\n if axis == 0:\n return n.fit_transform(X)\n elif axis == 1:\n return n.fit_transform(X.T).T\n else:\n raise ValueError(\"axis should be either equal to 0 or 1. Got\"\n \" axis={}\".format(axis))" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/robust_scale", + "name": "robust_scale", + "qname": "sklearn.preprocessing._data.robust_scale", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/robust_scale/X", + "name": "X", + "qname": "sklearn.preprocessing._data.robust_scale.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_sample, n_features)", + "default_value": "", + "description": "The data to center and scale." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_sample, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/robust_scale/axis", + "name": "axis", + "qname": "sklearn.preprocessing._data.robust_scale.axis", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "axis used to compute the medians and IQR along. If 0,\nindependently scale each feature, otherwise (if 1) scale\neach sample." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/robust_scale/with_centering", + "name": "with_centering", + "qname": "sklearn.preprocessing._data.robust_scale.with_centering", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, center the data before scaling." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/robust_scale/with_scaling", + "name": "with_scaling", + "qname": "sklearn.preprocessing._data.robust_scale.with_scaling", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, scale the data to unit variance (or equivalently,\nunit standard deviation)." 
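The `quantile_transform` entry already embeds a doctest; expanded here into a standalone sketch using the same data as the documented example:

    import numpy as np
    from sklearn.preprocessing import quantile_transform

    rng = np.random.RandomState(0)
    X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)
    # The default output_distribution='uniform' maps the feature onto
    # [0, 1]; pass output_distribution='normal' for a standard-normal
    # target instead.
    Xt = quantile_transform(X, n_quantiles=10, random_state=0, copy=True)
    print(Xt.min(), Xt.max())                   # 0.0 1.0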
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/robust_scale/quantile_range", + "name": "quantile_range", + "qname": "sklearn.preprocessing._data.robust_scale.quantile_range", + "default_value": "(25.0, 75.0)", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0", + "default_value": "", + "description": "default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR\nQuantile range used to calculate ``scale_``.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "tuple (q_min, q_max)" + }, + { + "kind": "NamedType", + "name": "0.0 < q_min < q_max < 100.0" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/robust_scale/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.robust_scale.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "set to False to perform inplace row normalization and avoid a\ncopy (if the input is already a numpy array or a scipy.sparse\nCSR matrix and if axis is 1)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/robust_scale/unit_variance", + "name": "unit_variance", + "qname": "sklearn.preprocessing._data.robust_scale.unit_variance", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, scale data so that normally distributed features have a\nvariance of 1. In general, if the difference between the x-values of\n``q_max`` and ``q_min`` for a standard normal distribution is greater\nthan 1, the dataset will be scaled down. If less than 1, the dataset\nwill be scaled up.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Standardize a dataset along any axis\n\nCenter to the median and component wise scale\naccording to the interquartile range.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Standardize a dataset along any axis\n\nCenter to the median and component wise scale\naccording to the interquartile range.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_sample, n_features)\n The data to center and scale.\n\naxis : int, default=0\n axis used to compute the medians and IQR along. If 0,\n independently scale each feature, otherwise (if 1) scale\n each sample.\n\nwith_centering : bool, default=True\n If True, center the data before scaling.\n\nwith_scaling : bool, default=True\n If True, scale the data to unit variance (or equivalently,\n unit standard deviation).\n\nquantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0\n default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR\n Quantile range used to calculate ``scale_``.\n\n .. versionadded:: 0.18\n\ncopy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSR matrix and if axis is 1).\n\nunit_variance : bool, default=False\n If True, scale data so that normally distributed features have a\n variance of 1. 
In general, if the difference between the x-values of\n ``q_max`` and ``q_min`` for a standard normal distribution is greater\n than 1, the dataset will be scaled down. If less than 1, the dataset\n will be scaled up.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\nNotes\n-----\nThis implementation will refuse to center scipy.sparse matrices\nsince it would make them non-sparse and would potentially crash the\nprogram with memory exhaustion problems.\n\nInstead the caller is expected to either set explicitly\n`with_centering=False` (in that case, only variance scaling will be\nperformed on the features of the CSR matrix) or to call `X.toarray()`\nif he/she expects the materialized dense array to fit in memory.\n\nTo avoid memory copy the caller should pass a CSR matrix.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.robust_scale` unless you know\n what you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.RobustScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(RobustScaler(), LogisticRegression())`.\n\nSee Also\n--------\nRobustScaler : Performs centering and scaling using the Transformer API\n (e.g. as part of a preprocessing :class:`~sklearn.pipeline.Pipeline`).", + "code": "@_deprecate_positional_args\ndef robust_scale(X, *, axis=0, with_centering=True, with_scaling=True,\n quantile_range=(25.0, 75.0), copy=True, unit_variance=False):\n \"\"\"Standardize a dataset along any axis\n\n Center to the median and component wise scale\n according to the interquartile range.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_sample, n_features)\n The data to center and scale.\n\n axis : int, default=0\n axis used to compute the medians and IQR along. If 0,\n independently scale each feature, otherwise (if 1) scale\n each sample.\n\n with_centering : bool, default=True\n If True, center the data before scaling.\n\n with_scaling : bool, default=True\n If True, scale the data to unit variance (or equivalently,\n unit standard deviation).\n\n quantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0\n default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR\n Quantile range used to calculate ``scale_``.\n\n .. versionadded:: 0.18\n\n copy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSR matrix and if axis is 1).\n\n unit_variance : bool, default=False\n If True, scale data so that normally distributed features have a\n variance of 1. In general, if the difference between the x-values of\n ``q_max`` and ``q_min`` for a standard normal distribution is greater\n than 1, the dataset will be scaled down. If less than 1, the dataset\n will be scaled up.\n\n .. 
versionadded:: 0.24\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\n Notes\n -----\n This implementation will refuse to center scipy.sparse matrices\n since it would make them non-sparse and would potentially crash the\n program with memory exhaustion problems.\n\n Instead the caller is expected to either set explicitly\n `with_centering=False` (in that case, only variance scaling will be\n performed on the features of the CSR matrix) or to call `X.toarray()`\n if he/she expects the materialized dense array to fit in memory.\n\n To avoid memory copy the caller should pass a CSR matrix.\n\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n\n .. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.robust_scale` unless you know\n what you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.RobustScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(RobustScaler(), LogisticRegression())`.\n\n See Also\n --------\n RobustScaler : Performs centering and scaling using the Transformer API\n (e.g. as part of a preprocessing :class:`~sklearn.pipeline.Pipeline`).\n \"\"\"\n X = check_array(X, accept_sparse=('csr', 'csc'), copy=False,\n ensure_2d=False, dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n original_ndim = X.ndim\n\n if original_ndim == 1:\n X = X.reshape(X.shape[0], 1)\n\n s = RobustScaler(with_centering=with_centering, with_scaling=with_scaling,\n quantile_range=quantile_range,\n unit_variance=unit_variance, copy=copy)\n if axis == 0:\n X = s.fit_transform(X)\n else:\n X = s.fit_transform(X.T).T\n\n if original_ndim == 1:\n X = X.ravel()\n\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/scale", + "name": "scale", + "qname": "sklearn.preprocessing._data.scale", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._data/scale/X", + "name": "X", + "qname": "sklearn.preprocessing._data.scale.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to center and scale." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/scale/axis", + "name": "axis", + "qname": "sklearn.preprocessing._data.scale.axis", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "axis used to compute the means and standard deviations along. If 0,\nindependently standardize each feature, otherwise (if 1) standardize\neach sample." 
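A sketch for `robust_scale` showing the median/IQR behaviour the entry describes (illustrative data with one extreme outlier):

    import numpy as np
    from sklearn.preprocessing import robust_scale

    X = np.array([[1.], [2.], [3.], [4.], [1000.]])
    # Centering uses the median (3.0) and scaling uses the IQR
    # (4.0 - 2.0 with the default quantile_range=(25.0, 75.0)), so the
    # outlier does not distort the scaling of the inliers.
    print(robust_scale(X).ravel())              # [-1.  -0.5  0.   0.5  498.5]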
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/scale/with_mean", + "name": "with_mean", + "qname": "sklearn.preprocessing._data.scale.with_mean", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, center the data before scaling." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/scale/with_std", + "name": "with_std", + "qname": "sklearn.preprocessing._data.scale.with_std", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, scale the data to unit variance (or equivalently,\nunit standard deviation)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._data/scale/copy", + "name": "copy", + "qname": "sklearn.preprocessing._data.scale.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "set to False to perform inplace row normalization and avoid a\ncopy (if the input is already a numpy array or a scipy.sparse\nCSC matrix and if axis is 1)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Standardize a dataset along any axis.\n\nCenter to the mean and component wise scale to unit variance.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Standardize a dataset along any axis.\n\nCenter to the mean and component wise scale to unit variance.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to center and scale.\n\naxis : int, default=0\n axis used to compute the means and standard deviations along. If 0,\n independently standardize each feature, otherwise (if 1) standardize\n each sample.\n\nwith_mean : bool, default=True\n If True, center the data before scaling.\n\nwith_std : bool, default=True\n If True, scale the data to unit variance (or equivalently,\n unit standard deviation).\n\ncopy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSC matrix and if axis is 1).\n\nReturns\n-------\nX_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\nNotes\n-----\nThis implementation will refuse to center scipy.sparse matrices\nsince it would make them non-sparse and would potentially crash the\nprogram with memory exhaustion problems.\n\nInstead the caller is expected to either set explicitly\n`with_mean=False` (in that case, only variance scaling will be\nperformed on the features of the CSC matrix) or to call `X.toarray()`\nif he/she expects the materialized dense array to fit in memory.\n\nTo avoid memory copy the caller should pass a CSC matrix.\n\nNaNs are treated as missing values: disregarded to compute the statistics,\nand maintained during the data transformation.\n\nWe use a biased estimator for the standard deviation, equivalent to\n`numpy.std(x, ddof=0)`. 
Note that the choice of `ddof` is unlikely to\naffect model performance.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.scale` unless you know\n what you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.StandardScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(StandardScaler(), LogisticRegression())`.\n\nSee Also\n--------\nStandardScaler : Performs scaling to unit variance using the Transformer\n API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).", + "code": "@_deprecate_positional_args\ndef scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):\n \"\"\"Standardize a dataset along any axis.\n\n Center to the mean and component wise scale to unit variance.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to center and scale.\n\n axis : int, default=0\n axis used to compute the means and standard deviations along. If 0,\n independently standardize each feature, otherwise (if 1) standardize\n each sample.\n\n with_mean : bool, default=True\n If True, center the data before scaling.\n\n with_std : bool, default=True\n If True, scale the data to unit variance (or equivalently,\n unit standard deviation).\n\n copy : bool, default=True\n set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array or a scipy.sparse\n CSC matrix and if axis is 1).\n\n Returns\n -------\n X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\n Notes\n -----\n This implementation will refuse to center scipy.sparse matrices\n since it would make them non-sparse and would potentially crash the\n program with memory exhaustion problems.\n\n Instead the caller is expected to either set explicitly\n `with_mean=False` (in that case, only variance scaling will be\n performed on the features of the CSC matrix) or to call `X.toarray()`\n if he/she expects the materialized dense array to fit in memory.\n\n To avoid memory copy the caller should pass a CSC matrix.\n\n NaNs are treated as missing values: disregarded to compute the statistics,\n and maintained during the data transformation.\n\n We use a biased estimator for the standard deviation, equivalent to\n `numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to\n affect model performance.\n\n For a comparison of the different scalers, transformers, and normalizers,\n see :ref:`examples/preprocessing/plot_all_scaling.py\n `.\n\n .. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.scale` unless you know\n what you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. 
This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.StandardScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(StandardScaler(), LogisticRegression())`.\n\n See Also\n --------\n StandardScaler : Performs scaling to unit variance using the Transformer\n API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\n \"\"\" # noqa\n X = check_array(X, accept_sparse='csc', copy=copy, ensure_2d=False,\n estimator='the scale function', dtype=FLOAT_DTYPES,\n force_all_finite='allow-nan')\n if sparse.issparse(X):\n if with_mean:\n raise ValueError(\n \"Cannot center sparse matrices: pass `with_mean=False` instead\"\n \" See docstring for motivation and alternatives.\")\n if axis != 0:\n raise ValueError(\"Can only scale sparse matrix on axis=0, \"\n \" got axis=%d\" % axis)\n if with_std:\n _, var = mean_variance_axis(X, axis=0)\n var = _handle_zeros_in_scale(var, copy=False)\n inplace_column_scale(X, 1 / np.sqrt(var))\n else:\n X = np.asarray(X)\n if with_mean:\n mean_ = np.nanmean(X, axis)\n if with_std:\n scale_ = np.nanstd(X, axis)\n # Xr is a view on the original array that enables easy use of\n # broadcasting on the axis in which we are interested in\n Xr = np.rollaxis(X, axis)\n if with_mean:\n Xr -= mean_\n mean_1 = np.nanmean(Xr, axis=0)\n # Verify that mean_1 is 'close to zero'. If X contains very\n # large values, mean_1 can also be very large, due to a lack of\n # precision of mean_. In this case, a pre-scaling of the\n # concerned feature is efficient, for instance by its mean or\n # maximum.\n if not np.allclose(mean_1, 0):\n warnings.warn(\"Numerical issues were encountered \"\n \"when centering the data \"\n \"and might not be solved. Dataset may \"\n \"contain too large values. You may need \"\n \"to prescale your features.\")\n Xr -= mean_1\n if with_std:\n scale_ = _handle_zeros_in_scale(scale_, copy=False)\n Xr /= scale_\n if with_mean:\n mean_2 = np.nanmean(Xr, axis=0)\n # If mean_2 is not 'close to zero', it comes from the fact that\n # scale_ is very small so that mean_2 = mean_1/scale_ > 0, even\n # if mean_1 was close to zero. The problem is thus essentially\n # due to the lack of precision of mean_. A solution is then to\n # subtract the mean again:\n if not np.allclose(mean_2, 0):\n warnings.warn(\"Numerical issues were encountered \"\n \"when scaling the data \"\n \"and might not be solved. The standard \"\n \"deviation of the data is probably \"\n \"very close to 0. 
\")\n Xr -= mean_2\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/__init__/n_bins", + "name": "n_bins", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.__init__.n_bins", + "default_value": "5", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or array-like of shape (n_features,)", + "default_value": "5", + "description": "The number of bins to produce. Raises ValueError if ``n_bins < 2``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/__init__/encode", + "name": "encode", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.__init__.encode", + "default_value": "'onehot'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'onehot', 'onehot-dense', 'ordinal'}", + "default_value": "'onehot'", + "description": "Method used to encode the transformed result.\n\nonehot\n Encode the transformed result with one-hot encoding\n and return a sparse matrix. Ignored features are always\n stacked to the right.\nonehot-dense\n Encode the transformed result with one-hot encoding\n and return a dense array. Ignored features are always\n stacked to the right.\nordinal\n Return the bin identifier encoded as an integer value." + }, + "type": { + "kind": "EnumType", + "values": ["ordinal", "onehot-dense", "onehot"] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/__init__/strategy", + "name": "strategy", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.__init__.strategy", + "default_value": "'quantile'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'uniform', 'quantile', 'kmeans'}", + "default_value": "'quantile'", + "description": "Strategy used to define the widths of the bins.\n\nuniform\n All bins in each feature have identical widths.\nquantile\n All bins in each feature have the same number of points.\nkmeans\n Values in each bin have the same nearest center of a 1D k-means\n cluster." + }, + "type": { + "kind": "EnumType", + "values": ["kmeans", "quantile", "uniform"] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/__init__/dtype", + "name": "dtype", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.__init__.dtype", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{np.float32, np.float64}", + "default_value": "None", + "description": "The desired data-type for the output. If None, output dtype is\nconsistent with input dtype. Only np.float32 and np.float64 are\nsupported.\n\n.. 
versionadded:: 0.24" + }, + "type": { + "kind": "EnumType", + "values": [] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Bin continuous data into intervals.\n\nRead more in the :ref:`User Guide <preprocessing_discretization>`.\n\n.. versionadded:: 0.20", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_bins=5, *, encode='onehot', strategy='quantile',\n dtype=None):\n self.n_bins = n_bins\n self.encode = encode\n self.strategy = strategy\n self.dtype = dtype" + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/_validate_n_bins", + "name": "_validate_n_bins", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer._validate_n_bins", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/_validate_n_bins/self", + "name": "self", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer._validate_n_bins.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/_validate_n_bins/n_features", + "name": "n_features", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer._validate_n_bins.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns n_bins_, the number of bins per feature.", + "docstring": "Returns n_bins_, the number of bins per feature.\n ", + "code": " def _validate_n_bins(self, n_features):\n \"\"\"Returns n_bins_, the number of bins per feature.\n \"\"\"\n orig_bins = self.n_bins\n if isinstance(orig_bins, numbers.Number):\n if not isinstance(orig_bins, numbers.Integral):\n raise ValueError(\"{} received an invalid n_bins type. \"\n \"Received {}, expected int.\"\n .format(KBinsDiscretizer.__name__,\n type(orig_bins).__name__))\n if orig_bins < 2:\n raise ValueError(\"{} received an invalid number \"\n \"of bins. Received {}, expected at least 2.\"\n .format(KBinsDiscretizer.__name__, orig_bins))\n return np.full(n_features, orig_bins, dtype=int)\n\n n_bins = check_array(orig_bins, dtype=int, copy=True,\n ensure_2d=False)\n\n if n_bins.ndim > 1 or n_bins.shape[0] != n_features:\n raise ValueError(\"n_bins must be a scalar or array \"\n \"of shape (n_features,).\")\n\n bad_nbins_value = (n_bins < 2) | (n_bins != orig_bins)\n\n violating_indices = np.where(bad_nbins_value)[0]\n if violating_indices.shape[0] > 0:\n indices = \", \".join(str(i) for i in violating_indices)\n raise ValueError(\"{} received an invalid number \"\n \"of bins at indices {}. 
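A small usage sketch of the `KBinsDiscretizer` constructor and the `_validate_n_bins` contract above (per-feature bin counts must be integers of at least 2); the toy array is illustrative:

```python
import numpy as np
from sklearn.preprocessing import KBinsDiscretizer

X = np.array([[-2.0, 1.0], [-1.0, 2.0], [0.0, 3.0], [1.0, 4.0]])

# n_bins may be a scalar or one int per feature, each at least 2.
est = KBinsDiscretizer(n_bins=[3, 2], encode='ordinal', strategy='uniform')
Xt = est.fit_transform(X)
print(Xt)           # ordinal bin ids, one column per feature
print(est.n_bins_)  # [3 2], as validated by _validate_n_bins

# KBinsDiscretizer(n_bins=1) or a float n_bins raises ValueError at fit time.
```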
Number of bins \"\n \"must be at least 2, and must be an int.\"\n .format(KBinsDiscretizer.__name__, indices))\n return n_bins" + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/fit", + "name": "fit", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/fit/X", + "name": "X", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data to be discretized." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored. This parameter exists only for compatibility with\n:class:`~sklearn.pipeline.Pipeline`." + }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the estimator.", + "docstring": "Fit the estimator.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to be discretized.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"\n Fit the estimator.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data to be discretized.\n\n y : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\n Returns\n -------\n self\n \"\"\"\n X = self._validate_data(X, dtype='numeric')\n\n supported_dtype = (np.float64, np.float32)\n if self.dtype in supported_dtype:\n output_dtype = self.dtype\n elif self.dtype is None:\n output_dtype = X.dtype\n else:\n raise ValueError(\n f\"Valid options for 'dtype' are \"\n f\"{supported_dtype + (None,)}. Got dtype={self.dtype} \"\n f\" instead.\"\n )\n\n valid_encode = ('onehot', 'onehot-dense', 'ordinal')\n if self.encode not in valid_encode:\n raise ValueError(\"Valid options for 'encode' are {}. \"\n \"Got encode={!r} instead.\"\n .format(valid_encode, self.encode))\n valid_strategy = ('uniform', 'quantile', 'kmeans')\n if self.strategy not in valid_strategy:\n raise ValueError(\"Valid options for 'strategy' are {}. 
\"\n \"Got strategy={!r} instead.\"\n .format(valid_strategy, self.strategy))\n\n n_features = X.shape[1]\n n_bins = self._validate_n_bins(n_features)\n\n bin_edges = np.zeros(n_features, dtype=object)\n for jj in range(n_features):\n column = X[:, jj]\n col_min, col_max = column.min(), column.max()\n\n if col_min == col_max:\n warnings.warn(\"Feature %d is constant and will be \"\n \"replaced with 0.\" % jj)\n n_bins[jj] = 1\n bin_edges[jj] = np.array([-np.inf, np.inf])\n continue\n\n if self.strategy == 'uniform':\n bin_edges[jj] = np.linspace(col_min, col_max, n_bins[jj] + 1)\n\n elif self.strategy == 'quantile':\n quantiles = np.linspace(0, 100, n_bins[jj] + 1)\n bin_edges[jj] = np.asarray(np.percentile(column, quantiles))\n\n elif self.strategy == 'kmeans':\n from ..cluster import KMeans # fixes import loops\n\n # Deterministic initialization with uniform spacing\n uniform_edges = np.linspace(col_min, col_max, n_bins[jj] + 1)\n init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5\n\n # 1D k-means procedure\n km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1)\n centers = km.fit(column[:, None]).cluster_centers_[:, 0]\n # Must sort, centers may be unsorted even with sorted init\n centers.sort()\n bin_edges[jj] = (centers[1:] + centers[:-1]) * 0.5\n bin_edges[jj] = np.r_[col_min, bin_edges[jj], col_max]\n\n # Remove bins whose width are too small (i.e., <= 1e-8)\n if self.strategy in ('quantile', 'kmeans'):\n mask = np.ediff1d(bin_edges[jj], to_begin=np.inf) > 1e-8\n bin_edges[jj] = bin_edges[jj][mask]\n if len(bin_edges[jj]) - 1 != n_bins[jj]:\n warnings.warn('Bins whose width are too small (i.e., <= '\n '1e-8) in feature %d are removed. Consider '\n 'decreasing the number of bins.' % jj)\n n_bins[jj] = len(bin_edges[jj]) - 1\n\n self.bin_edges_ = bin_edges\n self.n_bins_ = n_bins\n\n if 'onehot' in self.encode:\n self._encoder = OneHotEncoder(\n categories=[np.arange(i) for i in self.n_bins_],\n sparse=self.encode == 'onehot',\n dtype=output_dtype)\n # Fit the OneHotEncoder with toy datasets\n # so that it's ready for use after the KBinsDiscretizer is fitted\n self._encoder.fit(np.zeros((1, len(self.n_bins_))))\n\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/inverse_transform/Xt", + "name": "Xt", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.inverse_transform.Xt", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Transformed data in the binned space." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform discretized data back to original feature space.\n\nNote that this function does not regenerate the original data\ndue to discretization rounding.", + "docstring": "Transform discretized data back to original feature space.\n\nNote that this function does not regenerate the original data\ndue to discretization rounding.\n\nParameters\n----------\nXt : array-like of shape (n_samples, n_features)\n Transformed data in the binned space.\n\nReturns\n-------\nXinv : ndarray, dtype={np.float32, np.float64}\n Data in the original feature space.", + "code": " def inverse_transform(self, Xt):\n \"\"\"\n Transform discretized data back to original feature space.\n\n Note that this function does not regenerate the original data\n due to discretization rounding.\n\n Parameters\n ----------\n Xt : array-like of shape (n_samples, n_features)\n Transformed data in the binned space.\n\n Returns\n -------\n Xinv : ndarray, dtype={np.float32, np.float64}\n Data in the original feature space.\n \"\"\"\n check_is_fitted(self)\n\n if 'onehot' in self.encode:\n Xt = self._encoder.inverse_transform(Xt)\n\n Xinv = check_array(Xt, copy=True, dtype=(np.float64, np.float32))\n n_features = self.n_bins_.shape[0]\n if Xinv.shape[1] != n_features:\n raise ValueError(\"Incorrect number of features. Expecting {}, \"\n \"received {}.\".format(n_features, Xinv.shape[1]))\n\n for jj in range(n_features):\n bin_edges = self.bin_edges_[jj]\n bin_centers = (bin_edges[1:] + bin_edges[:-1]) * 0.5\n Xinv[:, jj] = bin_centers[np.int_(Xinv[:, jj])]\n\n return Xinv" + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/transform", + "name": "transform", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._discretization/KBinsDiscretizer/transform/X", + "name": "X", + "qname": "sklearn.preprocessing._discretization.KBinsDiscretizer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data to be discretized." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Discretize the data.", + "docstring": "Discretize the data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to be discretized.\n\nReturns\n-------\nXt : {ndarray, sparse matrix}, dtype={np.float32, np.float64}\n Data in the binned space. 
Will be a sparse matrix if\n `self.encode='onehot'` and ndarray otherwise.", + "code": " def transform(self, X):\n \"\"\"\n Discretize the data.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data to be discretized.\n\n Returns\n -------\n Xt : {ndarray, sparse matrix}, dtype={np.float32, np.float64}\n Data in the binned space. Will be a sparse matrix if\n `self.encode='onehot'` and ndarray otherwise.\n \"\"\"\n check_is_fitted(self)\n\n # check input and attribute dtypes\n dtype = (np.float64, np.float32) if self.dtype is None else self.dtype\n Xt = self._validate_data(X, copy=True, dtype=dtype, reset=False)\n\n bin_edges = self.bin_edges_\n for jj in range(Xt.shape[1]):\n # Values which are close to a bin edge are susceptible to numeric\n # instability. Add eps to X so these values are binned correctly\n # with respect to their decimal truncation. See documentation of\n # numpy.isclose for an explanation of ``rtol`` and ``atol``.\n rtol = 1.e-5\n atol = 1.e-8\n eps = atol + rtol * np.abs(Xt[:, jj])\n Xt[:, jj] = np.digitize(Xt[:, jj] + eps, bin_edges[jj][1:])\n np.clip(Xt, 0, self.n_bins_ - 1, out=Xt)\n\n if self.encode == 'ordinal':\n return Xt\n\n dtype_init = None\n if 'onehot' in self.encode:\n dtype_init = self._encoder.dtype\n self._encoder.dtype = Xt.dtype\n try:\n Xt_enc = self._encoder.transform(Xt)\n finally:\n # revert the initial dtype to avoid modifying self.\n self._encoder.dtype = dtype_init\n return Xt_enc" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/__init__/categories", + "name": "categories", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.__init__.categories", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto' or a list of array-like", + "default_value": "'auto'", + "description": "Categories (unique values) per feature:\n\n- 'auto' : Determine categories automatically from the training data.\n- list : ``categories[i]`` holds the categories expected in the ith\n column. The passed categories should not mix strings and numeric\n values within a single feature, and should be sorted in case of\n numeric values.\n\nThe used categories can be found in the ``categories_`` attribute.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "a list of array-like" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/__init__/drop", + "name": "drop", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.__init__.drop", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'first', 'if_binary'} or a array-like of shape (n_features,)", + "default_value": "None", + "description": "Specifies a methodology to use to drop one of the categories per\nfeature. 
This is useful in situations where perfectly collinear\nfeatures cause problems, such as when feeding the resulting data\ninto a neural network or an unregularized regression.\n\nHowever, dropping one category breaks the symmetry of the original\nrepresentation and can therefore induce a bias in downstream models,\nfor instance for penalized linear classification or regression models.\n\n- None : retain all features (the default).\n- 'first' : drop the first category in each feature. If only one\n category is present, the feature will be dropped entirely.\n- 'if_binary' : drop the first category in each feature with two\n categories. Features with 1 or more than 2 categories are\n left intact.\n- array : ``drop[i]`` is the category in feature ``X[:, i]`` that\n should be dropped.\n\n.. versionadded:: 0.21\n The parameter `drop` was added in 0.21.\n\n.. versionchanged:: 0.23\n The option `drop='if_binary'` was added in 0.23." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["if_binary", "first"] + }, + { + "kind": "NamedType", + "name": "a array-like of shape (n_features,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/__init__/sparse", + "name": "sparse", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.__init__.sparse", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Will return sparse matrix if set True else will return an array." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/__init__/dtype", + "name": "dtype", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.__init__.dtype", + "default_value": "np.float64", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "number type", + "default_value": "float", + "description": "Desired dtype of output." + }, + "type": { + "kind": "NamedType", + "name": "number type" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/__init__/handle_unknown", + "name": "handle_unknown", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.__init__.handle_unknown", + "default_value": "'error'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'error', 'ignore'}", + "default_value": "'error'", + "description": "Whether to raise an error or ignore if an unknown categorical feature\nis present during transform (default is to raise). When this parameter\nis set to 'ignore' and an unknown category is encountered during\ntransform, the resulting one-hot encoded columns for this feature\nwill be all zeros. In the inverse transform, an unknown category\nwill be denoted as None." + }, + "type": { + "kind": "EnumType", + "values": ["error", "ignore"] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Encode categorical features as a one-hot numeric array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\nencoding scheme. This creates a binary column for each category and\nreturns a sparse matrix or dense array (depending on the ``sparse``\nparameter)\n\nBy default, the encoder derives the categories based on the unique values\nin each feature. 
Alternatively, you can also specify the `categories`\nmanually.\n\nThis encoding is needed for feeding categorical data to many scikit-learn\nestimators, notably linear models and SVMs with the standard kernels.\n\nNote: a one-hot encoding of y labels should use a LabelBinarizer\ninstead.\n\nRead more in the :ref:`User Guide <preprocessing_categorical_features>`.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, categories='auto', drop=None, sparse=True,\n dtype=np.float64, handle_unknown='error'):\n self.categories = categories\n self.sparse = sparse\n self.dtype = dtype\n self.handle_unknown = handle_unknown\n self.drop = drop" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/_compute_drop_idx", + "name": "_compute_drop_idx", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder._compute_drop_idx", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/_compute_drop_idx/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder._compute_drop_idx.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _compute_drop_idx(self):\n if self.drop is None:\n return None\n elif isinstance(self.drop, str):\n if self.drop == 'first':\n return np.zeros(len(self.categories_), dtype=object)\n elif self.drop == 'if_binary':\n return np.array([0 if len(cats) == 2 else None\n for cats in self.categories_], dtype=object)\n else:\n msg = (\n \"Wrong input for parameter `drop`. Expected \"\n \"'first', 'if_binary', None or array of objects, got {}\"\n )\n raise ValueError(msg.format(type(self.drop)))\n\n else:\n try:\n drop_array = np.asarray(self.drop, dtype=object)\n droplen = len(drop_array)\n except (ValueError, TypeError):\n msg = (\n \"Wrong input for parameter `drop`. 
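A basic usage sketch of the `OneHotEncoder` constructor described above (the toy data mirrors the scikit-learn docs; `sparse=True` is the 0.24-era parameter name):

```python
from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder()  # sparse=True by default in 0.24: CSR output
X = [['Male', 1], ['Female', 3], ['Female', 2]]
enc.fit(X)

print(enc.categories_)  # [array(['Female', 'Male'], ...), array([1, 2, 3], ...)]
print(enc.transform([['Female', 1]]).toarray())  # [[1. 0. 1. 0. 0.]]
```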
Expected \"\n \"'first', 'if_binary', None or array of objects, got {}\"\n )\n raise ValueError(msg.format(type(drop_array)))\n if droplen != len(self.categories_):\n msg = (\"`drop` should have length equal to the number \"\n \"of features ({}), got {}\")\n raise ValueError(msg.format(len(self.categories_), droplen))\n missing_drops = []\n drop_indices = []\n for col_idx, (val, cat_list) in enumerate(zip(drop_array,\n self.categories_)):\n if not is_scalar_nan(val):\n drop_idx = np.where(cat_list == val)[0]\n if drop_idx.size: # found drop idx\n drop_indices.append(drop_idx[0])\n else:\n missing_drops.append((col_idx, val))\n continue\n\n # val is nan, find nan in categories manually\n for cat_idx, cat in enumerate(cat_list):\n if is_scalar_nan(cat):\n drop_indices.append(cat_idx)\n break\n else: # loop did not break thus drop is missing\n missing_drops.append((col_idx, val))\n\n if any(missing_drops):\n msg = (\"The following categories were supposed to be \"\n \"dropped, but were not found in the training \"\n \"data.\\n{}\".format(\n \"\\n\".join(\n [\"Category: {}, Feature: {}\".format(c, v)\n for c, v in missing_drops])))\n raise ValueError(msg)\n return np.array(drop_indices, dtype=object)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/_validate_keywords", + "name": "_validate_keywords", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder._validate_keywords", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/_validate_keywords/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder._validate_keywords.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_keywords(self):\n if self.handle_unknown not in ('error', 'ignore'):\n msg = (\"handle_unknown should be either 'error' or 'ignore', \"\n \"got {0}.\".format(self.handle_unknown))\n raise ValueError(msg)\n # If we have both dropped columns and ignored unknown\n # values, there will be ambiguous cells. This creates difficulties\n # in interpreting the model.\n if self.drop is not None and self.handle_unknown != 'error':\n raise ValueError(\n \"`handle_unknown` must be 'error' when the drop parameter is \"\n \"specified, as both would create categories that are all \"\n \"zero.\")" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/fit", + "name": "fit", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/fit/X", + "name": "X", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to determine the categories of each feature." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored. This parameter exists only for compatibility with\n:class:`~sklearn.pipeline.Pipeline`." + }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit OneHotEncoder to X.", + "docstring": "Fit OneHotEncoder to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to determine the categories of each feature.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"\n Fit OneHotEncoder to X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to determine the categories of each feature.\n\n y : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\n Returns\n -------\n self\n \"\"\"\n self._validate_keywords()\n self._fit(X, handle_unknown=self.handle_unknown,\n force_all_finite='allow-nan')\n self.drop_idx_ = self._compute_drop_idx()\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/fit_transform", + "name": "fit_transform", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/fit_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/fit_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to encode." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/fit_transform/y", + "name": "y", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored. This parameter exists only for compatibility with\n:class:`~sklearn.pipeline.Pipeline`." 
+ }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit OneHotEncoder to X, then transform X.\n\nEquivalent to fit(X).transform(X) but more convenient.", + "docstring": "Fit OneHotEncoder to X, then transform X.\n\nEquivalent to fit(X).transform(X) but more convenient.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to encode.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nX_out : {ndarray, sparse matrix} of shape (n_samples, n_encoded_features)\n Transformed input. If `sparse=True`, a sparse matrix will be\n returned.", + "code": " def fit_transform(self, X, y=None):\n \"\"\"\n Fit OneHotEncoder to X, then transform X.\n\n Equivalent to fit(X).transform(X) but more convenient.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to encode.\n\n y : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\n Returns\n -------\n X_out : {ndarray, sparse matrix} of shape \\\n (n_samples, n_encoded_features)\n Transformed input. If `sparse=True`, a sparse matrix will be\n returned.\n \"\"\"\n self._validate_keywords()\n return super().fit_transform(X, y)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/get_feature_names", + "name": "get_feature_names", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.get_feature_names", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/get_feature_names/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.get_feature_names.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/get_feature_names/input_features", + "name": "input_features", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.get_feature_names.input_features", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of str of shape (n_features,)", + "default_value": "", + "description": "String names for input features if available. By default,\n\"x0\", \"x1\", ... \"xn_features\" is used." + }, + "type": { + "kind": "NamedType", + "name": "list of str of shape (n_features,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return feature names for output features.", + "docstring": "Return feature names for output features.\n\nParameters\n----------\ninput_features : list of str of shape (n_features,)\n String names for input features if available. By default,\n \"x0\", \"x1\", ... \"xn_features\" is used.\n\nReturns\n-------\noutput_feature_names : ndarray of shape (n_output_features,)\n Array of feature names.", + "code": " def get_feature_names(self, input_features=None):\n \"\"\"\n Return feature names for output features.\n\n Parameters\n ----------\n input_features : list of str of shape (n_features,)\n String names for input features if available. By default,\n \"x0\", \"x1\", ... 
\"xn_features\" is used.\n\n Returns\n -------\n output_feature_names : ndarray of shape (n_output_features,)\n Array of feature names.\n \"\"\"\n check_is_fitted(self)\n cats = self.categories_\n if input_features is None:\n input_features = ['x%d' % i for i in range(len(cats))]\n elif len(input_features) != len(self.categories_):\n raise ValueError(\n \"input_features should have length equal to number of \"\n \"features ({}), got {}\".format(len(self.categories_),\n len(input_features)))\n\n feature_names = []\n for i in range(len(cats)):\n names = [\n input_features[i] + '_' + str(t) for t in cats[i]]\n if self.drop_idx_ is not None and self.drop_idx_[i] is not None:\n names.pop(self.drop_idx_[i])\n feature_names.extend(names)\n\n return np.array(feature_names, dtype=object)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/inverse_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_encoded_features)", + "default_value": "", + "description": "The transformed data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_encoded_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Convert the data back to the original representation.\n\nIn case unknown categories are encountered (all zeros in the\none-hot encoding), ``None`` is used to represent this category.", + "docstring": "Convert the data back to the original representation.\n\nIn case unknown categories are encountered (all zeros in the\none-hot encoding), ``None`` is used to represent this category.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_encoded_features)\n The transformed data.\n\nReturns\n-------\nX_tr : ndarray of shape (n_samples, n_features)\n Inverse transformed array.", + "code": " def inverse_transform(self, X):\n \"\"\"\n Convert the data back to the original representation.\n\n In case unknown categories are encountered (all zeros in the\n one-hot encoding), ``None`` is used to represent this category.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape \\\n (n_samples, n_encoded_features)\n The transformed data.\n\n Returns\n -------\n X_tr : ndarray of shape (n_samples, n_features)\n Inverse transformed array.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse='csr')\n\n n_samples, _ = X.shape\n n_features = len(self.categories_)\n if self.drop_idx_ is None:\n n_transformed_features = sum(len(cats)\n for cats in self.categories_)\n else:\n n_transformed_features = sum(\n len(cats) - 1 if to_drop is not None else len(cats)\n for cats, to_drop in zip(self.categories_, self.drop_idx_)\n )\n\n # validate shape of passed X\n msg = (\"Shape of the passed X data is not correct. Expected {0} \"\n \"columns, got {1}.\")\n if X.shape[1] != n_transformed_features:\n raise ValueError(msg.format(n_transformed_features, X.shape[1]))\n\n # create resulting array of appropriate dtype\n dt = np.find_common_type([cat.dtype for cat in self.categories_], [])\n X_tr = np.empty((n_samples, n_features), dtype=dt)\n\n j = 0\n found_unknown = {}\n\n for i in range(n_features):\n if self.drop_idx_ is None or self.drop_idx_[i] is None:\n cats = self.categories_[i]\n else:\n cats = np.delete(self.categories_[i], self.drop_idx_[i])\n n_categories = len(cats)\n\n # Only happens if there was a column with a unique\n # category. 
In this case we just fill the column with this\n # unique category value.\n if n_categories == 0:\n X_tr[:, i] = self.categories_[i][self.drop_idx_[i]]\n j += n_categories\n continue\n sub = X[:, j:j + n_categories]\n # for sparse X argmax returns 2D matrix, ensure 1D array\n labels = np.asarray(sub.argmax(axis=1)).flatten()\n X_tr[:, i] = cats[labels]\n if self.handle_unknown == 'ignore':\n unknown = np.asarray(sub.sum(axis=1) == 0).flatten()\n # ignored unknown categories: we have a row of all zero\n if unknown.any():\n found_unknown[i] = unknown\n else:\n dropped = np.asarray(sub.sum(axis=1) == 0).flatten()\n if dropped.any():\n if self.drop_idx_ is None:\n all_zero_samples = np.flatnonzero(dropped)\n raise ValueError(\n f\"Samples {all_zero_samples} can not be inverted \"\n \"when drop=None and handle_unknown='error' \"\n \"because they contain all zeros\")\n # we can safely assume that all of the nulls in each column\n # are the dropped value\n X_tr[dropped, i] = self.categories_[i][\n self.drop_idx_[i]\n ]\n\n j += n_categories\n\n # if ignored are found: potentially need to upcast result to\n # insert None values\n if found_unknown:\n if X_tr.dtype != object:\n X_tr = X_tr.astype(object)\n\n for idx, mask in found_unknown.items():\n X_tr[mask, idx] = None\n\n return X_tr" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/transform", + "name": "transform", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OneHotEncoder/transform/X", + "name": "X", + "qname": "sklearn.preprocessing._encoders.OneHotEncoder.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to encode." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform X using one-hot encoding.", + "docstring": "Transform X using one-hot encoding.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to encode.\n\nReturns\n-------\nX_out : {ndarray, sparse matrix} of shape (n_samples, n_encoded_features)\n Transformed input. If `sparse=True`, a sparse matrix will be\n returned.", + "code": " def transform(self, X):\n \"\"\"\n Transform X using one-hot encoding.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to encode.\n\n Returns\n -------\n X_out : {ndarray, sparse matrix} of shape \\\n (n_samples, n_encoded_features)\n Transformed input. 
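A sketch of the all-zeros case handled by `inverse_transform` above: with `handle_unknown='ignore'`, an unseen category encodes to an all-zero block and inverts to None:

```python
from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder(handle_unknown='ignore', sparse=False).fit([['a'], ['b']])
Xt = enc.transform([['a'], ['c']])  # 'c' was never seen during fit
print(Xt)                           # [[1. 0.], [0. 0.]]
print(enc.inverse_transform(Xt))    # [['a'], [None]]
```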
If `sparse=True`, a sparse matrix will be\n returned.\n \"\"\"\n check_is_fitted(self)\n # validation of X happens in _check_X called by _transform\n X_int, X_mask = self._transform(X, handle_unknown=self.handle_unknown,\n force_all_finite='allow-nan')\n\n n_samples, n_features = X_int.shape\n\n if self.drop_idx_ is not None:\n to_drop = self.drop_idx_.copy()\n # We remove all the dropped categories from mask, and decrement all\n # categories that occur after them to avoid an empty column.\n keep_cells = X_int != to_drop\n n_values = []\n for i, cats in enumerate(self.categories_):\n n_cats = len(cats)\n\n # drop='if_binary' but feature isn't binary\n if to_drop[i] is None:\n # set to cardinality to not drop from X_int\n to_drop[i] = n_cats\n n_values.append(n_cats)\n else: # dropped\n n_values.append(n_cats - 1)\n\n to_drop = to_drop.reshape(1, -1)\n X_int[X_int > to_drop] -= 1\n X_mask &= keep_cells\n else:\n n_values = [len(cats) for cats in self.categories_]\n\n mask = X_mask.ravel()\n feature_indices = np.cumsum([0] + n_values)\n indices = (X_int + feature_indices[:-1]).ravel()[mask]\n\n indptr = np.empty(n_samples + 1, dtype=int)\n indptr[0] = 0\n np.sum(X_mask, axis=1, out=indptr[1:])\n np.cumsum(indptr[1:], out=indptr[1:])\n data = np.ones(indptr[-1])\n\n out = sparse.csr_matrix((data, indices, indptr),\n shape=(n_samples, feature_indices[-1]),\n dtype=self.dtype)\n if not self.sparse:\n return out.toarray()\n else:\n return out" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/__init__/categories", + "name": "categories", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.__init__.categories", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "'auto' or a list of array-like", + "default_value": "'auto'", + "description": "Categories (unique values) per feature:\n\n- 'auto' : Determine categories automatically from the training data.\n- list : ``categories[i]`` holds the categories expected in the ith\n column. The passed categories should not mix strings and numeric\n values, and should be sorted in case of numeric values.\n\nThe used categories can be found in the ``categories_`` attribute." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'auto'" + }, + { + "kind": "NamedType", + "name": "a list of array-like" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/__init__/dtype", + "name": "dtype", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.__init__.dtype", + "default_value": "np.float64", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "number type", + "default_value": "np.float64", + "description": "Desired dtype of output." 
+ }, + "type": { + "kind": "NamedType", + "name": "number type" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/__init__/handle_unknown", + "name": "handle_unknown", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.__init__.handle_unknown", + "default_value": "'error'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'error', 'use_encoded_value'}", + "default_value": "'error'", + "description": "When set to 'error' an error will be raised in case an unknown\ncategorical feature is present during transform. When set to\n'use_encoded_value', the encoded value of unknown categories will be\nset to the value given for the parameter `unknown_value`. In\n:meth:`inverse_transform`, an unknown category will be denoted as None.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "EnumType", + "values": ["use_encoded_value", "error"] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/__init__/unknown_value", + "name": "unknown_value", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.__init__.unknown_value", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or np.nan", + "default_value": "None", + "description": "When the parameter handle_unknown is set to 'use_encoded_value', this\nparameter is required and will set the encoded value of unknown\ncategories. It has to be distinct from the values used to encode any of\nthe categories in `fit`. If set to np.nan, the `dtype` parameter must\nbe a float dtype.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "np.nan" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Encode categorical features as an integer array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are converted to ordinal integers. This results in\na single column of integers (0 to n_categories - 1) per feature.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, categories='auto', dtype=np.float64,\n handle_unknown='error', unknown_value=None):\n self.categories = categories\n self.dtype = dtype\n self.handle_unknown = handle_unknown\n self.unknown_value = unknown_value" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/fit", + "name": "fit", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/fit/X", + "name": "X", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to determine the categories of each feature." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "None", + "default_value": "", + "description": "Ignored. This parameter exists only for compatibility with\n:class:`~sklearn.pipeline.Pipeline`." + }, + "type": { + "kind": "NamedType", + "name": "None" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the OrdinalEncoder to X.", + "docstring": "Fit the OrdinalEncoder to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to determine the categories of each feature.\n\ny : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"\n Fit the OrdinalEncoder to X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to determine the categories of each feature.\n\n y : None\n Ignored. This parameter exists only for compatibility with\n :class:`~sklearn.pipeline.Pipeline`.\n\n Returns\n -------\n self\n \"\"\"\n handle_unknown_strategies = (\"error\", \"use_encoded_value\")\n if self.handle_unknown not in handle_unknown_strategies:\n raise ValueError(\n f\"handle_unknown should be either 'error' or \"\n f\"'use_encoded_value', got {self.handle_unknown}.\"\n )\n\n if self.handle_unknown == 'use_encoded_value':\n if is_scalar_nan(self.unknown_value):\n if np.dtype(self.dtype).kind != 'f':\n raise ValueError(\n f\"When unknown_value is np.nan, the dtype \"\n \"parameter should be \"\n f\"a float dtype. 
Got {self.dtype}.\"\n )\n elif not isinstance(self.unknown_value, numbers.Integral):\n raise TypeError(f\"unknown_value should be an integer or \"\n f\"np.nan when \"\n f\"handle_unknown is 'use_encoded_value', \"\n f\"got {self.unknown_value}.\")\n elif self.unknown_value is not None:\n raise TypeError(f\"unknown_value should only be set when \"\n f\"handle_unknown is 'use_encoded_value', \"\n f\"got {self.unknown_value}.\")\n\n self._fit(X)\n\n if self.handle_unknown == 'use_encoded_value':\n for feature_cats in self.categories_:\n if 0 <= self.unknown_value < len(feature_cats):\n raise ValueError(f\"The used value for unknown_value \"\n f\"{self.unknown_value} is one of the \"\n f\"values already used for encoding the \"\n f\"seen categories.\")\n\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/inverse_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The transformed data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Convert the data back to the original representation.", + "docstring": "Convert the data back to the original representation.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\nReturns\n-------\nX_tr : ndarray of shape (n_samples, n_features)\n Inverse transformed array.", + "code": " def inverse_transform(self, X):\n \"\"\"\n Convert the data back to the original representation.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The transformed data.\n\n Returns\n -------\n X_tr : ndarray of shape (n_samples, n_features)\n Inverse transformed array.\n \"\"\"\n check_is_fitted(self)\n X = check_array(X, accept_sparse='csr')\n\n n_samples, _ = X.shape\n n_features = len(self.categories_)\n\n # validate shape of passed X\n msg = (\"Shape of the passed X data is not correct. 
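A sketch of the `handle_unknown='use_encoded_value'` validation in `fit` above (new in 0.24); the category names are illustrative:

```python
from sklearn.preprocessing import OrdinalEncoder

enc = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
enc.fit([['low'], ['medium'], ['high']])        # codes: high=0, low=1, medium=2
print(enc.transform([['medium'], ['unseen']]))  # [[ 2.], [-1.]]

# unknown_value=np.nan additionally requires a float dtype, and
# unknown_value=1 would raise: 1 already encodes a seen category.
```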
Expected {0} \"\n \"columns, got {1}.\")\n if X.shape[1] != n_features:\n raise ValueError(msg.format(n_features, X.shape[1]))\n\n # create resulting array of appropriate dtype\n dt = np.find_common_type([cat.dtype for cat in self.categories_], [])\n X_tr = np.empty((n_samples, n_features), dtype=dt)\n\n found_unknown = {}\n\n for i in range(n_features):\n labels = X[:, i].astype('int64', copy=False)\n if self.handle_unknown == 'use_encoded_value':\n unknown_labels = labels == self.unknown_value\n X_tr[:, i] = self.categories_[i][np.where(\n unknown_labels, 0, labels)]\n found_unknown[i] = unknown_labels\n else:\n X_tr[:, i] = self.categories_[i][labels]\n\n # insert None values for unknown values\n if found_unknown:\n X_tr = X_tr.astype(object, copy=False)\n\n for idx, mask in found_unknown.items():\n X_tr[mask, idx] = None\n\n return X_tr" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/transform", + "name": "transform", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/OrdinalEncoder/transform/X", + "name": "X", + "qname": "sklearn.preprocessing._encoders.OrdinalEncoder.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data to encode." 
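And the matching `inverse_transform` path above: rows equal to `unknown_value` are mapped back to None (a self-contained continuation of the previous sketch):

```python
from sklearn.preprocessing import OrdinalEncoder

enc = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
enc.fit([['low'], ['medium'], ['high']])
print(enc.inverse_transform([[2.0], [-1.0]]))  # [['medium'], [None]]
```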
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform X to ordinal codes.", + "docstring": "Transform X to ordinal codes.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data to encode.\n\nReturns\n-------\nX_out : ndarray of shape (n_samples, n_features)\n Transformed input.", + "code": " def transform(self, X):\n \"\"\"\n Transform X to ordinal codes.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data to encode.\n\n Returns\n -------\n X_out : ndarray of shape (n_samples, n_features)\n Transformed input.\n \"\"\"\n X_int, X_mask = self._transform(X, handle_unknown=self.handle_unknown)\n X_trans = X_int.astype(self.dtype, copy=False)\n\n # create separate category for unknown values\n if self.handle_unknown == 'use_encoded_value':\n X_trans[~X_mask] = self.unknown_value\n return X_trans" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_check_X", + "name": "_check_X", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._check_X", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_check_X/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._check_X.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_check_X/X", + "name": "X", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._check_X.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_check_X/force_all_finite", + "name": "force_all_finite", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._check_X.force_all_finite", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform custom check_array:\n- convert list of strings to object dtype\n- check for missing values for object dtype data (check_array does\n not do that)\n- return list of features (arrays): this list of features is\n constructed feature by feature to preserve the data types\n of pandas DataFrame columns, as otherwise information is lost\n and cannot be used, eg for the `categories_` attribute.", + "docstring": "Perform custom check_array:\n- convert list of strings to object dtype\n- check for missing values for object dtype data (check_array does\n not do that)\n- return list of features (arrays): this list of features is\n constructed feature by feature to preserve the data types\n of pandas DataFrame columns, as otherwise information is lost\n and cannot be used, eg for the `categories_` attribute.", + "code": " def _check_X(self, X, force_all_finite=True):\n \"\"\"\n Perform custom check_array:\n - convert list of strings to object dtype\n - check for missing values for object dtype data (check_array does\n not do that)\n - return list of features (arrays): this list of features is\n constructed feature by feature to 
preserve the data types\n of pandas DataFrame columns, as otherwise information is lost\n and cannot be used, eg for the `categories_` attribute.\n\n \"\"\"\n if not (hasattr(X, 'iloc') and getattr(X, 'ndim', 0) == 2):\n # if not a dataframe, do normal check_array validation\n X_temp = check_array(X, dtype=None,\n force_all_finite=force_all_finite)\n if (not hasattr(X, 'dtype')\n and np.issubdtype(X_temp.dtype, np.str_)):\n X = check_array(X, dtype=object,\n force_all_finite=force_all_finite)\n else:\n X = X_temp\n needs_validation = False\n else:\n # pandas dataframe, do validation later column by column, in order\n # to keep the dtype information to be used in the encoder.\n needs_validation = force_all_finite\n\n n_samples, n_features = X.shape\n X_columns = []\n\n for i in range(n_features):\n Xi = self._get_feature(X, feature_idx=i)\n Xi = check_array(Xi, ensure_2d=False, dtype=None,\n force_all_finite=needs_validation)\n X_columns.append(Xi)\n\n return X_columns, n_samples, n_features" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_fit", + "name": "_fit", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_fit/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_fit/X", + "name": "X", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_fit/handle_unknown", + "name": "handle_unknown", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._fit.handle_unknown", + "default_value": "'error'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_fit/force_all_finite", + "name": "force_all_finite", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._fit.force_all_finite", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _fit(self, X, handle_unknown='error', force_all_finite=True):\n X_list, n_samples, n_features = self._check_X(\n X, force_all_finite=force_all_finite)\n\n if self.categories != 'auto':\n if len(self.categories) != n_features:\n raise ValueError(\"Shape mismatch: if categories is an array,\"\n \" it has to be of shape (n_features,).\")\n\n self.categories_ = []\n\n for i in range(n_features):\n Xi = X_list[i]\n if self.categories == 'auto':\n cats = _unique(Xi)\n else:\n cats = np.array(self.categories[i], dtype=Xi.dtype)\n if Xi.dtype.kind not in 'OUS':\n sorted_cats = np.sort(cats)\n error_msg = (\"Unsorted categories are not \"\n \"supported for numerical categories\")\n # if there are nans, nan should be the last element\n stop_idx = -1 if 
np.isnan(sorted_cats[-1]) else None\n if (np.any(sorted_cats[:stop_idx] != cats[:stop_idx]) or\n (np.isnan(sorted_cats[-1]) and\n not np.isnan(cats[-1]))):\n raise ValueError(error_msg)\n\n if handle_unknown == 'error':\n diff = _check_unknown(Xi, cats)\n if diff:\n msg = (\"Found unknown categories {0} in column {1}\"\n \" during fit\".format(diff, i))\n raise ValueError(msg)\n self.categories_.append(cats)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_get_feature", + "name": "_get_feature", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._get_feature", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_get_feature/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._get_feature.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_get_feature/X", + "name": "X", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._get_feature.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_get_feature/feature_idx", + "name": "feature_idx", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._get_feature.feature_idx", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_feature(self, X, feature_idx):\n if hasattr(X, 'iloc'):\n # pandas dataframes\n return X.iloc[:, feature_idx]\n # numpy arrays, sparse arrays\n return X[:, feature_idx]" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'X_types': ['categorical']}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_transform", + "name": "_transform", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._transform.X", +
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_transform/handle_unknown", + "name": "handle_unknown", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._transform.handle_unknown", + "default_value": "'error'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._encoders/_BaseEncoder/_transform/force_all_finite", + "name": "force_all_finite", + "qname": "sklearn.preprocessing._encoders._BaseEncoder._transform.force_all_finite", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _transform(self, X, handle_unknown='error', force_all_finite=True):\n X_list, n_samples, n_features = self._check_X(\n X, force_all_finite=force_all_finite)\n\n X_int = np.zeros((n_samples, n_features), dtype=int)\n X_mask = np.ones((n_samples, n_features), dtype=bool)\n\n if n_features != len(self.categories_):\n raise ValueError(\n \"The number of features in X is different to the number of \"\n \"features of the fitted data. The fitted data had {} features \"\n \"and the X has {} features.\"\n .format(len(self.categories_,), n_features)\n )\n\n for i in range(n_features):\n Xi = X_list[i]\n diff, valid_mask = _check_unknown(Xi, self.categories_[i],\n return_mask=True)\n\n if not np.all(valid_mask):\n if handle_unknown == 'error':\n msg = (\"Found unknown categories {0} in column {1}\"\n \" during transform\".format(diff, i))\n raise ValueError(msg)\n else:\n # Set the problematic rows to an acceptable value and\n # continue `The rows are marked `X_mask` and will be\n # removed later.\n X_mask[:, i] = valid_mask\n # cast Xi into the largest string type necessary\n # to handle different lengths of numpy strings\n if (self.categories_[i].dtype.kind in ('U', 'S')\n and self.categories_[i].itemsize > Xi.itemsize):\n Xi = Xi.astype(self.categories_[i].dtype)\n elif (self.categories_[i].dtype.kind == 'O' and\n Xi.dtype.kind == 'U'):\n # categories are objects and Xi are numpy strings.\n # Cast Xi to an object dtype to prevent truncation\n # when setting invalid values.\n Xi = Xi.astype('O')\n else:\n Xi = Xi.copy()\n\n Xi[~valid_mask] = self.categories_[i][0]\n # We use check_unknown=False, since _check_unknown was\n # already called above.\n X_int[:, i] = _encode(Xi, uniques=self.categories_[i],\n check_unknown=False)\n\n return X_int, X_mask" + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/__init__/func", + "name": "func", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.__init__.func", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "The callable to use for the transformation. This will be passed\nthe same arguments as transform, with args and kwargs forwarded.\nIf func is None, then func will be the identity function." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/__init__/inverse_func", + "name": "inverse_func", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.__init__.inverse_func", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "The callable to use for the inverse transformation. This will be\npassed the same arguments as inverse transform, with args and\nkwargs forwarded. If inverse_func is None, then inverse_func\nwill be the identity function." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/__init__/validate", + "name": "validate", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.__init__.validate", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Indicate that the input X array should be checked before calling\n``func``. The possibilities are:\n\n- If False, there is no input validation.\n- If True, then X will be converted to a 2-dimensional NumPy array or\n sparse matrix. If the conversion is not possible an exception is\n raised.\n\n.. versionchanged:: 0.22\n The default of ``validate`` changed from True to False." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/__init__/accept_sparse", + "name": "accept_sparse", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.__init__.accept_sparse", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Indicate that func accepts a sparse matrix as input. If validate is\nFalse, this has no effect. Otherwise, if accept_sparse is false,\nsparse matrix inputs will cause an exception to be raised." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/__init__/check_inverse", + "name": "check_inverse", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.__init__.check_inverse", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to check that or ``func`` followed by ``inverse_func`` leads to\nthe original inputs. It can be used for a sanity check, raising a\nwarning when the condition is not fulfilled.\n\n.. 
versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/__init__/kw_args", + "name": "kw_args", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.__init__.kw_args", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Dictionary of additional keyword arguments to pass to func.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/__init__/inv_kw_args", + "name": "inv_kw_args", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.__init__.inv_kw_args", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "Dictionary of additional keyword arguments to pass to inverse_func.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Constructs a transformer from an arbitrary callable.\n\nA FunctionTransformer forwards its X (and optionally y) arguments to a\nuser-defined function or function object and returns the result of this\nfunction. This is useful for stateless transformations such as taking the\nlog of frequencies, doing custom scaling, etc.\n\nNote: If a lambda is used as the function, then the resulting\ntransformer will not be pickleable.\n\n.. versionadded:: 0.17\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, func=None, inverse_func=None, *, validate=False,\n accept_sparse=False, check_inverse=True, kw_args=None,\n inv_kw_args=None):\n self.func = func\n self.inverse_func = inverse_func\n self.validate = validate\n self.accept_sparse = accept_sparse\n self.check_inverse = check_inverse\n self.kw_args = kw_args\n self.inv_kw_args = inv_kw_args" + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_check_input", + "name": "_check_input", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer._check_input", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_check_input/self", + "name": "self", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer._check_input.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_check_input/X", + "name": "X", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer._check_input.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_input(self, X):\n if self.validate:\n return self._validate_data(X, accept_sparse=self.accept_sparse)\n return X" + }, + { + "id": 
"scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_check_inverse_transform", + "name": "_check_inverse_transform", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer._check_inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_check_inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer._check_inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_check_inverse_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer._check_inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check that func and inverse_func are the inverse.", + "docstring": "Check that func and inverse_func are the inverse.", + "code": " def _check_inverse_transform(self, X):\n \"\"\"Check that func and inverse_func are the inverse.\"\"\"\n idx_selected = slice(None, None, max(1, X.shape[0] // 100))\n X_round_trip = self.inverse_transform(self.transform(X[idx_selected]))\n if not _allclose_dense_sparse(X[idx_selected], X_round_trip):\n warnings.warn(\"The provided functions are not strictly\"\n \" inverse of each other. If you are sure you\"\n \" want to proceed regardless, set\"\n \" 'check_inverse=False'.\", UserWarning)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'no_validation': not self.validate,\n 'stateless': True}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_transform", + "name": "_transform", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer._transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer._transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_transform/X", + "name": "X", + "qname": 
"sklearn.preprocessing._function_transformer.FunctionTransformer._transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_transform/func", + "name": "func", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer._transform.func", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/_transform/kw_args", + "name": "kw_args", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer._transform.kw_args", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _transform(self, X, func=None, kw_args=None):\n X = self._check_input(X)\n\n if func is None:\n func = _identity\n\n return func(X, **(kw_args if kw_args else {}))" + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/fit", + "name": "fit", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/fit/X", + "name": "X", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "Input array." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit transformer by checking X.\n\nIf ``validate`` is ``True``, ``X`` will be checked.", + "docstring": "Fit transformer by checking X.\n\nIf ``validate`` is ``True``, ``X`` will be checked.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input array.\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"Fit transformer by checking X.\n\n If ``validate`` is ``True``, ``X`` will be checked.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Input array.\n\n Returns\n -------\n self\n \"\"\"\n X = self._check_input(X)\n if (self.check_inverse and not (self.func is None or\n self.inverse_func is None)):\n self._check_inverse_transform(X)\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/inverse_transform/X", + "name": "X", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.inverse_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "Input array." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform X using the inverse function.", + "docstring": "Transform X using the inverse function.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input array.\n\nReturns\n-------\nX_out : array-like, shape (n_samples, n_features)\n Transformed input.", + "code": " def inverse_transform(self, X):\n \"\"\"Transform X using the inverse function.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Input array.\n\n Returns\n -------\n X_out : array-like, shape (n_samples, n_features)\n Transformed input.\n \"\"\"\n return self._transform(X, func=self.inverse_func,\n kw_args=self.inv_kw_args)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/transform", + "name": "transform", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/FunctionTransformer/transform/X", + "name": "X", + "qname": "sklearn.preprocessing._function_transformer.FunctionTransformer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, shape (n_samples, n_features)", + "default_value": "", + "description": "Input array." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform X using the forward function.", + "docstring": "Transform X using the forward function.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Input array.\n\nReturns\n-------\nX_out : array-like, shape (n_samples, n_features)\n Transformed input.", + "code": " def transform(self, X):\n \"\"\"Transform X using the forward function.\n\n Parameters\n ----------\n X : array-like, shape (n_samples, n_features)\n Input array.\n\n Returns\n -------\n X_out : array-like, shape (n_samples, n_features)\n Transformed input.\n \"\"\"\n return self._transform(X, func=self.func, kw_args=self.kw_args)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/_identity", + "name": "_identity", + "qname": "sklearn.preprocessing._function_transformer._identity", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._function_transformer/_identity/X", + "name": "X", + "qname": "sklearn.preprocessing._function_transformer._identity.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "The identity function.", + "docstring": "The identity function.\n ", + "code": "def _identity(X):\n \"\"\"The identity function.\n \"\"\"\n return X" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._label.LabelBinarizer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._label.LabelBinarizer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/__init__/neg_label", + "name": "neg_label", + "qname": "sklearn.preprocessing._label.LabelBinarizer.__init__.neg_label", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Value with which negative labels must be encoded." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/__init__/pos_label", + "name": "pos_label", + "qname": "sklearn.preprocessing._label.LabelBinarizer.__init__.pos_label", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Value with which positive labels must be encoded." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/__init__/sparse_output", + "name": "sparse_output", + "qname": "sklearn.preprocessing._label.LabelBinarizer.__init__.sparse_output", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "True if the returned array from transform is desired to be in sparse\nCSR format." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Binarize labels in a one-vs-all fashion.\n\nSeveral regression and binary classification algorithms are\navailable in scikit-learn. A simple way to extend these algorithms\nto the multi-class classification case is to use the so-called\none-vs-all scheme.\n\nAt learning time, this simply consists in learning one regressor\nor binary classifier per class. In doing so, one needs to convert\nmulti-class labels to binary labels (belong or does not belong\nto the class). LabelBinarizer makes this process easy with the\ntransform method.\n\nAt prediction time, one assigns the class for which the corresponding\nmodel gave the greatest confidence. LabelBinarizer makes this easy\nwith the inverse_transform method.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, neg_label=0, pos_label=1, sparse_output=False):\n if neg_label >= pos_label:\n raise ValueError(\"neg_label={0} must be strictly less than \"\n \"pos_label={1}.\".format(neg_label, pos_label))\n\n if sparse_output and (pos_label == 0 or neg_label != 0):\n raise ValueError(\"Sparse binarization is only supported with non \"\n \"zero pos_label and zero neg_label, got \"\n \"pos_label={0} and neg_label={1}\"\n \"\".format(pos_label, neg_label))\n\n self.neg_label = neg_label\n self.pos_label = pos_label\n self.sparse_output = sparse_output" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._label.LabelBinarizer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._label.LabelBinarizer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'X_types': ['1dlabels']}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/fit", + "name": "fit", + "qname": "sklearn.preprocessing._label.LabelBinarizer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._label.LabelBinarizer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._label.LabelBinarizer.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + 
"is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,) or (n_samples, n_classes)", + "default_value": "", + "description": "Target values. The 2-d matrix should only contain 0 and 1,\nrepresents multilabel classification." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,) or (n_samples, n_classes)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit label binarizer.", + "docstring": "Fit label binarizer.\n\nParameters\n----------\ny : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification.\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, y):\n \"\"\"Fit label binarizer.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n self.y_type_ = type_of_target(y)\n if 'multioutput' in self.y_type_:\n raise ValueError(\"Multioutput target data is not supported with \"\n \"label binarization\")\n if _num_samples(y) == 0:\n raise ValueError('y has 0 samples: %r' % y)\n\n self.sparse_input_ = sp.issparse(y)\n self.classes_ = unique_labels(y)\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/fit_transform", + "name": "fit_transform", + "qname": "sklearn.preprocessing._label.LabelBinarizer.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/fit_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._label.LabelBinarizer.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/fit_transform/y", + "name": "y", + "qname": "sklearn.preprocessing._label.LabelBinarizer.fit_transform.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_classes)", + "default_value": "", + "description": "Target values. The 2-d matrix should only contain 0 and 1,\nrepresents multilabel classification. Sparse matrix can be\nCSR, CSC, COO, DOK, or LIL." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples,) or (n_samples, n_classes)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit label binarizer and transform multi-class labels to binary\nlabels.\n\nThe output of transform is sometimes referred to as\nthe 1-of-K coding scheme.", + "docstring": "Fit label binarizer and transform multi-class labels to binary\nlabels.\n\nThe output of transform is sometimes referred to as\nthe 1-of-K coding scheme.\n\nParameters\n----------\ny : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification. Sparse matrix can be\n CSR, CSC, COO, DOK, or LIL.\n\nReturns\n-------\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. 
Sparse matrix\n will be of CSR format.", + "code": " def fit_transform(self, y):\n \"\"\"Fit label binarizer and transform multi-class labels to binary\n labels.\n\n The output of transform is sometimes referred to as\n the 1-of-K coding scheme.\n\n Parameters\n ----------\n y : {ndarray, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification. Sparse matrix can be\n CSR, CSC, COO, DOK, or LIL.\n\n Returns\n -------\n Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. Sparse matrix\n will be of CSR format.\n \"\"\"\n return self.fit(y).transform(y)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.preprocessing._label.LabelBinarizer.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._label.LabelBinarizer.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/inverse_transform/Y", + "name": "Y", + "qname": "sklearn.preprocessing._label.LabelBinarizer.inverse_transform.Y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_classes)", + "default_value": "", + "description": "Target values. All sparse matrices are converted to CSR before\ninverse transformation." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_classes)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/inverse_transform/threshold", + "name": "threshold", + "qname": "sklearn.preprocessing._label.LabelBinarizer.inverse_transform.threshold", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Threshold used in the binary and multi-label cases.\n\nUse 0 when ``Y`` contains the output of decision_function\n(classifier).\nUse 0.5 when ``Y`` contains the output of predict_proba.\n\nIf None, the threshold is assumed to be half way between\nneg_label and pos_label." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform binary labels back to multi-class labels.", + "docstring": "Transform binary labels back to multi-class labels.\n\nParameters\n----------\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Target values. All sparse matrices are converted to CSR before\n inverse transformation.\n\nthreshold : float, default=None\n Threshold used in the binary and multi-label cases.\n\n Use 0 when ``Y`` contains the output of decision_function\n (classifier).\n Use 0.5 when ``Y`` contains the output of predict_proba.\n\n If None, the threshold is assumed to be half way between\n neg_label and pos_label.\n\nReturns\n-------\ny : {ndarray, sparse matrix} of shape (n_samples,)\n Target values. 
Sparse matrix will be of CSR format.\n\nNotes\n-----\nIn the case when the binary labels are fractional\n(probabilistic), inverse_transform chooses the class with the\ngreatest value. Typically, this allows to use the output of a\nlinear model's decision_function method directly as the input\nof inverse_transform.", + "code": " def inverse_transform(self, Y, threshold=None):\n \"\"\"Transform binary labels back to multi-class labels.\n\n Parameters\n ----------\n Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Target values. All sparse matrices are converted to CSR before\n inverse transformation.\n\n threshold : float, default=None\n Threshold used in the binary and multi-label cases.\n\n Use 0 when ``Y`` contains the output of decision_function\n (classifier).\n Use 0.5 when ``Y`` contains the output of predict_proba.\n\n If None, the threshold is assumed to be half way between\n neg_label and pos_label.\n\n Returns\n -------\n y : {ndarray, sparse matrix} of shape (n_samples,)\n Target values. Sparse matrix will be of CSR format.\n\n Notes\n -----\n In the case when the binary labels are fractional\n (probabilistic), inverse_transform chooses the class with the\n greatest value. Typically, this allows to use the output of a\n linear model's decision_function method directly as the input\n of inverse_transform.\n \"\"\"\n check_is_fitted(self)\n\n if threshold is None:\n threshold = (self.pos_label + self.neg_label) / 2.\n\n if self.y_type_ == \"multiclass\":\n y_inv = _inverse_binarize_multiclass(Y, self.classes_)\n else:\n y_inv = _inverse_binarize_thresholding(Y, self.y_type_,\n self.classes_, threshold)\n\n if self.sparse_input_:\n y_inv = sp.csr_matrix(y_inv)\n elif sp.issparse(y_inv):\n y_inv = y_inv.toarray()\n\n return y_inv" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/transform", + "name": "transform", + "qname": "sklearn.preprocessing._label.LabelBinarizer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._label.LabelBinarizer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelBinarizer/transform/y", + "name": "y", + "qname": "sklearn.preprocessing._label.LabelBinarizer.transform.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array, sparse matrix} of shape (n_samples,) or (n_samples, n_classes)", + "default_value": "", + "description": "Target values. The 2-d matrix should only contain 0 and 1,\nrepresents multilabel classification. Sparse matrix can be\nCSR, CSC, COO, DOK, or LIL." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples,) or (n_samples, n_classes)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform multi-class labels to binary labels.\n\nThe output of transform is sometimes referred to by some authors as\nthe 1-of-K coding scheme.", + "docstring": "Transform multi-class labels to binary labels.\n\nThe output of transform is sometimes referred to by some authors as\nthe 1-of-K coding scheme.\n\nParameters\n----------\ny : {array, sparse matrix} of shape (n_samples,) or (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification. Sparse matrix can be\n CSR, CSC, COO, DOK, or LIL.\n\nReturns\n-------\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. Sparse matrix\n will be of CSR format.", + "code": " def transform(self, y):\n \"\"\"Transform multi-class labels to binary labels.\n\n The output of transform is sometimes referred to by some authors as\n the 1-of-K coding scheme.\n\n Parameters\n ----------\n y : {array, sparse matrix} of shape (n_samples,) or \\\n (n_samples, n_classes)\n Target values. The 2-d matrix should only contain 0 and 1,\n represents multilabel classification. Sparse matrix can be\n CSR, CSC, COO, DOK, or LIL.\n\n Returns\n -------\n Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. Sparse matrix\n will be of CSR format.\n \"\"\"\n check_is_fitted(self)\n\n y_is_multilabel = type_of_target(y).startswith('multilabel')\n if y_is_multilabel and not self.y_type_.startswith('multilabel'):\n raise ValueError(\"The object was not fitted with multilabel\"\n \" input.\")\n\n return label_binarize(y, classes=self.classes_,\n pos_label=self.pos_label,\n neg_label=self.neg_label,\n sparse_output=self.sparse_output)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._label.LabelEncoder._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._label.LabelEncoder._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'X_types': ['1dlabels']}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/fit", + "name": "fit", + "qname": "sklearn.preprocessing._label.LabelEncoder.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._label.LabelEncoder.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._label.LabelEncoder.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + 
"docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit label encoder.", + "docstring": "Fit label encoder.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\nself : returns an instance of self.", + "code": " def fit(self, y):\n \"\"\"Fit label encoder.\n\n Parameters\n ----------\n y : array-like of shape (n_samples,)\n Target values.\n\n Returns\n -------\n self : returns an instance of self.\n \"\"\"\n y = column_or_1d(y, warn=True)\n self.classes_ = _unique(y)\n return self" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/fit_transform", + "name": "fit_transform", + "qname": "sklearn.preprocessing._label.LabelEncoder.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/fit_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._label.LabelEncoder.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/fit_transform/y", + "name": "y", + "qname": "sklearn.preprocessing._label.LabelEncoder.fit_transform.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit label encoder and return encoded labels.", + "docstring": "Fit label encoder and return encoded labels.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\ny : array-like of shape (n_samples,)", + "code": " def fit_transform(self, y):\n \"\"\"Fit label encoder and return encoded labels.\n\n Parameters\n ----------\n y : array-like of shape (n_samples,)\n Target values.\n\n Returns\n -------\n y : array-like of shape (n_samples,)\n \"\"\"\n y = column_or_1d(y, warn=True)\n self.classes_, y = _unique(y, return_inverse=True)\n return y" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.preprocessing._label.LabelEncoder.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._label.LabelEncoder.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/inverse_transform/y", + "name": "y", + "qname": "sklearn.preprocessing._label.LabelEncoder.inverse_transform.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target values." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform labels back to original encoding.", + "docstring": "Transform labels back to original encoding.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n Target values.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)", + "code": " def inverse_transform(self, y):\n \"\"\"Transform labels back to original encoding.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n Target values.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n \"\"\"\n check_is_fitted(self)\n y = column_or_1d(y, warn=True)\n # inverse transform of empty array is empty array\n if _num_samples(y) == 0:\n return np.array([])\n\n diff = np.setdiff1d(y, np.arange(len(self.classes_)))\n if len(diff):\n raise ValueError(\n \"y contains previously unseen labels: %s\" % str(diff))\n y = np.asarray(y)\n return self.classes_[y]" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/transform", + "name": "transform", + "qname": "sklearn.preprocessing._label.LabelEncoder.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._label.LabelEncoder.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/LabelEncoder/transform/y", + "name": "y", + "qname": "sklearn.preprocessing._label.LabelEncoder.transform.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform labels to normalized encoding.", + "docstring": "Transform labels to normalized encoding.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\ny : array-like of shape (n_samples,)", + "code": " def transform(self, y):\n \"\"\"Transform labels to normalized encoding.\n\n Parameters\n ----------\n y : array-like of shape (n_samples,)\n Target values.\n\n Returns\n -------\n y : array-like of shape (n_samples,)\n \"\"\"\n check_is_fitted(self)\n y = column_or_1d(y, warn=True)\n # transform of empty array is empty array\n if _num_samples(y) == 0:\n return np.array([])\n\n return _encode(y, uniques=self.classes_)" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/__init__", + "name": "__init__", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/__init__/self", + "name": "self", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/__init__/classes", + "name": "classes", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.__init__.classes", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "None", + "description": "Indicates an ordering for the class labels.\nAll entries should be unique (cannot contain duplicate classes)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/__init__/sparse_output", + "name": "sparse_output", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.__init__.sparse_output", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Set to True if output binary array is desired in CSR sparse format." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform between iterable of iterables and a multilabel format.\n\nAlthough a list of sets or tuples is a very intuitive format for multilabel\ndata, it is unwieldy to process. 
This transformer converts between this\nintuitive format and the supported multilabel format: a (samples x classes)\nbinary matrix indicating the presence of a class label.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, classes=None, sparse_output=False):\n self.classes = classes\n self.sparse_output = sparse_output" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/_build_cache", + "name": "_build_cache", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer._build_cache", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/_build_cache/self", + "name": "self", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer._build_cache.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _build_cache(self):\n if self._cached_dict is None:\n self._cached_dict = dict(zip(self.classes_,\n range(len(self.classes_))))\n\n return self._cached_dict" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/_more_tags", + "name": "_more_tags", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/_more_tags/self", + "name": "self", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'X_types': ['2dlabels']}" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/_transform", + "name": "_transform", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer._transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer._transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/_transform/y", + "name": "y", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer._transform.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "iterable of iterables", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "iterable of iterables" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/_transform/class_mapping", + "name": "class_mapping", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer._transform.class_mapping", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Mapping", + "default_value": "", + "description": "Maps from label to column index in label indicator matrix." 
+ }, + "type": { + "kind": "NamedType", + "name": "Mapping" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transforms the label sets with a given mapping", + "docstring": "Transforms the label sets with a given mapping\n\nParameters\n----------\ny : iterable of iterables\nclass_mapping : Mapping\n Maps from label to column index in label indicator matrix.\n\nReturns\n-------\ny_indicator : sparse matrix of shape (n_samples, n_classes)\n Label indicator matrix. Will be of CSR format.", + "code": " def _transform(self, y, class_mapping):\n \"\"\"Transforms the label sets with a given mapping\n\n Parameters\n ----------\n y : iterable of iterables\n class_mapping : Mapping\n Maps from label to column index in label indicator matrix.\n\n Returns\n -------\n y_indicator : sparse matrix of shape (n_samples, n_classes)\n Label indicator matrix. Will be of CSR format.\n \"\"\"\n indices = array.array('i')\n indptr = array.array('i', [0])\n unknown = set()\n for labels in y:\n index = set()\n for label in labels:\n try:\n index.add(class_mapping[label])\n except KeyError:\n unknown.add(label)\n indices.extend(index)\n indptr.append(len(indices))\n if unknown:\n warnings.warn('unknown class(es) {0} will be ignored'\n .format(sorted(unknown, key=str)))\n data = np.ones(len(indices), dtype=int)\n\n return sp.csr_matrix((data, indices, indptr),\n shape=(len(indptr) - 1, len(class_mapping)))" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/fit", + "name": "fit", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/fit/self", + "name": "self", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/fit/y", + "name": "y", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "iterable of iterables", + "default_value": "", + "description": "A set of labels (any orderable and hashable object) for each\nsample. If the `classes` parameter is set, `y` will not be\niterated." + }, + "type": { + "kind": "NamedType", + "name": "iterable of iterables" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the label sets binarizer, storing :term:`classes_`.", + "docstring": "Fit the label sets binarizer, storing :term:`classes_`.\n\nParameters\n----------\ny : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. If the `classes` parameter is set, `y` will not be\n iterated.\n\nReturns\n-------\nself : returns this MultiLabelBinarizer instance", + "code": " def fit(self, y):\n \"\"\"Fit the label sets binarizer, storing :term:`classes_`.\n\n Parameters\n ----------\n y : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. 
If the `classes` parameter is set, `y` will not be\n iterated.\n\n Returns\n -------\n self : returns this MultiLabelBinarizer instance\n \"\"\"\n self._cached_dict = None\n if self.classes is None:\n classes = sorted(set(itertools.chain.from_iterable(y)))\n elif len(set(self.classes)) < len(self.classes):\n raise ValueError(\"The classes argument contains duplicate \"\n \"classes. Remove these duplicates before passing \"\n \"them to MultiLabelBinarizer.\")\n else:\n classes = self.classes\n dtype = int if all(isinstance(c, int) for c in classes) else object\n self.classes_ = np.empty(len(classes), dtype=dtype)\n self.classes_[:] = classes\n return self" }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/fit_transform", + "name": "fit_transform", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/fit_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/fit_transform/y", + "name": "y", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.fit_transform.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "iterable of iterables", + "default_value": "", + "description": "A set of labels (any orderable and hashable object) for each\nsample. If the `classes` parameter is set, `y` will not be\niterated." + }, + "type": { + "kind": "NamedType", + "name": "iterable of iterables" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the label sets binarizer and transform the given label sets.", + "docstring": "Fit the label sets binarizer and transform the given label sets.\n\nParameters\n----------\ny : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. If the `classes` parameter is set, `y` will not be\n iterated.\n\nReturns\n-------\ny_indicator : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]`\n is in `y[i]`, and 0 otherwise. Sparse matrix will be of CSR\n format.", + "code": " def fit_transform(self, y):\n \"\"\"Fit the label sets binarizer and transform the given label sets.\n\n Parameters\n ----------\n y : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. If the `classes` parameter is set, `y` will not be\n iterated.\n\n Returns\n -------\n y_indicator : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]`\n is in `y[i]`, and 0 otherwise. 
Sparse matrix will be of CSR\n format.\n \"\"\"\n self._cached_dict = None\n\n if self.classes is not None:\n return self.fit(y).transform(y)\n\n # Automatically increment on new class\n class_mapping = defaultdict(int)\n class_mapping.default_factory = class_mapping.__len__\n yt = self._transform(y, class_mapping)\n\n # sort classes and reorder columns\n tmp = sorted(class_mapping, key=class_mapping.get)\n\n # (make safe for tuples)\n dtype = int if all(isinstance(c, int) for c in tmp) else object\n class_mapping = np.empty(len(tmp), dtype=dtype)\n class_mapping[:] = tmp\n self.classes_, inverse = np.unique(class_mapping, return_inverse=True)\n # ensure yt.indices keeps its current dtype\n yt.indices = np.array(inverse[yt.indices], dtype=yt.indices.dtype,\n copy=False)\n\n if not self.sparse_output:\n yt = yt.toarray()\n\n return yt" }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/inverse_transform", + "name": "inverse_transform", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.inverse_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/inverse_transform/self", + "name": "self", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.inverse_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/inverse_transform/yt", + "name": "yt", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.inverse_transform.yt", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_classes)", + "default_value": "", + "description": "A matrix containing only 1s and 0s." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_classes)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform the given indicator matrix into label sets.", + "docstring": "Transform the given indicator matrix into label sets.\n\nParameters\n----------\nyt : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n A matrix containing only 1s ands 0s.\n\nReturns\n-------\ny : list of tuples\n The set of labels for each sample such that `y[i]` consists of\n `classes_[j]` for each `yt[i, j] == 1`.", + "code": " def inverse_transform(self, yt):\n \"\"\"Transform the given indicator matrix into label sets.\n\n Parameters\n ----------\n yt : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n A matrix containing only 1s ands 0s.\n\n Returns\n -------\n y : list of tuples\n The set of labels for each sample such that `y[i]` consists of\n `classes_[j]` for each `yt[i, j] == 1`.\n \"\"\"\n check_is_fitted(self)\n\n if yt.shape[1] != len(self.classes_):\n raise ValueError('Expected indicator for {0} classes, but got {1}'\n .format(len(self.classes_), yt.shape[1]))\n\n if sp.issparse(yt):\n yt = yt.tocsr()\n if len(yt.data) != 0 and len(np.setdiff1d(yt.data, [0, 1])) > 0:\n raise ValueError('Expected only 0s and 1s in label indicator.')\n return [tuple(self.classes_.take(yt.indices[start:end]))\n for start, end in zip(yt.indptr[:-1], yt.indptr[1:])]\n else:\n unexpected = np.setdiff1d(yt, [0, 1])\n if len(unexpected) > 0:\n raise ValueError('Expected only 0s and 1s in label indicator. '\n 'Also got {0}'.format(unexpected))\n return [tuple(self.classes_.compress(indicators)) for indicators\n in yt]" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/transform", + "name": "transform", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/transform/self", + "name": "self", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/MultiLabelBinarizer/transform/y", + "name": "y", + "qname": "sklearn.preprocessing._label.MultiLabelBinarizer.transform.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "iterable of iterables", + "default_value": "", + "description": "A set of labels (any orderable and hashable object) for each\nsample. If the `classes` parameter is set, `y` will not be\niterated." + }, + "type": { + "kind": "NamedType", + "name": "iterable of iterables" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Transform the given label sets.", + "docstring": "Transform the given label sets.\n\nParameters\n----------\ny : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. 
If the `classes` parameter is set, `y` will not be\n iterated.\n\nReturns\n-------\ny_indicator : array or CSR matrix, shape (n_samples, n_classes)\n A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]` is in\n `y[i]`, and 0 otherwise.", + "code": " def transform(self, y):\n \"\"\"Transform the given label sets.\n\n Parameters\n ----------\n y : iterable of iterables\n A set of labels (any orderable and hashable object) for each\n sample. If the `classes` parameter is set, `y` will not be\n iterated.\n\n Returns\n -------\n y_indicator : array or CSR matrix, shape (n_samples, n_classes)\n A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]` is in\n `y[i]`, and 0 otherwise.\n \"\"\"\n check_is_fitted(self)\n\n class_to_index = self._build_cache()\n yt = self._transform(y, class_to_index)\n\n if not self.sparse_output:\n yt = yt.toarray()\n\n return yt" + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/_inverse_binarize_multiclass", + "name": "_inverse_binarize_multiclass", + "qname": "sklearn.preprocessing._label._inverse_binarize_multiclass", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/_inverse_binarize_multiclass/y", + "name": "y", + "qname": "sklearn.preprocessing._label._inverse_binarize_multiclass.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/_inverse_binarize_multiclass/classes", + "name": "classes", + "qname": "sklearn.preprocessing._label._inverse_binarize_multiclass.classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Inverse label binarization transformation for multiclass.\n\nMulticlass uses the maximal score instead of a threshold.", + "docstring": "Inverse label binarization transformation for multiclass.\n\nMulticlass uses the maximal score instead of a threshold.", + "code": "def _inverse_binarize_multiclass(y, classes):\n \"\"\"Inverse label binarization transformation for multiclass.\n\n Multiclass uses the maximal score instead of a threshold.\n \"\"\"\n classes = np.asarray(classes)\n\n if sp.issparse(y):\n # Find the argmax for each row in y where y is a CSR matrix\n\n y = y.tocsr()\n n_samples, n_outputs = y.shape\n outputs = np.arange(n_outputs)\n row_max = min_max_axis(y, 1)[1]\n row_nnz = np.diff(y.indptr)\n\n y_data_repeated_max = np.repeat(row_max, row_nnz)\n # picks out all indices obtaining the maximum per row\n y_i_all_argmax = np.flatnonzero(y_data_repeated_max == y.data)\n\n # For corner case where last row has a max of 0\n if row_max[-1] == 0:\n y_i_all_argmax = np.append(y_i_all_argmax, [len(y.data)])\n\n # Gets the index of the first argmax in each row from y_i_all_argmax\n index_first_argmax = np.searchsorted(y_i_all_argmax, y.indptr[:-1])\n # first argmax of each row\n y_ind_ext = np.append(y.indices, [0])\n y_i_argmax = y_ind_ext[y_i_all_argmax[index_first_argmax]]\n # Handle rows of all 0\n y_i_argmax[np.where(row_nnz == 0)[0]] = 0\n\n # Handles rows with max of 0 that contain negative numbers\n samples = np.arange(n_samples)[(row_nnz > 0) &\n (row_max.ravel() == 0)]\n for i in samples:\n ind = y.indices[y.indptr[i]:y.indptr[i + 1]]\n y_i_argmax[i] = 
classes[np.setdiff1d(outputs, ind)][0]\n\n return classes[y_i_argmax]\n else:\n return classes.take(y.argmax(axis=1), mode=\"clip\")" }, + { + "id": "scikit-learn/sklearn.preprocessing._label/_inverse_binarize_thresholding", + "name": "_inverse_binarize_thresholding", + "qname": "sklearn.preprocessing._label._inverse_binarize_thresholding", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/_inverse_binarize_thresholding/y", + "name": "y", + "qname": "sklearn.preprocessing._label._inverse_binarize_thresholding.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/_inverse_binarize_thresholding/output_type", + "name": "output_type", + "qname": "sklearn.preprocessing._label._inverse_binarize_thresholding.output_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/_inverse_binarize_thresholding/classes", + "name": "classes", + "qname": "sklearn.preprocessing._label._inverse_binarize_thresholding.classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/_inverse_binarize_thresholding/threshold", + "name": "threshold", + "qname": "sklearn.preprocessing._label._inverse_binarize_thresholding.threshold", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Inverse label binarization transformation using thresholding.", + "docstring": "Inverse label binarization transformation using thresholding.", + "code": "def _inverse_binarize_thresholding(y, output_type, classes, threshold):\n \"\"\"Inverse label binarization transformation using thresholding.\"\"\"\n\n if output_type == \"binary\" and y.ndim == 2 and y.shape[1] > 2:\n raise ValueError(\"output_type='binary', but y.shape = {0}\".\n format(y.shape))\n\n if output_type != \"binary\" and y.shape[1] != len(classes):\n raise ValueError(\"The number of classes is not equal to the number of \"\n \"dimensions of y.\")\n\n classes = np.asarray(classes)\n\n # Perform thresholding\n if sp.issparse(y):\n if threshold > 0:\n if y.format not in ('csr', 'csc'):\n y = y.tocsr()\n y.data = np.array(y.data > threshold, dtype=int)\n y.eliminate_zeros()\n else:\n y = np.array(y.toarray() > threshold, dtype=int)\n else:\n y = np.array(y > threshold, dtype=int)\n\n # Inverse transform data\n if output_type == \"binary\":\n if sp.issparse(y):\n y = y.toarray()\n if y.ndim == 2 and y.shape[1] == 2:\n return classes[y[:, 1]]\n else:\n if len(classes) == 1:\n return np.repeat(classes[0], len(y))\n else:\n return classes[y.ravel()]\n\n elif output_type == \"multilabel-indicator\":\n return y\n\n else:\n raise ValueError(\"{0} format is not supported\".format(output_type))" }, + { + "id": "scikit-learn/sklearn.preprocessing._label/label_binarize", + "name": "label_binarize", + "qname": "sklearn.preprocessing._label.label_binarize", + "decorators": 
["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing._label/label_binarize/y", + "name": "y", + "qname": "sklearn.preprocessing._label.label_binarize.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "Sequence of integer labels or multilabel data to encode." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/label_binarize/classes", + "name": "classes", + "qname": "sklearn.preprocessing._label.label_binarize.classes", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_classes,)", + "default_value": "", + "description": "Uniquely holds the label for each class." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/label_binarize/neg_label", + "name": "neg_label", + "qname": "sklearn.preprocessing._label.label_binarize.neg_label", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Value with which negative labels must be encoded." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/label_binarize/pos_label", + "name": "pos_label", + "qname": "sklearn.preprocessing._label.label_binarize.pos_label", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Value with which positive labels must be encoded." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.preprocessing._label/label_binarize/sparse_output", + "name": "sparse_output", + "qname": "sklearn.preprocessing._label.label_binarize.sparse_output", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False,", + "description": "Set to true if output binary array is desired in CSR sparse format." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Binarize labels in a one-vs-all fashion.\n\nSeveral regression and binary classification algorithms are\navailable in scikit-learn. A simple way to extend these algorithms\nto the multi-class classification case is to use the so-called\none-vs-all scheme.\n\nThis function makes it possible to compute this transformation for a\nfixed set of class labels known ahead of time.", + "docstring": "Binarize labels in a one-vs-all fashion.\n\nSeveral regression and binary classification algorithms are\navailable in scikit-learn. 
A simple way to extend these algorithms\nto the multi-class classification case is to use the so-called\none-vs-all scheme.\n\nThis function makes it possible to compute this transformation for a\nfixed set of class labels known ahead of time.\n\nParameters\n----------\ny : array-like\n Sequence of integer labels or multilabel data to encode.\n\nclasses : array-like of shape (n_classes,)\n Uniquely holds the label for each class.\n\nneg_label : int, default=0\n Value with which negative labels must be encoded.\n\npos_label : int, default=1\n Value with which positive labels must be encoded.\n\nsparse_output : bool, default=False\n Set to true if output binary array is desired in CSR sparse format.\n\nReturns\n-------\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. Sparse matrix will\n be of CSR format.\n\nExamples\n--------\n>>> from sklearn.preprocessing import label_binarize\n>>> label_binarize([1, 6], classes=[1, 2, 4, 6])\narray([[1, 0, 0, 0],\n [0, 0, 0, 1]])\n\nThe class ordering is preserved:\n\n>>> label_binarize([1, 6], classes=[1, 6, 4, 2])\narray([[1, 0, 0, 0],\n [0, 1, 0, 0]])\n\nBinary targets transform to a column vector\n\n>>> label_binarize(['yes', 'no', 'no', 'yes'], classes=['no', 'yes'])\narray([[1],\n [0],\n [0],\n [1]])\n\nSee Also\n--------\nLabelBinarizer : Class used to wrap the functionality of label_binarize and\n allow for fitting to classes independently of the transform operation.", + "code": "@_deprecate_positional_args\ndef label_binarize(y, *, classes, neg_label=0, pos_label=1,\n sparse_output=False):\n \"\"\"Binarize labels in a one-vs-all fashion.\n\n Several regression and binary classification algorithms are\n available in scikit-learn. A simple way to extend these algorithms\n to the multi-class classification case is to use the so-called\n one-vs-all scheme.\n\n This function makes it possible to compute this transformation for a\n fixed set of class labels known ahead of time.\n\n Parameters\n ----------\n y : array-like\n Sequence of integer labels or multilabel data to encode.\n\n classes : array-like of shape (n_classes,)\n Uniquely holds the label for each class.\n\n neg_label : int, default=0\n Value with which negative labels must be encoded.\n\n pos_label : int, default=1\n Value with which positive labels must be encoded.\n\n sparse_output : bool, default=False\n Set to true if output binary array is desired in CSR sparse format.\n\n Returns\n -------\n Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n Shape will be (n_samples, 1) for binary problems. 
Sparse matrix will\n be of CSR format.\n\n Examples\n --------\n >>> from sklearn.preprocessing import label_binarize\n >>> label_binarize([1, 6], classes=[1, 2, 4, 6])\n array([[1, 0, 0, 0],\n [0, 0, 0, 1]])\n\n The class ordering is preserved:\n\n >>> label_binarize([1, 6], classes=[1, 6, 4, 2])\n array([[1, 0, 0, 0],\n [0, 1, 0, 0]])\n\n Binary targets transform to a column vector\n\n >>> label_binarize(['yes', 'no', 'no', 'yes'], classes=['no', 'yes'])\n array([[1],\n [0],\n [0],\n [1]])\n\n See Also\n --------\n LabelBinarizer : Class used to wrap the functionality of label_binarize and\n allow for fitting to classes independently of the transform operation.\n \"\"\"\n if not isinstance(y, list):\n # XXX Workaround that will be removed when list of list format is\n # dropped\n y = check_array(y, accept_sparse='csr', ensure_2d=False, dtype=None)\n else:\n if _num_samples(y) == 0:\n raise ValueError('y has 0 samples: %r' % y)\n if neg_label >= pos_label:\n raise ValueError(\"neg_label={0} must be strictly less than \"\n \"pos_label={1}.\".format(neg_label, pos_label))\n\n if (sparse_output and (pos_label == 0 or neg_label != 0)):\n raise ValueError(\"Sparse binarization is only supported with non \"\n \"zero pos_label and zero neg_label, got \"\n \"pos_label={0} and neg_label={1}\"\n \"\".format(pos_label, neg_label))\n\n # To account for pos_label == 0 in the dense case\n pos_switch = pos_label == 0\n if pos_switch:\n pos_label = -neg_label\n\n y_type = type_of_target(y)\n if 'multioutput' in y_type:\n raise ValueError(\"Multioutput target data is not supported with label \"\n \"binarization\")\n if y_type == 'unknown':\n raise ValueError(\"The type of target data is not known\")\n\n n_samples = y.shape[0] if sp.issparse(y) else len(y)\n n_classes = len(classes)\n classes = np.asarray(classes)\n\n if y_type == \"binary\":\n if n_classes == 1:\n if sparse_output:\n return sp.csr_matrix((n_samples, 1), dtype=int)\n else:\n Y = np.zeros((len(y), 1), dtype=int)\n Y += neg_label\n return Y\n elif len(classes) >= 3:\n y_type = \"multiclass\"\n\n sorted_class = np.sort(classes)\n if y_type == \"multilabel-indicator\":\n y_n_classes = y.shape[1] if hasattr(y, 'shape') else len(y[0])\n if classes.size != y_n_classes:\n raise ValueError(\"classes {0} mismatch with the labels {1}\"\n \" found in the data\"\n .format(classes, unique_labels(y)))\n\n if y_type in (\"binary\", \"multiclass\"):\n y = column_or_1d(y)\n\n # pick out the known labels from y\n y_in_classes = np.in1d(y, classes)\n y_seen = y[y_in_classes]\n indices = np.searchsorted(sorted_class, y_seen)\n indptr = np.hstack((0, np.cumsum(y_in_classes)))\n\n data = np.empty_like(indices)\n data.fill(pos_label)\n Y = sp.csr_matrix((data, indices, indptr),\n shape=(n_samples, n_classes))\n elif y_type == \"multilabel-indicator\":\n Y = sp.csr_matrix(y)\n if pos_label != 1:\n data = np.empty_like(Y.data)\n data.fill(pos_label)\n Y.data = data\n else:\n raise ValueError(\"%s target data is not supported with label \"\n \"binarization\" % y_type)\n\n if not sparse_output:\n Y = Y.toarray()\n Y = Y.astype(int, copy=False)\n\n if neg_label != 0:\n Y[Y == 0] = neg_label\n\n if pos_switch:\n Y[Y == pos_label] = 0\n else:\n Y.data = Y.data.astype(int, copy=False)\n\n # preserve label ordering\n if np.any(classes != sorted_class):\n indices = np.searchsorted(sorted_class, classes)\n Y = Y[:, indices]\n\n if y_type == \"binary\":\n if sparse_output:\n Y = Y.getcol(-1)\n else:\n Y = Y[:, -1].reshape((-1, 1))\n\n return Y" + }, + { + "id": 
"scikit-learn/sklearn.preprocessing.setup/configuration", + "name": "configuration", + "qname": "sklearn.preprocessing.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.preprocessing.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.preprocessing.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.preprocessing.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.preprocessing.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package='', top_path=None):\n import numpy\n from numpy.distutils.misc_util import Configuration\n\n config = Configuration('preprocessing', parent_package, top_path)\n libraries = []\n if os.name == 'posix':\n libraries.append('m')\n\n config.add_extension('_csr_polynomial_expansion',\n sources=['_csr_polynomial_expansion.pyx'],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_subpackage('tests')\n\n return config" + }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/__init__", + "name": "__init__", + "qname": "sklearn.random_projection.BaseRandomProjection.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/__init__/self", + "name": "self", + "qname": "sklearn.random_projection.BaseRandomProjection.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/__init__/n_components", + "name": "n_components", + "qname": "sklearn.random_projection.BaseRandomProjection.__init__.n_components", + "default_value": "'auto'", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/__init__/eps", + "name": "eps", + "qname": "sklearn.random_projection.BaseRandomProjection.__init__.eps", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/__init__/dense_output", + "name": "dense_output", + "qname": "sklearn.random_projection.BaseRandomProjection.__init__.dense_output", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/__init__/random_state", + "name": "random_state", + "qname": "sklearn.random_projection.BaseRandomProjection.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + 
}, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Base class for random projections.\n\nWarning: This class should not be used directly.\nUse derived classes instead.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self, n_components='auto', *, eps=0.1, dense_output=False,\n random_state=None):\n self.n_components = n_components\n self.eps = eps\n self.dense_output = dense_output\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/_make_random_matrix", + "name": "_make_random_matrix", + "qname": "sklearn.random_projection.BaseRandomProjection._make_random_matrix", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/_make_random_matrix/self", + "name": "self", + "qname": "sklearn.random_projection.BaseRandomProjection._make_random_matrix.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/_make_random_matrix/n_components", + "name": "n_components", + "qname": "sklearn.random_projection.BaseRandomProjection._make_random_matrix.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int,", + "default_value": "", + "description": "Dimensionality of the target projection space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/_make_random_matrix/n_features", + "name": "n_features", + "qname": "sklearn.random_projection.BaseRandomProjection._make_random_matrix.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int,", + "default_value": "", + "description": "Dimensionality of the original source space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate the random projection matrix.", + "docstring": "Generate the random projection matrix.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated random matrix. Sparse matrix will be of CSR format.", + "code": " @abstractmethod\n def _make_random_matrix(self, n_components, n_features):\n \"\"\"Generate the random projection matrix.\n\n Parameters\n ----------\n n_components : int,\n Dimensionality of the target projection space.\n\n n_features : int,\n Dimensionality of the original source space.\n\n Returns\n -------\n components : {ndarray, sparse matrix} of shape \\\n (n_components, n_features)\n The generated random matrix. 
Sparse matrix will be of CSR format.\n\n \"\"\"" }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/fit", + "name": "fit", + "qname": "sklearn.random_projection.BaseRandomProjection.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/fit/self", + "name": "self", + "qname": "sklearn.random_projection.BaseRandomProjection.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/fit/X", + "name": "X", + "qname": "sklearn.random_projection.BaseRandomProjection.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training set: only the shape is used to find optimal random\nmatrix dimensions based on the theory referenced in the\naforementioned papers." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/fit/y", + "name": "y", + "qname": "sklearn.random_projection.BaseRandomProjection.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "Ignored" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Generate a sparse random projection matrix.", + "docstring": "Generate a sparse random projection matrix.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training set: only the shape is used to find optimal random\n matrix dimensions based on the theory referenced in the\n aforementioned papers.\n\ny\n Ignored\n\nReturns\n-------\nself", + "code": " def fit(self, X, y=None):\n \"\"\"Generate a sparse random projection matrix.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training set: only the shape is used to find optimal random\n matrix dimensions based on the theory referenced in the\n aforementioned papers.\n\n y\n Ignored\n\n Returns\n -------\n self\n\n \"\"\"\n X = self._validate_data(X, accept_sparse=['csr', 'csc'])\n\n n_samples, n_features = X.shape\n\n if self.n_components == 'auto':\n self.n_components_ = johnson_lindenstrauss_min_dim(\n n_samples=n_samples, eps=self.eps)\n\n if self.n_components_ <= 0:\n raise ValueError(\n 'eps=%f and n_samples=%d lead to a target dimension of '\n '%d which is invalid' % (\n self.eps, n_samples, self.n_components_))\n\n elif self.n_components_ > n_features:\n raise ValueError(\n 'eps=%f and n_samples=%d lead to a target dimension of '\n '%d which is larger than the original space with '\n 'n_features=%d' % (self.eps, n_samples, self.n_components_,\n n_features))\n else:\n if self.n_components <= 0:\n raise ValueError(\"n_components must be greater than 0, got %s\"\n % self.n_components)\n\n elif self.n_components > n_features:\n warnings.warn(\n \"The number of components is higher than the number of\"\n \" features: n_features < n_components (%s < %s).\"\n \"The dimensionality of the problem will not be reduced.\"\n % (n_features, 
self.n_components),\n DataDimensionalityWarning)\n\n self.n_components_ = self.n_components\n\n # Generate a projection matrix of size [n_components, n_features]\n self.components_ = self._make_random_matrix(self.n_components_,\n n_features)\n\n # Check contract\n assert self.components_.shape == (self.n_components_, n_features), (\n 'An error has occurred: the self.components_ matrix does '\n 'not have the proper shape.')\n\n return self" }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/transform", + "name": "transform", + "qname": "sklearn.random_projection.BaseRandomProjection.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/transform/self", + "name": "self", + "qname": "sklearn.random_projection.BaseRandomProjection.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.random_projection/BaseRandomProjection/transform/X", + "name": "X", + "qname": "sklearn.random_projection.BaseRandomProjection.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data to project into a smaller dimensional space." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Project the data by using matrix product with the random matrix", + "docstring": "Project the data by using matrix product with the random matrix\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input data to project into a smaller dimensional space.\n\nReturns\n-------\nX_new : {ndarray, sparse matrix} of shape (n_samples, n_components)\n Projected array.", + "code": " def transform(self, X):\n \"\"\"Project the data by using matrix product with the random matrix\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input data to project into a smaller dimensional space.\n\n Returns\n -------\n X_new : {ndarray, sparse matrix} of shape (n_samples, n_components)\n Projected array.\n \"\"\"\n X = check_array(X, accept_sparse=['csr', 'csc'])\n\n check_is_fitted(self)\n\n if X.shape[1] != self.components_.shape[1]:\n raise ValueError(\n 'Impossible to perform projection:'\n 'X at fit stage had a different number of features. 
'\n '(%s != %s)' % (X.shape[1], self.components_.shape[1]))\n\n X_new = safe_sparse_dot(X, self.components_.T,\n dense_output=self.dense_output)\n return X_new" }, + { + "id": "scikit-learn/sklearn.random_projection/GaussianRandomProjection/__init__", + "name": "__init__", + "qname": "sklearn.random_projection.GaussianRandomProjection.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.random_projection/GaussianRandomProjection/__init__/self", + "name": "self", + "qname": "sklearn.random_projection.GaussianRandomProjection.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.random_projection/GaussianRandomProjection/__init__/n_components", + "name": "n_components", + "qname": "sklearn.random_projection.GaussianRandomProjection.__init__.n_components", + "default_value": "'auto'", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int or 'auto'", + "default_value": "'auto'", + "description": "Dimensionality of the target projection space.\n\nn_components can be automatically adjusted according to the\nnumber of samples in the dataset and the bound given by the\nJohnson-Lindenstrauss lemma. In that case the quality of the\nembedding is controlled by the ``eps`` parameter.\n\nIt should be noted that Johnson-Lindenstrauss lemma can yield\nvery conservative estimates of the required number of components\nas it makes no assumption on the structure of the dataset." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "'auto'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.random_projection/GaussianRandomProjection/__init__/eps", + "name": "eps", + "qname": "sklearn.random_projection.GaussianRandomProjection.__init__.eps", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "Parameter to control the quality of the embedding according to\nthe Johnson-Lindenstrauss lemma when `n_components` is set to\n'auto'. The value should be strictly positive.\n\nSmaller values lead to better embedding and higher number of\ndimensions (n_components) in the target projection space." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.random_projection/GaussianRandomProjection/__init__/random_state", + "name": "random_state", + "qname": "sklearn.random_projection.GaussianRandomProjection.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the pseudo random number generator used to generate the\nprojection matrix at fit time.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Reduce dimensionality through Gaussian random projection.\n\nThe components of the random matrix are drawn from N(0, 1 / n_components).\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components='auto', *, eps=0.1, random_state=None):\n super().__init__(\n n_components=n_components,\n eps=eps,\n dense_output=True,\n random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.random_projection/GaussianRandomProjection/_make_random_matrix", + "name": "_make_random_matrix", + "qname": "sklearn.random_projection.GaussianRandomProjection._make_random_matrix", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.random_projection/GaussianRandomProjection/_make_random_matrix/self", + "name": "self", + "qname": "sklearn.random_projection.GaussianRandomProjection._make_random_matrix.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.random_projection/GaussianRandomProjection/_make_random_matrix/n_components", + "name": "n_components", + "qname": "sklearn.random_projection.GaussianRandomProjection._make_random_matrix.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int,", + "default_value": "", + "description": "Dimensionality of the target projection space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.random_projection/GaussianRandomProjection/_make_random_matrix/n_features", + "name": "n_features", + "qname": "sklearn.random_projection.GaussianRandomProjection._make_random_matrix.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int,", + "default_value": "", + "description": "Dimensionality of the original source space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate the random projection matrix.", + "docstring": "Generate the random projection matrix.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated random matrix. Sparse matrix will be of CSR format.", + "code": " def _make_random_matrix(self, n_components, n_features):\n \"\"\" Generate the random projection matrix.\n\n Parameters\n ----------\n n_components : int,\n Dimensionality of the target projection space.\n\n n_features : int,\n Dimensionality of the original source space.\n\n Returns\n -------\n components : {ndarray, sparse matrix} of shape \\\n (n_components, n_features)\n The generated random matrix. 
Sparse matrix will be of CSR format.\n\n \"\"\"\n random_state = check_random_state(self.random_state)\n return _gaussian_random_matrix(n_components,\n n_features,\n random_state=random_state)" }, + { + "id": "scikit-learn/sklearn.random_projection/SparseRandomProjection/__init__", + "name": "__init__", + "qname": "sklearn.random_projection.SparseRandomProjection.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.random_projection/SparseRandomProjection/__init__/self", + "name": "self", + "qname": "sklearn.random_projection.SparseRandomProjection.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.random_projection/SparseRandomProjection/__init__/n_components", + "name": "n_components", + "qname": "sklearn.random_projection.SparseRandomProjection.__init__.n_components", + "default_value": "'auto'", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int or 'auto'", + "default_value": "'auto'", + "description": "Dimensionality of the target projection space.\n\nn_components can be automatically adjusted according to the\nnumber of samples in the dataset and the bound given by the\nJohnson-Lindenstrauss lemma. In that case the quality of the\nembedding is controlled by the ``eps`` parameter.\n\nIt should be noted that Johnson-Lindenstrauss lemma can yield\nvery conservative estimates of the required number of components\nas it makes no assumption on the structure of the dataset." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "'auto'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.random_projection/SparseRandomProjection/__init__/density", + "name": "density", + "qname": "sklearn.random_projection.SparseRandomProjection.__init__.density", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float or 'auto'", + "default_value": "'auto'", + "description": "Ratio in the range (0, 1] of non-zero components in the random\nprojection matrix.\n\nIf density = 'auto', the value is set to the minimum density\nas recommended by Ping Li et al.: 1 / sqrt(n_features).\n\nUse density = 1 / 3.0 if you want to reproduce the results from\nAchlioptas, 2001." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": 1.0, + "min_inclusive": false, + "max_inclusive": true + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "'auto'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.random_projection/SparseRandomProjection/__init__/eps", + "name": "eps", + "qname": "sklearn.random_projection.SparseRandomProjection.__init__.eps", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "Parameter to control the quality of the embedding according to\nthe Johnson-Lindenstrauss lemma when n_components is set to\n'auto'. This value should be strictly positive.\n\nSmaller values lead to better embedding and higher number of\ndimensions (n_components) in the target projection space." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.random_projection/SparseRandomProjection/__init__/dense_output", + "name": "dense_output", + "qname": "sklearn.random_projection.SparseRandomProjection.__init__.dense_output", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, ensure that the output of the random projection is a\ndense numpy array even if the input and random projection matrix\nare both sparse. In practice, if the number of components is\nsmall the number of zero components in the projected data will\nbe very small and it will be more CPU and memory efficient to\nuse a dense representation.\n\nIf False, the projected data uses a sparse representation if\nthe input is sparse." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.random_projection/SparseRandomProjection/__init__/random_state", + "name": "random_state", + "qname": "sklearn.random_projection.SparseRandomProjection.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the pseudo random number generator used to generate the\nprojection matrix at fit time.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Reduce dimensionality through sparse random projection.\n\nSparse random matrix is an alternative to dense random\nprojection matrix that guarantees similar embedding quality while being\nmuch more memory efficient and allowing faster computation of the\nprojected data.\n\nIf we note `s = 1 / density` the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.13", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, n_components='auto', *, density='auto', eps=0.1,\n dense_output=False, random_state=None):\n super().__init__(\n n_components=n_components,\n eps=eps,\n dense_output=dense_output,\n random_state=random_state)\n\n self.density = density" + }, + { + "id": "scikit-learn/sklearn.random_projection/SparseRandomProjection/_make_random_matrix", + "name": "_make_random_matrix", + "qname": "sklearn.random_projection.SparseRandomProjection._make_random_matrix", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.random_projection/SparseRandomProjection/_make_random_matrix/self", + "name": "self", + "qname": "sklearn.random_projection.SparseRandomProjection._make_random_matrix.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.random_projection/SparseRandomProjection/_make_random_matrix/n_components", + "name": "n_components", + "qname": "sklearn.random_projection.SparseRandomProjection._make_random_matrix.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Dimensionality of the target projection space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.random_projection/SparseRandomProjection/_make_random_matrix/n_features", + "name": "n_features", + "qname": "sklearn.random_projection.SparseRandomProjection._make_random_matrix.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Dimensionality of the original source space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate the random projection matrix", + "docstring": "Generate the random projection matrix\n\nParameters\n----------\nn_components : int\n Dimensionality of the target projection space.\n\nn_features : int\n Dimensionality of the original source space.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated random matrix. Sparse matrix will be of CSR format.", + "code": " def _make_random_matrix(self, n_components, n_features):\n \"\"\" Generate the random projection matrix\n\n Parameters\n ----------\n n_components : int\n Dimensionality of the target projection space.\n\n n_features : int\n Dimensionality of the original source space.\n\n Returns\n -------\n components : {ndarray, sparse matrix} of shape \\\n (n_components, n_features)\n The generated random matrix. 
Sparse matrix will be of CSR format.\n\n \"\"\"\n random_state = check_random_state(self.random_state)\n self.density_ = _check_density(self.density, n_features)\n return _sparse_random_matrix(n_components,\n n_features,\n density=self.density_,\n random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.random_projection/_check_density", + "name": "_check_density", + "qname": "sklearn.random_projection._check_density", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.random_projection/_check_density/density", + "name": "density", + "qname": "sklearn.random_projection._check_density.density", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.random_projection/_check_density/n_features", + "name": "n_features", + "qname": "sklearn.random_projection._check_density.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Factorize density check according to Li et al.", + "docstring": "Factorize density check according to Li et al.", + "code": "def _check_density(density, n_features):\n \"\"\"Factorize density check according to Li et al.\"\"\"\n if density == 'auto':\n density = 1 / np.sqrt(n_features)\n\n elif density <= 0 or density > 1:\n raise ValueError(\"Expected density in range ]0, 1], got: %r\"\n % density)\n return density" + }, + { + "id": "scikit-learn/sklearn.random_projection/_check_input_size", + "name": "_check_input_size", + "qname": "sklearn.random_projection._check_input_size", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.random_projection/_check_input_size/n_components", + "name": "n_components", + "qname": "sklearn.random_projection._check_input_size.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.random_projection/_check_input_size/n_features", + "name": "n_features", + "qname": "sklearn.random_projection._check_input_size.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Factorize argument checking for random matrix generation.", + "docstring": "Factorize argument checking for random matrix generation.", + "code": "def _check_input_size(n_components, n_features):\n \"\"\"Factorize argument checking for random matrix generation.\"\"\"\n if n_components <= 0:\n raise ValueError(\"n_components must be strictly positive, got %d\" %\n n_components)\n if n_features <= 0:\n raise ValueError(\"n_features must be strictly positive, got %d\" %\n n_features)" + }, + { + "id": "scikit-learn/sklearn.random_projection/_gaussian_random_matrix", + "name": "_gaussian_random_matrix", + "qname": "sklearn.random_projection._gaussian_random_matrix", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.random_projection/_gaussian_random_matrix/n_components", + "name": "n_components", + "qname": 
"sklearn.random_projection._gaussian_random_matrix.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int,", + "default_value": "", + "description": "Dimensionality of the target projection space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.random_projection/_gaussian_random_matrix/n_features", + "name": "n_features", + "qname": "sklearn.random_projection._gaussian_random_matrix.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int,", + "default_value": "", + "description": "Dimensionality of the original source space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.random_projection/_gaussian_random_matrix/random_state", + "name": "random_state", + "qname": "sklearn.random_projection._gaussian_random_matrix.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the pseudo random number generator used to generate the matrix\nat fit time.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate a dense Gaussian random matrix.\n\nThe components of the random matrix are drawn from\n\n N(0, 1.0 / n_components).\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generate a dense Gaussian random matrix.\n\nThe components of the random matrix are drawn from\n\n N(0, 1.0 / n_components).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the matrix\n at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ncomponents : ndarray of shape (n_components, n_features)\n The generated Gaussian random matrix.\n\nSee Also\n--------\nGaussianRandomProjection", + "code": "def _gaussian_random_matrix(n_components, n_features, random_state=None):\n \"\"\"Generate a dense Gaussian random matrix.\n\n The components of the random matrix are drawn from\n\n N(0, 1.0 / n_components).\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int,\n Dimensionality of the target projection space.\n\n n_features : int,\n Dimensionality of the original source space.\n\n random_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the matrix\n at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n components : ndarray of shape (n_components, n_features)\n The generated Gaussian random matrix.\n\n See Also\n --------\n GaussianRandomProjection\n \"\"\"\n _check_input_size(n_components, n_features)\n rng = 
check_random_state(random_state)\n components = rng.normal(loc=0.0,\n scale=1.0 / np.sqrt(n_components),\n size=(n_components, n_features))\n return components" + }, + { + "id": "scikit-learn/sklearn.random_projection/_sparse_random_matrix", + "name": "_sparse_random_matrix", + "qname": "sklearn.random_projection._sparse_random_matrix", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.random_projection/_sparse_random_matrix/n_components", + "name": "n_components", + "qname": "sklearn.random_projection._sparse_random_matrix.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int,", + "default_value": "", + "description": "Dimensionality of the target projection space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.random_projection/_sparse_random_matrix/n_features", + "name": "n_features", + "qname": "sklearn.random_projection._sparse_random_matrix.n_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int,", + "default_value": "", + "description": "Dimensionality of the original source space." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.random_projection/_sparse_random_matrix/density", + "name": "density", + "qname": "sklearn.random_projection._sparse_random_matrix.density", + "default_value": "'auto'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float or 'auto'", + "default_value": "'auto'", + "description": "Ratio of non-zero component in the random projection matrix in the\nrange `(0, 1]`\n\nIf density = 'auto', the value is set to the minimum density\nas recommended by Ping Li et al.: 1 / sqrt(n_features).\n\nUse density = 1 / 3.0 if you want to reproduce the results from\nAchlioptas, 2001." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "'auto'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.random_projection/_sparse_random_matrix/random_state", + "name": "random_state", + "qname": "sklearn.random_projection._sparse_random_matrix.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the pseudo random number generator used to generate the matrix\nat fit time.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generalized Achlioptas random sparse matrix for random projection.\n\nSetting density to 1 / 3 will yield the original matrix by Dimitris\nAchlioptas while setting a lower value will yield the generalization\nby Ping Li et al.\n\nIf we note :math:`s = 1 / density`, the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.", + "docstring": "Generalized Achlioptas random sparse matrix for random projection.\n\nSetting density to 1 / 3 will yield the original matrix by Dimitris\nAchlioptas while setting a lower value will yield the generalization\nby Ping Li et al.\n\nIf we note :math:`s = 1 / density`, the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\ndensity : float or 'auto', default='auto'\n Ratio of non-zero component in the random projection matrix in the\n range `(0, 1]`\n\n If density = 'auto', the value is set to the minimum density\n as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n Use density = 1 / 3.0 if you want to reproduce the results from\n Achlioptas, 2001.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the matrix\n at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated Gaussian random matrix. Sparse matrix will be of CSR\n format.\n\nSee Also\n--------\nSparseRandomProjection\n\nReferences\n----------\n\n.. [1] Ping Li, T. Hastie and K. W. Church, 2006,\n \"Very Sparse Random Projections\".\n https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n.. [2] D. 
Achlioptas, 2001, \"Database-friendly random projections\",\n http://www.cs.ucsc.edu/~optas/papers/jl.pdf", + "code": "def _sparse_random_matrix(n_components, n_features, density='auto',\n random_state=None):\n \"\"\"Generalized Achlioptas random sparse matrix for random projection.\n\n Setting density to 1 / 3 will yield the original matrix by Dimitris\n Achlioptas while setting a lower value will yield the generalization\n by Ping Li et al.\n\n If we note :math:`s = 1 / density`, the components of the random matrix are\n drawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_components : int,\n Dimensionality of the target projection space.\n\n n_features : int,\n Dimensionality of the original source space.\n\n density : float or 'auto', default='auto'\n Ratio of non-zero component in the random projection matrix in the\n range `(0, 1]`\n\n If density = 'auto', the value is set to the minimum density\n as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n Use density = 1 / 3.0 if you want to reproduce the results from\n Achlioptas, 2001.\n\n random_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the matrix\n at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n components : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated Gaussian random matrix. Sparse matrix will be of CSR\n format.\n\n See Also\n --------\n SparseRandomProjection\n\n References\n ----------\n\n .. [1] Ping Li, T. Hastie and K. W. Church, 2006,\n \"Very Sparse Random Projections\".\n https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n .. [2] D. 
Achlioptas, 2001, \"Database-friendly random projections\",\n http://www.cs.ucsc.edu/~optas/papers/jl.pdf\n\n \"\"\"\n _check_input_size(n_components, n_features)\n density = _check_density(density, n_features)\n rng = check_random_state(random_state)\n\n if density == 1:\n # skip index generation if totally dense\n components = rng.binomial(1, 0.5, (n_components, n_features)) * 2 - 1\n return 1 / np.sqrt(n_components) * components\n\n else:\n # Generate location of non zero elements\n indices = []\n offset = 0\n indptr = [offset]\n for _ in range(n_components):\n # find the indices of the non-zero components for row i\n n_nonzero_i = rng.binomial(n_features, density)\n indices_i = sample_without_replacement(n_features, n_nonzero_i,\n random_state=rng)\n indices.append(indices_i)\n offset += n_nonzero_i\n indptr.append(offset)\n\n indices = np.concatenate(indices)\n\n # Among non zero components the probability of the sign is 50%/50%\n data = rng.binomial(1, 0.5, size=np.size(indices)) * 2 - 1\n\n # build the CSR structure by concatenating the rows\n components = sp.csr_matrix((data, indices, indptr),\n shape=(n_components, n_features))\n\n return np.sqrt(1 / density) / np.sqrt(n_components) * components" + }, + { + "id": "scikit-learn/sklearn.random_projection/johnson_lindenstrauss_min_dim", + "name": "johnson_lindenstrauss_min_dim", + "qname": "sklearn.random_projection.johnson_lindenstrauss_min_dim", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.random_projection/johnson_lindenstrauss_min_dim/n_samples", + "name": "n_samples", + "qname": "sklearn.random_projection.johnson_lindenstrauss_min_dim.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int or array-like of int", + "default_value": "", + "description": "Number of samples that should be a integer greater than 0. If an array\nis given, it will compute a safe number of components array-wise." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "array-like of int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.random_projection/johnson_lindenstrauss_min_dim/eps", + "name": "eps", + "qname": "sklearn.random_projection.johnson_lindenstrauss_min_dim.eps", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float or ndarray of shape (n_components,), dtype=float", + "default_value": "0.1", + "description": "Maximum distortion rate in the range (0,1 ) as defined by the\nJohnson-Lindenstrauss lemma. If an array is given, it will compute a\nsafe number of components array-wise." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_components,)" + }, + { + "kind": "NamedType", + "name": "dtype=float" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Find a 'safe' number of components to randomly project to.\n\nThe distortion introduced by a random projection `p` only changes the\ndistance between two points by a factor (1 +- eps) in an euclidean space\nwith good probability. 
The projection `p` is an eps-embedding as defined\nby:\n\n (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\nWhere u and v are any rows taken from a dataset of shape (n_samples,\nn_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\nN(0, 1) matrix of shape (n_components, n_features) (or a sparse\nAchlioptas matrix).\n\nThe minimum number of components to guarantee the eps-embedding is\ngiven by:\n\n n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\nNote that the number of dimensions is independent of the original\nnumber of features but instead depends on the size of the dataset:\nthe larger the dataset, the higher is the minimal dimensionality of\nan eps-embedding.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Find a 'safe' number of components to randomly project to.\n\nThe distortion introduced by a random projection `p` only changes the\ndistance between two points by a factor (1 +- eps) in a Euclidean space\nwith good probability. The projection `p` is an eps-embedding as defined\nby:\n\n (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\nWhere u and v are any rows taken from a dataset of shape (n_samples,\nn_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\nN(0, 1) matrix of shape (n_components, n_features) (or a sparse\nAchlioptas matrix).\n\nThe minimum number of components to guarantee the eps-embedding is\ngiven by:\n\n n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\nNote that the number of dimensions is independent of the original\nnumber of features but instead depends on the size of the dataset:\nthe larger the dataset, the higher is the minimal dimensionality of\nan eps-embedding.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or array-like of int\n Number of samples that should be an integer greater than 0. If an array\n is given, it will compute a safe number of components array-wise.\n\neps : float or ndarray of shape (n_components,), dtype=float, default=0.1\n Maximum distortion rate in the range (0, 1) as defined by the\n Johnson-Lindenstrauss lemma. If an array is given, it will compute a\n safe number of components array-wise.\n\nReturns\n-------\nn_components : int or ndarray of int\n The minimal number of components to guarantee with good probability\n an eps-embedding with n_samples.\n\nExamples\n--------\n\n>>> johnson_lindenstrauss_min_dim(1e6, eps=0.5)\n663\n\n>>> johnson_lindenstrauss_min_dim(1e6, eps=[0.5, 0.1, 0.01])\narray([ 663, 11841, 1112658])\n\n>>> johnson_lindenstrauss_min_dim([1e4, 1e5, 1e6], eps=0.1)\narray([ 7894, 9868, 11841])\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma\n\n.. [2] Sanjoy Dasgupta and Anupam Gupta, 1999,\n \"An elementary proof of the Johnson-Lindenstrauss Lemma.\"\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.45.3654", + "code": "@_deprecate_positional_args\ndef johnson_lindenstrauss_min_dim(n_samples, *, eps=0.1):\n \"\"\"Find a 'safe' number of components to randomly project to.\n\n The distortion introduced by a random projection `p` only changes the\n distance between two points by a factor (1 +- eps) in a Euclidean space\n with good probability.
The projection `p` is an eps-embedding as defined\n by:\n\n (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\n Where u and v are any rows taken from a dataset of shape (n_samples,\n n_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\n N(0, 1) matrix of shape (n_components, n_features) (or a sparse\n Achlioptas matrix).\n\n The minimum number of components to guarantee the eps-embedding is\n given by:\n\n n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\n Note that the number of dimensions is independent of the original\n number of features but instead depends on the size of the dataset:\n the larger the dataset, the higher is the minimal dimensionality of\n an eps-embedding.\n\n Read more in the :ref:`User Guide `.\n\n Parameters\n ----------\n n_samples : int or array-like of int\n Number of samples that should be an integer greater than 0. If an array\n is given, it will compute a safe number of components array-wise.\n\n eps : float or ndarray of shape (n_components,), dtype=float, \\\n default=0.1\n Maximum distortion rate in the range (0, 1) as defined by the\n Johnson-Lindenstrauss lemma. If an array is given, it will compute a\n safe number of components array-wise.\n\n Returns\n -------\n n_components : int or ndarray of int\n The minimal number of components to guarantee with good probability\n an eps-embedding with n_samples.\n\n Examples\n --------\n\n >>> johnson_lindenstrauss_min_dim(1e6, eps=0.5)\n 663\n\n >>> johnson_lindenstrauss_min_dim(1e6, eps=[0.5, 0.1, 0.01])\n array([ 663, 11841, 1112658])\n\n >>> johnson_lindenstrauss_min_dim([1e4, 1e5, 1e6], eps=0.1)\n array([ 7894, 9868, 11841])\n\n References\n ----------\n\n .. [1] https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma\n\n ..
[2] Sanjoy Dasgupta and Anupam Gupta, 1999,\n \"An elementary proof of the Johnson-Lindenstrauss Lemma.\"\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.45.3654\n\n \"\"\"\n eps = np.asarray(eps)\n n_samples = np.asarray(n_samples)\n\n if np.any(eps <= 0.0) or np.any(eps >= 1):\n raise ValueError(\n \"The JL bound is defined for eps in ]0, 1[, got %r\" % eps)\n\n if np.any(n_samples <= 0):\n raise ValueError(\n \"The JL bound is defined for n_samples greater than zero, got %r\"\n % n_samples)\n\n denominator = (eps ** 2 / 2) - (eps ** 3 / 3)\n return (4 * np.log(n_samples) / denominator).astype(np.int64)" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/__init__", + "name": "__init__", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/__init__/self", + "name": "self", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/__init__/kernel", + "name": "kernel", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.__init__.kernel", + "default_value": "'rbf'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'knn', 'rbf'} or callable", + "default_value": "'rbf'", + "description": "String identifier for kernel function to use or the kernel function\nitself. Only 'rbf' and 'knn' strings are valid inputs. The function\npassed should take two inputs, each of shape (n_samples, n_features),\nand return a (n_samples, n_samples) shaped weight matrix."
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["rbf", "knn"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/__init__/gamma", + "name": "gamma", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.__init__.gamma", + "default_value": "20", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.__init__.n_neighbors", + "default_value": "7", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/__init__/alpha", + "name": "alpha", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.__init__.alpha", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.__init__.max_iter", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/__init__/tol", + "name": "tol", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for label propagation module.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, kernel='rbf', *, gamma=20, n_neighbors=7,\n alpha=1, max_iter=30, tol=1e-3, n_jobs=None):\n\n self.max_iter = max_iter\n self.tol = tol\n\n # kernel parameters\n self.kernel = kernel\n self.gamma = gamma\n self.n_neighbors = n_neighbors\n\n # clamping factor\n self.alpha = alpha\n\n self.n_jobs = n_jobs" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/_build_graph", + "name": "_build_graph", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation._build_graph", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/_build_graph/self", + "name": "self", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation._build_graph.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @abstractmethod\n def _build_graph(self):\n raise NotImplementedError(\"Graph construction must be implemented\"\n \" to fit a label propagation model.\")" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/_get_kernel", + "name": "_get_kernel", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation._get_kernel", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/_get_kernel/self", + "name": "self", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation._get_kernel.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/_get_kernel/X", + "name": "X", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation._get_kernel.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/_get_kernel/y", + "name": "y", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation._get_kernel.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_kernel(self, X, y=None):\n if self.kernel == \"rbf\":\n if y is None:\n return rbf_kernel(X, X, gamma=self.gamma)\n else:\n return rbf_kernel(X, y, gamma=self.gamma)\n elif self.kernel == \"knn\":\n if self.nn_fit is None:\n self.nn_fit = NearestNeighbors(n_neighbors=self.n_neighbors,\n n_jobs=self.n_jobs).fit(X)\n if y is None:\n return self.nn_fit.kneighbors_graph(self.nn_fit._fit_X,\n self.n_neighbors,\n mode='connectivity')\n else:\n return 
self.nn_fit.kneighbors(y, return_distance=False)\n elif callable(self.kernel):\n if y is None:\n return self.kernel(X, X)\n else:\n return self.kernel(X, y)\n else:\n raise ValueError(\"%s is not a valid kernel. Only rbf and knn\"\n \" or an explicit function \"\n \" are supported at this time.\" % self.kernel)" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/fit", + "name": "fit", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/fit/self", + "name": "self", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/fit/X", + "name": "X", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "A matrix of shape (n_samples, n_samples) will be created from this." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/fit/y", + "name": "y", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "`n_labeled_samples` (unlabeled points are marked as -1)\nAll unlabeled samples will be transductively assigned labels." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit a semi-supervised label propagation model based\n\nAll the input data is provided matrix X (labeled and unlabeled)\nand corresponding label matrix y with a dedicated marker value for\nunlabeled samples.", + "docstring": "Fit a semi-supervised label propagation model based\n\nAll the input data is provided matrix X (labeled and unlabeled)\nand corresponding label matrix y with a dedicated marker value for\nunlabeled samples.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n A matrix of shape (n_samples, n_samples) will be created from this.\n\ny : array-like of shape (n_samples,)\n `n_labeled_samples` (unlabeled points are marked as -1)\n All unlabeled samples will be transductively assigned labels.\n\nReturns\n-------\nself : object", + "code": " def fit(self, X, y):\n \"\"\"Fit a semi-supervised label propagation model based\n\n All the input data is provided matrix X (labeled and unlabeled)\n and corresponding label matrix y with a dedicated marker value for\n unlabeled samples.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n A matrix of shape (n_samples, n_samples) will be created from this.\n\n y : array-like of shape (n_samples,)\n `n_labeled_samples` (unlabeled points are marked as -1)\n All unlabeled samples will be transductively assigned labels.\n\n Returns\n -------\n self : object\n \"\"\"\n X, y = self._validate_data(X, y)\n self.X_ = X\n check_classification_targets(y)\n\n # actual graph construction (implementations should override this)\n graph_matrix = self._build_graph()\n\n # label construction\n # construct a categorical distribution for classification only\n classes = np.unique(y)\n classes = (classes[classes != -1])\n self.classes_ = classes\n\n n_samples, n_classes = len(y), len(classes)\n\n alpha = self.alpha\n if self._variant == 'spreading' and \\\n (alpha is None or alpha <= 0.0 or alpha >= 1.0):\n raise ValueError('alpha=%s is invalid: it must be inside '\n 'the open interval (0, 1)' % alpha)\n y = np.asarray(y)\n unlabeled = y == -1\n\n # initialize distributions\n self.label_distributions_ = np.zeros((n_samples, n_classes))\n for label in classes:\n self.label_distributions_[y == label, classes == label] = 1\n\n y_static = np.copy(self.label_distributions_)\n if self._variant == 'propagation':\n # LabelPropagation\n y_static[unlabeled] = 0\n else:\n # LabelSpreading\n y_static *= 1 - alpha\n\n l_previous = np.zeros((self.X_.shape[0], n_classes))\n\n unlabeled = unlabeled[:, np.newaxis]\n if sparse.isspmatrix(graph_matrix):\n graph_matrix = graph_matrix.tocsr()\n\n for self.n_iter_ in range(self.max_iter):\n if np.abs(self.label_distributions_ - l_previous).sum() < self.tol:\n break\n\n l_previous = self.label_distributions_\n self.label_distributions_ = safe_sparse_dot(\n graph_matrix, self.label_distributions_)\n\n if self._variant == 'propagation':\n normalizer = np.sum(\n self.label_distributions_, axis=1)[:, np.newaxis]\n self.label_distributions_ /= normalizer\n self.label_distributions_ = np.where(unlabeled,\n self.label_distributions_,\n y_static)\n else:\n # clamp\n self.label_distributions_ = np.multiply(\n alpha, self.label_distributions_) + y_static\n else:\n warnings.warn(\n 'max_iter=%d was reached without convergence.' 
% self.max_iter,\n category=ConvergenceWarning\n )\n self.n_iter_ += 1\n\n normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]\n normalizer[normalizer == 0] = 1\n self.label_distributions_ /= normalizer\n\n # set the transduction item\n transduction = self.classes_[np.argmax(self.label_distributions_,\n axis=1)]\n self.transduction_ = transduction.ravel()\n return self" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/predict", + "name": "predict", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/predict/self", + "name": "self", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/predict/X", + "name": "X", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data matrix." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Performs inductive inference across the model.", + "docstring": "Performs inductive inference across the model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n Predictions for input data.", + "code": " def predict(self, X):\n \"\"\"Performs inductive inference across the model.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n Predictions for input data.\n \"\"\"\n probas = self.predict_proba(X)\n return self.classes_[np.argmax(probas, axis=1)].ravel()" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/predict_proba", + "name": "predict_proba", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/predict_proba/self", + "name": "self", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/BaseLabelPropagation/predict_proba/X", + "name": "X", + "qname": "sklearn.semi_supervised._label_propagation.BaseLabelPropagation.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data matrix." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict probability for each possible outcome.\n\nCompute the probability estimates for each single sample in X\nand each possible outcome seen during training (categorical\ndistribution).", + "docstring": "Predict probability for each possible outcome.\n\nCompute the probability estimates for each single sample in X\nand each possible outcome seen during training (categorical\ndistribution).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nprobabilities : ndarray of shape (n_samples, n_classes)\n Normalized probability distributions across\n class labels.", + "code": " def predict_proba(self, X):\n \"\"\"Predict probability for each possible outcome.\n\n Compute the probability estimates for each single sample in X\n and each possible outcome seen during training (categorical\n distribution).\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n probabilities : ndarray of shape (n_samples, n_classes)\n Normalized probability distributions across\n class labels.\n \"\"\"\n check_is_fitted(self)\n\n X_2d = check_array(X, accept_sparse=['csc', 'csr', 'coo', 'dok',\n 'bsr', 'lil', 'dia'])\n weight_matrices = self._get_kernel(self.X_, X_2d)\n if self.kernel == 'knn':\n probabilities = np.array([\n np.sum(self.label_distributions_[weight_matrix], axis=0)\n for weight_matrix in weight_matrices])\n else:\n weight_matrices = weight_matrices.T\n probabilities = safe_sparse_dot(\n weight_matrices, self.label_distributions_)\n normalizer = np.atleast_2d(np.sum(probabilities, axis=1)).T\n probabilities /= normalizer\n return probabilities" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/__init__", + "name": "__init__", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/__init__/self", + "name": "self", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/__init__/kernel", + "name": "kernel", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation.__init__.kernel", + "default_value": "'rbf'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'knn', 'rbf'} or callable", + "default_value": "'rbf'", + "description": "String identifier for kernel function to use or the kernel function\nitself. Only 'rbf' and 'knn' strings are valid inputs. The function\npassed should take two inputs, each of shape (n_samples, n_features),\nand return a (n_samples, n_samples) shaped weight matrix." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["rbf", "knn"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/__init__/gamma", + "name": "gamma", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation.__init__.gamma", + "default_value": "20", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "20", + "description": "Parameter for rbf kernel." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation.__init__.n_neighbors", + "default_value": "7", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "7", + "description": "Parameter for knn kernel which need to be strictly positive." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "Change maximum number of iterations allowed." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/__init__/tol", + "name": "tol", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float, 1e-3", + "default_value": "", + "description": "Convergence tolerance: threshold to consider the system at steady\nstate." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "1e-3" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Label Propagation classifier\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, kernel='rbf', *, gamma=20, n_neighbors=7,\n max_iter=1000, tol=1e-3, n_jobs=None):\n super().__init__(kernel=kernel, gamma=gamma,\n n_neighbors=n_neighbors, max_iter=max_iter,\n tol=tol, n_jobs=n_jobs, alpha=None)" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/_build_graph", + "name": "_build_graph", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation._build_graph", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/_build_graph/self", + "name": "self", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation._build_graph.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Matrix representing a fully connected graph between each sample\n\nThis basic implementation creates a non-stochastic affinity matrix, so\nclass distributions will exceed 1 (normalization may be desired).", + "docstring": "Matrix representing a fully connected graph between each sample\n\nThis basic implementation creates a non-stochastic affinity matrix, so\nclass distributions will exceed 1 (normalization may be desired).", + "code": " def _build_graph(self):\n \"\"\"Matrix representing a fully connected graph between each sample\n\n This basic implementation creates a non-stochastic affinity matrix, so\n class distributions will exceed 1 (normalization may be desired).\n \"\"\"\n if self.kernel == 'knn':\n self.nn_fit = None\n affinity_matrix = self._get_kernel(self.X_)\n normalizer = affinity_matrix.sum(axis=0)\n if sparse.isspmatrix(affinity_matrix):\n affinity_matrix.data /= np.diag(np.array(normalizer))\n else:\n affinity_matrix /= normalizer[:, np.newaxis]\n return affinity_matrix" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/fit", + "name": "fit", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/fit/self", + "name": "self", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/fit/X", + "name": "X", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelPropagation/fit/y", + "name": "y", + "qname": "sklearn.semi_supervised._label_propagation.LabelPropagation.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
"description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def fit(self, X, y):\n return super().fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading/__init__", + "name": "__init__", + "qname": "sklearn.semi_supervised._label_propagation.LabelSpreading.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading/__init__/self", + "name": "self", + "qname": "sklearn.semi_supervised._label_propagation.LabelSpreading.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading/__init__/kernel", + "name": "kernel", + "qname": "sklearn.semi_supervised._label_propagation.LabelSpreading.__init__.kernel", + "default_value": "'rbf'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'knn', 'rbf'} or callable", + "default_value": "'rbf'", + "description": "String identifier for kernel function to use or the kernel function\nitself. Only 'rbf' and 'knn' strings are valid inputs. The function\npassed should take two inputs, each of shape (n_samples, n_features),\nand return a (n_samples, n_samples) shaped weight matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["rbf", "knn"] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading/__init__/gamma", + "name": "gamma", + "qname": "sklearn.semi_supervised._label_propagation.LabelSpreading.__init__.gamma", + "default_value": "20", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "20", + "description": "Parameter for rbf kernel." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading/__init__/n_neighbors", + "name": "n_neighbors", + "qname": "sklearn.semi_supervised._label_propagation.LabelSpreading.__init__.n_neighbors", + "default_value": "7", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "7", + "description": "Parameter for knn kernel which is a strictly positive integer." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading/__init__/alpha", + "name": "alpha", + "qname": "sklearn.semi_supervised._label_propagation.LabelSpreading.__init__.alpha", + "default_value": "0.2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.2", + "description": "Clamping factor. A value in (0, 1) that specifies the relative amount\nthat an instance should adopt the information from its neighbors as\nopposed to its initial label.\nalpha=0 means keeping the initial label information; alpha=1 means\nreplacing all initial information." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.semi_supervised._label_propagation.LabelSpreading.__init__.max_iter", + "default_value": "30", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "30", + "description": "Maximum number of iterations allowed." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading/__init__/tol", + "name": "tol", + "qname": "sklearn.semi_supervised._label_propagation.LabelSpreading.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Convergence tolerance: threshold to consider the system at steady\nstate." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading/__init__/n_jobs", + "name": "n_jobs", + "qname": "sklearn.semi_supervised._label_propagation.LabelSpreading.__init__.n_jobs", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The number of parallel jobs to run.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "LabelSpreading model for semi-supervised learning\n\nThis model is similar to the basic Label Propagation algorithm,\nbut uses affinity matrix based on the normalized graph Laplacian\nand soft clamping across the labels.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, kernel='rbf', *, gamma=20, n_neighbors=7, alpha=0.2,\n max_iter=30, tol=1e-3, n_jobs=None):\n\n # this one has different base parameters\n super().__init__(kernel=kernel, gamma=gamma,\n n_neighbors=n_neighbors, alpha=alpha,\n max_iter=max_iter, tol=tol, n_jobs=n_jobs)" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading/_build_graph", + "name": "_build_graph", + "qname": "sklearn.semi_supervised._label_propagation.LabelSpreading._build_graph", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._label_propagation/LabelSpreading/_build_graph/self", + "name": "self", + "qname": "sklearn.semi_supervised._label_propagation.LabelSpreading._build_graph.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Graph matrix for Label Spreading computes the graph laplacian", + "docstring": "Graph matrix for Label Spreading computes the graph laplacian", + "code": " def _build_graph(self):\n \"\"\"Graph matrix for Label Spreading computes the graph laplacian\"\"\"\n # compute affinity matrix (or gram matrix)\n if self.kernel == 'knn':\n self.nn_fit = None\n n_samples = self.X_.shape[0]\n affinity_matrix = self._get_kernel(self.X_)\n laplacian = 
csgraph.laplacian(affinity_matrix, normed=True)\n laplacian = -laplacian\n if sparse.isspmatrix(laplacian):\n diag_mask = (laplacian.row == laplacian.col)\n laplacian.data[diag_mask] = 0.0\n else:\n laplacian.flat[::n_samples + 1] = 0.0 # set diag to 0.0\n return laplacian" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/__init__", + "name": "__init__", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/__init__/self", + "name": "self", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/__init__/base_estimator", + "name": "base_estimator", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.__init__.base_estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object", + "default_value": "", + "description": "An estimator object implementing ``fit`` and ``predict_proba``.\nInvoking the ``fit`` method will fit a clone of the passed estimator,\nwhich will be stored in the ``base_estimator_`` attribute." + }, + "type": { + "kind": "NamedType", + "name": "estimator object" + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/__init__/threshold", + "name": "threshold", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.__init__.threshold", + "default_value": "0.75", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.75", + "description": "The decision threshold for use with `criterion='threshold'`.\nShould be in [0, 1). When using the 'threshold' criterion, a\n:ref:`well calibrated classifier ` should be used." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/__init__/criterion", + "name": "criterion", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.__init__.criterion", + "default_value": "'threshold'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'threshold', 'k_best'}", + "default_value": "'threshold'", + "description": "The selection criterion used to select which labels to add to the\ntraining set. If 'threshold', pseudo-labels with prediction\nprobabilities above `threshold` are added to the dataset. If 'k_best',\nthe `k_best` pseudo-labels with highest prediction probabilities are\nadded to the dataset. When using the 'threshold' criterion, a\n:ref:`well calibrated classifier ` should be used." 
+ }, + "type": { + "kind": "EnumType", + "values": ["k_best", "threshold"] + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/__init__/k_best", + "name": "k_best", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.__init__.k_best", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "The amount of samples to add in each iteration. Only used when\n`criterion` is k_best'." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.__init__.max_iter", + "default_value": "10", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int or None", + "default_value": "10", + "description": "Maximum number of iterations allowed. Should be greater than or equal\nto 0. If it is ``None``, the classifier will continue to predict labels\nuntil no new pseudo-labels are added, or all unlabeled samples have\nbeen labeled." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/__init__/verbose", + "name": "verbose", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.__init__.verbose", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Enable verbose output." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Self-training classifier.\n\nThis class allows a given supervised classifier to function as a\nsemi-supervised classifier, allowing it to learn from unlabeled data. 
It\ndoes this by iteratively predicting pseudo-labels for the unlabeled data\nand adding them to the training set.\n\nThe classifier will continue iterating until either max_iter is reached, or\nno pseudo-labels were added to the training set in the previous iteration.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " def __init__(self,\n base_estimator,\n threshold=0.75,\n criterion='threshold',\n k_best=10,\n max_iter=10,\n verbose=False):\n self.base_estimator = base_estimator\n self.threshold = threshold\n self.criterion = criterion\n self.k_best = k_best\n self.max_iter = max_iter\n self.verbose = verbose" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/decision_function", + "name": "decision_function", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.decision_function", + "decorators": ["if_delegate_has_method(delegate='base_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/decision_function/self", + "name": "self", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/decision_function/X", + "name": "X", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Array representing the data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calls decision function of the `base_estimator`.", + "docstring": "Calls decision function of the `base_estimator`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\nReturns\n-------\ny : ndarray of shape (n_samples, n_features)\n Result of the decision function of the `base_estimator`.", + "code": " @if_delegate_has_method(delegate='base_estimator')\n def decision_function(self, X):\n \"\"\"Calls decision function of the `base_estimator`.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\n Returns\n -------\n y : ndarray of shape (n_samples, n_features)\n Result of the decision function of the `base_estimator`.\n \"\"\"\n check_is_fitted(self)\n return self.base_estimator_.decision_function(X)" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/fit", + "name": "fit", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/fit/self", + "name": "self", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/fit/X", + "name": "X", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Array representing the data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/fit/y", + "name": "y", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples,)", + "default_value": "", + "description": "Array representing the labels. Unlabeled samples should have the\nlabel -1." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples,)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fits this ``SelfTrainingClassifier`` to a dataset.", + "docstring": "Fits this ``SelfTrainingClassifier`` to a dataset.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\ny : {array-like, sparse matrix} of shape (n_samples,)\n Array representing the labels. 
Unlabeled samples should have the\n label -1.\n\nReturns\n-------\nself : object\n Returns an instance of self.", + "code": " def fit(self, X, y):\n \"\"\"\n Fits this ``SelfTrainingClassifier`` to a dataset.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\n y : {array-like, sparse matrix} of shape (n_samples,)\n Array representing the labels. Unlabeled samples should have the\n label -1.\n\n Returns\n -------\n self : object\n Returns an instance of self.\n \"\"\"\n # we need row slicing support for sparse matrices\n X, y = self._validate_data(X, y, accept_sparse=[\n 'csr', 'csc', 'lil', 'dok'])\n\n if self.base_estimator is None:\n raise ValueError(\"base_estimator cannot be None!\")\n\n self.base_estimator_ = clone(self.base_estimator)\n\n if self.max_iter is not None and self.max_iter < 0:\n raise ValueError(\"max_iter must be >= 0 or None,\"\n f\" got {self.max_iter}\")\n\n if not (0 <= self.threshold < 1):\n raise ValueError(\"threshold must be in [0,1),\"\n f\" got {self.threshold}\")\n\n if self.criterion not in ['threshold', 'k_best']:\n raise ValueError(f\"criterion must be either 'threshold' \"\n f\"or 'k_best', got {self.criterion}.\")\n\n if y.dtype.kind in ['U', 'S']:\n raise ValueError(\"y has dtype string. If you wish to predict on \"\n \"string targets, use dtype object, and use -1\"\n \" as the label for unlabeled samples.\")\n\n has_label = y != -1\n\n if np.all(has_label):\n warnings.warn(\"y contains no unlabeled samples\", UserWarning)\n\n if self.criterion == 'k_best' and (self.k_best > X.shape[0] -\n np.sum(has_label)):\n warnings.warn(\"k_best is larger than the amount of unlabeled \"\n \"samples. All unlabeled samples will be labeled in \"\n \"the first iteration\", UserWarning)\n\n self.transduction_ = np.copy(y)\n self.labeled_iter_ = np.full_like(y, -1)\n self.labeled_iter_[has_label] = 0\n\n self.n_iter_ = 0\n\n while not np.all(has_label) and (self.max_iter is None or\n self.n_iter_ < self.max_iter):\n self.n_iter_ += 1\n self.base_estimator_.fit(\n X[safe_mask(X, has_label)],\n self.transduction_[has_label])\n\n # Validate the fitted estimator since `predict_proba` can be\n # delegated to an underlying \"final\" fitted estimator as\n # generally done in meta-estimator or pipeline.\n _validate_estimator(self.base_estimator_)\n\n # Predict on the unlabeled samples\n prob = self.base_estimator_.predict_proba(\n X[safe_mask(X, ~has_label)])\n pred = self.base_estimator_.classes_[np.argmax(prob, axis=1)]\n max_proba = np.max(prob, axis=1)\n\n # Select new labeled samples\n if self.criterion == 'threshold':\n selected = max_proba > self.threshold\n else:\n n_to_select = min(self.k_best, max_proba.shape[0])\n if n_to_select == max_proba.shape[0]:\n selected = np.ones_like(max_proba, dtype=bool)\n else:\n # NB these are indices, not a mask\n selected = \\\n np.argpartition(-max_proba, n_to_select)[:n_to_select]\n\n # Map selected indices into original array\n selected_full = np.nonzero(~has_label)[0][selected]\n\n # Add newly labeled confident predictions to the dataset\n self.transduction_[selected_full] = pred[selected]\n has_label[selected_full] = True\n self.labeled_iter_[selected_full] = self.n_iter_\n\n if selected_full.shape[0] == 0:\n # no changed labels\n self.termination_condition_ = \"no_change\"\n break\n\n if self.verbose:\n print(f\"End of iteration {self.n_iter_},\"\n f\" added {selected_full.shape[0]} new labels.\")\n\n if self.n_iter_ == self.max_iter:\n 
self.termination_condition_ = \"max_iter\"\n if np.all(has_label):\n self.termination_condition_ = \"all_labeled\"\n\n self.base_estimator_.fit(\n X[safe_mask(X, has_label)],\n self.transduction_[has_label])\n self.classes_ = self.base_estimator_.classes_\n return self" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/predict", + "name": "predict", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.predict", + "decorators": ["if_delegate_has_method(delegate='base_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/predict/self", + "name": "self", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/predict/X", + "name": "X", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Array representing the data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the classes of X.", + "docstring": "Predict the classes of X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n Array with predicted labels.", + "code": " @if_delegate_has_method(delegate='base_estimator')\n def predict(self, X):\n \"\"\"Predict the classes of X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\n Returns\n -------\n y : ndarray of shape (n_samples,)\n Array with predicted labels.\n \"\"\"\n check_is_fitted(self)\n return self.base_estimator_.predict(X)" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.predict_log_proba", + "decorators": ["if_delegate_has_method(delegate='base_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/predict_log_proba/self", + "name": "self", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/predict_log_proba/X", + "name": "X", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Array representing the 
data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict log probability for each possible outcome.", + "docstring": "Predict log probability for each possible outcome.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\nReturns\n-------\ny : ndarray of shape (n_samples, n_features)\n Array with log prediction probabilities.", + "code": " @if_delegate_has_method(delegate='base_estimator')\n def predict_log_proba(self, X):\n \"\"\"Predict log probability for each possible outcome.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\n Returns\n -------\n y : ndarray of shape (n_samples, n_features)\n Array with log prediction probabilities.\n \"\"\"\n check_is_fitted(self)\n return self.base_estimator_.predict_log_proba(X)" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Array representing the data." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict probability for each possible outcome.", + "docstring": "Predict probability for each possible outcome.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\nReturns\n-------\ny : ndarray of shape (n_samples, n_features)\n Array with prediction probabilities.", + "code": " def predict_proba(self, X):\n \"\"\"Predict probability for each possible outcome.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\n Returns\n -------\n y : ndarray of shape (n_samples, n_features)\n Array with prediction probabilities.\n \"\"\"\n check_is_fitted(self)\n return self.base_estimator_.predict_proba(X)" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/score", + "name": "score", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.score", + "decorators": ["if_delegate_has_method(delegate='base_estimator')"], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/score/self", + "name": "self", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/score/X", + "name": "X", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Array representing the data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/SelfTrainingClassifier/score/y", + "name": "y", + "qname": "sklearn.semi_supervised._self_training.SelfTrainingClassifier.score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Array representing the labels." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calls score on the `base_estimator`.", + "docstring": "Calls score on the `base_estimator`.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\ny : array-like of shape (n_samples,)\n Array representing the labels.\n\nReturns\n-------\nscore : float\n Result of calling score on the `base_estimator`.", + "code": " @if_delegate_has_method(delegate='base_estimator')\n def score(self, X, y):\n \"\"\"Calls score on the `base_estimator`.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Array representing the data.\n\n y : array-like of shape (n_samples,)\n Array representing the labels.\n\n Returns\n -------\n score : float\n Result of calling score on the `base_estimator`.\n \"\"\"\n check_is_fitted(self)\n return self.base_estimator_.score(X, y)" + }, + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/_validate_estimator", + "name": "_validate_estimator", + "qname": "sklearn.semi_supervised._self_training._validate_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.semi_supervised._self_training/_validate_estimator/estimator", + "name": "estimator", + "qname": "sklearn.semi_supervised._self_training._validate_estimator.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Make sure that an estimator implements the necessary methods.", + "docstring": "Make sure that an estimator implements the necessary methods.", + "code": "def _validate_estimator(estimator):\n \"\"\"Make sure that an estimator implements the necessary methods.\"\"\"\n if not hasattr(estimator, \"predict_proba\"):\n msg = \"base_estimator ({}) should implement predict_proba!\"\n raise ValueError(msg.format(type(estimator).__name__))" + }, + { + "id": "scikit-learn/sklearn.setup/configuration", + "name": "configuration", + "qname": "sklearn.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package='', top_path=None):\n from numpy.distutils.misc_util import Configuration\n import numpy\n\n libraries = []\n if os.name == 'posix':\n libraries.append('m')\n\n config = Configuration('sklearn', parent_package, top_path)\n\n # submodules with build utilities\n config.add_subpackage('__check_build')\n config.add_subpackage('_build_utils')\n\n # submodules which do not have their own 
setup.py\n # we must manually add sub-submodules & tests\n config.add_subpackage('compose')\n config.add_subpackage('compose/tests')\n config.add_subpackage('covariance')\n config.add_subpackage('covariance/tests')\n config.add_subpackage('cross_decomposition')\n config.add_subpackage('cross_decomposition/tests')\n config.add_subpackage('feature_selection')\n config.add_subpackage('feature_selection/tests')\n config.add_subpackage('gaussian_process')\n config.add_subpackage('gaussian_process/tests')\n config.add_subpackage('impute')\n config.add_subpackage('impute/tests')\n config.add_subpackage('inspection')\n config.add_subpackage('inspection/tests')\n config.add_subpackage('mixture')\n config.add_subpackage('mixture/tests')\n config.add_subpackage('model_selection')\n config.add_subpackage('model_selection/tests')\n config.add_subpackage('neural_network')\n config.add_subpackage('neural_network/tests')\n config.add_subpackage('preprocessing')\n config.add_subpackage('preprocessing/tests')\n config.add_subpackage('semi_supervised')\n config.add_subpackage('semi_supervised/tests')\n config.add_subpackage('experimental')\n config.add_subpackage('experimental/tests')\n config.add_subpackage('ensemble/_hist_gradient_boosting')\n config.add_subpackage('ensemble/_hist_gradient_boosting/tests')\n config.add_subpackage('_loss/')\n config.add_subpackage('_loss/tests')\n config.add_subpackage('externals')\n\n # submodules which have their own setup.py\n config.add_subpackage('cluster')\n config.add_subpackage('datasets')\n config.add_subpackage('decomposition')\n config.add_subpackage('ensemble')\n config.add_subpackage('feature_extraction')\n config.add_subpackage('manifold')\n config.add_subpackage('metrics')\n config.add_subpackage('neighbors')\n config.add_subpackage('tree')\n config.add_subpackage('utils')\n config.add_subpackage('svm')\n config.add_subpackage('linear_model')\n\n # add cython extension module for isotonic regression\n config.add_extension('_isotonic',\n sources=['_isotonic.pyx'],\n include_dirs=[numpy.get_include()],\n libraries=libraries,\n )\n\n # add the test directory\n config.add_subpackage('tests')\n\n # Skip cythonization as we do not want to include the generated\n # C/C++ files in the release tarballs as they are not necessarily\n # forward compatible with future versions of Python for instance.\n if 'sdist' not in sys.argv:\n cythonize_extensions(top_path, config)\n\n return config" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__", + "name": "__init__", + "qname": "sklearn.svm._base.BaseLibSVM.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/kernel", + "name": "kernel", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.kernel", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/degree", + "name": "degree", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.degree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { 
+ "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/gamma", + "name": "gamma", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.gamma", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/coef0", + "name": "coef0", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.coef0", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/tol", + "name": "tol", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.tol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/C", + "name": "C", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.C", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/nu", + "name": "nu", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.nu", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.epsilon", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/shrinking", + "name": "shrinking", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.shrinking", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/probability", + "name": "probability", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.probability", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/cache_size", + "name": "cache_size", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.cache_size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.class_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/verbose", + "name": "verbose", + "qname": 
"sklearn.svm._base.BaseLibSVM.__init__.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/__init__/random_state", + "name": "random_state", + "qname": "sklearn.svm._base.BaseLibSVM.__init__.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for estimators that use libsvm as backing library.\n\nThis implements support vector machine classification and regression.\n\nParameter documentation is in the derived `SVC` class.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self, kernel, degree, gamma, coef0,\n tol, C, nu, epsilon, shrinking, probability, cache_size,\n class_weight, verbose, max_iter, random_state):\n\n if self._impl not in LIBSVM_IMPL:\n raise ValueError(\"impl should be one of %s, %s was given\" % (\n LIBSVM_IMPL, self._impl))\n\n if gamma == 0:\n msg = (\"The gamma value of 0.0 is invalid. Use 'auto' to set\"\n \" gamma to a value of 1 / n_features.\")\n raise ValueError(msg)\n\n self.kernel = kernel\n self.degree = degree\n self.gamma = gamma\n self.coef0 = coef0\n self.tol = tol\n self.C = C\n self.nu = nu\n self.epsilon = epsilon\n self.shrinking = shrinking\n self.probability = probability\n self.cache_size = cache_size\n self.class_weight = class_weight\n self.verbose = verbose\n self.max_iter = max_iter\n self.random_state = random_state" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_compute_kernel", + "name": "_compute_kernel", + "qname": "sklearn.svm._base.BaseLibSVM._compute_kernel", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_compute_kernel/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._compute_kernel.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_compute_kernel/X", + "name": "X", + "qname": "sklearn.svm._base.BaseLibSVM._compute_kernel.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the data transformed by a callable kernel", + "docstring": "Return the data transformed by a callable kernel", + "code": " def _compute_kernel(self, X):\n \"\"\"Return the data transformed by a callable kernel\"\"\"\n if callable(self.kernel):\n # in the case of precomputed kernel given as a function, we\n # have to compute explicitly the kernel matrix\n kernel = self.kernel(X, self.__Xfit)\n if sp.issparse(kernel):\n kernel = kernel.toarray()\n X = np.asarray(kernel, dtype=np.float64, order='C')\n return X" + }, + { + 
"id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_decision_function", + "name": "_decision_function", + "qname": "sklearn.svm._base.BaseLibSVM._decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_decision_function/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_decision_function/X", + "name": "X", + "qname": "sklearn.svm._base.BaseLibSVM._decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Evaluates the decision function for the samples in X.", + "docstring": "Evaluates the decision function for the samples in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nX : array-like of shape (n_samples, n_class * (n_class-1) / 2)\n Returns the decision function of the sample for each class\n in the model.", + "code": " def _decision_function(self, X):\n \"\"\"Evaluates the decision function for the samples in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n X : array-like of shape (n_samples, n_class * (n_class-1) / 2)\n Returns the decision function of the sample for each class\n in the model.\n \"\"\"\n # NOTE: _validate_for_predict contains check for is_fitted\n # hence must be placed before any other attributes are used.\n X = self._validate_for_predict(X)\n X = self._compute_kernel(X)\n\n if self._sparse:\n dec_func = self._sparse_decision_function(X)\n else:\n dec_func = self._dense_decision_function(X)\n\n # In binary case, we need to flip the sign of coef, intercept and\n # decision function.\n if self._impl in ['c_svc', 'nu_svc'] and len(self.classes_) == 2:\n return -dec_func.ravel()\n\n return dec_func" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_decision_function", + "name": "_dense_decision_function", + "qname": "sklearn.svm._base.BaseLibSVM._dense_decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_decision_function/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._dense_decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_decision_function/X", + "name": "X", + "qname": "sklearn.svm._base.BaseLibSVM._dense_decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _dense_decision_function(self, X):\n X = check_array(X, dtype=np.float64, order=\"C\",\n accept_large_sparse=False)\n\n kernel = self.kernel\n if callable(kernel):\n 
kernel = 'precomputed'\n\n return libsvm.decision_function(\n X, self.support_, self.support_vectors_, self._n_support,\n self._dual_coef_, self._intercept_,\n self._probA, self._probB,\n svm_type=LIBSVM_IMPL.index(self._impl),\n kernel=kernel, degree=self.degree, cache_size=self.cache_size,\n coef0=self.coef0, gamma=self._gamma)" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_fit", + "name": "_dense_fit", + "qname": "sklearn.svm._base.BaseLibSVM._dense_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_fit/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._dense_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_fit/X", + "name": "X", + "qname": "sklearn.svm._base.BaseLibSVM._dense_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_fit/y", + "name": "y", + "qname": "sklearn.svm._base.BaseLibSVM._dense_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.svm._base.BaseLibSVM._dense_fit.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_fit/solver_type", + "name": "solver_type", + "qname": "sklearn.svm._base.BaseLibSVM._dense_fit.solver_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_fit/kernel", + "name": "kernel", + "qname": "sklearn.svm._base.BaseLibSVM._dense_fit.kernel", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_fit/random_seed", + "name": "random_seed", + "qname": "sklearn.svm._base.BaseLibSVM._dense_fit.random_seed", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _dense_fit(self, X, y, sample_weight, solver_type, kernel,\n random_seed):\n if callable(self.kernel):\n # you must store a reference to X to compute the kernel in predict\n # TODO: add keyword copy to copy on demand\n self.__Xfit = X\n X = self._compute_kernel(X)\n\n if X.shape[0] != X.shape[1]:\n raise ValueError(\"X.shape[0] should be equal to X.shape[1]\")\n\n libsvm.set_verbosity_wrap(self.verbose)\n\n # we don't pass **self.get_params() to allow subclasses to\n # add other parameters to __init__\n self.support_, 
self.support_vectors_, self._n_support, \\\n self.dual_coef_, self.intercept_, self._probA, \\\n self._probB, self.fit_status_ = libsvm.fit(\n X, y,\n svm_type=solver_type, sample_weight=sample_weight,\n class_weight=self.class_weight_, kernel=kernel, C=self.C,\n nu=self.nu, probability=self.probability, degree=self.degree,\n shrinking=self.shrinking, tol=self.tol,\n cache_size=self.cache_size, coef0=self.coef0,\n gamma=self._gamma, epsilon=self.epsilon,\n max_iter=self.max_iter, random_seed=random_seed)\n\n self._warn_from_fit_status()" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_predict", + "name": "_dense_predict", + "qname": "sklearn.svm._base.BaseLibSVM._dense_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_predict/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._dense_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_dense_predict/X", + "name": "X", + "qname": "sklearn.svm._base.BaseLibSVM._dense_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _dense_predict(self, X):\n X = self._compute_kernel(X)\n if X.ndim == 1:\n X = check_array(X, order='C', accept_large_sparse=False)\n\n kernel = self.kernel\n if callable(self.kernel):\n kernel = 'precomputed'\n if X.shape[1] != self.shape_fit_[0]:\n raise ValueError(\"X.shape[1] = %d should be equal to %d, \"\n \"the number of samples at training time\" %\n (X.shape[1], self.shape_fit_[0]))\n\n svm_type = LIBSVM_IMPL.index(self._impl)\n\n return libsvm.predict(\n X, self.support_, self.support_vectors_, self._n_support,\n self._dual_coef_, self._intercept_,\n self._probA, self._probB, svm_type=svm_type, kernel=kernel,\n degree=self.degree, coef0=self.coef0, gamma=self._gamma,\n cache_size=self.cache_size)" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_get_coef", + "name": "_get_coef", + "qname": "sklearn.svm._base.BaseLibSVM._get_coef", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_get_coef/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._get_coef.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_coef(self):\n return safe_sparse_dot(self._dual_coef_, self.support_vectors_)" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_more_tags", + "name": "_more_tags", + "qname": "sklearn.svm._base.BaseLibSVM._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_more_tags/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": 
"", + "docstring": "", + "code": " def _more_tags(self):\n # Used by cross_val_score.\n return {'pairwise': self.kernel == 'precomputed'}" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_pairwise@getter", + "name": "_pairwise", + "qname": "sklearn.svm._base.BaseLibSVM._pairwise", + "decorators": [ + "deprecated('Attribute _pairwise was deprecated in version 0.24 and will be removed in 1.1 (renaming of 0.26).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_pairwise/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._pairwise.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated(\"Attribute _pairwise was deprecated in \" # type: ignore\n \"version 0.24 and will be removed in 1.1 (renaming of 0.26).\")\n @property\n def _pairwise(self):\n # Used by cross_val_score.\n return self.kernel == \"precomputed\"" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_decision_function", + "name": "_sparse_decision_function", + "qname": "sklearn.svm._base.BaseLibSVM._sparse_decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_decision_function/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._sparse_decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_decision_function/X", + "name": "X", + "qname": "sklearn.svm._base.BaseLibSVM._sparse_decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _sparse_decision_function(self, X):\n X.data = np.asarray(X.data, dtype=np.float64, order='C')\n\n kernel = self.kernel\n if hasattr(kernel, '__call__'):\n kernel = 'precomputed'\n\n kernel_type = self._sparse_kernels.index(kernel)\n\n return libsvm_sparse.libsvm_sparse_decision_function(\n X.data, X.indices, X.indptr,\n self.support_vectors_.data,\n self.support_vectors_.indices,\n self.support_vectors_.indptr,\n self._dual_coef_.data, self._intercept_,\n LIBSVM_IMPL.index(self._impl), kernel_type,\n self.degree, self._gamma, self.coef0, self.tol,\n self.C, self.class_weight_,\n self.nu, self.epsilon, self.shrinking,\n self.probability, self._n_support,\n self._probA, self._probB)" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_fit", + "name": "_sparse_fit", + "qname": "sklearn.svm._base.BaseLibSVM._sparse_fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_fit/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._sparse_fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_fit/X", + "name": "X", + "qname": 
"sklearn.svm._base.BaseLibSVM._sparse_fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_fit/y", + "name": "y", + "qname": "sklearn.svm._base.BaseLibSVM._sparse_fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.svm._base.BaseLibSVM._sparse_fit.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_fit/solver_type", + "name": "solver_type", + "qname": "sklearn.svm._base.BaseLibSVM._sparse_fit.solver_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_fit/kernel", + "name": "kernel", + "qname": "sklearn.svm._base.BaseLibSVM._sparse_fit.kernel", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_fit/random_seed", + "name": "random_seed", + "qname": "sklearn.svm._base.BaseLibSVM._sparse_fit.random_seed", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _sparse_fit(self, X, y, sample_weight, solver_type, kernel,\n random_seed):\n X.data = np.asarray(X.data, dtype=np.float64, order='C')\n X.sort_indices()\n\n kernel_type = self._sparse_kernels.index(kernel)\n\n libsvm_sparse.set_verbosity_wrap(self.verbose)\n\n self.support_, self.support_vectors_, dual_coef_data, \\\n self.intercept_, self._n_support, \\\n self._probA, self._probB, self.fit_status_ = \\\n libsvm_sparse.libsvm_sparse_train(\n X.shape[1], X.data, X.indices, X.indptr, y, solver_type,\n kernel_type, self.degree, self._gamma, self.coef0, self.tol,\n self.C, self.class_weight_,\n sample_weight, self.nu, self.cache_size, self.epsilon,\n int(self.shrinking), int(self.probability), self.max_iter,\n random_seed)\n\n self._warn_from_fit_status()\n\n if hasattr(self, \"classes_\"):\n n_class = len(self.classes_) - 1\n else: # regression\n n_class = 1\n n_SV = self.support_vectors_.shape[0]\n\n dual_coef_indices = np.tile(np.arange(n_SV), n_class)\n if not n_SV:\n self.dual_coef_ = sp.csr_matrix([])\n else:\n dual_coef_indptr = np.arange(0, dual_coef_indices.size + 1,\n dual_coef_indices.size / n_class)\n self.dual_coef_ = sp.csr_matrix(\n (dual_coef_data, dual_coef_indices, dual_coef_indptr),\n (n_class, n_SV))" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_predict", + "name": "_sparse_predict", + "qname": "sklearn.svm._base.BaseLibSVM._sparse_predict", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_predict/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._sparse_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_sparse_predict/X", + "name": "X", + "qname": "sklearn.svm._base.BaseLibSVM._sparse_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _sparse_predict(self, X):\n # Precondition: X is a csr_matrix of dtype np.float64.\n kernel = self.kernel\n if callable(kernel):\n kernel = 'precomputed'\n\n kernel_type = self._sparse_kernels.index(kernel)\n\n C = 0.0 # C is not useful here\n\n return libsvm_sparse.libsvm_sparse_predict(\n X.data, X.indices, X.indptr,\n self.support_vectors_.data,\n self.support_vectors_.indices,\n self.support_vectors_.indptr,\n self._dual_coef_.data, self._intercept_,\n LIBSVM_IMPL.index(self._impl), kernel_type,\n self.degree, self._gamma, self.coef0, self.tol,\n C, self.class_weight_,\n self.nu, self.epsilon, self.shrinking,\n self.probability, self._n_support,\n self._probA, self._probB)" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_validate_for_predict", + "name": "_validate_for_predict", + "qname": "sklearn.svm._base.BaseLibSVM._validate_for_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_validate_for_predict/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._validate_for_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_validate_for_predict/X", + "name": "X", + "qname": "sklearn.svm._base.BaseLibSVM._validate_for_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_for_predict(self, X):\n check_is_fitted(self)\n\n if not callable(self.kernel):\n X = check_array(X, accept_sparse='csr', dtype=np.float64,\n order=\"C\", accept_large_sparse=False)\n\n if self._sparse and not sp.isspmatrix(X):\n X = sp.csr_matrix(X)\n if self._sparse:\n X.sort_indices()\n\n if sp.issparse(X) and not self._sparse and not callable(self.kernel):\n raise ValueError(\n \"cannot use sparse input in %r trained on dense data\"\n % type(self).__name__)\n\n if self.kernel == \"precomputed\":\n if X.shape[1] != self.shape_fit_[0]:\n raise ValueError(\"X.shape[1] = %d should be equal to %d, \"\n \"the number of samples at training time\" %\n (X.shape[1], self.shape_fit_[0]))\n elif not callable(self.kernel) and X.shape[1] != self.shape_fit_[1]:\n raise ValueError(\"X.shape[1] = %d should be equal to %d, \"\n \"the number of features at training time\" %\n (X.shape[1], self.shape_fit_[1]))\n return X" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_validate_targets", + "name": "_validate_targets", + "qname": 
"sklearn.svm._base.BaseLibSVM._validate_targets", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_validate_targets/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._validate_targets.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_validate_targets/y", + "name": "y", + "qname": "sklearn.svm._base.BaseLibSVM._validate_targets.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validation of y and class_weight.\n\nDefault implementation for SVR and one-class; overridden in BaseSVC.", + "docstring": "Validation of y and class_weight.\n\nDefault implementation for SVR and one-class; overridden in BaseSVC.", + "code": " def _validate_targets(self, y):\n \"\"\"Validation of y and class_weight.\n\n Default implementation for SVR and one-class; overridden in BaseSVC.\n \"\"\"\n # XXX this is ugly.\n # Regression models should not have a class_weight_ attribute.\n self.class_weight_ = np.empty(0)\n return column_or_1d(y, warn=True).astype(np.float64, copy=False)" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_warn_from_fit_status", + "name": "_warn_from_fit_status", + "qname": "sklearn.svm._base.BaseLibSVM._warn_from_fit_status", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/_warn_from_fit_status/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM._warn_from_fit_status.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _warn_from_fit_status(self):\n assert self.fit_status_ in (0, 1)\n if self.fit_status_ == 1:\n warnings.warn('Solver terminated early (max_iter=%i).'\n ' Consider pre-processing your data with'\n ' StandardScaler or MinMaxScaler.'\n % self.max_iter, ConvergenceWarning)" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/coef_@getter", + "name": "coef_", + "qname": "sklearn.svm._base.BaseLibSVM.coef_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/coef_/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM.coef_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def coef_(self):\n if self.kernel != 'linear':\n raise AttributeError('coef_ is only available when using a '\n 'linear kernel')\n\n coef = self._get_coef()\n\n # coef_ being a read-only property, it's better to mark the value as\n # immutable to avoid hiding potential bugs for the unsuspecting user.\n if sp.issparse(coef):\n # sparse matrix do not have global flags\n coef.data.flags.writeable = False\n else:\n # regular dense array\n coef.flags.writeable = False\n return coef" + }, + { + "id": 
"scikit-learn/sklearn.svm._base/BaseLibSVM/fit", + "name": "fit", + "qname": "sklearn.svm._base.BaseLibSVM.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/fit/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/fit/X", + "name": "X", + "qname": "sklearn.svm._base.BaseLibSVM.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples)", + "default_value": "", + "description": "Training vectors, where n_samples is the number of samples\nand n_features is the number of features.\nFor kernel=\"precomputed\", the expected shape of X is\n(n_samples, n_samples)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples, n_samples)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/fit/y", + "name": "y", + "qname": "sklearn.svm._base.BaseLibSVM.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target values (class labels in classification, real numbers in\nregression)." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.svm._base.BaseLibSVM.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Per-sample weights. Rescale C per sample. Higher weights\nforce the classifier to put more emphasis on these points." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the SVM model according to the given training data.", + "docstring": "Fit the SVM model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features.\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples, n_samples).\n\ny : array-like of shape (n_samples,)\n Target values (class labels in classification, real numbers in\n regression).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Per-sample weights. Rescale C per sample. 
Higher weights\n force the classifier to put more emphasis on these points.\n\nReturns\n-------\nself : object\n\nNotes\n-----\nIf X and y are not C-ordered and contiguous arrays of np.float64 and\nX is not a scipy.sparse.csr_matrix, X and/or y may be copied.\n\nIf X is a dense array, then the other methods will not support sparse\nmatrices as input.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the SVM model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) \\\n or (n_samples, n_samples)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features.\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples, n_samples).\n\n y : array-like of shape (n_samples,)\n Target values (class labels in classification, real numbers in\n regression).\n\n sample_weight : array-like of shape (n_samples,), default=None\n Per-sample weights. Rescale C per sample. Higher weights\n force the classifier to put more emphasis on these points.\n\n Returns\n -------\n self : object\n\n Notes\n -----\n If X and y are not C-ordered and contiguous arrays of np.float64 and\n X is not a scipy.sparse.csr_matrix, X and/or y may be copied.\n\n If X is a dense array, then the other methods will not support sparse\n matrices as input.\n \"\"\"\n\n rnd = check_random_state(self.random_state)\n\n sparse = sp.isspmatrix(X)\n if sparse and self.kernel == \"precomputed\":\n raise TypeError(\"Sparse precomputed kernels are not supported.\")\n self._sparse = sparse and not callable(self.kernel)\n\n if hasattr(self, 'decision_function_shape'):\n if self.decision_function_shape not in ('ovr', 'ovo'):\n raise ValueError(\n f\"decision_function_shape must be either 'ovr' or 'ovo', \"\n f\"got {self.decision_function_shape}.\"\n )\n\n if callable(self.kernel):\n check_consistent_length(X, y)\n else:\n X, y = self._validate_data(X, y, dtype=np.float64,\n order='C', accept_sparse='csr',\n accept_large_sparse=False)\n\n y = self._validate_targets(y)\n\n sample_weight = np.asarray([]\n if sample_weight is None\n else sample_weight, dtype=np.float64)\n solver_type = LIBSVM_IMPL.index(self._impl)\n\n # input validation\n n_samples = _num_samples(X)\n if solver_type != 2 and n_samples != y.shape[0]:\n raise ValueError(\"X and y have incompatible shapes.\\n\" +\n \"X has %s samples, but y has %s.\" %\n (n_samples, y.shape[0]))\n\n if self.kernel == \"precomputed\" and n_samples != X.shape[1]:\n raise ValueError(\"Precomputed matrix must be a square matrix.\"\n \" Input is a {}x{} matrix.\"\n .format(X.shape[0], X.shape[1]))\n\n if sample_weight.shape[0] > 0 and sample_weight.shape[0] != n_samples:\n raise ValueError(\"sample_weight and X have incompatible shapes: \"\n \"%r vs %r\\n\"\n \"Note: Sparse matrices cannot be indexed w/\"\n \"boolean masks (use `indices=True` in CV).\"\n % (sample_weight.shape, X.shape))\n\n kernel = 'precomputed' if callable(self.kernel) else self.kernel\n\n if kernel == 'precomputed':\n # unused but needs to be a float for cython code that ignores\n # it anyway\n self._gamma = 0.\n elif isinstance(self.gamma, str):\n if self.gamma == 'scale':\n # var = E[X^2] - E[X]^2 if sparse\n X_var = ((X.multiply(X)).mean() - (X.mean()) ** 2\n if sparse else X.var())\n self._gamma = 1.0 / (X.shape[1] * X_var) if X_var != 0 else 1.0\n elif self.gamma == 'auto':\n self._gamma = 1.0 / X.shape[1]\n else:\n raise ValueError(\n \"When 'gamma' is a string, it should be either 
'scale' or \"\n \"'auto'. Got '{}' instead.\".format(self.gamma)\n )\n else:\n self._gamma = self.gamma\n\n fit = self._sparse_fit if self._sparse else self._dense_fit\n if self.verbose:\n print('[LibSVM]', end='')\n\n seed = rnd.randint(np.iinfo('i').max)\n fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)\n # see comment on the other call to np.iinfo in this file\n\n self.shape_fit_ = X.shape if hasattr(X, \"shape\") else (n_samples, )\n\n # In binary case, we need to flip the sign of coef, intercept and\n # decision function. Use self._intercept_ and self._dual_coef_\n # internally.\n self._intercept_ = self.intercept_.copy()\n self._dual_coef_ = self.dual_coef_\n if self._impl in ['c_svc', 'nu_svc'] and len(self.classes_) == 2:\n self.intercept_ *= -1\n self.dual_coef_ = -self.dual_coef_\n\n return self" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/n_support_@getter", + "name": "n_support_", + "qname": "sklearn.svm._base.BaseLibSVM.n_support_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/n_support_/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM.n_support_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @property\n def n_support_(self):\n try:\n check_is_fitted(self)\n except NotFittedError:\n raise AttributeError\n\n svm_type = LIBSVM_IMPL.index(self._impl)\n if svm_type in (0, 1):\n return self._n_support\n else:\n # SVR and OneClass\n # _n_support has size 2, we make it size 1\n return np.array([self._n_support[0]])" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/predict", + "name": "predict", + "qname": "sklearn.svm._base.BaseLibSVM.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/predict/self", + "name": "self", + "qname": "sklearn.svm._base.BaseLibSVM.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseLibSVM/predict/X", + "name": "X", + "qname": "sklearn.svm._base.BaseLibSVM.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "For kernel=\"precomputed\", the expected shape of X is\n(n_samples_test, n_samples_train)." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform regression on samples in X.\n\nFor an one-class model, +1 (inlier) or -1 (outlier) is returned.", + "docstring": "Perform regression on samples in X.\n\nFor an one-class model, +1 (inlier) or -1 (outlier) is returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)", + "code": " def predict(self, X):\n \"\"\"Perform regression on samples in X.\n\n For an one-class model, +1 (inlier) or -1 (outlier) is returned.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,)\n \"\"\"\n X = self._validate_for_predict(X)\n predict = self._sparse_predict if self._sparse else self._dense_predict\n return predict(X)" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__", + "name": "__init__", + "qname": "sklearn.svm._base.BaseSVC.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/kernel", + "name": "kernel", + "qname": "sklearn.svm._base.BaseSVC.__init__.kernel", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/degree", + "name": "degree", + "qname": "sklearn.svm._base.BaseSVC.__init__.degree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/gamma", + "name": "gamma", + "qname": "sklearn.svm._base.BaseSVC.__init__.gamma", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/coef0", + "name": "coef0", + "qname": "sklearn.svm._base.BaseSVC.__init__.coef0", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/tol", + "name": "tol", + "qname": "sklearn.svm._base.BaseSVC.__init__.tol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/C", + "name": "C", + "qname": "sklearn.svm._base.BaseSVC.__init__.C", + 
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/nu", + "name": "nu", + "qname": "sklearn.svm._base.BaseSVC.__init__.nu", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/shrinking", + "name": "shrinking", + "qname": "sklearn.svm._base.BaseSVC.__init__.shrinking", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/probability", + "name": "probability", + "qname": "sklearn.svm._base.BaseSVC.__init__.probability", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/cache_size", + "name": "cache_size", + "qname": "sklearn.svm._base.BaseSVC.__init__.cache_size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.svm._base.BaseSVC.__init__.class_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/verbose", + "name": "verbose", + "qname": "sklearn.svm._base.BaseSVC.__init__.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.svm._base.BaseSVC.__init__.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/decision_function_shape", + "name": "decision_function_shape", + "qname": "sklearn.svm._base.BaseSVC.__init__.decision_function_shape", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/random_state", + "name": "random_state", + "qname": "sklearn.svm._base.BaseSVC.__init__.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/__init__/break_ties", + "name": "break_ties", + "qname": "sklearn.svm._base.BaseSVC.__init__.break_ties", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": 
"" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "ABC for LibSVM-based classifiers.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self, kernel, degree, gamma, coef0, tol, C, nu,\n shrinking, probability, cache_size, class_weight, verbose,\n max_iter, decision_function_shape, random_state,\n break_ties):\n self.decision_function_shape = decision_function_shape\n self.break_ties = break_ties\n super().__init__(\n kernel=kernel, degree=degree, gamma=gamma,\n coef0=coef0, tol=tol, C=C, nu=nu, epsilon=0., shrinking=shrinking,\n probability=probability, cache_size=cache_size,\n class_weight=class_weight, verbose=verbose, max_iter=max_iter,\n random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_check_proba", + "name": "_check_proba", + "qname": "sklearn.svm._base.BaseSVC._check_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_check_proba/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC._check_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _check_proba(self):\n if not self.probability:\n raise AttributeError(\"predict_proba is not available when \"\n \" probability=False\")\n if self._impl not in ('c_svc', 'nu_svc'):\n raise AttributeError(\"predict_proba only implemented for SVC\"\n \" and NuSVC\")" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_dense_predict_proba", + "name": "_dense_predict_proba", + "qname": "sklearn.svm._base.BaseSVC._dense_predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_dense_predict_proba/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC._dense_predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_dense_predict_proba/X", + "name": "X", + "qname": "sklearn.svm._base.BaseSVC._dense_predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _dense_predict_proba(self, X):\n X = self._compute_kernel(X)\n\n kernel = self.kernel\n if callable(kernel):\n kernel = 'precomputed'\n\n svm_type = LIBSVM_IMPL.index(self._impl)\n pprob = libsvm.predict_proba(\n X, self.support_, self.support_vectors_, self._n_support,\n self._dual_coef_, self._intercept_,\n self._probA, self._probB,\n svm_type=svm_type, kernel=kernel, degree=self.degree,\n cache_size=self.cache_size, coef0=self.coef0, gamma=self._gamma)\n\n return pprob" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_get_coef", + "name": "_get_coef", + "qname": "sklearn.svm._base.BaseSVC._get_coef", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_get_coef/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC._get_coef.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + 
"type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_coef(self):\n if self.dual_coef_.shape[0] == 1:\n # binary classifier\n coef = safe_sparse_dot(self.dual_coef_, self.support_vectors_)\n else:\n # 1vs1 classifier\n coef = _one_vs_one_coef(self.dual_coef_, self._n_support,\n self.support_vectors_)\n if sp.issparse(coef[0]):\n coef = sp.vstack(coef).tocsr()\n else:\n coef = np.vstack(coef)\n\n return coef" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_predict_log_proba", + "name": "_predict_log_proba", + "qname": "sklearn.svm._base.BaseSVC._predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_predict_log_proba/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC._predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_predict_log_proba/X", + "name": "X", + "qname": "sklearn.svm._base.BaseSVC._predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _predict_log_proba(self, X):\n return np.log(self.predict_proba(X))" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_predict_proba", + "name": "_predict_proba", + "qname": "sklearn.svm._base.BaseSVC._predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_predict_proba/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC._predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_predict_proba/X", + "name": "X", + "qname": "sklearn.svm._base.BaseSVC._predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _predict_proba(self, X):\n X = self._validate_for_predict(X)\n if self.probA_.size == 0 or self.probB_.size == 0:\n raise NotFittedError(\"predict_proba is not available when fitted \"\n \"with probability=False\")\n pred_proba = (self._sparse_predict_proba\n if self._sparse else self._dense_predict_proba)\n return pred_proba(X)" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_sparse_predict_proba", + "name": "_sparse_predict_proba", + "qname": "sklearn.svm._base.BaseSVC._sparse_predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_sparse_predict_proba/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC._sparse_predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.svm._base/BaseSVC/_sparse_predict_proba/X", + "name": "X", + "qname": "sklearn.svm._base.BaseSVC._sparse_predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _sparse_predict_proba(self, X):\n X.data = np.asarray(X.data, dtype=np.float64, order='C')\n\n kernel = self.kernel\n if callable(kernel):\n kernel = 'precomputed'\n\n kernel_type = self._sparse_kernels.index(kernel)\n\n return libsvm_sparse.libsvm_sparse_predict_proba(\n X.data, X.indices, X.indptr,\n self.support_vectors_.data,\n self.support_vectors_.indices,\n self.support_vectors_.indptr,\n self._dual_coef_.data, self._intercept_,\n LIBSVM_IMPL.index(self._impl), kernel_type,\n self.degree, self._gamma, self.coef0, self.tol,\n self.C, self.class_weight_,\n self.nu, self.epsilon, self.shrinking,\n self.probability, self._n_support,\n self._probA, self._probB)" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_validate_targets", + "name": "_validate_targets", + "qname": "sklearn.svm._base.BaseSVC._validate_targets", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_validate_targets/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC._validate_targets.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/_validate_targets/y", + "name": "y", + "qname": "sklearn.svm._base.BaseSVC._validate_targets.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_targets(self, y):\n y_ = column_or_1d(y, warn=True)\n check_classification_targets(y)\n cls, y = np.unique(y_, return_inverse=True)\n self.class_weight_ = compute_class_weight(self.class_weight,\n classes=cls, y=y_)\n if len(cls) < 2:\n raise ValueError(\n \"The number of classes has to be greater than one; got %d\"\n \" class\" % len(cls))\n\n self.classes_ = cls\n\n return np.asarray(y, dtype=np.float64, order='C')" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/decision_function", + "name": "decision_function", + "qname": "sklearn.svm._base.BaseSVC.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/decision_function/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/decision_function/X", + "name": "X", + "qname": "sklearn.svm._base.BaseSVC.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + 
"is_public": false, + "reexported_by": [], + "description": "Evaluates the decision function for the samples in X.", + "docstring": "Evaluates the decision function for the samples in X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_classes * (n_classes-1) / 2)\n Returns the decision function of the sample for each class\n in the model.\n If decision_function_shape='ovr', the shape is (n_samples,\n n_classes).\n\nNotes\n-----\nIf decision_function_shape='ovo', the function values are proportional\nto the distance of the samples X to the separating hyperplane. If the\nexact distances are required, divide the function values by the norm of\nthe weight vector (``coef_``). See also `this question\n`_ for further details.\nIf decision_function_shape='ovr', the decision function is a monotonic\ntransformation of ovo decision function.", + "code": " def decision_function(self, X):\n \"\"\"Evaluates the decision function for the samples in X.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n\n Returns\n -------\n X : ndarray of shape (n_samples, n_classes * (n_classes-1) / 2)\n Returns the decision function of the sample for each class\n in the model.\n If decision_function_shape='ovr', the shape is (n_samples,\n n_classes).\n\n Notes\n -----\n If decision_function_shape='ovo', the function values are proportional\n to the distance of the samples X to the separating hyperplane. If the\n exact distances are required, divide the function values by the norm of\n the weight vector (``coef_``). See also `this question\n `_ for further details.\n If decision_function_shape='ovr', the decision function is a monotonic\n transformation of ovo decision function.\n \"\"\"\n dec = self._decision_function(X)\n if self.decision_function_shape == 'ovr' and len(self.classes_) > 2:\n return _ovr_decision_function(dec < 0, -dec, len(self.classes_))\n return dec" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/predict", + "name": "predict", + "qname": "sklearn.svm._base.BaseSVC.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/predict/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/predict/X", + "name": "X", + "qname": "sklearn.svm._base.BaseSVC.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples_test, n_samples_train)", + "default_value": "", + "description": "For kernel=\"precomputed\", the expected shape of X is\n(n_samples_test, n_samples_train)." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples_test, n_samples_train)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform classification on samples in X.\n\nFor an one-class model, +1 or -1 is returned.", + "docstring": "Perform classification on samples in X.\n\nFor an one-class model, +1 or -1 is returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n Class labels for samples in X.", + "code": " def predict(self, X):\n \"\"\"Perform classification on samples in X.\n\n For an one-class model, +1 or -1 is returned.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,)\n Class labels for samples in X.\n \"\"\"\n check_is_fitted(self)\n if self.break_ties and self.decision_function_shape == 'ovo':\n raise ValueError(\"break_ties must be False when \"\n \"decision_function_shape is 'ovo'\")\n\n if (self.break_ties\n and self.decision_function_shape == 'ovr'\n and len(self.classes_) > 2):\n y = np.argmax(self.decision_function(X), axis=1)\n else:\n y = super().predict(X)\n return self.classes_.take(np.asarray(y, dtype=np.intp))" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/predict_log_proba@getter", + "name": "predict_log_proba", + "qname": "sklearn.svm._base.BaseSVC.predict_log_proba", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/predict_log_proba/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute log probabilities of possible outcomes for samples in X.\n\nThe model need to have probability information computed at training\ntime: fit with attribute `probability` set to True.", + "docstring": "Compute log probabilities of possible outcomes for samples in X.\n\nThe model need to have probability information computed at training\ntime: fit with attribute `probability` set to True.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\nT : ndarray of shape (n_samples, n_classes)\n Returns the log-probabilities of the sample for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n\nNotes\n-----\nThe probability model is created using cross validation, so\nthe results can be slightly different than those obtained by\npredict. 
Also, it will produce meaningless results on very small\ndatasets.", + "code": " @property\n def predict_log_proba(self):\n \"\"\"Compute log probabilities of possible outcomes for samples in X.\n\n The model need to have probability information computed at training\n time: fit with attribute `probability` set to True.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features) or \\\n (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\n Returns\n -------\n T : ndarray of shape (n_samples, n_classes)\n Returns the log-probabilities of the sample for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n\n Notes\n -----\n The probability model is created using cross validation, so\n the results can be slightly different than those obtained by\n predict. Also, it will produce meaningless results on very small\n datasets.\n \"\"\"\n self._check_proba()\n return self._predict_log_proba" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/predict_proba@getter", + "name": "predict_proba", + "qname": "sklearn.svm._base.BaseSVC.predict_proba", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/predict_proba/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute probabilities of possible outcomes for samples in X.\n\nThe model need to have probability information computed at training\ntime: fit with attribute `probability` set to True.", + "docstring": "Compute probabilities of possible outcomes for samples in X.\n\nThe model need to have probability information computed at training\ntime: fit with attribute `probability` set to True.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\nT : ndarray of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n\nNotes\n-----\nThe probability model is created using cross validation, so\nthe results can be slightly different than those obtained by\npredict. Also, it will produce meaningless results on very small\ndatasets.", + "code": " @property\n def predict_proba(self):\n \"\"\"Compute probabilities of possible outcomes for samples in X.\n\n The model need to have probability information computed at training\n time: fit with attribute `probability` set to True.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\n Returns\n -------\n T : ndarray of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`.\n\n Notes\n -----\n The probability model is created using cross validation, so\n the results can be slightly different than those obtained by\n predict. 
Also, it will produce meaningless results on very small\n        datasets.\n        \"\"\"\n        self._check_proba()\n        return self._predict_proba" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/probA_@getter", + "name": "probA_", + "qname": "sklearn.svm._base.BaseSVC.probA_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/probA_/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC.probA_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "    @property\n    def probA_(self):\n        return self._probA" + }, + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/probB_@getter", + "name": "probB_", + "qname": "sklearn.svm._base.BaseSVC.probB_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/BaseSVC/probB_/self", + "name": "self", + "qname": "sklearn.svm._base.BaseSVC.probB_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "    @property\n    def probB_(self):\n        return self._probB" + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear", + "name": "_fit_liblinear", + "qname": "sklearn.svm._base._fit_liblinear", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/X", + "name": "X", + "qname": "sklearn.svm._base._fit_liblinear.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/y", + "name": "y", + "qname": "sklearn.svm._base._fit_liblinear.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target vector relative to X." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/C", + "name": "C", + "qname": "sklearn.svm._base._fit_liblinear.C", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Inverse of regularization strength; the lower the C, the stronger\nthe penalization."
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.svm._base._fit_liblinear.fit_intercept", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Whether or not to fit the intercept, that is to add a intercept\nterm to the decision function." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/intercept_scaling", + "name": "intercept_scaling", + "qname": "sklearn.svm._base._fit_liblinear.intercept_scaling", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "LibLinear internally penalizes the intercept and this term is subject\nto regularization just like the other terms of the feature vector.\nIn order to avoid this, one should increase the intercept_scaling.\nsuch that the feature vector becomes [x, intercept_scaling]." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/class_weight", + "name": "class_weight", + "qname": "sklearn.svm._base._fit_liblinear.class_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict or 'balanced'", + "default_value": "None", + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one. For\nmulti-output problems, a list of dicts can be provided in the same\norder as the columns of y.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "'balanced'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/penalty", + "name": "penalty", + "qname": "sklearn.svm._base._fit_liblinear.penalty", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'l1', 'l2'}", + "default_value": "", + "description": "The norm of the penalty used in regularization." + }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1"] + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/dual", + "name": "dual", + "qname": "sklearn.svm._base._fit_liblinear.dual", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Dual or primal formulation," + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/verbose", + "name": "verbose", + "qname": "sklearn.svm._base._fit_liblinear.verbose", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Set verbose to any positive number for verbosity." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/max_iter", + "name": "max_iter", + "qname": "sklearn.svm._base._fit_liblinear.max_iter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/tol", + "name": "tol", + "qname": "sklearn.svm._base._fit_liblinear.tol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Stopping condition." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/random_state", + "name": "random_state", + "qname": "sklearn.svm._base._fit_liblinear.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the pseudo random number generation for shuffling the data.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/multi_class", + "name": "multi_class", + "qname": "sklearn.svm._base._fit_liblinear.multi_class", + "default_value": "'ovr'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'ovr', 'crammer_singer'}", + "default_value": "'ovr'", + "description": "`ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer`\noptimizes a joint objective over all classes.\nWhile `crammer_singer` is interesting from an theoretical perspective\nas it is consistent it is seldom used in practice and rarely leads to\nbetter accuracy and is more expensive to compute.\nIf `crammer_singer` is chosen, the options loss, penalty and dual will\nbe ignored." + }, + "type": { + "kind": "EnumType", + "values": ["crammer_singer", "ovr"] + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/loss", + "name": "loss", + "qname": "sklearn.svm._base._fit_liblinear.loss", + "default_value": "'logistic_regression'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'logistic_regression', 'hinge', 'squared_hinge', 'epsilon_insensitive', 'squared_epsilon_insensitive}", + "default_value": "'logistic_regression'", + "description": "The loss function used to fit the model." + }, + "type": { + "kind": "EnumType", + "values": ["logistic_regression", "hinge", "squared_hinge", "epsilon_insensitive"] + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/epsilon", + "name": "epsilon", + "qname": "sklearn.svm._base._fit_liblinear.epsilon", + "default_value": "0.1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "Epsilon parameter in the epsilon-insensitive loss function. Note\nthat the value of this parameter depends on the scale of the target\nvariable y. If unsure, set epsilon=0." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._base/_fit_liblinear/sample_weight", + "name": "sample_weight", + "qname": "sklearn.svm._base._fit_liblinear.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weights assigned to each sample." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Used by Logistic Regression (and CV) and LinearSVC/LinearSVR.\n\nPreprocessing is done in this function before supplying it to liblinear.", + "docstring": "Used by Logistic Regression (and CV) and LinearSVC/LinearSVR.\n\nPreprocessing is done in this function before supplying it to liblinear.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X\n\nC : float\n Inverse of cross-validation parameter. Lower the C, the more\n the penalization.\n\nfit_intercept : bool\n Whether or not to fit the intercept, that is to add a intercept\n term to the decision function.\n\nintercept_scaling : float\n LibLinear internally penalizes the intercept and this term is subject\n to regularization just like the other terms of the feature vector.\n In order to avoid this, one should increase the intercept_scaling.\n such that the feature vector becomes [x, intercept_scaling].\n\nclass_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\npenalty : {'l1', 'l2'}\n The norm of the penalty used in regularization.\n\ndual : bool\n Dual or primal formulation,\n\nverbose : int\n Set verbose to any positive number for verbosity.\n\nmax_iter : int\n Number of iterations.\n\ntol : float\n Stopping condition.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nmulti_class : {'ovr', 'crammer_singer'}, default='ovr'\n `ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer`\n optimizes a joint objective over all classes.\n While `crammer_singer` is interesting from an theoretical perspective\n as it is consistent it is seldom used in practice and rarely leads to\n better accuracy and is more expensive to compute.\n If `crammer_singer` is chosen, the options loss, penalty and dual will\n be ignored.\n\nloss : {'logistic_regression', 'hinge', 'squared_hinge', 'epsilon_insensitive', 'squared_epsilon_insensitive}, default='logistic_regression'\n The loss function used to fit the model.\n\nepsilon : float, default=0.1\n Epsilon parameter in the epsilon-insensitive loss function. Note\n that the value of this parameter depends on the scale of the target\n variable y. 
If unsure, set epsilon=0.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights assigned to each sample.\n\nReturns\n-------\ncoef_ : ndarray of shape (n_features, n_features + 1)\n The coefficient vector got by minimizing the objective function.\n\nintercept_ : float\n The intercept term added to the vector.\n\nn_iter_ : int\n Maximum number of iterations run across all classes.", + "code": "def _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight,\n penalty, dual, verbose, max_iter, tol,\n random_state=None, multi_class='ovr',\n loss='logistic_regression', epsilon=0.1,\n sample_weight=None):\n \"\"\"Used by Logistic Regression (and CV) and LinearSVC/LinearSVR.\n\n Preprocessing is done in this function before supplying it to liblinear.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target vector relative to X\n\n C : float\n Inverse of cross-validation parameter. Lower the C, the more\n the penalization.\n\n fit_intercept : bool\n Whether or not to fit the intercept, that is to add a intercept\n term to the decision function.\n\n intercept_scaling : float\n LibLinear internally penalizes the intercept and this term is subject\n to regularization just like the other terms of the feature vector.\n In order to avoid this, one should increase the intercept_scaling.\n such that the feature vector becomes [x, intercept_scaling].\n\n class_weight : dict or 'balanced', default=None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data\n as ``n_samples / (n_classes * np.bincount(y))``\n\n penalty : {'l1', 'l2'}\n The norm of the penalty used in regularization.\n\n dual : bool\n Dual or primal formulation,\n\n verbose : int\n Set verbose to any positive number for verbosity.\n\n max_iter : int\n Number of iterations.\n\n tol : float\n Stopping condition.\n\n random_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generation for shuffling the data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\n multi_class : {'ovr', 'crammer_singer'}, default='ovr'\n `ovr` trains n_classes one-vs-rest classifiers, while `crammer_singer`\n optimizes a joint objective over all classes.\n While `crammer_singer` is interesting from an theoretical perspective\n as it is consistent it is seldom used in practice and rarely leads to\n better accuracy and is more expensive to compute.\n If `crammer_singer` is chosen, the options loss, penalty and dual will\n be ignored.\n\n loss : {'logistic_regression', 'hinge', 'squared_hinge', \\\n 'epsilon_insensitive', 'squared_epsilon_insensitive}, \\\n default='logistic_regression'\n The loss function used to fit the model.\n\n epsilon : float, default=0.1\n Epsilon parameter in the epsilon-insensitive loss function. Note\n that the value of this parameter depends on the scale of the target\n variable y. 
If unsure, set epsilon=0.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weights assigned to each sample.\n\n Returns\n -------\n coef_ : ndarray of shape (n_features, n_features + 1)\n The coefficient vector got by minimizing the objective function.\n\n intercept_ : float\n The intercept term added to the vector.\n\n n_iter_ : int\n Maximum number of iterations run across all classes.\n \"\"\"\n if loss not in ['epsilon_insensitive', 'squared_epsilon_insensitive']:\n enc = LabelEncoder()\n y_ind = enc.fit_transform(y)\n classes_ = enc.classes_\n if len(classes_) < 2:\n raise ValueError(\"This solver needs samples of at least 2 classes\"\n \" in the data, but the data contains only one\"\n \" class: %r\" % classes_[0])\n\n class_weight_ = compute_class_weight(class_weight, classes=classes_,\n y=y)\n else:\n class_weight_ = np.empty(0, dtype=np.float64)\n y_ind = y\n liblinear.set_verbosity_wrap(verbose)\n rnd = check_random_state(random_state)\n if verbose:\n print('[LibLinear]', end='')\n\n # LinearSVC breaks when intercept_scaling is <= 0\n bias = -1.0\n if fit_intercept:\n if intercept_scaling <= 0:\n raise ValueError(\"Intercept scaling is %r but needs to be greater \"\n \"than 0. To disable fitting an intercept,\"\n \" set fit_intercept=False.\" % intercept_scaling)\n else:\n bias = intercept_scaling\n\n libsvm.set_verbosity_wrap(verbose)\n libsvm_sparse.set_verbosity_wrap(verbose)\n liblinear.set_verbosity_wrap(verbose)\n\n # Liblinear doesn't support 64bit sparse matrix indices yet\n if sp.issparse(X):\n _check_large_sparse(X)\n\n # LibLinear wants targets as doubles, even for classification\n y_ind = np.asarray(y_ind, dtype=np.float64).ravel()\n y_ind = np.require(y_ind, requirements=\"W\")\n\n sample_weight = _check_sample_weight(sample_weight, X,\n dtype=np.float64)\n\n solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)\n raw_coef_, n_iter_ = liblinear.train_wrap(\n X, y_ind, sp.isspmatrix(X), solver_type, tol, bias, C,\n class_weight_, max_iter, rnd.randint(np.iinfo('i').max),\n epsilon, sample_weight)\n # Regarding rnd.randint(..) 
in the above signature:\n # seed for srand in range [0..INT_MAX); due to limitations in Numpy\n # on 32-bit platforms, we can't get to the UINT_MAX limit that\n # srand supports\n n_iter_ = max(n_iter_)\n if n_iter_ >= max_iter:\n warnings.warn(\"Liblinear failed to converge, increase \"\n \"the number of iterations.\", ConvergenceWarning)\n\n if fit_intercept:\n coef_ = raw_coef_[:, :-1]\n intercept_ = intercept_scaling * raw_coef_[:, -1]\n else:\n coef_ = raw_coef_\n intercept_ = 0.\n\n return coef_, intercept_, n_iter_" + }, + { + "id": "scikit-learn/sklearn.svm._base/_get_liblinear_solver_type", + "name": "_get_liblinear_solver_type", + "qname": "sklearn.svm._base._get_liblinear_solver_type", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/_get_liblinear_solver_type/multi_class", + "name": "multi_class", + "qname": "sklearn.svm._base._get_liblinear_solver_type.multi_class", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/_get_liblinear_solver_type/penalty", + "name": "penalty", + "qname": "sklearn.svm._base._get_liblinear_solver_type.penalty", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/_get_liblinear_solver_type/loss", + "name": "loss", + "qname": "sklearn.svm._base._get_liblinear_solver_type.loss", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/_get_liblinear_solver_type/dual", + "name": "dual", + "qname": "sklearn.svm._base._get_liblinear_solver_type.dual", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Find the liblinear magic number for the solver.\n\nThis number depends on the values of the following attributes:\n - multi_class\n - penalty\n - loss\n - dual\n\nThe same number is also internally used by LibLinear to determine\nwhich solver to use.", + "docstring": "Find the liblinear magic number for the solver.\n\nThis number depends on the values of the following attributes:\n - multi_class\n - penalty\n - loss\n - dual\n\nThe same number is also internally used by LibLinear to determine\nwhich solver to use.", + "code": "def _get_liblinear_solver_type(multi_class, penalty, loss, dual):\n \"\"\"Find the liblinear magic number for the solver.\n\n This number depends on the values of the following attributes:\n - multi_class\n - penalty\n - loss\n - dual\n\n The same number is also internally used by LibLinear to determine\n which solver to use.\n \"\"\"\n # nested dicts containing level 1: available loss functions,\n # level2: available penalties for the given loss function,\n # level3: whether the dual solver is available for the specified\n # combination of loss function and penalty\n _solver_type_dict = {\n 'logistic_regression': {\n 'l1': {False: 6},\n 'l2': {False: 0, True: 7}},\n 'hinge': {\n 'l2': {True: 3}},\n 'squared_hinge': {\n 'l1': {False: 5},\n 'l2': {False: 2, True: 1}},\n 
'epsilon_insensitive': {\n 'l2': {True: 13}},\n 'squared_epsilon_insensitive': {\n 'l2': {False: 11, True: 12}},\n 'crammer_singer': 4\n }\n\n if multi_class == 'crammer_singer':\n return _solver_type_dict[multi_class]\n elif multi_class != 'ovr':\n raise ValueError(\"`multi_class` must be one of `ovr`, \"\n \"`crammer_singer`, got %r\" % multi_class)\n\n _solver_pen = _solver_type_dict.get(loss, None)\n if _solver_pen is None:\n error_string = (\"loss='%s' is not supported\" % loss)\n else:\n _solver_dual = _solver_pen.get(penalty, None)\n if _solver_dual is None:\n error_string = (\"The combination of penalty='%s' \"\n \"and loss='%s' is not supported\"\n % (penalty, loss))\n else:\n solver_num = _solver_dual.get(dual, None)\n if solver_num is None:\n error_string = (\"The combination of penalty='%s' and \"\n \"loss='%s' are not supported when dual=%s\"\n % (penalty, loss, dual))\n else:\n return solver_num\n raise ValueError('Unsupported set of arguments: %s, '\n 'Parameters: penalty=%r, loss=%r, dual=%r'\n % (error_string, penalty, loss, dual))" + }, + { + "id": "scikit-learn/sklearn.svm._base/_one_vs_one_coef", + "name": "_one_vs_one_coef", + "qname": "sklearn.svm._base._one_vs_one_coef", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._base/_one_vs_one_coef/dual_coef", + "name": "dual_coef", + "qname": "sklearn.svm._base._one_vs_one_coef.dual_coef", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/_one_vs_one_coef/n_support", + "name": "n_support", + "qname": "sklearn.svm._base._one_vs_one_coef.n_support", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._base/_one_vs_one_coef/support_vectors", + "name": "support_vectors", + "qname": "sklearn.svm._base._one_vs_one_coef.support_vectors", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate primal coefficients from dual coefficients\nfor the one-vs-one multi class LibSVM in the case\nof a linear kernel.", + "docstring": "Generate primal coefficients from dual coefficients\nfor the one-vs-one multi class LibSVM in the case\nof a linear kernel.", + "code": "def _one_vs_one_coef(dual_coef, n_support, support_vectors):\n \"\"\"Generate primal coefficients from dual coefficients\n for the one-vs-one multi class LibSVM in the case\n of a linear kernel.\"\"\"\n\n # get 1vs1 weights for all n*(n-1) classifiers.\n # this is somewhat messy.\n # shape of dual_coef_ is nSV * (n_classes -1)\n # see docs for details\n n_class = dual_coef.shape[0] + 1\n\n # XXX we could do preallocation of coef but\n # would have to take care in the sparse case\n coef = []\n sv_locs = np.cumsum(np.hstack([[0], n_support]))\n for class1 in range(n_class):\n # SVs for class1:\n sv1 = support_vectors[sv_locs[class1]:sv_locs[class1 + 1], :]\n for class2 in range(class1 + 1, n_class):\n # SVs for class1:\n sv2 = support_vectors[sv_locs[class2]:sv_locs[class2 + 1], :]\n\n # dual coef for class1 SVs:\n alpha1 = dual_coef[class2 - 1, sv_locs[class1]:sv_locs[class1 + 1]]\n # dual coef for 
class2 SVs:\n alpha2 = dual_coef[class1, sv_locs[class2]:sv_locs[class2 + 1]]\n # build weight for class1 vs class2\n\n coef.append(safe_sparse_dot(alpha1, sv1)\n + safe_sparse_dot(alpha2, sv2))\n return coef" + }, + { + "id": "scikit-learn/sklearn.svm._bounds/l1_min_c", + "name": "l1_min_c", + "qname": "sklearn.svm._bounds.l1_min_c", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._bounds/l1_min_c/X", + "name": "X", + "qname": "sklearn.svm._bounds.l1_min_c.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples in the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._bounds/l1_min_c/y", + "name": "y", + "qname": "sklearn.svm._bounds.l1_min_c.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target vector relative to X." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.svm._bounds/l1_min_c/loss", + "name": "loss", + "qname": "sklearn.svm._bounds.l1_min_c.loss", + "default_value": "'squared_hinge'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'squared_hinge', 'log'}", + "default_value": "'squared_hinge'", + "description": "Specifies the loss function.\nWith 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).\nWith 'log' it is the loss of logistic regression models." + }, + "type": { + "kind": "EnumType", + "values": ["log", "squared_hinge"] + } + }, + { + "id": "scikit-learn/sklearn.svm._bounds/l1_min_c/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.svm._bounds.l1_min_c.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Specifies if the intercept should be fitted by the model.\nIt must match the fit() method parameter." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._bounds/l1_min_c/intercept_scaling", + "name": "intercept_scaling", + "qname": "sklearn.svm._bounds.l1_min_c.intercept_scaling", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "when fit_intercept is True, instance vector x becomes\n[x, intercept_scaling],\ni.e. a \"synthetic\" feature with constant value equals to\nintercept_scaling is appended to the instance vector.\nIt must match the fit() method parameter." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the lowest bound for C such that for C in (l1_min_C, infinity)\nthe model is guaranteed not to be empty. 
This applies to l1 penalized\nclassifiers, such as LinearSVC with penalty='l1' and\nlinear_model.LogisticRegression with penalty='l1'.\n\nThis value is valid if class_weight parameter in fit() is not set.", + "docstring": "Return the lowest bound for C such that for C in (l1_min_C, infinity)\nthe model is guaranteed not to be empty. This applies to l1 penalized\nclassifiers, such as LinearSVC with penalty='l1' and\nlinear_model.LogisticRegression with penalty='l1'.\n\nThis value is valid if class_weight parameter in fit() is not set.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X.\n\nloss : {'squared_hinge', 'log'}, default='squared_hinge'\n Specifies the loss function.\n With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).\n With 'log' it is the loss of logistic regression models.\n\nfit_intercept : bool, default=True\n Specifies if the intercept should be fitted by the model.\n It must match the fit() method parameter.\n\nintercept_scaling : float, default=1.0\n when fit_intercept is True, instance vector x becomes\n [x, intercept_scaling],\n i.e. a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n It must match the fit() method parameter.\n\nReturns\n-------\nl1_min_c : float\n minimum value for C", + "code": "@_deprecate_positional_args\ndef l1_min_c(X, y, *, loss='squared_hinge', fit_intercept=True,\n intercept_scaling=1.0):\n \"\"\"\n Return the lowest bound for C such that for C in (l1_min_C, infinity)\n the model is guaranteed not to be empty. This applies to l1 penalized\n classifiers, such as LinearSVC with penalty='l1' and\n linear_model.LogisticRegression with penalty='l1'.\n\n This value is valid if class_weight parameter in fit() is not set.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target vector relative to X.\n\n loss : {'squared_hinge', 'log'}, default='squared_hinge'\n Specifies the loss function.\n With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).\n With 'log' it is the loss of logistic regression models.\n\n fit_intercept : bool, default=True\n Specifies if the intercept should be fitted by the model.\n It must match the fit() method parameter.\n\n intercept_scaling : float, default=1.0\n when fit_intercept is True, instance vector x becomes\n [x, intercept_scaling],\n i.e. 
a \"synthetic\" feature with constant value equals to\n intercept_scaling is appended to the instance vector.\n It must match the fit() method parameter.\n\n Returns\n -------\n l1_min_c : float\n minimum value for C\n \"\"\"\n if loss not in ('squared_hinge', 'log'):\n raise ValueError('loss type not in (\"squared_hinge\", \"log\")')\n\n X = check_array(X, accept_sparse='csc')\n check_consistent_length(X, y)\n\n Y = LabelBinarizer(neg_label=-1).fit_transform(y).T\n # maximum absolute value over classes and features\n den = np.max(np.abs(safe_sparse_dot(Y, X)))\n if fit_intercept:\n bias = np.full((np.size(y), 1), intercept_scaling,\n dtype=np.array(intercept_scaling).dtype)\n den = max(den, abs(np.dot(Y, bias)).max())\n\n if den == 0.0:\n raise ValueError('Ill-posed l1_min_c calculation: l1 will always '\n 'select zero coefficients for this data')\n if loss == 'squared_hinge':\n return 0.5 / den\n else: # loss == 'log':\n return 2.0 / den" + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__", + "name": "__init__", + "qname": "sklearn.svm._classes.LinearSVC.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__/self", + "name": "self", + "qname": "sklearn.svm._classes.LinearSVC.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__/penalty", + "name": "penalty", + "qname": "sklearn.svm._classes.LinearSVC.__init__.penalty", + "default_value": "'l2'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'l1', 'l2'}", + "default_value": "'l2'", + "description": "Specifies the norm used in the penalization. The 'l2'\npenalty is the standard used in SVC. The 'l1' leads to ``coef_``\nvectors that are sparse." + }, + "type": { + "kind": "EnumType", + "values": ["l2", "l1"] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__/loss", + "name": "loss", + "qname": "sklearn.svm._classes.LinearSVC.__init__.loss", + "default_value": "'squared_hinge'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'hinge', 'squared_hinge'}", + "default_value": "'squared_hinge'", + "description": "Specifies the loss function. 'hinge' is the standard SVM loss\n(used e.g. by the SVC class) while 'squared_hinge' is the\nsquare of the hinge loss. The combination of ``penalty='l1'``\nand ``loss='hinge'`` is not supported." + }, + "type": { + "kind": "EnumType", + "values": ["hinge", "squared_hinge"] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__/dual", + "name": "dual", + "qname": "sklearn.svm._classes.LinearSVC.__init__.dual", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Select the algorithm to either solve the dual or primal\noptimization problem. Prefer dual=False when n_samples > n_features." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__/tol", + "name": "tol", + "qname": "sklearn.svm._classes.LinearSVC.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance for stopping criteria." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__/C", + "name": "C", + "qname": "sklearn.svm._classes.LinearSVC.__init__.C", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Regularization parameter. The strength of the regularization is\ninversely proportional to C. Must be strictly positive." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__/multi_class", + "name": "multi_class", + "qname": "sklearn.svm._classes.LinearSVC.__init__.multi_class", + "default_value": "'ovr'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'ovr', 'crammer_singer'}", + "default_value": "'ovr'", + "description": "Determines the multi-class strategy if `y` contains more than\ntwo classes.\n``\"ovr\"`` trains n_classes one-vs-rest classifiers, while\n``\"crammer_singer\"`` optimizes a joint objective over all classes.\nWhile `crammer_singer` is interesting from a theoretical perspective\nas it is consistent, it is seldom used in practice as it rarely leads\nto better accuracy and is more expensive to compute.\nIf ``\"crammer_singer\"`` is chosen, the options loss, penalty and dual\nwill be ignored." + }, + "type": { + "kind": "EnumType", + "values": ["crammer_singer", "ovr"] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.svm._classes.LinearSVC.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be already centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__/intercept_scaling", + "name": "intercept_scaling", + "qname": "sklearn.svm._classes.LinearSVC.__init__.intercept_scaling", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1", + "description": "When self.fit_intercept is True, instance vector x becomes\n``[x, self.intercept_scaling]``,\ni.e. a \"synthetic\" feature with constant value equals to\nintercept_scaling is appended to the instance vector.\nThe intercept becomes intercept_scaling * synthetic feature weight\nNote! the synthetic feature weight is subject to l1/l2 regularization\nas all other features.\nTo lessen the effect of regularization on synthetic feature weight\n(and therefore on the intercept) intercept_scaling has to be increased." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.svm._classes.LinearSVC.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict or 'balanced'", + "default_value": "None", + "description": "Set the parameter C of class i to ``class_weight[i]*C`` for\nSVC. If not given, all classes are supposed to have\nweight one.\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "'balanced'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__/verbose", + "name": "verbose", + "qname": "sklearn.svm._classes.LinearSVC.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Enable verbose output. Note that this setting takes advantage of a\nper-process runtime setting in liblinear that, if enabled, may not work\nproperly in a multithreaded context." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__/random_state", + "name": "random_state", + "qname": "sklearn.svm._classes.LinearSVC.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the pseudo random number generation for shuffling the data for\nthe dual coordinate descent (if ``dual=True``). When ``dual=False`` the\nunderlying implementation of :class:`LinearSVC` is not random and\n``random_state`` has no effect on the results.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.svm._classes.LinearSVC.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of iterations to be run." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Linear Support Vector Classification.\n\nSimilar to SVC with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input and the multiclass support\nis handled according to a one-vs-the-rest scheme.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, penalty='l2', loss='squared_hinge', *, dual=True,\n tol=1e-4, C=1.0, multi_class='ovr', fit_intercept=True,\n intercept_scaling=1, class_weight=None, verbose=0,\n random_state=None, max_iter=1000):\n self.dual = dual\n self.tol = tol\n self.C = C\n self.multi_class = multi_class\n self.fit_intercept = fit_intercept\n self.intercept_scaling = intercept_scaling\n self.class_weight = class_weight\n self.verbose = verbose\n self.random_state = random_state\n self.max_iter = max_iter\n self.penalty = penalty\n self.loss = loss" + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/_more_tags", + "name": "_more_tags", + "qname": "sklearn.svm._classes.LinearSVC._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/_more_tags/self", + "name": "self", + "qname": "sklearn.svm._classes.LinearSVC._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/fit", + "name": "fit", + "qname": "sklearn.svm._classes.LinearSVC.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/fit/self", + "name": "self", + "qname": "sklearn.svm._classes.LinearSVC.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/fit/X", + "name": "X", + "qname": "sklearn.svm._classes.LinearSVC.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples in the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/fit/y", + "name": "y", + "qname": "sklearn.svm._classes.LinearSVC.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target vector relative to X." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVC/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.svm._classes.LinearSVC.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Array of weights that are assigned to individual\nsamples. If not provided,\nthen each sample is given unit weight.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model according to the given training data.", + "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual\n samples. If not provided,\n then each sample is given unit weight.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nself : object\n An instance of the estimator.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target vector relative to X.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual\n samples. If not provided,\n then each sample is given unit weight.\n\n .. 
versionadded:: 0.18\n\n Returns\n -------\n self : object\n An instance of the estimator.\n \"\"\"\n if self.C < 0:\n raise ValueError(\"Penalty term must be positive; got (C=%r)\"\n % self.C)\n\n X, y = self._validate_data(X, y, accept_sparse='csr',\n dtype=np.float64, order=\"C\",\n accept_large_sparse=False)\n check_classification_targets(y)\n self.classes_ = np.unique(y)\n\n self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(\n X, y, self.C, self.fit_intercept, self.intercept_scaling,\n self.class_weight, self.penalty, self.dual, self.verbose,\n self.max_iter, self.tol, self.random_state, self.multi_class,\n self.loss, sample_weight=sample_weight)\n\n if self.multi_class == \"crammer_singer\" and len(self.classes_) == 2:\n self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1)\n if self.fit_intercept:\n intercept = self.intercept_[1] - self.intercept_[0]\n self.intercept_ = np.array([intercept])\n\n return self" + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/__init__", + "name": "__init__", + "qname": "sklearn.svm._classes.LinearSVR.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/__init__/self", + "name": "self", + "qname": "sklearn.svm._classes.LinearSVR.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.svm._classes.LinearSVR.__init__.epsilon", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Epsilon parameter in the epsilon-insensitive loss function. Note\nthat the value of this parameter depends on the scale of the target\nvariable y. If unsure, set ``epsilon=0``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/__init__/tol", + "name": "tol", + "qname": "sklearn.svm._classes.LinearSVR.__init__.tol", + "default_value": "0.0001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Tolerance for stopping criteria." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/__init__/C", + "name": "C", + "qname": "sklearn.svm._classes.LinearSVR.__init__.C", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Regularization parameter. The strength of the regularization is\ninversely proportional to C. Must be strictly positive." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/__init__/loss", + "name": "loss", + "qname": "sklearn.svm._classes.LinearSVR.__init__.loss", + "default_value": "'epsilon_insensitive'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'epsilon_insensitive', 'squared_epsilon_insensitive'}", + "default_value": "'epsilon_insensitive'", + "description": "Specifies the loss function. The epsilon-insensitive loss\n(standard SVR) is the L1 loss, while the squared epsilon-insensitive\nloss ('squared_epsilon_insensitive') is the L2 loss." 
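A short end-to-end sketch of `LinearSVC.fit` as documented above, on an assumed synthetic dataset; `dual=False` follows the docstring's advice for n_samples > n_features:

from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=500, n_features=20, random_state=0)
clf = LinearSVC(dual=False, random_state=0).fit(X, y)
print(clf.classes_)                           # set by fit via np.unique(y)
print(clf.coef_.shape, clf.intercept_.shape)  # (1, 20) and (1,) for binary y
print(clf.score(X, y))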
+ }, + "type": { + "kind": "EnumType", + "values": ["squared_epsilon_insensitive", "epsilon_insensitive"] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/__init__/fit_intercept", + "name": "fit_intercept", + "qname": "sklearn.svm._classes.LinearSVR.__init__.fit_intercept", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be already centered)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/__init__/intercept_scaling", + "name": "intercept_scaling", + "qname": "sklearn.svm._classes.LinearSVR.__init__.intercept_scaling", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.", + "description": "When self.fit_intercept is True, instance vector x becomes\n[x, self.intercept_scaling],\ni.e. a \"synthetic\" feature with constant value equals to\nintercept_scaling is appended to the instance vector.\nThe intercept becomes intercept_scaling * synthetic feature weight\nNote! the synthetic feature weight is subject to l1/l2 regularization\nas all other features.\nTo lessen the effect of regularization on synthetic feature weight\n(and therefore on the intercept) intercept_scaling has to be increased." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/__init__/dual", + "name": "dual", + "qname": "sklearn.svm._classes.LinearSVR.__init__.dual", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Select the algorithm to either solve the dual or primal\noptimization problem. Prefer dual=False when n_samples > n_features." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/__init__/verbose", + "name": "verbose", + "qname": "sklearn.svm._classes.LinearSVR.__init__.verbose", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Enable verbose output. Note that this setting takes advantage of a\nper-process runtime setting in liblinear that, if enabled, may not work\nproperly in a multithreaded context." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/__init__/random_state", + "name": "random_state", + "qname": "sklearn.svm._classes.LinearSVR.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the pseudo random number generation for shuffling the data.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.svm._classes.LinearSVR.__init__.max_iter", + "default_value": "1000", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "1000", + "description": "The maximum number of iterations to be run." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Linear Support Vector Regression.\n\nSimilar to SVR with parameter kernel='linear', but implemented in terms of\nliblinear rather than libsvm, so it has more flexibility in the choice of\npenalties and loss functions and should scale better to large numbers of\nsamples.\n\nThis class supports both dense and sparse input.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, epsilon=0.0, tol=1e-4, C=1.0,\n loss='epsilon_insensitive', fit_intercept=True,\n intercept_scaling=1., dual=True, verbose=0,\n random_state=None, max_iter=1000):\n self.tol = tol\n self.C = C\n self.epsilon = epsilon\n self.fit_intercept = fit_intercept\n self.intercept_scaling = intercept_scaling\n self.verbose = verbose\n self.random_state = random_state\n self.max_iter = max_iter\n self.dual = dual\n self.loss = loss" + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/_more_tags", + "name": "_more_tags", + "qname": "sklearn.svm._classes.LinearSVR._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/_more_tags/self", + "name": "self", + "qname": "sklearn.svm._classes.LinearSVR._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/fit", + "name": "fit", + "qname": "sklearn.svm._classes.LinearSVR.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/fit/self", + "name": "self", + "qname": "sklearn.svm._classes.LinearSVR.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/fit/X", + "name": "X", + "qname": "sklearn.svm._classes.LinearSVR.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples in the number of samples and\nn_features is the number of features." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/fit/y", + "name": "y", + "qname": "sklearn.svm._classes.LinearSVR.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Target vector relative to X" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/LinearSVR/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.svm._classes.LinearSVR.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Array of weights that are assigned to individual\nsamples. If not provided,\nthen each sample is given unit weight.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit the model according to the given training data.", + "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target vector relative to X\n\nsample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual\n samples. If not provided,\n then each sample is given unit weight.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nself : object\n An instance of the estimator.", + "code": " def fit(self, X, y, sample_weight=None):\n \"\"\"Fit the model according to the given training data.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples in the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples,)\n Target vector relative to X\n\n sample_weight : array-like of shape (n_samples,), default=None\n Array of weights that are assigned to individual\n samples. If not provided,\n then each sample is given unit weight.\n\n .. 
versionadded:: 0.18\n\n Returns\n -------\n self : object\n An instance of the estimator.\n \"\"\"\n if self.C < 0:\n raise ValueError(\"Penalty term must be positive; got (C=%r)\"\n % self.C)\n\n X, y = self._validate_data(X, y, accept_sparse='csr',\n dtype=np.float64, order=\"C\",\n accept_large_sparse=False)\n penalty = 'l2' # SVR only accepts l2 penalty\n self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(\n X, y, self.C, self.fit_intercept, self.intercept_scaling,\n None, penalty, self.dual, self.verbose,\n self.max_iter, self.tol, self.random_state, loss=self.loss,\n epsilon=self.epsilon, sample_weight=sample_weight)\n self.coef_ = self.coef_.ravel()\n\n return self" + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__", + "name": "__init__", + "qname": "sklearn.svm._classes.NuSVC.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/self", + "name": "self", + "qname": "sklearn.svm._classes.NuSVC.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/nu", + "name": "nu", + "qname": "sklearn.svm._classes.NuSVC.__init__.nu", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "An upper bound on the fraction of margin errors (see :ref:`User Guide\n`) and a lower bound of the fraction of support vectors.\nShould be in the interval (0, 1]." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": 1.0, + "min_inclusive": false, + "max_inclusive": true + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/kernel", + "name": "kernel", + "qname": "sklearn.svm._classes.NuSVC.__init__.kernel", + "default_value": "'rbf'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}", + "default_value": "'rbf'", + "description": "Specifies the kernel type to be used in the algorithm.\nIt must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\na callable.\nIf none is given, 'rbf' will be used. If a callable is given it is\nused to precompute the kernel matrix." + }, + "type": { + "kind": "EnumType", + "values": ["sigmoid", "rbf", "linear", "precomputed", "poly"] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/degree", + "name": "degree", + "qname": "sklearn.svm._classes.NuSVC.__init__.degree", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Degree of the polynomial kernel function ('poly').\nIgnored by all other kernels." 
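The corresponding sketch for `LinearSVR.fit`, again with assumed synthetic data and illustrative settings; note the `coef_.ravel()` step in the code above, which is why `coef_` comes back 1-D:

from sklearn.datasets import make_regression
from sklearn.svm import LinearSVR

X, y = make_regression(n_samples=200, n_features=10, noise=1.0, random_state=0)
reg = LinearSVR(epsilon=0.0, random_state=0, max_iter=10000).fit(X, y)
print(reg.coef_.shape)   # (10,): raveled to 1-D by fit
print(reg.predict(X[:3]))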
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/gamma", + "name": "gamma", + "qname": "sklearn.svm._classes.NuSVC.__init__.gamma", + "default_value": "'scale'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'scale', 'auto'} or float", + "default_value": "'scale'", + "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "scale"] + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/coef0", + "name": "coef0", + "qname": "sklearn.svm._classes.NuSVC.__init__.coef0", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Independent term in kernel function.\nIt is only significant in 'poly' and 'sigmoid'." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/shrinking", + "name": "shrinking", + "qname": "sklearn.svm._classes.NuSVC.__init__.shrinking", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to use the shrinking heuristic.\nSee the :ref:`User Guide `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/probability", + "name": "probability", + "qname": "sklearn.svm._classes.NuSVC.__init__.probability", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to enable probability estimates. This must be enabled prior\nto calling `fit`, will slow down that method as it internally uses\n5-fold cross-validation, and `predict_proba` may be inconsistent with\n`predict`. Read more in the :ref:`User Guide `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/tol", + "name": "tol", + "qname": "sklearn.svm._classes.NuSVC.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Tolerance for stopping criterion." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/cache_size", + "name": "cache_size", + "qname": "sklearn.svm._classes.NuSVC.__init__.cache_size", + "default_value": "200", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "200", + "description": "Specify the size of the kernel cache (in MB)." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.svm._classes.NuSVC.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{dict, 'balanced'}", + "default_value": "None", + "description": "Set the parameter C of class i to class_weight[i]*C for\nSVC. If not given, all classes are supposed to have\nweight one. The \"balanced\" mode uses the values of y to automatically\nadjust weights inversely proportional to class frequencies as\n``n_samples / (n_classes * np.bincount(y))``" + }, + "type": { + "kind": "EnumType", + "values": ["balanced"] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/verbose", + "name": "verbose", + "qname": "sklearn.svm._classes.NuSVC.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Enable verbose output. Note that this setting takes advantage of a\nper-process runtime setting in libsvm that, if enabled, may not work\nproperly in a multithreaded context." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.svm._classes.NuSVC.__init__.max_iter", + "default_value": "-1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "-1", + "description": "Hard limit on iterations within solver, or -1 for no limit." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/decision_function_shape", + "name": "decision_function_shape", + "qname": "sklearn.svm._classes.NuSVC.__init__.decision_function_shape", + "default_value": "'ovr'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'ovo', 'ovr'}", + "default_value": "'ovr'", + "description": "Whether to return a one-vs-rest ('ovr') decision function of shape\n(n_samples, n_classes) as all other classifiers, or the original\none-vs-one ('ovo') decision function of libsvm which has shape\n(n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n('ovo') is always used as multi-class strategy. The parameter is\nignored for binary classification.\n\n.. versionchanged:: 0.19\n decision_function_shape is 'ovr' by default.\n\n.. versionadded:: 0.17\n *decision_function_shape='ovr'* is recommended.\n\n.. versionchanged:: 0.17\n Deprecated *decision_function_shape='ovo' and None*." + }, + "type": { + "kind": "EnumType", + "values": ["ovo", "ovr"] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/break_ties", + "name": "break_ties", + "qname": "sklearn.svm._classes.NuSVC.__init__.break_ties", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n:term:`predict` will break ties according to the confidence values of\n:term:`decision_function`; otherwise the first class among the tied\nclasses is returned. Please note that breaking ties comes at a\nrelatively high computational cost compared to a simple predict.\n\n.. 
versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/__init__/random_state", + "name": "random_state", + "qname": "sklearn.svm._classes.NuSVC.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the pseudo random number generation for shuffling the data for\nprobability estimates. Ignored when `probability` is False.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Nu-Support Vector Classification.\n\nSimilar to SVC but uses a parameter to control the number of support\nvectors.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, nu=0.5, kernel='rbf', degree=3, gamma='scale',\n coef0=0.0, shrinking=True, probability=False, tol=1e-3,\n cache_size=200, class_weight=None, verbose=False, max_iter=-1,\n decision_function_shape='ovr', break_ties=False,\n random_state=None):\n\n super().__init__(\n kernel=kernel, degree=degree, gamma=gamma,\n coef0=coef0, tol=tol, C=0., nu=nu, shrinking=shrinking,\n probability=probability, cache_size=cache_size,\n class_weight=class_weight, verbose=verbose, max_iter=max_iter,\n decision_function_shape=decision_function_shape,\n break_ties=break_ties,\n random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/_more_tags", + "name": "_more_tags", + "qname": "sklearn.svm._classes.NuSVC._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/NuSVC/_more_tags/self", + "name": "self", + "qname": "sklearn.svm._classes.NuSVC._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_methods_subset_invariance':\n 'fails for the decision_function method',\n 'check_class_weight_classifiers': 'class_weight is ignored.',\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/__init__", + "name": "__init__", + "qname": "sklearn.svm._classes.NuSVR.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/__init__/self", + "name": "self", + "qname": "sklearn.svm._classes.NuSVR.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/__init__/nu", + "name": "nu", + "qname": "sklearn.svm._classes.NuSVR.__init__.nu", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + 
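A usage sketch for NuSVC with the defaults documented above (iris is an illustrative choice): nu upper-bounds the fraction of margin errors and lower-bounds the fraction of support vectors.

from sklearn.datasets import load_iris
from sklearn.svm import NuSVC

X, y = load_iris(return_X_y=True)
clf = NuSVC(nu=0.5, kernel='rbf', gamma='scale').fit(X, y)
print(clf.n_support_)                      # support vectors per class
print(clf.decision_function(X[:2]).shape)  # (2, 3) with decision_function_shape='ovr'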
"default_value": "0.5", + "description": "An upper bound on the fraction of training errors and a lower bound of\nthe fraction of support vectors. Should be in the interval (0, 1]. By\ndefault 0.5 will be taken." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": 1.0, + "min_inclusive": false, + "max_inclusive": true + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/__init__/C", + "name": "C", + "qname": "sklearn.svm._classes.NuSVR.__init__.C", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Penalty parameter C of the error term." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/__init__/kernel", + "name": "kernel", + "qname": "sklearn.svm._classes.NuSVR.__init__.kernel", + "default_value": "'rbf'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}", + "default_value": "'rbf'", + "description": "Specifies the kernel type to be used in the algorithm.\nIt must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\na callable.\nIf none is given, 'rbf' will be used. If a callable is given it is\nused to precompute the kernel matrix." + }, + "type": { + "kind": "EnumType", + "values": ["sigmoid", "rbf", "linear", "precomputed", "poly"] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/__init__/degree", + "name": "degree", + "qname": "sklearn.svm._classes.NuSVR.__init__.degree", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Degree of the polynomial kernel function ('poly').\nIgnored by all other kernels." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/__init__/gamma", + "name": "gamma", + "qname": "sklearn.svm._classes.NuSVR.__init__.gamma", + "default_value": "'scale'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'scale', 'auto'} or float", + "default_value": "'scale'", + "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "scale"] + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/__init__/coef0", + "name": "coef0", + "qname": "sklearn.svm._classes.NuSVR.__init__.coef0", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Independent term in kernel function.\nIt is only significant in 'poly' and 'sigmoid'." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/__init__/shrinking", + "name": "shrinking", + "qname": "sklearn.svm._classes.NuSVR.__init__.shrinking", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to use the shrinking heuristic.\nSee the :ref:`User Guide `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/__init__/tol", + "name": "tol", + "qname": "sklearn.svm._classes.NuSVR.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Tolerance for stopping criterion." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/__init__/cache_size", + "name": "cache_size", + "qname": "sklearn.svm._classes.NuSVR.__init__.cache_size", + "default_value": "200", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "200", + "description": "Specify the size of the kernel cache (in MB)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/__init__/verbose", + "name": "verbose", + "qname": "sklearn.svm._classes.NuSVR.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Enable verbose output. Note that this setting takes advantage of a\nper-process runtime setting in libsvm that, if enabled, may not work\nproperly in a multithreaded context." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.svm._classes.NuSVR.__init__.max_iter", + "default_value": "-1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "-1", + "description": "Hard limit on iterations within solver, or -1 for no limit." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Nu Support Vector Regression.\n\nSimilar to NuSVC, for regression, uses a parameter nu to control\nthe number of support vectors. 
However, unlike NuSVC, where nu\nreplaces C, here nu replaces the parameter epsilon of epsilon-SVR.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, nu=0.5, C=1.0, kernel='rbf', degree=3,\n gamma='scale', coef0=0.0, shrinking=True,\n tol=1e-3, cache_size=200, verbose=False, max_iter=-1):\n\n super().__init__(\n kernel=kernel, degree=degree, gamma=gamma, coef0=coef0,\n tol=tol, C=C, nu=nu, epsilon=0., shrinking=shrinking,\n probability=False, cache_size=cache_size, class_weight=None,\n verbose=verbose, max_iter=max_iter, random_state=None)" + }, + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/_more_tags", + "name": "_more_tags", + "qname": "sklearn.svm._classes.NuSVR._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/NuSVR/_more_tags/self", + "name": "self", + "qname": "sklearn.svm._classes.NuSVR._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/__init__", + "name": "__init__", + "qname": "sklearn.svm._classes.OneClassSVM.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/__init__/self", + "name": "self", + "qname": "sklearn.svm._classes.OneClassSVM.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/__init__/kernel", + "name": "kernel", + "qname": "sklearn.svm._classes.OneClassSVM.__init__.kernel", + "default_value": "'rbf'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}", + "default_value": "'rbf'", + "description": "Specifies the kernel type to be used in the algorithm.\nIt must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\na callable.\nIf none is given, 'rbf' will be used. If a callable is given it is\nused to precompute the kernel matrix." + }, + "type": { + "kind": "EnumType", + "values": ["sigmoid", "rbf", "linear", "precomputed", "poly"] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/__init__/degree", + "name": "degree", + "qname": "sklearn.svm._classes.OneClassSVM.__init__.degree", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Degree of the polynomial kernel function ('poly').\nIgnored by all other kernels." 
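And a sketch for NuSVR, where nu takes over the role of SVR's epsilon (synthetic data and parameter values are illustrative assumptions):

from sklearn.datasets import make_regression
from sklearn.svm import NuSVR

X, y = make_regression(n_samples=100, n_features=5, noise=0.5, random_state=0)
reg = NuSVR(nu=0.5, C=1.0).fit(X, y)
# nu lower-bounds the support-vector fraction, so expect roughly >= 50 of 100 here
print(len(reg.support_), 'support vectors out of', len(X))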
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/__init__/gamma", + "name": "gamma", + "qname": "sklearn.svm._classes.OneClassSVM.__init__.gamma", + "default_value": "'scale'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'scale', 'auto'} or float", + "default_value": "'scale'", + "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "scale"] + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/__init__/coef0", + "name": "coef0", + "qname": "sklearn.svm._classes.OneClassSVM.__init__.coef0", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Independent term in kernel function.\nIt is only significant in 'poly' and 'sigmoid'." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/__init__/tol", + "name": "tol", + "qname": "sklearn.svm._classes.OneClassSVM.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Tolerance for stopping criterion." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/__init__/nu", + "name": "nu", + "qname": "sklearn.svm._classes.OneClassSVM.__init__.nu", + "default_value": "0.5", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.5", + "description": "An upper bound on the fraction of training\nerrors and a lower bound of the fraction of support\nvectors. Should be in the interval (0, 1]. By default 0.5\nwill be taken." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "BoundaryType", + "base_type": "float", + "min": 0.0, + "max": 1.0, + "min_inclusive": false, + "max_inclusive": true + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/__init__/shrinking", + "name": "shrinking", + "qname": "sklearn.svm._classes.OneClassSVM.__init__.shrinking", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to use the shrinking heuristic.\nSee the :ref:`User Guide `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/__init__/cache_size", + "name": "cache_size", + "qname": "sklearn.svm._classes.OneClassSVM.__init__.cache_size", + "default_value": "200", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "200", + "description": "Specify the size of the kernel cache (in MB)." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/__init__/verbose", + "name": "verbose", + "qname": "sklearn.svm._classes.OneClassSVM.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Enable verbose output. Note that this setting takes advantage of a\nper-process runtime setting in libsvm that, if enabled, may not work\nproperly in a multithreaded context." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.svm._classes.OneClassSVM.__init__.max_iter", + "default_value": "-1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "-1", + "description": "Hard limit on iterations within solver, or -1 for no limit." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Unsupervised Outlier Detection.\n\nEstimate the support of a high-dimensional distribution.\n\nThe implementation is based on libsvm.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, kernel='rbf', degree=3, gamma='scale',\n coef0=0.0, tol=1e-3, nu=0.5, shrinking=True, cache_size=200,\n verbose=False, max_iter=-1):\n\n super().__init__(\n kernel, degree, gamma, coef0, tol, 0., nu, 0.,\n shrinking, False, cache_size, None, verbose, max_iter,\n random_state=None)" + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/_more_tags", + "name": "_more_tags", + "qname": "sklearn.svm._classes.OneClassSVM._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/_more_tags/self", + "name": "self", + "qname": "sklearn.svm._classes.OneClassSVM._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/decision_function", + "name": "decision_function", + "qname": "sklearn.svm._classes.OneClassSVM.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/decision_function/self", + "name": "self", + "qname": "sklearn.svm._classes.OneClassSVM.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/decision_function/X", + "name": "X", + "qname": "sklearn.svm._classes.OneClassSVM.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data matrix." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Signed distance to the separating hyperplane.\n\nSigned distance is positive for an inlier and negative for an outlier.", + "docstring": "Signed distance to the separating hyperplane.\n\nSigned distance is positive for an inlier and negative for an outlier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\ndec : ndarray of shape (n_samples,)\n Returns the decision function of the samples.", + "code": " def decision_function(self, X):\n \"\"\"Signed distance to the separating hyperplane.\n\n Signed distance is positive for an inlier and negative for an outlier.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n dec : ndarray of shape (n_samples,)\n Returns the decision function of the samples.\n \"\"\"\n dec = self._decision_function(X).ravel()\n return dec" + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/fit", + "name": "fit", + "qname": "sklearn.svm._classes.OneClassSVM.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/fit/self", + "name": "self", + "qname": "sklearn.svm._classes.OneClassSVM.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/fit/X", + "name": "X", + "qname": "sklearn.svm._classes.OneClassSVM.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Set of samples, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/fit/y", + "name": "y", + "qname": "sklearn.svm._classes.OneClassSVM.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "Ignored", + "default_value": "", + "description": "not used, present for API consistency by convention." + }, + "type": { + "kind": "NamedType", + "name": "Ignored" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.svm._classes.OneClassSVM.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Per-sample weights. Rescale C per sample. Higher weights\nforce the classifier to put more emphasis on these points." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/fit/params", + "name": "params", + "qname": "sklearn.svm._classes.OneClassSVM.fit.params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Detects the soft boundary of the set of samples X.", + "docstring": "Detects the soft boundary of the set of samples X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Set of samples, where n_samples is the number of samples and\n n_features is the number of features.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Per-sample weights. Rescale C per sample. Higher weights\n force the classifier to put more emphasis on these points.\n\ny : Ignored\n not used, present for API consistency by convention.\n\nReturns\n-------\nself : object\n\nNotes\n-----\nIf X is not a C-ordered contiguous array it is copied.", + "code": " def fit(self, X, y=None, sample_weight=None, **params):\n \"\"\"Detects the soft boundary of the set of samples X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n Set of samples, where n_samples is the number of samples and\n n_features is the number of features.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Per-sample weights. Rescale C per sample. Higher weights\n force the classifier to put more emphasis on these points.\n\n y : Ignored\n not used, present for API consistency by convention.\n\n Returns\n -------\n self : object\n\n Notes\n -----\n If X is not a C-ordered contiguous array it is copied.\n\n \"\"\"\n super().fit(X, np.ones(_num_samples(X)),\n sample_weight=sample_weight, **params)\n self.offset_ = -self._intercept_\n return self" + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/predict", + "name": "predict", + "qname": "sklearn.svm._classes.OneClassSVM.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/predict/self", + "name": "self", + "qname": "sklearn.svm._classes.OneClassSVM.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/predict/X", + "name": "X", + "qname": "sklearn.svm._classes.OneClassSVM.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples_test, n_samples_train)", + "default_value": "", + "description": "For kernel=\"precomputed\", the expected shape of X is\n(n_samples_test, n_samples_train)." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features) or (n_samples_test, n_samples_train)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Perform classification on samples in X.\n\nFor a one-class model, +1 or -1 is returned.", + "docstring": "Perform classification on samples in X.\n\nFor a one-class model, +1 or -1 is returned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n Class labels for samples in X.", + "code": " def predict(self, X):\n \"\"\"Perform classification on samples in X.\n\n For a one-class model, +1 or -1 is returned.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features) or \\\n (n_samples_test, n_samples_train)\n For kernel=\"precomputed\", the expected shape of X is\n (n_samples_test, n_samples_train).\n\n Returns\n -------\n y_pred : ndarray of shape (n_samples,)\n Class labels for samples in X.\n \"\"\"\n y = super().predict(X)\n return np.asarray(y, dtype=np.intp)" + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/probA_@getter", + "name": "probA_", + "qname": "sklearn.svm._classes.OneClassSVM.probA_", + "decorators": [ + "deprecated('The probA_ attribute is deprecated in version 0.23 and will be removed in version 1.0.')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/probA_/self", + "name": "self", + "qname": "sklearn.svm._classes.OneClassSVM.probA_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"The probA_ attribute is deprecated in version 0.23 and will be \"\n \"removed in version 1.0.\")\n @property\n def probA_(self):\n return self._probA" + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/probB_@getter", + "name": "probB_", + "qname": "sklearn.svm._classes.OneClassSVM.probB_", + "decorators": [ + "deprecated('The probB_ attribute is deprecated in version 0.23 and will be removed in version 1.0.')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/probB_/self", + "name": "self", + "qname": "sklearn.svm._classes.OneClassSVM.probB_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"The probB_ attribute is deprecated in version 0.23 and will be \"\n \"removed in version 1.0.\")\n @property\n def probB_(self):\n return self._probB" + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/score_samples", + "name": "score_samples", + "qname": "sklearn.svm._classes.OneClassSVM.score_samples", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/score_samples/self", + "name": "self", + "qname": 
"sklearn.svm._classes.OneClassSVM.score_samples.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._classes/OneClassSVM/score_samples/X", + "name": "X", + "qname": "sklearn.svm._classes.OneClassSVM.score_samples.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data matrix." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Raw scoring function of the samples.", + "docstring": "Raw scoring function of the samples.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nscore_samples : ndarray of shape (n_samples,)\n Returns the (unshifted) scoring function of the samples.", + "code": " def score_samples(self, X):\n \"\"\"Raw scoring function of the samples.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data matrix.\n\n Returns\n -------\n score_samples : ndarray of shape (n_samples,)\n Returns the (unshifted) scoring function of the samples.\n \"\"\"\n return self.decision_function(X) + self.offset_" + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__", + "name": "__init__", + "qname": "sklearn.svm._classes.SVC.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/self", + "name": "self", + "qname": "sklearn.svm._classes.SVC.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/C", + "name": "C", + "qname": "sklearn.svm._classes.SVC.__init__.C", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Regularization parameter. The strength of the regularization is\ninversely proportional to C. Must be strictly positive. The penalty\nis a squared l2 penalty." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/kernel", + "name": "kernel", + "qname": "sklearn.svm._classes.SVC.__init__.kernel", + "default_value": "'rbf'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}", + "default_value": "'rbf'", + "description": "Specifies the kernel type to be used in the algorithm.\nIt must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\na callable.\nIf none is given, 'rbf' will be used. If a callable is given it is\nused to pre-compute the kernel matrix from data matrices; that matrix\nshould be an array of shape ``(n_samples, n_samples)``." 
+ }, + "type": { + "kind": "EnumType", + "values": ["sigmoid", "rbf", "linear", "precomputed", "poly"] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/degree", + "name": "degree", + "qname": "sklearn.svm._classes.SVC.__init__.degree", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Degree of the polynomial kernel function ('poly').\nIgnored by all other kernels." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/gamma", + "name": "gamma", + "qname": "sklearn.svm._classes.SVC.__init__.gamma", + "default_value": "'scale'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'scale', 'auto'} or float", + "default_value": "'scale'", + "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "scale"] + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/coef0", + "name": "coef0", + "qname": "sklearn.svm._classes.SVC.__init__.coef0", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Independent term in kernel function.\nIt is only significant in 'poly' and 'sigmoid'." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/shrinking", + "name": "shrinking", + "qname": "sklearn.svm._classes.SVC.__init__.shrinking", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to use the shrinking heuristic.\nSee the :ref:`User Guide `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/probability", + "name": "probability", + "qname": "sklearn.svm._classes.SVC.__init__.probability", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to enable probability estimates. This must be enabled prior\nto calling `fit`, will slow down that method as it internally uses\n5-fold cross-validation, and `predict_proba` may be inconsistent with\n`predict`. Read more in the :ref:`User Guide `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/tol", + "name": "tol", + "qname": "sklearn.svm._classes.SVC.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Tolerance for stopping criterion." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/cache_size", + "name": "cache_size", + "qname": "sklearn.svm._classes.SVC.__init__.cache_size", + "default_value": "200", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "200", + "description": "Specify the size of the kernel cache (in MB)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.svm._classes.SVC.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict or 'balanced'", + "default_value": "None", + "description": "Set the parameter C of class i to class_weight[i]*C for\nSVC. If not given, all classes are supposed to have\nweight one.\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "'balanced'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/verbose", + "name": "verbose", + "qname": "sklearn.svm._classes.SVC.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Enable verbose output. Note that this setting takes advantage of a\nper-process runtime setting in libsvm that, if enabled, may not work\nproperly in a multithreaded context." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.svm._classes.SVC.__init__.max_iter", + "default_value": "-1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "-1", + "description": "Hard limit on iterations within solver, or -1 for no limit." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/decision_function_shape", + "name": "decision_function_shape", + "qname": "sklearn.svm._classes.SVC.__init__.decision_function_shape", + "default_value": "'ovr'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'ovo', 'ovr'}", + "default_value": "'ovr'", + "description": "Whether to return a one-vs-rest ('ovr') decision function of shape\n(n_samples, n_classes) as all other classifiers, or the original\none-vs-one ('ovo') decision function of libsvm which has shape\n(n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n('ovo') is always used as multi-class strategy. The parameter is\nignored for binary classification.\n\n.. versionchanged:: 0.19\n decision_function_shape is 'ovr' by default.\n\n.. versionadded:: 0.17\n *decision_function_shape='ovr'* is recommended.\n\n.. versionchanged:: 0.17\n Deprecated *decision_function_shape='ovo' and None*." 
+ }, + "type": { + "kind": "EnumType", + "values": ["ovo", "ovr"] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/break_ties", + "name": "break_ties", + "qname": "sklearn.svm._classes.SVC.__init__.break_ties", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n:term:`predict` will break ties according to the confidence values of\n:term:`decision_function`; otherwise the first class among the tied\nclasses is returned. Please note that breaking ties comes at a\nrelatively high computational cost compared to a simple predict.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/__init__/random_state", + "name": "random_state", + "qname": "sklearn.svm._classes.SVC.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the pseudo random number generation for shuffling the data for\nprobability estimates. Ignored when `probability` is False.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "C-Support Vector Classification.\n\nThe implementation is based on libsvm. The fit time scales at least\nquadratically with the number of samples and may be impractical\nbeyond tens of thousands of samples. 
For large datasets\nconsider using :class:`~sklearn.svm.LinearSVC` or\n:class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nThe multiclass support is handled according to a one-vs-one scheme.\n\nFor details on the precise mathematical formulation of the provided\nkernel functions and how `gamma`, `coef0` and `degree` affect each\nother, see the corresponding section in the narrative documentation:\n:ref:`svm_kernels`.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, C=1.0, kernel='rbf', degree=3, gamma='scale',\n coef0=0.0, shrinking=True, probability=False,\n tol=1e-3, cache_size=200, class_weight=None,\n verbose=False, max_iter=-1, decision_function_shape='ovr',\n break_ties=False,\n random_state=None):\n\n super().__init__(\n kernel=kernel, degree=degree, gamma=gamma,\n coef0=coef0, tol=tol, C=C, nu=0., shrinking=shrinking,\n probability=probability, cache_size=cache_size,\n class_weight=class_weight, verbose=verbose, max_iter=max_iter,\n decision_function_shape=decision_function_shape,\n break_ties=break_ties,\n random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVC/_more_tags", + "name": "_more_tags", + "qname": "sklearn.svm._classes.SVC._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/SVC/_more_tags/self", + "name": "self", + "qname": "sklearn.svm._classes.SVC._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/__init__", + "name": "__init__", + "qname": "sklearn.svm._classes.SVR.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/SVR/__init__/self", + "name": "self", + "qname": "sklearn.svm._classes.SVR.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/__init__/kernel", + "name": "kernel", + "qname": "sklearn.svm._classes.SVR.__init__.kernel", + "default_value": "'rbf'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}", + "default_value": "'rbf'", + "description": "Specifies the kernel type to be used in the algorithm.\nIt must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\na callable.\nIf none is given, 'rbf' will be used. If a callable is given it is\nused to precompute the kernel matrix." 
+ }, + "type": { + "kind": "EnumType", + "values": ["sigmoid", "rbf", "linear", "precomputed", "poly"] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/__init__/degree", + "name": "degree", + "qname": "sklearn.svm._classes.SVR.__init__.degree", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Degree of the polynomial kernel function ('poly').\nIgnored by all other kernels." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/__init__/gamma", + "name": "gamma", + "qname": "sklearn.svm._classes.SVR.__init__.gamma", + "default_value": "'scale'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'scale', 'auto'} or float", + "default_value": "'scale'", + "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "scale"] + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/__init__/coef0", + "name": "coef0", + "qname": "sklearn.svm._classes.SVR.__init__.coef0", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "Independent term in kernel function.\nIt is only significant in 'poly' and 'sigmoid'." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/__init__/tol", + "name": "tol", + "qname": "sklearn.svm._classes.SVR.__init__.tol", + "default_value": "0.001", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-3", + "description": "Tolerance for stopping criterion." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/__init__/C", + "name": "C", + "qname": "sklearn.svm._classes.SVR.__init__.C", + "default_value": "1.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1.0", + "description": "Regularization parameter. The strength of the regularization is\ninversely proportional to C. Must be strictly positive.\nThe penalty is a squared l2 penalty." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/__init__/epsilon", + "name": "epsilon", + "qname": "sklearn.svm._classes.SVR.__init__.epsilon", + "default_value": "0.1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.1", + "description": "Epsilon in the epsilon-SVR model. It specifies the epsilon-tube\nwithin which no penalty is associated in the training loss function\nwith points predicted within a distance epsilon from the actual\nvalue." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/__init__/shrinking", + "name": "shrinking", + "qname": "sklearn.svm._classes.SVR.__init__.shrinking", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to use the shrinking heuristic.\nSee the :ref:`User Guide `." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/__init__/cache_size", + "name": "cache_size", + "qname": "sklearn.svm._classes.SVR.__init__.cache_size", + "default_value": "200", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "200", + "description": "Specify the size of the kernel cache (in MB)." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/__init__/verbose", + "name": "verbose", + "qname": "sklearn.svm._classes.SVR.__init__.verbose", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Enable verbose output. Note that this setting takes advantage of a\nper-process runtime setting in libsvm that, if enabled, may not work\nproperly in a multithreaded context." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/__init__/max_iter", + "name": "max_iter", + "qname": "sklearn.svm._classes.SVR.__init__.max_iter", + "default_value": "-1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "-1", + "description": "Hard limit on iterations within solver, or -1 for no limit." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Epsilon-Support Vector Regression.\n\nThe free parameters in the model are C and epsilon.\n\nThe implementation is based on libsvm. The fit time complexity\nis more than quadratic with the number of samples which makes it hard\nto scale to datasets with more than a couple of 10000 samples. 
For large\ndatasets consider using :class:`~sklearn.svm.LinearSVR` or\n:class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *, kernel='rbf', degree=3, gamma='scale',\n coef0=0.0, tol=1e-3, C=1.0, epsilon=0.1, shrinking=True,\n cache_size=200, verbose=False, max_iter=-1):\n\n super().__init__(\n kernel=kernel, degree=degree, gamma=gamma,\n coef0=coef0, tol=tol, C=C, nu=0., epsilon=epsilon, verbose=verbose,\n shrinking=shrinking, probability=False, cache_size=cache_size,\n class_weight=None, max_iter=max_iter, random_state=None)" + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/_more_tags", + "name": "_more_tags", + "qname": "sklearn.svm._classes.SVR._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/SVR/_more_tags/self", + "name": "self", + "qname": "sklearn.svm._classes.SVR._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {\n '_xfail_checks': {\n 'check_sample_weights_invariance':\n 'zero sample_weight is not equivalent to removing samples',\n }\n }" + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/probA_@getter", + "name": "probA_", + "qname": "sklearn.svm._classes.SVR.probA_", + "decorators": [ + "deprecated('The probA_ attribute is deprecated in version 0.23 and will be removed in version 1.0 (renaming of 0.25).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/SVR/probA_/self", + "name": "self", + "qname": "sklearn.svm._classes.SVR.probA_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"The probA_ attribute is deprecated in version 0.23 and will be \"\n \"removed in version 1.0 (renaming of 0.25).\")\n @property\n def probA_(self):\n return self._probA" + }, + { + "id": "scikit-learn/sklearn.svm._classes/SVR/probB_@getter", + "name": "probB_", + "qname": "sklearn.svm._classes.SVR.probB_", + "decorators": [ + "deprecated('The probB_ attribute is deprecated in version 0.23 and will be removed in version 1.0 (renaming of 0.25).')", + "property" + ], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm._classes/SVR/probB_/self", + "name": "self", + "qname": "sklearn.svm._classes.SVR.probB_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " @deprecated( # type: ignore\n \"The probB_ attribute is deprecated in version 0.23 and will be \"\n \"removed in version 1.0 (renaming of 0.25).\")\n @property\n def probB_(self):\n return self._probB" + }, + { + "id": "scikit-learn/sklearn.svm.setup/configuration", + "name": "configuration", + "qname": "sklearn.svm.setup.configuration", + "decorators": 
[], + "parameters": [ + { + "id": "scikit-learn/sklearn.svm.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.svm.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.svm.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.svm.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package='', top_path=None):\n from numpy.distutils.misc_util import Configuration\n\n config = Configuration('svm', parent_package, top_path)\n\n config.add_subpackage('tests')\n\n # newrand wrappers\n config.add_extension('_newrand',\n sources=['_newrand.pyx'],\n include_dirs=[numpy.get_include(),\n join('src', 'newrand')],\n depends=[join('src', 'newrand', 'newrand.h')],\n language='c++',\n # Use C++11 random number generator fix\n extra_compile_args=['-std=c++11']\n )\n\n # Section LibSVM\n\n # we compile both libsvm and libsvm_sparse\n config.add_library('libsvm-skl',\n sources=[join('src', 'libsvm', 'libsvm_template.cpp')],\n depends=[join('src', 'libsvm', 'svm.cpp'),\n join('src', 'libsvm', 'svm.h'),\n join('src', 'newrand', 'newrand.h')],\n # Force C++ linking in case gcc is picked up instead\n # of g++ under windows with some versions of MinGW\n extra_link_args=['-lstdc++'],\n # Use C++11 to use the random number generator fix\n extra_compiler_args=['-std=c++11'],\n )\n\n libsvm_sources = ['_libsvm.pyx']\n libsvm_depends = [join('src', 'libsvm', 'libsvm_helper.c'),\n join('src', 'libsvm', 'libsvm_template.cpp'),\n join('src', 'libsvm', 'svm.cpp'),\n join('src', 'libsvm', 'svm.h'),\n join('src', 'newrand', 'newrand.h')]\n\n config.add_extension('_libsvm',\n sources=libsvm_sources,\n include_dirs=[numpy.get_include(),\n join('src', 'libsvm'),\n join('src', 'newrand')],\n libraries=['libsvm-skl'],\n depends=libsvm_depends,\n )\n\n # liblinear module\n libraries = []\n if os.name == 'posix':\n libraries.append('m')\n\n # precompile liblinear to use C++11 flag\n config.add_library('liblinear-skl',\n sources=[join('src', 'liblinear', 'linear.cpp'),\n join('src', 'liblinear', 'tron.cpp')],\n depends=[join('src', 'liblinear', 'linear.h'),\n join('src', 'liblinear', 'tron.h'),\n join('src', 'newrand', 'newrand.h')],\n # Force C++ linking in case gcc is picked up instead\n # of g++ under windows with some versions of MinGW\n extra_link_args=['-lstdc++'],\n # Use C++11 to use the random number generator fix\n extra_compiler_args=['-std=c++11'],\n )\n\n liblinear_sources = ['_liblinear.pyx']\n liblinear_depends = [join('src', 'liblinear', '*.h'),\n join('src', 'newrand', 'newrand.h'),\n join('src', 'liblinear', 'liblinear_helper.c')]\n\n config.add_extension('_liblinear',\n sources=liblinear_sources,\n libraries=['liblinear-skl'] + libraries,\n include_dirs=[join('.', 'src', 'liblinear'),\n join('.', 'src', 'newrand'),\n join('..', 'utils'),\n numpy.get_include()],\n depends=liblinear_depends,\n # extra_compile_args=['-O0 -fno-inline'],\n )\n\n # end liblinear module\n\n # this should go *after* libsvm-skl\n libsvm_sparse_sources = ['_libsvm_sparse.pyx']\n 
config.add_extension('_libsvm_sparse', libraries=['libsvm-skl'],\n sources=libsvm_sparse_sources,\n include_dirs=[numpy.get_include(),\n join(\"src\", \"libsvm\"),\n join(\"src\", \"newrand\")],\n depends=[join(\"src\", \"libsvm\", \"svm.h\"),\n join('src', 'newrand', 'newrand.h'),\n join(\"src\", \"libsvm\",\n \"libsvm_sparse_helper.c\")])\n\n return config" + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__", + "name": "__init__", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__", + "decorators": ["abstractmethod", "_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/self", + "name": "self", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/criterion", + "name": "criterion", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.criterion", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/splitter", + "name": "splitter", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.splitter", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.max_depth", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/min_samples_split", + "name": "min_samples_split", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.min_samples_split", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.min_samples_leaf", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/min_weight_fraction_leaf", + "name": "min_weight_fraction_leaf", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.min_weight_fraction_leaf", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/max_features", + "name": "max_features", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.max_features", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.max_leaf_nodes", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/random_state", + "name": "random_state", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.random_state", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/min_impurity_decrease", + "name": "min_impurity_decrease", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.min_impurity_decrease", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/min_impurity_split", + "name": "min_impurity_split", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.min_impurity_split", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/__init__/ccp_alpha", + "name": "ccp_alpha", + "qname": "sklearn.tree._classes.BaseDecisionTree.__init__.ccp_alpha", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Base class for decision trees.\n\nWarning: This class should not be used directly.\nUse derived classes instead.", + "docstring": "", + "code": " @abstractmethod\n @_deprecate_positional_args\n def __init__(self, *,\n criterion,\n splitter,\n max_depth,\n min_samples_split,\n min_samples_leaf,\n min_weight_fraction_leaf,\n max_features,\n max_leaf_nodes,\n random_state,\n min_impurity_decrease,\n min_impurity_split,\n class_weight=None,\n ccp_alpha=0.0):\n self.criterion = criterion\n self.splitter = splitter\n self.max_depth = max_depth\n self.min_samples_split = min_samples_split\n self.min_samples_leaf = min_samples_leaf\n self.min_weight_fraction_leaf = min_weight_fraction_leaf\n self.max_features = max_features\n self.max_leaf_nodes = max_leaf_nodes\n self.random_state = random_state\n self.min_impurity_decrease = min_impurity_decrease\n self.min_impurity_split = min_impurity_split\n self.class_weight = class_weight\n self.ccp_alpha = ccp_alpha" + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/_prune_tree", + "name": "_prune_tree", + "qname": "sklearn.tree._classes.BaseDecisionTree._prune_tree", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.tree._classes/BaseDecisionTree/_prune_tree/self", + "name": "self", + "qname": "sklearn.tree._classes.BaseDecisionTree._prune_tree.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Prune tree using Minimal Cost-Complexity Pruning.", + "docstring": "Prune tree using Minimal Cost-Complexity Pruning.", + "code": " def _prune_tree(self):\n \"\"\"Prune tree using Minimal Cost-Complexity Pruning.\"\"\"\n check_is_fitted(self)\n\n if self.ccp_alpha < 0.0:\n raise ValueError(\"ccp_alpha must be greater than or equal to 0\")\n\n if self.ccp_alpha == 0.0:\n return\n\n # build pruned tree\n if is_classifier(self):\n n_classes = np.atleast_1d(self.n_classes_)\n pruned_tree = Tree(self.n_features_, n_classes, self.n_outputs_)\n else:\n pruned_tree = Tree(self.n_features_,\n # TODO: the tree shouldn't need this param\n np.array([1] * self.n_outputs_, dtype=np.intp),\n self.n_outputs_)\n _build_pruned_tree_ccp(pruned_tree, self.tree_, self.ccp_alpha)\n\n self.tree_ = pruned_tree" + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/_validate_X_predict", + "name": "_validate_X_predict", + "qname": "sklearn.tree._classes.BaseDecisionTree._validate_X_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/_validate_X_predict/self", + "name": "self", + "qname": "sklearn.tree._classes.BaseDecisionTree._validate_X_predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/_validate_X_predict/X", + "name": "X", + "qname": "sklearn.tree._classes.BaseDecisionTree._validate_X_predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/_validate_X_predict/check_input", + "name": "check_input", + "qname": "sklearn.tree._classes.BaseDecisionTree._validate_X_predict.check_input", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate the training data on predict (probabilities).", + "docstring": "Validate the training data on predict (probabilities).", + "code": " def _validate_X_predict(self, X, check_input):\n \"\"\"Validate the training data on predict (probabilities).\"\"\"\n if check_input:\n X = self._validate_data(X, dtype=DTYPE, accept_sparse=\"csr\",\n reset=False)\n if issparse(X) and (X.indices.dtype != np.intc or\n X.indptr.dtype != np.intc):\n raise ValueError(\"No support for np.int64 index based \"\n \"sparse matrices\")\n else:\n # The number of features is checked regardless of `check_input`\n self._check_n_features(X, reset=False)\n return X" + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/apply", + "name": "apply", + "qname": "sklearn.tree._classes.BaseDecisionTree.apply", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.tree._classes/BaseDecisionTree/apply/self", + "name": "self", + "qname": "sklearn.tree._classes.BaseDecisionTree.apply.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/apply/X", + "name": "X", + "qname": "sklearn.tree._classes.BaseDecisionTree.apply.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/apply/check_input", + "name": "check_input", + "qname": "sklearn.tree._classes.BaseDecisionTree.apply.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you do." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the index of the leaf that each sample is predicted as.\n\n.. versionadded:: 0.17", + "docstring": "Return the index of the leaf that each sample is predicted as.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nReturns\n-------\nX_leaves : array-like of shape (n_samples,)\n For each datapoint x in X, return the index of the leaf x\n ends up in. Leaves are numbered within\n ``[0; self.tree_.node_count)``, possibly with gaps in the\n numbering.", + "code": " def apply(self, X, check_input=True):\n \"\"\"Return the index of the leaf that each sample is predicted as.\n\n .. versionadded:: 0.17\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n Returns\n -------\n X_leaves : array-like of shape (n_samples,)\n For each datapoint x in X, return the index of the leaf x\n ends up in. 
Leaves are numbered within\n            ``[0; self.tree_.node_count)``, possibly with gaps in the\n            numbering.\n        \"\"\"\n        check_is_fitted(self)\n        X = self._validate_X_predict(X, check_input)\n        return self.tree_.apply(X)" + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/cost_complexity_pruning_path", + "name": "cost_complexity_pruning_path", + "qname": "sklearn.tree._classes.BaseDecisionTree.cost_complexity_pruning_path", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/cost_complexity_pruning_path/self", + "name": "self", + "qname": "sklearn.tree._classes.BaseDecisionTree.cost_complexity_pruning_path.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/cost_complexity_pruning_path/X", + "name": "X", + "qname": "sklearn.tree._classes.BaseDecisionTree.cost_complexity_pruning_path.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csc_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + }, + { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/cost_complexity_pruning_path/y", + "name": "y", + "qname": "sklearn.tree._classes.BaseDecisionTree.cost_complexity_pruning_path.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "The target values (class labels) as integers or strings." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/cost_complexity_pruning_path/sample_weight", + "name": "sample_weight", + "qname": "sklearn.tree._classes.BaseDecisionTree.cost_complexity_pruning_path.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted. Splits\nthat would create child nodes with net zero or negative weight are\nignored while searching for a split in each node. Splits are also\nignored if they would result in any single class carrying a\nnegative weight in either child node." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the pruning path during Minimal Cost-Complexity Pruning.\n\nSee :ref:`minimal_cost_complexity_pruning` for details on the pruning\nprocess.", + "docstring": "Compute the pruning path during Minimal Cost-Complexity Pruning.\n\nSee :ref:`minimal_cost_complexity_pruning` for details on the pruning\nprocess.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n    The training input samples. 
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels) as integers or strings.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. Splits are also\n ignored if they would result in any single class carrying a\n negative weight in either child node.\n\nReturns\n-------\nccp_path : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n ccp_alphas : ndarray\n Effective alphas of subtree during pruning.\n\n impurities : ndarray\n Sum of the impurities of the subtree leaves for the\n corresponding alpha value in ``ccp_alphas``.", + "code": " def cost_complexity_pruning_path(self, X, y, sample_weight=None):\n \"\"\"Compute the pruning path during Minimal Cost-Complexity Pruning.\n\n See :ref:`minimal_cost_complexity_pruning` for details on the pruning\n process.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels) as integers or strings.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. Splits are also\n ignored if they would result in any single class carrying a\n negative weight in either child node.\n\n Returns\n -------\n ccp_path : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n ccp_alphas : ndarray\n Effective alphas of subtree during pruning.\n\n impurities : ndarray\n Sum of the impurities of the subtree leaves for the\n corresponding alpha value in ``ccp_alphas``.\n \"\"\"\n est = clone(self).set_params(ccp_alpha=0.0)\n est.fit(X, y, sample_weight=sample_weight)\n return Bunch(**ccp_pruning_path(est.tree_))" + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/decision_path", + "name": "decision_path", + "qname": "sklearn.tree._classes.BaseDecisionTree.decision_path", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/decision_path/self", + "name": "self", + "qname": "sklearn.tree._classes.BaseDecisionTree.decision_path.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/decision_path/X", + "name": "X", + "qname": "sklearn.tree._classes.BaseDecisionTree.decision_path.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/decision_path/check_input", + "name": "check_input", + "qname": "sklearn.tree._classes.BaseDecisionTree.decision_path.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you do." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the decision path in the tree.\n\n.. versionadded:: 0.18", + "docstring": "Return the decision path in the tree.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nReturns\n-------\nindicator : sparse matrix of shape (n_samples, n_nodes)\n Return a node indicator CSR matrix where non zero elements\n indicates that the samples goes through the nodes.", + "code": " def decision_path(self, X, check_input=True):\n \"\"\"Return the decision path in the tree.\n\n .. versionadded:: 0.18\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n Returns\n -------\n indicator : sparse matrix of shape (n_samples, n_nodes)\n Return a node indicator CSR matrix where non zero elements\n indicates that the samples goes through the nodes.\n \"\"\"\n X = self._validate_X_predict(X, check_input)\n return self.tree_.decision_path(X)" + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/feature_importances_@getter", + "name": "feature_importances_", + "qname": "sklearn.tree._classes.BaseDecisionTree.feature_importances_", + "decorators": ["property"], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/feature_importances_/self", + "name": "self", + "qname": "sklearn.tree._classes.BaseDecisionTree.feature_importances_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the feature importances.\n\nThe importance of a feature is computed as the (normalized) total\nreduction of the criterion brought by that feature.\nIt is also known as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). 
See\n:func:`sklearn.inspection.permutation_importance` as an alternative.", + "docstring": "Return the feature importances.\n\nThe importance of a feature is computed as the (normalized) total\nreduction of the criterion brought by that feature.\nIt is also known as the Gini importance.\n\nWarning: impurity-based feature importances can be misleading for\nhigh cardinality features (many unique values). See\n:func:`sklearn.inspection.permutation_importance` as an alternative.\n\nReturns\n-------\nfeature_importances_ : ndarray of shape (n_features,)\n Normalized total reduction of criteria by feature\n (Gini importance).", + "code": " @property\n def feature_importances_(self):\n \"\"\"Return the feature importances.\n\n The importance of a feature is computed as the (normalized) total\n reduction of the criterion brought by that feature.\n It is also known as the Gini importance.\n\n Warning: impurity-based feature importances can be misleading for\n high cardinality features (many unique values). See\n :func:`sklearn.inspection.permutation_importance` as an alternative.\n\n Returns\n -------\n feature_importances_ : ndarray of shape (n_features,)\n Normalized total reduction of criteria by feature\n (Gini importance).\n \"\"\"\n check_is_fitted(self)\n\n return self.tree_.compute_feature_importances()" + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/fit", + "name": "fit", + "qname": "sklearn.tree._classes.BaseDecisionTree.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/fit/self", + "name": "self", + "qname": "sklearn.tree._classes.BaseDecisionTree.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/fit/X", + "name": "X", + "qname": "sklearn.tree._classes.BaseDecisionTree.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/fit/y", + "name": "y", + "qname": "sklearn.tree._classes.BaseDecisionTree.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.tree._classes.BaseDecisionTree.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/fit/check_input", + "name": "check_input", + "qname": "sklearn.tree._classes.BaseDecisionTree.fit.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/fit/X_idx_sorted", + "name": "X_idx_sorted", + "qname": "sklearn.tree._classes.BaseDecisionTree.fit.X_idx_sorted", + "default_value": "'deprecated'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
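As the warning in the docstring suggests, impurity-based importances can be cross-checked against :func:`sklearn.inspection.permutation_importance`. A sketch with illustrative data and an arbitrary ``n_repeats``::

    from sklearn.datasets import load_iris
    from sklearn.inspection import permutation_importance
    from sklearn.tree import DecisionTreeClassifier

    X, y = load_iris(return_X_y=True)
    clf = DecisionTreeClassifier(random_state=0).fit(X, y)

    print(clf.feature_importances_)   # normalized Gini importances, sum to 1

    # The docstring's suggested alternative for high-cardinality features.
    result = permutation_importance(clf, X, y, n_repeats=10, random_state=0)
    print(result.importances_mean)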
"description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def fit(self, X, y, sample_weight=None, check_input=True,\n X_idx_sorted=\"deprecated\"):\n\n random_state = check_random_state(self.random_state)\n\n if self.ccp_alpha < 0.0:\n raise ValueError(\"ccp_alpha must be greater than or equal to 0\")\n\n if check_input:\n # Need to validate separately here.\n # We can't pass multi_ouput=True because that would allow y to be\n # csr.\n check_X_params = dict(dtype=DTYPE, accept_sparse=\"csc\")\n check_y_params = dict(ensure_2d=False, dtype=None)\n X, y = self._validate_data(X, y,\n validate_separately=(check_X_params,\n check_y_params))\n if issparse(X):\n X.sort_indices()\n\n if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:\n raise ValueError(\"No support for np.int64 index based \"\n \"sparse matrices\")\n\n if self.criterion == \"poisson\":\n if np.any(y < 0):\n raise ValueError(\"Some value(s) of y are negative which is\"\n \" not allowed for Poisson regression.\")\n if np.sum(y) <= 0:\n raise ValueError(\"Sum of y is not positive which is \"\n \"necessary for Poisson regression.\")\n\n # Determine output settings\n n_samples, self.n_features_ = X.shape\n self.n_features_in_ = self.n_features_\n is_classification = is_classifier(self)\n\n y = np.atleast_1d(y)\n expanded_class_weight = None\n\n if y.ndim == 1:\n # reshape is necessary to preserve the data contiguity against vs\n # [:, np.newaxis] that does not.\n y = np.reshape(y, (-1, 1))\n\n self.n_outputs_ = y.shape[1]\n\n if is_classification:\n check_classification_targets(y)\n y = np.copy(y)\n\n self.classes_ = []\n self.n_classes_ = []\n\n if self.class_weight is not None:\n y_original = np.copy(y)\n\n y_encoded = np.zeros(y.shape, dtype=int)\n for k in range(self.n_outputs_):\n classes_k, y_encoded[:, k] = np.unique(y[:, k],\n return_inverse=True)\n self.classes_.append(classes_k)\n self.n_classes_.append(classes_k.shape[0])\n y = y_encoded\n\n if self.class_weight is not None:\n expanded_class_weight = compute_sample_weight(\n self.class_weight, y_original)\n\n self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)\n\n if getattr(y, \"dtype\", None) != DOUBLE or not y.flags.contiguous:\n y = np.ascontiguousarray(y, dtype=DOUBLE)\n\n # Check parameters\n max_depth = (np.iinfo(np.int32).max if self.max_depth is None\n else self.max_depth)\n max_leaf_nodes = (-1 if self.max_leaf_nodes is None\n else self.max_leaf_nodes)\n\n if isinstance(self.min_samples_leaf, numbers.Integral):\n if not 1 <= self.min_samples_leaf:\n raise ValueError(\"min_samples_leaf must be at least 1 \"\n \"or in (0, 0.5], got %s\"\n % self.min_samples_leaf)\n min_samples_leaf = self.min_samples_leaf\n else: # float\n if not 0. < self.min_samples_leaf <= 0.5:\n raise ValueError(\"min_samples_leaf must be at least 1 \"\n \"or in (0, 0.5], got %s\"\n % self.min_samples_leaf)\n min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples))\n\n if isinstance(self.min_samples_split, numbers.Integral):\n if not 2 <= self.min_samples_split:\n raise ValueError(\"min_samples_split must be an integer \"\n \"greater than 1 or a float in (0.0, 1.0]; \"\n \"got the integer %s\"\n % self.min_samples_split)\n min_samples_split = self.min_samples_split\n else: # float\n if not 0. 
< self.min_samples_split <= 1.:\n raise ValueError(\"min_samples_split must be an integer \"\n \"greater than 1 or a float in (0.0, 1.0]; \"\n \"got the float %s\"\n % self.min_samples_split)\n min_samples_split = int(ceil(self.min_samples_split * n_samples))\n min_samples_split = max(2, min_samples_split)\n\n min_samples_split = max(min_samples_split, 2 * min_samples_leaf)\n\n if isinstance(self.max_features, str):\n if self.max_features == \"auto\":\n if is_classification:\n max_features = max(1, int(np.sqrt(self.n_features_)))\n else:\n max_features = self.n_features_\n elif self.max_features == \"sqrt\":\n max_features = max(1, int(np.sqrt(self.n_features_)))\n elif self.max_features == \"log2\":\n max_features = max(1, int(np.log2(self.n_features_)))\n else:\n raise ValueError(\"Invalid value for max_features. \"\n \"Allowed string values are 'auto', \"\n \"'sqrt' or 'log2'.\")\n elif self.max_features is None:\n max_features = self.n_features_\n elif isinstance(self.max_features, numbers.Integral):\n max_features = self.max_features\n else: # float\n if self.max_features > 0.0:\n max_features = max(1,\n int(self.max_features * self.n_features_))\n else:\n max_features = 0\n\n self.max_features_ = max_features\n\n if len(y) != n_samples:\n raise ValueError(\"Number of labels=%d does not match \"\n \"number of samples=%d\" % (len(y), n_samples))\n if not 0 <= self.min_weight_fraction_leaf <= 0.5:\n raise ValueError(\"min_weight_fraction_leaf must in [0, 0.5]\")\n if max_depth <= 0:\n raise ValueError(\"max_depth must be greater than zero. \")\n if not (0 < max_features <= self.n_features_):\n raise ValueError(\"max_features must be in (0, n_features]\")\n if not isinstance(max_leaf_nodes, numbers.Integral):\n raise ValueError(\"max_leaf_nodes must be integral number but was \"\n \"%r\" % max_leaf_nodes)\n if -1 < max_leaf_nodes < 2:\n raise ValueError((\"max_leaf_nodes {0} must be either None \"\n \"or larger than 1\").format(max_leaf_nodes))\n\n if sample_weight is not None:\n sample_weight = _check_sample_weight(sample_weight, X, DOUBLE)\n\n if expanded_class_weight is not None:\n if sample_weight is not None:\n sample_weight = sample_weight * expanded_class_weight\n else:\n sample_weight = expanded_class_weight\n\n # Set min_weight_leaf from min_weight_fraction_leaf\n if sample_weight is None:\n min_weight_leaf = (self.min_weight_fraction_leaf *\n n_samples)\n else:\n min_weight_leaf = (self.min_weight_fraction_leaf *\n np.sum(sample_weight))\n\n min_impurity_split = self.min_impurity_split\n if min_impurity_split is not None:\n warnings.warn(\n \"The min_impurity_split parameter is deprecated. Its default \"\n \"value has changed from 1e-7 to 0 in version 0.23, and it \"\n \"will be removed in 1.0 (renaming of 0.25). Use the \"\n \"min_impurity_decrease parameter instead.\",\n FutureWarning\n )\n\n if min_impurity_split < 0.:\n raise ValueError(\"min_impurity_split must be greater than \"\n \"or equal to 0\")\n else:\n min_impurity_split = 0\n\n if self.min_impurity_decrease < 0.:\n raise ValueError(\"min_impurity_decrease must be greater than \"\n \"or equal to 0\")\n\n # TODO: Remove in 1.1\n if X_idx_sorted != \"deprecated\":\n warnings.warn(\n \"The parameter 'X_idx_sorted' is deprecated and has no \"\n \"effect. It will be removed in 1.1 (renaming of 0.26). 
You \"\n \"can suppress this warning by not passing any value to the \"\n \"'X_idx_sorted' parameter.\",\n FutureWarning\n )\n\n # Build tree\n criterion = self.criterion\n if not isinstance(criterion, Criterion):\n if is_classification:\n criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,\n self.n_classes_)\n else:\n criterion = CRITERIA_REG[self.criterion](self.n_outputs_,\n n_samples)\n else:\n # Make a deepcopy in case the criterion has mutable attributes that\n # might be shared and modified concurrently during parallel fitting\n criterion = copy.deepcopy(criterion)\n\n SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS\n\n splitter = self.splitter\n if not isinstance(self.splitter, Splitter):\n splitter = SPLITTERS[self.splitter](criterion,\n self.max_features_,\n min_samples_leaf,\n min_weight_leaf,\n random_state)\n\n if is_classifier(self):\n self.tree_ = Tree(self.n_features_,\n self.n_classes_, self.n_outputs_)\n else:\n self.tree_ = Tree(self.n_features_,\n # TODO: tree should't need this in this case\n np.array([1] * self.n_outputs_, dtype=np.intp),\n self.n_outputs_)\n\n # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise\n if max_leaf_nodes < 0:\n builder = DepthFirstTreeBuilder(splitter, min_samples_split,\n min_samples_leaf,\n min_weight_leaf,\n max_depth,\n self.min_impurity_decrease,\n min_impurity_split)\n else:\n builder = BestFirstTreeBuilder(splitter, min_samples_split,\n min_samples_leaf,\n min_weight_leaf,\n max_depth,\n max_leaf_nodes,\n self.min_impurity_decrease,\n min_impurity_split)\n\n builder.build(self.tree_, X, y, sample_weight)\n\n if self.n_outputs_ == 1 and is_classifier(self):\n self.n_classes_ = self.n_classes_[0]\n self.classes_ = self.classes_[0]\n\n self._prune_tree()\n\n return self" + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/get_depth", + "name": "get_depth", + "qname": "sklearn.tree._classes.BaseDecisionTree.get_depth", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/get_depth/self", + "name": "self", + "qname": "sklearn.tree._classes.BaseDecisionTree.get_depth.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the depth of the decision tree.\n\nThe depth of a tree is the maximum distance between the root\nand any leaf.", + "docstring": "Return the depth of the decision tree.\n\nThe depth of a tree is the maximum distance between the root\nand any leaf.\n\nReturns\n-------\nself.tree_.max_depth : int\n The maximum depth of the tree.", + "code": " def get_depth(self):\n \"\"\"Return the depth of the decision tree.\n\n The depth of a tree is the maximum distance between the root\n and any leaf.\n\n Returns\n -------\n self.tree_.max_depth : int\n The maximum depth of the tree.\n \"\"\"\n check_is_fitted(self)\n return self.tree_.max_depth" + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/get_n_leaves", + "name": "get_n_leaves", + "qname": "sklearn.tree._classes.BaseDecisionTree.get_n_leaves", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/get_n_leaves/self", + "name": "self", + "qname": "sklearn.tree._classes.BaseDecisionTree.get_n_leaves.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + 
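Two of the conversions performed by the captured ``fit`` can be reproduced standalone: fractional ``min_samples_leaf``/``min_samples_split`` become absolute counts, and classification targets are label-encoded per output. A simplified sketch (the values are arbitrary, and the real method performs many more checks)::

    import numpy as np
    from math import ceil

    n_samples = 150
    min_samples_leaf = 0.05      # float in (0, 0.5]: fraction of n_samples
    min_samples_split = 0.1      # float in (0.0, 1.0]

    leaf = int(ceil(min_samples_leaf * n_samples))            # 8
    split = max(2, int(ceil(min_samples_split * n_samples)))  # 15
    split = max(split, 2 * leaf)   # 16: a split must be able to feed two leaves

    # Per-output label encoding used for classification targets.
    y = np.array([["a"], ["b"], ["a"]])
    classes_k, y_encoded = np.unique(y[:, 0], return_inverse=True)
    print(leaf, split, classes_k, y_encoded)  # 8 16 ['a' 'b'] [0 1 0]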
"type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the number of leaves of the decision tree.", + "docstring": "Return the number of leaves of the decision tree.\n\nReturns\n-------\nself.tree_.n_leaves : int\n Number of leaves.", + "code": " def get_n_leaves(self):\n \"\"\"Return the number of leaves of the decision tree.\n\n Returns\n -------\n self.tree_.n_leaves : int\n Number of leaves.\n \"\"\"\n check_is_fitted(self)\n return self.tree_.n_leaves" + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/predict", + "name": "predict", + "qname": "sklearn.tree._classes.BaseDecisionTree.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/predict/self", + "name": "self", + "qname": "sklearn.tree._classes.BaseDecisionTree.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/predict/X", + "name": "X", + "qname": "sklearn.tree._classes.BaseDecisionTree.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/BaseDecisionTree/predict/check_input", + "name": "check_input", + "qname": "sklearn.tree._classes.BaseDecisionTree.predict.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you do." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class or regression value for X.\n\nFor a classification model, the predicted class for each sample in X is\nreturned. For a regression model, the predicted value based on X is\nreturned.", + "docstring": "Predict class or regression value for X.\n\nFor a classification model, the predicted class for each sample in X is\nreturned. For a regression model, the predicted value based on X is\nreturned.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nReturns\n-------\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The predicted classes, or the predict values.", + "code": " def predict(self, X, check_input=True):\n \"\"\"Predict class or regression value for X.\n\n For a classification model, the predicted class for each sample in X is\n returned. 
For a regression model, the predicted value based on X is\n returned.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n Returns\n -------\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The predicted classes, or the predict values.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_X_predict(X, check_input)\n proba = self.tree_.predict(X)\n n_samples = X.shape[0]\n\n # Classification\n if is_classifier(self):\n if self.n_outputs_ == 1:\n return self.classes_.take(np.argmax(proba, axis=1), axis=0)\n\n else:\n class_type = self.classes_[0].dtype\n predictions = np.zeros((n_samples, self.n_outputs_),\n dtype=class_type)\n for k in range(self.n_outputs_):\n predictions[:, k] = self.classes_[k].take(\n np.argmax(proba[:, k], axis=1),\n axis=0)\n\n return predictions\n\n # Regression\n else:\n if self.n_outputs_ == 1:\n return proba[:, 0]\n\n else:\n return proba[:, :, 0]" + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__", + "name": "__init__", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/self", + "name": "self", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/criterion", + "name": "criterion", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.criterion", + "default_value": "'gini'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"gini\", \"entropy\"}", + "default_value": "\"gini\"", + "description": "The function to measure the quality of a split. Supported criteria are\n\"gini\" for the Gini impurity and \"entropy\" for the information gain." + }, + "type": { + "kind": "EnumType", + "values": ["entropy", "gini"] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/splitter", + "name": "splitter", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.splitter", + "default_value": "'best'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"best\", \"random\"}", + "default_value": "\"best\"", + "description": "The strategy used to choose the split at each node. Supported\nstrategies are \"best\" to choose the best split and \"random\" to choose\nthe best random split." + }, + "type": { + "kind": "EnumType", + "values": ["best", "random"] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.max_depth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum depth of the tree. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples." 
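``predict`` returns class labels for classifiers (argmax over leaf class counts, mapped back through ``classes_``) and leaf values for regressors. A toy sketch with made-up one-feature data::

    import numpy as np
    from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

    X = np.array([[0.0], [1.0], [2.0], [3.0]])
    y_cls = np.array(["no", "no", "yes", "yes"])
    y_reg = np.array([0.1, 0.2, 1.8, 2.1])

    # Classifier: label of the majority class in the reached leaf.
    print(DecisionTreeClassifier(random_state=0).fit(X, y_cls).predict([[2.5]]))  # ['yes']
    # Regressor: value stored in the reached leaf.
    print(DecisionTreeRegressor(random_state=0).fit(X, y_reg).predict([[2.5]]))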
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/min_samples_split", + "name": "min_samples_split", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.min_samples_split", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "2", + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.min_samples_leaf", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1", + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/min_weight_fraction_leaf", + "name": "min_weight_fraction_leaf", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.min_weight_fraction_leaf", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/max_features", + "name": "max_features", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.max_features", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, float or {\"auto\", \"sqrt\", \"log2\"}", + "default_value": "None", + "description": "The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "log2", "sqrt"] + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the randomness of the estimator. The features are always\nrandomly permuted at each split, even if ``splitter`` is set to\n``\"best\"``. When ``max_features < n_features``, the algorithm will\nselect ``max_features`` at random at each split before finding the best\nsplit among them. But the best found split may vary across different\nruns, even if ``max_features=n_features``. That is the case, if the\nimprovement of the criterion is identical for several splits and one\nsplit has to be selected at random. To obtain a deterministic behaviour\nduring fitting, ``random_state`` has to be fixed to an integer.\nSee :term:`Glossary ` for details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.max_leaf_nodes", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Grow a tree with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/min_impurity_decrease", + "name": "min_impurity_decrease", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.min_impurity_decrease", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/min_impurity_split", + "name": "min_impurity_split", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.min_impurity_split", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "Threshold for early stopping in tree growth. A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict, list of dict or \"balanced\"", + "default_value": "None", + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf None, all classes are supposed to have weight one. For\nmulti-output problems, a list of dicts can be provided in the same\norder as the columns of y.\n\nNote that for multioutput (including multilabel) weights should be\ndefined for each class of every column in its own dict. For example,\nfor four-class multilabel classification weights should be\n[{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n[{1:1}, {2:5}, {3:1}, {4:1}].\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``\n\nFor multi-output, the weights of each column of y will be multiplied.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "list of dict" + }, + { + "kind": "NamedType", + "name": "\"balanced\"" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/__init__/ccp_alpha", + "name": "ccp_alpha", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.__init__.ccp_alpha", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "non-negative float", + "default_value": "0.0", + "description": "Complexity parameter used for Minimal Cost-Complexity Pruning. The\nsubtree with the largest cost complexity that is smaller than\n``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n:ref:`minimal_cost_complexity_pruning` for details.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "non-negative float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "A decision tree classifier.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *,\n criterion=\"gini\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=None,\n random_state=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n class_weight=None,\n ccp_alpha=0.0):\n super().__init__(\n criterion=criterion,\n splitter=splitter,\n max_depth=max_depth,\n min_samples_split=min_samples_split,\n min_samples_leaf=min_samples_leaf,\n min_weight_fraction_leaf=min_weight_fraction_leaf,\n max_features=max_features,\n max_leaf_nodes=max_leaf_nodes,\n class_weight=class_weight,\n random_state=random_state,\n min_impurity_decrease=min_impurity_decrease,\n min_impurity_split=min_impurity_split,\n ccp_alpha=ccp_alpha)" + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/fit", + "name": "fit", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/fit/self", + "name": "self", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/fit/X", + "name": "X", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csc_matrix``." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/fit/y", + "name": "y", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "The target values (class labels) as integers or strings." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted. Splits\nthat would create child nodes with net zero or negative weight are\nignored while searching for a split in each node. Splits are also\nignored if they would result in any single class carrying a\nnegative weight in either child node." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/fit/check_input", + "name": "check_input", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.fit.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you do." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/fit/X_idx_sorted", + "name": "X_idx_sorted", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.fit.X_idx_sorted", + "default_value": "'deprecated'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "deprecated", + "default_value": "\"deprecated\"", + "description": "This parameter is deprecated and has no effect.\nIt will be removed in 1.1 (renaming of 0.26).\n\n.. deprecated :: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "deprecated" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build a decision tree classifier from the training set (X, y).", + "docstring": "Build a decision tree classifier from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels) as integers or strings.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. 
Splits are also\n ignored if they would result in any single class carrying a\n negative weight in either child node.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nX_idx_sorted : deprecated, default=\"deprecated\"\n This parameter is deprecated and has no effect.\n It will be removed in 1.1 (renaming of 0.26).\n\n .. deprecated :: 0.24\n\nReturns\n-------\nself : DecisionTreeClassifier\n Fitted estimator.", + "code": " def fit(self, X, y, sample_weight=None, check_input=True,\n X_idx_sorted=\"deprecated\"):\n \"\"\"Build a decision tree classifier from the training set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (class labels) as integers or strings.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node. Splits are also\n ignored if they would result in any single class carrying a\n negative weight in either child node.\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n X_idx_sorted : deprecated, default=\"deprecated\"\n This parameter is deprecated and has no effect.\n It will be removed in 1.1 (renaming of 0.26).\n\n .. deprecated :: 0.24\n\n Returns\n -------\n self : DecisionTreeClassifier\n Fitted estimator.\n \"\"\"\n\n super().fit(\n X, y,\n sample_weight=sample_weight,\n check_input=check_input,\n X_idx_sorted=X_idx_sorted)\n return self" + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/predict_log_proba", + "name": "predict_log_proba", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.predict_log_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/predict_log_proba/self", + "name": "self", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.predict_log_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/predict_log_proba/X", + "name": "X", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.predict_log_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." 
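A fit sketch exercising ``sample_weight`` together with the ``get_depth``/``get_n_leaves`` accessors captured earlier (the weighting scheme is illustrative)::

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.tree import DecisionTreeClassifier

    X, y = load_iris(return_X_y=True)
    weights = np.where(y == 2, 3.0, 1.0)   # up-weight one class three-fold

    clf = DecisionTreeClassifier(random_state=0).fit(X, y, sample_weight=weights)
    print(clf.get_depth(), clf.get_n_leaves())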
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class log-probabilities of the input samples X.", + "docstring": "Predict class log-probabilities of the input samples X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\nReturns\n-------\nproba : ndarray of shape (n_samples, n_classes) or list of n_outputs such arrays if n_outputs > 1\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.", + "code": " def predict_log_proba(self, X):\n \"\"\"Predict class log-probabilities of the input samples X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n Returns\n -------\n proba : ndarray of shape (n_samples, n_classes) or list of n_outputs \\\n such arrays if n_outputs > 1\n The class log-probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n proba = self.predict_proba(X)\n\n if self.n_outputs_ == 1:\n return np.log(proba)\n\n else:\n for k in range(self.n_outputs_):\n proba[k] = np.log(proba[k])\n\n return proba" + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csr_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeClassifier/predict_proba/check_input", + "name": "check_input", + "qname": "sklearn.tree._classes.DecisionTreeClassifier.predict_proba.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you do." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict class probabilities of the input samples X.\n\nThe predicted class probability is the fraction of samples of the same\nclass in a leaf.", + "docstring": "Predict class probabilities of the input samples X.\n\nThe predicted class probability is the fraction of samples of the same\nclass in a leaf.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nReturns\n-------\nproba : ndarray of shape (n_samples, n_classes) or list of n_outputs such arrays if n_outputs > 1\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.", + "code": " def predict_proba(self, X, check_input=True):\n \"\"\"Predict class probabilities of the input samples X.\n\n The predicted class probability is the fraction of samples of the same\n class in a leaf.\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csr_matrix``.\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n Returns\n -------\n proba : ndarray of shape (n_samples, n_classes) or list of n_outputs \\\n such arrays if n_outputs > 1\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n \"\"\"\n check_is_fitted(self)\n X = self._validate_X_predict(X, check_input)\n proba = self.tree_.predict(X)\n\n if self.n_outputs_ == 1:\n proba = proba[:, :self.n_classes_]\n normalizer = proba.sum(axis=1)[:, np.newaxis]\n normalizer[normalizer == 0.0] = 1.0\n proba /= normalizer\n\n return proba\n\n else:\n all_proba = []\n\n for k in range(self.n_outputs_):\n proba_k = proba[:, k, :self.n_classes_[k]]\n normalizer = proba_k.sum(axis=1)[:, np.newaxis]\n normalizer[normalizer == 0.0] = 1.0\n proba_k /= normalizer\n all_proba.append(proba_k)\n\n return all_proba" + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__", + "name": "__init__", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__/self", + "name": "self", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__/criterion", + "name": "criterion", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__.criterion", + "default_value": "'mse'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"mse\", \"friedman_mse\", \"mae\", \"poisson\"}", + "default_value": "\"mse\"", + "description": "The function to measure the quality of a split. 
Supported criteria\nare \"mse\" for the mean squared error, which is equal to variance\nreduction as feature selection criterion and minimizes the L2 loss\nusing the mean of each terminal node, \"friedman_mse\", which uses mean\nsquared error with Friedman's improvement score for potential splits,\n\"mae\" for the mean absolute error, which minimizes the L1 loss using\nthe median of each terminal node, and \"poisson\" which uses reduction in\nPoisson deviance to find splits.\n\n.. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n.. versionadded:: 0.24\n Poisson deviance criterion." + }, + "type": { + "kind": "EnumType", + "values": ["mse", "friedman_mse", "mae", "poisson"] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__/splitter", + "name": "splitter", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__.splitter", + "default_value": "'best'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"best\", \"random\"}", + "default_value": "\"best\"", + "description": "The strategy used to choose the split at each node. Supported\nstrategies are \"best\" to choose the best split and \"random\" to choose\nthe best random split." + }, + "type": { + "kind": "EnumType", + "values": ["best", "random"] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__.max_depth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum depth of the tree. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__/min_samples_split", + "name": "min_samples_split", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__.min_samples_split", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "2", + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__.min_samples_leaf", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1", + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. 
This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__/min_weight_fraction_leaf", + "name": "min_weight_fraction_leaf", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__.min_weight_fraction_leaf", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__/max_features", + "name": "max_features", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__.max_features", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, float or {\"auto\", \"sqrt\", \"log2\"}", + "default_value": "None", + "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "log2", "sqrt"] + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the randomness of the estimator. The features are always\nrandomly permuted at each split, even if ``splitter`` is set to\n``\"best\"``. When ``max_features < n_features``, the algorithm will\nselect ``max_features`` at random at each split before finding the best\nsplit among them. But the best found split may vary across different\nruns, even if ``max_features=n_features``. That is the case, if the\nimprovement of the criterion is identical for several splits and one\nsplit has to be selected at random. To obtain a deterministic behaviour\nduring fitting, ``random_state`` has to be fixed to an integer.\nSee :term:`Glossary ` for details." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__.max_leaf_nodes", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Grow a tree with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__/min_impurity_decrease", + "name": "min_impurity_decrease", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__.min_impurity_decrease", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__/min_impurity_split", + "name": "min_impurity_split", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__.min_impurity_split", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0", + "description": "Threshold for early stopping in tree growth. A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/__init__/ccp_alpha", + "name": "ccp_alpha", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.__init__.ccp_alpha", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "non-negative float", + "default_value": "0.0", + "description": "Complexity parameter used for Minimal Cost-Complexity Pruning. The\nsubtree with the largest cost complexity that is smaller than\n``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n:ref:`minimal_cost_complexity_pruning` for details.\n\n.. 
versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "non-negative float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "A decision tree regressor.\n\nRead more in the :ref:`User Guide <tree>`.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *,\n criterion=\"mse\",\n splitter=\"best\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=None,\n random_state=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n ccp_alpha=0.0):\n super().__init__(\n criterion=criterion,\n splitter=splitter,\n max_depth=max_depth,\n min_samples_split=min_samples_split,\n min_samples_leaf=min_samples_leaf,\n min_weight_fraction_leaf=min_weight_fraction_leaf,\n max_features=max_features,\n max_leaf_nodes=max_leaf_nodes,\n random_state=random_state,\n min_impurity_decrease=min_impurity_decrease,\n min_impurity_split=min_impurity_split,\n ccp_alpha=ccp_alpha)" + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/_compute_partial_dependence_recursion", + "name": "_compute_partial_dependence_recursion", + "qname": "sklearn.tree._classes.DecisionTreeRegressor._compute_partial_dependence_recursion", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/_compute_partial_dependence_recursion/self", + "name": "self", + "qname": "sklearn.tree._classes.DecisionTreeRegressor._compute_partial_dependence_recursion.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/_compute_partial_dependence_recursion/grid", + "name": "grid", + "qname": "sklearn.tree._classes.DecisionTreeRegressor._compute_partial_dependence_recursion.grid", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_samples, n_target_features)", + "default_value": "", + "description": "The grid points on which the partial dependence should be\nevaluated." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples, n_target_features)" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/_compute_partial_dependence_recursion/target_features", + "name": "target_features", + "qname": "sklearn.tree._classes.DecisionTreeRegressor._compute_partial_dependence_recursion.target_features", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_target_features)", + "default_value": "", + "description": "The set of target features for which the partial dependence\nshould be evaluated."
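The weighted impurity decrease formula quoted in the `min_impurity_decrease` entry above can be checked by hand. A worked sketch with made-up numbers (not taken from any dataset):

# N: total weighted samples; N_t: samples at the node;
# N_t_L / N_t_R: samples in the left / right child.
N, N_t, N_t_L, N_t_R = 100, 40, 25, 15
impurity, left_imp, right_imp = 0.5, 0.3, 0.2

decrease = N_t / N * (impurity
                      - N_t_R / N_t * right_imp
                      - N_t_L / N_t * left_imp)
print(decrease)  # 0.095 -- the split is kept only if >= min_impurity_decrease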
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_target_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fast partial dependence computation.", + "docstring": "Fast partial dependence computation.\n\nParameters\n----------\ngrid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\ntarget_features : ndarray of shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\nReturns\n-------\naveraged_predictions : ndarray of shape (n_samples,)\n The value of the partial dependence function on each grid point.", + "code": " def _compute_partial_dependence_recursion(self, grid, target_features):\n \"\"\"Fast partial dependence computation.\n\n Parameters\n ----------\n grid : ndarray of shape (n_samples, n_target_features)\n The grid points on which the partial dependence should be\n evaluated.\n target_features : ndarray of shape (n_target_features)\n The set of target features for which the partial dependence\n should be evaluated.\n\n Returns\n -------\n averaged_predictions : ndarray of shape (n_samples,)\n The value of the partial dependence function on each grid point.\n \"\"\"\n grid = np.asarray(grid, dtype=DTYPE, order='C')\n averaged_predictions = np.zeros(shape=grid.shape[0],\n dtype=np.float64, order='C')\n\n self.tree_.compute_partial_dependence(\n grid, target_features, averaged_predictions)\n return averaged_predictions" + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/fit", + "name": "fit", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/fit/self", + "name": "self", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/fit/X", + "name": "X", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "The training input samples. Internally, it will be converted to\n``dtype=np.float32`` and if a sparse matrix is provided\nto a sparse ``csc_matrix``." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/fit/y", + "name": "y", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "The target values (real numbers). Use ``dtype=np.float64`` and\n``order='C'`` for maximum efficiency." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/fit/sample_weight", + "name": "sample_weight", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.fit.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights. If None, then samples are equally weighted. Splits\nthat would create child nodes with net zero or negative weight are\nignored while searching for a split in each node." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/fit/check_input", + "name": "check_input", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.fit.check_input", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Allow to bypass several input checking.\nDon't use this parameter unless you know what you do." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/DecisionTreeRegressor/fit/X_idx_sorted", + "name": "X_idx_sorted", + "qname": "sklearn.tree._classes.DecisionTreeRegressor.fit.X_idx_sorted", + "default_value": "'deprecated'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "deprecated", + "default_value": "\"deprecated\"", + "description": "This parameter is deprecated and has no effect.\nIt will be removed in 1.1 (renaming of 0.26).\n\n.. deprecated :: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "deprecated" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build a decision tree regressor from the training set (X, y).", + "docstring": "Build a decision tree regressor from the training set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (real numbers). Use ``dtype=np.float64`` and\n ``order='C'`` for maximum efficiency.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node.\n\ncheck_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\nX_idx_sorted : deprecated, default=\"deprecated\"\n This parameter is deprecated and has no effect.\n It will be removed in 1.1 (renaming of 0.26).\n\n .. deprecated :: 0.24\n\nReturns\n-------\nself : DecisionTreeRegressor\n Fitted estimator.", + "code": " def fit(self, X, y, sample_weight=None, check_input=True,\n X_idx_sorted=\"deprecated\"):\n \"\"\"Build a decision tree regressor from the training set (X, y).\n\n Parameters\n ----------\n X : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. 
Internally, it will be converted to\n ``dtype=np.float32`` and if a sparse matrix is provided\n to a sparse ``csc_matrix``.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n The target values (real numbers). Use ``dtype=np.float64`` and\n ``order='C'`` for maximum efficiency.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted. Splits\n that would create child nodes with net zero or negative weight are\n ignored while searching for a split in each node.\n\n check_input : bool, default=True\n Allow to bypass several input checking.\n Don't use this parameter unless you know what you do.\n\n X_idx_sorted : deprecated, default=\"deprecated\"\n This parameter is deprecated and has no effect.\n It will be removed in 1.1 (renaming of 0.26).\n\n .. deprecated :: 0.24\n\n Returns\n -------\n self : DecisionTreeRegressor\n Fitted estimator.\n \"\"\"\n\n super().fit(\n X, y,\n sample_weight=sample_weight,\n check_input=check_input,\n X_idx_sorted=X_idx_sorted)\n return self" + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__", + "name": "__init__", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/self", + "name": "self", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/criterion", + "name": "criterion", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.criterion", + "default_value": "'gini'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"gini\", \"entropy\"}", + "default_value": "\"gini\"", + "description": "The function to measure the quality of a split. Supported criteria are\n\"gini\" for the Gini impurity and \"entropy\" for the information gain." + }, + "type": { + "kind": "EnumType", + "values": ["entropy", "gini"] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/splitter", + "name": "splitter", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.splitter", + "default_value": "'random'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"random\", \"best\"}", + "default_value": "\"random\"", + "description": "The strategy used to choose the split at each node. Supported\nstrategies are \"best\" to choose the best split and \"random\" to choose\nthe best random split." + }, + "type": { + "kind": "EnumType", + "values": ["best", "random"] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.max_depth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum depth of the tree. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples." 
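`fit` above accepts per-sample weights, and splits that would leave a child with zero or negative net weight are skipped. A minimal sketch on toy data:

import numpy as np
from sklearn.tree import DecisionTreeRegressor

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0.0, 0.0, 1.0, 1.0])  # float64 targets, as the docstring advises
w = np.array([1.0, 1.0, 2.0, 2.0])  # up-weight the right half

reg = DecisionTreeRegressor(max_depth=1, random_state=0)
reg.fit(X, y, sample_weight=w)
print(reg.predict([[1.5]]))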
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/min_samples_split", + "name": "min_samples_split", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.min_samples_split", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "2", + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.min_samples_leaf", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1", + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/min_weight_fraction_leaf", + "name": "min_weight_fraction_leaf", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.min_weight_fraction_leaf", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/max_features", + "name": "max_features", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.max_features", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, float, {\"auto\", \"sqrt\", \"log2\"} or None", + "default_value": "\"auto\"", + "description": "The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "log2", "sqrt"] + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/random_state", + "name": "random_state", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used to pick randomly the `max_features` used at each split.\nSee :term:`Glossary ` for details." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.max_leaf_nodes", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Grow a tree with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/min_impurity_decrease", + "name": "min_impurity_decrease", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.min_impurity_decrease", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/min_impurity_split", + "name": "min_impurity_split", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.min_impurity_split", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Threshold for early stopping in tree growth. A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/class_weight", + "name": "class_weight", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.class_weight", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict, list of dict or \"balanced\"", + "default_value": "None", + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf None, all classes are supposed to have weight one. For\nmulti-output problems, a list of dicts can be provided in the same\norder as the columns of y.\n\nNote that for multioutput (including multilabel) weights should be\ndefined for each class of every column in its own dict. For example,\nfor four-class multilabel classification weights should be\n[{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n[{1:1}, {2:5}, {3:1}, {4:1}].\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``\n\nFor multi-output, the weights of each column of y will be multiplied.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "list of dict" + }, + { + "kind": "NamedType", + "name": "\"balanced\"" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeClassifier/__init__/ccp_alpha", + "name": "ccp_alpha", + "qname": "sklearn.tree._classes.ExtraTreeClassifier.__init__.ccp_alpha", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "non-negative float", + "default_value": "0.0", + "description": "Complexity parameter used for Minimal Cost-Complexity Pruning. The\nsubtree with the largest cost complexity that is smaller than\n``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n:ref:`minimal_cost_complexity_pruning` for details.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "non-negative float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "An extremely randomized tree classifier.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide `.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *,\n criterion=\"gini\",\n splitter=\"random\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=\"auto\",\n random_state=None,\n max_leaf_nodes=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n class_weight=None,\n ccp_alpha=0.0):\n super().__init__(\n criterion=criterion,\n splitter=splitter,\n max_depth=max_depth,\n min_samples_split=min_samples_split,\n min_samples_leaf=min_samples_leaf,\n min_weight_fraction_leaf=min_weight_fraction_leaf,\n max_features=max_features,\n max_leaf_nodes=max_leaf_nodes,\n class_weight=class_weight,\n min_impurity_decrease=min_impurity_decrease,\n min_impurity_split=min_impurity_split,\n random_state=random_state,\n ccp_alpha=ccp_alpha)" + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__", + "name": "__init__", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__/self", + "name": "self", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__/criterion", + "name": "criterion", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__.criterion", + "default_value": "'mse'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"mse\", \"friedman_mse\", \"mae\"}", + "default_value": "\"mse\"", + "description": "The function to measure the quality of a split. 
Supported criteria\nare \"mse\" for the mean squared error, which is equal to variance\nreduction as feature selection criterion and \"mae\" for the mean\nabsolute error.\n\n.. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n.. versionadded:: 0.24\n Poisson deviance criterion." + }, + "type": { + "kind": "EnumType", + "values": ["mse", "friedman_mse", "mae"] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__/splitter", + "name": "splitter", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__.splitter", + "default_value": "'random'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{\"random\", \"best\"}", + "default_value": "\"random\"", + "description": "The strategy used to choose the split at each node. Supported\nstrategies are \"best\" to choose the best split and \"random\" to choose\nthe best random split." + }, + "type": { + "kind": "EnumType", + "values": ["best", "random"] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__.max_depth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum depth of the tree. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__/min_samples_split", + "name": "min_samples_split", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__.min_samples_split", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "2", + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__/min_samples_leaf", + "name": "min_samples_leaf", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__.min_samples_leaf", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int or float", + "default_value": "1", + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__/min_weight_fraction_leaf", + "name": "min_weight_fraction_leaf", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__.min_weight_fraction_leaf", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__/max_features", + "name": "max_features", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__.max_features", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, float, {\"auto\", \"sqrt\", \"log2\"} or None", + "default_value": "\"auto\"", + "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["auto", "log2", "sqrt"] + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__/random_state", + "name": "random_state", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Used to pick randomly the `max_features` used at each split.\nSee :term:`Glossary ` for details." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__/min_impurity_decrease", + "name": "min_impurity_decrease", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__.min_impurity_decrease", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "0.0", + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__/min_impurity_split", + "name": "min_impurity_split", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__.min_impurity_split", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "None", + "description": "Threshold for early stopping in tree growth. A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. deprecated:: 0.19\n ``min_impurity_split`` has been deprecated in favor of\n ``min_impurity_decrease`` in 0.19. The default value of\n ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n will be removed in 1.0 (renaming of 0.25).\n Use ``min_impurity_decrease`` instead." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__/max_leaf_nodes", + "name": "max_leaf_nodes", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__.max_leaf_nodes", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Grow a tree with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._classes/ExtraTreeRegressor/__init__/ccp_alpha", + "name": "ccp_alpha", + "qname": "sklearn.tree._classes.ExtraTreeRegressor.__init__.ccp_alpha", + "default_value": "0.0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "non-negative float", + "default_value": "0.0", + "description": "Complexity parameter used for Minimal Cost-Complexity Pruning. The\nsubtree with the largest cost complexity that is smaller than\n``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n:ref:`minimal_cost_complexity_pruning` for details.\n\n.. 
versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "non-negative float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "An extremely randomized tree regressor.\n\nExtra-trees differ from classic decision trees in the way they are built.\nWhen looking for the best split to separate the samples of a node into two\ngroups, random splits are drawn for each of the `max_features` randomly\nselected features and the best split among those is chosen. When\n`max_features` is set to 1, this amounts to building a totally random\ndecision tree.\n\nWarning: Extra-trees should only be used within ensemble methods.\n\nRead more in the :ref:`User Guide <tree>`.", + "docstring": "", + "code": " @_deprecate_positional_args\n def __init__(self, *,\n criterion=\"mse\",\n splitter=\"random\",\n max_depth=None,\n min_samples_split=2,\n min_samples_leaf=1,\n min_weight_fraction_leaf=0.,\n max_features=\"auto\",\n random_state=None,\n min_impurity_decrease=0.,\n min_impurity_split=None,\n max_leaf_nodes=None,\n ccp_alpha=0.0):\n super().__init__(\n criterion=criterion,\n splitter=splitter,\n max_depth=max_depth,\n min_samples_split=min_samples_split,\n min_samples_leaf=min_samples_leaf,\n min_weight_fraction_leaf=min_weight_fraction_leaf,\n max_features=max_features,\n max_leaf_nodes=max_leaf_nodes,\n min_impurity_decrease=min_impurity_decrease,\n min_impurity_split=min_impurity_split,\n random_state=random_state,\n ccp_alpha=ccp_alpha)" + }, + { + "id": "scikit-learn/sklearn.tree._export/Sentinel/__repr__", + "name": "__repr__", + "qname": "sklearn.tree._export.Sentinel.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/Sentinel/__repr__/self", + "name": "self", + "qname": "sklearn.tree._export.Sentinel.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return '\"tree.dot\"'" + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__", + "name": "__init__", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__/self", + "name": "self", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__.max_depth", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__/feature_names", + "name": "feature_names", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__.feature_names", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__/class_names", + "name": 
"class_names", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__.class_names", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__/label", + "name": "label", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__.label", + "default_value": "'all'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__/filled", + "name": "filled", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__.filled", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__/impurity", + "name": "impurity", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__.impurity", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__/node_ids", + "name": "node_ids", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__.node_ids", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__/proportion", + "name": "proportion", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__.proportion", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__/rotate", + "name": "rotate", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__.rotate", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__/rounded", + "name": "rounded", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__.rounded", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__/precision", + "name": "precision", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__.precision", + "default_value": "3", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/__init__/fontsize", + "name": "fontsize", + "qname": "sklearn.tree._export._BaseTreeExporter.__init__.fontsize", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + 
"reexported_by": [], + "description": "", + "docstring": "", + "code": " def __init__(self, max_depth=None, feature_names=None,\n class_names=None, label='all', filled=False,\n impurity=True, node_ids=False,\n proportion=False, rotate=False, rounded=False,\n precision=3, fontsize=None):\n self.max_depth = max_depth\n self.feature_names = feature_names\n self.class_names = class_names\n self.label = label\n self.filled = filled\n self.impurity = impurity\n self.node_ids = node_ids\n self.proportion = proportion\n self.rotate = rotate\n self.rounded = rounded\n self.precision = precision\n self.fontsize = fontsize" + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/get_color", + "name": "get_color", + "qname": "sklearn.tree._export._BaseTreeExporter.get_color", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/get_color/self", + "name": "self", + "qname": "sklearn.tree._export._BaseTreeExporter.get_color.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/get_color/value", + "name": "value", + "qname": "sklearn.tree._export._BaseTreeExporter.get_color.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_color(self, value):\n # Find the appropriate color & intensity for a node\n if self.colors['bounds'] is None:\n # Classification tree\n color = list(self.colors['rgb'][np.argmax(value)])\n sorted_values = sorted(value, reverse=True)\n if len(sorted_values) == 1:\n alpha = 0\n else:\n alpha = ((sorted_values[0] - sorted_values[1])\n / (1 - sorted_values[1]))\n else:\n # Regression tree or multi-output\n color = list(self.colors['rgb'][0])\n alpha = ((value - self.colors['bounds'][0]) /\n (self.colors['bounds'][1] - self.colors['bounds'][0]))\n # unpack numpy scalars\n alpha = float(alpha)\n # compute the color as alpha against white\n color = [int(round(alpha * c + (1 - alpha) * 255, 0)) for c in color]\n # Return html color code in #RRGGBB format\n return '#%2x%2x%2x' % tuple(color)" + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/get_fill_color", + "name": "get_fill_color", + "qname": "sklearn.tree._export._BaseTreeExporter.get_fill_color", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/get_fill_color/self", + "name": "self", + "qname": "sklearn.tree._export._BaseTreeExporter.get_fill_color.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/get_fill_color/tree", + "name": "tree", + "qname": "sklearn.tree._export._BaseTreeExporter.get_fill_color.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/get_fill_color/node_id", + "name": "node_id", + "qname": 
"sklearn.tree._export._BaseTreeExporter.get_fill_color.node_id", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_fill_color(self, tree, node_id):\n # Fetch appropriate color for node\n if 'rgb' not in self.colors:\n # Initialize colors and bounds if required\n self.colors['rgb'] = _color_brew(tree.n_classes[0])\n if tree.n_outputs != 1:\n # Find max and min impurities for multi-output\n self.colors['bounds'] = (np.min(-tree.impurity),\n np.max(-tree.impurity))\n elif (tree.n_classes[0] == 1 and\n len(np.unique(tree.value)) != 1):\n # Find max and min values in leaf nodes for regression\n self.colors['bounds'] = (np.min(tree.value),\n np.max(tree.value))\n if tree.n_outputs == 1:\n node_val = (tree.value[node_id][0, :] /\n tree.weighted_n_node_samples[node_id])\n if tree.n_classes[0] == 1:\n # Regression\n node_val = tree.value[node_id][0, :]\n else:\n # If multi-output color node by impurity\n node_val = -tree.impurity[node_id]\n return self.get_color(node_val)" + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/node_to_str", + "name": "node_to_str", + "qname": "sklearn.tree._export._BaseTreeExporter.node_to_str", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/node_to_str/self", + "name": "self", + "qname": "sklearn.tree._export._BaseTreeExporter.node_to_str.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/node_to_str/tree", + "name": "tree", + "qname": "sklearn.tree._export._BaseTreeExporter.node_to_str.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/node_to_str/node_id", + "name": "node_id", + "qname": "sklearn.tree._export._BaseTreeExporter.node_to_str.node_id", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_BaseTreeExporter/node_to_str/criterion", + "name": "criterion", + "qname": "sklearn.tree._export._BaseTreeExporter.node_to_str.criterion", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def node_to_str(self, tree, node_id, criterion):\n # Generate the node content string\n if tree.n_outputs == 1:\n value = tree.value[node_id][0, :]\n else:\n value = tree.value[node_id]\n\n # Should labels be shown?\n labels = (self.label == 'root' and node_id == 0) or self.label == 'all'\n\n characters = self.characters\n node_string = characters[-1]\n\n # Write node ID\n if self.node_ids:\n if labels:\n node_string += 'node '\n node_string += characters[0] + str(node_id) + characters[4]\n\n # Write decision criteria\n if tree.children_left[node_id] != 
_tree.TREE_LEAF:\n # Always write node decision criteria, except for leaves\n if self.feature_names is not None:\n feature = self.feature_names[tree.feature[node_id]]\n else:\n feature = \"X%s%s%s\" % (characters[1],\n tree.feature[node_id],\n characters[2])\n node_string += '%s %s %s%s' % (feature,\n characters[3],\n round(tree.threshold[node_id],\n self.precision),\n characters[4])\n\n # Write impurity\n if self.impurity:\n if isinstance(criterion, _criterion.FriedmanMSE):\n criterion = \"friedman_mse\"\n elif not isinstance(criterion, str):\n criterion = \"impurity\"\n if labels:\n node_string += '%s = ' % criterion\n node_string += (str(round(tree.impurity[node_id], self.precision))\n + characters[4])\n\n # Write node sample count\n if labels:\n node_string += 'samples = '\n if self.proportion:\n percent = (100. * tree.n_node_samples[node_id] /\n float(tree.n_node_samples[0]))\n node_string += (str(round(percent, 1)) + '%' +\n characters[4])\n else:\n node_string += (str(tree.n_node_samples[node_id]) +\n characters[4])\n\n # Write node class distribution / regression value\n if self.proportion and tree.n_classes[0] != 1:\n # For classification this will show the proportion of samples\n value = value / tree.weighted_n_node_samples[node_id]\n if labels:\n node_string += 'value = '\n if tree.n_classes[0] == 1:\n # Regression\n value_text = np.around(value, self.precision)\n elif self.proportion:\n # Classification\n value_text = np.around(value, self.precision)\n elif np.all(np.equal(np.mod(value, 1), 0)):\n # Classification without floating-point weights\n value_text = value.astype(int)\n else:\n # Classification with floating-point weights\n value_text = np.around(value, self.precision)\n # Strip whitespace\n value_text = str(value_text.astype('S32')).replace(\"b'\", \"'\")\n value_text = value_text.replace(\"' '\", \", \").replace(\"'\", \"\")\n if tree.n_classes[0] == 1 and tree.n_outputs == 1:\n value_text = value_text.replace(\"[\", \"\").replace(\"]\", \"\")\n value_text = value_text.replace(\"\\n \", characters[4])\n node_string += value_text + characters[4]\n\n # Write node majority class\n if (self.class_names is not None and\n tree.n_classes[0] != 1 and\n tree.n_outputs == 1):\n # Only done for single-output classification trees\n if labels:\n node_string += 'class = '\n if self.class_names is not True:\n class_name = self.class_names[np.argmax(value)]\n else:\n class_name = \"y%s%s%s\" % (characters[1],\n np.argmax(value),\n characters[2])\n node_string += class_name\n\n # Clean up any trailing newlines\n if node_string.endswith(characters[4]):\n node_string = node_string[:-len(characters[4])]\n\n return node_string + characters[5]" + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__", + "name": "__init__", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/self", + "name": "self", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/out_file", + "name": "out_file", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.out_file", + "default_value": "SENTINEL", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
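`node_to_str` above assembles each node's label (feature test, impurity, sample count, value, majority class), which the public plotting entry point then renders. A usage sketch (requires matplotlib):

import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, plot_tree

iris = load_iris()
clf = DecisionTreeClassifier(max_depth=2, random_state=0)
clf.fit(iris.data, iris.target)
plot_tree(clf, feature_names=iris.feature_names,
          class_names=list(iris.target_names), filled=True)
plt.show()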
"description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.max_depth", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/feature_names", + "name": "feature_names", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.feature_names", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/class_names", + "name": "class_names", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.class_names", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/label", + "name": "label", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.label", + "default_value": "'all'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/filled", + "name": "filled", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.filled", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/leaves_parallel", + "name": "leaves_parallel", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.leaves_parallel", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/impurity", + "name": "impurity", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.impurity", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/node_ids", + "name": "node_ids", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.node_ids", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/proportion", + "name": "proportion", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.proportion", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/rotate", + "name": "rotate", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.rotate", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + 
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/rounded", + "name": "rounded", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.rounded", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/special_characters", + "name": "special_characters", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.special_characters", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/__init__/precision", + "name": "precision", + "qname": "sklearn.tree._export._DOTTreeExporter.__init__.precision", + "default_value": "3", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __init__(self, out_file=SENTINEL, max_depth=None,\n feature_names=None, class_names=None, label='all',\n filled=False, leaves_parallel=False, impurity=True,\n node_ids=False, proportion=False, rotate=False, rounded=False,\n special_characters=False, precision=3):\n\n super().__init__(\n max_depth=max_depth, feature_names=feature_names,\n class_names=class_names, label=label, filled=filled,\n impurity=impurity,\n node_ids=node_ids, proportion=proportion, rotate=rotate,\n rounded=rounded,\n precision=precision)\n self.leaves_parallel = leaves_parallel\n self.out_file = out_file\n self.special_characters = special_characters\n\n # PostScript compatibility for special characters\n if special_characters:\n self.characters = ['#', '', '', '≤', '
',\n '>', '<']\n else:\n self.characters = ['#', '[', ']', '<=', '\\\\n', '\"', '\"']\n\n # validate\n if isinstance(precision, Integral):\n if precision < 0:\n raise ValueError(\"'precision' should be greater or equal to 0.\"\n \" Got {} instead.\".format(precision))\n else:\n raise ValueError(\"'precision' should be an integer. Got {}\"\n \" instead.\".format(type(precision)))\n\n # The depth of each node for plotting with 'leaf' option\n self.ranks = {'leaves': []}\n # The colors to render each node with\n self.colors = {'bounds': None}" + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/export", + "name": "export", + "qname": "sklearn.tree._export._DOTTreeExporter.export", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/export/self", + "name": "self", + "qname": "sklearn.tree._export._DOTTreeExporter.export.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/export/decision_tree", + "name": "decision_tree", + "qname": "sklearn.tree._export._DOTTreeExporter.export.decision_tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def export(self, decision_tree):\n # Check length of feature_names before getting into the tree node\n # Raise error if length of feature_names does not match\n # n_features_ in the decision_tree\n if self.feature_names is not None:\n if len(self.feature_names) != decision_tree.n_features_:\n raise ValueError(\"Length of feature_names, %d \"\n \"does not match number of features, %d\"\n % (len(self.feature_names),\n decision_tree.n_features_))\n # each part writes to out_file\n self.head()\n # Now recurse the tree and add node & edge attributes\n if isinstance(decision_tree, _tree.Tree):\n self.recurse(decision_tree, 0, criterion=\"impurity\")\n else:\n self.recurse(decision_tree.tree_, 0,\n criterion=decision_tree.criterion)\n\n self.tail()" + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/head", + "name": "head", + "qname": "sklearn.tree._export._DOTTreeExporter.head", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/head/self", + "name": "self", + "qname": "sklearn.tree._export._DOTTreeExporter.head.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def head(self):\n self.out_file.write('digraph Tree {\\n')\n\n # Specify node aesthetics\n self.out_file.write('node [shape=box')\n rounded_filled = []\n if self.filled:\n rounded_filled.append('filled')\n if self.rounded:\n rounded_filled.append('rounded')\n if len(rounded_filled) > 0:\n self.out_file.write(\n ', style=\"%s\", color=\"black\"'\n % \", \".join(rounded_filled))\n if self.rounded:\n self.out_file.write(', fontname=helvetica')\n self.out_file.write('] ;\\n')\n\n # Specify graph & edge aesthetics\n if self.leaves_parallel:\n self.out_file.write(\n 'graph [ranksep=equally, 
splines=polyline] ;\\n')\n if self.rounded:\n self.out_file.write('edge [fontname=helvetica] ;\\n')\n if self.rotate:\n self.out_file.write('rankdir=LR ;\\n')" + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/recurse", + "name": "recurse", + "qname": "sklearn.tree._export._DOTTreeExporter.recurse", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/recurse/self", + "name": "self", + "qname": "sklearn.tree._export._DOTTreeExporter.recurse.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/recurse/tree", + "name": "tree", + "qname": "sklearn.tree._export._DOTTreeExporter.recurse.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/recurse/node_id", + "name": "node_id", + "qname": "sklearn.tree._export._DOTTreeExporter.recurse.node_id", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/recurse/criterion", + "name": "criterion", + "qname": "sklearn.tree._export._DOTTreeExporter.recurse.criterion", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/recurse/parent", + "name": "parent", + "qname": "sklearn.tree._export._DOTTreeExporter.recurse.parent", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/recurse/depth", + "name": "depth", + "qname": "sklearn.tree._export._DOTTreeExporter.recurse.depth", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def recurse(self, tree, node_id, criterion, parent=None, depth=0):\n if node_id == _tree.TREE_LEAF:\n raise ValueError(\"Invalid node_id %s\" % _tree.TREE_LEAF)\n\n left_child = tree.children_left[node_id]\n right_child = tree.children_right[node_id]\n\n # Add node with description\n if self.max_depth is None or depth <= self.max_depth:\n\n # Collect ranks for 'leaf' option in plot_options\n if left_child == _tree.TREE_LEAF:\n self.ranks['leaves'].append(str(node_id))\n elif str(depth) not in self.ranks:\n self.ranks[str(depth)] = [str(node_id)]\n else:\n self.ranks[str(depth)].append(str(node_id))\n\n self.out_file.write(\n '%d [label=%s' % (node_id, self.node_to_str(tree, node_id,\n criterion)))\n\n if self.filled:\n self.out_file.write(', fillcolor=\"%s\"'\n % self.get_fill_color(tree, node_id))\n self.out_file.write('] ;\\n')\n\n if parent is not None:\n # Add edge to parent\n self.out_file.write('%d -> %d' % (parent, node_id))\n if parent == 0:\n # Draw True/False labels if 
parent is root node\n angles = np.array([45, -45]) * ((self.rotate - .5) * -2)\n self.out_file.write(' [labeldistance=2.5, labelangle=')\n if node_id == 1:\n self.out_file.write('%d, headlabel=\"True\"]' %\n angles[0])\n else:\n self.out_file.write('%d, headlabel=\"False\"]' %\n angles[1])\n self.out_file.write(' ;\\n')\n\n if left_child != _tree.TREE_LEAF:\n self.recurse(tree, left_child, criterion=criterion,\n parent=node_id, depth=depth + 1)\n self.recurse(tree, right_child, criterion=criterion,\n parent=node_id, depth=depth + 1)\n\n else:\n self.ranks['leaves'].append(str(node_id))\n\n self.out_file.write('%d [label=\"(...)\"' % node_id)\n if self.filled:\n # color cropped nodes grey\n self.out_file.write(', fillcolor=\"#C0C0C0\"')\n self.out_file.write('] ;\\n' % node_id)\n\n if parent is not None:\n # Add edge to parent\n self.out_file.write('%d -> %d ;\\n' % (parent, node_id))" + }, + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/tail", + "name": "tail", + "qname": "sklearn.tree._export._DOTTreeExporter.tail", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_DOTTreeExporter/tail/self", + "name": "self", + "qname": "sklearn.tree._export._DOTTreeExporter.tail.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def tail(self):\n # If required, draw leaf nodes at same depth as each other\n if self.leaves_parallel:\n for rank in sorted(self.ranks):\n self.out_file.write(\n \"{rank=same ; \" +\n \"; \".join(r for r in self.ranks[rank]) + \"} ;\\n\")\n self.out_file.write(\"}\")" + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__", + "name": "__init__", + "qname": "sklearn.tree._export._MPLTreeExporter.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__/self", + "name": "self", + "qname": "sklearn.tree._export._MPLTreeExporter.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__/max_depth", + "name": "max_depth", + "qname": "sklearn.tree._export._MPLTreeExporter.__init__.max_depth", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__/feature_names", + "name": "feature_names", + "qname": "sklearn.tree._export._MPLTreeExporter.__init__.feature_names", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__/class_names", + "name": "class_names", + "qname": "sklearn.tree._export._MPLTreeExporter.__init__.class_names", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__/label", + "name": "label", + 
"qname": "sklearn.tree._export._MPLTreeExporter.__init__.label", + "default_value": "'all'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__/filled", + "name": "filled", + "qname": "sklearn.tree._export._MPLTreeExporter.__init__.filled", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__/impurity", + "name": "impurity", + "qname": "sklearn.tree._export._MPLTreeExporter.__init__.impurity", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__/node_ids", + "name": "node_ids", + "qname": "sklearn.tree._export._MPLTreeExporter.__init__.node_ids", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__/proportion", + "name": "proportion", + "qname": "sklearn.tree._export._MPLTreeExporter.__init__.proportion", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__/rotate", + "name": "rotate", + "qname": "sklearn.tree._export._MPLTreeExporter.__init__.rotate", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__/rounded", + "name": "rounded", + "qname": "sklearn.tree._export._MPLTreeExporter.__init__.rounded", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__/precision", + "name": "precision", + "qname": "sklearn.tree._export._MPLTreeExporter.__init__.precision", + "default_value": "3", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/__init__/fontsize", + "name": "fontsize", + "qname": "sklearn.tree._export._MPLTreeExporter.__init__.fontsize", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __init__(self, max_depth=None, feature_names=None,\n class_names=None, label='all', filled=False,\n impurity=True, node_ids=False,\n proportion=False, rotate=False, rounded=False,\n precision=3, fontsize=None):\n\n super().__init__(\n max_depth=max_depth, feature_names=feature_names,\n class_names=class_names, 
label=label, filled=filled,\n impurity=impurity, node_ids=node_ids, proportion=proportion,\n rotate=rotate, rounded=rounded, precision=precision)\n self.fontsize = fontsize\n\n # validate\n if isinstance(precision, Integral):\n if precision < 0:\n raise ValueError(\"'precision' should be greater or equal to 0.\"\n \" Got {} instead.\".format(precision))\n else:\n raise ValueError(\"'precision' should be an integer. Got {}\"\n \" instead.\".format(type(precision)))\n\n # The depth of each node for plotting with 'leaf' option\n self.ranks = {'leaves': []}\n # The colors to render each node with\n self.colors = {'bounds': None}\n\n self.characters = ['#', '[', ']', '<=', '\\n', '', '']\n self.bbox_args = dict()\n if self.rounded:\n self.bbox_args['boxstyle'] = \"round\"\n\n self.arrow_args = dict(arrowstyle=\"<-\")" + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/_make_tree", + "name": "_make_tree", + "qname": "sklearn.tree._export._MPLTreeExporter._make_tree", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/_make_tree/self", + "name": "self", + "qname": "sklearn.tree._export._MPLTreeExporter._make_tree.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/_make_tree/node_id", + "name": "node_id", + "qname": "sklearn.tree._export._MPLTreeExporter._make_tree.node_id", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/_make_tree/et", + "name": "et", + "qname": "sklearn.tree._export._MPLTreeExporter._make_tree.et", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/_make_tree/criterion", + "name": "criterion", + "qname": "sklearn.tree._export._MPLTreeExporter._make_tree.criterion", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/_make_tree/depth", + "name": "depth", + "qname": "sklearn.tree._export._MPLTreeExporter._make_tree.depth", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _make_tree(self, node_id, et, criterion, depth=0):\n # traverses _tree.Tree recursively, builds intermediate\n # \"_reingold_tilford.Tree\" object\n name = self.node_to_str(et, node_id, criterion=criterion)\n if (et.children_left[node_id] != _tree.TREE_LEAF\n and (self.max_depth is None or depth <= self.max_depth)):\n children = [self._make_tree(et.children_left[node_id], et,\n criterion, depth=depth + 1),\n self._make_tree(et.children_right[node_id], et,\n criterion, depth=depth + 1)]\n else:\n return Tree(name, node_id)\n return Tree(name, node_id, *children)" + }, + { + "id": 
"scikit-learn/sklearn.tree._export/_MPLTreeExporter/export", + "name": "export", + "qname": "sklearn.tree._export._MPLTreeExporter.export", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/export/self", + "name": "self", + "qname": "sklearn.tree._export._MPLTreeExporter.export.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/export/decision_tree", + "name": "decision_tree", + "qname": "sklearn.tree._export._MPLTreeExporter.export.decision_tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/export/ax", + "name": "ax", + "qname": "sklearn.tree._export._MPLTreeExporter.export.ax", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def export(self, decision_tree, ax=None):\n import matplotlib.pyplot as plt\n from matplotlib.text import Annotation\n\n if ax is None:\n ax = plt.gca()\n ax.clear()\n ax.set_axis_off()\n my_tree = self._make_tree(0, decision_tree.tree_,\n decision_tree.criterion)\n draw_tree = buchheim(my_tree)\n\n # important to make sure we're still\n # inside the axis after drawing the box\n # this makes sense because the width of a box\n # is about the same as the distance between boxes\n max_x, max_y = draw_tree.max_extents() + 1\n ax_width = ax.get_window_extent().width\n ax_height = ax.get_window_extent().height\n\n scale_x = ax_width / max_x\n scale_y = ax_height / max_y\n\n self.recurse(draw_tree, decision_tree.tree_, ax,\n scale_x, scale_y, ax_height)\n\n anns = [ann for ann in ax.get_children()\n if isinstance(ann, Annotation)]\n\n # update sizes of all bboxes\n renderer = ax.figure.canvas.get_renderer()\n\n for ann in anns:\n ann.update_bbox_position_size(renderer)\n\n if self.fontsize is None:\n # get figure to data transform\n # adjust fontsize to avoid overlap\n # get max box width and height\n extents = [ann.get_bbox_patch().get_window_extent()\n for ann in anns]\n max_width = max([extent.width for extent in extents])\n max_height = max([extent.height for extent in extents])\n # width should be around scale_x in axis coordinates\n size = anns[0].get_fontsize() * min(scale_x / max_width,\n scale_y / max_height)\n for ann in anns:\n ann.set_fontsize(size)\n\n return anns" + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/recurse", + "name": "recurse", + "qname": "sklearn.tree._export._MPLTreeExporter.recurse", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/recurse/self", + "name": "self", + "qname": "sklearn.tree._export._MPLTreeExporter.recurse.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/recurse/node", + "name": "node", + "qname": "sklearn.tree._export._MPLTreeExporter.recurse.node", + "default_value": null, + 
"assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/recurse/tree", + "name": "tree", + "qname": "sklearn.tree._export._MPLTreeExporter.recurse.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/recurse/ax", + "name": "ax", + "qname": "sklearn.tree._export._MPLTreeExporter.recurse.ax", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/recurse/scale_x", + "name": "scale_x", + "qname": "sklearn.tree._export._MPLTreeExporter.recurse.scale_x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/recurse/scale_y", + "name": "scale_y", + "qname": "sklearn.tree._export._MPLTreeExporter.recurse.scale_y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/recurse/height", + "name": "height", + "qname": "sklearn.tree._export._MPLTreeExporter.recurse.height", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_MPLTreeExporter/recurse/depth", + "name": "depth", + "qname": "sklearn.tree._export._MPLTreeExporter.recurse.depth", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def recurse(self, node, tree, ax, scale_x, scale_y, height, depth=0):\n import matplotlib.pyplot as plt\n kwargs = dict(bbox=self.bbox_args.copy(), ha='center', va='center',\n zorder=100 - 10 * depth, xycoords='axes pixels',\n arrowprops=self.arrow_args.copy())\n kwargs['arrowprops']['edgecolor'] = plt.rcParams['text.color']\n\n if self.fontsize is not None:\n kwargs['fontsize'] = self.fontsize\n\n # offset things by .5 to center them in plot\n xy = ((node.x + .5) * scale_x, height - (node.y + .5) * scale_y)\n\n if self.max_depth is None or depth <= self.max_depth:\n if self.filled:\n kwargs['bbox']['fc'] = self.get_fill_color(tree,\n node.tree.node_id)\n else:\n kwargs['bbox']['fc'] = ax.get_facecolor()\n\n if node.parent is None:\n # root\n ax.annotate(node.tree.label, xy, **kwargs)\n else:\n xy_parent = ((node.parent.x + .5) * scale_x,\n height - (node.parent.y + .5) * scale_y)\n ax.annotate(node.tree.label, xy_parent, xy, **kwargs)\n for child in node.children:\n self.recurse(child, tree, ax, scale_x, scale_y, height,\n depth=depth + 1)\n\n else:\n xy_parent = ((node.parent.x + .5) * scale_x,\n height - (node.parent.y + .5) * scale_y)\n kwargs['bbox']['fc'] = 'grey'\n 
ax.annotate(\"\\n (...) \\n\", xy_parent, xy, **kwargs)" + }, + { + "id": "scikit-learn/sklearn.tree._export/_color_brew", + "name": "_color_brew", + "qname": "sklearn.tree._export._color_brew", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_color_brew/n", + "name": "n", + "qname": "sklearn.tree._export._color_brew.n", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The number of colors required." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate n colors with equally spaced hues.", + "docstring": "Generate n colors with equally spaced hues.\n\nParameters\n----------\nn : int\n The number of colors required.\n\nReturns\n-------\ncolor_list : list, length n\n List of n tuples of form (R, G, B) being the components of each color.", + "code": "def _color_brew(n):\n \"\"\"Generate n colors with equally spaced hues.\n\n Parameters\n ----------\n n : int\n The number of colors required.\n\n Returns\n -------\n color_list : list, length n\n List of n tuples of form (R, G, B) being the components of each color.\n \"\"\"\n color_list = []\n\n # Initialize saturation & value; calculate chroma & value shift\n s, v = 0.75, 0.9\n c = s * v\n m = v - c\n\n for h in np.arange(25, 385, 360. / n).astype(int):\n # Calculate some intermediate values\n h_bar = h / 60.\n x = c * (1 - abs((h_bar % 2) - 1))\n # Initialize RGB with same hue & chroma as our color\n rgb = [(c, x, 0),\n (x, c, 0),\n (0, c, x),\n (0, x, c),\n (x, 0, c),\n (c, 0, x),\n (c, x, 0)]\n r, g, b = rgb[int(h_bar)]\n # Shift the initial RGB values to match value and store\n rgb = [(int(255 * (r + m))),\n (int(255 * (g + m))),\n (int(255 * (b + m)))]\n color_list.append(rgb)\n\n return color_list" + }, + { + "id": "scikit-learn/sklearn.tree._export/_compute_depth", + "name": "_compute_depth", + "qname": "sklearn.tree._export._compute_depth", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/_compute_depth/tree", + "name": "tree", + "qname": "sklearn.tree._export._compute_depth.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._export/_compute_depth/node", + "name": "node", + "qname": "sklearn.tree._export._compute_depth.node", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the depth of the subtree rooted in node.", + "docstring": "Returns the depth of the subtree rooted in node.", + "code": "def _compute_depth(tree, node):\n \"\"\"\n Returns the depth of the subtree rooted in node.\n \"\"\"\n def compute_depth_(current_node, current_depth,\n children_left, children_right, depths):\n depths += [current_depth]\n left = children_left[current_node]\n right = children_right[current_node]\n if left != -1 and right != -1:\n compute_depth_(left, current_depth+1,\n children_left, children_right, depths)\n compute_depth_(right, current_depth+1,\n children_left, children_right, depths)\n\n depths = []\n compute_depth_(node, 1, tree.children_left, tree.children_right, 
depths)\n return max(depths)" + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz", + "name": "export_graphviz", + "qname": "sklearn.tree._export.export_graphviz", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/decision_tree", + "name": "decision_tree", + "qname": "sklearn.tree._export.export_graphviz.decision_tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "decision tree classifier", + "default_value": "", + "description": "The decision tree to be exported to GraphViz." + }, + "type": { + "kind": "NamedType", + "name": "decision tree classifier" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/out_file", + "name": "out_file", + "qname": "sklearn.tree._export.export_graphviz.out_file", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object or str", + "default_value": "None", + "description": "Handle or name of the output file. If ``None``, the result is\nreturned as a string.\n\n.. versionchanged:: 0.20\n Default of out_file changed from \"tree.dot\" to None." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "object" + }, + { + "kind": "NamedType", + "name": "str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/max_depth", + "name": "max_depth", + "qname": "sklearn.tree._export.export_graphviz.max_depth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum depth of the representation. If None, the tree is fully\ngenerated." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/feature_names", + "name": "feature_names", + "qname": "sklearn.tree._export.export_graphviz.feature_names", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of str", + "default_value": "None", + "description": "Names of each of the features.\nIf None generic names will be used (\"feature_0\", \"feature_1\", ...)." + }, + "type": { + "kind": "NamedType", + "name": "list of str" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/class_names", + "name": "class_names", + "qname": "sklearn.tree._export.export_graphviz.class_names", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of str or bool", + "default_value": "None", + "description": "Names of each of the target classes in ascending numerical order.\nOnly relevant for classification and not supported for multi-output.\nIf ``True``, shows a symbolic representation of the class name." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list of str" + }, + { + "kind": "NamedType", + "name": "bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/label", + "name": "label", + "qname": "sklearn.tree._export.export_graphviz.label", + "default_value": "'all'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'all', 'root', 'none'}", + "default_value": "'all'", + "description": "Whether to show informative labels for impurity, etc.\nOptions include 'all' to show at every node, 'root' to show only at\nthe top root node, or 'none' to not show at any node." + }, + "type": { + "kind": "EnumType", + "values": ["all", "root", "none"] + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/filled", + "name": "filled", + "qname": "sklearn.tree._export.export_graphviz.filled", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, paint nodes to indicate majority class for\nclassification, extremity of values for regression, or purity of node\nfor multi-output." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/leaves_parallel", + "name": "leaves_parallel", + "qname": "sklearn.tree._export.export_graphviz.leaves_parallel", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, draw all leaf nodes at the bottom of the tree." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/impurity", + "name": "impurity", + "qname": "sklearn.tree._export.export_graphviz.impurity", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "When set to ``True``, show the impurity at each node." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/node_ids", + "name": "node_ids", + "qname": "sklearn.tree._export.export_graphviz.node_ids", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, show the ID number on each node." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/proportion", + "name": "proportion", + "qname": "sklearn.tree._export.export_graphviz.proportion", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, change the display of 'values' and/or 'samples'\nto be proportions and percentages respectively." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/rotate", + "name": "rotate", + "qname": "sklearn.tree._export.export_graphviz.rotate", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, orient tree left to right rather than top-down." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/rounded", + "name": "rounded", + "qname": "sklearn.tree._export.export_graphviz.rounded", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, draw node boxes with rounded corners and use\nHelvetica fonts instead of Times-Roman." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/special_characters", + "name": "special_characters", + "qname": "sklearn.tree._export.export_graphviz.special_characters", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``False``, ignore special characters for PostScript\ncompatibility." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_graphviz/precision", + "name": "precision", + "qname": "sklearn.tree._export.export_graphviz.precision", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Number of digits of precision for floating point in the values of\nimpurity, threshold and value attributes of each node." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Export a decision tree in DOT format.\n\nThis function generates a GraphViz representation of the decision tree,\nwhich is then written into `out_file`. Once exported, graphical renderings\ncan be generated using, for example::\n\n $ dot -Tps tree.dot -o tree.ps (PostScript format)\n $ dot -Tpng tree.dot -o tree.png (PNG format)\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nRead more in the :ref:`User Guide `.", + "docstring": "Export a decision tree in DOT format.\n\nThis function generates a GraphViz representation of the decision tree,\nwhich is then written into `out_file`. Once exported, graphical renderings\ncan be generated using, for example::\n\n $ dot -Tps tree.dot -o tree.ps (PostScript format)\n $ dot -Tpng tree.dot -o tree.png (PNG format)\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndecision_tree : decision tree classifier\n The decision tree to be exported to GraphViz.\n\nout_file : object or str, default=None\n Handle or name of the output file. If ``None``, the result is\n returned as a string.\n\n .. versionchanged:: 0.20\n Default of out_file changed from \"tree.dot\" to None.\n\nmax_depth : int, default=None\n The maximum depth of the representation. 
If None, the tree is fully\n generated.\n\nfeature_names : list of str, default=None\n Names of each of the features.\n If None generic names will be used (\"feature_0\", \"feature_1\", ...).\n\nclass_names : list of str or bool, default=None\n Names of each of the target classes in ascending numerical order.\n Only relevant for classification and not supported for multi-output.\n If ``True``, shows a symbolic representation of the class name.\n\nlabel : {'all', 'root', 'none'}, default='all'\n Whether to show informative labels for impurity, etc.\n Options include 'all' to show at every node, 'root' to show only at\n the top root node, or 'none' to not show at any node.\n\nfilled : bool, default=False\n When set to ``True``, paint nodes to indicate majority class for\n classification, extremity of values for regression, or purity of node\n for multi-output.\n\nleaves_parallel : bool, default=False\n When set to ``True``, draw all leaf nodes at the bottom of the tree.\n\nimpurity : bool, default=True\n When set to ``True``, show the impurity at each node.\n\nnode_ids : bool, default=False\n When set to ``True``, show the ID number on each node.\n\nproportion : bool, default=False\n When set to ``True``, change the display of 'values' and/or 'samples'\n to be proportions and percentages respectively.\n\nrotate : bool, default=False\n When set to ``True``, orient tree left to right rather than top-down.\n\nrounded : bool, default=False\n When set to ``True``, draw node boxes with rounded corners and use\n Helvetica fonts instead of Times-Roman.\n\nspecial_characters : bool, default=False\n When set to ``False``, ignore special characters for PostScript\n compatibility.\n\nprecision : int, default=3\n Number of digits of precision for floating point in the values of\n impurity, threshold and value attributes of each node.\n\nReturns\n-------\ndot_data : string\n String representation of the input tree in GraphViz dot format.\n Only returned if ``out_file`` is None.\n\n .. versionadded:: 0.18\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn import tree\n\n>>> clf = tree.DecisionTreeClassifier()\n>>> iris = load_iris()\n\n>>> clf = clf.fit(iris.data, iris.target)\n>>> tree.export_graphviz(clf)\n'digraph Tree {...", + "code": "@_deprecate_positional_args\ndef export_graphviz(decision_tree, out_file=None, *, max_depth=None,\n feature_names=None, class_names=None, label='all',\n filled=False, leaves_parallel=False, impurity=True,\n node_ids=False, proportion=False, rotate=False,\n rounded=False, special_characters=False, precision=3):\n \"\"\"Export a decision tree in DOT format.\n\n This function generates a GraphViz representation of the decision tree,\n which is then written into `out_file`. Once exported, graphical renderings\n can be generated using, for example::\n\n $ dot -Tps tree.dot -o tree.ps (PostScript format)\n $ dot -Tpng tree.dot -o tree.png (PNG format)\n\n The sample counts that are shown are weighted with any sample_weights that\n might be present.\n\n Read more in the :ref:`User Guide <tree>`.\n\n Parameters\n ----------\n decision_tree : decision tree classifier\n The decision tree to be exported to GraphViz.\n\n out_file : object or str, default=None\n Handle or name of the output file. If ``None``, the result is\n returned as a string.\n\n .. versionchanged:: 0.20\n Default of out_file changed from \"tree.dot\" to None.\n\n max_depth : int, default=None\n The maximum depth of the representation. 
If None, the tree is fully\n generated.\n\n feature_names : list of str, default=None\n Names of each of the features.\n If None generic names will be used (\"feature_0\", \"feature_1\", ...).\n\n class_names : list of str or bool, default=None\n Names of each of the target classes in ascending numerical order.\n Only relevant for classification and not supported for multi-output.\n If ``True``, shows a symbolic representation of the class name.\n\n label : {'all', 'root', 'none'}, default='all'\n Whether to show informative labels for impurity, etc.\n Options include 'all' to show at every node, 'root' to show only at\n the top root node, or 'none' to not show at any node.\n\n filled : bool, default=False\n When set to ``True``, paint nodes to indicate majority class for\n classification, extremity of values for regression, or purity of node\n for multi-output.\n\n leaves_parallel : bool, default=False\n When set to ``True``, draw all leaf nodes at the bottom of the tree.\n\n impurity : bool, default=True\n When set to ``True``, show the impurity at each node.\n\n node_ids : bool, default=False\n When set to ``True``, show the ID number on each node.\n\n proportion : bool, default=False\n When set to ``True``, change the display of 'values' and/or 'samples'\n to be proportions and percentages respectively.\n\n rotate : bool, default=False\n When set to ``True``, orient tree left to right rather than top-down.\n\n rounded : bool, default=False\n When set to ``True``, draw node boxes with rounded corners and use\n Helvetica fonts instead of Times-Roman.\n\n special_characters : bool, default=False\n When set to ``False``, ignore special characters for PostScript\n compatibility.\n\n precision : int, default=3\n Number of digits of precision for floating point in the values of\n impurity, threshold and value attributes of each node.\n\n Returns\n -------\n dot_data : string\n String representation of the input tree in GraphViz dot format.\n Only returned if ``out_file`` is None.\n\n .. 
versionadded:: 0.18\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn import tree\n\n >>> clf = tree.DecisionTreeClassifier()\n >>> iris = load_iris()\n\n >>> clf = clf.fit(iris.data, iris.target)\n >>> tree.export_graphviz(clf)\n 'digraph Tree {...\n \"\"\"\n\n check_is_fitted(decision_tree)\n own_file = False\n return_string = False\n try:\n if isinstance(out_file, str):\n out_file = open(out_file, \"w\", encoding=\"utf-8\")\n own_file = True\n\n if out_file is None:\n return_string = True\n out_file = StringIO()\n\n exporter = _DOTTreeExporter(\n out_file=out_file, max_depth=max_depth,\n feature_names=feature_names, class_names=class_names, label=label,\n filled=filled, leaves_parallel=leaves_parallel, impurity=impurity,\n node_ids=node_ids, proportion=proportion, rotate=rotate,\n rounded=rounded, special_characters=special_characters,\n precision=precision)\n exporter.export(decision_tree)\n\n if return_string:\n return exporter.out_file.getvalue()\n\n finally:\n if own_file:\n out_file.close()" + }, + { + "id": "scikit-learn/sklearn.tree._export/export_text", + "name": "export_text", + "qname": "sklearn.tree._export.export_text", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/export_text/decision_tree", + "name": "decision_tree", + "qname": "sklearn.tree._export.export_text.decision_tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "The decision tree estimator to be exported.\nIt can be an instance of\nDecisionTreeClassifier or DecisionTreeRegressor." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_text/feature_names", + "name": "feature_names", + "qname": "sklearn.tree._export.export_text.feature_names", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of str", + "default_value": "None", + "description": "A list of length n_features containing the feature names.\nIf None generic names will be used (\"feature_0\", \"feature_1\", ...)." + }, + "type": { + "kind": "NamedType", + "name": "list of str" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_text/max_depth", + "name": "max_depth", + "qname": "sklearn.tree._export.export_text.max_depth", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Only the first max_depth levels of the tree are exported.\nTruncated branches will be marked with \"...\"." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_text/spacing", + "name": "spacing", + "qname": "sklearn.tree._export.export_text.spacing", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Number of spaces between edges. The higher it is, the wider the result." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_text/decimals", + "name": "decimals", + "qname": "sklearn.tree._export.export_text.decimals", + "default_value": "2", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "2", + "description": "Number of decimal digits to display." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/export_text/show_weights", + "name": "show_weights", + "qname": "sklearn.tree._export.export_text.show_weights", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If true the classification weights will be exported on each leaf.\nThe classification weights are the number of samples each class." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build a text report showing the rules of a decision tree.\n\nNote that backwards compatibility may not be supported.", + "docstring": "Build a text report showing the rules of a decision tree.\n\nNote that backwards compatibility may not be supported.\n\nParameters\n----------\ndecision_tree : object\n The decision tree estimator to be exported.\n It can be an instance of\n DecisionTreeClassifier or DecisionTreeRegressor.\n\nfeature_names : list of str, default=None\n A list of length n_features containing the feature names.\n If None generic names will be used (\"feature_0\", \"feature_1\", ...).\n\nmax_depth : int, default=10\n Only the first max_depth levels of the tree are exported.\n Truncated branches will be marked with \"...\".\n\nspacing : int, default=3\n Number of spaces between edges. 
The higher it is, the wider the result.\n\ndecimals : int, default=2\n Number of decimal digits to display.\n\nshow_weights : bool, default=False\n If true the classification weights will be exported on each leaf.\n The classification weights are the number of samples each class.\n\nReturns\n-------\nreport : string\n Text summary of all the rules in the decision tree.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.tree import DecisionTreeClassifier\n>>> from sklearn.tree import export_text\n>>> iris = load_iris()\n>>> X = iris['data']\n>>> y = iris['target']\n>>> decision_tree = DecisionTreeClassifier(random_state=0, max_depth=2)\n>>> decision_tree = decision_tree.fit(X, y)\n>>> r = export_text(decision_tree, feature_names=iris['feature_names'])\n>>> print(r)\n|--- petal width (cm) <= 0.80\n| |--- class: 0\n|--- petal width (cm) > 0.80\n| |--- petal width (cm) <= 1.75\n| | |--- class: 1\n| |--- petal width (cm) > 1.75\n| | |--- class: 2", + "code": "@_deprecate_positional_args\ndef export_text(decision_tree, *, feature_names=None, max_depth=10,\n spacing=3, decimals=2, show_weights=False):\n \"\"\"Build a text report showing the rules of a decision tree.\n\n Note that backwards compatibility may not be supported.\n\n Parameters\n ----------\n decision_tree : object\n The decision tree estimator to be exported.\n It can be an instance of\n DecisionTreeClassifier or DecisionTreeRegressor.\n\n feature_names : list of str, default=None\n A list of length n_features containing the feature names.\n If None generic names will be used (\"feature_0\", \"feature_1\", ...).\n\n max_depth : int, default=10\n Only the first max_depth levels of the tree are exported.\n Truncated branches will be marked with \"...\".\n\n spacing : int, default=3\n Number of spaces between edges. 
The higher it is, the wider the result.\n\n decimals : int, default=2\n Number of decimal digits to display.\n\n show_weights : bool, default=False\n If true the classification weights will be exported on each leaf.\n The classification weights are the number of samples each class.\n\n Returns\n -------\n report : string\n Text summary of all the rules in the decision tree.\n\n Examples\n --------\n\n >>> from sklearn.datasets import load_iris\n >>> from sklearn.tree import DecisionTreeClassifier\n >>> from sklearn.tree import export_text\n >>> iris = load_iris()\n >>> X = iris['data']\n >>> y = iris['target']\n >>> decision_tree = DecisionTreeClassifier(random_state=0, max_depth=2)\n >>> decision_tree = decision_tree.fit(X, y)\n >>> r = export_text(decision_tree, feature_names=iris['feature_names'])\n >>> print(r)\n |--- petal width (cm) <= 0.80\n | |--- class: 0\n |--- petal width (cm) > 0.80\n | |--- petal width (cm) <= 1.75\n | | |--- class: 1\n | |--- petal width (cm) > 1.75\n | | |--- class: 2\n \"\"\"\n check_is_fitted(decision_tree)\n tree_ = decision_tree.tree_\n if is_classifier(decision_tree):\n class_names = decision_tree.classes_\n right_child_fmt = \"{} {} <= {}\\n\"\n left_child_fmt = \"{} {} > {}\\n\"\n truncation_fmt = \"{} {}\\n\"\n\n if max_depth < 0:\n raise ValueError(\"max_depth bust be >= 0, given %d\" % max_depth)\n\n if (feature_names is not None and\n len(feature_names) != tree_.n_features):\n raise ValueError(\"feature_names must contain \"\n \"%d elements, got %d\" % (tree_.n_features,\n len(feature_names)))\n\n if spacing <= 0:\n raise ValueError(\"spacing must be > 0, given %d\" % spacing)\n\n if decimals < 0:\n raise ValueError(\"decimals must be >= 0, given %d\" % decimals)\n\n if isinstance(decision_tree, DecisionTreeClassifier):\n value_fmt = \"{}{} weights: {}\\n\"\n if not show_weights:\n value_fmt = \"{}{}{}\\n\"\n else:\n value_fmt = \"{}{} value: {}\\n\"\n\n if feature_names:\n feature_names_ = [feature_names[i] if i != _tree.TREE_UNDEFINED\n else None for i in tree_.feature]\n else:\n feature_names_ = [\"feature_{}\".format(i) for i in tree_.feature]\n\n export_text.report = \"\"\n\n def _add_leaf(value, class_name, indent):\n val = ''\n is_classification = isinstance(decision_tree,\n DecisionTreeClassifier)\n if show_weights or not is_classification:\n val = [\"{1:.{0}f}, \".format(decimals, v) for v in value]\n val = '['+''.join(val)[:-2]+']'\n if is_classification:\n val += ' class: ' + str(class_name)\n export_text.report += value_fmt.format(indent, '', val)\n\n def print_tree_recurse(node, depth):\n indent = (\"|\" + (\" \" * spacing)) * depth\n indent = indent[:-spacing] + \"-\" * spacing\n\n value = None\n if tree_.n_outputs == 1:\n value = tree_.value[node][0]\n else:\n value = tree_.value[node].T[0]\n class_name = np.argmax(value)\n\n if (tree_.n_classes[0] != 1 and\n tree_.n_outputs == 1):\n class_name = class_names[class_name]\n\n if depth <= max_depth+1:\n info_fmt = \"\"\n info_fmt_left = info_fmt\n info_fmt_right = info_fmt\n\n if tree_.feature[node] != _tree.TREE_UNDEFINED:\n name = feature_names_[node]\n threshold = tree_.threshold[node]\n threshold = \"{1:.{0}f}\".format(decimals, threshold)\n export_text.report += right_child_fmt.format(indent,\n name,\n threshold)\n export_text.report += info_fmt_left\n print_tree_recurse(tree_.children_left[node], depth+1)\n\n export_text.report += left_child_fmt.format(indent,\n name,\n threshold)\n export_text.report += info_fmt_right\n print_tree_recurse(tree_.children_right[node], depth+1)\n 
else: # leaf\n _add_leaf(value, class_name, indent)\n else:\n subtree_depth = _compute_depth(tree_, node)\n if subtree_depth == 1:\n _add_leaf(value, class_name, indent)\n else:\n trunc_report = 'truncated branch of depth %d' % subtree_depth\n export_text.report += truncation_fmt.format(indent,\n trunc_report)\n\n print_tree_recurse(0, 1)\n return export_text.report" + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree", + "name": "plot_tree", + "qname": "sklearn.tree._export.plot_tree", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/decision_tree", + "name": "decision_tree", + "qname": "sklearn.tree._export.plot_tree.decision_tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "decision tree regressor or classifier", + "default_value": "", + "description": "The decision tree to be plotted." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "decision tree regressor" + }, + { + "kind": "NamedType", + "name": "classifier" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/max_depth", + "name": "max_depth", + "qname": "sklearn.tree._export.plot_tree.max_depth", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum depth of the representation. If None, the tree is fully\ngenerated." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/feature_names", + "name": "feature_names", + "qname": "sklearn.tree._export.plot_tree.feature_names", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of strings", + "default_value": "None", + "description": "Names of each of the features.\nIf None, generic names will be used (\"X[0]\", \"X[1]\", ...)." + }, + "type": { + "kind": "NamedType", + "name": "list of strings" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/class_names", + "name": "class_names", + "qname": "sklearn.tree._export.plot_tree.class_names", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of str or bool", + "default_value": "None", + "description": "Names of each of the target classes in ascending numerical order.\nOnly relevant for classification and not supported for multi-output.\nIf ``True``, shows a symbolic representation of the class name." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list of str" + }, + { + "kind": "NamedType", + "name": "bool" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/label", + "name": "label", + "qname": "sklearn.tree._export.plot_tree.label", + "default_value": "'all'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "{'all', 'root', 'none'}", + "default_value": "'all'", + "description": "Whether to show informative labels for impurity, etc.\nOptions include 'all' to show at every node, 'root' to show only at\nthe top root node, or 'none' to not show at any node." 
+ }, + "type": { + "kind": "EnumType", + "values": ["all", "root", "none"] + } + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/filled", + "name": "filled", + "qname": "sklearn.tree._export.plot_tree.filled", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, paint nodes to indicate majority class for\nclassification, extremity of values for regression, or purity of node\nfor multi-output." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/impurity", + "name": "impurity", + "qname": "sklearn.tree._export.plot_tree.impurity", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "When set to ``True``, show the impurity at each node." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/node_ids", + "name": "node_ids", + "qname": "sklearn.tree._export.plot_tree.node_ids", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, show the ID number on each node." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/proportion", + "name": "proportion", + "qname": "sklearn.tree._export.plot_tree.proportion", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, change the display of 'values' and/or 'samples'\nto be proportions and percentages respectively." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/rotate", + "name": "rotate", + "qname": "sklearn.tree._export.plot_tree.rotate", + "default_value": "'deprecated'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "This parameter has no effect on the matplotlib tree visualisation and\nit is kept here for backward compatibility.\n\n.. deprecated:: 0.23\n ``rotate`` is deprecated in 0.23 and will be removed in 1.0\n (renaming of 0.25)." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/rounded", + "name": "rounded", + "qname": "sklearn.tree._export.plot_tree.rounded", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When set to ``True``, draw node boxes with rounded corners and use\nHelvetica fonts instead of Times-Roman." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/precision", + "name": "precision", + "qname": "sklearn.tree._export.plot_tree.precision", + "default_value": "3", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "3", + "description": "Number of digits of precision for floating point in the values of\nimpurity, threshold and value attributes of each node." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/ax", + "name": "ax", + "qname": "sklearn.tree._export.plot_tree.ax", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "matplotlib axis", + "default_value": "None", + "description": "Axes to plot to. If None, use current axis. Any previous content\nis cleared." + }, + "type": { + "kind": "NamedType", + "name": "matplotlib axis" + } + }, + { + "id": "scikit-learn/sklearn.tree._export/plot_tree/fontsize", + "name": "fontsize", + "qname": "sklearn.tree._export.plot_tree.fontsize", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Size of text font. If None, determined automatically to fit figure." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Plot a decision tree.\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nThe visualization is fit automatically to the size of the axis.\nUse the ``figsize`` or ``dpi`` arguments of ``plt.figure`` to control\nthe size of the rendering.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21", + "docstring": "Plot a decision tree.\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nThe visualization is fit automatically to the size of the axis.\nUse the ``figsize`` or ``dpi`` arguments of ``plt.figure`` to control\nthe size of the rendering.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\nParameters\n----------\ndecision_tree : decision tree regressor or classifier\n The decision tree to be plotted.\n\nmax_depth : int, default=None\n The maximum depth of the representation. If None, the tree is fully\n generated.\n\nfeature_names : list of strings, default=None\n Names of each of the features.\n If None, generic names will be used (\"X[0]\", \"X[1]\", ...).\n\nclass_names : list of str or bool, default=None\n Names of each of the target classes in ascending numerical order.\n Only relevant for classification and not supported for multi-output.\n If ``True``, shows a symbolic representation of the class name.\n\nlabel : {'all', 'root', 'none'}, default='all'\n Whether to show informative labels for impurity, etc.\n Options include 'all' to show at every node, 'root' to show only at\n the top root node, or 'none' to not show at any node.\n\nfilled : bool, default=False\n When set to ``True``, paint nodes to indicate majority class for\n classification, extremity of values for regression, or purity of node\n for multi-output.\n\nimpurity : bool, default=True\n When set to ``True``, show the impurity at each node.\n\nnode_ids : bool, default=False\n When set to ``True``, show the ID number on each node.\n\nproportion : bool, default=False\n When set to ``True``, change the display of 'values' and/or 'samples'\n to be proportions and percentages respectively.\n\nrotate : bool, default=False\n This parameter has no effect on the matplotlib tree visualisation and\n it is kept here for backward compatibility.\n\n .. 
deprecated:: 0.23\n ``rotate`` is deprecated in 0.23 and will be removed in 1.0\n (renaming of 0.25).\n\nrounded : bool, default=False\n When set to ``True``, draw node boxes with rounded corners and use\n Helvetica fonts instead of Times-Roman.\n\nprecision : int, default=3\n Number of digits of precision for floating point in the values of\n impurity, threshold and value attributes of each node.\n\nax : matplotlib axis, default=None\n Axes to plot to. If None, use current axis. Any previous content\n is cleared.\n\nfontsize : int, default=None\n Size of text font. If None, determined automatically to fit figure.\n\nReturns\n-------\nannotations : list of artists\n List containing the artists for the annotation boxes making up the\n tree.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn import tree\n\n>>> clf = tree.DecisionTreeClassifier(random_state=0)\n>>> iris = load_iris()\n\n>>> clf = clf.fit(iris.data, iris.target)\n>>> tree.plot_tree(clf) # doctest: +SKIP\n[Text(251.5,345.217,'X[3] <= 0.8...", + "code": "@_deprecate_positional_args\ndef plot_tree(decision_tree, *, max_depth=None, feature_names=None,\n class_names=None, label='all', filled=False,\n impurity=True, node_ids=False,\n proportion=False, rotate='deprecated', rounded=False,\n precision=3, ax=None, fontsize=None):\n \"\"\"Plot a decision tree.\n\n The sample counts that are shown are weighted with any sample_weights that\n might be present.\n\n The visualization is fit automatically to the size of the axis.\n Use the ``figsize`` or ``dpi`` arguments of ``plt.figure`` to control\n the size of the rendering.\n\n Read more in the :ref:`User Guide `.\n\n .. versionadded:: 0.21\n\n Parameters\n ----------\n decision_tree : decision tree regressor or classifier\n The decision tree to be plotted.\n\n max_depth : int, default=None\n The maximum depth of the representation. If None, the tree is fully\n generated.\n\n feature_names : list of strings, default=None\n Names of each of the features.\n If None, generic names will be used (\"X[0]\", \"X[1]\", ...).\n\n class_names : list of str or bool, default=None\n Names of each of the target classes in ascending numerical order.\n Only relevant for classification and not supported for multi-output.\n If ``True``, shows a symbolic representation of the class name.\n\n label : {'all', 'root', 'none'}, default='all'\n Whether to show informative labels for impurity, etc.\n Options include 'all' to show at every node, 'root' to show only at\n the top root node, or 'none' to not show at any node.\n\n filled : bool, default=False\n When set to ``True``, paint nodes to indicate majority class for\n classification, extremity of values for regression, or purity of node\n for multi-output.\n\n impurity : bool, default=True\n When set to ``True``, show the impurity at each node.\n\n node_ids : bool, default=False\n When set to ``True``, show the ID number on each node.\n\n proportion : bool, default=False\n When set to ``True``, change the display of 'values' and/or 'samples'\n to be proportions and percentages respectively.\n\n rotate : bool, default=False\n This parameter has no effect on the matplotlib tree visualisation and\n it is kept here for backward compatibility.\n\n .. 
deprecated:: 0.23\n ``rotate`` is deprecated in 0.23 and will be removed in 1.0\n (renaming of 0.25).\n\n rounded : bool, default=False\n When set to ``True``, draw node boxes with rounded corners and use\n Helvetica fonts instead of Times-Roman.\n\n precision : int, default=3\n Number of digits of precision for floating point in the values of\n impurity, threshold and value attributes of each node.\n\n ax : matplotlib axis, default=None\n Axes to plot to. If None, use current axis. Any previous content\n is cleared.\n\n fontsize : int, default=None\n Size of text font. If None, determined automatically to fit figure.\n\n Returns\n -------\n annotations : list of artists\n List containing the artists for the annotation boxes making up the\n tree.\n\n Examples\n --------\n >>> from sklearn.datasets import load_iris\n >>> from sklearn import tree\n\n >>> clf = tree.DecisionTreeClassifier(random_state=0)\n >>> iris = load_iris()\n\n >>> clf = clf.fit(iris.data, iris.target)\n >>> tree.plot_tree(clf) # doctest: +SKIP\n [Text(251.5,345.217,'X[3] <= 0.8...\n\n \"\"\"\n\n check_is_fitted(decision_tree)\n\n if rotate != 'deprecated':\n warnings.warn((\"'rotate' has no effect and is deprecated in 0.23. \"\n \"It will be removed in 1.0 (renaming of 0.25).\"),\n FutureWarning)\n\n exporter = _MPLTreeExporter(\n max_depth=max_depth, feature_names=feature_names,\n class_names=class_names, label=label, filled=filled,\n impurity=impurity, node_ids=node_ids,\n proportion=proportion, rotate=rotate, rounded=rounded,\n precision=precision, fontsize=fontsize)\n return exporter.export(decision_tree, ax=ax)" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/__init__", + "name": "__init__", + "qname": "sklearn.tree._reingold_tilford.DrawTree.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/__init__/self", + "name": "self", + "qname": "sklearn.tree._reingold_tilford.DrawTree.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/__init__/tree", + "name": "tree", + "qname": "sklearn.tree._reingold_tilford.DrawTree.__init__.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/__init__/parent", + "name": "parent", + "qname": "sklearn.tree._reingold_tilford.DrawTree.__init__.parent", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/__init__/depth", + "name": "depth", + "qname": "sklearn.tree._reingold_tilford.DrawTree.__init__.depth", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/__init__/number", + "name": "number", + "qname": "sklearn.tree._reingold_tilford.DrawTree.__init__.number", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + 
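A minimal usage sketch for `plot_tree`, following the docstring's advice to size the rendering through `plt.figure`; matplotlib is assumed to be installed, and the figure dimensions are illustrative:

import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn import tree

iris = load_iris()
clf = tree.DecisionTreeClassifier(random_state=0, max_depth=2)
clf = clf.fit(iris.data, iris.target)

# plot_tree scales the drawing to the axis, so control the output size
# with figsize/dpi on the figure rather than on plot_tree itself.
plt.figure(figsize=(10, 6), dpi=100)
annotations = tree.plot_tree(clf, feature_names=iris.feature_names,
                             class_names=list(iris.target_names),
                             filled=True, rounded=True, precision=2)
plt.show()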
"type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __init__(self, tree, parent=None, depth=0, number=1):\n self.x = -1.\n self.y = depth\n self.tree = tree\n self.children = [DrawTree(c, self, depth + 1, i + 1)\n for i, c\n in enumerate(tree.children)]\n self.parent = parent\n self.thread = None\n self.mod = 0\n self.ancestor = self\n self.change = self.shift = 0\n self._lmost_sibling = None\n # this is the number of the node in its group of siblings 1..n\n self.number = number" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/__repr__", + "name": "__repr__", + "qname": "sklearn.tree._reingold_tilford.DrawTree.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/__repr__/self", + "name": "self", + "qname": "sklearn.tree._reingold_tilford.DrawTree.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n return self.__str__()" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/__str__", + "name": "__str__", + "qname": "sklearn.tree._reingold_tilford.DrawTree.__str__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/__str__/self", + "name": "self", + "qname": "sklearn.tree._reingold_tilford.DrawTree.__str__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __str__(self):\n return \"%s: x=%s mod=%s\" % (self.tree, self.x, self.mod)" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/get_lmost_sibling", + "name": "get_lmost_sibling", + "qname": "sklearn.tree._reingold_tilford.DrawTree.get_lmost_sibling", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/get_lmost_sibling/self", + "name": "self", + "qname": "sklearn.tree._reingold_tilford.DrawTree.get_lmost_sibling.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_lmost_sibling(self):\n if not self._lmost_sibling and self.parent and self != \\\n self.parent.children[0]:\n self._lmost_sibling = self.parent.children[0]\n return self._lmost_sibling" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/lbrother", + "name": "lbrother", + "qname": "sklearn.tree._reingold_tilford.DrawTree.lbrother", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/lbrother/self", + "name": "self", + "qname": "sklearn.tree._reingold_tilford.DrawTree.lbrother.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": 
"", + "docstring": "", + "code": " def lbrother(self):\n n = None\n if self.parent:\n for node in self.parent.children:\n if node == self:\n return n\n else:\n n = node\n return n" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/left", + "name": "left", + "qname": "sklearn.tree._reingold_tilford.DrawTree.left", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/left/self", + "name": "self", + "qname": "sklearn.tree._reingold_tilford.DrawTree.left.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def left(self):\n return self.thread or len(self.children) and self.children[0]" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/max_extents", + "name": "max_extents", + "qname": "sklearn.tree._reingold_tilford.DrawTree.max_extents", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/max_extents/self", + "name": "self", + "qname": "sklearn.tree._reingold_tilford.DrawTree.max_extents.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def max_extents(self):\n extents = [c.max_extents() for c in self. children]\n extents.append((self.x, self.y))\n return np.max(extents, axis=0)" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/right", + "name": "right", + "qname": "sklearn.tree._reingold_tilford.DrawTree.right", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/DrawTree/right/self", + "name": "self", + "qname": "sklearn.tree._reingold_tilford.DrawTree.right.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def right(self):\n return self.thread or len(self.children) and self.children[-1]" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/Tree/__init__", + "name": "__init__", + "qname": "sklearn.tree._reingold_tilford.Tree.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/Tree/__init__/self", + "name": "self", + "qname": "sklearn.tree._reingold_tilford.Tree.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/Tree/__init__/label", + "name": "label", + "qname": "sklearn.tree._reingold_tilford.Tree.__init__.label", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/Tree/__init__/node_id", + "name": "node_id", + "qname": "sklearn.tree._reingold_tilford.Tree.__init__.node_id", + "default_value": "-1", + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/Tree/__init__/children", + "name": "children", + "qname": "sklearn.tree._reingold_tilford.Tree.__init__.children", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __init__(self, label=\"\", node_id=-1, *children):\n self.label = label\n self.node_id = node_id\n if children:\n self.children = children\n else:\n self.children = []" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/ancestor", + "name": "ancestor", + "qname": "sklearn.tree._reingold_tilford.ancestor", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/ancestor/vil", + "name": "vil", + "qname": "sklearn.tree._reingold_tilford.ancestor.vil", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/ancestor/v", + "name": "v", + "qname": "sklearn.tree._reingold_tilford.ancestor.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/ancestor/default_ancestor", + "name": "default_ancestor", + "qname": "sklearn.tree._reingold_tilford.ancestor.default_ancestor", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def ancestor(vil, v, default_ancestor):\n # the relevant text is at the bottom of page 7 of\n # \"Improving Walker's Algorithm to Run in Linear Time\" by Buchheim et al,\n # (2002)\n # http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.16.8757&rep=rep1&type=pdf\n if vil.ancestor in v.parent.children:\n return vil.ancestor\n else:\n return default_ancestor" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/apportion", + "name": "apportion", + "qname": "sklearn.tree._reingold_tilford.apportion", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/apportion/v", + "name": "v", + "qname": "sklearn.tree._reingold_tilford.apportion.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/apportion/default_ancestor", + "name": "default_ancestor", + "qname": "sklearn.tree._reingold_tilford.apportion.default_ancestor", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/apportion/distance", + "name": "distance", + "qname": "sklearn.tree._reingold_tilford.apportion.distance", + "default_value": null, + 
"assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def apportion(v, default_ancestor, distance):\n w = v.lbrother()\n if w is not None:\n # in buchheim notation:\n # i == inner; o == outer; r == right; l == left; r = +; l = -\n vir = vor = v\n vil = w\n vol = v.lmost_sibling\n sir = sor = v.mod\n sil = vil.mod\n sol = vol.mod\n while vil.right() and vir.left():\n vil = vil.right()\n vir = vir.left()\n vol = vol.left()\n vor = vor.right()\n vor.ancestor = v\n shift = (vil.x + sil) - (vir.x + sir) + distance\n if shift > 0:\n move_subtree(ancestor(vil, v, default_ancestor), v, shift)\n sir = sir + shift\n sor = sor + shift\n sil += vil.mod\n sir += vir.mod\n sol += vol.mod\n sor += vor.mod\n if vil.right() and not vor.right():\n vor.thread = vil.right()\n vor.mod += sil - sor\n else:\n if vir.left() and not vol.left():\n vol.thread = vir.left()\n vol.mod += sir - sol\n default_ancestor = v\n return default_ancestor" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/buchheim", + "name": "buchheim", + "qname": "sklearn.tree._reingold_tilford.buchheim", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/buchheim/tree", + "name": "tree", + "qname": "sklearn.tree._reingold_tilford.buchheim.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def buchheim(tree):\n dt = first_walk(DrawTree(tree))\n min = second_walk(dt)\n if min < 0:\n third_walk(dt, -min)\n return dt" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/execute_shifts", + "name": "execute_shifts", + "qname": "sklearn.tree._reingold_tilford.execute_shifts", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/execute_shifts/v", + "name": "v", + "qname": "sklearn.tree._reingold_tilford.execute_shifts.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def execute_shifts(v):\n shift = change = 0\n for w in v.children[::-1]:\n # print(\"shift:\", w, shift, w.change)\n w.x += shift\n w.mod += shift\n change += w.change\n shift += w.shift + change" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/first_walk", + "name": "first_walk", + "qname": "sklearn.tree._reingold_tilford.first_walk", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/first_walk/v", + "name": "v", + "qname": "sklearn.tree._reingold_tilford.first_walk.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/first_walk/distance", + "name": "distance", + "qname": "sklearn.tree._reingold_tilford.first_walk.distance", + "default_value": "1.0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + 
"type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def first_walk(v, distance=1.):\n if len(v.children) == 0:\n if v.lmost_sibling:\n v.x = v.lbrother().x + distance\n else:\n v.x = 0.\n else:\n default_ancestor = v.children[0]\n for w in v.children:\n first_walk(w)\n default_ancestor = apportion(w, default_ancestor, distance)\n # print(\"finished v =\", v.tree, \"children\")\n execute_shifts(v)\n\n midpoint = (v.children[0].x + v.children[-1].x) / 2\n\n w = v.lbrother()\n if w:\n v.x = w.x + distance\n v.mod = v.x - midpoint\n else:\n v.x = midpoint\n return v" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/move_subtree", + "name": "move_subtree", + "qname": "sklearn.tree._reingold_tilford.move_subtree", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/move_subtree/wl", + "name": "wl", + "qname": "sklearn.tree._reingold_tilford.move_subtree.wl", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/move_subtree/wr", + "name": "wr", + "qname": "sklearn.tree._reingold_tilford.move_subtree.wr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/move_subtree/shift", + "name": "shift", + "qname": "sklearn.tree._reingold_tilford.move_subtree.shift", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def move_subtree(wl, wr, shift):\n subtrees = wr.number - wl.number\n # print(wl.tree, \"is conflicted with\", wr.tree, 'moving', subtrees,\n # 'shift', shift)\n # print wl, wr, wr.number, wl.number, shift, subtrees, shift/subtrees\n wr.change -= shift / subtrees\n wr.shift += shift\n wl.change += shift / subtrees\n wr.x += shift\n wr.mod += shift" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/second_walk", + "name": "second_walk", + "qname": "sklearn.tree._reingold_tilford.second_walk", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/second_walk/v", + "name": "v", + "qname": "sklearn.tree._reingold_tilford.second_walk.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/second_walk/m", + "name": "m", + "qname": "sklearn.tree._reingold_tilford.second_walk.m", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/second_walk/depth", + "name": "depth", + "qname": "sklearn.tree._reingold_tilford.second_walk.depth", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" 
+ }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/second_walk/min", + "name": "min", + "qname": "sklearn.tree._reingold_tilford.second_walk.min", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def second_walk(v, m=0, depth=0, min=None):\n v.x += m\n v.y = depth\n\n if min is None or v.x < min:\n min = v.x\n\n for w in v.children:\n min = second_walk(w, m + v.mod, depth + 1, min)\n\n return min" + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/third_walk", + "name": "third_walk", + "qname": "sklearn.tree._reingold_tilford.third_walk", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/third_walk/tree", + "name": "tree", + "qname": "sklearn.tree._reingold_tilford.third_walk.tree", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree._reingold_tilford/third_walk/n", + "name": "n", + "qname": "sklearn.tree._reingold_tilford.third_walk.n", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def third_walk(tree, n):\n tree.x += n\n for c in tree.children:\n third_walk(c, n)" + }, + { + "id": "scikit-learn/sklearn.tree.setup/configuration", + "name": "configuration", + "qname": "sklearn.tree.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.tree.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.tree.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.tree.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.tree.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package=\"\", top_path=None):\n config = Configuration(\"tree\", parent_package, top_path)\n libraries = []\n if os.name == 'posix':\n libraries.append('m')\n config.add_extension(\"_tree\",\n sources=[\"_tree.pyx\"],\n include_dirs=[numpy.get_include()],\n libraries=libraries,\n extra_compile_args=[\"-O3\"])\n config.add_extension(\"_splitter\",\n sources=[\"_splitter.pyx\"],\n include_dirs=[numpy.get_include()],\n libraries=libraries,\n extra_compile_args=[\"-O3\"])\n config.add_extension(\"_criterion\",\n sources=[\"_criterion.pyx\"],\n include_dirs=[numpy.get_include()],\n libraries=libraries,\n extra_compile_args=[\"-O3\"])\n config.add_extension(\"_utils\",\n sources=[\"_utils.pyx\"],\n include_dirs=[numpy.get_include()],\n libraries=libraries,\n extra_compile_args=[\"-O3\"])\n\n 
config.add_subpackage(\"tests\")\n\n return config" + }, + { + "id": "scikit-learn/sklearn.utils._arpack/_init_arpack_v0", + "name": "_init_arpack_v0", + "qname": "sklearn.utils._arpack._init_arpack_v0", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._arpack/_init_arpack_v0/size", + "name": "size", + "qname": "sklearn.utils._arpack._init_arpack_v0.size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "The size of the eigenvalue vector to be initialized." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils._arpack/_init_arpack_v0/random_state", + "name": "random_state", + "qname": "sklearn.utils._arpack._init_arpack_v0.random_state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "The seed of the pseudo random number generator used to generate a\nuniform distribution. If int, random_state is the seed used by the\nrandom number generator; If RandomState instance, random_state is the\nrandom number generator; If None, the random number generator is the\nRandomState instance used by `np.random`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Initialize the starting vector for iteration in ARPACK functions.\n\nInitialize a ndarray with values sampled from the uniform distribution on\n[-1, 1]. This initialization model has been chosen to be consistent with\nthe ARPACK one as another initialization can lead to convergence issues.", + "docstring": "Initialize the starting vector for iteration in ARPACK functions.\n\nInitialize a ndarray with values sampled from the uniform distribution on\n[-1, 1]. This initialization model has been chosen to be consistent with\nthe ARPACK one as another initialization can lead to convergence issues.\n\nParameters\n----------\nsize : int\n The size of the eigenvalue vector to be initialized.\n\nrandom_state : int, RandomState instance or None, default=None\n The seed of the pseudo random number generator used to generate a\n uniform distribution. If int, random_state is the seed used by the\n random number generator; If RandomState instance, random_state is the\n random number generator; If None, the random number generator is the\n RandomState instance used by `np.random`.\n\nReturns\n-------\nv0 : ndarray of shape (size,)\n The initialized vector.", + "code": "def _init_arpack_v0(size, random_state):\n \"\"\"Initialize the starting vector for iteration in ARPACK functions.\n\n Initialize a ndarray with values sampled from the uniform distribution on\n [-1, 1]. This initialization model has been chosen to be consistent with\n the ARPACK one as another initialization can lead to convergence issues.\n\n Parameters\n ----------\n size : int\n The size of the eigenvalue vector to be initialized.\n\n random_state : int, RandomState instance or None, default=None\n The seed of the pseudo random number generator used to generate a\n uniform distribution. 
If int, random_state is the seed used by the\n random number generator; If RandomState instance, random_state is the\n random number generator; If None, the random number generator is the\n RandomState instance used by `np.random`.\n\n Returns\n -------\n v0 : ndarray of shape (size,)\n The initialized vector.\n \"\"\"\n random_state = check_random_state(random_state)\n v0 = random_state.uniform(-1, 1, size)\n return v0" + }, + { + "id": "scikit-learn/sklearn.utils._encode/MissingValues/to_list", + "name": "to_list", + "qname": "sklearn.utils._encode.MissingValues.to_list", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._encode/MissingValues/to_list/self", + "name": "self", + "qname": "sklearn.utils._encode.MissingValues.to_list.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Convert tuple to a list where None is always first.", + "docstring": "Convert tuple to a list where None is always first.", + "code": " def to_list(self):\n \"\"\"Convert tuple to a list where None is always first.\"\"\"\n output = []\n if self.none:\n output.append(None)\n if self.nan:\n output.append(np.nan)\n return output" + }, + { + "id": "scikit-learn/sklearn.utils._encode/_check_unknown", + "name": "_check_unknown", + "qname": "sklearn.utils._encode._check_unknown", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._encode/_check_unknown/values", + "name": "values", + "qname": "sklearn.utils._encode._check_unknown.values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array", + "default_value": "", + "description": "Values to check for unknowns." + }, + "type": { + "kind": "NamedType", + "name": "array" + } + }, + { + "id": "scikit-learn/sklearn.utils._encode/_check_unknown/known_values", + "name": "known_values", + "qname": "sklearn.utils._encode._check_unknown.known_values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array", + "default_value": "", + "description": "Known values. Must be unique." + }, + "type": { + "kind": "NamedType", + "name": "array" + } + }, + { + "id": "scikit-learn/sklearn.utils._encode/_check_unknown/return_mask", + "name": "return_mask", + "qname": "sklearn.utils._encode._check_unknown.return_mask", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, return a mask of the same shape as `values` indicating\nthe valid values." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper function to check for unknowns in values to be encoded.\n\nUses pure python method for object dtype, and numpy method for\nall other dtypes.", + "docstring": "Helper function to check for unknowns in values to be encoded.\n\nUses pure python method for object dtype, and numpy method for\nall other dtypes.\n\nParameters\n----------\nvalues : array\n Values to check for unknowns.\nknown_values : array\n Known values. 
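`_init_arpack_v0` is a private helper, but its effect is easy to reproduce with the public `check_random_state`; the eigensolver call below is an illustrative use of a fixed ARPACK starting vector, not part of the recorded API:

import numpy as np
from scipy.sparse.linalg import eigsh
from sklearn.utils import check_random_state
from sklearn.utils._arpack import _init_arpack_v0  # private helper

A = np.diag(np.arange(1.0, 6.0))
v0 = _init_arpack_v0(A.shape[0], random_state=0)

# Same vector via the public API: uniform draws on [-1, 1].
assert np.allclose(v0, check_random_state(0).uniform(-1, 1, A.shape[0]))

# A fixed v0 makes repeated ARPACK runs reproducible.
eigenvalues, _ = eigsh(A, k=2, v0=v0)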
Must be unique.\nreturn_mask : bool, default=False\n If True, return a mask of the same shape as `values` indicating\n the valid values.\n\nReturns\n-------\ndiff : list\n The unique values present in `values` and not in `known_values`.\nvalid_mask : boolean array\n Additionally returned if ``return_mask=True``.", + "code": "def _check_unknown(values, known_values, return_mask=False):\n \"\"\"\n Helper function to check for unknowns in values to be encoded.\n\n Uses pure python method for object dtype, and numpy method for\n all other dtypes.\n\n Parameters\n ----------\n values : array\n Values to check for unknowns.\n known_values : array\n Known values. Must be unique.\n return_mask : bool, default=False\n If True, return a mask of the same shape as `values` indicating\n the valid values.\n\n Returns\n -------\n diff : list\n The unique values present in `values` and not in `known_values`.\n valid_mask : boolean array\n Additionally returned if ``return_mask=True``.\n\n \"\"\"\n valid_mask = None\n\n if values.dtype.kind in 'OUS':\n values_set = set(values)\n values_set, missing_in_values = _extract_missing(values_set)\n\n uniques_set = set(known_values)\n uniques_set, missing_in_uniques = _extract_missing(uniques_set)\n diff = values_set - uniques_set\n\n nan_in_diff = missing_in_values.nan and not missing_in_uniques.nan\n none_in_diff = missing_in_values.none and not missing_in_uniques.none\n\n def is_valid(value):\n return (value in uniques_set or\n missing_in_uniques.none and value is None or\n missing_in_uniques.nan and is_scalar_nan(value))\n\n if return_mask:\n if diff or nan_in_diff or none_in_diff:\n valid_mask = np.array([is_valid(value) for value in values])\n else:\n valid_mask = np.ones(len(values), dtype=bool)\n\n diff = list(diff)\n if none_in_diff:\n diff.append(None)\n if nan_in_diff:\n diff.append(np.nan)\n else:\n unique_values = np.unique(values)\n diff = np.setdiff1d(unique_values, known_values,\n assume_unique=True)\n if return_mask:\n if diff.size:\n valid_mask = np.in1d(values, known_values)\n else:\n valid_mask = np.ones(len(values), dtype=bool)\n\n # check for nans in the known_values\n if np.isnan(known_values).any():\n diff_is_nan = np.isnan(diff)\n if diff_is_nan.any():\n # removes nan from valid_mask\n if diff.size and return_mask:\n is_nan = np.isnan(values)\n valid_mask[is_nan] = 1\n\n # remove nan from diff\n diff = diff[~diff_is_nan]\n diff = list(diff)\n\n if return_mask:\n return diff, valid_mask\n return diff" + }, + { + "id": "scikit-learn/sklearn.utils._encode/_encode", + "name": "_encode", + "qname": "sklearn.utils._encode._encode", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._encode/_encode/values", + "name": "values", + "qname": "sklearn.utils._encode._encode.values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "Values to encode." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.utils._encode/_encode/uniques", + "name": "uniques", + "qname": "sklearn.utils._encode._encode.uniques", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "The unique values in `values`. If the dtype is not object, then\n`uniques` needs to be sorted."
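Behaviour of `_check_unknown` on object arrays, including the missing-value bookkeeping above (`None` is appended to `diff` after the ordinary categories); the helper is private, so this is only an illustrative sketch:

import numpy as np
from sklearn.utils._encode import _check_unknown  # private helper

values = np.array(["a", "b", "c", None], dtype=object)
known = np.array(["a", "b"], dtype=object)

diff, valid_mask = _check_unknown(values, known, return_mask=True)
print(diff)        # ['c', None] -- categories absent from `known`
print(valid_mask)  # [ True  True False False]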
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.utils._encode/_encode/check_unknown", + "name": "check_unknown", + "qname": "sklearn.utils._encode._encode.check_unknown", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, check for values in `values` that are not in `unique`\nand raise an error. This is ignored for object dtype, and treated as\nTrue in this case. This parameter is useful for\n_BaseEncoder._transform() to avoid calling _check_unknown()\ntwice." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper function to encode values into [0, n_uniques - 1].\n\nUses pure python method for object dtype, and numpy method for\nall other dtypes.\nThe numpy method has the limitation that the `uniques` need to\nbe sorted. Importantly, this is not checked but assumed to already be\nthe case. The calling method needs to ensure this for all non-object\nvalues.", + "docstring": "Helper function to encode values into [0, n_uniques - 1].\n\nUses pure python method for object dtype, and numpy method for\nall other dtypes.\nThe numpy method has the limitation that the `uniques` need to\nbe sorted. Importantly, this is not checked but assumed to already be\nthe case. The calling method needs to ensure this for all non-object\nvalues.\n\nParameters\n----------\nvalues : ndarray\n Values to encode.\nuniques : ndarray\n The unique values in `values`. If the dtype is not object, then\n `uniques` needs to be sorted.\ncheck_unknown : bool, default=True\n If True, check for values in `values` that are not in `unique`\n and raise an error. This is ignored for object dtype, and treated as\n True in this case. This parameter is useful for\n _BaseEncoder._transform() to avoid calling _check_unknown()\n twice.\n\nReturns\n-------\nencoded : ndarray\n Encoded values", + "code": "def _encode(values, *, uniques, check_unknown=True):\n \"\"\"Helper function to encode values into [0, n_uniques - 1].\n\n Uses pure python method for object dtype, and numpy method for\n all other dtypes.\n The numpy method has the limitation that the `uniques` need to\n be sorted. Importantly, this is not checked but assumed to already be\n the case. The calling method needs to ensure this for all non-object\n values.\n\n Parameters\n ----------\n values : ndarray\n Values to encode.\n uniques : ndarray\n The unique values in `values`. If the dtype is not object, then\n `uniques` needs to be sorted.\n check_unknown : bool, default=True\n If True, check for values in `values` that are not in `unique`\n and raise an error. This is ignored for object dtype, and treated as\n True in this case. 
This parameter is useful for\n _BaseEncoder._transform() to avoid calling _check_unknown()\n twice.\n\n Returns\n -------\n encoded : ndarray\n Encoded values\n \"\"\"\n if values.dtype.kind in 'OUS':\n try:\n return _map_to_integer(values, uniques)\n except KeyError as e:\n raise ValueError(f\"y contains previously unseen labels: {str(e)}\")\n else:\n if check_unknown:\n diff = _check_unknown(values, uniques)\n if diff:\n raise ValueError(f\"y contains previously unseen labels: \"\n f\"{str(diff)}\")\n return np.searchsorted(uniques, values)" + }, + { + "id": "scikit-learn/sklearn.utils._encode/_extract_missing", + "name": "_extract_missing", + "qname": "sklearn.utils._encode._extract_missing", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._encode/_extract_missing/values", + "name": "values", + "qname": "sklearn.utils._encode._extract_missing.values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Extract missing values from `values`.", + "docstring": "Extract missing values from `values`.\n\nParameters\n----------\nvalues: set\n Set of values to extract missing from.\n\nReturns\n-------\noutput: set\n Set with missing values extracted.\n\nmissing_values: MissingValues\n Object with missing value information.", + "code": "def _extract_missing(values):\n \"\"\"Extract missing values from `values`.\n\n Parameters\n ----------\n values: set\n Set of values to extract missing from.\n\n Returns\n -------\n output: set\n Set with missing values extracted.\n\n missing_values: MissingValues\n Object with missing value information.\n \"\"\"\n missing_values_set = {value for value in values\n if value is None or is_scalar_nan(value)}\n\n if not missing_values_set:\n return values, MissingValues(nan=False, none=False)\n\n if None in missing_values_set:\n if len(missing_values_set) == 1:\n output_missing_values = MissingValues(nan=False, none=True)\n else:\n # If there is more than one missing value, then it has to be\n # float('nan') or np.nan\n output_missing_values = MissingValues(nan=True, none=True)\n else:\n output_missing_values = MissingValues(nan=True, none=False)\n\n # create set without the missing values\n output = values - missing_values_set\n return output, output_missing_values" + }, + { + "id": "scikit-learn/sklearn.utils._encode/_map_to_integer", + "name": "_map_to_integer", + "qname": "sklearn.utils._encode._map_to_integer", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._encode/_map_to_integer/values", + "name": "values", + "qname": "sklearn.utils._encode._map_to_integer.values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._encode/_map_to_integer/uniques", + "name": "uniques", + "qname": "sklearn.utils._encode._map_to_integer.uniques", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Map values based on its position in uniques.", + "docstring": "Map values based on its position in uniques.", + "code": "def 
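A sketch of the `_encode` contract (sorted `uniques` for numeric dtypes, `ValueError` on unseen values) and of `_extract_missing`; both are private helpers whose import paths may change:

import numpy as np
from sklearn.utils._encode import _encode, _extract_missing  # private helpers

# Codes land in [0, n_uniques - 1]; `uniques` must already be sorted here.
print(_encode(np.array([10, 30, 10, 20]), uniques=np.array([10, 20, 30])))
# -> [0 2 0 1]

try:
    _encode(np.array([10, 40]), uniques=np.array([10, 20, 30]))
except ValueError as exc:
    print(exc)  # y contains previously unseen labels: [40]

# _extract_missing splits None/NaN out of a set of candidate categories.
rest, missing = _extract_missing({"a", None, float("nan")})
print(rest, missing)  # {'a'} MissingValues(nan=True, none=True)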
_map_to_integer(values, uniques):\n \"\"\"Map values based on its position in uniques.\"\"\"\n table = _nandict({val: i for i, val in enumerate(uniques)})\n return np.array([table[v] for v in values])" + }, + { + "id": "scikit-learn/sklearn.utils._encode/_nandict/__init__", + "name": "__init__", + "qname": "sklearn.utils._encode._nandict.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._encode/_nandict/__init__/self", + "name": "self", + "qname": "sklearn.utils._encode._nandict.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._encode/_nandict/__init__/mapping", + "name": "mapping", + "qname": "sklearn.utils._encode._nandict.__init__.mapping", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Dictionary with support for nans.", + "docstring": "", + "code": " def __init__(self, mapping):\n super().__init__(mapping)\n for key, value in mapping.items():\n if is_scalar_nan(key):\n self.nan_value = value\n break" + }, + { + "id": "scikit-learn/sklearn.utils._encode/_nandict/__missing__", + "name": "__missing__", + "qname": "sklearn.utils._encode._nandict.__missing__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._encode/_nandict/__missing__/self", + "name": "self", + "qname": "sklearn.utils._encode._nandict.__missing__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._encode/_nandict/__missing__/key", + "name": "key", + "qname": "sklearn.utils._encode._nandict.__missing__.key", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __missing__(self, key):\n if hasattr(self, 'nan_value') and is_scalar_nan(key):\n return self.nan_value\n raise KeyError(key)" + }, + { + "id": "scikit-learn/sklearn.utils._encode/_unique", + "name": "_unique", + "qname": "sklearn.utils._encode._unique", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._encode/_unique/values", + "name": "values", + "qname": "sklearn.utils._encode._unique.values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "Values to check for unknowns." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.utils._encode/_unique/return_inverse", + "name": "return_inverse", + "qname": "sklearn.utils._encode._unique.return_inverse", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, also return the indices of the unique values." 
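`_nandict.__missing__` is what lets NaN keys resolve even though `nan != nan`, and `_map_to_integer` relies on it for object arrays. An illustrative sketch against these private helpers:

import numpy as np
from sklearn.utils._encode import _map_to_integer, _nandict  # private helpers

table = _nandict({"a": 0, np.nan: 1})
# float("nan") is a different NaN object, so the plain dict lookup misses
# and __missing__ falls back to the stored nan_value.
print(table[float("nan")])  # 1

print(_map_to_integer(np.array(["b", "a", "b"], dtype=object),
                      np.array(["a", "b"], dtype=object)))  # [1 0 1]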
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper function to find unique values with support for python objects.\n\nUses pure python method for object dtype, and numpy method for\nall other dtypes.", + "docstring": "Helper function to find unique values with support for python objects.\n\nUses pure python method for object dtype, and numpy method for\nall other dtypes.\n\nParameters\n----------\nvalues : ndarray\n Values to check for unknowns.\n\nreturn_inverse : bool, default=False\n If True, also return the indices of the unique values.\n\nReturns\n-------\nunique : ndarray\n The sorted unique values.\n\nunique_inverse : ndarray\n The indices to reconstruct the original array from the unique array.\n Only provided if `return_inverse` is True.", + "code": "def _unique(values, *, return_inverse=False):\n \"\"\"Helper function to find unique values with support for python objects.\n\n Uses pure python method for object dtype, and numpy method for\n all other dtypes.\n\n Parameters\n ----------\n values : ndarray\n Values to check for unknowns.\n\n return_inverse : bool, default=False\n If True, also return the indices of the unique values.\n\n Returns\n -------\n unique : ndarray\n The sorted unique values.\n\n unique_inverse : ndarray\n The indices to reconstruct the original array from the unique array.\n Only provided if `return_inverse` is True.\n \"\"\"\n if values.dtype == object:\n return _unique_python(values, return_inverse=return_inverse)\n # numerical\n out = np.unique(values, return_inverse=return_inverse)\n\n if return_inverse:\n uniques, inverse = out\n else:\n uniques = out\n\n # np.unique will have duplicate missing values at the end of `uniques`\n # here we clip the nans and remove it from uniques\n if uniques.size and is_scalar_nan(uniques[-1]):\n nan_idx = np.searchsorted(uniques, np.nan)\n uniques = uniques[:nan_idx + 1]\n if return_inverse:\n inverse[inverse > nan_idx] = nan_idx\n\n if return_inverse:\n return uniques, inverse\n return uniques" + }, + { + "id": "scikit-learn/sklearn.utils._encode/_unique_python", + "name": "_unique_python", + "qname": "sklearn.utils._encode._unique_python", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._encode/_unique_python/values", + "name": "values", + "qname": "sklearn.utils._encode._unique_python.values", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._encode/_unique_python/return_inverse", + "name": "return_inverse", + "qname": "sklearn.utils._encode._unique_python.return_inverse", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _unique_python(values, *, return_inverse):\n # Only used in `_uniques`, see docstring there for details\n try:\n uniques_set = set(values)\n uniques_set, missing_values = _extract_missing(uniques_set)\n\n uniques = sorted(uniques_set)\n uniques.extend(missing_values.to_list())\n uniques = np.array(uniques, dtype=values.dtype)\n except TypeError:\n types = sorted(t.__qualname__\n for t in set(type(v) for v in values))\n raise TypeError(\"Encoders require 
their input to be uniformly \"\n f\"strings or numbers. Got {types}\")\n\n if return_inverse:\n return uniques, _map_to_integer(values, uniques)\n\n return uniques" + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_VisualBlock/__init__", + "name": "__init__", + "qname": "sklearn.utils._estimator_html_repr._VisualBlock.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_VisualBlock/__init__/self", + "name": "self", + "qname": "sklearn.utils._estimator_html_repr._VisualBlock.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_VisualBlock/__init__/kind", + "name": "kind", + "qname": "sklearn.utils._estimator_html_repr._VisualBlock.__init__.kind", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{'serial', 'parallel', 'single'}", + "default_value": "", + "description": "kind of HTML block" + }, + "type": { + "kind": "EnumType", + "values": ["single", "serial", "parallel"] + } + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_VisualBlock/__init__/estimators", + "name": "estimators", + "qname": "sklearn.utils._estimator_html_repr._VisualBlock.__init__.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of estimators or `_VisualBlock`s or a single estimator", + "default_value": "", + "description": "If kind != 'single', then `estimators` is a list of\nestimators.\nIf kind == 'single', then `estimators` is a single estimator." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list of estimators" + }, + { + "kind": "NamedType", + "name": "`_VisualBlock`s" + }, + { + "kind": "NamedType", + "name": "a single estimator" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_VisualBlock/__init__/names", + "name": "names", + "qname": "sklearn.utils._estimator_html_repr._VisualBlock.__init__.names", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of str", + "default_value": "None", + "description": "If kind != 'single', then `names` corresponds to estimators.\nIf kind == 'single', then `names` is a single string corresponding to\nthe single estimator." + }, + "type": { + "kind": "NamedType", + "name": "list of str" + } + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_VisualBlock/__init__/name_details", + "name": "name_details", + "qname": "sklearn.utils._estimator_html_repr._VisualBlock.__init__.name_details", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of str, str, or None", + "default_value": "None", + "description": "If kind != 'single', then `name_details` corresponds to `names`.\nIf kind == 'single', then `name_details` is a single string\ncorresponding to the single estimator." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list of str" + }, + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_VisualBlock/__init__/dash_wrapped", + "name": "dash_wrapped", + "qname": "sklearn.utils._estimator_html_repr._VisualBlock.__init__.dash_wrapped", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If true, wrapped HTML element will be wrapped with a dashed border.\nOnly active when kind != 'single'." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "HTML Representation of Estimator", + "docstring": "", + "code": " def __init__(self, kind, estimators, *, names=None, name_details=None,\n dash_wrapped=True):\n self.kind = kind\n self.estimators = estimators\n self.dash_wrapped = dash_wrapped\n\n if self.kind in ('parallel', 'serial'):\n if names is None:\n names = (None, ) * len(estimators)\n if name_details is None:\n name_details = (None, ) * len(estimators)\n\n self.names = names\n self.name_details = name_details" + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_VisualBlock/_sk_visual_block_", + "name": "_sk_visual_block_", + "qname": "sklearn.utils._estimator_html_repr._VisualBlock._sk_visual_block_", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_VisualBlock/_sk_visual_block_/self", + "name": "self", + "qname": "sklearn.utils._estimator_html_repr._VisualBlock._sk_visual_block_.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _sk_visual_block_(self):\n return self" + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_get_visual_block", + "name": "_get_visual_block", + "qname": "sklearn.utils._estimator_html_repr._get_visual_block", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_get_visual_block/estimator", + "name": "estimator", + "qname": "sklearn.utils._estimator_html_repr._get_visual_block.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate information about how to display an estimator.", + "docstring": "Generate information about how to display an estimator.\n ", + "code": "def _get_visual_block(estimator):\n \"\"\"Generate information about how to display an estimator.\n \"\"\"\n with suppress(AttributeError):\n return estimator._sk_visual_block_()\n\n if isinstance(estimator, str):\n return _VisualBlock('single', estimator,\n names=estimator, name_details=estimator)\n elif estimator is None:\n return _VisualBlock('single', estimator,\n names='None', name_details='None')\n\n # check if estimator looks like a meta estimator wraps estimators\n if hasattr(estimator, 'get_params'):\n estimators = []\n for key, value in estimator.get_params().items():\n # Only look at the estimators in the first 
layer\n if '__' not in key and hasattr(value, 'get_params'):\n estimators.append(value)\n if len(estimators):\n return _VisualBlock('parallel', estimators, names=None)\n\n return _VisualBlock('single', estimator,\n names=estimator.__class__.__name__,\n name_details=str(estimator))" + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_write_estimator_html", + "name": "_write_estimator_html", + "qname": "sklearn.utils._estimator_html_repr._write_estimator_html", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_write_estimator_html/out", + "name": "out", + "qname": "sklearn.utils._estimator_html_repr._write_estimator_html.out", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_write_estimator_html/estimator", + "name": "estimator", + "qname": "sklearn.utils._estimator_html_repr._write_estimator_html.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_write_estimator_html/estimator_label", + "name": "estimator_label", + "qname": "sklearn.utils._estimator_html_repr._write_estimator_html.estimator_label", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_write_estimator_html/estimator_label_details", + "name": "estimator_label_details", + "qname": "sklearn.utils._estimator_html_repr._write_estimator_html.estimator_label_details", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_write_estimator_html/first_call", + "name": "first_call", + "qname": "sklearn.utils._estimator_html_repr._write_estimator_html.first_call", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Write estimator to html in serial, parallel, or by itself (single).", + "docstring": "Write estimator to html in serial, parallel, or by itself (single).\n ", + "code": "def _write_estimator_html(out, estimator, estimator_label,\n estimator_label_details, first_call=False):\n \"\"\"Write estimator to html in serial, parallel, or by itself (single).\n \"\"\"\n if first_call:\n est_block = _get_visual_block(estimator)\n else:\n with config_context(print_changed_only=True):\n est_block = _get_visual_block(estimator)\n\n if est_block.kind in ('serial', 'parallel'):\n dashed_wrapped = first_call or est_block.dash_wrapped\n dash_cls = \" sk-dashed-wrapped\" if dashed_wrapped else \"\"\n out.write(f'
<div class=\"sk-item{dash_cls}\">')\n\n if estimator_label:\n _write_label_html(out, estimator_label, estimator_label_details)\n\n kind = est_block.kind\n out.write(f'
<div class=\"sk-{kind}\">')\n est_infos = zip(est_block.estimators, est_block.names,\n est_block.name_details)\n\n for est, name, name_details in est_infos:\n if kind == 'serial':\n _write_estimator_html(out, est, name, name_details)\n else: # parallel\n out.write('
<div class=\"sk-parallel-item\">')\n # wrap element in a serial visualblock\n serial_block = _VisualBlock('serial', [est],\n dash_wrapped=False)\n _write_estimator_html(out, serial_block, name, name_details)\n out.write('
</div>') # sk-parallel-item\n\n out.write('</div></div>
')\n elif est_block.kind == 'single':\n _write_label_html(out, est_block.names, est_block.name_details,\n outer_class=\"sk-item\", inner_class=\"sk-estimator\",\n checked=first_call)" + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_write_label_html", + "name": "_write_label_html", + "qname": "sklearn.utils._estimator_html_repr._write_label_html", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_write_label_html/out", + "name": "out", + "qname": "sklearn.utils._estimator_html_repr._write_label_html.out", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_write_label_html/name", + "name": "name", + "qname": "sklearn.utils._estimator_html_repr._write_label_html.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_write_label_html/name_details", + "name": "name_details", + "qname": "sklearn.utils._estimator_html_repr._write_label_html.name_details", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_write_label_html/outer_class", + "name": "outer_class", + "qname": "sklearn.utils._estimator_html_repr._write_label_html.outer_class", + "default_value": "'sk-label-container'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_write_label_html/inner_class", + "name": "inner_class", + "qname": "sklearn.utils._estimator_html_repr._write_label_html.inner_class", + "default_value": "'sk-label'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/_write_label_html/checked", + "name": "checked", + "qname": "sklearn.utils._estimator_html_repr._write_label_html.checked", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Write labeled html with or without a dropdown with named details", + "docstring": "Write labeled html with or without a dropdown with named details", + "code": "def _write_label_html(out, name, name_details,\n outer_class=\"sk-label-container\",\n inner_class=\"sk-label\",\n checked=False):\n \"\"\"Write labeled html with or without a dropdown with named details\"\"\"\n out.write(f'
<div class=\"{outer_class}\">'\n          f'<div class=\"{inner_class} sk-toggleable\">
')\n name = html.escape(name)\n\n if name_details is not None:\n checked_str = 'checked' if checked else ''\n est_id = uuid.uuid4()\n out.write(f'<input class=\"sk-toggleable__control sk-hidden--visually\" '\n                  f'id=\"{est_id}\" type=\"checkbox\" {checked_str}>'\n                  f'<label class=\"sk-toggleable__label\" for=\"{est_id}\">'\n                  f'{name}</label>'\n                  f'<div class=\"sk-toggleable__content\"><pre>
{name_details}'\n                  f'</pre></div>
')\n else:\n out.write(f'<label>{name}</label>')\n out.write('</div></div>
') # outer_class inner_class" + }, + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/estimator_html_repr", + "name": "estimator_html_repr", + "qname": "sklearn.utils._estimator_html_repr.estimator_html_repr", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._estimator_html_repr/estimator_html_repr/estimator", + "name": "estimator", + "qname": "sklearn.utils._estimator_html_repr.estimator_html_repr.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object", + "default_value": "", + "description": "The estimator to visualize." + }, + "type": { + "kind": "NamedType", + "name": "estimator object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Build a HTML representation of an estimator.\n\nRead more in the :ref:`User Guide <visualizing_composite_estimators>`.", + "docstring": "Build a HTML representation of an estimator.\n\nRead more in the :ref:`User Guide <visualizing_composite_estimators>`.\n\nParameters\n----------\nestimator : estimator object\n The estimator to visualize.\n\nReturns\n-------\nhtml: str\n HTML representation of estimator.", + "code": "def estimator_html_repr(estimator):\n \"\"\"Build a HTML representation of an estimator.\n\n Read more in the :ref:`User Guide <visualizing_composite_estimators>`.\n\n Parameters\n ----------\n estimator : estimator object\n The estimator to visualize.\n\n Returns\n -------\n html: str\n HTML representation of estimator.\n \"\"\"\n with closing(StringIO()) as out:\n container_id = \"sk-\" + str(uuid.uuid4())\n style_template = Template(_STYLE)\n style_with_id = style_template.substitute(id=container_id)\n out.write(f'<style>{style_with_id}</style>'\n f'
<div id=\"{container_id}\" class=\"sk-top-container\">'\n                  '<div class=\"sk-container\">
')\n _write_estimator_html(out, estimator, estimator.__class__.__name__,\n str(estimator), first_call=True)\n out.write('</div></div>
')\n\n html_output = out.getvalue()\n return html_output" + }, + { + "id": "scikit-learn/sklearn.utils._mask/_get_dense_mask", + "name": "_get_dense_mask", + "qname": "sklearn.utils._mask._get_dense_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mask/_get_dense_mask/X", + "name": "X", + "qname": "sklearn.utils._mask._get_dense_mask.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mask/_get_dense_mask/value_to_mask", + "name": "value_to_mask", + "qname": "sklearn.utils._mask._get_dense_mask.value_to_mask", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _get_dense_mask(X, value_to_mask):\n if is_scalar_nan(value_to_mask):\n if X.dtype.kind == \"f\":\n Xt = np.isnan(X)\n elif X.dtype.kind in (\"i\", \"u\"):\n # can't have NaNs in integer array.\n Xt = np.zeros(X.shape, dtype=bool)\n else:\n # np.isnan does not work on object dtypes.\n Xt = _object_dtype_isnan(X)\n else:\n Xt = X == value_to_mask\n\n return Xt" + }, + { + "id": "scikit-learn/sklearn.utils._mask/_get_mask", + "name": "_get_mask", + "qname": "sklearn.utils._mask._get_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mask/_get_mask/X", + "name": "X", + "qname": "sklearn.utils._mask._get_mask.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, sparse matrix} of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data, where ``n_samples`` is the number of samples and\n``n_features`` is the number of features." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils._mask/_get_mask/value_to_mask", + "name": "value_to_mask", + "qname": "sklearn.utils._mask._get_mask.value_to_mask", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{int, float}", + "default_value": "", + "description": "The value which is to be masked in X." 
+ }, + "type": { + "kind": "EnumType", + "values": [] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the boolean mask X == value_to_mask.", + "docstring": "Compute the boolean mask X == value_to_mask.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\nvalue_to_mask : {int, float}\n The value which is to be masked in X.\n\nReturns\n-------\nX_mask : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Missing mask.", + "code": "def _get_mask(X, value_to_mask):\n \"\"\"Compute the boolean mask X == value_to_mask.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Input data, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features.\n\n value_to_mask : {int, float}\n The value which is to be masked in X.\n\n Returns\n -------\n X_mask : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Missing mask.\n \"\"\"\n if not sp.issparse(X):\n # For all cases apart of a sparse input where we need to reconstruct\n # a sparse output\n return _get_dense_mask(X, value_to_mask)\n\n Xt = _get_dense_mask(X.data, value_to_mask)\n\n sparse_constructor = (sp.csr_matrix if X.format == 'csr'\n else sp.csc_matrix)\n Xt_sparse = sparse_constructor(\n (Xt, X.indices.copy(), X.indptr.copy()), shape=X.shape, dtype=bool\n )\n\n return Xt_sparse" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/ArraySlicingWrapper/__getitem__", + "name": "__getitem__", + "qname": "sklearn.utils._mocking.ArraySlicingWrapper.__getitem__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/ArraySlicingWrapper/__getitem__/self", + "name": "self", + "qname": "sklearn.utils._mocking.ArraySlicingWrapper.__getitem__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/ArraySlicingWrapper/__getitem__/aslice", + "name": "aslice", + "qname": "sklearn.utils._mocking.ArraySlicingWrapper.__getitem__.aslice", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __getitem__(self, aslice):\n return MockDataFrame(self.array[aslice])" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/ArraySlicingWrapper/__init__", + "name": "__init__", + "qname": "sklearn.utils._mocking.ArraySlicingWrapper.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/ArraySlicingWrapper/__init__/self", + "name": "self", + "qname": "sklearn.utils._mocking.ArraySlicingWrapper.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/ArraySlicingWrapper/__init__/array", + "name": "array", + "qname": "sklearn.utils._mocking.ArraySlicingWrapper.__init__.array", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
"description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __init__(self, array):\n self.array = array" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/__init__", + "name": "__init__", + "qname": "sklearn.utils._mocking.CheckingClassifier.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/__init__/self", + "name": "self", + "qname": "sklearn.utils._mocking.CheckingClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/__init__/check_y", + "name": "check_y", + "qname": "sklearn.utils._mocking.CheckingClassifier.__init__.check_y", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "The callable used to validate `X` and `y`. These callable should return\na bool where `False` will trigger an `AssertionError`." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/__init__/check_y_params", + "name": "check_y_params", + "qname": "sklearn.utils._mocking.CheckingClassifier.__init__.check_y_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "The optional parameters to pass to `check_X` and `check_y`." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/__init__/check_X", + "name": "check_X", + "qname": "sklearn.utils._mocking.CheckingClassifier.__init__.check_X", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "The callable used to validate `X` and `y`. These callable should return\na bool where `False` will trigger an `AssertionError`." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/__init__/check_X_params", + "name": "check_X_params", + "qname": "sklearn.utils._mocking.CheckingClassifier.__init__.check_X_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "None", + "description": "The optional parameters to pass to `check_X` and `check_y`." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/__init__/methods_to_check", + "name": "methods_to_check", + "qname": "sklearn.utils._mocking.CheckingClassifier.__init__.methods_to_check", + "default_value": "'all'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "\"all\" or list of str", + "default_value": "\"all\"", + "description": "The methods in which the checks should be applied. By default,\nall checks will be done on all methods (`fit`, `predict`,\n`predict_proba`, `decision_function` and `score`)." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "\"all\"" + }, + { + "kind": "NamedType", + "name": "list of str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/__init__/foo_param", + "name": "foo_param", + "qname": "sklearn.utils._mocking.CheckingClassifier.__init__.foo_param", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "A `foo` param. When `foo > 1`, the output of :meth:`score` will be 1\notherwise it is 0." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/__init__/expected_fit_params", + "name": "expected_fit_params", + "qname": "sklearn.utils._mocking.CheckingClassifier.__init__.expected_fit_params", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "list of str", + "default_value": "None", + "description": "A list of the expected parameters given when calling `fit`." + }, + "type": { + "kind": "NamedType", + "name": "list of str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Dummy classifier to test pipelining and meta-estimators.\n\nChecks some property of `X` and `y`in fit / predict.\nThis allows testing whether pipelines / cross-validation or metaestimators\nchanged the input.\n\nCan also be used to check if `fit_params` are passed correctly, and\nto force a certain score to be returned.", + "docstring": "", + "code": " def __init__(self, *, check_y=None, check_y_params=None,\n check_X=None, check_X_params=None, methods_to_check=\"all\",\n foo_param=0, expected_fit_params=None):\n self.check_y = check_y\n self.check_y_params = check_y_params\n self.check_X = check_X\n self.check_X_params = check_X_params\n self.methods_to_check = methods_to_check\n self.foo_param = foo_param\n self.expected_fit_params = expected_fit_params" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/_check_X_y", + "name": "_check_X_y", + "qname": "sklearn.utils._mocking.CheckingClassifier._check_X_y", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/_check_X_y/self", + "name": "self", + "qname": "sklearn.utils._mocking.CheckingClassifier._check_X_y.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/_check_X_y/X", + "name": "X", + "qname": "sklearn.utils._mocking.CheckingClassifier._check_X_y.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The data set." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/_check_X_y/y", + "name": "y", + "qname": "sklearn.utils._mocking.CheckingClassifier._check_X_y.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples)", + "default_value": "None", + "description": "The corresponding target, by default None." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples)" + } + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/_check_X_y/should_be_fitted", + "name": "should_be_fitted", + "qname": "sklearn.utils._mocking.CheckingClassifier._check_X_y.should_be_fitted", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not the classifier should be already fitted.\nBy default True." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate X and y and make extra check.", + "docstring": "Validate X and y and make extra check.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data set.\ny : array-like of shape (n_samples), default=None\n The corresponding target, by default None.\nshould_be_fitted : bool, default=True\n Whether or not the classifier should be already fitted.\n By default True.\n\nReturns\n-------\nX, y", + "code": " def _check_X_y(self, X, y=None, should_be_fitted=True):\n \"\"\"Validate X and y and make extra check.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The data set.\n y : array-like of shape (n_samples), default=None\n The corresponding target, by default None.\n should_be_fitted : bool, default=True\n Whether or not the classifier should be already fitted.\n By default True.\n\n Returns\n -------\n X, y\n \"\"\"\n if should_be_fitted:\n check_is_fitted(self)\n if self.check_X is not None:\n params = {} if self.check_X_params is None else self.check_X_params\n checked_X = self.check_X(X, **params)\n if isinstance(checked_X, (bool, np.bool_)):\n assert checked_X\n else:\n X = checked_X\n if y is not None and self.check_y is not None:\n params = {} if self.check_y_params is None else self.check_y_params\n checked_y = self.check_y(y, **params)\n if isinstance(checked_y, (bool, np.bool_)):\n assert checked_y\n else:\n y = checked_y\n return X, y" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/_more_tags", + "name": "_more_tags", + "qname": "sklearn.utils._mocking.CheckingClassifier._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/_more_tags/self", + "name": "self", + "qname": "sklearn.utils._mocking.CheckingClassifier._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'_skip_test': True, 'X_types': ['1dlabel']}" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/decision_function", + "name": "decision_function", + "qname": "sklearn.utils._mocking.CheckingClassifier.decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/decision_function/self", + "name": "self", + "qname": "sklearn.utils._mocking.CheckingClassifier.decision_function.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.utils._mocking/CheckingClassifier/decision_function/X", + "name": "X", + "qname": "sklearn.utils._mocking.CheckingClassifier.decision_function.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Confidence score.", + "docstring": "Confidence score.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\ndecision : ndarray of shape (n_samples,) if n_classes == 2 else (n_samples, n_classes)\n Confidence score.", + "code": " def decision_function(self, X):\n \"\"\"Confidence score.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n decision : ndarray of shape (n_samples,) if n_classes == 2\\\n else (n_samples, n_classes)\n Confidence score.\n \"\"\"\n if (self.methods_to_check == \"all\" or\n \"decision_function\" in self.methods_to_check):\n X, y = self._check_X_y(X)\n if len(self.classes_) == 2:\n # for binary classifier, the confidence score is related to\n # classes_[1] and therefore should be null.\n return np.zeros(_num_samples(X))\n else:\n decision = np.zeros((_num_samples(X), len(self.classes_)))\n decision[:, 0] = 1\n return decision" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/fit", + "name": "fit", + "qname": "sklearn.utils._mocking.CheckingClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/fit/self", + "name": "self", + "qname": "sklearn.utils._mocking.CheckingClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/fit/X", + "name": "X", + "qname": "sklearn.utils._mocking.CheckingClassifier.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Training vector, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/fit/y", + "name": "y", + "qname": "sklearn.utils._mocking.CheckingClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_outputs) or (n_samples,)", + "default_value": "None", + "description": "Target relative to X for classification or regression;\nNone for unsupervised learning." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_outputs) or (n_samples,)" + } + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/fit/fit_params", + "name": "fit_params", + "qname": "sklearn.utils._mocking.CheckingClassifier.fit.fit_params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "dict of string -> object", + "default_value": "", + "description": "Parameters passed to the ``fit`` method of the estimator" + }, + "type": { + "kind": "NamedType", + "name": "dict of string -> object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fit classifier.", + "docstring": "Fit classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples, n_outputs) or (n_samples,), default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of the estimator\n\nReturns\n-------\nself", + "code": " def fit(self, X, y, **fit_params):\n \"\"\"Fit classifier.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\n y : array-like of shape (n_samples, n_outputs) or (n_samples,), \\\n default=None\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n **fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of the estimator\n\n Returns\n -------\n self\n \"\"\"\n assert _num_samples(X) == _num_samples(y)\n if self.methods_to_check == \"all\" or \"fit\" in self.methods_to_check:\n X, y = self._check_X_y(X, y, should_be_fitted=False)\n self.n_features_in_ = np.shape(X)[1]\n self.classes_ = np.unique(\n check_array(y, ensure_2d=False, allow_nd=True)\n )\n if self.expected_fit_params:\n missing = set(self.expected_fit_params) - set(fit_params)\n if missing:\n raise AssertionError(\n f'Expected fit parameter(s) {list(missing)} not seen.'\n )\n for key, value in fit_params.items():\n if _num_samples(value) != _num_samples(X):\n raise AssertionError(\n f'Fit parameter {key} has length {_num_samples(value)}'\n f'; expected {_num_samples(X)}.'\n )\n\n return self" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/predict", + "name": "predict", + "qname": "sklearn.utils._mocking.CheckingClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/predict/self", + "name": "self", + "qname": "sklearn.utils._mocking.CheckingClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/predict/X", + "name": "X", + "qname": "sklearn.utils._mocking.CheckingClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict the first class seen in `classes_`.", + "docstring": "Predict the first class seen in `classes_`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\npreds : ndarray of shape (n_samples,)\n Predictions of the first class seens in `classes_`.", + "code": " def predict(self, X):\n \"\"\"Predict the first class seen in `classes_`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n preds : ndarray of shape (n_samples,)\n Predictions of the first class seens in `classes_`.\n \"\"\"\n if (self.methods_to_check == \"all\" or\n \"predict\" in self.methods_to_check):\n X, y = self._check_X_y(X)\n return self.classes_[np.zeros(_num_samples(X), dtype=int)]" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.utils._mocking.CheckingClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.utils._mocking.CheckingClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.utils._mocking.CheckingClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "The input data." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Predict probabilities for each class.\n\nHere, the dummy classifier will provide a probability of 1 for the\nfirst class of `classes_` and 0 otherwise.", + "docstring": "Predict probabilities for each class.\n\nHere, the dummy classifier will provide a probability of 1 for the\nfirst class of `classes_` and 0 otherwise.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\nproba : ndarray of shape (n_samples, n_classes)\n The probabilities for each sample and class.", + "code": " def predict_proba(self, X):\n \"\"\"Predict probabilities for each class.\n\n Here, the dummy classifier will provide a probability of 1 for the\n first class of `classes_` and 0 otherwise.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n The input data.\n\n Returns\n -------\n proba : ndarray of shape (n_samples, n_classes)\n The probabilities for each sample and class.\n \"\"\"\n if (self.methods_to_check == \"all\" or\n \"predict_proba\" in self.methods_to_check):\n X, y = self._check_X_y(X)\n proba = np.zeros((_num_samples(X), len(self.classes_)))\n proba[:, 0] = 1\n return proba" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/score", + "name": "score", + "qname": "sklearn.utils._mocking.CheckingClassifier.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/score/self", + "name": "self", + "qname": "sklearn.utils._mocking.CheckingClassifier.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/score/X", + "name": "X", + "qname": "sklearn.utils._mocking.CheckingClassifier.score.X", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data, where n_samples is the number of samples and\nn_features is the number of features." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils._mocking/CheckingClassifier/score/Y", + "name": "Y", + "qname": "sklearn.utils._mocking.CheckingClassifier.score.Y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_output) or (n_samples,)", + "default_value": "", + "description": "Target relative to X for classification or regression;\nNone for unsupervised learning." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_output) or (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Fake score.", + "docstring": "Fake score.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data, where n_samples is the number of samples and\n n_features is the number of features.\n\nY : array-like of shape (n_samples, n_output) or (n_samples,)\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\nReturns\n-------\nscore : float\n Either 0 or 1 depending of `foo_param` (i.e. `foo_param > 1 =>\n score=1` otherwise `score=0`).", + "code": " def score(self, X=None, Y=None):\n \"\"\"Fake score.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Input data, where n_samples is the number of samples and\n n_features is the number of features.\n\n Y : array-like of shape (n_samples, n_output) or (n_samples,)\n Target relative to X for classification or regression;\n None for unsupervised learning.\n\n Returns\n -------\n score : float\n Either 0 or 1 depending of `foo_param` (i.e. `foo_param > 1 =>\n score=1` otherwise `score=0`).\n \"\"\"\n if self.methods_to_check == \"all\" or \"score\" in self.methods_to_check:\n self._check_X_y(X, Y)\n if self.foo_param > 1:\n score = 1.\n else:\n score = 0.\n return score" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__array__", + "name": "__array__", + "qname": "sklearn.utils._mocking.MockDataFrame.__array__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__array__/self", + "name": "self", + "qname": "sklearn.utils._mocking.MockDataFrame.__array__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__array__/dtype", + "name": "dtype", + "qname": "sklearn.utils._mocking.MockDataFrame.__array__.dtype", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __array__(self, dtype=None):\n # Pandas data frames also are array-like: we want to make sure that\n # input validation in cross-validation does not try to call that\n # method.\n return self.array" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__eq__", + "name": "__eq__", + "qname": "sklearn.utils._mocking.MockDataFrame.__eq__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__eq__/self", + "name": "self", + "qname": "sklearn.utils._mocking.MockDataFrame.__eq__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__eq__/other", + "name": "other", + "qname": "sklearn.utils._mocking.MockDataFrame.__eq__.other", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + 
"reexported_by": [], + "description": "", + "docstring": "", + "code": " def __eq__(self, other):\n return MockDataFrame(self.array == other.array)" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__init__", + "name": "__init__", + "qname": "sklearn.utils._mocking.MockDataFrame.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__init__/self", + "name": "self", + "qname": "sklearn.utils._mocking.MockDataFrame.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__init__/array", + "name": "array", + "qname": "sklearn.utils._mocking.MockDataFrame.__init__.array", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __init__(self, array):\n self.array = array\n self.values = array\n self.shape = array.shape\n self.ndim = array.ndim\n # ugly hack to make iloc work.\n self.iloc = ArraySlicingWrapper(array)" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__len__", + "name": "__len__", + "qname": "sklearn.utils._mocking.MockDataFrame.__len__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__len__/self", + "name": "self", + "qname": "sklearn.utils._mocking.MockDataFrame.__len__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __len__(self):\n return len(self.array)" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__ne__", + "name": "__ne__", + "qname": "sklearn.utils._mocking.MockDataFrame.__ne__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__ne__/self", + "name": "self", + "qname": "sklearn.utils._mocking.MockDataFrame.__ne__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/MockDataFrame/__ne__/other", + "name": "other", + "qname": "sklearn.utils._mocking.MockDataFrame.__ne__.other", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __ne__(self, other):\n return not self == other" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/__init__", + "name": "__init__", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/__init__/self", + "name": "self", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + 
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/__init__/est", + "name": "est", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper.__init__.est", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator", + "default_value": "None", + "description": "The estimator to wrap." + }, + "type": { + "kind": "NamedType", + "name": "estimator" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Wrap estimator which will not expose `sample_weight`.", + "docstring": "", + "code": " def __init__(self, est=None):\n self.est = est" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/_more_tags", + "name": "_more_tags", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper._more_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/_more_tags/self", + "name": "self", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper._more_tags.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _more_tags(self):\n return {'_skip_test': True}" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/fit", + "name": "fit", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/fit/self", + "name": "self", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/fit/X", + "name": "X", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/fit/y", + "name": "y", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def fit(self, X, y):\n return self.est.fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/predict", + "name": "predict", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/predict/self", + "name": "self", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/predict/X", + "name": "X", + 
"qname": "sklearn.utils._mocking.NoSampleWeightWrapper.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def predict(self, X):\n return self.est.predict(X)" + }, + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/predict_proba", + "name": "predict_proba", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/predict_proba/self", + "name": "self", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._mocking/NoSampleWeightWrapper/predict_proba/X", + "name": "X", + "qname": "sklearn.utils._mocking.NoSampleWeightWrapper.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def predict_proba(self, X):\n return self.est.predict_proba(X)" + }, + { + "id": "scikit-learn/sklearn.utils._pprint/KeyValTuple/__repr__", + "name": "__repr__", + "qname": "sklearn.utils._pprint.KeyValTuple.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._pprint/KeyValTuple/__repr__/self", + "name": "self", + "qname": "sklearn.utils._pprint.KeyValTuple.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n # needed for _dispatch[tuple.__repr__] not to be overridden\n return super().__repr__()" + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/__init__", + "name": "__init__", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/__init__/self", + "name": "self", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/__init__/indent", + "name": "indent", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.__init__.indent", + "default_value": "1", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/__init__/width", + "name": "width", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.__init__.width", + "default_value": "80", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": 
"", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/__init__/depth", + "name": "depth", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.__init__.depth", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/__init__/stream", + "name": "stream", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.__init__.stream", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/__init__/compact", + "name": "compact", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.__init__.compact", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/__init__/indent_at_name", + "name": "indent_at_name", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.__init__.indent_at_name", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/__init__/n_max_elements_to_show", + "name": "n_max_elements_to_show", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.__init__.n_max_elements_to_show", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Pretty Printer class for estimator objects.\n\nThis extends the pprint.PrettyPrinter class, because:\n- we need estimators to be printed with their parameters, e.g.\n Estimator(param1=value1, ...) which is not supported by default.\n- the 'compact' parameter of PrettyPrinter is ignored for dicts, which\n may lead to very long representations that we want to avoid.\n\nQuick overview of pprint.PrettyPrinter (see also\nhttps://stackoverflow.com/questions/49565047/pprint-with-hex-numbers):\n\n- the entry point is the _format() method which calls format() (overridden\n here)\n- format() directly calls _safe_repr() for a first try at rendering the\n object\n- _safe_repr formats the whole object reccursively, only calling itself,\n not caring about line length or anything\n- back to _format(), if the output string is too long, _format() then calls\n the appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on\n the type of the object. This where the line length and the compact\n parameters are taken into account.\n- those _pprint_TYPE() methods will internally use the format() method for\n rendering the nested objects of an object (e.g. the elements of a list)\n\nIn the end, everything has to be implemented twice: in _safe_repr and in\nthe custom _pprint_TYPE methods. 
Unfortunately PrettyPrinter is really not\nstraightforward to extend (especially when we want a compact output), so\nthe code is a bit convoluted.\n\nThis class overrides:\n- format() to support the changed_only parameter\n- _safe_repr to support printing of estimators (for when they fit on a\n single line)\n- _format_dict_items so that dicts are correctly 'compacted'\n- _format_items so that ellipsis is used on long lists and tuples\n\nWhen estimators cannot be printed on a single line, the builtin _format()\nwill call _pprint_estimator() because it was registered to do so (see\n_dispatch[BaseEstimator.__repr__] = _pprint_estimator).\n\nBoth _format_dict_items() and _pprint_estimator() use the\n_format_params_or_dict_items() method that will format parameters and\nkey-value pairs respecting the compact parameter. This method needs another\nsubroutine _pprint_key_val_tuple() used when a parameter or a key-value\npair is too long to fit on a single line. This subroutine is called in\n_format() and is registered as well in the _dispatch dict (just like\n_pprint_estimator). We had to create the two classes KeyValTuple and\nKeyValTupleParam for this.", + "docstring": "", + "code": " def __init__(self, indent=1, width=80, depth=None, stream=None, *,\n compact=False, indent_at_name=True,\n n_max_elements_to_show=None):\n super().__init__(indent, width, depth, stream, compact=compact)\n self._indent_at_name = indent_at_name\n if self._indent_at_name:\n self._indent_per_level = 1 # ignore indent param\n self._changed_only = get_config()['print_changed_only']\n # Max number of elements in a list, dict, tuple until we start using\n # ellipsis. This also affects the number of arguments of an estimator\n # (they are treated as dicts)\n self.n_max_elements_to_show = n_max_elements_to_show" + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_dict_items", + "name": "_format_dict_items", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_dict_items", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_dict_items/self", + "name": "self", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_dict_items.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_dict_items/items", + "name": "items", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_dict_items.items", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_dict_items/stream", + "name": "stream", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_dict_items.stream", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_dict_items/indent", + "name": "indent", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_dict_items.indent", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + 
"description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_dict_items/allowance", + "name": "allowance", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_dict_items.allowance", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_dict_items/context", + "name": "context", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_dict_items.context", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_dict_items/level", + "name": "level", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_dict_items.level", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _format_dict_items(self, items, stream, indent, allowance, context,\n level):\n return self._format_params_or_dict_items(\n items, stream, indent, allowance, context, level, is_dict=True)" + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_items", + "name": "_format_items", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_items", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_items/self", + "name": "self", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_items.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_items/items", + "name": "items", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_items.items", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_items/stream", + "name": "stream", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_items.stream", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_items/indent", + "name": "indent", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_items.indent", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_items/allowance", + "name": "allowance", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_items.allowance", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + 
"type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_items/context", + "name": "context", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_items.context", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_items/level", + "name": "level", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_items.level", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Format the items of an iterable (list, tuple...). Same as the\nbuilt-in _format_items, with support for ellipsis if the number of\nelements is greater than self.n_max_elements_to_show.", + "docstring": "Format the items of an iterable (list, tuple...). Same as the\nbuilt-in _format_items, with support for ellipsis if the number of\nelements is greater than self.n_max_elements_to_show.", + "code": " def _format_items(self, items, stream, indent, allowance, context, level):\n \"\"\"Format the items of an iterable (list, tuple...). Same as the\n built-in _format_items, with support for ellipsis if the number of\n elements is greater than self.n_max_elements_to_show.\n \"\"\"\n write = stream.write\n indent += self._indent_per_level\n if self._indent_per_level > 1:\n write((self._indent_per_level - 1) * ' ')\n delimnl = ',\\n' + ' ' * indent\n delim = ''\n width = max_width = self._width - indent + 1\n it = iter(items)\n try:\n next_ent = next(it)\n except StopIteration:\n return\n last = False\n n_items = 0\n while not last:\n if n_items == self.n_max_elements_to_show:\n write(', ...')\n break\n n_items += 1\n ent = next_ent\n try:\n next_ent = next(it)\n except StopIteration:\n last = True\n max_width -= allowance\n width -= allowance\n if self._compact:\n rep = self._repr(ent, context, level)\n w = len(rep) + 2\n if width < w:\n width = max_width\n if delim:\n delim = delimnl\n if width >= w:\n width -= w\n write(delim)\n delim = ', '\n write(rep)\n continue\n write(delim)\n delim = delimnl\n self._format(ent, stream, indent,\n allowance if last else 1, context, level)" + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params", + "name": "_format_params", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params/self", + "name": "self", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params/items", + "name": "items", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params.items", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params/stream", + "name": "stream", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params.stream", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params/indent", + "name": "indent", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params.indent", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params/allowance", + "name": "allowance", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params.allowance", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params/context", + "name": "context", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params.context", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params/level", + "name": "level", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params.level", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _format_params(self, items, stream, indent, allowance, context, level):\n return self._format_params_or_dict_items(\n items, stream, indent, allowance, context, level, is_dict=False)" + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params_or_dict_items", + "name": "_format_params_or_dict_items", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params_or_dict_items", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params_or_dict_items/self", + "name": "self", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params_or_dict_items.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params_or_dict_items/object", + "name": "object", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params_or_dict_items.object", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params_or_dict_items/stream", + "name": "stream", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params_or_dict_items.stream", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params_or_dict_items/indent", + "name": "indent", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params_or_dict_items.indent", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params_or_dict_items/allowance", + "name": "allowance", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params_or_dict_items.allowance", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params_or_dict_items/context", + "name": "context", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params_or_dict_items.context", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params_or_dict_items/level", + "name": "level", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params_or_dict_items.level", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_format_params_or_dict_items/is_dict", + "name": "is_dict", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._format_params_or_dict_items.is_dict", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Format dict items or parameters respecting the compact=True\nparameter. For some reason, the builtin rendering of dict items doesn't\nrespect compact=True and will use one line per key-value if all cannot\nfit in a single line.\nDict items will be rendered as <'key': value> while params will be\nrendered as . The implementation is mostly copy/pasting from\nthe builtin _format_items().\nThis also adds ellipsis if the number of items is greater than\nself.n_max_elements_to_show.", + "docstring": "Format dict items or parameters respecting the compact=True\nparameter. For some reason, the builtin rendering of dict items doesn't\nrespect compact=True and will use one line per key-value if all cannot\nfit in a single line.\nDict items will be rendered as <'key': value> while params will be\nrendered as . The implementation is mostly copy/pasting from\nthe builtin _format_items().\nThis also adds ellipsis if the number of items is greater than\nself.n_max_elements_to_show.", + "code": " def _format_params_or_dict_items(self, object, stream, indent, allowance,\n context, level, is_dict):\n \"\"\"Format dict items or parameters respecting the compact=True\n parameter. 
For some reason, the builtin rendering of dict items doesn't\n respect compact=True and will use one line per key-value if all cannot\n fit in a single line.\n Dict items will be rendered as <'key': value> while params will be\n rendered as <key=value>. The implementation is mostly copy/pasting from\n the builtin _format_items().\n This also adds ellipsis if the number of items is greater than\n self.n_max_elements_to_show.\n \"\"\"\n write = stream.write\n indent += self._indent_per_level\n delimnl = ',\\n' + ' ' * indent\n delim = ''\n width = max_width = self._width - indent + 1\n it = iter(object)\n try:\n next_ent = next(it)\n except StopIteration:\n return\n last = False\n n_items = 0\n while not last:\n if n_items == self.n_max_elements_to_show:\n write(', ...')\n break\n n_items += 1\n ent = next_ent\n try:\n next_ent = next(it)\n except StopIteration:\n last = True\n max_width -= allowance\n width -= allowance\n if self._compact:\n k, v = ent\n krepr = self._repr(k, context, level)\n vrepr = self._repr(v, context, level)\n if not is_dict:\n krepr = krepr.strip(\"'\")\n middle = ': ' if is_dict else '='\n rep = krepr + middle + vrepr\n w = len(rep) + 2\n if width < w:\n width = max_width\n if delim:\n delim = delimnl\n if width >= w:\n width -= w\n write(delim)\n delim = ', '\n write(rep)\n continue\n write(delim)\n delim = delimnl\n class_ = KeyValTuple if is_dict else KeyValTupleParam\n self._format(class_(ent), stream, indent,\n allowance if last else 1, context, level)" + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_estimator", + "name": "_pprint_estimator", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_estimator/self", + "name": "self", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_estimator/object", + "name": "object", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_estimator.object", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_estimator/stream", + "name": "stream", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_estimator.stream", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_estimator/indent", + "name": "indent", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_estimator.indent", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_estimator/allowance", + "name": "allowance", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_estimator.allowance", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + 
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_estimator/context", + "name": "context", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_estimator.context", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_estimator/level", + "name": "level", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_estimator.level", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _pprint_estimator(self, object, stream, indent, allowance, context,\n level):\n stream.write(object.__class__.__name__ + '(')\n if self._indent_at_name:\n indent += len(object.__class__.__name__)\n\n if self._changed_only:\n params = _changed_params(object)\n else:\n params = object.get_params(deep=False)\n\n params = OrderedDict((name, val)\n for (name, val) in sorted(params.items()))\n\n self._format_params(params.items(), stream, indent, allowance + 1,\n context, level)\n stream.write(')')" + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_key_val_tuple", + "name": "_pprint_key_val_tuple", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_key_val_tuple", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_key_val_tuple/self", + "name": "self", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_key_val_tuple.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_key_val_tuple/object", + "name": "object", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_key_val_tuple.object", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_key_val_tuple/stream", + "name": "stream", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_key_val_tuple.stream", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_key_val_tuple/indent", + "name": "indent", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_key_val_tuple.indent", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_key_val_tuple/allowance", + "name": "allowance", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_key_val_tuple.allowance", + 
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_key_val_tuple/context", + "name": "context", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_key_val_tuple.context", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/_pprint_key_val_tuple/level", + "name": "level", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter._pprint_key_val_tuple.level", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Pretty printing for key-value tuples from dict or parameters.", + "docstring": "Pretty printing for key-value tuples from dict or parameters.", + "code": " def _pprint_key_val_tuple(self, object, stream, indent, allowance, context,\n level):\n \"\"\"Pretty printing for key-value tuples from dict or parameters.\"\"\"\n k, v = object\n rep = self._repr(k, context, level)\n if isinstance(object, KeyValTupleParam):\n rep = rep.strip(\"'\")\n middle = '='\n else:\n middle = ': '\n stream.write(rep)\n stream.write(middle)\n self._format(v, stream, indent + len(rep) + len(middle), allowance,\n context, level)" + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/format", + "name": "format", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.format", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/format/self", + "name": "self", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.format.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/format/object", + "name": "object", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.format.object", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/format/context", + "name": "context", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.format.context", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/format/maxlevels", + "name": "maxlevels", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.format.maxlevels", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_EstimatorPrettyPrinter/format/level", + "name": "level", + "qname": "sklearn.utils._pprint._EstimatorPrettyPrinter.format.level", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def format(self, object, context, maxlevels, level):\n return _safe_repr(object, context, maxlevels, level,\n changed_only=self._changed_only)" + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_changed_params", + "name": "_changed_params", + "qname": "sklearn.utils._pprint._changed_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._pprint/_changed_params/estimator", + "name": "estimator", + "qname": "sklearn.utils._pprint._changed_params.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return dict (param_name: value) of parameters that were given to\nestimator with non-default values.", + "docstring": "Return dict (param_name: value) of parameters that were given to\nestimator with non-default values.", + "code": "def _changed_params(estimator):\n \"\"\"Return dict (param_name: value) of parameters that were given to\n estimator with non-default values.\"\"\"\n\n params = estimator.get_params(deep=False)\n init_func = getattr(estimator.__init__, 'deprecated_original',\n estimator.__init__)\n init_params = inspect.signature(init_func).parameters\n init_params = {name: param.default for name, param in init_params.items()}\n\n def has_changed(k, v):\n if k not in init_params: # happens if k is part of a **kwargs\n return True\n if init_params[k] == inspect._empty: # k has no default value\n return True\n # try to avoid calling repr on nested estimators\n if (isinstance(v, BaseEstimator) and\n v.__class__ != init_params[k].__class__):\n return True\n # Use repr as a last resort. 
It may be expensive.\n if (repr(v) != repr(init_params[k]) and\n not (is_scalar_nan(init_params[k]) and is_scalar_nan(v))):\n return True\n return False\n\n return {k: v for k, v in params.items() if has_changed(k, v)}" + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_safe_repr", + "name": "_safe_repr", + "qname": "sklearn.utils._pprint._safe_repr", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._pprint/_safe_repr/object", + "name": "object", + "qname": "sklearn.utils._pprint._safe_repr.object", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_safe_repr/context", + "name": "context", + "qname": "sklearn.utils._pprint._safe_repr.context", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_safe_repr/maxlevels", + "name": "maxlevels", + "qname": "sklearn.utils._pprint._safe_repr.maxlevels", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_safe_repr/level", + "name": "level", + "qname": "sklearn.utils._pprint._safe_repr.level", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._pprint/_safe_repr/changed_only", + "name": "changed_only", + "qname": "sklearn.utils._pprint._safe_repr.changed_only", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Same as the builtin _safe_repr, with added support for Estimator\nobjects.", + "docstring": "Same as the builtin _safe_repr, with added support for Estimator\nobjects.", + "code": "def _safe_repr(object, context, maxlevels, level, changed_only=False):\n \"\"\"Same as the builtin _safe_repr, with added support for Estimator\n objects.\"\"\"\n typ = type(object)\n\n if typ in pprint._builtin_scalars:\n return repr(object), True, False\n\n r = getattr(typ, \"__repr__\", None)\n if issubclass(typ, dict) and r is dict.__repr__:\n if not object:\n return \"{}\", True, False\n objid = id(object)\n if maxlevels and level >= maxlevels:\n return \"{...}\", False, objid in context\n if objid in context:\n return pprint._recursion(object), False, True\n context[objid] = 1\n readable = True\n recursive = False\n components = []\n append = components.append\n level += 1\n saferepr = _safe_repr\n items = sorted(object.items(), key=pprint._safe_tuple)\n for k, v in items:\n krepr, kreadable, krecur = saferepr(\n k, context, maxlevels, level, changed_only=changed_only)\n vrepr, vreadable, vrecur = saferepr(\n v, context, maxlevels, level, changed_only=changed_only)\n append(\"%s: %s\" % (krepr, vrepr))\n readable = readable and kreadable and vreadable\n if krecur or vrecur:\n recursive = True\n del context[objid]\n return \"{%s}\" % \", \".join(components), readable, recursive\n\n if (issubclass(typ, list) and r is 
list.__repr__) or \\\n (issubclass(typ, tuple) and r is tuple.__repr__):\n if issubclass(typ, list):\n if not object:\n return \"[]\", True, False\n format = \"[%s]\"\n elif len(object) == 1:\n format = \"(%s,)\"\n else:\n if not object:\n return \"()\", True, False\n format = \"(%s)\"\n objid = id(object)\n if maxlevels and level >= maxlevels:\n return format % \"...\", False, objid in context\n if objid in context:\n return pprint._recursion(object), False, True\n context[objid] = 1\n readable = True\n recursive = False\n components = []\n append = components.append\n level += 1\n for o in object:\n orepr, oreadable, orecur = _safe_repr(\n o, context, maxlevels, level, changed_only=changed_only)\n append(orepr)\n if not oreadable:\n readable = False\n if orecur:\n recursive = True\n del context[objid]\n return format % \", \".join(components), readable, recursive\n\n if issubclass(typ, BaseEstimator):\n objid = id(object)\n if maxlevels and level >= maxlevels:\n return \"{...}\", False, objid in context\n if objid in context:\n return pprint._recursion(object), False, True\n context[objid] = 1\n readable = True\n recursive = False\n if changed_only:\n params = _changed_params(object)\n else:\n params = object.get_params(deep=False)\n components = []\n append = components.append\n level += 1\n saferepr = _safe_repr\n items = sorted(params.items(), key=pprint._safe_tuple)\n for k, v in items:\n krepr, kreadable, krecur = saferepr(\n k, context, maxlevels, level, changed_only=changed_only)\n vrepr, vreadable, vrecur = saferepr(\n v, context, maxlevels, level, changed_only=changed_only)\n append(\"%s=%s\" % (krepr.strip(\"'\"), vrepr))\n readable = readable and kreadable and vreadable\n if krecur or vrecur:\n recursive = True\n del context[objid]\n return (\"%s(%s)\" % (typ.__name__, \", \".join(components)), readable,\n recursive)\n\n rep = repr(object)\n return rep, (rep and not rep.startswith('<')), False" + }, + { + "id": "scikit-learn/sklearn.utils._show_versions/_get_deps_info", + "name": "_get_deps_info", + "qname": "sklearn.utils._show_versions._get_deps_info", + "decorators": [], + "parameters": [], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Overview of the installed version of main dependencies", + "docstring": "Overview of the installed version of main dependencies\n\nReturns\n-------\ndeps_info: dict\n version information on relevant Python libraries", + "code": "def _get_deps_info():\n \"\"\"Overview of the installed version of main dependencies\n\n Returns\n -------\n deps_info: dict\n version information on relevant Python libraries\n\n \"\"\"\n deps = [\n \"pip\",\n \"setuptools\",\n \"sklearn\",\n \"numpy\",\n \"scipy\",\n \"Cython\",\n \"pandas\",\n \"matplotlib\",\n \"joblib\",\n \"threadpoolctl\"\n ]\n\n def get_version(module):\n return module.__version__\n\n deps_info = {}\n\n for modname in deps:\n try:\n if modname in sys.modules:\n mod = sys.modules[modname]\n else:\n mod = importlib.import_module(modname)\n ver = get_version(mod)\n deps_info[modname] = ver\n except ImportError:\n deps_info[modname] = None\n\n return deps_info" + }, + { + "id": "scikit-learn/sklearn.utils._show_versions/_get_sys_info", + "name": "_get_sys_info", + "qname": "sklearn.utils._show_versions._get_sys_info", + "decorators": [], + "parameters": [], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "System information", + "docstring": "System information\n\nReturns\n-------\nsys_info : dict\n system and Python version 
information", + "code": "def _get_sys_info():\n \"\"\"System information\n\n Returns\n -------\n sys_info : dict\n system and Python version information\n\n \"\"\"\n python = sys.version.replace('\\n', ' ')\n\n blob = [\n (\"python\", python),\n ('executable', sys.executable),\n (\"machine\", platform.platform()),\n ]\n\n return dict(blob)" + }, + { + "id": "scikit-learn/sklearn.utils._show_versions/show_versions", + "name": "show_versions", + "qname": "sklearn.utils._show_versions.show_versions", + "decorators": [], + "parameters": [], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Print useful debugging information\"\n\n.. versionadded:: 0.20", + "docstring": "Print useful debugging information\"\n\n.. versionadded:: 0.20", + "code": "def show_versions():\n \"\"\"Print useful debugging information\"\n\n .. versionadded:: 0.20\n \"\"\"\n\n sys_info = _get_sys_info()\n deps_info = _get_deps_info()\n\n print('\\nSystem:')\n for k, stat in sys_info.items():\n print(\"{k:>10}: {stat}\".format(k=k, stat=stat))\n\n print('\\nPython dependencies:')\n for k, stat in deps_info.items():\n print(\"{k:>13}: {stat}\".format(k=k, stat=stat))\n\n print(\"\\n{k}: {stat}\".format(k=\"Built with OpenMP\",\n stat=_openmp_parallelism_enabled()))" + }, + { + "id": "scikit-learn/sklearn.utils._tags/_safe_tags", + "name": "_safe_tags", + "qname": "sklearn.utils._tags._safe_tags", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._tags/_safe_tags/estimator", + "name": "estimator", + "qname": "sklearn.utils._tags._safe_tags.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator object", + "default_value": "", + "description": "The estimator from which to get the tag." + }, + "type": { + "kind": "NamedType", + "name": "estimator object" + } + }, + { + "id": "scikit-learn/sklearn.utils._tags/_safe_tags/key", + "name": "key", + "qname": "sklearn.utils._tags._safe_tags.key", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Tag name to get. By default (`None`), all tags are returned." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Safely get estimator tags.\n\n:class:`~sklearn.BaseEstimator` provides the estimator tags machinery.\nHowever, if an estimator does not inherit from this base class, we should\nfall-back to the default tags.\n\nFor scikit-learn built-in estimators, we should still rely on\n`self._get_tags()`. `_safe_tags(est)` should be used when we are not sure\nwhere `est` comes from: typically `_safe_tags(self.base_estimator)` where\n`self` is a meta-estimator, or in the common checks.", + "docstring": "Safely get estimator tags.\n\n:class:`~sklearn.BaseEstimator` provides the estimator tags machinery.\nHowever, if an estimator does not inherit from this base class, we should\nfall-back to the default tags.\n\nFor scikit-learn built-in estimators, we should still rely on\n`self._get_tags()`. `_safe_tags(est)` should be used when we are not sure\nwhere `est` comes from: typically `_safe_tags(self.base_estimator)` where\n`self` is a meta-estimator, or in the common checks.\n\nParameters\n----------\nestimator : estimator object\n The estimator from which to get the tag.\n\nkey : str, default=None\n Tag name to get. 
By default (`None`), all tags are returned.\n\nReturns\n-------\ntags : dict or tag value\n The estimator tags. A single value is returned if `key` is not None.", + "code": "def _safe_tags(estimator, key=None):\n \"\"\"Safely get estimator tags.\n\n :class:`~sklearn.BaseEstimator` provides the estimator tags machinery.\n However, if an estimator does not inherit from this base class, we should\n fall back to the default tags.\n\n For scikit-learn built-in estimators, we should still rely on\n `self._get_tags()`. `_safe_tags(est)` should be used when we are not sure\n where `est` comes from: typically `_safe_tags(self.base_estimator)` where\n `self` is a meta-estimator, or in the common checks.\n\n Parameters\n ----------\n estimator : estimator object\n The estimator from which to get the tag.\n\n key : str, default=None\n Tag name to get. By default (`None`), all tags are returned.\n\n Returns\n -------\n tags : dict or tag value\n The estimator tags. A single value is returned if `key` is not None.\n \"\"\"\n if hasattr(estimator, \"_get_tags\"):\n tags_provider = \"_get_tags()\"\n tags = estimator._get_tags()\n elif hasattr(estimator, \"_more_tags\"):\n tags_provider = \"_more_tags()\"\n tags = {**_DEFAULT_TAGS, **estimator._more_tags()}\n else:\n tags_provider = \"_DEFAULT_TAGS\"\n tags = _DEFAULT_TAGS\n\n if key is not None:\n if key not in tags:\n raise ValueError(\n f\"The key {key} is not defined in {tags_provider} for the \"\n f\"class {estimator.__class__.__name__}.\"\n )\n return tags[key]\n return tags" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/__init__", + "name": "__init__", + "qname": "sklearn.utils._testing.MinimalClassifier.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/__init__/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalClassifier.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/__init__/param", + "name": "param", + "qname": "sklearn.utils._testing.MinimalClassifier.__init__.param", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Minimal classifier implementation without inheriting from BaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`.", + "docstring": "", + "code": " def __init__(self, param=None):\n self.param = param" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/fit", + "name": "fit", + "qname": "sklearn.utils._testing.MinimalClassifier.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/fit/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalClassifier.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/fit/X", + "name": "X", + "qname": "sklearn.utils._testing.MinimalClassifier.fit.X", + 
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/fit/y", + "name": "y", + "qname": "sklearn.utils._testing.MinimalClassifier.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def fit(self, X, y):\n X, y = check_X_y(X, y)\n check_classification_targets(y)\n self.classes_, counts = np.unique(y, return_counts=True)\n self._most_frequent_class_idx = counts.argmax()\n return self" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/get_params", + "name": "get_params", + "qname": "sklearn.utils._testing.MinimalClassifier.get_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/get_params/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalClassifier.get_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/get_params/deep", + "name": "deep", + "qname": "sklearn.utils._testing.MinimalClassifier.get_params.deep", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_params(self, deep=True):\n return {\"param\": self.param}" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/predict", + "name": "predict", + "qname": "sklearn.utils._testing.MinimalClassifier.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/predict/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalClassifier.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/predict/X", + "name": "X", + "qname": "sklearn.utils._testing.MinimalClassifier.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def predict(self, X):\n y_proba = self.predict_proba(X)\n y_pred = y_proba.argmax(axis=1)\n return self.classes_[y_pred]" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/predict_proba", + "name": "predict_proba", + "qname": "sklearn.utils._testing.MinimalClassifier.predict_proba", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/predict_proba/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalClassifier.predict_proba.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { 
+ "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/predict_proba/X", + "name": "X", + "qname": "sklearn.utils._testing.MinimalClassifier.predict_proba.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def predict_proba(self, X):\n check_is_fitted(self)\n X = check_array(X)\n proba_shape = (X.shape[0], self.classes_.size)\n y_proba = np.zeros(shape=proba_shape, dtype=np.float64)\n y_proba[:, self._most_frequent_class_idx] = 1.0\n return y_proba" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/score", + "name": "score", + "qname": "sklearn.utils._testing.MinimalClassifier.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/score/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalClassifier.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/score/X", + "name": "X", + "qname": "sklearn.utils._testing.MinimalClassifier.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/score/y", + "name": "y", + "qname": "sklearn.utils._testing.MinimalClassifier.score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def score(self, X, y):\n from sklearn.metrics import accuracy_score\n return accuracy_score(y, self.predict(X))" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/set_params", + "name": "set_params", + "qname": "sklearn.utils._testing.MinimalClassifier.set_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/set_params/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalClassifier.set_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalClassifier/set_params/params", + "name": "params", + "qname": "sklearn.utils._testing.MinimalClassifier.set_params.params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def set_params(self, **params):\n for key, value in params.items():\n setattr(self, key, value)\n return self" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/__init__", + "name": "__init__", + "qname": "sklearn.utils._testing.MinimalRegressor.__init__", + 
"decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/__init__/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalRegressor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/__init__/param", + "name": "param", + "qname": "sklearn.utils._testing.MinimalRegressor.__init__.param", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Minimal regressor implementation with inheriting from BaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`.", + "docstring": "", + "code": " def __init__(self, param=None):\n self.param = param" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/fit", + "name": "fit", + "qname": "sklearn.utils._testing.MinimalRegressor.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/fit/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalRegressor.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/fit/X", + "name": "X", + "qname": "sklearn.utils._testing.MinimalRegressor.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/fit/y", + "name": "y", + "qname": "sklearn.utils._testing.MinimalRegressor.fit.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def fit(self, X, y):\n X, y = check_X_y(X, y)\n self.is_fitted_ = True\n self._mean = np.mean(y)\n return self" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/get_params", + "name": "get_params", + "qname": "sklearn.utils._testing.MinimalRegressor.get_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/get_params/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalRegressor.get_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/get_params/deep", + "name": "deep", + "qname": "sklearn.utils._testing.MinimalRegressor.get_params.deep", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + 
"description": "", + "docstring": "", + "code": " def get_params(self, deep=True):\n return {\"param\": self.param}" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/predict", + "name": "predict", + "qname": "sklearn.utils._testing.MinimalRegressor.predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/predict/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalRegressor.predict.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/predict/X", + "name": "X", + "qname": "sklearn.utils._testing.MinimalRegressor.predict.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def predict(self, X):\n check_is_fitted(self)\n X = check_array(X)\n return np.ones(shape=(X.shape[0],)) * self._mean" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/score", + "name": "score", + "qname": "sklearn.utils._testing.MinimalRegressor.score", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/score/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalRegressor.score.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/score/X", + "name": "X", + "qname": "sklearn.utils._testing.MinimalRegressor.score.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/score/y", + "name": "y", + "qname": "sklearn.utils._testing.MinimalRegressor.score.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def score(self, X, y):\n from sklearn.metrics import r2_score\n return r2_score(y, self.predict(X))" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/set_params", + "name": "set_params", + "qname": "sklearn.utils._testing.MinimalRegressor.set_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/set_params/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalRegressor.set_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalRegressor/set_params/params", + "name": "params", + "qname": "sklearn.utils._testing.MinimalRegressor.set_params.params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": 
{} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def set_params(self, **params):\n for key, value in params.items():\n setattr(self, key, value)\n return self" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/__init__", + "name": "__init__", + "qname": "sklearn.utils._testing.MinimalTransformer.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/__init__/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalTransformer.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/__init__/param", + "name": "param", + "qname": "sklearn.utils._testing.MinimalTransformer.__init__.param", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Minimal transformer implementation without inheriting from\nBaseEstimator.\n\nThis estimator should be tested with:\n\n* `check_estimator` in `test_estimator_checks.py`;\n* within a `Pipeline` in `test_pipeline.py`;\n* within a `SearchCV` in `test_search.py`.", + "docstring": "", + "code": " def __init__(self, param=None):\n self.param = param" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/fit", + "name": "fit", + "qname": "sklearn.utils._testing.MinimalTransformer.fit", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/fit/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalTransformer.fit.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/fit/X", + "name": "X", + "qname": "sklearn.utils._testing.MinimalTransformer.fit.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/fit/y", + "name": "y", + "qname": "sklearn.utils._testing.MinimalTransformer.fit.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def fit(self, X, y=None):\n X = check_array(X)\n self.is_fitted_ = True\n return self" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/fit_transform", + "name": "fit_transform", + "qname": "sklearn.utils._testing.MinimalTransformer.fit_transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/fit_transform/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalTransformer.fit_transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {}
+ }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/fit_transform/X", + "name": "X", + "qname": "sklearn.utils._testing.MinimalTransformer.fit_transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/fit_transform/y", + "name": "y", + "qname": "sklearn.utils._testing.MinimalTransformer.fit_transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def fit_transform(self, X, y=None):\n return self.fit(X, y).transform(X, y)" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/get_params", + "name": "get_params", + "qname": "sklearn.utils._testing.MinimalTransformer.get_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/get_params/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalTransformer.get_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/get_params/deep", + "name": "deep", + "qname": "sklearn.utils._testing.MinimalTransformer.get_params.deep", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def get_params(self, deep=True):\n return {\"param\": self.param}" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/set_params", + "name": "set_params", + "qname": "sklearn.utils._testing.MinimalTransformer.set_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/set_params/self", + "name": "self", + "qname": "sklearn.utils._testing.MinimalTransformer.set_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/set_params/params", + "name": "params", + "qname": "sklearn.utils._testing.MinimalTransformer.set_params.params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def set_params(self, **params):\n for key, value in params.items():\n setattr(self, key, value)\n return self" + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/transform", + "name": "transform", + "qname": "sklearn.utils._testing.MinimalTransformer.transform", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/transform/self", + "name": "self", + "qname": 
"sklearn.utils._testing.MinimalTransformer.transform.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/transform/X", + "name": "X", + "qname": "sklearn.utils._testing.MinimalTransformer.transform.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/MinimalTransformer/transform/y", + "name": "y", + "qname": "sklearn.utils._testing.MinimalTransformer.transform.y", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def transform(self, X, y=None):\n check_is_fitted(self)\n X = check_array(X)\n return X" + }, + { + "id": "scikit-learn/sklearn.utils._testing/TempMemmap/__enter__", + "name": "__enter__", + "qname": "sklearn.utils._testing.TempMemmap.__enter__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/TempMemmap/__enter__/self", + "name": "self", + "qname": "sklearn.utils._testing.TempMemmap.__enter__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __enter__(self):\n data_read_only, self.temp_folder = create_memmap_backed_data(\n self.data, mmap_mode=self.mmap_mode, return_folder=True)\n return data_read_only" + }, + { + "id": "scikit-learn/sklearn.utils._testing/TempMemmap/__exit__", + "name": "__exit__", + "qname": "sklearn.utils._testing.TempMemmap.__exit__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/TempMemmap/__exit__/self", + "name": "self", + "qname": "sklearn.utils._testing.TempMemmap.__exit__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/TempMemmap/__exit__/exc_type", + "name": "exc_type", + "qname": "sklearn.utils._testing.TempMemmap.__exit__.exc_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/TempMemmap/__exit__/exc_val", + "name": "exc_val", + "qname": "sklearn.utils._testing.TempMemmap.__exit__.exc_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/TempMemmap/__exit__/exc_tb", + "name": "exc_tb", + "qname": "sklearn.utils._testing.TempMemmap.__exit__.exc_tb", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + 
"reexported_by": [], + "description": "", + "docstring": "", + "code": " def __exit__(self, exc_type, exc_val, exc_tb):\n _delete_folder(self.temp_folder)" + }, + { + "id": "scikit-learn/sklearn.utils._testing/TempMemmap/__init__", + "name": "__init__", + "qname": "sklearn.utils._testing.TempMemmap.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/TempMemmap/__init__/self", + "name": "self", + "qname": "sklearn.utils._testing.TempMemmap.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/TempMemmap/__init__/data", + "name": "data", + "qname": "sklearn.utils._testing.TempMemmap.__init__.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/TempMemmap/__init__/mmap_mode", + "name": "mmap_mode", + "qname": "sklearn.utils._testing.TempMemmap.__init__.mmap_mode", + "default_value": "'r'", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'r'", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __init__(self, data, mmap_mode='r'):\n self.mmap_mode = mmap_mode\n self.data = data" + }, + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__call__", + "name": "__call__", + "qname": "sklearn.utils._testing._IgnoreWarnings.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__call__/self", + "name": "self", + "qname": "sklearn.utils._testing._IgnoreWarnings.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__call__/fn", + "name": "fn", + "qname": "sklearn.utils._testing._IgnoreWarnings.__call__.fn", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Decorator to catch and hide warnings without visual nesting.", + "docstring": "Decorator to catch and hide warnings without visual nesting.", + "code": " def __call__(self, fn):\n \"\"\"Decorator to catch and hide warnings without visual nesting.\"\"\"\n @wraps(fn)\n def wrapper(*args, **kwargs):\n with warnings.catch_warnings():\n warnings.simplefilter(\"ignore\", self.category)\n return fn(*args, **kwargs)\n\n return wrapper" + }, + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__enter__", + "name": "__enter__", + "qname": "sklearn.utils._testing._IgnoreWarnings.__enter__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__enter__/self", + "name": "self", + "qname": "sklearn.utils._testing._IgnoreWarnings.__enter__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + 
"type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __enter__(self):\n if self._entered:\n raise RuntimeError(\"Cannot enter %r twice\" % self)\n self._entered = True\n self._filters = self._module.filters\n self._module.filters = self._filters[:]\n self._showwarning = self._module.showwarning\n warnings.simplefilter(\"ignore\", self.category)" + }, + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__exit__", + "name": "__exit__", + "qname": "sklearn.utils._testing._IgnoreWarnings.__exit__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__exit__/self", + "name": "self", + "qname": "sklearn.utils._testing._IgnoreWarnings.__exit__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__exit__/exc_info", + "name": "exc_info", + "qname": "sklearn.utils._testing._IgnoreWarnings.__exit__.exc_info", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __exit__(self, *exc_info):\n if not self._entered:\n raise RuntimeError(\"Cannot exit %r without entering first\" % self)\n self._module.filters = self._filters\n self._module.showwarning = self._showwarning\n self.log[:] = []" + }, + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__init__", + "name": "__init__", + "qname": "sklearn.utils._testing._IgnoreWarnings.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__init__/self", + "name": "self", + "qname": "sklearn.utils._testing._IgnoreWarnings.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__init__/category", + "name": "category", + "qname": "sklearn.utils._testing._IgnoreWarnings.__init__.category", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tuple of warning class", + "default_value": "Warning", + "description": "The category to filter. By default, all the categories will be muted." 
+ }, + "type": { + "kind": "NamedType", + "name": "tuple of warning class" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Improved and simplified Python warnings context manager and decorator.\n\nThis class allows the user to ignore the warnings raised by a function.\nCopied from Python 2.7.5 and modified as required.", + "docstring": "", + "code": " def __init__(self, category):\n self._record = True\n self._module = sys.modules['warnings']\n self._entered = False\n self.log = []\n self.category = category" + }, + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__repr__", + "name": "__repr__", + "qname": "sklearn.utils._testing._IgnoreWarnings.__repr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/_IgnoreWarnings/__repr__/self", + "name": "self", + "qname": "sklearn.utils._testing._IgnoreWarnings.__repr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __repr__(self):\n args = []\n if self._record:\n args.append(\"record=True\")\n if self._module is not sys.modules['warnings']:\n args.append(\"module=%r\" % self._module)\n name = type(self).__name__\n return \"%s(%s)\" % (name, \", \".join(args))" + }, + { + "id": "scikit-learn/sklearn.utils._testing/_Raises/__exit__", + "name": "__exit__", + "qname": "sklearn.utils._testing._Raises.__exit__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/_Raises/__exit__/self", + "name": "self", + "qname": "sklearn.utils._testing._Raises.__exit__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/_Raises/__exit__/exc_type", + "name": "exc_type", + "qname": "sklearn.utils._testing._Raises.__exit__.exc_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/_Raises/__exit__/exc_value", + "name": "exc_value", + "qname": "sklearn.utils._testing._Raises.__exit__.exc_value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/_Raises/__exit__/_", + "name": "_", + "qname": "sklearn.utils._testing._Raises.__exit__._", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __exit__(self, exc_type, exc_value, _):\n # see\n # https://docs.python.org/2.5/whatsnew/pep-343.html#SECTION000910000000000000000\n\n if exc_type is None: # No exception was raised in the block\n if self.may_pass:\n return True # CM is happy\n else:\n err_msg = (\n self.err_msg or f\"Did not raise: {self.expected_exc_types}\"\n )\n raise AssertionError(err_msg)\n\n if not any(\n issubclass(exc_type, expected_type)\n for 
expected_type in self.expected_exc_types\n ):\n if self.err_msg is not None:\n raise AssertionError(self.err_msg) from exc_value\n else:\n return False # will re-raise the original exception\n\n if self.matches is not None:\n err_msg = self.err_msg or (\n \"The error message should contain one of the following \"\n \"patterns:\\n{}\\nGot {}\".format(\n \"\\n\".join(self.matches), str(exc_value)\n )\n )\n if not any(re.search(match, str(exc_value))\n for match in self.matches):\n raise AssertionError(err_msg) from exc_value\n self.raised_and_matched = True\n\n return True" + }, + { + "id": "scikit-learn/sklearn.utils._testing/_Raises/__init__", + "name": "__init__", + "qname": "sklearn.utils._testing._Raises.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/_Raises/__init__/self", + "name": "self", + "qname": "sklearn.utils._testing._Raises.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/_Raises/__init__/expected_exc_type", + "name": "expected_exc_type", + "qname": "sklearn.utils._testing._Raises.__init__.expected_exc_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/_Raises/__init__/match", + "name": "match", + "qname": "sklearn.utils._testing._Raises.__init__.match", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/_Raises/__init__/may_pass", + "name": "may_pass", + "qname": "sklearn.utils._testing._Raises.__init__.may_pass", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/_Raises/__init__/err_msg", + "name": "err_msg", + "qname": "sklearn.utils._testing._Raises.__init__.err_msg", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __init__(self, expected_exc_type, match, may_pass, err_msg):\n self.expected_exc_types = (\n expected_exc_type\n if isinstance(expected_exc_type, Iterable)\n else [expected_exc_type]\n )\n self.matches = [match] if isinstance(match, str) else match\n self.may_pass = may_pass\n self.err_msg = err_msg\n self.raised_and_matched = False" + }, + { + "id": "scikit-learn/sklearn.utils._testing/_convert_container", + "name": "_convert_container", + "qname": "sklearn.utils._testing._convert_container", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/_convert_container/container", + "name": "container", + "qname": "sklearn.utils._testing._convert_container.container", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "The container to convert." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/_convert_container/constructor_name", + "name": "constructor_name", + "qname": "sklearn.utils._testing._convert_container.constructor_name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{\"list\", \"tuple\", \"array\", \"sparse\", \"dataframe\", \"series\", \"index\", \"slice\", \"sparse_csr\", \"sparse_csc\"}", + "default_value": "", + "description": "The type of the returned container." + }, + "type": { + "kind": "EnumType", + "values": [ + "array", + "sparse", + "series", + "sparse_csr", + "list", + "index", + "tuple", + "dataframe", + "slice", + "sparse_csc" + ] + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/_convert_container/columns_name", + "name": "columns_name", + "qname": "sklearn.utils._testing._convert_container.columns_name", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "index or array-like", + "default_value": "None", + "description": "For pandas container supporting `columns_names`, it will affect\nspecific names." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "index" + }, + { + "kind": "NamedType", + "name": "array-like" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/_convert_container/dtype", + "name": "dtype", + "qname": "sklearn.utils._testing._convert_container.dtype", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dtype", + "default_value": "None", + "description": "Force the dtype of the container. Does not apply to `\"slice\"`\ncontainer." + }, + "type": { + "kind": "NamedType", + "name": "dtype" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Convert a given container to a specific array-like with a dtype.", + "docstring": "Convert a given container to a specific array-like with a dtype.\n\nParameters\n----------\ncontainer : array-like\n The container to convert.\nconstructor_name : {\"list\", \"tuple\", \"array\", \"sparse\", \"dataframe\", \"series\", \"index\", \"slice\", \"sparse_csr\", \"sparse_csc\"}\n The type of the returned container.\ncolumns_name : index or array-like, default=None\n For pandas container supporting `columns_names`, it will affect\n specific names.\ndtype : dtype, default=None\n Force the dtype of the container. Does not apply to `\"slice\"`\n container.\n\nReturns\n-------\nconverted_container", + "code": "def _convert_container(\n container, constructor_name, columns_name=None, dtype=None\n):\n \"\"\"Convert a given container to a specific array-like with a dtype.\n\n Parameters\n ----------\n container : array-like\n The container to convert.\n constructor_name : {\"list\", \"tuple\", \"array\", \"sparse\", \"dataframe\", \\\n \"series\", \"index\", \"slice\", \"sparse_csr\", \"sparse_csc\"}\n The type of the returned container.\n columns_name : index or array-like, default=None\n For pandas container supporting `columns_names`, it will affect\n specific names.\n dtype : dtype, default=None\n Force the dtype of the container. 
Does not apply to `\"slice\"`\n container.\n\n Returns\n -------\n converted_container\n \"\"\"\n if constructor_name == 'list':\n if dtype is None:\n return list(container)\n else:\n return np.asarray(container, dtype=dtype).tolist()\n elif constructor_name == 'tuple':\n if dtype is None:\n return tuple(container)\n else:\n return tuple(np.asarray(container, dtype=dtype).tolist())\n elif constructor_name == 'array':\n return np.asarray(container, dtype=dtype)\n elif constructor_name == 'sparse':\n return sp.sparse.csr_matrix(container, dtype=dtype)\n elif constructor_name == 'dataframe':\n pd = pytest.importorskip('pandas')\n return pd.DataFrame(container, columns=columns_name, dtype=dtype)\n elif constructor_name == 'series':\n pd = pytest.importorskip('pandas')\n return pd.Series(container, dtype=dtype)\n elif constructor_name == 'index':\n pd = pytest.importorskip('pandas')\n return pd.Index(container, dtype=dtype)\n elif constructor_name == 'slice':\n return slice(container[0], container[1])\n elif constructor_name == 'sparse_csr':\n return sp.sparse.csr_matrix(container, dtype=dtype)\n elif constructor_name == 'sparse_csc':\n return sp.sparse.csc_matrix(container, dtype=dtype)" + }, + { + "id": "scikit-learn/sklearn.utils._testing/_delete_folder", + "name": "_delete_folder", + "qname": "sklearn.utils._testing._delete_folder", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/_delete_folder/folder_path", + "name": "folder_path", + "qname": "sklearn.utils._testing._delete_folder.folder_path", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/_delete_folder/warn", + "name": "warn", + "qname": "sklearn.utils._testing._delete_folder.warn", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Utility function to cleanup a temporary folder if still existing.\n\nCopy from joblib.pool (for independence).", + "docstring": "Utility function to cleanup a temporary folder if still existing.\n\nCopy from joblib.pool (for independence).", + "code": "def _delete_folder(folder_path, warn=False):\n \"\"\"Utility function to cleanup a temporary folder if still existing.\n\n Copy from joblib.pool (for independence).\n \"\"\"\n try:\n if os.path.exists(folder_path):\n # This can fail under windows,\n # but will succeed when called by atexit\n shutil.rmtree(folder_path)\n except WindowsError:\n if warn:\n warnings.warn(\"Could not delete temporary folder %s\" % folder_path)" + }, + { + "id": "scikit-learn/sklearn.utils._testing/_get_args", + "name": "_get_args", + "qname": "sklearn.utils._testing._get_args", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/_get_args/function", + "name": "function", + "qname": "sklearn.utils._testing._get_args.function", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/_get_args/varargs", + "name": "varargs", + "qname": "sklearn.utils._testing._get_args.varargs", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + 
"is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper to get function arguments.", + "docstring": "Helper to get function arguments.", + "code": "def _get_args(function, varargs=False):\n \"\"\"Helper to get function arguments.\"\"\"\n\n try:\n params = signature(function).parameters\n except ValueError:\n # Error on builtin C function\n return []\n args = [key for key, param in params.items()\n if param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)]\n if varargs:\n varargs = [param.name for param in params.values()\n if param.kind == param.VAR_POSITIONAL]\n if len(varargs) == 0:\n varargs = None\n return args, varargs\n else:\n return args" + }, + { + "id": "scikit-learn/sklearn.utils._testing/_get_func_name", + "name": "_get_func_name", + "qname": "sklearn.utils._testing._get_func_name", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/_get_func_name/func", + "name": "func", + "qname": "sklearn.utils._testing._get_func_name.func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "The function object." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get function full name.", + "docstring": "Get function full name.\n\nParameters\n----------\nfunc : callable\n The function object.\n\nReturns\n-------\nname : str\n The function name.", + "code": "def _get_func_name(func):\n \"\"\"Get function full name.\n\n Parameters\n ----------\n func : callable\n The function object.\n\n Returns\n -------\n name : str\n The function name.\n \"\"\"\n parts = []\n module = inspect.getmodule(func)\n if module:\n parts.append(module.__name__)\n\n qualname = func.__qualname__\n if qualname != func.__name__:\n parts.append(qualname[:qualname.find('.')])\n\n parts.append(func.__name__)\n return '.'.join(parts)" + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_allclose_dense_sparse", + "name": "assert_allclose_dense_sparse", + "qname": "sklearn.utils._testing.assert_allclose_dense_sparse", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/assert_allclose_dense_sparse/x", + "name": "x", + "qname": "sklearn.utils._testing.assert_allclose_dense_sparse.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}", + "default_value": "", + "description": "First array to compare." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_allclose_dense_sparse/y", + "name": "y", + "qname": "sklearn.utils._testing.assert_allclose_dense_sparse.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}", + "default_value": "", + "description": "Second array to compare." 
+ }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_allclose_dense_sparse/rtol", + "name": "rtol", + "qname": "sklearn.utils._testing.assert_allclose_dense_sparse.rtol", + "default_value": "1e-07", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-07", + "description": "relative tolerance; see numpy.allclose." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_allclose_dense_sparse/atol", + "name": "atol", + "qname": "sklearn.utils._testing.assert_allclose_dense_sparse.atol", + "default_value": "1e-09", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-9", + "description": "absolute tolerance; see numpy.allclose. Note that the default here is\nmore tolerant than the default for numpy.testing.assert_allclose, where\natol=0." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_allclose_dense_sparse/err_msg", + "name": "err_msg", + "qname": "sklearn.utils._testing.assert_allclose_dense_sparse.err_msg", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "''", + "description": "Error message to raise." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Assert allclose for sparse and dense data.\n\nBoth x and y need to be either sparse or dense, they\ncan't be mixed.", + "docstring": "Assert allclose for sparse and dense data.\n\nBoth x and y need to be either sparse or dense, they\ncan't be mixed.\n\nParameters\n----------\nx : {array-like, sparse matrix}\n First array to compare.\n\ny : {array-like, sparse matrix}\n Second array to compare.\n\nrtol : float, default=1e-07\n relative tolerance; see numpy.allclose.\n\natol : float, default=1e-9\n absolute tolerance; see numpy.allclose. Note that the default here is\n more tolerant than the default for numpy.testing.assert_allclose, where\n atol=0.\n\nerr_msg : str, default=''\n Error message to raise.", + "code": "def assert_allclose_dense_sparse(x, y, rtol=1e-07, atol=1e-9, err_msg=''):\n \"\"\"Assert allclose for sparse and dense data.\n\n Both x and y need to be either sparse or dense, they\n can't be mixed.\n\n Parameters\n ----------\n x : {array-like, sparse matrix}\n First array to compare.\n\n y : {array-like, sparse matrix}\n Second array to compare.\n\n rtol : float, default=1e-07\n relative tolerance; see numpy.allclose.\n\n atol : float, default=1e-9\n absolute tolerance; see numpy.allclose. 
Note that the default here is\n more tolerant than the default for numpy.testing.assert_allclose, where\n atol=0.\n\n err_msg : str, default=''\n Error message to raise.\n \"\"\"\n if sp.sparse.issparse(x) and sp.sparse.issparse(y):\n x = x.tocsr()\n y = y.tocsr()\n x.sum_duplicates()\n y.sum_duplicates()\n assert_array_equal(x.indices, y.indices, err_msg=err_msg)\n assert_array_equal(x.indptr, y.indptr, err_msg=err_msg)\n assert_allclose(x.data, y.data, rtol=rtol, atol=atol, err_msg=err_msg)\n elif not sp.sparse.issparse(x) and not sp.sparse.issparse(y):\n # both dense\n assert_allclose(x, y, rtol=rtol, atol=atol, err_msg=err_msg)\n else:\n raise ValueError(\"Can only compare two sparse matrices,\"\n \" not a sparse matrix and an array.\")" + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_no_warnings", + "name": "assert_no_warnings", + "qname": "sklearn.utils._testing.assert_no_warnings", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/assert_no_warnings/func", + "name": "func", + "qname": "sklearn.utils._testing.assert_no_warnings.func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_no_warnings/args", + "name": "args", + "qname": "sklearn.utils._testing.assert_no_warnings.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_no_warnings/kw", + "name": "kw", + "qname": "sklearn.utils._testing.assert_no_warnings.kw", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "Parameters\n----------\nfunc\n*args\n**kw", + "code": "def assert_no_warnings(func, *args, **kw):\n \"\"\"\n Parameters\n ----------\n func\n *args\n **kw\n \"\"\"\n # very important to avoid uncontrolled state propagation\n with warnings.catch_warnings(record=True) as w:\n warnings.simplefilter('always')\n\n result = func(*args, **kw)\n if hasattr(np, 'FutureWarning'):\n # Filter out numpy-specific warnings in numpy >= 1.9\n w = [e for e in w\n if e.category is not np.VisibleDeprecationWarning]\n\n if len(w) > 0:\n raise AssertionError(\"Got warnings when calling %s: [%s]\"\n % (func.__name__,\n ', '.join(str(warning) for warning in w)))\n return result" + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_raise_message", + "name": "assert_raise_message", + "qname": "sklearn.utils._testing.assert_raise_message", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/assert_raise_message/exceptions", + "name": "exceptions", + "qname": "sklearn.utils._testing.assert_raise_message.exceptions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "exception or tuple of exception", + "default_value": "", + "description": "An Exception object." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "exception" + }, + { + "kind": "NamedType", + "name": "tuple of exception" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_raise_message/message", + "name": "message", + "qname": "sklearn.utils._testing.assert_raise_message.message", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "The error message or a substring of the error message." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_raise_message/function", + "name": "function", + "qname": "sklearn.utils._testing.assert_raise_message.function", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Callable object to raise error." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_raise_message/args", + "name": "args", + "qname": "sklearn.utils._testing.assert_raise_message.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "the positional arguments to `function`.", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "the positional arguments to `function`." + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_raise_message/kwargs", + "name": "kwargs", + "qname": "sklearn.utils._testing.assert_raise_message.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "the keyword arguments to `function`.", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "the keyword arguments to `function`." + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper function to test the message raised in an exception.\n\nGiven an exception, a callable to raise the exception, and\na message string, tests that the correct exception is raised and\nthat the message is a substring of the error thrown. Used to test\nthat the specific message thrown during an exception is correct.", + "docstring": "Helper function to test the message raised in an exception.\n\nGiven an exception, a callable to raise the exception, and\na message string, tests that the correct exception is raised and\nthat the message is a substring of the error thrown. Used to test\nthat the specific message thrown during an exception is correct.\n\nParameters\n----------\nexceptions : exception or tuple of exception\n An Exception object.\n\nmessage : str\n The error message or a substring of the error message.\n\nfunction : callable\n Callable object to raise error.\n\n*args : the positional arguments to `function`.\n\n**kwargs : the keyword arguments to `function`.", + "code": "def assert_raise_message(exceptions, message, function, *args, **kwargs):\n \"\"\"Helper function to test the message raised in an exception.\n\n Given an exception, a callable to raise the exception, and\n a message string, tests that the correct exception is raised and\n that the message is a substring of the error thrown. 
Used to test\n that the specific message thrown during an exception is correct.\n\n Parameters\n ----------\n exceptions : exception or tuple of exception\n An Exception object.\n\n message : str\n The error message or a substring of the error message.\n\n function : callable\n Callable object to raise error.\n\n *args : the positional arguments to `function`.\n\n **kwargs : the keyword arguments to `function`.\n \"\"\"\n try:\n function(*args, **kwargs)\n except exceptions as e:\n error_message = str(e)\n if message not in error_message:\n raise AssertionError(\"Error message does not include the expected\"\n \" string: %r. Observed error message: %r\" %\n (message, error_message))\n else:\n # concatenate exception names\n if isinstance(exceptions, tuple):\n names = \" or \".join(e.__name__ for e in exceptions)\n else:\n names = exceptions.__name__\n\n raise AssertionError(\"%s not raised by %s\" %\n (names, function.__name__))" + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_run_python_script", + "name": "assert_run_python_script", + "qname": "sklearn.utils._testing.assert_run_python_script", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/assert_run_python_script/source_code", + "name": "source_code", + "qname": "sklearn.utils._testing.assert_run_python_script.source_code", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "The Python source code to execute." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_run_python_script/timeout", + "name": "timeout", + "qname": "sklearn.utils._testing.assert_run_python_script.timeout", + "default_value": "60", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "60", + "description": "Time in seconds before timeout." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Utility to check assertions in an independent Python subprocess.\n\nThe script provided in the source code should return 0 and not print\nanything on stderr or stdout.\n\nThis is a port from cloudpickle https://github.com/cloudpipe/cloudpickle", + "docstring": "Utility to check assertions in an independent Python subprocess.\n\nThe script provided in the source code should return 0 and not print\nanything on stderr or stdout.\n\nThis is a port from cloudpickle https://github.com/cloudpipe/cloudpickle\n\nParameters\n----------\nsource_code : str\n The Python source code to execute.\ntimeout : int, default=60\n Time in seconds before timeout.", + "code": "def assert_run_python_script(source_code, timeout=60):\n \"\"\"Utility to check assertions in an independent Python subprocess.\n\n The script provided in the source code should return 0 and not print\n anything on stderr or stdout.\n\n This is a port from cloudpickle https://github.com/cloudpipe/cloudpickle\n\n Parameters\n ----------\n source_code : str\n The Python source code to execute.\n timeout : int, default=60\n Time in seconds before timeout.\n \"\"\"\n fd, source_file = tempfile.mkstemp(suffix='_src_test_sklearn.py')\n os.close(fd)\n try:\n with open(source_file, 'wb') as f:\n f.write(source_code.encode('utf-8'))\n cmd = [sys.executable, source_file]\n cwd = op.normpath(op.join(op.dirname(sklearn.__file__), '..'))\n env = os.environ.copy()\n try:\n env[\"PYTHONPATH\"] = os.pathsep.join([cwd, env[\"PYTHONPATH\"]])\n except KeyError:\n env[\"PYTHONPATH\"] = cwd\n kwargs = {\n 'cwd': cwd,\n 'stderr': STDOUT,\n 'env': env\n }\n # If coverage is running, pass the config file to the subprocess\n coverage_rc = os.environ.get(\"COVERAGE_PROCESS_START\")\n if coverage_rc:\n kwargs['env']['COVERAGE_PROCESS_START'] = coverage_rc\n\n kwargs['timeout'] = timeout\n try:\n try:\n out = check_output(cmd, **kwargs)\n except CalledProcessError as e:\n raise RuntimeError(u\"script errored with output:\\n%s\"\n % e.output.decode('utf-8'))\n if out != b\"\":\n raise AssertionError(out.decode('utf-8'))\n except TimeoutExpired as e:\n raise RuntimeError(u\"script timeout, output so far:\\n%s\"\n % e.output.decode('utf-8'))\n finally:\n os.unlink(source_file)" + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns", + "name": "assert_warns", + "qname": "sklearn.utils._testing.assert_warns", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns/warning_class", + "name": "warning_class", + "qname": "sklearn.utils._testing.assert_warns.warning_class", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "the warning class", + "default_value": "", + "description": "The class to test for, e.g. UserWarning." + }, + "type": { + "kind": "NamedType", + "name": "the warning class" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns/func", + "name": "func", + "qname": "sklearn.utils._testing.assert_warns.func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Callable object to trigger warnings." 
+ }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns/args", + "name": "args", + "qname": "sklearn.utils._testing.assert_warns.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "the positional arguments to `func`.", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "the positional arguments to `func`." + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns/kw", + "name": "kw", + "qname": "sklearn.utils._testing.assert_warns.kw", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "the keyword arguments to `func`", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "the keyword arguments to `func`" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Test that a certain warning occurs.", + "docstring": "Test that a certain warning occurs.\n\nParameters\n----------\nwarning_class : the warning class\n The class to test for, e.g. UserWarning.\n\nfunc : callable\n Callable object to trigger warnings.\n\n*args : the positional arguments to `func`.\n\n**kw : the keyword arguments to `func`\n\nReturns\n-------\nresult : the return value of `func`", + "code": "def assert_warns(warning_class, func, *args, **kw):\n \"\"\"Test that a certain warning occurs.\n\n Parameters\n ----------\n warning_class : the warning class\n The class to test for, e.g. UserWarning.\n\n func : callable\n Callable object to trigger warnings.\n\n *args : the positional arguments to `func`.\n\n **kw : the keyword arguments to `func`\n\n Returns\n -------\n result : the return value of `func`\n\n \"\"\"\n with warnings.catch_warnings(record=True) as w:\n # Cause all warnings to always be triggered.\n warnings.simplefilter(\"always\")\n # Trigger a warning.\n result = func(*args, **kw)\n if hasattr(np, 'FutureWarning'):\n # Filter out numpy-specific warnings in numpy >= 1.9\n w = [e for e in w\n if e.category is not np.VisibleDeprecationWarning]\n\n # Verify some things\n if not len(w) > 0:\n raise AssertionError(\"No warning raised when calling %s\"\n % func.__name__)\n\n found = any(warning.category is warning_class for warning in w)\n if not found:\n raise AssertionError(\"%s did not give warning: %s( is %s)\"\n % (func.__name__, warning_class, w))\n return result" + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns_div0", + "name": "assert_warns_div0", + "qname": "sklearn.utils._testing.assert_warns_div0", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns_div0/func", + "name": "func", + "qname": "sklearn.utils._testing.assert_warns_div0.func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns_div0/args", + "name": "args", + "qname": "sklearn.utils._testing.assert_warns_div0.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns_div0/kw", + "name": "kw", + "qname": "sklearn.utils._testing.assert_warns_div0.kw", + "default_value": null, + 
"assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Assume that numpy's warning for divide by zero is raised.\n\nHandles the case of platforms that do not support warning on divide by\nzero.", + "docstring": "Assume that numpy's warning for divide by zero is raised.\n\nHandles the case of platforms that do not support warning on divide by\nzero.\n\nParameters\n----------\nfunc\n*args\n**kw", + "code": "def assert_warns_div0(func, *args, **kw):\n \"\"\"Assume that numpy's warning for divide by zero is raised.\n\n Handles the case of platforms that do not support warning on divide by\n zero.\n\n Parameters\n ----------\n func\n *args\n **kw\n \"\"\"\n\n with np.errstate(divide='warn', invalid='warn'):\n try:\n assert_warns(RuntimeWarning, np.divide, 1, np.zeros(1))\n except AssertionError:\n # This platform does not report numpy divide by zeros\n return func(*args, **kw)\n return assert_warns_message(RuntimeWarning,\n 'invalid value encountered',\n func, *args, **kw)" + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns_message", + "name": "assert_warns_message", + "qname": "sklearn.utils._testing.assert_warns_message", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns_message/warning_class", + "name": "warning_class", + "qname": "sklearn.utils._testing.assert_warns_message.warning_class", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "the warning class", + "default_value": "", + "description": "The class to test for, e.g. UserWarning." + }, + "type": { + "kind": "NamedType", + "name": "the warning class" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns_message/message", + "name": "message", + "qname": "sklearn.utils._testing.assert_warns_message.message", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or callable", + "default_value": "", + "description": "The message or a substring of the message to test for. If callable,\nit takes a string as the argument and will trigger an AssertionError\nif the callable returns `False`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns_message/func", + "name": "func", + "qname": "sklearn.utils._testing.assert_warns_message.func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Callable object to trigger warnings." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns_message/args", + "name": "args", + "qname": "sklearn.utils._testing.assert_warns_message.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "the positional arguments to `func`.", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "the positional arguments to `func`." 
+ } + }, + { + "id": "scikit-learn/sklearn.utils._testing/assert_warns_message/kw", + "name": "kw", + "qname": "sklearn.utils._testing.assert_warns_message.kw", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "the keyword arguments to `func`.", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "the keyword arguments to `func`." + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Test that a certain warning occurs and with a certain message.", + "docstring": "Test that a certain warning occurs and with a certain message.\n\nParameters\n----------\nwarning_class : the warning class\n The class to test for, e.g. UserWarning.\n\nmessage : str or callable\n The message or a substring of the message to test for. If callable,\n it takes a string as the argument and will trigger an AssertionError\n if the callable returns `False`.\n\nfunc : callable\n Callable object to trigger warnings.\n\n*args : the positional arguments to `func`.\n\n**kw : the keyword arguments to `func`.\n\nReturns\n-------\nresult : the return value of `func`", + "code": "def assert_warns_message(warning_class, message, func, *args, **kw):\n # very important to avoid uncontrolled state propagation\n \"\"\"Test that a certain warning occurs and with a certain message.\n\n Parameters\n ----------\n warning_class : the warning class\n The class to test for, e.g. UserWarning.\n\n message : str or callable\n The message or a substring of the message to test for. If callable,\n it takes a string as the argument and will trigger an AssertionError\n if the callable returns `False`.\n\n func : callable\n Callable object to trigger warnings.\n\n *args : the positional arguments to `func`.\n\n **kw : the keyword arguments to `func`.\n\n Returns\n -------\n result : the return value of `func`\n\n \"\"\"\n with warnings.catch_warnings(record=True) as w:\n # Cause all warnings to always be triggered.\n warnings.simplefilter(\"always\")\n if hasattr(np, 'FutureWarning'):\n # Let's not catch the numpy internal DeprecationWarnings\n warnings.simplefilter('ignore', np.VisibleDeprecationWarning)\n # Trigger a warning.\n result = func(*args, **kw)\n # Verify some things\n if not len(w) > 0:\n raise AssertionError(\"No warning raised when calling %s\"\n % func.__name__)\n\n found = [issubclass(warning.category, warning_class) for warning in w]\n if not any(found):\n raise AssertionError(\"No warning raised for %s with class \"\n \"%s\"\n % (func.__name__, warning_class))\n\n message_found = False\n # Checks the message of all warnings belong to warning_class\n for index in [i for i, x in enumerate(found) if x]:\n # substring will match, the entire message with typo won't\n msg = w[index].message # For Python 3 compatibility\n msg = str(msg.args[0] if hasattr(msg, 'args') else msg)\n if callable(message): # add support for certain tests\n check_in_message = message\n else:\n def check_in_message(msg): return message in msg\n\n if check_in_message(msg):\n message_found = True\n break\n\n if not message_found:\n raise AssertionError(\"Did not receive the message you expected \"\n \"('%s') for <%s>, got: '%s'\"\n % (message, func.__name__, msg))\n\n return result" + }, + { + "id": "scikit-learn/sklearn.utils._testing/check_docstring_parameters", + "name": "check_docstring_parameters", + "qname": "sklearn.utils._testing.check_docstring_parameters", + "decorators": [], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.utils._testing/check_docstring_parameters/func", + "name": "func", + "qname": "sklearn.utils._testing.check_docstring_parameters.func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "The function object to test." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/check_docstring_parameters/doc", + "name": "doc", + "qname": "sklearn.utils._testing.check_docstring_parameters.doc", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Docstring if it is passed manually to the test." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/check_docstring_parameters/ignore", + "name": "ignore", + "qname": "sklearn.utils._testing.check_docstring_parameters.ignore", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list", + "default_value": "None", + "description": "Parameters to ignore." + }, + "type": { + "kind": "NamedType", + "name": "list" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper to check docstring.", + "docstring": "Helper to check docstring.\n\nParameters\n----------\nfunc : callable\n The function object to test.\ndoc : str, default=None\n Docstring if it is passed manually to the test.\nignore : list, default=None\n Parameters to ignore.\n\nReturns\n-------\nincorrect : list\n A list of string describing the incorrect results.", + "code": "def check_docstring_parameters(func, doc=None, ignore=None):\n \"\"\"Helper to check docstring.\n\n Parameters\n ----------\n func : callable\n The function object to test.\n doc : str, default=None\n Docstring if it is passed manually to the test.\n ignore : list, default=None\n Parameters to ignore.\n\n Returns\n -------\n incorrect : list\n A list of string describing the incorrect results.\n \"\"\"\n from numpydoc import docscrape\n incorrect = []\n ignore = [] if ignore is None else ignore\n\n func_name = _get_func_name(func)\n if (not func_name.startswith('sklearn.') or\n func_name.startswith('sklearn.externals')):\n return incorrect\n # Don't check docstring for property-functions\n if inspect.isdatadescriptor(func):\n return incorrect\n # Don't check docstring for setup / teardown pytest functions\n if func_name.split('.')[-1] in ('setup_module', 'teardown_module'):\n return incorrect\n # Dont check estimator_checks module\n if func_name.split('.')[2] == 'estimator_checks':\n return incorrect\n # Get the arguments from the function signature\n param_signature = list(filter(lambda x: x not in ignore, _get_args(func)))\n # drop self\n if len(param_signature) > 0 and param_signature[0] == 'self':\n param_signature.remove('self')\n\n # Analyze function's docstring\n if doc is None:\n with warnings.catch_warnings(record=True) as w:\n try:\n doc = docscrape.FunctionDoc(func)\n except Exception as exp:\n incorrect += [func_name + ' parsing error: ' + str(exp)]\n return incorrect\n if len(w):\n raise RuntimeError('Error for %s:\\n%s' % (func_name, w[0]))\n\n param_docs = []\n for name, type_definition, param_doc in doc['Parameters']:\n # Type hints are empty only if parameter name ended with :\n if not type_definition.strip():\n if ':' in name and 
name[:name.index(':')][-1:].strip():\n incorrect += [func_name +\n ' There was no space between the param name and '\n 'colon (%r)' % name]\n elif name.rstrip().endswith(':'):\n incorrect += [func_name +\n ' Parameter %r has an empty type spec. '\n 'Remove the colon' % (name.lstrip())]\n\n # Create a list of parameters to compare with the parameters gotten\n # from the func signature\n if '*' not in name:\n param_docs.append(name.split(':')[0].strip('` '))\n\n # If one of the docstring's parameters had an error then return that\n # incorrect message\n if len(incorrect) > 0:\n return incorrect\n\n # Remove the parameters that should be ignored from list\n param_docs = list(filter(lambda x: x not in ignore, param_docs))\n\n # The following is derived from pytest, Copyright (c) 2004-2017 Holger\n # Krekel and others, Licensed under MIT License. See\n # https://github.com/pytest-dev/pytest\n\n message = []\n for i in range(min(len(param_docs), len(param_signature))):\n if param_signature[i] != param_docs[i]:\n message += [\"There's a parameter name mismatch in function\"\n \" docstring w.r.t. function signature, at index %s\"\n \" diff: %r != %r\" %\n (i, param_signature[i], param_docs[i])]\n break\n if len(param_signature) > len(param_docs):\n message += [\"Parameters in function docstring have fewer items w.r.t.\"\n \" function signature, first missing item: %s\" %\n param_signature[len(param_docs)]]\n\n elif len(param_signature) < len(param_docs):\n message += [\"Parameters in function docstring have more items w.r.t.\"\n \" function signature, first extra item: %s\" %\n param_docs[len(param_signature)]]\n\n # If there wasn't any difference in the parameters themselves between\n # docstring and signature including having the same length then return\n # empty list\n if len(message) == 0:\n return []\n\n import difflib\n import pprint\n\n param_docs_formatted = pprint.pformat(param_docs).splitlines()\n param_signature_formatted = pprint.pformat(param_signature).splitlines()\n\n message += [\"Full diff:\"]\n\n message.extend(\n line.strip() for line in difflib.ndiff(param_signature_formatted,\n param_docs_formatted)\n )\n\n incorrect.extend(message)\n\n # Prepend function name\n incorrect = ['In function: ' + func_name] + incorrect\n\n return incorrect" + }, + { + "id": "scikit-learn/sklearn.utils._testing/check_skip_network", + "name": "check_skip_network", + "qname": "sklearn.utils._testing.check_skip_network", + "decorators": [], + "parameters": [], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_skip_network():\n if int(os.environ.get('SKLEARN_SKIP_NETWORK_TESTS', 0)):\n raise SkipTest(\"Text tutorial requires large dataset download\")" + }, + { + "id": "scikit-learn/sklearn.utils._testing/create_memmap_backed_data", + "name": "create_memmap_backed_data", + "qname": "sklearn.utils._testing.create_memmap_backed_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/create_memmap_backed_data/data", + "name": "data", + "qname": "sklearn.utils._testing.create_memmap_backed_data.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/create_memmap_backed_data/mmap_mode", + "name": "mmap_mode", + "qname": "sklearn.utils._testing.create_memmap_backed_data.mmap_mode", + "default_value": "'r'", + 
"assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "'r'", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/create_memmap_backed_data/return_folder", + "name": "return_folder", + "qname": "sklearn.utils._testing.create_memmap_backed_data.return_folder", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "Parameters\n----------\ndata\nmmap_mode : str, default='r'\nreturn_folder : bool, default=False", + "code": "def create_memmap_backed_data(data, mmap_mode='r', return_folder=False):\n \"\"\"\n Parameters\n ----------\n data\n mmap_mode : str, default='r'\n return_folder : bool, default=False\n \"\"\"\n temp_folder = tempfile.mkdtemp(prefix='sklearn_testing_')\n atexit.register(functools.partial(_delete_folder, temp_folder, warn=True))\n filename = op.join(temp_folder, 'data.pkl')\n joblib.dump(data, filename)\n memmap_backed_data = joblib.load(filename, mmap_mode=mmap_mode)\n result = (memmap_backed_data if not return_folder\n else (memmap_backed_data, temp_folder))\n return result" + }, + { + "id": "scikit-learn/sklearn.utils._testing/ignore_warnings", + "name": "ignore_warnings", + "qname": "sklearn.utils._testing.ignore_warnings", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/ignore_warnings/obj", + "name": "obj", + "qname": "sklearn.utils._testing.ignore_warnings.obj", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "callable where you want to ignore the warnings." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/ignore_warnings/category", + "name": "category", + "qname": "sklearn.utils._testing.ignore_warnings.category", + "default_value": "Warning", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "warning class", + "default_value": "Warning", + "description": "The category to filter. If Warning, all categories will be muted." + }, + "type": { + "kind": "NamedType", + "name": "warning class" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Context manager and decorator to ignore warnings.\n\nNote: Using this (in both variants) will clear all warnings\nfrom all python modules loaded. In case you need to test\ncross-module-warning-logging, this is not your tool of choice.", + "docstring": "Context manager and decorator to ignore warnings.\n\nNote: Using this (in both variants) will clear all warnings\nfrom all python modules loaded. In case you need to test\ncross-module-warning-logging, this is not your tool of choice.\n\nParameters\n----------\nobj : callable, default=None\n callable where you want to ignore the warnings.\ncategory : warning class, default=Warning\n The category to filter. If Warning, all categories will be muted.\n\nExamples\n--------\n>>> with ignore_warnings():\n... warnings.warn('buhuhuhu')\n\n>>> def nasty_warn():\n... warnings.warn('buhuhuhu')\n... 
print(42)\n\n>>> ignore_warnings(nasty_warn)()\n42", + "code": "def ignore_warnings(obj=None, category=Warning):\n \"\"\"Context manager and decorator to ignore warnings.\n\n Note: Using this (in both variants) will clear all warnings\n from all python modules loaded. In case you need to test\n cross-module-warning-logging, this is not your tool of choice.\n\n Parameters\n ----------\n obj : callable, default=None\n callable where you want to ignore the warnings.\n category : warning class, default=Warning\n The category to filter. If Warning, all categories will be muted.\n\n Examples\n --------\n >>> with ignore_warnings():\n ... warnings.warn('buhuhuhu')\n\n >>> def nasty_warn():\n ... warnings.warn('buhuhuhu')\n ... print(42)\n\n >>> ignore_warnings(nasty_warn)()\n 42\n \"\"\"\n if isinstance(obj, type) and issubclass(obj, Warning):\n # Avoid common pitfall of passing category as the first positional\n # argument which result in the test not being run\n warning_name = obj.__name__\n raise ValueError(\n \"'obj' should be a callable where you want to ignore warnings. \"\n \"You passed a warning class instead: 'obj={warning_name}'. \"\n \"If you want to pass a warning class to ignore_warnings, \"\n \"you should use 'category={warning_name}'\".format(\n warning_name=warning_name))\n elif callable(obj):\n return _IgnoreWarnings(category=category)(obj)\n else:\n return _IgnoreWarnings(category=category)" + }, + { + "id": "scikit-learn/sklearn.utils._testing/raises", + "name": "raises", + "qname": "sklearn.utils._testing.raises", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/raises/expected_exc_type", + "name": "expected_exc_type", + "qname": "sklearn.utils._testing.raises.expected_exc_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils._testing/raises/match", + "name": "match", + "qname": "sklearn.utils._testing.raises.match", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str or list of str", + "default_value": "None", + "description": "A regex that the exception message should match. If a list, one of\nthe entries must match. If None, match isn't enforced." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "list of str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/raises/may_pass", + "name": "may_pass", + "qname": "sklearn.utils._testing.raises.may_pass", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, the block is allowed to not raise an exception. Useful in\ncases where some estimators may support a feature but others must\nfail with an appropriate error message. By default, the context\nmanager will raise an exception if the block does not raise an\nexception." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/raises/err_msg", + "name": "err_msg", + "qname": "sklearn.utils._testing.raises.err_msg", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "If the context manager fails (e.g. 
the block fails to raise the\nproper exception, or fails to match), then an AssertionError is\nraised with this message. By default, an AssertionError is raised\nwith a default error message (depends on the kind of failure). Use\nthis to indicate how users should fix their estimators to pass the\nchecks." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Context manager to ensure exceptions are raised within a code block.\n\nThis is similar to and inspired by pytest.raises, but supports a few\nother cases.\n\nThis is only intended to be used in estimator_checks.py where we don't\nwant to use pytest. In the rest of the code base, just use pytest.raises\ninstead.", + "docstring": "Context manager to ensure exceptions are raised within a code block.\n\nThis is similar to and inspired by pytest.raises, but supports a few\nother cases.\n\nThis is only intended to be used in estimator_checks.py where we don't\nwant to use pytest. In the rest of the code base, just use pytest.raises\ninstead.\n\nParameters\n----------\nexpected_exc_type : Exception or list of Exception\n The exception that should be raised by the block. If a list, the block\n should raise one of the exceptions.\nmatch : str or list of str, default=None\n A regex that the exception message should match. If a list, one of\n the entries must match. If None, match isn't enforced.\nmay_pass : bool, default=False\n If True, the block is allowed to not raise an exception. Useful in\n cases where some estimators may support a feature but others must\n fail with an appropriate error message. By default, the context\n manager will raise an exception if the block does not raise an\n exception.\nerr_msg : str, default=None\n If the context manager fails (e.g. the block fails to raise the\n proper exception, or fails to match), then an AssertionError is\n raised with this message. By default, an AssertionError is raised\n with a default error message (depends on the kind of failure). Use\n this to indicate how users should fix their estimators to pass the\n checks.\n\nAttributes\n----------\nraised_and_matched : bool\n True if an exception was raised and a match was found, False otherwise.", + "code": "def raises(expected_exc_type, match=None, may_pass=False, err_msg=None):\n \"\"\"Context manager to ensure exceptions are raised within a code block.\n\n This is similar to and inspired by pytest.raises, but supports a few\n other cases.\n\n This is only intended to be used in estimator_checks.py where we don't\n want to use pytest. In the rest of the code base, just use pytest.raises\n instead.\n\n Parameters\n ----------\n expected_exc_type : Exception or list of Exception\n The exception that should be raised by the block. If a list, the block\n should raise one of the exceptions.\n match : str or list of str, default=None\n A regex that the exception message should match. If a list, one of\n the entries must match. If None, match isn't enforced.\n may_pass : bool, default=False\n If True, the block is allowed to not raise an exception. Useful in\n cases where some estimators may support a feature but others must\n fail with an appropriate error message. By default, the context\n manager will raise an exception if the block does not raise an\n exception.\n err_msg : str, default=None\n If the context manager fails (e.g. 
the block fails to raise the\n proper exception, or fails to match), then an AssertionError is\n raised with this message. By default, an AssertionError is raised\n with a default error message (depends on the kind of failure). Use\n this to indicate how users should fix their estimators to pass the\n checks.\n\n Attributes\n ----------\n raised_and_matched : bool\n True if an exception was raised and a match was found, False otherwise.\n \"\"\"\n return _Raises(expected_exc_type, match, may_pass, err_msg)" + }, + { + "id": "scikit-learn/sklearn.utils._testing/set_random_state", + "name": "set_random_state", + "qname": "sklearn.utils._testing.set_random_state", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils._testing/set_random_state/estimator", + "name": "estimator", + "qname": "sklearn.utils._testing.set_random_state.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "The estimator." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.utils._testing/set_random_state/random_state", + "name": "random_state", + "qname": "sklearn.utils._testing.set_random_state.random_state", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "0", + "description": "Pseudo random number generator state.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary <random_state>`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Set random state of an estimator if it has the `random_state` param.", + "docstring": "Set random state of an estimator if it has the `random_state` param.\n\nParameters\n----------\nestimator : object\n The estimator.\nrandom_state : int, RandomState instance or None, default=0\n Pseudo random number generator state.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary <random_state>`.", + "code": "def set_random_state(estimator, random_state=0):\n \"\"\"Set random state of an estimator if it has the `random_state` param.\n\n Parameters\n ----------\n estimator : object\n The estimator.\n random_state : int, RandomState instance or None, default=0\n Pseudo random number generator state.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary <random_state>`.\n \"\"\"\n if \"random_state\" in estimator.get_params():\n estimator.set_params(random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.utils.class_weight/compute_class_weight", + "name": "compute_class_weight", + "qname": "sklearn.utils.class_weight.compute_class_weight", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.class_weight/compute_class_weight/class_weight", + "name": "class_weight", + "qname": "sklearn.utils.class_weight.compute_class_weight.class_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "dict, 'balanced' or None", + "default_value": "", + "description": "If 'balanced', class weights will be given by\n``n_samples / (n_classes * 
np.bincount(y))``.\nIf a dictionary is given, keys are classes and values\nare corresponding class weights.\nIf None is given, the class weights will be uniform." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "'balanced'" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.class_weight/compute_class_weight/classes", + "name": "classes", + "qname": "sklearn.utils.class_weight.compute_class_weight.classes", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "Array of the classes occurring in the data, as given by\n``np.unique(y_org)`` with ``y_org`` the original class labels." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.utils.class_weight/compute_class_weight/y", + "name": "y", + "qname": "sklearn.utils.class_weight.compute_class_weight.y", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "", + "description": "Array of original class labels per sample." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Estimate class weights for unbalanced datasets.", + "docstring": "Estimate class weights for unbalanced datasets.\n\nParameters\n----------\nclass_weight : dict, 'balanced' or None\n If 'balanced', class weights will be given by\n ``n_samples / (n_classes * np.bincount(y))``.\n If a dictionary is given, keys are classes and values\n are corresponding class weights.\n If None is given, the class weights will be uniform.\n\nclasses : ndarray\n Array of the classes occurring in the data, as given by\n ``np.unique(y_org)`` with ``y_org`` the original class labels.\n\ny : array-like of shape (n_samples,)\n Array of original class labels per sample.\n\nReturns\n-------\nclass_weight_vect : ndarray of shape (n_classes,)\n Array with class_weight_vect[i] the weight for i-th class.\n\nReferences\n----------\nThe \"balanced\" heuristic is inspired by\nLogistic Regression in Rare Events Data, King, Zeng, 2001.", + "code": "@_deprecate_positional_args\ndef compute_class_weight(class_weight, *, classes, y):\n \"\"\"Estimate class weights for unbalanced datasets.\n\n Parameters\n ----------\n class_weight : dict, 'balanced' or None\n If 'balanced', class weights will be given by\n ``n_samples / (n_classes * np.bincount(y))``.\n If a dictionary is given, keys are classes and values\n are corresponding class weights.\n If None is given, the class weights will be uniform.\n\n classes : ndarray\n Array of the classes occurring in the data, as given by\n ``np.unique(y_org)`` with ``y_org`` the original class labels.\n\n y : array-like of shape (n_samples,)\n Array of original class labels per sample.\n\n Returns\n -------\n class_weight_vect : ndarray of shape (n_classes,)\n Array with class_weight_vect[i] the weight for i-th class.\n\n References\n ----------\n The \"balanced\" heuristic is inspired by\n Logistic Regression in Rare Events Data, King, Zeng, 2001.\n \"\"\"\n # Import error caused by circular imports.\n from ..preprocessing import LabelEncoder\n\n if set(y) - set(classes):\n raise ValueError(\"classes should include all valid labels that can \"\n \"be in y\")\n if 
class_weight is None or len(class_weight) == 0:\n # uniform class weights\n weight = np.ones(classes.shape[0], dtype=np.float64, order='C')\n elif class_weight == 'balanced':\n # Find the weight of each class as present in y.\n le = LabelEncoder()\n y_ind = le.fit_transform(y)\n if not all(np.in1d(classes, le.classes_)):\n raise ValueError(\"classes should have valid labels that are in y\")\n\n recip_freq = len(y) / (len(le.classes_) *\n np.bincount(y_ind).astype(np.float64))\n weight = recip_freq[le.transform(classes)]\n else:\n # user-defined dictionary\n weight = np.ones(classes.shape[0], dtype=np.float64, order='C')\n if not isinstance(class_weight, dict):\n raise ValueError(\"class_weight must be dict, 'balanced', or None,\"\n \" got: %r\" % class_weight)\n for c in class_weight:\n i = np.searchsorted(classes, c)\n if i >= len(classes) or classes[i] != c:\n raise ValueError(\"Class label {} not present.\".format(c))\n else:\n weight[i] = class_weight[c]\n\n return weight" + }, + { + "id": "scikit-learn/sklearn.utils.class_weight/compute_sample_weight", + "name": "compute_sample_weight", + "qname": "sklearn.utils.class_weight.compute_sample_weight", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.class_weight/compute_sample_weight/class_weight", + "name": "class_weight", + "qname": "sklearn.utils.class_weight.compute_sample_weight.class_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "dict, list of dicts, \"balanced\", or None", + "default_value": "", + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one. For\nmulti-output problems, a list of dicts can be provided in the same\norder as the columns of y.\n\nNote that for multioutput (including multilabel) weights should be\ndefined for each class of every column in its own dict. For example,\nfor four-class multilabel classification weights should be\n[{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n[{1:1}, {2:5}, {3:1}, {4:1}].\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data:\n``n_samples / (n_classes * np.bincount(y))``.\n\nFor multi-output, the weights of each column of y will be multiplied." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "dict" + }, + { + "kind": "NamedType", + "name": "list of dicts" + }, + { + "kind": "NamedType", + "name": "\"balanced\"" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.class_weight/compute_sample_weight/y", + "name": "y", + "qname": "sklearn.utils.class_weight.compute_sample_weight.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Array of original class labels per sample." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.utils.class_weight/compute_sample_weight/indices", + "name": "indices", + "qname": "sklearn.utils.class_weight.compute_sample_weight.indices", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_subsample,)", + "default_value": "None", + "description": "Array of indices to be used in a subsample. Can be of length less than\nn_samples in the case of a subsample, or equal to n_samples in the\ncase of a bootstrap subsample with repeated indices. If None, the\nsample weight will be calculated over the full sample. Only \"balanced\"\nis supported for class_weight if this is provided." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_subsample,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Estimate sample weights by class for unbalanced datasets.", + "docstring": "Estimate sample weights by class for unbalanced datasets.\n\nParameters\n----------\nclass_weight : dict, list of dicts, \"balanced\", or None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data:\n ``n_samples / (n_classes * np.bincount(y))``.\n\n For multi-output, the weights of each column of y will be multiplied.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Array of original class labels per sample.\n\nindices : array-like of shape (n_subsample,), default=None\n Array of indices to be used in a subsample. Can be of length less than\n n_samples in the case of a subsample, or equal to n_samples in the\n case of a bootstrap subsample with repeated indices. If None, the\n sample weight will be calculated over the full sample. Only \"balanced\"\n is supported for class_weight if this is provided.\n\nReturns\n-------\nsample_weight_vect : ndarray of shape (n_samples,)\n Array with sample weights as applied to the original y.", + "code": "@_deprecate_positional_args\ndef compute_sample_weight(class_weight, y, *, indices=None):\n \"\"\"Estimate sample weights by class for unbalanced datasets.\n\n Parameters\n ----------\n class_weight : dict, list of dicts, \"balanced\", or None\n Weights associated with classes in the form ``{class_label: weight}``.\n If not given, all classes are supposed to have weight one. For\n multi-output problems, a list of dicts can be provided in the same\n order as the columns of y.\n\n Note that for multioutput (including multilabel) weights should be\n defined for each class of every column in its own dict. 
For example,\n for four-class multilabel classification weights should be\n [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n [{1:1}, {2:5}, {3:1}, {4:1}].\n\n The \"balanced\" mode uses the values of y to automatically adjust\n weights inversely proportional to class frequencies in the input data:\n ``n_samples / (n_classes * np.bincount(y))``.\n\n For multi-output, the weights of each column of y will be multiplied.\n\n y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Array of original class labels per sample.\n\n indices : array-like of shape (n_subsample,), default=None\n Array of indices to be used in a subsample. Can be of length less than\n n_samples in the case of a subsample, or equal to n_samples in the\n case of a bootstrap subsample with repeated indices. If None, the\n sample weight will be calculated over the full sample. Only \"balanced\"\n is supported for class_weight if this is provided.\n\n Returns\n -------\n sample_weight_vect : ndarray of shape (n_samples,)\n Array with sample weights as applied to the original y.\n \"\"\"\n\n y = np.atleast_1d(y)\n if y.ndim == 1:\n y = np.reshape(y, (-1, 1))\n n_outputs = y.shape[1]\n\n if isinstance(class_weight, str):\n if class_weight not in ['balanced']:\n raise ValueError('The only valid preset for class_weight is '\n '\"balanced\". Given \"%s\".' % class_weight)\n elif (indices is not None and\n not isinstance(class_weight, str)):\n raise ValueError('The only valid class_weight for subsampling is '\n '\"balanced\". Given \"%s\".' % class_weight)\n elif n_outputs > 1:\n if (not hasattr(class_weight, \"__iter__\") or\n isinstance(class_weight, dict)):\n raise ValueError(\"For multi-output, class_weight should be a \"\n \"list of dicts, or a valid string.\")\n if len(class_weight) != n_outputs:\n raise ValueError(\"For multi-output, number of elements in \"\n \"class_weight should match number of outputs.\")\n\n expanded_class_weight = []\n for k in range(n_outputs):\n\n y_full = y[:, k]\n classes_full = np.unique(y_full)\n classes_missing = None\n\n if class_weight == 'balanced' or n_outputs == 1:\n class_weight_k = class_weight\n else:\n class_weight_k = class_weight[k]\n\n if indices is not None:\n # Get class weights for the subsample, covering all classes in\n # case some labels that were present in the original data are\n # missing from the sample.\n y_subsample = y[indices, k]\n classes_subsample = np.unique(y_subsample)\n\n weight_k = np.take(compute_class_weight(class_weight_k,\n classes=classes_subsample,\n y=y_subsample),\n np.searchsorted(classes_subsample,\n classes_full),\n mode='clip')\n\n classes_missing = set(classes_full) - set(classes_subsample)\n else:\n weight_k = compute_class_weight(class_weight_k,\n classes=classes_full,\n y=y_full)\n\n weight_k = weight_k[np.searchsorted(classes_full, y_full)]\n\n if classes_missing:\n # Make missing classes' weight zero\n weight_k[np.in1d(y_full, list(classes_missing))] = 0.\n\n expanded_class_weight.append(weight_k)\n\n expanded_class_weight = np.prod(expanded_class_weight,\n axis=0,\n dtype=np.float64)\n\n return expanded_class_weight" + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/_is_deprecated", + "name": "_is_deprecated", + "qname": "sklearn.utils.deprecation._is_deprecated", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.deprecation/_is_deprecated/func", + "name": "func", + "qname": "sklearn.utils.deprecation._is_deprecated.func", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Helper to check if func is wrapped by our deprecated decorator", + "docstring": "Helper to check if func is wrapped by our deprecated decorator", + "code": "def _is_deprecated(func):\n \"\"\"Helper to check if func is wrapped by our deprecated decorator\"\"\"\n closures = getattr(func, '__closure__', [])\n if closures is None:\n closures = []\n is_deprecated = ('deprecated' in ''.join([c.cell_contents\n for c in closures\n if isinstance(c.cell_contents, str)]))\n return is_deprecated" + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/__call__", + "name": "__call__", + "qname": "sklearn.utils.deprecation.deprecated.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/__call__/self", + "name": "self", + "qname": "sklearn.utils.deprecation.deprecated.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/__call__/obj", + "name": "obj", + "qname": "sklearn.utils.deprecation.deprecated.__call__.obj", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "object", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Call method", + "docstring": "Call method\n\nParameters\n----------\nobj : object", + "code": " def __call__(self, obj):\n \"\"\"Call method\n\n Parameters\n ----------\n obj : object\n \"\"\"\n if isinstance(obj, type):\n return self._decorate_class(obj)\n elif isinstance(obj, property):\n # Note that this is only triggered properly if the `property`\n # decorator comes before the `deprecated` decorator, like so:\n #\n # @deprecated(msg)\n # @property\n # def deprecated_attribute_(self):\n # ...\n return self._decorate_property(obj)\n else:\n return self._decorate_fun(obj)" + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/__init__", + "name": "__init__", + "qname": "sklearn.utils.deprecation.deprecated.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/__init__/self", + "name": "self", + "qname": "sklearn.utils.deprecation.deprecated.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/__init__/extra", + "name": "extra", + "qname": "sklearn.utils.deprecation.deprecated.__init__.extra", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "str", + "default_value": "''", + "description": "To be added to the deprecation messages." 
+ }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Decorator to mark a function or class as deprecated.\n\nIssue a warning when the function is called/the class is instantiated and\nadds a warning to the docstring.\n\nThe optional extra argument will be appended to the deprecation message\nand the docstring. Note: to use this with the default value for extra, put\nin an empty of parentheses:\n\n>>> from sklearn.utils import deprecated\n>>> deprecated()\n\n\n>>> @deprecated()\n... def some_function(): pass", + "docstring": "", + "code": " def __init__(self, extra=''):\n self.extra = extra" + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/_decorate_class", + "name": "_decorate_class", + "qname": "sklearn.utils.deprecation.deprecated._decorate_class", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/_decorate_class/self", + "name": "self", + "qname": "sklearn.utils.deprecation.deprecated._decorate_class.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/_decorate_class/cls", + "name": "cls", + "qname": "sklearn.utils.deprecation.deprecated._decorate_class.cls", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _decorate_class(self, cls):\n msg = \"Class %s is deprecated\" % cls.__name__\n if self.extra:\n msg += \"; %s\" % self.extra\n\n # FIXME: we should probably reset __new__ for full generality\n init = cls.__init__\n\n def wrapped(*args, **kwargs):\n warnings.warn(msg, category=FutureWarning)\n return init(*args, **kwargs)\n cls.__init__ = wrapped\n\n wrapped.__name__ = '__init__'\n wrapped.__doc__ = self._update_doc(init.__doc__)\n wrapped.deprecated_original = init\n\n return cls" + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/_decorate_fun", + "name": "_decorate_fun", + "qname": "sklearn.utils.deprecation.deprecated._decorate_fun", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/_decorate_fun/self", + "name": "self", + "qname": "sklearn.utils.deprecation.deprecated._decorate_fun.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/_decorate_fun/fun", + "name": "fun", + "qname": "sklearn.utils.deprecation.deprecated._decorate_fun.fun", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Decorate function fun", + "docstring": "Decorate function fun", + "code": " def _decorate_fun(self, fun):\n \"\"\"Decorate function fun\"\"\"\n\n msg = \"Function %s is deprecated\" % fun.__name__\n if self.extra:\n msg += \"; %s\" % self.extra\n\n @functools.wraps(fun)\n def wrapped(*args, **kwargs):\n warnings.warn(msg, 
category=FutureWarning)\n return fun(*args, **kwargs)\n\n wrapped.__doc__ = self._update_doc(wrapped.__doc__)\n # Add a reference to the wrapped function so that we can introspect\n # on function arguments in Python 2 (already works in Python 3)\n wrapped.__wrapped__ = fun\n\n return wrapped" + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/_decorate_property", + "name": "_decorate_property", + "qname": "sklearn.utils.deprecation.deprecated._decorate_property", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/_decorate_property/self", + "name": "self", + "qname": "sklearn.utils.deprecation.deprecated._decorate_property.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/_decorate_property/prop", + "name": "prop", + "qname": "sklearn.utils.deprecation.deprecated._decorate_property.prop", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _decorate_property(self, prop):\n msg = self.extra\n\n @property\n def wrapped(*args, **kwargs):\n warnings.warn(msg, category=FutureWarning)\n return prop.fget(*args, **kwargs)\n\n return wrapped" + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/_update_doc", + "name": "_update_doc", + "qname": "sklearn.utils.deprecation.deprecated._update_doc", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/_update_doc/self", + "name": "self", + "qname": "sklearn.utils.deprecation.deprecated._update_doc.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.deprecation/deprecated/_update_doc/olddoc", + "name": "olddoc", + "qname": "sklearn.utils.deprecation.deprecated._update_doc.olddoc", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _update_doc(self, olddoc):\n newdoc = \"DEPRECATED\"\n if self.extra:\n newdoc = \"%s: %s\" % (newdoc, self.extra)\n if olddoc:\n newdoc = \"%s\\n\\n %s\" % (newdoc, olddoc)\n return newdoc" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__array__", + "name": "__array__", + "qname": "sklearn.utils.estimator_checks._NotAnArray.__array__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__array__/self", + "name": "self", + "qname": "sklearn.utils.estimator_checks._NotAnArray.__array__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__array__/dtype", + "name": "dtype", + "qname": "sklearn.utils.estimator_checks._NotAnArray.__array__.dtype", + "default_value": "None", + 
"assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __array__(self, dtype=None):\n return self.data" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__array_function__", + "name": "__array_function__", + "qname": "sklearn.utils.estimator_checks._NotAnArray.__array_function__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__array_function__/self", + "name": "self", + "qname": "sklearn.utils.estimator_checks._NotAnArray.__array_function__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__array_function__/func", + "name": "func", + "qname": "sklearn.utils.estimator_checks._NotAnArray.__array_function__.func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__array_function__/types", + "name": "types", + "qname": "sklearn.utils.estimator_checks._NotAnArray.__array_function__.types", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__array_function__/args", + "name": "args", + "qname": "sklearn.utils.estimator_checks._NotAnArray.__array_function__.args", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__array_function__/kwargs", + "name": "kwargs", + "qname": "sklearn.utils.estimator_checks._NotAnArray.__array_function__.kwargs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __array_function__(self, func, types, args, kwargs):\n if func.__name__ == \"may_share_memory\":\n return True\n raise TypeError(\"Don't want to call array_function {}!\".format(\n func.__name__))" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__init__", + "name": "__init__", + "qname": "sklearn.utils.estimator_checks._NotAnArray.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__init__/self", + "name": "self", + "qname": "sklearn.utils.estimator_checks._NotAnArray.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_NotAnArray/__init__/data", + "name": "data", + "qname": "sklearn.utils.estimator_checks._NotAnArray.__init__.data", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "The data." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "An object that is convertible to an array.", + "docstring": "", + "code": " def __init__(self, data):\n self.data = np.asarray(data)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_apply_on_subsets", + "name": "_apply_on_subsets", + "qname": "sklearn.utils.estimator_checks._apply_on_subsets", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_apply_on_subsets/func", + "name": "func", + "qname": "sklearn.utils.estimator_checks._apply_on_subsets.func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_apply_on_subsets/X", + "name": "X", + "qname": "sklearn.utils.estimator_checks._apply_on_subsets.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _apply_on_subsets(func, X):\n # apply function on the whole set and on mini batches\n result_full = func(X)\n n_features = X.shape[1]\n result_by_batch = [func(batch.reshape(1, n_features))\n for batch in X]\n\n # func can output tuple (e.g. score_samples)\n if type(result_full) == tuple:\n result_full = result_full[0]\n result_by_batch = list(map(lambda x: x[0], result_by_batch))\n\n if sparse.issparse(result_full):\n result_full = result_full.A\n result_by_batch = [x.A for x in result_by_batch]\n\n return np.ravel(result_full), np.ravel(result_by_batch)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_check_transformer", + "name": "_check_transformer", + "qname": "sklearn.utils.estimator_checks._check_transformer", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_check_transformer/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks._check_transformer.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_check_transformer/transformer_orig", + "name": "transformer_orig", + "qname": "sklearn.utils.estimator_checks._check_transformer.transformer_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_check_transformer/X", + "name": "X", + "qname": "sklearn.utils.estimator_checks._check_transformer.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_check_transformer/y", + "name": "y", + "qname": "sklearn.utils.estimator_checks._check_transformer.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, 
+ "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _check_transformer(name, transformer_orig, X, y):\n n_samples, n_features = np.asarray(X).shape\n transformer = clone(transformer_orig)\n set_random_state(transformer)\n\n # fit\n\n if name in CROSS_DECOMPOSITION:\n y_ = np.c_[np.asarray(y), np.asarray(y)]\n y_[::2, 1] *= 2\n if isinstance(X, _NotAnArray):\n y_ = _NotAnArray(y_)\n else:\n y_ = y\n\n transformer.fit(X, y_)\n # fit_transform method should work on non fitted estimator\n transformer_clone = clone(transformer)\n X_pred = transformer_clone.fit_transform(X, y=y_)\n\n if isinstance(X_pred, tuple):\n for x_pred in X_pred:\n assert x_pred.shape[0] == n_samples\n else:\n # check for consistent n_samples\n assert X_pred.shape[0] == n_samples\n\n if hasattr(transformer, 'transform'):\n if name in CROSS_DECOMPOSITION:\n X_pred2 = transformer.transform(X, y_)\n X_pred3 = transformer.fit_transform(X, y=y_)\n else:\n X_pred2 = transformer.transform(X)\n X_pred3 = transformer.fit_transform(X, y=y_)\n\n if _safe_tags(transformer_orig, key='non_deterministic'):\n msg = name + ' is non deterministic'\n raise SkipTest(msg)\n if isinstance(X_pred, tuple) and isinstance(X_pred2, tuple):\n for x_pred, x_pred2, x_pred3 in zip(X_pred, X_pred2, X_pred3):\n assert_allclose_dense_sparse(\n x_pred, x_pred2, atol=1e-2,\n err_msg=\"fit_transform and transform outcomes \"\n \"not consistent in %s\"\n % transformer)\n assert_allclose_dense_sparse(\n x_pred, x_pred3, atol=1e-2,\n err_msg=\"consecutive fit_transform outcomes \"\n \"not consistent in %s\"\n % transformer)\n else:\n assert_allclose_dense_sparse(\n X_pred, X_pred2,\n err_msg=\"fit_transform and transform outcomes \"\n \"not consistent in %s\"\n % transformer, atol=1e-2)\n assert_allclose_dense_sparse(\n X_pred, X_pred3, atol=1e-2,\n err_msg=\"consecutive fit_transform outcomes \"\n \"not consistent in %s\"\n % transformer)\n assert _num_samples(X_pred2) == n_samples\n assert _num_samples(X_pred3) == n_samples\n\n # raises error on malformed input for transform\n if hasattr(X, 'shape') and \\\n not _safe_tags(transformer, key=\"stateless\") and \\\n X.ndim == 2 and X.shape[1] > 1:\n\n # If it's not an array, it does not have a 'T' property\n with raises(\n ValueError,\n err_msg=f\"The transformer {name} does not raise an error \"\n \"when the number of features in transform is different from \"\n \"the number of features in fit.\"\n ):\n transformer.transform(X[:, :-1])" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_choose_check_classifiers_labels", + "name": "_choose_check_classifiers_labels", + "qname": "sklearn.utils.estimator_checks._choose_check_classifiers_labels", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_choose_check_classifiers_labels/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks._choose_check_classifiers_labels.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_choose_check_classifiers_labels/y", + "name": "y", + "qname": "sklearn.utils.estimator_checks._choose_check_classifiers_labels.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + 
"type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_choose_check_classifiers_labels/y_names", + "name": "y_names", + "qname": "sklearn.utils.estimator_checks._choose_check_classifiers_labels.y_names", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _choose_check_classifiers_labels(name, y, y_names):\n # Semisupervised classifers use -1 as the indicator for an unlabeled\n # sample.\n return y if name in [\"LabelPropagation\",\n \"LabelSpreading\",\n \"SelfTrainingClassifier\"] else y_names" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_construct_instance", + "name": "_construct_instance", + "qname": "sklearn.utils.estimator_checks._construct_instance", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_construct_instance/Estimator", + "name": "Estimator", + "qname": "sklearn.utils.estimator_checks._construct_instance.Estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Construct Estimator instance if possible.", + "docstring": "Construct Estimator instance if possible.", + "code": "def _construct_instance(Estimator):\n \"\"\"Construct Estimator instance if possible.\"\"\"\n required_parameters = getattr(Estimator, \"_required_parameters\", [])\n if len(required_parameters):\n if required_parameters in ([\"estimator\"], [\"base_estimator\"]):\n if issubclass(Estimator, RegressorMixin):\n estimator = Estimator(Ridge())\n else:\n estimator = Estimator(LogisticRegression(C=1))\n elif required_parameters in (['estimators'],):\n # Heterogeneous ensemble classes (i.e. 
stacking, voting)\n if issubclass(Estimator, RegressorMixin):\n estimator = Estimator(estimators=[\n (\"est1\", Ridge(alpha=0.1)),\n (\"est2\", Ridge(alpha=1))\n ])\n else:\n estimator = Estimator(estimators=[\n (\"est1\", LogisticRegression(C=0.1)),\n (\"est2\", LogisticRegression(C=1))\n ])\n else:\n msg = (f\"Can't instantiate estimator {Estimator.__name__} \"\n f\"parameters {required_parameters}\")\n # raise additional warning to be shown by pytest\n warnings.warn(msg, SkipTestWarning)\n raise SkipTest(msg)\n else:\n estimator = Estimator()\n return estimator" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_enforce_estimator_tags_x", + "name": "_enforce_estimator_tags_x", + "qname": "sklearn.utils.estimator_checks._enforce_estimator_tags_x", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_enforce_estimator_tags_x/estimator", + "name": "estimator", + "qname": "sklearn.utils.estimator_checks._enforce_estimator_tags_x.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_enforce_estimator_tags_x/X", + "name": "X", + "qname": "sklearn.utils.estimator_checks._enforce_estimator_tags_x.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _enforce_estimator_tags_x(estimator, X):\n # Pairwise estimators only accept\n # X of shape (`n_samples`, `n_samples`)\n if _is_pairwise(estimator):\n X = X.dot(X.T)\n # Estimators with `1darray` in `X_types` tag only accept\n # X of shape (`n_samples`,)\n if '1darray' in _safe_tags(estimator, key='X_types'):\n X = X[:, 0]\n # Estimators with a `requires_positive_X` tag only accept\n # strictly positive data\n if _safe_tags(estimator, key='requires_positive_X'):\n X -= X.min()\n return X" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_enforce_estimator_tags_y", + "name": "_enforce_estimator_tags_y", + "qname": "sklearn.utils.estimator_checks._enforce_estimator_tags_y", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_enforce_estimator_tags_y/estimator", + "name": "estimator", + "qname": "sklearn.utils.estimator_checks._enforce_estimator_tags_y.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_enforce_estimator_tags_y/y", + "name": "y", + "qname": "sklearn.utils.estimator_checks._enforce_estimator_tags_y.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _enforce_estimator_tags_y(estimator, y):\n # Estimators with a `requires_positive_y` tag only accept strictly positive\n # data\n if _safe_tags(estimator, key=\"requires_positive_y\"):\n # Create strictly positive y. 
The minimal increment above 0 is 1, as\n # y could be of integer dtype.\n y += 1 + abs(y.min())\n # Estimators with a `binary_only` tag only accept up to two unique y values\n if _safe_tags(estimator, key=\"binary_only\") and y.size > 0:\n y = np.where(y == y.flat[0], y, y.flat[0] + 1)\n # Estimators in mono_output_task_error raise ValueError if y is of 1-D\n # Convert into a 2-D y for those estimators.\n if _safe_tags(estimator, key=\"multioutput_only\"):\n return np.reshape(y, (-1, 1))\n return y" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_generate_sparse_matrix", + "name": "_generate_sparse_matrix", + "qname": "sklearn.utils.estimator_checks._generate_sparse_matrix", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_generate_sparse_matrix/X_csr", + "name": "X_csr", + "qname": "sklearn.utils.estimator_checks._generate_sparse_matrix.X_csr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate sparse matrices with {32,64}bit indices of diverse format.", + "docstring": "Generate sparse matrices with {32,64}bit indices of diverse format.\n\nParameters\n----------\nX_csr: CSR Matrix\n Input matrix in CSR format.\n\nReturns\n-------\nout: iter(Matrices)\n In format['dok', 'lil', 'dia', 'bsr', 'csr', 'csc', 'coo',\n 'coo_64', 'csc_64', 'csr_64']", + "code": "def _generate_sparse_matrix(X_csr):\n \"\"\"Generate sparse matrices with {32,64}bit indices of diverse format.\n\n Parameters\n ----------\n X_csr: CSR Matrix\n Input matrix in CSR format.\n\n Returns\n -------\n out: iter(Matrices)\n In format['dok', 'lil', 'dia', 'bsr', 'csr', 'csc', 'coo',\n 'coo_64', 'csc_64', 'csr_64']\n \"\"\"\n\n assert X_csr.format == 'csr'\n yield 'csr', X_csr.copy()\n for sparse_format in ['dok', 'lil', 'dia', 'bsr', 'csc', 'coo']:\n yield sparse_format, X_csr.asformat(sparse_format)\n\n # Generate large indices matrix only if its supported by scipy\n X_coo = X_csr.asformat('coo')\n X_coo.row = X_coo.row.astype('int64')\n X_coo.col = X_coo.col.astype('int64')\n yield \"coo_64\", X_coo\n\n for sparse_format in ['csc', 'csr']:\n X = X_csr.asformat(sparse_format)\n X.indices = X.indices.astype('int64')\n X.indptr = X.indptr.astype('int64')\n yield sparse_format + \"_64\", X" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_get_check_estimator_ids", + "name": "_get_check_estimator_ids", + "qname": "sklearn.utils.estimator_checks._get_check_estimator_ids", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_get_check_estimator_ids/obj", + "name": "obj", + "qname": "sklearn.utils.estimator_checks._get_check_estimator_ids.obj", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "estimator or function", + "default_value": "", + "description": "Items generated by `check_estimator`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "estimator" + }, + { + "kind": "NamedType", + "name": "function" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Create pytest ids for checks.\n\nWhen `obj` is an estimator, this returns the pprint version of the\nestimator (with `print_changed_only=True`). 
When `obj` is a function, the\nname of the function is returned with its keyword arguments.\n\n`_get_check_estimator_ids` is designed to be used as the `id` in\n`pytest.mark.parametrize` where `check_estimator(..., generate_only=True)`\nis yielding estimators and checks.", + "docstring": "Create pytest ids for checks.\n\nWhen `obj` is an estimator, this returns the pprint version of the\nestimator (with `print_changed_only=True`). When `obj` is a function, the\nname of the function is returned with its keyword arguments.\n\n`_get_check_estimator_ids` is designed to be used as the `id` in\n`pytest.mark.parametrize` where `check_estimator(..., generate_only=True)`\nis yielding estimators and checks.\n\nParameters\n----------\nobj : estimator or function\n Items generated by `check_estimator`.\n\nReturns\n-------\nid : str or None\n\nSee Also\n--------\ncheck_estimator", + "code": "def _get_check_estimator_ids(obj):\n \"\"\"Create pytest ids for checks.\n\n When `obj` is an estimator, this returns the pprint version of the\n estimator (with `print_changed_only=True`). When `obj` is a function, the\n name of the function is returned with its keyword arguments.\n\n `_get_check_estimator_ids` is designed to be used as the `id` in\n `pytest.mark.parametrize` where `check_estimator(..., generate_only=True)`\n is yielding estimators and checks.\n\n Parameters\n ----------\n obj : estimator or function\n Items generated by `check_estimator`.\n\n Returns\n -------\n id : str or None\n\n See Also\n --------\n check_estimator\n \"\"\"\n if callable(obj):\n if not isinstance(obj, partial):\n return obj.__name__\n\n if not obj.keywords:\n return obj.func.__name__\n\n kwstring = \",\".join([\"{}={}\".format(k, v)\n for k, v in obj.keywords.items()])\n return \"{}({})\".format(obj.func.__name__, kwstring)\n if hasattr(obj, \"get_params\"):\n with config_context(print_changed_only=True):\n return re.sub(r\"\\s\", \"\", str(obj))" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_is_pairwise_metric", + "name": "_is_pairwise_metric", + "qname": "sklearn.utils.estimator_checks._is_pairwise_metric", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_is_pairwise_metric/estimator", + "name": "estimator", + "qname": "sklearn.utils.estimator_checks._is_pairwise_metric.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Estimator object to test." 
+ }, + "type": { + "kind": "NamedType", + "name": "object" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns True if estimator accepts pairwise metric.", + "docstring": "Returns True if estimator accepts pairwise metric.\n\nParameters\n----------\nestimator : object\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if _pairwise is set to True and False otherwise.", + "code": "def _is_pairwise_metric(estimator):\n \"\"\"Returns True if estimator accepts pairwise metric.\n\n Parameters\n ----------\n estimator : object\n Estimator object to test.\n\n Returns\n -------\n out : bool\n True if _pairwise is set to True and False otherwise.\n \"\"\"\n metric = getattr(estimator, \"metric\", None)\n\n return bool(metric == 'precomputed')" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_is_public_parameter", + "name": "_is_public_parameter", + "qname": "sklearn.utils.estimator_checks._is_public_parameter", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_is_public_parameter/attr", + "name": "attr", + "qname": "sklearn.utils.estimator_checks._is_public_parameter.attr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _is_public_parameter(attr):\n return not (attr.startswith('_') or attr.endswith('_'))" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_maybe_mark_xfail", + "name": "_maybe_mark_xfail", + "qname": "sklearn.utils.estimator_checks._maybe_mark_xfail", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_maybe_mark_xfail/estimator", + "name": "estimator", + "qname": "sklearn.utils.estimator_checks._maybe_mark_xfail.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_maybe_mark_xfail/check", + "name": "check", + "qname": "sklearn.utils.estimator_checks._maybe_mark_xfail.check", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_maybe_mark_xfail/pytest", + "name": "pytest", + "qname": "sklearn.utils.estimator_checks._maybe_mark_xfail.pytest", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _maybe_mark_xfail(estimator, check, pytest):\n # Mark (estimator, check) pairs as XFAIL if needed (see conditions in\n # _should_be_skipped_or_marked())\n # This is similar to _maybe_skip(), but this one is used by\n # @parametrize_with_checks() instead of check_estimator()\n\n should_be_marked, reason = _should_be_skipped_or_marked(estimator, check)\n if not should_be_marked:\n return estimator, check\n else:\n return pytest.param(estimator, check,\n marks=pytest.mark.xfail(reason=reason))" + }, + { + "id": 
"scikit-learn/sklearn.utils.estimator_checks/_maybe_skip", + "name": "_maybe_skip", + "qname": "sklearn.utils.estimator_checks._maybe_skip", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_maybe_skip/estimator", + "name": "estimator", + "qname": "sklearn.utils.estimator_checks._maybe_skip.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_maybe_skip/check", + "name": "check", + "qname": "sklearn.utils.estimator_checks._maybe_skip.check", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _maybe_skip(estimator, check):\n # Wrap a check so that it's skipped if needed (see conditions in\n # _should_be_skipped_or_marked())\n # This is similar to _maybe_mark_xfail(), but this one is used by\n # check_estimator() instead of @parametrize_with_checks which requires\n # pytest\n should_be_skipped, reason = _should_be_skipped_or_marked(estimator, check)\n if not should_be_skipped:\n return check\n\n check_name = (check.func.__name__ if isinstance(check, partial)\n else check.__name__)\n\n @wraps(check)\n def wrapped(*args, **kwargs):\n raise SkipTest(\n f\"Skipping {check_name} for {estimator.__class__.__name__}: \"\n f\"{reason}\"\n )\n\n return wrapped" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_pairwise_estimator_convert_X", + "name": "_pairwise_estimator_convert_X", + "qname": "sklearn.utils.estimator_checks._pairwise_estimator_convert_X", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_pairwise_estimator_convert_X/X", + "name": "X", + "qname": "sklearn.utils.estimator_checks._pairwise_estimator_convert_X.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_pairwise_estimator_convert_X/estimator", + "name": "estimator", + "qname": "sklearn.utils.estimator_checks._pairwise_estimator_convert_X.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_pairwise_estimator_convert_X/kernel", + "name": "kernel", + "qname": "sklearn.utils.estimator_checks._pairwise_estimator_convert_X.kernel", + "default_value": "linear_kernel", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _pairwise_estimator_convert_X(X, estimator, kernel=linear_kernel):\n\n if _is_pairwise_metric(estimator):\n return pairwise_distances(X, metric='euclidean')\n if _is_pairwise(estimator):\n return kernel(X, X)\n\n return X" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_regression_dataset", + "name": "_regression_dataset", + "qname": 
"sklearn.utils.estimator_checks._regression_dataset", + "decorators": [], + "parameters": [], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _regression_dataset():\n global REGRESSION_DATASET\n if REGRESSION_DATASET is None:\n X, y = make_regression(\n n_samples=200, n_features=10, n_informative=1,\n bias=5.0, noise=20, random_state=42,\n )\n X = StandardScaler().fit_transform(X)\n REGRESSION_DATASET = X, y\n return REGRESSION_DATASET" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_set_checking_parameters", + "name": "_set_checking_parameters", + "qname": "sklearn.utils.estimator_checks._set_checking_parameters", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_set_checking_parameters/estimator", + "name": "estimator", + "qname": "sklearn.utils.estimator_checks._set_checking_parameters.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _set_checking_parameters(estimator):\n # set parameters to speed up some estimators and\n # avoid deprecated behaviour\n params = estimator.get_params()\n name = estimator.__class__.__name__\n if (\"n_iter\" in params and name != \"TSNE\"):\n estimator.set_params(n_iter=5)\n if \"max_iter\" in params:\n if estimator.max_iter is not None:\n estimator.set_params(max_iter=min(5, estimator.max_iter))\n # LinearSVR, LinearSVC\n if estimator.__class__.__name__ in ['LinearSVR', 'LinearSVC']:\n estimator.set_params(max_iter=20)\n # NMF\n if estimator.__class__.__name__ == 'NMF':\n # FIXME : init should be removed in 1.1\n estimator.set_params(max_iter=500, init='nndsvda')\n # MLP\n if estimator.__class__.__name__ in ['MLPClassifier', 'MLPRegressor']:\n estimator.set_params(max_iter=100)\n if \"n_resampling\" in params:\n # randomized lasso\n estimator.set_params(n_resampling=5)\n if \"n_estimators\" in params:\n estimator.set_params(n_estimators=min(5, estimator.n_estimators))\n if \"max_trials\" in params:\n # RANSAC\n estimator.set_params(max_trials=10)\n if \"n_init\" in params:\n # K-Means\n estimator.set_params(n_init=2)\n\n if name == 'TruncatedSVD':\n # TruncatedSVD doesn't run with n_components = n_features\n # This is ugly :-/\n estimator.n_components = 1\n\n if hasattr(estimator, \"n_clusters\"):\n estimator.n_clusters = min(estimator.n_clusters, 2)\n\n if hasattr(estimator, \"n_best\"):\n estimator.n_best = 1\n\n if name == \"SelectFdr\":\n # be tolerant of noisy datasets (not actually speed)\n estimator.set_params(alpha=.5)\n\n if name == \"TheilSenRegressor\":\n estimator.max_subpopulation = 100\n\n if isinstance(estimator, BaseRandomProjection):\n # Due to the jl lemma and often very few samples, the number\n # of components of the random matrix projection will be probably\n # greater than the number of features.\n # So we impose a smaller number (avoid \"auto\" mode)\n estimator.set_params(n_components=2)\n\n if isinstance(estimator, SelectKBest):\n # SelectKBest has a default of k=10\n # which is more feature than we have in most case.\n estimator.set_params(k=1)\n\n if name in ('HistGradientBoostingClassifier',\n 'HistGradientBoostingRegressor'):\n # The default min_samples_leaf (20) isn't appropriate for small\n # datasets (only very shallow trees are built) 
that the checks use.\n estimator.set_params(min_samples_leaf=5)\n\n if name == 'DummyClassifier':\n # the default strategy prior would output constant predictions and fail\n # for check_classifiers_predictions\n estimator.set_params(strategy='stratified')\n\n # Speed-up by reducing the number of CV or splits for CV estimators\n loo_cv = ['RidgeCV']\n if name not in loo_cv and hasattr(estimator, 'cv'):\n estimator.set_params(cv=3)\n if hasattr(estimator, 'n_splits'):\n estimator.set_params(n_splits=3)\n\n if name == 'OneHotEncoder':\n estimator.set_params(handle_unknown='ignore')" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_should_be_skipped_or_marked", + "name": "_should_be_skipped_or_marked", + "qname": "sklearn.utils.estimator_checks._should_be_skipped_or_marked", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_should_be_skipped_or_marked/estimator", + "name": "estimator", + "qname": "sklearn.utils.estimator_checks._should_be_skipped_or_marked.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_should_be_skipped_or_marked/check", + "name": "check", + "qname": "sklearn.utils.estimator_checks._should_be_skipped_or_marked.check", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _should_be_skipped_or_marked(estimator, check):\n # Return whether a check should be skipped (when using check_estimator())\n # or marked as XFAIL (when using @parametrize_with_checks()), along with a\n # reason.\n # Currently, a check should be skipped or marked if\n # the check is in the _xfail_checks tag of the estimator\n\n check_name = (check.func.__name__ if isinstance(check, partial)\n else check.__name__)\n\n xfail_checks = _safe_tags(estimator, key='_xfail_checks') or {}\n if check_name in xfail_checks:\n return True, xfail_checks[check_name]\n\n return False, 'placeholder reason that will never be used'" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_yield_all_checks", + "name": "_yield_all_checks", + "qname": "sklearn.utils.estimator_checks._yield_all_checks", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_yield_all_checks/estimator", + "name": "estimator", + "qname": "sklearn.utils.estimator_checks._yield_all_checks.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _yield_all_checks(estimator):\n name = estimator.__class__.__name__\n tags = _safe_tags(estimator)\n if \"2darray\" not in tags[\"X_types\"]:\n warnings.warn(\"Can't test estimator {} which requires input \"\n \" of type {}\".format(name, tags[\"X_types\"]),\n SkipTestWarning)\n return\n if tags[\"_skip_test\"]:\n warnings.warn(\"Explicit SKIP via _skip_test tag for estimator \"\n \"{}.\".format(name),\n SkipTestWarning)\n return\n\n for check in _yield_checks(estimator):\n yield check\n if 
is_classifier(estimator):\n for check in _yield_classifier_checks(estimator):\n yield check\n if is_regressor(estimator):\n for check in _yield_regressor_checks(estimator):\n yield check\n if hasattr(estimator, 'transform'):\n for check in _yield_transformer_checks(estimator):\n yield check\n if isinstance(estimator, ClusterMixin):\n for check in _yield_clustering_checks(estimator):\n yield check\n if is_outlier_detector(estimator):\n for check in _yield_outliers_checks(estimator):\n yield check\n yield check_parameters_default_constructible\n yield check_methods_sample_order_invariance\n yield check_methods_subset_invariance\n yield check_fit2d_1sample\n yield check_fit2d_1feature\n yield check_get_params_invariance\n yield check_set_params\n yield check_dict_unchanged\n yield check_dont_overwrite_parameters\n yield check_fit_idempotent\n if not tags[\"no_validation\"]:\n yield check_n_features_in\n yield check_fit1d\n yield check_fit2d_predict1d\n if tags[\"requires_y\"]:\n yield check_requires_y_none\n if tags[\"requires_positive_X\"]:\n yield check_fit_non_negative" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_yield_checks", + "name": "_yield_checks", + "qname": "sklearn.utils.estimator_checks._yield_checks", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_yield_checks/estimator", + "name": "estimator", + "qname": "sklearn.utils.estimator_checks._yield_checks.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _yield_checks(estimator):\n name = estimator.__class__.__name__\n tags = _safe_tags(estimator)\n pairwise = _is_pairwise(estimator)\n\n yield check_no_attributes_set_in_init\n yield check_estimators_dtypes\n yield check_fit_score_takes_y\n yield check_sample_weights_pandas_series\n yield check_sample_weights_not_an_array\n yield check_sample_weights_list\n yield check_sample_weights_shape\n if has_fit_parameter(estimator, \"sample_weight\") and not pairwise:\n # We skip pairwise because the data is not pairwise\n yield partial(check_sample_weights_invariance, kind='ones')\n yield partial(check_sample_weights_invariance, kind='zeros')\n yield check_estimators_fit_returns_self\n yield partial(check_estimators_fit_returns_self, readonly_memmap=True)\n\n # Check that all estimator yield informative messages when\n # trained on empty datasets\n if not tags[\"no_validation\"]:\n yield check_complex_data\n yield check_dtype_object\n yield check_estimators_empty_data_messages\n\n if name not in CROSS_DECOMPOSITION:\n # cross-decomposition's \"transform\" returns X and Y\n yield check_pipeline_consistency\n\n if not tags[\"allow_nan\"] and not tags[\"no_validation\"]:\n # Test that all estimators check their input for NaN's and infs\n yield check_estimators_nan_inf\n\n if pairwise:\n # Check that pairwise estimator throws error on non-square input\n yield check_nonsquare_error\n\n yield check_estimators_overwrite_params\n if hasattr(estimator, 'sparsify'):\n yield check_sparsify_coefficients\n\n yield check_estimator_sparse_data\n\n # Test that estimators can be pickled, and once pickled\n # give the same answer as before.\n yield check_estimators_pickle\n\n yield check_estimator_get_tags_default_keys" + }, + { + "id": 
"scikit-learn/sklearn.utils.estimator_checks/_yield_classifier_checks", + "name": "_yield_classifier_checks", + "qname": "sklearn.utils.estimator_checks._yield_classifier_checks", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_yield_classifier_checks/classifier", + "name": "classifier", + "qname": "sklearn.utils.estimator_checks._yield_classifier_checks.classifier", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _yield_classifier_checks(classifier):\n tags = _safe_tags(classifier)\n\n # test classifiers can handle non-array data and pandas objects\n yield check_classifier_data_not_an_array\n # test classifiers trained on a single label always return this label\n yield check_classifiers_one_label\n yield check_classifiers_classes\n yield check_estimators_partial_fit_n_features\n if tags[\"multioutput\"]:\n yield check_classifier_multioutput\n # basic consistency testing\n yield check_classifiers_train\n yield partial(check_classifiers_train, readonly_memmap=True)\n yield partial(check_classifiers_train, readonly_memmap=True,\n X_dtype='float32')\n yield check_classifiers_regression_target\n if tags[\"multilabel\"]:\n yield check_classifiers_multilabel_representation_invariance\n if not tags[\"no_validation\"]:\n yield check_supervised_y_no_nan\n if not tags['multioutput_only']:\n yield check_supervised_y_2d\n if tags[\"requires_fit\"]:\n yield check_estimators_unfitted\n if 'class_weight' in classifier.get_params().keys():\n yield check_class_weight_classifiers\n\n yield check_non_transformer_estimators_n_iter\n # test if predict_proba is a monotonic transformation of decision_function\n yield check_decision_proba_consistency" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_yield_clustering_checks", + "name": "_yield_clustering_checks", + "qname": "sklearn.utils.estimator_checks._yield_clustering_checks", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_yield_clustering_checks/clusterer", + "name": "clusterer", + "qname": "sklearn.utils.estimator_checks._yield_clustering_checks.clusterer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _yield_clustering_checks(clusterer):\n yield check_clusterer_compute_labels_predict\n name = clusterer.__class__.__name__\n if name not in ('WardAgglomeration', \"FeatureAgglomeration\"):\n # this is clustering on the features\n # let's not test that here.\n yield check_clustering\n yield partial(check_clustering, readonly_memmap=True)\n yield check_estimators_partial_fit_n_features\n yield check_non_transformer_estimators_n_iter" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_yield_outliers_checks", + "name": "_yield_outliers_checks", + "qname": "sklearn.utils.estimator_checks._yield_outliers_checks", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_yield_outliers_checks/estimator", + "name": "estimator", + "qname": "sklearn.utils.estimator_checks._yield_outliers_checks.estimator", + 
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _yield_outliers_checks(estimator):\n\n # checks for outlier detectors that have a fit_predict method\n if hasattr(estimator, 'fit_predict'):\n yield check_outliers_fit_predict\n\n # checks for estimators that can be used on a test set\n if hasattr(estimator, 'predict'):\n yield check_outliers_train\n yield partial(check_outliers_train, readonly_memmap=True)\n # test outlier detectors can handle non-array data\n yield check_classifier_data_not_an_array\n # test if NotFittedError is raised\n if _safe_tags(estimator, key=\"requires_fit\"):\n yield check_estimators_unfitted" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_yield_regressor_checks", + "name": "_yield_regressor_checks", + "qname": "sklearn.utils.estimator_checks._yield_regressor_checks", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_yield_regressor_checks/regressor", + "name": "regressor", + "qname": "sklearn.utils.estimator_checks._yield_regressor_checks.regressor", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _yield_regressor_checks(regressor):\n tags = _safe_tags(regressor)\n # TODO: test with intercept\n # TODO: test with multiple responses\n # basic testing\n yield check_regressors_train\n yield partial(check_regressors_train, readonly_memmap=True)\n yield partial(check_regressors_train, readonly_memmap=True,\n X_dtype='float32')\n yield check_regressor_data_not_an_array\n yield check_estimators_partial_fit_n_features\n if tags[\"multioutput\"]:\n yield check_regressor_multioutput\n yield check_regressors_no_decision_function\n if not tags[\"no_validation\"] and not tags['multioutput_only']:\n yield check_supervised_y_2d\n yield check_supervised_y_no_nan\n name = regressor.__class__.__name__\n if name != 'CCA':\n # check that the regressor handles int input\n yield check_regressors_int\n if tags[\"requires_fit\"]:\n yield check_estimators_unfitted\n yield check_non_transformer_estimators_n_iter" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_yield_transformer_checks", + "name": "_yield_transformer_checks", + "qname": "sklearn.utils.estimator_checks._yield_transformer_checks", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/_yield_transformer_checks/transformer", + "name": "transformer", + "qname": "sklearn.utils.estimator_checks._yield_transformer_checks.transformer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _yield_transformer_checks(transformer):\n tags = _safe_tags(transformer)\n # All transformers should either deal with sparse data or raise an\n # exception with type TypeError and an intelligible error message\n if not tags[\"no_validation\"]:\n yield check_transformer_data_not_an_array\n # 
these don't actually fit the data, so don't raise errors\n yield check_transformer_general\n if tags[\"preserves_dtype\"]:\n yield check_transformer_preserve_dtypes\n yield partial(check_transformer_general, readonly_memmap=True)\n if not _safe_tags(transformer, key=\"stateless\"):\n yield check_transformers_unfitted\n # Dependent on external solvers and hence accessing the iter\n # param is non-trivial.\n external_solver = ['Isomap', 'KernelPCA', 'LocallyLinearEmbedding',\n 'RandomizedLasso', 'LogisticRegressionCV']\n\n name = transformer.__class__.__name__\n if name not in external_solver:\n yield check_transformer_n_iter" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_balanced_classifiers", + "name": "check_class_weight_balanced_classifiers", + "qname": "sklearn.utils.estimator_checks.check_class_weight_balanced_classifiers", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_balanced_classifiers/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_class_weight_balanced_classifiers.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_balanced_classifiers/classifier_orig", + "name": "classifier_orig", + "qname": "sklearn.utils.estimator_checks.check_class_weight_balanced_classifiers.classifier_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_balanced_classifiers/X_train", + "name": "X_train", + "qname": "sklearn.utils.estimator_checks.check_class_weight_balanced_classifiers.X_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_balanced_classifiers/y_train", + "name": "y_train", + "qname": "sklearn.utils.estimator_checks.check_class_weight_balanced_classifiers.y_train", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_balanced_classifiers/X_test", + "name": "X_test", + "qname": "sklearn.utils.estimator_checks.check_class_weight_balanced_classifiers.X_test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_balanced_classifiers/y_test", + "name": "y_test", + "qname": "sklearn.utils.estimator_checks.check_class_weight_balanced_classifiers.y_test", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_balanced_classifiers/weights", + "name": "weights", + "qname": 
"sklearn.utils.estimator_checks.check_class_weight_balanced_classifiers.weights", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_class_weight_balanced_classifiers(\n name, classifier_orig, X_train, y_train, X_test, y_test, weights\n):\n classifier = clone(classifier_orig)\n if hasattr(classifier, \"n_iter\"):\n classifier.set_params(n_iter=100)\n if hasattr(classifier, \"max_iter\"):\n classifier.set_params(max_iter=1000)\n\n set_random_state(classifier)\n classifier.fit(X_train, y_train)\n y_pred = classifier.predict(X_test)\n\n classifier.set_params(class_weight='balanced')\n classifier.fit(X_train, y_train)\n y_pred_balanced = classifier.predict(X_test)\n assert (f1_score(y_test, y_pred_balanced, average='weighted') >\n f1_score(y_test, y_pred, average='weighted'))" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_balanced_linear_classifier", + "name": "check_class_weight_balanced_linear_classifier", + "qname": "sklearn.utils.estimator_checks.check_class_weight_balanced_linear_classifier", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_balanced_linear_classifier/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_class_weight_balanced_linear_classifier.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_balanced_linear_classifier/Classifier", + "name": "Classifier", + "qname": "sklearn.utils.estimator_checks.check_class_weight_balanced_linear_classifier.Classifier", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Test class weights with non-contiguous class labels.", + "docstring": "Test class weights with non-contiguous class labels.", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_class_weight_balanced_linear_classifier(name, Classifier):\n \"\"\"Test class weights with non-contiguous class labels.\"\"\"\n # this is run on classes, not instances, though this should be changed\n X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],\n [1.0, 1.0], [1.0, 0.0]])\n y = np.array([1, 1, 1, -1, -1])\n\n classifier = Classifier()\n\n if hasattr(classifier, \"n_iter\"):\n # This is a very small dataset, default n_iter are likely to prevent\n # convergence\n classifier.set_params(n_iter=1000)\n if hasattr(classifier, \"max_iter\"):\n classifier.set_params(max_iter=1000)\n if hasattr(classifier, 'cv'):\n classifier.set_params(cv=3)\n set_random_state(classifier)\n\n # Let the model compute the class frequencies\n classifier.set_params(class_weight='balanced')\n coef_balanced = classifier.fit(X, y).coef_.copy()\n\n # Count each label occurrence to reweight manually\n n_samples = len(y)\n n_classes = float(len(np.unique(y)))\n\n class_weight = {1: n_samples / (np.sum(y == 1) * n_classes),\n -1: n_samples / 
(np.sum(y == -1) * n_classes)}\n classifier.set_params(class_weight=class_weight)\n coef_manual = classifier.fit(X, y).coef_.copy()\n\n assert_allclose(coef_balanced, coef_manual,\n err_msg=\"Classifier %s is not computing\"\n \" class_weight=balanced properly.\"\n % name)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_classifiers", + "name": "check_class_weight_classifiers", + "qname": "sklearn.utils.estimator_checks.check_class_weight_classifiers", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_classifiers/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_class_weight_classifiers.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_class_weight_classifiers/classifier_orig", + "name": "classifier_orig", + "qname": "sklearn.utils.estimator_checks.check_class_weight_classifiers.classifier_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_class_weight_classifiers(name, classifier_orig):\n\n if _safe_tags(classifier_orig, key='binary_only'):\n problems = [2]\n else:\n problems = [2, 3]\n\n for n_centers in problems:\n # create a very noisy dataset\n X, y = make_blobs(centers=n_centers, random_state=0, cluster_std=20)\n X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,\n random_state=0)\n\n # can't use gram_if_pairwise() here, setting up gram matrix manually\n if _is_pairwise(classifier_orig):\n X_test = rbf_kernel(X_test, X_train)\n X_train = rbf_kernel(X_train, X_train)\n\n n_centers = len(np.unique(y_train))\n\n if n_centers == 2:\n class_weight = {0: 1000, 1: 0.0001}\n else:\n class_weight = {0: 1000, 1: 0.0001, 2: 0.0001}\n\n classifier = clone(classifier_orig).set_params(\n class_weight=class_weight)\n if hasattr(classifier, \"n_iter\"):\n classifier.set_params(n_iter=100)\n if hasattr(classifier, \"max_iter\"):\n classifier.set_params(max_iter=1000)\n if hasattr(classifier, \"min_weight_fraction_leaf\"):\n classifier.set_params(min_weight_fraction_leaf=0.01)\n if hasattr(classifier, \"n_iter_no_change\"):\n classifier.set_params(n_iter_no_change=20)\n\n set_random_state(classifier)\n classifier.fit(X_train, y_train)\n y_pred = classifier.predict(X_test)\n # XXX: Generally can use 0.89 here. 
On Windows, LinearSVC gets\n # 0.88 (Issue #9111)\n if not _safe_tags(classifier_orig, key='poor_score'):\n assert np.mean(y_pred == 0) > 0.87" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifier_data_not_an_array", + "name": "check_classifier_data_not_an_array", + "qname": "sklearn.utils.estimator_checks.check_classifier_data_not_an_array", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifier_data_not_an_array/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_classifier_data_not_an_array.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifier_data_not_an_array/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_classifier_data_not_an_array.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_classifier_data_not_an_array(name, estimator_orig):\n X = np.array([[3, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 1],\n [0, 3], [1, 0], [2, 0], [4, 4], [2, 3], [3, 2]])\n X = _pairwise_estimator_convert_X(X, estimator_orig)\n y = np.array([1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2])\n y = _enforce_estimator_tags_y(estimator_orig, y)\n for obj_type in [\"NotAnArray\", \"PandasDataframe\"]:\n check_estimators_data_not_an_array(name, estimator_orig, X, y,\n obj_type)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifier_multioutput", + "name": "check_classifier_multioutput", + "qname": "sklearn.utils.estimator_checks.check_classifier_multioutput", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifier_multioutput/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_classifier_multioutput.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifier_multioutput/estimator", + "name": "estimator", + "qname": "sklearn.utils.estimator_checks.check_classifier_multioutput.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_classifier_multioutput(name, estimator):\n n_samples, n_labels, n_classes = 42, 5, 3\n tags = _safe_tags(estimator)\n estimator = clone(estimator)\n X, y = make_multilabel_classification(random_state=42,\n n_samples=n_samples,\n n_labels=n_labels,\n n_classes=n_classes)\n estimator.fit(X, y)\n y_pred = estimator.predict(X)\n\n assert y_pred.shape == (n_samples, n_classes), (\n \"The shape of the prediction for multioutput data is \"\n \"incorrect. 
Expected {}, got {}.\"\n .format((n_samples, n_labels), y_pred.shape))\n assert y_pred.dtype.kind == 'i'\n\n if hasattr(estimator, \"decision_function\"):\n decision = estimator.decision_function(X)\n assert isinstance(decision, np.ndarray)\n assert decision.shape == (n_samples, n_classes), (\n \"The shape of the decision function output for \"\n \"multioutput data is incorrect. Expected {}, got {}.\"\n .format((n_samples, n_classes), decision.shape))\n\n dec_pred = (decision > 0).astype(int)\n dec_exp = estimator.classes_[dec_pred]\n assert_array_equal(dec_exp, y_pred)\n\n if hasattr(estimator, \"predict_proba\"):\n y_prob = estimator.predict_proba(X)\n\n if isinstance(y_prob, list) and not tags['poor_score']:\n for i in range(n_classes):\n assert y_prob[i].shape == (n_samples, 2), (\n \"The shape of the probability for multioutput data is\"\n \" incorrect. Expected {}, got {}.\"\n .format((n_samples, 2), y_prob[i].shape))\n assert_array_equal(\n np.argmax(y_prob[i], axis=1).astype(int),\n y_pred[:, i]\n )\n elif not tags['poor_score']:\n assert y_prob.shape == (n_samples, n_classes), (\n \"The shape of the probability for multioutput data is\"\n \" incorrect. Expected {}, got {}.\"\n .format((n_samples, n_classes), y_prob.shape))\n assert_array_equal(y_prob.round().astype(int), y_pred)\n\n if (hasattr(estimator, \"decision_function\") and\n hasattr(estimator, \"predict_proba\")):\n for i in range(n_classes):\n y_proba = estimator.predict_proba(X)[:, i]\n y_decision = estimator.decision_function(X)\n assert_array_equal(rankdata(y_proba), rankdata(y_decision[:, i]))" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_classes", + "name": "check_classifiers_classes", + "qname": "sklearn.utils.estimator_checks.check_classifiers_classes", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_classes/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_classifiers_classes.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_classes/classifier_orig", + "name": "classifier_orig", + "qname": "sklearn.utils.estimator_checks.check_classifiers_classes.classifier_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_classifiers_classes(name, classifier_orig):\n X_multiclass, y_multiclass = make_blobs(n_samples=30, random_state=0,\n cluster_std=0.1)\n X_multiclass, y_multiclass = shuffle(X_multiclass, y_multiclass,\n random_state=7)\n X_multiclass = StandardScaler().fit_transform(X_multiclass)\n # We need to make sure that we have non negative data, for things\n # like NMF\n X_multiclass -= X_multiclass.min() - .1\n\n X_binary = X_multiclass[y_multiclass != 2]\n y_binary = y_multiclass[y_multiclass != 2]\n\n X_multiclass = _pairwise_estimator_convert_X(X_multiclass, classifier_orig)\n X_binary = _pairwise_estimator_convert_X(X_binary, classifier_orig)\n\n labels_multiclass = [\"one\", \"two\", \"three\"]\n labels_binary = [\"one\", \"two\"]\n\n y_names_multiclass = np.take(labels_multiclass, y_multiclass)\n y_names_binary = 
np.take(labels_binary, y_binary)\n\n problems = [(X_binary, y_binary, y_names_binary)]\n if not _safe_tags(classifier_orig, key='binary_only'):\n problems.append((X_multiclass, y_multiclass, y_names_multiclass))\n\n for X, y, y_names in problems:\n for y_names_i in [y_names, y_names.astype('O')]:\n y_ = _choose_check_classifiers_labels(name, y, y_names_i)\n check_classifiers_predictions(X, y_, name, classifier_orig)\n\n labels_binary = [-1, 1]\n y_names_binary = np.take(labels_binary, y_binary)\n y_binary = _choose_check_classifiers_labels(name, y_binary, y_names_binary)\n check_classifiers_predictions(X_binary, y_binary, name, classifier_orig)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_multilabel_representation_invariance", + "name": "check_classifiers_multilabel_representation_invariance", + "qname": "sklearn.utils.estimator_checks.check_classifiers_multilabel_representation_invariance", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_multilabel_representation_invariance/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_classifiers_multilabel_representation_invariance.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_multilabel_representation_invariance/classifier_orig", + "name": "classifier_orig", + "qname": "sklearn.utils.estimator_checks.check_classifiers_multilabel_representation_invariance.classifier_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=(FutureWarning))\ndef check_classifiers_multilabel_representation_invariance(\n name, classifier_orig\n):\n\n X, y = make_multilabel_classification(n_samples=100, n_features=20,\n n_classes=5, n_labels=3,\n length=50, allow_unlabeled=True,\n random_state=0)\n\n X_train, y_train = X[:80], y[:80]\n X_test = X[80:]\n\n y_train_list_of_lists = y_train.tolist()\n y_train_list_of_arrays = list(y_train)\n\n classifier = clone(classifier_orig)\n set_random_state(classifier)\n\n y_pred = classifier.fit(X_train, y_train).predict(X_test)\n\n y_pred_list_of_lists = classifier.fit(\n X_train, y_train_list_of_lists).predict(X_test)\n\n y_pred_list_of_arrays = classifier.fit(\n X_train, y_train_list_of_arrays).predict(X_test)\n\n assert_array_equal(y_pred, y_pred_list_of_arrays)\n assert_array_equal(y_pred, y_pred_list_of_lists)\n\n assert y_pred.dtype == y_pred_list_of_arrays.dtype\n assert y_pred.dtype == y_pred_list_of_lists.dtype\n assert type(y_pred) == type(y_pred_list_of_arrays)\n assert type(y_pred) == type(y_pred_list_of_lists)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_one_label", + "name": "check_classifiers_one_label", + "qname": "sklearn.utils.estimator_checks.check_classifiers_one_label", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_one_label/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_classifiers_one_label.name", + 
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_one_label/classifier_orig", + "name": "classifier_orig", + "qname": "sklearn.utils.estimator_checks.check_classifiers_one_label.classifier_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_classifiers_one_label(name, classifier_orig):\n error_string_fit = \"Classifier can't train when only one class is present.\"\n error_string_predict = (\"Classifier can't predict when only one class is \"\n \"present.\")\n rnd = np.random.RandomState(0)\n X_train = rnd.uniform(size=(10, 3))\n X_test = rnd.uniform(size=(10, 3))\n y = np.ones(10)\n # catch deprecation warnings\n with ignore_warnings(category=FutureWarning):\n classifier = clone(classifier_orig)\n with raises(\n ValueError, match=\"class\", may_pass=True, err_msg=error_string_fit\n ) as cm:\n classifier.fit(X_train, y)\n\n if cm.raised_and_matched:\n # ValueError was raised with proper error message\n return\n\n assert_array_equal(\n classifier.predict(X_test), y, err_msg=error_string_predict\n )" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_predictions", + "name": "check_classifiers_predictions", + "qname": "sklearn.utils.estimator_checks.check_classifiers_predictions", + "decorators": ["ignore_warnings"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_predictions/X", + "name": "X", + "qname": "sklearn.utils.estimator_checks.check_classifiers_predictions.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_predictions/y", + "name": "y", + "qname": "sklearn.utils.estimator_checks.check_classifiers_predictions.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_predictions/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_classifiers_predictions.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_predictions/classifier_orig", + "name": "classifier_orig", + "qname": "sklearn.utils.estimator_checks.check_classifiers_predictions.classifier_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings\ndef check_classifiers_predictions(X, y, name, classifier_orig):\n classes = np.unique(y)\n classifier = clone(classifier_orig)\n if 
name == 'BernoulliNB':\n X = X > X.mean()\n set_random_state(classifier)\n\n classifier.fit(X, y)\n y_pred = classifier.predict(X)\n\n if hasattr(classifier, \"decision_function\"):\n decision = classifier.decision_function(X)\n assert isinstance(decision, np.ndarray)\n if len(classes) == 2:\n dec_pred = (decision.ravel() > 0).astype(int)\n dec_exp = classifier.classes_[dec_pred]\n assert_array_equal(dec_exp, y_pred,\n err_msg=\"decision_function does not match \"\n \"classifier for %r: expected '%s', got '%s'\" %\n (classifier, \", \".join(map(str, dec_exp)),\n \", \".join(map(str, y_pred))))\n elif getattr(classifier, 'decision_function_shape', 'ovr') == 'ovr':\n decision_y = np.argmax(decision, axis=1).astype(int)\n y_exp = classifier.classes_[decision_y]\n assert_array_equal(y_exp, y_pred,\n err_msg=\"decision_function does not match \"\n \"classifier for %r: expected '%s', got '%s'\" %\n (classifier, \", \".join(map(str, y_exp)),\n \", \".join(map(str, y_pred))))\n\n # training set performance\n if name != \"ComplementNB\":\n # This is a pathological data set for ComplementNB.\n # For some specific cases 'ComplementNB' predicts less classes\n # than expected\n assert_array_equal(np.unique(y), np.unique(y_pred))\n assert_array_equal(classes, classifier.classes_,\n err_msg=\"Unexpected classes_ attribute for %r: \"\n \"expected '%s', got '%s'\" %\n (classifier, \", \".join(map(str, classes)),\n \", \".join(map(str, classifier.classes_))))" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_regression_target", + "name": "check_classifiers_regression_target", + "qname": "sklearn.utils.estimator_checks.check_classifiers_regression_target", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_regression_target/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_classifiers_regression_target.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_regression_target/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_classifiers_regression_target.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_classifiers_regression_target(name, estimator_orig):\n # Check if classifier throws an exception when fed regression targets\n\n X, y = _regression_dataset()\n\n X = X + 1 + abs(X.min(axis=0)) # be sure that X is non-negative\n e = clone(estimator_orig)\n msg = \"Unknown label type: \"\n if not _safe_tags(e, key=\"no_validation\"):\n with raises(ValueError, match=msg):\n e.fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_train", + "name": "check_classifiers_train", + "qname": "sklearn.utils.estimator_checks.check_classifiers_train", + "decorators": ["ignore_warnings"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_train/name", + "name": "name", + "qname": 
"sklearn.utils.estimator_checks.check_classifiers_train.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_train/classifier_orig", + "name": "classifier_orig", + "qname": "sklearn.utils.estimator_checks.check_classifiers_train.classifier_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_train/readonly_memmap", + "name": "readonly_memmap", + "qname": "sklearn.utils.estimator_checks.check_classifiers_train.readonly_memmap", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_classifiers_train/X_dtype", + "name": "X_dtype", + "qname": "sklearn.utils.estimator_checks.check_classifiers_train.X_dtype", + "default_value": "'float64'", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings # Warnings are raised by decision function\ndef check_classifiers_train(\n name, classifier_orig, readonly_memmap=False, X_dtype=\"float64\"\n):\n X_m, y_m = make_blobs(n_samples=300, random_state=0)\n X_m = X_m.astype(X_dtype)\n X_m, y_m = shuffle(X_m, y_m, random_state=7)\n X_m = StandardScaler().fit_transform(X_m)\n # generate binary problem from multi-class one\n y_b = y_m[y_m != 2]\n X_b = X_m[y_m != 2]\n\n if name in ['BernoulliNB', 'MultinomialNB', 'ComplementNB',\n 'CategoricalNB']:\n X_m -= X_m.min()\n X_b -= X_b.min()\n\n if readonly_memmap:\n X_m, y_m, X_b, y_b = create_memmap_backed_data([X_m, y_m, X_b, y_b])\n\n problems = [(X_b, y_b)]\n tags = _safe_tags(classifier_orig)\n if not tags['binary_only']:\n problems.append((X_m, y_m))\n\n for (X, y) in problems:\n classes = np.unique(y)\n n_classes = len(classes)\n n_samples, n_features = X.shape\n classifier = clone(classifier_orig)\n X = _pairwise_estimator_convert_X(X, classifier)\n y = _enforce_estimator_tags_y(classifier, y)\n\n set_random_state(classifier)\n # raises error on malformed input for fit\n if not tags[\"no_validation\"]:\n with raises(\n ValueError,\n err_msg=f\"The classifier {name} does not raise an error when \"\n \"incorrect/malformed input data for fit is passed. The number \"\n \"of training examples is not the same as the number of \"\n \"labels. 
Perhaps use check_X_y in fit.\",\n ):\n classifier.fit(X, y[:-1])\n\n # fit\n classifier.fit(X, y)\n # with lists\n classifier.fit(X.tolist(), y.tolist())\n assert hasattr(classifier, \"classes_\")\n y_pred = classifier.predict(X)\n\n assert y_pred.shape == (n_samples,)\n # training set performance\n if not tags['poor_score']:\n assert accuracy_score(y, y_pred) > 0.83\n\n # raises error on malformed input for predict\n msg_pairwise = (\n \"The classifier {} does not raise an error when shape of X in \"\n \" {} is not equal to (n_test_samples, n_training_samples)\")\n msg = (\"The classifier {} does not raise an error when the number of \"\n \"features in {} is different from the number of features in \"\n \"fit.\")\n\n if not tags[\"no_validation\"]:\n if _is_pairwise(classifier):\n with raises(\n ValueError,\n err_msg=msg_pairwise.format(name, \"predict\"),\n ):\n classifier.predict(X.reshape(-1, 1))\n else:\n with raises(ValueError, err_msg=msg.format(name, \"predict\")):\n classifier.predict(X.T)\n if hasattr(classifier, \"decision_function\"):\n try:\n # decision_function agrees with predict\n decision = classifier.decision_function(X)\n if n_classes == 2:\n if not tags[\"multioutput_only\"]:\n assert decision.shape == (n_samples,)\n else:\n assert decision.shape == (n_samples, 1)\n dec_pred = (decision.ravel() > 0).astype(int)\n assert_array_equal(dec_pred, y_pred)\n else:\n assert decision.shape == (n_samples, n_classes)\n assert_array_equal(np.argmax(decision, axis=1), y_pred)\n\n # raises error on malformed input for decision_function\n if not tags[\"no_validation\"]:\n if _is_pairwise(classifier):\n with raises(\n ValueError,\n err_msg=msg_pairwise.format(\n name, \"decision_function\"\n ),\n ):\n classifier.decision_function(X.reshape(-1, 1))\n else:\n with raises(\n ValueError,\n err_msg=msg.format(name, \"decision_function\"),\n ):\n classifier.decision_function(X.T)\n except NotImplementedError:\n pass\n\n if hasattr(classifier, \"predict_proba\"):\n # predict_proba agrees with predict\n y_prob = classifier.predict_proba(X)\n assert y_prob.shape == (n_samples, n_classes)\n assert_array_equal(np.argmax(y_prob, axis=1), y_pred)\n # check that probas for all classes sum to one\n assert_array_almost_equal(np.sum(y_prob, axis=1),\n np.ones(n_samples))\n if not tags[\"no_validation\"]:\n # raises error on malformed input for predict_proba\n if _is_pairwise(classifier_orig):\n with raises(\n ValueError,\n err_msg=msg_pairwise.format(name, \"predict_proba\"),\n ):\n classifier.predict_proba(X.reshape(-1, 1))\n else:\n with raises(\n ValueError,\n err_msg=msg.format(name, \"predict_proba\"),\n ):\n classifier.predict_proba(X.T)\n if hasattr(classifier, \"predict_log_proba\"):\n # predict_log_proba is a transformation of predict_proba\n y_log_prob = classifier.predict_log_proba(X)\n assert_allclose(y_log_prob, np.log(y_prob), 8, atol=1e-9)\n assert_array_equal(np.argsort(y_log_prob), np.argsort(y_prob))" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_clusterer_compute_labels_predict", + "name": "check_clusterer_compute_labels_predict", + "qname": "sklearn.utils.estimator_checks.check_clusterer_compute_labels_predict", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_clusterer_compute_labels_predict/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_clusterer_compute_labels_predict.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + 
"is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_clusterer_compute_labels_predict/clusterer_orig", + "name": "clusterer_orig", + "qname": "sklearn.utils.estimator_checks.check_clusterer_compute_labels_predict.clusterer_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Check that predict is invariant of compute_labels.", + "docstring": "Check that predict is invariant of compute_labels.", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_clusterer_compute_labels_predict(name, clusterer_orig):\n \"\"\"Check that predict is invariant of compute_labels.\"\"\"\n X, y = make_blobs(n_samples=20, random_state=0)\n clusterer = clone(clusterer_orig)\n set_random_state(clusterer)\n\n if hasattr(clusterer, \"compute_labels\"):\n # MiniBatchKMeans\n X_pred1 = clusterer.fit(X).predict(X)\n clusterer.set_params(compute_labels=False)\n X_pred2 = clusterer.fit(X).predict(X)\n assert_array_equal(X_pred1, X_pred2)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_clustering", + "name": "check_clustering", + "qname": "sklearn.utils.estimator_checks.check_clustering", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_clustering/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_clustering.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_clustering/clusterer_orig", + "name": "clusterer_orig", + "qname": "sklearn.utils.estimator_checks.check_clustering.clusterer_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_clustering/readonly_memmap", + "name": "readonly_memmap", + "qname": "sklearn.utils.estimator_checks.check_clustering.readonly_memmap", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_clustering(name, clusterer_orig, readonly_memmap=False):\n clusterer = clone(clusterer_orig)\n X, y = make_blobs(n_samples=50, random_state=1)\n X, y = shuffle(X, y, random_state=7)\n X = StandardScaler().fit_transform(X)\n rng = np.random.RandomState(7)\n X_noise = np.concatenate([X, rng.uniform(low=-3, high=3, size=(5, 2))])\n\n if readonly_memmap:\n X, y, X_noise = create_memmap_backed_data([X, y, X_noise])\n\n n_samples, n_features = X.shape\n # catch deprecation and neighbors warnings\n if hasattr(clusterer, \"n_clusters\"):\n clusterer.set_params(n_clusters=3)\n set_random_state(clusterer)\n if name == 'AffinityPropagation':\n clusterer.set_params(preference=-100)\n clusterer.set_params(max_iter=100)\n\n # fit\n clusterer.fit(X)\n # with 
lists\n clusterer.fit(X.tolist())\n\n pred = clusterer.labels_\n assert pred.shape == (n_samples,)\n assert adjusted_rand_score(pred, y) > 0.4\n if _safe_tags(clusterer, key='non_deterministic'):\n return\n set_random_state(clusterer)\n with warnings.catch_warnings(record=True):\n pred2 = clusterer.fit_predict(X)\n assert_array_equal(pred, pred2)\n\n # fit_predict(X) and labels_ should be of type int\n assert pred.dtype in [np.dtype('int32'), np.dtype('int64')]\n assert pred2.dtype in [np.dtype('int32'), np.dtype('int64')]\n\n # Add noise to X to test the possible values of the labels\n labels = clusterer.fit_predict(X_noise)\n\n # There should be at least one sample in every cluster. Equivalently\n # labels_ should contain all the consecutive values between its\n # min and its max.\n labels_sorted = np.unique(labels)\n assert_array_equal(labels_sorted, np.arange(labels_sorted[0],\n labels_sorted[-1] + 1))\n\n # Labels are expected to start at 0 (no noise) or -1 (if noise)\n assert labels_sorted[0] in [0, -1]\n # Labels should be less than n_clusters - 1\n if hasattr(clusterer, 'n_clusters'):\n n_clusters = getattr(clusterer, 'n_clusters')\n assert n_clusters - 1 >= labels_sorted[-1]" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_complex_data", + "name": "check_complex_data", + "qname": "sklearn.utils.estimator_checks.check_complex_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_complex_data/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_complex_data.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_complex_data/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_complex_data.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_complex_data(name, estimator_orig):\n # check that estimators raise an exception on providing complex data\n X = np.random.sample(10) + 1j * np.random.sample(10)\n X = X.reshape(-1, 1)\n y = np.random.sample(10) + 1j * np.random.sample(10)\n estimator = clone(estimator_orig)\n with raises(ValueError, match=\"Complex data not supported\"):\n estimator.fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_decision_proba_consistency", + "name": "check_decision_proba_consistency", + "qname": "sklearn.utils.estimator_checks.check_decision_proba_consistency", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_decision_proba_consistency/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_decision_proba_consistency.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_decision_proba_consistency/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_decision_proba_consistency.estimator_orig", + 
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_decision_proba_consistency(name, estimator_orig):\n # Check whether an estimator having both decision_function and\n # predict_proba methods has outputs with perfect rank correlation.\n\n centers = [(2, 2), (4, 4)]\n X, y = make_blobs(n_samples=100, random_state=0, n_features=4,\n centers=centers, cluster_std=1.0, shuffle=True)\n X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,\n random_state=0)\n estimator = clone(estimator_orig)\n\n if (hasattr(estimator, \"decision_function\") and\n hasattr(estimator, \"predict_proba\")):\n\n estimator.fit(X_train, y_train)\n # Since the link function from decision_function() to predict_proba()\n # is sometimes not precise enough (typically expit), we round to the\n # 10th decimal to avoid numerical issues: we compare the rank\n # with deterministic ties rather than get platform specific rank\n # inversions in case of machine level differences.\n a = estimator.predict_proba(X_test)[:, 1].round(decimals=10)\n b = estimator.decision_function(X_test).round(decimals=10)\n assert_array_equal(rankdata(a), rankdata(b))" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_dict_unchanged", + "name": "check_dict_unchanged", + "qname": "sklearn.utils.estimator_checks.check_dict_unchanged", + "decorators": ["ignore_warnings"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_dict_unchanged/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_dict_unchanged.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_dict_unchanged/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_dict_unchanged.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings\ndef check_dict_unchanged(name, estimator_orig):\n # this estimator raises\n # ValueError: Found array with 0 feature(s) (shape=(23, 0))\n # while a minimum of 1 is required.\n # error\n if name in ['SpectralCoclustering']:\n return\n rnd = np.random.RandomState(0)\n if name in ['RANSACRegressor']:\n X = 3 * rnd.uniform(size=(20, 3))\n else:\n X = 2 * rnd.uniform(size=(20, 3))\n\n X = _pairwise_estimator_convert_X(X, estimator_orig)\n\n y = X[:, 0].astype(int)\n estimator = clone(estimator_orig)\n y = _enforce_estimator_tags_y(estimator, y)\n if hasattr(estimator, \"n_components\"):\n estimator.n_components = 1\n\n if hasattr(estimator, \"n_clusters\"):\n estimator.n_clusters = 1\n\n if hasattr(estimator, \"n_best\"):\n estimator.n_best = 1\n\n set_random_state(estimator, 1)\n\n estimator.fit(X, y)\n for method in [\"predict\", \"transform\", \"decision_function\",\n \"predict_proba\"]:\n if hasattr(estimator, method):\n dict_before = estimator.__dict__.copy()\n getattr(estimator, 
method)(X)\n assert estimator.__dict__ == dict_before, (\n 'Estimator changes __dict__ during %s' % method)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_dont_overwrite_parameters", + "name": "check_dont_overwrite_parameters", + "qname": "sklearn.utils.estimator_checks.check_dont_overwrite_parameters", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_dont_overwrite_parameters/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_dont_overwrite_parameters.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_dont_overwrite_parameters/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_dont_overwrite_parameters.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_dont_overwrite_parameters(name, estimator_orig):\n # check that fit method only changes or sets private attributes\n if hasattr(estimator_orig.__init__, \"deprecated_original\"):\n # to not check deprecated classes\n return\n estimator = clone(estimator_orig)\n rnd = np.random.RandomState(0)\n X = 3 * rnd.uniform(size=(20, 3))\n X = _pairwise_estimator_convert_X(X, estimator_orig)\n y = X[:, 0].astype(int)\n y = _enforce_estimator_tags_y(estimator, y)\n\n if hasattr(estimator, \"n_components\"):\n estimator.n_components = 1\n if hasattr(estimator, \"n_clusters\"):\n estimator.n_clusters = 1\n\n set_random_state(estimator, 1)\n dict_before_fit = estimator.__dict__.copy()\n estimator.fit(X, y)\n\n dict_after_fit = estimator.__dict__\n\n public_keys_after_fit = [key for key in dict_after_fit.keys()\n if _is_public_parameter(key)]\n\n attrs_added_by_fit = [key for key in public_keys_after_fit\n if key not in dict_before_fit.keys()]\n\n # check that fit doesn't add any public attribute\n assert not attrs_added_by_fit, (\n 'Estimator adds public attribute(s) during' ' the fit method.'\n ' Estimators are only allowed to add private attributes'\n ' either started with _ or ended'\n ' with _ but %s added'\n % ', '.join(attrs_added_by_fit))\n\n # check that fit doesn't change any public attribute\n attrs_changed_by_fit = [key for key in public_keys_after_fit\n if (dict_before_fit[key]\n is not dict_after_fit[key])]\n\n assert not attrs_changed_by_fit, (\n 'Estimator changes public attribute(s) during'\n ' the fit method. 
Estimators are only allowed'\n ' to change attributes started'\n ' or ended with _, but'\n ' %s changed'\n % ', '.join(attrs_changed_by_fit))" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_dtype_object", + "name": "check_dtype_object", + "qname": "sklearn.utils.estimator_checks.check_dtype_object", + "decorators": ["ignore_warnings(category=(FutureWarning, UserWarning))"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_dtype_object/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_dtype_object.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_dtype_object/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_dtype_object.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=(FutureWarning, UserWarning))\ndef check_dtype_object(name, estimator_orig):\n # check that estimators treat dtype object as numeric if possible\n rng = np.random.RandomState(0)\n X = _pairwise_estimator_convert_X(rng.rand(40, 10), estimator_orig)\n X = X.astype(object)\n tags = _safe_tags(estimator_orig)\n y = (X[:, 0] * 4).astype(int)\n estimator = clone(estimator_orig)\n y = _enforce_estimator_tags_y(estimator, y)\n\n estimator.fit(X, y)\n if hasattr(estimator, \"predict\"):\n estimator.predict(X)\n\n if hasattr(estimator, \"transform\"):\n estimator.transform(X)\n\n with raises(Exception, match=\"Unknown label type\", may_pass=True):\n estimator.fit(X, y.astype(object))\n\n if 'string' not in tags['X_types']:\n X[0, 0] = {'foo': 'bar'}\n msg = \"argument must be a string.* number\"\n with raises(TypeError, match=msg):\n estimator.fit(X, y)\n else:\n # Estimators supporting string will not call np.asarray to convert the\n # data to numeric and therefore, the error will not be raised.\n # Checking for each element dtype in the input array will be costly.\n # Refer to #11401 for full discussion.\n estimator.fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimator", + "name": "check_estimator", + "qname": "sklearn.utils.estimator_checks.check_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimator/Estimator", + "name": "Estimator", + "qname": "sklearn.utils.estimator_checks.check_estimator.Estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "estimator object", + "default_value": "", + "description": "Estimator instance to check.\n\n.. versionchanged:: 0.24\n Passing a class was deprecated in version 0.23, and support for\n classes was removed in 0.24." 
+ }, + "type": { + "kind": "NamedType", + "name": "estimator object" + } + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimator/generate_only", + "name": "generate_only", + "qname": "sklearn.utils.estimator_checks.check_estimator.generate_only", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When `False`, checks are evaluated when `check_estimator` is called.\nWhen `True`, `check_estimator` returns a generator that yields\n(estimator, check) tuples. The check is run by calling\n`check(estimator)`.\n\n.. versionadded:: 0.22" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Check if estimator adheres to scikit-learn conventions.\n\nThis estimator will run an extensive test-suite for input validation,\nshapes, etc, making sure that the estimator complies with `scikit-learn`\nconventions as detailed in :ref:`rolling_your_own_estimator`.\nAdditional tests for classifiers, regressors, clustering or transformers\nwill be run if the Estimator class inherits from the corresponding mixin\nfrom sklearn.base.\n\nSetting `generate_only=True` returns a generator that yields (estimator,\ncheck) tuples where the check can be called independently from each\nother, i.e. `check(estimator)`. This allows all checks to be run\nindependently and report the checks that are failing.\n\nscikit-learn provides a pytest specific decorator,\n:func:`~sklearn.utils.parametrize_with_checks`, making it easier to test\nmultiple estimators.", + "docstring": "Check if estimator adheres to scikit-learn conventions.\n\nThis estimator will run an extensive test-suite for input validation,\nshapes, etc, making sure that the estimator complies with `scikit-learn`\nconventions as detailed in :ref:`rolling_your_own_estimator`.\nAdditional tests for classifiers, regressors, clustering or transformers\nwill be run if the Estimator class inherits from the corresponding mixin\nfrom sklearn.base.\n\nSetting `generate_only=True` returns a generator that yields (estimator,\ncheck) tuples where the check can be called independently from each\nother, i.e. `check(estimator)`. This allows all checks to be run\nindependently and report the checks that are failing.\n\nscikit-learn provides a pytest specific decorator,\n:func:`~sklearn.utils.parametrize_with_checks`, making it easier to test\nmultiple estimators.\n\nParameters\n----------\nEstimator : estimator object\n Estimator instance to check.\n\n .. versionchanged:: 0.24\n Passing a class was deprecated in version 0.23, and support for\n classes was removed in 0.24.\n\ngenerate_only : bool, default=False\n When `False`, checks are evaluated when `check_estimator` is called.\n When `True`, `check_estimator` returns a generator that yields\n (estimator, check) tuples. The check is run by calling\n `check(estimator)`.\n\n .. versionadded:: 0.22\n\nReturns\n-------\nchecks_generator : generator\n Generator that yields (estimator, check) tuples. 
Returned when\n `generate_only=True`.", + "code": "def check_estimator(Estimator, generate_only=False):\n \"\"\"Check if estimator adheres to scikit-learn conventions.\n\n This estimator will run an extensive test-suite for input validation,\n shapes, etc, making sure that the estimator complies with `scikit-learn`\n conventions as detailed in :ref:`rolling_your_own_estimator`.\n Additional tests for classifiers, regressors, clustering or transformers\n will be run if the Estimator class inherits from the corresponding mixin\n from sklearn.base.\n\n Setting `generate_only=True` returns a generator that yields (estimator,\n check) tuples where the check can be called independently from each\n other, i.e. `check(estimator)`. This allows all checks to be run\n independently and report the checks that are failing.\n\n scikit-learn provides a pytest specific decorator,\n :func:`~sklearn.utils.parametrize_with_checks`, making it easier to test\n multiple estimators.\n\n Parameters\n ----------\n Estimator : estimator object\n Estimator instance to check.\n\n .. versionchanged:: 0.24\n Passing a class was deprecated in version 0.23, and support for\n classes was removed in 0.24.\n\n generate_only : bool, default=False\n When `False`, checks are evaluated when `check_estimator` is called.\n When `True`, `check_estimator` returns a generator that yields\n (estimator, check) tuples. The check is run by calling\n `check(estimator)`.\n\n .. versionadded:: 0.22\n\n Returns\n -------\n checks_generator : generator\n Generator that yields (estimator, check) tuples. Returned when\n `generate_only=True`.\n \"\"\"\n if isinstance(Estimator, type):\n msg = (\"Passing a class was deprecated in version 0.23 \"\n \"and isn't supported anymore from 0.24.\"\n \"Please pass an instance instead.\")\n raise TypeError(msg)\n\n estimator = Estimator\n name = type(estimator).__name__\n\n def checks_generator():\n for check in _yield_all_checks(estimator):\n check = _maybe_skip(estimator, check)\n yield estimator, partial(check, name)\n\n if generate_only:\n return checks_generator()\n\n for estimator, check in checks_generator():\n try:\n check(estimator)\n except SkipTest as exception:\n # SkipTest is thrown when pandas can't be imported, or by checks\n # that are in the xfail_checks tag\n warnings.warn(str(exception), SkipTestWarning)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimator_get_tags_default_keys", + "name": "check_estimator_get_tags_default_keys", + "qname": "sklearn.utils.estimator_checks.check_estimator_get_tags_default_keys", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimator_get_tags_default_keys/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_estimator_get_tags_default_keys.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimator_get_tags_default_keys/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_estimator_get_tags_default_keys.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def 
check_estimator_get_tags_default_keys(name, estimator_orig):\n # check that if _get_tags is implemented, it contains all keys from\n # _DEFAULT_KEYS\n estimator = clone(estimator_orig)\n if not hasattr(estimator, \"_get_tags\"):\n return\n\n tags_keys = set(estimator._get_tags().keys())\n default_tags_keys = set(_DEFAULT_TAGS.keys())\n assert tags_keys.intersection(default_tags_keys) == default_tags_keys, (\n f\"{name}._get_tags() is missing entries for the following default tags\"\n f\": {default_tags_keys - tags_keys.intersection(default_tags_keys)}\"\n )" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimator_sparse_data", + "name": "check_estimator_sparse_data", + "qname": "sklearn.utils.estimator_checks.check_estimator_sparse_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimator_sparse_data/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_estimator_sparse_data.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimator_sparse_data/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_estimator_sparse_data.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_estimator_sparse_data(name, estimator_orig):\n rng = np.random.RandomState(0)\n X = rng.rand(40, 10)\n X[X < .8] = 0\n X = _pairwise_estimator_convert_X(X, estimator_orig)\n X_csr = sparse.csr_matrix(X)\n y = (4 * rng.rand(40)).astype(int)\n # catch deprecation warnings\n with ignore_warnings(category=FutureWarning):\n estimator = clone(estimator_orig)\n y = _enforce_estimator_tags_y(estimator, y)\n tags = _safe_tags(estimator_orig)\n for matrix_format, X in _generate_sparse_matrix(X_csr):\n # catch deprecation warnings\n with ignore_warnings(category=FutureWarning):\n estimator = clone(estimator_orig)\n if name in ['Scaler', 'StandardScaler']:\n estimator.set_params(with_mean=False)\n # fit and predict\n if \"64\" in matrix_format:\n err_msg = (\n f\"Estimator {name} doesn't seem to support {matrix_format} \"\n \"matrix, and is not failing gracefully, e.g. 
by using \"\n \"check_array(X, accept_large_sparse=False)\"\n )\n else:\n err_msg = (\n f\"Estimator {name} doesn't seem to fail gracefully on sparse \"\n \"data: error message should state explicitly that sparse \"\n \"input is not supported if this is not the case.\"\n )\n with raises(\n (TypeError, ValueError),\n match=[\"sparse\", \"Sparse\"],\n may_pass=True,\n err_msg=err_msg,\n ):\n with ignore_warnings(category=FutureWarning):\n estimator.fit(X, y)\n if hasattr(estimator, \"predict\"):\n pred = estimator.predict(X)\n if tags['multioutput_only']:\n assert pred.shape == (X.shape[0], 1)\n else:\n assert pred.shape == (X.shape[0],)\n if hasattr(estimator, 'predict_proba'):\n probs = estimator.predict_proba(X)\n if tags['binary_only']:\n expected_probs_shape = (X.shape[0], 2)\n else:\n expected_probs_shape = (X.shape[0], 4)\n assert probs.shape == expected_probs_shape" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_data_not_an_array", + "name": "check_estimators_data_not_an_array", + "qname": "sklearn.utils.estimator_checks.check_estimators_data_not_an_array", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_data_not_an_array/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_estimators_data_not_an_array.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_data_not_an_array/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_estimators_data_not_an_array.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_data_not_an_array/X", + "name": "X", + "qname": "sklearn.utils.estimator_checks.check_estimators_data_not_an_array.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_data_not_an_array/y", + "name": "y", + "qname": "sklearn.utils.estimator_checks.check_estimators_data_not_an_array.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_data_not_an_array/obj_type", + "name": "obj_type", + "qname": "sklearn.utils.estimator_checks.check_estimators_data_not_an_array.obj_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_data_not_an_array(name, estimator_orig, X, y, obj_type):\n if name in CROSS_DECOMPOSITION:\n raise SkipTest(\"Skipping check_estimators_data_not_an_array \"\n \"for cross decomposition module as estimators \"\n \"are not deterministic.\")\n # 
separate estimators to control random seeds\n estimator_1 = clone(estimator_orig)\n estimator_2 = clone(estimator_orig)\n set_random_state(estimator_1)\n set_random_state(estimator_2)\n\n if obj_type not in [\"NotAnArray\", 'PandasDataframe']:\n raise ValueError(\"Data type {0} not supported\".format(obj_type))\n\n if obj_type == \"NotAnArray\":\n y_ = _NotAnArray(np.asarray(y))\n X_ = _NotAnArray(np.asarray(X))\n else:\n # Here pandas objects (Series and DataFrame) are tested explicitly\n # because some estimators may handle them (especially their indexing)\n # specially.\n try:\n import pandas as pd\n y_ = np.asarray(y)\n if y_.ndim == 1:\n y_ = pd.Series(y_)\n else:\n y_ = pd.DataFrame(y_)\n X_ = pd.DataFrame(np.asarray(X))\n\n except ImportError:\n raise SkipTest(\"pandas is not installed: not checking estimators \"\n \"for pandas objects.\")\n\n # fit\n estimator_1.fit(X_, y_)\n pred1 = estimator_1.predict(X_)\n estimator_2.fit(X, y)\n pred2 = estimator_2.predict(X)\n assert_allclose(pred1, pred2, atol=1e-2, err_msg=name)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_dtypes", + "name": "check_estimators_dtypes", + "qname": "sklearn.utils.estimator_checks.check_estimators_dtypes", + "decorators": ["ignore_warnings"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_dtypes/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_estimators_dtypes.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_dtypes/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_estimators_dtypes.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings\ndef check_estimators_dtypes(name, estimator_orig):\n rnd = np.random.RandomState(0)\n X_train_32 = 3 * rnd.uniform(size=(20, 5)).astype(np.float32)\n X_train_32 = _pairwise_estimator_convert_X(X_train_32, estimator_orig)\n X_train_64 = X_train_32.astype(np.float64)\n X_train_int_64 = X_train_32.astype(np.int64)\n X_train_int_32 = X_train_32.astype(np.int32)\n y = X_train_int_64[:, 0]\n y = _enforce_estimator_tags_y(estimator_orig, y)\n\n methods = [\"predict\", \"transform\", \"decision_function\", \"predict_proba\"]\n\n for X_train in [X_train_32, X_train_64, X_train_int_64, X_train_int_32]:\n estimator = clone(estimator_orig)\n set_random_state(estimator, 1)\n estimator.fit(X_train, y)\n\n for method in methods:\n if hasattr(estimator, method):\n getattr(estimator, method)(X_train)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_empty_data_messages", + "name": "check_estimators_empty_data_messages", + "qname": "sklearn.utils.estimator_checks.check_estimators_empty_data_messages", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_empty_data_messages/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_estimators_empty_data_messages.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + 
"is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_empty_data_messages/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_estimators_empty_data_messages.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_empty_data_messages(name, estimator_orig):\n e = clone(estimator_orig)\n set_random_state(e, 1)\n\n X_zero_samples = np.empty(0).reshape(0, 3)\n # The precise message can change depending on whether X or y is\n # validated first. Let us test the type of exception only:\n err_msg = (\n f\"The estimator {name} does not raise an error when an \"\n \"empty data is used to train. Perhaps use check_array in train.\"\n )\n with raises(ValueError, err_msg=err_msg):\n e.fit(X_zero_samples, [])\n\n X_zero_features = np.empty(0).reshape(12, 0)\n # the following y should be accepted by both classifiers and regressors\n # and ignored by unsupervised models\n y = _enforce_estimator_tags_y(\n e, np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0])\n )\n msg = (\n r\"0 feature\\(s\\) \\(shape=\\(\\d*, 0\\)\\) while a minimum of \\d* \"\n \"is required.\"\n )\n with raises(ValueError, match=msg):\n e.fit(X_zero_features, y)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_fit_returns_self", + "name": "check_estimators_fit_returns_self", + "qname": "sklearn.utils.estimator_checks.check_estimators_fit_returns_self", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_fit_returns_self/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_estimators_fit_returns_self.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_fit_returns_self/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_estimators_fit_returns_self.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_fit_returns_self/readonly_memmap", + "name": "readonly_memmap", + "qname": "sklearn.utils.estimator_checks.check_estimators_fit_returns_self.readonly_memmap", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Check if self is returned when calling fit.", + "docstring": "Check if self is returned when calling fit.", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_fit_returns_self(\n name, estimator_orig, readonly_memmap=False\n):\n \"\"\"Check if self is returned when calling fit.\"\"\"\n X, y = 
make_blobs(random_state=0, n_samples=21)\n # some want non-negative input\n X -= X.min()\n X = _pairwise_estimator_convert_X(X, estimator_orig)\n\n estimator = clone(estimator_orig)\n y = _enforce_estimator_tags_y(estimator, y)\n\n if readonly_memmap:\n X, y = create_memmap_backed_data([X, y])\n\n set_random_state(estimator)\n assert estimator.fit(X, y) is estimator" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_nan_inf", + "name": "check_estimators_nan_inf", + "qname": "sklearn.utils.estimator_checks.check_estimators_nan_inf", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_nan_inf/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_estimators_nan_inf.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_nan_inf/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_estimators_nan_inf.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_nan_inf(name, estimator_orig):\n # Checks that Estimator X's do not contain NaN or inf.\n rnd = np.random.RandomState(0)\n X_train_finite = _pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)),\n estimator_orig)\n X_train_nan = rnd.uniform(size=(10, 3))\n X_train_nan[0, 0] = np.nan\n X_train_inf = rnd.uniform(size=(10, 3))\n X_train_inf[0, 0] = np.inf\n y = np.ones(10)\n y[:5] = 0\n y = _enforce_estimator_tags_y(estimator_orig, y)\n error_string_fit = \"Estimator doesn't check for NaN and inf in fit.\"\n error_string_predict = (\"Estimator doesn't check for NaN and inf in\"\n \" predict.\")\n error_string_transform = (\"Estimator doesn't check for NaN and inf in\"\n \" transform.\")\n for X_train in [X_train_nan, X_train_inf]:\n # catch deprecation warnings\n with ignore_warnings(category=FutureWarning):\n estimator = clone(estimator_orig)\n set_random_state(estimator, 1)\n # try to fit\n with raises(\n ValueError, match=[\"inf\", \"NaN\"], err_msg=error_string_fit\n ):\n estimator.fit(X_train, y)\n # actually fit\n estimator.fit(X_train_finite, y)\n\n # predict\n if hasattr(estimator, \"predict\"):\n with raises(\n ValueError,\n match=[\"inf\", \"NaN\"],\n err_msg=error_string_predict,\n ):\n estimator.predict(X_train)\n\n # transform\n if hasattr(estimator, \"transform\"):\n with raises(\n ValueError,\n match=[\"inf\", \"NaN\"],\n err_msg=error_string_transform,\n ):\n estimator.transform(X_train)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_overwrite_params", + "name": "check_estimators_overwrite_params", + "qname": "sklearn.utils.estimator_checks.check_estimators_overwrite_params", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_overwrite_params/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_estimators_overwrite_params.name", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_overwrite_params/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_estimators_overwrite_params.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_overwrite_params(name, estimator_orig):\n X, y = make_blobs(random_state=0, n_samples=21)\n # some want non-negative input\n X -= X.min()\n X = _pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)\n estimator = clone(estimator_orig)\n y = _enforce_estimator_tags_y(estimator, y)\n\n set_random_state(estimator)\n\n # Make a physical copy of the original estimator parameters before fitting.\n params = estimator.get_params()\n original_params = deepcopy(params)\n\n # Fit the model\n estimator.fit(X, y)\n\n # Compare the state of the model parameters with the original parameters\n new_params = estimator.get_params()\n for param_name, original_value in original_params.items():\n new_value = new_params[param_name]\n\n # We should never change or mutate the internal state of input\n # parameters by default. To check this we use the joblib.hash function\n # that introspects recursively any subobjects to compute a checksum.\n # The only exception to this rule of immutable constructor parameters\n # is possible RandomState instance but in this check we explicitly\n # fixed the random_state params recursively to be integer seeds.\n assert joblib.hash(new_value) == joblib.hash(original_value), (\n \"Estimator %s should not change or mutate \"\n \" the parameter %s from %s to %s during fit.\"\n % (name, param_name, original_value, new_value))" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_partial_fit_n_features", + "name": "check_estimators_partial_fit_n_features", + "qname": "sklearn.utils.estimator_checks.check_estimators_partial_fit_n_features", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_partial_fit_n_features/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_estimators_partial_fit_n_features.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_partial_fit_n_features/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_estimators_partial_fit_n_features.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_estimators_partial_fit_n_features(name, estimator_orig):\n # check if number of features changes between calls to partial_fit.\n if not 
hasattr(estimator_orig, 'partial_fit'):\n return\n estimator = clone(estimator_orig)\n X, y = make_blobs(n_samples=50, random_state=1)\n X -= X.min()\n y = _enforce_estimator_tags_y(estimator_orig, y)\n\n try:\n if is_classifier(estimator):\n classes = np.unique(y)\n estimator.partial_fit(X, y, classes=classes)\n else:\n estimator.partial_fit(X, y)\n except NotImplementedError:\n return\n\n with raises(\n ValueError,\n err_msg=f\"The estimator {name} does not raise an error when the \"\n \"number of features changes between calls to partial_fit.\",\n ):\n estimator.partial_fit(X[:, :-1], y)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_pickle", + "name": "check_estimators_pickle", + "qname": "sklearn.utils.estimator_checks.check_estimators_pickle", + "decorators": ["ignore_warnings"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_pickle/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_estimators_pickle.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_pickle/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_estimators_pickle.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Test that we can pickle all estimators.", + "docstring": "Test that we can pickle all estimators.", + "code": "@ignore_warnings\ndef check_estimators_pickle(name, estimator_orig):\n \"\"\"Test that we can pickle all estimators.\"\"\"\n check_methods = [\"predict\", \"transform\", \"decision_function\",\n \"predict_proba\"]\n\n X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],\n random_state=0, n_features=2, cluster_std=0.1)\n\n # some estimators can't do features less than 0\n X -= X.min()\n X = _pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)\n\n tags = _safe_tags(estimator_orig)\n # include NaN values when the estimator should deal with them\n if tags['allow_nan']:\n # set randomly 10 elements to np.nan\n rng = np.random.RandomState(42)\n mask = rng.choice(X.size, 10, replace=False)\n X.reshape(-1)[mask] = np.nan\n\n estimator = clone(estimator_orig)\n\n y = _enforce_estimator_tags_y(estimator, y)\n\n set_random_state(estimator)\n estimator.fit(X, y)\n\n # pickle and unpickle!\n pickled_estimator = pickle.dumps(estimator)\n module_name = estimator.__module__\n if module_name.startswith('sklearn.') and not (\n \"test_\" in module_name or module_name.endswith(\"_testing\")\n ):\n # strict check for sklearn estimators that are not implemented in test\n # modules.\n assert b\"version\" in pickled_estimator\n unpickled_estimator = pickle.loads(pickled_estimator)\n\n result = dict()\n for method in check_methods:\n if hasattr(estimator, method):\n result[method] = getattr(estimator, method)(X)\n\n for method in result:\n unpickled_result = getattr(unpickled_estimator, method)(X)\n assert_allclose_dense_sparse(result[method], unpickled_result)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_unfitted", + "name": "check_estimators_unfitted", + "qname": 
"sklearn.utils.estimator_checks.check_estimators_unfitted", + "decorators": ["ignore_warnings"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_unfitted/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_estimators_unfitted.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_estimators_unfitted/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_estimators_unfitted.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Check that predict raises an exception in an unfitted estimator.\n\nUnfitted estimators should raise a NotFittedError.", + "docstring": "Check that predict raises an exception in an unfitted estimator.\n\nUnfitted estimators should raise a NotFittedError.", + "code": "@ignore_warnings\ndef check_estimators_unfitted(name, estimator_orig):\n \"\"\"Check that predict raises an exception in an unfitted estimator.\n\n Unfitted estimators should raise a NotFittedError.\n \"\"\"\n # Common test for Regressors, Classifiers and Outlier detection estimators\n X, y = _regression_dataset()\n\n estimator = clone(estimator_orig)\n for method in ('decision_function', 'predict', 'predict_proba',\n 'predict_log_proba'):\n if hasattr(estimator, method):\n with raises(NotFittedError):\n getattr(estimator, method)(X)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit1d", + "name": "check_fit1d", + "qname": "sklearn.utils.estimator_checks.check_fit1d", + "decorators": ["ignore_warnings"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit1d/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_fit1d.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit1d/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_fit1d.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings\ndef check_fit1d(name, estimator_orig):\n # check fitting 1d X array raises a ValueError\n rnd = np.random.RandomState(0)\n X = 3 * rnd.uniform(size=(20))\n y = X.astype(int)\n estimator = clone(estimator_orig)\n y = _enforce_estimator_tags_y(estimator, y)\n\n if hasattr(estimator, \"n_components\"):\n estimator.n_components = 1\n if hasattr(estimator, \"n_clusters\"):\n estimator.n_clusters = 1\n\n set_random_state(estimator, 1)\n with raises(ValueError):\n estimator.fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit2d_1feature", + "name": "check_fit2d_1feature", + "qname": "sklearn.utils.estimator_checks.check_fit2d_1feature", + "decorators": ["ignore_warnings"], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.utils.estimator_checks/check_fit2d_1feature/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_fit2d_1feature.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit2d_1feature/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_fit2d_1feature.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings\ndef check_fit2d_1feature(name, estimator_orig):\n # check fitting a 2d array with only 1 feature either works or returns\n # informative message\n rnd = np.random.RandomState(0)\n X = 3 * rnd.uniform(size=(10, 1))\n X = _pairwise_estimator_convert_X(X, estimator_orig)\n y = X[:, 0].astype(int)\n estimator = clone(estimator_orig)\n y = _enforce_estimator_tags_y(estimator, y)\n\n if hasattr(estimator, \"n_components\"):\n estimator.n_components = 1\n if hasattr(estimator, \"n_clusters\"):\n estimator.n_clusters = 1\n # ensure two labels in subsample for RandomizedLogisticRegression\n if name == 'RandomizedLogisticRegression':\n estimator.sample_fraction = 1\n # ensure non skipped trials for RANSACRegressor\n if name == 'RANSACRegressor':\n estimator.residual_threshold = 0.5\n\n y = _enforce_estimator_tags_y(estimator, y)\n set_random_state(estimator, 1)\n\n msgs = [r\"1 feature\\(s\\)\", \"n_features = 1\", \"n_features=1\"]\n\n with raises(ValueError, match=msgs, may_pass=True):\n estimator.fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit2d_1sample", + "name": "check_fit2d_1sample", + "qname": "sklearn.utils.estimator_checks.check_fit2d_1sample", + "decorators": ["ignore_warnings"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit2d_1sample/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_fit2d_1sample.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit2d_1sample/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_fit2d_1sample.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings\ndef check_fit2d_1sample(name, estimator_orig):\n # Check that fitting a 2d array with only one sample either works or\n # returns an informative message. 
The error message should either mention\n # the number of samples or the number of classes.\n rnd = np.random.RandomState(0)\n X = 3 * rnd.uniform(size=(1, 10))\n X = _pairwise_estimator_convert_X(X, estimator_orig)\n\n y = X[:, 0].astype(int)\n estimator = clone(estimator_orig)\n y = _enforce_estimator_tags_y(estimator, y)\n\n if hasattr(estimator, \"n_components\"):\n estimator.n_components = 1\n if hasattr(estimator, \"n_clusters\"):\n estimator.n_clusters = 1\n\n set_random_state(estimator, 1)\n\n # min_cluster_size cannot be less than the data size for OPTICS.\n if name == 'OPTICS':\n estimator.set_params(min_samples=1)\n\n msgs = [\"1 sample\", \"n_samples = 1\", \"n_samples=1\", \"one sample\",\n \"1 class\", \"one class\"]\n\n with raises(ValueError, match=msgs, may_pass=True):\n estimator.fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit2d_predict1d", + "name": "check_fit2d_predict1d", + "qname": "sklearn.utils.estimator_checks.check_fit2d_predict1d", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit2d_predict1d/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_fit2d_predict1d.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit2d_predict1d/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_fit2d_predict1d.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_fit2d_predict1d(name, estimator_orig):\n # check by fitting a 2d array and predicting with a 1d array\n rnd = np.random.RandomState(0)\n X = 3 * rnd.uniform(size=(20, 3))\n X = _pairwise_estimator_convert_X(X, estimator_orig)\n y = X[:, 0].astype(int)\n estimator = clone(estimator_orig)\n y = _enforce_estimator_tags_y(estimator, y)\n\n if hasattr(estimator, \"n_components\"):\n estimator.n_components = 1\n if hasattr(estimator, \"n_clusters\"):\n estimator.n_clusters = 1\n\n set_random_state(estimator, 1)\n estimator.fit(X, y)\n\n for method in [\"predict\", \"transform\", \"decision_function\",\n \"predict_proba\"]:\n if hasattr(estimator, method):\n assert_raise_message(ValueError, \"Reshape your data\",\n getattr(estimator, method), X[0])" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit_idempotent", + "name": "check_fit_idempotent", + "qname": "sklearn.utils.estimator_checks.check_fit_idempotent", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit_idempotent/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_fit_idempotent.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit_idempotent/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_fit_idempotent.estimator_orig", + "default_value": 
null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_fit_idempotent(name, estimator_orig):\n # Check that est.fit(X) is the same as est.fit(X).fit(X). Ideally we would\n # check that the estimated parameters during training (e.g. coefs_) are\n # the same, but having a universal comparison function for those\n # attributes is difficult and full of edge cases. So instead we check that\n # predict(), predict_proba(), decision_function() and transform() return\n # the same results.\n\n check_methods = [\"predict\", \"transform\", \"decision_function\",\n \"predict_proba\"]\n rng = np.random.RandomState(0)\n\n estimator = clone(estimator_orig)\n set_random_state(estimator)\n if 'warm_start' in estimator.get_params().keys():\n estimator.set_params(warm_start=False)\n\n n_samples = 100\n X = rng.normal(loc=100, size=(n_samples, 2))\n X = _pairwise_estimator_convert_X(X, estimator)\n if is_regressor(estimator_orig):\n y = rng.normal(size=n_samples)\n else:\n y = rng.randint(low=0, high=2, size=n_samples)\n y = _enforce_estimator_tags_y(estimator, y)\n\n train, test = next(ShuffleSplit(test_size=.2, random_state=rng).split(X))\n X_train, y_train = _safe_split(estimator, X, y, train)\n X_test, y_test = _safe_split(estimator, X, y, test, train)\n\n # Fit for the first time\n estimator.fit(X_train, y_train)\n\n result = {method: getattr(estimator, method)(X_test)\n for method in check_methods\n if hasattr(estimator, method)}\n\n # Fit again\n set_random_state(estimator)\n estimator.fit(X_train, y_train)\n\n for method in check_methods:\n if hasattr(estimator, method):\n new_result = getattr(estimator, method)(X_test)\n if np.issubdtype(new_result.dtype, np.floating):\n tol = 2*np.finfo(new_result.dtype).eps\n else:\n tol = 2*np.finfo(np.float64).eps\n assert_allclose_dense_sparse(\n result[method], new_result,\n atol=max(tol, 1e-9), rtol=max(tol, 1e-7),\n err_msg=\"Idempotency check failed for method {}\".format(method)\n )" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit_non_negative", + "name": "check_fit_non_negative", + "qname": "sklearn.utils.estimator_checks.check_fit_non_negative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit_non_negative/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_fit_non_negative.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit_non_negative/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_fit_non_negative.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_fit_non_negative(name, estimator_orig):\n # Check that proper warning is raised for non-negative X\n # when tag requires_positive_X is present\n X = np.array([[-1., 1], [-1., 1]])\n y = np.array([1, 2])\n estimator = clone(estimator_orig)\n with raises(ValueError):\n 
estimator.fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit_score_takes_y", + "name": "check_fit_score_takes_y", + "qname": "sklearn.utils.estimator_checks.check_fit_score_takes_y", + "decorators": ["ignore_warnings"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit_score_takes_y/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_fit_score_takes_y.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_fit_score_takes_y/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_fit_score_takes_y.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings\ndef check_fit_score_takes_y(name, estimator_orig):\n # check that all estimators accept an optional y\n # in fit and score so they can be used in pipelines\n rnd = np.random.RandomState(0)\n n_samples = 30\n X = rnd.uniform(size=(n_samples, 3))\n X = _pairwise_estimator_convert_X(X, estimator_orig)\n y = np.arange(n_samples) % 3\n estimator = clone(estimator_orig)\n y = _enforce_estimator_tags_y(estimator, y)\n set_random_state(estimator)\n\n funcs = [\"fit\", \"score\", \"partial_fit\", \"fit_predict\", \"fit_transform\"]\n for func_name in funcs:\n func = getattr(estimator, func_name, None)\n if func is not None:\n func(X, y)\n args = [p.name for p in signature(func).parameters.values()]\n if args[0] == \"self\":\n # if_delegate_has_method makes methods into functions\n # with an explicit \"self\", so need to shift arguments\n args = args[1:]\n assert args[1] in [\"y\", \"Y\"], (\n \"Expected y or Y as second argument for method \"\n \"%s of %s. 
Got arguments: %r.\"\n % (func_name, type(estimator).__name__, args))" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_get_params_invariance", + "name": "check_get_params_invariance", + "qname": "sklearn.utils.estimator_checks.check_get_params_invariance", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_get_params_invariance/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_get_params_invariance.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_get_params_invariance/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_get_params_invariance.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_get_params_invariance(name, estimator_orig):\n # Checks if get_params(deep=False) is a subset of get_params(deep=True)\n e = clone(estimator_orig)\n\n shallow_params = e.get_params(deep=False)\n deep_params = e.get_params(deep=True)\n\n assert all(item in deep_params.items() for item in\n shallow_params.items())" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_methods_sample_order_invariance", + "name": "check_methods_sample_order_invariance", + "qname": "sklearn.utils.estimator_checks.check_methods_sample_order_invariance", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_methods_sample_order_invariance/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_methods_sample_order_invariance.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_methods_sample_order_invariance/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_methods_sample_order_invariance.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_methods_sample_order_invariance(name, estimator_orig):\n # check that method gives invariant results if applied\n # on a subset with different sample order\n rnd = np.random.RandomState(0)\n X = 3 * rnd.uniform(size=(20, 3))\n X = _pairwise_estimator_convert_X(X, estimator_orig)\n y = X[:, 0].astype(np.int64)\n if _safe_tags(estimator_orig, key='binary_only'):\n y[y == 2] = 1\n estimator = clone(estimator_orig)\n y = _enforce_estimator_tags_y(estimator, y)\n\n if hasattr(estimator, \"n_components\"):\n estimator.n_components = 1\n if hasattr(estimator, \"n_clusters\"):\n estimator.n_clusters = 2\n\n set_random_state(estimator, 1)\n estimator.fit(X, 
y)\n\n idx = np.random.permutation(X.shape[0])\n\n for method in [\"predict\", \"transform\", \"decision_function\",\n \"score_samples\", \"predict_proba\"]:\n msg = (\"{method} of {name} is not invariant when applied to a dataset\"\n \"with different sample order.\").format(method=method, name=name)\n\n if hasattr(estimator, method):\n assert_allclose_dense_sparse(getattr(estimator, method)(X)[idx],\n getattr(estimator, method)(X[idx]),\n atol=1e-9,\n err_msg=msg)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_methods_subset_invariance", + "name": "check_methods_subset_invariance", + "qname": "sklearn.utils.estimator_checks.check_methods_subset_invariance", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_methods_subset_invariance/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_methods_subset_invariance.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_methods_subset_invariance/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_methods_subset_invariance.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_methods_subset_invariance(name, estimator_orig):\n # check that method gives invariant results if applied\n # on mini batches or the whole set\n rnd = np.random.RandomState(0)\n X = 3 * rnd.uniform(size=(20, 3))\n X = _pairwise_estimator_convert_X(X, estimator_orig)\n y = X[:, 0].astype(int)\n estimator = clone(estimator_orig)\n y = _enforce_estimator_tags_y(estimator, y)\n\n if hasattr(estimator, \"n_components\"):\n estimator.n_components = 1\n if hasattr(estimator, \"n_clusters\"):\n estimator.n_clusters = 1\n\n set_random_state(estimator, 1)\n estimator.fit(X, y)\n\n for method in [\"predict\", \"transform\", \"decision_function\",\n \"score_samples\", \"predict_proba\"]:\n\n msg = (\"{method} of {name} is not invariant when applied \"\n \"to a subset.\").format(method=method, name=name)\n\n if hasattr(estimator, method):\n result_full, result_by_batch = _apply_on_subsets(\n getattr(estimator, method), X)\n assert_allclose(result_full, result_by_batch,\n atol=1e-7, err_msg=msg)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_n_features_in", + "name": "check_n_features_in", + "qname": "sklearn.utils.estimator_checks.check_n_features_in", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_n_features_in/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_n_features_in.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_n_features_in/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_n_features_in.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + 
"is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_n_features_in(name, estimator_orig):\n # Make sure that n_features_in_ attribute doesn't exist until fit is\n # called, and that its value is correct.\n\n rng = np.random.RandomState(0)\n\n estimator = clone(estimator_orig)\n set_random_state(estimator)\n if 'warm_start' in estimator.get_params():\n estimator.set_params(warm_start=False)\n\n n_samples = 100\n X = rng.normal(loc=100, size=(n_samples, 2))\n X = _pairwise_estimator_convert_X(X, estimator)\n if is_regressor(estimator_orig):\n y = rng.normal(size=n_samples)\n else:\n y = rng.randint(low=0, high=2, size=n_samples)\n y = _enforce_estimator_tags_y(estimator, y)\n\n assert not hasattr(estimator, 'n_features_in_')\n estimator.fit(X, y)\n if hasattr(estimator, 'n_features_in_'):\n assert estimator.n_features_in_ == X.shape[1]\n else:\n warnings.warn(\n \"As of scikit-learn 0.23, estimators should expose a \"\n \"n_features_in_ attribute, unless the 'no_validation' tag is \"\n \"True. This attribute should be equal to the number of features \"\n \"passed to the fit method. \"\n \"An error will be raised from version 1.0 (renaming of 0.25) \"\n \"when calling check_estimator(). \"\n \"See SLEP010: \"\n \"https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep010/proposal.html\", # noqa\n FutureWarning\n )" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_n_features_in_after_fitting", + "name": "check_n_features_in_after_fitting", + "qname": "sklearn.utils.estimator_checks.check_n_features_in_after_fitting", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_n_features_in_after_fitting/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_n_features_in_after_fitting.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_n_features_in_after_fitting/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_n_features_in_after_fitting.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_n_features_in_after_fitting(name, estimator_orig):\n # Make sure that n_features_in are checked after fitting\n tags = _safe_tags(estimator_orig)\n\n if \"2darray\" not in tags[\"X_types\"] or tags[\"no_validation\"]:\n return\n\n rng = np.random.RandomState(0)\n\n estimator = clone(estimator_orig)\n set_random_state(estimator)\n if 'warm_start' in estimator.get_params():\n estimator.set_params(warm_start=False)\n\n n_samples = 100\n X = rng.normal(loc=100, size=(n_samples, 2))\n X = _pairwise_estimator_convert_X(X, estimator)\n if is_regressor(estimator):\n y = rng.normal(size=n_samples)\n else:\n y = rng.randint(low=0, high=2, size=n_samples)\n y = _enforce_estimator_tags_y(estimator, y)\n\n estimator.fit(X, y)\n assert estimator.n_features_in_ == X.shape[1]\n\n # check methods will check n_features_in_\n check_methods = 
[\"predict\", \"transform\", \"decision_function\",\n \"predict_proba\"]\n X_bad = X[:, [1]]\n\n msg = (f\"X has 1 features, but \\\\w+ is expecting {X.shape[1]} \"\n \"features as input\")\n for method in check_methods:\n if not hasattr(estimator, method):\n continue\n with raises(ValueError, match=msg):\n getattr(estimator, method)(X_bad)\n\n # partial_fit will check in the second call\n if not hasattr(estimator, \"partial_fit\"):\n return\n\n estimator = clone(estimator_orig)\n if is_classifier(estimator):\n estimator.partial_fit(X, y, classes=np.unique(y))\n else:\n estimator.partial_fit(X, y)\n assert estimator.n_features_in_ == X.shape[1]\n\n with raises(ValueError, match=msg):\n estimator.partial_fit(X_bad, y)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_no_attributes_set_in_init", + "name": "check_no_attributes_set_in_init", + "qname": "sklearn.utils.estimator_checks.check_no_attributes_set_in_init", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_no_attributes_set_in_init/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_no_attributes_set_in_init.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_no_attributes_set_in_init/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_no_attributes_set_in_init.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Check setting during init.", + "docstring": "Check setting during init.", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_no_attributes_set_in_init(name, estimator_orig):\n \"\"\"Check setting during init.\"\"\"\n try:\n # Clone fails if the estimator does not store\n # all parameters as an attribute during init\n estimator = clone(estimator_orig)\n except AttributeError:\n raise AttributeError(f\"Estimator {name} should store all \"\n \"parameters as an attribute during init.\")\n\n if hasattr(type(estimator).__init__, \"deprecated_original\"):\n return\n\n init_params = _get_args(type(estimator).__init__)\n if IS_PYPY:\n # __init__ signature has additional objects in PyPy\n for key in ['obj']:\n if key in init_params:\n init_params.remove(key)\n parents_init_params = [param for params_parent in\n (_get_args(parent) for parent in\n type(estimator).__mro__)\n for param in params_parent]\n\n # Test for no setting apart from parameters during init\n invalid_attr = (set(vars(estimator)) - set(init_params)\n - set(parents_init_params))\n assert not invalid_attr, (\n \"Estimator %s should not set any attribute apart\"\n \" from parameters during init. 
Found attributes %s.\"\n % (name, sorted(invalid_attr)))" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_non_transformer_estimators_n_iter", + "name": "check_non_transformer_estimators_n_iter", + "qname": "sklearn.utils.estimator_checks.check_non_transformer_estimators_n_iter", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_non_transformer_estimators_n_iter/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_non_transformer_estimators_n_iter.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_non_transformer_estimators_n_iter/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_non_transformer_estimators_n_iter.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_non_transformer_estimators_n_iter(name, estimator_orig):\n # Test that estimators that are not transformers with a parameter\n # max_iter, return the attribute of n_iter_ at least 1.\n\n # These models are dependent on external solvers like\n # libsvm and accessing the iter parameter is non-trivial.\n # SelfTrainingClassifier does not perform an iteration if all samples are\n # labeled, hence n_iter_ = 0 is valid.\n not_run_check_n_iter = ['Ridge', 'SVR', 'NuSVR', 'NuSVC',\n 'RidgeClassifier', 'SVC', 'RandomizedLasso',\n 'LogisticRegressionCV', 'LinearSVC',\n 'LogisticRegression', 'SelfTrainingClassifier']\n\n # Tested in test_transformer_n_iter\n not_run_check_n_iter += CROSS_DECOMPOSITION\n if name in not_run_check_n_iter:\n return\n\n # LassoLars stops early for the default alpha=1.0 the iris dataset.\n if name == 'LassoLars':\n estimator = clone(estimator_orig).set_params(alpha=0.)\n else:\n estimator = clone(estimator_orig)\n if hasattr(estimator, 'max_iter'):\n iris = load_iris()\n X, y_ = iris.data, iris.target\n y_ = _enforce_estimator_tags_y(estimator, y_)\n\n set_random_state(estimator, 0)\n\n estimator.fit(X, y_)\n\n assert estimator.n_iter_ >= 1" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_nonsquare_error", + "name": "check_nonsquare_error", + "qname": "sklearn.utils.estimator_checks.check_nonsquare_error", + "decorators": ["ignore_warnings"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_nonsquare_error/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_nonsquare_error.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_nonsquare_error/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_nonsquare_error.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + 
"is_public": true, + "reexported_by": [], + "description": "Test that error is thrown when non-square data provided.", + "docstring": "Test that error is thrown when non-square data provided.", + "code": "@ignore_warnings\ndef check_nonsquare_error(name, estimator_orig):\n \"\"\"Test that error is thrown when non-square data provided.\"\"\"\n\n X, y = make_blobs(n_samples=20, n_features=10)\n estimator = clone(estimator_orig)\n\n with raises(\n ValueError,\n err_msg=f\"The pairwise estimator {name} does not raise an error \"\n \"on non-square data\",\n ):\n estimator.fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_outlier_corruption", + "name": "check_outlier_corruption", + "qname": "sklearn.utils.estimator_checks.check_outlier_corruption", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_outlier_corruption/num_outliers", + "name": "num_outliers", + "qname": "sklearn.utils.estimator_checks.check_outlier_corruption.num_outliers", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_outlier_corruption/expected_outliers", + "name": "expected_outliers", + "qname": "sklearn.utils.estimator_checks.check_outlier_corruption.expected_outliers", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_outlier_corruption/decision", + "name": "decision", + "qname": "sklearn.utils.estimator_checks.check_outlier_corruption.decision", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_outlier_corruption(num_outliers, expected_outliers, decision):\n # Check for deviation from the precise given contamination level that may\n # be due to ties in the anomaly scores.\n if num_outliers < expected_outliers:\n start = num_outliers\n end = expected_outliers + 1\n else:\n start = expected_outliers\n end = num_outliers + 1\n\n # ensure that all values in the 'critical area' are tied,\n # leading to the observed discrepancy between provided\n # and actual contamination levels.\n sorted_decision = np.sort(decision)\n msg = ('The number of predicted outliers is not equal to the expected '\n 'number of outliers and this difference is not explained by the '\n 'number of ties in the decision_function values')\n assert len(np.unique(sorted_decision[start:end])) == 1, msg" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_outliers_fit_predict", + "name": "check_outliers_fit_predict", + "qname": "sklearn.utils.estimator_checks.check_outliers_fit_predict", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_outliers_fit_predict/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_outliers_fit_predict.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.utils.estimator_checks/check_outliers_fit_predict/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_outliers_fit_predict.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_outliers_fit_predict(name, estimator_orig):\n # Check fit_predict for outlier detectors.\n\n n_samples = 300\n X, _ = make_blobs(n_samples=n_samples, random_state=0)\n X = shuffle(X, random_state=7)\n n_samples, n_features = X.shape\n estimator = clone(estimator_orig)\n\n set_random_state(estimator)\n\n y_pred = estimator.fit_predict(X)\n assert y_pred.shape == (n_samples,)\n assert y_pred.dtype.kind == 'i'\n assert_array_equal(np.unique(y_pred), np.array([-1, 1]))\n\n # check fit_predict = fit.predict when the estimator has both a predict and\n # a fit_predict method. recall that it is already assumed here that the\n # estimator has a fit_predict method\n if hasattr(estimator, 'predict'):\n y_pred_2 = estimator.fit(X).predict(X)\n assert_array_equal(y_pred, y_pred_2)\n\n if hasattr(estimator, \"contamination\"):\n # proportion of outliers equal to contamination parameter when not\n # set to 'auto'\n expected_outliers = 30\n contamination = float(expected_outliers)/n_samples\n estimator.set_params(contamination=contamination)\n y_pred = estimator.fit_predict(X)\n\n num_outliers = np.sum(y_pred != 1)\n # num_outliers should be equal to expected_outliers unless\n # there are ties in the decision_function values. this can\n # only be tested for estimators with a decision_function\n # method\n if (num_outliers != expected_outliers and\n hasattr(estimator, 'decision_function')):\n decision = estimator.decision_function(X)\n check_outlier_corruption(num_outliers, expected_outliers, decision)\n\n # raises error when contamination is a scalar and not in [0,1]\n for contamination in [-0.5, 2.3]:\n estimator.set_params(contamination=contamination)\n with raises(ValueError):\n estimator.fit_predict(X)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_outliers_train", + "name": "check_outliers_train", + "qname": "sklearn.utils.estimator_checks.check_outliers_train", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_outliers_train/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_outliers_train.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_outliers_train/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_outliers_train.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_outliers_train/readonly_memmap", + "name": "readonly_memmap", + "qname": "sklearn.utils.estimator_checks.check_outliers_train.readonly_memmap", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": 
"" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_outliers_train(name, estimator_orig, readonly_memmap=True):\n n_samples = 300\n X, _ = make_blobs(n_samples=n_samples, random_state=0)\n X = shuffle(X, random_state=7)\n\n if readonly_memmap:\n X = create_memmap_backed_data(X)\n\n n_samples, n_features = X.shape\n estimator = clone(estimator_orig)\n set_random_state(estimator)\n\n # fit\n estimator.fit(X)\n # with lists\n estimator.fit(X.tolist())\n\n y_pred = estimator.predict(X)\n assert y_pred.shape == (n_samples,)\n assert y_pred.dtype.kind == 'i'\n assert_array_equal(np.unique(y_pred), np.array([-1, 1]))\n\n decision = estimator.decision_function(X)\n scores = estimator.score_samples(X)\n for output in [decision, scores]:\n assert output.dtype == np.dtype('float')\n assert output.shape == (n_samples,)\n\n # raises error on malformed input for predict\n with raises(ValueError):\n estimator.predict(X.T)\n\n # decision_function agrees with predict\n dec_pred = (decision >= 0).astype(int)\n dec_pred[dec_pred == 0] = -1\n assert_array_equal(dec_pred, y_pred)\n\n # raises error on malformed input for decision_function\n with raises(ValueError):\n estimator.decision_function(X.T)\n\n # decision_function is a translation of score_samples\n y_dec = scores - estimator.offset_\n assert_allclose(y_dec, decision)\n\n # raises error on malformed input for score_samples\n with raises(ValueError):\n estimator.score_samples(X.T)\n\n # contamination parameter (not for OneClassSVM which has the nu parameter)\n if (hasattr(estimator, 'contamination')\n and not hasattr(estimator, 'novelty')):\n # proportion of outliers equal to contamination parameter when not\n # set to 'auto'. This is true for the training set and cannot thus be\n # checked as follows for estimators with a novelty parameter such as\n # LocalOutlierFactor (tested in check_outliers_fit_predict)\n expected_outliers = 30\n contamination = expected_outliers / n_samples\n estimator.set_params(contamination=contamination)\n estimator.fit(X)\n y_pred = estimator.predict(X)\n\n num_outliers = np.sum(y_pred != 1)\n # num_outliers should be equal to expected_outliers unless\n # there are ties in the decision_function values. this can\n # only be tested for estimators with a decision_function\n # method, i.e. 
all estimators except LOF which is already\n # excluded from this if branch.\n if num_outliers != expected_outliers:\n decision = estimator.decision_function(X)\n check_outlier_corruption(num_outliers, expected_outliers, decision)\n\n # raises error when contamination is a scalar and not in [0,1]\n for contamination in [-0.5, 2.3]:\n estimator.set_params(contamination=contamination)\n with raises(ValueError):\n estimator.fit(X)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_parameters_default_constructible", + "name": "check_parameters_default_constructible", + "qname": "sklearn.utils.estimator_checks.check_parameters_default_constructible", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_parameters_default_constructible/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_parameters_default_constructible.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_parameters_default_constructible/Estimator", + "name": "Estimator", + "qname": "sklearn.utils.estimator_checks.check_parameters_default_constructible.Estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_parameters_default_constructible(name, Estimator):\n # test default-constructibility\n # get rid of deprecation warnings\n\n Estimator = Estimator.__class__\n\n with ignore_warnings(category=FutureWarning):\n estimator = _construct_instance(Estimator)\n # test cloning\n clone(estimator)\n # test __repr__\n repr(estimator)\n # test that set_params returns self\n assert estimator.set_params() is estimator\n\n # test if init does nothing but set parameters\n # this is important for grid_search etc.\n # We get the default parameters from init and then\n # compare these against the actual values of the attributes.\n\n # this comes from getattr. Gets rid of deprecation decorator.\n init = getattr(estimator.__init__, 'deprecated_original',\n estimator.__init__)\n\n try:\n def param_filter(p):\n \"\"\"Identify hyper parameters of an estimator.\"\"\"\n return (p.name != 'self' and\n p.kind != p.VAR_KEYWORD and\n p.kind != p.VAR_POSITIONAL)\n\n init_params = [p for p in signature(init).parameters.values()\n if param_filter(p)]\n\n except (TypeError, ValueError):\n # init is not a python function.\n # true for mixins\n return\n params = estimator.get_params()\n # they can need a non-default argument\n init_params = init_params[len(getattr(\n estimator, '_required_parameters', [])):]\n\n for init_param in init_params:\n assert init_param.default != init_param.empty, (\n \"parameter %s for %s has no default value\"\n % (init_param.name, type(estimator).__name__))\n allowed_types = {\n str,\n int,\n float,\n bool,\n tuple,\n type(None),\n type,\n types.FunctionType,\n joblib.Memory,\n }\n # Any numpy numeric such as np.int32.\n allowed_types.update(np.core.numerictypes.allTypes.values())\n assert type(init_param.default) in allowed_types, (\n f\"Parameter '{init_param.name}' of estimator \"\n f\"'{Estimator.__name__}' is of type \"\n f\"{type(init_param.default).__name__} which is not \"\n f\"allowed. 
All init parameters have to be immutable to \"\n f\"make cloning possible. Therefore we restrict the set of \"\n f\"legal types to \"\n f\"{set(type.__name__ for type in allowed_types)}.\"\n )\n if init_param.name not in params.keys():\n # deprecated parameter, not in get_params\n assert init_param.default is None, (\n f\"Estimator parameter '{init_param.name}' of estimator \"\n f\"'{Estimator.__name__}' is not returned by get_params. \"\n f\"If it is deprecated, set its default value to None.\"\n )\n continue\n\n param_value = params[init_param.name]\n if isinstance(param_value, np.ndarray):\n assert_array_equal(param_value, init_param.default)\n else:\n failure_text = (\n f\"Parameter {init_param.name} was mutated on init. All \"\n f\"parameters must be stored unchanged.\"\n )\n if is_scalar_nan(param_value):\n # Allows to set default parameters to np.nan\n assert param_value is init_param.default, failure_text\n else:\n assert param_value == init_param.default, failure_text" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_pipeline_consistency", + "name": "check_pipeline_consistency", + "qname": "sklearn.utils.estimator_checks.check_pipeline_consistency", + "decorators": ["ignore_warnings"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_pipeline_consistency/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_pipeline_consistency.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_pipeline_consistency/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_pipeline_consistency.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings\ndef check_pipeline_consistency(name, estimator_orig):\n if _safe_tags(estimator_orig, key='non_deterministic'):\n msg = name + ' is non deterministic'\n raise SkipTest(msg)\n\n # check that make_pipeline(est) gives same score as est\n X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],\n random_state=0, n_features=2, cluster_std=0.1)\n X -= X.min()\n X = _pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)\n estimator = clone(estimator_orig)\n y = _enforce_estimator_tags_y(estimator, y)\n set_random_state(estimator)\n pipeline = make_pipeline(estimator)\n estimator.fit(X, y)\n pipeline.fit(X, y)\n\n funcs = [\"score\", \"fit_transform\"]\n\n for func_name in funcs:\n func = getattr(estimator, func_name, None)\n if func is not None:\n func_pipeline = getattr(pipeline, func_name)\n result = func(X, y)\n result_pipe = func_pipeline(X, y)\n assert_allclose_dense_sparse(result, result_pipe)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressor_data_not_an_array", + "name": "check_regressor_data_not_an_array", + "qname": "sklearn.utils.estimator_checks.check_regressor_data_not_an_array", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressor_data_not_an_array/name", + "name": "name", + "qname": 
"sklearn.utils.estimator_checks.check_regressor_data_not_an_array.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressor_data_not_an_array/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_regressor_data_not_an_array.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_regressor_data_not_an_array(name, estimator_orig):\n X, y = _regression_dataset()\n X = _pairwise_estimator_convert_X(X, estimator_orig)\n y = _enforce_estimator_tags_y(estimator_orig, y)\n for obj_type in [\"NotAnArray\", \"PandasDataframe\"]:\n check_estimators_data_not_an_array(name, estimator_orig, X, y,\n obj_type)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressor_multioutput", + "name": "check_regressor_multioutput", + "qname": "sklearn.utils.estimator_checks.check_regressor_multioutput", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressor_multioutput/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_regressor_multioutput.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressor_multioutput/estimator", + "name": "estimator", + "qname": "sklearn.utils.estimator_checks.check_regressor_multioutput.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_regressor_multioutput(name, estimator):\n estimator = clone(estimator)\n n_samples = n_features = 10\n\n if not _is_pairwise_metric(estimator):\n n_samples = n_samples + 1\n\n X, y = make_regression(random_state=42, n_targets=5,\n n_samples=n_samples, n_features=n_features)\n X = _pairwise_estimator_convert_X(X, estimator)\n\n estimator.fit(X, y)\n y_pred = estimator.predict(X)\n\n assert y_pred.dtype == np.dtype('float64'), (\n \"Multioutput predictions by a regressor are expected to be\"\n \" floating-point precision. 
Got {} instead\".format(y_pred.dtype))\n assert y_pred.shape == y.shape, (\n \"The shape of the prediction for multioutput data is incorrect.\"\n \" Expected {}, got {}.\")" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressors_int", + "name": "check_regressors_int", + "qname": "sklearn.utils.estimator_checks.check_regressors_int", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressors_int/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_regressors_int.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressors_int/regressor_orig", + "name": "regressor_orig", + "qname": "sklearn.utils.estimator_checks.check_regressors_int.regressor_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_regressors_int(name, regressor_orig):\n X, _ = _regression_dataset()\n X = _pairwise_estimator_convert_X(X[:50], regressor_orig)\n rnd = np.random.RandomState(0)\n y = rnd.randint(3, size=X.shape[0])\n y = _enforce_estimator_tags_y(regressor_orig, y)\n rnd = np.random.RandomState(0)\n # separate estimators to control random seeds\n regressor_1 = clone(regressor_orig)\n regressor_2 = clone(regressor_orig)\n set_random_state(regressor_1)\n set_random_state(regressor_2)\n\n if name in CROSS_DECOMPOSITION:\n y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])\n y_ = y_.T\n else:\n y_ = y\n\n # fit\n regressor_1.fit(X, y_)\n pred1 = regressor_1.predict(X)\n regressor_2.fit(X, y_.astype(float))\n pred2 = regressor_2.predict(X)\n assert_allclose(pred1, pred2, atol=1e-2, err_msg=name)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressors_no_decision_function", + "name": "check_regressors_no_decision_function", + "qname": "sklearn.utils.estimator_checks.check_regressors_no_decision_function", + "decorators": ["ignore_warnings"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressors_no_decision_function/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_regressors_no_decision_function.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressors_no_decision_function/regressor_orig", + "name": "regressor_orig", + "qname": "sklearn.utils.estimator_checks.check_regressors_no_decision_function.regressor_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings\ndef check_regressors_no_decision_function(name, regressor_orig):\n # check that regressors don't have a decision_function, predict_proba, or\n # predict_log_proba method.\n rng = 
np.random.RandomState(0)\n    regressor = clone(regressor_orig)\n\n    X = rng.normal(size=(10, 4))\n    X = _pairwise_estimator_convert_X(X, regressor_orig)\n    y = _enforce_estimator_tags_y(regressor, X[:, 0])\n\n    regressor.fit(X, y)\n    funcs = [\"decision_function\", \"predict_proba\", \"predict_log_proba\"]\n    for func_name in funcs:\n        assert not hasattr(regressor, func_name)" }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressors_train", + "name": "check_regressors_train", + "qname": "sklearn.utils.estimator_checks.check_regressors_train", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressors_train/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_regressors_train.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressors_train/regressor_orig", + "name": "regressor_orig", + "qname": "sklearn.utils.estimator_checks.check_regressors_train.regressor_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressors_train/readonly_memmap", + "name": "readonly_memmap", + "qname": "sklearn.utils.estimator_checks.check_regressors_train.readonly_memmap", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_regressors_train/X_dtype", + "name": "X_dtype", + "qname": "sklearn.utils.estimator_checks.check_regressors_train.X_dtype", + "default_value": "np.float64", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_regressors_train(\n    name, regressor_orig, readonly_memmap=False, X_dtype=np.float64\n):\n    X, y = _regression_dataset()\n    X = X.astype(X_dtype)\n    X = _pairwise_estimator_convert_X(X, regressor_orig)\n    y = scale(y)  # X is already scaled\n    regressor = clone(regressor_orig)\n    y = _enforce_estimator_tags_y(regressor, y)\n    if name in CROSS_DECOMPOSITION:\n        rnd = np.random.RandomState(0)\n        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])\n        y_ = y_.T\n    else:\n        y_ = y\n\n    if readonly_memmap:\n        X, y, y_ = create_memmap_backed_data([X, y, y_])\n\n    if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'):\n        # linear regressors need to set alpha, but not generalized CV ones\n        regressor.alpha = 0.01\n    if name == 'PassiveAggressiveRegressor':\n        regressor.C = 0.01\n\n    # raises error on malformed input for fit\n    with raises(\n        ValueError,\n        err_msg=f\"The regressor {name} does not raise an error when \"\n        \"incorrect/malformed input data for fit is passed. The number of \"\n        \"training examples is not the same as the number of labels. 
Perhaps \"\n \"use check_X_y in fit.\",\n ):\n regressor.fit(X, y[:-1])\n # fit\n set_random_state(regressor)\n regressor.fit(X, y_)\n regressor.fit(X.tolist(), y_.tolist())\n y_pred = regressor.predict(X)\n assert y_pred.shape == y_.shape\n\n # TODO: find out why PLS and CCA fail. RANSAC is random\n # and furthermore assumes the presence of outliers, hence\n # skipped\n if not _safe_tags(regressor, key=\"poor_score\"):\n assert regressor.score(X, y_) > 0.5" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_requires_y_none", + "name": "check_requires_y_none", + "qname": "sklearn.utils.estimator_checks.check_requires_y_none", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_requires_y_none/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_requires_y_none.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_requires_y_none/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_requires_y_none.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_requires_y_none(name, estimator_orig):\n # Make sure that an estimator with requires_y=True fails gracefully when\n # given y=None\n\n rng = np.random.RandomState(0)\n\n estimator = clone(estimator_orig)\n set_random_state(estimator)\n\n n_samples = 100\n X = rng.normal(loc=100, size=(n_samples, 2))\n X = _pairwise_estimator_convert_X(X, estimator)\n\n warning_msg = (\"As of scikit-learn 0.23, estimators should have a \"\n \"'requires_y' tag set to the appropriate value. \"\n \"The default value of the tag is False. 
\"\n \"An error will be raised from version 1.0 when calling \"\n \"check_estimator() if the tag isn't properly set.\")\n\n expected_err_msgs = (\n \"requires y to be passed, but the target y is None\",\n \"Expected array-like (array or non-string sequence), got None\",\n \"y should be a 1d array\"\n )\n\n try:\n estimator.fit(X, None)\n except ValueError as ve:\n if not any(msg in str(ve) for msg in expected_err_msgs):\n warnings.warn(warning_msg, FutureWarning)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_invariance", + "name": "check_sample_weights_invariance", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_invariance", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_invariance/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_invariance.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_invariance/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_invariance.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_invariance/kind", + "name": "kind", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_invariance.kind", + "default_value": "'ones'", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_sample_weights_invariance(name, estimator_orig, kind=\"ones\"):\n # For kind=\"ones\" check that the estimators yield same results for\n # unit weights and no weights\n # For kind=\"zeros\" check that setting sample_weight to 0 is equivalent\n # to removing corresponding samples.\n estimator1 = clone(estimator_orig)\n estimator2 = clone(estimator_orig)\n set_random_state(estimator1, random_state=0)\n set_random_state(estimator2, random_state=0)\n\n X1 = np.array([[1, 3], [1, 3], [1, 3], [1, 3],\n [2, 1], [2, 1], [2, 1], [2, 1],\n [3, 3], [3, 3], [3, 3], [3, 3],\n [4, 1], [4, 1], [4, 1], [4, 1]], dtype=np.float64)\n y1 = np.array([1, 1, 1, 1, 2, 2, 2, 2,\n 1, 1, 1, 1, 2, 2, 2, 2], dtype=int)\n\n if kind == 'ones':\n X2 = X1\n y2 = y1\n sw2 = np.ones(shape=len(y1))\n err_msg = (f\"For {name} sample_weight=None is not equivalent to \"\n f\"sample_weight=ones\")\n elif kind == 'zeros':\n # Construct a dataset that is very different to (X, y) if weights\n # are disregarded, but identical to (X, y) given weights.\n X2 = np.vstack([X1, X1 + 1])\n y2 = np.hstack([y1, 3 - y1])\n sw2 = np.ones(shape=len(y1) * 2)\n sw2[len(y1):] = 0\n X2, y2, sw2 = shuffle(X2, y2, sw2, random_state=0)\n\n err_msg = (f\"For {name}, a zero sample_weight is not equivalent \"\n f\"to removing the sample\")\n else: # pragma: no cover\n raise ValueError\n\n y1 = _enforce_estimator_tags_y(estimator1, y1)\n y2 = _enforce_estimator_tags_y(estimator2, y2)\n\n 
estimator1.fit(X1, y=y1, sample_weight=None)\n estimator2.fit(X2, y=y2, sample_weight=sw2)\n\n for method in [\"predict\", \"predict_proba\",\n \"decision_function\", \"transform\"]:\n if hasattr(estimator_orig, method):\n X_pred1 = getattr(estimator1, method)(X1)\n X_pred2 = getattr(estimator2, method)(X1)\n assert_allclose_dense_sparse(X_pred1, X_pred2, err_msg=err_msg)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_list", + "name": "check_sample_weights_list", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_list", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_list/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_list.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_list/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_list.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=(FutureWarning))\ndef check_sample_weights_list(name, estimator_orig):\n # check that estimators will accept a 'sample_weight' parameter of\n # type list in the 'fit' function.\n if has_fit_parameter(estimator_orig, \"sample_weight\"):\n estimator = clone(estimator_orig)\n rnd = np.random.RandomState(0)\n n_samples = 30\n X = _pairwise_estimator_convert_X(rnd.uniform(size=(n_samples, 3)),\n estimator_orig)\n y = np.arange(n_samples) % 3\n y = _enforce_estimator_tags_y(estimator, y)\n sample_weight = [3] * n_samples\n # Test that estimators don't raise any exception\n estimator.fit(X, y, sample_weight=sample_weight)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_not_an_array", + "name": "check_sample_weights_not_an_array", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_not_an_array", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_not_an_array/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_not_an_array.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_not_an_array/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_not_an_array.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=(FutureWarning))\ndef check_sample_weights_not_an_array(name, estimator_orig):\n # check that estimators will accept a 'sample_weight' 
parameter of\n # type _NotAnArray in the 'fit' function.\n estimator = clone(estimator_orig)\n if has_fit_parameter(estimator, \"sample_weight\"):\n X = np.array([[1, 1], [1, 2], [1, 3], [1, 4],\n [2, 1], [2, 2], [2, 3], [2, 4],\n [3, 1], [3, 2], [3, 3], [3, 4]])\n X = _NotAnArray(_pairwise_estimator_convert_X(X, estimator_orig))\n y = _NotAnArray([1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2])\n weights = _NotAnArray([1] * 12)\n if _safe_tags(estimator, key=\"multioutput_only\"):\n y = _NotAnArray(y.data.reshape(-1, 1))\n estimator.fit(X, y, sample_weight=weights)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_pandas_series", + "name": "check_sample_weights_pandas_series", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_pandas_series", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_pandas_series/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_pandas_series.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_pandas_series/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_pandas_series.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_sample_weights_pandas_series(name, estimator_orig):\n # check that estimators will accept a 'sample_weight' parameter of\n # type pandas.Series in the 'fit' function.\n estimator = clone(estimator_orig)\n if has_fit_parameter(estimator, \"sample_weight\"):\n try:\n import pandas as pd\n X = np.array([[1, 1], [1, 2], [1, 3], [1, 4],\n [2, 1], [2, 2], [2, 3], [2, 4],\n [3, 1], [3, 2], [3, 3], [3, 4]])\n X = pd.DataFrame(_pairwise_estimator_convert_X(X, estimator_orig))\n y = pd.Series([1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2])\n weights = pd.Series([1] * 12)\n if _safe_tags(estimator, key=\"multioutput_only\"):\n y = pd.DataFrame(y)\n try:\n estimator.fit(X, y, sample_weight=weights)\n except ValueError:\n raise ValueError(\"Estimator {0} raises error if \"\n \"'sample_weight' parameter is of \"\n \"type pandas.Series\".format(name))\n except ImportError:\n raise SkipTest(\"pandas is not installed: not testing for \"\n \"input of type pandas.Series to class weight.\")" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_shape", + "name": "check_sample_weights_shape", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_shape", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_shape/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_shape.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.utils.estimator_checks/check_sample_weights_shape/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_sample_weights_shape.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_sample_weights_shape(name, estimator_orig):\n # check that estimators raise an error if sample_weight\n # shape mismatches the input\n if (has_fit_parameter(estimator_orig, \"sample_weight\") and\n not _is_pairwise(estimator_orig)):\n estimator = clone(estimator_orig)\n X = np.array([[1, 3], [1, 3], [1, 3], [1, 3],\n [2, 1], [2, 1], [2, 1], [2, 1],\n [3, 3], [3, 3], [3, 3], [3, 3],\n [4, 1], [4, 1], [4, 1], [4, 1]])\n y = np.array([1, 1, 1, 1, 2, 2, 2, 2,\n 1, 1, 1, 1, 2, 2, 2, 2])\n y = _enforce_estimator_tags_y(estimator, y)\n\n estimator.fit(X, y, sample_weight=np.ones(len(y)))\n\n with raises(ValueError):\n estimator.fit(X, y, sample_weight=np.ones(2 * len(y)))\n\n with raises(ValueError):\n estimator.fit(X, y, sample_weight=np.ones((len(y), 2)))" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_set_params", + "name": "check_set_params", + "qname": "sklearn.utils.estimator_checks.check_set_params", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_set_params/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_set_params.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_set_params/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_set_params.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_set_params(name, estimator_orig):\n # Check that get_params() returns the same thing\n # before and after set_params() with some fuzz\n estimator = clone(estimator_orig)\n\n orig_params = estimator.get_params(deep=False)\n msg = \"get_params result does not match what was passed to set_params\"\n\n estimator.set_params(**orig_params)\n curr_params = estimator.get_params(deep=False)\n assert set(orig_params.keys()) == set(curr_params.keys()), msg\n for k, v in curr_params.items():\n assert orig_params[k] is v, msg\n\n # some fuzz values\n test_values = [-np.inf, np.inf, None]\n\n test_params = deepcopy(orig_params)\n for param_name in orig_params.keys():\n default_value = orig_params[param_name]\n for value in test_values:\n test_params[param_name] = value\n try:\n estimator.set_params(**test_params)\n except (TypeError, ValueError) as e:\n e_type = e.__class__.__name__\n # Exception occurred, possibly parameter validation\n warnings.warn(\"{0} occurred during set_params of param {1} on \"\n \"{2}. 
It is recommended to delay parameter \"\n \"validation until fit.\".format(e_type,\n param_name,\n name))\n\n change_warning_msg = \"Estimator's parameters changed after \" \\\n \"set_params raised {}\".format(e_type)\n params_before_exception = curr_params\n curr_params = estimator.get_params(deep=False)\n try:\n assert (set(params_before_exception.keys()) ==\n set(curr_params.keys()))\n for k, v in curr_params.items():\n assert params_before_exception[k] is v\n except AssertionError:\n warnings.warn(change_warning_msg)\n else:\n curr_params = estimator.get_params(deep=False)\n assert (set(test_params.keys()) ==\n set(curr_params.keys())), msg\n for k, v in curr_params.items():\n assert test_params[k] is v, msg\n test_params[param_name] = default_value" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sparsify_coefficients", + "name": "check_sparsify_coefficients", + "qname": "sklearn.utils.estimator_checks.check_sparsify_coefficients", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sparsify_coefficients/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_sparsify_coefficients.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_sparsify_coefficients/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_sparsify_coefficients.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_sparsify_coefficients(name, estimator_orig):\n X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1],\n [-1, -2], [2, 2], [-2, -2]])\n y = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])\n y = _enforce_estimator_tags_y(estimator_orig, y)\n est = clone(estimator_orig)\n\n est.fit(X, y)\n pred_orig = est.predict(X)\n\n # test sparsify with dense inputs\n est.sparsify()\n assert sparse.issparse(est.coef_)\n pred = est.predict(X)\n assert_array_equal(pred, pred_orig)\n\n # pickle and unpickle with sparse coef_\n est = pickle.loads(pickle.dumps(est))\n assert sparse.issparse(est.coef_)\n pred = est.predict(X)\n assert_array_equal(pred, pred_orig)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_supervised_y_2d", + "name": "check_supervised_y_2d", + "qname": "sklearn.utils.estimator_checks.check_supervised_y_2d", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_supervised_y_2d/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_supervised_y_2d.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_supervised_y_2d/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_supervised_y_2d.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + 
"docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_supervised_y_2d(name, estimator_orig):\n tags = _safe_tags(estimator_orig)\n rnd = np.random.RandomState(0)\n n_samples = 30\n X = _pairwise_estimator_convert_X(\n rnd.uniform(size=(n_samples, 3)), estimator_orig\n )\n y = np.arange(n_samples) % 3\n y = _enforce_estimator_tags_y(estimator_orig, y)\n estimator = clone(estimator_orig)\n set_random_state(estimator)\n # fit\n estimator.fit(X, y)\n y_pred = estimator.predict(X)\n\n set_random_state(estimator)\n # Check that when a 2D y is given, a DataConversionWarning is\n # raised\n with warnings.catch_warnings(record=True) as w:\n warnings.simplefilter(\"always\", DataConversionWarning)\n warnings.simplefilter(\"ignore\", RuntimeWarning)\n estimator.fit(X, y[:, np.newaxis])\n y_pred_2d = estimator.predict(X)\n msg = \"expected 1 DataConversionWarning, got: %s\" % (\n \", \".join([str(w_x) for w_x in w]))\n if not tags['multioutput']:\n # check that we warned if we don't support multi-output\n assert len(w) > 0, msg\n assert \"DataConversionWarning('A column-vector y\" \\\n \" was passed when a 1d array was expected\" in msg\n assert_allclose(y_pred.ravel(), y_pred_2d.ravel())" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_supervised_y_no_nan", + "name": "check_supervised_y_no_nan", + "qname": "sklearn.utils.estimator_checks.check_supervised_y_no_nan", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_supervised_y_no_nan/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_supervised_y_no_nan.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_supervised_y_no_nan/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_supervised_y_no_nan.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_supervised_y_no_nan(name, estimator_orig):\n # Checks that the Estimator targets are not NaN.\n estimator = clone(estimator_orig)\n rng = np.random.RandomState(888)\n X = rng.randn(10, 5)\n y = np.full(10, np.inf)\n y = _enforce_estimator_tags_y(estimator, y)\n\n match = (\n \"Input contains NaN, infinity or a value too large for \"\n r\"dtype\\('float64'\\).\"\n )\n err_msg = (\n f\"Estimator {name} should have raised error on fitting \"\n \"array y with NaN value.\"\n )\n with raises(ValueError, match=match, err_msg=err_msg):\n estimator.fit(X, y)" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformer_data_not_an_array", + "name": "check_transformer_data_not_an_array", + "qname": "sklearn.utils.estimator_checks.check_transformer_data_not_an_array", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": 
"scikit-learn/sklearn.utils.estimator_checks/check_transformer_data_not_an_array/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_transformer_data_not_an_array.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformer_data_not_an_array/transformer", + "name": "transformer", + "qname": "sklearn.utils.estimator_checks.check_transformer_data_not_an_array.transformer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_transformer_data_not_an_array(name, transformer):\n X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],\n random_state=0, n_features=2, cluster_std=0.1)\n X = StandardScaler().fit_transform(X)\n # We need to make sure that we have non negative data, for things\n # like NMF\n X -= X.min() - .1\n X = _pairwise_estimator_convert_X(X, transformer)\n this_X = _NotAnArray(X)\n this_y = _NotAnArray(np.asarray(y))\n _check_transformer(name, transformer, this_X, this_y)\n # try the same with some list\n _check_transformer(name, transformer, X.tolist(), y.tolist())" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformer_general", + "name": "check_transformer_general", + "qname": "sklearn.utils.estimator_checks.check_transformer_general", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformer_general/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_transformer_general.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformer_general/transformer", + "name": "transformer", + "qname": "sklearn.utils.estimator_checks.check_transformer_general.transformer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformer_general/readonly_memmap", + "name": "readonly_memmap", + "qname": "sklearn.utils.estimator_checks.check_transformer_general.readonly_memmap", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_transformer_general(name, transformer, readonly_memmap=False):\n X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],\n random_state=0, n_features=2, cluster_std=0.1)\n X = StandardScaler().fit_transform(X)\n X -= X.min()\n X = _pairwise_estimator_convert_X(X, transformer)\n\n if readonly_memmap:\n X, y = create_memmap_backed_data([X, y])\n\n _check_transformer(name, transformer, X, y)" + }, + { + "id": 
"scikit-learn/sklearn.utils.estimator_checks/check_transformer_n_iter", + "name": "check_transformer_n_iter", + "qname": "sklearn.utils.estimator_checks.check_transformer_n_iter", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformer_n_iter/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_transformer_n_iter.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformer_n_iter/estimator_orig", + "name": "estimator_orig", + "qname": "sklearn.utils.estimator_checks.check_transformer_n_iter.estimator_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_transformer_n_iter(name, estimator_orig):\n # Test that transformers with a parameter max_iter, return the\n # attribute of n_iter_ at least 1.\n estimator = clone(estimator_orig)\n if hasattr(estimator, \"max_iter\"):\n if name in CROSS_DECOMPOSITION:\n # Check using default data\n X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [2., 5., 4.]]\n y_ = [[0.1, -0.2], [0.9, 1.1], [0.1, -0.5], [0.3, -0.2]]\n\n else:\n X, y_ = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],\n random_state=0, n_features=2, cluster_std=0.1)\n X -= X.min() - 0.1\n set_random_state(estimator, 0)\n estimator.fit(X, y_)\n\n # These return a n_iter per component.\n if name in CROSS_DECOMPOSITION:\n for iter_ in estimator.n_iter_:\n assert iter_ >= 1\n else:\n assert estimator.n_iter_ >= 1" + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformer_preserve_dtypes", + "name": "check_transformer_preserve_dtypes", + "qname": "sklearn.utils.estimator_checks.check_transformer_preserve_dtypes", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformer_preserve_dtypes/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_transformer_preserve_dtypes.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformer_preserve_dtypes/transformer_orig", + "name": "transformer_orig", + "qname": "sklearn.utils.estimator_checks.check_transformer_preserve_dtypes.transformer_orig", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def check_transformer_preserve_dtypes(name, transformer_orig):\n # check that dtype are preserved meaning if input X is of some dtype\n # X_transformed should be from the same dtype.\n X, y = make_blobs(\n n_samples=30,\n centers=[[0, 0, 0], [1, 1, 1]],\n random_state=0,\n cluster_std=0.1,\n )\n X = StandardScaler().fit_transform(X)\n X -= X.min()\n X = _pairwise_estimator_convert_X(X, transformer_orig)\n\n for dtype 
in _safe_tags(transformer_orig, key=\"preserves_dtype\"):\n        X_cast = X.astype(dtype)\n        transformer = clone(transformer_orig)\n        set_random_state(transformer)\n        X_trans = transformer.fit_transform(X_cast, y)\n\n        if isinstance(X_trans, tuple):\n            # cross-decomposition returns a tuple of (x_scores, y_scores)\n            # when given y with fit_transform; only check the first element\n            X_trans = X_trans[0]\n\n        # check that the output dtype is preserved\n        assert X_trans.dtype == dtype, (\n            f'Estimator transform dtype: {X_trans.dtype} - '\n            f'original/expected dtype: {dtype.__name__}'\n        )" }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformers_unfitted", + "name": "check_transformers_unfitted", + "qname": "sklearn.utils.estimator_checks.check_transformers_unfitted", + "decorators": ["ignore_warnings(category=FutureWarning)"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformers_unfitted/name", + "name": "name", + "qname": "sklearn.utils.estimator_checks.check_transformers_unfitted.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/check_transformers_unfitted/transformer", + "name": "transformer", + "qname": "sklearn.utils.estimator_checks.check_transformers_unfitted.transformer", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "@ignore_warnings(category=FutureWarning)\ndef check_transformers_unfitted(name, transformer):\n    X, y = _regression_dataset()\n\n    transformer = clone(transformer)\n    with raises(\n        (AttributeError, ValueError),\n        err_msg=\"The unfitted \"\n        f\"transformer {name} does not raise an error when \"\n        \"transform is called. Perhaps use \"\n        \"check_is_fitted in transform.\",\n    ):\n        transformer.transform(X)" }, + { + "id": "scikit-learn/sklearn.utils.estimator_checks/parametrize_with_checks", + "name": "parametrize_with_checks", + "qname": "sklearn.utils.estimator_checks.parametrize_with_checks", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.estimator_checks/parametrize_with_checks/estimators", + "name": "estimators", + "qname": "sklearn.utils.estimator_checks.parametrize_with_checks.estimators", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "list of estimator instances", + "default_value": "", + "description": "Estimators to generate checks for.\n\n.. versionchanged:: 0.24\n   Passing a class was deprecated in version 0.23, and support for\n   classes was removed in 0.24. Pass an instance instead.\n\n.. 
versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "list of estimators instances" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Pytest specific decorator for parametrizing estimator checks.\n\nThe `id` of each check is set to be a pprint version of the estimator\nand the name of the check with its keyword arguments.\nThis allows to use `pytest -k` to specify which tests to run::\n\n pytest test_check_estimators.py -k check_estimators_fit_returns_self", + "docstring": "Pytest specific decorator for parametrizing estimator checks.\n\nThe `id` of each check is set to be a pprint version of the estimator\nand the name of the check with its keyword arguments.\nThis allows to use `pytest -k` to specify which tests to run::\n\n pytest test_check_estimators.py -k check_estimators_fit_returns_self\n\nParameters\n----------\nestimators : list of estimators instances\n Estimators to generated checks for.\n\n .. versionchanged:: 0.24\n Passing a class was deprecated in version 0.23, and support for\n classes was removed in 0.24. Pass an instance instead.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndecorator : `pytest.mark.parametrize`\n\nExamples\n--------\n>>> from sklearn.utils.estimator_checks import parametrize_with_checks\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.tree import DecisionTreeRegressor\n\n>>> @parametrize_with_checks([LogisticRegression(),\n... DecisionTreeRegressor()])\n... def test_sklearn_compatible_estimator(estimator, check):\n... check(estimator)", + "code": "def parametrize_with_checks(estimators):\n \"\"\"Pytest specific decorator for parametrizing estimator checks.\n\n The `id` of each check is set to be a pprint version of the estimator\n and the name of the check with its keyword arguments.\n This allows to use `pytest -k` to specify which tests to run::\n\n pytest test_check_estimators.py -k check_estimators_fit_returns_self\n\n Parameters\n ----------\n estimators : list of estimators instances\n Estimators to generated checks for.\n\n .. versionchanged:: 0.24\n Passing a class was deprecated in version 0.23, and support for\n classes was removed in 0.24. Pass an instance instead.\n\n .. versionadded:: 0.24\n\n Returns\n -------\n decorator : `pytest.mark.parametrize`\n\n Examples\n --------\n >>> from sklearn.utils.estimator_checks import parametrize_with_checks\n >>> from sklearn.linear_model import LogisticRegression\n >>> from sklearn.tree import DecisionTreeRegressor\n\n >>> @parametrize_with_checks([LogisticRegression(),\n ... DecisionTreeRegressor()])\n ... def test_sklearn_compatible_estimator(estimator, check):\n ... 
check(estimator)\n\n    \"\"\"\n    import pytest\n\n    if any(isinstance(est, type) for est in estimators):\n        msg = (\"Passing a class was deprecated in version 0.23 \"\n               \"and isn't supported anymore from 0.24. \"\n               \"Please pass an instance instead.\")\n        raise TypeError(msg)\n\n    def checks_generator():\n        for estimator in estimators:\n            name = type(estimator).__name__\n            for check in _yield_all_checks(estimator):\n                check = partial(check, name)\n                yield _maybe_mark_xfail(estimator, check, pytest)\n\n    return pytest.mark.parametrize(\"estimator, check\", checks_generator(),\n                                   ids=_get_check_estimator_ids)" }, + { + "id": "scikit-learn/sklearn.utils.extmath/_deterministic_vector_sign_flip", + "name": "_deterministic_vector_sign_flip", + "qname": "sklearn.utils.extmath._deterministic_vector_sign_flip", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/_deterministic_vector_sign_flip/u", + "name": "u", + "qname": "sklearn.utils.extmath._deterministic_vector_sign_flip.u", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "Array with vectors as its rows." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Modify the sign of vectors for reproducibility.\n\nFlips the sign of elements of all the vectors (rows of u) such that\nthe absolute maximum element of each vector is positive.", + "docstring": "Modify the sign of vectors for reproducibility.\n\nFlips the sign of elements of all the vectors (rows of u) such that\nthe absolute maximum element of each vector is positive.\n\nParameters\n----------\nu : ndarray\n    Array with vectors as its rows.\n\nReturns\n-------\nu_flipped : ndarray with same shape as u\n    Array with the sign flipped vectors as its rows.", + "code": "def _deterministic_vector_sign_flip(u):\n    \"\"\"Modify the sign of vectors for reproducibility.\n\n    Flips the sign of elements of all the vectors (rows of u) such that\n    the absolute maximum element of each vector is positive.\n\n    Parameters\n    ----------\n    u : ndarray\n        Array with vectors as its rows.\n\n    Returns\n    -------\n    u_flipped : ndarray with same shape as u\n        Array with the sign flipped vectors as its rows.\n    \"\"\"\n    max_abs_rows = np.argmax(np.abs(u), axis=1)\n    signs = np.sign(u[range(u.shape[0]), max_abs_rows])\n    u *= signs[:, np.newaxis]\n    return u" }, + { + "id": "scikit-learn/sklearn.utils.extmath/_incremental_mean_and_var", + "name": "_incremental_mean_and_var", + "qname": "sklearn.utils.extmath._incremental_mean_and_var", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/_incremental_mean_and_var/X", + "name": "X", + "qname": "sklearn.utils.extmath._incremental_mean_and_var.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data to use for variance update." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/_incremental_mean_and_var/last_mean", + "name": "last_mean", + "qname": "sklearn.utils.extmath._incremental_mean_and_var.last_mean", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/_incremental_mean_and_var/last_variance", + "name": "last_variance", + "qname": "sklearn.utils.extmath._incremental_mean_and_var.last_variance", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/_incremental_mean_and_var/last_sample_count", + "name": "last_sample_count", + "qname": "sklearn.utils.extmath._incremental_mean_and_var.last_sample_count", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features,)", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate mean update and a Youngs and Cramer variance update.\n\nlast_mean and last_variance are statistics computed at the last step by the\nfunction. Both must be initialized to 0.0. In case no scaling is required\nlast_variance can be None. The mean is always required and returned because\nnecessary for the calculation of the variance. last_n_samples_seen is the\nnumber of samples encountered until now.\n\nFrom the paper \"Algorithms for computing the sample variance: analysis and\nrecommendations\", by Chan, Golub, and LeVeque.", + "docstring": "Calculate mean update and a Youngs and Cramer variance update.\n\nlast_mean and last_variance are statistics computed at the last step by the\nfunction. Both must be initialized to 0.0. In case no scaling is required\nlast_variance can be None. The mean is always required and returned because\nnecessary for the calculation of the variance. last_n_samples_seen is the\nnumber of samples encountered until now.\n\nFrom the paper \"Algorithms for computing the sample variance: analysis and\nrecommendations\", by Chan, Golub, and LeVeque.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to use for variance update.\n\nlast_mean : array-like of shape (n_features,)\n\nlast_variance : array-like of shape (n_features,)\n\nlast_sample_count : array-like of shape (n_features,)\n\nReturns\n-------\nupdated_mean : ndarray of shape (n_features,)\n\nupdated_variance : ndarray of shape (n_features,)\n If None, only mean is computed.\n\nupdated_sample_count : ndarray of shape (n_features,)\n\nNotes\n-----\nNaNs are ignored during the algorithm.\n\nReferences\n----------\nT. Chan, G. Golub, R. LeVeque. Algorithms for computing the sample\n variance: recommendations, The American Statistician, Vol. 37, No. 3,\n pp. 
242-247\n\nAlso, see the sparse implementation of this in\n`utils.sparsefuncs.incr_mean_variance_axis` and\n`utils.sparsefuncs_fast.incr_mean_variance_axis0`", + "code": "def _incremental_mean_and_var(X, last_mean, last_variance, last_sample_count):\n    \"\"\"Calculate mean update and a Youngs and Cramer variance update.\n\n    last_mean and last_variance are statistics computed at the last step by the\n    function. Both must be initialized to 0.0. In case no scaling is required\n    last_variance can be None. The mean is always required and returned because\n    it is necessary for the calculation of the variance. last_sample_count is\n    the number of samples encountered until now.\n\n    From the paper \"Algorithms for computing the sample variance: analysis and\n    recommendations\", by Chan, Golub, and LeVeque.\n\n    Parameters\n    ----------\n    X : array-like of shape (n_samples, n_features)\n        Data to use for variance update.\n\n    last_mean : array-like of shape (n_features,)\n\n    last_variance : array-like of shape (n_features,)\n\n    last_sample_count : array-like of shape (n_features,)\n\n    Returns\n    -------\n    updated_mean : ndarray of shape (n_features,)\n\n    updated_variance : ndarray of shape (n_features,)\n        If None, only mean is computed.\n\n    updated_sample_count : ndarray of shape (n_features,)\n\n    Notes\n    -----\n    NaNs are ignored during the algorithm.\n\n    References\n    ----------\n    T. Chan, G. Golub, R. LeVeque. Algorithms for computing the sample\n        variance: recommendations, The American Statistician, Vol. 37, No. 3,\n        pp. 242-247\n\n    Also, see the sparse implementation of this in\n    `utils.sparsefuncs.incr_mean_variance_axis` and\n    `utils.sparsefuncs_fast.incr_mean_variance_axis0`\n    \"\"\"\n    # old = stats until now\n    # new = the current increment\n    # updated = the aggregated stats\n    last_sum = last_mean * last_sample_count\n    new_sum = _safe_accumulator_op(np.nansum, X, axis=0)\n\n    new_sample_count = np.sum(~np.isnan(X), axis=0)\n    updated_sample_count = last_sample_count + new_sample_count\n\n    updated_mean = (last_sum + new_sum) / updated_sample_count\n\n    if last_variance is None:\n        updated_variance = None\n    else:\n        new_unnormalized_variance = (\n            _safe_accumulator_op(np.nanvar, X, axis=0) * new_sample_count)\n        last_unnormalized_variance = last_variance * last_sample_count\n\n        with np.errstate(divide='ignore', invalid='ignore'):\n            last_over_new_count = last_sample_count / new_sample_count\n            updated_unnormalized_variance = (\n                last_unnormalized_variance + new_unnormalized_variance +\n                last_over_new_count / updated_sample_count *\n                (last_sum / last_over_new_count - new_sum) ** 2)\n\n        zeros = last_sample_count == 0\n        updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros]\n        updated_variance = updated_unnormalized_variance / updated_sample_count\n\n    return updated_mean, updated_variance, updated_sample_count" }, + { + "id": "scikit-learn/sklearn.utils.extmath/_incremental_weighted_mean_and_var", + "name": "_incremental_weighted_mean_and_var", + "qname": "sklearn.utils.extmath._incremental_weighted_mean_and_var", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/_incremental_weighted_mean_and_var/X", + "name": "X", + "qname": "sklearn.utils.extmath._incremental_weighted_mean_and_var.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data to use for mean and variance update." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/_incremental_weighted_mean_and_var/sample_weight", + "name": "sample_weight", + "qname": "sklearn.utils.extmath._incremental_weighted_mean_and_var.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,) or None", + "default_value": "", + "description": "Sample weights. If None, then samples are equally weighted." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/_incremental_weighted_mean_and_var/last_mean", + "name": "last_mean", + "qname": "sklearn.utils.extmath._incremental_weighted_mean_and_var.last_mean", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features,)", + "default_value": "", + "description": "Mean before the incremental update." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/_incremental_weighted_mean_and_var/last_variance", + "name": "last_variance", + "qname": "sklearn.utils.extmath._incremental_weighted_mean_and_var.last_variance", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features,) or None", + "default_value": "", + "description": "Variance before the incremental update.\nIf None, variance update is not computed (in case scaling is not\nrequired)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/_incremental_weighted_mean_and_var/last_weight_sum", + "name": "last_weight_sum", + "qname": "sklearn.utils.extmath._incremental_weighted_mean_and_var.last_weight_sum", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_features,)", + "default_value": "", + "description": "Sum of weights before the incremental update." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_features,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Calculate weighted mean and weighted variance incremental update.\n\n.. versionadded:: 0.24", + "docstring": "Calculate weighted mean and weighted variance incremental update.\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to use for mean and variance update.\n\nsample_weight : array-like of shape (n_samples,) or None\n Sample weights. 
If None, then samples are equally weighted.\n\nlast_mean : array-like of shape (n_features,)\n Mean before the incremental update.\n\nlast_variance : array-like of shape (n_features,) or None\n Variance before the incremental update.\n If None, variance update is not computed (in case scaling is not\n required).\n\nlast_weight_sum : array-like of shape (n_features,)\n Sum of weights before the incremental update.\n\nReturns\n-------\nupdated_mean : array of shape (n_features,)\n\nupdated_variance : array of shape (n_features,) or None\n If None, only mean is computed.\n\nupdated_weight_sum : array of shape (n_features,)\n\nNotes\n-----\nNaNs in `X` are ignored.\n\n`last_mean` and `last_variance` are statistics computed at the last step\nby the function. Both must be initialized to 0.0.\nThe mean is always required (`last_mean`) and returned (`updated_mean`),\nwhereas the variance can be None (`last_variance` and `updated_variance`).\n\nFor further details on the algorithm to perform the computation in a\nnumerically stable way, see [Finch2009]_, Sections 4 and 5.\n\nReferences\n----------\n.. [Finch2009] `Tony Finch,\n \"Incremental calculation of weighted mean and variance\",\n University of Cambridge Computing Service, February 2009.\n `_", + "code": "def _incremental_weighted_mean_and_var(X, sample_weight,\n last_mean,\n last_variance,\n last_weight_sum):\n \"\"\"Calculate weighted mean and weighted variance incremental update.\n\n .. versionadded:: 0.24\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data to use for mean and variance update.\n\n sample_weight : array-like of shape (n_samples,) or None\n Sample weights. If None, then samples are equally weighted.\n\n last_mean : array-like of shape (n_features,)\n Mean before the incremental update.\n\n last_variance : array-like of shape (n_features,) or None\n Variance before the incremental update.\n If None, variance update is not computed (in case scaling is not\n required).\n\n last_weight_sum : array-like of shape (n_features,)\n Sum of weights before the incremental update.\n\n Returns\n -------\n updated_mean : array of shape (n_features,)\n\n updated_variance : array of shape (n_features,) or None\n If None, only mean is computed.\n\n updated_weight_sum : array of shape (n_features,)\n\n Notes\n -----\n NaNs in `X` are ignored.\n\n `last_mean` and `last_variance` are statistics computed at the last step\n by the function. Both must be initialized to 0.0.\n The mean is always required (`last_mean`) and returned (`updated_mean`),\n whereas the variance can be None (`last_variance` and `updated_variance`).\n\n For further details on the algorithm to perform the computation in a\n numerically stable way, see [Finch2009]_, Sections 4 and 5.\n\n References\n ----------\n .. 
[Finch2009] `Tony Finch,\n \"Incremental calculation of weighted mean and variance\",\n University of Cambridge Computing Service, February 2009.\n `_\n\n \"\"\"\n # last = stats before the increment\n # new = the current increment\n # updated = the aggregated stats\n if sample_weight is None:\n return _incremental_mean_and_var(X, last_mean, last_variance,\n last_weight_sum)\n nan_mask = np.isnan(X)\n sample_weight_T = np.reshape(sample_weight, (1, -1))\n # new_weight_sum with shape (n_features,)\n new_weight_sum = np.dot(sample_weight_T,\n ~nan_mask).ravel().astype(np.float64)\n total_weight_sum = _safe_accumulator_op(np.sum, sample_weight, axis=0)\n\n X_0 = np.where(nan_mask, 0, X)\n new_mean = np.average(X_0,\n weights=sample_weight, axis=0).astype(np.float64)\n new_mean *= total_weight_sum / new_weight_sum\n updated_weight_sum = last_weight_sum + new_weight_sum\n updated_mean = (\n (last_weight_sum * last_mean + new_weight_sum * new_mean)\n / updated_weight_sum)\n\n if last_variance is None:\n updated_variance = None\n else:\n X_0 = np.where(nan_mask, 0, (X-new_mean)**2)\n new_variance =\\\n _safe_accumulator_op(\n np.average, X_0, weights=sample_weight, axis=0)\n new_variance *= total_weight_sum / new_weight_sum\n new_term = (\n new_weight_sum *\n (new_variance +\n (new_mean - updated_mean) ** 2))\n last_term = (\n last_weight_sum *\n (last_variance +\n (last_mean - updated_mean) ** 2))\n updated_variance = (new_term + last_term) / updated_weight_sum\n\n return updated_mean, updated_variance, updated_weight_sum" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/_safe_accumulator_op", + "name": "_safe_accumulator_op", + "qname": "sklearn.utils.extmath._safe_accumulator_op", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/_safe_accumulator_op/op", + "name": "op", + "qname": "sklearn.utils.extmath._safe_accumulator_op.op", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "function", + "default_value": "", + "description": "A numpy accumulator function such as np.mean or np.sum." + }, + "type": { + "kind": "NamedType", + "name": "function" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/_safe_accumulator_op/x", + "name": "x", + "qname": "sklearn.utils.extmath._safe_accumulator_op.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "A numpy array to apply the accumulator function." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/_safe_accumulator_op/args", + "name": "args", + "qname": "sklearn.utils.extmath._safe_accumulator_op.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "positional arguments", + "default_value": "", + "description": "Positional arguments passed to the accumulator function after the\ninput x." + }, + "type": { + "kind": "NamedType", + "name": "positional arguments" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/_safe_accumulator_op/kwargs", + "name": "kwargs", + "qname": "sklearn.utils.extmath._safe_accumulator_op.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "keyword arguments", + "default_value": "", + "description": "Keyword arguments passed to the accumulator function." 
+ }, + "type": { + "kind": "NamedType", + "name": "keyword arguments" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "This function provides numpy accumulator functions with a float64 dtype\nwhen used on a floating point input. This prevents accumulator overflow on\nsmaller floating point dtypes.", + "docstring": "This function provides numpy accumulator functions with a float64 dtype\nwhen used on a floating point input. This prevents accumulator overflow on\nsmaller floating point dtypes.\n\nParameters\n----------\nop : function\n A numpy accumulator function such as np.mean or np.sum.\nx : ndarray\n A numpy array to apply the accumulator function.\n*args : positional arguments\n Positional arguments passed to the accumulator function after the\n input x.\n**kwargs : keyword arguments\n Keyword arguments passed to the accumulator function.\n\nReturns\n-------\nresult\n The output of the accumulator function passed to this function.", + "code": "def _safe_accumulator_op(op, x, *args, **kwargs):\n \"\"\"\n This function provides numpy accumulator functions with a float64 dtype\n when used on a floating point input. This prevents accumulator overflow on\n smaller floating point dtypes.\n\n Parameters\n ----------\n op : function\n A numpy accumulator function such as np.mean or np.sum.\n x : ndarray\n A numpy array to apply the accumulator function.\n *args : positional arguments\n Positional arguments passed to the accumulator function after the\n input x.\n **kwargs : keyword arguments\n Keyword arguments passed to the accumulator function.\n\n Returns\n -------\n result\n The output of the accumulator function passed to this function.\n \"\"\"\n if np.issubdtype(x.dtype, np.floating) and x.dtype.itemsize < 8:\n result = op(x, *args, **kwargs, dtype=np.float64)\n else:\n result = op(x, *args, **kwargs)\n return result" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/cartesian", + "name": "cartesian", + "qname": "sklearn.utils.extmath.cartesian", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/cartesian/arrays", + "name": "arrays", + "qname": "sklearn.utils.extmath.cartesian.arrays", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "list of array-like", + "default_value": "", + "description": "1-D arrays to form the cartesian product of." + }, + "type": { + "kind": "NamedType", + "name": "list of array-like" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/cartesian/out", + "name": "out", + "qname": "sklearn.utils.extmath.cartesian.out", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray", + "default_value": "None", + "description": "Array to place the cartesian product in." 
+ }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Generate a cartesian product of input arrays.", + "docstring": "Generate a cartesian product of input arrays.\n\nParameters\n----------\narrays : list of array-like\n 1-D arrays to form the cartesian product of.\nout : ndarray, default=None\n Array to place the cartesian product in.\n\nReturns\n-------\nout : ndarray\n 2-D array of shape (M, len(arrays)) containing cartesian products\n formed of input arrays.\n\nExamples\n--------\n>>> cartesian(([1, 2, 3], [4, 5], [6, 7]))\narray([[1, 4, 6],\n [1, 4, 7],\n [1, 5, 6],\n [1, 5, 7],\n [2, 4, 6],\n [2, 4, 7],\n [2, 5, 6],\n [2, 5, 7],\n [3, 4, 6],\n [3, 4, 7],\n [3, 5, 6],\n [3, 5, 7]])\n\nNotes\n-----\nThis function may not be used on more than 32 arrays\nbecause the underlying numpy functions do not support it.", + "code": "def cartesian(arrays, out=None):\n \"\"\"Generate a cartesian product of input arrays.\n\n Parameters\n ----------\n arrays : list of array-like\n 1-D arrays to form the cartesian product of.\n out : ndarray, default=None\n Array to place the cartesian product in.\n\n Returns\n -------\n out : ndarray\n 2-D array of shape (M, len(arrays)) containing cartesian products\n formed of input arrays.\n\n Examples\n --------\n >>> cartesian(([1, 2, 3], [4, 5], [6, 7]))\n array([[1, 4, 6],\n [1, 4, 7],\n [1, 5, 6],\n [1, 5, 7],\n [2, 4, 6],\n [2, 4, 7],\n [2, 5, 6],\n [2, 5, 7],\n [3, 4, 6],\n [3, 4, 7],\n [3, 5, 6],\n [3, 5, 7]])\n\n Notes\n -----\n This function may not be used on more than 32 arrays\n because the underlying numpy functions do not support it.\n \"\"\"\n arrays = [np.asarray(x) for x in arrays]\n shape = (len(x) for x in arrays)\n dtype = arrays[0].dtype\n\n ix = np.indices(shape)\n ix = ix.reshape(len(arrays), -1).T\n\n if out is None:\n out = np.empty_like(ix, dtype=dtype)\n\n for n, arr in enumerate(arrays):\n out[:, n] = arrays[n][ix[:, n]]\n\n return out" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/density", + "name": "density", + "qname": "sklearn.utils.extmath.density", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/density/w", + "name": "w", + "qname": "sklearn.utils.extmath.density.w", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "The sparse vector." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/density/kwargs", + "name": "kwargs", + "qname": "sklearn.utils.extmath.density.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute density of a sparse vector.", + "docstring": "Compute density of a sparse vector.\n\nParameters\n----------\nw : array-like\n The sparse vector.\n\nReturns\n-------\nfloat\n The density of w, between 0 and 1.", + "code": "def density(w, **kwargs):\n \"\"\"Compute density of a sparse vector.\n\n Parameters\n ----------\n w : array-like\n The sparse vector.\n\n Returns\n -------\n float\n The density of w, between 0 and 1.\n \"\"\"\n if hasattr(w, \"toarray\"):\n d = float(w.nnz) / (w.shape[0] * w.shape[1])\n else:\n d = 0 if w is None else float((w != 0).sum()) / w.size\n return d" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/fast_logdet", + "name": "fast_logdet", + "qname": "sklearn.utils.extmath.fast_logdet", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/fast_logdet/A", + "name": "A", + "qname": "sklearn.utils.extmath.fast_logdet.A", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "The matrix." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute log(det(A)) for A symmetric.\n\nEquivalent to : np.log(nl.det(A)) but more robust.\nIt returns -Inf if det(A) is non positive or is not defined.", + "docstring": "Compute log(det(A)) for A symmetric.\n\nEquivalent to : np.log(nl.det(A)) but more robust.\nIt returns -Inf if det(A) is non positive or is not defined.\n\nParameters\n----------\nA : array-like\n The matrix.", + "code": "def fast_logdet(A):\n \"\"\"Compute log(det(A)) for A symmetric.\n\n Equivalent to : np.log(nl.det(A)) but more robust.\n It returns -Inf if det(A) is non positive or is not defined.\n\n Parameters\n ----------\n A : array-like\n The matrix.\n \"\"\"\n sign, ld = np.linalg.slogdet(A)\n if not sign > 0:\n return -np.inf\n return ld" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/log_logistic", + "name": "log_logistic", + "qname": "sklearn.utils.extmath.log_logistic", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/log_logistic/X", + "name": "X", + "qname": "sklearn.utils.extmath.log_logistic.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (M, N) or (M,)", + "default_value": "", + "description": "Argument to the logistic function." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (M, N) or (M,)" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/log_logistic/out", + "name": "out", + "qname": "sklearn.utils.extmath.log_logistic.out", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (M, N) or (M,)", + "default_value": "None", + "description": "Preallocated output array." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (M, N) or (M,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute the log of the logistic function, ``log(1 / (1 + e ** -x))``.\n\nThis implementation is numerically stable because it splits positive and\nnegative values::\n\n -log(1 + exp(-x_i)) if x_i > 0\n x_i - log(1 + exp(x_i)) if x_i <= 0\n\nFor the ordinary logistic function, use ``scipy.special.expit``.", + "docstring": "Compute the log of the logistic function, ``log(1 / (1 + e ** -x))``.\n\nThis implementation is numerically stable because it splits positive and\nnegative values::\n\n -log(1 + exp(-x_i)) if x_i > 0\n x_i - log(1 + exp(x_i)) if x_i <= 0\n\nFor the ordinary logistic function, use ``scipy.special.expit``.\n\nParameters\n----------\nX : array-like of shape (M, N) or (M,)\n Argument to the logistic function.\n\nout : array-like of shape (M, N) or (M,), default=None\n Preallocated output array.\n\nReturns\n-------\nout : ndarray of shape (M, N) or (M,)\n Log of the logistic function evaluated at every point in x.\n\nNotes\n-----\nSee the blog post describing this implementation:\nhttp://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/", + "code": "def log_logistic(X, out=None):\n \"\"\"Compute the log of the logistic function, ``log(1 / (1 + e ** -x))``.\n\n This implementation is numerically stable because it splits positive and\n negative values::\n\n -log(1 + exp(-x_i)) if x_i > 0\n x_i - log(1 + exp(x_i)) if x_i <= 0\n\n For the ordinary logistic function, use ``scipy.special.expit``.\n\n Parameters\n ----------\n X : array-like of shape (M, N) or (M,)\n Argument to the logistic function.\n\n out : array-like of shape (M, N) or (M,), default=None\n Preallocated output array.\n\n Returns\n -------\n out : ndarray of shape (M, N) or (M,)\n Log of the logistic function evaluated at every point in x.\n\n Notes\n -----\n See the blog post describing this implementation:\n http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/\n \"\"\"\n is_1d = X.ndim == 1\n X = np.atleast_2d(X)\n X = check_array(X, dtype=np.float64)\n\n n_samples, n_features = X.shape\n\n if out is None:\n out = np.empty_like(X)\n\n _log_logistic_sigmoid(n_samples, n_features, X, out)\n\n if is_1d:\n return np.squeeze(out)\n return out" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/make_nonnegative", + "name": "make_nonnegative", + "qname": "sklearn.utils.extmath.make_nonnegative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/make_nonnegative/X", + "name": "X", + "qname": "sklearn.utils.extmath.make_nonnegative.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "The matrix to make non-negative." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/make_nonnegative/min_value", + "name": "min_value", + "qname": "sklearn.utils.extmath.make_nonnegative.min_value", + "default_value": "0", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "0", + "description": "The threshold value." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Ensure `X.min()` >= `min_value`.", + "docstring": "Ensure `X.min()` >= `min_value`.\n\nParameters\n----------\nX : array-like\n The matrix to make non-negative.\nmin_value : float, default=0\n The threshold value.\n\nReturns\n-------\narray-like\n The thresholded array.\n\nRaises\n------\nValueError\n When X is sparse.", + "code": "def make_nonnegative(X, min_value=0):\n \"\"\"Ensure `X.min()` >= `min_value`.\n\n Parameters\n ----------\n X : array-like\n The matrix to make non-negative.\n min_value : float, default=0\n The threshold value.\n\n Returns\n -------\n array-like\n The thresholded array.\n\n Raises\n ------\n ValueError\n When X is sparse.\n \"\"\"\n min_ = X.min()\n if min_ < min_value:\n if sparse.issparse(X):\n raise ValueError(\"Cannot make the data matrix\"\n \" nonnegative because it is sparse.\"\n \" Adding a value to every entry would\"\n \" make it no longer sparse.\")\n X = X + (min_value - min_)\n return X" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_range_finder", + "name": "randomized_range_finder", + "qname": "sklearn.utils.extmath.randomized_range_finder", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_range_finder/A", + "name": "A", + "qname": "sklearn.utils.extmath.randomized_range_finder.A", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "2D array", + "default_value": "", + "description": "The input data matrix." + }, + "type": { + "kind": "NamedType", + "name": "2D array" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_range_finder/size", + "name": "size", + "qname": "sklearn.utils.extmath.randomized_range_finder.size", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Size of the return array." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_range_finder/n_iter", + "name": "n_iter", + "qname": "sklearn.utils.extmath.randomized_range_finder.n_iter", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of power iterations used to stabilize the result." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_range_finder/power_iteration_normalizer", + "name": "power_iteration_normalizer", + "qname": "sklearn.utils.extmath.randomized_range_finder.power_iteration_normalizer", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'auto', 'QR', 'LU', 'none'}", + "default_value": "'auto'", + "description": "Whether the power iterations are normalized with step-by-step\nQR factorization (the slowest but most accurate), 'none'\n(the fastest but numerically unstable when `n_iter` is large, e.g.\ntypically 5 or larger), or 'LU' factorization (numerically stable\nbut can lose slightly in accuracy). The 'auto' mode applies no\nnormalization if `n_iter` <= 2 and switches to LU otherwise.\n\n.. 
versionadded:: 0.18" + }, + "type": { + "kind": "EnumType", + "values": ["LU", "auto", "QR", "none"] + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_range_finder/random_state", + "name": "random_state", + "qname": "sklearn.utils.extmath.randomized_range_finder.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "The seed of the pseudo random number generator to use when shuffling\nthe data, i.e. getting the random vectors to initialize the algorithm.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Computes an orthonormal matrix whose range approximates the range of A.", + "docstring": "Computes an orthonormal matrix whose range approximates the range of A.\n\nParameters\n----------\nA : 2D array\n The input data matrix.\n\nsize : int\n Size of the return array.\n\nn_iter : int\n Number of power iterations used to stabilize the result.\n\npower_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n Whether the power iterations are normalized with step-by-step\n QR factorization (the slowest but most accurate), 'none'\n (the fastest but numerically unstable when `n_iter` is large, e.g.\n typically 5 or larger), or 'LU' factorization (numerically stable\n but can lose slightly in accuracy). The 'auto' mode applies no\n normalization if `n_iter` <= 2 and switches to LU otherwise.\n\n .. versionadded:: 0.18\n\nrandom_state : int, RandomState instance or None, default=None\n The seed of the pseudo random number generator to use when shuffling\n the data, i.e. getting the random vectors to initialize the algorithm.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nQ : ndarray\n A (size x size) projection matrix, the range of which\n approximates well the range of the input matrix A.\n\nNotes\n-----\n\nFollows Algorithm 4.3 of\nFinding structure with randomness: Stochastic algorithms for constructing\napproximate matrix decompositions\nHalko, et al., 2009 (arXiv:909) https://arxiv.org/pdf/0909.4061.pdf\n\nAn implementation of a randomized algorithm for principal component\nanalysis\nA. Szlam et al. 2014", + "code": "@_deprecate_positional_args\ndef randomized_range_finder(A, *, size, n_iter,\n power_iteration_normalizer='auto',\n random_state=None):\n \"\"\"Computes an orthonormal matrix whose range approximates the range of A.\n\n Parameters\n ----------\n A : 2D array\n The input data matrix.\n\n size : int\n Size of the return array.\n\n n_iter : int\n Number of power iterations used to stabilize the result.\n\n power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n Whether the power iterations are normalized with step-by-step\n QR factorization (the slowest but most accurate), 'none'\n (the fastest but numerically unstable when `n_iter` is large, e.g.\n typically 5 or larger), or 'LU' factorization (numerically stable\n but can lose slightly in accuracy). The 'auto' mode applies no\n normalization if `n_iter` <= 2 and switches to LU otherwise.\n\n .. 
versionadded:: 0.18\n\n random_state : int, RandomState instance or None, default=None\n The seed of the pseudo random number generator to use when shuffling\n the data, i.e. getting the random vectors to initialize the algorithm.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n Returns\n -------\n Q : ndarray\n A (size x size) projection matrix, the range of which\n approximates well the range of the input matrix A.\n\n Notes\n -----\n\n Follows Algorithm 4.3 of\n Finding structure with randomness: Stochastic algorithms for constructing\n approximate matrix decompositions\n Halko, et al., 2009 (arXiv:909) https://arxiv.org/pdf/0909.4061.pdf\n\n An implementation of a randomized algorithm for principal component\n analysis\n A. Szlam et al. 2014\n \"\"\"\n random_state = check_random_state(random_state)\n\n # Generating normal random vectors with shape: (A.shape[1], size)\n Q = random_state.normal(size=(A.shape[1], size))\n if A.dtype.kind == 'f':\n # Ensure f32 is preserved as f32\n Q = Q.astype(A.dtype, copy=False)\n\n # Deal with \"auto\" mode\n if power_iteration_normalizer == 'auto':\n if n_iter <= 2:\n power_iteration_normalizer = 'none'\n else:\n power_iteration_normalizer = 'LU'\n\n # Perform power iterations with Q to further 'imprint' the top\n # singular vectors of A in Q\n for i in range(n_iter):\n if power_iteration_normalizer == 'none':\n Q = safe_sparse_dot(A, Q)\n Q = safe_sparse_dot(A.T, Q)\n elif power_iteration_normalizer == 'LU':\n Q, _ = linalg.lu(safe_sparse_dot(A, Q), permute_l=True)\n Q, _ = linalg.lu(safe_sparse_dot(A.T, Q), permute_l=True)\n elif power_iteration_normalizer == 'QR':\n Q, _ = linalg.qr(safe_sparse_dot(A, Q), mode='economic')\n Q, _ = linalg.qr(safe_sparse_dot(A.T, Q), mode='economic')\n\n # Sample the range of A using by linear projection of Q\n # Extract an orthonormal basis\n Q, _ = linalg.qr(safe_sparse_dot(A, Q), mode='economic')\n return Q" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_svd", + "name": "randomized_svd", + "qname": "sklearn.utils.extmath.randomized_svd", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_svd/M", + "name": "M", + "qname": "sklearn.utils.extmath.randomized_svd.M", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{ndarray, sparse matrix}", + "default_value": "", + "description": "Matrix to decompose." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_svd/n_components", + "name": "n_components", + "qname": "sklearn.utils.extmath.randomized_svd.n_components", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of singular values and vectors to extract." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_svd/n_oversamples", + "name": "n_oversamples", + "qname": "sklearn.utils.extmath.randomized_svd.n_oversamples", + "default_value": "10", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "10", + "description": "Additional number of random vectors to sample the range of M so as\nto ensure proper conditioning. The total number of random vectors\nused to find the range of M is n_components + n_oversamples. 
A smaller\nnumber can improve speed but can negatively impact the quality of\napproximation of singular vectors and singular values." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_svd/n_iter", + "name": "n_iter", + "qname": "sklearn.utils.extmath.randomized_svd.n_iter", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int or 'auto'", + "default_value": "'auto'", + "description": "Number of power iterations. It can be used to deal with very noisy\nproblems. When 'auto', it is set to 4, unless `n_components` is small\n(< .1 * min(X.shape)), in which case `n_iter` is set to 7.\nThis improves precision with few components.\n\n.. versionchanged:: 0.18" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "'auto'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_svd/power_iteration_normalizer", + "name": "power_iteration_normalizer", + "qname": "sklearn.utils.extmath.randomized_svd.power_iteration_normalizer", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'auto', 'QR', 'LU', 'none'}", + "default_value": "'auto'", + "description": "Whether the power iterations are normalized with step-by-step\nQR factorization (the slowest but most accurate), 'none'\n(the fastest but numerically unstable when `n_iter` is large, e.g.\ntypically 5 or larger), or 'LU' factorization (numerically stable\nbut can lose slightly in accuracy). The 'auto' mode applies no\nnormalization if `n_iter` <= 2 and switches to LU otherwise.\n\n.. versionadded:: 0.18" + }, + "type": { + "kind": "EnumType", + "values": ["LU", "auto", "QR", "none"] + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_svd/transpose", + "name": "transpose", + "qname": "sklearn.utils.extmath.randomized_svd.transpose", + "default_value": "'auto'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool or 'auto'", + "default_value": "'auto'", + "description": "Whether the algorithm should be applied to M.T instead of M. The\nresult should approximately be the same. The 'auto' mode will\ntrigger the transposition if M.shape[1] > M.shape[0] since this\nimplementation of randomized SVD tends to be a little faster in that\ncase.\n\n.. versionchanged:: 0.18" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'auto'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_svd/flip_sign", + "name": "flip_sign", + "qname": "sklearn.utils.extmath.randomized_svd.flip_sign", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "The output of a singular value decomposition is only unique up to a\npermutation of the signs of the singular vectors. If `flip_sign` is\nset to `True`, the sign ambiguity is resolved by making the largest\nloadings for each component in the left singular vectors positive." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/randomized_svd/random_state", + "name": "random_state", + "qname": "sklearn.utils.extmath.randomized_svd.random_state", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "0", + "description": "The seed of the pseudo random number generator to use when shuffling\nthe data, i.e. getting the random vectors to initialize the algorithm.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary <random_state>`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Computes a truncated randomized SVD.", + "docstring": "Computes a truncated randomized SVD.\n\nParameters\n----------\nM : {ndarray, sparse matrix}\n Matrix to decompose.\n\nn_components : int\n Number of singular values and vectors to extract.\n\nn_oversamples : int, default=10\n Additional number of random vectors to sample the range of M so as\n to ensure proper conditioning. The total number of random vectors\n used to find the range of M is n_components + n_oversamples. A smaller\n number can improve speed but can negatively impact the quality of\n approximation of singular vectors and singular values.\n\nn_iter : int or 'auto', default='auto'\n Number of power iterations. It can be used to deal with very noisy\n problems. When 'auto', it is set to 4, unless `n_components` is small\n (< .1 * min(X.shape)), in which case `n_iter` is set to 7.\n This improves precision with few components.\n\n .. versionchanged:: 0.18\n\npower_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n Whether the power iterations are normalized with step-by-step\n QR factorization (the slowest but most accurate), 'none'\n (the fastest but numerically unstable when `n_iter` is large, e.g.\n typically 5 or larger), or 'LU' factorization (numerically stable\n but can lose slightly in accuracy). The 'auto' mode applies no\n normalization if `n_iter` <= 2 and switches to LU otherwise.\n\n .. versionadded:: 0.18\n\ntranspose : bool or 'auto', default='auto'\n Whether the algorithm should be applied to M.T instead of M. The\n result should approximately be the same. The 'auto' mode will\n trigger the transposition if M.shape[1] > M.shape[0] since this\n implementation of randomized SVD tends to be a little faster in that\n case.\n\n .. versionchanged:: 0.18\n\nflip_sign : bool, default=True\n The output of a singular value decomposition is only unique up to a\n permutation of the signs of the singular vectors. If `flip_sign` is\n set to `True`, the sign ambiguity is resolved by making the largest\n loadings for each component in the left singular vectors positive.\n\nrandom_state : int, RandomState instance or None, default=0\n The seed of the pseudo random number generator to use when shuffling\n the data, i.e. getting the random vectors to initialize the algorithm.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary <random_state>`.\n\nNotes\n-----\nThis algorithm finds a (usually very good) approximate truncated\nsingular value decomposition using randomization to speed up the\ncomputations. It is particularly fast on large matrices on which\nyou wish to extract only a small number of components. In order to\nobtain further speed up, `n_iter` can be set <=2 (at the cost of\nloss of precision).\n\nReferences\n----------\n* Finding structure with randomness: Stochastic algorithms for constructing\n approximate matrix decompositions\n Halko, et al., 2009 https://arxiv.org/abs/0909.4061\n\n* A randomized algorithm for the decomposition of matrices\n Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert\n\n* An implementation of a randomized algorithm for principal component\n analysis\n A. Szlam et al. 2014", + "code": "@_deprecate_positional_args\ndef randomized_svd(M, n_components, *, n_oversamples=10, n_iter='auto',\n power_iteration_normalizer='auto', transpose='auto',\n flip_sign=True, random_state=0):\n \"\"\"Computes a truncated randomized SVD.\n\n Parameters\n ----------\n M : {ndarray, sparse matrix}\n Matrix to decompose.\n\n n_components : int\n Number of singular values and vectors to extract.\n\n n_oversamples : int, default=10\n Additional number of random vectors to sample the range of M so as\n to ensure proper conditioning. The total number of random vectors\n used to find the range of M is n_components + n_oversamples. A smaller\n number can improve speed but can negatively impact the quality of\n approximation of singular vectors and singular values.\n\n n_iter : int or 'auto', default='auto'\n Number of power iterations. It can be used to deal with very noisy\n problems. When 'auto', it is set to 4, unless `n_components` is small\n (< .1 * min(X.shape)), in which case `n_iter` is set to 7.\n This improves precision with few components.\n\n .. versionchanged:: 0.18\n\n power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'\n Whether the power iterations are normalized with step-by-step\n QR factorization (the slowest but most accurate), 'none'\n (the fastest but numerically unstable when `n_iter` is large, e.g.\n typically 5 or larger), or 'LU' factorization (numerically stable\n but can lose slightly in accuracy). The 'auto' mode applies no\n normalization if `n_iter` <= 2 and switches to LU otherwise.\n\n .. versionadded:: 0.18\n\n transpose : bool or 'auto', default='auto'\n Whether the algorithm should be applied to M.T instead of M. The\n result should approximately be the same. The 'auto' mode will\n trigger the transposition if M.shape[1] > M.shape[0] since this\n implementation of randomized SVD tends to be a little faster in that\n case.\n\n .. versionchanged:: 0.18\n\n flip_sign : bool, default=True\n The output of a singular value decomposition is only unique up to a\n permutation of the signs of the singular vectors. If `flip_sign` is\n set to `True`, the sign ambiguity is resolved by making the largest\n loadings for each component in the left singular vectors positive.\n\n random_state : int, RandomState instance or None, default=0\n The seed of the pseudo random number generator to use when shuffling\n the data, i.e. getting the random vectors to initialize the algorithm.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary <random_state>`.\n\n Notes\n -----\n This algorithm finds a (usually very good) approximate truncated\n singular value decomposition using randomization to speed up the\n computations. It is particularly fast on large matrices on which\n you wish to extract only a small number of components. 
In order to\n obtain further speed up, `n_iter` can be set <=2 (at the cost of\n loss of precision).\n\n References\n ----------\n * Finding structure with randomness: Stochastic algorithms for constructing\n approximate matrix decompositions\n Halko, et al., 2009 https://arxiv.org/abs/0909.4061\n\n * A randomized algorithm for the decomposition of matrices\n Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert\n\n * An implementation of a randomized algorithm for principal component\n analysis\n A. Szlam et al. 2014\n \"\"\"\n if isinstance(M, (sparse.lil_matrix, sparse.dok_matrix)):\n warnings.warn(\"Calculating SVD of a {} is expensive. \"\n \"csr_matrix is more efficient.\".format(\n type(M).__name__),\n sparse.SparseEfficiencyWarning)\n\n random_state = check_random_state(random_state)\n n_random = n_components + n_oversamples\n n_samples, n_features = M.shape\n\n if n_iter == 'auto':\n # Checks if the number of iterations is explicitly specified\n # Adjust n_iter. 7 was found a good compromise for PCA. See #5299\n n_iter = 7 if n_components < .1 * min(M.shape) else 4\n\n if transpose == 'auto':\n transpose = n_samples < n_features\n if transpose:\n # this implementation is a bit faster with smaller shape[1]\n M = M.T\n\n Q = randomized_range_finder(\n M, size=n_random, n_iter=n_iter,\n power_iteration_normalizer=power_iteration_normalizer,\n random_state=random_state)\n\n # project M to the (k + p) dimensional space using the basis vectors\n B = safe_sparse_dot(Q.T, M)\n\n # compute the SVD on the thin matrix: (k + p) wide\n Uhat, s, Vt = linalg.svd(B, full_matrices=False)\n\n del B\n U = np.dot(Q, Uhat)\n\n if flip_sign:\n if not transpose:\n U, Vt = svd_flip(U, Vt)\n else:\n # In case of transpose u_based_decision=false\n # to actually flip based on u and not v.\n U, Vt = svd_flip(U, Vt, u_based_decision=False)\n\n if transpose:\n # transpose back the results according to the input convention\n return Vt[:n_components, :].T, s[:n_components], U[:, :n_components].T\n else:\n return U[:, :n_components], s[:n_components], Vt[:n_components, :]" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/row_norms", + "name": "row_norms", + "qname": "sklearn.utils.extmath.row_norms", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/row_norms/X", + "name": "X", + "qname": "sklearn.utils.extmath.row_norms.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "The input array." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/row_norms/squared", + "name": "squared", + "qname": "sklearn.utils.extmath.row_norms.squared", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, return squared norms." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Row-wise (squared) Euclidean norm of X.\n\nEquivalent to np.sqrt((X * X).sum(axis=1)), but also supports sparse\nmatrices and does not create an X.shape-sized temporary.\n\nPerforms no input validation.", + "docstring": "Row-wise (squared) Euclidean norm of X.\n\nEquivalent to np.sqrt((X * X).sum(axis=1)), but also supports sparse\nmatrices and does not create an X.shape-sized temporary.\n\nPerforms no input validation.\n\nParameters\n----------\nX : array-like\n The input array.\nsquared : bool, default=False\n If True, return squared norms.\n\nReturns\n-------\narray-like\n The row-wise (squared) Euclidean norm of X.", + "code": "def row_norms(X, squared=False):\n \"\"\"Row-wise (squared) Euclidean norm of X.\n\n Equivalent to np.sqrt((X * X).sum(axis=1)), but also supports sparse\n matrices and does not create an X.shape-sized temporary.\n\n Performs no input validation.\n\n Parameters\n ----------\n X : array-like\n The input array.\n squared : bool, default=False\n If True, return squared norms.\n\n Returns\n -------\n array-like\n The row-wise (squared) Euclidean norm of X.\n \"\"\"\n if sparse.issparse(X):\n if not isinstance(X, sparse.csr_matrix):\n X = sparse.csr_matrix(X)\n norms = csr_row_norms(X)\n else:\n norms = np.einsum('ij,ij->i', X, X)\n\n if not squared:\n np.sqrt(norms, norms)\n return norms" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/safe_sparse_dot", + "name": "safe_sparse_dot", + "qname": "sklearn.utils.extmath.safe_sparse_dot", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/safe_sparse_dot/a", + "name": "a", + "qname": "sklearn.utils.extmath.safe_sparse_dot.a", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{ndarray, sparse matrix}", + "default_value": "", + "description": "" + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/safe_sparse_dot/b", + "name": "b", + "qname": "sklearn.utils.extmath.safe_sparse_dot.b", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{ndarray, sparse matrix}", + "default_value": "", + "description": "" + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/safe_sparse_dot/dense_output", + "name": "dense_output", + "qname": "sklearn.utils.extmath.safe_sparse_dot.dense_output", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When False, ``a`` and ``b`` both being sparse will yield sparse output.\nWhen True, output will always be a dense array." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Dot product that handle the sparse matrix case correctly.", + "docstring": "Dot product that handle the sparse matrix case correctly.\n\nParameters\n----------\na : {ndarray, sparse matrix}\nb : {ndarray, sparse matrix}\ndense_output : bool, default=False\n When False, ``a`` and ``b`` both being sparse will yield sparse output.\n When True, output will always be a dense array.\n\nReturns\n-------\ndot_product : {ndarray, sparse matrix}\n Sparse if ``a`` and ``b`` are sparse and ``dense_output=False``.", + "code": "@_deprecate_positional_args\ndef safe_sparse_dot(a, b, *, dense_output=False):\n \"\"\"Dot product that handle the sparse matrix case correctly.\n\n Parameters\n ----------\n a : {ndarray, sparse matrix}\n b : {ndarray, sparse matrix}\n dense_output : bool, default=False\n When False, ``a`` and ``b`` both being sparse will yield sparse output.\n When True, output will always be a dense array.\n\n Returns\n -------\n dot_product : {ndarray, sparse matrix}\n Sparse if ``a`` and ``b`` are sparse and ``dense_output=False``.\n \"\"\"\n if a.ndim > 2 or b.ndim > 2:\n if sparse.issparse(a):\n # sparse is always 2D. Implies b is 3D+\n # [i, j] @ [k, ..., l, m, n] -> [i, k, ..., l, n]\n b_ = np.rollaxis(b, -2)\n b_2d = b_.reshape((b.shape[-2], -1))\n ret = a @ b_2d\n ret = ret.reshape(a.shape[0], *b_.shape[1:])\n elif sparse.issparse(b):\n # sparse is always 2D. Implies a is 3D+\n # [k, ..., l, m] @ [i, j] -> [k, ..., l, j]\n a_2d = a.reshape(-1, a.shape[-1])\n ret = a_2d @ b\n ret = ret.reshape(*a.shape[:-1], b.shape[1])\n else:\n ret = np.dot(a, b)\n else:\n ret = a @ b\n\n if (sparse.issparse(a) and sparse.issparse(b)\n and dense_output and hasattr(ret, \"toarray\")):\n return ret.toarray()\n return ret" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/softmax", + "name": "softmax", + "qname": "sklearn.utils.extmath.softmax", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/softmax/X", + "name": "X", + "qname": "sklearn.utils.extmath.softmax.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of float of shape (M, N)", + "default_value": "", + "description": "Argument to the logistic function." + }, + "type": { + "kind": "NamedType", + "name": "array-like of float of shape (M, N)" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/softmax/copy", + "name": "copy", + "qname": "sklearn.utils.extmath.softmax.copy", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Copy X or not." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Calculate the softmax function.\n\nThe softmax function is calculated by\nnp.exp(X) / np.sum(np.exp(X), axis=1)\n\nThis will cause overflow when large values are exponentiated.\nHence the largest value in each row is subtracted from each data\npoint to prevent this.", + "docstring": "Calculate the softmax function.\n\nThe softmax function is calculated by\nnp.exp(X) / np.sum(np.exp(X), axis=1)\n\nThis will cause overflow when large values are exponentiated.\nHence the largest value in each row is subtracted from each data\npoint to prevent this.\n\nParameters\n----------\nX : array-like of float of shape (M, N)\n Argument to the logistic function.\n\ncopy : bool, default=True\n Copy X or not.\n\nReturns\n-------\nout : ndarray of shape (M, N)\n Softmax function evaluated at every point in x.", + "code": "def softmax(X, copy=True):\n \"\"\"\n Calculate the softmax function.\n\n The softmax function is calculated by\n np.exp(X) / np.sum(np.exp(X), axis=1)\n\n This will cause overflow when large values are exponentiated.\n Hence the largest value in each row is subtracted from each data\n point to prevent this.\n\n Parameters\n ----------\n X : array-like of float of shape (M, N)\n Argument to the logistic function.\n\n copy : bool, default=True\n Copy X or not.\n\n Returns\n -------\n out : ndarray of shape (M, N)\n Softmax function evaluated at every point in x.\n \"\"\"\n if copy:\n X = np.copy(X)\n max_prob = np.max(X, axis=1).reshape((-1, 1))\n X -= max_prob\n np.exp(X, X)\n sum_prob = np.sum(X, axis=1).reshape((-1, 1))\n X /= sum_prob\n return X" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/squared_norm", + "name": "squared_norm", + "qname": "sklearn.utils.extmath.squared_norm", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/squared_norm/x", + "name": "x", + "qname": "sklearn.utils.extmath.squared_norm.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Squared Euclidean or Frobenius norm of x.\n\nFaster than norm(x) ** 2.", + "docstring": "Squared Euclidean or Frobenius norm of x.\n\nFaster than norm(x) ** 2.\n\nParameters\n----------\nx : array-like\n\nReturns\n-------\nfloat\n The Euclidean norm when x is a vector, the Frobenius norm when x\n is a matrix (2-d array).", + "code": "def squared_norm(x):\n \"\"\"Squared Euclidean or Frobenius norm of x.\n\n Faster than norm(x) ** 2.\n\n Parameters\n ----------\n x : array-like\n\n Returns\n -------\n float\n The Euclidean norm when x is a vector, the Frobenius norm when x\n is a matrix (2-d array).\n \"\"\"\n x = np.ravel(x, order='K')\n if np.issubdtype(x.dtype, np.integer):\n warnings.warn('Array type is integer, np.dot may overflow. 
'\n 'Data should be float type to avoid this issue',\n UserWarning)\n return np.dot(x, x)" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/stable_cumsum", + "name": "stable_cumsum", + "qname": "sklearn.utils.extmath.stable_cumsum", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/stable_cumsum/arr", + "name": "arr", + "qname": "sklearn.utils.extmath.stable_cumsum.arr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "To be cumulatively summed as flat." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/stable_cumsum/axis", + "name": "axis", + "qname": "sklearn.utils.extmath.stable_cumsum.axis", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Axis along which the cumulative sum is computed.\nThe default (None) is to compute the cumsum over the flattened array." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/stable_cumsum/rtol", + "name": "rtol", + "qname": "sklearn.utils.extmath.stable_cumsum.rtol", + "default_value": "1e-05", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1e-05", + "description": "Relative tolerance, see ``np.allclose``." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/stable_cumsum/atol", + "name": "atol", + "qname": "sklearn.utils.extmath.stable_cumsum.atol", + "default_value": "1e-08", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1e-08", + "description": "Absolute tolerance, see ``np.allclose``." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Use high precision for cumsum and check that final value matches sum.", + "docstring": "Use high precision for cumsum and check that final value matches sum.\n\nParameters\n----------\narr : array-like\n To be cumulatively summed as flat.\naxis : int, default=None\n Axis along which the cumulative sum is computed.\n The default (None) is to compute the cumsum over the flattened array.\nrtol : float, default=1e-05\n Relative tolerance, see ``np.allclose``.\natol : float, default=1e-08\n Absolute tolerance, see ``np.allclose``.", + "code": "def stable_cumsum(arr, axis=None, rtol=1e-05, atol=1e-08):\n \"\"\"Use high precision for cumsum and check that final value matches sum.\n\n Parameters\n ----------\n arr : array-like\n To be cumulatively summed as flat.\n axis : int, default=None\n Axis along which the cumulative sum is computed.\n The default (None) is to compute the cumsum over the flattened array.\n rtol : float, default=1e-05\n Relative tolerance, see ``np.allclose``.\n atol : float, default=1e-08\n Absolute tolerance, see ``np.allclose``.\n \"\"\"\n out = np.cumsum(arr, axis=axis, dtype=np.float64)\n expected = np.sum(arr, axis=axis, dtype=np.float64)\n if not np.all(np.isclose(out.take(-1, axis=axis), expected, rtol=rtol,\n atol=atol, equal_nan=True)):\n warnings.warn('cumsum was found to be unstable: '\n 'its last element does not correspond to sum',\n RuntimeWarning)\n return out" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/svd_flip", + "name": "svd_flip", + "qname": "sklearn.utils.extmath.svd_flip", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/svd_flip/u", + "name": "u", + "qname": "sklearn.utils.extmath.svd_flip.u", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "u and v are the output of `linalg.svd` or\n:func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\ndimensions so one can compute `np.dot(u * s, v)`." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/svd_flip/v", + "name": "v", + "qname": "sklearn.utils.extmath.svd_flip.v", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "u and v are the output of `linalg.svd` or\n:func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\ndimensions so one can compute `np.dot(u * s, v)`.\nThe input v should really be called vt to be consistent with scipy's\nouput." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/svd_flip/u_based_decision", + "name": "u_based_decision", + "qname": "sklearn.utils.extmath.svd_flip.u_based_decision", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, use the columns of u as the basis for sign flipping.\nOtherwise, use the rows of v. The choice of which variable to base the\ndecision on is generally algorithm dependent." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Sign correction to ensure deterministic output from SVD.\n\nAdjusts the columns of u and the rows of v such that the loadings in the\ncolumns in u that are largest in absolute value are always positive.", + "docstring": "Sign correction to ensure deterministic output from SVD.\n\nAdjusts the columns of u and the rows of v such that the loadings in the\ncolumns in u that are largest in absolute value are always positive.\n\nParameters\n----------\nu : ndarray\n u and v are the output of `linalg.svd` or\n :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n dimensions so one can compute `np.dot(u * s, v)`.\n\nv : ndarray\n u and v are the output of `linalg.svd` or\n :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n dimensions so one can compute `np.dot(u * s, v)`.\n The input v should really be called vt to be consistent with scipy's\n ouput.\n\nu_based_decision : bool, default=True\n If True, use the columns of u as the basis for sign flipping.\n Otherwise, use the rows of v. The choice of which variable to base the\n decision on is generally algorithm dependent.\n\n\nReturns\n-------\nu_adjusted, v_adjusted : arrays with the same dimensions as the input.", + "code": "def svd_flip(u, v, u_based_decision=True):\n \"\"\"Sign correction to ensure deterministic output from SVD.\n\n Adjusts the columns of u and the rows of v such that the loadings in the\n columns in u that are largest in absolute value are always positive.\n\n Parameters\n ----------\n u : ndarray\n u and v are the output of `linalg.svd` or\n :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n dimensions so one can compute `np.dot(u * s, v)`.\n\n v : ndarray\n u and v are the output of `linalg.svd` or\n :func:`~sklearn.utils.extmath.randomized_svd`, with matching inner\n dimensions so one can compute `np.dot(u * s, v)`.\n The input v should really be called vt to be consistent with scipy's\n ouput.\n\n u_based_decision : bool, default=True\n If True, use the columns of u as the basis for sign flipping.\n Otherwise, use the rows of v. The choice of which variable to base the\n decision on is generally algorithm dependent.\n\n\n Returns\n -------\n u_adjusted, v_adjusted : arrays with the same dimensions as the input.\n\n \"\"\"\n if u_based_decision:\n # columns of u, rows of v\n max_abs_cols = np.argmax(np.abs(u), axis=0)\n signs = np.sign(u[max_abs_cols, range(u.shape[1])])\n u *= signs\n v *= signs[:, np.newaxis]\n else:\n # rows of v, columns of u\n max_abs_rows = np.argmax(np.abs(v), axis=1)\n signs = np.sign(v[range(v.shape[0]), max_abs_rows])\n u *= signs\n v *= signs[:, np.newaxis]\n return u, v" + }, + { + "id": "scikit-learn/sklearn.utils.extmath/weighted_mode", + "name": "weighted_mode", + "qname": "sklearn.utils.extmath.weighted_mode", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.extmath/weighted_mode/a", + "name": "a", + "qname": "sklearn.utils.extmath.weighted_mode.a", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "n-dimensional array of which to find mode(s)." 
+ }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/weighted_mode/w", + "name": "w", + "qname": "sklearn.utils.extmath.weighted_mode.w", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "n-dimensional array of weights for each value." + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.utils.extmath/weighted_mode/axis", + "name": "axis", + "qname": "sklearn.utils.extmath.weighted_mode.axis", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Axis along which to operate. Default is 0, i.e. the first axis." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Returns an array of the weighted modal (most common) value in a.\n\nIf there is more than one such value, only the first is returned.\nThe bin-count for the modal bins is also returned.\n\nThis is an extension of the algorithm in scipy.stats.mode.", + "docstring": "Returns an array of the weighted modal (most common) value in a.\n\nIf there is more than one such value, only the first is returned.\nThe bin-count for the modal bins is also returned.\n\nThis is an extension of the algorithm in scipy.stats.mode.\n\nParameters\n----------\na : array-like\n n-dimensional array of which to find mode(s).\nw : array-like\n n-dimensional array of weights for each value.\naxis : int, default=0\n Axis along which to operate. Default is 0, i.e. the first axis.\n\nReturns\n-------\nvals : ndarray\n Array of modal values.\nscore : ndarray\n Array of weighted counts for each mode.\n\nExamples\n--------\n>>> from sklearn.utils.extmath import weighted_mode\n>>> x = [4, 1, 4, 2, 4, 2]\n>>> weights = [1, 1, 1, 1, 1, 1]\n>>> weighted_mode(x, weights)\n(array([4.]), array([3.]))\n\nThe value 4 appears three times: with uniform weights, the result is\nsimply the mode of the distribution.\n\n>>> weights = [1, 3, 0.5, 1.5, 1, 2] # deweight the 4's\n>>> weighted_mode(x, weights)\n(array([2.]), array([3.5]))\n\nThe value 2 has the highest score: it appears twice with weights of\n1.5 and 2: the sum of these is 3.5.\n\nSee Also\n--------\nscipy.stats.mode", + "code": "@_deprecate_positional_args\ndef weighted_mode(a, w, *, axis=0):\n \"\"\"Returns an array of the weighted modal (most common) value in a.\n\n If there is more than one such value, only the first is returned.\n The bin-count for the modal bins is also returned.\n\n This is an extension of the algorithm in scipy.stats.mode.\n\n Parameters\n ----------\n a : array-like\n n-dimensional array of which to find mode(s).\n w : array-like\n n-dimensional array of weights for each value.\n axis : int, default=0\n Axis along which to operate. Default is 0, i.e. 
the first axis.\n\n Returns\n -------\n vals : ndarray\n Array of modal values.\n score : ndarray\n Array of weighted counts for each mode.\n\n Examples\n --------\n >>> from sklearn.utils.extmath import weighted_mode\n >>> x = [4, 1, 4, 2, 4, 2]\n >>> weights = [1, 1, 1, 1, 1, 1]\n >>> weighted_mode(x, weights)\n (array([4.]), array([3.]))\n\n The value 4 appears three times: with uniform weights, the result is\n simply the mode of the distribution.\n\n >>> weights = [1, 3, 0.5, 1.5, 1, 2] # deweight the 4's\n >>> weighted_mode(x, weights)\n (array([2.]), array([3.5]))\n\n The value 2 has the highest score: it appears twice with weights of\n 1.5 and 2: the sum of these is 3.5.\n\n See Also\n --------\n scipy.stats.mode\n \"\"\"\n if axis is None:\n a = np.ravel(a)\n w = np.ravel(w)\n axis = 0\n else:\n a = np.asarray(a)\n w = np.asarray(w)\n\n if a.shape != w.shape:\n w = np.full(a.shape, w, dtype=w.dtype)\n\n scores = np.unique(np.ravel(a)) # get ALL unique values\n testshape = list(a.shape)\n testshape[axis] = 1\n oldmostfreq = np.zeros(testshape)\n oldcounts = np.zeros(testshape)\n for score in scores:\n template = np.zeros(a.shape)\n ind = (a == score)\n template[ind] = w[ind]\n counts = np.expand_dims(np.sum(template, axis), axis)\n mostfrequent = np.where(counts > oldcounts, score, oldmostfreq)\n oldcounts = np.maximum(counts, oldcounts)\n oldmostfreq = mostfrequent\n return mostfrequent, oldcounts" + }, + { + "id": "scikit-learn/sklearn.utils.fixes/_FuncWrapper/__call__", + "name": "__call__", + "qname": "sklearn.utils.fixes._FuncWrapper.__call__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.fixes/_FuncWrapper/__call__/self", + "name": "self", + "qname": "sklearn.utils.fixes._FuncWrapper.__call__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.fixes/_FuncWrapper/__call__/args", + "name": "args", + "qname": "sklearn.utils.fixes._FuncWrapper.__call__.args", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.fixes/_FuncWrapper/__call__/kwargs", + "name": "kwargs", + "qname": "sklearn.utils.fixes._FuncWrapper.__call__.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __call__(self, *args, **kwargs):\n with config_context(**self.config):\n return self.function(*args, **kwargs)" + }, + { + "id": "scikit-learn/sklearn.utils.fixes/_FuncWrapper/__init__", + "name": "__init__", + "qname": "sklearn.utils.fixes._FuncWrapper.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.fixes/_FuncWrapper/__init__/self", + "name": "self", + "qname": "sklearn.utils.fixes._FuncWrapper.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.fixes/_FuncWrapper/__init__/function", + "name": "function", + "qname": "sklearn.utils.fixes._FuncWrapper.__init__.function", + 
"default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "\"Load the global configuration before calling the function.", + "docstring": "", + "code": " def __init__(self, function):\n self.function = function\n self.config = get_config()\n update_wrapper(self, self.function)" + }, + { + "id": "scikit-learn/sklearn.utils.fixes/_astype_copy_false", + "name": "_astype_copy_false", + "qname": "sklearn.utils.fixes._astype_copy_false", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.fixes/_astype_copy_false/X", + "name": "X", + "qname": "sklearn.utils.fixes._astype_copy_false.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns the copy=False parameter for\n{ndarray, csr_matrix, csc_matrix}.astype when possible,\notherwise don't specify", + "docstring": "Returns the copy=False parameter for\n{ndarray, csr_matrix, csc_matrix}.astype when possible,\notherwise don't specify", + "code": "def _astype_copy_false(X):\n \"\"\"Returns the copy=False parameter for\n {ndarray, csr_matrix, csc_matrix}.astype when possible,\n otherwise don't specify\n \"\"\"\n if sp_version >= parse_version('1.1') or not sp.issparse(X):\n return {'copy': False}\n else:\n return {}" + }, + { + "id": "scikit-learn/sklearn.utils.fixes/_joblib_parallel_args", + "name": "_joblib_parallel_args", + "qname": "sklearn.utils.fixes._joblib_parallel_args", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.fixes/_joblib_parallel_args/kwargs", + "name": "kwargs", + "qname": "sklearn.utils.fixes._joblib_parallel_args.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Set joblib.Parallel arguments in a compatible way for 0.11 and 0.12+\n\nFor joblib 0.11 this maps both ``prefer`` and ``require`` parameters to\na specific ``backend``.", + "docstring": "Set joblib.Parallel arguments in a compatible way for 0.11 and 0.12+\n\nFor joblib 0.11 this maps both ``prefer`` and ``require`` parameters to\na specific ``backend``.\n\nParameters\n----------\n\nprefer : str in {'processes', 'threads'} or None\n Soft hint to choose the default backend if no specific backend\n was selected with the parallel_backend context manager.\n\nrequire : 'sharedmem' or None\n Hard condstraint to select the backend. 
If set to 'sharedmem',\n the selected backend will be single-host and thread-based even\n if the user asked for a non-thread based backend with\n parallel_backend.\n\nSee joblib.Parallel documentation for more details", + "code": "def _joblib_parallel_args(**kwargs):\n \"\"\"Set joblib.Parallel arguments in a compatible way for 0.11 and 0.12+\n\n For joblib 0.11 this maps both ``prefer`` and ``require`` parameters to\n a specific ``backend``.\n\n Parameters\n ----------\n\n prefer : str in {'processes', 'threads'} or None\n Soft hint to choose the default backend if no specific backend\n was selected with the parallel_backend context manager.\n\n require : 'sharedmem' or None\n Hard constraint to select the backend. If set to 'sharedmem',\n the selected backend will be single-host and thread-based even\n if the user asked for a non-thread based backend with\n parallel_backend.\n\n See joblib.Parallel documentation for more details\n \"\"\"\n import joblib\n\n if parse_version(joblib.__version__) >= parse_version('0.12'):\n return kwargs\n\n extra_args = set(kwargs.keys()).difference({'prefer', 'require'})\n if extra_args:\n raise NotImplementedError('unhandled arguments %s with joblib %s'\n % (list(extra_args), joblib.__version__))\n args = {}\n if 'prefer' in kwargs:\n prefer = kwargs['prefer']\n if prefer not in ['threads', 'processes', None]:\n raise ValueError('prefer=%s is not supported' % prefer)\n args['backend'] = {'threads': 'threading',\n 'processes': 'multiprocessing',\n None: None}[prefer]\n\n if 'require' in kwargs:\n require = kwargs['require']\n if require not in [None, 'sharedmem']:\n raise ValueError('require=%s is not supported' % require)\n if require == 'sharedmem':\n args['backend'] = 'threading'\n return args" + }, + { + "id": "scikit-learn/sklearn.utils.fixes/_object_dtype_isnan", + "name": "_object_dtype_isnan", + "qname": "sklearn.utils.fixes._object_dtype_isnan", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.fixes/_object_dtype_isnan/X", + "name": "X", + "qname": "sklearn.utils.fixes._object_dtype_isnan.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _object_dtype_isnan(X):\n return X != X" + }, + { + "id": "scikit-learn/sklearn.utils.fixes/_take_along_axis", + "name": "_take_along_axis", + "qname": "sklearn.utils.fixes._take_along_axis", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.fixes/_take_along_axis/arr", + "name": "arr", + "qname": "sklearn.utils.fixes._take_along_axis.arr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.fixes/_take_along_axis/indices", + "name": "indices", + "qname": "sklearn.utils.fixes._take_along_axis.indices", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.fixes/_take_along_axis/axis", + "name": "axis", + "qname": "sklearn.utils.fixes._take_along_axis.axis", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", +
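Note: a hedged sketch of how `_joblib_parallel_args` is meant to be used internally. On joblib >= 0.12 the kwargs pass through unchanged; on 0.11 they collapse to a `backend` choice:

```python
from joblib import Parallel, delayed
from sklearn.utils.fixes import _joblib_parallel_args

# joblib >= 0.12: returns {'prefer': 'threads'} unchanged;
# joblib 0.11:    returns {'backend': 'threading'} instead.
args = _joblib_parallel_args(prefer='threads')

out = Parallel(n_jobs=2, **args)(delayed(abs)(i) for i in [-1, -2, 3])
print(out)  # [1, 2, 3]
```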
"default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Implements a simplified version of np.take_along_axis if numpy\nversion < 1.15", + "docstring": "Implements a simplified version of np.take_along_axis if numpy\nversion < 1.15", + "code": "def _take_along_axis(arr, indices, axis):\n \"\"\"Implements a simplified version of np.take_along_axis if numpy\n version < 1.15\"\"\"\n if np_version >= parse_version('1.15'):\n return np.take_along_axis(arr=arr, indices=indices, axis=axis)\n else:\n if axis is None:\n arr = arr.flatten()\n\n if not np.issubdtype(indices.dtype, np.intp):\n raise IndexError('`indices` must be an integer array')\n if arr.ndim != indices.ndim:\n raise ValueError(\n \"`indices` and `arr` must have the same number of dimensions\")\n\n shape_ones = (1,) * indices.ndim\n dest_dims = (\n list(range(axis)) +\n [None] +\n list(range(axis+1, indices.ndim))\n )\n\n # build a fancy index, consisting of orthogonal aranges, with the\n # requested index inserted at the right location\n fancy_index = []\n for dim, n in zip(dest_dims, arr.shape):\n if dim is None:\n fancy_index.append(indices)\n else:\n ind_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:]\n fancy_index.append(np.arange(n).reshape(ind_shape))\n\n fancy_index = tuple(fancy_index)\n return arr[fancy_index]" + }, + { + "id": "scikit-learn/sklearn.utils.fixes/delayed", + "name": "delayed", + "qname": "sklearn.utils.fixes.delayed", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.fixes/delayed/function", + "name": "function", + "qname": "sklearn.utils.fixes.delayed.function", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Decorator used to capture the arguments of a function.", + "docstring": "Decorator used to capture the arguments of a function.", + "code": "def delayed(function):\n \"\"\"Decorator used to capture the arguments of a function.\"\"\"\n @functools.wraps(function)\n def delayed_function(*args, **kwargs):\n return _FuncWrapper(function), args, kwargs\n return delayed_function" + }, + { + "id": "scikit-learn/sklearn.utils.graph/single_source_shortest_path_length", + "name": "single_source_shortest_path_length", + "qname": "sklearn.utils.graph.single_source_shortest_path_length", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.graph/single_source_shortest_path_length/graph", + "name": "graph", + "qname": "sklearn.utils.graph.single_source_shortest_path_length.graph", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{sparse matrix, ndarray} of shape (n, n)", + "default_value": "", + "description": "Adjacency matrix of the graph. Sparse matrix of format LIL is\npreferred." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of shape (n, n)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.graph/single_source_shortest_path_length/source", + "name": "source", + "qname": "sklearn.utils.graph.single_source_shortest_path_length.source", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Starting node for path." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.graph/single_source_shortest_path_length/cutoff", + "name": "cutoff", + "qname": "sklearn.utils.graph.single_source_shortest_path_length.cutoff", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Depth to stop the search - only paths of length <= cutoff are returned." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return the shortest path length from source to all reachable nodes.\n\nReturns a dictionary of shortest path lengths keyed by target.", + "docstring": "Return the shortest path length from source to all reachable nodes.\n\nReturns a dictionary of shortest path lengths keyed by target.\n\nParameters\n----------\ngraph : {sparse matrix, ndarray} of shape (n, n)\n Adjacency matrix of the graph. Sparse matrix of format LIL is\n preferred.\n\nsource : int\n Starting node for path.\n\ncutoff : int, default=None\n Depth to stop the search - only paths of length <= cutoff are returned.\n\nExamples\n--------\n>>> from sklearn.utils.graph import single_source_shortest_path_length\n>>> import numpy as np\n>>> graph = np.array([[ 0, 1, 0, 0],\n... [ 1, 0, 1, 0],\n... [ 0, 1, 0, 1],\n... [ 0, 0, 1, 0]])\n>>> list(sorted(single_source_shortest_path_length(graph, 0).items()))\n[(0, 0), (1, 1), (2, 2), (3, 3)]\n>>> graph = np.ones((6, 6))\n>>> list(sorted(single_source_shortest_path_length(graph, 2).items()))\n[(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)]", + "code": "@_deprecate_positional_args\ndef single_source_shortest_path_length(graph, source, *, cutoff=None):\n \"\"\"Return the shortest path length from source to all reachable nodes.\n\n Returns a dictionary of shortest path lengths keyed by target.\n\n Parameters\n ----------\n graph : {sparse matrix, ndarray} of shape (n, n)\n Adjacency matrix of the graph. Sparse matrix of format LIL is\n preferred.\n\n source : int\n Starting node for path.\n\n cutoff : int, default=None\n Depth to stop the search - only paths of length <= cutoff are returned.\n\n Examples\n --------\n >>> from sklearn.utils.graph import single_source_shortest_path_length\n >>> import numpy as np\n >>> graph = np.array([[ 0, 1, 0, 0],\n ... [ 1, 0, 1, 0],\n ... [ 0, 1, 0, 1],\n ... 
[ 0, 0, 1, 0]])\n >>> list(sorted(single_source_shortest_path_length(graph, 0).items()))\n [(0, 0), (1, 1), (2, 2), (3, 3)]\n >>> graph = np.ones((6, 6))\n >>> list(sorted(single_source_shortest_path_length(graph, 2).items()))\n [(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)]\n \"\"\"\n if sparse.isspmatrix(graph):\n graph = graph.tolil()\n else:\n graph = sparse.lil_matrix(graph)\n seen = {} # level (number of hops) when seen in BFS\n level = 0 # the current level\n next_level = [source] # dict of nodes to check at next level\n while next_level:\n this_level = next_level # advance to next level\n next_level = set() # and start a new list (fringe)\n for v in this_level:\n if v not in seen:\n seen[v] = level # set the level of vertex v\n next_level.update(graph.rows[v])\n if cutoff is not None and cutoff <= level:\n break\n level += 1\n return seen # return all path lengths as dictionary" + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/__init__", + "name": "__init__", + "qname": "sklearn.utils.metaestimators._BaseComposition.__init__", + "decorators": ["abstractmethod"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/__init__/self", + "name": "self", + "qname": "sklearn.utils.metaestimators._BaseComposition.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Handles parameter management for classifiers composed of named estimators.", + "docstring": "", + "code": " @abstractmethod\n def __init__(self):\n pass" + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_get_params", + "name": "_get_params", + "qname": "sklearn.utils.metaestimators._BaseComposition._get_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_get_params/self", + "name": "self", + "qname": "sklearn.utils.metaestimators._BaseComposition._get_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_get_params/attr", + "name": "attr", + "qname": "sklearn.utils.metaestimators._BaseComposition._get_params.attr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_get_params/deep", + "name": "deep", + "qname": "sklearn.utils.metaestimators._BaseComposition._get_params.deep", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _get_params(self, attr, deep=True):\n out = super().get_params(deep=deep)\n if not deep:\n return out\n estimators = getattr(self, attr)\n out.update(estimators)\n for name, estimator in estimators:\n if hasattr(estimator, 'get_params'):\n for key, value in estimator.get_params(deep=True).items():\n out['%s__%s' % (name, key)] = value\n return out" + }, + { + "id": 
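Note: the BFS in `single_source_shortest_path_length` and its `cutoff` behaviour, run on the docstring's own path graph (only nodes within `cutoff` hops are returned):

```python
import numpy as np
from sklearn.utils.graph import single_source_shortest_path_length

adj = np.array([[0, 1, 0, 0],
                [1, 0, 1, 0],
                [0, 1, 0, 1],
                [0, 0, 1, 0]])   # path graph 0-1-2-3

print(sorted(single_source_shortest_path_length(adj, 0).items()))
# [(0, 0), (1, 1), (2, 2), (3, 3)]
print(sorted(single_source_shortest_path_length(adj, 0, cutoff=2).items()))
# [(0, 0), (1, 1), (2, 2)] -- node 3 lies beyond the cutoff
```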
"scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_replace_estimator", + "name": "_replace_estimator", + "qname": "sklearn.utils.metaestimators._BaseComposition._replace_estimator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_replace_estimator/self", + "name": "self", + "qname": "sklearn.utils.metaestimators._BaseComposition._replace_estimator.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_replace_estimator/attr", + "name": "attr", + "qname": "sklearn.utils.metaestimators._BaseComposition._replace_estimator.attr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_replace_estimator/name", + "name": "name", + "qname": "sklearn.utils.metaestimators._BaseComposition._replace_estimator.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_replace_estimator/new_val", + "name": "new_val", + "qname": "sklearn.utils.metaestimators._BaseComposition._replace_estimator.new_val", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _replace_estimator(self, attr, name, new_val):\n # assumes `name` is a valid estimator name\n new_estimators = list(getattr(self, attr))\n for i, (estimator_name, _) in enumerate(new_estimators):\n if estimator_name == name:\n new_estimators[i] = (name, new_val)\n break\n setattr(self, attr, new_estimators)" + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_set_params", + "name": "_set_params", + "qname": "sklearn.utils.metaestimators._BaseComposition._set_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_set_params/self", + "name": "self", + "qname": "sklearn.utils.metaestimators._BaseComposition._set_params.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_set_params/attr", + "name": "attr", + "qname": "sklearn.utils.metaestimators._BaseComposition._set_params.attr", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_set_params/params", + "name": "params", + "qname": "sklearn.utils.metaestimators._BaseComposition._set_params.params", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + 
"reexported_by": [], + "description": "", + "docstring": "", + "code": " def _set_params(self, attr, **params):\n # Ensure strict ordering of parameter setting:\n # 1. All steps\n if attr in params:\n setattr(self, attr, params.pop(attr))\n # 2. Step replacement\n items = getattr(self, attr)\n names = []\n if items:\n names, _ = zip(*items)\n for name in list(params.keys()):\n if '__' not in name and name in names:\n self._replace_estimator(attr, name, params.pop(name))\n # 3. Step parameters and other initialisation arguments\n super().set_params(**params)\n return self" + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_validate_names", + "name": "_validate_names", + "qname": "sklearn.utils.metaestimators._BaseComposition._validate_names", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_validate_names/self", + "name": "self", + "qname": "sklearn.utils.metaestimators._BaseComposition._validate_names.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_BaseComposition/_validate_names/names", + "name": "names", + "qname": "sklearn.utils.metaestimators._BaseComposition._validate_names.names", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def _validate_names(self, names):\n if len(set(names)) != len(names):\n raise ValueError('Names provided are not unique: '\n '{0!r}'.format(list(names)))\n invalid_names = set(names).intersection(self.get_params(deep=False))\n if invalid_names:\n raise ValueError('Estimator names conflict with constructor '\n 'arguments: {0!r}'.format(sorted(invalid_names)))\n invalid_names = [name for name in names if '__' in name]\n if invalid_names:\n raise ValueError('Estimator names must not contain __: got '\n '{0!r}'.format(invalid_names))" + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_IffHasAttrDescriptor/__get__", + "name": "__get__", + "qname": "sklearn.utils.metaestimators._IffHasAttrDescriptor.__get__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.metaestimators/_IffHasAttrDescriptor/__get__/self", + "name": "self", + "qname": "sklearn.utils.metaestimators._IffHasAttrDescriptor.__get__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_IffHasAttrDescriptor/__get__/obj", + "name": "obj", + "qname": "sklearn.utils.metaestimators._IffHasAttrDescriptor.__get__.obj", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_IffHasAttrDescriptor/__get__/type", + "name": "type", + "qname": "sklearn.utils.metaestimators._IffHasAttrDescriptor.__get__.type", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + 
"results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __get__(self, obj, type=None):\n # raise an AttributeError if the attribute is not present on the object\n if obj is not None:\n # delegate only on instances, not the classes.\n # this is to allow access to the docstrings.\n for delegate_name in self.delegate_names:\n try:\n delegate = attrgetter(delegate_name)(obj)\n except AttributeError:\n continue\n else:\n getattr(delegate, self.attribute_name)\n break\n else:\n attrgetter(self.delegate_names[-1])(obj)\n\n # lambda, but not partial, allows help() to work with update_wrapper\n out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)\n # update the docstring of the returned function\n update_wrapper(out, self.fn)\n return out" + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_IffHasAttrDescriptor/__init__", + "name": "__init__", + "qname": "sklearn.utils.metaestimators._IffHasAttrDescriptor.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.metaestimators/_IffHasAttrDescriptor/__init__/self", + "name": "self", + "qname": "sklearn.utils.metaestimators._IffHasAttrDescriptor.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_IffHasAttrDescriptor/__init__/fn", + "name": "fn", + "qname": "sklearn.utils.metaestimators._IffHasAttrDescriptor.__init__.fn", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_IffHasAttrDescriptor/__init__/delegate_names", + "name": "delegate_names", + "qname": "sklearn.utils.metaestimators._IffHasAttrDescriptor.__init__.delegate_names", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_IffHasAttrDescriptor/__init__/attribute_name", + "name": "attribute_name", + "qname": "sklearn.utils.metaestimators._IffHasAttrDescriptor.__init__.attribute_name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Implements a conditional property using the descriptor protocol.\n\nUsing this class to create a decorator will raise an ``AttributeError``\nif none of the delegates (specified in ``delegate_names``) is an attribute\nof the base object or the first found delegate does not have an attribute\n``attribute_name``.\n\nThis allows ducktyping of the decorated method based on\n``delegate.attribute_name``. 
Here ``delegate`` is the first item in\n``delegate_names`` for which ``hasattr(object, delegate) is True``.\n\nSee https://docs.python.org/3/howto/descriptor.html for an explanation of\ndescriptors.", + "docstring": "", + "code": " def __init__(self, fn, delegate_names, attribute_name):\n self.fn = fn\n self.delegate_names = delegate_names\n self.attribute_name = attribute_name\n\n # update the docstring of the descriptor\n update_wrapper(self, fn)" + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_safe_split", + "name": "_safe_split", + "qname": "sklearn.utils.metaestimators._safe_split", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.metaestimators/_safe_split/estimator", + "name": "estimator", + "qname": "sklearn.utils.metaestimators._safe_split.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "object", + "default_value": "", + "description": "Estimator to determine whether we should slice only rows or rows and\ncolumns." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_safe_split/X", + "name": "X", + "qname": "sklearn.utils.metaestimators._safe_split.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, sparse matrix or iterable", + "default_value": "", + "description": "Data to be indexed. If ``estimator._pairwise is True``,\nthis needs to be a square array-like or sparse matrix." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "sparse matrix" + }, + { + "kind": "NamedType", + "name": "iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_safe_split/y", + "name": "y", + "qname": "sklearn.utils.metaestimators._safe_split.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, sparse matrix or iterable", + "default_value": "", + "description": "Targets to be indexed." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "sparse matrix" + }, + { + "kind": "NamedType", + "name": "iterable" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_safe_split/indices", + "name": "indices", + "qname": "sklearn.utils.metaestimators._safe_split.indices", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of int", + "default_value": "", + "description": "Rows to select from X and y.\nIf ``estimator._pairwise is True`` and ``train_indices is None``\nthen ``indices`` will also be used to slice columns." + }, + "type": { + "kind": "NamedType", + "name": "array of int" + } + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/_safe_split/train_indices", + "name": "train_indices", + "qname": "sklearn.utils.metaestimators._safe_split.train_indices", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of int or None", + "default_value": "None", + "description": "If ``estimator._pairwise is True`` and ``train_indices is not None``,\nthen ``train_indices`` will be used to slice the columns of X."
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array of int" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Create subset of dataset and properly handle kernels.\n\nSlice X, y according to indices for cross-validation, but take care of\nprecomputed kernel-matrices or pairwise affinities / distances.\n\nIf ``estimator._pairwise is True``, X needs to be square and\nwe slice rows and columns. If ``train_indices`` is not None,\nwe slice rows using ``indices`` (assumed the test set) and columns\nusing ``train_indices``, indicating the training set.\n\n.. deprecated:: 0.24\n\n The _pairwise attribute is deprecated in 0.24. From 1.1\n (renaming of 0.26) and onward, this function will check for the\n pairwise estimator tag.\n\nLabels y will always be indexed only along the first axis.", + "docstring": "Create subset of dataset and properly handle kernels.\n\nSlice X, y according to indices for cross-validation, but take care of\nprecomputed kernel-matrices or pairwise affinities / distances.\n\nIf ``estimator._pairwise is True``, X needs to be square and\nwe slice rows and columns. If ``train_indices`` is not None,\nwe slice rows using ``indices`` (assumed the test set) and columns\nusing ``train_indices``, indicating the training set.\n\n.. deprecated:: 0.24\n\n The _pairwise attribute is deprecated in 0.24. From 1.1\n (renaming of 0.26) and onward, this function will check for the\n pairwise estimator tag.\n\nLabels y will always be indexed only along the first axis.\n\nParameters\n----------\nestimator : object\n Estimator to determine whether we should slice only rows or rows and\n columns.\n\nX : array-like, sparse matrix or iterable\n Data to be indexed. If ``estimator._pairwise is True``,\n this needs to be a square array-like or sparse matrix.\n\ny : array-like, sparse matrix or iterable\n Targets to be indexed.\n\nindices : array of int\n Rows to select from X and y.\n If ``estimator._pairwise is True`` and ``train_indices is None``\n then ``indices`` will also be used to slice columns.\n\ntrain_indices : array of int or None, default=None\n If ``estimator._pairwise is True`` and ``train_indices is not None``,\n then ``train_indices`` will be used to slice the columns of X.\n\nReturns\n-------\nX_subset : array-like, sparse matrix or list\n Indexed data.\n\ny_subset : array-like, sparse matrix or list\n Indexed targets.", + "code": "def _safe_split(estimator, X, y, indices, train_indices=None):\n \"\"\"Create subset of dataset and properly handle kernels.\n\n Slice X, y according to indices for cross-validation, but take care of\n precomputed kernel-matrices or pairwise affinities / distances.\n\n If ``estimator._pairwise is True``, X needs to be square and\n we slice rows and columns. If ``train_indices`` is not None,\n we slice rows using ``indices`` (assumed the test set) and columns\n using ``train_indices``, indicating the training set.\n\n .. deprecated:: 0.24\n\n The _pairwise attribute is deprecated in 0.24. From 1.1\n (renaming of 0.26) and onward, this function will check for the\n pairwise estimator tag.\n\n Labels y will always be indexed only along the first axis.\n\n Parameters\n ----------\n estimator : object\n Estimator to determine whether we should slice only rows or rows and\n columns.\n\n X : array-like, sparse matrix or iterable\n Data to be indexed.
If ``estimator._pairwise is True``,\n this needs to be a square array-like or sparse matrix.\n\n y : array-like, sparse matrix or iterable\n Targets to be indexed.\n\n indices : array of int\n Rows to select from X and y.\n If ``estimator._pairwise is True`` and ``train_indices is None``\n then ``indices`` will also be used to slice columns.\n\n train_indices : array of int or None, default=None\n If ``estimator._pairwise is True`` and ``train_indices is not None``,\n then ``train_indices`` will be used to slice the columns of X.\n\n Returns\n -------\n X_subset : array-like, sparse matrix or list\n Indexed data.\n\n y_subset : array-like, sparse matrix or list\n Indexed targets.\n\n \"\"\"\n if _is_pairwise(estimator):\n if not hasattr(X, \"shape\"):\n raise ValueError(\"Precomputed kernels or affinity matrices have \"\n \"to be passed as arrays or sparse matrices.\")\n # X is a precomputed square kernel matrix\n if X.shape[0] != X.shape[1]:\n raise ValueError(\"X should be a square kernel matrix\")\n if train_indices is None:\n X_subset = X[np.ix_(indices, indices)]\n else:\n X_subset = X[np.ix_(indices, train_indices)]\n else:\n X_subset = _safe_indexing(X, indices)\n\n if y is not None:\n y_subset = _safe_indexing(y, indices)\n else:\n y_subset = None\n\n return X_subset, y_subset" + }, + { + "id": "scikit-learn/sklearn.utils.metaestimators/if_delegate_has_method", + "name": "if_delegate_has_method", + "qname": "sklearn.utils.metaestimators.if_delegate_has_method", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.metaestimators/if_delegate_has_method/delegate", + "name": "delegate", + "qname": "sklearn.utils.metaestimators.if_delegate_has_method.delegate", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "string, list of strings or tuple of strings", + "default_value": "", + "description": "Name of the sub-estimator that can be accessed as an attribute of the\nbase object. If a list or a tuple of names are provided, the first\nsub-estimator that is an attribute of the base object will be used." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "string" + }, + { + "kind": "NamedType", + "name": "list of strings" + }, + { + "kind": "NamedType", + "name": "tuple of strings" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Create a decorator for methods that are delegated to a sub-estimator\n\nThis enables ducktyping by hasattr returning True according to the\nsub-estimator.", + "docstring": "Create a decorator for methods that are delegated to a sub-estimator\n\nThis enables ducktyping by hasattr returning True according to the\nsub-estimator.\n\nParameters\n----------\ndelegate : string, list of strings or tuple of strings\n Name of the sub-estimator that can be accessed as an attribute of the\n base object. If a list or a tuple of names are provided, the first\n sub-estimator that is an attribute of the base object will be used.", + "code": "def if_delegate_has_method(delegate):\n \"\"\"Create a decorator for methods that are delegated to a sub-estimator\n\n This enables ducktyping by hasattr returning True according to the\n sub-estimator.\n\n Parameters\n ----------\n delegate : string, list of strings or tuple of strings\n Name of the sub-estimator that can be accessed as an attribute of the\n base object.
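Note: a sketch of `_safe_split` (documented above) on a precomputed kernel, where both rows and columns are sliced. This is a private helper; the behaviour shown assumes scikit-learn 0.24.x, where `SVC(kernel='precomputed')` is detected as pairwise:

```python
import numpy as np
from sklearn.svm import SVC
from sklearn.utils.metaestimators import _safe_split

X = np.random.RandomState(0).rand(5, 3)
K = X @ X.T                          # square, precomputed kernel matrix
y = np.array([0, 1, 0, 1, 0])
train, test = np.array([0, 1, 2]), np.array([3, 4])

est = SVC(kernel='precomputed')      # pairwise: slice rows *and* columns
K_train, y_train = _safe_split(est, K, y, train)       # K_train: (3, 3)
K_test, y_test = _safe_split(est, K, y, test, train)   # K_test:  (2, 3)
```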
If a list or a tuple of names are provided, the first\n sub-estimator that is an attribute of the base object will be used.\n\n \"\"\"\n if isinstance(delegate, list):\n delegate = tuple(delegate)\n if not isinstance(delegate, tuple):\n delegate = (delegate,)\n\n return lambda fn: _IffHasAttrDescriptor(fn, delegate,\n attribute_name=fn.__name__)" + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/_check_partial_fit_first_call", + "name": "_check_partial_fit_first_call", + "qname": "sklearn.utils.multiclass._check_partial_fit_first_call", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.multiclass/_check_partial_fit_first_call/clf", + "name": "clf", + "qname": "sklearn.utils.multiclass._check_partial_fit_first_call.clf", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/_check_partial_fit_first_call/classes", + "name": "classes", + "qname": "sklearn.utils.multiclass._check_partial_fit_first_call.classes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Private helper function for factorizing common classes param logic.\n\nEstimators that implement the ``partial_fit`` API need to be provided with\nthe list of possible classes at the first call to partial_fit.\n\nSubsequent calls to partial_fit should check that ``classes`` is still\nconsistent with a previous value of ``clf.classes_`` when provided.\n\nThis function returns True if it detects that this was the first call to\n``partial_fit`` on ``clf``. In that case the ``classes_`` attribute is also\nset on ``clf``.", + "docstring": "Private helper function for factorizing common classes param logic.\n\nEstimators that implement the ``partial_fit`` API need to be provided with\nthe list of possible classes at the first call to partial_fit.\n\nSubsequent calls to partial_fit should check that ``classes`` is still\nconsistent with a previous value of ``clf.classes_`` when provided.\n\nThis function returns True if it detects that this was the first call to\n``partial_fit`` on ``clf``. In that case the ``classes_`` attribute is also\nset on ``clf``.", + "code": "def _check_partial_fit_first_call(clf, classes=None):\n \"\"\"Private helper function for factorizing common classes param logic.\n\n Estimators that implement the ``partial_fit`` API need to be provided with\n the list of possible classes at the first call to partial_fit.\n\n Subsequent calls to partial_fit should check that ``classes`` is still\n consistent with a previous value of ``clf.classes_`` when provided.\n\n This function returns True if it detects that this was the first call to\n ``partial_fit`` on ``clf``. 
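Note: a minimal sketch of the ducktyping that `if_delegate_has_method` provides via `_IffHasAttrDescriptor`; `Wrapper` and `HasProba` are hypothetical classes used only for illustration:

```python
from sklearn.utils.metaestimators import if_delegate_has_method

class HasProba:                      # hypothetical sub-estimator
    def predict_proba(self, X):
        return X

class Wrapper:                       # hypothetical meta-estimator
    def __init__(self, estimator):
        self.estimator = estimator

    @if_delegate_has_method(delegate='estimator')
    def predict_proba(self, X):
        # Only reachable when self.estimator exposes predict_proba.
        return self.estimator.predict_proba(X)

print(hasattr(Wrapper(HasProba()), 'predict_proba'))  # True
print(hasattr(Wrapper(object()), 'predict_proba'))    # False
```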
In that case the ``classes_`` attribute is also\n set on ``clf``.\n\n \"\"\"\n if getattr(clf, 'classes_', None) is None and classes is None:\n raise ValueError(\"classes must be passed on the first call \"\n \"to partial_fit.\")\n\n elif classes is not None:\n if getattr(clf, 'classes_', None) is not None:\n if not np.array_equal(clf.classes_, unique_labels(classes)):\n raise ValueError(\n \"`classes=%r` is not the same as on last call \"\n \"to partial_fit, was: %r\" % (classes, clf.classes_))\n\n else:\n # This is the first call to partial_fit\n clf.classes_ = unique_labels(classes)\n return True\n\n # classes is None and clf.classes_ has already previously been set:\n # nothing to do\n return False" + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/_is_integral_float", + "name": "_is_integral_float", + "qname": "sklearn.utils.multiclass._is_integral_float", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.multiclass/_is_integral_float/y", + "name": "y", + "qname": "sklearn.utils.multiclass._is_integral_float.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _is_integral_float(y):\n return y.dtype.kind == 'f' and np.all(y.astype(int) == y)" + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/_ovr_decision_function", + "name": "_ovr_decision_function", + "qname": "sklearn.utils.multiclass._ovr_decision_function", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.multiclass/_ovr_decision_function/predictions", + "name": "predictions", + "qname": "sklearn.utils.multiclass._ovr_decision_function.predictions", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_classifiers)", + "default_value": "", + "description": "Predicted classes for each binary classifier." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_classifiers)" + } + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/_ovr_decision_function/confidences", + "name": "confidences", + "qname": "sklearn.utils.multiclass._ovr_decision_function.confidences", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_classifiers)", + "default_value": "", + "description": "Decision functions or predicted probabilities for positive class\nfor each binary classifier." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_classifiers)" + } + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/_ovr_decision_function/n_classes", + "name": "n_classes", + "qname": "sklearn.utils.multiclass._ovr_decision_function.n_classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of classes. n_classifiers must be\n``n_classes * (n_classes - 1 ) / 2``." 
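Note: the `_check_partial_fit_first_call` contract above, exercised directly. The bare `Clf` class is a hypothetical stand-in for an estimator implementing `partial_fit`:

```python
import numpy as np
from sklearn.utils.multiclass import _check_partial_fit_first_call

class Clf:           # hypothetical stand-in for a partial_fit estimator
    pass

clf = Clf()
# First call: classes is mandatory and classes_ gets set on clf.
assert _check_partial_fit_first_call(clf, classes=[0, 1, 2]) is True
assert np.array_equal(clf.classes_, [0, 1, 2])

# Later calls with consistent classes (or classes=None) return False;
# an inconsistent class set would raise ValueError instead.
assert _check_partial_fit_first_call(clf, classes=[0, 1, 2]) is False
```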
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute a continuous, tie-breaking OvR decision function from OvO.\n\nIt is important to include a continuous value, not only votes,\nto make computing AUC or calibration meaningful.", + "docstring": "Compute a continuous, tie-breaking OvR decision function from OvO.\n\nIt is important to include a continuous value, not only votes,\nto make computing AUC or calibration meaningful.\n\nParameters\n----------\npredictions : array-like of shape (n_samples, n_classifiers)\n Predicted classes for each binary classifier.\n\nconfidences : array-like of shape (n_samples, n_classifiers)\n Decision functions or predicted probabilities for positive class\n for each binary classifier.\n\nn_classes : int\n Number of classes. n_classifiers must be\n ``n_classes * (n_classes - 1 ) / 2``.", + "code": "def _ovr_decision_function(predictions, confidences, n_classes):\n \"\"\"Compute a continuous, tie-breaking OvR decision function from OvO.\n\n It is important to include a continuous value, not only votes,\n to make computing AUC or calibration meaningful.\n\n Parameters\n ----------\n predictions : array-like of shape (n_samples, n_classifiers)\n Predicted classes for each binary classifier.\n\n confidences : array-like of shape (n_samples, n_classifiers)\n Decision functions or predicted probabilities for positive class\n for each binary classifier.\n\n n_classes : int\n Number of classes. n_classifiers must be\n ``n_classes * (n_classes - 1 ) / 2``.\n \"\"\"\n n_samples = predictions.shape[0]\n votes = np.zeros((n_samples, n_classes))\n sum_of_confidences = np.zeros((n_samples, n_classes))\n\n k = 0\n for i in range(n_classes):\n for j in range(i + 1, n_classes):\n sum_of_confidences[:, i] -= confidences[:, k]\n sum_of_confidences[:, j] += confidences[:, k]\n votes[predictions[:, k] == 0, i] += 1\n votes[predictions[:, k] == 1, j] += 1\n k += 1\n\n # Monotonically transform the sum_of_confidences to (-1/3, 1/3)\n # and add it with votes. 
The monotonic transformation is\n # f: x -> x / (3 * (|x| + 1)), it uses 1/3 instead of 1/2\n # to ensure that we won't reach the limits and change vote order.\n # The motivation is to use confidence levels as a way to break ties in\n # the votes without switching any decision made based on a difference\n # of 1 vote.\n transformed_confidences = (sum_of_confidences /\n (3 * (np.abs(sum_of_confidences) + 1)))\n return votes + transformed_confidences" + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/_unique_indicator", + "name": "_unique_indicator", + "qname": "sklearn.utils.multiclass._unique_indicator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.multiclass/_unique_indicator/y", + "name": "y", + "qname": "sklearn.utils.multiclass._unique_indicator.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _unique_indicator(y):\n return np.arange(\n check_array(y, accept_sparse=['csr', 'csc', 'coo']).shape[1]\n )" + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/_unique_multiclass", + "name": "_unique_multiclass", + "qname": "sklearn.utils.multiclass._unique_multiclass", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.multiclass/_unique_multiclass/y", + "name": "y", + "qname": "sklearn.utils.multiclass._unique_multiclass.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _unique_multiclass(y):\n if hasattr(y, '__array__'):\n return np.unique(np.asarray(y))\n else:\n return set(y)" + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/check_classification_targets", + "name": "check_classification_targets", + "qname": "sklearn.utils.multiclass.check_classification_targets", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.multiclass/check_classification_targets/y", + "name": "y", + "qname": "sklearn.utils.multiclass.check_classification_targets.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Ensure that target y is of a non-regression type.\n\nOnly the following target types (as defined in type_of_target) are allowed:\n 'binary', 'multiclass', 'multiclass-multioutput',\n 'multilabel-indicator', 'multilabel-sequences'", + "docstring": "Ensure that target y is of a non-regression type.\n\nOnly the following target types (as defined in type_of_target) are allowed:\n 'binary', 'multiclass', 'multiclass-multioutput',\n 'multilabel-indicator', 'multilabel-sequences'\n\nParameters\n----------\ny : array-like", + "code": "def check_classification_targets(y):\n \"\"\"Ensure that target y is of a non-regression type.\n\n Only the following target types (as defined in type_of_target) are allowed:\n 'binary', 'multiclass', 'multiclass-multioutput',\n 'multilabel-indicator', 'multilabel-sequences'\n\n Parameters\n ----------\n y : 
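Note: a hand-worked sketch of `_ovr_decision_function`'s tie-breaking. With one vote per class, the transformed confidence sums (bounded in (-1/3, 1/3), so they can never outweigh a whole vote) decide the winner:

```python
import numpy as np
from sklearn.utils.multiclass import _ovr_decision_function

# One sample, 3 classes -> 3 OvO classifiers ordered (0v1), (0v2), (1v2).
# A prediction of 0 votes for the first class of the pair, 1 for the second.
predictions = np.array([[1, 0, 1]])   # votes: class 1, class 0, class 2
confidences = np.array([[0.3, -0.1, -0.4]])

scores = _ovr_decision_function(predictions, confidences, n_classes=3)
# All three classes hold exactly one vote; the bounded confidence term
# breaks the three-way tie in favour of class 1.
print(scores.argmax(axis=1))  # [1]
```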
array-like\n \"\"\"\n y_type = type_of_target(y)\n if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',\n 'multilabel-indicator', 'multilabel-sequences']:\n raise ValueError(\"Unknown label type: %r\" % y_type)" + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/class_distribution", + "name": "class_distribution", + "qname": "sklearn.utils.multiclass.class_distribution", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.multiclass/class_distribution/y", + "name": "y", + "qname": "sklearn.utils.multiclass.class_distribution.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix} of size (n_samples, n_outputs)", + "default_value": "", + "description": "The labels for each example." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "of size (n_samples, n_outputs)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/class_distribution/sample_weight", + "name": "sample_weight", + "qname": "sklearn.utils.multiclass.class_distribution.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Sample weights." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute class priors from multioutput-multiclass target data.", + "docstring": "Compute class priors from multioutput-multiclass target data.\n\nParameters\n----------\ny : {array-like, sparse matrix} of size (n_samples, n_outputs)\n The labels for each example.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nclasses : list of size n_outputs of ndarray of size (n_classes,)\n List of classes for each column.\n\nn_classes : list of int of size n_outputs\n Number of classes in each column.\n\nclass_prior : list of size n_outputs of ndarray of size (n_classes,)\n Class distribution of each column.", + "code": "def class_distribution(y, sample_weight=None):\n \"\"\"Compute class priors from multioutput-multiclass target data.\n\n Parameters\n ----------\n y : {array-like, sparse matrix} of size (n_samples, n_outputs)\n The labels for each example.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\n Returns\n -------\n classes : list of size n_outputs of ndarray of size (n_classes,)\n List of classes for each column.\n\n n_classes : list of int of size n_outputs\n Number of classes in each column.\n\n class_prior : list of size n_outputs of ndarray of size (n_classes,)\n Class distribution of each column.\n\n \"\"\"\n classes = []\n n_classes = []\n class_prior = []\n\n n_samples, n_outputs = y.shape\n if sample_weight is not None:\n sample_weight = np.asarray(sample_weight)\n\n if issparse(y):\n y = y.tocsc()\n y_nnz = np.diff(y.indptr)\n\n for k in range(n_outputs):\n col_nonzero = y.indices[y.indptr[k]:y.indptr[k + 1]]\n # separate sample weights for zero and non-zero elements\n if sample_weight is not None:\n nz_samp_weight = sample_weight[col_nonzero]\n zeros_samp_weight_sum = (np.sum(sample_weight) -\n np.sum(nz_samp_weight))\n else:\n nz_samp_weight = None\n zeros_samp_weight_sum = y.shape[0] - y_nnz[k]\n\n classes_k, y_k = 
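Note: `check_classification_targets` either passes silently or raises; a quick sketch using targets whose `type_of_target` values are documented above:

```python
from sklearn.utils.multiclass import check_classification_targets

check_classification_targets([0, 1, 1, 0])     # binary: passes silently
check_classification_targets(['a', 'b', 'c'])  # multiclass: passes

try:
    check_classification_targets([0.1, 0.6, 0.3])
except ValueError as exc:
    print(exc)  # Unknown label type: 'continuous'
```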
np.unique(y.data[y.indptr[k]:y.indptr[k + 1]],\n return_inverse=True)\n class_prior_k = np.bincount(y_k, weights=nz_samp_weight)\n\n # An explicit zero was found, combine its weight with the weight\n # of the implicit zeros\n if 0 in classes_k:\n class_prior_k[classes_k == 0] += zeros_samp_weight_sum\n\n # If there is an implicit zero and it is not in classes and\n # class_prior, make an entry for it\n if 0 not in classes_k and y_nnz[k] < y.shape[0]:\n classes_k = np.insert(classes_k, 0, 0)\n class_prior_k = np.insert(class_prior_k, 0,\n zeros_samp_weight_sum)\n\n classes.append(classes_k)\n n_classes.append(classes_k.shape[0])\n class_prior.append(class_prior_k / class_prior_k.sum())\n else:\n for k in range(n_outputs):\n classes_k, y_k = np.unique(y[:, k], return_inverse=True)\n classes.append(classes_k)\n n_classes.append(classes_k.shape[0])\n class_prior_k = np.bincount(y_k, weights=sample_weight)\n class_prior.append(class_prior_k / class_prior_k.sum())\n\n return (classes, n_classes, class_prior)" + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/is_multilabel", + "name": "is_multilabel", + "qname": "sklearn.utils.multiclass.is_multilabel", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.multiclass/is_multilabel/y", + "name": "y", + "qname": "sklearn.utils.multiclass.is_multilabel.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples,)", + "default_value": "", + "description": "Target values." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Check if ``y`` is in a multilabel format.", + "docstring": "Check if ``y`` is in a multilabel format.\n\nParameters\n----------\ny : ndarray of shape (n_samples,)\n Target values.\n\nReturns\n-------\nout : bool\n Return ``True``, if ``y`` is in a multilabel format, else ``False``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils.multiclass import is_multilabel\n>>> is_multilabel([0, 1, 0, 1])\nFalse\n>>> is_multilabel([[1], [0, 2], []])\nFalse\n>>> is_multilabel(np.array([[1, 0], [0, 0]]))\nTrue\n>>> is_multilabel(np.array([[1], [0], [0]]))\nFalse\n>>> is_multilabel(np.array([[1, 0, 0]]))\nTrue", + "code": "def is_multilabel(y):\n \"\"\" Check if ``y`` is in a multilabel format.\n\n Parameters\n ----------\n y : ndarray of shape (n_samples,)\n Target values.\n\n Returns\n -------\n out : bool\n Return ``True``, if ``y`` is in a multilabel format, else ``False``.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.utils.multiclass import is_multilabel\n >>> is_multilabel([0, 1, 0, 1])\n False\n >>> is_multilabel([[1], [0, 2], []])\n False\n >>> is_multilabel(np.array([[1, 0], [0, 0]]))\n True\n >>> is_multilabel(np.array([[1], [0], [0]]))\n False\n >>> is_multilabel(np.array([[1, 0, 0]]))\n True\n \"\"\"\n if hasattr(y, '__array__') or isinstance(y, Sequence):\n # DeprecationWarning will be replaced by ValueError, see NEP 34\n # https://numpy.org/neps/nep-0034-infer-dtype-is-object.html\n with warnings.catch_warnings():\n warnings.simplefilter('error', np.VisibleDeprecationWarning)\n try:\n y = np.asarray(y)\n except np.VisibleDeprecationWarning:\n # dtype=object should be provided explicitly for ragged arrays,\n # see NEP 34\n y = np.array(y, dtype=object)\n\n if not (hasattr(y, \"shape\") and y.ndim == 2 and y.shape[1] > 1):\n return False\n\n if
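Note: `class_distribution` on a small dense multioutput target; the priors are per-column relative frequencies (weighted when `sample_weight` is given):

```python
import numpy as np
from sklearn.utils.multiclass import class_distribution

y = np.array([[1, 0],     # y must be 2d: (n_samples, n_outputs)
              [2, 0],
              [1, 3]])
classes, n_classes, class_prior = class_distribution(y)
print(classes)      # [array([1, 2]), array([0, 3])]
print(n_classes)    # [2, 2]
print(class_prior)  # each column: approx [0.667, 0.333]
```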
issparse(y):\n if isinstance(y, (dok_matrix, lil_matrix)):\n y = y.tocsr()\n return (len(y.data) == 0 or np.unique(y.data).size == 1 and\n (y.dtype.kind in 'biu' or # bool, int, uint\n _is_integral_float(np.unique(y.data))))\n else:\n labels = np.unique(y)\n\n return len(labels) < 3 and (y.dtype.kind in 'biu' or # bool, int, uint\n _is_integral_float(labels))" + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/type_of_target", + "name": "type_of_target", + "qname": "sklearn.utils.multiclass.type_of_target", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.multiclass/type_of_target/y", + "name": "y", + "qname": "sklearn.utils.multiclass.type_of_target.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Determine the type of data indicated by the target.\n\nNote that this type is the most specific type that can be inferred.\nFor example:\n\n * ``binary`` is more specific but compatible with ``multiclass``.\n * ``multiclass`` of integers is more specific but compatible with\n ``continuous``.\n * ``multilabel-indicator`` is more specific but compatible with\n ``multiclass-multioutput``.", + "docstring": "Determine the type of data indicated by the target.\n\nNote that this type is the most specific type that can be inferred.\nFor example:\n\n * ``binary`` is more specific but compatible with ``multiclass``.\n * ``multiclass`` of integers is more specific but compatible with\n ``continuous``.\n * ``multilabel-indicator`` is more specific but compatible with\n ``multiclass-multioutput``.\n\nParameters\n----------\ny : array-like\n\nReturns\n-------\ntarget_type : str\n One of:\n\n * 'continuous': `y` is an array-like of floats that are not all\n integers, and is 1d or a column vector.\n * 'continuous-multioutput': `y` is a 2d array of floats that are\n not all integers, and both dimensions are of size > 1.\n * 'binary': `y` contains <= 2 discrete values and is 1d or a column\n vector.\n * 'multiclass': `y` contains more than two discrete values, is not a\n sequence of sequences, and is 1d or a column vector.\n * 'multiclass-multioutput': `y` is a 2d array that contains more\n than two discrete values, is not a sequence of sequences, and both\n dimensions are of size > 1.\n * 'multilabel-indicator': `y` is a label indicator matrix, an array\n of two dimensions with at least two columns, and at most 2 unique\n values.\n * 'unknown': `y` is array-like but none of the above, such as a 3d\n array, sequence of sequences, or an array of non-sequence objects.\n\nExamples\n--------\n>>> import numpy as np\n>>> type_of_target([0.1, 0.6])\n'continuous'\n>>> type_of_target([1, -1, -1, 1])\n'binary'\n>>> type_of_target(['a', 'b', 'a'])\n'binary'\n>>> type_of_target([1.0, 2.0])\n'binary'\n>>> type_of_target([1, 0, 2])\n'multiclass'\n>>> type_of_target([1.0, 0.0, 3.0])\n'multiclass'\n>>> type_of_target(['a', 'b', 'c'])\n'multiclass'\n>>> type_of_target(np.array([[1, 2], [3, 1]]))\n'multiclass-multioutput'\n>>> type_of_target([[1, 2]])\n'multilabel-indicator'\n>>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))\n'continuous-multioutput'\n>>> type_of_target(np.array([[0, 1], [1, 1]]))\n'multilabel-indicator'", + "code": "def type_of_target(y):\n \"\"\"Determine the type of data indicated by the target.\n\n 
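Note: the `is_multilabel` heuristic above (2-d, more than one column, at most two integer-like values), restated as runnable checks taken mostly from the docstring:

```python
import numpy as np
from sklearn.utils.multiclass import is_multilabel

is_multilabel(np.array([[1, 0], [0, 0]]))  # True: 2d 0/1 indicator matrix
is_multilabel(np.array([[1], [0], [0]]))   # False: only one column
is_multilabel([0, 1, 0, 1])                # False: 1d target
is_multilabel(np.array([[1, 2], [0, 0]]))  # False: >2 distinct values
```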
Note that this type is the most specific type that can be inferred.\n For example:\n\n * ``binary`` is more specific but compatible with ``multiclass``.\n * ``multiclass`` of integers is more specific but compatible with\n ``continuous``.\n * ``multilabel-indicator`` is more specific but compatible with\n ``multiclass-multioutput``.\n\n Parameters\n ----------\n y : array-like\n\n Returns\n -------\n target_type : str\n One of:\n\n * 'continuous': `y` is an array-like of floats that are not all\n integers, and is 1d or a column vector.\n * 'continuous-multioutput': `y` is a 2d array of floats that are\n not all integers, and both dimensions are of size > 1.\n * 'binary': `y` contains <= 2 discrete values and is 1d or a column\n vector.\n * 'multiclass': `y` contains more than two discrete values, is not a\n sequence of sequences, and is 1d or a column vector.\n * 'multiclass-multioutput': `y` is a 2d array that contains more\n than two discrete values, is not a sequence of sequences, and both\n dimensions are of size > 1.\n * 'multilabel-indicator': `y` is a label indicator matrix, an array\n of two dimensions with at least two columns, and at most 2 unique\n values.\n * 'unknown': `y` is array-like but none of the above, such as a 3d\n array, sequence of sequences, or an array of non-sequence objects.\n\n Examples\n --------\n >>> import numpy as np\n >>> type_of_target([0.1, 0.6])\n 'continuous'\n >>> type_of_target([1, -1, -1, 1])\n 'binary'\n >>> type_of_target(['a', 'b', 'a'])\n 'binary'\n >>> type_of_target([1.0, 2.0])\n 'binary'\n >>> type_of_target([1, 0, 2])\n 'multiclass'\n >>> type_of_target([1.0, 0.0, 3.0])\n 'multiclass'\n >>> type_of_target(['a', 'b', 'c'])\n 'multiclass'\n >>> type_of_target(np.array([[1, 2], [3, 1]]))\n 'multiclass-multioutput'\n >>> type_of_target([[1, 2]])\n 'multilabel-indicator'\n >>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))\n 'continuous-multioutput'\n >>> type_of_target(np.array([[0, 1], [1, 1]]))\n 'multilabel-indicator'\n \"\"\"\n valid = ((isinstance(y, (Sequence, spmatrix)) or hasattr(y, '__array__'))\n and not isinstance(y, str))\n\n if not valid:\n raise ValueError('Expected array-like (array or non-string sequence), '\n 'got %r' % y)\n\n sparse_pandas = (y.__class__.__name__ in ['SparseSeries', 'SparseArray'])\n if sparse_pandas:\n raise ValueError(\"y cannot be class 'SparseSeries' or 'SparseArray'\")\n\n if is_multilabel(y):\n return 'multilabel-indicator'\n\n # DeprecationWarning will be replaced by ValueError, see NEP 34\n # https://numpy.org/neps/nep-0034-infer-dtype-is-object.html\n with warnings.catch_warnings():\n warnings.simplefilter('error', np.VisibleDeprecationWarning)\n try:\n y = np.asarray(y)\n except np.VisibleDeprecationWarning:\n # dtype=object should be provided explicitly for ragged arrays,\n # see NEP 34\n y = np.asarray(y, dtype=object)\n\n # The old sequence of sequences format\n try:\n if (not hasattr(y[0], '__array__') and isinstance(y[0], Sequence)\n and not isinstance(y[0], str)):\n raise ValueError('You appear to be using a legacy multi-label data'\n ' representation. 
Sequence of sequences are no'\n ' longer supported; use a binary array or sparse'\n ' matrix instead - the MultiLabelBinarizer'\n ' transformer can convert to this format.')\n except IndexError:\n pass\n\n # Invalid inputs\n if y.ndim > 2 or (y.dtype == object and len(y) and\n not isinstance(y.flat[0], str)):\n return 'unknown' # [[[1, 2]]] or [obj_1] and not [\"label_1\"]\n\n if y.ndim == 2 and y.shape[1] == 0:\n return 'unknown' # [[]]\n\n if y.ndim == 2 and y.shape[1] > 1:\n suffix = \"-multioutput\" # [[1, 2], [1, 2]]\n else:\n suffix = \"\" # [1, 2, 3] or [[1], [2], [3]]\n\n # check float and contains non-integer float values\n if y.dtype.kind == 'f' and np.any(y != y.astype(int)):\n # [.1, .2, 3] or [[.1, .2, 3]] or [[1., .2]] and not [1., 2., 3.]\n _assert_all_finite(y)\n return 'continuous' + suffix\n\n if (len(np.unique(y)) > 2) or (y.ndim >= 2 and len(y[0]) > 1):\n return 'multiclass' + suffix # [1, 2, 3] or [[1., 2., 3]] or [[1, 2]]\n else:\n return 'binary' # [1, 2] or [[\"a\"], [\"b\"]]" + }, + { + "id": "scikit-learn/sklearn.utils.multiclass/unique_labels", + "name": "unique_labels", + "qname": "sklearn.utils.multiclass.unique_labels", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.multiclass/unique_labels/ys", + "name": "ys", + "qname": "sklearn.utils.multiclass.unique_labels.ys", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": true, + "docstring": { + "type": "array-likes", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-likes" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Extract an ordered array of unique labels.\n\nWe don't allow:\n - mix of multilabel and multiclass (single label) targets\n - mix of label indicator matrix and anything else,\n because there are no explicit labels)\n - mix of label indicator matrices of different sizes\n - mix of string and integer labels\n\nAt the moment, we also don't allow \"multiclass-multioutput\" input type.", + "docstring": "Extract an ordered array of unique labels.\n\nWe don't allow:\n - mix of multilabel and multiclass (single label) targets\n - mix of label indicator matrix and anything else,\n because there are no explicit labels)\n - mix of label indicator matrices of different sizes\n - mix of string and integer labels\n\nAt the moment, we also don't allow \"multiclass-multioutput\" input type.\n\nParameters\n----------\n*ys : array-likes\n\nReturns\n-------\nout : ndarray of shape (n_unique_labels,)\n An ordered array of unique labels.\n\nExamples\n--------\n>>> from sklearn.utils.multiclass import unique_labels\n>>> unique_labels([3, 5, 5, 5, 7, 7])\narray([3, 5, 7])\n>>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])\narray([1, 2, 3, 4])\n>>> unique_labels([1, 2, 10], [5, 11])\narray([ 1, 2, 5, 10, 11])", + "code": "def unique_labels(*ys):\n \"\"\"Extract an ordered array of unique labels.\n\n We don't allow:\n - mix of multilabel and multiclass (single label) targets\n - mix of label indicator matrix and anything else,\n because there are no explicit labels)\n - mix of label indicator matrices of different sizes\n - mix of string and integer labels\n\n At the moment, we also don't allow \"multiclass-multioutput\" input type.\n\n Parameters\n ----------\n *ys : array-likes\n\n Returns\n -------\n out : ndarray of shape (n_unique_labels,)\n An ordered array of unique labels.\n\n Examples\n --------\n >>> from sklearn.utils.multiclass import unique_labels\n >>> 
unique_labels([3, 5, 5, 5, 7, 7])\n array([3, 5, 7])\n >>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])\n array([1, 2, 3, 4])\n >>> unique_labels([1, 2, 10], [5, 11])\n array([ 1, 2, 5, 10, 11])\n \"\"\"\n if not ys:\n raise ValueError('No argument has been passed.')\n # Check that we don't mix label format\n\n ys_types = set(type_of_target(x) for x in ys)\n if ys_types == {\"binary\", \"multiclass\"}:\n ys_types = {\"multiclass\"}\n\n if len(ys_types) > 1:\n raise ValueError(\"Mix type of y not allowed, got types %s\" % ys_types)\n\n label_type = ys_types.pop()\n\n # Check consistency for the indicator format\n if (label_type == \"multilabel-indicator\" and\n len(set(check_array(y,\n accept_sparse=['csr', 'csc', 'coo']).shape[1]\n for y in ys)) > 1):\n raise ValueError(\"Multi-label binary indicator input with \"\n \"different numbers of labels\")\n\n # Get the unique set of labels\n _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)\n if not _unique_labels:\n raise ValueError(\"Unknown label type: %s\" % repr(ys))\n\n ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys))\n\n # Check that we don't mix string type with number type\n if (len(set(isinstance(label, str) for label in ys_labels)) > 1):\n raise ValueError(\"Mix of label input types (string and number)\")\n\n return np.array(sorted(ys_labels))" + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_cg", + "name": "_cg", + "qname": "sklearn.utils.optimize._cg", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.optimize/_cg/fhess_p", + "name": "fhess_p", + "qname": "sklearn.utils.optimize._cg.fhess_p", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Function that takes the gradient as a parameter and returns the\nmatrix product of the Hessian and gradient." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_cg/fgrad", + "name": "fgrad", + "qname": "sklearn.utils.optimize._cg.fgrad", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of shape (n_features,) or (n_features + 1,)", + "default_value": "", + "description": "Gradient vector." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_features,) or (n_features + 1,)" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_cg/maxiter", + "name": "maxiter", + "qname": "sklearn.utils.optimize._cg.maxiter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of CG iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_cg/tol", + "name": "tol", + "qname": "sklearn.utils.optimize._cg.tol", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "", + "description": "Stopping criterion." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Solve iteratively the linear system 'fhess_p . xsupi = fgrad'\nwith a conjugate gradient descent.", + "docstring": "Solve iteratively the linear system 'fhess_p . 
xsupi = fgrad'\nwith a conjugate gradient descent.\n\nParameters\n----------\nfhess_p : callable\n Function that takes the gradient as a parameter and returns the\n matrix product of the Hessian and gradient.\n\nfgrad : ndarray of shape (n_features,) or (n_features + 1,)\n Gradient vector.\n\nmaxiter : int\n Number of CG iterations.\n\ntol : float\n Stopping criterion.\n\nReturns\n-------\nxsupi : ndarray of shape (n_features,) or (n_features + 1,)\n Estimated solution.", + "code": "def _cg(fhess_p, fgrad, maxiter, tol):\n \"\"\"\n Solve iteratively the linear system 'fhess_p . xsupi = fgrad'\n with a conjugate gradient descent.\n\n Parameters\n ----------\n fhess_p : callable\n Function that takes the gradient as a parameter and returns the\n matrix product of the Hessian and gradient.\n\n fgrad : ndarray of shape (n_features,) or (n_features + 1,)\n Gradient vector.\n\n maxiter : int\n Number of CG iterations.\n\n tol : float\n Stopping criterion.\n\n Returns\n -------\n xsupi : ndarray of shape (n_features,) or (n_features + 1,)\n Estimated solution.\n \"\"\"\n xsupi = np.zeros(len(fgrad), dtype=fgrad.dtype)\n ri = fgrad\n psupi = -ri\n i = 0\n dri0 = np.dot(ri, ri)\n\n while i <= maxiter:\n if np.sum(np.abs(ri)) <= tol:\n break\n\n Ap = fhess_p(psupi)\n # check curvature\n curv = np.dot(psupi, Ap)\n if 0 <= curv <= 3 * np.finfo(np.float64).eps:\n break\n elif curv < 0:\n if i > 0:\n break\n else:\n # fall back to steepest descent direction\n xsupi += dri0 / curv * psupi\n break\n alphai = dri0 / curv\n xsupi += alphai * psupi\n ri = ri + alphai * Ap\n dri1 = np.dot(ri, ri)\n betai = dri1 / dri0\n psupi = -ri + betai * psupi\n i = i + 1\n dri0 = dri1 # update np.dot(ri,ri) for next time.\n\n return xsupi" + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_check_optimize_result", + "name": "_check_optimize_result", + "qname": "sklearn.utils.optimize._check_optimize_result", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.optimize/_check_optimize_result/solver", + "name": "solver", + "qname": "sklearn.utils.optimize._check_optimize_result.solver", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "Solver name. Currently only `lbfgs` is supported." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_check_optimize_result/result", + "name": "result", + "qname": "sklearn.utils.optimize._check_optimize_result.result", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "OptimizeResult", + "default_value": "", + "description": "Result of the scipy.optimize.minimize function." + }, + "type": { + "kind": "NamedType", + "name": "OptimizeResult" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_check_optimize_result/max_iter", + "name": "max_iter", + "qname": "sklearn.utils.optimize._check_optimize_result.max_iter", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Expected maximum number of iterations." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_check_optimize_result/extra_warning_msg", + "name": "extra_warning_msg", + "qname": "sklearn.utils.optimize._check_optimize_result.extra_warning_msg", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Extra warning message." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the OptimizeResult for successful convergence", + "docstring": "Check the OptimizeResult for successful convergence\n\nParameters\n----------\nsolver : str\n Solver name. Currently only `lbfgs` is supported.\n\nresult : OptimizeResult\n Result of the scipy.optimize.minimize function.\n\nmax_iter : int, default=None\n Expected maximum number of iterations.\n\nextra_warning_msg : str, default=None\n Extra warning message.\n\nReturns\n-------\nn_iter : int\n Number of iterations.", + "code": "def _check_optimize_result(solver, result, max_iter=None,\n extra_warning_msg=None):\n \"\"\"Check the OptimizeResult for successful convergence\n\n Parameters\n ----------\n solver : str\n Solver name. Currently only `lbfgs` is supported.\n\n result : OptimizeResult\n Result of the scipy.optimize.minimize function.\n\n max_iter : int, default=None\n Expected maximum number of iterations.\n\n extra_warning_msg : str, default=None\n Extra warning message.\n\n Returns\n -------\n n_iter : int\n Number of iterations.\n \"\"\"\n # handle both scipy and scikit-learn solver names\n if solver == \"lbfgs\":\n if result.status != 0:\n try:\n # The message is already decoded in scipy>=1.6.0\n result_message = result.message.decode(\"latin1\")\n except AttributeError:\n result_message = result.message\n warning_msg = (\n \"{} failed to converge (status={}):\\n{}.\\n\\n\"\n \"Increase the number of iterations (max_iter) \"\n \"or scale the data as shown in:\\n\"\n \" https://scikit-learn.org/stable/modules/\"\n \"preprocessing.html\"\n ).format(solver, result.status, result_message)\n if extra_warning_msg is not None:\n warning_msg += \"\\n\" + extra_warning_msg\n warnings.warn(warning_msg, ConvergenceWarning, stacklevel=2)\n if max_iter is not None:\n # In scipy <= 1.0.0, nit may exceed maxiter for lbfgs.\n # See https://github.com/scipy/scipy/issues/7854\n n_iter_i = min(result.nit, max_iter)\n else:\n n_iter_i = result.nit\n else:\n raise NotImplementedError\n\n return n_iter_i" + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_line_search_wolfe12", + "name": "_line_search_wolfe12", + "qname": "sklearn.utils.optimize._line_search_wolfe12", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.optimize/_line_search_wolfe12/f", + "name": "f", + "qname": "sklearn.utils.optimize._line_search_wolfe12.f", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_line_search_wolfe12/fprime", + "name": "fprime", + "qname": "sklearn.utils.optimize._line_search_wolfe12.fprime", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": 
"scikit-learn/sklearn.utils.optimize/_line_search_wolfe12/xk", + "name": "xk", + "qname": "sklearn.utils.optimize._line_search_wolfe12.xk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_line_search_wolfe12/pk", + "name": "pk", + "qname": "sklearn.utils.optimize._line_search_wolfe12.pk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_line_search_wolfe12/gfk", + "name": "gfk", + "qname": "sklearn.utils.optimize._line_search_wolfe12.gfk", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_line_search_wolfe12/old_fval", + "name": "old_fval", + "qname": "sklearn.utils.optimize._line_search_wolfe12.old_fval", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_line_search_wolfe12/old_old_fval", + "name": "old_old_fval", + "qname": "sklearn.utils.optimize._line_search_wolfe12.old_old_fval", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_line_search_wolfe12/kwargs", + "name": "kwargs", + "qname": "sklearn.utils.optimize._line_search_wolfe12.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Same as line_search_wolfe1, but fall back to line_search_wolfe2 if\nsuitable step length is not found, and raise an exception if a\nsuitable step length is not found.", + "docstring": "Same as line_search_wolfe1, but fall back to line_search_wolfe2 if\nsuitable step length is not found, and raise an exception if a\nsuitable step length is not found.\n\nRaises\n------\n_LineSearchError\n If no suitable step size is found.", + "code": "def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval,\n **kwargs):\n \"\"\"\n Same as line_search_wolfe1, but fall back to line_search_wolfe2 if\n suitable step length is not found, and raise an exception if a\n suitable step length is not found.\n\n Raises\n ------\n _LineSearchError\n If no suitable step size is found.\n\n \"\"\"\n ret = line_search_wolfe1(f, fprime, xk, pk, gfk,\n old_fval, old_old_fval,\n **kwargs)\n\n if ret[0] is None:\n # line search failed: try different one.\n ret = line_search_wolfe2(f, fprime, xk, pk, gfk,\n old_fval, old_old_fval, **kwargs)\n\n if ret[0] is None:\n raise _LineSearchError()\n\n return ret" + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_newton_cg", + "name": "_newton_cg", + "qname": "sklearn.utils.optimize._newton_cg", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.optimize/_newton_cg/grad_hess", + "name": "grad_hess", + "qname": 
"sklearn.utils.optimize._newton_cg.grad_hess", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Should return the gradient and a callable returning the matvec product\nof the Hessian." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_newton_cg/func", + "name": "func", + "qname": "sklearn.utils.optimize._newton_cg.func", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Should return the value of the function." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_newton_cg/grad", + "name": "grad", + "qname": "sklearn.utils.optimize._newton_cg.grad", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "", + "description": "Should return the function value and the gradient. This is used\nby the linesearch functions." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_newton_cg/x0", + "name": "x0", + "qname": "sklearn.utils.optimize._newton_cg.x0", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array of float", + "default_value": "", + "description": "Initial guess." + }, + "type": { + "kind": "NamedType", + "name": "array of float" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_newton_cg/args", + "name": "args", + "qname": "sklearn.utils.optimize._newton_cg.args", + "default_value": "()", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "tuple", + "default_value": "()", + "description": "Arguments passed to func_grad_hess, func and grad." + }, + "type": { + "kind": "NamedType", + "name": "tuple" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_newton_cg/tol", + "name": "tol", + "qname": "sklearn.utils.optimize._newton_cg.tol", + "default_value": "0.0001", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-4", + "description": "Stopping criterion. The iteration will stop when\n``max{|g_i | i = 1, ..., n} <= tol``\nwhere ``g_i`` is the i-th component of the gradient." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_newton_cg/maxiter", + "name": "maxiter", + "qname": "sklearn.utils.optimize._newton_cg.maxiter", + "default_value": "100", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "100", + "description": "Number of Newton iterations." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_newton_cg/maxinner", + "name": "maxinner", + "qname": "sklearn.utils.optimize._newton_cg.maxinner", + "default_value": "200", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "200", + "description": "Number of CG iterations." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_newton_cg/line_search", + "name": "line_search", + "qname": "sklearn.utils.optimize._newton_cg.line_search", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to use a line search or not." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.optimize/_newton_cg/warn", + "name": "warn", + "qname": "sklearn.utils.optimize._newton_cg.warn", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to warn when didn't converge." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Minimization of scalar function of one or more variables using the\nNewton-CG algorithm.", + "docstring": "Minimization of scalar function of one or more variables using the\nNewton-CG algorithm.\n\nParameters\n----------\ngrad_hess : callable\n Should return the gradient and a callable returning the matvec product\n of the Hessian.\n\nfunc : callable\n Should return the value of the function.\n\ngrad : callable\n Should return the function value and the gradient. This is used\n by the linesearch functions.\n\nx0 : array of float\n Initial guess.\n\nargs : tuple, default=()\n Arguments passed to func_grad_hess, func and grad.\n\ntol : float, default=1e-4\n Stopping criterion. The iteration will stop when\n ``max{|g_i | i = 1, ..., n} <= tol``\n where ``g_i`` is the i-th component of the gradient.\n\nmaxiter : int, default=100\n Number of Newton iterations.\n\nmaxinner : int, default=200\n Number of CG iterations.\n\nline_search : bool, default=True\n Whether to use a line search or not.\n\nwarn : bool, default=True\n Whether to warn when didn't converge.\n\nReturns\n-------\nxk : ndarray of float\n Estimated minimum.", + "code": "def _newton_cg(grad_hess, func, grad, x0, args=(), tol=1e-4,\n maxiter=100, maxinner=200, line_search=True, warn=True):\n \"\"\"\n Minimization of scalar function of one or more variables using the\n Newton-CG algorithm.\n\n Parameters\n ----------\n grad_hess : callable\n Should return the gradient and a callable returning the matvec product\n of the Hessian.\n\n func : callable\n Should return the value of the function.\n\n grad : callable\n Should return the function value and the gradient. This is used\n by the linesearch functions.\n\n x0 : array of float\n Initial guess.\n\n args : tuple, default=()\n Arguments passed to func_grad_hess, func and grad.\n\n tol : float, default=1e-4\n Stopping criterion. 
The iteration will stop when\n ``max{|g_i | i = 1, ..., n} <= tol``\n where ``g_i`` is the i-th component of the gradient.\n\n maxiter : int, default=100\n Number of Newton iterations.\n\n maxinner : int, default=200\n Number of CG iterations.\n\n line_search : bool, default=True\n Whether to use a line search or not.\n\n warn : bool, default=True\n Whether to warn when didn't converge.\n\n Returns\n -------\n xk : ndarray of float\n Estimated minimum.\n \"\"\"\n x0 = np.asarray(x0).flatten()\n xk = x0\n k = 0\n\n if line_search:\n old_fval = func(x0, *args)\n old_old_fval = None\n\n # Outer loop: our Newton iteration\n while k < maxiter:\n # Compute a search direction pk by applying the CG method to\n # del2 f(xk) p = - fgrad f(xk) starting from 0.\n fgrad, fhess_p = grad_hess(xk, *args)\n\n absgrad = np.abs(fgrad)\n if np.max(absgrad) <= tol:\n break\n\n maggrad = np.sum(absgrad)\n eta = min([0.5, np.sqrt(maggrad)])\n termcond = eta * maggrad\n\n # Inner loop: solve the Newton update by conjugate gradient, to\n # avoid inverting the Hessian\n xsupi = _cg(fhess_p, fgrad, maxiter=maxinner, tol=termcond)\n\n alphak = 1.0\n\n if line_search:\n try:\n alphak, fc, gc, old_fval, old_old_fval, gfkp1 = \\\n _line_search_wolfe12(func, grad, xk, xsupi, fgrad,\n old_fval, old_old_fval, args=args)\n except _LineSearchError:\n warnings.warn('Line Search failed')\n break\n\n xk = xk + alphak * xsupi # upcast if necessary\n k += 1\n\n if warn and k >= maxiter:\n warnings.warn(\"newton-cg failed to converge. Increase the \"\n \"number of iterations.\", ConvergenceWarning)\n return xk, k" + }, + { + "id": "scikit-learn/sklearn.utils.random/_random_choice_csc", + "name": "_random_choice_csc", + "qname": "sklearn.utils.random._random_choice_csc", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.random/_random_choice_csc/n_samples", + "name": "n_samples", + "qname": "sklearn.utils.random._random_choice_csc.n_samples", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int,", + "default_value": "", + "description": "Number of samples to draw in each column." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.random/_random_choice_csc/classes", + "name": "classes", + "qname": "sklearn.utils.random._random_choice_csc.classes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of size n_outputs of arrays of size (n_classes,)", + "default_value": "", + "description": "List of classes for each column." + }, + "type": { + "kind": "NamedType", + "name": "list of size n_outputs of arrays of size (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.utils.random/_random_choice_csc/class_probability", + "name": "class_probability", + "qname": "sklearn.utils.random._random_choice_csc.class_probability", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "list of size n_outputs of arrays of shape (n_classes,)", + "default_value": "None", + "description": "Class distribution of each column. If None, uniform distribution is\nassumed." 
+ }, + "type": { + "kind": "NamedType", + "name": "list of size n_outputs of arrays of shape (n_classes,)" + } + }, + { + "id": "scikit-learn/sklearn.utils.random/_random_choice_csc/random_state", + "name": "random_state", + "qname": "sklearn.utils.random._random_choice_csc.random_state", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Controls the randomness of the sampled classes.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Generate a sparse random matrix given column class distributions", + "docstring": "Generate a sparse random matrix given column class distributions\n\nParameters\n----------\nn_samples : int,\n Number of samples to draw in each column.\n\nclasses : list of size n_outputs of arrays of size (n_classes,)\n List of classes for each column.\n\nclass_probability : list of size n_outputs of arrays of shape (n_classes,), default=None\n Class distribution of each column. If None, uniform distribution is\n assumed.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the sampled classes.\n See :term:`Glossary `.\n\nReturns\n-------\nrandom_matrix : sparse csc matrix of size (n_samples, n_outputs)", + "code": "def _random_choice_csc(n_samples, classes, class_probability=None,\n random_state=None):\n \"\"\"Generate a sparse random matrix given column class distributions\n\n Parameters\n ----------\n n_samples : int,\n Number of samples to draw in each column.\n\n classes : list of size n_outputs of arrays of size (n_classes,)\n List of classes for each column.\n\n class_probability : list of size n_outputs of arrays of \\\n shape (n_classes,), default=None\n Class distribution of each column. 
If None, uniform distribution is\n assumed.\n\n random_state : int, RandomState instance or None, default=None\n Controls the randomness of the sampled classes.\n See :term:`Glossary <random_state>`.\n\n Returns\n -------\n random_matrix : sparse csc matrix of size (n_samples, n_outputs)\n\n \"\"\"\n data = array.array('i')\n indices = array.array('i')\n indptr = array.array('i', [0])\n\n for j in range(len(classes)):\n classes[j] = np.asarray(classes[j])\n if classes[j].dtype.kind != 'i':\n raise ValueError(\"class dtype %s is not supported\" %\n classes[j].dtype)\n classes[j] = classes[j].astype(np.int64, copy=False)\n\n # use uniform distribution if no class_probability is given\n if class_probability is None:\n class_prob_j = np.empty(shape=classes[j].shape[0])\n class_prob_j.fill(1 / classes[j].shape[0])\n else:\n class_prob_j = np.asarray(class_probability[j])\n\n if not np.isclose(np.sum(class_prob_j), 1.0):\n raise ValueError(\"Probability array at index {0} does not sum to \"\n \"one\".format(j))\n\n if class_prob_j.shape[0] != classes[j].shape[0]:\n raise ValueError(\"classes[{0}] (length {1}) and \"\n \"class_probability[{0}] (length {2}) have \"\n \"different length.\".format(j,\n classes[j].shape[0],\n class_prob_j.shape[0]))\n\n # If 0 is not present in the classes insert it with a probability 0.0\n if 0 not in classes[j]:\n classes[j] = np.insert(classes[j], 0, 0)\n class_prob_j = np.insert(class_prob_j, 0, 0.0)\n\n # If there are nonzero classes choose randomly using class_probability\n rng = check_random_state(random_state)\n if classes[j].shape[0] > 1:\n p_nonzero = 1 - class_prob_j[classes[j] == 0]\n nnz = int(n_samples * p_nonzero)\n ind_sample = sample_without_replacement(n_population=n_samples,\n n_samples=nnz,\n random_state=random_state)\n indices.extend(ind_sample)\n\n # Normalize probabilities for the nonzero elements\n classes_j_nonzero = classes[j] != 0\n class_probability_nz = class_prob_j[classes_j_nonzero]\n class_probability_nz_norm = (class_probability_nz /\n np.sum(class_probability_nz))\n classes_ind = np.searchsorted(class_probability_nz_norm.cumsum(),\n rng.rand(nnz))\n data.extend(classes[j][classes_j_nonzero][classes_ind])\n indptr.append(len(indices))\n\n return sp.csc_matrix((data, indices, indptr),\n (n_samples, len(classes)),\n dtype=int)" + }, + { + "id": "scikit-learn/sklearn.utils.setup/configuration", + "name": "configuration", + "qname": "sklearn.utils.setup.configuration", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.setup/configuration/parent_package", + "name": "parent_package", + "qname": "sklearn.utils.setup.configuration.parent_package", + "default_value": "''", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.setup/configuration/top_path", + "name": "top_path", + "qname": "sklearn.utils.setup.configuration.top_path", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def configuration(parent_package='', top_path=None):\n import numpy\n from numpy.distutils.misc_util import Configuration\n\n config = Configuration('utils', parent_package, top_path)\n\n libraries = []\n if os.name == 'posix':\n libraries.append('m')\n\n 
config.add_extension('sparsefuncs_fast',\n sources=['sparsefuncs_fast.pyx'],\n libraries=libraries)\n\n config.add_extension('_cython_blas',\n sources=['_cython_blas.pyx'],\n libraries=libraries)\n\n config.add_extension('arrayfuncs',\n sources=['arrayfuncs.pyx'],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_extension('murmurhash',\n sources=['murmurhash.pyx', join(\n 'src', 'MurmurHash3.cpp')],\n include_dirs=['src'])\n\n config.add_extension('graph_shortest_path',\n sources=['graph_shortest_path.pyx'],\n include_dirs=[numpy.get_include()])\n\n config.add_extension('_fast_dict',\n sources=['_fast_dict.pyx'],\n language=\"c++\",\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_extension('_openmp_helpers',\n sources=['_openmp_helpers.pyx'],\n libraries=libraries)\n\n # generate _seq_dataset from template\n templates = ['sklearn/utils/_seq_dataset.pyx.tp',\n 'sklearn/utils/_seq_dataset.pxd.tp']\n gen_from_templates(templates, top_path)\n\n config.add_extension('_seq_dataset',\n sources=['_seq_dataset.pyx'],\n include_dirs=[numpy.get_include()])\n\n config.add_extension('_weight_vector',\n sources=['_weight_vector.pyx'],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_extension(\"_random\",\n sources=[\"_random.pyx\"],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_extension(\"_logistic_sigmoid\",\n sources=[\"_logistic_sigmoid.pyx\"],\n include_dirs=[numpy.get_include()],\n libraries=libraries)\n\n config.add_subpackage('tests')\n\n return config" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_get_elem_at_rank", + "name": "_get_elem_at_rank", + "qname": "sklearn.utils.sparsefuncs._get_elem_at_rank", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_get_elem_at_rank/rank", + "name": "rank", + "qname": "sklearn.utils.sparsefuncs._get_elem_at_rank.rank", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_get_elem_at_rank/data", + "name": "data", + "qname": "sklearn.utils.sparsefuncs._get_elem_at_rank.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_get_elem_at_rank/n_negative", + "name": "n_negative", + "qname": "sklearn.utils.sparsefuncs._get_elem_at_rank.n_negative", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_get_elem_at_rank/n_zeros", + "name": "n_zeros", + "qname": "sklearn.utils.sparsefuncs._get_elem_at_rank.n_zeros", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Find the value in data augmented with n_zeros for the given rank", + "docstring": "Find the value in data augmented with n_zeros for the given rank", + "code": "def _get_elem_at_rank(rank, data, n_negative, n_zeros):\n \"\"\"Find the value in data augmented with n_zeros for the given rank\"\"\"\n 
if rank < n_negative:\n return data[rank]\n if rank - n_negative < n_zeros:\n return 0\n return data[rank - n_zeros]" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_get_median", + "name": "_get_median", + "qname": "sklearn.utils.sparsefuncs._get_median", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_get_median/data", + "name": "data", + "qname": "sklearn.utils.sparsefuncs._get_median.data", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_get_median/n_zeros", + "name": "n_zeros", + "qname": "sklearn.utils.sparsefuncs._get_median.n_zeros", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute the median of data with n_zeros additional zeros.\n\nThis function is used to support sparse matrices; it modifies data\nin-place.", + "docstring": "Compute the median of data with n_zeros additional zeros.\n\nThis function is used to support sparse matrices; it modifies data\nin-place.", + "code": "def _get_median(data, n_zeros):\n \"\"\"Compute the median of data with n_zeros additional zeros.\n\n This function is used to support sparse matrices; it modifies data\n in-place.\n \"\"\"\n n_elems = len(data) + n_zeros\n if not n_elems:\n return np.nan\n n_negative = np.count_nonzero(data < 0)\n middle, is_odd = divmod(n_elems, 2)\n data.sort()\n\n if is_odd:\n return _get_elem_at_rank(middle, data, n_negative, n_zeros)\n\n return (_get_elem_at_rank(middle - 1, data, n_negative, n_zeros) +\n _get_elem_at_rank(middle, data, n_negative, n_zeros)) / 2." 
+ }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_min_or_max_axis", + "name": "_min_or_max_axis", + "qname": "sklearn.utils.sparsefuncs._min_or_max_axis", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_min_or_max_axis/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs._min_or_max_axis.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_min_or_max_axis/axis", + "name": "axis", + "qname": "sklearn.utils.sparsefuncs._min_or_max_axis.axis", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_min_or_max_axis/min_or_max", + "name": "min_or_max", + "qname": "sklearn.utils.sparsefuncs._min_or_max_axis.min_or_max", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _min_or_max_axis(X, axis, min_or_max):\n N = X.shape[axis]\n if N == 0:\n raise ValueError(\"zero-size array to reduction operation\")\n M = X.shape[1 - axis]\n mat = X.tocsc() if axis == 0 else X.tocsr()\n mat.sum_duplicates()\n major_index, value = _minor_reduce(mat, min_or_max)\n not_full = np.diff(mat.indptr)[major_index] < N\n value[not_full] = min_or_max(value[not_full], 0)\n mask = value != 0\n major_index = np.compress(mask, major_index)\n value = np.compress(mask, value)\n\n if axis == 0:\n res = sp.coo_matrix((value, (np.zeros(len(value)), major_index)),\n dtype=X.dtype, shape=(1, M))\n else:\n res = sp.coo_matrix((value, (major_index, np.zeros(len(value)))),\n dtype=X.dtype, shape=(M, 1))\n return res.A.ravel()" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_minor_reduce", + "name": "_minor_reduce", + "qname": "sklearn.utils.sparsefuncs._minor_reduce", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_minor_reduce/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs._minor_reduce.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_minor_reduce/ufunc", + "name": "ufunc", + "qname": "sklearn.utils.sparsefuncs._minor_reduce.ufunc", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _minor_reduce(X, ufunc):\n major_index = np.flatnonzero(np.diff(X.indptr))\n\n # reduceat tries casts X.indptr to intp, which errors\n # if it is int64 on a 32 bit system.\n # Reinitializing prevents this where possible, see #13737\n X = type(X)((X.data, X.indices, X.indptr), shape=X.shape)\n value = ufunc.reduceat(X.data, X.indptr[major_index])\n return major_index, value" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_raise_error_wrong_axis", + "name": "_raise_error_wrong_axis", + "qname": 
"sklearn.utils.sparsefuncs._raise_error_wrong_axis", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_raise_error_wrong_axis/axis", + "name": "axis", + "qname": "sklearn.utils.sparsefuncs._raise_error_wrong_axis.axis", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _raise_error_wrong_axis(axis):\n if axis not in (0, 1):\n raise ValueError(\n \"Unknown axis value: %d. Use 0 for rows, or 1 for columns\" % axis)" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_raise_typeerror", + "name": "_raise_typeerror", + "qname": "sklearn.utils.sparsefuncs._raise_typeerror", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_raise_typeerror/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs._raise_typeerror.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Raises a TypeError if X is not a CSR or CSC matrix", + "docstring": "Raises a TypeError if X is not a CSR or CSC matrix", + "code": "def _raise_typeerror(X):\n \"\"\"Raises a TypeError if X is not a CSR or CSC matrix\"\"\"\n input_type = X.format if sp.issparse(X) else type(X)\n err = \"Expected a CSR or CSC sparse matrix, got %s.\" % input_type\n raise TypeError(err)" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_sparse_min_max", + "name": "_sparse_min_max", + "qname": "sklearn.utils.sparsefuncs._sparse_min_max", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_sparse_min_max/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs._sparse_min_max.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_sparse_min_max/axis", + "name": "axis", + "qname": "sklearn.utils.sparsefuncs._sparse_min_max.axis", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _sparse_min_max(X, axis):\n return (_sparse_min_or_max(X, axis, np.minimum),\n _sparse_min_or_max(X, axis, np.maximum))" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_sparse_min_or_max", + "name": "_sparse_min_or_max", + "qname": "sklearn.utils.sparsefuncs._sparse_min_or_max", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_sparse_min_or_max/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs._sparse_min_or_max.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_sparse_min_or_max/axis", + "name": "axis", + "qname": "sklearn.utils.sparsefuncs._sparse_min_or_max.axis", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_sparse_min_or_max/min_or_max", + "name": "min_or_max", + "qname": "sklearn.utils.sparsefuncs._sparse_min_or_max.min_or_max", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _sparse_min_or_max(X, axis, min_or_max):\n if axis is None:\n if 0 in X.shape:\n raise ValueError(\"zero-size array to reduction operation\")\n zero = X.dtype.type(0)\n if X.nnz == 0:\n return zero\n m = min_or_max.reduce(X.data.ravel())\n if X.nnz != np.product(X.shape):\n m = min_or_max(zero, m)\n return m\n if axis < 0:\n axis += 2\n if (axis == 0) or (axis == 1):\n return _min_or_max_axis(X, axis, min_or_max)\n else:\n raise ValueError(\"invalid axis, use 0 for rows, or 1 for columns\")" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_sparse_nan_min_max", + "name": "_sparse_nan_min_max", + "qname": "sklearn.utils.sparsefuncs._sparse_nan_min_max", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_sparse_nan_min_max/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs._sparse_nan_min_max.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/_sparse_nan_min_max/axis", + "name": "axis", + "qname": "sklearn.utils.sparsefuncs._sparse_nan_min_max.axis", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _sparse_nan_min_max(X, axis):\n return(_sparse_min_or_max(X, axis, np.fmin),\n _sparse_min_or_max(X, axis, np.fmax))" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/count_nonzero", + "name": "count_nonzero", + "qname": "sklearn.utils.sparsefuncs.count_nonzero", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/count_nonzero/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs.count_nonzero.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_labels)", + "default_value": "", + "description": "Input data. It should be of CSR format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_labels)" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/count_nonzero/axis", + "name": "axis", + "qname": "sklearn.utils.sparsefuncs.count_nonzero.axis", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{0, 1}", + "default_value": "None", + "description": "The axis on which the data is aggregated." 
+ }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/count_nonzero/sample_weight", + "name": "sample_weight", + "qname": "sklearn.utils.sparsefuncs.count_nonzero.sample_weight", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Weight for each row of X." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "A variant of X.getnnz() with extension to weighting on axis 0\n\nUseful in efficiently calculating multilabel metrics.", + "docstring": "A variant of X.getnnz() with extension to weighting on axis 0\n\nUseful in efficiently calculating multilabel metrics.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_labels)\n Input data. It should be of CSR format.\n\naxis : {0, 1}, default=None\n The axis on which the data is aggregated.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight for each row of X.", + "code": "def count_nonzero(X, axis=None, sample_weight=None):\n \"\"\"A variant of X.getnnz() with extension to weighting on axis 0\n\n Useful in efficiently calculating multilabel metrics.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_labels)\n Input data. It should be of CSR format.\n\n axis : {0, 1}, default=None\n The axis on which the data is aggregated.\n\n sample_weight : array-like of shape (n_samples,), default=None\n Weight for each row of X.\n \"\"\"\n if axis == -1:\n axis = 1\n elif axis == -2:\n axis = 0\n elif X.format != 'csr':\n raise TypeError('Expected CSR sparse format, got {0}'.format(X.format))\n\n # We rely here on the fact that np.diff(Y.indptr) for a CSR\n # will return the number of nonzero entries in each row.\n # A bincount over Y.indices will return the number of nonzeros\n # in each column. See ``csr_matrix.getnnz`` in scipy >= 0.14.\n if axis is None:\n if sample_weight is None:\n return X.nnz\n else:\n return np.dot(np.diff(X.indptr), sample_weight)\n elif axis == 1:\n out = np.diff(X.indptr)\n if sample_weight is None:\n # astype here is for consistency with axis=0 dtype\n return out.astype('intp')\n return out * sample_weight\n elif axis == 0:\n if sample_weight is None:\n return np.bincount(X.indices, minlength=X.shape[1])\n else:\n weights = np.repeat(sample_weight, np.diff(X.indptr))\n return np.bincount(X.indices, minlength=X.shape[1],\n weights=weights)\n else:\n raise ValueError('Unsupported axis: {0}'.format(axis))" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/csc_median_axis_0", + "name": "csc_median_axis_0", + "qname": "sklearn.utils.sparsefuncs.csc_median_axis_0", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/csc_median_axis_0/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs.csc_median_axis_0.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data. It should be of CSC format." 
+ }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Find the median across axis 0 of a CSC matrix.\nIt is equivalent to doing np.median(X, axis=0).", + "docstring": "Find the median across axis 0 of a CSC matrix.\nIt is equivalent to doing np.median(X, axis=0).\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Input data. It should be of CSC format.\n\nReturns\n-------\nmedian : ndarray of shape (n_features,)\n Median.", + "code": "def csc_median_axis_0(X):\n \"\"\"Find the median across axis 0 of a CSC matrix.\n It is equivalent to doing np.median(X, axis=0).\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n Input data. It should be of CSC format.\n\n Returns\n -------\n median : ndarray of shape (n_features,)\n Median.\n\n \"\"\"\n if not isinstance(X, sp.csc_matrix):\n raise TypeError(\"Expected matrix of CSC format, got %s\" % X.format)\n\n indptr = X.indptr\n n_samples, n_features = X.shape\n median = np.zeros(n_features)\n\n for f_ind, (start, end) in enumerate(zip(indptr[:-1], indptr[1:])):\n\n # Prevent modifying X in place\n data = np.copy(X.data[start: end])\n nz = n_samples - data.size\n median[f_ind] = _get_median(data, nz)\n\n return median" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/incr_mean_variance_axis", + "name": "incr_mean_variance_axis", + "qname": "sklearn.utils.sparsefuncs.incr_mean_variance_axis", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/incr_mean_variance_axis/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs.incr_mean_variance_axis.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "CSR or CSC sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "CSR" + }, + { + "kind": "NamedType", + "name": "CSC sparse matrix of shape (n_samples, n_features)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/incr_mean_variance_axis/axis", + "name": "axis", + "qname": "sklearn.utils.sparsefuncs.incr_mean_variance_axis.axis", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{0, 1}", + "default_value": "", + "description": "Axis along which the axis should be computed." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/incr_mean_variance_axis/last_mean", + "name": "last_mean", + "qname": "sklearn.utils.sparsefuncs.incr_mean_variance_axis.last_mean", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_features,) or (n_samples,), dtype=floating", + "default_value": "", + "description": "Array of means to update with the new data X.\nShould be of shape (n_features,) if axis=0 or (n_samples,) if axis=1." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_features,) or (n_samples,)" + }, + { + "kind": "NamedType", + "name": "dtype=floating" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/incr_mean_variance_axis/last_var", + "name": "last_var", + "qname": "sklearn.utils.sparsefuncs.incr_mean_variance_axis.last_var", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_features,) or (n_samples,), dtype=floating", + "default_value": "", + "description": "Array of variances to update with the new data X.\nShould be of shape (n_features,) if axis=0 or (n_samples,) if axis=1." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "ndarray of shape (n_features,) or (n_samples,)" + }, + { + "kind": "NamedType", + "name": "dtype=floating" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/incr_mean_variance_axis/last_n", + "name": "last_n", + "qname": "sklearn.utils.sparsefuncs.incr_mean_variance_axis.last_n", + "default_value": null, + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float or ndarray of shape (n_features,) or (n_samples,), dtype=floating", + "default_value": "", + "description": "Sum of the weights seen so far, excluding the current weights\nIf not float, it should be of shape (n_samples,) if\naxis=0 or (n_features,) if axis=1. If float it corresponds to\nhaving same weights for all samples (or features)." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_features,) or (n_samples,)" + }, + { + "kind": "NamedType", + "name": "dtype=floating" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/incr_mean_variance_axis/weights", + "name": "weights", + "qname": "sklearn.utils.sparsefuncs.incr_mean_variance_axis.weights", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples,) or (n_features,)", + "default_value": "None", + "description": "If axis is set to 0 shape is (n_samples,) or\nif axis is set to 1 shape is (n_features,).\nIf it is set to None, then samples are equally weighted.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,) or (n_features,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute incremental mean and variance along an axis on a CSR or\nCSC matrix.\n\nlast_mean, last_var are the statistics computed at the last step by this\nfunction. Both must be initialized to 0-arrays of the proper size, i.e.\nthe number of features in X. last_n is the number of samples encountered\nuntil now.", + "docstring": "Compute incremental mean and variance along an axis on a CSR or\nCSC matrix.\n\nlast_mean, last_var are the statistics computed at the last step by this\nfunction. Both must be initialized to 0-arrays of the proper size, i.e.\nthe number of features in X. 
last_n is the number of samples encountered\nuntil now.\n\nParameters\n----------\nX : CSR or CSC sparse matrix of shape (n_samples, n_features)\n Input data.\n\naxis : {0, 1}\n Axis along which the axis should be computed.\n\nlast_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Array of means to update with the new data X.\n Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\nlast_var : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Array of variances to update with the new data X.\n Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\nlast_n : float or ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Sum of the weights seen so far, excluding the current weights\n If not float, it should be of shape (n_samples,) if\n axis=0 or (n_features,) if axis=1. If float it corresponds to\n having same weights for all samples (or features).\n\nweights : ndarray of shape (n_samples,) or (n_features,), default=None\n If axis is set to 0 shape is (n_samples,) or\n if axis is set to 1 shape is (n_features,).\n If it is set to None, then samples are equally weighted.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nmeans : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Updated feature-wise means if axis = 0 or\n sample-wise means if axis = 1.\n\nvariances : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Updated feature-wise variances if axis = 0 or\n sample-wise variances if axis = 1.\n\nn : ndarray of shape (n_features,) or (n_samples,), dtype=integral\n Updated number of seen samples per feature if axis=0\n or number of seen features per sample if axis=1.\n\n If weights is not None, n is a sum of the weights of the seen\n samples or features instead of the actual number of seen\n samples or features.\n\nNotes\n-----\nNaNs are ignored in the algorithm.", + "code": "@_deprecate_positional_args\ndef incr_mean_variance_axis(X, *, axis, last_mean, last_var, last_n,\n weights=None):\n \"\"\"Compute incremental mean and variance along an axis on a CSR or\n CSC matrix.\n\n last_mean, last_var are the statistics computed at the last step by this\n function. Both must be initialized to 0-arrays of the proper size, i.e.\n the number of features in X. last_n is the number of samples encountered\n until now.\n\n Parameters\n ----------\n X : CSR or CSC sparse matrix of shape (n_samples, n_features)\n Input data.\n\n axis : {0, 1}\n Axis along which the axis should be computed.\n\n last_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Array of means to update with the new data X.\n Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\n last_var : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Array of variances to update with the new data X.\n Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.\n\n last_n : float or ndarray of shape (n_features,) or (n_samples,), \\\n dtype=floating\n Sum of the weights seen so far, excluding the current weights\n If not float, it should be of shape (n_samples,) if\n axis=0 or (n_features,) if axis=1. If float it corresponds to\n having same weights for all samples (or features).\n\n weights : ndarray of shape (n_samples,) or (n_features,), default=None\n If axis is set to 0 shape is (n_samples,) or\n if axis is set to 1 shape is (n_features,).\n If it is set to None, then samples are equally weighted.\n\n .. 
versionadded:: 0.24\n\n Returns\n -------\n means : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Updated feature-wise means if axis = 0 or\n sample-wise means if axis = 1.\n\n variances : ndarray of shape (n_features,) or (n_samples,), dtype=floating\n Updated feature-wise variances if axis = 0 or\n sample-wise variances if axis = 1.\n\n n : ndarray of shape (n_features,) or (n_samples,), dtype=integral\n Updated number of seen samples per feature if axis=0\n or number of seen features per sample if axis=1.\n\n If weights is not None, n is a sum of the weights of the seen\n samples or features instead of the actual number of seen\n samples or features.\n\n Notes\n -----\n NaNs are ignored in the algorithm.\n \"\"\"\n _raise_error_wrong_axis(axis)\n\n if not isinstance(X, (sp.csr_matrix, sp.csc_matrix)):\n _raise_typeerror(X)\n\n if np.size(last_n) == 1:\n last_n = np.full(last_mean.shape, last_n, dtype=last_mean.dtype)\n\n if not (np.size(last_mean) == np.size(last_var) == np.size(last_n)):\n raise ValueError(\n \"last_mean, last_var, last_n do not have the same shapes.\"\n )\n\n if axis == 1:\n if np.size(last_mean) != X.shape[0]:\n raise ValueError(\n f\"If axis=1, then last_mean, last_n, last_var should be of \"\n f\"size n_samples {X.shape[0]} (Got {np.size(last_mean)}).\"\n )\n else: # axis == 0\n if np.size(last_mean) != X.shape[1]:\n raise ValueError(\n f\"If axis=0, then last_mean, last_n, last_var should be of \"\n f\"size n_features {X.shape[1]} (Got {np.size(last_mean)}).\"\n )\n\n X = X.T if axis == 1 else X\n\n if weights is not None:\n weights = _check_sample_weight(weights, X, dtype=X.dtype)\n\n return _incr_mean_var_axis0(X, last_mean=last_mean,\n last_var=last_var, last_n=last_n,\n weights=weights)" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_column_scale", + "name": "inplace_column_scale", + "qname": "sklearn.utils.sparsefuncs.inplace_column_scale", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_column_scale/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs.inplace_column_scale.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "Matrix to normalize using the variance of the features. It should be\nof CSC or CSR format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_column_scale/scale", + "name": "scale", + "qname": "sklearn.utils.sparsefuncs.inplace_column_scale.scale", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_features,), dtype={np.float32, np.float64}", + "default_value": "", + "description": "Array of precomputed feature-wise values to use for scaling." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + }, + { + "kind": "NamedType", + "name": "dtype=" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Inplace column scaling of a CSC/CSR matrix.\n\nScale each feature of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.", + "docstring": "Inplace column scaling of a CSC/CSR matrix.\n\nScale each feature of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix to normalize using the variance of the features. It should be\n of CSC or CSR format.\n\nscale : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Array of precomputed feature-wise values to use for scaling.", + "code": "def inplace_column_scale(X, scale):\n \"\"\"Inplace column scaling of a CSC/CSR matrix.\n\n Scale each feature of the data matrix by multiplying with specific scale\n provided by the caller assuming a (n_samples, n_features) shape.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n Matrix to normalize using the variance of the features. It should be\n of CSC or CSR format.\n\n scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Array of precomputed feature-wise values to use for scaling.\n \"\"\"\n if isinstance(X, sp.csc_matrix):\n inplace_csr_row_scale(X.T, scale)\n elif isinstance(X, sp.csr_matrix):\n inplace_csr_column_scale(X, scale)\n else:\n _raise_typeerror(X)" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_csr_column_scale", + "name": "inplace_csr_column_scale", + "qname": "sklearn.utils.sparsefuncs.inplace_csr_column_scale", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_csr_column_scale/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs.inplace_csr_column_scale.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "Matrix to normalize using the variance of the features.\nIt should be of CSR format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_csr_column_scale/scale", + "name": "scale", + "qname": "sklearn.utils.sparsefuncs.inplace_csr_column_scale.scale", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_features,), dtype={np.float32, np.float64}", + "default_value": "", + "description": "Array of precomputed feature-wise values to use for scaling." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + }, + { + "kind": "NamedType", + "name": "dtype=" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Inplace column scaling of a CSR matrix.\n\nScale each feature of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.", + "docstring": "Inplace column scaling of a CSR matrix.\n\nScale each feature of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix to normalize using the variance of the features.\n It should be of CSR format.\n\nscale : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Array of precomputed feature-wise values to use for scaling.", + "code": "def inplace_csr_column_scale(X, scale):\n \"\"\"Inplace column scaling of a CSR matrix.\n\n Scale each feature of the data matrix by multiplying with specific scale\n provided by the caller assuming a (n_samples, n_features) shape.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n Matrix to normalize using the variance of the features.\n It should be of CSR format.\n\n scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Array of precomputed feature-wise values to use for scaling.\n \"\"\"\n assert scale.shape[0] == X.shape[1]\n X.data *= scale.take(X.indices, mode='clip')" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_csr_row_scale", + "name": "inplace_csr_row_scale", + "qname": "sklearn.utils.sparsefuncs.inplace_csr_row_scale", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_csr_row_scale/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs.inplace_csr_row_scale.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "Matrix to be scaled. It should be of CSR format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_csr_row_scale/scale", + "name": "scale", + "qname": "sklearn.utils.sparsefuncs.inplace_csr_row_scale.scale", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of float of shape (n_samples,)", + "default_value": "", + "description": "Array of precomputed sample-wise values to use for scaling." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of float of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Inplace row scaling of a CSR matrix.\n\nScale each sample of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.", + "docstring": "Inplace row scaling of a CSR matrix.\n\nScale each sample of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix to be scaled. 
It should be of CSR format.\n\nscale : ndarray of float of shape (n_samples,)\n Array of precomputed sample-wise values to use for scaling.", + "code": "def inplace_csr_row_scale(X, scale):\n \"\"\" Inplace row scaling of a CSR matrix.\n\n Scale each sample of the data matrix by multiplying with specific scale\n provided by the caller assuming a (n_samples, n_features) shape.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n Matrix to be scaled. It should be of CSR format.\n\n scale : ndarray of float of shape (n_samples,)\n Array of precomputed sample-wise values to use for scaling.\n \"\"\"\n assert scale.shape[0] == X.shape[0]\n X.data *= np.repeat(scale, np.diff(X.indptr))" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_row_scale", + "name": "inplace_row_scale", + "qname": "sklearn.utils.sparsefuncs.inplace_row_scale", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_row_scale/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs.inplace_row_scale.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "Matrix to be scaled. It should be of CSR or CSC format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_row_scale/scale", + "name": "scale", + "qname": "sklearn.utils.sparsefuncs.inplace_row_scale.scale", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_features,), dtype={np.float32, np.float64}", + "default_value": "", + "description": "Array of precomputed sample-wise values to use for scaling." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "ndarray of shape (n_features,)" + }, + { + "kind": "NamedType", + "name": "dtype=" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Inplace row scaling of a CSR or CSC matrix.\n\nScale each row of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.", + "docstring": "Inplace row scaling of a CSR or CSC matrix.\n\nScale each row of the data matrix by multiplying with specific scale\nprovided by the caller assuming a (n_samples, n_features) shape.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix to be scaled. It should be of CSR or CSC format.\n\nscale : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Array of precomputed sample-wise values to use for scaling.", + "code": "def inplace_row_scale(X, scale):\n \"\"\" Inplace row scaling of a CSR or CSC matrix.\n\n Scale each row of the data matrix by multiplying with specific scale\n provided by the caller assuming a (n_samples, n_features) shape.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n Matrix to be scaled. 
It should be of CSR or CSC format.\n\n scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Array of precomputed sample-wise values to use for scaling.\n \"\"\"\n if isinstance(X, sp.csc_matrix):\n inplace_csr_column_scale(X.T, scale)\n elif isinstance(X, sp.csr_matrix):\n inplace_csr_row_scale(X, scale)\n else:\n _raise_typeerror(X)" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_column", + "name": "inplace_swap_column", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_column", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_column/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_column.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "Matrix whose two columns are to be swapped. It should be of\nCSR or CSC format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_column/m", + "name": "m", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_column.m", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Index of the column of X to be swapped." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_column/n", + "name": "n", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_column.n", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Index of the column of X to be swapped." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Swaps two columns of a CSC/CSR matrix in-place.", + "docstring": "Swaps two columns of a CSC/CSR matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix whose two columns are to be swapped. It should be of\n CSR or CSC format.\n\nm : int\n Index of the column of X to be swapped.\n\nn : int\n Index of the column of X to be swapped.", + "code": "def inplace_swap_column(X, m, n):\n \"\"\"\n Swaps two columns of a CSC/CSR matrix in-place.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n Matrix whose two columns are to be swapped. 
It should be of\n CSR or CSC format.\n\n m : int\n Index of the column of X to be swapped.\n\n n : int\n Index of the column of X to be swapped.\n \"\"\"\n if m < 0:\n m += X.shape[1]\n if n < 0:\n n += X.shape[1]\n if isinstance(X, sp.csc_matrix):\n inplace_swap_row_csr(X, m, n)\n elif isinstance(X, sp.csr_matrix):\n inplace_swap_row_csc(X, m, n)\n else:\n _raise_typeerror(X)" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row", + "name": "inplace_swap_row", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_row", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_row.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "Matrix whose two rows are to be swapped. It should be of CSR or\nCSC format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row/m", + "name": "m", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_row.m", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Index of the row of X to be swapped." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row/n", + "name": "n", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_row.n", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Index of the row of X to be swapped." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Swaps two rows of a CSC/CSR matrix in-place.", + "docstring": "Swaps two rows of a CSC/CSR matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix whose two rows are to be swapped. It should be of CSR or\n CSC format.\n\nm : int\n Index of the row of X to be swapped.\n\nn : int\n Index of the row of X to be swapped.", + "code": "def inplace_swap_row(X, m, n):\n \"\"\"\n Swaps two rows of a CSC/CSR matrix in-place.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n Matrix whose two rows are to be swapped. It should be of CSR or\n CSC format.\n\n m : int\n Index of the row of X to be swapped.\n\n n : int\n Index of the row of X to be swapped.\n \"\"\"\n if isinstance(X, sp.csc_matrix):\n inplace_swap_row_csc(X, m, n)\n elif isinstance(X, sp.csr_matrix):\n inplace_swap_row_csr(X, m, n)\n else:\n _raise_typeerror(X)" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row_csc", + "name": "inplace_swap_row_csc", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_row_csc", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row_csc/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_row_csc.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "Matrix whose two rows are to be swapped. 
It should be of\nCSC format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row_csc/m", + "name": "m", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_row_csc.m", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Index of the row of X to be swapped." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row_csc/n", + "name": "n", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_row_csc.n", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Index of the row of X to be swapped." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Swaps two rows of a CSC matrix in-place.", + "docstring": "Swaps two rows of a CSC matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix whose two rows are to be swapped. It should be of\n CSC format.\n\nm : int\n Index of the row of X to be swapped.\n\nn : int\n Index of the row of X to be swapped.", + "code": "def inplace_swap_row_csc(X, m, n):\n \"\"\"\n Swaps two rows of a CSC matrix in-place.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n Matrix whose two rows are to be swapped. It should be of\n CSC format.\n\n m : int\n Index of the row of X to be swapped.\n\n n : int\n Index of the row of X to be swapped.\n \"\"\"\n for t in [m, n]:\n if isinstance(t, np.ndarray):\n raise TypeError(\"m and n should be valid integers\")\n\n if m < 0:\n m += X.shape[0]\n if n < 0:\n n += X.shape[0]\n\n m_mask = X.indices == m\n X.indices[X.indices == n] = m\n X.indices[m_mask] = n" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row_csr", + "name": "inplace_swap_row_csr", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_row_csr", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row_csr/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_row_csr.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "Matrix whose two rows are to be swapped. It should be of\nCSR format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row_csr/m", + "name": "m", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_row_csr.m", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Index of the row of X to be swapped." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/inplace_swap_row_csr/n", + "name": "n", + "qname": "sklearn.utils.sparsefuncs.inplace_swap_row_csr.n", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Index of the row of X to be swapped." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Swaps two rows of a CSR matrix in-place.", + "docstring": "Swaps two rows of a CSR matrix in-place.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Matrix whose two rows are to be swapped. It should be of\n CSR format.\n\nm : int\n Index of the row of X to be swapped.\n\nn : int\n Index of the row of X to be swapped.", + "code": "def inplace_swap_row_csr(X, m, n):\n \"\"\"\n Swaps two rows of a CSR matrix in-place.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n Matrix whose two rows are to be swapped. It should be of\n CSR format.\n\n m : int\n Index of the row of X to be swapped.\n\n n : int\n Index of the row of X to be swapped.\n \"\"\"\n for t in [m, n]:\n if isinstance(t, np.ndarray):\n raise TypeError(\"m and n should be valid integers\")\n\n if m < 0:\n m += X.shape[0]\n if n < 0:\n n += X.shape[0]\n\n # The following swapping makes life easier since m is assumed to be the\n # smaller integer below.\n if m > n:\n m, n = n, m\n\n indptr = X.indptr\n m_start = indptr[m]\n m_stop = indptr[m + 1]\n n_start = indptr[n]\n n_stop = indptr[n + 1]\n nz_m = m_stop - m_start\n nz_n = n_stop - n_start\n\n if nz_m != nz_n:\n # Modify indptr first\n X.indptr[m + 2:n] += nz_n - nz_m\n X.indptr[m + 1] = m_start + nz_n\n X.indptr[n] = n_stop - nz_m\n\n X.indices = np.concatenate([X.indices[:m_start],\n X.indices[n_start:n_stop],\n X.indices[m_stop:n_start],\n X.indices[m_start:m_stop],\n X.indices[n_stop:]])\n X.data = np.concatenate([X.data[:m_start],\n X.data[n_start:n_stop],\n X.data[m_stop:n_start],\n X.data[m_start:m_stop],\n X.data[n_stop:]])" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/mean_variance_axis", + "name": "mean_variance_axis", + "qname": "sklearn.utils.sparsefuncs.mean_variance_axis", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/mean_variance_axis/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs.mean_variance_axis.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data. It can be of CSR or CSC format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/mean_variance_axis/axis", + "name": "axis", + "qname": "sklearn.utils.sparsefuncs.mean_variance_axis.axis", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{0, 1}", + "default_value": "", + "description": "Axis along which the axis should be computed." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/mean_variance_axis/weights", + "name": "weights", + "qname": "sklearn.utils.sparsefuncs.mean_variance_axis.weights", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray of shape (n_samples,) or (n_features,)", + "default_value": "None", + "description": "if axis is set to 0 shape is (n_samples,) or\nif axis is set to 1 shape is (n_features,).\nIf it is set to None, then samples are equally weighted.\n\n.. 
versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "ndarray of shape (n_samples,) or (n_features,)" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/mean_variance_axis/return_sum_weights", + "name": "return_sum_weights", + "qname": "sklearn.utils.sparsefuncs.mean_variance_axis.return_sum_weights", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, returns the sum of weights seen for each feature\nif `axis=0` or each sample if `axis=1`.\n\n.. versionadded:: 0.24" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute mean and variance along an axis on a CSR or CSC matrix.", + "docstring": "Compute mean and variance along an axis on a CSR or CSC matrix.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Input data. It can be of CSR or CSC format.\n\naxis : {0, 1}\n Axis along which the axis should be computed.\n\nweights : ndarray of shape (n_samples,) or (n_features,), default=None\n if axis is set to 0 shape is (n_samples,) or\n if axis is set to 1 shape is (n_features,).\n If it is set to None, then samples are equally weighted.\n\n .. versionadded:: 0.24\n\nreturn_sum_weights : bool, default=False\n If True, returns the sum of weights seen for each feature\n if `axis=0` or each sample if `axis=1`.\n\n .. versionadded:: 0.24\n\nReturns\n-------\n\nmeans : ndarray of shape (n_features,), dtype=floating\n Feature-wise means.\n\nvariances : ndarray of shape (n_features,), dtype=floating\n Feature-wise variances.\n\nsum_weights : ndarray of shape (n_features,), dtype=floating\n Returned if `return_sum_weights` is `True`.", + "code": "def mean_variance_axis(X, axis, weights=None, return_sum_weights=False):\n \"\"\"Compute mean and variance along an axis on a CSR or CSC matrix.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n Input data. It can be of CSR or CSC format.\n\n axis : {0, 1}\n Axis along which the axis should be computed.\n\n weights : ndarray of shape (n_samples,) or (n_features,), default=None\n if axis is set to 0 shape is (n_samples,) or\n if axis is set to 1 shape is (n_features,).\n If it is set to None, then samples are equally weighted.\n\n .. versionadded:: 0.24\n\n return_sum_weights : bool, default=False\n If True, returns the sum of weights seen for each feature\n if `axis=0` or each sample if `axis=1`.\n\n .. 
versionadded:: 0.24\n\n Returns\n -------\n\n means : ndarray of shape (n_features,), dtype=floating\n Feature-wise means.\n\n variances : ndarray of shape (n_features,), dtype=floating\n Feature-wise variances.\n\n sum_weights : ndarray of shape (n_features,), dtype=floating\n Returned if `return_sum_weights` is `True`.\n \"\"\"\n _raise_error_wrong_axis(axis)\n\n if isinstance(X, sp.csr_matrix):\n if axis == 0:\n return _csr_mean_var_axis0(\n X, weights=weights, return_sum_weights=return_sum_weights)\n else:\n return _csc_mean_var_axis0(\n X.T, weights=weights, return_sum_weights=return_sum_weights)\n elif isinstance(X, sp.csc_matrix):\n if axis == 0:\n return _csc_mean_var_axis0(\n X, weights=weights, return_sum_weights=return_sum_weights)\n else:\n return _csr_mean_var_axis0(\n X.T, weights=weights, return_sum_weights=return_sum_weights)\n else:\n _raise_typeerror(X)" + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/min_max_axis", + "name": "min_max_axis", + "qname": "sklearn.utils.sparsefuncs.min_max_axis", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/min_max_axis/X", + "name": "X", + "qname": "sklearn.utils.sparsefuncs.min_max_axis.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "sparse matrix of shape (n_samples, n_features)", + "default_value": "", + "description": "Input data. It should be of CSR or CSC format." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/min_max_axis/axis", + "name": "axis", + "qname": "sklearn.utils.sparsefuncs.min_max_axis.axis", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{0, 1}", + "default_value": "", + "description": "Axis along which the axis should be computed." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.sparsefuncs/min_max_axis/ignore_nan", + "name": "ignore_nan", + "qname": "sklearn.utils.sparsefuncs.min_max_axis.ignore_nan", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Ignore or passing through NaN values.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Compute minimum and maximum along an axis on a CSR or CSC matrix and\noptionally ignore NaN values.", + "docstring": "Compute minimum and maximum along an axis on a CSR or CSC matrix and\noptionally ignore NaN values.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Input data. It should be of CSR or CSC format.\n\naxis : {0, 1}\n Axis along which the axis should be computed.\n\nignore_nan : bool, default=False\n Ignore or passing through NaN values.\n\n .. versionadded:: 0.20\n\nReturns\n-------\n\nmins : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Feature-wise minima.\n\nmaxs : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Feature-wise maxima.", + "code": "def min_max_axis(X, axis, ignore_nan=False):\n \"\"\"Compute minimum and maximum along an axis on a CSR or CSC matrix and\n optionally ignore NaN values.\n\n Parameters\n ----------\n X : sparse matrix of shape (n_samples, n_features)\n Input data. 
It should be of CSR or CSC format.\n\n axis : {0, 1}\n Axis along which the axis should be computed.\n\n ignore_nan : bool, default=False\n Ignore or passing through NaN values.\n\n .. versionadded:: 0.20\n\n Returns\n -------\n\n mins : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Feature-wise minima.\n\n maxs : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Feature-wise maxima.\n \"\"\"\n if isinstance(X, sp.csr_matrix) or isinstance(X, sp.csc_matrix):\n if ignore_nan:\n return _sparse_nan_min_max(X, axis=axis)\n else:\n return _sparse_min_max(X, axis=axis)\n else:\n _raise_typeerror(X)" + }, + { + "id": "scikit-learn/sklearn.utils.stats/_weighted_percentile", + "name": "_weighted_percentile", + "qname": "sklearn.utils.stats._weighted_percentile", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.stats/_weighted_percentile/array", + "name": "array", + "qname": "sklearn.utils.stats._weighted_percentile.array", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "1D or 2D array", + "default_value": "", + "description": "Values to take the weighted percentile of." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "1D" + }, + { + "kind": "NamedType", + "name": "2D array" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.stats/_weighted_percentile/sample_weight", + "name": "sample_weight", + "qname": "sklearn.utils.stats._weighted_percentile.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.stats/_weighted_percentile/percentile", + "name": "percentile", + "qname": "sklearn.utils.stats._weighted_percentile.percentile", + "default_value": "50", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Compute weighted percentile\n\nComputes lower weighted percentile. If `array` is a 2D array, the\n`percentile` is computed along the axis 0.\n\n .. versionchanged:: 0.24\n Accepts 2D `array`.", + "docstring": "Compute weighted percentile\n\nComputes lower weighted percentile. If `array` is a 2D array, the\n`percentile` is computed along the axis 0.\n\n .. versionchanged:: 0.24\n Accepts 2D `array`.\n\nParameters\n----------\narray : 1D or 2D array\n Values to take the weighted percentile of.\n\nsample_weight: 1D or 2D array\n Weights for each value in `array`. Must be same shape as `array` or\n of shape `(array.shape[0],)`.\n\npercentile: int, default=50\n Percentile to compute. Must be value between 0 and 100.\n\nReturns\n-------\npercentile : int if `array` 1D, ndarray if `array` 2D\n Weighted percentile.", + "code": "def _weighted_percentile(array, sample_weight, percentile=50):\n \"\"\"Compute weighted percentile\n\n Computes lower weighted percentile. If `array` is a 2D array, the\n `percentile` is computed along the axis 0.\n\n .. versionchanged:: 0.24\n Accepts 2D `array`.\n\n Parameters\n ----------\n array : 1D or 2D array\n Values to take the weighted percentile of.\n\n sample_weight: 1D or 2D array\n Weights for each value in `array`. Must be same shape as `array` or\n of shape `(array.shape[0],)`.\n\n percentile: int, default=50\n Percentile to compute. 
Must be value between 0 and 100.\n\n Returns\n -------\n percentile : int if `array` 1D, ndarray if `array` 2D\n Weighted percentile.\n \"\"\"\n n_dim = array.ndim\n if n_dim == 0:\n return array[()]\n if array.ndim == 1:\n array = array.reshape((-1, 1))\n # When sample_weight 1D, repeat for each array.shape[1]\n if (array.shape != sample_weight.shape and\n array.shape[0] == sample_weight.shape[0]):\n sample_weight = np.tile(sample_weight, (array.shape[1], 1)).T\n sorted_idx = np.argsort(array, axis=0)\n sorted_weights = _take_along_axis(sample_weight, sorted_idx, axis=0)\n\n # Find index of median prediction for each sample\n weight_cdf = stable_cumsum(sorted_weights, axis=0)\n adjusted_percentile = percentile / 100 * weight_cdf[-1]\n percentile_idx = np.array([\n np.searchsorted(weight_cdf[:, i], adjusted_percentile[i])\n for i in range(weight_cdf.shape[1])\n ])\n percentile_idx = np.array(percentile_idx)\n # In rare cases, percentile_idx equals to sorted_idx.shape[0]\n max_idx = sorted_idx.shape[0] - 1\n percentile_idx = np.apply_along_axis(lambda x: np.clip(x, 0, max_idx),\n axis=0, arr=percentile_idx)\n\n col_index = np.arange(array.shape[1])\n percentile_in_sorted = sorted_idx[percentile_idx, col_index]\n percentile = array[percentile_in_sorted, col_index]\n return percentile[0] if n_dim == 1 else percentile" + }, + { + "id": "scikit-learn/sklearn.utils.validation/_allclose_dense_sparse", + "name": "_allclose_dense_sparse", + "qname": "sklearn.utils.validation._allclose_dense_sparse", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/_allclose_dense_sparse/x", + "name": "x", + "qname": "sklearn.utils.validation._allclose_dense_sparse.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}", + "default_value": "", + "description": "First array to compare." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_allclose_dense_sparse/y", + "name": "y", + "qname": "sklearn.utils.validation._allclose_dense_sparse.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{array-like, sparse matrix}", + "default_value": "", + "description": "Second array to compare." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_allclose_dense_sparse/rtol", + "name": "rtol", + "qname": "sklearn.utils.validation._allclose_dense_sparse.rtol", + "default_value": "1e-07", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-7", + "description": "Relative tolerance; see numpy.allclose." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_allclose_dense_sparse/atol", + "name": "atol", + "qname": "sklearn.utils.validation._allclose_dense_sparse.atol", + "default_value": "1e-09", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "float", + "default_value": "1e-9", + "description": "absolute tolerance; see numpy.allclose. Note that the default here is\nmore tolerant than the default for numpy.testing.assert_allclose, where\natol=0." 
+ }, + "type": { + "kind": "NamedType", + "name": "float" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check allclose for sparse and dense data.\n\nBoth x and y need to be either sparse or dense, they\ncan't be mixed.", + "docstring": "Check allclose for sparse and dense data.\n\nBoth x and y need to be either sparse or dense, they\ncan't be mixed.\n\nParameters\n----------\nx : {array-like, sparse matrix}\n First array to compare.\n\ny : {array-like, sparse matrix}\n Second array to compare.\n\nrtol : float, default=1e-7\n Relative tolerance; see numpy.allclose.\n\natol : float, default=1e-9\n absolute tolerance; see numpy.allclose. Note that the default here is\n more tolerant than the default for numpy.testing.assert_allclose, where\n atol=0.", + "code": "def _allclose_dense_sparse(x, y, rtol=1e-7, atol=1e-9):\n \"\"\"Check allclose for sparse and dense data.\n\n Both x and y need to be either sparse or dense, they\n can't be mixed.\n\n Parameters\n ----------\n x : {array-like, sparse matrix}\n First array to compare.\n\n y : {array-like, sparse matrix}\n Second array to compare.\n\n rtol : float, default=1e-7\n Relative tolerance; see numpy.allclose.\n\n atol : float, default=1e-9\n absolute tolerance; see numpy.allclose. Note that the default here is\n more tolerant than the default for numpy.testing.assert_allclose, where\n atol=0.\n \"\"\"\n if sp.issparse(x) and sp.issparse(y):\n x = x.tocsr()\n y = y.tocsr()\n x.sum_duplicates()\n y.sum_duplicates()\n return (np.array_equal(x.indices, y.indices) and\n np.array_equal(x.indptr, y.indptr) and\n np.allclose(x.data, y.data, rtol=rtol, atol=atol))\n elif not sp.issparse(x) and not sp.issparse(y):\n return np.allclose(x, y, rtol=rtol, atol=atol)\n raise ValueError(\"Can only compare two sparse matrices, not a sparse \"\n \"matrix and an array\")" + }, + { + "id": "scikit-learn/sklearn.utils.validation/_assert_all_finite", + "name": "_assert_all_finite", + "qname": "sklearn.utils.validation._assert_all_finite", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/_assert_all_finite/X", + "name": "X", + "qname": "sklearn.utils.validation._assert_all_finite.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.validation/_assert_all_finite/allow_nan", + "name": "allow_nan", + "qname": "sklearn.utils.validation._assert_all_finite.allow_nan", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.validation/_assert_all_finite/msg_dtype", + "name": "msg_dtype", + "qname": "sklearn.utils.validation._assert_all_finite.msg_dtype", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Like assert_all_finite, but only for ndarray.", + "docstring": "Like assert_all_finite, but only for ndarray.", + "code": "def _assert_all_finite(X, allow_nan=False, msg_dtype=None):\n \"\"\"Like assert_all_finite, but only for ndarray.\"\"\"\n # validation is also imported in extmath\n from .extmath import _safe_accumulator_op\n\n if 
_get_config()['assume_finite']:\n return\n X = np.asanyarray(X)\n # First try an O(n) time, O(1) space solution for the common case that\n # everything is finite; fall back to O(n) space np.isfinite to prevent\n # false positives from overflow in sum method. The sum is also calculated\n # safely to reduce dtype induced overflows.\n is_float = X.dtype.kind in 'fc'\n if is_float and (np.isfinite(_safe_accumulator_op(np.sum, X))):\n pass\n elif is_float:\n msg_err = \"Input contains {} or a value too large for {!r}.\"\n if (allow_nan and np.isinf(X).any() or\n not allow_nan and not np.isfinite(X).all()):\n type_err = 'infinity' if allow_nan else 'NaN, infinity'\n raise ValueError(\n msg_err.format\n (type_err,\n msg_dtype if msg_dtype is not None else X.dtype)\n )\n # for object dtype data, we only check for NaNs (GH-13254)\n elif X.dtype == np.dtype('object') and not allow_nan:\n if _object_dtype_isnan(X).any():\n raise ValueError(\"Input contains NaN\")" + }, + { + "id": "scikit-learn/sklearn.utils.validation/_check_fit_params", + "name": "_check_fit_params", + "qname": "sklearn.utils.validation._check_fit_params", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/_check_fit_params/X", + "name": "X", + "qname": "sklearn.utils.validation._check_fit_params.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples, n_features)", + "default_value": "", + "description": "Data array." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples, n_features)" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_check_fit_params/fit_params", + "name": "fit_params", + "qname": "sklearn.utils.validation._check_fit_params.fit_params", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "dict", + "default_value": "", + "description": "Dictionary containing the parameters passed at fit." + }, + "type": { + "kind": "NamedType", + "name": "dict" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_check_fit_params/indices", + "name": "indices", + "qname": "sklearn.utils.validation._check_fit_params.indices", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_samples,)", + "default_value": "None", + "description": "Indices to be selected if the parameter has the same size as `X`." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check and validate the parameters passed during `fit`.", + "docstring": "Check and validate the parameters passed during `fit`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data array.\n\nfit_params : dict\n Dictionary containing the parameters passed at fit.\n\nindices : array-like of shape (n_samples,), default=None\n Indices to be selected if the parameter has the same size as `X`.\n\nReturns\n-------\nfit_params_validated : dict\n Validated parameters. 
We ensure that the values support indexing.", + "code": "def _check_fit_params(X, fit_params, indices=None):\n \"\"\"Check and validate the parameters passed during `fit`.\n\n Parameters\n ----------\n X : array-like of shape (n_samples, n_features)\n Data array.\n\n fit_params : dict\n Dictionary containing the parameters passed at fit.\n\n indices : array-like of shape (n_samples,), default=None\n Indices to be selected if the parameter has the same size as `X`.\n\n Returns\n -------\n fit_params_validated : dict\n Validated parameters. We ensure that the values support indexing.\n \"\"\"\n from . import _safe_indexing\n fit_params_validated = {}\n for param_key, param_value in fit_params.items():\n if (not _is_arraylike(param_value) or\n _num_samples(param_value) != _num_samples(X)):\n # Non-indexable pass-through (for now for backward-compatibility).\n # https://github.com/scikit-learn/scikit-learn/issues/15805\n fit_params_validated[param_key] = param_value\n else:\n # Any other fit_params should support indexing\n # (e.g. for cross-validation).\n fit_params_validated[param_key] = _make_indexable(param_value)\n fit_params_validated[param_key] = _safe_indexing(\n fit_params_validated[param_key], indices\n )\n\n return fit_params_validated" + }, + { + "id": "scikit-learn/sklearn.utils.validation/_check_large_sparse", + "name": "_check_large_sparse", + "qname": "sklearn.utils.validation._check_large_sparse", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/_check_large_sparse/X", + "name": "X", + "qname": "sklearn.utils.validation._check_large_sparse.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.validation/_check_large_sparse/accept_large_sparse", + "name": "accept_large_sparse", + "qname": "sklearn.utils.validation._check_large_sparse.accept_large_sparse", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Raise a ValueError if X has 64bit indices and accept_large_sparse=False", + "docstring": "Raise a ValueError if X has 64bit indices and accept_large_sparse=False\n ", + "code": "def _check_large_sparse(X, accept_large_sparse=False):\n \"\"\"Raise a ValueError if X has 64bit indices and accept_large_sparse=False\n \"\"\"\n if not accept_large_sparse:\n supported_indices = [\"int32\"]\n if X.getformat() == \"coo\":\n index_keys = ['col', 'row']\n elif X.getformat() in [\"csr\", \"csc\", \"bsr\"]:\n index_keys = ['indices', 'indptr']\n else:\n return\n for key in index_keys:\n indices_datatype = getattr(X, key).dtype\n if (indices_datatype not in supported_indices):\n raise ValueError(\"Only sparse matrices with 32-bit integer\"\n \" indices are accepted. 
Got %s indices.\"\n % indices_datatype)" + }, + { + "id": "scikit-learn/sklearn.utils.validation/_check_psd_eigenvalues", + "name": "_check_psd_eigenvalues", + "qname": "sklearn.utils.validation._check_psd_eigenvalues", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/_check_psd_eigenvalues/lambdas", + "name": "lambdas", + "qname": "sklearn.utils.validation._check_psd_eigenvalues.lambdas", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_eigenvalues,)", + "default_value": "", + "description": "Array of eigenvalues to check / fix." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_eigenvalues,)" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_check_psd_eigenvalues/enable_warnings", + "name": "enable_warnings", + "qname": "sklearn.utils.validation._check_psd_eigenvalues.enable_warnings", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "When this is set to ``True``, a ``PositiveSpectrumWarning`` will be\nraised when there are imaginary parts, negative eigenvalues, or\nextremely small non-zero eigenvalues. Otherwise no warning will be\nraised. In both cases, imaginary parts, negative eigenvalues, and\nextremely small non-zero eigenvalues will be set to zero." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Check the eigenvalues of a positive semidefinite (PSD) matrix.\n\nChecks the provided array of PSD matrix eigenvalues for numerical or\nconditioning issues and returns a fixed validated version. This method\nshould typically be used if the PSD matrix is user-provided (e.g. a\nGram matrix) or computed using a user-provided dissimilarity metric\n(e.g. kernel function), or if the decomposition process uses approximation\nmethods (randomized SVD, etc.).\n\nIt checks for three things:\n\n- that there are no significant imaginary parts in eigenvalues (more than\n 1e-5 times the maximum real part). If this check fails, it raises a\n ``ValueError``. Otherwise all non-significant imaginary parts that may\n remain are set to zero. This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\n- that eigenvalues are not all negative. If this check fails, it raises a\n ``ValueError``\n\n- that there are no significant negative eigenvalues with absolute value\n more than 1e-10 (1e-6) and more than 1e-5 (5e-3) times the largest\n positive eigenvalue in double (simple) precision. If this check fails,\n it raises a ``ValueError``. Otherwise all negative eigenvalues that may\n remain are set to zero. This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\nFinally, all the positive eigenvalues that are too small (with a value\nsmaller than the maximum eigenvalue multiplied by 1e-12 (2e-7)) are set to\nzero. This operation is traced with a ``PositiveSpectrumWarning`` when\n``enable_warnings=True``.", + "docstring": "Check the eigenvalues of a positive semidefinite (PSD) matrix.\n\nChecks the provided array of PSD matrix eigenvalues for numerical or\nconditioning issues and returns a fixed validated version. This method\nshould typically be used if the PSD matrix is user-provided (e.g. a\nGram matrix) or computed using a user-provided dissimilarity metric\n(e.g. 
kernel function), or if the decomposition process uses approximation\nmethods (randomized SVD, etc.).\n\nIt checks for three things:\n\n- that there are no significant imaginary parts in eigenvalues (more than\n 1e-5 times the maximum real part). If this check fails, it raises a\n ``ValueError``. Otherwise all non-significant imaginary parts that may\n remain are set to zero. This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\n- that eigenvalues are not all negative. If this check fails, it raises a\n ``ValueError``.\n\n- that there are no significant negative eigenvalues with absolute value\n more than 1e-10 (1e-6) and more than 1e-5 (5e-3) times the largest\n positive eigenvalue in double (single) precision. If this check fails,\n it raises a ``ValueError``. Otherwise all negative eigenvalues that may\n remain are set to zero. This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\nFinally, all the positive eigenvalues that are too small (with a value\nsmaller than the maximum eigenvalue multiplied by 1e-12 (2e-7)) are set to\nzero. This operation is traced with a ``PositiveSpectrumWarning`` when\n``enable_warnings=True``.\n\nParameters\n----------\nlambdas : array-like of shape (n_eigenvalues,)\n Array of eigenvalues to check / fix.\n\nenable_warnings : bool, default=False\n When this is set to ``True``, a ``PositiveSpectrumWarning`` will be\n raised when there are imaginary parts, negative eigenvalues, or\n extremely small non-zero eigenvalues. Otherwise no warning will be\n raised. In both cases, imaginary parts, negative eigenvalues, and\n extremely small non-zero eigenvalues will be set to zero.\n\nReturns\n-------\nlambdas_fixed : ndarray of shape (n_eigenvalues,)\n A fixed validated copy of the array of eigenvalues.\n\nExamples\n--------\n>>> _check_psd_eigenvalues([1, 2]) # nominal case\narray([1, 2])\n>>> _check_psd_eigenvalues([5, 5j]) # significant imag part\nTraceback (most recent call last):\n ...\nValueError: There are significant imaginary parts in eigenvalues (1\n of the maximum real part). Either the matrix is not PSD, or there was\n an issue while computing the eigendecomposition of the matrix.\n>>> _check_psd_eigenvalues([5, 5e-5j]) # insignificant imag part\narray([5., 0.])\n>>> _check_psd_eigenvalues([-5, -1]) # all negative\nTraceback (most recent call last):\n ...\nValueError: All eigenvalues are negative (maximum is -1). Either the\n matrix is not PSD, or there was an issue while computing the\n eigendecomposition of the matrix.\n>>> _check_psd_eigenvalues([5, -1]) # significant negative\nTraceback (most recent call last):\n ...\nValueError: There are significant negative eigenvalues (0.2 of the\n maximum positive). Either the matrix is not PSD, or there was an issue\n while computing the eigendecomposition of the matrix.\n>>> _check_psd_eigenvalues([5, -5e-5]) # insignificant negative\narray([5., 0.])\n>>> _check_psd_eigenvalues([5, 4e-12]) # bad conditioning (too small)\narray([5., 0.])", + "code": "def _check_psd_eigenvalues(lambdas, enable_warnings=False):\n \"\"\"Check the eigenvalues of a positive semidefinite (PSD) matrix.\n\n Checks the provided array of PSD matrix eigenvalues for numerical or\n conditioning issues and returns a fixed validated version. This method\n should typically be used if the PSD matrix is user-provided (e.g. a\n Gram matrix) or computed using a user-provided dissimilarity metric\n (e.g. 
kernel function), or if the decomposition process uses approximation\n methods (randomized SVD, etc.).\n\n It checks for three things:\n\n - that there are no significant imaginary parts in eigenvalues (more than\n 1e-5 times the maximum real part). If this check fails, it raises a\n ``ValueError``. Otherwise all non-significant imaginary parts that may\n remain are set to zero. This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\n - that eigenvalues are not all negative. If this check fails, it raises a\n ``ValueError``.\n\n - that there are no significant negative eigenvalues with absolute value\n more than 1e-10 (1e-6) and more than 1e-5 (5e-3) times the largest\n positive eigenvalue in double (single) precision. If this check fails,\n it raises a ``ValueError``. Otherwise all negative eigenvalues that may\n remain are set to zero. This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\n Finally, all the positive eigenvalues that are too small (with a value\n smaller than the maximum eigenvalue multiplied by 1e-12 (2e-7)) are set to\n zero. This operation is traced with a ``PositiveSpectrumWarning`` when\n ``enable_warnings=True``.\n\n Parameters\n ----------\n lambdas : array-like of shape (n_eigenvalues,)\n Array of eigenvalues to check / fix.\n\n enable_warnings : bool, default=False\n When this is set to ``True``, a ``PositiveSpectrumWarning`` will be\n raised when there are imaginary parts, negative eigenvalues, or\n extremely small non-zero eigenvalues. Otherwise no warning will be\n raised. In both cases, imaginary parts, negative eigenvalues, and\n extremely small non-zero eigenvalues will be set to zero.\n\n Returns\n -------\n lambdas_fixed : ndarray of shape (n_eigenvalues,)\n A fixed validated copy of the array of eigenvalues.\n\n Examples\n --------\n >>> _check_psd_eigenvalues([1, 2]) # nominal case\n array([1, 2])\n >>> _check_psd_eigenvalues([5, 5j]) # significant imag part\n Traceback (most recent call last):\n ...\n ValueError: There are significant imaginary parts in eigenvalues (1\n of the maximum real part). Either the matrix is not PSD, or there was\n an issue while computing the eigendecomposition of the matrix.\n >>> _check_psd_eigenvalues([5, 5e-5j]) # insignificant imag part\n array([5., 0.])\n >>> _check_psd_eigenvalues([-5, -1]) # all negative\n Traceback (most recent call last):\n ...\n ValueError: All eigenvalues are negative (maximum is -1). Either the\n matrix is not PSD, or there was an issue while computing the\n eigendecomposition of the matrix.\n >>> _check_psd_eigenvalues([5, -1]) # significant negative\n Traceback (most recent call last):\n ...\n ValueError: There are significant negative eigenvalues (0.2 of the\n maximum positive). 
Either the matrix is not PSD, or there was an issue\n while computing the eigendecomposition of the matrix.\n >>> _check_psd_eigenvalues([5, -5e-5]) # insignificant negative\n array([5., 0.])\n >>> _check_psd_eigenvalues([5, 4e-12]) # bad conditioning (too small)\n array([5., 0.])\n\n \"\"\"\n\n lambdas = np.array(lambdas)\n is_double_precision = lambdas.dtype == np.float64\n\n # note: the minimum value available is\n # - single-precision: np.finfo('float32').eps = 1.2e-07\n # - double-precision: np.finfo('float64').eps = 2.2e-16\n\n # the various thresholds used for validation\n # we may wish to change the value according to precision.\n significant_imag_ratio = 1e-5\n significant_neg_ratio = 1e-5 if is_double_precision else 5e-3\n significant_neg_value = 1e-10 if is_double_precision else 1e-6\n small_pos_ratio = 1e-12 if is_double_precision else 2e-7\n\n # Check that there are no significant imaginary parts\n if not np.isreal(lambdas).all():\n max_imag_abs = np.abs(np.imag(lambdas)).max()\n max_real_abs = np.abs(np.real(lambdas)).max()\n if max_imag_abs > significant_imag_ratio * max_real_abs:\n raise ValueError(\n \"There are significant imaginary parts in eigenvalues (%g \"\n \"of the maximum real part). Either the matrix is not PSD, or \"\n \"there was an issue while computing the eigendecomposition \"\n \"of the matrix.\"\n % (max_imag_abs / max_real_abs))\n\n # warn about imaginary parts being removed\n if enable_warnings:\n warnings.warn(\"There are imaginary parts in eigenvalues (%g \"\n \"of the maximum real part). Either the matrix is not\"\n \" PSD, or there was an issue while computing the \"\n \"eigendecomposition of the matrix. Only the real \"\n \"parts will be kept.\"\n % (max_imag_abs / max_real_abs),\n PositiveSpectrumWarning)\n\n # Remove all imaginary parts (even if zero)\n lambdas = np.real(lambdas)\n\n # Check that there are no significant negative eigenvalues\n max_eig = lambdas.max()\n if max_eig < 0:\n raise ValueError(\"All eigenvalues are negative (maximum is %g). \"\n \"Either the matrix is not PSD, or there was an \"\n \"issue while computing the eigendecomposition of \"\n \"the matrix.\" % max_eig)\n\n else:\n min_eig = lambdas.min()\n if (min_eig < -significant_neg_ratio * max_eig\n and min_eig < -significant_neg_value):\n raise ValueError(\"There are significant negative eigenvalues (%g\"\n \" of the maximum positive). Either the matrix is \"\n \"not PSD, or there was an issue while computing \"\n \"the eigendecomposition of the matrix.\"\n % (-min_eig / max_eig))\n elif min_eig < 0:\n # Remove all negative values and warn about it\n if enable_warnings:\n warnings.warn(\"There are negative eigenvalues (%g of the \"\n \"maximum positive). Either the matrix is not \"\n \"PSD, or there was an issue while computing the\"\n \" eigendecomposition of the matrix. Negative \"\n \"eigenvalues will be replaced with 0.\"\n % (-min_eig / max_eig),\n PositiveSpectrumWarning)\n lambdas[lambdas < 0] = 0\n\n # Check for conditioning (small positive non-zeros)\n too_small_lambdas = (0 < lambdas) & (lambdas < small_pos_ratio * max_eig)\n if too_small_lambdas.any():\n if enable_warnings:\n warnings.warn(\"Badly conditioned PSD matrix spectrum: the largest \"\n \"eigenvalue is more than %g times the smallest. 
\"\n \"Small eigenvalues will be replaced with 0.\"\n \"\" % (1 / small_pos_ratio),\n PositiveSpectrumWarning)\n lambdas[too_small_lambdas] = 0\n\n return lambdas" + }, + { + "id": "scikit-learn/sklearn.utils.validation/_check_sample_weight", + "name": "_check_sample_weight", + "qname": "sklearn.utils.validation._check_sample_weight", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/_check_sample_weight/sample_weight", + "name": "sample_weight", + "qname": "sklearn.utils.validation._check_sample_weight.sample_weight", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, Number or None}, shape (n_samples,)", + "default_value": "", + "description": "Input sample weights." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "shape (n_samples,)" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_check_sample_weight/X", + "name": "X", + "qname": "sklearn.utils.validation._check_sample_weight.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{ndarray, list, sparse matrix}", + "default_value": "", + "description": "Input data." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_check_sample_weight/dtype", + "name": "dtype", + "qname": "sklearn.utils.validation._check_sample_weight.dtype", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils.validation/_check_sample_weight/copy", + "name": "copy", + "qname": "sklearn.utils.validation._check_sample_weight.copy", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True, a copy of sample_weight will be created." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Validate sample weights.\n\nNote that passing sample_weight=None will output an array of ones.\nTherefore, in some cases, you may want to protect the call with:\nif sample_weight is not None:\n sample_weight = _check_sample_weight(...)", + "docstring": "Validate sample weights.\n\nNote that passing sample_weight=None will output an array of ones.\nTherefore, in some cases, you may want to protect the call with:\nif sample_weight is not None:\n sample_weight = _check_sample_weight(...)\n\nParameters\n----------\nsample_weight : {ndarray, Number or None}, shape (n_samples,)\n Input sample weights.\n\nX : {ndarray, list, sparse matrix}\n Input data.\n\ndtype: dtype, default=None\n dtype of the validated `sample_weight`.\n If None, and the input `sample_weight` is an array, the dtype of the\n input is preserved; otherwise an array with the default numpy dtype\n is be allocated. If `dtype` is not one of `float32`, `float64`,\n `None`, the output will be of dtype `float64`.\n\ncopy : bool, default=False\n If True, a copy of sample_weight will be created.\n\nReturns\n-------\nsample_weight : ndarray of shape (n_samples,)\n Validated sample weight. 
It is guaranteed to be \"C\" contiguous.", + "code": "def _check_sample_weight(sample_weight, X, dtype=None, copy=False):\n \"\"\"Validate sample weights.\n\n Note that passing sample_weight=None will output an array of ones.\n Therefore, in some cases, you may want to protect the call with:\n if sample_weight is not None:\n sample_weight = _check_sample_weight(...)\n\n Parameters\n ----------\n sample_weight : {ndarray, Number or None}, shape (n_samples,)\n Input sample weights.\n\n X : {ndarray, list, sparse matrix}\n Input data.\n\n dtype : dtype, default=None\n dtype of the validated `sample_weight`.\n If None, and the input `sample_weight` is an array, the dtype of the\n input is preserved; otherwise an array with the default numpy dtype\n will be allocated. If `dtype` is not one of `float32`, `float64`,\n `None`, the output will be of dtype `float64`.\n\n copy : bool, default=False\n If True, a copy of sample_weight will be created.\n\n Returns\n -------\n sample_weight : ndarray of shape (n_samples,)\n Validated sample weight. It is guaranteed to be \"C\" contiguous.\n \"\"\"\n n_samples = _num_samples(X)\n\n if dtype is not None and dtype not in [np.float32, np.float64]:\n dtype = np.float64\n\n if sample_weight is None:\n sample_weight = np.ones(n_samples, dtype=dtype)\n elif isinstance(sample_weight, numbers.Number):\n sample_weight = np.full(n_samples, sample_weight, dtype=dtype)\n else:\n if dtype is None:\n dtype = [np.float64, np.float32]\n sample_weight = check_array(\n sample_weight, accept_sparse=False, ensure_2d=False, dtype=dtype,\n order=\"C\", copy=copy\n )\n if sample_weight.ndim != 1:\n raise ValueError(\"Sample weights must be 1D array or scalar\")\n\n if sample_weight.shape != (n_samples,):\n raise ValueError(\"sample_weight.shape == {}, expected {}!\"\n .format(sample_weight.shape, (n_samples,)))\n return sample_weight" + }, + { + "id": "scikit-learn/sklearn.utils.validation/_deprecate_positional_args", + "name": "_deprecate_positional_args", + "qname": "sklearn.utils.validation._deprecate_positional_args", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/_deprecate_positional_args/func", + "name": "func", + "qname": "sklearn.utils.validation._deprecate_positional_args.func", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "callable", + "default_value": "None", + "description": "Function to check arguments on." + }, + "type": { + "kind": "NamedType", + "name": "callable" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_deprecate_positional_args/version", + "name": "version", + "qname": "sklearn.utils.validation._deprecate_positional_args.version", + "default_value": "'1.0 (renaming of 0.25)'", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "\"1.0 (renaming of 0.25)\"", + "description": "The version when positional arguments will result in error." 
+ }, + "type": { + "kind": "NamedType", + "name": "callable" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Decorator for methods that issues warnings for positional arguments.\n\nUsing the keyword-only argument syntax in pep 3102, arguments after the\n* will issue a warning when passed as a positional argument.", + "docstring": "Decorator for methods that issues warnings for positional arguments.\n\nUsing the keyword-only argument syntax in pep 3102, arguments after the\n* will issue a warning when passed as a positional argument.\n\nParameters\n----------\nfunc : callable, default=None\n Function to check arguments on.\nversion : callable, default=\"1.0 (renaming of 0.25)\"\n The version when positional arguments will result in error.", + "code": "def _deprecate_positional_args(func=None, *, version=\"1.0 (renaming of 0.25)\"):\n \"\"\"Decorator for methods that issues warnings for positional arguments.\n\n Using the keyword-only argument syntax in pep 3102, arguments after the\n * will issue a warning when passed as a positional argument.\n\n Parameters\n ----------\n func : callable, default=None\n Function to check arguments on.\n version : callable, default=\"1.0 (renaming of 0.25)\"\n The version when positional arguments will result in error.\n \"\"\"\n def _inner_deprecate_positional_args(f):\n sig = signature(f)\n kwonly_args = []\n all_args = []\n\n for name, param in sig.parameters.items():\n if param.kind == Parameter.POSITIONAL_OR_KEYWORD:\n all_args.append(name)\n elif param.kind == Parameter.KEYWORD_ONLY:\n kwonly_args.append(name)\n\n @wraps(f)\n def inner_f(*args, **kwargs):\n extra_args = len(args) - len(all_args)\n if extra_args <= 0:\n return f(*args, **kwargs)\n\n # extra_args > 0\n args_msg = ['{}={}'.format(name, arg)\n for name, arg in zip(kwonly_args[:extra_args],\n args[-extra_args:])]\n args_msg = \", \".join(args_msg)\n warnings.warn(f\"Pass {args_msg} as keyword args. 
From version \"\n f\"{version} passing these as positional arguments \"\n \"will result in an error\", FutureWarning)\n kwargs.update(zip(sig.parameters, args))\n return f(**kwargs)\n return inner_f\n\n if func is not None:\n return _inner_deprecate_positional_args(func)\n\n return _inner_deprecate_positional_args" + }, + { + "id": "scikit-learn/sklearn.utils.validation/_ensure_no_complex_data", + "name": "_ensure_no_complex_data", + "qname": "sklearn.utils.validation._ensure_no_complex_data", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/_ensure_no_complex_data/array", + "name": "array", + "qname": "sklearn.utils.validation._ensure_no_complex_data.array", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "", + "docstring": "", + "code": "def _ensure_no_complex_data(array):\n if hasattr(array, 'dtype') and array.dtype is not None \\\n and hasattr(array.dtype, 'kind') and array.dtype.kind == \"c\":\n raise ValueError(\"Complex data not supported\\n\"\n \"{}\\n\".format(array))" + }, + { + "id": "scikit-learn/sklearn.utils.validation/_ensure_sparse_format", + "name": "_ensure_sparse_format", + "qname": "sklearn.utils.validation._ensure_sparse_format", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/_ensure_sparse_format/spmatrix", + "name": "spmatrix", + "qname": "sklearn.utils.validation._ensure_sparse_format.spmatrix", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "sparse matrix", + "default_value": "", + "description": "Input to validate and convert." + }, + "type": { + "kind": "NamedType", + "name": "sparse matrix" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_ensure_sparse_format/accept_sparse", + "name": "accept_sparse", + "qname": "sklearn.utils.validation._ensure_sparse_format.accept_sparse", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str, bool or list/tuple of str", + "default_value": "", + "description": "String[s] representing allowed sparse matrix formats ('csc',\n'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). If the input is sparse but\nnot in the allowed format, it will be converted to the first listed\nformat. True allows the input to be any format. False means\nthat a sparse matrix input will raise an error." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "list/tuple of str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_ensure_sparse_format/dtype", + "name": "dtype", + "qname": "sklearn.utils.validation._ensure_sparse_format.dtype", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str, type or None", + "default_value": "", + "description": "Data type of result. If None, the dtype of the input is preserved." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "type" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_ensure_sparse_format/copy", + "name": "copy", + "qname": "sklearn.utils.validation._ensure_sparse_format.copy", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "", + "description": "Whether a forced copy will be triggered. If copy=False, a copy might\nbe triggered by a conversion." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_ensure_sparse_format/force_all_finite", + "name": "force_all_finite", + "qname": "sklearn.utils.validation._ensure_sparse_format.force_all_finite", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool or 'allow-nan'", + "default_value": "", + "description": "Whether to raise an error on np.inf, np.nan, pd.NA in X. The\npossibilities are:\n\n- True: Force all values of X to be finite.\n- False: accepts np.inf, np.nan, pd.NA in X.\n- 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\n be infinite.\n\n.. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'allow-nan'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/_ensure_sparse_format/accept_large_sparse", + "name": "accept_large_sparse", + "qname": "sklearn.utils.validation._ensure_sparse_format.accept_large_sparse", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Convert a sparse matrix to a given format.\n\nChecks the sparse format of spmatrix and converts if necessary.", + "docstring": "Convert a sparse matrix to a given format.\n\nChecks the sparse format of spmatrix and converts if necessary.\n\nParameters\n----------\nspmatrix : sparse matrix\n Input to validate and convert.\n\naccept_sparse : str, bool or list/tuple of str\n String[s] representing allowed sparse matrix formats ('csc',\n 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). If the input is sparse but\n not in the allowed format, it will be converted to the first listed\n format. True allows the input to be any format. False means\n that a sparse matrix input will raise an error.\n\ndtype : str, type or None\n Data type of result. If None, the dtype of the input is preserved.\n\ncopy : bool\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\nforce_all_finite : bool or 'allow-nan'\n Whether to raise an error on np.inf, np.nan, pd.NA in X. The\n possibilities are:\n\n - True: Force all values of X to be finite.\n - False: accepts np.inf, np.nan, pd.NA in X.\n - 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\n be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. 
versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\nReturns\n-------\nspmatrix_converted : sparse matrix.\n Matrix that is ensured to have an allowed type.", + "code": "def _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy,\n force_all_finite, accept_large_sparse):\n \"\"\"Convert a sparse matrix to a given format.\n\n Checks the sparse format of spmatrix and converts if necessary.\n\n Parameters\n ----------\n spmatrix : sparse matrix\n Input to validate and convert.\n\n accept_sparse : str, bool or list/tuple of str\n String[s] representing allowed sparse matrix formats ('csc',\n 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). If the input is sparse but\n not in the allowed format, it will be converted to the first listed\n format. True allows the input to be any format. False means\n that a sparse matrix input will raise an error.\n\n dtype : str, type or None\n Data type of result. If None, the dtype of the input is preserved.\n\n copy : bool\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\n force_all_finite : bool or 'allow-nan'\n Whether to raise an error on np.inf, np.nan, pd.NA in X. The\n possibilities are:\n\n - True: Force all values of X to be finite.\n - False: accepts np.inf, np.nan, pd.NA in X.\n - 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\n be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\n Returns\n -------\n spmatrix_converted : sparse matrix.\n Matrix that is ensured to have an allowed type.\n \"\"\"\n if dtype is None:\n dtype = spmatrix.dtype\n\n changed_format = False\n\n if isinstance(accept_sparse, str):\n accept_sparse = [accept_sparse]\n\n # Indices dtype validation\n _check_large_sparse(spmatrix, accept_large_sparse)\n\n if accept_sparse is False:\n raise TypeError('A sparse matrix was passed, but dense '\n 'data is required. Use X.toarray() to '\n 'convert to a dense numpy array.')\n elif isinstance(accept_sparse, (list, tuple)):\n if len(accept_sparse) == 0:\n raise ValueError(\"When providing 'accept_sparse' \"\n \"as a tuple or list, it must contain at \"\n \"least one string value.\")\n # ensure correct sparse format\n if spmatrix.format not in accept_sparse:\n # create new with correct sparse\n spmatrix = spmatrix.asformat(accept_sparse[0])\n changed_format = True\n elif accept_sparse is not True:\n # any other type\n raise ValueError(\"Parameter 'accept_sparse' should be a string, \"\n \"boolean or list of strings. 
You provided \"\n \"'accept_sparse={}'.\".format(accept_sparse))\n\n if dtype != spmatrix.dtype:\n # convert dtype\n spmatrix = spmatrix.astype(dtype)\n elif copy and not changed_format:\n # force copy\n spmatrix = spmatrix.copy()\n\n if force_all_finite:\n if not hasattr(spmatrix, \"data\"):\n warnings.warn(\"Can't check %s sparse matrix for nan or inf.\"\n % spmatrix.format, stacklevel=2)\n else:\n _assert_all_finite(spmatrix.data,\n allow_nan=force_all_finite == 'allow-nan')\n\n return spmatrix" + }, + { + "id": "scikit-learn/sklearn.utils.validation/_is_arraylike", + "name": "_is_arraylike", + "qname": "sklearn.utils.validation._is_arraylike", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/_is_arraylike/x", + "name": "x", + "qname": "sklearn.utils.validation._is_arraylike.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Returns whether the input is array-like.", + "docstring": "Returns whether the input is array-like.", + "code": "def _is_arraylike(x):\n \"\"\"Returns whether the input is array-like.\"\"\"\n return (hasattr(x, '__len__') or\n hasattr(x, 'shape') or\n hasattr(x, '__array__'))" + }, + { + "id": "scikit-learn/sklearn.utils.validation/_make_indexable", + "name": "_make_indexable", + "qname": "sklearn.utils.validation._make_indexable", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/_make_indexable/iterable", + "name": "iterable", + "qname": "sklearn.utils.validation._make_indexable.iterable", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "{list, dataframe, ndarray, sparse matrix} or None", + "default_value": "", + "description": "Object to be converted to an indexable iterable." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Ensure iterable supports indexing or convert to an indexable variant.\n\nConvert sparse matrices to csr and other non-indexable iterables to arrays.\nLet `None` and indexable objects (e.g. pandas dataframes) pass unchanged.", + "docstring": "Ensure iterable supports indexing or convert to an indexable variant.\n\nConvert sparse matrices to csr and other non-indexable iterables to arrays.\nLet `None` and indexable objects (e.g. pandas dataframes) pass unchanged.\n\nParameters\n----------\niterable : {list, dataframe, ndarray, sparse matrix} or None\n Object to be converted to an indexable iterable.", + "code": "def _make_indexable(iterable):\n \"\"\"Ensure iterable supports indexing or convert to an indexable variant.\n\n Convert sparse matrices to csr and other non-indexable iterables to arrays.\n Let `None` and indexable objects (e.g. 
pandas dataframes) pass unchanged.\n\n Parameters\n ----------\n iterable : {list, dataframe, ndarray, sparse matrix} or None\n Object to be converted to an indexable iterable.\n \"\"\"\n if sp.issparse(iterable):\n return iterable.tocsr()\n elif hasattr(iterable, \"__getitem__\") or hasattr(iterable, \"iloc\"):\n return iterable\n elif iterable is None:\n return iterable\n return np.array(iterable)" + }, + { + "id": "scikit-learn/sklearn.utils.validation/_num_features", + "name": "_num_features", + "qname": "sklearn.utils.validation._num_features", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/_num_features/X", + "name": "X", + "qname": "sklearn.utils.validation._num_features.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return the number of features in an array-like X.\n\nThis helper function tries hard to avoid materializing an array version\nof X unless necessary. For instance, if X is a list of lists,\nthis function will return the length of the first element, assuming\nthat subsequent elements are all lists of the same length without\nchecking.", + "docstring": "Return the number of features in an array-like X.\n\nThis helper function tries hard to avoid materializing an array version\nof X unless necessary. For instance, if X is a list of lists,\nthis function will return the length of the first element, assuming\nthat subsequent elements are all lists of the same length without\nchecking.\n\nParameters\n----------\nX : array-like\n array-like to get the number of features.\n\nReturns\n-------\nfeatures : int\n Number of features", + "code": "def _num_features(X):\n \"\"\"Return the number of features in an array-like X.\n\n This helper function tries hard to avoid materializing an array version\n of X unless necessary. 
For instance, if X is a list of lists,\n this function will return the length of the first element, assuming\n that subsequent elements are all lists of the same length without\n checking.\n\n Parameters\n ----------\n X : array-like\n array-like to get the number of features.\n\n Returns\n -------\n features : int\n Number of features\n \"\"\"\n type_ = type(X)\n if type_.__module__ == \"builtins\":\n type_name = type_.__qualname__\n else:\n type_name = f\"{type_.__module__}.{type_.__qualname__}\"\n message = (\n \"Unable to find the number of features from X of type \"\n f\"{type_name}\"\n )\n if not hasattr(X, '__len__') and not hasattr(X, 'shape'):\n if not hasattr(X, '__array__'):\n raise TypeError(message)\n # Only convert X to a numpy array if there is no cheaper, heuristic\n # option.\n X = np.asarray(X)\n\n if hasattr(X, 'shape'):\n if not hasattr(X.shape, '__len__') or len(X.shape) <= 1:\n message += f\" with shape {X.shape}\"\n raise TypeError(message)\n return X.shape[1]\n\n first_sample = X[0]\n\n # Do not consider an array-like of strings or dicts to be a 2D array\n if isinstance(first_sample, (str, bytes, dict)):\n message += (f\" where the samples are of type \"\n f\"{type(first_sample).__qualname__}\")\n raise TypeError(message)\n\n try:\n # If X is a list of lists, for instance, we assume that all nested\n # lists have the same length without checking or converting to\n # a numpy array to keep this function call as cheap as possible.\n return len(first_sample)\n except Exception as err:\n raise TypeError(message) from err" + }, + { + "id": "scikit-learn/sklearn.utils.validation/_num_samples", + "name": "_num_samples", + "qname": "sklearn.utils.validation._num_samples", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/_num_samples/x", + "name": "x", + "qname": "sklearn.utils.validation._num_samples.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return number of samples in array-like x.", + "docstring": "Return number of samples in array-like x.", + "code": "def _num_samples(x):\n \"\"\"Return number of samples in array-like x.\"\"\"\n message = 'Expected sequence or array-like, got %s' % type(x)\n if hasattr(x, 'fit') and callable(x.fit):\n # Don't get num_samples from an ensemble's length!\n raise TypeError(message)\n\n if not hasattr(x, '__len__') and not hasattr(x, 'shape'):\n if hasattr(x, '__array__'):\n x = np.asarray(x)\n else:\n raise TypeError(message)\n\n if hasattr(x, 'shape') and x.shape is not None:\n if len(x.shape) == 0:\n raise TypeError(\"Singleton array %r cannot be considered\"\n \" a valid collection.\" % x)\n # Check that shape is returning an integer or default to len\n # Dask dataframes may not return numeric shape[0] value\n if isinstance(x.shape[0], numbers.Integral):\n return x.shape[0]\n\n try:\n return len(x)\n except TypeError as type_error:\n raise TypeError(message) from type_error" + }, + { + "id": "scikit-learn/sklearn.utils.validation/as_float_array", + "name": "as_float_array", + "qname": "sklearn.utils.validation.as_float_array", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/as_float_array/X", + "name": "X", + "qname": "sklearn.utils.validation.as_float_array.X", + "default_value": null, + "assigned_by": 
"POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix}", + "default_value": "", + "description": "" + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/as_float_array/copy", + "name": "copy", + "qname": "sklearn.utils.validation.as_float_array.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True, a copy of X will be created. If False, a copy may still be\nreturned if X's dtype is not a floating point type." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/as_float_array/force_all_finite", + "name": "force_all_finite", + "qname": "sklearn.utils.validation.as_float_array.force_all_finite", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool or 'allow-nan'", + "default_value": "True", + "description": "Whether to raise an error on np.inf, np.nan, pd.NA in X. The\npossibilities are:\n\n- True: Force all values of X to be finite.\n- False: accepts np.inf, np.nan, pd.NA in X.\n- 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\n be infinite.\n\n.. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'allow-nan'" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Converts an array-like to an array of floats.\n\nThe new dtype will be np.float32 or np.float64, depending on the original\ntype. The function can create a copy or modify the argument depending\non the argument copy.", + "docstring": "Converts an array-like to an array of floats.\n\nThe new dtype will be np.float32 or np.float64, depending on the original\ntype. The function can create a copy or modify the argument depending\non the argument copy.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n\ncopy : bool, default=True\n If True, a copy of X will be created. If False, a copy may still be\n returned if X's dtype is not a floating point type.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in X. The\n possibilities are:\n\n - True: Force all values of X to be finite.\n - False: accepts np.inf, np.nan, pd.NA in X.\n - 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\n be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\nReturns\n-------\nXT : {ndarray, sparse matrix}\n An array of type float.", + "code": "@_deprecate_positional_args\ndef as_float_array(X, *, copy=True, force_all_finite=True):\n \"\"\"Converts an array-like to an array of floats.\n\n The new dtype will be np.float32 or np.float64, depending on the original\n type. The function can create a copy or modify the argument depending\n on the argument copy.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}\n\n copy : bool, default=True\n If True, a copy of X will be created. 
If False, a copy may still be\n returned if X's dtype is not a floating point type.\n\n force_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in X. The\n possibilities are:\n\n - True: Force all values of X to be finite.\n - False: accepts np.inf, np.nan, pd.NA in X.\n - 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\n be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\n Returns\n -------\n XT : {ndarray, sparse matrix}\n An array of type float.\n \"\"\"\n if isinstance(X, np.matrix) or (not isinstance(X, np.ndarray)\n and not sp.issparse(X)):\n return check_array(X, accept_sparse=['csr', 'csc', 'coo'],\n dtype=np.float64, copy=copy,\n force_all_finite=force_all_finite, ensure_2d=False)\n elif sp.issparse(X) and X.dtype in [np.float32, np.float64]:\n return X.copy() if copy else X\n elif X.dtype in [np.float32, np.float64]: # is numpy array\n return X.copy('F' if X.flags['F_CONTIGUOUS'] else 'C') if copy else X\n else:\n if X.dtype.kind in 'uib' and X.dtype.itemsize <= 4:\n return_dtype = np.float32\n else:\n return_dtype = np.float64\n return X.astype(return_dtype)" + }, + { + "id": "scikit-learn/sklearn.utils.validation/assert_all_finite", + "name": "assert_all_finite", + "qname": "sklearn.utils.validation.assert_all_finite", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/assert_all_finite/X", + "name": "X", + "qname": "sklearn.utils.validation.assert_all_finite.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{ndarray, sparse matrix}", + "default_value": "", + "description": "" + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/assert_all_finite/allow_nan", + "name": "allow_nan", + "qname": "sklearn.utils.validation.assert_all_finite.allow_nan", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Throw a ValueError if X contains NaN or infinity.", + "docstring": "Throw a ValueError if X contains NaN or infinity.\n\nParameters\n----------\nX : {ndarray, sparse matrix}\n\nallow_nan : bool, default=False", + "code": "@_deprecate_positional_args\ndef assert_all_finite(X, *, allow_nan=False):\n \"\"\"Throw a ValueError if X contains NaN or infinity.\n\n Parameters\n ----------\n X : {ndarray, sparse matrix}\n\n allow_nan : bool, default=False\n \"\"\"\n _assert_all_finite(X.data if sp.issparse(X) else X, allow_nan)" + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y", + "name": "check_X_y", + "qname": "sklearn.utils.validation.check_X_y", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/X", + "name": "X", + "qname": "sklearn.utils.validation.check_X_y.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{ndarray, list, sparse matrix}", + "default_value": "", + "description": "Input data." 
+ }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/y", + "name": "y", + "qname": "sklearn.utils.validation.check_X_y.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{ndarray, list, sparse matrix}", + "default_value": "", + "description": "Labels." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/accept_sparse", + "name": "accept_sparse", + "qname": "sklearn.utils.validation.check_X_y.accept_sparse", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "str, bool or list of str", + "default_value": "False", + "description": "String[s] representing allowed sparse matrix formats, such as 'csc',\n'csr', etc. If the input is sparse but not in the allowed format,\nit will be converted to the first listed format. True allows the input\nto be any format. False means that a sparse matrix input will\nraise an error." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "list of str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/accept_large_sparse", + "name": "accept_large_sparse", + "qname": "sklearn.utils.validation.check_X_y.accept_large_sparse", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by\naccept_sparse, accept_large_sparse will cause it to be accepted only\nif its indices are stored with a 32-bit dtype.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/dtype", + "name": "dtype", + "qname": "sklearn.utils.validation.check_X_y.dtype", + "default_value": "'numeric'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "'numeric', type, list of type or None", + "default_value": "'numeric'", + "description": "Data type of result. If None, the dtype of the input is preserved.\nIf \"numeric\", dtype is preserved unless array.dtype is object.\nIf dtype is a list of types, conversion on the first type is only\nperformed if the dtype of the input is not in the list." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'numeric'" + }, + { + "kind": "NamedType", + "name": "type" + }, + { + "kind": "NamedType", + "name": "list of type" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/order", + "name": "order", + "qname": "sklearn.utils.validation.check_X_y.order", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'F', 'C'}", + "default_value": "None", + "description": "Whether an array will be forced to be fortran or c-style." 
+ }, + "type": { + "kind": "EnumType", + "values": ["F", "C"] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/copy", + "name": "copy", + "qname": "sklearn.utils.validation.check_X_y.copy", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether a forced copy will be triggered. If copy=False, a copy might\nbe triggered by a conversion." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/force_all_finite", + "name": "force_all_finite", + "qname": "sklearn.utils.validation.check_X_y.force_all_finite", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool or 'allow-nan'", + "default_value": "True", + "description": "Whether to raise an error on np.inf, np.nan, pd.NA in X. This parameter\ndoes not influence whether y can have np.inf, np.nan, pd.NA values.\nThe possibilities are:\n\n- True: Force all values of X to be finite.\n- False: accepts np.inf, np.nan, pd.NA in X.\n- 'allow-nan': accepts only np.nan or pd.NA values in X. Values cannot\n be infinite.\n\n.. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'allow-nan'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/ensure_2d", + "name": "ensure_2d", + "qname": "sklearn.utils.validation.check_X_y.ensure_2d", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to raise a value error if X is not 2D." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/allow_nd", + "name": "allow_nd", + "qname": "sklearn.utils.validation.check_X_y.allow_nd", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to allow X.ndim > 2." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/multi_output", + "name": "multi_output", + "qname": "sklearn.utils.validation.check_X_y.multi_output", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to allow 2D y (array or sparse matrix). If false, y will be\nvalidated as a vector. y cannot have np.nan or np.inf values if\nmulti_output=True." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/ensure_min_samples", + "name": "ensure_min_samples", + "qname": "sklearn.utils.validation.check_X_y.ensure_min_samples", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Make sure that X has a minimum number of samples in its first\naxis (rows for a 2D array)." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/ensure_min_features", + "name": "ensure_min_features", + "qname": "sklearn.utils.validation.check_X_y.ensure_min_features", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Make sure that the 2D array has some minimum number of features\n(columns). The default value of 1 rejects empty datasets.\nThis check is only enforced when X has effectively 2 dimensions or\nis originally 1D and ``ensure_2d`` is True. Setting to 0 disables\nthis check." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/y_numeric", + "name": "y_numeric", + "qname": "sklearn.utils.validation.check_X_y.y_numeric", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to ensure that y has a numeric type. If dtype of y is object,\nit is converted to float64. Should only be used for regression\nalgorithms." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_X_y/estimator", + "name": "estimator", + "qname": "sklearn.utils.validation.check_X_y.estimator", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str or estimator instance", + "default_value": "None", + "description": "If passed, include the name of the estimator in warning messages." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "estimator instance" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Input validation for standard estimators.\n\nChecks X and y for consistent length, enforces X to be 2D and y 1D. By\ndefault, X is checked to be non-empty and containing only finite values.\nStandard input checks are also applied to y, such as checking that y\ndoes not have np.nan or np.inf targets. For multi-label y, set\nmulti_output=True to allow 2D and sparse y. If the dtype of X is\nobject, attempt converting to float, raising on failure.", + "docstring": "Input validation for standard estimators.\n\nChecks X and y for consistent length, enforces X to be 2D and y 1D. By\ndefault, X is checked to be non-empty and containing only finite values.\nStandard input checks are also applied to y, such as checking that y\ndoes not have np.nan or np.inf targets. For multi-label y, set\nmulti_output=True to allow 2D and sparse y. If the dtype of X is\nobject, attempt converting to float, raising on failure.\n\nParameters\n----------\nX : {ndarray, list, sparse matrix}\n Input data.\n\ny : {ndarray, list, sparse matrix}\n Labels.\n\naccept_sparse : str, bool or list of str, default=False\n String[s] representing allowed sparse matrix formats, such as 'csc',\n 'csr', etc. If the input is sparse but not in the allowed format,\n it will be converted to the first listed format. True allows the input\n to be any format. 
False means that a sparse matrix input will\n raise an error.\n\naccept_large_sparse : bool, default=True\n If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by\n accept_sparse, accept_large_sparse=False will cause it to be accepted\n only if its indices are stored with a 32-bit dtype.\n\n .. versionadded:: 0.20\n\ndtype : 'numeric', type, list of type or None, default='numeric'\n Data type of result. If None, the dtype of the input is preserved.\n If \"numeric\", dtype is preserved unless array.dtype is object.\n If dtype is a list of types, conversion on the first type is only\n performed if the dtype of the input is not in the list.\n\norder : {'F', 'C'}, default=None\n Whether an array will be forced to be fortran or c-style.\n\ncopy : bool, default=False\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in X. This parameter\n does not influence whether y can have np.inf, np.nan, pd.NA values.\n The possibilities are:\n\n - True: Force all values of X to be finite.\n - False: accepts np.inf, np.nan, pd.NA in X.\n - 'allow-nan': accepts only np.nan or pd.NA values in X. Values cannot\n be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\nensure_2d : bool, default=True\n Whether to raise a value error if X is not 2D.\n\nallow_nd : bool, default=False\n Whether to allow X.ndim > 2.\n\nmulti_output : bool, default=False\n Whether to allow 2D y (array or sparse matrix). If false, y will be\n validated as a vector. y cannot have np.nan or np.inf values if\n multi_output=True.\n\nensure_min_samples : int, default=1\n Make sure that X has a minimum number of samples in its first\n axis (rows for a 2D array).\n\nensure_min_features : int, default=1\n Make sure that the 2D array has some minimum number of features\n (columns). The default value of 1 rejects empty datasets.\n This check is only enforced when X has effectively 2 dimensions or\n is originally 1D and ``ensure_2d`` is True. Setting to 0 disables\n this check.\n\ny_numeric : bool, default=False\n Whether to ensure that y has a numeric type. If dtype of y is object,\n it is converted to float64. Should only be used for regression\n algorithms.\n\nestimator : str or estimator instance, default=None\n If passed, include the name of the estimator in warning messages.\n\nReturns\n-------\nX_converted : object\n The converted and validated X.\n\ny_converted : object\n The converted and validated y.", + "code": "@_deprecate_positional_args\ndef check_X_y(X, y, accept_sparse=False, *, accept_large_sparse=True,\n dtype=\"numeric\", order=None, copy=False, force_all_finite=True,\n ensure_2d=True, allow_nd=False, multi_output=False,\n ensure_min_samples=1, ensure_min_features=1, y_numeric=False,\n estimator=None):\n \"\"\"Input validation for standard estimators.\n\n Checks X and y for consistent length, enforces X to be 2D and y 1D. By\n default, X is checked to be non-empty and containing only finite values.\n Standard input checks are also applied to y, such as checking that y\n does not have np.nan or np.inf targets. For multi-label y, set\n multi_output=True to allow 2D and sparse y. 
If the dtype of X is\n object, attempt converting to float, raising on failure.\n\n Parameters\n ----------\n X : {ndarray, list, sparse matrix}\n Input data.\n\n y : {ndarray, list, sparse matrix}\n Labels.\n\n accept_sparse : str, bool or list of str, default=False\n String[s] representing allowed sparse matrix formats, such as 'csc',\n 'csr', etc. If the input is sparse but not in the allowed format,\n it will be converted to the first listed format. True allows the input\n to be any format. False means that a sparse matrix input will\n raise an error.\n\n accept_large_sparse : bool, default=True\n If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by\n accept_sparse, accept_large_sparse=False will cause it to be accepted\n only if its indices are stored with a 32-bit dtype.\n\n .. versionadded:: 0.20\n\n dtype : 'numeric', type, list of type or None, default='numeric'\n Data type of result. If None, the dtype of the input is preserved.\n If \"numeric\", dtype is preserved unless array.dtype is object.\n If dtype is a list of types, conversion on the first type is only\n performed if the dtype of the input is not in the list.\n\n order : {'F', 'C'}, default=None\n Whether an array will be forced to be fortran or c-style.\n\n copy : bool, default=False\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\n force_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in X. This parameter\n does not influence whether y can have np.inf, np.nan, pd.NA values.\n The possibilities are:\n\n - True: Force all values of X to be finite.\n - False: accepts np.inf, np.nan, pd.NA in X.\n - 'allow-nan': accepts only np.nan or pd.NA values in X. Values cannot\n be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\n ensure_2d : bool, default=True\n Whether to raise a value error if X is not 2D.\n\n allow_nd : bool, default=False\n Whether to allow X.ndim > 2.\n\n multi_output : bool, default=False\n Whether to allow 2D y (array or sparse matrix). If false, y will be\n validated as a vector. y cannot have np.nan or np.inf values if\n multi_output=True.\n\n ensure_min_samples : int, default=1\n Make sure that X has a minimum number of samples in its first\n axis (rows for a 2D array).\n\n ensure_min_features : int, default=1\n Make sure that the 2D array has some minimum number of features\n (columns). The default value of 1 rejects empty datasets.\n This check is only enforced when X has effectively 2 dimensions or\n is originally 1D and ``ensure_2d`` is True. Setting to 0 disables\n this check.\n\n y_numeric : bool, default=False\n Whether to ensure that y has a numeric type. If dtype of y is object,\n it is converted to float64. 
Should only be used for regression\n algorithms.\n\n estimator : str or estimator instance, default=None\n If passed, include the name of the estimator in warning messages.\n\n Returns\n -------\n X_converted : object\n The converted and validated X.\n\n y_converted : object\n The converted and validated y.\n \"\"\"\n if y is None:\n raise ValueError(\"y cannot be None\")\n\n X = check_array(X, accept_sparse=accept_sparse,\n accept_large_sparse=accept_large_sparse,\n dtype=dtype, order=order, copy=copy,\n force_all_finite=force_all_finite,\n ensure_2d=ensure_2d, allow_nd=allow_nd,\n ensure_min_samples=ensure_min_samples,\n ensure_min_features=ensure_min_features,\n estimator=estimator)\n if multi_output:\n y = check_array(y, accept_sparse='csr', force_all_finite=True,\n ensure_2d=False, dtype=None)\n else:\n y = column_or_1d(y, warn=True)\n _assert_all_finite(y)\n if y_numeric and y.dtype.kind == 'O':\n y = y.astype(np.float64)\n\n check_consistent_length(X, y)\n\n return X, y" + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_array", + "name": "check_array", + "qname": "sklearn.utils.validation.check_array", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/check_array/array", + "name": "array", + "qname": "sklearn.utils.validation.check_array.array", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "object", + "default_value": "", + "description": "Input object to check / convert." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_array/accept_sparse", + "name": "accept_sparse", + "qname": "sklearn.utils.validation.check_array.accept_sparse", + "default_value": "False", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "str, bool or list/tuple of str", + "default_value": "False", + "description": "String[s] representing allowed sparse matrix formats, such as 'csc',\n'csr', etc. If the input is sparse but not in the allowed format,\nit will be converted to the first listed format. True allows the input\nto be any format. False means that a sparse matrix input will\nraise an error." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "list/tuple of str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_array/accept_large_sparse", + "name": "accept_large_sparse", + "qname": "sklearn.utils.validation.check_array.accept_large_sparse", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by\naccept_sparse, accept_large_sparse=False will cause it to be accepted\nonly if its indices are stored with a 32-bit dtype.\n\n.. versionadded:: 0.20" + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_array/dtype", + "name": "dtype", + "qname": "sklearn.utils.validation.check_array.dtype", + "default_value": "'numeric'", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "'numeric', type, list of type or None", + "default_value": "'numeric'", + "description": "Data type of result. 
If None, the dtype of the input is preserved.\nIf \"numeric\", dtype is preserved unless array.dtype is object.\nIf dtype is a list of types, conversion on the first type is only\nperformed if the dtype of the input is not in the list." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "'numeric'" + }, + { + "kind": "NamedType", + "name": "type" + }, + { + "kind": "NamedType", + "name": "list of type" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_array/order", + "name": "order", + "qname": "sklearn.utils.validation.check_array.order", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "{'F', 'C'} or None", + "default_value": "None", + "description": "Whether an array will be forced to be fortran or c-style.\nWhen order is None (default), then if copy=False, nothing is ensured\nabout the memory layout of the output array; otherwise (copy=True)\nthe memory layout of the returned array is kept as close as possible\nto the original array." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["F", "C"] + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_array/copy", + "name": "copy", + "qname": "sklearn.utils.validation.check_array.copy", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether a forced copy will be triggered. If copy=False, a copy might\nbe triggered by a conversion." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_array/force_all_finite", + "name": "force_all_finite", + "qname": "sklearn.utils.validation.check_array.force_all_finite", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool or 'allow-nan'", + "default_value": "True", + "description": "Whether to raise an error on np.inf, np.nan, pd.NA in array. The\npossibilities are:\n\n- True: Force all values of array to be finite.\n- False: accepts np.inf, np.nan, pd.NA in array.\n- 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n.. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "'allow-nan'" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_array/ensure_2d", + "name": "ensure_2d", + "qname": "sklearn.utils.validation.check_array.ensure_2d", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to raise a value error if array is not 2D." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_array/allow_nd", + "name": "allow_nd", + "qname": "sklearn.utils.validation.check_array.allow_nd", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "Whether to allow array.ndim > 2." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_array/ensure_min_samples", + "name": "ensure_min_samples", + "qname": "sklearn.utils.validation.check_array.ensure_min_samples", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Make sure that the array has a minimum number of samples in its first\naxis (rows for a 2D array). Setting to 0 disables this check." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_array/ensure_min_features", + "name": "ensure_min_features", + "qname": "sklearn.utils.validation.check_array.ensure_min_features", + "default_value": "1", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "1", + "description": "Make sure that the 2D array has some minimum number of features\n(columns). The default value of 1 rejects empty datasets.\nThis check is only enforced when the input data has effectively 2\ndimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0\ndisables this check." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_array/estimator", + "name": "estimator", + "qname": "sklearn.utils.validation.check_array.estimator", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str or estimator instance", + "default_value": "None", + "description": "If passed, include the name of the estimator in warning messages." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "estimator instance" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Input validation on an array, list, sparse matrix or similar.\n\nBy default, the input is checked to be a non-empty 2D array containing\nonly finite values. If the dtype of the array is object, attempt\nconverting to float, raising on failure.", + "docstring": "Input validation on an array, list, sparse matrix or similar.\n\nBy default, the input is checked to be a non-empty 2D array containing\nonly finite values. If the dtype of the array is object, attempt\nconverting to float, raising on failure.\n\nParameters\n----------\narray : object\n Input object to check / convert.\n\naccept_sparse : str, bool or list/tuple of str, default=False\n String[s] representing allowed sparse matrix formats, such as 'csc',\n 'csr', etc. If the input is sparse but not in the allowed format,\n it will be converted to the first listed format. True allows the input\n to be any format. False means that a sparse matrix input will\n raise an error.\n\naccept_large_sparse : bool, default=True\n If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by\n accept_sparse, accept_large_sparse=False will cause it to be accepted\n only if its indices are stored with a 32-bit dtype.\n\n .. versionadded:: 0.20\n\ndtype : 'numeric', type, list of type or None, default='numeric'\n Data type of result. 
If None, the dtype of the input is preserved.\n If \"numeric\", dtype is preserved unless array.dtype is object.\n If dtype is a list of types, conversion on the first type is only\n performed if the dtype of the input is not in the list.\n\norder : {'F', 'C'} or None, default=None\n Whether an array will be forced to be fortran or c-style.\n When order is None (default), then if copy=False, nothing is ensured\n about the memory layout of the output array; otherwise (copy=True)\n the memory layout of the returned array is kept as close as possible\n to the original array.\n\ncopy : bool, default=False\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in array. The\n possibilities are:\n\n - True: Force all values of array to be finite.\n - False: accepts np.inf, np.nan, pd.NA in array.\n - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\nensure_2d : bool, default=True\n Whether to raise a value error if array is not 2D.\n\nallow_nd : bool, default=False\n Whether to allow array.ndim > 2.\n\nensure_min_samples : int, default=1\n Make sure that the array has a minimum number of samples in its first\n axis (rows for a 2D array). Setting to 0 disables this check.\n\nensure_min_features : int, default=1\n Make sure that the 2D array has some minimum number of features\n (columns). The default value of 1 rejects empty datasets.\n This check is only enforced when the input data has effectively 2\n dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0\n disables this check.\n\nestimator : str or estimator instance, default=None\n If passed, include the name of the estimator in warning messages.\n\nReturns\n-------\narray_converted : object\n The converted and validated array.", + "code": "@_deprecate_positional_args\ndef check_array(array, accept_sparse=False, *, accept_large_sparse=True,\n dtype=\"numeric\", order=None, copy=False, force_all_finite=True,\n ensure_2d=True, allow_nd=False, ensure_min_samples=1,\n ensure_min_features=1, estimator=None):\n\n \"\"\"Input validation on an array, list, sparse matrix or similar.\n\n By default, the input is checked to be a non-empty 2D array containing\n only finite values. If the dtype of the array is object, attempt\n converting to float, raising on failure.\n\n Parameters\n ----------\n array : object\n Input object to check / convert.\n\n accept_sparse : str, bool or list/tuple of str, default=False\n String[s] representing allowed sparse matrix formats, such as 'csc',\n 'csr', etc. If the input is sparse but not in the allowed format,\n it will be converted to the first listed format. True allows the input\n to be any format. False means that a sparse matrix input will\n raise an error.\n\n accept_large_sparse : bool, default=True\n If a CSR, CSC, COO or BSR sparse matrix is supplied and accepted by\n accept_sparse, accept_large_sparse=False will cause it to be accepted\n only if its indices are stored with a 32-bit dtype.\n\n .. versionadded:: 0.20\n\n dtype : 'numeric', type, list of type or None, default='numeric'\n Data type of result. 
If None, the dtype of the input is preserved.\n If \"numeric\", dtype is preserved unless array.dtype is object.\n If dtype is a list of types, conversion on the first type is only\n performed if the dtype of the input is not in the list.\n\n order : {'F', 'C'} or None, default=None\n Whether an array will be forced to be fortran or c-style.\n When order is None (default), then if copy=False, nothing is ensured\n about the memory layout of the output array; otherwise (copy=True)\n the memory layout of the returned array is kept as close as possible\n to the original array.\n\n copy : bool, default=False\n Whether a forced copy will be triggered. If copy=False, a copy might\n be triggered by a conversion.\n\n force_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in array. The\n possibilities are:\n\n - True: Force all values of array to be finite.\n - False: accepts np.inf, np.nan, pd.NA in array.\n - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n .. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`\n\n ensure_2d : bool, default=True\n Whether to raise a value error if array is not 2D.\n\n allow_nd : bool, default=False\n Whether to allow array.ndim > 2.\n\n ensure_min_samples : int, default=1\n Make sure that the array has a minimum number of samples in its first\n axis (rows for a 2D array). Setting to 0 disables this check.\n\n ensure_min_features : int, default=1\n Make sure that the 2D array has some minimum number of features\n (columns). The default value of 1 rejects empty datasets.\n This check is only enforced when the input data has effectively 2\n dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0\n disables this check.\n\n estimator : str or estimator instance, default=None\n If passed, include the name of the estimator in warning messages.\n\n Returns\n -------\n array_converted : object\n The converted and validated array.\n \"\"\"\n # store reference to original array to check if copy is needed when\n # function returns\n array_orig = array\n\n # store whether originally we wanted numeric dtype\n dtype_numeric = isinstance(dtype, str) and dtype == \"numeric\"\n\n dtype_orig = getattr(array, \"dtype\", None)\n if not hasattr(dtype_orig, 'kind'):\n # not a data type (e.g. a column named dtype in a pandas DataFrame)\n dtype_orig = None\n\n # check if the object contains several dtypes (typically a pandas\n # DataFrame), and store them. If not, store None.\n dtypes_orig = None\n has_pd_integer_array = False\n if hasattr(array, \"dtypes\") and hasattr(array.dtypes, '__array__'):\n # throw warning if columns are sparse. 
If all columns are sparse, then\n        # array.sparse exists and sparsity will be preserved (later).\n        with suppress(ImportError):\n            from pandas.api.types import is_sparse\n            if (not hasattr(array, 'sparse') and\n                    array.dtypes.apply(is_sparse).any()):\n                warnings.warn(\n                    \"pandas.DataFrame with sparse columns found.\"\n                    \"It will be converted to a dense numpy array.\"\n                )\n\n        dtypes_orig = list(array.dtypes)\n        # pandas boolean dtype __array__ interface coerces bools to objects\n        for i, dtype_iter in enumerate(dtypes_orig):\n            if dtype_iter.kind == 'b':\n                dtypes_orig[i] = np.dtype(object)\n            elif dtype_iter.name.startswith((\"Int\", \"UInt\")):\n                # name looks like an Integer Extension Array, now check for\n                # the dtype\n                with suppress(ImportError):\n                    from pandas import (Int8Dtype, Int16Dtype,\n                                        Int32Dtype, Int64Dtype,\n                                        UInt8Dtype, UInt16Dtype,\n                                        UInt32Dtype, UInt64Dtype)\n                    if isinstance(dtype_iter, (Int8Dtype, Int16Dtype,\n                                               Int32Dtype, Int64Dtype,\n                                               UInt8Dtype, UInt16Dtype,\n                                               UInt32Dtype, UInt64Dtype)):\n                        has_pd_integer_array = True\n\n        if all(isinstance(dtype, np.dtype) for dtype in dtypes_orig):\n            dtype_orig = np.result_type(*dtypes_orig)\n\n    if dtype_numeric:\n        if dtype_orig is not None and dtype_orig.kind == \"O\":\n            # if input is object, convert to float.\n            dtype = np.float64\n        else:\n            dtype = None\n\n    if isinstance(dtype, (list, tuple)):\n        if dtype_orig is not None and dtype_orig in dtype:\n            # no dtype conversion required\n            dtype = None\n        else:\n            # dtype conversion required. Let's select the first element of the\n            # list of accepted types.\n            dtype = dtype[0]\n\n    if has_pd_integer_array:\n        # If there are any pandas integer extension arrays, convert\n        # them to the dtype selected above.\n        array = array.astype(dtype)\n\n    if force_all_finite not in (True, False, 'allow-nan'):\n        raise ValueError('force_all_finite should be a bool or \"allow-nan\"'\n                         '. Got {!r} instead'.format(force_all_finite))\n\n    if estimator is not None:\n        if isinstance(estimator, str):\n            estimator_name = estimator\n        else:\n            estimator_name = estimator.__class__.__name__\n    else:\n        estimator_name = \"Estimator\"\n    context = \" by %s\" % estimator_name if estimator is not None else \"\"\n\n    # When all dataframe columns are sparse, convert to a sparse array\n    if hasattr(array, 'sparse') and array.ndim > 1:\n        # DataFrame.sparse only supports `to_coo`\n        array = array.sparse.to_coo()\n        if array.dtype == np.dtype('object'):\n            unique_dtypes = set(\n                [dt.subtype.name for dt in array_orig.dtypes]\n            )\n            if len(unique_dtypes) > 1:\n                raise ValueError(\n                    \"Pandas DataFrame with mixed sparse extension arrays \"\n                    \"generated a sparse matrix with object dtype which \"\n                    \"can not be converted to a scipy sparse matrix.\"\n                    \"Sparse extension arrays should all have the same \"\n                    \"numeric type.\")\n\n    if sp.issparse(array):\n        _ensure_no_complex_data(array)\n        array = _ensure_sparse_format(array, accept_sparse=accept_sparse,\n                                      dtype=dtype, copy=copy,\n                                      force_all_finite=force_all_finite,\n                                      accept_large_sparse=accept_large_sparse)\n    else:\n        # If np.array(..) gives ComplexWarning, then we convert the warning\n        # to an error. This is needed because specifying a non complex\n        # dtype to the function converts complex to real dtype,\n        # thereby passing the test made in the lines following the scope\n        # of warnings context manager.\n        with warnings.catch_warnings():\n            try:\n                warnings.simplefilter('error', ComplexWarning)\n                if dtype is not None and np.dtype(dtype).kind in 'iu':\n                    # Conversion float -> int should not contain NaN or\n                    # inf (numpy#14412). 
We cannot use casting='safe' because\n # then conversion float -> int would be disallowed.\n array = np.asarray(array, order=order)\n if array.dtype.kind == 'f':\n _assert_all_finite(array, allow_nan=False,\n msg_dtype=dtype)\n array = array.astype(dtype, casting=\"unsafe\", copy=False)\n else:\n array = np.asarray(array, order=order, dtype=dtype)\n except ComplexWarning as complex_warning:\n raise ValueError(\"Complex data not supported\\n\"\n \"{}\\n\".format(array)) from complex_warning\n\n # It is possible that the np.array(..) gave no warning. This happens\n # when no dtype conversion happened, for example dtype = None. The\n # result is that np.array(..) produces an array of complex dtype\n # and we need to catch and raise exception for such cases.\n _ensure_no_complex_data(array)\n\n if ensure_2d:\n # If input is scalar raise error\n if array.ndim == 0:\n raise ValueError(\n \"Expected 2D array, got scalar array instead:\\narray={}.\\n\"\n \"Reshape your data either using array.reshape(-1, 1) if \"\n \"your data has a single feature or array.reshape(1, -1) \"\n \"if it contains a single sample.\".format(array))\n # If input is 1D raise error\n if array.ndim == 1:\n raise ValueError(\n \"Expected 2D array, got 1D array instead:\\narray={}.\\n\"\n \"Reshape your data either using array.reshape(-1, 1) if \"\n \"your data has a single feature or array.reshape(1, -1) \"\n \"if it contains a single sample.\".format(array))\n\n # make sure we actually converted to numeric:\n if dtype_numeric and array.dtype.kind in \"OUSV\":\n warnings.warn(\n \"Arrays of bytes/strings is being converted to decimal \"\n \"numbers if dtype='numeric'. This behavior is deprecated in \"\n \"0.24 and will be removed in 1.1 (renaming of 0.26). Please \"\n \"convert your data to numeric values explicitly instead.\",\n FutureWarning, stacklevel=2\n )\n try:\n array = array.astype(np.float64)\n except ValueError as e:\n raise ValueError(\n \"Unable to convert array of bytes/strings \"\n \"into decimal numbers with dtype='numeric'\") from e\n if not allow_nd and array.ndim >= 3:\n raise ValueError(\"Found array with dim %d. 
%s expected <= 2.\"\n % (array.ndim, estimator_name))\n\n if force_all_finite:\n _assert_all_finite(array,\n allow_nan=force_all_finite == 'allow-nan')\n\n if ensure_min_samples > 0:\n n_samples = _num_samples(array)\n if n_samples < ensure_min_samples:\n raise ValueError(\"Found array with %d sample(s) (shape=%s) while a\"\n \" minimum of %d is required%s.\"\n % (n_samples, array.shape, ensure_min_samples,\n context))\n\n if ensure_min_features > 0 and array.ndim == 2:\n n_features = array.shape[1]\n if n_features < ensure_min_features:\n raise ValueError(\"Found array with %d feature(s) (shape=%s) while\"\n \" a minimum of %d is required%s.\"\n % (n_features, array.shape, ensure_min_features,\n context))\n\n if copy and np.may_share_memory(array, array_orig):\n array = np.array(array, dtype=dtype, order=order)\n\n return array" + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_consistent_length", + "name": "check_consistent_length", + "qname": "sklearn.utils.validation.check_consistent_length", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/check_consistent_length/arrays", + "name": "arrays", + "qname": "sklearn.utils.validation.check_consistent_length.arrays", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": true, + "docstring": { + "type": "list or tuple of input objects.", + "default_value": "", + "description": "Objects that will be checked for consistent length." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "tuple of input objects." + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Check that all arrays have consistent first dimensions.\n\nChecks whether all objects in arrays have the same shape or length.", + "docstring": "Check that all arrays have consistent first dimensions.\n\nChecks whether all objects in arrays have the same shape or length.\n\nParameters\n----------\n*arrays : list or tuple of input objects.\n Objects that will be checked for consistent length.", + "code": "def check_consistent_length(*arrays):\n \"\"\"Check that all arrays have consistent first dimensions.\n\n Checks whether all objects in arrays have the same shape or length.\n\n Parameters\n ----------\n *arrays : list or tuple of input objects.\n Objects that will be checked for consistent length.\n \"\"\"\n\n lengths = [_num_samples(X) for X in arrays if X is not None]\n uniques = np.unique(lengths)\n if len(uniques) > 1:\n raise ValueError(\"Found input variables with inconsistent numbers of\"\n \" samples: %r\" % [int(l) for l in lengths])" + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_is_fitted", + "name": "check_is_fitted", + "qname": "sklearn.utils.validation.check_is_fitted", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/check_is_fitted/estimator", + "name": "estimator", + "qname": "sklearn.utils.validation.check_is_fitted.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "estimator instance", + "default_value": "", + "description": "estimator instance for which the check is performed." 
+ }, + "type": { + "kind": "NamedType", + "name": "estimator instance" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_is_fitted/attributes", + "name": "attributes", + "qname": "sklearn.utils.validation.check_is_fitted.attributes", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "str, list or tuple of str", + "default_value": "None", + "description": "Attribute name(s) given as string or a list/tuple of strings\nEg.: ``[\"coef_\", \"estimator_\", ...], \"coef_\"``\n\nIf `None`, `estimator` is considered fitted if there exist an\nattribute that ends with a underscore and does not start with double\nunderscore." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "tuple of str" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_is_fitted/msg", + "name": "msg", + "qname": "sklearn.utils.validation.check_is_fitted.msg", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "str", + "default_value": "None", + "description": "The default error message is, \"This %(name)s instance is not fitted\nyet. Call 'fit' with appropriate arguments before using this\nestimator.\"\n\nFor custom messages if \"%(name)s\" is present in the message string,\nit is substituted for the estimator name.\n\nEg. : \"Estimator, %(name)s, must be fitted before sparsifying\"." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_is_fitted/all_or_any", + "name": "all_or_any", + "qname": "sklearn.utils.validation.check_is_fitted.all_or_any", + "default_value": "all", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "callable, {all, any}", + "default_value": "all", + "description": "Specify whether all or any of the given attributes must exist." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": [] + }, + { + "kind": "NamedType", + "name": "callable" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Perform is_fitted validation for estimator.\n\nChecks if the estimator is fitted by verifying the presence of\nfitted attributes (ending with a trailing underscore) and otherwise\nraises a NotFittedError with the given message.\n\nThis utility is meant to be used internally by estimators themselves,\ntypically in their own predict / transform methods.", + "docstring": "Perform is_fitted validation for estimator.\n\nChecks if the estimator is fitted by verifying the presence of\nfitted attributes (ending with a trailing underscore) and otherwise\nraises a NotFittedError with the given message.\n\nThis utility is meant to be used internally by estimators themselves,\ntypically in their own predict / transform methods.\n\nParameters\n----------\nestimator : estimator instance\n estimator instance for which the check is performed.\n\nattributes : str, list or tuple of str, default=None\n Attribute name(s) given as string or a list/tuple of strings\n Eg.: ``[\"coef_\", \"estimator_\", ...], \"coef_\"``\n\n If `None`, `estimator` is considered fitted if there exist an\n attribute that ends with a underscore and does not start with double\n underscore.\n\nmsg : str, default=None\n The default error message is, \"This %(name)s instance is not fitted\n yet. 
Call 'fit' with appropriate arguments before using this\n    estimator.\"\n\n    For custom messages if \"%(name)s\" is present in the message string,\n    it is substituted for the estimator name.\n\n    Eg. : \"Estimator, %(name)s, must be fitted before sparsifying\".\n\nall_or_any : callable, {all, any}, default=all\n    Specify whether all or any of the given attributes must exist.\n\nReturns\n-------\nNone\n\nRaises\n------\nNotFittedError\n    If the attributes are not found.", + "code": "@_deprecate_positional_args\ndef check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all):\n    \"\"\"Perform is_fitted validation for estimator.\n\n    Checks if the estimator is fitted by verifying the presence of\n    fitted attributes (ending with a trailing underscore) and otherwise\n    raises a NotFittedError with the given message.\n\n    This utility is meant to be used internally by estimators themselves,\n    typically in their own predict / transform methods.\n\n    Parameters\n    ----------\n    estimator : estimator instance\n        estimator instance for which the check is performed.\n\n    attributes : str, list or tuple of str, default=None\n        Attribute name(s) given as string or a list/tuple of strings\n        Eg.: ``[\"coef_\", \"estimator_\", ...], \"coef_\"``\n\n        If `None`, `estimator` is considered fitted if there exists an\n        attribute that ends with an underscore and does not start with a\n        double underscore.\n\n    msg : str, default=None\n        The default error message is, \"This %(name)s instance is not fitted\n        yet. Call 'fit' with appropriate arguments before using this\n        estimator.\"\n\n        For custom messages if \"%(name)s\" is present in the message string,\n        it is substituted for the estimator name.\n\n        Eg. : \"Estimator, %(name)s, must be fitted before sparsifying\".\n\n    all_or_any : callable, {all, any}, default=all\n        Specify whether all or any of the given attributes must exist.\n\n    Returns\n    -------\n    None\n\n    Raises\n    ------\n    NotFittedError\n        If the attributes are not found.\n    \"\"\"\n    if isclass(estimator):\n        raise TypeError(\"{} is a class, not an instance.\".format(estimator))\n    if msg is None:\n        msg = (\"This %(name)s instance is not fitted yet. 
Call 'fit' with \"\n \"appropriate arguments before using this estimator.\")\n\n if not hasattr(estimator, 'fit'):\n raise TypeError(\"%s is not an estimator instance.\" % (estimator))\n\n if attributes is not None:\n if not isinstance(attributes, (list, tuple)):\n attributes = [attributes]\n attrs = all_or_any([hasattr(estimator, attr) for attr in attributes])\n else:\n attrs = [v for v in vars(estimator)\n if v.endswith(\"_\") and not v.startswith(\"__\")]\n\n if not attrs:\n raise NotFittedError(msg % {'name': type(estimator).__name__})" + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_memory", + "name": "check_memory", + "qname": "sklearn.utils.validation.check_memory", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/check_memory/memory", + "name": "memory", + "qname": "sklearn.utils.validation.check_memory.memory", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "None, str or object with the joblib.Memory interface", + "default_value": "", + "description": "" + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "None" + }, + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "object with the joblib.Memory interface" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Check that ``memory`` is joblib.Memory-like.\n\njoblib.Memory-like means that ``memory`` can be converted into a\njoblib.Memory instance (typically a str denoting the ``location``)\nor has the same interface (has a ``cache`` method).", + "docstring": "Check that ``memory`` is joblib.Memory-like.\n\njoblib.Memory-like means that ``memory`` can be converted into a\njoblib.Memory instance (typically a str denoting the ``location``)\nor has the same interface (has a ``cache`` method).\n\nParameters\n----------\nmemory : None, str or object with the joblib.Memory interface\n\nReturns\n-------\nmemory : object with the joblib.Memory interface\n\nRaises\n------\nValueError\n If ``memory`` is not joblib.Memory-like.", + "code": "def check_memory(memory):\n \"\"\"Check that ``memory`` is joblib.Memory-like.\n\n joblib.Memory-like means that ``memory`` can be converted into a\n joblib.Memory instance (typically a str denoting the ``location``)\n or has the same interface (has a ``cache`` method).\n\n Parameters\n ----------\n memory : None, str or object with the joblib.Memory interface\n\n Returns\n -------\n memory : object with the joblib.Memory interface\n\n Raises\n ------\n ValueError\n If ``memory`` is not joblib.Memory-like.\n \"\"\"\n\n if memory is None or isinstance(memory, str):\n if parse_version(joblib.__version__) < parse_version('0.12'):\n memory = joblib.Memory(cachedir=memory, verbose=0)\n else:\n memory = joblib.Memory(location=memory, verbose=0)\n elif not hasattr(memory, 'cache'):\n raise ValueError(\"'memory' should be None, a string or have the same\"\n \" interface as joblib.Memory.\"\n \" Got memory='{}' instead.\".format(memory))\n return memory" + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_non_negative", + "name": "check_non_negative", + "qname": "sklearn.utils.validation.check_non_negative", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/check_non_negative/X", + "name": "X", + "qname": "sklearn.utils.validation.check_non_negative.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + 
"docstring": { + "type": "{array-like, sparse matrix}", + "default_value": "", + "description": "Input data." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_non_negative/whom", + "name": "whom", + "qname": "sklearn.utils.validation.check_non_negative.whom", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "str", + "default_value": "", + "description": "Who passed X to this function." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Check if there is any negative value in an array.", + "docstring": "Check if there is any negative value in an array.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n Input data.\n\nwhom : str\n Who passed X to this function.", + "code": "def check_non_negative(X, whom):\n \"\"\"\n Check if there is any negative value in an array.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}\n Input data.\n\n whom : str\n Who passed X to this function.\n \"\"\"\n # avoid X.min() on sparse matrix since it also sorts the indices\n if sp.issparse(X):\n if X.format in ['lil', 'dok']:\n X = X.tocsr()\n if X.data.size == 0:\n X_min = 0\n else:\n X_min = X.data.min()\n else:\n X_min = X.min()\n\n if X_min < 0:\n raise ValueError(\"Negative values in data passed to %s\" % whom)" + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_random_state", + "name": "check_random_state", + "qname": "sklearn.utils.validation.check_random_state", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/check_random_state/seed", + "name": "seed", + "qname": "sklearn.utils.validation.check_random_state.seed", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "None, int or instance of RandomState", + "default_value": "", + "description": "If seed is None, return the RandomState singleton used by np.random.\nIf seed is an int, return a new RandomState instance seeded with seed.\nIf seed is already a RandomState instance, return it.\nOtherwise raise ValueError." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "None" + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "instance of RandomState" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Turn seed into a np.random.RandomState instance", + "docstring": "Turn seed into a np.random.RandomState instance\n\nParameters\n----------\nseed : None, int or instance of RandomState\n If seed is None, return the RandomState singleton used by np.random.\n If seed is an int, return a new RandomState instance seeded with seed.\n If seed is already a RandomState instance, return it.\n Otherwise raise ValueError.", + "code": "def check_random_state(seed):\n \"\"\"Turn seed into a np.random.RandomState instance\n\n Parameters\n ----------\n seed : None, int or instance of RandomState\n If seed is None, return the RandomState singleton used by np.random.\n If seed is an int, return a new RandomState instance seeded with seed.\n If seed is already a RandomState instance, return it.\n Otherwise raise ValueError.\n \"\"\"\n if seed is None or seed is np.random:\n return np.random.mtrand._rand\n if isinstance(seed, numbers.Integral):\n return np.random.RandomState(seed)\n if isinstance(seed, np.random.RandomState):\n return seed\n raise ValueError('%r cannot be used to seed a numpy.random.RandomState'\n ' instance' % seed)" + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_scalar", + "name": "check_scalar", + "qname": "sklearn.utils.validation.check_scalar", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/check_scalar/x", + "name": "x", + "qname": "sklearn.utils.validation.check_scalar.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "object", + "default_value": "", + "description": "The scalar parameter to validate." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_scalar/name", + "name": "name", + "qname": "sklearn.utils.validation.check_scalar.name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "str", + "default_value": "", + "description": "The name of the parameter to be printed in error messages." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_scalar/target_type", + "name": "target_type", + "qname": "sklearn.utils.validation.check_scalar.target_type", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "type or tuple", + "default_value": "", + "description": "Acceptable data types for the parameter." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "type" + }, + { + "kind": "NamedType", + "name": "tuple" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_scalar/min_val", + "name": "min_val", + "qname": "sklearn.utils.validation.check_scalar.min_val", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float or int", + "default_value": "None", + "description": "The minimum valid value the parameter can take. If None (default) it\nis implied that the parameter does not have a lower bound." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_scalar/max_val", + "name": "max_val", + "qname": "sklearn.utils.validation.check_scalar.max_val", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float or int", + "default_value": "None", + "description": "The maximum valid value the parameter can take. If None (default) it\nis implied that the parameter does not have an upper bound." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "float" + }, + { + "kind": "NamedType", + "name": "int" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Validate scalar parameters type and value.", + "docstring": "Validate scalar parameters type and value.\n\nParameters\n----------\nx : object\n The scalar parameter to validate.\n\nname : str\n The name of the parameter to be printed in error messages.\n\ntarget_type : type or tuple\n Acceptable data types for the parameter.\n\nmin_val : float or int, default=None\n The minimum valid value the parameter can take. If None (default) it\n is implied that the parameter does not have a lower bound.\n\nmax_val : float or int, default=None\n The maximum valid value the parameter can take. If None (default) it\n is implied that the parameter does not have an upper bound.\n\nRaises\n-------\nTypeError\n If the parameter's type does not match the desired type.\n\nValueError\n If the parameter's value violates the given bounds.", + "code": "def check_scalar(x, name, target_type, *, min_val=None, max_val=None):\n \"\"\"Validate scalar parameters type and value.\n\n Parameters\n ----------\n x : object\n The scalar parameter to validate.\n\n name : str\n The name of the parameter to be printed in error messages.\n\n target_type : type or tuple\n Acceptable data types for the parameter.\n\n min_val : float or int, default=None\n The minimum valid value the parameter can take. If None (default) it\n is implied that the parameter does not have a lower bound.\n\n max_val : float or int, default=None\n The maximum valid value the parameter can take. If None (default) it\n is implied that the parameter does not have an upper bound.\n\n Raises\n -------\n TypeError\n If the parameter's type does not match the desired type.\n\n ValueError\n If the parameter's value violates the given bounds.\n \"\"\"\n\n if not isinstance(x, target_type):\n raise TypeError('`{}` must be an instance of {}, not {}.'\n .format(name, target_type, type(x)))\n\n if min_val is not None and x < min_val:\n raise ValueError('`{}`= {}, must be >= {}.'.format(name, x, min_val))\n\n if max_val is not None and x > max_val:\n raise ValueError('`{}`= {}, must be <= {}.'.format(name, x, max_val))" + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_symmetric", + "name": "check_symmetric", + "qname": "sklearn.utils.validation.check_symmetric", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/check_symmetric/array", + "name": "array", + "qname": "sklearn.utils.validation.check_symmetric.array", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{ndarray, sparse matrix}", + "default_value": "", + "description": "Input object to check / convert. 
Must be two-dimensional and square,\notherwise a ValueError will be raised." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_symmetric/tol", + "name": "tol", + "qname": "sklearn.utils.validation.check_symmetric.tol", + "default_value": "1e-10", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "float", + "default_value": "1e-10", + "description": "Absolute tolerance for equivalence of arrays. Default = 1E-10." + }, + "type": { + "kind": "NamedType", + "name": "float" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_symmetric/raise_warning", + "name": "raise_warning", + "qname": "sklearn.utils.validation.check_symmetric.raise_warning", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "If True then raise a warning if conversion is required." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/check_symmetric/raise_exception", + "name": "raise_exception", + "qname": "sklearn.utils.validation.check_symmetric.raise_exception", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "If True then raise an exception if array is not symmetric." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Make sure that array is 2D, square and symmetric.\n\nIf the array is not symmetric, then a symmetrized version is returned.\nOptionally, a warning or exception is raised if the matrix is not\nsymmetric.", + "docstring": "Make sure that array is 2D, square and symmetric.\n\nIf the array is not symmetric, then a symmetrized version is returned.\nOptionally, a warning or exception is raised if the matrix is not\nsymmetric.\n\nParameters\n----------\narray : {ndarray, sparse matrix}\n Input object to check / convert. Must be two-dimensional and square,\n otherwise a ValueError will be raised.\n\ntol : float, default=1e-10\n Absolute tolerance for equivalence of arrays. Default = 1E-10.\n\nraise_warning : bool, default=True\n If True then raise a warning if conversion is required.\n\nraise_exception : bool, default=False\n If True then raise an exception if array is not symmetric.\n\nReturns\n-------\narray_sym : {ndarray, sparse matrix}\n Symmetrized version of the input array, i.e. the average of array\n and array.transpose(). If sparse, then duplicate entries are first\n summed and zeros are eliminated.", + "code": "@_deprecate_positional_args\ndef check_symmetric(array, *, tol=1E-10, raise_warning=True,\n raise_exception=False):\n \"\"\"Make sure that array is 2D, square and symmetric.\n\n If the array is not symmetric, then a symmetrized version is returned.\n Optionally, a warning or exception is raised if the matrix is not\n symmetric.\n\n Parameters\n ----------\n array : {ndarray, sparse matrix}\n Input object to check / convert. Must be two-dimensional and square,\n otherwise a ValueError will be raised.\n\n tol : float, default=1e-10\n Absolute tolerance for equivalence of arrays. 
Default = 1E-10.\n\n raise_warning : bool, default=True\n If True then raise a warning if conversion is required.\n\n raise_exception : bool, default=False\n If True then raise an exception if array is not symmetric.\n\n Returns\n -------\n array_sym : {ndarray, sparse matrix}\n Symmetrized version of the input array, i.e. the average of array\n and array.transpose(). If sparse, then duplicate entries are first\n summed and zeros are eliminated.\n \"\"\"\n if (array.ndim != 2) or (array.shape[0] != array.shape[1]):\n raise ValueError(\"array must be 2-dimensional and square. \"\n \"shape = {0}\".format(array.shape))\n\n if sp.issparse(array):\n diff = array - array.T\n # only csr, csc, and coo have `data` attribute\n if diff.format not in ['csr', 'csc', 'coo']:\n diff = diff.tocsr()\n symmetric = np.all(abs(diff.data) < tol)\n else:\n symmetric = np.allclose(array, array.T, atol=tol)\n\n if not symmetric:\n if raise_exception:\n raise ValueError(\"Array must be symmetric\")\n if raise_warning:\n warnings.warn(\"Array is not symmetric, and will be converted \"\n \"to symmetric by average with its transpose.\",\n stacklevel=2)\n if sp.issparse(array):\n conversion = 'to' + array.format\n array = getattr(0.5 * (array + array.T), conversion)()\n else:\n array = 0.5 * (array + array.T)\n\n return array" + }, + { + "id": "scikit-learn/sklearn.utils.validation/column_or_1d", + "name": "column_or_1d", + "qname": "sklearn.utils.validation.column_or_1d", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/column_or_1d/y", + "name": "y", + "qname": "sklearn.utils.validation.column_or_1d.y", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "array-like", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "array-like" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/column_or_1d/warn", + "name": "warn", + "qname": "sklearn.utils.validation.column_or_1d.warn", + "default_value": "False", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "False", + "description": "To control display of warnings." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Ravel column or 1d numpy array, else raises an error.", + "docstring": "Ravel column or 1d numpy array, else raises an error.\n\nParameters\n----------\ny : array-like\n\nwarn : bool, default=False\n To control display of warnings.\n\nReturns\n-------\ny : ndarray", + "code": "@_deprecate_positional_args\ndef column_or_1d(y, *, warn=False):\n \"\"\" Ravel column or 1d numpy array, else raises an error.\n\n Parameters\n ----------\n y : array-like\n\n warn : bool, default=False\n To control display of warnings.\n\n Returns\n -------\n y : ndarray\n\n \"\"\"\n y = np.asarray(y)\n shape = np.shape(y)\n if len(shape) == 1:\n return np.ravel(y)\n if len(shape) == 2 and shape[1] == 1:\n if warn:\n warnings.warn(\"A column-vector y was passed when a 1d array was\"\n \" expected. 
Please change the shape of y to \"\n \"(n_samples, ), for example using ravel().\",\n DataConversionWarning, stacklevel=2)\n return np.ravel(y)\n\n raise ValueError(\n \"y should be a 1d array, \"\n \"got an array of shape {} instead.\".format(shape))" + }, + { + "id": "scikit-learn/sklearn.utils.validation/has_fit_parameter", + "name": "has_fit_parameter", + "qname": "sklearn.utils.validation.has_fit_parameter", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/has_fit_parameter/estimator", + "name": "estimator", + "qname": "sklearn.utils.validation.has_fit_parameter.estimator", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "object", + "default_value": "", + "description": "An estimator to inspect." + }, + "type": { + "kind": "NamedType", + "name": "object" + } + }, + { + "id": "scikit-learn/sklearn.utils.validation/has_fit_parameter/parameter", + "name": "parameter", + "qname": "sklearn.utils.validation.has_fit_parameter.parameter", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "str", + "default_value": "", + "description": "The searched parameter." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Checks whether the estimator's fit method supports the given parameter.", + "docstring": "Checks whether the estimator's fit method supports the given parameter.\n\nParameters\n----------\nestimator : object\n An estimator to inspect.\n\nparameter : str\n The searched parameter.\n\nReturns\n-------\nis_parameter: bool\n Whether the parameter was found to be a named parameter of the\n estimator's fit method.\n\nExamples\n--------\n>>> from sklearn.svm import SVC\n>>> has_fit_parameter(SVC(), \"sample_weight\")\nTrue", + "code": "def has_fit_parameter(estimator, parameter):\n \"\"\"Checks whether the estimator's fit method supports the given parameter.\n\n Parameters\n ----------\n estimator : object\n An estimator to inspect.\n\n parameter : str\n The searched parameter.\n\n Returns\n -------\n is_parameter: bool\n Whether the parameter was found to be a named parameter of the\n estimator's fit method.\n\n Examples\n --------\n >>> from sklearn.svm import SVC\n >>> has_fit_parameter(SVC(), \"sample_weight\")\n True\n\n \"\"\"\n return parameter in signature(estimator.fit).parameters" + }, + { + "id": "scikit-learn/sklearn.utils.validation/indexable", + "name": "indexable", + "qname": "sklearn.utils.validation.indexable", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils.validation/indexable/iterables", + "name": "iterables", + "qname": "sklearn.utils.validation.indexable.iterables", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": true, + "docstring": { + "type": "{lists, dataframes, ndarrays, sparse matrices}", + "default_value": "", + "description": "List of objects to ensure sliceability." 
+ }, + "type": { + "kind": "EnumType", + "values": [] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Make arrays indexable for cross-validation.\n\nChecks consistent length, passes through None, and ensures that everything\ncan be indexed by converting sparse matrices to csr and converting\nnon-interable objects to arrays.", + "docstring": "Make arrays indexable for cross-validation.\n\nChecks consistent length, passes through None, and ensures that everything\ncan be indexed by converting sparse matrices to csr and converting\nnon-interable objects to arrays.\n\nParameters\n----------\n*iterables : {lists, dataframes, ndarrays, sparse matrices}\n List of objects to ensure sliceability.", + "code": "def indexable(*iterables):\n \"\"\"Make arrays indexable for cross-validation.\n\n Checks consistent length, passes through None, and ensures that everything\n can be indexed by converting sparse matrices to csr and converting\n non-interable objects to arrays.\n\n Parameters\n ----------\n *iterables : {lists, dataframes, ndarrays, sparse matrices}\n List of objects to ensure sliceability.\n \"\"\"\n result = [_make_indexable(X) for X in iterables]\n check_consistent_length(*result)\n return result" + }, + { + "id": "scikit-learn/sklearn.utils/Bunch/__dir__", + "name": "__dir__", + "qname": "sklearn.utils.Bunch.__dir__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/Bunch/__dir__/self", + "name": "self", + "qname": "sklearn.utils.Bunch.__dir__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __dir__(self):\n return self.keys()" + }, + { + "id": "scikit-learn/sklearn.utils/Bunch/__getattr__", + "name": "__getattr__", + "qname": "sklearn.utils.Bunch.__getattr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/Bunch/__getattr__/self", + "name": "self", + "qname": "sklearn.utils.Bunch.__getattr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/Bunch/__getattr__/key", + "name": "key", + "qname": "sklearn.utils.Bunch.__getattr__.key", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __getattr__(self, key):\n try:\n return self[key]\n except KeyError:\n raise AttributeError(key)" + }, + { + "id": "scikit-learn/sklearn.utils/Bunch/__init__", + "name": "__init__", + "qname": "sklearn.utils.Bunch.__init__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/Bunch/__init__/self", + "name": "self", + "qname": "sklearn.utils.Bunch.__init__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/Bunch/__init__/kwargs", + "name": "kwargs", + "qname": "sklearn.utils.Bunch.__init__.kwargs", + "default_value": null, + "assigned_by": "NAMED_VARARG", + 
"is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Container object exposing keys as attributes.\n\nBunch objects are sometimes used as an output for functions and methods.\nThey extend dictionaries by enabling values to be accessed by key,\n`bunch[\"value_key\"]`, or by an attribute, `bunch.value_key`.", + "docstring": "", + "code": " def __init__(self, **kwargs):\n super().__init__(kwargs)" + }, + { + "id": "scikit-learn/sklearn.utils/Bunch/__setattr__", + "name": "__setattr__", + "qname": "sklearn.utils.Bunch.__setattr__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/Bunch/__setattr__/self", + "name": "self", + "qname": "sklearn.utils.Bunch.__setattr__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/Bunch/__setattr__/key", + "name": "key", + "qname": "sklearn.utils.Bunch.__setattr__.key", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/Bunch/__setattr__/value", + "name": "value", + "qname": "sklearn.utils.Bunch.__setattr__.value", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __setattr__(self, key, value):\n self[key] = value" + }, + { + "id": "scikit-learn/sklearn.utils/Bunch/__setstate__", + "name": "__setstate__", + "qname": "sklearn.utils.Bunch.__setstate__", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/Bunch/__setstate__/self", + "name": "self", + "qname": "sklearn.utils.Bunch.__setstate__.self", + "default_value": null, + "assigned_by": "IMPLICIT", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/Bunch/__setstate__/state", + "name": "state", + "qname": "sklearn.utils.Bunch.__setstate__.state", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "", + "docstring": "", + "code": " def __setstate__(self, state):\n # Bunch pickles generated with scikit-learn 0.16.* have an non\n # empty __dict__. This causes a surprising behaviour when\n # loading these pickles scikit-learn 0.17: reading bunch.key\n # uses __dict__ but assigning to bunch.key use __setattr__ and\n # only changes bunch['key']. 
More details can be found at:\n # https://github.com/scikit-learn/scikit-learn/issues/6196.\n # Overriding __setstate__ to be a noop has the effect of\n # ignoring the pickled __dict__\n pass" + }, + { + "id": "scikit-learn/sklearn.utils/_approximate_mode", + "name": "_approximate_mode", + "qname": "sklearn.utils._approximate_mode", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/_approximate_mode/class_counts", + "name": "class_counts", + "qname": "sklearn.utils._approximate_mode.class_counts", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "ndarray of int", + "default_value": "", + "description": "Population per class." + }, + "type": { + "kind": "NamedType", + "name": "ndarray of int" + } + }, + { + "id": "scikit-learn/sklearn.utils/_approximate_mode/n_draws", + "name": "n_draws", + "qname": "sklearn.utils._approximate_mode.n_draws", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of draws (samples to draw) from the overall population." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils/_approximate_mode/rng", + "name": "rng", + "qname": "sklearn.utils._approximate_mode.rng", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "random state", + "default_value": "", + "description": "Used to break ties." + }, + "type": { + "kind": "NamedType", + "name": "random state" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Computes approximate mode of multivariate hypergeometric.\n\nThis is an approximation to the mode of the multivariate\nhypergeometric given by class_counts and n_draws.\nIt shouldn't be off by more than one.\n\nIt is the most likely outcome of drawing n_draws many\nsamples from the population given by class_counts.", + "docstring": "Computes approximate mode of multivariate hypergeometric.\n\nThis is an approximation to the mode of the multivariate\nhypergeometric given by class_counts and n_draws.\nIt shouldn't be off by more than one.\n\nIt is the most likely outcome of drawing n_draws many\nsamples from the population given by class_counts.\n\nParameters\n----------\nclass_counts : ndarray of int\n Population per class.\nn_draws : int\n Number of draws (samples to draw) from the overall population.\nrng : random state\n Used to break ties.\n\nReturns\n-------\nsampled_classes : ndarray of int\n Number of samples drawn from each class.\n np.sum(sampled_classes) == n_draws\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils import _approximate_mode\n>>> _approximate_mode(class_counts=np.array([4, 2]), n_draws=3, rng=0)\narray([2, 1])\n>>> _approximate_mode(class_counts=np.array([5, 2]), n_draws=4, rng=0)\narray([3, 1])\n>>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),\n... n_draws=2, rng=0)\narray([0, 1, 1, 0])\n>>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),\n... 
n_draws=2, rng=42)\narray([1, 1, 0, 0])", + "code": "def _approximate_mode(class_counts, n_draws, rng):\n \"\"\"Computes approximate mode of multivariate hypergeometric.\n\n This is an approximation to the mode of the multivariate\n hypergeometric given by class_counts and n_draws.\n It shouldn't be off by more than one.\n\n It is the most likely outcome of drawing n_draws many\n samples from the population given by class_counts.\n\n Parameters\n ----------\n class_counts : ndarray of int\n Population per class.\n n_draws : int\n Number of draws (samples to draw) from the overall population.\n rng : random state\n Used to break ties.\n\n Returns\n -------\n sampled_classes : ndarray of int\n Number of samples drawn from each class.\n np.sum(sampled_classes) == n_draws\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.utils import _approximate_mode\n >>> _approximate_mode(class_counts=np.array([4, 2]), n_draws=3, rng=0)\n array([2, 1])\n >>> _approximate_mode(class_counts=np.array([5, 2]), n_draws=4, rng=0)\n array([3, 1])\n >>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),\n ... n_draws=2, rng=0)\n array([0, 1, 1, 0])\n >>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),\n ... n_draws=2, rng=42)\n array([1, 1, 0, 0])\n \"\"\"\n rng = check_random_state(rng)\n # this computes a bad approximation to the mode of the\n # multivariate hypergeometric given by class_counts and n_draws\n continuous = n_draws * class_counts / class_counts.sum()\n # floored means we don't overshoot n_samples, but probably undershoot\n floored = np.floor(continuous)\n # we add samples according to how much \"left over\" probability\n # they had, until we arrive at n_samples\n need_to_add = int(n_draws - floored.sum())\n if need_to_add > 0:\n remainder = continuous - floored\n values = np.sort(np.unique(remainder))[::-1]\n # add according to remainder, but break ties\n # randomly to avoid biases\n for value in values:\n inds, = np.where(remainder == value)\n # if we need_to_add less than what's in inds\n # we draw randomly from them.\n # if we need to add more, we add them all and\n # go to the next value\n add_now = min(len(inds), need_to_add)\n inds = rng.choice(inds, size=add_now, replace=False)\n floored[inds] += 1\n need_to_add -= add_now\n if need_to_add == 0:\n break\n return floored.astype(int)" + }, + { + "id": "scikit-learn/sklearn.utils/_array_indexing", + "name": "_array_indexing", + "qname": "sklearn.utils._array_indexing", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/_array_indexing/array", + "name": "array", + "qname": "sklearn.utils._array_indexing.array", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/_array_indexing/key", + "name": "key", + "qname": "sklearn.utils._array_indexing.key", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/_array_indexing/key_dtype", + "name": "key_dtype", + "qname": "sklearn.utils._array_indexing.key_dtype", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/_array_indexing/axis", + "name": "axis", 
+ "qname": "sklearn.utils._array_indexing.axis", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Index an array or scipy.sparse consistently across NumPy version.", + "docstring": "Index an array or scipy.sparse consistently across NumPy version.", + "code": "def _array_indexing(array, key, key_dtype, axis):\n \"\"\"Index an array or scipy.sparse consistently across NumPy version.\"\"\"\n if np_version < parse_version('1.12') or issparse(array):\n # FIXME: Remove the check for NumPy when using >= 1.12\n # check if we have an boolean array-likes to make the proper indexing\n if key_dtype == 'bool':\n key = np.asarray(key)\n if isinstance(key, tuple):\n key = list(key)\n return array[key] if axis == 0 else array[:, key]" + }, + { + "id": "scikit-learn/sklearn.utils/_chunk_generator", + "name": "_chunk_generator", + "qname": "sklearn.utils._chunk_generator", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/_chunk_generator/gen", + "name": "gen", + "qname": "sklearn.utils._chunk_generator.gen", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/_chunk_generator/chunksize", + "name": "chunksize", + "qname": "sklearn.utils._chunk_generator.chunksize", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Chunk generator, ``gen`` into lists of length ``chunksize``. The last\nchunk may have a length less than ``chunksize``.", + "docstring": "Chunk generator, ``gen`` into lists of length ``chunksize``. The last\nchunk may have a length less than ``chunksize``.", + "code": "def _chunk_generator(gen, chunksize):\n \"\"\"Chunk generator, ``gen`` into lists of length ``chunksize``. The last\n chunk may have a length less than ``chunksize``.\"\"\"\n while True:\n chunk = list(islice(gen, chunksize))\n if chunk:\n yield chunk\n else:\n return" + }, + { + "id": "scikit-learn/sklearn.utils/_determine_key_type", + "name": "_determine_key_type", + "qname": "sklearn.utils._determine_key_type", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/_determine_key_type/key", + "name": "key", + "qname": "sklearn.utils._determine_key_type.key", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "scalar, slice or array-like", + "default_value": "", + "description": "The key from which we want to infer the data type." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "scalar" + }, + { + "kind": "NamedType", + "name": "slice" + }, + { + "kind": "NamedType", + "name": "array-like" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils/_determine_key_type/accept_slice", + "name": "accept_slice", + "qname": "sklearn.utils._determine_key_type.accept_slice", + "default_value": "True", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether or not to raise an error if the key is a slice." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Determine the data type of key.", + "docstring": "Determine the data type of key.\n\nParameters\n----------\nkey : scalar, slice or array-like\n The key from which we want to infer the data type.\n\naccept_slice : bool, default=True\n Whether or not to raise an error if the key is a slice.\n\nReturns\n-------\ndtype : {'int', 'str', 'bool', None}\n Returns the data type of key.", + "code": "def _determine_key_type(key, accept_slice=True):\n \"\"\"Determine the data type of key.\n\n Parameters\n ----------\n key : scalar, slice or array-like\n The key from which we want to infer the data type.\n\n accept_slice : bool, default=True\n Whether or not to raise an error if the key is a slice.\n\n Returns\n -------\n dtype : {'int', 'str', 'bool', None}\n Returns the data type of key.\n \"\"\"\n err_msg = (\"No valid specification of the columns. Only a scalar, list or \"\n \"slice of all integers or all strings, or boolean mask is \"\n \"allowed\")\n\n dtype_to_str = {int: 'int', str: 'str', bool: 'bool', np.bool_: 'bool'}\n array_dtype_to_str = {'i': 'int', 'u': 'int', 'b': 'bool', 'O': 'str',\n 'U': 'str', 'S': 'str'}\n\n if key is None:\n return None\n if isinstance(key, tuple(dtype_to_str.keys())):\n try:\n return dtype_to_str[type(key)]\n except KeyError:\n raise ValueError(err_msg)\n if isinstance(key, slice):\n if not accept_slice:\n raise TypeError(\n 'Only array-like or scalar are supported. '\n 'A Python slice was given.'\n )\n if key.start is None and key.stop is None:\n return None\n key_start_type = _determine_key_type(key.start)\n key_stop_type = _determine_key_type(key.stop)\n if key_start_type is not None and key_stop_type is not None:\n if key_start_type != key_stop_type:\n raise ValueError(err_msg)\n if key_start_type is not None:\n return key_start_type\n return key_stop_type\n if isinstance(key, (list, tuple)):\n unique_key = set(key)\n key_type = {_determine_key_type(elt) for elt in unique_key}\n if not key_type:\n return None\n if len(key_type) != 1:\n raise ValueError(err_msg)\n return key_type.pop()\n if hasattr(key, 'dtype'):\n try:\n return array_dtype_to_str[key.dtype.kind]\n except KeyError:\n raise ValueError(err_msg)\n raise ValueError(err_msg)" + }, + { + "id": "scikit-learn/sklearn.utils/_get_column_indices", + "name": "_get_column_indices", + "qname": "sklearn.utils._get_column_indices", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/_get_column_indices/X", + "name": "X", + "qname": "sklearn.utils._get_column_indices.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/_get_column_indices/key", + "name": "key", + "qname": "sklearn.utils._get_column_indices.key", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Get feature column indices for input data X and key.\n\nFor accepted values of `key`, see the docstring of\n:func:`_safe_indexing_column`.", + "docstring": "Get feature column indices for input data X and key.\n\nFor accepted values of `key`, see the docstring of\n:func:`_safe_indexing_column`.", + 
"code": "def _get_column_indices(X, key):\n \"\"\"Get feature column indices for input data X and key.\n\n For accepted values of `key`, see the docstring of\n :func:`_safe_indexing_column`.\n \"\"\"\n n_columns = X.shape[1]\n\n key_dtype = _determine_key_type(key)\n\n if isinstance(key, (list, tuple)) and not key:\n # we get an empty list\n return []\n elif key_dtype in ('bool', 'int'):\n # Convert key into positive indexes\n try:\n idx = _safe_indexing(np.arange(n_columns), key)\n except IndexError as e:\n raise ValueError(\n 'all features must be in [0, {}] or [-{}, 0]'\n .format(n_columns - 1, n_columns)\n ) from e\n return np.atleast_1d(idx).tolist()\n elif key_dtype == 'str':\n try:\n all_columns = X.columns\n except AttributeError:\n raise ValueError(\"Specifying the columns using strings is only \"\n \"supported for pandas DataFrames\")\n if isinstance(key, str):\n columns = [key]\n elif isinstance(key, slice):\n start, stop = key.start, key.stop\n if start is not None:\n start = all_columns.get_loc(start)\n if stop is not None:\n # pandas indexing with strings is endpoint included\n stop = all_columns.get_loc(stop) + 1\n else:\n stop = n_columns + 1\n return list(range(n_columns)[slice(start, stop)])\n else:\n columns = list(key)\n\n try:\n column_indices = []\n for col in columns:\n col_idx = all_columns.get_loc(col)\n if not isinstance(col_idx, numbers.Integral):\n raise ValueError(f\"Selected columns, {columns}, are not \"\n \"unique in dataframe\")\n column_indices.append(col_idx)\n\n except KeyError as e:\n raise ValueError(\n \"A given column is not a column of the dataframe\"\n ) from e\n\n return column_indices\n else:\n raise ValueError(\"No valid specification of the columns. Only a \"\n \"scalar, list or slice of all integers or all \"\n \"strings, or boolean mask is allowed\")" + }, + { + "id": "scikit-learn/sklearn.utils/_list_indexing", + "name": "_list_indexing", + "qname": "sklearn.utils._list_indexing", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/_list_indexing/X", + "name": "X", + "qname": "sklearn.utils._list_indexing.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/_list_indexing/key", + "name": "key", + "qname": "sklearn.utils._list_indexing.key", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/_list_indexing/key_dtype", + "name": "key_dtype", + "qname": "sklearn.utils._list_indexing.key_dtype", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Index a Python list.", + "docstring": "Index a Python list.", + "code": "def _list_indexing(X, key, key_dtype):\n \"\"\"Index a Python list.\"\"\"\n if np.isscalar(key) or isinstance(key, slice):\n # key is a slice or a scalar\n return X[key]\n if key_dtype == 'bool':\n # key is a boolean array-like\n return list(compress(X, key))\n # key is a integer array-like of key\n return [X[idx] for idx in key]" + }, + { + "id": "scikit-learn/sklearn.utils/_message_with_time", + "name": "_message_with_time", + "qname": 
"sklearn.utils._message_with_time", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/_message_with_time/source", + "name": "source", + "qname": "sklearn.utils._message_with_time.source", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "String indicating the source or the reference of the message." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.utils/_message_with_time/message", + "name": "message", + "qname": "sklearn.utils._message_with_time.message", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "Short message." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.utils/_message_with_time/time", + "name": "time", + "qname": "sklearn.utils._message_with_time.time", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "", + "description": "Time in seconds." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Create one line message for logging purposes.", + "docstring": "Create one line message for logging purposes.\n\nParameters\n----------\nsource : str\n String indicating the source or the reference of the message.\n\nmessage : str\n Short message.\n\ntime : int\n Time in seconds.", + "code": "def _message_with_time(source, message, time):\n \"\"\"Create one line message for logging purposes.\n\n Parameters\n ----------\n source : str\n String indicating the source or the reference of the message.\n\n message : str\n Short message.\n\n time : int\n Time in seconds.\n \"\"\"\n start_message = \"[%s] \" % source\n\n # adapted from joblib.logger.short_format_time without the Windows -.1s\n # adjustment\n if time > 60:\n time_str = \"%4.1fmin\" % (time / 60)\n else:\n time_str = \" %5.1fs\" % time\n end_message = \" %s, total=%s\" % (message, time_str)\n dots_len = (70 - len(start_message) - len(end_message))\n return \"%s%s%s\" % (start_message, dots_len * '.', end_message)" + }, + { + "id": "scikit-learn/sklearn.utils/_pandas_indexing", + "name": "_pandas_indexing", + "qname": "sklearn.utils._pandas_indexing", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/_pandas_indexing/X", + "name": "X", + "qname": "sklearn.utils._pandas_indexing.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/_pandas_indexing/key", + "name": "key", + "qname": "sklearn.utils._pandas_indexing.key", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/_pandas_indexing/key_dtype", + "name": "key_dtype", + "qname": "sklearn.utils._pandas_indexing.key_dtype", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + }, + { + "id": "scikit-learn/sklearn.utils/_pandas_indexing/axis", + "name": "axis", + "qname": 
"sklearn.utils._pandas_indexing.axis", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Index a pandas dataframe or a series.", + "docstring": "Index a pandas dataframe or a series.", + "code": "def _pandas_indexing(X, key, key_dtype, axis):\n \"\"\"Index a pandas dataframe or a series.\"\"\"\n if hasattr(key, 'shape'):\n # Work-around for indexing with read-only key in pandas\n # FIXME: solved in pandas 0.25\n key = np.asarray(key)\n key = key if key.flags.writeable else key.copy()\n elif isinstance(key, tuple):\n key = list(key)\n # check whether we should index with loc or iloc\n indexer = X.iloc if key_dtype == 'int' else X.loc\n return indexer[:, key] if axis else indexer[key]" + }, + { + "id": "scikit-learn/sklearn.utils/_print_elapsed_time", + "name": "_print_elapsed_time", + "qname": "sklearn.utils._print_elapsed_time", + "decorators": ["contextmanager"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/_print_elapsed_time/source", + "name": "source", + "qname": "sklearn.utils._print_elapsed_time.source", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "", + "description": "String indicating the source or the reference of the message." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + }, + { + "id": "scikit-learn/sklearn.utils/_print_elapsed_time/message", + "name": "message", + "qname": "sklearn.utils._print_elapsed_time.message", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "str", + "default_value": "None", + "description": "Short message. If None, nothing will be printed." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Log elapsed time to stdout when the context is exited.", + "docstring": "Log elapsed time to stdout when the context is exited.\n\nParameters\n----------\nsource : str\n String indicating the source or the reference of the message.\n\nmessage : str, default=None\n Short message. If None, nothing will be printed.\n\nReturns\n-------\ncontext_manager\n Prints elapsed time upon exit if verbose.", + "code": "@contextmanager\ndef _print_elapsed_time(source, message=None):\n \"\"\"Log elapsed time to stdout when the context is exited.\n\n Parameters\n ----------\n source : str\n String indicating the source or the reference of the message.\n\n message : str, default=None\n Short message. 
If None, nothing will be printed.\n\n Returns\n -------\n context_manager\n Prints elapsed time upon exit if verbose.\n \"\"\"\n if message is None:\n yield\n else:\n start = timeit.default_timer()\n yield\n print(\n _message_with_time(source, message,\n timeit.default_timer() - start))" + }, + { + "id": "scikit-learn/sklearn.utils/_safe_indexing", + "name": "_safe_indexing", + "qname": "sklearn.utils._safe_indexing", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/_safe_indexing/X", + "name": "X", + "qname": "sklearn.utils._safe_indexing.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series", + "default_value": "", + "description": "Data from which to sample rows, items or columns. `list` are only\nsupported when `axis=0`." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "array-like" + }, + { + "kind": "NamedType", + "name": "sparse-matrix" + }, + { + "kind": "NamedType", + "name": "list" + }, + { + "kind": "NamedType", + "name": "pandas.DataFrame" + }, + { + "kind": "NamedType", + "name": "pandas.Series" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils/_safe_indexing/indices", + "name": "indices", + "qname": "sklearn.utils._safe_indexing.indices", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "bool, int, str, slice, array-like", + "default_value": "", + "description": "- If `axis=0`, boolean and integer array-like, integer slice,\n and scalar integer are supported.\n- If `axis=1`:\n - to select a single column, `indices` can be of `int` type for\n all `X` types and `str` only for dataframe. The selected subset\n will be 1D, unless `X` is a sparse matrix in which case it will\n be 2D.\n - to select multiple columns, `indices` can be one of the\n following: `list`, `array`, `slice`. The type used in\n these containers can be one of the following: `int`, 'bool' and\n `str`. However, `str` is only supported when `X` is a dataframe.\n The selected subset will be 2D." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "bool" + }, + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "str" + }, + { + "kind": "NamedType", + "name": "slice" + }, + { + "kind": "NamedType", + "name": "array-like" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils/_safe_indexing/axis", + "name": "axis", + "qname": "sklearn.utils._safe_indexing.axis", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": false, + "docstring": { + "type": "int", + "default_value": "0", + "description": "The axis along which `X` will be subsampled. `axis=0` will select\nrows while `axis=1` will select columns." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Return rows, items or columns of X using indices.\n\n.. warning::\n\n This utility is documented, but **private**. This means that\n backward compatibility might be broken without any deprecation\n cycle.", + "docstring": "Return rows, items or columns of X using indices.\n\n.. warning::\n\n This utility is documented, but **private**. This means that\n backward compatibility might be broken without any deprecation\n cycle.\n\nParameters\n----------\nX : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series\n Data from which to sample rows, items or columns. `list` are only\n supported when `axis=0`.\nindices : bool, int, str, slice, array-like\n - If `axis=0`, boolean and integer array-like, integer slice,\n and scalar integer are supported.\n - If `axis=1`:\n - to select a single column, `indices` can be of `int` type for\n all `X` types and `str` only for dataframe. The selected subset\n will be 1D, unless `X` is a sparse matrix in which case it will\n be 2D.\n - to select multiple columns, `indices` can be one of the\n following: `list`, `array`, `slice`. The type used in\n these containers can be one of the following: `int`, 'bool' and\n `str`. However, `str` is only supported when `X` is a dataframe.\n The selected subset will be 2D.\naxis : int, default=0\n The axis along which `X` will be subsampled. `axis=0` will select\n rows while `axis=1` will select columns.\n\nReturns\n-------\nsubset\n Subset of X on axis 0 or 1.\n\nNotes\n-----\nCSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are\nnot supported.", + "code": "def _safe_indexing(X, indices, *, axis=0):\n \"\"\"Return rows, items or columns of X using indices.\n\n .. warning::\n\n This utility is documented, but **private**. This means that\n backward compatibility might be broken without any deprecation\n cycle.\n\n Parameters\n ----------\n X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series\n Data from which to sample rows, items or columns. `list` are only\n supported when `axis=0`.\n indices : bool, int, str, slice, array-like\n - If `axis=0`, boolean and integer array-like, integer slice,\n and scalar integer are supported.\n - If `axis=1`:\n - to select a single column, `indices` can be of `int` type for\n all `X` types and `str` only for dataframe. The selected subset\n will be 1D, unless `X` is a sparse matrix in which case it will\n be 2D.\n - to select multiple columns, `indices` can be one of the\n following: `list`, `array`, `slice`. The type used in\n these containers can be one of the following: `int`, 'bool' and\n `str`. However, `str` is only supported when `X` is a dataframe.\n The selected subset will be 2D.\n axis : int, default=0\n The axis along which `X` will be subsampled. `axis=0` will select\n rows while `axis=1` will select columns.\n\n Returns\n -------\n subset\n Subset of X on axis 0 or 1.\n\n Notes\n -----\n CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are\n not supported.\n \"\"\"\n if indices is None:\n return X\n\n if axis not in (0, 1):\n raise ValueError(\n \"'axis' should be either 0 (to index rows) or 1 (to index \"\n \" column). Got {} instead.\".format(axis)\n )\n\n indices_dtype = _determine_key_type(indices)\n\n if axis == 0 and indices_dtype == 'str':\n raise ValueError(\n \"String indexing is not supported with 'axis=0'\"\n )\n\n if axis == 1 and X.ndim != 2:\n raise ValueError(\n \"'X' should be a 2D NumPy array, 2D sparse matrix or pandas \"\n \"dataframe when indexing the columns (i.e. 'axis=1'). 
\"\n \"Got {} instead with {} dimension(s).\".format(type(X), X.ndim)\n )\n\n if axis == 1 and indices_dtype == 'str' and not hasattr(X, 'loc'):\n raise ValueError(\n \"Specifying the columns using strings is only supported for \"\n \"pandas DataFrames\"\n )\n\n if hasattr(X, \"iloc\"):\n return _pandas_indexing(X, indices, indices_dtype, axis=axis)\n elif hasattr(X, \"shape\"):\n return _array_indexing(X, indices, indices_dtype, axis=axis)\n else:\n return _list_indexing(X, indices, indices_dtype)" + }, + { + "id": "scikit-learn/sklearn.utils/_to_object_array", + "name": "_to_object_array", + "qname": "sklearn.utils._to_object_array", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/_to_object_array/sequence", + "name": "sequence", + "qname": "sklearn.utils._to_object_array.sequence", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": false, + "docstring": { + "type": "array-like of shape (n_elements,)", + "default_value": "", + "description": "The sequence to be converted." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_elements,)" + } + } + ], + "results": [], + "is_public": false, + "reexported_by": [], + "description": "Convert sequence to a 1-D NumPy array of object dtype.\n\nnumpy.array constructor has a similar use but it's output\nis ambiguous. It can be 1-D NumPy array of object dtype if\nthe input is a ragged array, but if the input is a list of\nequal length arrays, then the output is a 2D numpy.array.\n_to_object_array solves this ambiguity by guarantying that\nthe output is a 1-D NumPy array of objects for any input.", + "docstring": "Convert sequence to a 1-D NumPy array of object dtype.\n\nnumpy.array constructor has a similar use but it's output\nis ambiguous. It can be 1-D NumPy array of object dtype if\nthe input is a ragged array, but if the input is a list of\nequal length arrays, then the output is a 2D numpy.array.\n_to_object_array solves this ambiguity by guarantying that\nthe output is a 1-D NumPy array of objects for any input.\n\nParameters\n----------\nsequence : array-like of shape (n_elements,)\n The sequence to be converted.\n\nReturns\n-------\nout : ndarray of shape (n_elements,), dtype=object\n The converted sequence into a 1-D NumPy array of object dtype.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.utils import _to_object_array\n>>> _to_object_array([np.array([0]), np.array([1])])\narray([array([0]), array([1])], dtype=object)\n>>> _to_object_array([np.array([0]), np.array([1, 2])])\narray([array([0]), array([1, 2])], dtype=object)\n>>> _to_object_array([np.array([0]), np.array([1, 2])])\narray([array([0]), array([1, 2])], dtype=object)", + "code": "def _to_object_array(sequence):\n \"\"\"Convert sequence to a 1-D NumPy array of object dtype.\n\n numpy.array constructor has a similar use but it's output\n is ambiguous. 
It can be 1-D NumPy array of object dtype if\n the input is a ragged array, but if the input is a list of\n equal length arrays, then the output is a 2D numpy.array.\n _to_object_array solves this ambiguity by guaranteeing that\n the output is a 1-D NumPy array of objects for any input.\n\n Parameters\n ----------\n sequence : array-like of shape (n_elements,)\n The sequence to be converted.\n\n Returns\n -------\n out : ndarray of shape (n_elements,), dtype=object\n The converted sequence into a 1-D NumPy array of object dtype.\n\n Examples\n --------\n >>> import numpy as np\n >>> from sklearn.utils import _to_object_array\n >>> _to_object_array([np.array([0]), np.array([1])])\n array([array([0]), array([1])], dtype=object)\n >>> _to_object_array([np.array([0]), np.array([1, 2])])\n array([array([0]), array([1, 2])], dtype=object)\n >>> _to_object_array([np.array([0]), np.array([1, 2])])\n array([array([0]), array([1, 2])], dtype=object)\n \"\"\"\n out = np.empty(len(sequence), dtype=object)\n out[:] = sequence\n return out" + }, + { + "id": "scikit-learn/sklearn.utils/all_estimators", + "name": "all_estimators", + "qname": "sklearn.utils.all_estimators", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/all_estimators/type_filter", + "name": "type_filter", + "qname": "sklearn.utils.all_estimators.type_filter", + "default_value": "None", + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{\"classifier\", \"regressor\", \"cluster\", \"transformer\"} or list of such str", + "default_value": "None", + "description": "Which kind of estimators should be returned. If None, no filter is\napplied and all estimators are returned. Possible values are\n'classifier', 'regressor', 'cluster' and 'transformer' to get\nestimators only of these specific types, or a list of these to\nget the estimators that fit at least one of the types." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "EnumType", + "values": ["cluster", "classifier", "transformer", "regressor"] + }, + { + "kind": "NamedType", + "name": "list of such str" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Get a list of all estimators from sklearn.\n\nThis function crawls the module and gets all classes that inherit\nfrom BaseEstimator. Classes that are defined in test-modules are not\nincluded.", + "docstring": "Get a list of all estimators from sklearn.\n\nThis function crawls the module and gets all classes that inherit\nfrom BaseEstimator. Classes that are defined in test-modules are not\nincluded.\n\nParameters\n----------\ntype_filter : {\"classifier\", \"regressor\", \"cluster\", \"transformer\"} or list of such str, default=None\n Which kind of estimators should be returned. If None, no filter is\n applied and all estimators are returned. Possible values are\n 'classifier', 'regressor', 'cluster' and 'transformer' to get\n estimators only of these specific types, or a list of these to\n get the estimators that fit at least one of the types.\n\nReturns\n-------\nestimators : list of tuples\n List of (name, class), where ``name`` is the class name as string\n and ``class`` is the actual type of the class.", + "code": "def all_estimators(type_filter=None):\n \"\"\"Get a list of all estimators from sklearn.\n\n This function crawls the module and gets all classes that inherit\n from BaseEstimator. 
Classes that are defined in test-modules are not\n included.\n\n Parameters\n ----------\n type_filter : {\"classifier\", \"regressor\", \"cluster\", \"transformer\"} \\\n or list of such str, default=None\n Which kind of estimators should be returned. If None, no filter is\n applied and all estimators are returned. Possible values are\n 'classifier', 'regressor', 'cluster' and 'transformer' to get\n estimators only of these specific types, or a list of these to\n get the estimators that fit at least one of the types.\n\n Returns\n -------\n estimators : list of tuples\n List of (name, class), where ``name`` is the class name as string\n and ``class`` is the actual type of the class.\n \"\"\"\n # lazy import to avoid circular imports from sklearn.base\n from ._testing import ignore_warnings\n from ..base import (BaseEstimator, ClassifierMixin, RegressorMixin,\n TransformerMixin, ClusterMixin)\n\n def is_abstract(c):\n if not(hasattr(c, '__abstractmethods__')):\n return False\n if not len(c.__abstractmethods__):\n return False\n return True\n\n all_classes = []\n modules_to_ignore = {\"tests\", \"externals\", \"setup\", \"conftest\"}\n root = str(Path(__file__).parent.parent) # sklearn package\n # Ignore deprecation warnings triggered at import time and from walking\n # packages\n with ignore_warnings(category=FutureWarning):\n for importer, modname, ispkg in pkgutil.walk_packages(\n path=[root], prefix='sklearn.'):\n mod_parts = modname.split(\".\")\n if (any(part in modules_to_ignore for part in mod_parts)\n or '._' in modname):\n continue\n module = import_module(modname)\n classes = inspect.getmembers(module, inspect.isclass)\n classes = [(name, est_cls) for name, est_cls in classes\n if not name.startswith(\"_\")]\n\n # TODO: Remove when FeatureHasher is implemented in PYPY\n # Skips FeatureHasher for PYPY\n if IS_PYPY and 'feature_extraction' in modname:\n classes = [(name, est_cls) for name, est_cls in classes\n if name == \"FeatureHasher\"]\n\n all_classes.extend(classes)\n\n all_classes = set(all_classes)\n\n estimators = [c for c in all_classes\n if (issubclass(c[1], BaseEstimator) and\n c[0] != 'BaseEstimator')]\n # get rid of abstract base classes\n estimators = [c for c in estimators if not is_abstract(c[1])]\n\n if type_filter is not None:\n if not isinstance(type_filter, list):\n type_filter = [type_filter]\n else:\n type_filter = list(type_filter) # copy\n filtered_estimators = []\n filters = {'classifier': ClassifierMixin,\n 'regressor': RegressorMixin,\n 'transformer': TransformerMixin,\n 'cluster': ClusterMixin}\n for name, mixin in filters.items():\n if name in type_filter:\n type_filter.remove(name)\n filtered_estimators.extend([est for est in estimators\n if issubclass(est[1], mixin)])\n estimators = filtered_estimators\n if type_filter:\n raise ValueError(\"Parameter type_filter must be 'classifier', \"\n \"'regressor', 'transformer', 'cluster' or \"\n \"None, got\"\n \" %s.\" % repr(type_filter))\n\n # drop duplicates, sort for reproducibility\n # itemgetter is used to ensure the sort does not extend to the 2nd item of\n # the tuple\n return sorted(set(estimators), key=itemgetter(0))" + }, + { + "id": "scikit-learn/sklearn.utils/axis0_safe_slice", + "name": "axis0_safe_slice", + "qname": "sklearn.utils.axis0_safe_slice", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/axis0_safe_slice/X", + "name": "X", + "qname": "sklearn.utils.axis0_safe_slice.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + 
"docstring": { + "type": "{array-like, sparse matrix}", + "default_value": "", + "description": "Data on which to apply mask." + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils/axis0_safe_slice/mask", + "name": "mask", + "qname": "sklearn.utils.axis0_safe_slice.mask", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "Mask to be used on X." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + }, + { + "id": "scikit-learn/sklearn.utils/axis0_safe_slice/len_mask", + "name": "len_mask", + "qname": "sklearn.utils.axis0_safe_slice.len_mask", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "The length of the mask." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "This mask is safer than safe_mask since it returns an\nempty array, when a sparse matrix is sliced with a boolean mask\nwith all False, instead of raising an unhelpful error in older\nversions of SciPy.\n\nSee: https://github.com/scipy/scipy/issues/5361\n\nAlso note that we can avoid doing the dot product by checking if\nthe len_mask is not zero in _huber_loss_and_gradient but this\nis not going to be the bottleneck, since the number of outliers\nand non_outliers are typically non-zero and it makes the code\ntougher to follow.", + "docstring": "This mask is safer than safe_mask since it returns an\nempty array, when a sparse matrix is sliced with a boolean mask\nwith all False, instead of raising an unhelpful error in older\nversions of SciPy.\n\nSee: https://github.com/scipy/scipy/issues/5361\n\nAlso note that we can avoid doing the dot product by checking if\nthe len_mask is not zero in _huber_loss_and_gradient but this\nis not going to be the bottleneck, since the number of outliers\nand non_outliers are typically non-zero and it makes the code\ntougher to follow.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n Data on which to apply mask.\n\nmask : ndarray\n Mask to be used on X.\n\nlen_mask : int\n The length of the mask.\n\nReturns\n-------\n mask", + "code": "def axis0_safe_slice(X, mask, len_mask):\n \"\"\"\n This mask is safer than safe_mask since it returns an\n empty array, when a sparse matrix is sliced with a boolean mask\n with all False, instead of raising an unhelpful error in older\n versions of SciPy.\n\n See: https://github.com/scipy/scipy/issues/5361\n\n Also note that we can avoid doing the dot product by checking if\n the len_mask is not zero in _huber_loss_and_gradient but this\n is not going to be the bottleneck, since the number of outliers\n and non_outliers are typically non-zero and it makes the code\n tougher to follow.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}\n Data on which to apply mask.\n\n mask : ndarray\n Mask to be used on X.\n\n len_mask : int\n The length of the mask.\n\n Returns\n -------\n mask\n \"\"\"\n if len_mask != 0:\n return X[safe_mask(X, mask), :]\n return np.zeros(shape=(0, X.shape[1]))" + }, + { + "id": "scikit-learn/sklearn.utils/check_matplotlib_support", + "name": "check_matplotlib_support", + "qname": "sklearn.utils.check_matplotlib_support", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/check_matplotlib_support/caller_name", + "name": 
"caller_name", + "qname": "sklearn.utils.check_matplotlib_support.caller_name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "str", + "default_value": "", + "description": "The name of the caller that requires matplotlib." + }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Raise ImportError with detailed error message if mpl is not installed.\n\nPlot utilities like :func:`plot_partial_dependence` should lazily import\nmatplotlib and call this helper before any computation.", + "docstring": "Raise ImportError with detailed error message if mpl is not installed.\n\nPlot utilities like :func:`plot_partial_dependence` should lazily import\nmatplotlib and call this helper before any computation.\n\nParameters\n----------\ncaller_name : str\n The name of the caller that requires matplotlib.", + "code": "def check_matplotlib_support(caller_name):\n \"\"\"Raise ImportError with detailed error message if mpl is not installed.\n\n Plot utilities like :func:`plot_partial_dependence` should lazily import\n matplotlib and call this helper before any computation.\n\n Parameters\n ----------\n caller_name : str\n The name of the caller that requires matplotlib.\n \"\"\"\n try:\n import matplotlib # noqa\n except ImportError as e:\n raise ImportError(\n \"{} requires matplotlib. You can install matplotlib with \"\n \"`pip install matplotlib`\".format(caller_name)\n ) from e" + }, + { + "id": "scikit-learn/sklearn.utils/check_pandas_support", + "name": "check_pandas_support", + "qname": "sklearn.utils.check_pandas_support", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/check_pandas_support/caller_name", + "name": "caller_name", + "qname": "sklearn.utils.check_pandas_support.caller_name", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "str", + "default_value": "", + "description": "The name of the caller that requires pandas." 
+ }, + "type": { + "kind": "NamedType", + "name": "str" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Raise ImportError with detailed error message if pandas is not\ninstalled.\n\nPlot utilities like :func:`fetch_openml` should lazily import\npandas and call this helper before any computation.", + "docstring": "Raise ImportError with detailed error message if pandas is not\ninstalled.\n\nPlot utilities like :func:`fetch_openml` should lazily import\npandas and call this helper before any computation.\n\nParameters\n----------\ncaller_name : str\n The name of the caller that requires pandas.", + "code": "def check_pandas_support(caller_name):\n \"\"\"Raise ImportError with detailed error message if pandas is not\n installed.\n\n Plot utilities like :func:`fetch_openml` should lazily import\n pandas and call this helper before any computation.\n\n Parameters\n ----------\n caller_name : str\n The name of the caller that requires pandas.\n \"\"\"\n try:\n import pandas # noqa\n return pandas\n except ImportError as e:\n raise ImportError(\n \"{} requires pandas.\".format(caller_name)\n ) from e" + }, + { + "id": "scikit-learn/sklearn.utils/gen_batches", + "name": "gen_batches", + "qname": "sklearn.utils.gen_batches", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/gen_batches/n", + "name": "n", + "qname": "sklearn.utils.gen_batches.n", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils/gen_batches/batch_size", + "name": "batch_size", + "qname": "sklearn.utils.gen_batches.batch_size", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Number of element in each batch." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils/gen_batches/min_batch_size", + "name": "min_batch_size", + "qname": "sklearn.utils.gen_batches.min_batch_size", + "default_value": "0", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "0", + "description": "Minimum batch size to produce." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Generator to create slices containing batch_size elements, from 0 to n.\n\nThe last slice may contain less than batch_size elements, when batch_size\ndoes not divide n.", + "docstring": "Generator to create slices containing batch_size elements, from 0 to n.\n\nThe last slice may contain less than batch_size elements, when batch_size\ndoes not divide n.\n\nParameters\n----------\nn : int\nbatch_size : int\n Number of element in each batch.\nmin_batch_size : int, default=0\n Minimum batch size to produce.\n\nYields\n------\nslice of batch_size elements\n\nSee Also\n--------\ngen_even_slices: Generator to create n_packs slices going up to n.\n\nExamples\n--------\n>>> from sklearn.utils import gen_batches\n>>> list(gen_batches(7, 3))\n[slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n>>> list(gen_batches(6, 3))\n[slice(0, 3, None), slice(3, 6, None)]\n>>> list(gen_batches(2, 3))\n[slice(0, 2, None)]\n>>> list(gen_batches(7, 3, min_batch_size=0))\n[slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n>>> list(gen_batches(7, 3, min_batch_size=2))\n[slice(0, 3, None), slice(3, 7, None)]", + "code": "@_deprecate_positional_args\ndef gen_batches(n, batch_size, *, min_batch_size=0):\n \"\"\"Generator to create slices containing batch_size elements, from 0 to n.\n\n The last slice may contain less than batch_size elements, when batch_size\n does not divide n.\n\n Parameters\n ----------\n n : int\n batch_size : int\n Number of element in each batch.\n min_batch_size : int, default=0\n Minimum batch size to produce.\n\n Yields\n ------\n slice of batch_size elements\n\n See Also\n --------\n gen_even_slices: Generator to create n_packs slices going up to n.\n\n Examples\n --------\n >>> from sklearn.utils import gen_batches\n >>> list(gen_batches(7, 3))\n [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n >>> list(gen_batches(6, 3))\n [slice(0, 3, None), slice(3, 6, None)]\n >>> list(gen_batches(2, 3))\n [slice(0, 2, None)]\n >>> list(gen_batches(7, 3, min_batch_size=0))\n [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]\n >>> list(gen_batches(7, 3, min_batch_size=2))\n [slice(0, 3, None), slice(3, 7, None)]\n \"\"\"\n if not isinstance(batch_size, numbers.Integral):\n raise TypeError(\"gen_batches got batch_size=%s, must be an\"\n \" integer\" % batch_size)\n if batch_size <= 0:\n raise ValueError(\"gen_batches got batch_size=%s, must be\"\n \" positive\" % batch_size)\n start = 0\n for _ in range(int(n // batch_size)):\n end = start + batch_size\n if end + min_batch_size > n:\n continue\n yield slice(start, end)\n start = end\n if start < n:\n yield slice(start, n)" + }, + { + "id": "scikit-learn/sklearn.utils/gen_even_slices", + "name": "gen_even_slices", + "qname": "sklearn.utils.gen_even_slices", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/gen_even_slices/n", + "name": "n", + "qname": "sklearn.utils.gen_even_slices.n", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils/gen_even_slices/n_packs", + "name": "n_packs", + "qname": "sklearn.utils.gen_even_slices.n_packs", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, 
+ "docstring": { + "type": "int", + "default_value": "", + "description": "Number of slices to generate." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils/gen_even_slices/n_samples", + "name": "n_samples", + "qname": "sklearn.utils.gen_even_slices.n_samples", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of samples. Pass n_samples when the slices are to be used for\nsparse matrix indexing; slicing off-the-end raises an exception, while\nit works for NumPy arrays." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Generator to create n_packs slices going up to n.", + "docstring": "Generator to create n_packs slices going up to n.\n\nParameters\n----------\nn : int\nn_packs : int\n Number of slices to generate.\nn_samples : int, default=None\n Number of samples. Pass n_samples when the slices are to be used for\n sparse matrix indexing; slicing off-the-end raises an exception, while\n it works for NumPy arrays.\n\nYields\n------\nslice\n\nSee Also\n--------\ngen_batches: Generator to create slices containing batch_size elements\n from 0 to n.\n\nExamples\n--------\n>>> from sklearn.utils import gen_even_slices\n>>> list(gen_even_slices(10, 1))\n[slice(0, 10, None)]\n>>> list(gen_even_slices(10, 10))\n[slice(0, 1, None), slice(1, 2, None), ..., slice(9, 10, None)]\n>>> list(gen_even_slices(10, 5))\n[slice(0, 2, None), slice(2, 4, None), ..., slice(8, 10, None)]\n>>> list(gen_even_slices(10, 3))\n[slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]", + "code": "@_deprecate_positional_args\ndef gen_even_slices(n, n_packs, *, n_samples=None):\n \"\"\"Generator to create n_packs slices going up to n.\n\n Parameters\n ----------\n n : int\n n_packs : int\n Number of slices to generate.\n n_samples : int, default=None\n Number of samples. 
Pass n_samples when the slices are to be used for\n sparse matrix indexing; slicing off-the-end raises an exception, while\n it works for NumPy arrays.\n\n Yields\n ------\n slice\n\n See Also\n --------\n gen_batches: Generator to create slices containing batch_size elements\n from 0 to n.\n\n Examples\n --------\n >>> from sklearn.utils import gen_even_slices\n >>> list(gen_even_slices(10, 1))\n [slice(0, 10, None)]\n >>> list(gen_even_slices(10, 10))\n [slice(0, 1, None), slice(1, 2, None), ..., slice(9, 10, None)]\n >>> list(gen_even_slices(10, 5))\n [slice(0, 2, None), slice(2, 4, None), ..., slice(8, 10, None)]\n >>> list(gen_even_slices(10, 3))\n [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]\n \"\"\"\n start = 0\n if n_packs < 1:\n raise ValueError(\"gen_even_slices got n_packs=%s, must be >=1\"\n % n_packs)\n for pack_num in range(n_packs):\n this_n = n // n_packs\n if pack_num < n % n_packs:\n this_n += 1\n if this_n > 0:\n end = start + this_n\n if n_samples is not None:\n end = min(n_samples, end)\n yield slice(start, end, None)\n start = end" + }, + { + "id": "scikit-learn/sklearn.utils/get_chunk_n_rows", + "name": "get_chunk_n_rows", + "qname": "sklearn.utils.get_chunk_n_rows", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/get_chunk_n_rows/row_bytes", + "name": "row_bytes", + "qname": "sklearn.utils.get_chunk_n_rows.row_bytes", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "The expected number of bytes of memory that will be consumed\nduring the processing of each row." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils/get_chunk_n_rows/max_n_rows", + "name": "max_n_rows", + "qname": "sklearn.utils.get_chunk_n_rows.max_n_rows", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "The maximum return value." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils/get_chunk_n_rows/working_memory", + "name": "working_memory", + "qname": "sklearn.utils.get_chunk_n_rows.working_memory", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int or float", + "default_value": "None", + "description": "The number of rows to fit inside this number of MiB will be returned.\nWhen None (default), the value of\n``sklearn.get_config()['working_memory']`` is used." 
+ }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "float" + } + ] + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Calculates how many rows can be processed within working_memory.", + "docstring": "Calculates how many rows can be processed within working_memory.\n\nParameters\n----------\nrow_bytes : int\n The expected number of bytes of memory that will be consumed\n during the processing of each row.\nmax_n_rows : int, default=None\n The maximum return value.\nworking_memory : int or float, default=None\n The number of rows to fit inside this number of MiB will be returned.\n When None (default), the value of\n ``sklearn.get_config()['working_memory']`` is used.\n\nReturns\n-------\nint or the value of n_samples\n\nWarns\n-----\nIssues a UserWarning if ``row_bytes`` exceeds ``working_memory`` MiB.", + "code": "@_deprecate_positional_args\ndef get_chunk_n_rows(row_bytes, *, max_n_rows=None, working_memory=None):\n \"\"\"Calculates how many rows can be processed within working_memory.\n\n Parameters\n ----------\n row_bytes : int\n The expected number of bytes of memory that will be consumed\n during the processing of each row.\n max_n_rows : int, default=None\n The maximum return value.\n working_memory : int or float, default=None\n The number of rows to fit inside this number of MiB will be returned.\n When None (default), the value of\n ``sklearn.get_config()['working_memory']`` is used.\n\n Returns\n -------\n int or the value of n_samples\n\n Warns\n -----\n Issues a UserWarning if ``row_bytes`` exceeds ``working_memory`` MiB.\n \"\"\"\n\n if working_memory is None:\n working_memory = get_config()['working_memory']\n\n chunk_n_rows = int(working_memory * (2 ** 20) // row_bytes)\n if max_n_rows is not None:\n chunk_n_rows = min(chunk_n_rows, max_n_rows)\n if chunk_n_rows < 1:\n warnings.warn('Could not adhere to working_memory config. '\n 'Currently %.0fMiB, %.0fMiB required.' %\n (working_memory, np.ceil(row_bytes * 2 ** -20)))\n chunk_n_rows = 1\n return chunk_n_rows" + }, + { + "id": "scikit-learn/sklearn.utils/indices_to_mask", + "name": "indices_to_mask", + "qname": "sklearn.utils.indices_to_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/indices_to_mask/indices", + "name": "indices", + "qname": "sklearn.utils.indices_to_mask.indices", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "list-like", + "default_value": "", + "description": "List of integers treated as indices." + }, + "type": { + "kind": "NamedType", + "name": "list-like" + } + }, + { + "id": "scikit-learn/sklearn.utils/indices_to_mask/mask_length", + "name": "mask_length", + "qname": "sklearn.utils.indices_to_mask.mask_length", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "", + "description": "Length of boolean mask to be generated.\nThis parameter must be greater than max(indices)." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Convert list of indices to boolean mask.", + "docstring": "Convert list of indices to boolean mask.\n\nParameters\n----------\nindices : list-like\n List of integers treated as indices.\nmask_length : int\n Length of boolean mask to be generated.\n This parameter must be greater than max(indices).\n\nReturns\n-------\nmask : 1d boolean nd-array\n Boolean array that is True where indices are present, else False.\n\nExamples\n--------\n>>> from sklearn.utils import indices_to_mask\n>>> indices = [1, 2 , 3, 4]\n>>> indices_to_mask(indices, 5)\narray([False, True, True, True, True])", + "code": "def indices_to_mask(indices, mask_length):\n \"\"\"Convert list of indices to boolean mask.\n\n Parameters\n ----------\n indices : list-like\n List of integers treated as indices.\n mask_length : int\n Length of boolean mask to be generated.\n This parameter must be greater than max(indices).\n\n Returns\n -------\n mask : 1d boolean nd-array\n Boolean array that is True where indices are present, else False.\n\n Examples\n --------\n >>> from sklearn.utils import indices_to_mask\n >>> indices = [1, 2 , 3, 4]\n >>> indices_to_mask(indices, 5)\n array([False, True, True, True, True])\n \"\"\"\n if mask_length <= np.max(indices):\n raise ValueError(\"mask_length must be greater than max(indices)\")\n\n mask = np.zeros(mask_length, dtype=bool)\n mask[indices] = True\n\n return mask" + }, + { + "id": "scikit-learn/sklearn.utils/is_scalar_nan", + "name": "is_scalar_nan", + "qname": "sklearn.utils.is_scalar_nan", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/is_scalar_nan/x", + "name": "x", + "qname": "sklearn.utils.is_scalar_nan.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "any type", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "any type" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Tests if x is NaN.\n\nThis function is meant to overcome the issue that np.isnan does not allow\nnon-numerical types as input, and that np.nan is not float('nan').", + "docstring": "Tests if x is NaN.\n\nThis function is meant to overcome the issue that np.isnan does not allow\nnon-numerical types as input, and that np.nan is not float('nan').\n\nParameters\n----------\nx : any type\n\nReturns\n-------\nboolean\n\nExamples\n--------\n>>> is_scalar_nan(np.nan)\nTrue\n>>> is_scalar_nan(float(\"nan\"))\nTrue\n>>> is_scalar_nan(None)\nFalse\n>>> is_scalar_nan(\"\")\nFalse\n>>> is_scalar_nan([np.nan])\nFalse", + "code": "def is_scalar_nan(x):\n \"\"\"Tests if x is NaN.\n\n This function is meant to overcome the issue that np.isnan does not allow\n non-numerical types as input, and that np.nan is not float('nan').\n\n Parameters\n ----------\n x : any type\n\n Returns\n -------\n boolean\n\n Examples\n --------\n >>> is_scalar_nan(np.nan)\n True\n >>> is_scalar_nan(float(\"nan\"))\n True\n >>> is_scalar_nan(None)\n False\n >>> is_scalar_nan(\"\")\n False\n >>> is_scalar_nan([np.nan])\n False\n \"\"\"\n # convert from numpy.bool_ to python bool to ensure that testing\n # is_scalar_nan(x) is True does not fail.\n return bool(isinstance(x, numbers.Real) and np.isnan(x))" + }, + { + "id": "scikit-learn/sklearn.utils/resample", + "name": "resample", + "qname": "sklearn.utils.resample", + 
"decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/resample/arrays", + "name": "arrays", + "qname": "sklearn.utils.resample.arrays", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": true, + "docstring": { + "type": "sequence of array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "", + "description": "Indexable data-structures can be arrays, lists, dataframes or scipy\nsparse matrices with consistent first dimension." + }, + "type": { + "kind": "NamedType", + "name": "sequence of array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + }, + { + "id": "scikit-learn/sklearn.utils/resample/replace", + "name": "replace", + "qname": "sklearn.utils.resample.replace", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Implements resampling with replacement. If False, this will implement\n(sliced) random permutations." + }, + "type": { + "kind": "NamedType", + "name": "bool" + } + }, + { + "id": "scikit-learn/sklearn.utils/resample/n_samples", + "name": "n_samples", + "qname": "sklearn.utils.resample.n_samples", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of samples to generate. If left to None this is\nautomatically set to the first dimension of the arrays.\nIf replace is False it should not be larger than the length of\narrays." + }, + "type": { + "kind": "NamedType", + "name": "int" + } + }, + { + "id": "scikit-learn/sklearn.utils/resample/random_state", + "name": "random_state", + "qname": "sklearn.utils.resample.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for shuffling\nthe data.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils/resample/stratify", + "name": "stratify", + "qname": "sklearn.utils.resample.stratify", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "array-like of shape (n_samples,) or (n_samples, n_outputs)", + "default_value": "None", + "description": "If not None, data is split in a stratified fashion, using this as\nthe class labels." + }, + "type": { + "kind": "NamedType", + "name": "array-like of shape (n_samples,) or (n_samples, n_outputs)" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Resample arrays or sparse matrices in a consistent way.\n\nThe default strategy implements one step of the bootstrapping\nprocedure.", + "docstring": "Resample arrays or sparse matrices in a consistent way.\n\nThe default strategy implements one step of the bootstrapping\nprocedure.\n\nParameters\n----------\n*arrays : sequence of array-like of shape (n_samples,) or (n_samples, n_outputs)\n Indexable data-structures can be arrays, lists, dataframes or scipy\n sparse matrices with consistent first dimension.\n\nreplace : bool, default=True\n Implements resampling with replacement. 
If False, this will implement\n (sliced) random permutations.\n\nn_samples : int, default=None\n Number of samples to generate. If left to None this is\n automatically set to the first dimension of the arrays.\n If replace is False it should not be larger than the length of\n arrays.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for shuffling\n the data.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary <random_state>`.\n\nstratify : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n If not None, data is split in a stratified fashion, using this as\n the class labels.\n\nReturns\n-------\nresampled_arrays : sequence of array-like of shape (n_samples,) or (n_samples, n_outputs)\n Sequence of resampled copies of the collections. The original arrays\n are not impacted.\n\nExamples\n--------\nIt is possible to mix sparse and dense arrays in the same run::\n\n >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])\n >>> y = np.array([0, 1, 2])\n\n >>> from scipy.sparse import coo_matrix\n >>> X_sparse = coo_matrix(X)\n\n >>> from sklearn.utils import resample\n >>> X, X_sparse, y = resample(X, X_sparse, y, random_state=0)\n >>> X\n array([[1., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> X_sparse\n <3x2 sparse matrix of type '<... 'numpy.float64'>'\n with 4 stored elements in Compressed Sparse Row format>\n\n >>> X_sparse.toarray()\n array([[1., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> y\n array([0, 1, 0])\n\n >>> resample(y, n_samples=2, random_state=0)\n array([0, 1])\n\nExample using stratification::\n\n >>> y = [0, 0, 1, 1, 1, 1, 1, 1, 1]\n >>> resample(y, n_samples=5, replace=False, stratify=y,\n ... random_state=0)\n [1, 1, 1, 0, 1]\n\nSee Also\n--------\nshuffle", + "code": "def resample(*arrays,\n replace=True,\n n_samples=None,\n random_state=None,\n stratify=None):\n \"\"\"Resample arrays or sparse matrices in a consistent way.\n\n The default strategy implements one step of the bootstrapping\n procedure.\n\n Parameters\n ----------\n *arrays : sequence of array-like of shape (n_samples,) or \\\n (n_samples, n_outputs)\n Indexable data-structures can be arrays, lists, dataframes or scipy\n sparse matrices with consistent first dimension.\n\n replace : bool, default=True\n Implements resampling with replacement. If False, this will implement\n (sliced) random permutations.\n\n n_samples : int, default=None\n Number of samples to generate. If left to None this is\n automatically set to the first dimension of the arrays.\n If replace is False it should not be larger than the length of\n arrays.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for shuffling\n the data.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary <random_state>`.\n\n stratify : array-like of shape (n_samples,) or (n_samples, n_outputs), \\\n default=None\n If not None, data is split in a stratified fashion, using this as\n the class labels.\n\n Returns\n -------\n resampled_arrays : sequence of array-like of shape (n_samples,) or \\\n (n_samples, n_outputs)\n Sequence of resampled copies of the collections. 
The original arrays\n are not impacted.\n\n Examples\n --------\n It is possible to mix sparse and dense arrays in the same run::\n\n >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])\n >>> y = np.array([0, 1, 2])\n\n >>> from scipy.sparse import coo_matrix\n >>> X_sparse = coo_matrix(X)\n\n >>> from sklearn.utils import resample\n >>> X, X_sparse, y = resample(X, X_sparse, y, random_state=0)\n >>> X\n array([[1., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> X_sparse\n <3x2 sparse matrix of type '<... 'numpy.float64'>'\n with 4 stored elements in Compressed Sparse Row format>\n\n >>> X_sparse.toarray()\n array([[1., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> y\n array([0, 1, 0])\n\n >>> resample(y, n_samples=2, random_state=0)\n array([0, 1])\n\n Example using stratification::\n\n >>> y = [0, 0, 1, 1, 1, 1, 1, 1, 1]\n >>> resample(y, n_samples=5, replace=False, stratify=y,\n ... random_state=0)\n [1, 1, 1, 0, 1]\n\n See Also\n --------\n shuffle\n \"\"\"\n max_n_samples = n_samples\n random_state = check_random_state(random_state)\n\n if len(arrays) == 0:\n return None\n\n first = arrays[0]\n n_samples = first.shape[0] if hasattr(first, 'shape') else len(first)\n\n if max_n_samples is None:\n max_n_samples = n_samples\n elif (max_n_samples > n_samples) and (not replace):\n raise ValueError(\"Cannot sample %d out of arrays with dim %d \"\n \"when replace is False\" % (max_n_samples,\n n_samples))\n\n check_consistent_length(*arrays)\n\n if stratify is None:\n if replace:\n indices = random_state.randint(0, n_samples, size=(max_n_samples,))\n else:\n indices = np.arange(n_samples)\n random_state.shuffle(indices)\n indices = indices[:max_n_samples]\n else:\n # Code adapted from StratifiedShuffleSplit()\n y = check_array(stratify, ensure_2d=False, dtype=None)\n if y.ndim == 2:\n # for multi-label y, map each distinct row to a string repr\n # using join because str(row) uses an ellipsis if len(row) > 1000\n y = np.array([' '.join(row.astype('str')) for row in y])\n\n classes, y_indices = np.unique(y, return_inverse=True)\n n_classes = classes.shape[0]\n\n class_counts = np.bincount(y_indices)\n\n # Find the sorted list of instances for each class:\n # (np.unique above performs a sort, so code is O(n logn) already)\n class_indices = np.split(np.argsort(y_indices, kind='mergesort'),\n np.cumsum(class_counts)[:-1])\n\n n_i = _approximate_mode(class_counts, max_n_samples, random_state)\n\n indices = []\n\n for i in range(n_classes):\n indices_i = random_state.choice(class_indices[i], n_i[i],\n replace=replace)\n indices.extend(indices_i)\n\n indices = random_state.permutation(indices)\n\n # convert sparse matrices to CSR for row-based indexing\n arrays = [a.tocsr() if issparse(a) else a for a in arrays]\n resampled_arrays = [_safe_indexing(a, indices) for a in arrays]\n if len(resampled_arrays) == 1:\n # syntactic sugar for the unit argument case\n return resampled_arrays[0]\n else:\n return resampled_arrays" + }, + { + "id": "scikit-learn/sklearn.utils/safe_mask", + "name": "safe_mask", + "qname": "sklearn.utils.safe_mask", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/safe_mask/X", + "name": "X", + "qname": "sklearn.utils.safe_mask.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, sparse matrix}", + "default_value": "", + "description": "Data on which to apply mask." 
+ }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils/safe_mask/mask", + "name": "mask", + "qname": "sklearn.utils.safe_mask.mask", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "ndarray", + "default_value": "", + "description": "Mask to be used on X." + }, + "type": { + "kind": "NamedType", + "name": "ndarray" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Return a mask which is safe to use on X.", + "docstring": "Return a mask which is safe to use on X.\n\nParameters\n----------\nX : {array-like, sparse matrix}\n Data on which to apply mask.\n\nmask : ndarray\n Mask to be used on X.\n\nReturns\n-------\n mask", + "code": "def safe_mask(X, mask):\n \"\"\"Return a mask which is safe to use on X.\n\n Parameters\n ----------\n X : {array-like, sparse matrix}\n Data on which to apply mask.\n\n mask : ndarray\n Mask to be used on X.\n\n Returns\n -------\n mask\n \"\"\"\n mask = np.asarray(mask)\n if np.issubdtype(mask.dtype, np.signedinteger):\n return mask\n\n if hasattr(X, \"toarray\"):\n ind = np.arange(mask.shape[0])\n mask = ind[mask]\n return mask" + }, + { + "id": "scikit-learn/sklearn.utils/safe_sqr", + "name": "safe_sqr", + "qname": "sklearn.utils.safe_sqr", + "decorators": ["_deprecate_positional_args"], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/safe_sqr/X", + "name": "X", + "qname": "sklearn.utils.safe_sqr.X", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "{array-like, ndarray, sparse matrix}", + "default_value": "", + "description": "" + }, + "type": { + "kind": "EnumType", + "values": [] + } + }, + { + "id": "scikit-learn/sklearn.utils/safe_sqr/copy", + "name": "copy", + "qname": "sklearn.utils.safe_sqr.copy", + "default_value": "True", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "bool", + "default_value": "True", + "description": "Whether to create a copy of X and operate on it or to perform\ninplace computation (default behaviour)." 
+ }, + "type": { + "kind": "NamedType", + "name": "bool" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Element wise squaring of array-likes and sparse matrices.", + "docstring": "Element wise squaring of array-likes and sparse matrices.\n\nParameters\n----------\nX : {array-like, ndarray, sparse matrix}\n\ncopy : bool, default=True\n Whether to create a copy of X and operate on it or to perform\n inplace computation (default behaviour).\n\nReturns\n-------\nX ** 2 : element wise square", + "code": "@_deprecate_positional_args\ndef safe_sqr(X, *, copy=True):\n \"\"\"Element wise squaring of array-likes and sparse matrices.\n\n Parameters\n ----------\n X : {array-like, ndarray, sparse matrix}\n\n copy : bool, default=True\n Whether to create a copy of X and operate on it or to perform\n inplace computation (default behaviour).\n\n Returns\n -------\n X ** 2 : element wise square\n \"\"\"\n X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], ensure_2d=False)\n if issparse(X):\n if copy:\n X = X.copy()\n X.data **= 2\n else:\n if copy:\n X = X ** 2\n else:\n X **= 2\n return X" + }, + { + "id": "scikit-learn/sklearn.utils/shuffle", + "name": "shuffle", + "qname": "sklearn.utils.shuffle", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/shuffle/arrays", + "name": "arrays", + "qname": "sklearn.utils.shuffle.arrays", + "default_value": null, + "assigned_by": "POSITIONAL_VARARG", + "is_public": true, + "docstring": { + "type": "sequence of indexable data-structures", + "default_value": "", + "description": "Indexable data-structures can be arrays, lists, dataframes or scipy\nsparse matrices with consistent first dimension." + }, + "type": { + "kind": "NamedType", + "name": "sequence of indexable data-structures" + } + }, + { + "id": "scikit-learn/sklearn.utils/shuffle/random_state", + "name": "random_state", + "qname": "sklearn.utils.shuffle.random_state", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int, RandomState instance or None", + "default_value": "None", + "description": "Determines random number generation for shuffling\nthe data.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary `." + }, + "type": { + "kind": "UnionType", + "types": [ + { + "kind": "NamedType", + "name": "int" + }, + { + "kind": "NamedType", + "name": "RandomState instance" + }, + { + "kind": "NamedType", + "name": "None" + } + ] + } + }, + { + "id": "scikit-learn/sklearn.utils/shuffle/n_samples", + "name": "n_samples", + "qname": "sklearn.utils.shuffle.n_samples", + "default_value": "None", + "assigned_by": "NAME_ONLY", + "is_public": true, + "docstring": { + "type": "int", + "default_value": "None", + "description": "Number of samples to generate. If left to None this is\nautomatically set to the first dimension of the arrays. It should\nnot be larger than the length of arrays." 
+ }, + "type": { + "kind": "NamedType", + "name": "int" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Shuffle arrays or sparse matrices in a consistent way.\n\nThis is a convenience alias to ``resample(*arrays, replace=False)`` to do\nrandom permutations of the collections.", + "docstring": "Shuffle arrays or sparse matrices in a consistent way.\n\nThis is a convenience alias to ``resample(*arrays, replace=False)`` to do\nrandom permutations of the collections.\n\nParameters\n----------\n*arrays : sequence of indexable data-structures\n Indexable data-structures can be arrays, lists, dataframes or scipy\n sparse matrices with consistent first dimension.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for shuffling\n the data.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nn_samples : int, default=None\n Number of samples to generate. If left to None this is\n automatically set to the first dimension of the arrays. It should\n not be larger than the length of arrays.\n\nReturns\n-------\nshuffled_arrays : sequence of indexable data-structures\n Sequence of shuffled copies of the collections. The original arrays\n are not impacted.\n\nExamples\n--------\nIt is possible to mix sparse and dense arrays in the same run::\n\n >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])\n >>> y = np.array([0, 1, 2])\n\n >>> from scipy.sparse import coo_matrix\n >>> X_sparse = coo_matrix(X)\n\n >>> from sklearn.utils import shuffle\n >>> X, X_sparse, y = shuffle(X, X_sparse, y, random_state=0)\n >>> X\n array([[0., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> X_sparse\n <3x2 sparse matrix of type '<... 'numpy.float64'>'\n with 3 stored elements in Compressed Sparse Row format>\n\n >>> X_sparse.toarray()\n array([[0., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> y\n array([2, 1, 0])\n\n >>> shuffle(y, n_samples=2, random_state=0)\n array([0, 1])\n\nSee Also\n--------\nresample", + "code": "def shuffle(*arrays, random_state=None, n_samples=None):\n \"\"\"Shuffle arrays or sparse matrices in a consistent way.\n\n This is a convenience alias to ``resample(*arrays, replace=False)`` to do\n random permutations of the collections.\n\n Parameters\n ----------\n *arrays : sequence of indexable data-structures\n Indexable data-structures can be arrays, lists, dataframes or scipy\n sparse matrices with consistent first dimension.\n\n random_state : int, RandomState instance or None, default=None\n Determines random number generation for shuffling\n the data.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\n n_samples : int, default=None\n Number of samples to generate. If left to None this is\n automatically set to the first dimension of the arrays. It should\n not be larger than the length of arrays.\n\n Returns\n -------\n shuffled_arrays : sequence of indexable data-structures\n Sequence of shuffled copies of the collections. The original arrays\n are not impacted.\n\n Examples\n --------\n It is possible to mix sparse and dense arrays in the same run::\n\n >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])\n >>> y = np.array([0, 1, 2])\n\n >>> from scipy.sparse import coo_matrix\n >>> X_sparse = coo_matrix(X)\n\n >>> from sklearn.utils import shuffle\n >>> X, X_sparse, y = shuffle(X, X_sparse, y, random_state=0)\n >>> X\n array([[0., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> X_sparse\n <3x2 sparse matrix of type '<... 
'numpy.float64'>'\n with 3 stored elements in Compressed Sparse Row format>\n\n >>> X_sparse.toarray()\n array([[0., 0.],\n [2., 1.],\n [1., 0.]])\n\n >>> y\n array([2, 1, 0])\n\n >>> shuffle(y, n_samples=2, random_state=0)\n array([0, 1])\n\n See Also\n --------\n resample\n \"\"\"\n return resample(*arrays, replace=False, n_samples=n_samples,\n random_state=random_state)" + }, + { + "id": "scikit-learn/sklearn.utils/tosequence", + "name": "tosequence", + "qname": "sklearn.utils.tosequence", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn.utils/tosequence/x", + "name": "x", + "qname": "sklearn.utils.tosequence.x", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "iterable", + "default_value": "", + "description": "" + }, + "type": { + "kind": "NamedType", + "name": "iterable" + } + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Cast iterable x to a Sequence, avoiding a copy if possible.", + "docstring": "Cast iterable x to a Sequence, avoiding a copy if possible.\n\nParameters\n----------\nx : iterable", + "code": "def tosequence(x):\n \"\"\"Cast iterable x to a Sequence, avoiding a copy if possible.\n\n Parameters\n ----------\n x : iterable\n \"\"\"\n if isinstance(x, np.ndarray):\n return np.asarray(x)\n elif isinstance(x, Sequence):\n return x\n else:\n return list(x)" + }, + { + "id": "scikit-learn/sklearn/setup_module", + "name": "setup_module", + "qname": "sklearn.setup_module", + "decorators": [], + "parameters": [ + { + "id": "scikit-learn/sklearn/setup_module/module", + "name": "module", + "qname": "sklearn.setup_module.module", + "default_value": null, + "assigned_by": "POSITION_OR_NAME", + "is_public": true, + "docstring": { + "type": "", + "default_value": "", + "description": "" + }, + "type": {} + } + ], + "results": [], + "is_public": true, + "reexported_by": [], + "description": "Fixture for the tests to assure globally controllable seeding of RNGs", + "docstring": "Fixture for the tests to assure globally controllable seeding of RNGs", + "code": "def setup_module(module):\n \"\"\"Fixture for the tests to assure globally controllable seeding of RNGs\"\"\"\n\n import numpy as np\n\n # Check if a random seed exists in the environment, if not create one.\n _random_seed = os.environ.get('SKLEARN_SEED', None)\n if _random_seed is None:\n _random_seed = np.random.uniform() * np.iinfo(np.int32).max\n _random_seed = int(_random_seed)\n print(\"I: Seeding RNGs with %r\" % _random_seed)\n np.random.seed(_random_seed)\n random.seed(_random_seed)" + } + ] +} diff --git a/package-parser/README.md b/package-parser/README.md index 0fac9974d..623d2dfdf 100644 --- a/package-parser/README.md +++ b/package-parser/README.md @@ -2,7 +2,7 @@ A tool to analyze client and API code written in Python. -### Installation +## Installation 1. Install Python 3.10. 2. Install [poetry](https://python-poetry.org/docs/master/#installation). @@ -15,7 +15,7 @@ A tool to analyze client and API code written in Python. poetry shell ``` -### Example usage +## Example usage 1. Analyze an API: ```shell @@ -32,5 +32,5 @@ A tool to analyze client and API code written in Python. ``` 4. 
Migrate annotations for a new version of the API: ```shell - parse-package migrate -a1 data/api/sklearn__api.json -a2 data/api/sklearn__apiv2.json -a data/annotations/annotations.json -o out + parse-package migrate -a1 data/api/scikit-learn_v0.24.2_api.json -a2 data/api/sklearn__apiv2.json -a data/annotations/annotations.json -o out ```
diff --git a/package-parser/package_parser/cli/_read_and_write_file.py b/package-parser/package_parser/cli/_read_and_write_file.py new file mode 100644 index 000000000..856584b08 --- /dev/null +++ b/package-parser/package_parser/cli/_read_and_write_file.py @@ -0,0 +1,52 @@ +import json +from pathlib import Path + +from package_parser.cli._json_encoder import CustomEncoder +from package_parser.processing.annotations.model import AnnotationStore +from package_parser.processing.api.model import API +from package_parser.processing.usages.model import UsageCountStore +from package_parser.utils import ensure_file_exists + + +def _read_annotations_file(annotations_file_path: Path) -> AnnotationStore: + with open(annotations_file_path, encoding="utf-8") as annotations_file: + annotations_json = json.load(annotations_file) + + return AnnotationStore.from_json(annotations_json) + + +def _write_annotations_file( + annotations: AnnotationStore, annotations_file_path: Path +) -> None: + ensure_file_exists(annotations_file_path) + with annotations_file_path.open("w", encoding="utf-8") as f: + json.dump(annotations.to_json(), f, indent=2) + + +def _read_api_file(api_file_path: Path) -> API: + with open(api_file_path, encoding="utf-8") as api_file: + api_json = json.load(api_file) + + return API.from_json(api_json) + + +def _read_usages_file(usages_file_path: Path) -> UsageCountStore: + with open(usages_file_path, encoding="utf-8") as usages_file: + usages_json = json.load(usages_file) + + return UsageCountStore.from_json(usages_json) + + +def _write_api_file(api: API, out_dir_path: Path) -> Path: + out_file_api = out_dir_path.joinpath(f"{api.package}__api.json") + ensure_file_exists(out_file_api) + with out_file_api.open("w", encoding="utf-8") as f: + json.dump(api.to_json(), f, indent=2, cls=CustomEncoder) + return out_file_api + + +def _write_api_dependency_file(api: API, api_dependencies, out: Path) -> None: + out_file_api_dependencies = out.joinpath(f"{api.package}__api_dependencies.json") + ensure_file_exists(out_file_api_dependencies) + with out_file_api_dependencies.open("w", encoding="utf-8") as f: + json.dump(api_dependencies.to_json(), f, indent=2, cls=CustomEncoder)
diff --git a/package-parser/package_parser/cli/_run_annotations.py b/package-parser/package_parser/cli/_run_annotations.py index ecb6221c8..4fa77a93b 100644 --- a/package-parser/package_parser/cli/_run_annotations.py +++ b/package-parser/package_parser/cli/_run_annotations.py @@ -1,11 +1,12 @@ -import json from pathlib import Path from package_parser.processing.annotations import generate_annotations -from package_parser.processing.annotations.model import AnnotationStore -from package_parser.processing.api.model import API -from package_parser.processing.usages.model import UsageCountStore -from package_parser.utils import ensure_file_exists + +from ._read_and_write_file import ( + _read_api_file, + _read_usages_file, + _write_annotations_file, +) def _run_annotations( @@ -23,25 +24,3 @@ def _run_annotations( usages = _read_usages_file(usages_file_path) annotations = generate_annotations(api, usages) _write_annotations_file(annotations, annotations_file_path) - - -def _read_api_file(api_file_path: Path) -> API: - with 
open(api_file_path) as api_file: - api_json = json.load(api_file) - - return API.from_json(api_json) - - -def _read_usages_file(usages_file_path: Path) -> UsageCountStore: - with open(usages_file_path) as usages_file: - usages_json = json.load(usages_file) - - return UsageCountStore.from_json(usages_json) - - -def _write_annotations_file( - annotations: AnnotationStore, annotations_file_path: Path -) -> None: - ensure_file_exists(annotations_file_path) - with annotations_file_path.open("w") as f: - json.dump(annotations.to_json(), f, indent=2) diff --git a/package-parser/package_parser/cli/_run_api.py b/package-parser/package_parser/cli/_run_api.py index a8372d7e6..cf611dfd9 100644 --- a/package-parser/package_parser/cli/_run_api.py +++ b/package-parser/package_parser/cli/_run_api.py @@ -1,13 +1,11 @@ -import json from pathlib import Path from typing import Optional -from package_parser.cli._json_encoder import CustomEncoder -from package_parser.cli._shared_constants import _API_KEY from package_parser.processing.api import get_api -from package_parser.processing.api.model import API from package_parser.processing.dependencies import get_dependencies -from package_parser.utils import ensure_file_exists + +from ._read_and_write_file import _write_api_dependency_file, _write_api_file +from ._shared_constants import _API_KEY def _run_api_command( @@ -24,18 +22,3 @@ def _run_api_command( if result_dict is not None: result_dict[_API_KEY] = api_file_path - - -def _write_api_file(api: API, out_dir_path: Path) -> Path: - out_file_api = out_dir_path.joinpath(f"{api.package}__api.json") - ensure_file_exists(out_file_api) - with out_file_api.open("w") as f: - json.dump(api.to_json(), f, indent=2, cls=CustomEncoder) - return out_file_api - - -def _write_api_dependency_file(api: API, api_dependencies, out): - out_file_api_dependencies = out.joinpath(f"{api.package}__api_dependencies.json") - ensure_file_exists(out_file_api_dependencies) - with out_file_api_dependencies.open("w") as f: - json.dump(api_dependencies.to_json(), f, indent=2, cls=CustomEncoder) diff --git a/package-parser/package_parser/cli/_run_migrate.py b/package-parser/package_parser/cli/_run_migrate.py index 79cf3f67c..1d7027af2 100644 --- a/package-parser/package_parser/cli/_run_migrate.py +++ b/package-parser/package_parser/cli/_run_migrate.py @@ -1,8 +1,13 @@ -import json from pathlib import Path -from package_parser.processing.annotations.model import AnnotationStore -from package_parser.processing.api.model import API +from package_parser.processing.migration import migrate_annotations +from package_parser.processing.migration.model import APIMapping, SimpleDiffer + +from ._read_and_write_file import ( + _read_annotations_file, + _read_api_file, + _write_annotations_file, +) def _run_migrate_command( @@ -11,21 +16,11 @@ def _run_migrate_command( apiv2_file_path: Path, out_dir_path: Path, ) -> None: - # pylint: disable=unused-argument - _read_api_file(apiv1_file_path) - _read_api_file(apiv2_file_path) - _read_annotations_file(annotations_file_path) - - -def _read_annotations_file(annotations_file_path: Path) -> AnnotationStore: - with open(annotations_file_path, encoding="utf-8") as annotations_file: - annotations_json = json.load(annotations_file) - - return AnnotationStore.from_json(annotations_json) - - -def _read_api_file(api_file_path: Path) -> API: - with open(api_file_path, encoding="utf-8") as api_file: - api_json = json.load(api_file) - - return API.from_json(api_json) + apiv1 = _read_api_file(apiv1_file_path) + apiv2 = 
_read_api_file(apiv2_file_path) + annotationsv1 = _read_annotations_file(annotations_file_path) + differ = SimpleDiffer() + api_mapping = APIMapping(apiv1, apiv2, differ) + mappings = api_mapping.map_api() + annotationsv2 = migrate_annotations(annotationsv1, mappings) + _write_annotations_file(annotationsv2, out_dir_path / "migrated_annotations.json")  # _write_annotations_file expects a file path, not a directory; the file name here is one reasonable choice
diff --git a/package-parser/package_parser/processing/annotations/model/_AnnotationStore.py b/package-parser/package_parser/processing/annotations/model/_AnnotationStore.py index 9e1d51161..cf9b3639c 100644 --- a/package-parser/package_parser/processing/annotations/model/_AnnotationStore.py +++ b/package-parser/package_parser/processing/annotations/model/_AnnotationStore.py @@ -5,6 +5,7 @@ from ._annotations import ( ANNOTATION_SCHEMA_VERSION, + AbstractAnnotation, BoundaryAnnotation, CalledAfterAnnotation, CompleteAnnotation, @@ -105,6 +106,32 @@ def from_json(json: Any) -> AnnotationStore: valueAnnotations, ) + def add_annotation(self, annotation: AbstractAnnotation): + if isinstance(annotation, BoundaryAnnotation): + self.boundaryAnnotations.append(annotation) + if isinstance(annotation, CalledAfterAnnotation): + self.calledAfterAnnotations.append(annotation) + if isinstance(annotation, CompleteAnnotation): + self.completeAnnotations.append(annotation) + if isinstance(annotation, DescriptionAnnotation): + self.descriptionAnnotations.append(annotation) + if isinstance(annotation, EnumAnnotation): + self.enumAnnotations.append(annotation) + if isinstance(annotation, GroupAnnotation): + self.groupAnnotations.append(annotation) + if isinstance(annotation, MoveAnnotation): + self.moveAnnotations.append(annotation) + if isinstance(annotation, PureAnnotation): + self.pureAnnotations.append(annotation) + if isinstance(annotation, RemoveAnnotation): + self.removeAnnotations.append(annotation) + if isinstance(annotation, RenameAnnotation): + self.renameAnnotations.append(annotation) + if isinstance(annotation, TodoAnnotation): + self.todoAnnotations.append(annotation) + if isinstance(annotation, ValueAnnotation): + self.valueAnnotations.append(annotation) + def to_json(self) -> dict: return { "schemaVersion": ANNOTATION_SCHEMA_VERSION,
diff --git a/package-parser/package_parser/processing/migration/__init__.py b/package-parser/package_parser/processing/migration/__init__.py index 9235c3315..07d73bd30 100644 --- a/package-parser/package_parser/processing/migration/__init__.py +++ b/package-parser/package_parser/processing/migration/__init__.py @@ -1,9 +1,12 @@ -from ._differ import AbstractDiffer, SimpleDiffer -from ._mapping import ( +from package_parser.processing.migration.model import ( + AbstractDiffer, APIMapping, ManyToManyMapping, ManyToOneMapping, Mapping, OneToManyMapping, OneToOneMapping, + SimpleDiffer, ) + +from ._migrate import migrate_annotations
diff --git a/package-parser/package_parser/processing/migration/_migrate.py b/package-parser/package_parser/processing/migration/_migrate.py new file mode 100644 index 000000000..5efce7ae2 --- /dev/null +++ b/package-parser/package_parser/processing/migration/_migrate.py @@ -0,0 +1,36 @@ +from typing import Optional + +from package_parser.processing.annotations.model import ( + AbstractAnnotation, + AnnotationStore, +) +from package_parser.processing.api.model import Attribute, Result +from package_parser.processing.migration.annotations import migrate_rename_annotation +from package_parser.processing.migration.model import 
Mapping + + +def _get_mapping_from_annotation( + annotation: AbstractAnnotation, mappings: list[Mapping] +) -> Optional[Mapping]: + for mapping in mappings: + for element in mapping.get_apiv1_elements(): + if ( + not isinstance(element, (Attribute, Result)) + and element.id == annotation.target + ): + return mapping + return None + + +def migrate_annotations( + annotationsv1: AnnotationStore, mappings: list[Mapping] +) -> AnnotationStore: + migrated_annotation_store = AnnotationStore() + + for rename_annotation in annotationsv1.renameAnnotations: + mapping = _get_mapping_from_annotation(rename_annotation, mappings) + if mapping is not None: + for annotation in migrate_rename_annotation(rename_annotation, mapping): + migrated_annotation_store.add_annotation(annotation) + + return migrated_annotation_store diff --git a/package-parser/package_parser/processing/migration/annotations/__init__.py b/package-parser/package_parser/processing/migration/annotations/__init__.py new file mode 100644 index 000000000..46404f106 --- /dev/null +++ b/package-parser/package_parser/processing/migration/annotations/__init__.py @@ -0,0 +1,2 @@ +from ._constants import migration_author +from ._migrate_rename_annotation import migrate_rename_annotation diff --git a/package-parser/package_parser/processing/migration/annotations/_constants.py b/package-parser/package_parser/processing/migration/annotations/_constants.py new file mode 100644 index 000000000..e192bad93 --- /dev/null +++ b/package-parser/package_parser/processing/migration/annotations/_constants.py @@ -0,0 +1 @@ +migration_author = "migration" diff --git a/package-parser/package_parser/processing/migration/annotations/_migrate_rename_annotation.py b/package-parser/package_parser/processing/migration/annotations/_migrate_rename_annotation.py new file mode 100644 index 000000000..bad401591 --- /dev/null +++ b/package-parser/package_parser/processing/migration/annotations/_migrate_rename_annotation.py @@ -0,0 +1,64 @@ +from copy import deepcopy + +from package_parser.processing.annotations.model import ( + AbstractAnnotation, + EnumReviewResult, + RenameAnnotation, + TodoAnnotation, +) +from package_parser.processing.api.model import Attribute, Result +from package_parser.processing.migration.model import ( + ManyToOneMapping, + Mapping, + OneToOneMapping, +) + +from ._constants import migration_author + + +def migrate_rename_annotation( + rename_annotation: RenameAnnotation, mapping: Mapping +) -> list[AbstractAnnotation]: + rename_annotation = deepcopy(rename_annotation) + new_name = rename_annotation.newName + authors = rename_annotation.authors + authors.append(migration_author) + rename_annotation.authors = authors + + if isinstance(mapping, (ManyToOneMapping, OneToOneMapping)): + element = mapping.get_apiv2_elements()[0] + if isinstance(element, (Attribute, Result)): + return [] + rename_annotation.target = element.id + return [rename_annotation] + + migrate_text = ( + "The @Rename Annotation with the new name '" + + rename_annotation.newName + + "' from the previous version was at '" + + rename_annotation.target + + "' and the possible alternatives in the new version of the api are: " + + ", ".join( + map(lambda api_element: api_element.name, mapping.get_apiv2_elements()) + ) + ) + + todo_annotations: list[AbstractAnnotation] = [] + for element in mapping.get_apiv2_elements(): + if not isinstance(element, (Attribute, Result)): + if element.name in ( + new_name, + rename_annotation.target.split(".")[-1], + ): + rename_annotation.target = element.id 
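+ # the apiv2 element matches the annotation's new name or its old target name: keep the @Rename, mark it UNSURE, and attach the migration note for manual review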
+ rename_annotation.reviewResult = EnumReviewResult.UNSURE + if len(rename_annotation.comment) > 0: + rename_annotation.comment += "\n" + rename_annotation.comment += migrate_text + return [rename_annotation] + todo_annotations.append( + TodoAnnotation( + element.id, authors, [], "", EnumReviewResult.NONE, migrate_text + ) + ) + return todo_annotations diff --git a/package-parser/package_parser/processing/migration/model/__init__.py b/package-parser/package_parser/processing/migration/model/__init__.py new file mode 100644 index 000000000..9235c3315 --- /dev/null +++ b/package-parser/package_parser/processing/migration/model/__init__.py @@ -0,0 +1,9 @@ +from ._differ import AbstractDiffer, SimpleDiffer +from ._mapping import ( + APIMapping, + ManyToManyMapping, + ManyToOneMapping, + Mapping, + OneToManyMapping, + OneToOneMapping, +) diff --git a/package-parser/package_parser/processing/migration/_differ.py b/package-parser/package_parser/processing/migration/model/_differ.py similarity index 100% rename from package-parser/package_parser/processing/migration/_differ.py rename to package-parser/package_parser/processing/migration/model/_differ.py diff --git a/package-parser/package_parser/processing/migration/_mapping.py b/package-parser/package_parser/processing/migration/model/_mapping.py similarity index 100% rename from package-parser/package_parser/processing/migration/_mapping.py rename to package-parser/package_parser/processing/migration/model/_mapping.py diff --git a/package-parser/tests/processing/migration/test_migration.py b/package-parser/tests/processing/migration/test_migration.py new file mode 100644 index 000000000..d6eefdde9 --- /dev/null +++ b/package-parser/tests/processing/migration/test_migration.py @@ -0,0 +1,220 @@ +from typing import Callable, Tuple + +import pytest +from package_parser.processing.annotations.model import ( + AbstractAnnotation, + AnnotationStore, + EnumReviewResult, + RenameAnnotation, + TodoAnnotation, +) +from package_parser.processing.api.model import ( + Parameter, + ParameterAssignment, + ParameterDocumentation, +) +from package_parser.processing.migration import migrate_annotations +from package_parser.processing.migration.annotations import ( + migrate_rename_annotation, + migration_author, +) +from package_parser.processing.migration.model import ( + Mapping, + OneToManyMapping, + OneToOneMapping, +) + + +def migrate_rename_annotation_data_one_to_one_mapping() -> Tuple[ + Mapping, + AbstractAnnotation, + list[AbstractAnnotation], + Callable[[RenameAnnotation, Mapping], list[AbstractAnnotation]], +]: + parameterv1 = Parameter( + id_="test/test.Test_", + name="Test", + qname="test.Test", + default_value=None, + assigned_by=ParameterAssignment.POSITION_OR_NAME, + is_public=True, + documentation=ParameterDocumentation("", "", ""), + ) + parameterv2 = Parameter( + id_="test/test.TestB", + name="TestB", + qname="test.TestB", + default_value=None, + assigned_by=ParameterAssignment.POSITION_OR_NAME, + is_public=True, + documentation=ParameterDocumentation("", "", ""), + ) + mappings = OneToOneMapping(1.0, parameterv1, parameterv2) + annotationsv1 = RenameAnnotation( + target="test/test.Test_", + authors=["testauthor"], + reviewers=[], + comment="", + reviewResult=EnumReviewResult.NONE, + newName="TestE", + ) + annotationsv2 = RenameAnnotation( + target="test/test.TestB", + authors=["testauthor", migration_author], + reviewers=[], + comment="", + reviewResult=EnumReviewResult.NONE, + newName="TestE", + ) + return mappings, annotationsv1, 
[annotationsv2], migrate_rename_annotation + + +def migrate_rename_annotation_data_one_to_many_mapping__with_changed_new_name() -> Tuple[ + Mapping, + AbstractAnnotation, + list[AbstractAnnotation], + Callable[[RenameAnnotation, Mapping], list[AbstractAnnotation]], +]: + parameterv1 = Parameter( + id_="test/test.Test", + name="Test", + qname="test.Test", + default_value=None, + assigned_by=ParameterAssignment.POSITION_OR_NAME, + is_public=True, + documentation=ParameterDocumentation("", "", ""), + ) + parameterv2_a = Parameter( + id_="test/test.TestA", + name="TestA", + qname="test.TestA", + default_value=None, + assigned_by=ParameterAssignment.POSITION_OR_NAME, + is_public=True, + documentation=ParameterDocumentation("", "", ""), + ) + parameterv2_b = Parameter( + id_="test/test.TestB", + name="TestB", + qname="test.TestB", + default_value=None, + assigned_by=ParameterAssignment.POSITION_OR_NAME, + is_public=True, + documentation=ParameterDocumentation("", "", ""), + ) + mappings = OneToManyMapping(1.0, parameterv1, [parameterv2_a, parameterv2_b]) + annotationsv1 = RenameAnnotation( + target="test/test.Test", + authors=["testauthor"], + reviewers=[], + comment="", + reviewResult=EnumReviewResult.NONE, + newName="TestA", + ) + annotationsv2 = RenameAnnotation( + target="test/test.TestA", + authors=["testauthor", migration_author], + reviewers=[], + comment="The @Rename Annotation with the new name 'TestA' from the previous version was at 'test/test.Test' and the possible alternatives in the new version of the api are: TestA, TestB", + reviewResult=EnumReviewResult.UNSURE, + newName="TestA", + ) + return mappings, annotationsv1, [annotationsv2], migrate_rename_annotation + + +def migrate_rename_annotation_data_one_to_many_mapping() -> Tuple[ + Mapping, + AbstractAnnotation, + list[AbstractAnnotation], + Callable[[RenameAnnotation, Mapping], list[AbstractAnnotation]], +]: + parameterv1 = Parameter( + id_="test/test.Test", + name="Test", + qname="test.Test", + default_value=None, + assigned_by=ParameterAssignment.POSITION_OR_NAME, + is_public=True, + documentation=ParameterDocumentation("", "", ""), + ) + parameterv2_a = Parameter( + id_="test/test.TestA", + name="TestA", + qname="test.TestA", + default_value=None, + assigned_by=ParameterAssignment.POSITION_OR_NAME, + is_public=True, + documentation=ParameterDocumentation("", "", ""), + ) + parameterv2_b = Parameter( + id_="test/test.TestB", + name="TestB", + qname="test.TestB", + default_value=None, + assigned_by=ParameterAssignment.POSITION_OR_NAME, + is_public=True, + documentation=ParameterDocumentation("", "", ""), + ) + mappings = OneToManyMapping(1.0, parameterv1, [parameterv2_a, parameterv2_b]) + annotationsv1 = RenameAnnotation( + target="test/test.Test", + authors=["testauthor"], + reviewers=[], + comment="", + reviewResult=EnumReviewResult.NONE, + newName="TestZ", + ) + annotationsv2_a = TodoAnnotation( + target="test/test.TestA", + authors=["testauthor", migration_author], + reviewers=[], + comment="", + reviewResult=EnumReviewResult.NONE, + newTodo="The @Rename Annotation with the new name 'TestZ' from the previous version was at 'test/test.Test' and the possible alternatives in the new version of the api are: TestA, TestB", + ) + annotationsv2_b = TodoAnnotation( + target="test/test.TestB", + authors=["testauthor", migration_author], + reviewers=[], + comment="", + reviewResult=EnumReviewResult.NONE, + newTodo="The @Rename Annotation with the new name 'TestZ' from the previous version was at 'test/test.Test' and the possible 
alternatives in the new version of the api are: TestA, TestB", + ) + return ( + mappings, + annotationsv1, + [annotationsv2_a, annotationsv2_b], + migrate_rename_annotation, + ) + + +test_data = [ + migrate_rename_annotation_data_one_to_many_mapping__with_changed_new_name(), + migrate_rename_annotation_data_one_to_one_mapping(), + migrate_rename_annotation_data_one_to_many_mapping(), +] + + +@pytest.mark.parametrize( + "mappings,annotationv1,expected_annotationsv2,migrate", test_data +) +def test_migrate_annotations( + mappings: Mapping, + annotationv1: AbstractAnnotation, + expected_annotationsv2: list[AbstractAnnotation], + migrate: Callable[[AbstractAnnotation, Mapping], list[AbstractAnnotation]], +): + assert migrate(annotationv1, mappings) == expected_annotationsv2 + + +def test_migrate_all_annotations(): + mappings: list[Mapping] = [] + annotation_store: AnnotationStore = AnnotationStore() + expected_annotation_store: AnnotationStore = AnnotationStore() + + for mapping, annotationv1, annotationsv2, _ in test_data: + mappings.append(mapping) + annotation_store.add_annotation(annotationv1) + for expected_annotation in annotationsv2: + expected_annotation_store.add_annotation(expected_annotation) + assert migrate_annotations(annotation_store, mappings) == expected_annotation_store
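
For reference, the migration pipeline introduced above can also be driven programmatically, mirroring the steps of `_run_migrate_command`. The sketch below only uses classes and functions added in this change; the JSON paths reuse the README's example files and are illustrative:

```python
import json
from pathlib import Path

from package_parser.processing.annotations.model import AnnotationStore
from package_parser.processing.api.model import API
from package_parser.processing.migration import migrate_annotations
from package_parser.processing.migration.model import APIMapping, SimpleDiffer

# Load both API versions and the annotations made against apiv1
# (file paths are the README's example data files).
apiv1 = API.from_json(
    json.loads(Path("data/api/scikit-learn_v0.24.2_api.json").read_text(encoding="utf-8"))
)
apiv2 = API.from_json(
    json.loads(Path("data/api/sklearn__apiv2.json").read_text(encoding="utf-8"))
)
annotationsv1 = AnnotationStore.from_json(
    json.loads(Path("data/annotations/annotations.json").read_text(encoding="utf-8"))
)

# Map apiv1 elements onto apiv2 elements, then rewrite the annotations onto apiv2.
mappings = APIMapping(apiv1, apiv2, SimpleDiffer()).map_api()
annotationsv2 = migrate_annotations(annotationsv1, mappings)
print(json.dumps(annotationsv2.to_json(), indent=2))
```

This is the same sequence the CLI runs between reading its input files and writing the migrated annotation store.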